xref: /linux/drivers/gpu/drm/xe/xe_vm.c (revision 965c995c9a4b395471ff48790a0155ee986ca405)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_vm.h"
7 
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21 
22 #include <generated/xe_wa_oob.h>
23 
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_tlb_invalidation.h"
32 #include "xe_migrate.h"
33 #include "xe_pat.h"
34 #include "xe_pm.h"
35 #include "xe_preempt_fence.h"
36 #include "xe_pt.h"
37 #include "xe_pxp.h"
38 #include "xe_res_cursor.h"
39 #include "xe_svm.h"
40 #include "xe_sync.h"
41 #include "xe_trace_bo.h"
42 #include "xe_wa.h"
43 #include "xe_hmm.h"
44 
45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 	return vm->gpuvm.r_obj;
48 }
49 
50 /**
51  * xe_vma_userptr_check_repin() - Advisory check for repin needed
52  * @uvma: The userptr vma
53  *
54  * Check if the userptr vma has been invalidated since last successful
55  * repin. The check is advisory only and the function can be called
56  * without the vm->userptr.notifier_lock held. There is no guarantee that the
57  * vma userptr will remain valid after a lockless check, so typically
58  * the call needs to be followed by a proper check under the notifier_lock.
59  *
60  * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
61  */
62 int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
63 {
64 	return mmu_interval_check_retry(&uvma->userptr.notifier,
65 					uvma->userptr.notifier_seq) ?
66 		-EAGAIN : 0;
67 }
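
/*
 * Usage sketch (editor's illustration, not taken from this file): callers
 * typically do the cheap lockless check first, repin if it fails, and only
 * trust the result once it is re-checked under the notifier lock, e.g.:
 *
 *	if (xe_vma_userptr_check_repin(uvma))
 *		err = xe_vma_userptr_pin_pages(uvma);
 *
 *	down_read(&vm->userptr.notifier_lock);
 *	if (xe_vma_userptr_check_repin(uvma))
 *		err = -EAGAIN;
 *	up_read(&vm->userptr.notifier_lock);
 */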
68 
69 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
70 {
71 	struct xe_vma *vma = &uvma->vma;
72 	struct xe_vm *vm = xe_vma_vm(vma);
73 	struct xe_device *xe = vm->xe;
74 
75 	lockdep_assert_held(&vm->lock);
76 	xe_assert(xe, xe_vma_is_userptr(vma));
77 
78 	return xe_hmm_userptr_populate_range(uvma, false);
79 }
80 
81 static bool preempt_fences_waiting(struct xe_vm *vm)
82 {
83 	struct xe_exec_queue *q;
84 
85 	lockdep_assert_held(&vm->lock);
86 	xe_vm_assert_held(vm);
87 
88 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
89 		if (!q->lr.pfence ||
90 		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
91 			     &q->lr.pfence->flags)) {
92 			return true;
93 		}
94 	}
95 
96 	return false;
97 }
98 
99 static void free_preempt_fences(struct list_head *list)
100 {
101 	struct list_head *link, *next;
102 
103 	list_for_each_safe(link, next, list)
104 		xe_preempt_fence_free(to_preempt_fence_from_link(link));
105 }
106 
107 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
108 				unsigned int *count)
109 {
110 	lockdep_assert_held(&vm->lock);
111 	xe_vm_assert_held(vm);
112 
113 	if (*count >= vm->preempt.num_exec_queues)
114 		return 0;
115 
116 	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
117 		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
118 
119 		if (IS_ERR(pfence))
120 			return PTR_ERR(pfence);
121 
122 		list_move_tail(xe_preempt_fence_link(pfence), list);
123 	}
124 
125 	return 0;
126 }
127 
128 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
129 {
130 	struct xe_exec_queue *q;
131 
132 	xe_vm_assert_held(vm);
133 
134 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
135 		if (q->lr.pfence) {
136 			long timeout = dma_fence_wait(q->lr.pfence, false);
137 
138 			/* Only -ETIME on fence indicates VM needs to be killed */
139 			if (timeout < 0 || q->lr.pfence->error == -ETIME)
140 				return -ETIME;
141 
142 			dma_fence_put(q->lr.pfence);
143 			q->lr.pfence = NULL;
144 		}
145 	}
146 
147 	return 0;
148 }
149 
150 static bool xe_vm_is_idle(struct xe_vm *vm)
151 {
152 	struct xe_exec_queue *q;
153 
154 	xe_vm_assert_held(vm);
155 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
156 		if (!xe_exec_queue_is_idle(q))
157 			return false;
158 	}
159 
160 	return true;
161 }
162 
163 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
164 {
165 	struct list_head *link;
166 	struct xe_exec_queue *q;
167 
168 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
169 		struct dma_fence *fence;
170 
171 		link = list->next;
172 		xe_assert(vm->xe, link != list);
173 
174 		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
175 					     q, q->lr.context,
176 					     ++q->lr.seqno);
177 		dma_fence_put(q->lr.pfence);
178 		q->lr.pfence = fence;
179 	}
180 }
181 
182 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
183 {
184 	struct xe_exec_queue *q;
185 	int err;
186 
187 	xe_bo_assert_held(bo);
188 
189 	if (!vm->preempt.num_exec_queues)
190 		return 0;
191 
192 	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
193 	if (err)
194 		return err;
195 
196 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
197 		if (q->lr.pfence) {
198 			dma_resv_add_fence(bo->ttm.base.resv,
199 					   q->lr.pfence,
200 					   DMA_RESV_USAGE_BOOKKEEP);
201 		}
202 
203 	return 0;
204 }
205 
206 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
207 						struct drm_exec *exec)
208 {
209 	struct xe_exec_queue *q;
210 
211 	lockdep_assert_held(&vm->lock);
212 	xe_vm_assert_held(vm);
213 
214 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
215 		q->ops->resume(q);
216 
217 		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
218 					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
219 	}
220 }
221 
222 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
223 {
224 	struct drm_gpuvm_exec vm_exec = {
225 		.vm = &vm->gpuvm,
226 		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
227 		.num_fences = 1,
228 	};
229 	struct drm_exec *exec = &vm_exec.exec;
230 	struct dma_fence *pfence;
231 	int err;
232 	bool wait;
233 
234 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
235 
236 	down_write(&vm->lock);
237 	err = drm_gpuvm_exec_lock(&vm_exec);
238 	if (err)
239 		goto out_up_write;
240 
241 	pfence = xe_preempt_fence_create(q, q->lr.context,
242 					 ++q->lr.seqno);
243 	if (!pfence) {
244 		err = -ENOMEM;
245 		goto out_fini;
246 	}
247 
248 	list_add(&q->lr.link, &vm->preempt.exec_queues);
249 	++vm->preempt.num_exec_queues;
250 	q->lr.pfence = pfence;
251 
252 	down_read(&vm->userptr.notifier_lock);
253 
254 	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
255 				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
256 
257 	/*
258 	 * Check to see if a preemption on the VM or a userptr invalidation
259 	 * is in flight; if so, trigger this preempt fence to sync state with
260 	 * other preempt fences on the VM.
261 	 */
262 	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
263 	if (wait)
264 		dma_fence_enable_sw_signaling(pfence);
265 
266 	up_read(&vm->userptr.notifier_lock);
267 
268 out_fini:
269 	drm_exec_fini(exec);
270 out_up_write:
271 	up_write(&vm->lock);
272 
273 	return err;
274 }
275 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
276 
277 /**
278  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
279  * @vm: The VM.
280  * @q: The exec_queue
281  *
282  * Note that this function might be called multiple times on the same queue.
283  */
284 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
285 {
286 	if (!xe_vm_in_preempt_fence_mode(vm))
287 		return;
288 
289 	down_write(&vm->lock);
290 	if (!list_empty(&q->lr.link)) {
291 		list_del_init(&q->lr.link);
292 		--vm->preempt.num_exec_queues;
293 	}
294 	if (q->lr.pfence) {
295 		dma_fence_enable_sw_signaling(q->lr.pfence);
296 		dma_fence_put(q->lr.pfence);
297 		q->lr.pfence = NULL;
298 	}
299 	up_write(&vm->lock);
300 }
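
/*
 * Pairing sketch (editor's assumption about typical callers, not taken from
 * this file): for a VM in preempt-fence mode an exec queue is registered at
 * creation time and unregistered on teardown, and the remove side tolerates
 * being called more than once:
 *
 *	err = xe_vm_add_compute_exec_queue(vm, q);
 *	if (err)
 *		goto err_put_queue;
 *	...
 *	xe_vm_remove_compute_exec_queue(vm, q);
 */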
301 
302 /**
303  * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
304  * that need repinning.
305  * @vm: The VM.
306  *
307  * This function checks for whether the VM has userptrs that need repinning,
308  * and provides a release-type barrier on the userptr.notifier_lock after
309  * checking.
310  *
311  * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
312  */
313 int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
314 {
315 	lockdep_assert_held_read(&vm->userptr.notifier_lock);
316 
317 	return (list_empty(&vm->userptr.repin_list) &&
318 		list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
319 }
320 
321 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
322 
323 /**
324  * xe_vm_kill() - VM Kill
325  * @vm: The VM.
326  * @unlocked: Flag indicating the VM's dma-resv is not held
327  *
328  * Kill the VM by setting the banned flag, indicating the VM is no longer
329  * available for use. If in preempt fence mode, also kill all exec queues attached to the VM.
330  */
331 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
332 {
333 	struct xe_exec_queue *q;
334 
335 	lockdep_assert_held(&vm->lock);
336 
337 	if (unlocked)
338 		xe_vm_lock(vm, false);
339 
340 	vm->flags |= XE_VM_FLAG_BANNED;
341 	trace_xe_vm_kill(vm);
342 
343 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
344 		q->ops->kill(q);
345 
346 	if (unlocked)
347 		xe_vm_unlock(vm);
348 
349 	/* TODO: Inform user the VM is banned */
350 }
351 
352 /**
353  * xe_vm_validate_should_retry() - Whether to retry after a validate error.
354  * @exec: The drm_exec object used for locking before validation.
355  * @err: The error returned from ttm_bo_validate().
356  * @end: A ktime_t cookie that should be set to 0 before first use and
357  * that should be reused on subsequent calls.
358  *
359  * With multiple active VMs, under memory pressure, it is possible that
360  * ttm_bo_validate() runs into -EDEADLK and in such a case returns -ENOMEM.
361  * Until ttm properly handles locking in such scenarios, the best thing the
362  * driver can do is retry with a timeout. Check if that is necessary, and
363  * if so unlock the drm_exec's objects while keeping the ticket to prepare
364  * for a rerun.
365  *
366  * Return: true if a retry after drm_exec_init() is recommended;
367  * false otherwise.
368  */
369 bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
370 {
371 	ktime_t cur;
372 
373 	if (err != -ENOMEM)
374 		return false;
375 
376 	cur = ktime_get();
377 	*end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
378 	if (!ktime_before(cur, *end))
379 		return false;
380 
381 	msleep(20);
382 	return true;
383 }
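
/*
 * Usage sketch (editor's illustration, mirroring the pattern the rebind
 * worker below uses): the ktime_t cookie starts at 0 and is reused across
 * attempts so the retries time out as a whole. lock_and_validate() is a
 * placeholder for driver code:
 *
 *	ktime_t end = 0;
 *	int err;
 *
 * retry:
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = lock_and_validate(&exec);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 *	drm_exec_fini(&exec);
 *	if (err && xe_vm_validate_should_retry(&exec, err, &end))
 *		goto retry;
 */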
384 
385 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
386 {
387 	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
388 	struct drm_gpuva *gpuva;
389 	int ret;
390 
391 	lockdep_assert_held(&vm->lock);
392 	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
393 		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
394 			       &vm->rebind_list);
395 
396 	if (!try_wait_for_completion(&vm->xe->pm_block))
397 		return -EAGAIN;
398 
399 	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
400 	if (ret)
401 		return ret;
402 
403 	vm_bo->evicted = false;
404 	return 0;
405 }
406 
407 /**
408  * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
409  * @vm: The vm for which we are rebinding.
410  * @exec: The struct drm_exec with the locked GEM objects.
411  * @num_fences: The number of fences to reserve for the operation, not
412  * including rebinds and validations.
413  *
414  * Validates all evicted gem objects and rebinds their vmas. Note that
415  * rebindings may cause evictions and hence the validation-rebind
416  * sequence is rerun until there are no more objects to validate.
417  *
418  * Return: 0 on success, negative error code on error. In particular,
419  * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
420  * the drm_exec transaction needs to be restarted.
421  */
422 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
423 			  unsigned int num_fences)
424 {
425 	struct drm_gem_object *obj;
426 	unsigned long index;
427 	int ret;
428 
429 	do {
430 		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
431 		if (ret)
432 			return ret;
433 
434 		ret = xe_vm_rebind(vm, false);
435 		if (ret)
436 			return ret;
437 	} while (!list_empty(&vm->gpuvm.evict.list));
438 
439 	drm_exec_for_each_locked_object(exec, index, obj) {
440 		ret = dma_resv_reserve_fences(obj->resv, num_fences);
441 		if (ret)
442 			return ret;
443 	}
444 
445 	return 0;
446 }
447 
448 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
449 				 bool *done)
450 {
451 	int err;
452 
453 	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
454 	if (err)
455 		return err;
456 
457 	if (xe_vm_is_idle(vm)) {
458 		vm->preempt.rebind_deactivated = true;
459 		*done = true;
460 		return 0;
461 	}
462 
463 	if (!preempt_fences_waiting(vm)) {
464 		*done = true;
465 		return 0;
466 	}
467 
468 	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
469 	if (err)
470 		return err;
471 
472 	err = wait_for_existing_preempt_fences(vm);
473 	if (err)
474 		return err;
475 
476 	/*
477 	 * Add validation and rebinding to the locking loop since both can
478 	 * cause evictions which may require blocking dma_resv locks.
479 	 * The fence reservation here is intended for the new preempt fences
480 	 * we attach at the end of the rebind work.
481 	 */
482 	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
483 }
484 
485 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
486 {
487 	struct xe_device *xe = vm->xe;
488 	bool ret = false;
489 
490 	mutex_lock(&xe->rebind_resume_lock);
491 	if (!try_wait_for_completion(&vm->xe->pm_block)) {
492 		ret = true;
493 		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
494 	}
495 	mutex_unlock(&xe->rebind_resume_lock);
496 
497 	return ret;
498 }
499 
500 /**
501  * xe_vm_resume_rebind_worker() - Resume the rebind worker.
502  * @vm: The vm whose preempt worker to resume.
503  *
504  * Resume a preempt worker that was previously suspended by
505  * vm_suspend_rebind_worker().
506  */
507 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
508 {
509 	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
510 }
511 
512 static void preempt_rebind_work_func(struct work_struct *w)
513 {
514 	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
515 	struct drm_exec exec;
516 	unsigned int fence_count = 0;
517 	LIST_HEAD(preempt_fences);
518 	ktime_t end = 0;
519 	int err = 0;
520 	long wait;
521 	int __maybe_unused tries = 0;
522 
523 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
524 	trace_xe_vm_rebind_worker_enter(vm);
525 
526 	down_write(&vm->lock);
527 
528 	if (xe_vm_is_closed_or_banned(vm)) {
529 		up_write(&vm->lock);
530 		trace_xe_vm_rebind_worker_exit(vm);
531 		return;
532 	}
533 
534 retry:
535 	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
536 		up_write(&vm->lock);
537 		return;
538 	}
539 
540 	if (xe_vm_userptr_check_repin(vm)) {
541 		err = xe_vm_userptr_pin(vm);
542 		if (err)
543 			goto out_unlock_outer;
544 	}
545 
546 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
547 
548 	drm_exec_until_all_locked(&exec) {
549 		bool done = false;
550 
551 		err = xe_preempt_work_begin(&exec, vm, &done);
552 		drm_exec_retry_on_contention(&exec);
553 		if (err || done) {
554 			drm_exec_fini(&exec);
555 			if (err && xe_vm_validate_should_retry(&exec, err, &end))
556 				err = -EAGAIN;
557 
558 			goto out_unlock_outer;
559 		}
560 	}
561 
562 	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
563 	if (err)
564 		goto out_unlock;
565 
566 	err = xe_vm_rebind(vm, true);
567 	if (err)
568 		goto out_unlock;
569 
570 	/* Wait on rebinds and munmap style VM unbinds */
571 	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
572 				     DMA_RESV_USAGE_KERNEL,
573 				     false, MAX_SCHEDULE_TIMEOUT);
574 	if (wait <= 0) {
575 		err = -ETIME;
576 		goto out_unlock;
577 	}
578 
579 #define retry_required(__tries, __vm) \
580 	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
581 	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
582 	__xe_vm_userptr_needs_repin(__vm))
583 
584 	down_read(&vm->userptr.notifier_lock);
585 	if (retry_required(tries, vm)) {
586 		up_read(&vm->userptr.notifier_lock);
587 		err = -EAGAIN;
588 		goto out_unlock;
589 	}
590 
591 #undef retry_required
592 
593 	spin_lock(&vm->xe->ttm.lru_lock);
594 	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
595 	spin_unlock(&vm->xe->ttm.lru_lock);
596 
597 	/* Point of no return. */
598 	arm_preempt_fences(vm, &preempt_fences);
599 	resume_and_reinstall_preempt_fences(vm, &exec);
600 	up_read(&vm->userptr.notifier_lock);
601 
602 out_unlock:
603 	drm_exec_fini(&exec);
604 out_unlock_outer:
605 	if (err == -EAGAIN) {
606 		trace_xe_vm_rebind_worker_retry(vm);
607 		goto retry;
608 	}
609 
610 	if (err) {
611 		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
612 		xe_vm_kill(vm, true);
613 	}
614 	up_write(&vm->lock);
615 
616 	free_preempt_fences(&preempt_fences);
617 
618 	trace_xe_vm_rebind_worker_exit(vm);
619 }
620 
621 static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
622 {
623 	struct xe_userptr *userptr = &uvma->userptr;
624 	struct xe_vma *vma = &uvma->vma;
625 	struct dma_resv_iter cursor;
626 	struct dma_fence *fence;
627 	long err;
628 
629 	/*
630 	 * Tell exec and rebind worker they need to repin and rebind this
631 	 * userptr.
632 	 */
633 	if (!xe_vm_in_fault_mode(vm) &&
634 	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
635 		spin_lock(&vm->userptr.invalidated_lock);
636 		list_move_tail(&userptr->invalidate_link,
637 			       &vm->userptr.invalidated);
638 		spin_unlock(&vm->userptr.invalidated_lock);
639 	}
640 
641 	/*
642 	 * Preempt fences turn into schedule disables, pipeline these.
643 	 * Note that even in fault mode, we need to wait for binds and
644 	 * unbinds to complete, and those are attached as BOOKKEEP fences
645 	 * to the vm.
646 	 */
647 	dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
648 			    DMA_RESV_USAGE_BOOKKEEP);
649 	dma_resv_for_each_fence_unlocked(&cursor, fence)
650 		dma_fence_enable_sw_signaling(fence);
651 	dma_resv_iter_end(&cursor);
652 
653 	err = dma_resv_wait_timeout(xe_vm_resv(vm),
654 				    DMA_RESV_USAGE_BOOKKEEP,
655 				    false, MAX_SCHEDULE_TIMEOUT);
656 	XE_WARN_ON(err <= 0);
657 
658 	if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
659 		err = xe_vm_invalidate_vma(vma);
660 		XE_WARN_ON(err);
661 	}
662 
663 	xe_hmm_userptr_unmap(uvma);
664 }
665 
666 static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
667 				   const struct mmu_notifier_range *range,
668 				   unsigned long cur_seq)
669 {
670 	struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
671 	struct xe_vma *vma = &uvma->vma;
672 	struct xe_vm *vm = xe_vma_vm(vma);
673 
674 	xe_assert(vm->xe, xe_vma_is_userptr(vma));
675 	trace_xe_vma_userptr_invalidate(vma);
676 
677 	if (!mmu_notifier_range_blockable(range))
678 		return false;
679 
680 	vm_dbg(&xe_vma_vm(vma)->xe->drm,
681 	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
682 		xe_vma_start(vma), xe_vma_size(vma));
683 
684 	down_write(&vm->userptr.notifier_lock);
685 	mmu_interval_set_seq(mni, cur_seq);
686 
687 	__vma_userptr_invalidate(vm, uvma);
688 	up_write(&vm->userptr.notifier_lock);
689 	trace_xe_vma_userptr_invalidate_complete(vma);
690 
691 	return true;
692 }
693 
694 static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
695 	.invalidate = vma_userptr_invalidate,
696 };
697 
698 #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
699 /**
700  * xe_vma_userptr_force_invalidate() - force invalidate a userptr
701  * @uvma: The userptr vma to invalidate
702  *
703  * Perform a forced userptr invalidation for testing purposes.
704  */
705 void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
706 {
707 	struct xe_vm *vm = xe_vma_vm(&uvma->vma);
708 
709 	/* Protect against concurrent userptr pinning */
710 	lockdep_assert_held(&vm->lock);
711 	/* Protect against concurrent notifiers */
712 	lockdep_assert_held(&vm->userptr.notifier_lock);
713 	/*
714 	 * Protect against concurrent instances of this function and
715 	 * the critical exec sections
716 	 */
717 	xe_vm_assert_held(vm);
718 
719 	if (!mmu_interval_read_retry(&uvma->userptr.notifier,
720 				     uvma->userptr.notifier_seq))
721 		uvma->userptr.notifier_seq -= 2;
722 	__vma_userptr_invalidate(vm, uvma);
723 }
724 #endif
725 
726 int xe_vm_userptr_pin(struct xe_vm *vm)
727 {
728 	struct xe_userptr_vma *uvma, *next;
729 	int err = 0;
730 
731 	xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
732 	lockdep_assert_held_write(&vm->lock);
733 
734 	/* Collect invalidated userptrs */
735 	spin_lock(&vm->userptr.invalidated_lock);
736 	xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
737 	list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
738 				 userptr.invalidate_link) {
739 		list_del_init(&uvma->userptr.invalidate_link);
740 		list_add_tail(&uvma->userptr.repin_link,
741 			      &vm->userptr.repin_list);
742 	}
743 	spin_unlock(&vm->userptr.invalidated_lock);
744 
745 	/* Pin and move to bind list */
746 	list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
747 				 userptr.repin_link) {
748 		err = xe_vma_userptr_pin_pages(uvma);
749 		if (err == -EFAULT) {
750 			list_del_init(&uvma->userptr.repin_link);
751 			/*
752 			 * We might have already done the pin once, but then had
753 			 * to retry before the re-bind happened, due to some
754 			 * other condition in the caller, but in the
755 			 * meantime the userptr got dinged by the notifier such
756 			 * that we need to revalidate here, but this time we hit
757 			 * the EFAULT. In such a case make sure we remove
758 			 * ourselves from the rebind list to avoid going down in
759 			 * flames.
760 			 */
761 			if (!list_empty(&uvma->vma.combined_links.rebind))
762 				list_del_init(&uvma->vma.combined_links.rebind);
763 
764 			/* Wait for pending binds */
765 			xe_vm_lock(vm, false);
766 			dma_resv_wait_timeout(xe_vm_resv(vm),
767 					      DMA_RESV_USAGE_BOOKKEEP,
768 					      false, MAX_SCHEDULE_TIMEOUT);
769 
770 			down_read(&vm->userptr.notifier_lock);
771 			err = xe_vm_invalidate_vma(&uvma->vma);
772 			up_read(&vm->userptr.notifier_lock);
773 			xe_vm_unlock(vm);
774 			if (err)
775 				break;
776 		} else {
777 			if (err)
778 				break;
779 
780 			list_del_init(&uvma->userptr.repin_link);
781 			list_move_tail(&uvma->vma.combined_links.rebind,
782 				       &vm->rebind_list);
783 		}
784 	}
785 
786 	if (err) {
787 		down_write(&vm->userptr.notifier_lock);
788 		spin_lock(&vm->userptr.invalidated_lock);
789 		list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
790 					 userptr.repin_link) {
791 			list_del_init(&uvma->userptr.repin_link);
792 			list_move_tail(&uvma->userptr.invalidate_link,
793 				       &vm->userptr.invalidated);
794 		}
795 		spin_unlock(&vm->userptr.invalidated_lock);
796 		up_write(&vm->userptr.notifier_lock);
797 	}
798 	return err;
799 }
800 
801 /**
802  * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
803  * that need repinning.
804  * @vm: The VM.
805  *
806  * This function does an advisory check for whether the VM has userptrs that
807  * need repinning.
808  *
809  * Return: 0 if there are no indications of userptrs needing repinning,
810  * -EAGAIN if there are.
811  */
812 int xe_vm_userptr_check_repin(struct xe_vm *vm)
813 {
814 	return (list_empty_careful(&vm->userptr.repin_list) &&
815 		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
816 }
817 
818 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
819 {
820 	int i;
821 
822 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
823 		if (!vops->pt_update_ops[i].num_ops)
824 			continue;
825 
826 		vops->pt_update_ops[i].ops =
827 			kmalloc_array(vops->pt_update_ops[i].num_ops,
828 				      sizeof(*vops->pt_update_ops[i].ops),
829 				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
830 		if (!vops->pt_update_ops[i].ops)
831 			return array_of_binds ? -ENOBUFS : -ENOMEM;
832 	}
833 
834 	return 0;
835 }
836 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
837 
838 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
839 {
840 	struct xe_vma *vma;
841 
842 	vma = gpuva_to_vma(op->base.prefetch.va);
843 
844 	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
845 		xa_destroy(&op->prefetch_range.range);
846 }
847 
848 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
849 {
850 	struct xe_vma_op *op;
851 
852 	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
853 		return;
854 
855 	list_for_each_entry(op, &vops->list, link)
856 		xe_vma_svm_prefetch_op_fini(op);
857 }
858 
859 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
860 {
861 	int i;
862 
863 	xe_vma_svm_prefetch_ops_fini(vops);
864 
865 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
866 		kfree(vops->pt_update_ops[i].ops);
867 }
868 
869 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
870 {
871 	int i;
872 
873 	if (!inc_val)
874 		return;
875 
876 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
877 		if (BIT(i) & tile_mask)
878 			vops->pt_update_ops[i].num_ops += inc_val;
879 }
880 
881 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
882 				  u8 tile_mask)
883 {
884 	INIT_LIST_HEAD(&op->link);
885 	op->tile_mask = tile_mask;
886 	op->base.op = DRM_GPUVA_OP_MAP;
887 	op->base.map.va.addr = vma->gpuva.va.addr;
888 	op->base.map.va.range = vma->gpuva.va.range;
889 	op->base.map.gem.obj = vma->gpuva.gem.obj;
890 	op->base.map.gem.offset = vma->gpuva.gem.offset;
891 	op->map.vma = vma;
892 	op->map.immediate = true;
893 	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
894 	op->map.is_null = xe_vma_is_null(vma);
895 }
896 
897 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
898 				u8 tile_mask)
899 {
900 	struct xe_vma_op *op;
901 
902 	op = kzalloc(sizeof(*op), GFP_KERNEL);
903 	if (!op)
904 		return -ENOMEM;
905 
906 	xe_vm_populate_rebind(op, vma, tile_mask);
907 	list_add_tail(&op->link, &vops->list);
908 	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
909 
910 	return 0;
911 }
912 
913 static struct dma_fence *ops_execute(struct xe_vm *vm,
914 				     struct xe_vma_ops *vops);
915 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
916 			    struct xe_exec_queue *q,
917 			    struct xe_sync_entry *syncs, u32 num_syncs);
918 
919 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
920 {
921 	struct dma_fence *fence;
922 	struct xe_vma *vma, *next;
923 	struct xe_vma_ops vops;
924 	struct xe_vma_op *op, *next_op;
925 	int err, i;
926 
927 	lockdep_assert_held(&vm->lock);
928 	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
929 	    list_empty(&vm->rebind_list))
930 		return 0;
931 
932 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
933 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
934 		vops.pt_update_ops[i].wait_vm_bookkeep = true;
935 
936 	xe_vm_assert_held(vm);
937 	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
938 		xe_assert(vm->xe, vma->tile_present);
939 
940 		if (rebind_worker)
941 			trace_xe_vma_rebind_worker(vma);
942 		else
943 			trace_xe_vma_rebind_exec(vma);
944 
945 		err = xe_vm_ops_add_rebind(&vops, vma,
946 					   vma->tile_present);
947 		if (err)
948 			goto free_ops;
949 	}
950 
951 	err = xe_vma_ops_alloc(&vops, false);
952 	if (err)
953 		goto free_ops;
954 
955 	fence = ops_execute(vm, &vops);
956 	if (IS_ERR(fence)) {
957 		err = PTR_ERR(fence);
958 	} else {
959 		dma_fence_put(fence);
960 		list_for_each_entry_safe(vma, next, &vm->rebind_list,
961 					 combined_links.rebind)
962 			list_del_init(&vma->combined_links.rebind);
963 	}
964 free_ops:
965 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
966 		list_del(&op->link);
967 		kfree(op);
968 	}
969 	xe_vma_ops_fini(&vops);
970 
971 	return err;
972 }
973 
974 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
975 {
976 	struct dma_fence *fence = NULL;
977 	struct xe_vma_ops vops;
978 	struct xe_vma_op *op, *next_op;
979 	struct xe_tile *tile;
980 	u8 id;
981 	int err;
982 
983 	lockdep_assert_held(&vm->lock);
984 	xe_vm_assert_held(vm);
985 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
986 
987 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
988 	for_each_tile(tile, vm->xe, id) {
989 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
990 		vops.pt_update_ops[tile->id].q =
991 			xe_tile_migrate_exec_queue(tile);
992 	}
993 
994 	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
995 	if (err)
996 		return ERR_PTR(err);
997 
998 	err = xe_vma_ops_alloc(&vops, false);
999 	if (err) {
1000 		fence = ERR_PTR(err);
1001 		goto free_ops;
1002 	}
1003 
1004 	fence = ops_execute(vm, &vops);
1005 
1006 free_ops:
1007 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
1008 		list_del(&op->link);
1009 		kfree(op);
1010 	}
1011 	xe_vma_ops_fini(&vops);
1012 
1013 	return fence;
1014 }
1015 
1016 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
1017 					struct xe_vma *vma,
1018 					struct xe_svm_range *range,
1019 					u8 tile_mask)
1020 {
1021 	INIT_LIST_HEAD(&op->link);
1022 	op->tile_mask = tile_mask;
1023 	op->base.op = DRM_GPUVA_OP_DRIVER;
1024 	op->subop = XE_VMA_SUBOP_MAP_RANGE;
1025 	op->map_range.vma = vma;
1026 	op->map_range.range = range;
1027 }
1028 
1029 static int
1030 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
1031 			   struct xe_vma *vma,
1032 			   struct xe_svm_range *range,
1033 			   u8 tile_mask)
1034 {
1035 	struct xe_vma_op *op;
1036 
1037 	op = kzalloc(sizeof(*op), GFP_KERNEL);
1038 	if (!op)
1039 		return -ENOMEM;
1040 
1041 	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
1042 	list_add_tail(&op->link, &vops->list);
1043 	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
1044 
1045 	return 0;
1046 }
1047 
1048 /**
1049  * xe_vm_range_rebind() - VM range (re)bind
1050  * @vm: The VM which the range belongs to.
1051  * @vma: The VMA which the range belongs to.
1052  * @range: SVM range to rebind.
1053  * @tile_mask: Tile mask to bind the range to.
1054  *
1055  * (re)bind SVM range setting up GPU page tables for the range.
1056  *
1057  * Return: dma fence for rebind to signal completion on success, ERR_PTR on
1058  * failure
1059  */
1060 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
1061 				     struct xe_vma *vma,
1062 				     struct xe_svm_range *range,
1063 				     u8 tile_mask)
1064 {
1065 	struct dma_fence *fence = NULL;
1066 	struct xe_vma_ops vops;
1067 	struct xe_vma_op *op, *next_op;
1068 	struct xe_tile *tile;
1069 	u8 id;
1070 	int err;
1071 
1072 	lockdep_assert_held(&vm->lock);
1073 	xe_vm_assert_held(vm);
1074 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1075 	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
1076 
1077 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1078 	for_each_tile(tile, vm->xe, id) {
1079 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
1080 		vops.pt_update_ops[tile->id].q =
1081 			xe_tile_migrate_exec_queue(tile);
1082 	}
1083 
1084 	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
1085 	if (err)
1086 		return ERR_PTR(err);
1087 
1088 	err = xe_vma_ops_alloc(&vops, false);
1089 	if (err) {
1090 		fence = ERR_PTR(err);
1091 		goto free_ops;
1092 	}
1093 
1094 	fence = ops_execute(vm, &vops);
1095 
1096 free_ops:
1097 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
1098 		list_del(&op->link);
1099 		kfree(op);
1100 	}
1101 	xe_vma_ops_fini(&vops);
1102 
1103 	return fence;
1104 }
1105 
1106 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
1107 					struct xe_svm_range *range)
1108 {
1109 	INIT_LIST_HEAD(&op->link);
1110 	op->tile_mask = range->tile_present;
1111 	op->base.op = DRM_GPUVA_OP_DRIVER;
1112 	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
1113 	op->unmap_range.range = range;
1114 }
1115 
1116 static int
1117 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
1118 			   struct xe_svm_range *range)
1119 {
1120 	struct xe_vma_op *op;
1121 
1122 	op = kzalloc(sizeof(*op), GFP_KERNEL);
1123 	if (!op)
1124 		return -ENOMEM;
1125 
1126 	xe_vm_populate_range_unbind(op, range);
1127 	list_add_tail(&op->link, &vops->list);
1128 	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
1129 
1130 	return 0;
1131 }
1132 
1133 /**
1134  * xe_vm_range_unbind() - VM range unbind
1135  * @vm: The VM which the range belongs to.
1136  * @range: SVM range to unbind.
1137  *
1138  * Unbind SVM range removing the GPU page tables for the range.
1139  *
1140  * Return: dma fence for unbind to signal completion on success, ERR_PTR on
1141  * failure
1142  */
1143 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
1144 				     struct xe_svm_range *range)
1145 {
1146 	struct dma_fence *fence = NULL;
1147 	struct xe_vma_ops vops;
1148 	struct xe_vma_op *op, *next_op;
1149 	struct xe_tile *tile;
1150 	u8 id;
1151 	int err;
1152 
1153 	lockdep_assert_held(&vm->lock);
1154 	xe_vm_assert_held(vm);
1155 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1156 
1157 	if (!range->tile_present)
1158 		return dma_fence_get_stub();
1159 
1160 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1161 	for_each_tile(tile, vm->xe, id) {
1162 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
1163 		vops.pt_update_ops[tile->id].q =
1164 			xe_tile_migrate_exec_queue(tile);
1165 	}
1166 
1167 	err = xe_vm_ops_add_range_unbind(&vops, range);
1168 	if (err)
1169 		return ERR_PTR(err);
1170 
1171 	err = xe_vma_ops_alloc(&vops, false);
1172 	if (err) {
1173 		fence = ERR_PTR(err);
1174 		goto free_ops;
1175 	}
1176 
1177 	fence = ops_execute(vm, &vops);
1178 
1179 free_ops:
1180 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
1181 		list_del(&op->link);
1182 		kfree(op);
1183 	}
1184 	xe_vma_ops_fini(&vops);
1185 
1186 	return fence;
1187 }
1188 
1189 static void xe_vma_free(struct xe_vma *vma)
1190 {
1191 	if (xe_vma_is_userptr(vma))
1192 		kfree(to_userptr_vma(vma));
1193 	else
1194 		kfree(vma);
1195 }
1196 
1197 #define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
1198 #define VMA_CREATE_FLAG_IS_NULL			BIT(1)
1199 #define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
1200 #define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR	BIT(3)
1201 
1202 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
1203 				    struct xe_bo *bo,
1204 				    u64 bo_offset_or_userptr,
1205 				    u64 start, u64 end,
1206 				    u16 pat_index, unsigned int flags)
1207 {
1208 	struct xe_vma *vma;
1209 	struct xe_tile *tile;
1210 	u8 id;
1211 	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
1212 	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
1213 	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
1214 	bool is_cpu_addr_mirror =
1215 		(flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);
1216 
1217 	xe_assert(vm->xe, start < end);
1218 	xe_assert(vm->xe, end < vm->size);
1219 
1220 	/*
1221 	 * Allocate and ensure that the xe_vma_is_userptr() return
1222 	 * matches what was allocated.
1223 	 */
1224 	if (!bo && !is_null && !is_cpu_addr_mirror) {
1225 		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
1226 
1227 		if (!uvma)
1228 			return ERR_PTR(-ENOMEM);
1229 
1230 		vma = &uvma->vma;
1231 	} else {
1232 		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
1233 		if (!vma)
1234 			return ERR_PTR(-ENOMEM);
1235 
1236 		if (is_cpu_addr_mirror)
1237 			vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
1238 		if (is_null)
1239 			vma->gpuva.flags |= DRM_GPUVA_SPARSE;
1240 		if (bo)
1241 			vma->gpuva.gem.obj = &bo->ttm.base;
1242 	}
1243 
1244 	INIT_LIST_HEAD(&vma->combined_links.rebind);
1245 
1246 	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1247 	vma->gpuva.vm = &vm->gpuvm;
1248 	vma->gpuva.va.addr = start;
1249 	vma->gpuva.va.range = end - start + 1;
1250 	if (read_only)
1251 		vma->gpuva.flags |= XE_VMA_READ_ONLY;
1252 	if (dumpable)
1253 		vma->gpuva.flags |= XE_VMA_DUMPABLE;
1254 
1255 	for_each_tile(tile, vm->xe, id)
1256 		vma->tile_mask |= 0x1 << id;
1257 
1258 	if (vm->xe->info.has_atomic_enable_pte_bit)
1259 		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1260 
1261 	vma->pat_index = pat_index;
1262 
1263 	if (bo) {
1264 		struct drm_gpuvm_bo *vm_bo;
1265 
1266 		xe_bo_assert_held(bo);
1267 
1268 		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
1269 		if (IS_ERR(vm_bo)) {
1270 			xe_vma_free(vma);
1271 			return ERR_CAST(vm_bo);
1272 		}
1273 
1274 		drm_gpuvm_bo_extobj_add(vm_bo);
1275 		drm_gem_object_get(&bo->ttm.base);
1276 		vma->gpuva.gem.offset = bo_offset_or_userptr;
1277 		drm_gpuva_link(&vma->gpuva, vm_bo);
1278 		drm_gpuvm_bo_put(vm_bo);
1279 	} else /* userptr or null */ {
1280 		if (!is_null && !is_cpu_addr_mirror) {
1281 			struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
1282 			u64 size = end - start + 1;
1283 			int err;
1284 
1285 			INIT_LIST_HEAD(&userptr->invalidate_link);
1286 			INIT_LIST_HEAD(&userptr->repin_link);
1287 			vma->gpuva.gem.offset = bo_offset_or_userptr;
1288 			mutex_init(&userptr->unmap_mutex);
1289 
1290 			err = mmu_interval_notifier_insert(&userptr->notifier,
1291 							   current->mm,
1292 							   xe_vma_userptr(vma), size,
1293 							   &vma_userptr_notifier_ops);
1294 			if (err) {
1295 				xe_vma_free(vma);
1296 				return ERR_PTR(err);
1297 			}
1298 
1299 			userptr->notifier_seq = LONG_MAX;
1300 		}
1301 
1302 		xe_vm_get(vm);
1303 	}
1304 
1305 	return vma;
1306 }
1307 
1308 static void xe_vma_destroy_late(struct xe_vma *vma)
1309 {
1310 	struct xe_vm *vm = xe_vma_vm(vma);
1311 
1312 	if (vma->ufence) {
1313 		xe_sync_ufence_put(vma->ufence);
1314 		vma->ufence = NULL;
1315 	}
1316 
1317 	if (xe_vma_is_userptr(vma)) {
1318 		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1319 		struct xe_userptr *userptr = &uvma->userptr;
1320 
1321 		if (userptr->sg)
1322 			xe_hmm_userptr_free_sg(uvma);
1323 
1324 		/*
1325 		 * Since userptr pages are not pinned, we can't remove
1326 		 * the notifier until we're sure the GPU is not accessing
1327 		 * them anymore
1328 		 */
1329 		mmu_interval_notifier_remove(&userptr->notifier);
1330 		mutex_destroy(&userptr->unmap_mutex);
1331 		xe_vm_put(vm);
1332 	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1333 		xe_vm_put(vm);
1334 	} else {
1335 		xe_bo_put(xe_vma_bo(vma));
1336 	}
1337 
1338 	xe_vma_free(vma);
1339 }
1340 
1341 static void vma_destroy_work_func(struct work_struct *w)
1342 {
1343 	struct xe_vma *vma =
1344 		container_of(w, struct xe_vma, destroy_work);
1345 
1346 	xe_vma_destroy_late(vma);
1347 }
1348 
1349 static void vma_destroy_cb(struct dma_fence *fence,
1350 			   struct dma_fence_cb *cb)
1351 {
1352 	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1353 
1354 	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1355 	queue_work(system_unbound_wq, &vma->destroy_work);
1356 }
1357 
1358 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1359 {
1360 	struct xe_vm *vm = xe_vma_vm(vma);
1361 
1362 	lockdep_assert_held_write(&vm->lock);
1363 	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1364 
1365 	if (xe_vma_is_userptr(vma)) {
1366 		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1367 
1368 		spin_lock(&vm->userptr.invalidated_lock);
1369 		xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
1370 		list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
1371 		spin_unlock(&vm->userptr.invalidated_lock);
1372 	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1373 		xe_bo_assert_held(xe_vma_bo(vma));
1374 
1375 		drm_gpuva_unlink(&vma->gpuva);
1376 	}
1377 
1378 	xe_vm_assert_held(vm);
1379 	if (fence) {
1380 		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1381 						 vma_destroy_cb);
1382 
1383 		if (ret) {
1384 			XE_WARN_ON(ret != -ENOENT);
1385 			xe_vma_destroy_late(vma);
1386 		}
1387 	} else {
1388 		xe_vma_destroy_late(vma);
1389 	}
1390 }
1391 
1392 /**
1393  * xe_vm_lock_vma() - drm_exec utility to lock a vma
1394  * @exec: The drm_exec object we're currently locking for.
1395  * @vma: The vma for which we want to lock the vm resv and any attached
1396  * object's resv.
1397  *
1398  * Return: 0 on success, negative error code on error. In particular
1399  * may return -EDEADLK on WW transaction contention and -EINTR if
1400  * an interruptible wait is terminated by a signal.
1401  */
1402 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1403 {
1404 	struct xe_vm *vm = xe_vma_vm(vma);
1405 	struct xe_bo *bo = xe_vma_bo(vma);
1406 	int err;
1407 
1408 	XE_WARN_ON(!vm);
1409 
1410 	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1411 	if (!err && bo && !bo->vm)
1412 		err = drm_exec_lock_obj(exec, &bo->ttm.base);
1413 
1414 	return err;
1415 }
1416 
1417 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1418 {
1419 	struct drm_exec exec;
1420 	int err;
1421 
1422 	drm_exec_init(&exec, 0, 0);
1423 	drm_exec_until_all_locked(&exec) {
1424 		err = xe_vm_lock_vma(&exec, vma);
1425 		drm_exec_retry_on_contention(&exec);
1426 		if (XE_WARN_ON(err))
1427 			break;
1428 	}
1429 
1430 	xe_vma_destroy(vma, NULL);
1431 
1432 	drm_exec_fini(&exec);
1433 }
1434 
1435 struct xe_vma *
1436 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1437 {
1438 	struct drm_gpuva *gpuva;
1439 
1440 	lockdep_assert_held(&vm->lock);
1441 
1442 	if (xe_vm_is_closed_or_banned(vm))
1443 		return NULL;
1444 
1445 	xe_assert(vm->xe, start + range <= vm->size);
1446 
1447 	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1448 
1449 	return gpuva ? gpuva_to_vma(gpuva) : NULL;
1450 }
1451 
1452 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1453 {
1454 	int err;
1455 
1456 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1457 	lockdep_assert_held(&vm->lock);
1458 
1459 	mutex_lock(&vm->snap_mutex);
1460 	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1461 	mutex_unlock(&vm->snap_mutex);
1462 	XE_WARN_ON(err);	/* Shouldn't be possible */
1463 
1464 	return err;
1465 }
1466 
1467 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1468 {
1469 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1470 	lockdep_assert_held(&vm->lock);
1471 
1472 	mutex_lock(&vm->snap_mutex);
1473 	drm_gpuva_remove(&vma->gpuva);
1474 	mutex_unlock(&vm->snap_mutex);
1475 	if (vm->usm.last_fault_vma == vma)
1476 		vm->usm.last_fault_vma = NULL;
1477 }
1478 
1479 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1480 {
1481 	struct xe_vma_op *op;
1482 
1483 	op = kzalloc(sizeof(*op), GFP_KERNEL);
1484 
1485 	if (unlikely(!op))
1486 		return NULL;
1487 
1488 	return &op->base;
1489 }
1490 
1491 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1492 
1493 static const struct drm_gpuvm_ops gpuvm_ops = {
1494 	.op_alloc = xe_vm_op_alloc,
1495 	.vm_bo_validate = xe_gpuvm_validate,
1496 	.vm_free = xe_vm_free,
1497 };
1498 
1499 static u64 pde_encode_pat_index(u16 pat_index)
1500 {
1501 	u64 pte = 0;
1502 
1503 	if (pat_index & BIT(0))
1504 		pte |= XE_PPGTT_PTE_PAT0;
1505 
1506 	if (pat_index & BIT(1))
1507 		pte |= XE_PPGTT_PTE_PAT1;
1508 
1509 	return pte;
1510 }
1511 
1512 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1513 {
1514 	u64 pte = 0;
1515 
1516 	if (pat_index & BIT(0))
1517 		pte |= XE_PPGTT_PTE_PAT0;
1518 
1519 	if (pat_index & BIT(1))
1520 		pte |= XE_PPGTT_PTE_PAT1;
1521 
1522 	if (pat_index & BIT(2)) {
1523 		if (pt_level)
1524 			pte |= XE_PPGTT_PDE_PDPE_PAT2;
1525 		else
1526 			pte |= XE_PPGTT_PTE_PAT2;
1527 	}
1528 
1529 	if (pat_index & BIT(3))
1530 		pte |= XELPG_PPGTT_PTE_PAT3;
1531 
1532 	if (pat_index & (BIT(4)))
1533 		pte |= XE2_PPGTT_PTE_PAT4;
1534 
1535 	return pte;
1536 }
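
/*
 * Worked example (editor's note, derived from the helper above): for
 * pat_index = 0b10110, bits 1, 2 and 4 are set, so the returned bits are
 * XE_PPGTT_PTE_PAT1 | XE_PPGTT_PTE_PAT2 | XE2_PPGTT_PTE_PAT4 for a leaf
 * PTE (pt_level == 0), with PAT2 becoming XE_PPGTT_PDE_PDPE_PAT2 for
 * higher levels.
 */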
1537 
1538 static u64 pte_encode_ps(u32 pt_level)
1539 {
1540 	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1541 
1542 	if (pt_level == 1)
1543 		return XE_PDE_PS_2M;
1544 	else if (pt_level == 2)
1545 		return XE_PDPE_PS_1G;
1546 
1547 	return 0;
1548 }
1549 
1550 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
1551 			      const u16 pat_index)
1552 {
1553 	u64 pde;
1554 
1555 	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1556 	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1557 	pde |= pde_encode_pat_index(pat_index);
1558 
1559 	return pde;
1560 }
1561 
1562 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1563 			      u16 pat_index, u32 pt_level)
1564 {
1565 	u64 pte;
1566 
1567 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1568 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1569 	pte |= pte_encode_pat_index(pat_index, pt_level);
1570 	pte |= pte_encode_ps(pt_level);
1571 
1572 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1573 		pte |= XE_PPGTT_PTE_DM;
1574 
1575 	return pte;
1576 }
1577 
1578 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1579 			       u16 pat_index, u32 pt_level)
1580 {
1581 	pte |= XE_PAGE_PRESENT;
1582 
1583 	if (likely(!xe_vma_read_only(vma)))
1584 		pte |= XE_PAGE_RW;
1585 
1586 	pte |= pte_encode_pat_index(pat_index, pt_level);
1587 	pte |= pte_encode_ps(pt_level);
1588 
1589 	if (unlikely(xe_vma_is_null(vma)))
1590 		pte |= XE_PTE_NULL;
1591 
1592 	return pte;
1593 }
1594 
1595 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1596 				u16 pat_index,
1597 				u32 pt_level, bool devmem, u64 flags)
1598 {
1599 	u64 pte;
1600 
1601 	/* Avoid passing random bits directly as flags */
1602 	xe_assert(xe, !(flags & ~XE_PTE_PS64));
1603 
1604 	pte = addr;
1605 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1606 	pte |= pte_encode_pat_index(pat_index, pt_level);
1607 	pte |= pte_encode_ps(pt_level);
1608 
1609 	if (devmem)
1610 		pte |= XE_PPGTT_PTE_DM;
1611 
1612 	pte |= flags;
1613 
1614 	return pte;
1615 }
1616 
1617 static const struct xe_pt_ops xelp_pt_ops = {
1618 	.pte_encode_bo = xelp_pte_encode_bo,
1619 	.pte_encode_vma = xelp_pte_encode_vma,
1620 	.pte_encode_addr = xelp_pte_encode_addr,
1621 	.pde_encode_bo = xelp_pde_encode_bo,
1622 };
1623 
1624 static void vm_destroy_work_func(struct work_struct *w);
1625 
1626 /**
1627  * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1628  * given tile and vm.
1629  * @xe: xe device.
1630  * @tile: tile to set up for.
1631  * @vm: vm to set up for.
1632  *
1633  * Sets up a pagetable tree with one page-table per level and a single
1634  * leaf PTE. All pagetable entries point to the single page-table or,
1635  * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1636  * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns 0 on reads and
1637  * turns writes into NOPs.
1638  * Return: 0 on success, negative error code on error.
1639  */
1640 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1641 				struct xe_vm *vm)
1642 {
1643 	u8 id = tile->id;
1644 	int i;
1645 
1646 	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1647 		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
1648 		if (IS_ERR(vm->scratch_pt[id][i])) {
1649 			int err = PTR_ERR(vm->scratch_pt[id][i]);
1650 
1651 			vm->scratch_pt[id][i] = NULL;
1652 			return err;
1653 		}
1654 
1655 		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1656 	}
1657 
1658 	return 0;
1659 }
1660 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
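
/*
 * Layout sketch (editor's illustration of the kerneldoc above): the result
 * is one scratch page table per level from MAX_HUGEPTE_LEVEL up to just
 * below the root, each fully populated with entries pointing at the scratch
 * table one level down, with the lowest one filled with NULL huge PTEs so
 * unmapped addresses read back zero and writes are dropped:
 *
 *	pt_root -> scratch_pt[root_level - 1] -> ... ->
 *		scratch_pt[MAX_HUGEPTE_LEVEL] (NULL huge PTEs)
 */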
1661 
1662 static void xe_vm_free_scratch(struct xe_vm *vm)
1663 {
1664 	struct xe_tile *tile;
1665 	u8 id;
1666 
1667 	if (!xe_vm_has_scratch(vm))
1668 		return;
1669 
1670 	for_each_tile(tile, vm->xe, id) {
1671 		u32 i;
1672 
1673 		if (!vm->pt_root[id])
1674 			continue;
1675 
1676 		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1677 			if (vm->scratch_pt[id][i])
1678 				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1679 	}
1680 }
1681 
1682 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1683 {
1684 	struct drm_gem_object *vm_resv_obj;
1685 	struct xe_vm *vm;
1686 	int err, number_tiles = 0;
1687 	struct xe_tile *tile;
1688 	u8 id;
1689 
1690 	/*
1691 	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1692 	 * ever be in faulting mode.
1693 	 */
1694 	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1695 
1696 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1697 	if (!vm)
1698 		return ERR_PTR(-ENOMEM);
1699 
1700 	vm->xe = xe;
1701 
1702 	vm->size = 1ull << xe->info.va_bits;
1703 	vm->flags = flags;
1704 
1705 	if (xef)
1706 		vm->xef = xe_file_get(xef);
1707 	/**
1708 	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1709 	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1710 	 * under a user-VM lock when the PXP session is started at exec_queue
1711 	 * creation time. Those are different VMs and therefore there is no risk
1712 	 * of deadlock, but we need to tell lockdep that this is the case or it
1713 	 * will print a warning.
1714 	 */
1715 	if (flags & XE_VM_FLAG_GSC) {
1716 		static struct lock_class_key gsc_vm_key;
1717 
1718 		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1719 	} else {
1720 		init_rwsem(&vm->lock);
1721 	}
1722 	mutex_init(&vm->snap_mutex);
1723 
1724 	INIT_LIST_HEAD(&vm->rebind_list);
1725 
1726 	INIT_LIST_HEAD(&vm->userptr.repin_list);
1727 	INIT_LIST_HEAD(&vm->userptr.invalidated);
1728 	init_rwsem(&vm->userptr.notifier_lock);
1729 	spin_lock_init(&vm->userptr.invalidated_lock);
1730 
1731 	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1732 
1733 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1734 
1735 	INIT_LIST_HEAD(&vm->preempt.exec_queues);
1736 	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
1737 
1738 	for_each_tile(tile, xe, id)
1739 		xe_range_fence_tree_init(&vm->rftree[id]);
1740 
1741 	vm->pt_ops = &xelp_pt_ops;
1742 
1743 	/*
1744 	 * Long-running workloads are not protected by the scheduler references.
1745 	 * By design, run_job for long-running workloads returns NULL and the
1746 	 * scheduler drops all references to it, hence protecting the VM
1747 	 * for this case is necessary.
1748 	 */
1749 	if (flags & XE_VM_FLAG_LR_MODE) {
1750 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1751 		xe_pm_runtime_get_noresume(xe);
1752 		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1753 	}
1754 
1755 	if (flags & XE_VM_FLAG_FAULT_MODE) {
1756 		err = xe_svm_init(vm);
1757 		if (err)
1758 			goto err_no_resv;
1759 	}
1760 
1761 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1762 	if (!vm_resv_obj) {
1763 		err = -ENOMEM;
1764 		goto err_svm_fini;
1765 	}
1766 
1767 	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1768 		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1769 
1770 	drm_gem_object_put(vm_resv_obj);
1771 
1772 	err = xe_vm_lock(vm, true);
1773 	if (err)
1774 		goto err_close;
1775 
1776 	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1777 		vm->flags |= XE_VM_FLAG_64K;
1778 
1779 	for_each_tile(tile, xe, id) {
1780 		if (flags & XE_VM_FLAG_MIGRATION &&
1781 		    tile->id != XE_VM_FLAG_TILE_ID(flags))
1782 			continue;
1783 
1784 		vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
1785 		if (IS_ERR(vm->pt_root[id])) {
1786 			err = PTR_ERR(vm->pt_root[id]);
1787 			vm->pt_root[id] = NULL;
1788 			goto err_unlock_close;
1789 		}
1790 	}
1791 
1792 	if (xe_vm_has_scratch(vm)) {
1793 		for_each_tile(tile, xe, id) {
1794 			if (!vm->pt_root[id])
1795 				continue;
1796 
1797 			err = xe_vm_create_scratch(xe, tile, vm);
1798 			if (err)
1799 				goto err_unlock_close;
1800 		}
1801 		vm->batch_invalidate_tlb = true;
1802 	}
1803 
1804 	if (vm->flags & XE_VM_FLAG_LR_MODE)
1805 		vm->batch_invalidate_tlb = false;
1806 
1807 	/* Fill pt_root after allocating scratch tables */
1808 	for_each_tile(tile, xe, id) {
1809 		if (!vm->pt_root[id])
1810 			continue;
1811 
1812 		xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1813 	}
1814 	xe_vm_unlock(vm);
1815 
1816 	/* Kernel migration VM shouldn't have a circular loop.. */
1817 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
1818 		for_each_tile(tile, xe, id) {
1819 			struct xe_exec_queue *q;
1820 			u32 create_flags = EXEC_QUEUE_FLAG_VM;
1821 
1822 			if (!vm->pt_root[id])
1823 				continue;
1824 
1825 			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1826 			if (IS_ERR(q)) {
1827 				err = PTR_ERR(q);
1828 				goto err_close;
1829 			}
1830 			vm->q[id] = q;
1831 			number_tiles++;
1832 		}
1833 	}
1834 
1835 	if (number_tiles > 1)
1836 		vm->composite_fence_ctx = dma_fence_context_alloc(1);
1837 
1838 	if (xef && xe->info.has_asid) {
1839 		u32 asid;
1840 
1841 		down_write(&xe->usm.lock);
1842 		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1843 				      XA_LIMIT(1, XE_MAX_ASID - 1),
1844 				      &xe->usm.next_asid, GFP_KERNEL);
1845 		up_write(&xe->usm.lock);
1846 		if (err < 0)
1847 			goto err_unlock_close;
1848 
1849 		vm->usm.asid = asid;
1850 	}
1851 
1852 	trace_xe_vm_create(vm);
1853 
1854 	return vm;
1855 
1856 err_unlock_close:
1857 	xe_vm_unlock(vm);
1858 err_close:
1859 	xe_vm_close_and_put(vm);
1860 	return ERR_PTR(err);
1861 
1862 err_svm_fini:
1863 	if (flags & XE_VM_FLAG_FAULT_MODE) {
1864 		vm->size = 0; /* close the vm */
1865 		xe_svm_fini(vm);
1866 	}
1867 err_no_resv:
1868 	mutex_destroy(&vm->snap_mutex);
1869 	for_each_tile(tile, xe, id)
1870 		xe_range_fence_tree_fini(&vm->rftree[id]);
1871 	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1872 	if (vm->xef)
1873 		xe_file_put(vm->xef);
1874 	kfree(vm);
1875 	if (flags & XE_VM_FLAG_LR_MODE)
1876 		xe_pm_runtime_put(xe);
1877 	return ERR_PTR(err);
1878 }
1879 
xe_vm_close(struct xe_vm * vm)1880 static void xe_vm_close(struct xe_vm *vm)
1881 {
1882 	struct xe_device *xe = vm->xe;
1883 	bool bound;
1884 	int idx;
1885 
1886 	bound = drm_dev_enter(&xe->drm, &idx);
1887 
1888 	down_write(&vm->lock);
1889 	if (xe_vm_in_fault_mode(vm))
1890 		xe_svm_notifier_lock(vm);
1891 
1892 	vm->size = 0;
1893 
1894 	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
1895 		struct xe_tile *tile;
1896 		struct xe_gt *gt;
1897 		u8 id;
1898 
1899 		/* Wait for pending binds */
1900 		dma_resv_wait_timeout(xe_vm_resv(vm),
1901 				      DMA_RESV_USAGE_BOOKKEEP,
1902 				      false, MAX_SCHEDULE_TIMEOUT);
1903 
1904 		if (bound) {
1905 			for_each_tile(tile, xe, id)
1906 				if (vm->pt_root[id])
1907 					xe_pt_clear(xe, vm->pt_root[id]);
1908 
1909 			for_each_gt(gt, xe, id)
1910 				xe_gt_tlb_invalidation_vm(gt, vm);
1911 		}
1912 	}
1913 
1914 	if (xe_vm_in_fault_mode(vm))
1915 		xe_svm_notifier_unlock(vm);
1916 	up_write(&vm->lock);
1917 
1918 	if (bound)
1919 		drm_dev_exit(idx);
1920 }
1921 
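/**
 * xe_vm_close_and_put() - Close a VM and drop its creation reference
 * @vm: The VM to close and put.
 *
 * Closes the VM, kills and puts its bind exec queues, removes and destroys
 * all VMAs, frees the scratch and root page tables, releases the ASID (if
 * any) and finally drops the reference taken at VM creation.
 *
 * The caller must not hold vm->lock; it is taken internally.
 */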
xe_vm_close_and_put(struct xe_vm * vm)1922 void xe_vm_close_and_put(struct xe_vm *vm)
1923 {
1924 	LIST_HEAD(contested);
1925 	struct xe_device *xe = vm->xe;
1926 	struct xe_tile *tile;
1927 	struct xe_vma *vma, *next_vma;
1928 	struct drm_gpuva *gpuva, *next;
1929 	u8 id;
1930 
1931 	xe_assert(xe, !vm->preempt.num_exec_queues);
1932 
1933 	xe_vm_close(vm);
1934 	if (xe_vm_in_preempt_fence_mode(vm)) {
1935 		mutex_lock(&xe->rebind_resume_lock);
1936 		list_del_init(&vm->preempt.pm_activate_link);
1937 		mutex_unlock(&xe->rebind_resume_lock);
1938 		flush_work(&vm->preempt.rebind_work);
1939 	}
1940 	if (xe_vm_in_fault_mode(vm))
1941 		xe_svm_close(vm);
1942 
1943 	down_write(&vm->lock);
1944 	for_each_tile(tile, xe, id) {
1945 		if (vm->q[id])
1946 			xe_exec_queue_last_fence_put(vm->q[id], vm);
1947 	}
1948 	up_write(&vm->lock);
1949 
1950 	for_each_tile(tile, xe, id) {
1951 		if (vm->q[id]) {
1952 			xe_exec_queue_kill(vm->q[id]);
1953 			xe_exec_queue_put(vm->q[id]);
1954 			vm->q[id] = NULL;
1955 		}
1956 	}
1957 
1958 	down_write(&vm->lock);
1959 	xe_vm_lock(vm, false);
1960 	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1961 		vma = gpuva_to_vma(gpuva);
1962 
1963 		if (xe_vma_has_no_bo(vma)) {
1964 			down_read(&vm->userptr.notifier_lock);
1965 			vma->gpuva.flags |= XE_VMA_DESTROYED;
1966 			up_read(&vm->userptr.notifier_lock);
1967 		}
1968 
1969 		xe_vm_remove_vma(vm, vma);
1970 
1971 		/* Easy case: no BO or a VM-private BO, destroy immediately */
1972 		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1973 			list_del_init(&vma->combined_links.rebind);
1974 			xe_vma_destroy(vma, NULL);
1975 			continue;
1976 		}
1977 
1978 		list_move_tail(&vma->combined_links.destroy, &contested);
1979 		vma->gpuva.flags |= XE_VMA_DESTROYED;
1980 	}
1981 
1982 	/*
1983 	 * All vm operations will add shared fences to resv.
1984 	 * The only exception is eviction for a shared object,
1985 	 * but even so, the unbind when evicted would still
1986 	 * install a fence to resv. Hence it's safe to
1987 	 * destroy the pagetables immediately.
1988 	 */
1989 	xe_vm_free_scratch(vm);
1990 
1991 	for_each_tile(tile, xe, id) {
1992 		if (vm->pt_root[id]) {
1993 			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1994 			vm->pt_root[id] = NULL;
1995 		}
1996 	}
1997 	xe_vm_unlock(vm);
1998 
1999 	/*
2000 	 * The VM is now dead, so no new VMAs can be added to it.
2001 	 * Since we hold a refcount to each BO, the contested VMAs can be
2002 	 * removed and freed safely without further locking.
2003 	 */
2004 	list_for_each_entry_safe(vma, next_vma, &contested,
2005 				 combined_links.destroy) {
2006 		list_del_init(&vma->combined_links.destroy);
2007 		xe_vma_destroy_unlocked(vma);
2008 	}
2009 
2010 	if (xe_vm_in_fault_mode(vm))
2011 		xe_svm_fini(vm);
2012 
2013 	up_write(&vm->lock);
2014 
2015 	down_write(&xe->usm.lock);
2016 	if (vm->usm.asid) {
2017 		void *lookup;
2018 
2019 		xe_assert(xe, xe->info.has_asid);
2020 		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
2021 
2022 		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
2023 		xe_assert(xe, lookup == vm);
2024 	}
2025 	up_write(&xe->usm.lock);
2026 
2027 	for_each_tile(tile, xe, id)
2028 		xe_range_fence_tree_fini(&vm->rftree[id]);
2029 
2030 	xe_vm_put(vm);
2031 }
2032 
vm_destroy_work_func(struct work_struct * w)2033 static void vm_destroy_work_func(struct work_struct *w)
2034 {
2035 	struct xe_vm *vm =
2036 		container_of(w, struct xe_vm, destroy_work);
2037 	struct xe_device *xe = vm->xe;
2038 	struct xe_tile *tile;
2039 	u8 id;
2040 
2041 	/* If this fires, xe_vm_close_and_put() was not called */
2042 	xe_assert(xe, !vm->size);
2043 
2044 	if (xe_vm_in_preempt_fence_mode(vm))
2045 		flush_work(&vm->preempt.rebind_work);
2046 
2047 	mutex_destroy(&vm->snap_mutex);
2048 
2049 	if (vm->flags & XE_VM_FLAG_LR_MODE)
2050 		xe_pm_runtime_put(xe);
2051 
2052 	for_each_tile(tile, xe, id)
2053 		XE_WARN_ON(vm->pt_root[id]);
2054 
2055 	trace_xe_vm_free(vm);
2056 
2057 	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
2058 
2059 	if (vm->xef)
2060 		xe_file_put(vm->xef);
2061 
2062 	kfree(vm);
2063 }
2064 
xe_vm_free(struct drm_gpuvm * gpuvm)2065 static void xe_vm_free(struct drm_gpuvm *gpuvm)
2066 {
2067 	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
2068 
2069 	/* To destroy the VM we need to be able to sleep */
2070 	queue_work(system_unbound_wq, &vm->destroy_work);
2071 }
2072 
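/**
 * xe_vm_lookup() - Look up a VM by its user-visible id
 * @xef: The xe file private the id was allocated from.
 * @id: The VM id, as returned by the VM create ioctl.
 *
 * Return: The VM with an extra reference taken, or NULL if no VM with this
 * id exists for @xef. The caller releases the reference with xe_vm_put().
 */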
xe_vm_lookup(struct xe_file * xef,u32 id)2073 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
2074 {
2075 	struct xe_vm *vm;
2076 
2077 	mutex_lock(&xef->vm.lock);
2078 	vm = xa_load(&xef->vm.xa, id);
2079 	if (vm)
2080 		xe_vm_get(vm);
2081 	mutex_unlock(&xef->vm.lock);
2082 
2083 	return vm;
2084 }
2085 
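/**
 * xe_vm_pdp4_descriptor() - Encode the root page-table descriptor for a tile
 * @vm: The VM.
 * @tile: The tile whose page-table root to encode.
 *
 * Return: The PDE-encoded address of the VM's root page-table BO on @tile,
 * using the write-back (XE_CACHE_WB) PAT index.
 */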
xe_vm_pdp4_descriptor(struct xe_vm * vm,struct xe_tile * tile)2086 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
2087 {
2088 	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
2089 					 tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
2090 }
2091 
2092 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)2093 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2094 {
2095 	return q ? q : vm->q[0];
2096 }
2097 
2098 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry * syncs,u32 num_syncs)2099 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2100 {
2101 	unsigned int i;
2102 
2103 	for (i = 0; i < num_syncs; i++) {
2104 		struct xe_sync_entry *e = &syncs[i];
2105 
2106 		if (xe_sync_is_ufence(e))
2107 			return xe_sync_ufence_get(e);
2108 	}
2109 
2110 	return NULL;
2111 }
2112 
2113 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
2114 				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
2115 				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2116 
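/*
 * Minimal userspace sketch (not part of the driver) of the ioctl handled
 * below, assuming the uapi definitions from uapi/drm/xe_drm.h; "fd" is an
 * open xe render node and error handling is elided. On success,
 * create.vm_id identifies the new VM in subsequent bind/exec ioctls:
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE |
 *			 DRM_XE_VM_CREATE_FLAG_FAULT_MODE,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
 */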
xe_vm_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2117 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2118 		       struct drm_file *file)
2119 {
2120 	struct xe_device *xe = to_xe_device(dev);
2121 	struct xe_file *xef = to_xe_file(file);
2122 	struct drm_xe_vm_create *args = data;
2123 	struct xe_vm *vm;
2124 	u32 id;
2125 	int err;
2126 	u32 flags = 0;
2127 
2128 	if (XE_IOCTL_DBG(xe, args->extensions))
2129 		return -EINVAL;
2130 
2131 	if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
2132 		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
2133 
2134 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2135 			 !xe->info.has_usm))
2136 		return -EINVAL;
2137 
2138 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2139 		return -EINVAL;
2140 
2141 	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2142 		return -EINVAL;
2143 
2144 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
2145 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2146 			 !xe->info.needs_scratch))
2147 		return -EINVAL;
2148 
2149 	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
2150 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
2151 		return -EINVAL;
2152 
2153 	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
2154 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
2155 	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
2156 		flags |= XE_VM_FLAG_LR_MODE;
2157 	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2158 		flags |= XE_VM_FLAG_FAULT_MODE;
2159 
2160 	vm = xe_vm_create(xe, flags, xef);
2161 	if (IS_ERR(vm))
2162 		return PTR_ERR(vm);
2163 
2164 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2165 	/* Warning: Security issue - never enable by default */
2166 	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2167 #endif
2168 
2169 	/* The user id allocation must always be last in the ioctl to prevent UAF */
2170 	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2171 	if (err)
2172 		goto err_close_and_put;
2173 
2174 	args->vm_id = id;
2175 
2176 	return 0;
2177 
2178 err_close_and_put:
2179 	xe_vm_close_and_put(vm);
2180 
2181 	return err;
2182 }
2183 
xe_vm_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2184 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2185 			struct drm_file *file)
2186 {
2187 	struct xe_device *xe = to_xe_device(dev);
2188 	struct xe_file *xef = to_xe_file(file);
2189 	struct drm_xe_vm_destroy *args = data;
2190 	struct xe_vm *vm;
2191 	int err = 0;
2192 
2193 	if (XE_IOCTL_DBG(xe, args->pad) ||
2194 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2195 		return -EINVAL;
2196 
2197 	mutex_lock(&xef->vm.lock);
2198 	vm = xa_load(&xef->vm.xa, args->vm_id);
2199 	if (XE_IOCTL_DBG(xe, !vm))
2200 		err = -ENOENT;
2201 	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2202 		err = -EBUSY;
2203 	else
2204 		xa_erase(&xef->vm.xa, args->vm_id);
2205 	mutex_unlock(&xef->vm.lock);
2206 
2207 	if (!err)
2208 		xe_vm_close_and_put(vm);
2209 
2210 	return err;
2211 }
2212 
vma_matches(struct xe_vma * vma,u64 page_addr)2213 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2214 {
2215 	if (page_addr > xe_vma_end(vma) - 1 ||
2216 	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
2217 		return false;
2218 
2219 	return true;
2220 }
2221 
2222 /**
2223  * xe_vm_find_vma_by_addr() - Find a VMA by its address
2224  *
2225  * @vm: the xe_vm the vma belongs to
2226  * @page_addr: address to look up
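 *
 * Return: The VMA covering @page_addr, or NULL if no VMA matches.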
2227  */
xe_vm_find_vma_by_addr(struct xe_vm * vm,u64 page_addr)2228 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2229 {
2230 	struct xe_vma *vma = NULL;
2231 
2232 	if (vm->usm.last_fault_vma) {   /* Fast lookup */
2233 		if (vma_matches(vm->usm.last_fault_vma, page_addr))
2234 			vma = vm->usm.last_fault_vma;
2235 	}
2236 	if (!vma)
2237 		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2238 
2239 	return vma;
2240 }
2241 
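/*
 * Maps the prefetch memory region instance from the bind ioctl to a TTM
 * placement: instance 0 is system memory (TT), instances 1 and 2 are the
 * VRAM of tile 0 and tile 1 respectively.
 */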
2242 static const u32 region_to_mem_type[] = {
2243 	XE_PL_TT,
2244 	XE_PL_VRAM0,
2245 	XE_PL_VRAM1,
2246 };
2247 
prep_vma_destroy(struct xe_vm * vm,struct xe_vma * vma,bool post_commit)2248 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2249 			     bool post_commit)
2250 {
2251 	down_read(&vm->userptr.notifier_lock);
2252 	vma->gpuva.flags |= XE_VMA_DESTROYED;
2253 	up_read(&vm->userptr.notifier_lock);
2254 	if (post_commit)
2255 		xe_vm_remove_vma(vm, vma);
2256 }
2257 
2258 #undef ULL
2259 #define ULL	unsigned long long
2260 
2261 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2262 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2263 {
2264 	struct xe_vma *vma;
2265 
2266 	switch (op->op) {
2267 	case DRM_GPUVA_OP_MAP:
2268 		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2269 		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
2270 		break;
2271 	case DRM_GPUVA_OP_REMAP:
2272 		vma = gpuva_to_vma(op->remap.unmap->va);
2273 		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2274 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2275 		       op->remap.unmap->keep ? 1 : 0);
2276 		if (op->remap.prev)
2277 			vm_dbg(&xe->drm,
2278 			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2279 			       (ULL)op->remap.prev->va.addr,
2280 			       (ULL)op->remap.prev->va.range);
2281 		if (op->remap.next)
2282 			vm_dbg(&xe->drm,
2283 			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2284 			       (ULL)op->remap.next->va.addr,
2285 			       (ULL)op->remap.next->va.range);
2286 		break;
2287 	case DRM_GPUVA_OP_UNMAP:
2288 		vma = gpuva_to_vma(op->unmap.va);
2289 		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2290 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2291 		       op->unmap.keep ? 1 : 0);
2292 		break;
2293 	case DRM_GPUVA_OP_PREFETCH:
2294 		vma = gpuva_to_vma(op->prefetch.va);
2295 		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2296 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2297 		break;
2298 	default:
2299 		drm_warn(&xe->drm, "NOT POSSIBLE");
2300 	}
2301 }
2302 #else
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2303 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2304 {
2305 }
2306 #endif
2307 
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2308 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2309 {
2310 	if (!xe_vm_in_fault_mode(vm))
2311 		return false;
2312 
2313 	if (!xe_vm_has_scratch(vm))
2314 		return false;
2315 
2316 	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2317 		return false;
2318 
2319 	return true;
2320 }
2321 
xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops * ops)2322 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2323 {
2324 	struct drm_gpuva_op *__op;
2325 
2326 	drm_gpuva_for_each_op(__op, ops) {
2327 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2328 
2329 		xe_vma_svm_prefetch_op_fini(op);
2330 	}
2331 }
2332 
2333 /*
2334  * Create the operations list from the IOCTL arguments and set up operation fields
2335  * so the parse and commit steps are decoupled from them. This step can fail.
2336  */
2337 static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_bo * bo,u64 bo_offset_or_userptr,u64 addr,u64 range,u32 operation,u32 flags,u32 prefetch_region,u16 pat_index)2338 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2339 			 struct xe_bo *bo, u64 bo_offset_or_userptr,
2340 			 u64 addr, u64 range,
2341 			 u32 operation, u32 flags,
2342 			 u32 prefetch_region, u16 pat_index)
2343 {
2344 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2345 	struct drm_gpuva_ops *ops;
2346 	struct drm_gpuva_op *__op;
2347 	struct drm_gpuvm_bo *vm_bo;
2348 	u64 range_end = addr + range;
2349 	int err;
2350 
2351 	lockdep_assert_held_write(&vm->lock);
2352 
2353 	vm_dbg(&vm->xe->drm,
2354 	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2355 	       operation, (ULL)addr, (ULL)range,
2356 	       (ULL)bo_offset_or_userptr);
2357 
2358 	switch (operation) {
2359 	case DRM_XE_VM_BIND_OP_MAP:
2360 	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
2361 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
2362 						  obj, bo_offset_or_userptr);
2363 		break;
2364 	case DRM_XE_VM_BIND_OP_UNMAP:
2365 		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2366 		break;
2367 	case DRM_XE_VM_BIND_OP_PREFETCH:
2368 		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2369 		break;
2370 	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2371 		xe_assert(vm->xe, bo);
2372 
2373 		err = xe_bo_lock(bo, true);
2374 		if (err)
2375 			return ERR_PTR(err);
2376 
2377 		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2378 		if (IS_ERR(vm_bo)) {
2379 			xe_bo_unlock(bo);
2380 			return ERR_CAST(vm_bo);
2381 		}
2382 
2383 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2384 		drm_gpuvm_bo_put(vm_bo);
2385 		xe_bo_unlock(bo);
2386 		break;
2387 	default:
2388 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2389 		ops = ERR_PTR(-EINVAL);
2390 	}
2391 	if (IS_ERR(ops))
2392 		return ops;
2393 
2394 	drm_gpuva_for_each_op(__op, ops) {
2395 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2396 
2397 		if (__op->op == DRM_GPUVA_OP_MAP) {
2398 			op->map.immediate =
2399 				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2400 			op->map.read_only =
2401 				flags & DRM_XE_VM_BIND_FLAG_READONLY;
2402 			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
2403 			op->map.is_cpu_addr_mirror = flags &
2404 				DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
2405 			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
2406 			op->map.pat_index = pat_index;
2407 			op->map.invalidate_on_bind =
2408 				__xe_vm_needs_clear_scratch_pages(vm, flags);
2409 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2410 			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2411 			struct xe_svm_range *svm_range;
2412 			struct drm_gpusvm_ctx ctx = {};
2413 			struct xe_tile *tile;
2414 			u8 id, tile_mask = 0;
2415 			u32 i;
2416 
2417 			if (!xe_vma_is_cpu_addr_mirror(vma)) {
2418 				op->prefetch.region = prefetch_region;
2419 				break;
2420 			}
2421 
2422 			ctx.read_only = xe_vma_read_only(vma);
2423 			ctx.devmem_possible = IS_DGFX(vm->xe) &&
2424 					      IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2425 
2426 			for_each_tile(tile, vm->xe, id)
2427 				tile_mask |= 0x1 << id;
2428 
2429 			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2430 			op->prefetch_range.region = prefetch_region;
2431 			op->prefetch_range.ranges_count = 0;
2432 alloc_next_range:
2433 			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2434 
2435 			if (PTR_ERR(svm_range) == -ENOENT) {
2436 				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2437 
2438 				addr = ret == ULONG_MAX ? 0 : ret;
2439 				if (addr)
2440 					goto alloc_next_range;
2441 				else
2442 					goto print_op_label;
2443 			}
2444 
2445 			if (IS_ERR(svm_range)) {
2446 				err = PTR_ERR(svm_range);
2447 				goto unwind_prefetch_ops;
2448 			}
2449 
2450 			if (xe_svm_range_validate(vm, svm_range, tile_mask, !!prefetch_region)) {
2451 				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2452 				goto check_next_range;
2453 			}
2454 
2455 			err = xa_alloc(&op->prefetch_range.range,
2456 				       &i, svm_range, xa_limit_32b,
2457 				       GFP_KERNEL);
2458 
2459 			if (err)
2460 				goto unwind_prefetch_ops;
2461 
2462 			op->prefetch_range.ranges_count++;
2463 			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2464 			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2465 check_next_range:
2466 			if (range_end > xe_svm_range_end(svm_range) &&
2467 			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2468 				addr = xe_svm_range_end(svm_range);
2469 				goto alloc_next_range;
2470 			}
2471 		}
2472 print_op_label:
2473 		print_op(vm->xe, __op);
2474 	}
2475 
2476 	return ops;
2477 
2478 unwind_prefetch_ops:
2479 	xe_svm_prefetch_gpuva_ops_fini(ops);
2480 	drm_gpuva_ops_free(&vm->gpuvm, ops);
2481 	return ERR_PTR(err);
2482 }
2483 
2484 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2485 
new_vma(struct xe_vm * vm,struct drm_gpuva_op_map * op,u16 pat_index,unsigned int flags)2486 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2487 			      u16 pat_index, unsigned int flags)
2488 {
2489 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2490 	struct drm_exec exec;
2491 	struct xe_vma *vma;
2492 	int err = 0;
2493 
2494 	lockdep_assert_held_write(&vm->lock);
2495 
2496 	if (bo) {
2497 		drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
2498 		drm_exec_until_all_locked(&exec) {
2499 			err = 0;
2500 			if (!bo->vm) {
2501 				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2502 				drm_exec_retry_on_contention(&exec);
2503 			}
2504 			if (!err) {
2505 				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2506 				drm_exec_retry_on_contention(&exec);
2507 			}
2508 			if (err) {
2509 				drm_exec_fini(&exec);
2510 				return ERR_PTR(err);
2511 			}
2512 		}
2513 	}
2514 	vma = xe_vma_create(vm, bo, op->gem.offset,
2515 			    op->va.addr, op->va.addr +
2516 			    op->va.range - 1, pat_index, flags);
2517 	if (IS_ERR(vma))
2518 		goto err_unlock;
2519 
2520 	if (xe_vma_is_userptr(vma))
2521 		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2522 	else if (!xe_vma_has_no_bo(vma) && !bo->vm)
2523 		err = add_preempt_fences(vm, bo);
2524 
2525 err_unlock:
2526 	if (bo)
2527 		drm_exec_fini(&exec);
2528 
2529 	if (err) {
2530 		prep_vma_destroy(vm, vma, false);
2531 		xe_vma_destroy_unlocked(vma);
2532 		vma = ERR_PTR(err);
2533 	}
2534 
2535 	return vma;
2536 }
2537 
xe_vma_max_pte_size(struct xe_vma * vma)2538 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2539 {
2540 	if (vma->gpuva.flags & XE_VMA_PTE_1G)
2541 		return SZ_1G;
2542 	else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2543 		return SZ_2M;
2544 	else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2545 		return SZ_64K;
2546 	else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2547 		return SZ_4K;
2548 	return SZ_1G;	/* Uninitialized, use max size */
2549 	return SZ_1G;	/* Uninitialized, used max size */
2550 }
2551 
xe_vma_set_pte_size(struct xe_vma * vma,u64 size)2552 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2553 {
2554 	switch (size) {
2555 	case SZ_1G:
2556 		vma->gpuva.flags |= XE_VMA_PTE_1G;
2557 		break;
2558 	case SZ_2M:
2559 		vma->gpuva.flags |= XE_VMA_PTE_2M;
2560 		break;
2561 	case SZ_64K:
2562 		vma->gpuva.flags |= XE_VMA_PTE_64K;
2563 		break;
2564 	case SZ_4K:
2565 		vma->gpuva.flags |= XE_VMA_PTE_4K;
2566 		break;
2567 	}
2568 }
2569 
xe_vma_op_commit(struct xe_vm * vm,struct xe_vma_op * op)2570 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2571 {
2572 	int err = 0;
2573 
2574 	lockdep_assert_held_write(&vm->lock);
2575 
2576 	switch (op->base.op) {
2577 	case DRM_GPUVA_OP_MAP:
2578 		err |= xe_vm_insert_vma(vm, op->map.vma);
2579 		if (!err)
2580 			op->flags |= XE_VMA_OP_COMMITTED;
2581 		break;
2582 	case DRM_GPUVA_OP_REMAP:
2583 	{
2584 		u8 tile_present =
2585 			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2586 
2587 		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2588 				 true);
2589 		op->flags |= XE_VMA_OP_COMMITTED;
2590 
2591 		if (op->remap.prev) {
2592 			err |= xe_vm_insert_vma(vm, op->remap.prev);
2593 			if (!err)
2594 				op->flags |= XE_VMA_OP_PREV_COMMITTED;
2595 			if (!err && op->remap.skip_prev) {
2596 				op->remap.prev->tile_present =
2597 					tile_present;
2598 				op->remap.prev = NULL;
2599 			}
2600 		}
2601 		if (op->remap.next) {
2602 			err |= xe_vm_insert_vma(vm, op->remap.next);
2603 			if (!err)
2604 				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2605 			if (!err && op->remap.skip_next) {
2606 				op->remap.next->tile_present =
2607 					tile_present;
2608 				op->remap.next = NULL;
2609 			}
2610 		}
2611 
2612 		/* Adjust for partial unbind after removing VMA from VM */
2613 		if (!err) {
2614 			op->base.remap.unmap->va->va.addr = op->remap.start;
2615 			op->base.remap.unmap->va->va.range = op->remap.range;
2616 		}
2617 		break;
2618 	}
2619 	case DRM_GPUVA_OP_UNMAP:
2620 		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2621 		op->flags |= XE_VMA_OP_COMMITTED;
2622 		break;
2623 	case DRM_GPUVA_OP_PREFETCH:
2624 		op->flags |= XE_VMA_OP_COMMITTED;
2625 		break;
2626 	default:
2627 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2628 	}
2629 
2630 	return err;
2631 }
2632 
vm_bind_ioctl_ops_parse(struct xe_vm * vm,struct drm_gpuva_ops * ops,struct xe_vma_ops * vops)2633 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2634 				   struct xe_vma_ops *vops)
2635 {
2636 	struct xe_device *xe = vm->xe;
2637 	struct drm_gpuva_op *__op;
2638 	struct xe_tile *tile;
2639 	u8 id, tile_mask = 0;
2640 	int err = 0;
2641 
2642 	lockdep_assert_held_write(&vm->lock);
2643 
2644 	for_each_tile(tile, vm->xe, id)
2645 		tile_mask |= 0x1 << id;
2646 
2647 	drm_gpuva_for_each_op(__op, ops) {
2648 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2649 		struct xe_vma *vma;
2650 		unsigned int flags = 0;
2651 
2652 		INIT_LIST_HEAD(&op->link);
2653 		list_add_tail(&op->link, &vops->list);
2654 		op->tile_mask = tile_mask;
2655 
2656 		switch (op->base.op) {
2657 		case DRM_GPUVA_OP_MAP:
2658 		{
2659 			flags |= op->map.read_only ?
2660 				VMA_CREATE_FLAG_READ_ONLY : 0;
2661 			flags |= op->map.is_null ?
2662 				VMA_CREATE_FLAG_IS_NULL : 0;
2663 			flags |= op->map.dumpable ?
2664 				VMA_CREATE_FLAG_DUMPABLE : 0;
2665 			flags |= op->map.is_cpu_addr_mirror ?
2666 				VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2667 
2668 			vma = new_vma(vm, &op->base.map, op->map.pat_index,
2669 				      flags);
2670 			if (IS_ERR(vma))
2671 				return PTR_ERR(vma);
2672 
2673 			op->map.vma = vma;
2674 			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2675 			     !op->map.is_cpu_addr_mirror) ||
2676 			    op->map.invalidate_on_bind)
2677 				xe_vma_ops_incr_pt_update_ops(vops,
2678 							      op->tile_mask, 1);
2679 			break;
2680 		}
2681 		case DRM_GPUVA_OP_REMAP:
2682 		{
2683 			struct xe_vma *old =
2684 				gpuva_to_vma(op->base.remap.unmap->va);
2685 			bool skip = xe_vma_is_cpu_addr_mirror(old);
2686 			u64 start = xe_vma_start(old), end = xe_vma_end(old);
2687 			int num_remap_ops = 0;
2688 
2689 			if (op->base.remap.prev)
2690 				start = op->base.remap.prev->va.addr +
2691 					op->base.remap.prev->va.range;
2692 			if (op->base.remap.next)
2693 				end = op->base.remap.next->va.addr;
2694 
2695 			if (xe_vma_is_cpu_addr_mirror(old) &&
2696 			    xe_svm_has_mapping(vm, start, end))
2697 				return -EBUSY;
2698 
2699 			op->remap.start = xe_vma_start(old);
2700 			op->remap.range = xe_vma_size(old);
2701 
2702 			flags |= op->base.remap.unmap->va->flags &
2703 				XE_VMA_READ_ONLY ?
2704 				VMA_CREATE_FLAG_READ_ONLY : 0;
2705 			flags |= op->base.remap.unmap->va->flags &
2706 				DRM_GPUVA_SPARSE ?
2707 				VMA_CREATE_FLAG_IS_NULL : 0;
2708 			flags |= op->base.remap.unmap->va->flags &
2709 				XE_VMA_DUMPABLE ?
2710 				VMA_CREATE_FLAG_DUMPABLE : 0;
2711 			flags |= xe_vma_is_cpu_addr_mirror(old) ?
2712 				VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
2713 
2714 			if (op->base.remap.prev) {
2715 				vma = new_vma(vm, op->base.remap.prev,
2716 					      old->pat_index, flags);
2717 				if (IS_ERR(vma))
2718 					return PTR_ERR(vma);
2719 
2720 				op->remap.prev = vma;
2721 
2722 				/*
2723 				 * Userptr creates a new SG mapping so
2724 				 * we must also rebind.
2725 				 */
2726 				op->remap.skip_prev = skip ||
2727 					(!xe_vma_is_userptr(old) &&
2728 					IS_ALIGNED(xe_vma_end(vma),
2729 						   xe_vma_max_pte_size(old)));
2730 				if (op->remap.skip_prev) {
2731 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2732 					op->remap.range -=
2733 						xe_vma_end(vma) -
2734 						xe_vma_start(old);
2735 					op->remap.start = xe_vma_end(vma);
2736 					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2737 					       (ULL)op->remap.start,
2738 					       (ULL)op->remap.range);
2739 				} else {
2740 					num_remap_ops++;
2741 				}
2742 			}
2743 
2744 			if (op->base.remap.next) {
2745 				vma = new_vma(vm, op->base.remap.next,
2746 					      old->pat_index, flags);
2747 				if (IS_ERR(vma))
2748 					return PTR_ERR(vma);
2749 
2750 				op->remap.next = vma;
2751 
2752 				/*
2753 				 * Userptr creates a new SG mapping so
2754 				 * we must also rebind.
2755 				 */
2756 				op->remap.skip_next = skip ||
2757 					(!xe_vma_is_userptr(old) &&
2758 					IS_ALIGNED(xe_vma_start(vma),
2759 						   xe_vma_max_pte_size(old)));
2760 				if (op->remap.skip_next) {
2761 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2762 					op->remap.range -=
2763 						xe_vma_end(old) -
2764 						xe_vma_start(vma);
2765 					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2766 					       (ULL)op->remap.start,
2767 					       (ULL)op->remap.range);
2768 				} else {
2769 					num_remap_ops++;
2770 				}
2771 			}
2772 			if (!skip)
2773 				num_remap_ops++;
2774 
2775 			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2776 			break;
2777 		}
2778 		case DRM_GPUVA_OP_UNMAP:
2779 			vma = gpuva_to_vma(op->base.unmap.va);
2780 
2781 			if (xe_vma_is_cpu_addr_mirror(vma) &&
2782 			    xe_svm_has_mapping(vm, xe_vma_start(vma),
2783 					       xe_vma_end(vma)))
2784 				return -EBUSY;
2785 
2786 			if (!xe_vma_is_cpu_addr_mirror(vma))
2787 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2788 			break;
2789 		case DRM_GPUVA_OP_PREFETCH:
2790 			vma = gpuva_to_vma(op->base.prefetch.va);
2791 
2792 			if (xe_vma_is_userptr(vma)) {
2793 				err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2794 				if (err)
2795 					return err;
2796 			}
2797 
2798 			if (xe_vma_is_cpu_addr_mirror(vma))
2799 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2800 							      op->prefetch_range.ranges_count);
2801 			else
2802 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2803 
2804 			break;
2805 		default:
2806 			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2807 		}
2808 
2809 		err = xe_vma_op_commit(vm, op);
2810 		if (err)
2811 			return err;
2812 	}
2813 
2814 	return 0;
2815 }
2816 
xe_vma_op_unwind(struct xe_vm * vm,struct xe_vma_op * op,bool post_commit,bool prev_post_commit,bool next_post_commit)2817 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2818 			     bool post_commit, bool prev_post_commit,
2819 			     bool next_post_commit)
2820 {
2821 	lockdep_assert_held_write(&vm->lock);
2822 
2823 	switch (op->base.op) {
2824 	case DRM_GPUVA_OP_MAP:
2825 		if (op->map.vma) {
2826 			prep_vma_destroy(vm, op->map.vma, post_commit);
2827 			xe_vma_destroy_unlocked(op->map.vma);
2828 		}
2829 		break;
2830 	case DRM_GPUVA_OP_UNMAP:
2831 	{
2832 		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2833 
2834 		if (vma) {
2835 			down_read(&vm->userptr.notifier_lock);
2836 			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2837 			up_read(&vm->userptr.notifier_lock);
2838 			if (post_commit)
2839 				xe_vm_insert_vma(vm, vma);
2840 		}
2841 		break;
2842 	}
2843 	case DRM_GPUVA_OP_REMAP:
2844 	{
2845 		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2846 
2847 		if (op->remap.prev) {
2848 			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2849 			xe_vma_destroy_unlocked(op->remap.prev);
2850 		}
2851 		if (op->remap.next) {
2852 			prep_vma_destroy(vm, op->remap.next, next_post_commit);
2853 			xe_vma_destroy_unlocked(op->remap.next);
2854 		}
2855 		if (vma) {
2856 			down_read(&vm->userptr.notifier_lock);
2857 			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2858 			up_read(&vm->userptr.notifier_lock);
2859 			if (post_commit)
2860 				xe_vm_insert_vma(vm, vma);
2861 		}
2862 		break;
2863 	}
2864 	case DRM_GPUVA_OP_PREFETCH:
2865 		/* Nothing to do */
2866 		break;
2867 	default:
2868 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2869 	}
2870 }
2871 
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)2872 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2873 				     struct drm_gpuva_ops **ops,
2874 				     int num_ops_list)
2875 {
2876 	int i;
2877 
2878 	for (i = num_ops_list - 1; i >= 0; --i) {
2879 		struct drm_gpuva_ops *__ops = ops[i];
2880 		struct drm_gpuva_op *__op;
2881 
2882 		if (!__ops)
2883 			continue;
2884 
2885 		drm_gpuva_for_each_op_reverse(__op, __ops) {
2886 			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2887 
2888 			xe_vma_op_unwind(vm, op,
2889 					 op->flags & XE_VMA_OP_COMMITTED,
2890 					 op->flags & XE_VMA_OP_PREV_COMMITTED,
2891 					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2892 		}
2893 	}
2894 }
2895 
vma_lock_and_validate(struct drm_exec * exec,struct xe_vma * vma,bool validate)2896 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2897 				 bool validate)
2898 {
2899 	struct xe_bo *bo = xe_vma_bo(vma);
2900 	struct xe_vm *vm = xe_vma_vm(vma);
2901 	int err = 0;
2902 
2903 	if (bo) {
2904 		if (!bo->vm)
2905 			err = drm_exec_lock_obj(exec, &bo->ttm.base);
2906 		if (!err && validate)
2907 			err = xe_bo_validate(bo, vm,
2908 					     !xe_vm_in_preempt_fence_mode(vm));
2909 	}
2910 
2911 	return err;
2912 }
2913 
check_ufence(struct xe_vma * vma)2914 static int check_ufence(struct xe_vma *vma)
2915 {
2916 	if (vma->ufence) {
2917 		struct xe_user_fence * const f = vma->ufence;
2918 
2919 		if (!xe_sync_ufence_get_status(f))
2920 			return -EBUSY;
2921 
2922 		vma->ufence = NULL;
2923 		xe_sync_ufence_put(f);
2924 	}
2925 
2926 	return 0;
2927 }
2928 
prefetch_ranges(struct xe_vm * vm,struct xe_vma_op * op)2929 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2930 {
2931 	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2932 	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2933 	int err = 0;
2934 
2935 	struct xe_svm_range *svm_range;
2936 	struct drm_gpusvm_ctx ctx = {};
2937 	struct xe_tile *tile;
2938 	unsigned long i;
2939 	u32 region;
2940 
2941 	if (!xe_vma_is_cpu_addr_mirror(vma))
2942 		return 0;
2943 
2944 	region = op->prefetch_range.region;
2945 
2946 	ctx.read_only = xe_vma_read_only(vma);
2947 	ctx.devmem_possible = devmem_possible;
2948 	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2949 
2950 	/* TODO: Thread the migration */
2951 	xa_for_each(&op->prefetch_range.range, i, svm_range) {
2952 		if (!region)
2953 			xe_svm_range_migrate_to_smem(vm, svm_range);
2954 
2955 		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
2956 			tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
2957 			err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2958 			if (err) {
2959 				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2960 					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2961 				return -ENODATA;
2962 			}
2963 			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2964 		}
2965 
2966 		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2967 		if (err) {
2968 			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2969 				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2970 			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2971 				err = -ENODATA;
2972 			return err;
2973 		}
2974 		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2975 	}
2976 
2977 	return err;
2978 }
2979 
op_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_op * op)2980 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2981 			    struct xe_vma_op *op)
2982 {
2983 	int err = 0;
2984 
2985 	switch (op->base.op) {
2986 	case DRM_GPUVA_OP_MAP:
2987 		if (!op->map.invalidate_on_bind)
2988 			err = vma_lock_and_validate(exec, op->map.vma,
2989 						    !xe_vm_in_fault_mode(vm) ||
2990 						    op->map.immediate);
2991 		break;
2992 	case DRM_GPUVA_OP_REMAP:
2993 		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2994 		if (err)
2995 			break;
2996 
2997 		err = vma_lock_and_validate(exec,
2998 					    gpuva_to_vma(op->base.remap.unmap->va),
2999 					    false);
3000 		if (!err && op->remap.prev)
3001 			err = vma_lock_and_validate(exec, op->remap.prev, true);
3002 		if (!err && op->remap.next)
3003 			err = vma_lock_and_validate(exec, op->remap.next, true);
3004 		break;
3005 	case DRM_GPUVA_OP_UNMAP:
3006 		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
3007 		if (err)
3008 			break;
3009 
3010 		err = vma_lock_and_validate(exec,
3011 					    gpuva_to_vma(op->base.unmap.va),
3012 					    false);
3013 		break;
3014 	case DRM_GPUVA_OP_PREFETCH:
3015 	{
3016 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3017 		u32 region;
3018 
3019 		if (xe_vma_is_cpu_addr_mirror(vma))
3020 			region = op->prefetch_range.region;
3021 		else
3022 			region = op->prefetch.region;
3023 
3024 		xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
3025 
3026 		err = vma_lock_and_validate(exec,
3027 					    gpuva_to_vma(op->base.prefetch.va),
3028 					    false);
3029 		if (!err && !xe_vma_has_no_bo(vma))
3030 			err = xe_bo_migrate(xe_vma_bo(vma),
3031 					    region_to_mem_type[region]);
3032 		break;
3033 	}
3034 	default:
3035 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3036 	}
3037 
3038 	return err;
3039 }
3040 
vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm * vm,struct xe_vma_ops * vops)3041 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3042 {
3043 	struct xe_vma_op *op;
3044 	int err;
3045 
3046 	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3047 		return 0;
3048 
3049 	list_for_each_entry(op, &vops->list, link) {
3050 		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3051 			err = prefetch_ranges(vm, op);
3052 			if (err)
3053 				return err;
3054 		}
3055 	}
3056 
3057 	return 0;
3058 }
3059 
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)3060 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3061 					   struct xe_vm *vm,
3062 					   struct xe_vma_ops *vops)
3063 {
3064 	struct xe_vma_op *op;
3065 	int err;
3066 
3067 	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3068 	if (err)
3069 		return err;
3070 
3071 	list_for_each_entry(op, &vops->list, link) {
3072 		err = op_lock_and_prep(exec, vm, op);
3073 		if (err)
3074 			return err;
3075 	}
3076 
3077 #ifdef TEST_VM_OPS_ERROR
3078 	if (vops->inject_error &&
3079 	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3080 		return -ENOSPC;
3081 #endif
3082 
3083 	return 0;
3084 }
3085 
op_trace(struct xe_vma_op * op)3086 static void op_trace(struct xe_vma_op *op)
3087 {
3088 	switch (op->base.op) {
3089 	case DRM_GPUVA_OP_MAP:
3090 		trace_xe_vma_bind(op->map.vma);
3091 		break;
3092 	case DRM_GPUVA_OP_REMAP:
3093 		trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3094 		if (op->remap.prev)
3095 			trace_xe_vma_bind(op->remap.prev);
3096 		if (op->remap.next)
3097 			trace_xe_vma_bind(op->remap.next);
3098 		break;
3099 	case DRM_GPUVA_OP_UNMAP:
3100 		trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3101 		break;
3102 	case DRM_GPUVA_OP_PREFETCH:
3103 		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3104 		break;
3105 	case DRM_GPUVA_OP_DRIVER:
3106 		break;
3107 	default:
3108 		XE_WARN_ON("NOT POSSIBLE");
3109 	}
3110 }
3111 
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)3112 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3113 {
3114 	struct xe_vma_op *op;
3115 
3116 	list_for_each_entry(op, &vops->list, link)
3117 		op_trace(op);
3118 }
3119 
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)3120 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3121 {
3122 	struct xe_exec_queue *q = vops->q;
3123 	struct xe_tile *tile;
3124 	int number_tiles = 0;
3125 	u8 id;
3126 
3127 	for_each_tile(tile, vm->xe, id) {
3128 		if (vops->pt_update_ops[id].num_ops)
3129 			++number_tiles;
3130 
3131 		if (vops->pt_update_ops[id].q)
3132 			continue;
3133 
3134 		if (q) {
3135 			vops->pt_update_ops[id].q = q;
3136 			if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3137 				q = list_next_entry(q, multi_gt_list);
3138 		} else {
3139 			vops->pt_update_ops[id].q = vm->q[id];
3140 		}
3141 	}
3142 
3143 	return number_tiles;
3144 }
3145 
ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3146 static struct dma_fence *ops_execute(struct xe_vm *vm,
3147 				     struct xe_vma_ops *vops)
3148 {
3149 	struct xe_tile *tile;
3150 	struct dma_fence *fence = NULL;
3151 	struct dma_fence **fences = NULL;
3152 	struct dma_fence_array *cf = NULL;
3153 	int number_tiles = 0, current_fence = 0, err;
3154 	u8 id;
3155 
3156 	number_tiles = vm_ops_setup_tile_args(vm, vops);
3157 	if (number_tiles == 0)
3158 		return ERR_PTR(-ENODATA);
3159 
3160 	if (number_tiles > 1) {
3161 		fences = kmalloc_array(number_tiles, sizeof(*fences),
3162 				       GFP_KERNEL);
3163 		if (!fences) {
3164 			fence = ERR_PTR(-ENOMEM);
3165 			goto err_trace;
3166 		}
3167 	}
3168 
3169 	for_each_tile(tile, vm->xe, id) {
3170 		if (!vops->pt_update_ops[id].num_ops)
3171 			continue;
3172 
3173 		err = xe_pt_update_ops_prepare(tile, vops);
3174 		if (err) {
3175 			fence = ERR_PTR(err);
3176 			goto err_out;
3177 		}
3178 	}
3179 
3180 	trace_xe_vm_ops_execute(vops);
3181 
3182 	for_each_tile(tile, vm->xe, id) {
3183 		if (!vops->pt_update_ops[id].num_ops)
3184 			continue;
3185 
3186 		fence = xe_pt_update_ops_run(tile, vops);
3187 		if (IS_ERR(fence))
3188 			goto err_out;
3189 
3190 		if (fences)
3191 			fences[current_fence++] = fence;
3192 	}
3193 
3194 	if (fences) {
3195 		cf = dma_fence_array_create(number_tiles, fences,
3196 					    vm->composite_fence_ctx,
3197 					    vm->composite_fence_seqno++,
3198 					    false);
3199 		if (!cf) {
3200 			--vm->composite_fence_seqno;
3201 			fence = ERR_PTR(-ENOMEM);
3202 			goto err_out;
3203 		}
3204 		fence = &cf->base;
3205 	}
3206 
3207 	for_each_tile(tile, vm->xe, id) {
3208 		if (!vops->pt_update_ops[id].num_ops)
3209 			continue;
3210 
3211 		xe_pt_update_ops_fini(tile, vops);
3212 	}
3213 
3214 	return fence;
3215 
3216 err_out:
3217 	for_each_tile(tile, vm->xe, id) {
3218 		if (!vops->pt_update_ops[id].num_ops)
3219 			continue;
3220 
3221 		xe_pt_update_ops_abort(tile, vops);
3222 	}
3223 	while (current_fence)
3224 		dma_fence_put(fences[--current_fence]);
3225 	kfree(fences);
3226 	kfree(cf);
3227 
3228 err_trace:
3229 	trace_xe_vm_ops_fail(vm);
3230 	return fence;
3231 }
3232 
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)3233 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3234 {
3235 	if (vma->ufence)
3236 		xe_sync_ufence_put(vma->ufence);
3237 	vma->ufence = __xe_sync_ufence_get(ufence);
3238 }
3239 
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)3240 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3241 			  struct xe_user_fence *ufence)
3242 {
3243 	switch (op->base.op) {
3244 	case DRM_GPUVA_OP_MAP:
3245 		vma_add_ufence(op->map.vma, ufence);
3246 		break;
3247 	case DRM_GPUVA_OP_REMAP:
3248 		if (op->remap.prev)
3249 			vma_add_ufence(op->remap.prev, ufence);
3250 		if (op->remap.next)
3251 			vma_add_ufence(op->remap.next, ufence);
3252 		break;
3253 	case DRM_GPUVA_OP_UNMAP:
3254 		break;
3255 	case DRM_GPUVA_OP_PREFETCH:
3256 		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3257 		break;
3258 	default:
3259 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3260 	}
3261 }
3262 
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3263 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3264 				   struct dma_fence *fence)
3265 {
3266 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3267 	struct xe_user_fence *ufence;
3268 	struct xe_vma_op *op;
3269 	int i;
3270 
3271 	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3272 	list_for_each_entry(op, &vops->list, link) {
3273 		if (ufence)
3274 			op_add_ufence(vm, op, ufence);
3275 
3276 		if (op->base.op == DRM_GPUVA_OP_UNMAP)
3277 			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3278 		else if (op->base.op == DRM_GPUVA_OP_REMAP)
3279 			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3280 				       fence);
3281 	}
3282 	if (ufence)
3283 		xe_sync_ufence_put(ufence);
3284 	if (fence) {
3285 		for (i = 0; i < vops->num_syncs; i++)
3286 			xe_sync_entry_signal(vops->syncs + i, fence);
3287 		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3288 	}
3289 }
3290 
vm_bind_ioctl_ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3291 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3292 						   struct xe_vma_ops *vops)
3293 {
3294 	struct drm_exec exec;
3295 	struct dma_fence *fence;
3296 	int err;
3297 
3298 	lockdep_assert_held_write(&vm->lock);
3299 
3300 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
3301 		      DRM_EXEC_IGNORE_DUPLICATES, 0);
3302 	drm_exec_until_all_locked(&exec) {
3303 		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3304 		drm_exec_retry_on_contention(&exec);
3305 		if (err) {
3306 			fence = ERR_PTR(err);
3307 			goto unlock;
3308 		}
3309 
3310 		fence = ops_execute(vm, vops);
3311 		if (IS_ERR(fence)) {
3312 			if (PTR_ERR(fence) == -ENODATA)
3313 				vm_bind_ioctl_ops_fini(vm, vops, NULL);
3314 			goto unlock;
3315 		}
3316 
3317 		vm_bind_ioctl_ops_fini(vm, vops, fence);
3318 	}
3319 
3320 unlock:
3321 	drm_exec_fini(&exec);
3322 	return fence;
3323 }
3324 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3325 
3326 #define SUPPORTED_FLAGS_STUB  \
3327 	(DRM_XE_VM_BIND_FLAG_READONLY | \
3328 	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3329 	 DRM_XE_VM_BIND_FLAG_NULL | \
3330 	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3331 	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3332 	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
3333 
3334 #ifdef TEST_VM_OPS_ERROR
3335 #define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3336 #else
3337 #define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
3338 #endif
3339 
3340 #define XE_64K_PAGE_MASK 0xffffull
3341 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3342 
vm_bind_ioctl_check_args(struct xe_device * xe,struct xe_vm * vm,struct drm_xe_vm_bind * args,struct drm_xe_vm_bind_op ** bind_ops)3343 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3344 				    struct drm_xe_vm_bind *args,
3345 				    struct drm_xe_vm_bind_op **bind_ops)
3346 {
3347 	int err;
3348 	int i;
3349 
3350 	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3351 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3352 		return -EINVAL;
3353 
3354 	if (XE_IOCTL_DBG(xe, args->extensions))
3355 		return -EINVAL;
3356 
3357 	if (args->num_binds > 1) {
3358 		u64 __user *bind_user =
3359 			u64_to_user_ptr(args->vector_of_binds);
3360 
3361 		*bind_ops = kvmalloc_array(args->num_binds,
3362 					   sizeof(struct drm_xe_vm_bind_op),
3363 					   GFP_KERNEL | __GFP_ACCOUNT |
3364 					   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3365 		if (!*bind_ops)
3366 			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3367 
3368 		err = copy_from_user(*bind_ops, bind_user,
3369 				     sizeof(struct drm_xe_vm_bind_op) *
3370 				     args->num_binds);
3371 		if (XE_IOCTL_DBG(xe, err)) {
3372 			err = -EFAULT;
3373 			goto free_bind_ops;
3374 		}
3375 	} else {
3376 		*bind_ops = &args->bind;
3377 	}
3378 
3379 	for (i = 0; i < args->num_binds; ++i) {
3380 		u64 range = (*bind_ops)[i].range;
3381 		u64 addr = (*bind_ops)[i].addr;
3382 		u32 op = (*bind_ops)[i].op;
3383 		u32 flags = (*bind_ops)[i].flags;
3384 		u32 obj = (*bind_ops)[i].obj;
3385 		u64 obj_offset = (*bind_ops)[i].obj_offset;
3386 		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3387 		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3388 		bool is_cpu_addr_mirror = flags &
3389 			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3390 		u16 pat_index = (*bind_ops)[i].pat_index;
3391 		u16 coh_mode;
3392 
3393 		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3394 				 (!xe_vm_in_fault_mode(vm) ||
3395 				 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3396 			err = -EINVAL;
3397 			goto free_bind_ops;
3398 		}
3399 
3400 		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3401 			err = -EINVAL;
3402 			goto free_bind_ops;
3403 		}
3404 
3405 		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3406 		(*bind_ops)[i].pat_index = pat_index;
3407 		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3408 		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3409 			err = -EINVAL;
3410 			goto free_bind_ops;
3411 		}
3412 
3413 		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3414 			err = -EINVAL;
3415 			goto free_bind_ops;
3416 		}
3417 
3418 		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3419 		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3420 		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3421 		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3422 						    is_cpu_addr_mirror)) ||
3423 		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3424 				 (is_null || is_cpu_addr_mirror)) ||
3425 		    XE_IOCTL_DBG(xe, !obj &&
3426 				 op == DRM_XE_VM_BIND_OP_MAP &&
3427 				 !is_null && !is_cpu_addr_mirror) ||
3428 		    XE_IOCTL_DBG(xe, !obj &&
3429 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3430 		    XE_IOCTL_DBG(xe, addr &&
3431 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3432 		    XE_IOCTL_DBG(xe, range &&
3433 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3434 		    XE_IOCTL_DBG(xe, obj &&
3435 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3436 		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3437 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3438 		    XE_IOCTL_DBG(xe, obj &&
3439 				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3440 		    XE_IOCTL_DBG(xe, prefetch_region &&
3441 				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3442 		    XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
3443 				       xe->info.mem_region_mask)) ||
3444 		    XE_IOCTL_DBG(xe, obj &&
3445 				 op == DRM_XE_VM_BIND_OP_UNMAP)) {
3446 			err = -EINVAL;
3447 			goto free_bind_ops;
3448 		}
3449 
3450 		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3451 		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3452 		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3453 		    XE_IOCTL_DBG(xe, !range &&
3454 				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3455 			err = -EINVAL;
3456 			goto free_bind_ops;
3457 		}
3458 	}
3459 
3460 	return 0;
3461 
3462 free_bind_ops:
3463 	if (args->num_binds > 1)
3464 		kvfree(*bind_ops);
3465 	*bind_ops = NULL;
3466 	return err;
3467 }
3468 
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3469 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3470 				       struct xe_exec_queue *q,
3471 				       struct xe_sync_entry *syncs,
3472 				       int num_syncs)
3473 {
3474 	struct dma_fence *fence;
3475 	int i, err = 0;
3476 
3477 	fence = xe_sync_in_fence_get(syncs, num_syncs,
3478 				     to_wait_exec_queue(vm, q), vm);
3479 	if (IS_ERR(fence))
3480 		return PTR_ERR(fence);
3481 
3482 	for (i = 0; i < num_syncs; i++)
3483 		xe_sync_entry_signal(&syncs[i], fence);
3484 
3485 	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3486 				     fence);
3487 	dma_fence_put(fence);
3488 
3489 	return err;
3490 }
3491 
xe_vma_ops_init(struct xe_vma_ops * vops,struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,u32 num_syncs)3492 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3493 			    struct xe_exec_queue *q,
3494 			    struct xe_sync_entry *syncs, u32 num_syncs)
3495 {
3496 	memset(vops, 0, sizeof(*vops));
3497 	INIT_LIST_HEAD(&vops->list);
3498 	vops->vm = vm;
3499 	vops->q = q;
3500 	vops->syncs = syncs;
3501 	vops->num_syncs = num_syncs;
3502 	vops->flags = 0;
3503 }
3504 
xe_vm_bind_ioctl_validate_bo(struct xe_device * xe,struct xe_bo * bo,u64 addr,u64 range,u64 obj_offset,u16 pat_index,u32 op,u32 bind_flags)3505 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3506 					u64 addr, u64 range, u64 obj_offset,
3507 					u16 pat_index, u32 op, u32 bind_flags)
3508 {
3509 	u16 coh_mode;
3510 
3511 	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3512 	    XE_IOCTL_DBG(xe, obj_offset >
3513 			 xe_bo_size(bo) - range)) {
3514 		return -EINVAL;
3515 	}
3516 
3517 	/*
3518 	 * Some platforms require 64k VM_BIND alignment,
3519 	 * specifically those with XE_VRAM_FLAGS_NEED64K.
3520 	 *
3521 	 * Other platforms may have BOs set to 64k physical placement,
3522 	 * but can be mapped at 4k offsets anyway. This check is only
3523 	 * there for the former case.
3524 	 */
3525 	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3526 	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3527 		if (XE_IOCTL_DBG(xe, obj_offset &
3528 				 XE_64K_PAGE_MASK) ||
3529 		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3530 		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3531 			return -EINVAL;
3532 		}
3533 	}
3534 
3535 	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3536 	if (bo->cpu_caching) {
3537 		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3538 				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3539 			return -EINVAL;
3540 		}
3541 	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3542 		/*
3543 		 * Imported dma-buf from a different device should
3544 		 * require 1way or 2way coherency since we don't know
3545 		 * how it was mapped on the CPU. Just assume it is
3546 		 * potentially cached on the CPU side.
3547 		 */
3548 		return -EINVAL;
3549 	}
3550 
3551 	/* If a BO is protected it can only be mapped if the key is still valid */
3552 	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3553 	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3554 		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3555 			return -ENOEXEC;
3556 
3557 	return 0;
3558 }
3559 
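/*
 * Minimal userspace sketch (not part of the driver) of a single map through
 * the ioctl handled below, assuming the uapi definitions from
 * uapi/drm/xe_drm.h; handles, addresses and the PAT index are illustrative
 * and syncs/error handling are elided:
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.addr = 0x1a0000,
 *			.range = 0x10000,
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */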
xe_vm_bind_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3560 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3561 {
3562 	struct xe_device *xe = to_xe_device(dev);
3563 	struct xe_file *xef = to_xe_file(file);
3564 	struct drm_xe_vm_bind *args = data;
3565 	struct drm_xe_sync __user *syncs_user;
3566 	struct xe_bo **bos = NULL;
3567 	struct drm_gpuva_ops **ops = NULL;
3568 	struct xe_vm *vm;
3569 	struct xe_exec_queue *q = NULL;
3570 	u32 num_syncs, num_ufence = 0;
3571 	struct xe_sync_entry *syncs = NULL;
3572 	struct drm_xe_vm_bind_op *bind_ops = NULL;
3573 	struct xe_vma_ops vops;
3574 	struct dma_fence *fence;
3575 	int err;
3576 	int i;
3577 
3578 	vm = xe_vm_lookup(xef, args->vm_id);
3579 	if (XE_IOCTL_DBG(xe, !vm))
3580 		return -EINVAL;
3581 
3582 	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3583 	if (err)
3584 		goto put_vm;
3585 
3586 	if (args->exec_queue_id) {
3587 		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3588 		if (XE_IOCTL_DBG(xe, !q)) {
3589 			err = -ENOENT;
3590 			goto put_vm;
3591 		}
3592 
3593 		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3594 			err = -EINVAL;
3595 			goto put_exec_queue;
3596 		}
3597 	}
3598 
3599 	/* Ensure all UNMAPs are visible */
3600 	xe_svm_flush(vm);
3601 
3602 	err = down_write_killable(&vm->lock);
3603 	if (err)
3604 		goto put_exec_queue;
3605 
3606 	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3607 		err = -ENOENT;
3608 		goto release_vm_lock;
3609 	}
3610 
3611 	for (i = 0; i < args->num_binds; ++i) {
3612 		u64 range = bind_ops[i].range;
3613 		u64 addr = bind_ops[i].addr;
3614 
3615 		if (XE_IOCTL_DBG(xe, range > vm->size) ||
3616 		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3617 			err = -EINVAL;
3618 			goto release_vm_lock;
3619 		}
3620 	}
3621 
3622 	if (args->num_binds) {
3623 		bos = kvcalloc(args->num_binds, sizeof(*bos),
3624 			       GFP_KERNEL | __GFP_ACCOUNT |
3625 			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3626 		if (!bos) {
3627 			err = -ENOMEM;
3628 			goto release_vm_lock;
3629 		}
3630 
3631 		ops = kvcalloc(args->num_binds, sizeof(*ops),
3632 			       GFP_KERNEL | __GFP_ACCOUNT |
3633 			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3634 		if (!ops) {
3635 			err = -ENOMEM;
3636 			goto release_vm_lock;
3637 		}
3638 	}
3639 
3640 	for (i = 0; i < args->num_binds; ++i) {
3641 		struct drm_gem_object *gem_obj;
3642 		u64 range = bind_ops[i].range;
3643 		u64 addr = bind_ops[i].addr;
3644 		u32 obj = bind_ops[i].obj;
3645 		u64 obj_offset = bind_ops[i].obj_offset;
3646 		u16 pat_index = bind_ops[i].pat_index;
3647 		u32 op = bind_ops[i].op;
3648 		u32 bind_flags = bind_ops[i].flags;
3649 
3650 		if (!obj)
3651 			continue;
3652 
3653 		gem_obj = drm_gem_object_lookup(file, obj);
3654 		if (XE_IOCTL_DBG(xe, !gem_obj)) {
3655 			err = -ENOENT;
3656 			goto put_obj;
3657 		}
3658 		bos[i] = gem_to_xe_bo(gem_obj);
3659 
3660 		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3661 						   obj_offset, pat_index, op,
3662 						   bind_flags);
3663 		if (err)
3664 			goto put_obj;
3665 	}
3666 
3667 	if (args->num_syncs) {
3668 		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3669 		if (!syncs) {
3670 			err = -ENOMEM;
3671 			goto put_obj;
3672 		}
3673 	}
3674 
3675 	syncs_user = u64_to_user_ptr(args->syncs);
3676 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3677 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3678 					  &syncs_user[num_syncs],
3679 					  (xe_vm_in_lr_mode(vm) ?
3680 					   SYNC_PARSE_FLAG_LR_MODE : 0) |
3681 					  (!args->num_binds ?
3682 					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3683 		if (err)
3684 			goto free_syncs;
3685 
3686 		if (xe_sync_is_ufence(&syncs[num_syncs]))
3687 			num_ufence++;
3688 	}
3689 
3690 	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3691 		err = -EINVAL;
3692 		goto free_syncs;
3693 	}
3694 
3695 	if (!args->num_binds) {
3696 		err = -ENODATA;
3697 		goto free_syncs;
3698 	}
3699 
3700 	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3701 	for (i = 0; i < args->num_binds; ++i) {
3702 		u64 range = bind_ops[i].range;
3703 		u64 addr = bind_ops[i].addr;
3704 		u32 op = bind_ops[i].op;
3705 		u32 flags = bind_ops[i].flags;
3706 		u64 obj_offset = bind_ops[i].obj_offset;
3707 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3708 		u16 pat_index = bind_ops[i].pat_index;
3709 
3710 		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3711 						  addr, range, op, flags,
3712 						  prefetch_region, pat_index);
3713 		if (IS_ERR(ops[i])) {
3714 			err = PTR_ERR(ops[i]);
3715 			ops[i] = NULL;
3716 			goto unwind_ops;
3717 		}
3718 
3719 		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3720 		if (err)
3721 			goto unwind_ops;
3722 
3723 #ifdef TEST_VM_OPS_ERROR
3724 		if (flags & FORCE_OP_ERROR) {
3725 			vops.inject_error = true;
3726 			vm->xe->vm_inject_error_position =
3727 				(vm->xe->vm_inject_error_position + 1) %
3728 				FORCE_OP_ERROR_COUNT;
3729 		}
3730 #endif
3731 	}
3732 
3733 	/* Nothing to do */
3734 	if (list_empty(&vops.list)) {
3735 		err = -ENODATA;
3736 		goto unwind_ops;
3737 	}
3738 
3739 	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3740 	if (err)
3741 		goto unwind_ops;
3742 
3743 	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3744 	if (err)
3745 		goto unwind_ops;
3746 
3747 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
3748 	if (IS_ERR(fence))
3749 		err = PTR_ERR(fence);
3750 	else
3751 		dma_fence_put(fence);
3752 
3753 unwind_ops:
3754 	if (err && err != -ENODATA)
3755 		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3756 	xe_vma_ops_fini(&vops);
3757 	for (i = args->num_binds - 1; i >= 0; --i)
3758 		if (ops[i])
3759 			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3760 free_syncs:
3761 	if (err == -ENODATA)
3762 		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3763 	while (num_syncs--)
3764 		xe_sync_entry_cleanup(&syncs[num_syncs]);
3765 
3766 	kfree(syncs);
3767 put_obj:
3768 	for (i = 0; i < args->num_binds; ++i)
3769 		xe_bo_put(bos[i]);
3770 release_vm_lock:
3771 	up_write(&vm->lock);
3772 put_exec_queue:
3773 	if (q)
3774 		xe_exec_queue_put(q);
3775 put_vm:
3776 	xe_vm_put(vm);
3777 	kvfree(bos);
3778 	kvfree(ops);
3779 	if (args->num_binds > 1)
3780 		kvfree(bind_ops);
3781 	return err;
3782 }
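
/*
 * Illustrative userspace-side sketch (not part of the driver): roughly how a
 * single-map DRM_IOCTL_XE_VM_BIND request that lands in xe_vm_bind_ioctl()
 * above could be issued. Field names come from uapi/drm/xe_drm.h; fd, vm_id,
 * bo_handle, bo_size, gpu_addr and wb_pat_index are hypothetical placeholders.
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.range = bo_size,		// page-aligned
 *			.addr = gpu_addr,		// page-aligned, inside the VM
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = wb_pat_index,	// platform-specific PAT index
 *		},
 *		.num_syncs = 0,
 *	};
 *
 *	int ret = drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 *	// on failure, errno carries the -EINVAL/-ENOENT/... codes set above
 */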
3783 
3784 /**
3785  * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3786  * @vm: VM to bind the BO to
3787  * @bo: BO to bind
3788  * @q: exec queue to use for the bind (optional)
3789  * @addr: address at which to bind the BO
3790  * @cache_lvl: PAT cache level to use
3791  *
3792  * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3793  * kernel-owned VM.
3794  *
3795  * Returns a dma_fence to track completion of the bind if the job to do so
3796  * was successfully submitted, or an error pointer otherwise.
3797  */
3798 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3799 				       struct xe_exec_queue *q, u64 addr,
3800 				       enum xe_cache_level cache_lvl)
3801 {
3802 	struct xe_vma_ops vops;
3803 	struct drm_gpuva_ops *ops = NULL;
3804 	struct dma_fence *fence;
3805 	int err;
3806 
3807 	xe_bo_get(bo);
3808 	xe_vm_get(vm);
3809 	if (q)
3810 		xe_exec_queue_get(q);
3811 
3812 	down_write(&vm->lock);
3813 
3814 	xe_vma_ops_init(&vops, vm, q, NULL, 0);
3815 
3816 	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3817 				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
3818 				       vm->xe->pat.idx[cache_lvl]);
3819 	if (IS_ERR(ops)) {
3820 		err = PTR_ERR(ops);
3821 		goto release_vm_lock;
3822 	}
3823 
3824 	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3825 	if (err)
3826 		goto release_vm_lock;
3827 
3828 	xe_assert(vm->xe, !list_empty(&vops.list));
3829 
3830 	err = xe_vma_ops_alloc(&vops, false);
3831 	if (err)
3832 		goto unwind_ops;
3833 
3834 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
3835 	if (IS_ERR(fence))
3836 		err = PTR_ERR(fence);
3837 
3838 unwind_ops:
3839 	if (err && err != -ENODATA)
3840 		vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3841 
3842 	xe_vma_ops_fini(&vops);
3843 	drm_gpuva_ops_free(&vm->gpuvm, ops);
3844 
3845 release_vm_lock:
3846 	up_write(&vm->lock);
3847 
3848 	if (q)
3849 		xe_exec_queue_put(q);
3850 	xe_vm_put(vm);
3851 	xe_bo_put(bo);
3852 
3853 	if (err)
3854 		fence = ERR_PTR(err);
3855 
3856 	return fence;
3857 }
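
/*
 * Minimal illustrative sketch (not part of the driver): a kernel-internal
 * caller using xe_vm_bind_kernel_bo() and waiting for the bind to complete
 * before pointing hardware at @addr. The function name is hypothetical and
 * error handling is reduced to propagating the fence error.
 */
static int __maybe_unused xe_vm_bind_kernel_bo_example(struct xe_vm *vm,
							struct xe_bo *bo,
							u64 addr)
{
	struct dma_fence *fence;

	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Block until the bind job has executed, then drop our reference. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}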
3858 
3859 /**
3860  * xe_vm_lock() - Lock the vm's dma_resv object
3861  * @vm: The struct xe_vm whose lock is to be locked
3862  * @intr: Whether any wait for a contended lock should be interruptible
3863  *
3864  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3865  * contended lock was interrupted. If @intr is false, the function
3866  * always returns 0.
3867  */
3868 int xe_vm_lock(struct xe_vm *vm, bool intr)
3869 {
3870 	if (intr)
3871 		return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3872 
3873 	return dma_resv_lock(xe_vm_resv(vm), NULL);
3874 }
3875 
3876 /**
3877  * xe_vm_unlock() - Unlock the vm's dma_resv object
3878  * @vm: The struct xe_vm whose lock is to be released.
3879  *
3880  * Unlock the vm's dma_resv object that was locked by xe_vm_lock().
3881  */
3882 void xe_vm_unlock(struct xe_vm *vm)
3883 {
3884 	dma_resv_unlock(xe_vm_resv(vm));
3885 }
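
/*
 * Illustrative sketch (not part of the driver): the usual pairing of
 * xe_vm_lock()/xe_vm_unlock() around work that needs the VM's dma_resv
 * held. The function name and the elided body are hypothetical.
 */
static int __maybe_unused xe_vm_lock_example(struct xe_vm *vm, bool intr)
{
	int err;

	err = xe_vm_lock(vm, intr);
	if (err)
		return err;	/* only -EINTR, and only when intr == true */

	/* ... touch state protected by xe_vm_resv(vm) here ... */

	xe_vm_unlock(vm);

	return 0;
}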
3886 
3887 /**
3888  * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an
3889  * address range
3890  * @vm: The VM
3891  * @start: start address
3892  * @end: end address
3893  * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
3894  *
3895  * Issue a range-based TLB invalidation for the GTs of every tile in @tile_mask.
3896  *
3897  * Returns 0 for success, negative error code otherwise.
3898  */
3899 int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start,
3900 					  u64 end, u8 tile_mask)
3901 {
3902 	struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3903 	struct xe_tile *tile;
3904 	u32 fence_id = 0;
3905 	u8 id;
3906 	int err;
3907 
3908 	if (!tile_mask)
3909 		return 0;
3910 
3911 	for_each_tile(tile, vm->xe, id) {
3912 		if (tile_mask & BIT(id)) {
3913 			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
3914 							  &fence[fence_id], true);
3915 
3916 			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
3917 							   &fence[fence_id],
3918 							   start,
3919 							   end,
3920 							   vm->usm.asid);
3921 			if (err)
3922 				goto wait;
3923 			++fence_id;
3924 
3925 			if (!tile->media_gt)
3926 				continue;
3927 
3928 			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
3929 							  &fence[fence_id], true);
3930 
3931 			err = xe_gt_tlb_invalidation_range(tile->media_gt,
3932 							   &fence[fence_id],
3933 							   start,
3934 							   end,
3935 							   vm->usm.asid);
3936 			if (err)
3937 				goto wait;
3938 			++fence_id;
3939 		}
3940 	}
3941 
3942 wait:
3943 	for (id = 0; id < fence_id; ++id)
3944 		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
3945 
3946 	return err;
3947 }
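
/*
 * Illustrative sketch (not part of the driver): build a tile mask covering
 * every tile on the device and issue a range-based invalidation through
 * xe_vm_range_tilemask_tlb_invalidation(). The function name is hypothetical;
 * real callers derive the mask from the tiles the range is actually bound on.
 */
static int __maybe_unused xe_vm_tlb_inval_all_tiles_example(struct xe_vm *vm,
							     u64 start, u64 end)
{
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	for_each_tile(tile, vm->xe, id)
		tile_mask |= BIT(id);

	return xe_vm_range_tilemask_tlb_invalidation(vm, start, end, tile_mask);
}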
3948 
3949 /**
3950  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3951  * @vma: VMA to invalidate
3952  *
3953  * Walks the page-table leaves and zeroes the entries owned by this VMA,
3954  * then issues a TLB invalidation and blocks until the invalidation is
3955  * complete.
3956  *
3957  * Returns 0 for success, negative error code otherwise.
3958  */
3959 int xe_vm_invalidate_vma(struct xe_vma *vma)
3960 {
3961 	struct xe_device *xe = xe_vma_vm(vma)->xe;
3962 	struct xe_vm *vm = xe_vma_vm(vma);
3963 	struct xe_tile *tile;
3964 	u8 tile_mask = 0;
3965 	int ret = 0;
3966 	u8 id;
3967 
3968 	xe_assert(xe, !xe_vma_is_null(vma));
3969 	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
3970 	trace_xe_vma_invalidate(vma);
3971 
3972 	vm_dbg(&vm->xe->drm,
3973 	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3974 		xe_vma_start(vma), xe_vma_size(vma));
3975 
3976 	/*
3977 	 * Check that we don't race with page-table updates; the
3978 	 * tile_invalidated update itself is safe.
3979 	 */
3980 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3981 		if (xe_vma_is_userptr(vma)) {
3982 			lockdep_assert(lockdep_is_held_type(&vm->userptr.notifier_lock, 0) ||
3983 				       (lockdep_is_held_type(&vm->userptr.notifier_lock, 1) &&
3984 					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
3985 
3986 			WARN_ON_ONCE(!mmu_interval_check_retry
3987 				     (&to_userptr_vma(vma)->userptr.notifier,
3988 				      to_userptr_vma(vma)->userptr.notifier_seq));
3989 			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
3990 							     DMA_RESV_USAGE_BOOKKEEP));
3991 
3992 		} else {
3993 			xe_bo_assert_held(xe_vma_bo(vma));
3994 		}
3995 	}
3996 
3997 	for_each_tile(tile, xe, id)
3998 		if (xe_pt_zap_ptes(tile, vma))
3999 			tile_mask |= BIT(id);
4000 
4001 	xe_device_wmb(xe);
4002 
4003 	ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma),
4004 						    xe_vma_end(vma), tile_mask);
4005 
4006 	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
4007 	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
4008 
4009 	return ret;
4010 }
4011 
4012 int xe_vm_validate_protected(struct xe_vm *vm)
4013 {
4014 	struct drm_gpuva *gpuva;
4015 	int err = 0;
4016 
4017 	if (!vm)
4018 		return -ENODEV;
4019 
4020 	mutex_lock(&vm->snap_mutex);
4021 
4022 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4023 		struct xe_vma *vma = gpuva_to_vma(gpuva);
4024 		struct xe_bo *bo = vma->gpuva.gem.obj ?
4025 			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4026 
4027 		if (!bo)
4028 			continue;
4029 
4030 		if (xe_bo_is_protected(bo)) {
4031 			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
4032 			if (err)
4033 				break;
4034 		}
4035 	}
4036 
4037 	mutex_unlock(&vm->snap_mutex);
4038 	return err;
4039 }
4040 
4041 struct xe_vm_snapshot {
4042 	unsigned long num_snaps;
4043 	struct {
4044 		u64 ofs, bo_ofs;
4045 		unsigned long len;
4046 		struct xe_bo *bo;
4047 		void *data;
4048 		struct mm_struct *mm;
4049 	} snap[];
4050 };
4051 
4052 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
4053 {
4054 	unsigned long num_snaps = 0, i;
4055 	struct xe_vm_snapshot *snap = NULL;
4056 	struct drm_gpuva *gpuva;
4057 
4058 	if (!vm)
4059 		return NULL;
4060 
4061 	mutex_lock(&vm->snap_mutex);
4062 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4063 		if (gpuva->flags & XE_VMA_DUMPABLE)
4064 			num_snaps++;
4065 	}
4066 
4067 	if (num_snaps)
4068 		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4069 	if (!snap) {
4070 		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4071 		goto out_unlock;
4072 	}
4073 
4074 	snap->num_snaps = num_snaps;
4075 	i = 0;
4076 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4077 		struct xe_vma *vma = gpuva_to_vma(gpuva);
4078 		struct xe_bo *bo = vma->gpuva.gem.obj ?
4079 			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4080 
4081 		if (!(gpuva->flags & XE_VMA_DUMPABLE))
4082 			continue;
4083 
4084 		snap->snap[i].ofs = xe_vma_start(vma);
4085 		snap->snap[i].len = xe_vma_size(vma);
4086 		if (bo) {
4087 			snap->snap[i].bo = xe_bo_get(bo);
4088 			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4089 		} else if (xe_vma_is_userptr(vma)) {
4090 			struct mm_struct *mm =
4091 				to_userptr_vma(vma)->userptr.notifier.mm;
4092 
4093 			if (mmget_not_zero(mm))
4094 				snap->snap[i].mm = mm;
4095 			else
4096 				snap->snap[i].data = ERR_PTR(-EFAULT);
4097 
4098 			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4099 		} else {
4100 			snap->snap[i].data = ERR_PTR(-ENOENT);
4101 		}
4102 		i++;
4103 	}
4104 
4105 out_unlock:
4106 	mutex_unlock(&vm->snap_mutex);
4107 	return snap;
4108 }
4109 
4110 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4111 {
4112 	if (IS_ERR_OR_NULL(snap))
4113 		return;
4114 
4115 	for (int i = 0; i < snap->num_snaps; i++) {
4116 		struct xe_bo *bo = snap->snap[i].bo;
4117 		int err;
4118 
4119 		if (IS_ERR(snap->snap[i].data))
4120 			continue;
4121 
4122 		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4123 		if (!snap->snap[i].data) {
4124 			snap->snap[i].data = ERR_PTR(-ENOMEM);
4125 			goto cleanup_bo;
4126 		}
4127 
4128 		if (bo) {
4129 			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4130 					 snap->snap[i].data, snap->snap[i].len);
4131 		} else {
4132 			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4133 
4134 			kthread_use_mm(snap->snap[i].mm);
4135 			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4136 				err = 0;
4137 			else
4138 				err = -EFAULT;
4139 			kthread_unuse_mm(snap->snap[i].mm);
4140 
4141 			mmput(snap->snap[i].mm);
4142 			snap->snap[i].mm = NULL;
4143 		}
4144 
4145 		if (err) {
4146 			kvfree(snap->snap[i].data);
4147 			snap->snap[i].data = ERR_PTR(err);
4148 		}
4149 
4150 cleanup_bo:
4151 		xe_bo_put(bo);
4152 		snap->snap[i].bo = NULL;
4153 	}
4154 }
4155 
4156 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4157 {
4158 	unsigned long i, j;
4159 
4160 	if (IS_ERR_OR_NULL(snap)) {
4161 		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4162 		return;
4163 	}
4164 
4165 	for (i = 0; i < snap->num_snaps; i++) {
4166 		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4167 
4168 		if (IS_ERR(snap->snap[i].data)) {
4169 			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4170 				   PTR_ERR(snap->snap[i].data));
4171 			continue;
4172 		}
4173 
4174 		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4175 
4176 		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4177 			u32 *val = snap->snap[i].data + j;
4178 			char dumped[ASCII85_BUFSZ];
4179 
4180 			drm_puts(p, ascii85_encode(*val, dumped));
4181 		}
4182 
4183 		drm_puts(p, "\n");
4184 
4185 		if (drm_coredump_printer_is_full(p))
4186 			return;
4187 	}
4188 }
4189 
4190 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4191 {
4192 	unsigned long i;
4193 
4194 	if (IS_ERR_OR_NULL(snap))
4195 		return;
4196 
4197 	for (i = 0; i < snap->num_snaps; i++) {
4198 		if (!IS_ERR(snap->snap[i].data))
4199 			kvfree(snap->snap[i].data);
4200 		xe_bo_put(snap->snap[i].bo);
4201 		if (snap->snap[i].mm)
4202 			mmput(snap->snap[i].mm);
4203 	}
4204 	kvfree(snap);
4205 }
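
/*
 * Illustrative sketch (not part of the driver): the expected lifecycle of the
 * snapshot helpers above. Capture runs under GFP_NOWAIT constraints (e.g. a
 * devcoredump path), the heavy copying is deferred to sleepable context, and
 * printing/freeing come last. The function name is hypothetical.
 */
static void __maybe_unused xe_vm_snapshot_lifecycle_example(struct xe_vm *vm,
							     struct drm_printer *p)
{
	struct xe_vm_snapshot *snap;

	/* Atomic-friendly: record VMA ranges, take BO/mm references. */
	snap = xe_vm_snapshot_capture(vm);

	/* Sleepable context: allocate buffers and copy BO/userptr contents. */
	xe_vm_snapshot_capture_delayed(snap);

	/* Emit the ascii85-encoded dump; these helpers tolerate ERR_PTR/NULL. */
	xe_vm_snapshot_print(snap, p);
	xe_vm_snapshot_free(snap);
}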
4206