xref: /linux/drivers/gpu/drm/xe/xe_vm.c (revision 6916d5703ddf9a38f1f6c2cc793381a24ee914c6)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_vm.h"
7 
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21 
22 #include <generated/xe_wa_oob.h>
23 
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_sriov_vf.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_vm_madvise.h"
44 #include "xe_wa.h"
45 
46 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
47 {
48 	return vm->gpuvm.r_obj;
49 }
50 
51 /**
52  * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
53  * @vm: The vm whose resv is to be locked.
54  * @exec: The drm_exec transaction.
55  *
56  * Helper to lock the vm's resv as part of a drm_exec transaction.
57  *
58  * Return: %0 on success. See drm_exec_lock_obj() for error codes.
59  */
60 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
61 {
62 	return drm_exec_lock_obj(exec, xe_vm_obj(vm));
63 }
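
/*
 * Example usage (illustrative sketch only, not part of the driver): the helper
 * is meant to be called from inside a drm_exec locking loop so that WW-mutex
 * contention can be retried, mirroring how this file uses the xe_validation
 * wrappers elsewhere:
 *
 *	struct xe_validation_ctx ctx;
 *	struct drm_exec exec;
 *	int err = 0;
 *
 *	xe_validation_guard(&ctx, &vm->xe->val, &exec,
 *			    (struct xe_val_flags) {.interruptible = true}, err) {
 *		err = xe_vm_drm_exec_lock(vm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *		// vm's resv is now locked for the rest of the guarded block
 *	}
 */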
64 
65 static bool preempt_fences_waiting(struct xe_vm *vm)
66 {
67 	struct xe_exec_queue *q;
68 
69 	lockdep_assert_held(&vm->lock);
70 	xe_vm_assert_held(vm);
71 
72 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
73 		if (!q->lr.pfence ||
74 		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
75 			     &q->lr.pfence->flags)) {
76 			return true;
77 		}
78 	}
79 
80 	return false;
81 }
82 
83 static void free_preempt_fences(struct list_head *list)
84 {
85 	struct list_head *link, *next;
86 
87 	list_for_each_safe(link, next, list)
88 		xe_preempt_fence_free(to_preempt_fence_from_link(link));
89 }
90 
91 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
92 				unsigned int *count)
93 {
94 	lockdep_assert_held(&vm->lock);
95 	xe_vm_assert_held(vm);
96 
97 	if (*count >= vm->preempt.num_exec_queues)
98 		return 0;
99 
100 	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
101 		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
102 
103 		if (IS_ERR(pfence))
104 			return PTR_ERR(pfence);
105 
106 		list_move_tail(xe_preempt_fence_link(pfence), list);
107 	}
108 
109 	return 0;
110 }
111 
112 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
113 {
114 	struct xe_exec_queue *q;
115 	bool vf_migration = IS_SRIOV_VF(vm->xe) &&
116 		xe_sriov_vf_migration_supported(vm->xe);
117 	signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
118 
119 	xe_vm_assert_held(vm);
120 
121 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
122 		if (q->lr.pfence) {
123 			long timeout;
124 
125 			timeout = dma_fence_wait_timeout(q->lr.pfence, false,
126 							 wait_time);
127 			if (!timeout) {
128 				xe_assert(vm->xe, vf_migration);
129 				return -EAGAIN;
130 			}
131 
132 			/* Only -ETIME on fence indicates VM needs to be killed */
133 			if (timeout < 0 || q->lr.pfence->error == -ETIME)
134 				return -ETIME;
135 
136 			dma_fence_put(q->lr.pfence);
137 			q->lr.pfence = NULL;
138 		}
139 	}
140 
141 	return 0;
142 }
143 
144 static bool xe_vm_is_idle(struct xe_vm *vm)
145 {
146 	struct xe_exec_queue *q;
147 
148 	xe_vm_assert_held(vm);
149 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
150 		if (!xe_exec_queue_is_idle(q))
151 			return false;
152 	}
153 
154 	return true;
155 }
156 
157 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
158 {
159 	struct list_head *link;
160 	struct xe_exec_queue *q;
161 
162 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
163 		struct dma_fence *fence;
164 
165 		link = list->next;
166 		xe_assert(vm->xe, link != list);
167 
168 		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
169 					     q, q->lr.context,
170 					     ++q->lr.seqno);
171 		dma_fence_put(q->lr.pfence);
172 		q->lr.pfence = fence;
173 	}
174 }
175 
176 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
177 {
178 	struct xe_exec_queue *q;
179 	int err;
180 
181 	xe_bo_assert_held(bo);
182 
183 	if (!vm->preempt.num_exec_queues)
184 		return 0;
185 
186 	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
187 	if (err)
188 		return err;
189 
190 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
191 		if (q->lr.pfence) {
192 			dma_resv_add_fence(bo->ttm.base.resv,
193 					   q->lr.pfence,
194 					   DMA_RESV_USAGE_BOOKKEEP);
195 		}
196 
197 	return 0;
198 }
199 
200 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
201 						struct drm_exec *exec)
202 {
203 	struct xe_exec_queue *q;
204 
205 	lockdep_assert_held(&vm->lock);
206 	xe_vm_assert_held(vm);
207 
208 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
209 		q->ops->resume(q);
210 
211 		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
212 					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
213 	}
214 }
215 
216 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
217 {
218 	struct drm_gpuvm_exec vm_exec = {
219 		.vm = &vm->gpuvm,
220 		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
221 		.num_fences = 1,
222 	};
223 	struct drm_exec *exec = &vm_exec.exec;
224 	struct xe_validation_ctx ctx;
225 	struct dma_fence *pfence;
226 	int err;
227 	bool wait;
228 
229 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
230 
231 	down_write(&vm->lock);
232 	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
233 	if (err)
234 		goto out_up_write;
235 
236 	pfence = xe_preempt_fence_create(q, q->lr.context,
237 					 ++q->lr.seqno);
238 	if (IS_ERR(pfence)) {
239 		err = PTR_ERR(pfence);
240 		goto out_fini;
241 	}
242 
243 	list_add(&q->lr.link, &vm->preempt.exec_queues);
244 	++vm->preempt.num_exec_queues;
245 	q->lr.pfence = pfence;
246 
247 	xe_svm_notifier_lock(vm);
248 
249 	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
250 				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
251 
252 	/*
253 	 * Check whether a preemption on the VM or a userptr invalidation is
254 	 * in flight; if so, trigger this preempt fence to sync state with the
255 	 * other preempt fences on the VM.
256 	 */
257 	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
258 	if (wait)
259 		dma_fence_enable_sw_signaling(pfence);
260 
261 	xe_svm_notifier_unlock(vm);
262 
263 out_fini:
264 	xe_validation_ctx_fini(&ctx);
265 out_up_write:
266 	up_write(&vm->lock);
267 
268 	return err;
269 }
270 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
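
/*
 * Illustrative lifetime pairing (sketch, assumptions marked): a long-running
 * exec queue created against a preempt-fence-mode VM is expected to be
 * registered here and later torn down with xe_vm_remove_compute_exec_queue(),
 * which may safely be called more than once on the same queue:
 *
 *	err = xe_vm_add_compute_exec_queue(vm, q);
 *	if (err)
 *		goto err_put_queue;	// hypothetical error label
 *	...
 *	xe_vm_remove_compute_exec_queue(vm, q);
 */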
271 
272 /**
273  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
274  * @vm: The VM.
275  * @q: The exec_queue
276  *
277  * Note that this function might be called multiple times on the same queue.
278  */
279 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
280 {
281 	if (!xe_vm_in_preempt_fence_mode(vm))
282 		return;
283 
284 	down_write(&vm->lock);
285 	if (!list_empty(&q->lr.link)) {
286 		list_del_init(&q->lr.link);
287 		--vm->preempt.num_exec_queues;
288 	}
289 	if (q->lr.pfence) {
290 		dma_fence_enable_sw_signaling(q->lr.pfence);
291 		dma_fence_put(q->lr.pfence);
292 		q->lr.pfence = NULL;
293 	}
294 	up_write(&vm->lock);
295 }
296 
297 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
298 
299 /**
300  * xe_vm_kill() - VM Kill
301  * @vm: The VM.
302  * @unlocked: Flag indicating the VM's dma-resv is not held
303  *
304  * Kill the VM by setting the banned flag, indicating the VM is no longer available
305  * for use. If in preempt fence mode, also kill all exec queues attached to the VM.
306  */
307 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
308 {
309 	struct xe_exec_queue *q;
310 
311 	lockdep_assert_held(&vm->lock);
312 
313 	if (unlocked)
314 		xe_vm_lock(vm, false);
315 
316 	vm->flags |= XE_VM_FLAG_BANNED;
317 	trace_xe_vm_kill(vm);
318 
319 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
320 		q->ops->kill(q);
321 
322 	if (unlocked)
323 		xe_vm_unlock(vm);
324 
325 	/* TODO: Inform user the VM is banned */
326 }
327 
328 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
329 {
330 	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
331 	struct xe_bo *bo = gem_to_xe_bo(vm_bo->obj);
332 	struct drm_gpuva *gpuva;
333 	int ret;
334 
335 	lockdep_assert_held(&vm->lock);
336 	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
337 		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
338 			       &vm->rebind_list);
339 
340 	/* Skip re-populating purged BOs, rebind maps scratch pages. */
341 	if (xe_bo_is_purged(bo)) {
342 		vm_bo->evicted = false;
343 		return 0;
344 	}
345 
346 	if (!try_wait_for_completion(&vm->xe->pm_block))
347 		return -EAGAIN;
348 
349 	ret = xe_bo_validate(bo, vm, false, exec);
350 	if (ret)
351 		return ret;
352 
353 	vm_bo->evicted = false;
354 	return 0;
355 }
356 
357 /**
358  * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
359  * @vm: The vm for which we are rebinding.
360  * @exec: The struct drm_exec with the locked GEM objects.
361  * @num_fences: The number of fences to reserve for the operation, not
362  * including rebinds and validations.
363  *
364  * Validates all evicted gem objects and rebinds their vmas. Note that
365  * rebindings may cause evictions and hence the validation-rebind
366  * sequence is rerun until there are no more objects to validate.
367  *
368  * Return: 0 on success, negative error code on error. In particular,
369  * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
370  * the drm_exec transaction needs to be restarted.
371  */
372 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
373 			  unsigned int num_fences)
374 {
375 	struct drm_gem_object *obj;
376 	unsigned long index;
377 	int ret;
378 
379 	do {
380 		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
381 		if (ret)
382 			return ret;
383 
384 		ret = xe_vm_rebind(vm, false);
385 		if (ret)
386 			return ret;
387 	} while (!list_empty(&vm->gpuvm.evict.list));
388 
389 	drm_exec_for_each_locked_object(exec, index, obj) {
390 		ret = dma_resv_reserve_fences(obj->resv, num_fences);
391 		if (ret)
392 			return ret;
393 	}
394 
395 	return 0;
396 }
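
/*
 * Example call pattern (illustrative sketch): callers typically run this from
 * within a drm_exec locking loop after preparing the VM resv and the external
 * objects, so that evictions triggered by the rebinds can themselves be
 * validated, e.g.:
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = drm_gpuvm_prepare_objects(&vm->gpuvm, &exec, 0);
 *		if (!err)
 *			err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *	}
 */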
397 
398 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
399 				 bool *done)
400 {
401 	int err;
402 
403 	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
404 	if (err)
405 		return err;
406 
407 	if (xe_vm_is_idle(vm)) {
408 		vm->preempt.rebind_deactivated = true;
409 		*done = true;
410 		return 0;
411 	}
412 
413 	if (!preempt_fences_waiting(vm)) {
414 		*done = true;
415 		return 0;
416 	}
417 
418 	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
419 	if (err)
420 		return err;
421 
422 	err = wait_for_existing_preempt_fences(vm);
423 	if (err)
424 		return err;
425 
426 	/*
427 	 * Add validation and rebinding to the locking loop since both can
428 	 * cause evictions which may require blocking dma_resv locks.
429 	 * The fence reservation here is intended for the new preempt fences
430 	 * we attach at the end of the rebind work.
431 	 */
432 	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
433 }
434 
435 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
436 {
437 	struct xe_device *xe = vm->xe;
438 	bool ret = false;
439 
440 	mutex_lock(&xe->rebind_resume_lock);
441 	if (!try_wait_for_completion(&vm->xe->pm_block)) {
442 		ret = true;
443 		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
444 	}
445 	mutex_unlock(&xe->rebind_resume_lock);
446 
447 	return ret;
448 }
449 
450 /**
451  * xe_vm_resume_rebind_worker() - Resume the rebind worker.
452  * @vm: The vm whose preempt worker to resume.
453  *
454  * Resume a preempt worker that was previously suspended by
455  * vm_suspend_rebind_worker().
456  */
457 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
458 {
459 	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
460 }
461 
462 static void preempt_rebind_work_func(struct work_struct *w)
463 {
464 	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
465 	struct xe_validation_ctx ctx;
466 	struct drm_exec exec;
467 	unsigned int fence_count = 0;
468 	LIST_HEAD(preempt_fences);
469 	int err = 0;
470 	long wait;
471 	int __maybe_unused tries = 0;
472 
473 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
474 	trace_xe_vm_rebind_worker_enter(vm);
475 
476 	down_write(&vm->lock);
477 
478 	if (xe_vm_is_closed_or_banned(vm)) {
479 		up_write(&vm->lock);
480 		trace_xe_vm_rebind_worker_exit(vm);
481 		return;
482 	}
483 
484 retry:
485 	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
486 		up_write(&vm->lock);
487 		/* We don't actually block, but we don't make progress either. */
488 		xe_pm_might_block_on_suspend();
489 		return;
490 	}
491 
492 	if (xe_vm_userptr_check_repin(vm)) {
493 		err = xe_vm_userptr_pin(vm);
494 		if (err)
495 			goto out_unlock_outer;
496 	}
497 
498 	err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
499 				     (struct xe_val_flags) {.interruptible = true});
500 	if (err)
501 		goto out_unlock_outer;
502 
503 	drm_exec_until_all_locked(&exec) {
504 		bool done = false;
505 
506 		err = xe_preempt_work_begin(&exec, vm, &done);
507 		drm_exec_retry_on_contention(&exec);
508 		xe_validation_retry_on_oom(&ctx, &err);
509 		if (err || done) {
510 			xe_validation_ctx_fini(&ctx);
511 			goto out_unlock_outer;
512 		}
513 	}
514 
515 	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
516 	if (err)
517 		goto out_unlock;
518 
519 	xe_vm_set_validation_exec(vm, &exec);
520 	err = xe_vm_rebind(vm, true);
521 	xe_vm_set_validation_exec(vm, NULL);
522 	if (err)
523 		goto out_unlock;
524 
525 	/* Wait on rebinds and munmap style VM unbinds */
526 	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
527 				     DMA_RESV_USAGE_KERNEL,
528 				     false, MAX_SCHEDULE_TIMEOUT);
529 	if (wait <= 0) {
530 		err = -ETIME;
531 		goto out_unlock;
532 	}
533 
534 #define retry_required(__tries, __vm) \
535 	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
536 	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
537 	__xe_vm_userptr_needs_repin(__vm))
538 
539 	xe_svm_notifier_lock(vm);
540 	if (retry_required(tries, vm)) {
541 		xe_svm_notifier_unlock(vm);
542 		err = -EAGAIN;
543 		goto out_unlock;
544 	}
545 
546 #undef retry_required
547 
548 	spin_lock(&vm->xe->ttm.lru_lock);
549 	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
550 	spin_unlock(&vm->xe->ttm.lru_lock);
551 
552 	/* Point of no return. */
553 	arm_preempt_fences(vm, &preempt_fences);
554 	resume_and_reinstall_preempt_fences(vm, &exec);
555 	xe_svm_notifier_unlock(vm);
556 
557 out_unlock:
558 	xe_validation_ctx_fini(&ctx);
559 out_unlock_outer:
560 	if (err == -EAGAIN) {
561 		trace_xe_vm_rebind_worker_retry(vm);
562 
563 		/*
564 		 * We can't block in workers on a VF which supports migration
565 		 * given this can block the VF post-migration workers from
566 		 * getting scheduled.
567 		 */
568 		if (IS_SRIOV_VF(vm->xe) &&
569 		    xe_sriov_vf_migration_supported(vm->xe)) {
570 			up_write(&vm->lock);
571 			xe_vm_queue_rebind_worker(vm);
572 			return;
573 		}
574 
575 		goto retry;
576 	}
577 
578 	if (err) {
579 		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
580 		xe_vm_kill(vm, true);
581 	}
582 	up_write(&vm->lock);
583 
584 	free_preempt_fences(&preempt_fences);
585 
586 	trace_xe_vm_rebind_worker_exit(vm);
587 }
588 
589 /**
590  * xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list
591  * @vm: The VM.
592  * @pf: The pagefault.
593  *
594  * This function takes the data from the pagefault @pf and saves it to @vm->faults.list.
595  *
596  * The function exits silently if the list is full, and reports a warning if the pagefault
597  * could not be saved to the list.
598  */
599 void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf)
600 {
601 	struct xe_vm_fault_entry *e;
602 	struct xe_hw_engine *hwe;
603 
604 	/* Do not report faults on reserved engines */
605 	hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class,
606 			      pf->consumer.engine_instance, false);
607 	if (!hwe || xe_hw_engine_is_reserved(hwe))
608 		return;
609 
610 	e = kzalloc_obj(*e);
611 	if (!e) {
612 		drm_warn(&vm->xe->drm,
613 			 "Could not allocate memory for fault!\n");
614 		return;
615 	}
616 
617 	guard(spinlock)(&vm->faults.lock);
618 
619 	/*
620 	 * Limit the number of faults in the fault list to prevent
621 	 * memory overuse.
622 	 */
623 	if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) {
624 		kfree(e);
625 		return;
626 	}
627 
628 	e->address = pf->consumer.page_addr;
629 	/*
630 	 * TODO:
631 	 * Address precision is currently always SZ_4K, but this may change
632 	 * in the future.
633 	 */
634 	e->address_precision = SZ_4K;
635 	e->access_type = pf->consumer.access_type;
636 	e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK,
637 				  pf->consumer.fault_type_level),
638 	e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK,
639 				   pf->consumer.fault_type_level),
640 
641 	list_add_tail(&e->list, &vm->faults.list);
642 	vm->faults.len++;
643 }
644 
645 static void xe_vm_clear_fault_entries(struct xe_vm *vm)
646 {
647 	struct xe_vm_fault_entry *e, *tmp;
648 
649 	guard(spinlock)(&vm->faults.lock);
650 	list_for_each_entry_safe(e, tmp, &vm->faults.list, list) {
651 		list_del(&e->list);
652 		kfree(e);
653 	}
654 	vm->faults.len = 0;
655 }
656 
657 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
658 {
659 	int i;
660 
661 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
662 		if (!vops->pt_update_ops[i].num_ops)
663 			continue;
664 
665 		vops->pt_update_ops[i].ops =
666 			kmalloc_objs(*vops->pt_update_ops[i].ops,
667 				     vops->pt_update_ops[i].num_ops,
668 				     GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
669 		if (!vops->pt_update_ops[i].ops)
670 			return array_of_binds ? -ENOBUFS : -ENOMEM;
671 	}
672 
673 	return 0;
674 }
675 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
676 
677 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
678 {
679 	struct xe_vma *vma;
680 
681 	vma = gpuva_to_vma(op->base.prefetch.va);
682 
683 	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
684 		xa_destroy(&op->prefetch_range.range);
685 }
686 
687 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
688 {
689 	struct xe_vma_op *op;
690 
691 	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
692 		return;
693 
694 	list_for_each_entry(op, &vops->list, link)
695 		xe_vma_svm_prefetch_op_fini(op);
696 }
697 
698 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
699 {
700 	int i;
701 
702 	xe_vma_svm_prefetch_ops_fini(vops);
703 
704 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
705 		kfree(vops->pt_update_ops[i].ops);
706 }
707 
708 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
709 {
710 	int i;
711 
712 	if (!inc_val)
713 		return;
714 
715 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
716 		if (BIT(i) & tile_mask)
717 			vops->pt_update_ops[i].num_ops += inc_val;
718 }
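
/*
 * Worked example (illustrative): with tile_mask = 0b101 and inc_val = 1,
 * pt_update_ops[0].num_ops and pt_update_ops[2].num_ops are each bumped by
 * one while tile 1 is untouched; xe_vma_ops_alloc() later sizes the per-tile
 * ops arrays from these counters.
 */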
719 
720 #define XE_VMA_CREATE_MASK (		    \
721 	XE_VMA_READ_ONLY |		    \
722 	XE_VMA_DUMPABLE |		    \
723 	XE_VMA_SYSTEM_ALLOCATOR |           \
724 	DRM_GPUVA_SPARSE |		    \
725 	XE_VMA_MADV_AUTORESET)
726 
727 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
728 				  u8 tile_mask)
729 {
730 	INIT_LIST_HEAD(&op->link);
731 	op->tile_mask = tile_mask;
732 	op->base.op = DRM_GPUVA_OP_MAP;
733 	op->base.map.va.addr = vma->gpuva.va.addr;
734 	op->base.map.va.range = vma->gpuva.va.range;
735 	op->base.map.gem.obj = vma->gpuva.gem.obj;
736 	op->base.map.gem.offset = vma->gpuva.gem.offset;
737 	op->map.vma = vma;
738 	op->map.immediate = true;
739 	op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
740 }
741 
742 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
743 				u8 tile_mask)
744 {
745 	struct xe_vma_op *op;
746 
747 	op = kzalloc_obj(*op);
748 	if (!op)
749 		return -ENOMEM;
750 
751 	xe_vm_populate_rebind(op, vma, tile_mask);
752 	list_add_tail(&op->link, &vops->list);
753 	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
754 
755 	return 0;
756 }
757 
758 static struct dma_fence *ops_execute(struct xe_vm *vm,
759 				     struct xe_vma_ops *vops);
760 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
761 			    struct xe_exec_queue *q,
762 			    struct xe_sync_entry *syncs, u32 num_syncs);
763 
764 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
765 {
766 	struct dma_fence *fence;
767 	struct xe_vma *vma, *next;
768 	struct xe_vma_ops vops;
769 	struct xe_vma_op *op, *next_op;
770 	int err, i;
771 
772 	lockdep_assert_held(&vm->lock);
773 	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
774 	    list_empty(&vm->rebind_list))
775 		return 0;
776 
777 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
778 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
779 		vops.pt_update_ops[i].wait_vm_bookkeep = true;
780 
781 	xe_vm_assert_held(vm);
782 	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
783 		xe_assert(vm->xe, vma->tile_present);
784 
785 		if (rebind_worker)
786 			trace_xe_vma_rebind_worker(vma);
787 		else
788 			trace_xe_vma_rebind_exec(vma);
789 
790 		err = xe_vm_ops_add_rebind(&vops, vma,
791 					   vma->tile_present);
792 		if (err)
793 			goto free_ops;
794 	}
795 
796 	err = xe_vma_ops_alloc(&vops, false);
797 	if (err)
798 		goto free_ops;
799 
800 	fence = ops_execute(vm, &vops);
801 	if (IS_ERR(fence)) {
802 		err = PTR_ERR(fence);
803 	} else {
804 		dma_fence_put(fence);
805 		list_for_each_entry_safe(vma, next, &vm->rebind_list,
806 					 combined_links.rebind)
807 			list_del_init(&vma->combined_links.rebind);
808 	}
809 free_ops:
810 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
811 		list_del(&op->link);
812 		kfree(op);
813 	}
814 	xe_vma_ops_fini(&vops);
815 
816 	return err;
817 }
818 
819 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
820 {
821 	struct dma_fence *fence = NULL;
822 	struct xe_vma_ops vops;
823 	struct xe_vma_op *op, *next_op;
824 	struct xe_tile *tile;
825 	u8 id;
826 	int err;
827 
828 	lockdep_assert_held(&vm->lock);
829 	xe_vm_assert_held(vm);
830 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
831 
832 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
833 	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
834 	for_each_tile(tile, vm->xe, id) {
835 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
836 		vops.pt_update_ops[tile->id].q =
837 			xe_migrate_exec_queue(tile->migrate);
838 	}
839 
840 	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
841 	if (err)
842 		return ERR_PTR(err);
843 
844 	err = xe_vma_ops_alloc(&vops, false);
845 	if (err) {
846 		fence = ERR_PTR(err);
847 		goto free_ops;
848 	}
849 
850 	fence = ops_execute(vm, &vops);
851 
852 free_ops:
853 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
854 		list_del(&op->link);
855 		kfree(op);
856 	}
857 	xe_vma_ops_fini(&vops);
858 
859 	return fence;
860 }
861 
862 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
863 					struct xe_vma *vma,
864 					struct xe_svm_range *range,
865 					u8 tile_mask)
866 {
867 	INIT_LIST_HEAD(&op->link);
868 	op->tile_mask = tile_mask;
869 	op->base.op = DRM_GPUVA_OP_DRIVER;
870 	op->subop = XE_VMA_SUBOP_MAP_RANGE;
871 	op->map_range.vma = vma;
872 	op->map_range.range = range;
873 }
874 
875 static int
876 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
877 			   struct xe_vma *vma,
878 			   struct xe_svm_range *range,
879 			   u8 tile_mask)
880 {
881 	struct xe_vma_op *op;
882 
883 	op = kzalloc_obj(*op);
884 	if (!op)
885 		return -ENOMEM;
886 
887 	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
888 	list_add_tail(&op->link, &vops->list);
889 	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
890 
891 	return 0;
892 }
893 
894 /**
895  * xe_vm_range_rebind() - VM range (re)bind
896  * @vm: The VM which the range belongs to.
897  * @vma: The VMA which the range belongs to.
898  * @range: SVM range to rebind.
899  * @tile_mask: Tile mask to bind the range to.
900  *
901  * (re)bind SVM range setting up GPU page tables for the range.
902  *
903  * Return: dma fence for rebind to signal completion on success, ERR_PTR on
904  * failure
905  */
906 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
907 				     struct xe_vma *vma,
908 				     struct xe_svm_range *range,
909 				     u8 tile_mask)
910 {
911 	struct dma_fence *fence = NULL;
912 	struct xe_vma_ops vops;
913 	struct xe_vma_op *op, *next_op;
914 	struct xe_tile *tile;
915 	u8 id;
916 	int err;
917 
918 	lockdep_assert_held(&vm->lock);
919 	xe_vm_assert_held(vm);
920 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
921 	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
922 
923 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
924 	vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
925 	for_each_tile(tile, vm->xe, id) {
926 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
927 		vops.pt_update_ops[tile->id].q =
928 			xe_migrate_exec_queue(tile->migrate);
929 	}
930 
931 	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
932 	if (err)
933 		return ERR_PTR(err);
934 
935 	err = xe_vma_ops_alloc(&vops, false);
936 	if (err) {
937 		fence = ERR_PTR(err);
938 		goto free_ops;
939 	}
940 
941 	fence = ops_execute(vm, &vops);
942 
943 free_ops:
944 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
945 		list_del(&op->link);
946 		kfree(op);
947 	}
948 	xe_vma_ops_fini(&vops);
949 
950 	return fence;
951 }
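
/*
 * Illustrative usage (sketch only, not taken from a real caller): a fault
 * handler that has resolved the backing of an SVM range would typically
 * rebind it and wait on the returned fence before unblocking the faulting
 * engine:
 *
 *	fence = xe_vm_range_rebind(vm, vma, range, tile_mask);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */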
952 
953 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
954 					struct xe_svm_range *range)
955 {
956 	INIT_LIST_HEAD(&op->link);
957 	op->tile_mask = range->tile_present;
958 	op->base.op = DRM_GPUVA_OP_DRIVER;
959 	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
960 	op->unmap_range.range = range;
961 }
962 
963 static int
964 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
965 			   struct xe_svm_range *range)
966 {
967 	struct xe_vma_op *op;
968 
969 	op = kzalloc_obj(*op);
970 	if (!op)
971 		return -ENOMEM;
972 
973 	xe_vm_populate_range_unbind(op, range);
974 	list_add_tail(&op->link, &vops->list);
975 	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
976 
977 	return 0;
978 }
979 
980 /**
981  * xe_vm_range_unbind() - VM range unbind
982  * @vm: The VM which the range belongs to.
983  * @range: SVM range to unbind.
984  *
985  * Unbind SVM range removing the GPU page tables for the range.
986  *
987  * Return: dma fence for unbind to signal completion on success, ERR_PTR on
988  * failure
989  */
990 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
991 				     struct xe_svm_range *range)
992 {
993 	struct dma_fence *fence = NULL;
994 	struct xe_vma_ops vops;
995 	struct xe_vma_op *op, *next_op;
996 	struct xe_tile *tile;
997 	u8 id;
998 	int err;
999 
1000 	lockdep_assert_held(&vm->lock);
1001 	xe_vm_assert_held(vm);
1002 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
1003 
1004 	if (!range->tile_present)
1005 		return dma_fence_get_stub();
1006 
1007 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
1008 	for_each_tile(tile, vm->xe, id) {
1009 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
1010 		vops.pt_update_ops[tile->id].q =
1011 			xe_migrate_exec_queue(tile->migrate);
1012 	}
1013 
1014 	err = xe_vm_ops_add_range_unbind(&vops, range);
1015 	if (err)
1016 		return ERR_PTR(err);
1017 
1018 	err = xe_vma_ops_alloc(&vops, false);
1019 	if (err) {
1020 		fence = ERR_PTR(err);
1021 		goto free_ops;
1022 	}
1023 
1024 	fence = ops_execute(vm, &vops);
1025 
1026 free_ops:
1027 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
1028 		list_del(&op->link);
1029 		kfree(op);
1030 	}
1031 	xe_vma_ops_fini(&vops);
1032 
1033 	return fence;
1034 }
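
/*
 * Illustrative usage (sketch only): the unbind path is symmetric to
 * xe_vm_range_rebind() above. Note the dma_fence_get_stub() fast path when no
 * tile currently has the range bound, so callers can always treat the return
 * value as a fence to wait on and put:
 *
 *	fence = xe_vm_range_unbind(vm, range);
 *	if (!IS_ERR(fence)) {
 *		dma_fence_wait(fence, false);
 *		dma_fence_put(fence);
 *	}
 */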
1035 
1036 static void xe_vma_mem_attr_fini(struct xe_vma_mem_attr *attr)
1037 {
1038 	drm_pagemap_put(attr->preferred_loc.dpagemap);
1039 }
1040 
1041 static void xe_vma_free(struct xe_vma *vma)
1042 {
1043 	xe_vma_mem_attr_fini(&vma->attr);
1044 
1045 	if (xe_vma_is_userptr(vma))
1046 		kfree(to_userptr_vma(vma));
1047 	else
1048 		kfree(vma);
1049 }
1050 
1051 /**
1052  * xe_vma_mem_attr_copy() - copy an xe_vma_mem_attr structure.
1053  * @to: Destination.
1054  * @from: Source.
1055  *
1056  * Copies an xe_vma_mem_attr structure taking care to get reference
1057  * counting of individual members right.
1058  */
1059 void xe_vma_mem_attr_copy(struct xe_vma_mem_attr *to, struct xe_vma_mem_attr *from)
1060 {
1061 	xe_vma_mem_attr_fini(to);
1062 	*to = *from;
1063 	if (to->preferred_loc.dpagemap)
1064 		drm_pagemap_get(to->preferred_loc.dpagemap);
1065 }
1066 
1067 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
1068 				    struct xe_bo *bo,
1069 				    u64 bo_offset_or_userptr,
1070 				    u64 start, u64 end,
1071 				    struct xe_vma_mem_attr *attr,
1072 				    unsigned int flags)
1073 {
1074 	struct xe_vma *vma;
1075 	struct xe_tile *tile;
1076 	u8 id;
1077 	bool is_null = (flags & DRM_GPUVA_SPARSE);
1078 	bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);
1079 
1080 	xe_assert(vm->xe, start < end);
1081 	xe_assert(vm->xe, end < vm->size);
1082 
1083 	/*
1084 	 * Allocate and ensure that the xe_vma_is_userptr() return
1085 	 * matches what was allocated.
1086 	 */
1087 	if (!bo && !is_null && !is_cpu_addr_mirror) {
1088 		struct xe_userptr_vma *uvma = kzalloc_obj(*uvma);
1089 
1090 		if (!uvma)
1091 			return ERR_PTR(-ENOMEM);
1092 
1093 		vma = &uvma->vma;
1094 	} else {
1095 		vma = kzalloc_obj(*vma);
1096 		if (!vma)
1097 			return ERR_PTR(-ENOMEM);
1098 
1099 		if (bo)
1100 			vma->gpuva.gem.obj = &bo->ttm.base;
1101 	}
1102 
1103 	INIT_LIST_HEAD(&vma->combined_links.rebind);
1104 
1105 	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
1106 	vma->gpuva.vm = &vm->gpuvm;
1107 	vma->gpuva.va.addr = start;
1108 	vma->gpuva.va.range = end - start + 1;
1109 	vma->gpuva.flags = flags;
1110 
1111 	for_each_tile(tile, vm->xe, id)
1112 		vma->tile_mask |= 0x1 << id;
1113 
1114 	if (vm->xe->info.has_atomic_enable_pte_bit)
1115 		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
1116 
1117 	xe_vma_mem_attr_copy(&vma->attr, attr);
1118 	if (bo) {
1119 		struct drm_gpuvm_bo *vm_bo;
1120 
1121 		xe_bo_assert_held(bo);
1122 
1123 		/*
1124 		 * Reject only WILLNEED mappings on DONTNEED/PURGED BOs. This
1125 		 * gates new vm_bind ioctls (user supplies WILLNEED) while
1126 		 * still allowing partial-unbind / remap splits whose new VMAs
1127 		 * inherit the parent's DONTNEED attr. It must also run before
1128 		 * xe_bo_willneed_get_locked() below so a 0->1 holder bump
1129 		 * cannot silently promote DONTNEED back to WILLNEED.
1130 		 */
1131 		if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED) {
1132 			if (xe_bo_madv_is_dontneed(bo)) {
1133 				xe_vma_free(vma);
1134 				return ERR_PTR(-EBUSY);
1135 			}
1136 			if (xe_bo_is_purged(bo)) {
1137 				xe_vma_free(vma);
1138 				return ERR_PTR(-EINVAL);
1139 			}
1140 		}
1141 
1142 		vm_bo = drm_gpuvm_bo_obtain_locked(vma->gpuva.vm, &bo->ttm.base);
1143 		if (IS_ERR(vm_bo)) {
1144 			xe_vma_free(vma);
1145 			return ERR_CAST(vm_bo);
1146 		}
1147 
1148 		drm_gpuvm_bo_extobj_add(vm_bo);
1149 		drm_gem_object_get(&bo->ttm.base);
1150 		vma->gpuva.gem.offset = bo_offset_or_userptr;
1151 		drm_gpuva_link(&vma->gpuva, vm_bo);
1152 		drm_gpuvm_bo_put(vm_bo);
1153 
1154 		xe_bo_vma_count_inc_locked(bo);
1155 		if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
1156 			xe_bo_willneed_get_locked(bo);
1157 	} else /* userptr or null */ {
1158 		if (!is_null && !is_cpu_addr_mirror) {
1159 			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1160 			u64 size = end - start + 1;
1161 			int err;
1162 
1163 			vma->gpuva.gem.offset = bo_offset_or_userptr;
1164 
1165 			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1166 			if (err) {
1167 				xe_vma_free(vma);
1168 				return ERR_PTR(err);
1169 			}
1170 		}
1171 
1172 		xe_vm_get(vm);
1173 	}
1174 
1175 	return vma;
1176 }
1177 
1178 static void xe_vma_destroy_late(struct xe_vma *vma)
1179 {
1180 	struct xe_vm *vm = xe_vma_vm(vma);
1181 	struct xe_bo *bo = xe_vma_bo(vma);
1182 
1183 	if (vma->ufence) {
1184 		xe_sync_ufence_put(vma->ufence);
1185 		vma->ufence = NULL;
1186 	}
1187 
1188 	if (xe_vma_is_userptr(vma)) {
1189 		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1190 
1191 		xe_userptr_remove(uvma);
1192 		xe_vm_put(vm);
1193 	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1194 		xe_vm_put(vm);
1195 	} else {
1196 		xe_bo_put(bo);
1197 	}
1198 
1199 	xe_vma_free(vma);
1200 }
1201 
1202 static void vma_destroy_work_func(struct work_struct *w)
1203 {
1204 	struct xe_vma *vma =
1205 		container_of(w, struct xe_vma, destroy_work);
1206 
1207 	xe_vma_destroy_late(vma);
1208 }
1209 
1210 static void vma_destroy_cb(struct dma_fence *fence,
1211 			   struct dma_fence_cb *cb)
1212 {
1213 	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1214 
1215 	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1216 	queue_work(system_dfl_wq, &vma->destroy_work);
1217 }
1218 
1219 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1220 {
1221 	struct xe_vm *vm = xe_vma_vm(vma);
1222 	struct xe_bo *bo = xe_vma_bo(vma);
1223 
1224 	lockdep_assert_held_write(&vm->lock);
1225 	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1226 
1227 	if (xe_vma_is_userptr(vma)) {
1228 		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1229 		xe_userptr_destroy(to_userptr_vma(vma));
1230 	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1231 		xe_bo_assert_held(bo);
1232 
1233 		drm_gpuva_unlink(&vma->gpuva);
1234 
1235 		xe_bo_vma_count_dec_locked(bo);
1236 		if (vma->attr.purgeable_state == XE_MADV_PURGEABLE_WILLNEED)
1237 			xe_bo_willneed_put_locked(bo);
1238 	}
1239 
1240 	xe_vm_assert_held(vm);
1241 	if (fence) {
1242 		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1243 						 vma_destroy_cb);
1244 
1245 		if (ret) {
1246 			XE_WARN_ON(ret != -ENOENT);
1247 			xe_vma_destroy_late(vma);
1248 		}
1249 	} else {
1250 		xe_vma_destroy_late(vma);
1251 	}
1252 }
1253 
1254 /**
1255  * xe_vm_lock_vma() - drm_exec utility to lock a vma
1256  * @exec: The drm_exec object we're currently locking for.
1257  * @vma: The vma for which we want to lock the vm resv and any attached
1258  * object's resv.
1259  *
1260  * Return: 0 on success, negative error code on error. In particular
1261  * may return -EDEADLK on WW transaction contention and -EINTR if
1262  * an interruptible wait is terminated by a signal.
1263  */
1264 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1265 {
1266 	struct xe_vm *vm = xe_vma_vm(vma);
1267 	struct xe_bo *bo = xe_vma_bo(vma);
1268 	int err;
1269 
1270 	XE_WARN_ON(!vm);
1271 
1272 	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1273 	if (!err && bo && !bo->vm)
1274 		err = drm_exec_lock_obj(exec, &bo->ttm.base);
1275 
1276 	return err;
1277 }
1278 
1279 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1280 {
1281 	struct xe_device *xe = xe_vma_vm(vma)->xe;
1282 	struct xe_validation_ctx ctx;
1283 	struct drm_exec exec;
1284 	int err = 0;
1285 
1286 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1287 		err = xe_vm_lock_vma(&exec, vma);
1288 		drm_exec_retry_on_contention(&exec);
1289 		if (XE_WARN_ON(err))
1290 			break;
1291 		xe_vma_destroy(vma, NULL);
1292 	}
1293 	xe_assert(xe, !err);
1294 }
1295 
1296 struct xe_vma *
1297 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1298 {
1299 	struct drm_gpuva *gpuva;
1300 
1301 	lockdep_assert_held(&vm->lock);
1302 
1303 	if (xe_vm_is_closed_or_banned(vm))
1304 		return NULL;
1305 
1306 	xe_assert(vm->xe, start + range <= vm->size);
1307 
1308 	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1309 
1310 	return gpuva ? gpuva_to_vma(gpuva) : NULL;
1311 }
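
/*
 * Example lookup (illustrative sketch): with vm->lock held at least in read
 * mode, a caller resolving a GPU virtual address, e.g. from a pagefault, can
 * find the VMA covering a 4K page as:
 *
 *	down_read(&vm->lock);
 *	vma = xe_vm_find_overlapping_vma(vm, addr & ~(u64)(SZ_4K - 1), SZ_4K);
 *	if (!vma)
 *		; // no mapping at this address
 *	up_read(&vm->lock);
 */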
1312 
1313 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1314 {
1315 	int err;
1316 
1317 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1318 	lockdep_assert_held(&vm->lock);
1319 
1320 	mutex_lock(&vm->snap_mutex);
1321 	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1322 	mutex_unlock(&vm->snap_mutex);
1323 	XE_WARN_ON(err);	/* Shouldn't be possible */
1324 
1325 	return err;
1326 }
1327 
1328 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1329 {
1330 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1331 	lockdep_assert_held(&vm->lock);
1332 
1333 	mutex_lock(&vm->snap_mutex);
1334 	drm_gpuva_remove(&vma->gpuva);
1335 	mutex_unlock(&vm->snap_mutex);
1336 	if (vm->usm.last_fault_vma == vma)
1337 		vm->usm.last_fault_vma = NULL;
1338 }
1339 
1340 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1341 {
1342 	struct xe_vma_op *op;
1343 
1344 	op = kzalloc_obj(*op);
1345 
1346 	if (unlikely(!op))
1347 		return NULL;
1348 
1349 	return &op->base;
1350 }
1351 
1352 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1353 
1354 static const struct drm_gpuvm_ops gpuvm_ops = {
1355 	.op_alloc = xe_vm_op_alloc,
1356 	.vm_bo_validate = xe_gpuvm_validate,
1357 	.vm_free = xe_vm_free,
1358 };
1359 
1360 static u64 pde_encode_pat_index(u16 pat_index)
1361 {
1362 	u64 pte = 0;
1363 
1364 	if (pat_index & BIT(0))
1365 		pte |= XE_PPGTT_PTE_PAT0;
1366 
1367 	if (pat_index & BIT(1))
1368 		pte |= XE_PPGTT_PTE_PAT1;
1369 
1370 	return pte;
1371 }
1372 
1373 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1374 {
1375 	u64 pte = 0;
1376 
1377 	if (pat_index & BIT(0))
1378 		pte |= XE_PPGTT_PTE_PAT0;
1379 
1380 	if (pat_index & BIT(1))
1381 		pte |= XE_PPGTT_PTE_PAT1;
1382 
1383 	if (pat_index & BIT(2)) {
1384 		if (pt_level)
1385 			pte |= XE_PPGTT_PDE_PDPE_PAT2;
1386 		else
1387 			pte |= XE_PPGTT_PTE_PAT2;
1388 	}
1389 
1390 	if (pat_index & BIT(3))
1391 		pte |= XELPG_PPGTT_PTE_PAT3;
1392 
1393 	if (pat_index & (BIT(4)))
1394 		pte |= XE2_PPGTT_PTE_PAT4;
1395 
1396 	return pte;
1397 }
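
/*
 * Worked example (illustrative): pat_index = 6 (0b0110) sets XE_PPGTT_PTE_PAT1
 * plus, for leaf PTEs (pt_level == 0), XE_PPGTT_PTE_PAT2; for non-leaf entries
 * bit 2 selects XE_PPGTT_PDE_PDPE_PAT2 instead. Bit 3 maps to
 * XELPG_PPGTT_PTE_PAT3 and bit 4 to XE2_PPGTT_PTE_PAT4.
 */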
1398 
1399 static u64 pte_encode_ps(u32 pt_level)
1400 {
1401 	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1402 
1403 	if (pt_level == 1)
1404 		return XE_PDE_PS_2M;
1405 	else if (pt_level == 2)
1406 		return XE_PDPE_PS_1G;
1407 
1408 	return 0;
1409 }
1410 
1411 static u16 pde_pat_index(struct xe_bo *bo)
1412 {
1413 	struct xe_device *xe = xe_bo_device(bo);
1414 	u16 pat_index;
1415 
1416 	/*
1417 	 * We only have two bits to encode the PAT index in non-leaf nodes, but
1418 	 * these only point to other paging structures so we only need a minimal
1419 	 * selection of options. The user PAT index is only for encoding leaf
1420 	 * nodes, where we have use of more bits to do the encoding. The
1421 	 * non-leaf nodes are instead under driver control so the chosen index
1422 	 * here should be distinct from the user PAT index. Also the
1423 	 * corresponding coherency of the PAT index should be tied to the
1424 	 * allocation type of the page table (or at least we should pick
1425 	 * something which is always safe).
1426 	 */
1427 	if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1428 		pat_index = xe->pat.idx[XE_CACHE_WB];
1429 	else
1430 		pat_index = xe->pat.idx[XE_CACHE_NONE];
1431 
1432 	xe_assert(xe, pat_index <= 3);
1433 
1434 	return pat_index;
1435 }
1436 
1437 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1438 {
1439 	u64 pde;
1440 
1441 	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1442 	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1443 	pde |= pde_encode_pat_index(pde_pat_index(bo));
1444 
1445 	return pde;
1446 }
1447 
1448 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1449 			      u16 pat_index, u32 pt_level)
1450 {
1451 	u64 pte;
1452 
1453 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1454 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1455 	pte |= pte_encode_pat_index(pat_index, pt_level);
1456 	pte |= pte_encode_ps(pt_level);
1457 
1458 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1459 		pte |= XE_PPGTT_PTE_DM;
1460 
1461 	return pte;
1462 }
1463 
1464 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1465 			       u16 pat_index, u32 pt_level)
1466 {
1467 	struct xe_bo *bo = xe_vma_bo(vma);
1468 	struct xe_vm *vm = xe_vma_vm(vma);
1469 
1470 	pte |= XE_PAGE_PRESENT;
1471 
1472 	if (likely(!xe_vma_read_only(vma)))
1473 		pte |= XE_PAGE_RW;
1474 
1475 	pte |= pte_encode_pat_index(pat_index, pt_level);
1476 	pte |= pte_encode_ps(pt_level);
1477 
1478 	/*
1479 	 * NULL PTEs redirect to scratch page (return zeros on read).
1480 	 * Set for: 1) explicit null VMAs, 2) purged BOs on scratch VMs.
1481 	 * Never set NULL flag without scratch page - causes undefined behavior.
1482 	 */
1483 	if (unlikely(xe_vma_is_null(vma) ||
1484 		     (bo && xe_bo_is_purged(bo) && xe_vm_has_scratch(vm))))
1485 		pte |= XE_PTE_NULL;
1486 
1487 	return pte;
1488 }
1489 
1490 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1491 				u16 pat_index,
1492 				u32 pt_level, bool devmem, u64 flags)
1493 {
1494 	u64 pte;
1495 
1496 	/* Avoid passing random bits directly as flags */
1497 	xe_assert(xe, !(flags & ~XE_PTE_PS64));
1498 
1499 	pte = addr;
1500 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1501 	pte |= pte_encode_pat_index(pat_index, pt_level);
1502 	pte |= pte_encode_ps(pt_level);
1503 
1504 	if (devmem)
1505 		pte |= XE_PPGTT_PTE_DM;
1506 
1507 	pte |= flags;
1508 
1509 	return pte;
1510 }
1511 
1512 static const struct xe_pt_ops xelp_pt_ops = {
1513 	.pte_encode_bo = xelp_pte_encode_bo,
1514 	.pte_encode_vma = xelp_pte_encode_vma,
1515 	.pte_encode_addr = xelp_pte_encode_addr,
1516 	.pde_encode_bo = xelp_pde_encode_bo,
1517 };
1518 
1519 static void vm_destroy_work_func(struct work_struct *w);
1520 
1521 /**
1522  * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
1523  * given tile and vm.
1524  * @xe: xe device.
1525  * @tile: tile to set up for.
1526  * @vm: vm to set up for.
1527  * @exec: The struct drm_exec object used to lock the vm resv.
1528  *
1529  * Sets up a pagetable tree with one page-table per level and a single
1530  * leaf PTE. All pagetable entries point to the single page-table or,
1531  * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1532  * writes become NOPs.
1533  *
1534  * Return: 0 on success, negative error code on error.
1535  */
1536 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1537 				struct xe_vm *vm, struct drm_exec *exec)
1538 {
1539 	u8 id = tile->id;
1540 	int i;
1541 
1542 	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1543 		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1544 		if (IS_ERR(vm->scratch_pt[id][i])) {
1545 			int err = PTR_ERR(vm->scratch_pt[id][i]);
1546 
1547 			vm->scratch_pt[id][i] = NULL;
1548 			return err;
1549 		}
1550 		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1551 	}
1552 
1553 	return 0;
1554 }
1555 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1556 
1557 static void xe_vm_free_scratch(struct xe_vm *vm)
1558 {
1559 	struct xe_tile *tile;
1560 	u8 id;
1561 
1562 	if (!xe_vm_has_scratch(vm))
1563 		return;
1564 
1565 	for_each_tile(tile, vm->xe, id) {
1566 		u32 i;
1567 
1568 		if (!vm->pt_root[id])
1569 			continue;
1570 
1571 		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1572 			if (vm->scratch_pt[id][i])
1573 				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1574 	}
1575 }
1576 
1577 static void xe_vm_pt_destroy(struct xe_vm *vm)
1578 {
1579 	struct xe_tile *tile;
1580 	u8 id;
1581 
1582 	xe_vm_assert_held(vm);
1583 
1584 	for_each_tile(tile, vm->xe, id) {
1585 		if (vm->pt_root[id]) {
1586 			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1587 			vm->pt_root[id] = NULL;
1588 		}
1589 	}
1590 }
1591 
1592 static void xe_vm_init_prove_locking(struct xe_device *xe, struct xe_vm *vm)
1593 {
1594 	if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
1595 		return;
1596 
1597 	fs_reclaim_acquire(GFP_KERNEL);
1598 	might_lock(&vm->exec_queues.lock);
1599 	fs_reclaim_release(GFP_KERNEL);
1600 
1601 	down_read(&vm->exec_queues.lock);
1602 	might_lock(&xe_root_mmio_gt(xe)->uc.guc.ct.lock);
1603 	up_read(&vm->exec_queues.lock);
1604 }
1605 
1606 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1607 {
1608 	struct drm_gem_object *vm_resv_obj;
1609 	struct xe_validation_ctx ctx;
1610 	struct drm_exec exec;
1611 	struct xe_vm *vm;
1612 	int err;
1613 	struct xe_tile *tile;
1614 	u8 id;
1615 
1616 	/*
1617 	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1618 	 * ever be in faulting mode.
1619 	 */
1620 	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1621 
1622 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1623 	if (!vm)
1624 		return ERR_PTR(-ENOMEM);
1625 
1626 	vm->xe = xe;
1627 
1628 	vm->size = 1ull << xe->info.va_bits;
1629 	vm->flags = flags;
1630 
1631 	if (xef)
1632 		vm->xef = xe_file_get(xef);
1633 	/**
1634 	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1635 	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1636 	 * under a user-VM lock when the PXP session is started at exec_queue
1637 	 * creation time. Those are different VMs and therefore there is no risk
1638 	 * of deadlock, but we need to tell lockdep that this is the case or it
1639 	 * will print a warning.
1640 	 */
1641 	if (flags & XE_VM_FLAG_GSC) {
1642 		static struct lock_class_key gsc_vm_key;
1643 
1644 		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1645 	} else {
1646 		init_rwsem(&vm->lock);
1647 	}
1648 	mutex_init(&vm->snap_mutex);
1649 
1650 	INIT_LIST_HEAD(&vm->rebind_list);
1651 
1652 	INIT_LIST_HEAD(&vm->userptr.repin_list);
1653 	INIT_LIST_HEAD(&vm->userptr.invalidated);
1654 	spin_lock_init(&vm->userptr.invalidated_lock);
1655 
1656 	INIT_LIST_HEAD(&vm->faults.list);
1657 	spin_lock_init(&vm->faults.lock);
1658 
1659 	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1660 
1661 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1662 
1663 	INIT_LIST_HEAD(&vm->preempt.exec_queues);
1664 	for (id = 0; id < XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE; ++id)
1665 		INIT_LIST_HEAD(&vm->exec_queues.list[id]);
1666 	if (flags & XE_VM_FLAG_FAULT_MODE)
1667 		vm->preempt.min_run_period_ms = xe->min_run_period_pf_ms;
1668 	else
1669 		vm->preempt.min_run_period_ms = xe->min_run_period_lr_ms;
1670 
1671 	init_rwsem(&vm->exec_queues.lock);
1672 	xe_vm_init_prove_locking(xe, vm);
1673 
1674 	for_each_tile(tile, xe, id)
1675 		xe_range_fence_tree_init(&vm->rftree[id]);
1676 
1677 	vm->pt_ops = &xelp_pt_ops;
1678 
1679 	/*
1680 	 * Long-running workloads are not protected by the scheduler references.
1681 	 * By design, run_job for long-running workloads returns NULL and the
1682 	 * scheduler drops all of its references, hence protecting the VM
1683 	 * for this case is necessary.
1684 	 */
1685 	if (flags & XE_VM_FLAG_LR_MODE) {
1686 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1687 		xe_pm_runtime_get_noresume(xe);
1688 		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1689 	}
1690 
1691 	err = xe_svm_init(vm);
1692 	if (err)
1693 		goto err_no_resv;
1694 
1695 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1696 	if (!vm_resv_obj) {
1697 		err = -ENOMEM;
1698 		goto err_svm_fini;
1699 	}
1700 
1701 	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1702 		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1703 
1704 	drm_gem_object_put(vm_resv_obj);
1705 
1706 	err = 0;
1707 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1708 			    err) {
1709 		err = xe_vm_drm_exec_lock(vm, &exec);
1710 		drm_exec_retry_on_contention(&exec);
1711 
1712 		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1713 			vm->flags |= XE_VM_FLAG_64K;
1714 
1715 		for_each_tile(tile, xe, id) {
1716 			if (flags & XE_VM_FLAG_MIGRATION &&
1717 			    tile->id != XE_VM_FLAG_TILE_ID(flags))
1718 				continue;
1719 
1720 			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1721 						       &exec);
1722 			if (IS_ERR(vm->pt_root[id])) {
1723 				err = PTR_ERR(vm->pt_root[id]);
1724 				vm->pt_root[id] = NULL;
1725 				xe_vm_pt_destroy(vm);
1726 				drm_exec_retry_on_contention(&exec);
1727 				xe_validation_retry_on_oom(&ctx, &err);
1728 				break;
1729 			}
1730 		}
1731 		if (err)
1732 			break;
1733 
1734 		if (xe_vm_has_scratch(vm)) {
1735 			for_each_tile(tile, xe, id) {
1736 				if (!vm->pt_root[id])
1737 					continue;
1738 
1739 				err = xe_vm_create_scratch(xe, tile, vm, &exec);
1740 				if (err) {
1741 					xe_vm_free_scratch(vm);
1742 					xe_vm_pt_destroy(vm);
1743 					drm_exec_retry_on_contention(&exec);
1744 					xe_validation_retry_on_oom(&ctx, &err);
1745 					break;
1746 				}
1747 			}
1748 			if (err)
1749 				break;
1750 			vm->batch_invalidate_tlb = true;
1751 		}
1752 
1753 		if (vm->flags & XE_VM_FLAG_LR_MODE) {
1754 			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1755 			vm->batch_invalidate_tlb = false;
1756 		}
1757 
1758 		/* Fill pt_root after allocating scratch tables */
1759 		for_each_tile(tile, xe, id) {
1760 			if (!vm->pt_root[id])
1761 				continue;
1762 
1763 			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1764 		}
1765 	}
1766 	if (err)
1767 		goto err_close;
1768 
1769 	/* Kernel migration VM shouldn't have a circular loop. */
1770 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
1771 		for_each_tile(tile, xe, id) {
1772 			struct xe_exec_queue *q;
1773 			u32 create_flags = EXEC_QUEUE_FLAG_VM;
1774 
1775 			if (!vm->pt_root[id])
1776 				continue;
1777 
1778 			if (!xef) /* Not from userspace */
1779 				create_flags |= EXEC_QUEUE_FLAG_KERNEL;
1780 
1781 			q = xe_exec_queue_create_bind(xe, tile, vm, create_flags, 0);
1782 			if (IS_ERR(q)) {
1783 				err = PTR_ERR(q);
1784 				goto err_close;
1785 			}
1786 			vm->q[id] = q;
1787 		}
1788 	}
1789 
1790 	if (xef && xe->info.has_asid) {
1791 		u32 asid;
1792 
1793 		down_write(&xe->usm.lock);
1794 		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1795 				      XA_LIMIT(1, XE_MAX_ASID - 1),
1796 				      &xe->usm.next_asid, GFP_NOWAIT);
1797 		up_write(&xe->usm.lock);
1798 		if (err < 0)
1799 			goto err_close;
1800 
1801 		vm->usm.asid = asid;
1802 	}
1803 
1804 	trace_xe_vm_create(vm);
1805 
1806 	return vm;
1807 
1808 err_close:
1809 	xe_vm_close_and_put(vm);
1810 	return ERR_PTR(err);
1811 
1812 err_svm_fini:
1813 	if (flags & XE_VM_FLAG_FAULT_MODE) {
1814 		vm->size = 0; /* close the vm */
1815 		xe_svm_fini(vm);
1816 	}
1817 err_no_resv:
1818 	mutex_destroy(&vm->snap_mutex);
1819 	for_each_tile(tile, xe, id)
1820 		xe_range_fence_tree_fini(&vm->rftree[id]);
1821 	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1822 	if (vm->xef)
1823 		xe_file_put(vm->xef);
1824 	kfree(vm);
1825 	if (flags & XE_VM_FLAG_LR_MODE)
1826 		xe_pm_runtime_put(xe);
1827 	return ERR_PTR(err);
1828 }
1829 
1830 static void xe_vm_close(struct xe_vm *vm)
1831 {
1832 	struct xe_device *xe = vm->xe;
1833 	bool bound;
1834 	int idx;
1835 
1836 	bound = drm_dev_enter(&xe->drm, &idx);
1837 
1838 	down_write(&vm->lock);
1839 	if (xe_vm_in_fault_mode(vm))
1840 		xe_svm_notifier_lock(vm);
1841 
1842 	vm->size = 0;
1843 
1844 	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1845 		struct xe_tile *tile;
1846 		struct xe_gt *gt;
1847 		u8 id;
1848 
1849 		/* Wait for pending binds */
1850 		dma_resv_wait_timeout(xe_vm_resv(vm),
1851 				      DMA_RESV_USAGE_BOOKKEEP,
1852 				      false, MAX_SCHEDULE_TIMEOUT);
1853 
1854 		if (bound) {
1855 			for_each_tile(tile, xe, id)
1856 				if (vm->pt_root[id])
1857 					xe_pt_clear(xe, vm->pt_root[id]);
1858 
1859 			for_each_gt(gt, xe, id)
1860 				xe_tlb_inval_vm(&gt->tlb_inval, vm);
1861 		}
1862 	}
1863 
1864 	if (xe_vm_in_fault_mode(vm))
1865 		xe_svm_notifier_unlock(vm);
1866 	up_write(&vm->lock);
1867 
1868 	if (bound)
1869 		drm_dev_exit(idx);
1870 }
1871 
1872 void xe_vm_close_and_put(struct xe_vm *vm)
1873 {
1874 	LIST_HEAD(contested);
1875 	struct xe_device *xe = vm->xe;
1876 	struct xe_tile *tile;
1877 	struct xe_vma *vma, *next_vma;
1878 	struct drm_gpuva *gpuva, *next;
1879 	u8 id;
1880 
1881 	xe_assert(xe, !vm->preempt.num_exec_queues);
1882 
1883 	xe_vm_close(vm);
1884 	if (xe_vm_in_preempt_fence_mode(vm)) {
1885 		mutex_lock(&xe->rebind_resume_lock);
1886 		list_del_init(&vm->preempt.pm_activate_link);
1887 		mutex_unlock(&xe->rebind_resume_lock);
1888 		flush_work(&vm->preempt.rebind_work);
1889 	}
1890 	if (xe_vm_in_fault_mode(vm))
1891 		xe_svm_close(vm);
1892 
1893 	down_write(&vm->lock);
1894 	for_each_tile(tile, xe, id) {
1895 		if (vm->q[id]) {
1896 			int i;
1897 
1898 			xe_exec_queue_last_fence_put(vm->q[id], vm);
1899 			for_each_tlb_inval(i)
1900 				xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
1901 		}
1902 	}
1903 	up_write(&vm->lock);
1904 
1905 	for_each_tile(tile, xe, id) {
1906 		if (vm->q[id]) {
1907 			xe_exec_queue_kill(vm->q[id]);
1908 			xe_exec_queue_put(vm->q[id]);
1909 			vm->q[id] = NULL;
1910 		}
1911 	}
1912 
1913 	down_write(&vm->lock);
1914 	xe_vm_lock(vm, false);
1915 	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1916 		vma = gpuva_to_vma(gpuva);
1917 
1918 		if (xe_vma_has_no_bo(vma)) {
1919 			xe_svm_notifier_lock(vm);
1920 			vma->gpuva.flags |= XE_VMA_DESTROYED;
1921 			xe_svm_notifier_unlock(vm);
1922 		}
1923 
1924 		xe_vm_remove_vma(vm, vma);
1925 
1926 		/* Easy case: no BO or a VM-private BO, destroy the VMA immediately */
1927 		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1928 			list_del_init(&vma->combined_links.rebind);
1929 			xe_vma_destroy(vma, NULL);
1930 			continue;
1931 		}
1932 
1933 		list_move_tail(&vma->combined_links.destroy, &contested);
1934 		vma->gpuva.flags |= XE_VMA_DESTROYED;
1935 	}
1936 
1937 	/*
1938 	 * All vm operations will add shared fences to resv.
1939 	 * The only exception is eviction for a shared object,
1940 	 * but even so, the unbind when evicted would still
1941 	 * install a fence to resv. Hence it's safe to
1942 	 * destroy the pagetables immediately.
1943 	 */
1944 	xe_vm_free_scratch(vm);
1945 	xe_vm_pt_destroy(vm);
1946 	xe_vm_unlock(vm);
1947 
1948 	/*
1949 	 * The VM is now dead, so nothing can re-add nodes to vm->vmas.
1950 	 * Since we hold a refcount to each bo, we can remove and free
1951 	 * the contested members safely without further locking.
1952 	 */
1953 	list_for_each_entry_safe(vma, next_vma, &contested,
1954 				 combined_links.destroy) {
1955 		list_del_init(&vma->combined_links.destroy);
1956 		xe_vma_destroy_unlocked(vma);
1957 	}
1958 
1959 	xe_svm_fini(vm);
1960 
1961 	up_write(&vm->lock);
1962 
1963 	down_write(&xe->usm.lock);
1964 	if (vm->usm.asid) {
1965 		void *lookup;
1966 
1967 		xe_assert(xe, xe->info.has_asid);
1968 		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1969 
1970 		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1971 		xe_assert(xe, lookup == vm);
1972 	}
1973 	up_write(&xe->usm.lock);
1974 
1975 	xe_vm_clear_fault_entries(vm);
1976 
1977 	for_each_tile(tile, xe, id)
1978 		xe_range_fence_tree_fini(&vm->rftree[id]);
1979 
1980 	xe_vm_put(vm);
1981 }
1982 
vm_destroy_work_func(struct work_struct * w)1983 static void vm_destroy_work_func(struct work_struct *w)
1984 {
1985 	struct xe_vm *vm =
1986 		container_of(w, struct xe_vm, destroy_work);
1987 	struct xe_device *xe = vm->xe;
1988 	struct xe_tile *tile;
1989 	u8 id;
1990 
1991 	/* xe_vm_close_and_put was not called? */
1992 	xe_assert(xe, !vm->size);
1993 
1994 	if (xe_vm_in_preempt_fence_mode(vm))
1995 		flush_work(&vm->preempt.rebind_work);
1996 
1997 	mutex_destroy(&vm->snap_mutex);
1998 
1999 	if (vm->flags & XE_VM_FLAG_LR_MODE)
2000 		xe_pm_runtime_put(xe);
2001 
2002 	for_each_tile(tile, xe, id)
2003 		XE_WARN_ON(vm->pt_root[id]);
2004 
2005 	trace_xe_vm_free(vm);
2006 
2007 	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
2008 
2009 	if (vm->xef)
2010 		xe_file_put(vm->xef);
2011 
2012 	kfree(vm);
2013 }
2014 
xe_vm_free(struct drm_gpuvm * gpuvm)2015 static void xe_vm_free(struct drm_gpuvm *gpuvm)
2016 {
2017 	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
2018 
2019 	/* To destroy the VM we need to be able to sleep */
2020 	queue_work(system_dfl_wq, &vm->destroy_work);
2021 }
2022 
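/**
 * xe_vm_lookup() - Look up a VM by its per-file id
 * @xef: The file the VM id belongs to.
 * @id: The VM id returned by the VM create ioctl.
 *
 * Return: A referenced struct xe_vm, or NULL if no VM with @id exists. The
 * caller is responsible for dropping the reference with xe_vm_put().
 */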
xe_vm_lookup(struct xe_file * xef,u32 id)2023 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
2024 {
2025 	struct xe_vm *vm;
2026 
2027 	mutex_lock(&xef->vm.lock);
2028 	vm = xa_load(&xef->vm.xa, id);
2029 	if (vm)
2030 		xe_vm_get(vm);
2031 	mutex_unlock(&xef->vm.lock);
2032 
2033 	return vm;
2034 }
2035 
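/**
 * xe_vm_pdp4_descriptor() - Encode the VM's root page table for a tile
 * @vm: The VM.
 * @tile: The tile whose root page table to encode.
 *
 * Return: The page-directory entry encoding of @vm's root page-table BO on
 * @tile.
 */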
xe_vm_pdp4_descriptor(struct xe_vm * vm,struct xe_tile * tile)2036 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
2037 {
2038 	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
2039 }
2040 
2041 static struct xe_exec_queue *
to_wait_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)2042 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
2043 {
2044 	return q ? q : vm->q[0];
2045 }
2046 
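/* Return the first user fence in @syncs with an extra reference taken, or NULL. */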
2047 static struct xe_user_fence *
find_ufence_get(struct xe_sync_entry * syncs,u32 num_syncs)2048 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
2049 {
2050 	unsigned int i;
2051 
2052 	for (i = 0; i < num_syncs; i++) {
2053 		struct xe_sync_entry *e = &syncs[i];
2054 
2055 		if (xe_sync_is_ufence(e))
2056 			return xe_sync_ufence_get(e);
2057 	}
2058 
2059 	return NULL;
2060 }
2061 
2062 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
2063 				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
2064 				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
2065 				    DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
2066 
xe_vm_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2067 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
2068 		       struct drm_file *file)
2069 {
2070 	struct xe_device *xe = to_xe_device(dev);
2071 	struct xe_file *xef = to_xe_file(file);
2072 	struct drm_xe_vm_create *args = data;
2073 	struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
2074 	struct xe_vm *vm;
2075 	u32 id;
2076 	int err;
2077 	u32 flags = 0;
2078 
2079 	if (XE_IOCTL_DBG(xe, args->extensions))
2080 		return -EINVAL;
2081 
2082 	if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
2083 		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
2084 
2085 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2086 			 !xe->info.has_usm))
2087 		return -EINVAL;
2088 
2089 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2090 		return -EINVAL;
2091 
2092 	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
2093 		return -EINVAL;
2094 
2095 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
2096 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
2097 			 !xe->info.needs_scratch))
2098 		return -EINVAL;
2099 
2100 	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
2101 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
2102 		return -EINVAL;
2103 
2104 	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
2105 			 args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
2106 		return -EINVAL;
2107 
2108 	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
2109 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
2110 	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
2111 		flags |= XE_VM_FLAG_LR_MODE;
2112 	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
2113 		flags |= XE_VM_FLAG_FAULT_MODE;
2114 	if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
2115 		flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;
2116 
2117 	vm = xe_vm_create(xe, flags, xef);
2118 	if (IS_ERR(vm))
2119 		return PTR_ERR(vm);
2120 
2121 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
2122 	/* Warning: Security issue - never enable by default */
2123 	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
2124 #endif
2125 
2126 	/* user id alloc must always be last in ioctl to prevent UAF */
2127 	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
2128 	if (err)
2129 		goto err_close_and_put;
2130 
2131 	args->vm_id = id;
2132 
2133 	return 0;
2134 
2135 err_close_and_put:
2136 	xe_vm_close_and_put(vm);
2137 
2138 	return err;
2139 }
2140 
xe_vm_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2141 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
2142 			struct drm_file *file)
2143 {
2144 	struct xe_device *xe = to_xe_device(dev);
2145 	struct xe_file *xef = to_xe_file(file);
2146 	struct drm_xe_vm_destroy *args = data;
2147 	struct xe_vm *vm;
2148 	int err = 0;
2149 
2150 	if (XE_IOCTL_DBG(xe, args->pad) ||
2151 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2152 		return -EINVAL;
2153 
2154 	mutex_lock(&xef->vm.lock);
2155 	vm = xa_load(&xef->vm.xa, args->vm_id);
2156 	if (XE_IOCTL_DBG(xe, !vm))
2157 		err = -ENOENT;
2158 	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
2159 		err = -EBUSY;
2160 	else
2161 		xa_erase(&xef->vm.xa, args->vm_id);
2162 	mutex_unlock(&xef->vm.lock);
2163 
2164 	if (!err)
2165 		xe_vm_close_and_put(vm);
2166 
2167 	return err;
2168 }
2169 
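/* Count the VMAs overlapping [start, end) in @vm. Caller must hold vm->lock. */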
xe_vm_query_vmas(struct xe_vm * vm,u64 start,u64 end)2170 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
2171 {
2172 	struct drm_gpuva *gpuva;
2173 	u32 num_vmas = 0;
2174 
2175 	lockdep_assert_held(&vm->lock);
2176 	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
2177 		num_vmas++;
2178 
2179 	return num_vmas;
2180 }
2181 
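/*
 * Fill @attrs with the memory attributes of each VMA overlapping [start, end).
 * On entry *num_vmas is the capacity of @attrs; on success it is updated to the
 * number of entries written. Returns -ENOSPC if the capacity is exceeded.
 */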
get_mem_attrs(struct xe_vm * vm,u32 * num_vmas,u64 start,u64 end,struct drm_xe_mem_range_attr * attrs)2182 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
2183 			 u64 end, struct drm_xe_mem_range_attr *attrs)
2184 {
2185 	struct drm_gpuva *gpuva;
2186 	int i = 0;
2187 
2188 	lockdep_assert_held(&vm->lock);
2189 
2190 	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
2191 		struct xe_vma *vma = gpuva_to_vma(gpuva);
2192 
2193 		if (i == *num_vmas)
2194 			return -ENOSPC;
2195 
2196 		attrs[i].start = xe_vma_start(vma);
2197 		attrs[i].end = xe_vma_end(vma);
2198 		attrs[i].atomic.val = vma->attr.atomic_access;
2199 		attrs[i].pat_index.val = vma->attr.pat_index;
2200 		attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
2201 		attrs[i].preferred_mem_loc.migration_policy =
2202 		vma->attr.preferred_loc.migration_policy;
2203 
2204 		i++;
2205 	}
2206 
2207 	*num_vmas = i;
2208 	return 0;
2209 }
2210 
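/*
 * Query the memory range attributes of a VM. Userspace typically calls this
 * twice: first with num_mem_ranges == 0 to learn how many ranges overlap
 * [start, start + range) and the per-entry struct size, then again with a
 * suitably sized buffer to receive the attributes.
 */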
xe_vm_query_vmas_attrs_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2211 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2212 {
2213 	struct xe_device *xe = to_xe_device(dev);
2214 	struct xe_file *xef = to_xe_file(file);
2215 	struct drm_xe_mem_range_attr *mem_attrs;
2216 	struct drm_xe_vm_query_mem_range_attr *args = data;
2217 	u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2218 	struct xe_vm *vm;
2219 	int err = 0;
2220 
2221 	if (XE_IOCTL_DBG(xe,
2222 			 ((args->num_mem_ranges == 0 &&
2223 			  (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2224 			 (args->num_mem_ranges > 0 &&
2225 			  (!attrs_user ||
2226 			   args->sizeof_mem_range_attr !=
2227 			   sizeof(struct drm_xe_mem_range_attr))))))
2228 		return -EINVAL;
2229 
2230 	vm = xe_vm_lookup(xef, args->vm_id);
2231 	if (XE_IOCTL_DBG(xe, !vm))
2232 		return -EINVAL;
2233 
2234 	err = down_read_interruptible(&vm->lock);
2235 	if (err)
2236 		goto put_vm;
2237 
2238 	attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2239 
2240 	if (args->num_mem_ranges == 0 && !attrs_user) {
2241 		args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2242 		args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2243 		goto unlock_vm;
2244 	}
2245 
2246 	mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2247 				   GFP_KERNEL | __GFP_ACCOUNT |
2248 				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2249 	if (!mem_attrs) {
2250 		err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2251 		goto unlock_vm;
2252 	}
2253 
2254 	memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2255 	err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2256 			    args->start + args->range, mem_attrs);
2257 	if (err)
2258 		goto free_mem_attrs;
2259 
2260 	err = copy_to_user(attrs_user, mem_attrs,
2261 			   args->sizeof_mem_range_attr * args->num_mem_ranges);
2262 	if (err)
2263 		err = -EFAULT;
2264 
2265 free_mem_attrs:
2266 	kvfree(mem_attrs);
2267 unlock_vm:
2268 	up_read(&vm->lock);
2269 put_vm:
2270 	xe_vm_put(vm);
2271 	return err;
2272 }
2273 
vma_matches(struct xe_vma * vma,u64 page_addr)2274 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2275 {
2276 	if (page_addr > xe_vma_end(vma) - 1 ||
2277 	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
2278 		return false;
2279 
2280 	return true;
2281 }
2282 
2283 /**
2284  * xe_vm_find_vma_by_addr() - Find a VMA by its address
2285  *
2286  * @vm: the xe_vm the vma belongs to
2287  * @page_addr: address to look up
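 *
 * Return: the VMA overlapping the 4KiB page at @page_addr, or NULL if none.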
2288  */
xe_vm_find_vma_by_addr(struct xe_vm * vm,u64 page_addr)2289 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2290 {
2291 	struct xe_vma *vma = NULL;
2292 
2293 	if (vm->usm.last_fault_vma) {   /* Fast lookup */
2294 		if (vma_matches(vm->usm.last_fault_vma, page_addr))
2295 			vma = vm->usm.last_fault_vma;
2296 	}
2297 	if (!vma)
2298 		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2299 
2300 	return vma;
2301 }
2302 
2303 static const u32 region_to_mem_type[] = {
2304 	XE_PL_TT,
2305 	XE_PL_VRAM0,
2306 	XE_PL_VRAM1,
2307 };
2308 
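/*
 * Mark @vma as destroyed under the SVM notifier lock, and remove it from the
 * VM tree if it was already committed (@post_commit).
 */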
prep_vma_destroy(struct xe_vm * vm,struct xe_vma * vma,bool post_commit)2309 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2310 			     bool post_commit)
2311 {
2312 	xe_svm_notifier_lock(vm);
2313 	vma->gpuva.flags |= XE_VMA_DESTROYED;
2314 	xe_svm_notifier_unlock(vm);
2315 	if (post_commit)
2316 		xe_vm_remove_vma(vm, vma);
2317 }
2318 
2319 #undef ULL
2320 #define ULL	unsigned long long
2321 
2322 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2323 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2324 {
2325 	struct xe_vma *vma;
2326 
2327 	switch (op->op) {
2328 	case DRM_GPUVA_OP_MAP:
2329 		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2330 		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
2331 		break;
2332 	case DRM_GPUVA_OP_REMAP:
2333 		vma = gpuva_to_vma(op->remap.unmap->va);
2334 		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2335 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2336 		       op->remap.unmap->keep ? 1 : 0);
2337 		if (op->remap.prev)
2338 			vm_dbg(&xe->drm,
2339 			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2340 			       (ULL)op->remap.prev->va.addr,
2341 			       (ULL)op->remap.prev->va.range);
2342 		if (op->remap.next)
2343 			vm_dbg(&xe->drm,
2344 			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2345 			       (ULL)op->remap.next->va.addr,
2346 			       (ULL)op->remap.next->va.range);
2347 		break;
2348 	case DRM_GPUVA_OP_UNMAP:
2349 		vma = gpuva_to_vma(op->unmap.va);
2350 		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2351 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2352 		       op->unmap.keep ? 1 : 0);
2353 		break;
2354 	case DRM_GPUVA_OP_PREFETCH:
2355 		vma = gpuva_to_vma(op->prefetch.va);
2356 		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2357 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2358 		break;
2359 	default:
2360 		drm_warn(&xe->drm, "NOT POSSIBLE\n");
2361 	}
2362 }
2363 #else
print_op(struct xe_device * xe,struct drm_gpuva_op * op)2364 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2365 {
2366 }
2367 #endif
2368 
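/*
 * Return true when a bind must invalidate existing scratch-page PTEs covering
 * its range: only in fault mode, with scratch pages enabled, and when the bind
 * is deferred (not DRM_XE_VM_BIND_FLAG_IMMEDIATE).
 */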
__xe_vm_needs_clear_scratch_pages(struct xe_vm * vm,u32 bind_flags)2369 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2370 {
2371 	if (!xe_vm_in_fault_mode(vm))
2372 		return false;
2373 
2374 	if (!xe_vm_has_scratch(vm))
2375 		return false;
2376 
2377 	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2378 		return false;
2379 
2380 	return true;
2381 }
2382 
xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops * ops)2383 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2384 {
2385 	struct drm_gpuva_op *__op;
2386 
2387 	drm_gpuva_for_each_op(__op, ops) {
2388 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2389 
2390 		xe_vma_svm_prefetch_op_fini(op);
2391 	}
2392 }
2393 
2394 /*
2395  * Create the operations list from the IOCTL arguments and set up the operation
2396  * fields so that parse and commit are decoupled from the IOCTL. This step can fail.
2397  */
2398 static struct drm_gpuva_ops *
vm_bind_ioctl_ops_create(struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_bo * bo,u64 bo_offset_or_userptr,u64 addr,u64 range,u32 operation,u32 flags,u32 prefetch_region,u16 pat_index)2399 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2400 			 struct xe_bo *bo, u64 bo_offset_or_userptr,
2401 			 u64 addr, u64 range,
2402 			 u32 operation, u32 flags,
2403 			 u32 prefetch_region, u16 pat_index)
2404 {
2405 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2406 	struct drm_gpuva_ops *ops;
2407 	struct drm_gpuva_op *__op;
2408 	struct drm_gpuvm_bo *vm_bo;
2409 	u64 range_start = addr;
2410 	u64 range_end = addr + range;
2411 	int err;
2412 
2413 	lockdep_assert_held_write(&vm->lock);
2414 
2415 	vm_dbg(&vm->xe->drm,
2416 	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2417 	       operation, (ULL)addr, (ULL)range,
2418 	       (ULL)bo_offset_or_userptr);
2419 
2420 	switch (operation) {
2421 	case DRM_XE_VM_BIND_OP_MAP:
2422 		if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) {
2423 			xe_vm_find_cpu_addr_mirror_vma_range(vm, &range_start, &range_end);
2424 			vops->flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
2425 		}
2426 
2427 		fallthrough;
2428 	case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2429 		struct drm_gpuvm_map_req map_req = {
2430 			.map.va.addr = range_start,
2431 			.map.va.range = range_end - range_start,
2432 			.map.gem.obj = obj,
2433 			.map.gem.offset = bo_offset_or_userptr,
2434 		};
2435 
2436 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2437 		break;
2438 	}
2439 	case DRM_XE_VM_BIND_OP_UNMAP:
2440 		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2441 		break;
2442 	case DRM_XE_VM_BIND_OP_PREFETCH:
2443 		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2444 		break;
2445 	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2446 		xe_assert(vm->xe, bo);
2447 
2448 		err = xe_bo_lock(bo, true);
2449 		if (err)
2450 			return ERR_PTR(err);
2451 
2452 		vm_bo = drm_gpuvm_bo_obtain_locked(&vm->gpuvm, obj);
2453 		if (IS_ERR(vm_bo)) {
2454 			xe_bo_unlock(bo);
2455 			return ERR_CAST(vm_bo);
2456 		}
2457 
2458 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2459 		drm_gpuvm_bo_put(vm_bo);
2460 		xe_bo_unlock(bo);
2461 		break;
2462 	default:
2463 		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2464 		ops = ERR_PTR(-EINVAL);
2465 	}
2466 	if (IS_ERR(ops))
2467 		return ops;
2468 
2469 	drm_gpuva_for_each_op(__op, ops) {
2470 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2471 
2472 		if (__op->op == DRM_GPUVA_OP_MAP) {
2473 			op->map.immediate =
2474 				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2475 			if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
2476 				op->map.vma_flags |= XE_VMA_READ_ONLY;
2477 			if (flags & DRM_XE_VM_BIND_FLAG_NULL)
2478 				op->map.vma_flags |= DRM_GPUVA_SPARSE;
2479 			if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
2480 				op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
2481 			if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
2482 				op->map.vma_flags |= XE_VMA_DUMPABLE;
2483 			if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
2484 				op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
2485 			op->map.request_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
2486 			op->map.pat_index = pat_index;
2487 			op->map.invalidate_on_bind =
2488 				__xe_vm_needs_clear_scratch_pages(vm, flags);
2489 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2490 			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2491 			struct xe_tile *tile;
2492 			struct xe_svm_range *svm_range;
2493 			struct drm_gpusvm_ctx ctx = {};
2494 			struct drm_pagemap *dpagemap = NULL;
2495 			u8 id, tile_mask = 0;
2496 			u32 i;
2497 
2498 			if (!xe_vma_is_cpu_addr_mirror(vma)) {
2499 				op->prefetch.region = prefetch_region;
2500 				break;
2501 			}
2502 
2503 			ctx.read_only = xe_vma_read_only(vma);
2504 			ctx.devmem_possible = IS_DGFX(vm->xe) &&
2505 					      IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2506 
2507 			for_each_tile(tile, vm->xe, id)
2508 				tile_mask |= 0x1 << id;
2509 
2510 			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2511 			op->prefetch_range.ranges_count = 0;
2512 
2513 			if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2514 				dpagemap = xe_vma_resolve_pagemap(vma,
2515 								  xe_device_get_root_tile(vm->xe));
2516 			} else if (prefetch_region) {
2517 				tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2518 						      XE_PL_VRAM0];
2519 				dpagemap = xe_tile_local_pagemap(tile);
2520 			}
2521 
2522 			op->prefetch_range.dpagemap = dpagemap;
2523 alloc_next_range:
2524 			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2525 
2526 			if (PTR_ERR(svm_range) == -ENOENT) {
2527 				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2528 
2529 				addr = ret == ULONG_MAX ? 0 : ret;
2530 				if (addr)
2531 					goto alloc_next_range;
2532 				else
2533 					goto print_op_label;
2534 			}
2535 
2536 			if (IS_ERR(svm_range)) {
2537 				err = PTR_ERR(svm_range);
2538 				goto unwind_prefetch_ops;
2539 			}
2540 
2541 			if (xe_svm_range_validate(vm, svm_range, tile_mask, dpagemap)) {
2542 				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2543 				goto check_next_range;
2544 			}
2545 
2546 			err = xa_alloc(&op->prefetch_range.range,
2547 				       &i, svm_range, xa_limit_32b,
2548 				       GFP_KERNEL);
2549 
2550 			if (err)
2551 				goto unwind_prefetch_ops;
2552 
2553 			op->prefetch_range.ranges_count++;
2554 			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2555 			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2556 check_next_range:
2557 			if (range_end > xe_svm_range_end(svm_range) &&
2558 			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2559 				addr = xe_svm_range_end(svm_range);
2560 				goto alloc_next_range;
2561 			}
2562 		}
2563 print_op_label:
2564 		print_op(vm->xe, __op);
2565 	}
2566 
2567 	return ops;
2568 
2569 unwind_prefetch_ops:
2570 	xe_svm_prefetch_gpuva_ops_fini(ops);
2571 	drm_gpuva_ops_free(&vm->gpuvm, ops);
2572 	return ERR_PTR(err);
2573 }
2574 
2575 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
2576 
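/*
 * Create a VMA for a GPUVA map op. For BO-backed mappings the relevant
 * reservation objects are locked under a validation transaction, and external
 * BOs additionally get preempt fences added; userptr mappings get their pages
 * pinned. Returns the new VMA or an ERR_PTR on failure.
 */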
new_vma(struct xe_vm * vm,struct drm_gpuva_op_map * op,struct xe_vma_mem_attr * attr,unsigned int flags)2577 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2578 			      struct xe_vma_mem_attr *attr, unsigned int flags)
2579 {
2580 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2581 	struct xe_validation_ctx ctx;
2582 	struct drm_exec exec;
2583 	struct xe_vma *vma;
2584 	int err = 0;
2585 
2586 	lockdep_assert_held_write(&vm->lock);
2587 
2588 	if (bo) {
2589 		err = 0;
2590 		xe_validation_guard(&ctx, &vm->xe->val, &exec,
2591 				    (struct xe_val_flags) {.interruptible = true}, err) {
2592 			if (!bo->vm) {
2593 				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2594 				drm_exec_retry_on_contention(&exec);
2595 			}
2596 			if (!err) {
2597 				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2598 				drm_exec_retry_on_contention(&exec);
2599 			}
2600 			if (err)
2601 				return ERR_PTR(err);
2602 
2603 			vma = xe_vma_create(vm, bo, op->gem.offset,
2604 					    op->va.addr, op->va.addr +
2605 					    op->va.range - 1, attr, flags);
2606 			if (IS_ERR(vma))
2607 				return vma;
2608 
2609 			if (!bo->vm) {
2610 				err = add_preempt_fences(vm, bo);
2611 				if (err) {
2612 					prep_vma_destroy(vm, vma, false);
2613 					xe_vma_destroy(vma, NULL);
2614 				}
2615 			}
2616 		}
2617 		if (err)
2618 			return ERR_PTR(err);
2619 	} else {
2620 		vma = xe_vma_create(vm, NULL, op->gem.offset,
2621 				    op->va.addr, op->va.addr +
2622 				    op->va.range - 1, attr, flags);
2623 		if (IS_ERR(vma))
2624 			return vma;
2625 
2626 		if (xe_vma_is_userptr(vma)) {
2627 			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2628 			/*
2629 			 * -EBUSY is reserved to mean that a user fence
2630 			 * attached to the VMA is busy. In practice
2631 			 * xe_vma_userptr_pin_pages() can only fail with -EBUSY
2632 			 * when we are low on memory, so convert it to -ENOMEM.
2633 			 */
2634 			if (err == -EBUSY)
2635 				err = -ENOMEM;
2636 		}
2637 	}
2638 	if (err) {
2639 		prep_vma_destroy(vm, vma, false);
2640 		xe_vma_destroy_unlocked(vma);
2641 		vma = ERR_PTR(err);
2642 	}
2643 
2644 	return vma;
2645 }
2646 
xe_vma_max_pte_size(struct xe_vma * vma)2647 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2648 {
2649 	if (vma->gpuva.flags & XE_VMA_PTE_1G)
2650 		return SZ_1G;
2651 	else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2652 		return SZ_2M;
2653 	else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2654 		return SZ_64K;
2655 	else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2656 		return SZ_4K;
2657 
2658 	return SZ_1G;	/* Uninitialized, used max size */
2659 	return SZ_1G;	/* Uninitialized, use max size */
2660 
xe_vma_set_pte_size(struct xe_vma * vma,u64 size)2661 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2662 {
2663 	switch (size) {
2664 	case SZ_1G:
2665 		vma->gpuva.flags |= XE_VMA_PTE_1G;
2666 		break;
2667 	case SZ_2M:
2668 		vma->gpuva.flags |= XE_VMA_PTE_2M;
2669 		break;
2670 	case SZ_64K:
2671 		vma->gpuva.flags |= XE_VMA_PTE_64K;
2672 		break;
2673 	case SZ_4K:
2674 		vma->gpuva.flags |= XE_VMA_PTE_4K;
2675 		break;
2676 	}
2677 }
2678 
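/*
 * Commit a parsed VMA op into the GPUVA tree: insert new VMAs for MAP and for
 * the REMAP prev/next pieces, mark unmapped VMAs as destroyed, and record what
 * was committed in op->flags so a later unwind can undo exactly that.
 */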
xe_vma_op_commit(struct xe_vm * vm,struct xe_vma_op * op)2679 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2680 {
2681 	int err = 0;
2682 
2683 	lockdep_assert_held_write(&vm->lock);
2684 
2685 	switch (op->base.op) {
2686 	case DRM_GPUVA_OP_MAP:
2687 		err |= xe_vm_insert_vma(vm, op->map.vma);
2688 		if (!err)
2689 			op->flags |= XE_VMA_OP_COMMITTED;
2690 		break;
2691 	case DRM_GPUVA_OP_REMAP:
2692 	{
2693 		u8 tile_present =
2694 			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2695 
2696 		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2697 				 true);
2698 		op->flags |= XE_VMA_OP_COMMITTED;
2699 
2700 		if (op->remap.prev) {
2701 			err |= xe_vm_insert_vma(vm, op->remap.prev);
2702 			if (!err)
2703 				op->flags |= XE_VMA_OP_PREV_COMMITTED;
2704 			if (!err && op->remap.skip_prev) {
2705 				op->remap.prev->tile_present =
2706 					tile_present;
2707 			}
2708 		}
2709 		if (op->remap.next) {
2710 			err |= xe_vm_insert_vma(vm, op->remap.next);
2711 			if (!err)
2712 				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2713 			if (!err && op->remap.skip_next) {
2714 				op->remap.next->tile_present =
2715 					tile_present;
2716 			}
2717 		}
2718 
2719 		/*
2720 		 * Adjust for partial unbind after removing VMA from VM. In case
2721 		 * of unwind we might need to undo this later.
2722 		 */
2723 		if (!err) {
2724 			op->base.remap.unmap->va->va.addr = op->remap.start;
2725 			op->base.remap.unmap->va->va.range = op->remap.range;
2726 		}
2727 		break;
2728 	}
2729 	case DRM_GPUVA_OP_UNMAP:
2730 		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2731 		op->flags |= XE_VMA_OP_COMMITTED;
2732 		break;
2733 	case DRM_GPUVA_OP_PREFETCH:
2734 		op->flags |= XE_VMA_OP_COMMITTED;
2735 		break;
2736 	default:
2737 		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2738 	}
2739 
2740 	return err;
2741 }
2742 
2743 /**
2744  * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2745  * @vma: Pointer to the xe_vma structure to check
2746  *
2747  * This function determines whether the given VMA (Virtual Memory Area)
2748  * has its memory attributes set to their default values. Specifically,
2749  * it checks the following conditions:
2750  *
2751  * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
2752  * - `pat_index` is equal to `default_pat_index`
2753  * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2754  * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2755  *
2756  * Return: true if all attributes are at their default values, false otherwise.
2757  */
xe_vma_has_default_mem_attrs(struct xe_vma * vma)2758 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2759 {
2760 	return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2761 		vma->attr.pat_index == vma->attr.default_pat_index &&
2762 		vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2763 		vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2764 }
2765 
vm_bind_ioctl_ops_parse(struct xe_vm * vm,struct drm_gpuva_ops * ops,struct xe_vma_ops * vops)2766 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2767 				   struct xe_vma_ops *vops)
2768 {
2769 	struct xe_device *xe = vm->xe;
2770 	struct drm_gpuva_op *__op;
2771 	struct xe_tile *tile;
2772 	u8 id, tile_mask = 0;
2773 	int err = 0;
2774 
2775 	lockdep_assert_held_write(&vm->lock);
2776 
2777 	for_each_tile(tile, vm->xe, id)
2778 		tile_mask |= 0x1 << id;
2779 
2780 	drm_gpuva_for_each_op(__op, ops) {
2781 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2782 		struct xe_vma *vma;
2783 		unsigned int flags = 0;
2784 
2785 		INIT_LIST_HEAD(&op->link);
2786 		list_add_tail(&op->link, &vops->list);
2787 		op->tile_mask = tile_mask;
2788 
2789 		switch (op->base.op) {
2790 		case DRM_GPUVA_OP_MAP:
2791 		{
2792 			struct xe_vma_mem_attr default_attr = {
2793 				.preferred_loc = {
2794 					.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2795 					.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2796 				},
2797 				.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2798 				.default_pat_index = op->map.pat_index,
2799 				.pat_index = op->map.pat_index,
2800 				.purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
2801 			};
2802 
2803 			flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
2804 
2805 			vma = new_vma(vm, &op->base.map, &default_attr,
2806 				      flags);
2807 			if (IS_ERR(vma))
2808 				return PTR_ERR(vma);
2809 
2810 			op->map.vma = vma;
2811 			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2812 			     !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
2813 			    op->map.invalidate_on_bind)
2814 				xe_vma_ops_incr_pt_update_ops(vops,
2815 							      op->tile_mask, 1);
2816 			break;
2817 		}
2818 		case DRM_GPUVA_OP_REMAP:
2819 		{
2820 			struct xe_vma *old =
2821 				gpuva_to_vma(op->base.remap.unmap->va);
2822 			bool skip = xe_vma_is_cpu_addr_mirror(old);
2823 			u64 start = xe_vma_start(old), end = xe_vma_end(old);
2824 			int num_remap_ops = 0;
2825 
2826 			if (op->base.remap.prev)
2827 				start = op->base.remap.prev->va.addr +
2828 					op->base.remap.prev->va.range;
2829 			if (op->base.remap.next)
2830 				end = op->base.remap.next->va.addr;
2831 
2832 			if (xe_vma_is_cpu_addr_mirror(old) &&
2833 			    xe_svm_has_mapping(vm, start, end)) {
2834 				if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2835 					xe_svm_unmap_address_range(vm, start, end);
2836 				else
2837 					return -EBUSY;
2838 			}
2839 
2840 			op->remap.start = xe_vma_start(old);
2841 			op->remap.range = xe_vma_size(old);
2842 			op->remap.old_start = op->remap.start;
2843 			op->remap.old_range = op->remap.range;
2844 
2845 			flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
2846 			if (op->base.remap.prev) {
2847 				vma = new_vma(vm, op->base.remap.prev,
2848 					      &old->attr, flags);
2849 				if (IS_ERR(vma))
2850 					return PTR_ERR(vma);
2851 
2852 				op->remap.prev = vma;
2853 
2854 				/*
2855 				 * Userptr creates a new SG mapping so
2856 				 * we must also rebind.
2857 				 */
2858 				op->remap.skip_prev = skip ||
2859 					(!xe_vma_is_userptr(old) &&
2860 					IS_ALIGNED(xe_vma_end(vma),
2861 						   xe_vma_max_pte_size(old)));
2862 				if (op->remap.skip_prev) {
2863 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2864 					op->remap.range -=
2865 						xe_vma_end(vma) -
2866 						xe_vma_start(old);
2867 					op->remap.start = xe_vma_end(vma);
2868 					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2869 					       (ULL)op->remap.start,
2870 					       (ULL)op->remap.range);
2871 				} else {
2872 					num_remap_ops++;
2873 				}
2874 			}
2875 
2876 			if (op->base.remap.next) {
2877 				vma = new_vma(vm, op->base.remap.next,
2878 					      &old->attr, flags);
2879 				if (IS_ERR(vma))
2880 					return PTR_ERR(vma);
2881 
2882 				op->remap.next = vma;
2883 
2884 				/*
2885 				 * Userptr creates a new SG mapping so
2886 				 * we must also rebind.
2887 				 */
2888 				op->remap.skip_next = skip ||
2889 					(!xe_vma_is_userptr(old) &&
2890 					IS_ALIGNED(xe_vma_start(vma),
2891 						   xe_vma_max_pte_size(old)));
2892 				if (op->remap.skip_next) {
2893 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2894 					op->remap.range -=
2895 						xe_vma_end(old) -
2896 						xe_vma_start(vma);
2897 					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2898 					       (ULL)op->remap.start,
2899 					       (ULL)op->remap.range);
2900 				} else {
2901 					num_remap_ops++;
2902 				}
2903 			}
2904 			if (!skip)
2905 				num_remap_ops++;
2906 
2907 			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2908 			break;
2909 		}
2910 		case DRM_GPUVA_OP_UNMAP:
2911 			vma = gpuva_to_vma(op->base.unmap.va);
2912 
2913 			if (xe_vma_is_cpu_addr_mirror(vma) &&
2914 			    xe_svm_has_mapping(vm, xe_vma_start(vma),
2915 					       xe_vma_end(vma)) &&
2916 			    !(vops->flags & XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP))
2917 				return -EBUSY;
2918 
2919 			if (!xe_vma_is_cpu_addr_mirror(vma))
2920 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2921 			break;
2922 		case DRM_GPUVA_OP_PREFETCH:
2923 			vma = gpuva_to_vma(op->base.prefetch.va);
2924 
2925 			if (xe_vma_is_userptr(vma)) {
2926 				err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2927 				if (err)
2928 					return err;
2929 			}
2930 
2931 			if (xe_vma_is_cpu_addr_mirror(vma))
2932 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2933 							      op->prefetch_range.ranges_count);
2934 			else
2935 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2936 
2937 			break;
2938 		default:
2939 			drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
2940 		}
2941 
2942 		err = xe_vma_op_commit(vm, op);
2943 		if (err)
2944 			return err;
2945 	}
2946 
2947 	return 0;
2948 }
2949 
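/*
 * Undo a single (possibly partially) committed VMA op: destroy VMAs created
 * for MAP/REMAP, and re-insert VMAs removed for UNMAP/REMAP, restoring the
 * original VA range where the remap skip optimisation had shrunk it.
 */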
xe_vma_op_unwind(struct xe_vm * vm,struct xe_vma_op * op,bool post_commit,bool prev_post_commit,bool next_post_commit)2950 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2951 			     bool post_commit, bool prev_post_commit,
2952 			     bool next_post_commit)
2953 {
2954 	lockdep_assert_held_write(&vm->lock);
2955 
2956 	switch (op->base.op) {
2957 	case DRM_GPUVA_OP_MAP:
2958 		if (op->map.vma) {
2959 			prep_vma_destroy(vm, op->map.vma, post_commit);
2960 			xe_vma_destroy_unlocked(op->map.vma);
2961 		}
2962 		break;
2963 	case DRM_GPUVA_OP_UNMAP:
2964 	{
2965 		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2966 
2967 		if (vma) {
2968 			xe_svm_notifier_lock(vm);
2969 			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2970 			xe_svm_notifier_unlock(vm);
2971 			if (post_commit)
2972 				xe_vm_insert_vma(vm, vma);
2973 		}
2974 		break;
2975 	}
2976 	case DRM_GPUVA_OP_REMAP:
2977 	{
2978 		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2979 
2980 		if (op->remap.prev) {
2981 			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2982 			xe_vma_destroy_unlocked(op->remap.prev);
2983 		}
2984 		if (op->remap.next) {
2985 			prep_vma_destroy(vm, op->remap.next, next_post_commit);
2986 			xe_vma_destroy_unlocked(op->remap.next);
2987 		}
2988 		if (vma) {
2989 			xe_svm_notifier_lock(vm);
2990 			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2991 			xe_svm_notifier_unlock(vm);
2992 			if (post_commit) {
2993 				/*
2994 				 * Restore the old va range, in case of the
2995 				 * prev/next skip optimisation. Otherwise what
2996 				 * we re-insert here could be smaller than the
2997 				 * original range.
2998 				 */
2999 				op->base.remap.unmap->va->va.addr =
3000 					op->remap.old_start;
3001 				op->base.remap.unmap->va->va.range =
3002 					op->remap.old_range;
3003 				xe_vm_insert_vma(vm, vma);
3004 			}
3005 		}
3006 		break;
3007 	}
3008 	case DRM_GPUVA_OP_PREFETCH:
3009 		/* Nothing to do */
3010 		break;
3011 	default:
3012 		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3013 	}
3014 }
3015 
vm_bind_ioctl_ops_unwind(struct xe_vm * vm,struct drm_gpuva_ops ** ops,int num_ops_list)3016 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
3017 				     struct drm_gpuva_ops **ops,
3018 				     int num_ops_list)
3019 {
3020 	int i;
3021 
3022 	for (i = num_ops_list - 1; i >= 0; --i) {
3023 		struct drm_gpuva_ops *__ops = ops[i];
3024 		struct drm_gpuva_op *__op;
3025 
3026 		if (!__ops)
3027 			continue;
3028 
3029 		drm_gpuva_for_each_op_reverse(__op, __ops) {
3030 			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
3031 
3032 			xe_vma_op_unwind(vm, op,
3033 					 op->flags & XE_VMA_OP_COMMITTED,
3034 					 op->flags & XE_VMA_OP_PREV_COMMITTED,
3035 					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
3036 		}
3037 	}
3038 }
3039 
3040 /**
3041  * struct xe_vma_lock_and_validate_flags - Flags for vma_lock_and_validate()
3042  * @res_evict: Allow evicting resources during validation
3043  * @validate: Perform BO validation
3044  * @request_decompress: Request BO decompression
3045  * @check_purged: Reject operation if BO is DONTNEED or PURGED
3046  */
3047 struct xe_vma_lock_and_validate_flags {
3048 	u32 res_evict : 1;
3049 	u32 validate : 1;
3050 	u32 request_decompress : 1;
3051 	u32 check_purged : 1;
3052 };
3053 
vma_lock_and_validate(struct drm_exec * exec,struct xe_vma * vma,struct xe_vma_lock_and_validate_flags flags)3054 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
3055 				 struct xe_vma_lock_and_validate_flags flags)
3056 {
3057 	struct xe_bo *bo = xe_vma_bo(vma);
3058 	struct xe_vm *vm = xe_vma_vm(vma);
3059 	bool validate_bo = flags.validate;
3060 	int err = 0;
3061 
3062 	if (bo) {
3063 		if (!bo->vm)
3064 			err = drm_exec_lock_obj(exec, &bo->ttm.base);
3065 
3066 		/* Reject new mappings to DONTNEED/purged BOs; allow cleanup operations */
3067 		if (!err && flags.check_purged) {
3068 			if (xe_bo_madv_is_dontneed(bo))
3069 				err = -EBUSY;  /* BO marked purgeable */
3070 			else if (xe_bo_is_purged(bo))
3071 				err = -EINVAL; /* BO already purged */
3072 		}
3073 
3074 		/* Don't validate the BO for DONTNEED/PURGED remap remnants. */
3075 		if (vma->attr.purgeable_state != XE_MADV_PURGEABLE_WILLNEED)
3076 			validate_bo = false;
3077 
3078 		if (!err && validate_bo)
3079 			err = xe_bo_validate(bo, vm,
3080 					     xe_vm_allow_vm_eviction(vm) &&
3081 					     flags.res_evict, exec);
3082 
3083 		if (err)
3084 			return err;
3085 
3086 		if (flags.request_decompress)
3087 			err = xe_bo_decompress(bo);
3088 	}
3089 
3090 	return err;
3091 }
3092 
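/*
 * A VMA whose user fence from a previous bind has not signalled yet cannot be
 * modified; return -EBUSY in that case, otherwise drop the fence reference.
 */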
check_ufence(struct xe_vma * vma)3093 static int check_ufence(struct xe_vma *vma)
3094 {
3095 	if (vma->ufence) {
3096 		struct xe_user_fence * const f = vma->ufence;
3097 
3098 		if (!xe_sync_ufence_get_status(f))
3099 			return -EBUSY;
3100 
3101 		vma->ufence = NULL;
3102 		xe_sync_ufence_put(f);
3103 	}
3104 
3105 	return 0;
3106 }
3107 
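/*
 * Migrate and populate the SVM ranges collected for a prefetch op: move each
 * range to the requested pagemap (or back to system memory when none was
 * given) and then fetch its pages.
 */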
prefetch_ranges(struct xe_vm * vm,struct xe_vma_op * op)3108 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
3109 {
3110 	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
3111 	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3112 	struct drm_pagemap *dpagemap = op->prefetch_range.dpagemap;
3113 	int err = 0;
3114 
3115 	struct xe_svm_range *svm_range;
3116 	struct drm_gpusvm_ctx ctx = {};
3117 	unsigned long i;
3118 
3119 	if (!xe_vma_is_cpu_addr_mirror(vma))
3120 		return 0;
3121 
3122 	ctx.read_only = xe_vma_read_only(vma);
3123 	ctx.devmem_possible = devmem_possible;
3124 	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
3125 	ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
3126 
3127 	/* TODO: Threading the migration */
3128 	xa_for_each(&op->prefetch_range.range, i, svm_range) {
3129 		if (!dpagemap)
3130 			xe_svm_range_migrate_to_smem(vm, svm_range);
3131 
3132 		if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
3133 			drm_dbg(&vm->xe->drm,
3134 				"Prefetch pagemap is %s start 0x%016lx end 0x%016lx\n",
3135 				dpagemap ? dpagemap->drm->unique : "system",
3136 				xe_svm_range_start(svm_range), xe_svm_range_end(svm_range));
3137 		}
3138 
3139 		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, dpagemap)) {
3140 			err = xe_svm_alloc_vram(svm_range, &ctx, dpagemap);
3141 			if (err) {
3142 				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
3143 					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
3144 				return -ENODATA;
3145 			}
3146 			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
3147 		}
3148 
3149 		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
3150 		if (err) {
3151 			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
3152 				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
3153 			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
3154 				err = -ENODATA;
3155 			return err;
3156 		}
3157 		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
3158 	}
3159 
3160 	return err;
3161 }
3162 
op_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops,struct xe_vma_op * op)3163 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
3164 			    struct xe_vma_ops *vops, struct xe_vma_op *op)
3165 {
3166 	int err = 0;
3167 	bool res_evict;
3168 
3169 	/*
3170 	 * We only allow evicting a BO within the VM if it is not part of an
3171 	 * array of binds, as an array of binds can evict another BO within the
3172 	 * bind.
3173 	 */
3174 	res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
3175 
3176 	switch (op->base.op) {
3177 	case DRM_GPUVA_OP_MAP:
3178 		if (!op->map.invalidate_on_bind)
3179 			err = vma_lock_and_validate(exec, op->map.vma,
3180 						    (struct xe_vma_lock_and_validate_flags) {
3181 							.res_evict = res_evict,
3182 							.validate = !xe_vm_in_fault_mode(vm) ||
3183 								    op->map.immediate,
3184 							.request_decompress =
3185 							op->map.request_decompress,
3186 							.check_purged = false,
3187 						    });
3188 		break;
3189 	case DRM_GPUVA_OP_REMAP:
3190 		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
3191 		if (err)
3192 			break;
3193 
3194 		err = vma_lock_and_validate(exec,
3195 					    gpuva_to_vma(op->base.remap.unmap->va),
3196 					    (struct xe_vma_lock_and_validate_flags) {
3197 						    .res_evict = res_evict,
3198 						    .validate = false,
3199 						    .request_decompress = false,
3200 						    .check_purged = false,
3201 					    });
3202 		if (!err && op->remap.prev)
3203 			err = vma_lock_and_validate(exec, op->remap.prev,
3204 						    (struct xe_vma_lock_and_validate_flags) {
3205 							    .res_evict = res_evict,
3206 							    .validate = true,
3207 							    .request_decompress = false,
3208 							    .check_purged = false,
3209 						    });
3210 		if (!err && op->remap.next)
3211 			err = vma_lock_and_validate(exec, op->remap.next,
3212 						    (struct xe_vma_lock_and_validate_flags) {
3213 							    .res_evict = res_evict,
3214 							    .validate = true,
3215 							    .request_decompress = false,
3216 							    .check_purged = false,
3217 						    });
3218 		break;
3219 	case DRM_GPUVA_OP_UNMAP:
3220 		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
3221 		if (err)
3222 			break;
3223 
3224 		err = vma_lock_and_validate(exec,
3225 					    gpuva_to_vma(op->base.unmap.va),
3226 					    (struct xe_vma_lock_and_validate_flags) {
3227 						    .res_evict = res_evict,
3228 						    .validate = false,
3229 						    .request_decompress = false,
3230 						    .check_purged = false,
3231 					    });
3232 		break;
3233 	case DRM_GPUVA_OP_PREFETCH:
3234 	{
3235 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
3236 		u32 region;
3237 
3238 		if (!xe_vma_is_cpu_addr_mirror(vma)) {
3239 			region = op->prefetch.region;
3240 			xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
3241 				  region <= ARRAY_SIZE(region_to_mem_type));
3242 		}
3243 
3244 		/*
3245 		 * PREFETCH is the only op that still gates on BO purge state.
3246 		 * MAP/REMAP handle this inside xe_vma_create() so partial
3247 		 * unbind on a DONTNEED BO still works. PREFETCH skips
3248 		 * xe_vma_create() and would migrate a BO with no backing
3249 		 * store, so reject DONTNEED/PURGED here.
3250 		 */
3251 		err = vma_lock_and_validate(exec,
3252 					    gpuva_to_vma(op->base.prefetch.va),
3253 					    (struct xe_vma_lock_and_validate_flags) {
3254 						    .res_evict = res_evict,
3255 						    .validate = false,
3256 						    .request_decompress = false,
3257 						    .check_purged = true,
3258 					    });
3259 		if (!err && !xe_vma_has_no_bo(vma))
3260 			err = xe_bo_migrate(xe_vma_bo(vma),
3261 					    region_to_mem_type[region],
3262 					    NULL,
3263 					    exec);
3264 		break;
3265 	}
3266 	default:
3267 		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3268 	}
3269 
3270 	return err;
3271 }
3272 
vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm * vm,struct xe_vma_ops * vops)3273 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
3274 {
3275 	struct xe_vma_op *op;
3276 	int err;
3277 
3278 	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
3279 		return 0;
3280 
3281 	list_for_each_entry(op, &vops->list, link) {
3282 		if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
3283 			err = prefetch_ranges(vm, op);
3284 			if (err)
3285 				return err;
3286 		}
3287 	}
3288 
3289 	return 0;
3290 }
3291 
vm_bind_ioctl_ops_lock_and_prep(struct drm_exec * exec,struct xe_vm * vm,struct xe_vma_ops * vops)3292 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
3293 					   struct xe_vm *vm,
3294 					   struct xe_vma_ops *vops)
3295 {
3296 	struct xe_vma_op *op;
3297 	int err;
3298 
3299 	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
3300 	if (err)
3301 		return err;
3302 
3303 	list_for_each_entry(op, &vops->list, link) {
3304 		err = op_lock_and_prep(exec, vm, vops, op);
3305 		if (err)
3306 			return err;
3307 	}
3308 
3309 #ifdef TEST_VM_OPS_ERROR
3310 	if (vops->inject_error &&
3311 	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3312 		return -ENOSPC;
3313 #endif
3314 
3315 	return 0;
3316 }
3317 
op_trace(struct xe_vma_op * op)3318 static void op_trace(struct xe_vma_op *op)
3319 {
3320 	switch (op->base.op) {
3321 	case DRM_GPUVA_OP_MAP:
3322 		trace_xe_vma_bind(op->map.vma);
3323 		break;
3324 	case DRM_GPUVA_OP_REMAP:
3325 		trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3326 		if (op->remap.prev)
3327 			trace_xe_vma_bind(op->remap.prev);
3328 		if (op->remap.next)
3329 			trace_xe_vma_bind(op->remap.next);
3330 		break;
3331 	case DRM_GPUVA_OP_UNMAP:
3332 		trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3333 		break;
3334 	case DRM_GPUVA_OP_PREFETCH:
3335 		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3336 		break;
3337 	case DRM_GPUVA_OP_DRIVER:
3338 		break;
3339 	default:
3340 		XE_WARN_ON("NOT POSSIBLE");
3341 	}
3342 }
3343 
trace_xe_vm_ops_execute(struct xe_vma_ops * vops)3344 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3345 {
3346 	struct xe_vma_op *op;
3347 
3348 	list_for_each_entry(op, &vops->list, link)
3349 		op_trace(op);
3350 }
3351 
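/*
 * Assign a bind exec queue to each tile's PT update ops: walk the supplied
 * queue's multi-GT list when one was given, otherwise fall back to the VM's
 * default per-tile queue. Returns the number of tiles that have ops to run.
 */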
vm_ops_setup_tile_args(struct xe_vm * vm,struct xe_vma_ops * vops)3352 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3353 {
3354 	struct xe_exec_queue *q = vops->q;
3355 	struct xe_tile *tile;
3356 	int number_tiles = 0;
3357 	u8 id;
3358 
3359 	for_each_tile(tile, vm->xe, id) {
3360 		if (vops->pt_update_ops[id].num_ops)
3361 			++number_tiles;
3362 
3363 		if (vops->pt_update_ops[id].q)
3364 			continue;
3365 
3366 		if (q) {
3367 			vops->pt_update_ops[id].q = q;
3368 			if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3369 				q = list_next_entry(q, multi_gt_list);
3370 		} else {
3371 			vops->pt_update_ops[id].q = vm->q[id];
3372 		}
3373 	}
3374 
3375 	return number_tiles;
3376 }
3377 
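/*
 * Execute the PT update ops for every tile that has work: prepare, run and
 * finalize the updates, collecting each tile's bind fence (and, unless TLB
 * waits are skipped, its TLB invalidation fences) into a single
 * dma_fence_array returned to the caller.
 */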
ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3378 static struct dma_fence *ops_execute(struct xe_vm *vm,
3379 				     struct xe_vma_ops *vops)
3380 {
3381 	struct xe_tile *tile;
3382 	struct dma_fence *fence = NULL;
3383 	struct dma_fence **fences = NULL;
3384 	struct dma_fence_array *cf = NULL;
3385 	int number_tiles = 0, current_fence = 0, n_fence = 0, err, i;
3386 	u8 id;
3387 
3388 	number_tiles = vm_ops_setup_tile_args(vm, vops);
3389 	if (number_tiles == 0)
3390 		return ERR_PTR(-ENODATA);
3391 
3392 	for_each_tile(tile, vm->xe, id) {
3393 		++n_fence;
3394 
3395 		if (!(vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT))
3396 			for_each_tlb_inval(i)
3397 				++n_fence;
3398 	}
3399 
3400 	fences = kmalloc_objs(*fences, n_fence);
3401 	if (!fences) {
3402 		fence = ERR_PTR(-ENOMEM);
3403 		goto err_trace;
3404 	}
3405 
3406 	cf = dma_fence_array_alloc(n_fence);
3407 	if (!cf) {
3408 		fence = ERR_PTR(-ENOMEM);
3409 		goto err_out;
3410 	}
3411 
3412 	for_each_tile(tile, vm->xe, id) {
3413 		if (!vops->pt_update_ops[id].num_ops)
3414 			continue;
3415 
3416 		err = xe_pt_update_ops_prepare(tile, vops);
3417 		if (err) {
3418 			fence = ERR_PTR(err);
3419 			goto err_out;
3420 		}
3421 	}
3422 
3423 	trace_xe_vm_ops_execute(vops);
3424 
3425 	for_each_tile(tile, vm->xe, id) {
3426 		struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
3427 
3428 		fence = NULL;
3429 		if (!vops->pt_update_ops[id].num_ops)
3430 			goto collect_fences;
3431 
3432 		fence = xe_pt_update_ops_run(tile, vops);
3433 		if (IS_ERR(fence))
3434 			goto err_out;
3435 
3436 collect_fences:
3437 		fences[current_fence++] = fence ?: dma_fence_get_stub();
3438 		if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
3439 			continue;
3440 
3441 		xe_migrate_job_lock(tile->migrate, q);
3442 		for_each_tlb_inval(i)
3443 			fences[current_fence++] =
3444 				xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
3445 		xe_migrate_job_unlock(tile->migrate, q);
3446 	}
3447 
3448 	xe_assert(vm->xe, current_fence == n_fence);
3449 	dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
3450 			     1, false);
3451 	fence = &cf->base;
3452 
3453 	for_each_tile(tile, vm->xe, id) {
3454 		if (!vops->pt_update_ops[id].num_ops)
3455 			continue;
3456 
3457 		xe_pt_update_ops_fini(tile, vops);
3458 	}
3459 
3460 	return fence;
3461 
3462 err_out:
3463 	for_each_tile(tile, vm->xe, id) {
3464 		if (!vops->pt_update_ops[id].num_ops)
3465 			continue;
3466 
3467 		xe_pt_update_ops_abort(tile, vops);
3468 	}
3469 	while (current_fence)
3470 		dma_fence_put(fences[--current_fence]);
3471 	kfree(fences);
3472 	kfree(cf);
3473 
3474 err_trace:
3475 	trace_xe_vm_ops_fail(vm);
3476 	return fence;
3477 }
3478 
vma_add_ufence(struct xe_vma * vma,struct xe_user_fence * ufence)3479 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3480 {
3481 	if (vma->ufence)
3482 		xe_sync_ufence_put(vma->ufence);
3483 	vma->ufence = __xe_sync_ufence_get(ufence);
3484 }
3485 
op_add_ufence(struct xe_vm * vm,struct xe_vma_op * op,struct xe_user_fence * ufence)3486 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3487 			  struct xe_user_fence *ufence)
3488 {
3489 	switch (op->base.op) {
3490 	case DRM_GPUVA_OP_MAP:
3491 		if (!xe_vma_is_cpu_addr_mirror(op->map.vma))
3492 			vma_add_ufence(op->map.vma, ufence);
3493 		break;
3494 	case DRM_GPUVA_OP_REMAP:
3495 		if (op->remap.prev)
3496 			vma_add_ufence(op->remap.prev, ufence);
3497 		if (op->remap.next)
3498 			vma_add_ufence(op->remap.next, ufence);
3499 		break;
3500 	case DRM_GPUVA_OP_UNMAP:
3501 		break;
3502 	case DRM_GPUVA_OP_PREFETCH:
3503 		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3504 		break;
3505 	default:
3506 		drm_warn(&vm->xe->drm, "NOT POSSIBLE\n");
3507 	}
3508 }
3509 
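/*
 * Post-execution fixups for a bind: attach any user fence to the VMAs touched
 * by the ops, schedule destruction of unmapped/replaced VMAs once @fence
 * signals, and signal each sync entry with the bind fence.
 */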
vm_bind_ioctl_ops_fini(struct xe_vm * vm,struct xe_vma_ops * vops,struct dma_fence * fence)3510 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3511 				   struct dma_fence *fence)
3512 {
3513 	struct xe_user_fence *ufence;
3514 	struct xe_vma_op *op;
3515 	int i;
3516 
3517 	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3518 	list_for_each_entry(op, &vops->list, link) {
3519 		if (ufence)
3520 			op_add_ufence(vm, op, ufence);
3521 
3522 		if (op->base.op == DRM_GPUVA_OP_UNMAP)
3523 			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3524 		else if (op->base.op == DRM_GPUVA_OP_REMAP)
3525 			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3526 				       fence);
3527 	}
3528 	if (ufence)
3529 		xe_sync_ufence_put(ufence);
3530 	if (fence) {
3531 		for (i = 0; i < vops->num_syncs; i++)
3532 			xe_sync_entry_signal(vops->syncs + i, fence);
3533 	}
3534 }
3535 
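/*
 * Lock and validate everything the ops touch inside a validation transaction,
 * then execute them. Returns the composite bind fence or an ERR_PTR; -ENODATA
 * from execution (no tile had work) still runs the fini step, just without a
 * fence.
 */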
vm_bind_ioctl_ops_execute(struct xe_vm * vm,struct xe_vma_ops * vops)3536 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3537 						   struct xe_vma_ops *vops)
3538 {
3539 	struct xe_validation_ctx ctx;
3540 	struct drm_exec exec;
3541 	struct dma_fence *fence;
3542 	int err = 0;
3543 
3544 	lockdep_assert_held_write(&vm->lock);
3545 
3546 	xe_validation_guard(&ctx, &vm->xe->val, &exec,
3547 			    ((struct xe_val_flags) {
3548 				    .interruptible = true,
3549 				    .exec_ignore_duplicates = true,
3550 			    }), err) {
3551 		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3552 		drm_exec_retry_on_contention(&exec);
3553 		xe_validation_retry_on_oom(&ctx, &err);
3554 		if (err)
3555 			return ERR_PTR(err);
3556 
3557 		xe_vm_set_validation_exec(vm, &exec);
3558 		fence = ops_execute(vm, vops);
3559 		xe_vm_set_validation_exec(vm, NULL);
3560 		if (IS_ERR(fence)) {
3561 			if (PTR_ERR(fence) == -ENODATA)
3562 				vm_bind_ioctl_ops_fini(vm, vops, NULL);
3563 			return fence;
3564 		}
3565 
3566 		vm_bind_ioctl_ops_fini(vm, vops, fence);
3567 	}
3568 
3569 	return err ? ERR_PTR(err) : fence;
3570 }
3571 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3572 
3573 #define SUPPORTED_FLAGS_STUB  \
3574 	(DRM_XE_VM_BIND_FLAG_READONLY | \
3575 	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3576 	 DRM_XE_VM_BIND_FLAG_NULL | \
3577 	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3578 	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3579 	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
3580 	 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET | \
3581 	 DRM_XE_VM_BIND_FLAG_DECOMPRESS)
3582 
3583 #ifdef TEST_VM_OPS_ERROR
3584 #define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3585 #else
3586 #define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
3587 #endif
3588 
3589 #define XE_64K_PAGE_MASK 0xffffull
3590 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3591 
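/*
 * Validate the bind ioctl arguments and fetch the bind-op array from
 * userspace (args->bind is used directly for a single bind). Each op's
 * ranges, flags, pat_index and object combinations are sanity checked before
 * any VM state is touched.
 */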
vm_bind_ioctl_check_args(struct xe_device * xe,struct xe_vm * vm,struct drm_xe_vm_bind * args,struct drm_xe_vm_bind_op ** bind_ops)3592 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3593 				    struct drm_xe_vm_bind *args,
3594 				    struct drm_xe_vm_bind_op **bind_ops)
3595 {
3596 	int err;
3597 	int i;
3598 
3599 	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3600 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3601 		return -EINVAL;
3602 
3603 	if (XE_IOCTL_DBG(xe, args->extensions))
3604 		return -EINVAL;
3605 
3606 	if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
3607 		return -EINVAL;
3608 
3609 	if (args->num_binds > 1) {
3610 		u64 __user *bind_user =
3611 			u64_to_user_ptr(args->vector_of_binds);
3612 
3613 		*bind_ops = kvmalloc_objs(struct drm_xe_vm_bind_op,
3614 					  args->num_binds,
3615 					  GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3616 		if (!*bind_ops)
3617 			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3618 
3619 		err = copy_from_user(*bind_ops, bind_user,
3620 				     sizeof(struct drm_xe_vm_bind_op) *
3621 				     args->num_binds);
3622 		if (XE_IOCTL_DBG(xe, err)) {
3623 			err = -EFAULT;
3624 			goto free_bind_ops;
3625 		}
3626 	} else {
3627 		*bind_ops = &args->bind;
3628 	}
3629 
3630 	for (i = 0; i < args->num_binds; ++i) {
3631 		u64 range = (*bind_ops)[i].range;
3632 		u64 addr = (*bind_ops)[i].addr;
3633 		u32 op = (*bind_ops)[i].op;
3634 		u32 flags = (*bind_ops)[i].flags;
3635 		u32 obj = (*bind_ops)[i].obj;
3636 		u64 obj_offset = (*bind_ops)[i].obj_offset;
3637 		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3638 		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3639 		bool is_cpu_addr_mirror = flags &
3640 			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3641 		bool is_decompress = flags & DRM_XE_VM_BIND_FLAG_DECOMPRESS;
3642 		u16 pat_index = (*bind_ops)[i].pat_index;
3643 		u16 coh_mode;
3644 		bool comp_en;
3645 
3646 		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3647 				 (!xe_vm_in_fault_mode(vm) ||
3648 				 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3649 			err = -EINVAL;
3650 			goto free_bind_ops;
3651 		}
3652 
3653 		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3654 			err = -EINVAL;
3655 			goto free_bind_ops;
3656 		}
3657 
3658 		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3659 		(*bind_ops)[i].pat_index = pat_index;
3660 		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3661 		comp_en = xe_pat_index_get_comp_en(xe, pat_index);
3662 		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3663 			err = -EINVAL;
3664 			goto free_bind_ops;
3665 		}
3666 
3667 		if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
3668 			err = -EINVAL;
3669 			goto free_bind_ops;
3670 		}
3671 
3672 		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3673 		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3674 		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3675 		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3676 						    is_cpu_addr_mirror)) ||
3677 		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3678 				 (is_decompress || is_null || is_cpu_addr_mirror)) ||
3679 		    XE_IOCTL_DBG(xe, is_decompress &&
3680 				 xe_pat_index_get_comp_en(xe, pat_index)) ||
3681 		    XE_IOCTL_DBG(xe, !obj &&
3682 				 op == DRM_XE_VM_BIND_OP_MAP &&
3683 				 !is_null && !is_cpu_addr_mirror) ||
3684 		    XE_IOCTL_DBG(xe, !obj &&
3685 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3686 		    XE_IOCTL_DBG(xe, addr &&
3687 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3688 		    XE_IOCTL_DBG(xe, range &&
3689 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3690 		    XE_IOCTL_DBG(xe, obj &&
3691 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3692 		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3693 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3694 		    XE_IOCTL_DBG(xe, !IS_DGFX(xe) && coh_mode == XE_COH_NONE &&
3695 				 is_cpu_addr_mirror) ||
3696 		    XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
3697 				 (op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
3698 				  is_cpu_addr_mirror) &&
3699 				 (pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
3700 		    XE_IOCTL_DBG(xe, comp_en &&
3701 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3702 		    XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3703 				 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3704 		    XE_IOCTL_DBG(xe, obj &&
3705 				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3706 		    XE_IOCTL_DBG(xe, prefetch_region &&
3707 				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3708 		    XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3709 				      /* Guard against undefined shift in BIT(prefetch_region) */
3710 				      (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
3711 				      !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
3712 		    XE_IOCTL_DBG(xe, obj &&
3713 				 op == DRM_XE_VM_BIND_OP_UNMAP) ||
3714 		    XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
3715 				 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
3716 			err = -EINVAL;
3717 			goto free_bind_ops;
3718 		}
3719 
3720 		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3721 		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3722 		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3723 		    XE_IOCTL_DBG(xe, !range &&
3724 				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3725 			err = -EINVAL;
3726 			goto free_bind_ops;
3727 		}
3728 
3729 		if (is_decompress && (XE_IOCTL_DBG(xe, !xe_device_has_flat_ccs(xe)) ||
3730 				      XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20) ||
3731 				      XE_IOCTL_DBG(xe, !IS_DGFX(xe)))) {
3732 			err = -EOPNOTSUPP;
3733 			goto free_bind_ops;
3734 		}
3735 	}
3736 
3737 	return 0;
3738 
3739 free_bind_ops:
3740 	if (args->num_binds > 1)
3741 		kvfree(*bind_ops);
3742 	*bind_ops = NULL;
3743 	return err;
3744 }
3745 
vm_bind_ioctl_signal_fences(struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,int num_syncs)3746 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3747 				       struct xe_exec_queue *q,
3748 				       struct xe_sync_entry *syncs,
3749 				       int num_syncs)
3750 {
3751 	struct dma_fence *fence = NULL;
3752 	int i, err = 0;
3753 
3754 	if (num_syncs) {
3755 		fence = xe_sync_in_fence_get(syncs, num_syncs,
3756 					     to_wait_exec_queue(vm, q), vm);
3757 		if (IS_ERR(fence))
3758 			return PTR_ERR(fence);
3759 
3760 		for (i = 0; i < num_syncs; i++)
3761 			xe_sync_entry_signal(&syncs[i], fence);
3762 	}
3763 
3764 	dma_fence_put(fence);
3765 
3766 	return err;
3767 }
3768 
xe_vma_ops_init(struct xe_vma_ops * vops,struct xe_vm * vm,struct xe_exec_queue * q,struct xe_sync_entry * syncs,u32 num_syncs)3769 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3770 			    struct xe_exec_queue *q,
3771 			    struct xe_sync_entry *syncs, u32 num_syncs)
3772 {
3773 	memset(vops, 0, sizeof(*vops));
3774 	INIT_LIST_HEAD(&vops->list);
3775 	vops->vm = vm;
3776 	vops->q = q;
3777 	vops->syncs = syncs;
3778 	vops->num_syncs = num_syncs;
3779 	vops->flags = 0;
3780 }
3781 
xe_vm_bind_ioctl_validate_bo(struct xe_device * xe,struct xe_bo * bo,u64 addr,u64 range,u64 obj_offset,u16 pat_index,u32 op,u32 bind_flags)3782 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3783 					u64 addr, u64 range, u64 obj_offset,
3784 					u16 pat_index, u32 op, u32 bind_flags)
3785 {
3786 	u16 coh_mode;
3787 	bool comp_en;
3788 
3789 	if (XE_IOCTL_DBG(xe, (bo->flags & XE_BO_FLAG_NO_COMPRESSION) &&
3790 			 xe_pat_index_get_comp_en(xe, pat_index)))
3791 		return -EINVAL;
3792 
3793 	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3794 	    XE_IOCTL_DBG(xe, obj_offset >
3795 			 xe_bo_size(bo) - range)) {
3796 		return -EINVAL;
3797 	}
3798 
3799 	/*
3800 	 * Some platforms require 64k VM_BIND alignment,
3801 	 * specifically those with XE_VRAM_FLAGS_NEED64K.
3802 	 *
3803 	 * Other platforms may have BOs set to 64k physical placement,
3804 	 * but can be mapped at 4k offsets anyway. This check is only
3805 	 * there for the former case.
3806 	 */
3807 	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3808 	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3809 		if (XE_IOCTL_DBG(xe, obj_offset &
3810 				 XE_64K_PAGE_MASK) ||
3811 		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3812 		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3813 			return -EINVAL;
3814 		}
3815 	}
3816 
3817 	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3818 	if (bo->cpu_caching) {
3819 		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3820 				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3821 			return -EINVAL;
3822 		}
3823 	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3824 		/*
3825 		 * An imported dma-buf from a different device should
3826 		 * require 1-way or 2-way coherency since we don't know
3827 		 * how it was mapped on the CPU. Just assume it is
3828 		 * potentially cached on the CPU side.
3829 		 */
3830 		return -EINVAL;
3831 	}
3832 
3833 	/*
3834 	 * Ensures that imported buffer objects (dma-bufs) are not mapped
3835 	 * with a PAT index that enables compression.
3836 	 */
3837 	comp_en = xe_pat_index_get_comp_en(xe, pat_index);
3838 	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
3839 		return -EINVAL;
3840 
3841 	if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
3842 			 (pat_index != 19 && coh_mode != XE_COH_2WAY)))
3843 		return -EINVAL;
3844 
3845 	/* If a BO is protected it can only be mapped if the key is still valid */
3846 	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3847 	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3848 		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3849 			return -ENOEXEC;
3850 
3851 	return 0;
3852 }
3853 
xe_vm_bind_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3854 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3855 {
3856 	struct xe_device *xe = to_xe_device(dev);
3857 	struct xe_file *xef = to_xe_file(file);
3858 	struct drm_xe_vm_bind *args = data;
3859 	struct drm_xe_sync __user *syncs_user;
3860 	struct xe_bo **bos = NULL;
3861 	struct drm_gpuva_ops **ops = NULL;
3862 	struct xe_vm *vm;
3863 	struct xe_exec_queue *q = NULL;
3864 	u32 num_syncs, num_ufence = 0;
3865 	struct xe_sync_entry *syncs = NULL;
3866 	struct drm_xe_vm_bind_op *bind_ops = NULL;
3867 	struct xe_vma_ops vops;
3868 	struct dma_fence *fence;
3869 	int err;
3870 	int i;
3871 
3872 	vm = xe_vm_lookup(xef, args->vm_id);
3873 	if (XE_IOCTL_DBG(xe, !vm))
3874 		return -EINVAL;
3875 
3876 	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3877 	if (err)
3878 		goto put_vm;
3879 
3880 	if (args->exec_queue_id) {
3881 		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3882 		if (XE_IOCTL_DBG(xe, !q)) {
3883 			err = -ENOENT;
3884 			goto free_bind_ops;
3885 		}
3886 
3887 		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3888 			err = -EINVAL;
3889 			goto put_exec_queue;
3890 		}
3891 	}
3892 
3893 	if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) {
3894 		err = -EINVAL;
3895 		goto put_exec_queue;
3896 	}
3897 
3898 	/* Ensure all UNMAPs visible */
3899 	xe_svm_flush(vm);
3900 
3901 	err = down_write_killable(&vm->lock);
3902 	if (err)
3903 		goto put_exec_queue;
3904 
3905 	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3906 		err = -ENOENT;
3907 		goto release_vm_lock;
3908 	}
3909 
3910 	for (i = 0; i < args->num_binds; ++i) {
3911 		u64 range = bind_ops[i].range;
3912 		u64 addr = bind_ops[i].addr;
3913 
3914 		if (XE_IOCTL_DBG(xe, range > vm->size) ||
3915 		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3916 			err = -EINVAL;
3917 			goto release_vm_lock;
3918 		}
3919 	}
3920 
3921 	if (args->num_binds) {
3922 		bos = kvzalloc_objs(*bos, args->num_binds,
3923 				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3924 		if (!bos) {
3925 			err = -ENOMEM;
3926 			goto release_vm_lock;
3927 		}
3928 
3929 		ops = kvzalloc_objs(*ops, args->num_binds,
3930 				    GFP_KERNEL | __GFP_ACCOUNT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3931 		if (!ops) {
3932 			err = -ENOMEM;
3933 			goto free_bos;
3934 		}
3935 	}
3936 
3937 	for (i = 0; i < args->num_binds; ++i) {
3938 		struct drm_gem_object *gem_obj;
3939 		u64 range = bind_ops[i].range;
3940 		u64 addr = bind_ops[i].addr;
3941 		u32 obj = bind_ops[i].obj;
3942 		u64 obj_offset = bind_ops[i].obj_offset;
3943 		u16 pat_index = bind_ops[i].pat_index;
3944 		u32 op = bind_ops[i].op;
3945 		u32 bind_flags = bind_ops[i].flags;
3946 
3947 		if (!obj)
3948 			continue;
3949 
3950 		gem_obj = drm_gem_object_lookup(file, obj);
3951 		if (XE_IOCTL_DBG(xe, !gem_obj)) {
3952 			err = -ENOENT;
3953 			goto put_obj;
3954 		}
3955 		bos[i] = gem_to_xe_bo(gem_obj);
3956 
3957 		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3958 						   obj_offset, pat_index, op,
3959 						   bind_flags);
3960 		if (err)
3961 			goto put_obj;
3962 	}
3963 
3964 	if (args->num_syncs) {
3965 		syncs = kzalloc_objs(*syncs, args->num_syncs);
3966 		if (!syncs) {
3967 			err = -ENOMEM;
3968 			goto put_obj;
3969 		}
3970 	}
3971 
3972 	syncs_user = u64_to_user_ptr(args->syncs);
3973 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3974 		struct xe_exec_queue *__q = q ?: vm->q[0];
3975 
3976 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3977 					  &syncs_user[num_syncs],
3978 					  __q->ufence_syncobj,
3979 					  ++__q->ufence_timeline_value,
3980 					  (xe_vm_in_lr_mode(vm) ?
3981 					   SYNC_PARSE_FLAG_LR_MODE : 0) |
3982 					  (!args->num_binds ?
3983 					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3984 		if (err)
3985 			goto free_syncs;
3986 
3987 		if (xe_sync_is_ufence(&syncs[num_syncs]))
3988 			num_ufence++;
3989 	}
3990 
3991 	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3992 		err = -EINVAL;
3993 		goto free_syncs;
3994 	}
3995 
3996 	if (!args->num_binds) {
3997 		err = -ENODATA;
3998 		goto free_syncs;
3999 	}
4000 
4001 	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
4002 	if (args->num_binds > 1)
4003 		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
4004 	for (i = 0; i < args->num_binds; ++i) {
4005 		u64 range = bind_ops[i].range;
4006 		u64 addr = bind_ops[i].addr;
4007 		u32 op = bind_ops[i].op;
4008 		u32 flags = bind_ops[i].flags;
4009 		u64 obj_offset = bind_ops[i].obj_offset;
4010 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
4011 		u16 pat_index = bind_ops[i].pat_index;
4012 
4013 		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
4014 						  addr, range, op, flags,
4015 						  prefetch_region, pat_index);
4016 		if (IS_ERR(ops[i])) {
4017 			err = PTR_ERR(ops[i]);
4018 			ops[i] = NULL;
4019 			goto unwind_ops;
4020 		}
4021 
4022 		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
4023 		if (err)
4024 			goto unwind_ops;
4025 
4026 #ifdef TEST_VM_OPS_ERROR
4027 		if (flags & FORCE_OP_ERROR) {
4028 			vops.inject_error = true;
4029 			vm->xe->vm_inject_error_position =
4030 				(vm->xe->vm_inject_error_position + 1) %
4031 				FORCE_OP_ERROR_COUNT;
4032 		}
4033 #endif
4034 	}
4035 
4036 	/* Nothing to do */
4037 	if (list_empty(&vops.list)) {
4038 		err = -ENODATA;
4039 		goto unwind_ops;
4040 	}
4041 
4042 	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
4043 	if (err)
4044 		goto unwind_ops;
4045 
4046 	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
4047 	if (err)
4048 		goto unwind_ops;
4049 
4050 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
4051 	if (IS_ERR(fence))
4052 		err = PTR_ERR(fence);
4053 	else
4054 		dma_fence_put(fence);
4055 
4056 unwind_ops:
4057 	if (err && err != -ENODATA)
4058 		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
4059 	xe_vma_ops_fini(&vops);
4060 	for (i = args->num_binds - 1; i >= 0; --i)
4061 		if (ops[i])
4062 			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
4063 free_syncs:
4064 	if (err == -ENODATA)
4065 		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
4066 	while (num_syncs--)
4067 		xe_sync_entry_cleanup(&syncs[num_syncs]);
4068 
4069 	kfree(syncs);
4070 put_obj:
4071 	for (i = 0; i < args->num_binds; ++i)
4072 		xe_bo_put(bos[i]);
4073 
4074 	kvfree(ops);
4075 free_bos:
4076 	kvfree(bos);
4077 release_vm_lock:
4078 	up_write(&vm->lock);
4079 put_exec_queue:
4080 	if (q)
4081 		xe_exec_queue_put(q);
4082 free_bind_ops:
4083 	if (args->num_binds > 1)
4084 		kvfree(bind_ops);
4085 put_vm:
4086 	xe_vm_put(vm);
4087 	return err;
4088 }
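
/*
 * Userspace-side sketch (illustrative only, not part of this file): a
 * minimal single MAP bind submitted through DRM_IOCTL_XE_VM_BIND. Sync
 * entries are omitted, and fd, vm_id, bo_handle, bo_size, gpu_addr and
 * pat_index are assumed to have been obtained elsewhere.
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind = {
 *			.obj = bo_handle,
 *			.obj_offset = 0,
 *			.range = bo_size,		(page aligned)
 *			.addr = gpu_addr,		(page aligned)
 *			.op = DRM_XE_VM_BIND_OP_MAP,
 *			.pat_index = pat_index,
 *		},
 *	};
 *
 *	err = ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */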
4089 
4090 /*
4091  * Map access type, fault type, and fault level from current bspec
4092  * specification to user spec abstraction.  The current mapping is
4093  * approximately 1-to-1, with access type being the only notable
4094  * exception as it carries additional data with respect to prefetch
4095  * status that needs to be masked out.
4096  */
xe_to_user_access_type(u8 access_type)4097 static u8 xe_to_user_access_type(u8 access_type)
4098 {
4099 	return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK;
4100 }
4101 
xe_to_user_fault_type(u8 fault_type)4102 static u8 xe_to_user_fault_type(u8 fault_type)
4103 {
4104 	return fault_type;
4105 }
4106 
xe_to_user_fault_level(u8 fault_level)4107 static u8 xe_to_user_fault_level(u8 fault_level)
4108 {
4109 	return fault_level;
4110 }
4111 
fill_faults(struct xe_vm * vm,struct drm_xe_vm_get_property * args)4112 static int fill_faults(struct xe_vm *vm,
4113 		       struct drm_xe_vm_get_property *args)
4114 {
4115 	struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data);
4116 	struct xe_vm_fault *fault_list, fault_entry = { 0 };
4117 	struct xe_vm_fault_entry *entry;
4118 	int ret = 0, i = 0, count, entry_size;
4119 
4120 	entry_size = sizeof(struct xe_vm_fault);
4121 	count = args->size / entry_size;
4122 
4123 	fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL);
4124 	if (!fault_list)
4125 		return -ENOMEM;
4126 
4127 	spin_lock(&vm->faults.lock);
4128 	list_for_each_entry(entry, &vm->faults.list, list) {
4129 		if (i == count)
4130 			break;
4131 
4132 		fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address);
4133 		fault_entry.address_precision = entry->address_precision;
4134 
4135 		fault_entry.access_type = xe_to_user_access_type(entry->access_type);
4136 		fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type);
4137 		fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level);
4138 
4139 		memcpy(&fault_list[i], &fault_entry, entry_size);
4140 
4141 		i++;
4142 	}
4143 	spin_unlock(&vm->faults.lock);
4144 
4145 	ret = copy_to_user(usr_ptr, fault_list, args->size);
4146 
4147 	kfree(fault_list);
4148 	return ret ? -EFAULT : 0;
4149 }
4150 
xe_vm_get_property_helper(struct xe_vm * vm,struct drm_xe_vm_get_property * args)4151 static int xe_vm_get_property_helper(struct xe_vm *vm,
4152 				     struct drm_xe_vm_get_property *args)
4153 {
4154 	size_t size;
4155 
4156 	switch (args->property) {
4157 	case DRM_XE_VM_GET_PROPERTY_FAULTS:
4158 		spin_lock(&vm->faults.lock);
4159 		size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len);
4160 		spin_unlock(&vm->faults.lock);
4161 
4162 		if (!args->size) {
4163 			args->size = size;
4164 			return 0;
4165 		}
4166 
4167 		/*
4168 		 * Number of faults may increase between calls to
4169 		 * xe_vm_get_property_ioctl, so just report the number of
4170 		 * faults the user requests if it's less than or equal to
4171 		 * the number of faults in the VM fault array.
4172 		 *
4173 		 * Also require that the args->size value is a multiple of
4174 		 * the xe_vm_fault struct size.
4175 		 */
4176 		if (args->size > size || args->size % sizeof(struct xe_vm_fault))
4177 			return -EINVAL;
4178 
4179 		return fill_faults(vm, args);
4180 	}
4181 	return -EINVAL;
4182 }
4183 
xe_vm_get_property_ioctl(struct drm_device * drm,void * data,struct drm_file * file)4184 int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
4185 			     struct drm_file *file)
4186 {
4187 	struct xe_device *xe = to_xe_device(drm);
4188 	struct xe_file *xef = to_xe_file(file);
4189 	struct drm_xe_vm_get_property *args = data;
4190 	struct xe_vm *vm;
4191 	int ret = 0;
4192 
4193 	if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
4194 			      args->reserved[2] || args->extensions ||
4195 			      args->pad)))
4196 		return -EINVAL;
4197 
4198 	vm = xe_vm_lookup(xef, args->vm_id);
4199 	if (XE_IOCTL_DBG(xe, !vm))
4200 		return -ENOENT;
4201 
4202 	ret = xe_vm_get_property_helper(vm, args);
4203 
4204 	xe_vm_put(vm);
4205 	return ret;
4206 }
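
/*
 * Userspace-side sketch (illustrative only) of the two-call pattern the
 * fault-query path above expects. The ioctl request macro name below is an
 * assumption; the struct, property and field names are taken from the code
 * above.
 *
 *	struct drm_xe_vm_get_property get = {
 *		.vm_id = vm_id,
 *		.property = DRM_XE_VM_GET_PROPERTY_FAULTS,
 *		.size = 0,
 *	};
 *
 *	A first call with size == 0 reports the required buffer size:
 *	ioctl(fd, DRM_IOCTL_XE_VM_GET_PROPERTY, &get);
 *
 *	Then allocate get.size bytes (a multiple of sizeof(struct xe_vm_fault)),
 *	point get.data at the buffer and call again to receive the fault array:
 *	get.data = (uintptr_t)buf;
 *	ioctl(fd, DRM_IOCTL_XE_VM_GET_PROPERTY, &get);
 */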
4207 
4208 /**
4209  * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
4210  * @vm: VM to bind the BO to
4211  * @bo: BO to bind
4212  * @q: exec queue to use for the bind (optional)
4213  * @addr: address at which to bind the BO
4214  * @cache_lvl: PAT cache level to use
4215  *
4216  * Execute a VM bind map operation on a kernel-owned BO to bind it into a
4217  * kernel-owned VM.
4218  *
4219  * Returns a dma_fence to track the binding completion if the job to do so was
4220  * successfully submitted, an error pointer otherwise.
4221  */
xe_vm_bind_kernel_bo(struct xe_vm * vm,struct xe_bo * bo,struct xe_exec_queue * q,u64 addr,enum xe_cache_level cache_lvl)4222 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
4223 				       struct xe_exec_queue *q, u64 addr,
4224 				       enum xe_cache_level cache_lvl)
4225 {
4226 	struct xe_vma_ops vops;
4227 	struct drm_gpuva_ops *ops = NULL;
4228 	struct dma_fence *fence;
4229 	int err;
4230 
4231 	xe_bo_get(bo);
4232 	xe_vm_get(vm);
4233 	if (q)
4234 		xe_exec_queue_get(q);
4235 
4236 	down_write(&vm->lock);
4237 
4238 	xe_vma_ops_init(&vops, vm, q, NULL, 0);
4239 
4240 	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
4241 				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
4242 				       vm->xe->pat.idx[cache_lvl]);
4243 	if (IS_ERR(ops)) {
4244 		err = PTR_ERR(ops);
4245 		goto release_vm_lock;
4246 	}
4247 
4248 	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4249 	if (err)
4250 		goto release_vm_lock;
4251 
4252 	xe_assert(vm->xe, !list_empty(&vops.list));
4253 
4254 	err = xe_vma_ops_alloc(&vops, false);
4255 	if (err)
4256 		goto unwind_ops;
4257 
4258 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
4259 	if (IS_ERR(fence))
4260 		err = PTR_ERR(fence);
4261 
4262 unwind_ops:
4263 	if (err && err != -ENODATA)
4264 		vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4265 
4266 	xe_vma_ops_fini(&vops);
4267 	drm_gpuva_ops_free(&vm->gpuvm, ops);
4268 
4269 release_vm_lock:
4270 	up_write(&vm->lock);
4271 
4272 	if (q)
4273 		xe_exec_queue_put(q);
4274 	xe_vm_put(vm);
4275 	xe_bo_put(bo);
4276 
4277 	if (err)
4278 		fence = ERR_PTR(err);
4279 
4280 	return fence;
4281 }
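
/*
 * Illustrative sketch (hypothetical helper, not part of the driver): a
 * kernel-internal caller binding a BO and waiting synchronously for the bind
 * to complete. XE_CACHE_WB is one of the existing xe_cache_level values.
 */
static int __maybe_unused example_bind_kernel_bo_sync(struct xe_vm *vm,
						       struct xe_bo *bo,
						       u64 addr)
{
	struct dma_fence *fence;

	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Block until the bind job has executed, then drop the reference. */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}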
4282 
4283 /**
4284  * xe_vm_lock() - Lock the vm's dma_resv object
4285  * @vm: The struct xe_vm whose lock is to be locked
4286  * @intr: Whether to perform any wait interruptible
4287  *
4288  * Return: 0 on success, -EINTR if @intr is true and the wait for a
4289  * contended lock was interrupted. If @intr is false, the function
4290  * always returns 0.
4291  */
xe_vm_lock(struct xe_vm * vm,bool intr)4292 int xe_vm_lock(struct xe_vm *vm, bool intr)
4293 {
4294 	int ret;
4295 
4296 	if (intr)
4297 		ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
4298 	else
4299 		ret = dma_resv_lock(xe_vm_resv(vm), NULL);
4300 
4301 	return ret;
4302 }
4303 
4304 /**
4305  * xe_vm_unlock() - Unlock the vm's dma_resv object
4306  * @vm: The struct xe_vm whose lock is to be released.
4307  *
4308  * Unlock a buffer object lock that was locked by xe_vm_lock().
4309  */
xe_vm_unlock(struct xe_vm * vm)4310 void xe_vm_unlock(struct xe_vm *vm)
4311 {
4312 	dma_resv_unlock(xe_vm_resv(vm));
4313 }
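
/*
 * Illustrative sketch (hypothetical helper, not part of the driver) of the
 * expected pairing of xe_vm_lock()/xe_vm_unlock() around state protected by
 * the vm's dma_resv.
 */
static int __maybe_unused example_with_vm_locked(struct xe_vm *vm)
{
	int err;

	/* Interruptible wait for the vm's reservation lock. */
	err = xe_vm_lock(vm, true);
	if (err)
		return err;

	/* ... access state protected by the vm's dma_resv here ... */

	xe_vm_unlock(vm);

	return 0;
}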
4314 
4315 /**
4316  * xe_vm_invalidate_vma_submit - Submit a job to invalidate GPU mappings for
4317  * VMA.
4318  * @vma: VMA to invalidate
4319  * @batch: TLB invalidation batch to populate; caller must later call
4320  *         xe_tlb_inval_batch_wait() on it to wait for completion
4321  *
4322  * Walks the page-table leaves and zeroes the entries owned by this VMA,
4323  * then issues TLB invalidations. It does not block waiting for the TLB
4324  * flush to complete; instead it populates @batch, which can be waited on
4325  * using xe_tlb_inval_batch_wait().
4326  *
4327  * Returns 0 for success, negative error code otherwise.
4328  */
xe_vm_invalidate_vma_submit(struct xe_vma * vma,struct xe_tlb_inval_batch * batch)4329 int xe_vm_invalidate_vma_submit(struct xe_vma *vma, struct xe_tlb_inval_batch *batch)
4330 {
4331 	struct xe_device *xe = xe_vma_vm(vma)->xe;
4332 	struct xe_vm *vm = xe_vma_vm(vma);
4333 	struct xe_tile *tile;
4334 	u8 tile_mask = 0;
4335 	int ret = 0;
4336 	u8 id;
4337 
4338 	xe_assert(xe, !xe_vma_is_null(vma));
4339 	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
4340 	trace_xe_vma_invalidate(vma);
4341 
4342 	vm_dbg(&vm->xe->drm,
4343 	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
4344 		xe_vma_start(vma), xe_vma_size(vma));
4345 
4346 	/*
4347 	 * Check that we don't race with page-table updates; the
4348 	 * tile_invalidated update is safe.
4349 	 */
4350 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
4351 		if (xe_vma_is_userptr(vma)) {
4352 			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
4353 				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
4354 					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
4355 
4356 			WARN_ON_ONCE(!mmu_interval_check_retry
4357 				     (&to_userptr_vma(vma)->userptr.notifier,
4358 				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
4359 			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
4360 							     DMA_RESV_USAGE_BOOKKEEP));
4361 
4362 		} else {
4363 			xe_bo_assert_held(xe_vma_bo(vma));
4364 		}
4365 	}
4366 
4367 	for_each_tile(tile, xe, id)
4368 		if (xe_pt_zap_ptes(tile, vma))
4369 			tile_mask |= BIT(id);
4370 
4371 	xe_device_wmb(xe);
4372 
4373 	ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
4374 						 xe_vma_start(vma), xe_vma_end(vma),
4375 						 tile_mask, batch);
4376 
4377 	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
4378 	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
4379 	return ret;
4380 }
4381 
4382 /**
4383  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
4384  * @vma: VMA to invalidate
4385  *
4386  * Walks the page-table leaves and zeroes the entries owned by this VMA,
4387  * invalidates the TLBs, and blocks until the TLB invalidation is
4388  * complete.
4389  *
4390  * Returns 0 for success, negative error code otherwise.
4391  */
xe_vm_invalidate_vma(struct xe_vma * vma)4392 int xe_vm_invalidate_vma(struct xe_vma *vma)
4393 {
4394 	struct xe_tlb_inval_batch batch;
4395 	int ret;
4396 
4397 	ret = xe_vm_invalidate_vma_submit(vma, &batch);
4398 	if (ret)
4399 		return ret;
4400 
4401 	xe_tlb_inval_batch_wait(&batch);
4402 	return ret;
4403 }
4404 
xe_vm_validate_protected(struct xe_vm * vm)4405 int xe_vm_validate_protected(struct xe_vm *vm)
4406 {
4407 	struct drm_gpuva *gpuva;
4408 	int err = 0;
4409 
4410 	if (!vm)
4411 		return -ENODEV;
4412 
4413 	mutex_lock(&vm->snap_mutex);
4414 
4415 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4416 		struct xe_vma *vma = gpuva_to_vma(gpuva);
4417 		struct xe_bo *bo = vma->gpuva.gem.obj ?
4418 			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4419 
4420 		if (!bo)
4421 			continue;
4422 
4423 		if (xe_bo_is_protected(bo)) {
4424 			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
4425 			if (err)
4426 				break;
4427 		}
4428 	}
4429 
4430 	mutex_unlock(&vm->snap_mutex);
4431 	return err;
4432 }
4433 
4434 struct xe_vm_snapshot {
4435 	int uapi_flags;
4436 	unsigned long num_snaps;
4437 	struct {
4438 		u64 ofs, bo_ofs;
4439 		unsigned long len;
4440 #define XE_VM_SNAP_FLAG_USERPTR		BIT(0)
4441 #define XE_VM_SNAP_FLAG_READ_ONLY	BIT(1)
4442 #define XE_VM_SNAP_FLAG_IS_NULL		BIT(2)
4443 		unsigned long flags;
4444 		int uapi_mem_region;
4445 		int pat_index;
4446 		int cpu_caching;
4447 		struct xe_bo *bo;
4448 		void *data;
4449 		struct mm_struct *mm;
4450 	} snap[];
4451 };
4452 
xe_vm_snapshot_capture(struct xe_vm * vm)4453 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
4454 {
4455 	unsigned long num_snaps = 0, i;
4456 	struct xe_vm_snapshot *snap = NULL;
4457 	struct drm_gpuva *gpuva;
4458 
4459 	if (!vm)
4460 		return NULL;
4461 
4462 	mutex_lock(&vm->snap_mutex);
4463 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4464 		if (gpuva->flags & XE_VMA_DUMPABLE)
4465 			num_snaps++;
4466 	}
4467 
4468 	if (num_snaps)
4469 		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4470 	if (!snap) {
4471 		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4472 		goto out_unlock;
4473 	}
4474 
4475 	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
4476 		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
4477 	if (vm->flags & XE_VM_FLAG_LR_MODE)
4478 		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_LR_MODE;
4479 	if (vm->flags & XE_VM_FLAG_SCRATCH_PAGE)
4480 		snap->uapi_flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
4481 
4482 	snap->num_snaps = num_snaps;
4483 	i = 0;
4484 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4485 		struct xe_vma *vma = gpuva_to_vma(gpuva);
4486 		struct xe_bo *bo = vma->gpuva.gem.obj ?
4487 			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4488 
4489 		if (!(gpuva->flags & XE_VMA_DUMPABLE))
4490 			continue;
4491 
4492 		snap->snap[i].ofs = xe_vma_start(vma);
4493 		snap->snap[i].len = xe_vma_size(vma);
4494 		snap->snap[i].flags = xe_vma_read_only(vma) ?
4495 			XE_VM_SNAP_FLAG_READ_ONLY : 0;
4496 		snap->snap[i].pat_index = vma->attr.pat_index;
4497 		if (bo) {
4498 			snap->snap[i].cpu_caching = bo->cpu_caching;
4499 			snap->snap[i].bo = xe_bo_get(bo);
4500 			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4501 			switch (bo->ttm.resource->mem_type) {
4502 			case XE_PL_SYSTEM:
4503 			case XE_PL_TT:
4504 				snap->snap[i].uapi_mem_region = 0;
4505 				break;
4506 			case XE_PL_VRAM0:
4507 				snap->snap[i].uapi_mem_region = 1;
4508 				break;
4509 			case XE_PL_VRAM1:
4510 				snap->snap[i].uapi_mem_region = 2;
4511 				break;
4512 			}
4513 		} else if (xe_vma_is_userptr(vma)) {
4514 			struct mm_struct *mm =
4515 				to_userptr_vma(vma)->userptr.notifier.mm;
4516 
4517 			if (mmget_not_zero(mm))
4518 				snap->snap[i].mm = mm;
4519 			else
4520 				snap->snap[i].data = ERR_PTR(-EFAULT);
4521 
4522 			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4523 			snap->snap[i].flags |= XE_VM_SNAP_FLAG_USERPTR;
4524 			snap->snap[i].uapi_mem_region = 0;
4525 		} else if (xe_vma_is_null(vma)) {
4526 			snap->snap[i].flags |= XE_VM_SNAP_FLAG_IS_NULL;
4527 			snap->snap[i].uapi_mem_region = -1;
4528 		} else {
4529 			snap->snap[i].data = ERR_PTR(-ENOENT);
4530 			snap->snap[i].uapi_mem_region = -1;
4531 		}
4532 		i++;
4533 	}
4534 
4535 out_unlock:
4536 	mutex_unlock(&vm->snap_mutex);
4537 	return snap;
4538 }
4539 
xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot * snap)4540 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4541 {
4542 	if (IS_ERR_OR_NULL(snap))
4543 		return;
4544 
4545 	for (int i = 0; i < snap->num_snaps; i++) {
4546 		struct xe_bo *bo = snap->snap[i].bo;
4547 		int err;
4548 
4549 		if (IS_ERR(snap->snap[i].data) ||
4550 		    snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4551 			continue;
4552 
4553 		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4554 		if (!snap->snap[i].data) {
4555 			snap->snap[i].data = ERR_PTR(-ENOMEM);
4556 			goto cleanup_bo;
4557 		}
4558 
4559 		if (bo) {
4560 			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4561 					 snap->snap[i].data, snap->snap[i].len);
4562 		} else {
4563 			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4564 
4565 			kthread_use_mm(snap->snap[i].mm);
4566 			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4567 				err = 0;
4568 			else
4569 				err = -EFAULT;
4570 			kthread_unuse_mm(snap->snap[i].mm);
4571 
4572 			mmput(snap->snap[i].mm);
4573 			snap->snap[i].mm = NULL;
4574 		}
4575 
4576 		if (err) {
4577 			kvfree(snap->snap[i].data);
4578 			snap->snap[i].data = ERR_PTR(err);
4579 		}
4580 
4581 cleanup_bo:
4582 		xe_bo_put(bo);
4583 		snap->snap[i].bo = NULL;
4584 	}
4585 }
4586 
xe_vm_snapshot_print(struct xe_vm_snapshot * snap,struct drm_printer * p)4587 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4588 {
4589 	unsigned long i, j;
4590 
4591 	if (IS_ERR_OR_NULL(snap)) {
4592 		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4593 		return;
4594 	}
4595 
4596 	drm_printf(p, "VM.uapi_flags: 0x%x\n", snap->uapi_flags);
4597 	for (i = 0; i < snap->num_snaps; i++) {
4598 		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4599 
4600 		drm_printf(p, "[%llx].properties: %s|%s|mem_region=0x%lx|pat_index=%d|cpu_caching=%d\n",
4601 			   snap->snap[i].ofs,
4602 			   snap->snap[i].flags & XE_VM_SNAP_FLAG_READ_ONLY ?
4603 			   "read_only" : "read_write",
4604 			   snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL ?
4605 			   "null_sparse" :
4606 			   snap->snap[i].flags & XE_VM_SNAP_FLAG_USERPTR ?
4607 			   "userptr" : "bo",
4608 			   snap->snap[i].uapi_mem_region == -1 ? 0 :
4609 			   BIT(snap->snap[i].uapi_mem_region),
4610 			   snap->snap[i].pat_index,
4611 			   snap->snap[i].cpu_caching);
4612 
4613 		if (IS_ERR(snap->snap[i].data)) {
4614 			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4615 				   PTR_ERR(snap->snap[i].data));
4616 			continue;
4617 		}
4618 
4619 		if (snap->snap[i].flags & XE_VM_SNAP_FLAG_IS_NULL)
4620 			continue;
4621 
4622 		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4623 
4624 		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4625 			u32 *val = snap->snap[i].data + j;
4626 			char dumped[ASCII85_BUFSZ];
4627 
4628 			drm_puts(p, ascii85_encode(*val, dumped));
4629 		}
4630 
4631 		drm_puts(p, "\n");
4632 
4633 		if (drm_coredump_printer_is_full(p))
4634 			return;
4635 	}
4636 }
4637 
xe_vm_snapshot_free(struct xe_vm_snapshot * snap)4638 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4639 {
4640 	unsigned long i;
4641 
4642 	if (IS_ERR_OR_NULL(snap))
4643 		return;
4644 
4645 	for (i = 0; i < snap->num_snaps; i++) {
4646 		if (!IS_ERR(snap->snap[i].data))
4647 			kvfree(snap->snap[i].data);
4648 		xe_bo_put(snap->snap[i].bo);
4649 		if (snap->snap[i].mm)
4650 			mmput(snap->snap[i].mm);
4651 	}
4652 	kvfree(snap);
4653 }
4654 
4655 /**
4656  * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4657  * @xe: Pointer to the Xe device structure
4658  * @vma: Pointer to the virtual memory area (VMA) structure
4659  * @is_atomic: True when servicing a page fault for an atomic access
4660  *
4661  * This function determines whether the given VMA needs to be migrated to
4662  * VRAM in order to do atomic GPU operation.
4663  *
4664  * Return:
4665  *   1        - Migration to VRAM is required
4666  *   0        - Migration is not required
4667  *   -EACCES  - Invalid access for atomic memory attr
4668  *
4669  */
xe_vma_need_vram_for_atomic(struct xe_device * xe,struct xe_vma * vma,bool is_atomic)4670 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4671 {
4672 	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4673 					     vma->attr.atomic_access;
4674 
4675 	if (!IS_DGFX(xe) || !is_atomic)
4676 		return false;
4677 
4678 	/*
4679 	 * NOTE: The checks implemented here are platform-specific. For
4680 	 * instance, on a device supporting CXL atomics, these would ideally
4681 	 * work universally without additional handling.
4682 	 */
4683 	switch (atomic_access) {
4684 	case DRM_XE_ATOMIC_DEVICE:
4685 		return !xe->info.has_device_atomics_on_smem;
4686 
4687 	case DRM_XE_ATOMIC_CPU:
4688 		return -EACCES;
4689 
4690 	case DRM_XE_ATOMIC_UNDEFINED:
4691 	case DRM_XE_ATOMIC_GLOBAL:
4692 	default:
4693 		return 1;
4694 	}
4695 }
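
/*
 * Illustrative sketch (hypothetical helper): how a page-fault handler might
 * consume the tri-state return value documented above.
 */
static int __maybe_unused example_check_atomic_fault(struct xe_device *xe,
						      struct xe_vma *vma)
{
	int ret = xe_vma_need_vram_for_atomic(xe, vma, true);

	if (ret < 0)
		return ret;	/* e.g. -EACCES for DRM_XE_ATOMIC_CPU */

	if (ret == 1) {
		/* The caller would migrate the backing store to VRAM here. */
	}

	return 0;
}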
4696 
xe_vm_alloc_vma(struct xe_vm * vm,struct drm_gpuvm_map_req * map_req,bool is_madvise)4697 static int xe_vm_alloc_vma(struct xe_vm *vm,
4698 			   struct drm_gpuvm_map_req *map_req,
4699 			   bool is_madvise)
4700 {
4701 	struct xe_vma_ops vops;
4702 	struct drm_gpuva_ops *ops = NULL;
4703 	struct drm_gpuva_op *__op;
4704 	unsigned int vma_flags = 0;
4705 	bool remap_op = false;
4706 	struct xe_vma_mem_attr tmp_attr = {};
4707 	u16 default_pat;
4708 	int err;
4709 
4710 	lockdep_assert_held_write(&vm->lock);
4711 
4712 	if (is_madvise)
4713 		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4714 	else
4715 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4716 
4717 	if (IS_ERR(ops))
4718 		return PTR_ERR(ops);
4719 
4720 	if (list_empty(&ops->list)) {
4721 		err = 0;
4722 		goto free_ops;
4723 	}
4724 
4725 	drm_gpuva_for_each_op(__op, ops) {
4726 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4727 		struct xe_vma *vma = NULL;
4728 
4729 		if (!is_madvise) {
4730 			if (__op->op == DRM_GPUVA_OP_UNMAP) {
4731 				vma = gpuva_to_vma(op->base.unmap.va);
4732 				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4733 				default_pat = vma->attr.default_pat_index;
4734 				vma_flags = vma->gpuva.flags;
4735 			}
4736 
4737 			if (__op->op == DRM_GPUVA_OP_REMAP) {
4738 				vma = gpuva_to_vma(op->base.remap.unmap->va);
4739 				default_pat = vma->attr.default_pat_index;
4740 				vma_flags = vma->gpuva.flags;
4741 			}
4742 
4743 			if (__op->op == DRM_GPUVA_OP_MAP) {
4744 				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4745 				op->map.pat_index = default_pat;
4746 			}
4747 		} else {
4748 			if (__op->op == DRM_GPUVA_OP_REMAP) {
4749 				vma = gpuva_to_vma(op->base.remap.unmap->va);
4750 				xe_assert(vm->xe, !remap_op);
4751 				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4752 				remap_op = true;
4753 				vma_flags = vma->gpuva.flags;
4754 			}
4755 
4756 			if (__op->op == DRM_GPUVA_OP_MAP) {
4757 				xe_assert(vm->xe, remap_op);
4758 				remap_op = false;
4759 				/*
4760 				 * For madvise ops, DRM_GPUVA_OP_MAP always
4761 				 * comes after DRM_GPUVA_OP_REMAP, so
4762 				 * propagate the flags from the vma we're
4763 				 * unmapping.
4764 				 */
4765 				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4766 			}
4767 		}
4768 		print_op(vm->xe, __op);
4769 	}
4770 
4771 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4772 
4773 	if (is_madvise)
4774 		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4775 	else
4776 		vops.flags |= XE_VMA_OPS_FLAG_ALLOW_SVM_UNMAP;
4777 
4778 	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4779 	if (err)
4780 		goto unwind_ops;
4781 
4782 	xe_vm_lock(vm, false);
4783 
4784 	drm_gpuva_for_each_op(__op, ops) {
4785 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4786 		struct xe_vma *vma;
4787 
4788 		if (__op->op == DRM_GPUVA_OP_UNMAP) {
4789 			vma = gpuva_to_vma(op->base.unmap.va);
4790 			/* There should be no unmap for madvise */
4791 			if (is_madvise)
4792 				XE_WARN_ON("UNEXPECTED UNMAP");
4793 
4794 			xe_vma_destroy(vma, NULL);
4795 		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
4796 			vma = gpuva_to_vma(op->base.remap.unmap->va);
4797 			/* In case of madvise ops Store attributes for REMAP UNMAPPED
4798 			 * VMA, so they can be assigned to newly MAP created vma.
4799 			 */
4800 			if (is_madvise)
4801 				xe_vma_mem_attr_copy(&tmp_attr, &vma->attr);
4802 
4803 			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4804 		} else if (__op->op == DRM_GPUVA_OP_MAP) {
4805 			vma = op->map.vma;
4806 			/* For madvise ops, MAP is always preceded by REMAP, so
4807 			 * tmp_attr already holds sane values and it is safe to
4808 			 * copy them to the new vma.
4809 			 */
4810 			if (is_madvise)
4811 				xe_vma_mem_attr_copy(&vma->attr, &tmp_attr);
4812 		}
4813 	}
4814 
4815 	xe_vm_unlock(vm);
4816 	drm_gpuva_ops_free(&vm->gpuvm, ops);
4817 	xe_vma_mem_attr_fini(&tmp_attr);
4818 	return 0;
4819 
4820 unwind_ops:
4821 	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4822 free_ops:
4823 	drm_gpuva_ops_free(&vm->gpuvm, ops);
4824 	return err;
4825 }
4826 
4827 /**
4828  * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
4829  * @vm: Pointer to the xe_vm structure
4830  * @start: Starting input address
4831  * @range: Size of the input range
4832  *
4833  * This function splits existing vma to create new vma for user provided input range
4834  *
4835  * Return: 0 if success
4836  */
xe_vm_alloc_madvise_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4837 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4838 {
4839 	struct drm_gpuvm_map_req map_req = {
4840 		.map.va.addr = start,
4841 		.map.va.range = range,
4842 	};
4843 
4844 	lockdep_assert_held_write(&vm->lock);
4845 
4846 	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4847 
4848 	return xe_vm_alloc_vma(vm, &map_req, true);
4849 }
4850 
is_cpu_addr_vma_with_default_attr(struct xe_vma * vma)4851 static bool is_cpu_addr_vma_with_default_attr(struct xe_vma *vma)
4852 {
4853 	return vma && xe_vma_is_cpu_addr_mirror(vma) &&
4854 	       xe_vma_has_default_mem_attrs(vma);
4855 }
4856 
4857 /**
4858  * xe_vm_find_cpu_addr_mirror_vma_range - Extend a VMA range to include adjacent CPU-mirrored VMAs
4859  * @vm: VM to search within
4860  * @start: Input/output pointer to the starting address of the range
4861  * @end: Input/output pointer to the end address of the range
4862  *
4863  * Given a range defined by @start and @end, this function checks the VMAs
4864  * immediately before and after the range. If those neighboring VMAs are
4865  * CPU-address-mirrored and have default memory attributes, the function
4866  * updates @start and @end to include them. This extended range can then
4867  * be used for merging or other operations that require a unified VMA.
4868  *
4869  * The function does not perform the merge itself; it only computes the
4870  * mergeable boundaries.
4871  */
xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm * vm,u64 * start,u64 * end)4872 void xe_vm_find_cpu_addr_mirror_vma_range(struct xe_vm *vm, u64 *start, u64 *end)
4873 {
4874 	struct xe_vma *prev, *next;
4875 
4876 	lockdep_assert_held(&vm->lock);
4877 
4878 	if (*start >= SZ_4K) {
4879 		prev = xe_vm_find_vma_by_addr(vm, *start - SZ_4K);
4880 		if (is_cpu_addr_vma_with_default_attr(prev))
4881 			*start = xe_vma_start(prev);
4882 	}
4883 
4884 	if (*end < vm->size) {
4885 		next = xe_vm_find_vma_by_addr(vm, *end + 1);
4886 		if (is_cpu_addr_vma_with_default_attr(next))
4887 			*end = xe_vma_end(next);
4888 	}
4889 }
4890 
4891 /**
4892  * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4893  * @vm: Pointer to the xe_vm structure
4894  * @start: Starting input address
4895  * @range: Size of the input range
4896  *
4897  * This function splits/merges an existing vma to create a new vma for the user-provided input range.
4898  *
4899  * Return: 0 on success
4900  */
xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4901 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4902 {
4903 	struct drm_gpuvm_map_req map_req = {
4904 		.map.va.addr = start,
4905 		.map.va.range = range,
4906 	};
4907 
4908 	lockdep_assert_held_write(&vm->lock);
4909 
4910 	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4911 	       start, range);
4912 
4913 	return xe_vm_alloc_vma(vm, &map_req, false);
4914 }
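
/*
 * Illustrative sketch (hypothetical helper): combining the two functions
 * above to grow a request over adjacent default-attribute mirror VMAs before
 * carving out a single CPU-address-mirror VMA covering the whole range.
 */
static int __maybe_unused example_alloc_mirror_vma(struct xe_vm *vm,
						    u64 addr, u64 range)
{
	u64 start = addr, end = addr + range;

	lockdep_assert_held_write(&vm->lock);

	xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);

	return xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
}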
4915 
4916 /**
4917  * xe_vm_add_exec_queue() - Add exec queue to VM
4918  * @vm: The VM.
4919  * @q: The exec_queue
4920  *
4921  * Add the exec queue to the VM. This is skipped if the device does not have
4922  * context-based TLB invalidation.
4923  */
xe_vm_add_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)4924 void xe_vm_add_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4925 {
4926 	struct xe_device *xe = vm->xe;
4927 
4928 	/* User VMs and queues only */
4929 	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
4930 	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
4931 	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
4932 	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_MIGRATE));
4933 	xe_assert(xe, vm->xef);
4934 	xe_assert(xe, vm == q->vm);
4935 
4936 	if (!xe->info.has_ctx_tlb_inval)
4937 		return;
4938 
4939 	down_write(&vm->exec_queues.lock);
4940 	list_add(&q->vm_exec_queue_link, &vm->exec_queues.list[q->gt->info.id]);
4941 	++vm->exec_queues.count[q->gt->info.id];
4942 	up_write(&vm->exec_queues.lock);
4943 }
4944 
4945 /**
4946  * xe_vm_remove_exec_queue() - Remove exec queue from VM
4947  * @vm: The VM.
4948  * @q: The exec_queue
4949  *
4950  * Remove the exec queue from the VM. This is skipped if the device does not
4951  * have context-based TLB invalidation.
4952  */
xe_vm_remove_exec_queue(struct xe_vm * vm,struct xe_exec_queue * q)4953 void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
4954 {
4955 	if (!vm->xe->info.has_ctx_tlb_inval)
4956 		return;
4957 
4958 	down_write(&vm->exec_queues.lock);
4959 	if (!list_empty(&q->vm_exec_queue_link)) {
4960 		list_del(&q->vm_exec_queue_link);
4961 		--vm->exec_queues.count[q->gt->info.id];
4962 	}
4963 	up_write(&vm->exec_queues.lock);
4964 }
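
/*
 * Illustrative sketch (hypothetical helper): the two calls above are meant to
 * be paired over the lifetime of a user exec queue so that the VM can target
 * context-based TLB invalidations at it.
 */
static void __maybe_unused example_track_user_queue(struct xe_vm *vm,
						    struct xe_exec_queue *q,
						    bool created)
{
	if (created)
		xe_vm_add_exec_queue(vm, q);
	else
		xe_vm_remove_exec_queue(vm, q);
}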
4965