xref: /linux/drivers/gpu/drm/xe/xe_vm.c (revision 390db60f8e2bd21fae544917eb3a8618265c058c)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_vm.h"
7 
8 #include <linux/dma-fence-array.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_exec.h>
13 #include <drm/drm_print.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <uapi/drm/xe_drm.h>
16 #include <linux/ascii85.h>
17 #include <linux/delay.h>
18 #include <linux/kthread.h>
19 #include <linux/mm.h>
20 #include <linux/swap.h>
21 
22 #include <generated/xe_wa_oob.h>
23 
24 #include "regs/xe_gtt_defs.h"
25 #include "xe_assert.h"
26 #include "xe_bo.h"
27 #include "xe_device.h"
28 #include "xe_drm_client.h"
29 #include "xe_exec_queue.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_migrate.h"
32 #include "xe_pat.h"
33 #include "xe_pm.h"
34 #include "xe_preempt_fence.h"
35 #include "xe_pt.h"
36 #include "xe_pxp.h"
37 #include "xe_res_cursor.h"
38 #include "xe_svm.h"
39 #include "xe_sync.h"
40 #include "xe_tile.h"
41 #include "xe_tlb_inval.h"
42 #include "xe_trace_bo.h"
43 #include "xe_wa.h"
44 
45 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
46 {
47 	return vm->gpuvm.r_obj;
48 }
49 
50 /**
51  * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
52  * @vm: The vm whose resv is to be locked.
53  * @exec: The drm_exec transaction.
54  *
55  * Helper to lock the vm's resv as part of a drm_exec transaction.
56  *
57  * Return: %0 on success. See drm_exec_lock_obj() for error codes.
58  */
59 int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
60 {
61 	return drm_exec_lock_obj(exec, xe_vm_obj(vm));
62 }
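
/*
 * Illustrative sketch: one way a caller might take the VM resv as part of a
 * wider drm_exec transaction, retrying on contention. The variables below
 * (vm, exec, err) are assumed to exist in the caller's context.
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_drm_exec_lock(vm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 *	...
 *	drm_exec_fini(&exec);
 */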
63 
64 static bool preempt_fences_waiting(struct xe_vm *vm)
65 {
66 	struct xe_exec_queue *q;
67 
68 	lockdep_assert_held(&vm->lock);
69 	xe_vm_assert_held(vm);
70 
71 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
72 		if (!q->lr.pfence ||
73 		    test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
74 			     &q->lr.pfence->flags)) {
75 			return true;
76 		}
77 	}
78 
79 	return false;
80 }
81 
82 static void free_preempt_fences(struct list_head *list)
83 {
84 	struct list_head *link, *next;
85 
86 	list_for_each_safe(link, next, list)
87 		xe_preempt_fence_free(to_preempt_fence_from_link(link));
88 }
89 
90 static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
91 				unsigned int *count)
92 {
93 	lockdep_assert_held(&vm->lock);
94 	xe_vm_assert_held(vm);
95 
96 	if (*count >= vm->preempt.num_exec_queues)
97 		return 0;
98 
99 	for (; *count < vm->preempt.num_exec_queues; ++(*count)) {
100 		struct xe_preempt_fence *pfence = xe_preempt_fence_alloc();
101 
102 		if (IS_ERR(pfence))
103 			return PTR_ERR(pfence);
104 
105 		list_move_tail(xe_preempt_fence_link(pfence), list);
106 	}
107 
108 	return 0;
109 }
110 
111 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
112 {
113 	struct xe_exec_queue *q;
114 
115 	xe_vm_assert_held(vm);
116 
117 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
118 		if (q->lr.pfence) {
119 			long timeout = dma_fence_wait(q->lr.pfence, false);
120 
121 			/* Only -ETIME on fence indicates VM needs to be killed */
122 			if (timeout < 0 || q->lr.pfence->error == -ETIME)
123 				return -ETIME;
124 
125 			dma_fence_put(q->lr.pfence);
126 			q->lr.pfence = NULL;
127 		}
128 	}
129 
130 	return 0;
131 }
132 
133 static bool xe_vm_is_idle(struct xe_vm *vm)
134 {
135 	struct xe_exec_queue *q;
136 
137 	xe_vm_assert_held(vm);
138 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
139 		if (!xe_exec_queue_is_idle(q))
140 			return false;
141 	}
142 
143 	return true;
144 }
145 
146 static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list)
147 {
148 	struct list_head *link;
149 	struct xe_exec_queue *q;
150 
151 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
152 		struct dma_fence *fence;
153 
154 		link = list->next;
155 		xe_assert(vm->xe, link != list);
156 
157 		fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link),
158 					     q, q->lr.context,
159 					     ++q->lr.seqno);
160 		dma_fence_put(q->lr.pfence);
161 		q->lr.pfence = fence;
162 	}
163 }
164 
165 static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo)
166 {
167 	struct xe_exec_queue *q;
168 	int err;
169 
170 	xe_bo_assert_held(bo);
171 
172 	if (!vm->preempt.num_exec_queues)
173 		return 0;
174 
175 	err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues);
176 	if (err)
177 		return err;
178 
179 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
180 		if (q->lr.pfence) {
181 			dma_resv_add_fence(bo->ttm.base.resv,
182 					   q->lr.pfence,
183 					   DMA_RESV_USAGE_BOOKKEEP);
184 		}
185 
186 	return 0;
187 }
188 
189 static void resume_and_reinstall_preempt_fences(struct xe_vm *vm,
190 						struct drm_exec *exec)
191 {
192 	struct xe_exec_queue *q;
193 
194 	lockdep_assert_held(&vm->lock);
195 	xe_vm_assert_held(vm);
196 
197 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
198 		q->ops->resume(q);
199 
200 		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence,
201 					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
202 	}
203 }
204 
205 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
206 {
207 	struct drm_gpuvm_exec vm_exec = {
208 		.vm = &vm->gpuvm,
209 		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
210 		.num_fences = 1,
211 	};
212 	struct drm_exec *exec = &vm_exec.exec;
213 	struct xe_validation_ctx ctx;
214 	struct dma_fence *pfence;
215 	int err;
216 	bool wait;
217 
218 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
219 
220 	down_write(&vm->lock);
221 	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
222 	if (err)
223 		goto out_up_write;
224 
225 	pfence = xe_preempt_fence_create(q, q->lr.context,
226 					 ++q->lr.seqno);
227 	if (IS_ERR(pfence)) {
228 		err = PTR_ERR(pfence);
229 		goto out_fini;
230 	}
231 
232 	list_add(&q->lr.link, &vm->preempt.exec_queues);
233 	++vm->preempt.num_exec_queues;
234 	q->lr.pfence = pfence;
235 
236 	xe_svm_notifier_lock(vm);
237 
238 	drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
239 				 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
240 
241 	/*
242 	 * Check whether a preemption on the VM or a userptr invalidation is
243 	 * in flight; if so, trigger this preempt fence to sync state with the
244 	 * other preempt fences on the VM.
245 	 */
246 	wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm);
247 	if (wait)
248 		dma_fence_enable_sw_signaling(pfence);
249 
250 	xe_svm_notifier_unlock(vm);
251 
252 out_fini:
253 	xe_validation_ctx_fini(&ctx);
254 out_up_write:
255 	up_write(&vm->lock);
256 
257 	return err;
258 }
259 ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO);
260 
261 /**
262  * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM
263  * @vm: The VM.
264  * @q: The exec_queue
265  *
266  * Note that this function might be called multiple times on the same queue.
267  */
268 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
269 {
270 	if (!xe_vm_in_preempt_fence_mode(vm))
271 		return;
272 
273 	down_write(&vm->lock);
274 	if (!list_empty(&q->lr.link)) {
275 		list_del_init(&q->lr.link);
276 		--vm->preempt.num_exec_queues;
277 	}
278 	if (q->lr.pfence) {
279 		dma_fence_enable_sw_signaling(q->lr.pfence);
280 		dma_fence_put(q->lr.pfence);
281 		q->lr.pfence = NULL;
282 	}
283 	up_write(&vm->lock);
284 }
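
/*
 * Illustrative sketch of the expected pairing with
 * xe_vm_add_compute_exec_queue(). Because removal uses list_del_init() and
 * clears the preempt fence pointer, a second call on the same queue is a
 * no-op, which is why teardown paths may safely call it again.
 *
 *	err = xe_vm_add_compute_exec_queue(vm, q);
 *	if (err)
 *		return err;
 *	...
 *	xe_vm_remove_compute_exec_queue(vm, q);
 */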
285 
286 #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
287 
288 /**
289  * xe_vm_kill() - VM Kill
290  * @vm: The VM.
291  * @unlocked: Flag indicating the VM's dma-resv is not held
292  *
293  * Kill the VM by setting the banned flag, indicating the VM is no longer available
294  * for use. If in preempt fence mode, also kill all exec queues attached to the VM.
295  */
296 void xe_vm_kill(struct xe_vm *vm, bool unlocked)
297 {
298 	struct xe_exec_queue *q;
299 
300 	lockdep_assert_held(&vm->lock);
301 
302 	if (unlocked)
303 		xe_vm_lock(vm, false);
304 
305 	vm->flags |= XE_VM_FLAG_BANNED;
306 	trace_xe_vm_kill(vm);
307 
308 	list_for_each_entry(q, &vm->preempt.exec_queues, lr.link)
309 		q->ops->kill(q);
310 
311 	if (unlocked)
312 		xe_vm_unlock(vm);
313 
314 	/* TODO: Inform user the VM is banned */
315 }
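
/*
 * Note on @unlocked (assumption drawn from the code above): callers that
 * already hold the VM's dma-resv pass unlocked == false, while callers that
 * hold only vm->lock pass unlocked == true so the helper takes and drops the
 * resv itself, e.g. xe_vm_kill(vm, true) from the rebind worker error path.
 */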
316 
317 static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
318 {
319 	struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
320 	struct drm_gpuva *gpuva;
321 	int ret;
322 
323 	lockdep_assert_held(&vm->lock);
324 	drm_gpuvm_bo_for_each_va(gpuva, vm_bo)
325 		list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
326 			       &vm->rebind_list);
327 
328 	if (!try_wait_for_completion(&vm->xe->pm_block))
329 		return -EAGAIN;
330 
331 	ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
332 	if (ret)
333 		return ret;
334 
335 	vm_bo->evicted = false;
336 	return 0;
337 }
338 
339 /**
340  * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
341  * @vm: The vm for which we are rebinding.
342  * @exec: The struct drm_exec with the locked GEM objects.
343  * @num_fences: The number of fences to reserve for the operation, not
344  * including rebinds and validations.
345  *
346  * Validates all evicted gem objects and rebinds their vmas. Note that
347  * rebindings may cause evictions and hence the validation-rebind
348  * sequence is rerun until there are no more objects to validate.
349  *
350  * Return: 0 on success, negative error code on error. In particular,
351  * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
352  * the drm_exec transaction needs to be restarted.
353  */
354 int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
355 			  unsigned int num_fences)
356 {
357 	struct drm_gem_object *obj;
358 	unsigned long index;
359 	int ret;
360 
361 	do {
362 		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
363 		if (ret)
364 			return ret;
365 
366 		ret = xe_vm_rebind(vm, false);
367 		if (ret)
368 			return ret;
369 	} while (!list_empty(&vm->gpuvm.evict.list));
370 
371 	drm_exec_for_each_locked_object(exec, index, obj) {
372 		ret = dma_resv_reserve_fences(obj->resv, num_fences);
373 		if (ret)
374 			return ret;
375 	}
376 
377 	return 0;
378 }
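
/*
 * Illustrative sketch: xe_vm_validate_rebind() is intended to run inside a
 * drm_exec loop so that -EDEADLK restarts the whole transaction; vm, exec,
 * err and num_fences are assumed to come from the caller's context.
 *
 *	drm_exec_until_all_locked(&exec) {
 *		err = xe_vm_drm_exec_lock(vm, &exec);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *
 *		err = xe_vm_validate_rebind(vm, &exec, num_fences);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *	}
 */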
379 
380 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
381 				 bool *done)
382 {
383 	int err;
384 
385 	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
386 	if (err)
387 		return err;
388 
389 	if (xe_vm_is_idle(vm)) {
390 		vm->preempt.rebind_deactivated = true;
391 		*done = true;
392 		return 0;
393 	}
394 
395 	if (!preempt_fences_waiting(vm)) {
396 		*done = true;
397 		return 0;
398 	}
399 
400 	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
401 	if (err)
402 		return err;
403 
404 	err = wait_for_existing_preempt_fences(vm);
405 	if (err)
406 		return err;
407 
408 	/*
409 	 * Add validation and rebinding to the locking loop since both can
410 	 * cause evictions which may require blocking dma_resv locks.
411 	 * The fence reservation here is intended for the new preempt fences
412 	 * we attach at the end of the rebind work.
413 	 */
414 	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
415 }
416 
417 static bool vm_suspend_rebind_worker(struct xe_vm *vm)
418 {
419 	struct xe_device *xe = vm->xe;
420 	bool ret = false;
421 
422 	mutex_lock(&xe->rebind_resume_lock);
423 	if (!try_wait_for_completion(&vm->xe->pm_block)) {
424 		ret = true;
425 		list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
426 	}
427 	mutex_unlock(&xe->rebind_resume_lock);
428 
429 	return ret;
430 }
431 
432 /**
433  * xe_vm_resume_rebind_worker() - Resume the rebind worker.
434  * @vm: The vm whose preempt worker to resume.
435  *
436  * Resume a preempt worker that was previously suspended by
437  * vm_suspend_rebind_worker().
438  */
439 void xe_vm_resume_rebind_worker(struct xe_vm *vm)
440 {
441 	queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
442 }
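
/*
 * Illustrative sketch (assumed resume-side pairing): a worker that parked
 * itself on xe->rebind_resume_list via vm_suspend_rebind_worker() would be
 * requeued roughly like this once xe->pm_block completes:
 *
 *	mutex_lock(&xe->rebind_resume_lock);
 *	list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
 *				 preempt.pm_activate_link) {
 *		list_del_init(&vm->preempt.pm_activate_link);
 *		xe_vm_resume_rebind_worker(vm);
 *	}
 *	mutex_unlock(&xe->rebind_resume_lock);
 */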
443 
444 static void preempt_rebind_work_func(struct work_struct *w)
445 {
446 	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
447 	struct xe_validation_ctx ctx;
448 	struct drm_exec exec;
449 	unsigned int fence_count = 0;
450 	LIST_HEAD(preempt_fences);
451 	int err = 0;
452 	long wait;
453 	int __maybe_unused tries = 0;
454 
455 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
456 	trace_xe_vm_rebind_worker_enter(vm);
457 
458 	down_write(&vm->lock);
459 
460 	if (xe_vm_is_closed_or_banned(vm)) {
461 		up_write(&vm->lock);
462 		trace_xe_vm_rebind_worker_exit(vm);
463 		return;
464 	}
465 
466 retry:
467 	if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
468 		up_write(&vm->lock);
469 		return;
470 	}
471 
472 	if (xe_vm_userptr_check_repin(vm)) {
473 		err = xe_vm_userptr_pin(vm);
474 		if (err)
475 			goto out_unlock_outer;
476 	}
477 
478 	err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
479 				     (struct xe_val_flags) {.interruptible = true});
480 	if (err)
481 		goto out_unlock_outer;
482 
483 	drm_exec_until_all_locked(&exec) {
484 		bool done = false;
485 
486 		err = xe_preempt_work_begin(&exec, vm, &done);
487 		drm_exec_retry_on_contention(&exec);
488 		xe_validation_retry_on_oom(&ctx, &err);
489 		if (err || done) {
490 			xe_validation_ctx_fini(&ctx);
491 			goto out_unlock_outer;
492 		}
493 	}
494 
495 	err = alloc_preempt_fences(vm, &preempt_fences, &fence_count);
496 	if (err)
497 		goto out_unlock;
498 
499 	xe_vm_set_validation_exec(vm, &exec);
500 	err = xe_vm_rebind(vm, true);
501 	xe_vm_set_validation_exec(vm, NULL);
502 	if (err)
503 		goto out_unlock;
504 
505 	/* Wait on rebinds and munmap style VM unbinds */
506 	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
507 				     DMA_RESV_USAGE_KERNEL,
508 				     false, MAX_SCHEDULE_TIMEOUT);
509 	if (wait <= 0) {
510 		err = -ETIME;
511 		goto out_unlock;
512 	}
513 
514 #define retry_required(__tries, __vm) \
515 	(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
516 	(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
517 	__xe_vm_userptr_needs_repin(__vm))
518 
519 	xe_svm_notifier_lock(vm);
520 	if (retry_required(tries, vm)) {
521 		xe_svm_notifier_unlock(vm);
522 		err = -EAGAIN;
523 		goto out_unlock;
524 	}
525 
526 #undef retry_required
527 
528 	spin_lock(&vm->xe->ttm.lru_lock);
529 	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
530 	spin_unlock(&vm->xe->ttm.lru_lock);
531 
532 	/* Point of no return. */
533 	arm_preempt_fences(vm, &preempt_fences);
534 	resume_and_reinstall_preempt_fences(vm, &exec);
535 	xe_svm_notifier_unlock(vm);
536 
537 out_unlock:
538 	xe_validation_ctx_fini(&ctx);
539 out_unlock_outer:
540 	if (err == -EAGAIN) {
541 		trace_xe_vm_rebind_worker_retry(vm);
542 		goto retry;
543 	}
544 
545 	if (err) {
546 		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
547 		xe_vm_kill(vm, true);
548 	}
549 	up_write(&vm->lock);
550 
551 	free_preempt_fences(&preempt_fences);
552 
553 	trace_xe_vm_rebind_worker_exit(vm);
554 }
555 
556 static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
557 {
558 	int i;
559 
560 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) {
561 		if (!vops->pt_update_ops[i].num_ops)
562 			continue;
563 
564 		vops->pt_update_ops[i].ops =
565 			kmalloc_array(vops->pt_update_ops[i].num_ops,
566 				      sizeof(*vops->pt_update_ops[i].ops),
567 				      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
568 		if (!vops->pt_update_ops[i].ops)
569 			return array_of_binds ? -ENOBUFS : -ENOMEM;
570 	}
571 
572 	return 0;
573 }
574 ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
575 
576 static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
577 {
578 	struct xe_vma *vma;
579 
580 	vma = gpuva_to_vma(op->base.prefetch.va);
581 
582 	if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
583 		xa_destroy(&op->prefetch_range.range);
584 }
585 
586 static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
587 {
588 	struct xe_vma_op *op;
589 
590 	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
591 		return;
592 
593 	list_for_each_entry(op, &vops->list, link)
594 		xe_vma_svm_prefetch_op_fini(op);
595 }
596 
597 static void xe_vma_ops_fini(struct xe_vma_ops *vops)
598 {
599 	int i;
600 
601 	xe_vma_svm_prefetch_ops_fini(vops);
602 
603 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
604 		kfree(vops->pt_update_ops[i].ops);
605 }
606 
607 static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
608 {
609 	int i;
610 
611 	if (!inc_val)
612 		return;
613 
614 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
615 		if (BIT(i) & tile_mask)
616 			vops->pt_update_ops[i].num_ops += inc_val;
617 }
618 
619 #define XE_VMA_CREATE_MASK (		    \
620 	XE_VMA_READ_ONLY |		    \
621 	XE_VMA_DUMPABLE |		    \
622 	XE_VMA_SYSTEM_ALLOCATOR |           \
623 	DRM_GPUVA_SPARSE |		    \
624 	XE_VMA_MADV_AUTORESET)
625 
626 static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
627 				  u8 tile_mask)
628 {
629 	INIT_LIST_HEAD(&op->link);
630 	op->tile_mask = tile_mask;
631 	op->base.op = DRM_GPUVA_OP_MAP;
632 	op->base.map.va.addr = vma->gpuva.va.addr;
633 	op->base.map.va.range = vma->gpuva.va.range;
634 	op->base.map.gem.obj = vma->gpuva.gem.obj;
635 	op->base.map.gem.offset = vma->gpuva.gem.offset;
636 	op->map.vma = vma;
637 	op->map.immediate = true;
638 	op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
639 }
640 
641 static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
642 				u8 tile_mask)
643 {
644 	struct xe_vma_op *op;
645 
646 	op = kzalloc(sizeof(*op), GFP_KERNEL);
647 	if (!op)
648 		return -ENOMEM;
649 
650 	xe_vm_populate_rebind(op, vma, tile_mask);
651 	list_add_tail(&op->link, &vops->list);
652 	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
653 
654 	return 0;
655 }
656 
657 static struct dma_fence *ops_execute(struct xe_vm *vm,
658 				     struct xe_vma_ops *vops);
659 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
660 			    struct xe_exec_queue *q,
661 			    struct xe_sync_entry *syncs, u32 num_syncs);
662 
663 int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
664 {
665 	struct dma_fence *fence;
666 	struct xe_vma *vma, *next;
667 	struct xe_vma_ops vops;
668 	struct xe_vma_op *op, *next_op;
669 	int err, i;
670 
671 	lockdep_assert_held(&vm->lock);
672 	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
673 	    list_empty(&vm->rebind_list))
674 		return 0;
675 
676 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
677 	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
678 		vops.pt_update_ops[i].wait_vm_bookkeep = true;
679 
680 	xe_vm_assert_held(vm);
681 	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
682 		xe_assert(vm->xe, vma->tile_present);
683 
684 		if (rebind_worker)
685 			trace_xe_vma_rebind_worker(vma);
686 		else
687 			trace_xe_vma_rebind_exec(vma);
688 
689 		err = xe_vm_ops_add_rebind(&vops, vma,
690 					   vma->tile_present);
691 		if (err)
692 			goto free_ops;
693 	}
694 
695 	err = xe_vma_ops_alloc(&vops, false);
696 	if (err)
697 		goto free_ops;
698 
699 	fence = ops_execute(vm, &vops);
700 	if (IS_ERR(fence)) {
701 		err = PTR_ERR(fence);
702 	} else {
703 		dma_fence_put(fence);
704 		list_for_each_entry_safe(vma, next, &vm->rebind_list,
705 					 combined_links.rebind)
706 			list_del_init(&vma->combined_links.rebind);
707 	}
708 free_ops:
709 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
710 		list_del(&op->link);
711 		kfree(op);
712 	}
713 	xe_vma_ops_fini(&vops);
714 
715 	return err;
716 }
717 
718 struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask)
719 {
720 	struct dma_fence *fence = NULL;
721 	struct xe_vma_ops vops;
722 	struct xe_vma_op *op, *next_op;
723 	struct xe_tile *tile;
724 	u8 id;
725 	int err;
726 
727 	lockdep_assert_held(&vm->lock);
728 	xe_vm_assert_held(vm);
729 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
730 
731 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
732 	for_each_tile(tile, vm->xe, id) {
733 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
734 		vops.pt_update_ops[tile->id].q =
735 			xe_migrate_exec_queue(tile->migrate);
736 	}
737 
738 	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
739 	if (err)
740 		return ERR_PTR(err);
741 
742 	err = xe_vma_ops_alloc(&vops, false);
743 	if (err) {
744 		fence = ERR_PTR(err);
745 		goto free_ops;
746 	}
747 
748 	fence = ops_execute(vm, &vops);
749 
750 free_ops:
751 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
752 		list_del(&op->link);
753 		kfree(op);
754 	}
755 	xe_vma_ops_fini(&vops);
756 
757 	return fence;
758 }
759 
760 static void xe_vm_populate_range_rebind(struct xe_vma_op *op,
761 					struct xe_vma *vma,
762 					struct xe_svm_range *range,
763 					u8 tile_mask)
764 {
765 	INIT_LIST_HEAD(&op->link);
766 	op->tile_mask = tile_mask;
767 	op->base.op = DRM_GPUVA_OP_DRIVER;
768 	op->subop = XE_VMA_SUBOP_MAP_RANGE;
769 	op->map_range.vma = vma;
770 	op->map_range.range = range;
771 }
772 
773 static int
774 xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
775 			   struct xe_vma *vma,
776 			   struct xe_svm_range *range,
777 			   u8 tile_mask)
778 {
779 	struct xe_vma_op *op;
780 
781 	op = kzalloc(sizeof(*op), GFP_KERNEL);
782 	if (!op)
783 		return -ENOMEM;
784 
785 	xe_vm_populate_range_rebind(op, vma, range, tile_mask);
786 	list_add_tail(&op->link, &vops->list);
787 	xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
788 
789 	return 0;
790 }
791 
792 /**
793  * xe_vm_range_rebind() - VM range (re)bind
794  * @vm: The VM which the range belongs to.
795  * @vma: The VMA which the range belongs to.
796  * @range: SVM range to rebind.
797  * @tile_mask: Tile mask to bind the range to.
798  *
799  * (re)bind SVM range setting up GPU page tables for the range.
800  *
801  * Return: dma fence for rebind to signal completion on success, ERR_PTR on
802  * failure
803  */
804 struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
805 				     struct xe_vma *vma,
806 				     struct xe_svm_range *range,
807 				     u8 tile_mask)
808 {
809 	struct dma_fence *fence = NULL;
810 	struct xe_vma_ops vops;
811 	struct xe_vma_op *op, *next_op;
812 	struct xe_tile *tile;
813 	u8 id;
814 	int err;
815 
816 	lockdep_assert_held(&vm->lock);
817 	xe_vm_assert_held(vm);
818 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
819 	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
820 
821 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
822 	for_each_tile(tile, vm->xe, id) {
823 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
824 		vops.pt_update_ops[tile->id].q =
825 			xe_migrate_exec_queue(tile->migrate);
826 	}
827 
828 	err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
829 	if (err)
830 		return ERR_PTR(err);
831 
832 	err = xe_vma_ops_alloc(&vops, false);
833 	if (err) {
834 		fence = ERR_PTR(err);
835 		goto free_ops;
836 	}
837 
838 	fence = ops_execute(vm, &vops);
839 
840 free_ops:
841 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
842 		list_del(&op->link);
843 		kfree(op);
844 	}
845 	xe_vma_ops_fini(&vops);
846 
847 	return fence;
848 }
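
/*
 * Illustrative sketch: the caller owns the returned fence reference and must
 * check for an error pointer before using it; vm, vma, range and tile_mask
 * are assumed to come from the fault-handling context.
 *
 *	fence = xe_vm_range_rebind(vm, vma, range, tile_mask);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 */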
849 
850 static void xe_vm_populate_range_unbind(struct xe_vma_op *op,
851 					struct xe_svm_range *range)
852 {
853 	INIT_LIST_HEAD(&op->link);
854 	op->tile_mask = range->tile_present;
855 	op->base.op = DRM_GPUVA_OP_DRIVER;
856 	op->subop = XE_VMA_SUBOP_UNMAP_RANGE;
857 	op->unmap_range.range = range;
858 }
859 
860 static int
861 xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
862 			   struct xe_svm_range *range)
863 {
864 	struct xe_vma_op *op;
865 
866 	op = kzalloc(sizeof(*op), GFP_KERNEL);
867 	if (!op)
868 		return -ENOMEM;
869 
870 	xe_vm_populate_range_unbind(op, range);
871 	list_add_tail(&op->link, &vops->list);
872 	xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
873 
874 	return 0;
875 }
876 
877 /**
878  * xe_vm_range_unbind() - VM range unbind
879  * @vm: The VM which the range belongs to.
880  * @range: SVM range to unbind.
881  *
882  * Unbind SVM range removing the GPU page tables for the range.
883  *
884  * Return: dma fence for unbind to signal completion on success, ERR_PTR on
885  * failure
886  */
887 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
888 				     struct xe_svm_range *range)
889 {
890 	struct dma_fence *fence = NULL;
891 	struct xe_vma_ops vops;
892 	struct xe_vma_op *op, *next_op;
893 	struct xe_tile *tile;
894 	u8 id;
895 	int err;
896 
897 	lockdep_assert_held(&vm->lock);
898 	xe_vm_assert_held(vm);
899 	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
900 
901 	if (!range->tile_present)
902 		return dma_fence_get_stub();
903 
904 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
905 	for_each_tile(tile, vm->xe, id) {
906 		vops.pt_update_ops[id].wait_vm_bookkeep = true;
907 		vops.pt_update_ops[tile->id].q =
908 			xe_migrate_exec_queue(tile->migrate);
909 	}
910 
911 	err = xe_vm_ops_add_range_unbind(&vops, range);
912 	if (err)
913 		return ERR_PTR(err);
914 
915 	err = xe_vma_ops_alloc(&vops, false);
916 	if (err) {
917 		fence = ERR_PTR(err);
918 		goto free_ops;
919 	}
920 
921 	fence = ops_execute(vm, &vops);
922 
923 free_ops:
924 	list_for_each_entry_safe(op, next_op, &vops.list, link) {
925 		list_del(&op->link);
926 		kfree(op);
927 	}
928 	xe_vma_ops_fini(&vops);
929 
930 	return fence;
931 }
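
/*
 * Illustrative sketch: same ownership rules as xe_vm_range_rebind(). When the
 * range has no tiles present, the stub fence returned is already signaled, so
 * the pattern below works unchanged.
 *
 *	fence = xe_vm_range_unbind(vm, range);
 *	if (IS_ERR(fence))
 *		return PTR_ERR(fence);
 *	dma_fence_put(fence);
 */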
932 
933 static void xe_vma_free(struct xe_vma *vma)
934 {
935 	if (xe_vma_is_userptr(vma))
936 		kfree(to_userptr_vma(vma));
937 	else
938 		kfree(vma);
939 }
940 
941 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
942 				    struct xe_bo *bo,
943 				    u64 bo_offset_or_userptr,
944 				    u64 start, u64 end,
945 				    struct xe_vma_mem_attr *attr,
946 				    unsigned int flags)
947 {
948 	struct xe_vma *vma;
949 	struct xe_tile *tile;
950 	u8 id;
951 	bool is_null = (flags & DRM_GPUVA_SPARSE);
952 	bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);
953 
954 	xe_assert(vm->xe, start < end);
955 	xe_assert(vm->xe, end < vm->size);
956 
957 	/*
958 	 * Allocate and ensure that the xe_vma_is_userptr() return value
959 	 * matches what was allocated.
960 	 */
961 	if (!bo && !is_null && !is_cpu_addr_mirror) {
962 		struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL);
963 
964 		if (!uvma)
965 			return ERR_PTR(-ENOMEM);
966 
967 		vma = &uvma->vma;
968 	} else {
969 		vma = kzalloc(sizeof(*vma), GFP_KERNEL);
970 		if (!vma)
971 			return ERR_PTR(-ENOMEM);
972 
973 		if (bo)
974 			vma->gpuva.gem.obj = &bo->ttm.base;
975 	}
976 
977 	INIT_LIST_HEAD(&vma->combined_links.rebind);
978 
979 	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
980 	vma->gpuva.vm = &vm->gpuvm;
981 	vma->gpuva.va.addr = start;
982 	vma->gpuva.va.range = end - start + 1;
983 	vma->gpuva.flags = flags;
984 
985 	for_each_tile(tile, vm->xe, id)
986 		vma->tile_mask |= 0x1 << id;
987 
988 	if (vm->xe->info.has_atomic_enable_pte_bit)
989 		vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
990 
991 	vma->attr = *attr;
992 
993 	if (bo) {
994 		struct drm_gpuvm_bo *vm_bo;
995 
996 		xe_bo_assert_held(bo);
997 
998 		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
999 		if (IS_ERR(vm_bo)) {
1000 			xe_vma_free(vma);
1001 			return ERR_CAST(vm_bo);
1002 		}
1003 
1004 		drm_gpuvm_bo_extobj_add(vm_bo);
1005 		drm_gem_object_get(&bo->ttm.base);
1006 		vma->gpuva.gem.offset = bo_offset_or_userptr;
1007 		drm_gpuva_link(&vma->gpuva, vm_bo);
1008 		drm_gpuvm_bo_put(vm_bo);
1009 	} else /* userptr or null */ {
1010 		if (!is_null && !is_cpu_addr_mirror) {
1011 			struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1012 			u64 size = end - start + 1;
1013 			int err;
1014 
1015 			vma->gpuva.gem.offset = bo_offset_or_userptr;
1016 
1017 			err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
1018 			if (err) {
1019 				xe_vma_free(vma);
1020 				return ERR_PTR(err);
1021 			}
1022 		}
1023 
1024 		xe_vm_get(vm);
1025 	}
1026 
1027 	return vma;
1028 }
1029 
1030 static void xe_vma_destroy_late(struct xe_vma *vma)
1031 {
1032 	struct xe_vm *vm = xe_vma_vm(vma);
1033 
1034 	if (vma->ufence) {
1035 		xe_sync_ufence_put(vma->ufence);
1036 		vma->ufence = NULL;
1037 	}
1038 
1039 	if (xe_vma_is_userptr(vma)) {
1040 		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
1041 
1042 		xe_userptr_remove(uvma);
1043 		xe_vm_put(vm);
1044 	} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
1045 		xe_vm_put(vm);
1046 	} else {
1047 		xe_bo_put(xe_vma_bo(vma));
1048 	}
1049 
1050 	xe_vma_free(vma);
1051 }
1052 
1053 static void vma_destroy_work_func(struct work_struct *w)
1054 {
1055 	struct xe_vma *vma =
1056 		container_of(w, struct xe_vma, destroy_work);
1057 
1058 	xe_vma_destroy_late(vma);
1059 }
1060 
1061 static void vma_destroy_cb(struct dma_fence *fence,
1062 			   struct dma_fence_cb *cb)
1063 {
1064 	struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb);
1065 
1066 	INIT_WORK(&vma->destroy_work, vma_destroy_work_func);
1067 	queue_work(system_unbound_wq, &vma->destroy_work);
1068 }
1069 
1070 static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
1071 {
1072 	struct xe_vm *vm = xe_vma_vm(vma);
1073 
1074 	lockdep_assert_held_write(&vm->lock);
1075 	xe_assert(vm->xe, list_empty(&vma->combined_links.destroy));
1076 
1077 	if (xe_vma_is_userptr(vma)) {
1078 		xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
1079 		xe_userptr_destroy(to_userptr_vma(vma));
1080 	} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
1081 		xe_bo_assert_held(xe_vma_bo(vma));
1082 
1083 		drm_gpuva_unlink(&vma->gpuva);
1084 	}
1085 
1086 	xe_vm_assert_held(vm);
1087 	if (fence) {
1088 		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
1089 						 vma_destroy_cb);
1090 
1091 		if (ret) {
1092 			XE_WARN_ON(ret != -ENOENT);
1093 			xe_vma_destroy_late(vma);
1094 		}
1095 	} else {
1096 		xe_vma_destroy_late(vma);
1097 	}
1098 }
1099 
1100 /**
1101  * xe_vm_lock_vma() - drm_exec utility to lock a vma
1102  * @exec: The drm_exec object we're currently locking for.
1103  * @vma: The vma for which we want to lock the vm resv and any attached
1104  * object's resv.
1105  *
1106  * Return: 0 on success, negative error code on error. In particular
1107  * may return -EDEADLK on WW transaction contention and -EINTR if
1108  * an interruptible wait is terminated by a signal.
1109  */
1110 int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
1111 {
1112 	struct xe_vm *vm = xe_vma_vm(vma);
1113 	struct xe_bo *bo = xe_vma_bo(vma);
1114 	int err;
1115 
1116 	XE_WARN_ON(!vm);
1117 
1118 	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
1119 	if (!err && bo && !bo->vm)
1120 		err = drm_exec_lock_obj(exec, &bo->ttm.base);
1121 
1122 	return err;
1123 }
1124 
1125 static void xe_vma_destroy_unlocked(struct xe_vma *vma)
1126 {
1127 	struct xe_device *xe = xe_vma_vm(vma)->xe;
1128 	struct xe_validation_ctx ctx;
1129 	struct drm_exec exec;
1130 	int err = 0;
1131 
1132 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1133 		err = xe_vm_lock_vma(&exec, vma);
1134 		drm_exec_retry_on_contention(&exec);
1135 		if (XE_WARN_ON(err))
1136 			break;
1137 		xe_vma_destroy(vma, NULL);
1138 	}
1139 	xe_assert(xe, !err);
1140 }
1141 
1142 struct xe_vma *
1143 xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range)
1144 {
1145 	struct drm_gpuva *gpuva;
1146 
1147 	lockdep_assert_held(&vm->lock);
1148 
1149 	if (xe_vm_is_closed_or_banned(vm))
1150 		return NULL;
1151 
1152 	xe_assert(vm->xe, start + range <= vm->size);
1153 
1154 	gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range);
1155 
1156 	return gpuva ? gpuva_to_vma(gpuva) : NULL;
1157 }
1158 
1159 static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
1160 {
1161 	int err;
1162 
1163 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1164 	lockdep_assert_held(&vm->lock);
1165 
1166 	mutex_lock(&vm->snap_mutex);
1167 	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
1168 	mutex_unlock(&vm->snap_mutex);
1169 	XE_WARN_ON(err);	/* Shouldn't be possible */
1170 
1171 	return err;
1172 }
1173 
1174 static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
1175 {
1176 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
1177 	lockdep_assert_held(&vm->lock);
1178 
1179 	mutex_lock(&vm->snap_mutex);
1180 	drm_gpuva_remove(&vma->gpuva);
1181 	mutex_unlock(&vm->snap_mutex);
1182 	if (vm->usm.last_fault_vma == vma)
1183 		vm->usm.last_fault_vma = NULL;
1184 }
1185 
1186 static struct drm_gpuva_op *xe_vm_op_alloc(void)
1187 {
1188 	struct xe_vma_op *op;
1189 
1190 	op = kzalloc(sizeof(*op), GFP_KERNEL);
1191 
1192 	if (unlikely(!op))
1193 		return NULL;
1194 
1195 	return &op->base;
1196 }
1197 
1198 static void xe_vm_free(struct drm_gpuvm *gpuvm);
1199 
1200 static const struct drm_gpuvm_ops gpuvm_ops = {
1201 	.op_alloc = xe_vm_op_alloc,
1202 	.vm_bo_validate = xe_gpuvm_validate,
1203 	.vm_free = xe_vm_free,
1204 };
1205 
1206 static u64 pde_encode_pat_index(u16 pat_index)
1207 {
1208 	u64 pte = 0;
1209 
1210 	if (pat_index & BIT(0))
1211 		pte |= XE_PPGTT_PTE_PAT0;
1212 
1213 	if (pat_index & BIT(1))
1214 		pte |= XE_PPGTT_PTE_PAT1;
1215 
1216 	return pte;
1217 }
1218 
1219 static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level)
1220 {
1221 	u64 pte = 0;
1222 
1223 	if (pat_index & BIT(0))
1224 		pte |= XE_PPGTT_PTE_PAT0;
1225 
1226 	if (pat_index & BIT(1))
1227 		pte |= XE_PPGTT_PTE_PAT1;
1228 
1229 	if (pat_index & BIT(2)) {
1230 		if (pt_level)
1231 			pte |= XE_PPGTT_PDE_PDPE_PAT2;
1232 		else
1233 			pte |= XE_PPGTT_PTE_PAT2;
1234 	}
1235 
1236 	if (pat_index & BIT(3))
1237 		pte |= XELPG_PPGTT_PTE_PAT3;
1238 
1239 	if (pat_index & (BIT(4)))
1240 		pte |= XE2_PPGTT_PTE_PAT4;
1241 
1242 	return pte;
1243 }
1244 
1245 static u64 pte_encode_ps(u32 pt_level)
1246 {
1247 	XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL);
1248 
1249 	if (pt_level == 1)
1250 		return XE_PDE_PS_2M;
1251 	else if (pt_level == 2)
1252 		return XE_PDPE_PS_1G;
1253 
1254 	return 0;
1255 }
1256 
1257 static u16 pde_pat_index(struct xe_bo *bo)
1258 {
1259 	struct xe_device *xe = xe_bo_device(bo);
1260 	u16 pat_index;
1261 
1262 	/*
1263 	 * We only have two bits to encode the PAT index in non-leaf nodes, but
1264 	 * these only point to other paging structures so we only need a minimal
1265 	 * selection of options. The user PAT index is only for encoding leaf
1266  * nodes, where we have more bits available to do the encoding. The
1267  * non-leaf nodes are instead under driver control so the chosen index
1268  * here should be distinct from the user PAT index. Also the
1269 	 * corresponding coherency of the PAT index should be tied to the
1270 	 * allocation type of the page table (or at least we should pick
1271 	 * something which is always safe).
1272 	 */
1273 	if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
1274 		pat_index = xe->pat.idx[XE_CACHE_WB];
1275 	else
1276 		pat_index = xe->pat.idx[XE_CACHE_NONE];
1277 
1278 	xe_assert(xe, pat_index <= 3);
1279 
1280 	return pat_index;
1281 }
1282 
1283 static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
1284 {
1285 	u64 pde;
1286 
1287 	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1288 	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
1289 	pde |= pde_encode_pat_index(pde_pat_index(bo));
1290 
1291 	return pde;
1292 }
1293 
1294 static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
1295 			      u16 pat_index, u32 pt_level)
1296 {
1297 	u64 pte;
1298 
1299 	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
1300 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1301 	pte |= pte_encode_pat_index(pat_index, pt_level);
1302 	pte |= pte_encode_ps(pt_level);
1303 
1304 	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
1305 		pte |= XE_PPGTT_PTE_DM;
1306 
1307 	return pte;
1308 }
1309 
1310 static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
1311 			       u16 pat_index, u32 pt_level)
1312 {
1313 	pte |= XE_PAGE_PRESENT;
1314 
1315 	if (likely(!xe_vma_read_only(vma)))
1316 		pte |= XE_PAGE_RW;
1317 
1318 	pte |= pte_encode_pat_index(pat_index, pt_level);
1319 	pte |= pte_encode_ps(pt_level);
1320 
1321 	if (unlikely(xe_vma_is_null(vma)))
1322 		pte |= XE_PTE_NULL;
1323 
1324 	return pte;
1325 }
1326 
1327 static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
1328 				u16 pat_index,
1329 				u32 pt_level, bool devmem, u64 flags)
1330 {
1331 	u64 pte;
1332 
1333 	/* Avoid passing random bits directly as flags */
1334 	xe_assert(xe, !(flags & ~XE_PTE_PS64));
1335 
1336 	pte = addr;
1337 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
1338 	pte |= pte_encode_pat_index(pat_index, pt_level);
1339 	pte |= pte_encode_ps(pt_level);
1340 
1341 	if (devmem)
1342 		pte |= XE_PPGTT_PTE_DM;
1343 
1344 	pte |= flags;
1345 
1346 	return pte;
1347 }
1348 
1349 static const struct xe_pt_ops xelp_pt_ops = {
1350 	.pte_encode_bo = xelp_pte_encode_bo,
1351 	.pte_encode_vma = xelp_pte_encode_vma,
1352 	.pte_encode_addr = xelp_pte_encode_addr,
1353 	.pde_encode_bo = xelp_pde_encode_bo,
1354 };
1355 
1356 static void vm_destroy_work_func(struct work_struct *w);
1357 
1358 /**
1359  * xe_vm_create_scratch() - Set up a scratch memory pagetable tree for the
1360  * given tile and vm.
1361  * @xe: xe device.
1362  * @tile: tile to set up for.
1363  * @vm: vm to set up for.
1364  * @exec: The struct drm_exec object used to lock the vm resv.
1365  *
1366  * Sets up a pagetable tree with one page-table per level and a single
1367  * leaf PTE. All pagetable entries point to the single page-table or,
1368  * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and
1369  * writes become NOPs.
1370  *
1371  * Return: 0 on success, negative error code on error.
1372  */
1373 static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
1374 				struct xe_vm *vm, struct drm_exec *exec)
1375 {
1376 	u8 id = tile->id;
1377 	int i;
1378 
1379 	for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
1380 		vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
1381 		if (IS_ERR(vm->scratch_pt[id][i])) {
1382 			int err = PTR_ERR(vm->scratch_pt[id][i]);
1383 
1384 			vm->scratch_pt[id][i] = NULL;
1385 			return err;
1386 		}
1387 		xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
1388 	}
1389 
1390 	return 0;
1391 }
1392 ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
1393 
1394 static void xe_vm_free_scratch(struct xe_vm *vm)
1395 {
1396 	struct xe_tile *tile;
1397 	u8 id;
1398 
1399 	if (!xe_vm_has_scratch(vm))
1400 		return;
1401 
1402 	for_each_tile(tile, vm->xe, id) {
1403 		u32 i;
1404 
1405 		if (!vm->pt_root[id])
1406 			continue;
1407 
1408 		for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i)
1409 			if (vm->scratch_pt[id][i])
1410 				xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL);
1411 	}
1412 }
1413 
1414 static void xe_vm_pt_destroy(struct xe_vm *vm)
1415 {
1416 	struct xe_tile *tile;
1417 	u8 id;
1418 
1419 	xe_vm_assert_held(vm);
1420 
1421 	for_each_tile(tile, vm->xe, id) {
1422 		if (vm->pt_root[id]) {
1423 			xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
1424 			vm->pt_root[id] = NULL;
1425 		}
1426 	}
1427 }
1428 
1429 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
1430 {
1431 	struct drm_gem_object *vm_resv_obj;
1432 	struct xe_validation_ctx ctx;
1433 	struct drm_exec exec;
1434 	struct xe_vm *vm;
1435 	int err, number_tiles = 0;
1436 	struct xe_tile *tile;
1437 	u8 id;
1438 
1439 	/*
1440 	 * Since the GSCCS is not user-accessible, we don't expect a GSC VM to
1441 	 * ever be in faulting mode.
1442 	 */
1443 	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
1444 
1445 	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
1446 	if (!vm)
1447 		return ERR_PTR(-ENOMEM);
1448 
1449 	vm->xe = xe;
1450 
1451 	vm->size = 1ull << xe->info.va_bits;
1452 	vm->flags = flags;
1453 
1454 	if (xef)
1455 		vm->xef = xe_file_get(xef);
1456 	/**
1457 	 * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
1458 	 * manipulated under the PXP mutex. However, the PXP mutex can be taken
1459 	 * under a user-VM lock when the PXP session is started at exec_queue
1460 	 * creation time. Those are different VMs and therefore there is no risk
1461 	 * of deadlock, but we need to tell lockdep that this is the case or it
1462 	 * will print a warning.
1463 	 */
1464 	if (flags & XE_VM_FLAG_GSC) {
1465 		static struct lock_class_key gsc_vm_key;
1466 
1467 		__init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
1468 	} else {
1469 		init_rwsem(&vm->lock);
1470 	}
1471 	mutex_init(&vm->snap_mutex);
1472 
1473 	INIT_LIST_HEAD(&vm->rebind_list);
1474 
1475 	INIT_LIST_HEAD(&vm->userptr.repin_list);
1476 	INIT_LIST_HEAD(&vm->userptr.invalidated);
1477 	spin_lock_init(&vm->userptr.invalidated_lock);
1478 
1479 	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
1480 
1481 	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
1482 
1483 	INIT_LIST_HEAD(&vm->preempt.exec_queues);
1484 	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
1485 
1486 	for_each_tile(tile, xe, id)
1487 		xe_range_fence_tree_init(&vm->rftree[id]);
1488 
1489 	vm->pt_ops = &xelp_pt_ops;
1490 
1491 	/*
1492 	 * Long-running workloads are not protected by the scheduler references.
1493 	 * By design, run_job for long-running workloads returns NULL and the
1494 	 * scheduler drops all of its references, hence the VM must be
1495 	 * protected explicitly in this case.
1496 	 */
1497 	if (flags & XE_VM_FLAG_LR_MODE) {
1498 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1499 		xe_pm_runtime_get_noresume(xe);
1500 		INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
1501 	}
1502 
1503 	err = xe_svm_init(vm);
1504 	if (err)
1505 		goto err_no_resv;
1506 
1507 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
1508 	if (!vm_resv_obj) {
1509 		err = -ENOMEM;
1510 		goto err_svm_fini;
1511 	}
1512 
1513 	drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
1514 		       vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops);
1515 
1516 	drm_gem_object_put(vm_resv_obj);
1517 
1518 	err = 0;
1519 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1520 			    err) {
1521 		err = xe_vm_drm_exec_lock(vm, &exec);
1522 		drm_exec_retry_on_contention(&exec);
1523 
1524 		if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1525 			vm->flags |= XE_VM_FLAG_64K;
1526 
1527 		for_each_tile(tile, xe, id) {
1528 			if (flags & XE_VM_FLAG_MIGRATION &&
1529 			    tile->id != XE_VM_FLAG_TILE_ID(flags))
1530 				continue;
1531 
1532 			vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
1533 						       &exec);
1534 			if (IS_ERR(vm->pt_root[id])) {
1535 				err = PTR_ERR(vm->pt_root[id]);
1536 				vm->pt_root[id] = NULL;
1537 				xe_vm_pt_destroy(vm);
1538 				drm_exec_retry_on_contention(&exec);
1539 				xe_validation_retry_on_oom(&ctx, &err);
1540 				break;
1541 			}
1542 		}
1543 		if (err)
1544 			break;
1545 
1546 		if (xe_vm_has_scratch(vm)) {
1547 			for_each_tile(tile, xe, id) {
1548 				if (!vm->pt_root[id])
1549 					continue;
1550 
1551 				err = xe_vm_create_scratch(xe, tile, vm, &exec);
1552 				if (err) {
1553 					xe_vm_free_scratch(vm);
1554 					xe_vm_pt_destroy(vm);
1555 					drm_exec_retry_on_contention(&exec);
1556 					xe_validation_retry_on_oom(&ctx, &err);
1557 					break;
1558 				}
1559 			}
1560 			if (err)
1561 				break;
1562 			vm->batch_invalidate_tlb = true;
1563 		}
1564 
1565 		if (vm->flags & XE_VM_FLAG_LR_MODE) {
1566 			INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
1567 			vm->batch_invalidate_tlb = false;
1568 		}
1569 
1570 		/* Fill pt_root after allocating scratch tables */
1571 		for_each_tile(tile, xe, id) {
1572 			if (!vm->pt_root[id])
1573 				continue;
1574 
1575 			xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
1576 		}
1577 	}
1578 	if (err)
1579 		goto err_close;
1580 
1581 	/* The kernel migration VM shouldn't have a circular loop. */
1582 	if (!(flags & XE_VM_FLAG_MIGRATION)) {
1583 		for_each_tile(tile, xe, id) {
1584 			struct xe_exec_queue *q;
1585 			u32 create_flags = EXEC_QUEUE_FLAG_VM;
1586 
1587 			if (!vm->pt_root[id])
1588 				continue;
1589 
1590 			q = xe_exec_queue_create_bind(xe, tile, create_flags, 0);
1591 			if (IS_ERR(q)) {
1592 				err = PTR_ERR(q);
1593 				goto err_close;
1594 			}
1595 			vm->q[id] = q;
1596 			number_tiles++;
1597 		}
1598 	}
1599 
1600 	if (number_tiles > 1)
1601 		vm->composite_fence_ctx = dma_fence_context_alloc(1);
1602 
1603 	if (xef && xe->info.has_asid) {
1604 		u32 asid;
1605 
1606 		down_write(&xe->usm.lock);
1607 		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
1608 				      XA_LIMIT(1, XE_MAX_ASID - 1),
1609 				      &xe->usm.next_asid, GFP_KERNEL);
1610 		up_write(&xe->usm.lock);
1611 		if (err < 0)
1612 			goto err_close;
1613 
1614 		vm->usm.asid = asid;
1615 	}
1616 
1617 	trace_xe_vm_create(vm);
1618 
1619 	return vm;
1620 
1621 err_close:
1622 	xe_vm_close_and_put(vm);
1623 	return ERR_PTR(err);
1624 
1625 err_svm_fini:
1626 	if (flags & XE_VM_FLAG_FAULT_MODE) {
1627 		vm->size = 0; /* close the vm */
1628 		xe_svm_fini(vm);
1629 	}
1630 err_no_resv:
1631 	mutex_destroy(&vm->snap_mutex);
1632 	for_each_tile(tile, xe, id)
1633 		xe_range_fence_tree_fini(&vm->rftree[id]);
1634 	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1635 	if (vm->xef)
1636 		xe_file_put(vm->xef);
1637 	kfree(vm);
1638 	if (flags & XE_VM_FLAG_LR_MODE)
1639 		xe_pm_runtime_put(xe);
1640 	return ERR_PTR(err);
1641 }
1642 
1643 static void xe_vm_close(struct xe_vm *vm)
1644 {
1645 	struct xe_device *xe = vm->xe;
1646 	bool bound;
1647 	int idx;
1648 
1649 	bound = drm_dev_enter(&xe->drm, &idx);
1650 
1651 	down_write(&vm->lock);
1652 	if (xe_vm_in_fault_mode(vm))
1653 		xe_svm_notifier_lock(vm);
1654 
1655 	vm->size = 0;
1656 
1657 	if (!((vm->flags & XE_VM_FLAG_MIGRATION))) {
1658 		struct xe_tile *tile;
1659 		struct xe_gt *gt;
1660 		u8 id;
1661 
1662 		/* Wait for pending binds */
1663 		dma_resv_wait_timeout(xe_vm_resv(vm),
1664 				      DMA_RESV_USAGE_BOOKKEEP,
1665 				      false, MAX_SCHEDULE_TIMEOUT);
1666 
1667 		if (bound) {
1668 			for_each_tile(tile, xe, id)
1669 				if (vm->pt_root[id])
1670 					xe_pt_clear(xe, vm->pt_root[id]);
1671 
1672 			for_each_gt(gt, xe, id)
1673 				xe_tlb_inval_vm(&gt->tlb_inval, vm);
1674 		}
1675 	}
1676 
1677 	if (xe_vm_in_fault_mode(vm))
1678 		xe_svm_notifier_unlock(vm);
1679 	up_write(&vm->lock);
1680 
1681 	if (bound)
1682 		drm_dev_exit(idx);
1683 }
1684 
1685 void xe_vm_close_and_put(struct xe_vm *vm)
1686 {
1687 	LIST_HEAD(contested);
1688 	struct xe_device *xe = vm->xe;
1689 	struct xe_tile *tile;
1690 	struct xe_vma *vma, *next_vma;
1691 	struct drm_gpuva *gpuva, *next;
1692 	u8 id;
1693 
1694 	xe_assert(xe, !vm->preempt.num_exec_queues);
1695 
1696 	xe_vm_close(vm);
1697 	if (xe_vm_in_preempt_fence_mode(vm)) {
1698 		mutex_lock(&xe->rebind_resume_lock);
1699 		list_del_init(&vm->preempt.pm_activate_link);
1700 		mutex_unlock(&xe->rebind_resume_lock);
1701 		flush_work(&vm->preempt.rebind_work);
1702 	}
1703 	if (xe_vm_in_fault_mode(vm))
1704 		xe_svm_close(vm);
1705 
1706 	down_write(&vm->lock);
1707 	for_each_tile(tile, xe, id) {
1708 		if (vm->q[id])
1709 			xe_exec_queue_last_fence_put(vm->q[id], vm);
1710 	}
1711 	up_write(&vm->lock);
1712 
1713 	for_each_tile(tile, xe, id) {
1714 		if (vm->q[id]) {
1715 			xe_exec_queue_kill(vm->q[id]);
1716 			xe_exec_queue_put(vm->q[id]);
1717 			vm->q[id] = NULL;
1718 		}
1719 	}
1720 
1721 	down_write(&vm->lock);
1722 	xe_vm_lock(vm, false);
1723 	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
1724 		vma = gpuva_to_vma(gpuva);
1725 
1726 		if (xe_vma_has_no_bo(vma)) {
1727 			xe_svm_notifier_lock(vm);
1728 			vma->gpuva.flags |= XE_VMA_DESTROYED;
1729 			xe_svm_notifier_unlock(vm);
1730 		}
1731 
1732 		xe_vm_remove_vma(vm, vma);
1733 
1734 		/* easy case, remove from VMA? */
1735 		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
1736 			list_del_init(&vma->combined_links.rebind);
1737 			xe_vma_destroy(vma, NULL);
1738 			continue;
1739 		}
1740 
1741 		list_move_tail(&vma->combined_links.destroy, &contested);
1742 		vma->gpuva.flags |= XE_VMA_DESTROYED;
1743 	}
1744 
1745 	/*
1746 	 * All vm operations will add shared fences to resv.
1747 	 * The only exception is eviction for a shared object,
1748 	 * but even so, the unbind when evicted would still
1749 	 * install a fence to resv. Hence it's safe to
1750 	 * destroy the pagetables immediately.
1751 	 */
1752 	xe_vm_free_scratch(vm);
1753 	xe_vm_pt_destroy(vm);
1754 	xe_vm_unlock(vm);
1755 
1756 	/*
1757 	 * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL
1758 	 * Since we hold a refcount to the bo, we can remove and free
1759 	 * the members safely without locking.
1760 	 */
1761 	list_for_each_entry_safe(vma, next_vma, &contested,
1762 				 combined_links.destroy) {
1763 		list_del_init(&vma->combined_links.destroy);
1764 		xe_vma_destroy_unlocked(vma);
1765 	}
1766 
1767 	xe_svm_fini(vm);
1768 
1769 	up_write(&vm->lock);
1770 
1771 	down_write(&xe->usm.lock);
1772 	if (vm->usm.asid) {
1773 		void *lookup;
1774 
1775 		xe_assert(xe, xe->info.has_asid);
1776 		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
1777 
1778 		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
1779 		xe_assert(xe, lookup == vm);
1780 	}
1781 	up_write(&xe->usm.lock);
1782 
1783 	for_each_tile(tile, xe, id)
1784 		xe_range_fence_tree_fini(&vm->rftree[id]);
1785 
1786 	xe_vm_put(vm);
1787 }
1788 
1789 static void vm_destroy_work_func(struct work_struct *w)
1790 {
1791 	struct xe_vm *vm =
1792 		container_of(w, struct xe_vm, destroy_work);
1793 	struct xe_device *xe = vm->xe;
1794 	struct xe_tile *tile;
1795 	u8 id;
1796 
1797 	/* xe_vm_close_and_put was not called? */
1798 	xe_assert(xe, !vm->size);
1799 
1800 	if (xe_vm_in_preempt_fence_mode(vm))
1801 		flush_work(&vm->preempt.rebind_work);
1802 
1803 	mutex_destroy(&vm->snap_mutex);
1804 
1805 	if (vm->flags & XE_VM_FLAG_LR_MODE)
1806 		xe_pm_runtime_put(xe);
1807 
1808 	for_each_tile(tile, xe, id)
1809 		XE_WARN_ON(vm->pt_root[id]);
1810 
1811 	trace_xe_vm_free(vm);
1812 
1813 	ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
1814 
1815 	if (vm->xef)
1816 		xe_file_put(vm->xef);
1817 
1818 	kfree(vm);
1819 }
1820 
1821 static void xe_vm_free(struct drm_gpuvm *gpuvm)
1822 {
1823 	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
1824 
1825 	/* To destroy the VM we need to be able to sleep */
1826 	queue_work(system_unbound_wq, &vm->destroy_work);
1827 }
1828 
1829 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
1830 {
1831 	struct xe_vm *vm;
1832 
1833 	mutex_lock(&xef->vm.lock);
1834 	vm = xa_load(&xef->vm.xa, id);
1835 	if (vm)
1836 		xe_vm_get(vm);
1837 	mutex_unlock(&xef->vm.lock);
1838 
1839 	return vm;
1840 }
1841 
1842 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
1843 {
1844 	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
1845 }
1846 
1847 static struct xe_exec_queue *
1848 to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
1849 {
1850 	return q ? q : vm->q[0];
1851 }
1852 
1853 static struct xe_user_fence *
1854 find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
1855 {
1856 	unsigned int i;
1857 
1858 	for (i = 0; i < num_syncs; i++) {
1859 		struct xe_sync_entry *e = &syncs[i];
1860 
1861 		if (xe_sync_is_ufence(e))
1862 			return xe_sync_ufence_get(e);
1863 	}
1864 
1865 	return NULL;
1866 }
1867 
1868 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
1869 				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
1870 				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1871 
1872 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
1873 		       struct drm_file *file)
1874 {
1875 	struct xe_device *xe = to_xe_device(dev);
1876 	struct xe_file *xef = to_xe_file(file);
1877 	struct drm_xe_vm_create *args = data;
1878 	struct xe_vm *vm;
1879 	u32 id;
1880 	int err;
1881 	u32 flags = 0;
1882 
1883 	if (XE_IOCTL_DBG(xe, args->extensions))
1884 		return -EINVAL;
1885 
1886 	if (XE_GT_WA(xe_root_mmio_gt(xe), 14016763929))
1887 		args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
1888 
1889 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1890 			 !xe->info.has_usm))
1891 		return -EINVAL;
1892 
1893 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1894 		return -EINVAL;
1895 
1896 	if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS))
1897 		return -EINVAL;
1898 
1899 	if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE &&
1900 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
1901 			 !xe->info.needs_scratch))
1902 		return -EINVAL;
1903 
1904 	if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) &&
1905 			 args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
1906 		return -EINVAL;
1907 
1908 	if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
1909 		flags |= XE_VM_FLAG_SCRATCH_PAGE;
1910 	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
1911 		flags |= XE_VM_FLAG_LR_MODE;
1912 	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
1913 		flags |= XE_VM_FLAG_FAULT_MODE;
1914 
1915 	vm = xe_vm_create(xe, flags, xef);
1916 	if (IS_ERR(vm))
1917 		return PTR_ERR(vm);
1918 
1919 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
1920 	/* Warning: Security issue - never enable by default */
1921 	args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
1922 #endif
1923 
1924 	/* user id alloc must always be last in ioctl to prevent UAF */
1925 	err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL);
1926 	if (err)
1927 		goto err_close_and_put;
1928 
1929 	args->vm_id = id;
1930 
1931 	return 0;
1932 
1933 err_close_and_put:
1934 	xe_vm_close_and_put(vm);
1935 
1936 	return err;
1937 }
1938 
1939 int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
1940 			struct drm_file *file)
1941 {
1942 	struct xe_device *xe = to_xe_device(dev);
1943 	struct xe_file *xef = to_xe_file(file);
1944 	struct drm_xe_vm_destroy *args = data;
1945 	struct xe_vm *vm;
1946 	int err = 0;
1947 
1948 	if (XE_IOCTL_DBG(xe, args->pad) ||
1949 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1950 		return -EINVAL;
1951 
1952 	mutex_lock(&xef->vm.lock);
1953 	vm = xa_load(&xef->vm.xa, args->vm_id);
1954 	if (XE_IOCTL_DBG(xe, !vm))
1955 		err = -ENOENT;
1956 	else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues))
1957 		err = -EBUSY;
1958 	else
1959 		xa_erase(&xef->vm.xa, args->vm_id);
1960 	mutex_unlock(&xef->vm.lock);
1961 
1962 	if (!err)
1963 		xe_vm_close_and_put(vm);
1964 
1965 	return err;
1966 }
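
/*
 * Illustrative userspace sketch (not part of this driver): creating a VM in
 * long-running (LR) mode and destroying it again through the two ioctls
 * above. Ioctl and struct names follow uapi/drm/xe_drm.h as used in this
 * file; error handling is omitted.
 *
 *	struct drm_xe_vm_create create = {
 *		.flags = DRM_XE_VM_CREATE_FLAG_LR_MODE,
 *	};
 *	struct drm_xe_vm_destroy destroy = {};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_CREATE, &create);
 *	destroy.vm_id = create.vm_id;
 *	ioctl(fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
 */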
1967 
1968 static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
1969 {
1970 	struct drm_gpuva *gpuva;
1971 	u32 num_vmas = 0;
1972 
1973 	lockdep_assert_held(&vm->lock);
1974 	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
1975 		num_vmas++;
1976 
1977 	return num_vmas;
1978 }
1979 
1980 static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
1981 			 u64 end, struct drm_xe_mem_range_attr *attrs)
1982 {
1983 	struct drm_gpuva *gpuva;
1984 	int i = 0;
1985 
1986 	lockdep_assert_held(&vm->lock);
1987 
1988 	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
1989 		struct xe_vma *vma = gpuva_to_vma(gpuva);
1990 
1991 		if (i == *num_vmas)
1992 			return -ENOSPC;
1993 
1994 		attrs[i].start = xe_vma_start(vma);
1995 		attrs[i].end = xe_vma_end(vma);
1996 		attrs[i].atomic.val = vma->attr.atomic_access;
1997 		attrs[i].pat_index.val = vma->attr.pat_index;
1998 		attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
1999 		attrs[i].preferred_mem_loc.migration_policy =
2000 		vma->attr.preferred_loc.migration_policy;
2001 
2002 		i++;
2003 	}
2004 
2005 	*num_vmas = i;
2006 	return 0;
2007 }
2008 
2009 int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2010 {
2011 	struct xe_device *xe = to_xe_device(dev);
2012 	struct xe_file *xef = to_xe_file(file);
2013 	struct drm_xe_mem_range_attr *mem_attrs;
2014 	struct drm_xe_vm_query_mem_range_attr *args = data;
2015 	u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2016 	struct xe_vm *vm;
2017 	int err = 0;
2018 
2019 	if (XE_IOCTL_DBG(xe,
2020 			 ((args->num_mem_ranges == 0 &&
2021 			  (attrs_user || args->sizeof_mem_range_attr != 0)) ||
2022 			 (args->num_mem_ranges > 0 &&
2023 			  (!attrs_user ||
2024 			   args->sizeof_mem_range_attr !=
2025 			   sizeof(struct drm_xe_mem_range_attr))))))
2026 		return -EINVAL;
2027 
2028 	vm = xe_vm_lookup(xef, args->vm_id);
2029 	if (XE_IOCTL_DBG(xe, !vm))
2030 		return -EINVAL;
2031 
2032 	err = down_read_interruptible(&vm->lock);
2033 	if (err)
2034 		goto put_vm;
2035 
2036 	attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
2037 
2038 	if (args->num_mem_ranges == 0 && !attrs_user) {
2039 		args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
2040 		args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
2041 		goto unlock_vm;
2042 	}
2043 
2044 	mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
2045 				   GFP_KERNEL | __GFP_ACCOUNT |
2046 				   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
2047 	if (!mem_attrs) {
2048 		err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
2049 		goto unlock_vm;
2050 	}
2051 
2052 	memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
2053 	err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
2054 			    args->start + args->range, mem_attrs);
2055 	if (err)
2056 		goto free_mem_attrs;
2057 
2058 	err = copy_to_user(attrs_user, mem_attrs,
2059 			   args->sizeof_mem_range_attr * args->num_mem_ranges);
2060 	if (err)
2061 		err = -EFAULT;
2062 
2063 free_mem_attrs:
2064 	kvfree(mem_attrs);
2065 unlock_vm:
2066 	up_read(&vm->lock);
2067 put_vm:
2068 	xe_vm_put(vm);
2069 	return err;
2070 }
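
/*
 * Illustrative two-call usage sketch (not part of this driver): userspace
 * first issues the query with num_mem_ranges == 0 and no output vector to
 * learn the number of ranges and the per-entry size, then calls again with
 * an allocated buffer. The ioctl macro name is assumed here; struct fields
 * follow struct drm_xe_vm_query_mem_range_attr as used above. Error
 * handling is omitted.
 *
 *	struct drm_xe_mem_range_attr *attrs;
 *	struct drm_xe_vm_query_mem_range_attr q = {
 *		.vm_id = vm_id,
 *		.start = addr,
 *		.range = size,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTR, &q);
 *	attrs = calloc(q.num_mem_ranges, q.sizeof_mem_range_attr);
 *	q.vector_of_mem_attr = (uintptr_t)attrs;
 *	ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTR, &q);
 */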
2071 
2072 static bool vma_matches(struct xe_vma *vma, u64 page_addr)
2073 {
2074 	if (page_addr > xe_vma_end(vma) - 1 ||
2075 	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
2076 		return false;
2077 
2078 	return true;
2079 }
2080 
2081 /**
2082  * xe_vm_find_vma_by_addr() - Find a VMA by its address
2083  *
2084  * @vm: the xe_vm the vma belongs to
2085  * @page_addr: address to look up
 *
 * Return: the VMA covering @page_addr, or NULL if no matching VMA is found.
2086  */
2087 struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
2088 {
2089 	struct xe_vma *vma = NULL;
2090 
2091 	if (vm->usm.last_fault_vma) {   /* Fast lookup */
2092 		if (vma_matches(vm->usm.last_fault_vma, page_addr))
2093 			vma = vm->usm.last_fault_vma;
2094 	}
2095 	if (!vma)
2096 		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
2097 
2098 	return vma;
2099 }
2100 
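/*
 * Translation from the uapi prefetch region instance to a TTM placement:
 * index 0 selects system memory (XE_PL_TT), indices 1 and 2 select the VRAM
 * of the corresponding tile.
 */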
2101 static const u32 region_to_mem_type[] = {
2102 	XE_PL_TT,
2103 	XE_PL_VRAM0,
2104 	XE_PL_VRAM1,
2105 };
2106 
2107 static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
2108 			     bool post_commit)
2109 {
2110 	xe_svm_notifier_lock(vm);
2111 	vma->gpuva.flags |= XE_VMA_DESTROYED;
2112 	xe_svm_notifier_unlock(vm);
2113 	if (post_commit)
2114 		xe_vm_remove_vma(vm, vma);
2115 }
2116 
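/* Cast helper for the %llx format arguments in the vm_dbg() calls below. */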
2117 #undef ULL
2118 #define ULL	unsigned long long
2119 
2120 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
2121 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2122 {
2123 	struct xe_vma *vma;
2124 
2125 	switch (op->op) {
2126 	case DRM_GPUVA_OP_MAP:
2127 		vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx",
2128 		       (ULL)op->map.va.addr, (ULL)op->map.va.range);
2129 		break;
2130 	case DRM_GPUVA_OP_REMAP:
2131 		vma = gpuva_to_vma(op->remap.unmap->va);
2132 		vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2133 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2134 		       op->remap.unmap->keep ? 1 : 0);
2135 		if (op->remap.prev)
2136 			vm_dbg(&xe->drm,
2137 			       "REMAP:PREV: addr=0x%016llx, range=0x%016llx",
2138 			       (ULL)op->remap.prev->va.addr,
2139 			       (ULL)op->remap.prev->va.range);
2140 		if (op->remap.next)
2141 			vm_dbg(&xe->drm,
2142 			       "REMAP:NEXT: addr=0x%016llx, range=0x%016llx",
2143 			       (ULL)op->remap.next->va.addr,
2144 			       (ULL)op->remap.next->va.range);
2145 		break;
2146 	case DRM_GPUVA_OP_UNMAP:
2147 		vma = gpuva_to_vma(op->unmap.va);
2148 		vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d",
2149 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma),
2150 		       op->unmap.keep ? 1 : 0);
2151 		break;
2152 	case DRM_GPUVA_OP_PREFETCH:
2153 		vma = gpuva_to_vma(op->prefetch.va);
2154 		vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx",
2155 		       (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma));
2156 		break;
2157 	default:
2158 		drm_warn(&xe->drm, "NOT POSSIBLE");
2159 	}
2160 }
2161 #else
2162 static void print_op(struct xe_device *xe, struct drm_gpuva_op *op)
2163 {
2164 }
2165 #endif
2166 
2167 static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
2168 {
2169 	if (!xe_vm_in_fault_mode(vm))
2170 		return false;
2171 
2172 	if (!xe_vm_has_scratch(vm))
2173 		return false;
2174 
2175 	if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE)
2176 		return false;
2177 
2178 	return true;
2179 }
2180 
2181 static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
2182 {
2183 	struct drm_gpuva_op *__op;
2184 
2185 	drm_gpuva_for_each_op(__op, ops) {
2186 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2187 
2188 		xe_vma_svm_prefetch_op_fini(op);
2189 	}
2190 }
2191 
2192 /*
2193  * Create the operations list from the IOCTL arguments and set up operation
2194  * fields so the parse and commit steps are decoupled from them. This step can fail.
2195  */
2196 static struct drm_gpuva_ops *
2197 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
2198 			 struct xe_bo *bo, u64 bo_offset_or_userptr,
2199 			 u64 addr, u64 range,
2200 			 u32 operation, u32 flags,
2201 			 u32 prefetch_region, u16 pat_index)
2202 {
2203 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
2204 	struct drm_gpuva_ops *ops;
2205 	struct drm_gpuva_op *__op;
2206 	struct drm_gpuvm_bo *vm_bo;
2207 	u64 range_end = addr + range;
2208 	int err;
2209 
2210 	lockdep_assert_held_write(&vm->lock);
2211 
2212 	vm_dbg(&vm->xe->drm,
2213 	       "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx",
2214 	       operation, (ULL)addr, (ULL)range,
2215 	       (ULL)bo_offset_or_userptr);
2216 
2217 	switch (operation) {
2218 	case DRM_XE_VM_BIND_OP_MAP:
2219 	case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
2220 		struct drm_gpuvm_map_req map_req = {
2221 			.map.va.addr = addr,
2222 			.map.va.range = range,
2223 			.map.gem.obj = obj,
2224 			.map.gem.offset = bo_offset_or_userptr,
2225 		};
2226 
2227 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
2228 		break;
2229 	}
2230 	case DRM_XE_VM_BIND_OP_UNMAP:
2231 		ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
2232 		break;
2233 	case DRM_XE_VM_BIND_OP_PREFETCH:
2234 		ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range);
2235 		break;
2236 	case DRM_XE_VM_BIND_OP_UNMAP_ALL:
2237 		xe_assert(vm->xe, bo);
2238 
2239 		err = xe_bo_lock(bo, true);
2240 		if (err)
2241 			return ERR_PTR(err);
2242 
2243 		vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj);
2244 		if (IS_ERR(vm_bo)) {
2245 			xe_bo_unlock(bo);
2246 			return ERR_CAST(vm_bo);
2247 		}
2248 
2249 		ops = drm_gpuvm_bo_unmap_ops_create(vm_bo);
2250 		drm_gpuvm_bo_put(vm_bo);
2251 		xe_bo_unlock(bo);
2252 		break;
2253 	default:
2254 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2255 		ops = ERR_PTR(-EINVAL);
2256 	}
2257 	if (IS_ERR(ops))
2258 		return ops;
2259 
2260 	drm_gpuva_for_each_op(__op, ops) {
2261 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2262 
2263 		if (__op->op == DRM_GPUVA_OP_MAP) {
2264 			op->map.immediate =
2265 				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
2266 			if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
2267 				op->map.vma_flags |= XE_VMA_READ_ONLY;
2268 			if (flags & DRM_XE_VM_BIND_FLAG_NULL)
2269 				op->map.vma_flags |= DRM_GPUVA_SPARSE;
2270 			if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
2271 				op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
2272 			if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
2273 				op->map.vma_flags |= XE_VMA_DUMPABLE;
2274 			if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
2275 				op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
2276 			op->map.pat_index = pat_index;
2277 			op->map.invalidate_on_bind =
2278 				__xe_vm_needs_clear_scratch_pages(vm, flags);
2279 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
2280 			struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2281 			struct xe_tile *tile;
2282 			struct xe_svm_range *svm_range;
2283 			struct drm_gpusvm_ctx ctx = {};
2284 			struct drm_pagemap *dpagemap;
2285 			u8 id, tile_mask = 0;
2286 			u32 i;
2287 
2288 			if (!xe_vma_is_cpu_addr_mirror(vma)) {
2289 				op->prefetch.region = prefetch_region;
2290 				break;
2291 			}
2292 
2293 			ctx.read_only = xe_vma_read_only(vma);
2294 			ctx.devmem_possible = IS_DGFX(vm->xe) &&
2295 					      IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2296 
2297 			for_each_tile(tile, vm->xe, id)
2298 				tile_mask |= 0x1 << id;
2299 
2300 			xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
2301 			op->prefetch_range.ranges_count = 0;
2302 			tile = NULL;
2303 
2304 			if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
2305 				dpagemap = xe_vma_resolve_pagemap(vma,
2306 								  xe_device_get_root_tile(vm->xe));
2307 				/*
2308 				 * TODO: Once multigpu support is enabled, we will need
2309 				 * something to dereference the tile from the dpagemap.
2310 				 */
2311 				if (dpagemap)
2312 					tile = xe_device_get_root_tile(vm->xe);
2313 			} else if (prefetch_region) {
2314 				tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
2315 						      XE_PL_VRAM0];
2316 			}
2317 
2318 			op->prefetch_range.tile = tile;
2319 alloc_next_range:
2320 			svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
2321 
2322 			if (PTR_ERR(svm_range) == -ENOENT) {
2323 				u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
2324 
2325 				addr = ret == ULONG_MAX ? 0 : ret;
2326 				if (addr)
2327 					goto alloc_next_range;
2328 				else
2329 					goto print_op_label;
2330 			}
2331 
2332 			if (IS_ERR(svm_range)) {
2333 				err = PTR_ERR(svm_range);
2334 				goto unwind_prefetch_ops;
2335 			}
2336 
2337 			if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
2338 				xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
2339 				goto check_next_range;
2340 			}
2341 
2342 			err = xa_alloc(&op->prefetch_range.range,
2343 				       &i, svm_range, xa_limit_32b,
2344 				       GFP_KERNEL);
2345 
2346 			if (err)
2347 				goto unwind_prefetch_ops;
2348 
2349 			op->prefetch_range.ranges_count++;
2350 			vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
2351 			xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
2352 check_next_range:
2353 			if (range_end > xe_svm_range_end(svm_range) &&
2354 			    xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
2355 				addr = xe_svm_range_end(svm_range);
2356 				goto alloc_next_range;
2357 			}
2358 		}
2359 print_op_label:
2360 		print_op(vm->xe, __op);
2361 	}
2362 
2363 	return ops;
2364 
2365 unwind_prefetch_ops:
2366 	xe_svm_prefetch_gpuva_ops_fini(ops);
2367 	drm_gpuva_ops_free(&vm->gpuvm, ops);
2368 	return ERR_PTR(err);
2369 }
2370 
2371 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
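
/*
 * Summary of the bind flow built on top of vm_bind_ioctl_ops_create(): the
 * callers below (xe_vm_bind_ioctl() and xe_vm_bind_kernel_bo()) create the
 * GPUVA ops, parse them into VMA ops and commit them to the VM, allocate the
 * page-table update state, resolve any SVM prefetch ranges, and finally
 * execute the ops under the drm_exec/validation locks. Illustrative sketch,
 * error handling omitted:
 *
 *	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, offset, addr, range,
 *				       op, flags, prefetch_region, pat_index);
 *	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
 *	err = xe_vma_ops_alloc(&vops, false);
 *	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
 *	fence = vm_bind_ioctl_ops_execute(vm, &vops);
 */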
2372 
2373 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
2374 			      struct xe_vma_mem_attr *attr, unsigned int flags)
2375 {
2376 	struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
2377 	struct xe_validation_ctx ctx;
2378 	struct drm_exec exec;
2379 	struct xe_vma *vma;
2380 	int err = 0;
2381 
2382 	lockdep_assert_held_write(&vm->lock);
2383 
2384 	if (bo) {
2385 		err = 0;
2386 		xe_validation_guard(&ctx, &vm->xe->val, &exec,
2387 				    (struct xe_val_flags) {.interruptible = true}, err) {
2388 			if (!bo->vm) {
2389 				err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
2390 				drm_exec_retry_on_contention(&exec);
2391 			}
2392 			if (!err) {
2393 				err = drm_exec_lock_obj(&exec, &bo->ttm.base);
2394 				drm_exec_retry_on_contention(&exec);
2395 			}
2396 			if (err)
2397 				return ERR_PTR(err);
2398 
2399 			vma = xe_vma_create(vm, bo, op->gem.offset,
2400 					    op->va.addr, op->va.addr +
2401 					    op->va.range - 1, attr, flags);
2402 			if (IS_ERR(vma))
2403 				return vma;
2404 
2405 			if (!bo->vm) {
2406 				err = add_preempt_fences(vm, bo);
2407 				if (err) {
2408 					prep_vma_destroy(vm, vma, false);
2409 					xe_vma_destroy(vma, NULL);
2410 				}
2411 			}
2412 		}
2413 		if (err)
2414 			return ERR_PTR(err);
2415 	} else {
2416 		vma = xe_vma_create(vm, NULL, op->gem.offset,
2417 				    op->va.addr, op->va.addr +
2418 				    op->va.range - 1, attr, flags);
2419 		if (IS_ERR(vma))
2420 			return vma;
2421 
2422 		if (xe_vma_is_userptr(vma))
2423 			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2424 	}
2425 	if (err) {
2426 		prep_vma_destroy(vm, vma, false);
2427 		xe_vma_destroy_unlocked(vma);
2428 		vma = ERR_PTR(err);
2429 	}
2430 
2431 	return vma;
2432 }
2433 
2434 static u64 xe_vma_max_pte_size(struct xe_vma *vma)
2435 {
2436 	if (vma->gpuva.flags & XE_VMA_PTE_1G)
2437 		return SZ_1G;
2438 	else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
2439 		return SZ_2M;
2440 	else if (vma->gpuva.flags & XE_VMA_PTE_64K)
2441 		return SZ_64K;
2442 	else if (vma->gpuva.flags & XE_VMA_PTE_4K)
2443 		return SZ_4K;
2444 
2445 	return SZ_1G;	/* Uninitialized, use max size */
2446 }
2447 
2448 static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
2449 {
2450 	switch (size) {
2451 	case SZ_1G:
2452 		vma->gpuva.flags |= XE_VMA_PTE_1G;
2453 		break;
2454 	case SZ_2M:
2455 		vma->gpuva.flags |= XE_VMA_PTE_2M;
2456 		break;
2457 	case SZ_64K:
2458 		vma->gpuva.flags |= XE_VMA_PTE_64K;
2459 		break;
2460 	case SZ_4K:
2461 		vma->gpuva.flags |= XE_VMA_PTE_4K;
2462 		break;
2463 	}
2464 }
2465 
2466 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
2467 {
2468 	int err = 0;
2469 
2470 	lockdep_assert_held_write(&vm->lock);
2471 
2472 	switch (op->base.op) {
2473 	case DRM_GPUVA_OP_MAP:
2474 		err |= xe_vm_insert_vma(vm, op->map.vma);
2475 		if (!err)
2476 			op->flags |= XE_VMA_OP_COMMITTED;
2477 		break;
2478 	case DRM_GPUVA_OP_REMAP:
2479 	{
2480 		u8 tile_present =
2481 			gpuva_to_vma(op->base.remap.unmap->va)->tile_present;
2482 
2483 		prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va),
2484 				 true);
2485 		op->flags |= XE_VMA_OP_COMMITTED;
2486 
2487 		if (op->remap.prev) {
2488 			err |= xe_vm_insert_vma(vm, op->remap.prev);
2489 			if (!err)
2490 				op->flags |= XE_VMA_OP_PREV_COMMITTED;
2491 			if (!err && op->remap.skip_prev) {
2492 				op->remap.prev->tile_present =
2493 					tile_present;
2494 				op->remap.prev = NULL;
2495 			}
2496 		}
2497 		if (op->remap.next) {
2498 			err |= xe_vm_insert_vma(vm, op->remap.next);
2499 			if (!err)
2500 				op->flags |= XE_VMA_OP_NEXT_COMMITTED;
2501 			if (!err && op->remap.skip_next) {
2502 				op->remap.next->tile_present =
2503 					tile_present;
2504 				op->remap.next = NULL;
2505 			}
2506 		}
2507 
2508 		/* Adjust for partial unbind after removing VMA from VM */
2509 		if (!err) {
2510 			op->base.remap.unmap->va->va.addr = op->remap.start;
2511 			op->base.remap.unmap->va->va.range = op->remap.range;
2512 		}
2513 		break;
2514 	}
2515 	case DRM_GPUVA_OP_UNMAP:
2516 		prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true);
2517 		op->flags |= XE_VMA_OP_COMMITTED;
2518 		break;
2519 	case DRM_GPUVA_OP_PREFETCH:
2520 		op->flags |= XE_VMA_OP_COMMITTED;
2521 		break;
2522 	default:
2523 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2524 	}
2525 
2526 	return err;
2527 }
2528 
2529 /**
2530  * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
2531  * @vma: Pointer to the xe_vma structure to check
2532  *
2533  * This function determines whether the given VMA (Virtual Memory Area)
2534  * has its memory attributes set to their default values. Specifically,
2535  * it checks the following conditions:
2536  *
2537  * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
2538  * - `pat_index` is equal to `default_pat_index`
2539  * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
2540  * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
2541  *
2542  * Return: true if all attributes are at their default values, false otherwise.
2543  */
2544 bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
2545 {
2546 	return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
2547 		vma->attr.pat_index ==  vma->attr.default_pat_index &&
2548 		vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
2549 		vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
2550 }
2551 
2552 static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
2553 				   struct xe_vma_ops *vops)
2554 {
2555 	struct xe_device *xe = vm->xe;
2556 	struct drm_gpuva_op *__op;
2557 	struct xe_tile *tile;
2558 	u8 id, tile_mask = 0;
2559 	int err = 0;
2560 
2561 	lockdep_assert_held_write(&vm->lock);
2562 
2563 	for_each_tile(tile, vm->xe, id)
2564 		tile_mask |= 0x1 << id;
2565 
2566 	drm_gpuva_for_each_op(__op, ops) {
2567 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2568 		struct xe_vma *vma;
2569 		unsigned int flags = 0;
2570 
2571 		INIT_LIST_HEAD(&op->link);
2572 		list_add_tail(&op->link, &vops->list);
2573 		op->tile_mask = tile_mask;
2574 
2575 		switch (op->base.op) {
2576 		case DRM_GPUVA_OP_MAP:
2577 		{
2578 			struct xe_vma_mem_attr default_attr = {
2579 				.preferred_loc = {
2580 					.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
2581 					.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
2582 				},
2583 				.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
2584 				.default_pat_index = op->map.pat_index,
2585 				.pat_index = op->map.pat_index,
2586 			};
2587 
2588 			flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
2589 
2590 			vma = new_vma(vm, &op->base.map, &default_attr,
2591 				      flags);
2592 			if (IS_ERR(vma))
2593 				return PTR_ERR(vma);
2594 
2595 			op->map.vma = vma;
2596 			if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
2597 			     !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
2598 			    op->map.invalidate_on_bind)
2599 				xe_vma_ops_incr_pt_update_ops(vops,
2600 							      op->tile_mask, 1);
2601 			break;
2602 		}
2603 		case DRM_GPUVA_OP_REMAP:
2604 		{
2605 			struct xe_vma *old =
2606 				gpuva_to_vma(op->base.remap.unmap->va);
2607 			bool skip = xe_vma_is_cpu_addr_mirror(old);
2608 			u64 start = xe_vma_start(old), end = xe_vma_end(old);
2609 			int num_remap_ops = 0;
2610 
2611 			if (op->base.remap.prev)
2612 				start = op->base.remap.prev->va.addr +
2613 					op->base.remap.prev->va.range;
2614 			if (op->base.remap.next)
2615 				end = op->base.remap.next->va.addr;
2616 
2617 			if (xe_vma_is_cpu_addr_mirror(old) &&
2618 			    xe_svm_has_mapping(vm, start, end)) {
2619 				if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
2620 					xe_svm_unmap_address_range(vm, start, end);
2621 				else
2622 					return -EBUSY;
2623 			}
2624 
2625 			op->remap.start = xe_vma_start(old);
2626 			op->remap.range = xe_vma_size(old);
2627 
2628 			flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
2629 			if (op->base.remap.prev) {
2630 				vma = new_vma(vm, op->base.remap.prev,
2631 					      &old->attr, flags);
2632 				if (IS_ERR(vma))
2633 					return PTR_ERR(vma);
2634 
2635 				op->remap.prev = vma;
2636 
2637 				/*
2638 				 * Userptr creates a new SG mapping so
2639 				 * we must also rebind.
2640 				 */
2641 				op->remap.skip_prev = skip ||
2642 					(!xe_vma_is_userptr(old) &&
2643 					IS_ALIGNED(xe_vma_end(vma),
2644 						   xe_vma_max_pte_size(old)));
2645 				if (op->remap.skip_prev) {
2646 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2647 					op->remap.range -=
2648 						xe_vma_end(vma) -
2649 						xe_vma_start(old);
2650 					op->remap.start = xe_vma_end(vma);
2651 					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
2652 					       (ULL)op->remap.start,
2653 					       (ULL)op->remap.range);
2654 				} else {
2655 					num_remap_ops++;
2656 				}
2657 			}
2658 
2659 			if (op->base.remap.next) {
2660 				vma = new_vma(vm, op->base.remap.next,
2661 					      &old->attr, flags);
2662 				if (IS_ERR(vma))
2663 					return PTR_ERR(vma);
2664 
2665 				op->remap.next = vma;
2666 
2667 				/*
2668 				 * Userptr creates a new SG mapping so
2669 				 * we must also rebind.
2670 				 */
2671 				op->remap.skip_next = skip ||
2672 					(!xe_vma_is_userptr(old) &&
2673 					IS_ALIGNED(xe_vma_start(vma),
2674 						   xe_vma_max_pte_size(old)));
2675 				if (op->remap.skip_next) {
2676 					xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old));
2677 					op->remap.range -=
2678 						xe_vma_end(old) -
2679 						xe_vma_start(vma);
2680 					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
2681 					       (ULL)op->remap.start,
2682 					       (ULL)op->remap.range);
2683 				} else {
2684 					num_remap_ops++;
2685 				}
2686 			}
2687 			if (!skip)
2688 				num_remap_ops++;
2689 
2690 			xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
2691 			break;
2692 		}
2693 		case DRM_GPUVA_OP_UNMAP:
2694 			vma = gpuva_to_vma(op->base.unmap.va);
2695 
2696 			if (xe_vma_is_cpu_addr_mirror(vma) &&
2697 			    xe_svm_has_mapping(vm, xe_vma_start(vma),
2698 					       xe_vma_end(vma)))
2699 				return -EBUSY;
2700 
2701 			if (!xe_vma_is_cpu_addr_mirror(vma))
2702 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2703 			break;
2704 		case DRM_GPUVA_OP_PREFETCH:
2705 			vma = gpuva_to_vma(op->base.prefetch.va);
2706 
2707 			if (xe_vma_is_userptr(vma)) {
2708 				err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
2709 				if (err)
2710 					return err;
2711 			}
2712 
2713 			if (xe_vma_is_cpu_addr_mirror(vma))
2714 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
2715 							      op->prefetch_range.ranges_count);
2716 			else
2717 				xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
2718 
2719 			break;
2720 		default:
2721 			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2722 		}
2723 
2724 		err = xe_vma_op_commit(vm, op);
2725 		if (err)
2726 			return err;
2727 	}
2728 
2729 	return 0;
2730 }
2731 
2732 static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
2733 			     bool post_commit, bool prev_post_commit,
2734 			     bool next_post_commit)
2735 {
2736 	lockdep_assert_held_write(&vm->lock);
2737 
2738 	switch (op->base.op) {
2739 	case DRM_GPUVA_OP_MAP:
2740 		if (op->map.vma) {
2741 			prep_vma_destroy(vm, op->map.vma, post_commit);
2742 			xe_vma_destroy_unlocked(op->map.vma);
2743 		}
2744 		break;
2745 	case DRM_GPUVA_OP_UNMAP:
2746 	{
2747 		struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
2748 
2749 		if (vma) {
2750 			xe_svm_notifier_lock(vm);
2751 			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2752 			xe_svm_notifier_unlock(vm);
2753 			if (post_commit)
2754 				xe_vm_insert_vma(vm, vma);
2755 		}
2756 		break;
2757 	}
2758 	case DRM_GPUVA_OP_REMAP:
2759 	{
2760 		struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va);
2761 
2762 		if (op->remap.prev) {
2763 			prep_vma_destroy(vm, op->remap.prev, prev_post_commit);
2764 			xe_vma_destroy_unlocked(op->remap.prev);
2765 		}
2766 		if (op->remap.next) {
2767 			prep_vma_destroy(vm, op->remap.next, next_post_commit);
2768 			xe_vma_destroy_unlocked(op->remap.next);
2769 		}
2770 		if (vma) {
2771 			xe_svm_notifier_lock(vm);
2772 			vma->gpuva.flags &= ~XE_VMA_DESTROYED;
2773 			xe_svm_notifier_unlock(vm);
2774 			if (post_commit)
2775 				xe_vm_insert_vma(vm, vma);
2776 		}
2777 		break;
2778 	}
2779 	case DRM_GPUVA_OP_PREFETCH:
2780 		/* Nothing to do */
2781 		break;
2782 	default:
2783 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2784 	}
2785 }
2786 
2787 static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
2788 				     struct drm_gpuva_ops **ops,
2789 				     int num_ops_list)
2790 {
2791 	int i;
2792 
2793 	for (i = num_ops_list - 1; i >= 0; --i) {
2794 		struct drm_gpuva_ops *__ops = ops[i];
2795 		struct drm_gpuva_op *__op;
2796 
2797 		if (!__ops)
2798 			continue;
2799 
2800 		drm_gpuva_for_each_op_reverse(__op, __ops) {
2801 			struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
2802 
2803 			xe_vma_op_unwind(vm, op,
2804 					 op->flags & XE_VMA_OP_COMMITTED,
2805 					 op->flags & XE_VMA_OP_PREV_COMMITTED,
2806 					 op->flags & XE_VMA_OP_NEXT_COMMITTED);
2807 		}
2808 	}
2809 }
2810 
2811 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
2812 				 bool res_evict, bool validate)
2813 {
2814 	struct xe_bo *bo = xe_vma_bo(vma);
2815 	struct xe_vm *vm = xe_vma_vm(vma);
2816 	int err = 0;
2817 
2818 	if (bo) {
2819 		if (!bo->vm)
2820 			err = drm_exec_lock_obj(exec, &bo->ttm.base);
2821 		if (!err && validate)
2822 			err = xe_bo_validate(bo, vm,
2823 					     !xe_vm_in_preempt_fence_mode(vm) &&
2824 					     res_evict, exec);
2825 	}
2826 
2827 	return err;
2828 }
2829 
2830 static int check_ufence(struct xe_vma *vma)
2831 {
2832 	if (vma->ufence) {
2833 		struct xe_user_fence * const f = vma->ufence;
2834 
2835 		if (!xe_sync_ufence_get_status(f))
2836 			return -EBUSY;
2837 
2838 		vma->ufence = NULL;
2839 		xe_sync_ufence_put(f);
2840 	}
2841 
2842 	return 0;
2843 }
2844 
2845 static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
2846 {
2847 	bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
2848 	struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2849 	struct xe_tile *tile = op->prefetch_range.tile;
2850 	int err = 0;
2851 
2852 	struct xe_svm_range *svm_range;
2853 	struct drm_gpusvm_ctx ctx = {};
2854 	unsigned long i;
2855 
2856 	if (!xe_vma_is_cpu_addr_mirror(vma))
2857 		return 0;
2858 
2859 	ctx.read_only = xe_vma_read_only(vma);
2860 	ctx.devmem_possible = devmem_possible;
2861 	ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
2862 	ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
2863 
2864 	/* TODO: Threading the migration */
2865 	xa_for_each(&op->prefetch_range.range, i, svm_range) {
2866 		if (!tile)
2867 			xe_svm_range_migrate_to_smem(vm, svm_range);
2868 
2869 		if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
2870 			err = xe_svm_alloc_vram(tile, svm_range, &ctx);
2871 			if (err) {
2872 				drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
2873 					vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2874 				return -ENODATA;
2875 			}
2876 			xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
2877 		}
2878 
2879 		err = xe_svm_range_get_pages(vm, svm_range, &ctx);
2880 		if (err) {
2881 			drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
2882 				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
2883 			if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
2884 				err = -ENODATA;
2885 			return err;
2886 		}
2887 		xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
2888 	}
2889 
2890 	return err;
2891 }
2892 
2893 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
2894 			    struct xe_vma_ops *vops, struct xe_vma_op *op)
2895 {
2896 	int err = 0;
2897 	bool res_evict;
2898 
2899 	/*
2900 	 * We only allow evicting a BO within the VM if it is not part of an
2901 	 * array of binds, as an array of binds can evict another BO within the
2902 	 * bind.
2903 	 */
2904 	res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
2905 
2906 	switch (op->base.op) {
2907 	case DRM_GPUVA_OP_MAP:
2908 		if (!op->map.invalidate_on_bind)
2909 			err = vma_lock_and_validate(exec, op->map.vma,
2910 						    res_evict,
2911 						    !xe_vm_in_fault_mode(vm) ||
2912 						    op->map.immediate);
2913 		break;
2914 	case DRM_GPUVA_OP_REMAP:
2915 		err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va));
2916 		if (err)
2917 			break;
2918 
2919 		err = vma_lock_and_validate(exec,
2920 					    gpuva_to_vma(op->base.remap.unmap->va),
2921 					    res_evict, false);
2922 		if (!err && op->remap.prev)
2923 			err = vma_lock_and_validate(exec, op->remap.prev,
2924 						    res_evict, true);
2925 		if (!err && op->remap.next)
2926 			err = vma_lock_and_validate(exec, op->remap.next,
2927 						    res_evict, true);
2928 		break;
2929 	case DRM_GPUVA_OP_UNMAP:
2930 		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
2931 		if (err)
2932 			break;
2933 
2934 		err = vma_lock_and_validate(exec,
2935 					    gpuva_to_vma(op->base.unmap.va),
2936 					    res_evict, false);
2937 		break;
2938 	case DRM_GPUVA_OP_PREFETCH:
2939 	{
2940 		struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
2941 		u32 region;
2942 
2943 		if (!xe_vma_is_cpu_addr_mirror(vma)) {
2944 			region = op->prefetch.region;
2945 			xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
2946 				  region <= ARRAY_SIZE(region_to_mem_type));
2947 		}
2948 
2949 		err = vma_lock_and_validate(exec,
2950 					    gpuva_to_vma(op->base.prefetch.va),
2951 					    res_evict, false);
2952 		if (!err && !xe_vma_has_no_bo(vma))
2953 			err = xe_bo_migrate(xe_vma_bo(vma),
2954 					    region_to_mem_type[region],
2955 					    NULL,
2956 					    exec);
2957 		break;
2958 	}
2959 	default:
2960 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
2961 	}
2962 
2963 	return err;
2964 }
2965 
2966 static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
2967 {
2968 	struct xe_vma_op *op;
2969 	int err;
2970 
2971 	if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
2972 		return 0;
2973 
2974 	list_for_each_entry(op, &vops->list, link) {
2975 		if (op->base.op  == DRM_GPUVA_OP_PREFETCH) {
2976 			err = prefetch_ranges(vm, op);
2977 			if (err)
2978 				return err;
2979 		}
2980 	}
2981 
2982 	return 0;
2983 }
2984 
2985 static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
2986 					   struct xe_vm *vm,
2987 					   struct xe_vma_ops *vops)
2988 {
2989 	struct xe_vma_op *op;
2990 	int err;
2991 
2992 	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
2993 	if (err)
2994 		return err;
2995 
2996 	list_for_each_entry(op, &vops->list, link) {
2997 		err = op_lock_and_prep(exec, vm, vops, op);
2998 		if (err)
2999 			return err;
3000 	}
3001 
3002 #ifdef TEST_VM_OPS_ERROR
3003 	if (vops->inject_error &&
3004 	    vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
3005 		return -ENOSPC;
3006 #endif
3007 
3008 	return 0;
3009 }
3010 
3011 static void op_trace(struct xe_vma_op *op)
3012 {
3013 	switch (op->base.op) {
3014 	case DRM_GPUVA_OP_MAP:
3015 		trace_xe_vma_bind(op->map.vma);
3016 		break;
3017 	case DRM_GPUVA_OP_REMAP:
3018 		trace_xe_vma_unbind(gpuva_to_vma(op->base.remap.unmap->va));
3019 		if (op->remap.prev)
3020 			trace_xe_vma_bind(op->remap.prev);
3021 		if (op->remap.next)
3022 			trace_xe_vma_bind(op->remap.next);
3023 		break;
3024 	case DRM_GPUVA_OP_UNMAP:
3025 		trace_xe_vma_unbind(gpuva_to_vma(op->base.unmap.va));
3026 		break;
3027 	case DRM_GPUVA_OP_PREFETCH:
3028 		trace_xe_vma_bind(gpuva_to_vma(op->base.prefetch.va));
3029 		break;
3030 	case DRM_GPUVA_OP_DRIVER:
3031 		break;
3032 	default:
3033 		XE_WARN_ON("NOT POSSIBLE");
3034 	}
3035 }
3036 
3037 static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops)
3038 {
3039 	struct xe_vma_op *op;
3040 
3041 	list_for_each_entry(op, &vops->list, link)
3042 		op_trace(op);
3043 }
3044 
3045 static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops)
3046 {
3047 	struct xe_exec_queue *q = vops->q;
3048 	struct xe_tile *tile;
3049 	int number_tiles = 0;
3050 	u8 id;
3051 
3052 	for_each_tile(tile, vm->xe, id) {
3053 		if (vops->pt_update_ops[id].num_ops)
3054 			++number_tiles;
3055 
3056 		if (vops->pt_update_ops[id].q)
3057 			continue;
3058 
3059 		if (q) {
3060 			vops->pt_update_ops[id].q = q;
3061 			if (vm->pt_root[id] && !list_empty(&q->multi_gt_list))
3062 				q = list_next_entry(q, multi_gt_list);
3063 		} else {
3064 			vops->pt_update_ops[id].q = vm->q[id];
3065 		}
3066 	}
3067 
3068 	return number_tiles;
3069 }
3070 
3071 static struct dma_fence *ops_execute(struct xe_vm *vm,
3072 				     struct xe_vma_ops *vops)
3073 {
3074 	struct xe_tile *tile;
3075 	struct dma_fence *fence = NULL;
3076 	struct dma_fence **fences = NULL;
3077 	struct dma_fence_array *cf = NULL;
3078 	int number_tiles = 0, current_fence = 0, err;
3079 	u8 id;
3080 
3081 	number_tiles = vm_ops_setup_tile_args(vm, vops);
3082 	if (number_tiles == 0)
3083 		return ERR_PTR(-ENODATA);
3084 
3085 	if (number_tiles > 1) {
3086 		fences = kmalloc_array(number_tiles, sizeof(*fences),
3087 				       GFP_KERNEL);
3088 		if (!fences) {
3089 			fence = ERR_PTR(-ENOMEM);
3090 			goto err_trace;
3091 		}
3092 	}
3093 
3094 	for_each_tile(tile, vm->xe, id) {
3095 		if (!vops->pt_update_ops[id].num_ops)
3096 			continue;
3097 
3098 		err = xe_pt_update_ops_prepare(tile, vops);
3099 		if (err) {
3100 			fence = ERR_PTR(err);
3101 			goto err_out;
3102 		}
3103 	}
3104 
3105 	trace_xe_vm_ops_execute(vops);
3106 
3107 	for_each_tile(tile, vm->xe, id) {
3108 		if (!vops->pt_update_ops[id].num_ops)
3109 			continue;
3110 
3111 		fence = xe_pt_update_ops_run(tile, vops);
3112 		if (IS_ERR(fence))
3113 			goto err_out;
3114 
3115 		if (fences)
3116 			fences[current_fence++] = fence;
3117 	}
3118 
3119 	if (fences) {
3120 		cf = dma_fence_array_create(number_tiles, fences,
3121 					    vm->composite_fence_ctx,
3122 					    vm->composite_fence_seqno++,
3123 					    false);
3124 		if (!cf) {
3125 			--vm->composite_fence_seqno;
3126 			fence = ERR_PTR(-ENOMEM);
3127 			goto err_out;
3128 		}
3129 		fence = &cf->base;
3130 	}
3131 
3132 	for_each_tile(tile, vm->xe, id) {
3133 		if (!vops->pt_update_ops[id].num_ops)
3134 			continue;
3135 
3136 		xe_pt_update_ops_fini(tile, vops);
3137 	}
3138 
3139 	return fence;
3140 
3141 err_out:
3142 	for_each_tile(tile, vm->xe, id) {
3143 		if (!vops->pt_update_ops[id].num_ops)
3144 			continue;
3145 
3146 		xe_pt_update_ops_abort(tile, vops);
3147 	}
3148 	while (current_fence)
3149 		dma_fence_put(fences[--current_fence]);
3150 	kfree(fences);
3151 	kfree(cf);
3152 
3153 err_trace:
3154 	trace_xe_vm_ops_fail(vm);
3155 	return fence;
3156 }
3157 
3158 static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence)
3159 {
3160 	if (vma->ufence)
3161 		xe_sync_ufence_put(vma->ufence);
3162 	vma->ufence = __xe_sync_ufence_get(ufence);
3163 }
3164 
3165 static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
3166 			  struct xe_user_fence *ufence)
3167 {
3168 	switch (op->base.op) {
3169 	case DRM_GPUVA_OP_MAP:
3170 		vma_add_ufence(op->map.vma, ufence);
3171 		break;
3172 	case DRM_GPUVA_OP_REMAP:
3173 		if (op->remap.prev)
3174 			vma_add_ufence(op->remap.prev, ufence);
3175 		if (op->remap.next)
3176 			vma_add_ufence(op->remap.next, ufence);
3177 		break;
3178 	case DRM_GPUVA_OP_UNMAP:
3179 		break;
3180 	case DRM_GPUVA_OP_PREFETCH:
3181 		vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence);
3182 		break;
3183 	default:
3184 		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
3185 	}
3186 }
3187 
3188 static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
3189 				   struct dma_fence *fence)
3190 {
3191 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
3192 	struct xe_user_fence *ufence;
3193 	struct xe_vma_op *op;
3194 	int i;
3195 
3196 	ufence = find_ufence_get(vops->syncs, vops->num_syncs);
3197 	list_for_each_entry(op, &vops->list, link) {
3198 		if (ufence)
3199 			op_add_ufence(vm, op, ufence);
3200 
3201 		if (op->base.op == DRM_GPUVA_OP_UNMAP)
3202 			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence);
3203 		else if (op->base.op == DRM_GPUVA_OP_REMAP)
3204 			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va),
3205 				       fence);
3206 	}
3207 	if (ufence)
3208 		xe_sync_ufence_put(ufence);
3209 	if (fence) {
3210 		for (i = 0; i < vops->num_syncs; i++)
3211 			xe_sync_entry_signal(vops->syncs + i, fence);
3212 		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
3213 	}
3214 }
3215 
3216 static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
3217 						   struct xe_vma_ops *vops)
3218 {
3219 	struct xe_validation_ctx ctx;
3220 	struct drm_exec exec;
3221 	struct dma_fence *fence;
3222 	int err = 0;
3223 
3224 	lockdep_assert_held_write(&vm->lock);
3225 
3226 	xe_validation_guard(&ctx, &vm->xe->val, &exec,
3227 			    ((struct xe_val_flags) {
3228 				    .interruptible = true,
3229 				    .exec_ignore_duplicates = true,
3230 			    }), err) {
3231 		err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
3232 		drm_exec_retry_on_contention(&exec);
3233 		xe_validation_retry_on_oom(&ctx, &err);
3234 		if (err)
3235 			return ERR_PTR(err);
3236 
3237 		xe_vm_set_validation_exec(vm, &exec);
3238 		fence = ops_execute(vm, vops);
3239 		xe_vm_set_validation_exec(vm, NULL);
3240 		if (IS_ERR(fence)) {
3241 			if (PTR_ERR(fence) == -ENODATA)
3242 				vm_bind_ioctl_ops_fini(vm, vops, NULL);
3243 			return fence;
3244 		}
3245 
3246 		vm_bind_ioctl_ops_fini(vm, vops, fence);
3247 	}
3248 
3249 	return err ? ERR_PTR(err) : fence;
3250 }
3251 ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
3252 
3253 #define SUPPORTED_FLAGS_STUB  \
3254 	(DRM_XE_VM_BIND_FLAG_READONLY | \
3255 	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
3256 	 DRM_XE_VM_BIND_FLAG_NULL | \
3257 	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
3258 	 DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
3259 	 DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
3260 	 DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
3261 
3262 #ifdef TEST_VM_OPS_ERROR
3263 #define SUPPORTED_FLAGS	(SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
3264 #else
3265 #define SUPPORTED_FLAGS	SUPPORTED_FLAGS_STUB
3266 #endif
3267 
3268 #define XE_64K_PAGE_MASK 0xffffull
3269 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
3270 
3271 static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
3272 				    struct drm_xe_vm_bind *args,
3273 				    struct drm_xe_vm_bind_op **bind_ops)
3274 {
3275 	int err;
3276 	int i;
3277 
3278 	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
3279 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3280 		return -EINVAL;
3281 
3282 	if (XE_IOCTL_DBG(xe, args->extensions))
3283 		return -EINVAL;
3284 
3285 	if (args->num_binds > 1) {
3286 		u64 __user *bind_user =
3287 			u64_to_user_ptr(args->vector_of_binds);
3288 
3289 		*bind_ops = kvmalloc_array(args->num_binds,
3290 					   sizeof(struct drm_xe_vm_bind_op),
3291 					   GFP_KERNEL | __GFP_ACCOUNT |
3292 					   __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3293 		if (!*bind_ops)
3294 			return args->num_binds > 1 ? -ENOBUFS : -ENOMEM;
3295 
3296 		err = copy_from_user(*bind_ops, bind_user,
3297 				     sizeof(struct drm_xe_vm_bind_op) *
3298 				     args->num_binds);
3299 		if (XE_IOCTL_DBG(xe, err)) {
3300 			err = -EFAULT;
3301 			goto free_bind_ops;
3302 		}
3303 	} else {
3304 		*bind_ops = &args->bind;
3305 	}
3306 
3307 	for (i = 0; i < args->num_binds; ++i) {
3308 		u64 range = (*bind_ops)[i].range;
3309 		u64 addr = (*bind_ops)[i].addr;
3310 		u32 op = (*bind_ops)[i].op;
3311 		u32 flags = (*bind_ops)[i].flags;
3312 		u32 obj = (*bind_ops)[i].obj;
3313 		u64 obj_offset = (*bind_ops)[i].obj_offset;
3314 		u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
3315 		bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
3316 		bool is_cpu_addr_mirror = flags &
3317 			DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
3318 		u16 pat_index = (*bind_ops)[i].pat_index;
3319 		u16 coh_mode;
3320 
3321 		if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror &&
3322 				 (!xe_vm_in_fault_mode(vm) ||
3323 				 !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) {
3324 			err = -EINVAL;
3325 			goto free_bind_ops;
3326 		}
3327 
3328 		if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
3329 			err = -EINVAL;
3330 			goto free_bind_ops;
3331 		}
3332 
3333 		pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
3334 		(*bind_ops)[i].pat_index = pat_index;
3335 		coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3336 		if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
3337 			err = -EINVAL;
3338 			goto free_bind_ops;
3339 		}
3340 
3341 		if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
3342 			err = -EINVAL;
3343 			goto free_bind_ops;
3344 		}
3345 
3346 		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
3347 		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
3348 		    XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) ||
3349 		    XE_IOCTL_DBG(xe, obj_offset && (is_null ||
3350 						    is_cpu_addr_mirror)) ||
3351 		    XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP &&
3352 				 (is_null || is_cpu_addr_mirror)) ||
3353 		    XE_IOCTL_DBG(xe, !obj &&
3354 				 op == DRM_XE_VM_BIND_OP_MAP &&
3355 				 !is_null && !is_cpu_addr_mirror) ||
3356 		    XE_IOCTL_DBG(xe, !obj &&
3357 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3358 		    XE_IOCTL_DBG(xe, addr &&
3359 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3360 		    XE_IOCTL_DBG(xe, range &&
3361 				 op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
3362 		    XE_IOCTL_DBG(xe, obj &&
3363 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3364 		    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3365 				 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
3366 		    XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
3367 				 !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
3368 		    XE_IOCTL_DBG(xe, obj &&
3369 				 op == DRM_XE_VM_BIND_OP_PREFETCH) ||
3370 		    XE_IOCTL_DBG(xe, prefetch_region &&
3371 				 op != DRM_XE_VM_BIND_OP_PREFETCH) ||
3372 		    XE_IOCTL_DBG(xe,  (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
3373 				       !(BIT(prefetch_region) & xe->info.mem_region_mask))) ||
3374 		    XE_IOCTL_DBG(xe, obj &&
3375 				 op == DRM_XE_VM_BIND_OP_UNMAP) ||
3376 		    XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
3377 				 (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
3378 			err = -EINVAL;
3379 			goto free_bind_ops;
3380 		}
3381 
3382 		if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) ||
3383 		    XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) ||
3384 		    XE_IOCTL_DBG(xe, range & ~PAGE_MASK) ||
3385 		    XE_IOCTL_DBG(xe, !range &&
3386 				 op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) {
3387 			err = -EINVAL;
3388 			goto free_bind_ops;
3389 		}
3390 	}
3391 
3392 	return 0;
3393 
3394 free_bind_ops:
3395 	if (args->num_binds > 1)
3396 		kvfree(*bind_ops);
3397 	*bind_ops = NULL;
3398 	return err;
3399 }
3400 
3401 static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
3402 				       struct xe_exec_queue *q,
3403 				       struct xe_sync_entry *syncs,
3404 				       int num_syncs)
3405 {
3406 	struct dma_fence *fence;
3407 	int i, err = 0;
3408 
3409 	fence = xe_sync_in_fence_get(syncs, num_syncs,
3410 				     to_wait_exec_queue(vm, q), vm);
3411 	if (IS_ERR(fence))
3412 		return PTR_ERR(fence);
3413 
3414 	for (i = 0; i < num_syncs; i++)
3415 		xe_sync_entry_signal(&syncs[i], fence);
3416 
3417 	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
3418 				     fence);
3419 	dma_fence_put(fence);
3420 
3421 	return err;
3422 }
3423 
3424 static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
3425 			    struct xe_exec_queue *q,
3426 			    struct xe_sync_entry *syncs, u32 num_syncs)
3427 {
3428 	memset(vops, 0, sizeof(*vops));
3429 	INIT_LIST_HEAD(&vops->list);
3430 	vops->vm = vm;
3431 	vops->q = q;
3432 	vops->syncs = syncs;
3433 	vops->num_syncs = num_syncs;
3434 	vops->flags = 0;
3435 }
3436 
3437 static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
3438 					u64 addr, u64 range, u64 obj_offset,
3439 					u16 pat_index, u32 op, u32 bind_flags)
3440 {
3441 	u16 coh_mode;
3442 
3443 	if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
3444 	    XE_IOCTL_DBG(xe, obj_offset >
3445 			 xe_bo_size(bo) - range)) {
3446 		return -EINVAL;
3447 	}
3448 
3449 	/*
3450 	 * Some platforms require 64k VM_BIND alignment,
3451 	 * specifically those with XE_VRAM_FLAGS_NEED64K.
3452 	 *
3453 	 * Other platforms may have BOs set to 64k physical placement,
3454 	 * but can be mapped at 4k offsets anyway. This check is only
3455 	 * there for the former case.
3456 	 */
3457 	if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) &&
3458 	    (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) {
3459 		if (XE_IOCTL_DBG(xe, obj_offset &
3460 				 XE_64K_PAGE_MASK) ||
3461 		    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
3462 		    XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) {
3463 			return -EINVAL;
3464 		}
3465 	}
3466 
3467 	coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
3468 	if (bo->cpu_caching) {
3469 		if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
3470 				 bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
3471 			return -EINVAL;
3472 		}
3473 	} else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
3474 		/*
3475 		 * Imported dma-buf from a different device should
3476 		 * require 1way or 2way coherency since we don't know
3477 		 * how it was mapped on the CPU. Just assume it is
3478 		 * potentially cached on the CPU side.
3479 		 */
3480 		return -EINVAL;
3481 	}
3482 
3483 	/* If a BO is protected it can only be mapped if the key is still valid */
3484 	if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
3485 	    op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
3486 		if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0))
3487 			return -ENOEXEC;
3488 
3489 	return 0;
3490 }
3491 
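/*
 * Illustrative userspace sketch (not part of this driver): binding a single
 * BO into a VM with one synchronous map operation. Ioctl and struct names
 * follow uapi/drm/xe_drm.h as referenced in this file; the address, handles
 * and pat_index are placeholders and error handling is omitted.
 *
 *	struct drm_xe_vm_bind bind = {
 *		.vm_id = vm_id,
 *		.num_binds = 1,
 *		.bind.obj = bo_handle,
 *		.bind.obj_offset = 0,
 *		.bind.addr = 0x1a0000,
 *		.bind.range = bo_size,
 *		.bind.op = DRM_XE_VM_BIND_OP_MAP,
 *		.bind.pat_index = pat_index,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
 */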
3492 int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
3493 {
3494 	struct xe_device *xe = to_xe_device(dev);
3495 	struct xe_file *xef = to_xe_file(file);
3496 	struct drm_xe_vm_bind *args = data;
3497 	struct drm_xe_sync __user *syncs_user;
3498 	struct xe_bo **bos = NULL;
3499 	struct drm_gpuva_ops **ops = NULL;
3500 	struct xe_vm *vm;
3501 	struct xe_exec_queue *q = NULL;
3502 	u32 num_syncs, num_ufence = 0;
3503 	struct xe_sync_entry *syncs = NULL;
3504 	struct drm_xe_vm_bind_op *bind_ops = NULL;
3505 	struct xe_vma_ops vops;
3506 	struct dma_fence *fence;
3507 	int err;
3508 	int i;
3509 
3510 	vm = xe_vm_lookup(xef, args->vm_id);
3511 	if (XE_IOCTL_DBG(xe, !vm))
3512 		return -EINVAL;
3513 
3514 	err = vm_bind_ioctl_check_args(xe, vm, args, &bind_ops);
3515 	if (err)
3516 		goto put_vm;
3517 
3518 	if (args->exec_queue_id) {
3519 		q = xe_exec_queue_lookup(xef, args->exec_queue_id);
3520 		if (XE_IOCTL_DBG(xe, !q)) {
3521 			err = -ENOENT;
3522 			goto free_bind_ops;
3523 		}
3524 
3525 		if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
3526 			err = -EINVAL;
3527 			goto put_exec_queue;
3528 		}
3529 	}
3530 
3531 	/* Ensure all UNMAPs visible */
3532 	xe_svm_flush(vm);
3533 
3534 	err = down_write_killable(&vm->lock);
3535 	if (err)
3536 		goto put_exec_queue;
3537 
3538 	if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
3539 		err = -ENOENT;
3540 		goto release_vm_lock;
3541 	}
3542 
3543 	for (i = 0; i < args->num_binds; ++i) {
3544 		u64 range = bind_ops[i].range;
3545 		u64 addr = bind_ops[i].addr;
3546 
3547 		if (XE_IOCTL_DBG(xe, range > vm->size) ||
3548 		    XE_IOCTL_DBG(xe, addr > vm->size - range)) {
3549 			err = -EINVAL;
3550 			goto release_vm_lock;
3551 		}
3552 	}
3553 
3554 	if (args->num_binds) {
3555 		bos = kvcalloc(args->num_binds, sizeof(*bos),
3556 			       GFP_KERNEL | __GFP_ACCOUNT |
3557 			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3558 		if (!bos) {
3559 			err = -ENOMEM;
3560 			goto release_vm_lock;
3561 		}
3562 
3563 		ops = kvcalloc(args->num_binds, sizeof(*ops),
3564 			       GFP_KERNEL | __GFP_ACCOUNT |
3565 			       __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
3566 		if (!ops) {
3567 			err = -ENOMEM;
3568 			goto free_bos;
3569 		}
3570 	}
3571 
3572 	for (i = 0; i < args->num_binds; ++i) {
3573 		struct drm_gem_object *gem_obj;
3574 		u64 range = bind_ops[i].range;
3575 		u64 addr = bind_ops[i].addr;
3576 		u32 obj = bind_ops[i].obj;
3577 		u64 obj_offset = bind_ops[i].obj_offset;
3578 		u16 pat_index = bind_ops[i].pat_index;
3579 		u32 op = bind_ops[i].op;
3580 		u32 bind_flags = bind_ops[i].flags;
3581 
3582 		if (!obj)
3583 			continue;
3584 
3585 		gem_obj = drm_gem_object_lookup(file, obj);
3586 		if (XE_IOCTL_DBG(xe, !gem_obj)) {
3587 			err = -ENOENT;
3588 			goto put_obj;
3589 		}
3590 		bos[i] = gem_to_xe_bo(gem_obj);
3591 
3592 		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
3593 						   obj_offset, pat_index, op,
3594 						   bind_flags);
3595 		if (err)
3596 			goto put_obj;
3597 	}
3598 
3599 	if (args->num_syncs) {
3600 		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
3601 		if (!syncs) {
3602 			err = -ENOMEM;
3603 			goto put_obj;
3604 		}
3605 	}
3606 
3607 	syncs_user = u64_to_user_ptr(args->syncs);
3608 	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
3609 		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
3610 					  &syncs_user[num_syncs],
3611 					  (xe_vm_in_lr_mode(vm) ?
3612 					   SYNC_PARSE_FLAG_LR_MODE : 0) |
3613 					  (!args->num_binds ?
3614 					   SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0));
3615 		if (err)
3616 			goto free_syncs;
3617 
3618 		if (xe_sync_is_ufence(&syncs[num_syncs]))
3619 			num_ufence++;
3620 	}
3621 
3622 	if (XE_IOCTL_DBG(xe, num_ufence > 1)) {
3623 		err = -EINVAL;
3624 		goto free_syncs;
3625 	}
3626 
3627 	if (!args->num_binds) {
3628 		err = -ENODATA;
3629 		goto free_syncs;
3630 	}
3631 
3632 	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
3633 	if (args->num_binds > 1)
3634 		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
3635 	for (i = 0; i < args->num_binds; ++i) {
3636 		u64 range = bind_ops[i].range;
3637 		u64 addr = bind_ops[i].addr;
3638 		u32 op = bind_ops[i].op;
3639 		u32 flags = bind_ops[i].flags;
3640 		u64 obj_offset = bind_ops[i].obj_offset;
3641 		u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
3642 		u16 pat_index = bind_ops[i].pat_index;
3643 
3644 		ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
3645 						  addr, range, op, flags,
3646 						  prefetch_region, pat_index);
3647 		if (IS_ERR(ops[i])) {
3648 			err = PTR_ERR(ops[i]);
3649 			ops[i] = NULL;
3650 			goto unwind_ops;
3651 		}
3652 
3653 		err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops);
3654 		if (err)
3655 			goto unwind_ops;
3656 
3657 #ifdef TEST_VM_OPS_ERROR
3658 		if (flags & FORCE_OP_ERROR) {
3659 			vops.inject_error = true;
3660 			vm->xe->vm_inject_error_position =
3661 				(vm->xe->vm_inject_error_position + 1) %
3662 				FORCE_OP_ERROR_COUNT;
3663 		}
3664 #endif
3665 	}
3666 
3667 	/* Nothing to do */
3668 	if (list_empty(&vops.list)) {
3669 		err = -ENODATA;
3670 		goto unwind_ops;
3671 	}
3672 
3673 	err = xe_vma_ops_alloc(&vops, args->num_binds > 1);
3674 	if (err)
3675 		goto unwind_ops;
3676 
3677 	err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
3678 	if (err)
3679 		goto unwind_ops;
3680 
3681 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
3682 	if (IS_ERR(fence))
3683 		err = PTR_ERR(fence);
3684 	else
3685 		dma_fence_put(fence);
3686 
3687 unwind_ops:
3688 	if (err && err != -ENODATA)
3689 		vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
3690 	xe_vma_ops_fini(&vops);
3691 	for (i = args->num_binds - 1; i >= 0; --i)
3692 		if (ops[i])
3693 			drm_gpuva_ops_free(&vm->gpuvm, ops[i]);
3694 free_syncs:
3695 	if (err == -ENODATA)
3696 		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
3697 	while (num_syncs--)
3698 		xe_sync_entry_cleanup(&syncs[num_syncs]);
3699 
3700 	kfree(syncs);
3701 put_obj:
3702 	for (i = 0; i < args->num_binds; ++i)
3703 		xe_bo_put(bos[i]);
3704 
3705 	kvfree(ops);
3706 free_bos:
3707 	kvfree(bos);
3708 release_vm_lock:
3709 	up_write(&vm->lock);
3710 put_exec_queue:
3711 	if (q)
3712 		xe_exec_queue_put(q);
3713 free_bind_ops:
3714 	if (args->num_binds > 1)
3715 		kvfree(bind_ops);
3716 put_vm:
3717 	xe_vm_put(vm);
3718 	return err;
3719 }
3720 
3721 /**
3722  * xe_vm_bind_kernel_bo - bind a kernel BO to a VM
3723  * @vm: VM to bind the BO to
3724  * @bo: BO to bind
3725  * @q: exec queue to use for the bind (optional)
3726  * @addr: address at which to bind the BO
3727  * @cache_lvl: PAT cache level to use
3728  *
3729  * Execute a VM bind map operation on a kernel-owned BO to bind it into a
3730  * kernel-owned VM.
3731  *
3732  * Returns a dma_fence to track completion of the bind if the job to perform
3733  * it was successfully submitted, an error pointer otherwise.
3734  */
xe_vm_bind_kernel_bo(struct xe_vm * vm,struct xe_bo * bo,struct xe_exec_queue * q,u64 addr,enum xe_cache_level cache_lvl)3735 struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
3736 				       struct xe_exec_queue *q, u64 addr,
3737 				       enum xe_cache_level cache_lvl)
3738 {
3739 	struct xe_vma_ops vops;
3740 	struct drm_gpuva_ops *ops = NULL;
3741 	struct dma_fence *fence;
3742 	int err;
3743 
3744 	xe_bo_get(bo);
3745 	xe_vm_get(vm);
3746 	if (q)
3747 		xe_exec_queue_get(q);
3748 
3749 	down_write(&vm->lock);
3750 
3751 	xe_vma_ops_init(&vops, vm, q, NULL, 0);
3752 
3753 	ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
3754 				       DRM_XE_VM_BIND_OP_MAP, 0, 0,
3755 				       vm->xe->pat.idx[cache_lvl]);
3756 	if (IS_ERR(ops)) {
3757 		err = PTR_ERR(ops);
3758 		goto release_vm_lock;
3759 	}
3760 
3761 	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
3762 	if (err)
3763 		goto release_vm_lock;
3764 
3765 	xe_assert(vm->xe, !list_empty(&vops.list));
3766 
3767 	err = xe_vma_ops_alloc(&vops, false);
3768 	if (err)
3769 		goto unwind_ops;
3770 
3771 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
3772 	if (IS_ERR(fence))
3773 		err = PTR_ERR(fence);
3774 
3775 unwind_ops:
3776 	if (err && err != -ENODATA)
3777 		vm_bind_ioctl_ops_unwind(vm, &ops, 1);
3778 
3779 	xe_vma_ops_fini(&vops);
3780 	drm_gpuva_ops_free(&vm->gpuvm, ops);
3781 
3782 release_vm_lock:
3783 	up_write(&vm->lock);
3784 
3785 	if (q)
3786 		xe_exec_queue_put(q);
3787 	xe_vm_put(vm);
3788 	xe_bo_put(bo);
3789 
3790 	if (err)
3791 		fence = ERR_PTR(err);
3792 
3793 	return fence;
3794 }
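
/*
 * Illustrative sketch (not called by the driver): bind an already-created
 * kernel BO into a kernel-owned VM and synchronously wait for the bind to
 * complete. The choice of a NULL exec queue (@q is optional) and of
 * XE_CACHE_WB as a typical cache level are assumptions made for this
 * example only.
 */
static int __maybe_unused example_bind_kernel_bo_sync(struct xe_vm *vm,
						      struct xe_bo *bo,
						      u64 addr)
{
	struct dma_fence *fence;

	fence = xe_vm_bind_kernel_bo(vm, bo, NULL, addr, XE_CACHE_WB);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Block until the page-table update job has executed */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return 0;
}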
3795 
3796 /**
3797  * xe_vm_lock() - Lock the vm's dma_resv object
3798  * @vm: The struct xe_vm whose lock is to be locked
3799  * @intr: Whether the wait should be interruptible
3800  *
3801  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3802  * contended lock was interrupted. If @intr is false, the function
3803  * always returns 0.
3804  */
xe_vm_lock(struct xe_vm * vm,bool intr)3805 int xe_vm_lock(struct xe_vm *vm, bool intr)
3806 {
3807 	int ret;
3808 
3809 	if (intr)
3810 		ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
3811 	else
3812 		ret = dma_resv_lock(xe_vm_resv(vm), NULL);
3813 
3814 	return ret;
3815 }
3816 
3817 /**
3818  * xe_vm_unlock() - Unlock the vm's dma_resv object
3819  * @vm: The struct xe_vm whose lock is to be released.
3820  *
3821  * Unlock a buffer object lock that was locked by xe_vm_lock().
3822  */
xe_vm_unlock(struct xe_vm * vm)3823 void xe_vm_unlock(struct xe_vm *vm)
3824 {
3825 	dma_resv_unlock(xe_vm_resv(vm));
3826 }
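
/*
 * Illustrative sketch of the xe_vm_lock()/xe_vm_unlock() pairing: take the
 * vm's reservation lock interruptibly, run some work under it, and drop it
 * again. The do_work_locked() callback is a hypothetical placeholder for
 * whatever the caller needs to do while the resv is held.
 */
static int __maybe_unused example_with_vm_locked(struct xe_vm *vm,
						 int (*do_work_locked)(struct xe_vm *vm))
{
	int err;

	err = xe_vm_lock(vm, true);
	if (err)
		return err;	/* -EINTR if the wait was interrupted */

	err = do_work_locked(vm);

	xe_vm_unlock(vm);

	return err;
}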
3827 
3828 /**
3829  * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
3830  * address range
3831  * @vm: The VM
3832  * @start: start address
3833  * @end: end address
3834  * @tile_mask: mask of tiles whose GTs should receive a TLB invalidation
3835  *
3836  * Issue a range-based TLB invalidation for the GTs of every tile in @tile_mask.
3837  *
3838  * Returns 0 for success, negative error code otherwise.
3839  */
xe_vm_range_tilemask_tlb_inval(struct xe_vm * vm,u64 start,u64 end,u8 tile_mask)3840 int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
3841 				   u64 end, u8 tile_mask)
3842 {
3843 	struct xe_tlb_inval_fence
3844 		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
3845 	struct xe_tile *tile;
3846 	u32 fence_id = 0;
3847 	u8 id;
3848 	int err;
3849 
3850 	if (!tile_mask)
3851 		return 0;
3852 
3853 	for_each_tile(tile, vm->xe, id) {
3854 		if (!(tile_mask & BIT(id)))
3855 			continue;
3856 
3857 		xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
3858 					&fence[fence_id], true);
3859 
3860 		err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
3861 					 &fence[fence_id], start, end,
3862 					 vm->usm.asid);
3863 		if (err)
3864 			goto wait;
3865 		++fence_id;
3866 
3867 		if (!tile->media_gt)
3868 			continue;
3869 
3870 		xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
3871 					&fence[fence_id], true);
3872 
3873 		err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
3874 					 &fence[fence_id], start, end,
3875 					 vm->usm.asid);
3876 		if (err)
3877 			goto wait;
3878 		++fence_id;
3879 	}
3880 
3881 wait:
3882 	for (id = 0; id < fence_id; ++id)
3883 		xe_tlb_inval_fence_wait(&fence[id]);
3884 
3885 	return err;
3886 }
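
/*
 * Illustrative sketch (example only): invalidate the TLBs for the range
 * covered by a VMA on every tile the VMA is bound to, using the VMA's
 * tile_mask. This mirrors what xe_vm_invalidate_vma() below does after
 * zapping the PTEs, and assumes the caller already holds the locks that
 * protect the VMA.
 */
static int __maybe_unused example_inval_vma_range(struct xe_vma *vma)
{
	return xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma),
					      xe_vma_start(vma),
					      xe_vma_end(vma),
					      vma->tile_mask);
}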
3887 
3888 /**
3889  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
3890  * @vma: VMA to invalidate
3891  *
3892  * Walks the page-table leaves, zeroes the entries owned by this VMA,
3893  * issues TLB invalidations, and blocks until the invalidations have
3894  * completed.
3895  *
3896  * Returns 0 for success, negative error code otherwise.
3897  */
xe_vm_invalidate_vma(struct xe_vma * vma)3898 int xe_vm_invalidate_vma(struct xe_vma *vma)
3899 {
3900 	struct xe_device *xe = xe_vma_vm(vma)->xe;
3901 	struct xe_vm *vm = xe_vma_vm(vma);
3902 	struct xe_tile *tile;
3903 	u8 tile_mask = 0;
3904 	int ret = 0;
3905 	u8 id;
3906 
3907 	xe_assert(xe, !xe_vma_is_null(vma));
3908 	xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
3909 	trace_xe_vma_invalidate(vma);
3910 
3911 	vm_dbg(&vm->xe->drm,
3912 	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
3913 		xe_vma_start(vma), xe_vma_size(vma));
3914 
3915 	/*
3916 	 * Check that we don't race with page-table updates, tile_invalidated
3917 	 * update is safe
3918 	 */
3919 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
3920 		if (xe_vma_is_userptr(vma)) {
3921 			lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
3922 				       (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
3923 					lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
3924 
3925 			WARN_ON_ONCE(!mmu_interval_check_retry
3926 				     (&to_userptr_vma(vma)->userptr.notifier,
3927 				      to_userptr_vma(vma)->userptr.pages.notifier_seq));
3928 			WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
3929 							     DMA_RESV_USAGE_BOOKKEEP));
3930 
3931 		} else {
3932 			xe_bo_assert_held(xe_vma_bo(vma));
3933 		}
3934 	}
3935 
3936 	for_each_tile(tile, xe, id)
3937 		if (xe_pt_zap_ptes(tile, vma))
3938 			tile_mask |= BIT(id);
3939 
3940 	xe_device_wmb(xe);
3941 
3942 	ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
3943 					     xe_vma_end(vma), tile_mask);
3944 
3945 	/* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
3946 	WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
3947 
3948 	return ret;
3949 }
3950 
xe_vm_validate_protected(struct xe_vm * vm)3951 int xe_vm_validate_protected(struct xe_vm *vm)
3952 {
3953 	struct drm_gpuva *gpuva;
3954 	int err = 0;
3955 
3956 	if (!vm)
3957 		return -ENODEV;
3958 
3959 	mutex_lock(&vm->snap_mutex);
3960 
3961 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
3962 		struct xe_vma *vma = gpuva_to_vma(gpuva);
3963 		struct xe_bo *bo = vma->gpuva.gem.obj ?
3964 			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
3965 
3966 		if (!bo)
3967 			continue;
3968 
3969 		if (xe_bo_is_protected(bo)) {
3970 			err = xe_pxp_bo_key_check(vm->xe->pxp, bo);
3971 			if (err)
3972 				break;
3973 		}
3974 	}
3975 
3976 	mutex_unlock(&vm->snap_mutex);
3977 	return err;
3978 }
3979 
3980 struct xe_vm_snapshot {
3981 	unsigned long num_snaps;
3982 	struct {
3983 		u64 ofs, bo_ofs;
3984 		unsigned long len;
3985 		struct xe_bo *bo;
3986 		void *data;
3987 		struct mm_struct *mm;
3988 	} snap[];
3989 };
3990 
xe_vm_snapshot_capture(struct xe_vm * vm)3991 struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
3992 {
3993 	unsigned long num_snaps = 0, i;
3994 	struct xe_vm_snapshot *snap = NULL;
3995 	struct drm_gpuva *gpuva;
3996 
3997 	if (!vm)
3998 		return NULL;
3999 
4000 	mutex_lock(&vm->snap_mutex);
4001 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4002 		if (gpuva->flags & XE_VMA_DUMPABLE)
4003 			num_snaps++;
4004 	}
4005 
4006 	if (num_snaps)
4007 		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
4008 	if (!snap) {
4009 		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
4010 		goto out_unlock;
4011 	}
4012 
4013 	snap->num_snaps = num_snaps;
4014 	i = 0;
4015 	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
4016 		struct xe_vma *vma = gpuva_to_vma(gpuva);
4017 		struct xe_bo *bo = vma->gpuva.gem.obj ?
4018 			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
4019 
4020 		if (!(gpuva->flags & XE_VMA_DUMPABLE))
4021 			continue;
4022 
4023 		snap->snap[i].ofs = xe_vma_start(vma);
4024 		snap->snap[i].len = xe_vma_size(vma);
4025 		if (bo) {
4026 			snap->snap[i].bo = xe_bo_get(bo);
4027 			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
4028 		} else if (xe_vma_is_userptr(vma)) {
4029 			struct mm_struct *mm =
4030 				to_userptr_vma(vma)->userptr.notifier.mm;
4031 
4032 			if (mmget_not_zero(mm))
4033 				snap->snap[i].mm = mm;
4034 			else
4035 				snap->snap[i].data = ERR_PTR(-EFAULT);
4036 
4037 			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
4038 		} else {
4039 			snap->snap[i].data = ERR_PTR(-ENOENT);
4040 		}
4041 		i++;
4042 	}
4043 
4044 out_unlock:
4045 	mutex_unlock(&vm->snap_mutex);
4046 	return snap;
4047 }
4048 
xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot * snap)4049 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
4050 {
4051 	if (IS_ERR_OR_NULL(snap))
4052 		return;
4053 
4054 	for (int i = 0; i < snap->num_snaps; i++) {
4055 		struct xe_bo *bo = snap->snap[i].bo;
4056 		int err;
4057 
4058 		if (IS_ERR(snap->snap[i].data))
4059 			continue;
4060 
4061 		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
4062 		if (!snap->snap[i].data) {
4063 			snap->snap[i].data = ERR_PTR(-ENOMEM);
4064 			goto cleanup_bo;
4065 		}
4066 
4067 		if (bo) {
4068 			err = xe_bo_read(bo, snap->snap[i].bo_ofs,
4069 					 snap->snap[i].data, snap->snap[i].len);
4070 		} else {
4071 			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
4072 
4073 			kthread_use_mm(snap->snap[i].mm);
4074 			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
4075 				err = 0;
4076 			else
4077 				err = -EFAULT;
4078 			kthread_unuse_mm(snap->snap[i].mm);
4079 
4080 			mmput(snap->snap[i].mm);
4081 			snap->snap[i].mm = NULL;
4082 		}
4083 
4084 		if (err) {
4085 			kvfree(snap->snap[i].data);
4086 			snap->snap[i].data = ERR_PTR(err);
4087 		}
4088 
4089 cleanup_bo:
4090 		xe_bo_put(bo);
4091 		snap->snap[i].bo = NULL;
4092 	}
4093 }
4094 
xe_vm_snapshot_print(struct xe_vm_snapshot * snap,struct drm_printer * p)4095 void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
4096 {
4097 	unsigned long i, j;
4098 
4099 	if (IS_ERR_OR_NULL(snap)) {
4100 		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
4101 		return;
4102 	}
4103 
4104 	for (i = 0; i < snap->num_snaps; i++) {
4105 		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
4106 
4107 		if (IS_ERR(snap->snap[i].data)) {
4108 			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
4109 				   PTR_ERR(snap->snap[i].data));
4110 			continue;
4111 		}
4112 
4113 		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
4114 
4115 		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
4116 			u32 *val = snap->snap[i].data + j;
4117 			char dumped[ASCII85_BUFSZ];
4118 
4119 			drm_puts(p, ascii85_encode(*val, dumped));
4120 		}
4121 
4122 		drm_puts(p, "\n");
4123 
4124 		if (drm_coredump_printer_is_full(p))
4125 			return;
4126 	}
4127 }
4128 
xe_vm_snapshot_free(struct xe_vm_snapshot * snap)4129 void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
4130 {
4131 	unsigned long i;
4132 
4133 	if (IS_ERR_OR_NULL(snap))
4134 		return;
4135 
4136 	for (i = 0; i < snap->num_snaps; i++) {
4137 		if (!IS_ERR(snap->snap[i].data))
4138 			kvfree(snap->snap[i].data);
4139 		xe_bo_put(snap->snap[i].bo);
4140 		if (snap->snap[i].mm)
4141 			mmput(snap->snap[i].mm);
4142 	}
4143 	kvfree(snap);
4144 }
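
/*
 * Illustrative sketch of the snapshot lifecycle: capture the VMA layout
 * (GFP_NOWAIT, usable from the coredump capture path), copy the backing
 * BO/userptr contents later from process context, print the ascii85-encoded
 * dump through a drm_printer, and free everything. Using drm_info_printer()
 * here is an assumption for the example; the real consumer is the
 * devcoredump code.
 */
static void __maybe_unused example_snapshot_roundtrip(struct xe_vm *vm)
{
	struct drm_printer p = drm_info_printer(vm->xe->drm.dev);
	struct xe_vm_snapshot *snap;

	snap = xe_vm_snapshot_capture(vm);	/* may return ERR_PTR/NULL */
	xe_vm_snapshot_capture_delayed(snap);	/* reads BO/userptr data */
	xe_vm_snapshot_print(snap, &p);		/* ascii85-encoded dump */
	xe_vm_snapshot_free(snap);		/* ERR_PTR/NULL tolerant */
}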
4145 
4146 /**
4147  * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
4148  * @xe: Pointer to the XE device structure
4149  * @vma: Pointer to the virtual memory area (VMA) structure
4150  * @is_atomic: True when called from the page-fault path for an atomic access
4151  *
4152  * This function determines whether the given VMA needs to be migrated to
4153  * VRAM in order to perform an atomic GPU operation.
4154  *
4155  * Return:
4156  *   1        - Migration to VRAM is required
4157  *   0        - Migration is not required
4158  *   -EACCES  - Invalid access for atomic memory attr
4159  *
4160  */
xe_vma_need_vram_for_atomic(struct xe_device * xe,struct xe_vma * vma,bool is_atomic)4161 int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
4162 {
4163 	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
4164 					     vma->attr.atomic_access;
4165 
4166 	if (!IS_DGFX(xe) || !is_atomic)
4167 		return false;
4168 
4169 	/*
4170 	 * NOTE: The checks implemented here are platform-specific. For
4171 	 * instance, on a device supporting CXL atomics, these would ideally
4172 	 * work universally without additional handling.
4173 	 */
4174 	switch (atomic_access) {
4175 	case DRM_XE_ATOMIC_DEVICE:
4176 		return !xe->info.has_device_atomics_on_smem;
4177 
4178 	case DRM_XE_ATOMIC_CPU:
4179 		return -EACCES;
4180 
4181 	case DRM_XE_ATOMIC_UNDEFINED:
4182 	case DRM_XE_ATOMIC_GLOBAL:
4183 	default:
4184 		return 1;
4185 	}
4186 }
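
/*
 * Illustrative sketch of how a page-fault handler might consume the
 * tri-state result above: fail the fault on a negative error, migrate on 1,
 * and fall through on 0. example_migrate_to_vram() is a hypothetical
 * placeholder for the actual migration step.
 */
static int __maybe_unused example_handle_atomic_fault(struct xe_device *xe,
						      struct xe_vma *vma,
						      bool is_atomic,
						      int (*example_migrate_to_vram)(struct xe_vma *vma))
{
	int need_vram = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);

	if (need_vram < 0)
		return need_vram;	/* e.g. -EACCES for CPU-only atomics */

	return need_vram ? example_migrate_to_vram(vma) : 0;
}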
4187 
xe_vm_alloc_vma(struct xe_vm * vm,struct drm_gpuvm_map_req * map_req,bool is_madvise)4188 static int xe_vm_alloc_vma(struct xe_vm *vm,
4189 			   struct drm_gpuvm_map_req *map_req,
4190 			   bool is_madvise)
4191 {
4192 	struct xe_vma_ops vops;
4193 	struct drm_gpuva_ops *ops = NULL;
4194 	struct drm_gpuva_op *__op;
4195 	unsigned int vma_flags = 0;
4196 	bool remap_op = false;
4197 	struct xe_vma_mem_attr tmp_attr;
4198 	u16 default_pat;
4199 	int err;
4200 
4201 	lockdep_assert_held_write(&vm->lock);
4202 
4203 	if (is_madvise)
4204 		ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
4205 	else
4206 		ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
4207 
4208 	if (IS_ERR(ops))
4209 		return PTR_ERR(ops);
4210 
4211 	if (list_empty(&ops->list)) {
4212 		err = 0;
4213 		goto free_ops;
4214 	}
4215 
4216 	drm_gpuva_for_each_op(__op, ops) {
4217 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4218 		struct xe_vma *vma = NULL;
4219 
4220 		if (!is_madvise) {
4221 			if (__op->op == DRM_GPUVA_OP_UNMAP) {
4222 				vma = gpuva_to_vma(op->base.unmap.va);
4223 				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
4224 				default_pat = vma->attr.default_pat_index;
4225 				vma_flags = vma->gpuva.flags;
4226 			}
4227 
4228 			if (__op->op == DRM_GPUVA_OP_REMAP) {
4229 				vma = gpuva_to_vma(op->base.remap.unmap->va);
4230 				default_pat = vma->attr.default_pat_index;
4231 				vma_flags = vma->gpuva.flags;
4232 			}
4233 
4234 			if (__op->op == DRM_GPUVA_OP_MAP) {
4235 				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4236 				op->map.pat_index = default_pat;
4237 			}
4238 		} else {
4239 			if (__op->op == DRM_GPUVA_OP_REMAP) {
4240 				vma = gpuva_to_vma(op->base.remap.unmap->va);
4241 				xe_assert(vm->xe, !remap_op);
4242 				xe_assert(vm->xe, xe_vma_has_no_bo(vma));
4243 				remap_op = true;
4244 				vma_flags = vma->gpuva.flags;
4245 			}
4246 
4247 			if (__op->op == DRM_GPUVA_OP_MAP) {
4248 				xe_assert(vm->xe, remap_op);
4249 				remap_op = false;
4250 				/*
4251 				 * For madvise ops, DRM_GPUVA_OP_MAP always
4252 				 * comes after DRM_GPUVA_OP_REMAP, so
4253 				 * propagate the flags from the vma being
4254 				 * unmapped.
4255 				 */
4256 				op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
4257 			}
4258 		}
4259 		print_op(vm->xe, __op);
4260 	}
4261 
4262 	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
4263 
4264 	if (is_madvise)
4265 		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
4266 
4267 	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
4268 	if (err)
4269 		goto unwind_ops;
4270 
4271 	xe_vm_lock(vm, false);
4272 
4273 	drm_gpuva_for_each_op(__op, ops) {
4274 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
4275 		struct xe_vma *vma;
4276 
4277 		if (__op->op == DRM_GPUVA_OP_UNMAP) {
4278 			vma = gpuva_to_vma(op->base.unmap.va);
4279 			/* There should be no unmap for madvise */
4280 			if (is_madvise)
4281 				XE_WARN_ON("UNEXPECTED UNMAP");
4282 
4283 			xe_vma_destroy(vma, NULL);
4284 		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
4285 			vma = gpuva_to_vma(op->base.remap.unmap->va);
4286 			/* For madvise ops, store the attributes of the REMAP-unmapped
4287 			 * VMA so they can be assigned to the newly created MAP vma.
4288 			 */
4289 			if (is_madvise)
4290 				tmp_attr = vma->attr;
4291 
4292 			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
4293 		} else if (__op->op == DRM_GPUVA_OP_MAP) {
4294 			vma = op->map.vma;
4295 			/* For madvise calls, a MAP op always follows a REMAP op.
4296 			 * Therefore tmp_attr always holds sane values, making it safe to
4297 			 * copy them to the new vma.
4298 			 */
4299 			if (is_madvise)
4300 				vma->attr = tmp_attr;
4301 		}
4302 	}
4303 
4304 	xe_vm_unlock(vm);
4305 	drm_gpuva_ops_free(&vm->gpuvm, ops);
4306 	return 0;
4307 
4308 unwind_ops:
4309 	vm_bind_ioctl_ops_unwind(vm, &ops, 1);
4310 free_ops:
4311 	drm_gpuva_ops_free(&vm->gpuvm, ops);
4312 	return err;
4313 }
4314 
4315 /**
4316  * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
4317  * @vm: Pointer to the xe_vm structure
4318  * @start: Starting input address
4319  * @range: Size of the input range
4320  *
4321  * This function splits an existing VMA to create new VMAs for the user-provided input range.
4322  *
4323  * Return: 0 on success, negative error code on failure
4324  */
xe_vm_alloc_madvise_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4325 int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4326 {
4327 	struct drm_gpuvm_map_req map_req = {
4328 		.map.va.addr = start,
4329 		.map.va.range = range,
4330 	};
4331 
4332 	lockdep_assert_held_write(&vm->lock);
4333 
4334 	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
4335 
4336 	return xe_vm_alloc_vma(vm, &map_req, true);
4337 }
4338 
4339 /**
4340  * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
4341  * @vm: Pointer to the xe_vm structure
4342  * @start: Starting input address
4343  * @range: Size of the input range
4344  *
4345  * This function splits/merges existing VMAs to create new VMAs for the user-provided input range.
4346  *
4347  * Return: 0 on success, negative error code on failure
4348  */
xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm * vm,uint64_t start,uint64_t range)4349 int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
4350 {
4351 	struct drm_gpuvm_map_req map_req = {
4352 		.map.va.addr = start,
4353 		.map.va.range = range,
4354 	};
4355 
4356 	lockdep_assert_held_write(&vm->lock);
4357 
4358 	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
4359 	       start, range);
4360 
4361 	return xe_vm_alloc_vma(vm, &map_req, false);
4362 }
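
/*
 * Illustrative sketch (example only): both helpers above require the vm lock
 * to be held in write mode. A caller preparing a madvise operation over a
 * user range would typically look like this; @start and @range are assumed
 * to be suitably aligned for the VM.
 */
static int __maybe_unused example_split_for_madvise(struct xe_vm *vm,
						    u64 start, u64 range)
{
	int err;

	err = down_write_killable(&vm->lock);
	if (err)
		return err;

	err = xe_vm_alloc_madvise_vma(vm, start, range);

	up_write(&vm->lock);

	return err;
}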
4363