xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c (revision 758a868043dcb07eca923bc451c16da3e73dc47c)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/dma-fence-array.h>
30 #include <linux/interval_tree_generic.h>
31 #include <linux/idr.h>
32 #include <linux/dma-buf.h>
33 
34 #include <drm/amdgpu_drm.h>
35 #include <drm/drm_drv.h>
36 #include <drm/ttm/ttm_tt.h>
37 #include <drm/drm_exec.h>
38 #include "amdgpu.h"
39 #include "amdgpu_vm.h"
40 #include "amdgpu_trace.h"
41 #include "amdgpu_amdkfd.h"
42 #include "amdgpu_gmc.h"
43 #include "amdgpu_xgmi.h"
44 #include "amdgpu_dma_buf.h"
45 #include "amdgpu_res_cursor.h"
46 #include "kfd_svm.h"
47 
48 /**
49  * DOC: GPUVM
50  *
51  * GPUVM is the MMU functionality provided on the GPU.
52  * GPUVM is similar to the legacy GART on older asics, however
53  * rather than there being a single global GART table
54  * for the entire GPU, there can be multiple GPUVM page tables active
 * at any given time.  The GPUVM page tables can contain a mix of
56  * VRAM pages and system pages (both memory and MMIO) and system pages
57  * can be mapped as snooped (cached system pages) or unsnooped
58  * (uncached system pages).
59  *
60  * Each active GPUVM has an ID associated with it and there is a page table
61  * linked with each VMID.  When executing a command buffer,
62  * the kernel tells the engine what VMID to use for that command
63  * buffer.  VMIDs are allocated dynamically as commands are submitted.
64  * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
66  * command buffers and a VMID is assigned.
67  * The hardware supports up to 16 active GPUVMs at any given time.
68  *
69  * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
70  * on the ASIC family.  GPUVM supports RWX attributes on each page as well
71  * as other features such as encryption and caching attributes.
72  *
73  * VMID 0 is special.  It is the GPUVM used for the kernel driver.  In
74  * addition to an aperture managed by a page table, VMID 0 also has
75  * several other apertures.  There is an aperture for direct access to VRAM
76  * and there is a legacy AGP aperture which just forwards accesses directly
77  * to the matching system physical addresses (or IOVAs when an IOMMU is
78  * present).  These apertures provide direct access to these memories without
79  * incurring the overhead of a page table.  VMID 0 is used by the kernel
80  * driver for tasks like memory management.
81  *
82  * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
83  * For user applications, each application can have their own unique GPUVM
84  * address space.  The application manages the address space and the kernel
 * driver manages the GPUVM page tables for each process.  If a GPU client
86  * accesses an invalid page, it will generate a GPU page fault, similar to
87  * accessing an invalid page on a CPU.
88  */
89 
/*
 * Accessors handed to INTERVAL_TREE_DEFINE() below: they select the first and
 * the last key of a mapping node. They are only needed for the template
 * expansion and are undefined again right after it.
 */
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)

/*
 * Instantiate file-local (static) interval tree helpers with the
 * amdgpu_vm_it prefix for struct amdgpu_bo_va_mapping. Nodes are linked
 * through the rb member, keys are uint64_t and __subtree_last is the member
 * passed as the subtree-last field.
 */
INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
		     START, LAST, static, amdgpu_vm_it)

#undef START
#undef LAST
98 
/**
 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
 *
 * Bundles the device pointer with a &struct dma_fence_cb node so both can be
 * handed around as one object.
 */
struct amdgpu_prt_cb {

	/**
	 * @adev: amdgpu device this helper belongs to
	 */
	struct amdgpu_device *adev;

	/**
	 * @cb: dma_fence callback node
	 */
	struct dma_fence_cb cb;
};
114 
/**
 * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
 *
 * amdgpu_vm_tlb_seq_cb() recovers this structure from @cb, increments the
 * tlb_seq counter of @vm and then frees the structure with kfree().
 */
struct amdgpu_vm_tlb_seq_struct {
	/**
	 * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
	 */
	struct amdgpu_vm *vm;

	/**
	 * @cb: dma_fence callback node embedded in this helper
	 */
	struct dma_fence_cb cb;
};
129 
130 /**
131  * amdgpu_vm_assert_locked - check if VM is correctly locked
132  * @vm: the VM which schould be tested
133  *
134  * Asserts that the VM root PD is locked.
135  */
136 static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
137 {
138 	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
139 }
140 
141 /* Initialize the amdgpu_vm_bo_status object */
142 static void amdgpu_vm_bo_status_init(struct amdgpu_vm_bo_status *lists)
143 {
144 	INIT_LIST_HEAD(&lists->evicted);
145 	INIT_LIST_HEAD(&lists->moved);
146 	INIT_LIST_HEAD(&lists->idle);
147 }
148 
149 /*
150  * Make sure we have the lock to modify the vm_bo status and return the object
151  * with the status lists.
152  */
153 static struct amdgpu_vm_bo_status *
154 amdgpu_vm_bo_lock_lists(struct amdgpu_vm_bo_base *vm_bo)
155 {
156 	struct amdgpu_vm *vm = vm_bo->vm;
157 	struct amdgpu_bo *bo = vm_bo->bo;
158 
159 	if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
160 		/* No extra locking needed, protected by the root PD resv lock */
161 		amdgpu_vm_assert_locked(vm);
162 
163 		if (bo->tbo.type == ttm_bo_type_kernel)
164 			return &vm->kernel;
165 
166 		return &vm->always_valid;
167 	}
168 
169 	spin_lock(&vm_bo->vm->individual_lock);
170 	return &vm->individual;
171 }
172 
173 /* Eventually unlock the status list lock again */
174 static void amdgpu_vm_bo_unlock_lists(struct amdgpu_vm_bo_base *vm_bo)
175 {
176 	if (amdgpu_vm_is_bo_always_valid(vm_bo->vm, vm_bo->bo))
177 		amdgpu_vm_assert_locked(vm_bo->vm);
178 	else
179 		spin_unlock(&vm_bo->vm->individual_lock);
180 }
181 
182 /**
183  * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
184  *
185  * @vm: VM to test against.
186  * @bo: BO to be tested.
187  *
188  * Returns true if the BO shares the dma_resv object with the root PD and is
189  * always guaranteed to be valid inside the VM.
190  */
191 bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
192 {
193 	return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
194 }
195 
196 /**
197  * amdgpu_vm_bo_evicted - vm_bo is evicted
198  *
199  * @vm_bo: vm_bo which is evicted
200  *
201  * State for vm_bo objects meaning the underlying BO was evicted and need to
202  * move in place again.
203  */
204 static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
205 {
206 	struct amdgpu_vm_bo_status *lists;
207 
208 	lists = amdgpu_vm_bo_lock_lists(vm_bo);
209 	vm_bo->moved = true;
210 	list_move(&vm_bo->vm_status, &lists->evicted);
211 	amdgpu_vm_bo_unlock_lists(vm_bo);
212 }
213 /**
214  * amdgpu_vm_bo_moved - vm_bo is moved
215  *
216  * @vm_bo: vm_bo which is moved
217  *
218  * State for vm_bo objects meaning the underlying BO was moved but the new
219  * location not yet reflected in the page tables.
220  */
221 static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
222 {
223 	struct amdgpu_vm_bo_status *lists;
224 	struct amdgpu_bo *bo = vm_bo->bo;
225 
226 	/*
227 	 * The root PD doesn't have a parent PDE and goes directly into the
228 	 * idle state.
229 	 */
230 	lists = amdgpu_vm_bo_lock_lists(vm_bo);
231 	if (bo && bo->tbo.type == ttm_bo_type_kernel && !bo->parent) {
232 		vm_bo->moved = false;
233 		list_move(&vm_bo->vm_status, &lists->idle);
234 	} else {
235 		vm_bo->moved = true;
236 		list_move(&vm_bo->vm_status, &lists->moved);
237 	}
238 	amdgpu_vm_bo_unlock_lists(vm_bo);
239 }
240 
241 /**
242  * amdgpu_vm_bo_idle - vm_bo is idle
243  *
244  * @vm_bo: vm_bo which is now idle
245  *
246  * State for vm_bo objects meaning we are done with the state machine and no
247  * further action is necessary.
248  */
249 static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
250 {
251 	struct amdgpu_vm_bo_status *lists;
252 
253 	lists = amdgpu_vm_bo_lock_lists(vm_bo);
254 	if (!amdgpu_vm_is_bo_always_valid(vm_bo->vm, vm_bo->bo))
255 		vm_bo->moved = false;
256 	list_move(&vm_bo->vm_status, &lists->idle);
257 	amdgpu_vm_bo_unlock_lists(vm_bo);
258 }
259 
260 /**
261  * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
262  * @vm: the VM which state machine to reset
263  *
264  * Move all vm_bo object in the VM into a state where their location will be
265  * updated in the page tables again.
266  */
267 static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
268 {
269 	struct amdgpu_vm_bo_base *vm_bo, *tmp;
270 
271 	/*
272 	 * Don't use list splice here, we need the special handling for the root
273 	 * PD and set the moved flag appropriately.
274 	 */
275 	amdgpu_vm_assert_locked(vm);
276 	list_for_each_entry_safe(vm_bo, tmp, &vm->kernel.idle, vm_status)
277 		amdgpu_vm_bo_moved(vm_bo);
278 	list_for_each_entry_safe(vm_bo, tmp, &vm->always_valid.idle, vm_status)
279 		amdgpu_vm_bo_moved(vm_bo);
280 
281 	spin_lock(&vm->individual_lock);
282 	list_for_each_entry_safe(vm_bo, tmp, &vm->individual.idle, vm_status) {
283 		vm_bo->moved = true;
284 		list_move(&vm_bo->vm_status, &vm->individual.moved);
285 	}
286 	spin_unlock(&vm->individual_lock);
287 }
288 
289 /**
290  * amdgpu_vm_update_shared - helper to update shared memory stat
291  * @base: base structure for tracking BO usage in a VM
292  *
293  * Takes the vm stats_lock and updates the shared memory stat. If the basic
294  * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
295  * as well.
296  */
297 static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
298 {
299 	struct amdgpu_vm *vm = base->vm;
300 	struct amdgpu_bo *bo = base->bo;
301 	uint64_t size = amdgpu_bo_size(bo);
302 	uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
303 	bool shared;
304 
305 	dma_resv_assert_held(bo->tbo.base.resv);
306 	spin_lock(&vm->stats_lock);
307 	shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
308 	if (base->shared != shared) {
309 		base->shared = shared;
310 		if (shared) {
311 			vm->stats[bo_memtype].drm.shared += size;
312 			vm->stats[bo_memtype].drm.private -= size;
313 		} else {
314 			vm->stats[bo_memtype].drm.shared -= size;
315 			vm->stats[bo_memtype].drm.private += size;
316 		}
317 	}
318 	spin_unlock(&vm->stats_lock);
319 }
320 
321 /**
322  * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
323  * @bo: amdgpu buffer object
324  *
325  * Update the per VM stats for all the vm if needed from private to shared or
326  * vice versa.
327  */
328 void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
329 {
330 	struct amdgpu_vm_bo_base *base;
331 
332 	for (base = bo->vm_bo; base; base = base->next)
333 		amdgpu_vm_update_shared(base);
334 }
335 
336 /**
337  * amdgpu_vm_update_stats_locked - helper to update normal memory stat
338  * @base: base structure for tracking BO usage in a VM
339  * @res:  the ttm_resource to use for the purpose of accounting, may or may not
340  *        be bo->tbo.resource
341  * @sign: if we should add (+1) or subtract (-1) from the stat
342  *
343  * Caller need to have the vm stats_lock held. Useful for when multiple update
344  * need to happen at the same time.
345  */
346 static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
347 					  struct ttm_resource *res, int sign)
348 {
349 	struct amdgpu_vm *vm = base->vm;
350 	struct amdgpu_bo *bo = base->bo;
351 	int64_t size = sign * amdgpu_bo_size(bo);
352 	uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
353 
354 	/* For drm-total- and drm-shared-, BO are accounted by their preferred
355 	 * placement, see also amdgpu_bo_mem_stats_placement.
356 	 */
357 	if (base->shared)
358 		vm->stats[bo_memtype].drm.shared += size;
359 	else
360 		vm->stats[bo_memtype].drm.private += size;
361 
362 	if (res && res->mem_type < __AMDGPU_PL_NUM) {
363 		uint32_t res_memtype = res->mem_type;
364 
365 		vm->stats[res_memtype].drm.resident += size;
366 		/* BO only count as purgeable if it is resident,
367 		 * since otherwise there's nothing to purge.
368 		 */
369 		if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
370 			vm->stats[res_memtype].drm.purgeable += size;
371 		if (!(bo->preferred_domains &
372 		      amdgpu_mem_type_to_domain(res_memtype)))
373 			vm->stats[bo_memtype].evicted += size;
374 	}
375 }
376 
377 /**
378  * amdgpu_vm_update_stats - helper to update normal memory stat
379  * @base: base structure for tracking BO usage in a VM
380  * @res:  the ttm_resource to use for the purpose of accounting, may or may not
381  *        be bo->tbo.resource
382  * @sign: if we should add (+1) or subtract (-1) from the stat
383  *
384  * Updates the basic memory stat when bo is added/deleted/moved.
385  */
386 void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
387 			    struct ttm_resource *res, int sign)
388 {
389 	struct amdgpu_vm *vm = base->vm;
390 
391 	spin_lock(&vm->stats_lock);
392 	amdgpu_vm_update_stats_locked(base, res, sign);
393 	spin_unlock(&vm->stats_lock);
394 }
395 
396 /**
397  * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
398  *
399  * @base: base structure for tracking BO usage in a VM
400  * @vm: vm to which bo is to be added
401  * @bo: amdgpu buffer object
402  *
403  * Initialize a bo_va_base structure and add it to the appropriate lists
404  *
405  */
406 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
407 			    struct amdgpu_vm *vm, struct amdgpu_bo *bo)
408 {
409 	base->vm = vm;
410 	base->bo = bo;
411 	base->next = NULL;
412 	INIT_LIST_HEAD(&base->vm_status);
413 
414 	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
415 	if (!bo)
416 		return;
417 
418 	base->next = bo->vm_bo;
419 	bo->vm_bo = base;
420 
421 	spin_lock(&vm->stats_lock);
422 	base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
423 	amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
424 	spin_unlock(&vm->stats_lock);
425 
426 	if (!amdgpu_vm_is_bo_always_valid(vm, bo)) {
427 		amdgpu_vm_bo_idle(base);
428 		return;
429 	}
430 
431 	ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
432 
433 	/*
434 	 * When a per VM isn't in the desired domain put it into the evicted
435 	 * state to make sure that it gets validated on the next best occasion.
436 	 */
437 	if (bo->preferred_domains &
438 	    amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
439 		amdgpu_vm_bo_moved(base);
440 	else
441 		amdgpu_vm_bo_evicted(base);
442 }
443 
444 /**
445  * amdgpu_vm_lock_pd - lock PD in drm_exec
446  *
447  * @vm: vm providing the BOs
448  * @exec: drm execution context
449  * @num_fences: number of extra fences to reserve
450  *
451  * Lock the VM root PD in the DRM execution context.
452  */
453 int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
454 		      unsigned int num_fences)
455 {
456 	/* We need at least two fences for the VM PD/PT updates */
457 	return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
458 				    2 + num_fences);
459 }
460 
/**
 * amdgpu_vm_lock_individual - lock all BOs on the individual idle list
 * @vm: vm providing the BOs
 * @exec: drm execution context
 * @num_fences: number of extra fences to reserve
 *
 * Lock the BOs on the individual idle list in the DRM execution context.
 * The individual_lock spinlock is dropped around each drm_exec_prepare_obj()
 * call and is not held when the function returns.
 *
 * Returns:
 * 0 on success, otherwise the error returned by drm_exec_prepare_obj().
 */
int amdgpu_vm_lock_individual(struct amdgpu_vm *vm, struct drm_exec *exec,
			      unsigned int num_fences)
{
	struct list_head *prev = &vm->individual.idle;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int ret;

	/* We can only trust prev->next while holding the lock */
	spin_lock(&vm->individual_lock);
	while (!list_is_head(prev->next, &vm->individual.idle)) {
		bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);

		bo = bo_va->base.bo;
		if (bo) {
			/*
			 * Hold a reference while the spinlock is dropped for
			 * the prepare call.
			 */
			amdgpu_bo_ref(bo);
			spin_unlock(&vm->individual_lock);

			ret = drm_exec_prepare_obj(exec, &bo->tbo.base, num_fences);
			amdgpu_bo_unref(&bo);
			/* The spinlock is already released on this path */
			if (unlikely(ret))
				return ret;

			spin_lock(&vm->individual_lock);
		}
		/*
		 * NOTE(review): prev->next is read again after the lock was
		 * re-taken. This presumably relies on the entry just prepared
		 * still being the successor of prev - confirm that entries
		 * can't be moved off or inserted into the list behind prev
		 * while the lock was dropped.
		 */
		prev = prev->next;
	}
	spin_unlock(&vm->individual_lock);

	return 0;
}
500 
501 /**
502  * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
503  *
504  * @adev: amdgpu device pointer
505  * @vm: vm providing the BOs
506  *
507  * Move all BOs to the end of LRU and remember their positions to put them
508  * together.
509  */
510 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
511 				struct amdgpu_vm *vm)
512 {
513 	spin_lock(&adev->mman.bdev.lru_lock);
514 	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
515 	spin_unlock(&adev->mman.bdev.lru_lock);
516 }
517 
518 /* Create scheduler entities for page table updates */
519 static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
520 				   struct amdgpu_vm *vm)
521 {
522 	int r;
523 
524 	r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
525 				  adev->vm_manager.vm_pte_scheds,
526 				  adev->vm_manager.vm_pte_num_scheds, NULL);
527 	if (r)
528 		goto error;
529 
530 	return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
531 				     adev->vm_manager.vm_pte_scheds,
532 				     adev->vm_manager.vm_pte_num_scheds, NULL);
533 
534 error:
535 	drm_sched_entity_destroy(&vm->immediate);
536 	return r;
537 }
538 
539 /* Destroy the entities for page table updates again */
540 static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
541 {
542 	drm_sched_entity_destroy(&vm->immediate);
543 	drm_sched_entity_destroy(&vm->delayed);
544 }
545 
546 /**
547  * amdgpu_vm_generation - return the page table re-generation counter
548  * @adev: the amdgpu_device
549  * @vm: optional VM to check, might be NULL
550  *
551  * Returns a page table re-generation token to allow checking if submissions
552  * are still valid to use this VM. The VM parameter might be NULL in which case
553  * just the VRAM lost counter will be used.
554  */
555 uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
556 {
557 	uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
558 
559 	if (!vm)
560 		return result;
561 
562 	result += lower_32_bits(vm->generation);
563 	/* Add one if the page tables will be re-generated on next CS */
564 	if (drm_sched_entity_error(&vm->delayed))
565 		++result;
566 
567 	return result;
568 }
569 
/**
 * amdgpu_vm_validate - validate evicted BOs tracked in the VM
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 * @ticket: optional reservation ticket used to reserve the VM
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs and per-VM BOs on command submission if
 * necessary. If a ticket is given, also try to validate evicted user queue
 * BOs. They must already be reserved with the given ticket.
 *
 * If the generation token of the VM changed, the vm_bo state machine is reset
 * and the page table update entities are re-created first. The root PD must
 * be locked by the caller.
 *
 * Returns:
 * Validation result: 0 on success, otherwise the first error returned by
 * amdgpu_vm_init_entities() or the @validate callback.
 */
int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		       struct ww_acquire_ctx *ticket,
		       int (*validate)(void *p, struct amdgpu_bo *bo),
		       void *param)
{
	uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
	struct amdgpu_vm_bo_base *bo_base, *tmp;
	int r;

	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
	/*
	 * The generation token changed: queue everything for a page table
	 * update again and start over with fresh scheduler entities.
	 */
	if (vm->generation != new_vm_generation) {
		vm->generation = new_vm_generation;
		amdgpu_vm_bo_reset_state_machine(vm);
		amdgpu_vm_fini_entities(vm);
		r = amdgpu_vm_init_entities(adev, vm);
		if (r)
			return r;
	}

	/* Evicted kernel BOs: validate, map the table and mark as moved */
	list_for_each_entry_safe(bo_base, tmp, &vm->kernel.evicted, vm_status) {
		r = validate(param, bo_base->bo);
		if (r)
			return r;

		vm->update_funcs->map_table(to_amdgpu_bo_vm(bo_base->bo));
		amdgpu_vm_bo_moved(bo_base);
	}

	/*
	 * As soon as all page tables are in place we can start updating them
	 * again.
	 */
	amdgpu_vm_eviction_lock(vm);
	vm->evicting = false;
	amdgpu_vm_eviction_unlock(vm);

	/* Evicted always valid BOs: validate and mark as moved */
	list_for_each_entry_safe(bo_base, tmp, &vm->always_valid.evicted,
				 vm_status) {
		r = validate(param, bo_base->bo);
		if (r)
			return r;

		amdgpu_vm_bo_moved(bo_base);
	}

	/* Individual BOs are only handled when a ticket was given */
	if (!ticket)
		return 0;

	spin_lock(&vm->individual_lock);
restart:
	list_for_each_entry(bo_base, &vm->individual.evicted, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		/* Skip BOs which are not locked with the given ticket */
		if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket)
			continue;

		/*
		 * The spinlock is dropped around the validation callback, so
		 * the error return below leaves with the lock released.
		 */
		spin_unlock(&vm->individual_lock);

		r = validate(param, bo);
		if (r)
			return r;

		amdgpu_vm_bo_moved(bo_base);

		/* It's a bit inefficient to always jump back to the start, but
		 * we would need to re-structure the KFD for properly fixing
		 * that.
		 */
		spin_lock(&vm->individual_lock);
		goto restart;
	}
	spin_unlock(&vm->individual_lock);

	return 0;
}
661 
662 /**
663  * amdgpu_vm_ready - check VM is ready for updates
664  *
665  * @vm: VM to check
666  *
667  * Check if all VM PDs/PTs are ready for updates
668  *
669  * Returns:
670  * True if VM is not evicting and all VM entities are not stopped
671  */
672 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
673 {
674 	bool ret;
675 
676 	amdgpu_vm_assert_locked(vm);
677 
678 	amdgpu_vm_eviction_lock(vm);
679 	ret = !vm->evicting;
680 	amdgpu_vm_eviction_unlock(vm);
681 
682 	ret &= list_empty(&vm->kernel.evicted);
683 
684 	spin_lock(&vm->immediate.lock);
685 	ret &= !vm->immediate.stopped;
686 	spin_unlock(&vm->immediate.lock);
687 
688 	spin_lock(&vm->delayed.lock);
689 	ret &= !vm->delayed.stopped;
690 	spin_unlock(&vm->delayed.lock);
691 
692 	return ret;
693 }
694 
695 /**
696  * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
697  *
698  * @adev: amdgpu_device pointer
699  */
700 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
701 {
702 	const struct amdgpu_ip_block *ip_block;
703 	bool has_compute_vm_bug;
704 	struct amdgpu_ring *ring;
705 	int i;
706 
707 	has_compute_vm_bug = false;
708 
709 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
710 	if (ip_block) {
711 		/* Compute has a VM bug for GFX version < 7.
712 		   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
713 		if (ip_block->version->major <= 7)
714 			has_compute_vm_bug = true;
715 		else if (ip_block->version->major == 8)
716 			if (adev->gfx.mec_fw_version < 673)
717 				has_compute_vm_bug = true;
718 	}
719 
720 	for (i = 0; i < adev->num_rings; i++) {
721 		ring = adev->rings[i];
722 		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
723 			/* only compute rings */
724 			ring->has_compute_vm_bug = has_compute_vm_bug;
725 		else
726 			ring->has_compute_vm_bug = false;
727 	}
728 }
729 
730 /**
731  * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
732  *
733  * @ring: ring on which the job will be submitted
734  * @job: job to submit
735  *
736  * Returns:
737  * True if sync is needed.
738  */
739 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
740 				  struct amdgpu_job *job)
741 {
742 	struct amdgpu_device *adev = ring->adev;
743 	unsigned vmhub = ring->vm_hub;
744 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
745 
746 	if (job->vmid == 0)
747 		return false;
748 
749 	if (job->vm_needs_flush || ring->has_compute_vm_bug)
750 		return true;
751 
752 	if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
753 		return true;
754 
755 	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
756 		return true;
757 
758 	return false;
759 }
760 
/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @job:  related job
 * @need_pipe_sync: is pipe sync needed
 *
 * Emit a VM flush when it is necessary.
 *
 * Depending on the job and the state of its VMID this also emits a pipeline
 * sync, the cleaner shader, a PASID mapping update, an SPM VMID update and a
 * GDS switch. When a VM flush, PASID mapping or cleaner shader was emitted a
 * fence is emitted as well and remembered in the places which need to wait
 * for it. Nothing is written to the ring when neither a VM flush, a GDS
 * switch, a pipe sync nor the cleaner shader is needed.
 */
void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
		     bool need_pipe_sync)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
	unsigned vmhub = ring->vm_hub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
	bool spm_update_needed = job->spm_update_needed;
	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
		job->gds_switch_needed;
	bool vm_flush_needed = job->vm_needs_flush;
	bool cleaner_shader_needed = false;
	bool pasid_mapping_needed = false;
	struct dma_fence *fence = NULL;
	unsigned int patch = 0;

	/* After a GPU reset of this VMID everything has to be programmed again */
	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
		gds_switch_needed = true;
		vm_flush_needed = true;
		pasid_mapping_needed = true;
		spm_update_needed = true;
	}

	/*
	 * The PASID mapping needs an update when the VMID is used with a
	 * different PASID, or when there is no signaled mapping fence yet.
	 */
	mutex_lock(&id_mgr->lock);
	if (id->pasid != job->pasid || !id->pasid_mapping ||
	    !dma_fence_is_signaled(id->pasid_mapping))
		pasid_mapping_needed = true;
	mutex_unlock(&id_mgr->lock);

	/* Drop everything the ring (or the GMC) has no callback for */
	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
	vm_flush_needed &= !!ring->funcs->emit_vm_flush  &&
			job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
		ring->funcs->emit_wreg;

	/*
	 * The cleaner shader is only run for the job whose scheduled fence is
	 * the current isolation spearhead.
	 */
	cleaner_shader_needed = job->run_cleaner_shader &&
		adev->gfx.enable_cleaner_shader &&
		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
		&job->base.s_fence->scheduled == isolation->spearhead;

	/* Nothing to emit, leave the ring untouched */
	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
	    !cleaner_shader_needed)
		return;

	amdgpu_ring_ib_begin(ring);

	/* There is no matching insert_end for this on purpose for the vm flush.
	 * The IB portion of the submission has both.  Having multiple
	 * insert_start sequences is ok, but you can only have one insert_end
	 * per submission based on the way VCN FW works.  For JPEG
	 * you can as many insert_start and insert_end sequences as you like as
	 * long as the rest of the packets come between start and end sequences.
	 */
	if (ring->funcs->insert_start)
		ring->funcs->insert_start(ring);

	/* Patched up again by amdgpu_ring_patch_cond_exec() further below */
	if (ring->funcs->init_cond_exec)
		patch = amdgpu_ring_init_cond_exec(ring,
						   ring->cond_exe_gpu_addr);

	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);

	if (cleaner_shader_needed)
		ring->funcs->emit_cleaner_shader(ring);

	if (vm_flush_needed) {
		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
	}

	if (pasid_mapping_needed)
		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);

	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid);

	if (ring->funcs->emit_gds_switch &&
	    gds_switch_needed) {
		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
					    job->gds_size, job->gws_base,
					    job->gws_size, job->oa_base,
					    job->oa_size);
	}

	/*
	 * Emit the VM fence of the job. The local reference taken here is
	 * dropped again at the end of the function, every place which stores
	 * the fence below takes its own reference.
	 */
	if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
		amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
		fence = &job->hw_vm_fence->base;
		/* get a ref for the job */
		dma_fence_get(fence);
	}

	/* Remember the flush fence and the reset counter it was emitted for */
	if (vm_flush_needed) {
		mutex_lock(&id_mgr->lock);
		dma_fence_put(id->last_flush);
		id->last_flush = dma_fence_get(fence);
		id->current_gpu_reset_count =
			atomic_read(&adev->gpu_reset_counter);
		mutex_unlock(&id_mgr->lock);
	}

	/* Remember which PASID the VMID is mapped to and the mapping fence */
	if (pasid_mapping_needed) {
		mutex_lock(&id_mgr->lock);
		id->pasid = job->pasid;
		dma_fence_put(id->pasid_mapping);
		id->pasid_mapping = dma_fence_get(fence);
		mutex_unlock(&id_mgr->lock);
	}

	/*
	 * Make sure that all other submissions wait for the cleaner shader to
	 * finish before we push them to the HW.
	 */
	if (cleaner_shader_needed) {
		trace_amdgpu_cleaner_shader(ring, fence);
		mutex_lock(&adev->enforce_isolation_mutex);
		dma_fence_put(isolation->spearhead);
		isolation->spearhead = dma_fence_get(fence);
		mutex_unlock(&adev->enforce_isolation_mutex);
	}
	/* Drop the local reference, fence is still NULL if none was emitted */
	dma_fence_put(fence);

	amdgpu_ring_patch_cond_exec(ring, patch);

	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
	if (ring->funcs->emit_switch_buffer) {
		amdgpu_ring_emit_switch_buffer(ring);
		amdgpu_ring_emit_switch_buffer(ring);
	}

	amdgpu_ring_ib_end(ring);
}
903 
904 /**
905  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
906  *
907  * @vm: requested vm
908  * @bo: requested buffer object
909  *
910  * Find @bo inside the requested vm.
911  * Search inside the @bos vm list for the requested vm
912  * Returns the found bo_va or NULL if none is found
913  *
914  * Object has to be reserved!
915  *
916  * Returns:
917  * Found bo_va or NULL.
918  */
919 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
920 				       struct amdgpu_bo *bo)
921 {
922 	struct amdgpu_vm_bo_base *base;
923 
924 	for (base = bo->vm_bo; base; base = base->next) {
925 		if (base->vm != vm)
926 			continue;
927 
928 		return container_of(base, struct amdgpu_bo_va, base);
929 	}
930 	return NULL;
931 }
932 
933 /**
934  * amdgpu_vm_map_gart - Resolve gart mapping of addr
935  *
936  * @pages_addr: optional DMA address to use for lookup
937  * @addr: the unmapped addr
938  *
939  * Look up the physical address of the page that the pte resolves
940  * to.
941  *
942  * Returns:
943  * The pointer for the page table entry.
944  */
945 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
946 {
947 	uint64_t result;
948 
949 	/* page table offset */
950 	result = pages_addr[addr >> PAGE_SHIFT];
951 
952 	/* in case cpu page size != gpu page size*/
953 	result |= addr & (~PAGE_MASK);
954 
955 	result &= 0xFFFFFFFFFFFFF000ULL;
956 
957 	return result;
958 }
959 
960 /**
961  * amdgpu_vm_update_pdes - make sure that all directories are valid
962  *
963  * @adev: amdgpu_device pointer
964  * @vm: requested vm
965  * @immediate: submit immediately to the paging queue
966  *
967  * Makes sure all directories are up to date.
968  *
969  * Returns:
970  * 0 for success, error for failure.
971  */
972 int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
973 			  struct amdgpu_vm *vm, bool immediate)
974 {
975 	struct amdgpu_vm_update_params params;
976 	struct amdgpu_vm_bo_base *entry, *tmp;
977 	bool flush_tlb_needed = false;
978 	int r, idx;
979 
980 	amdgpu_vm_assert_locked(vm);
981 
982 	if (list_empty(&vm->kernel.moved))
983 		return 0;
984 
985 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
986 		return -ENODEV;
987 
988 	memset(&params, 0, sizeof(params));
989 	params.adev = adev;
990 	params.vm = vm;
991 	params.immediate = immediate;
992 
993 	r = vm->update_funcs->prepare(&params, NULL,
994 				      AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
995 	if (r)
996 		goto error;
997 
998 	list_for_each_entry(entry, &vm->kernel.moved, vm_status) {
999 		/* vm_flush_needed after updating moved PDEs */
1000 		flush_tlb_needed |= entry->moved;
1001 
1002 		r = amdgpu_vm_pde_update(&params, entry);
1003 		if (r)
1004 			goto error;
1005 	}
1006 
1007 	r = vm->update_funcs->commit(&params, &vm->last_update);
1008 	if (r)
1009 		goto error;
1010 
1011 	if (flush_tlb_needed)
1012 		atomic64_inc(&vm->tlb_seq);
1013 
1014 	list_for_each_entry_safe(entry, tmp, &vm->kernel.moved, vm_status)
1015 		amdgpu_vm_bo_idle(entry);
1016 
1017 error:
1018 	drm_dev_exit(idx);
1019 	return r;
1020 }
1021 
1022 /**
1023  * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
1024  * @fence: unused
1025  * @cb: the callback structure
1026  *
1027  * Increments the tlb sequence to make sure that future CS execute a VM flush.
1028  */
1029 static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
1030 				 struct dma_fence_cb *cb)
1031 {
1032 	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
1033 
1034 	tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
1035 	atomic64_inc(&tlb_cb->vm->tlb_seq);
1036 	kfree(tlb_cb);
1037 }
1038 
1039 /**
1040  * amdgpu_vm_tlb_flush - prepare TLB flush
1041  *
1042  * @params: parameters for update
1043  * @fence: input fence to sync TLB flush with
1044  * @tlb_cb: the callback structure
1045  *
1046  * Increments the tlb sequence to make sure that future CS execute a VM flush.
1047  */
1048 static void
1049 amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
1050 		    struct dma_fence **fence,
1051 		    struct amdgpu_vm_tlb_seq_struct *tlb_cb)
1052 {
1053 	struct amdgpu_vm *vm = params->vm;
1054 
1055 	tlb_cb->vm = vm;
1056 	if (!fence || !*fence) {
1057 		amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
1058 		return;
1059 	}
1060 
1061 	if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
1062 				    amdgpu_vm_tlb_seq_cb)) {
1063 		dma_fence_put(vm->last_tlb_flush);
1064 		vm->last_tlb_flush = dma_fence_get(*fence);
1065 	} else {
1066 		amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
1067 	}
1068 
1069 	/* Prepare a TLB flush fence to be attached to PTs */
1070 	/* The check for need_tlb_fence should be dropped once we
1071 	 * sort out the issues with KIQ/MES TLB invalidation timeouts.
1072 	 */
1073 	if (!params->unlocked && vm->need_tlb_fence) {
1074 		amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
1075 
1076 		/* Makes sure no PD/PT is freed before the flush */
1077 		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
1078 				   DMA_RESV_USAGE_BOOKKEEP);
1079 	}
1080 }
1081 
1082 /**
1083  * amdgpu_vm_update_range - update a range in the vm page table
1084  *
1085  * @adev: amdgpu_device pointer to use for commands
1086  * @vm: the VM to update the range
1087  * @immediate: immediate submission in a page fault
1088  * @unlocked: unlocked invalidation during MM callback
1089  * @flush_tlb: trigger tlb invalidation after update completed
1090  * @allow_override: change MTYPE for local NUMA nodes
1091  * @sync: fences we need to sync to
1092  * @start: start of mapped range
1093  * @last: last mapped entry
1094  * @flags: flags for the entries
1095  * @offset: offset into nodes and pages_addr
1096  * @vram_base: base for vram mappings
1097  * @res: ttm_resource to map
1098  * @pages_addr: DMA addresses to use for mapping
1099  * @fence: optional resulting fence
1100  *
1101  * Fill in the page table entries between @start and @last.
1102  *
1103  * Returns:
1104  * 0 for success, negative erro code for failure.
1105  */
1106 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1107 			   bool immediate, bool unlocked, bool flush_tlb,
1108 			   bool allow_override, struct amdgpu_sync *sync,
1109 			   uint64_t start, uint64_t last, uint64_t flags,
1110 			   uint64_t offset, uint64_t vram_base,
1111 			   struct ttm_resource *res, dma_addr_t *pages_addr,
1112 			   struct dma_fence **fence)
1113 {
1114 	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
1115 	struct amdgpu_vm_update_params params;
1116 	struct amdgpu_res_cursor cursor;
1117 	int r, idx;
1118 
1119 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
1120 		return -ENODEV;
1121 
1122 	tlb_cb = kmalloc_obj(*tlb_cb);
1123 	if (!tlb_cb) {
1124 		drm_dev_exit(idx);
1125 		return -ENOMEM;
1126 	}
1127 
1128 	/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
1129 	 * heavy-weight flush TLB unconditionally.
1130 	 */
1131 	flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
1132 		     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);
1133 
1134 	/*
1135 	 * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
1136 	 */
1137 	flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);
1138 
1139 	memset(&params, 0, sizeof(params));
1140 	params.adev = adev;
1141 	params.vm = vm;
1142 	params.immediate = immediate;
1143 	params.pages_addr = pages_addr;
1144 	params.unlocked = unlocked;
1145 	params.needs_flush = flush_tlb;
1146 	params.override_pte = allow_override && adev->gmc.override_pte;
1147 	INIT_LIST_HEAD(&params.tlb_flush_waitlist);
1148 
1149 	amdgpu_vm_eviction_lock(vm);
1150 	if (vm->evicting) {
1151 		r = -EBUSY;
1152 		goto error_free;
1153 	}
1154 
1155 	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
1156 		struct dma_fence *tmp = dma_fence_get_stub();
1157 
1158 		amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true);
1159 		swap(vm->last_unlocked, tmp);
1160 		dma_fence_put(tmp);
1161 	}
1162 
1163 	r = vm->update_funcs->prepare(&params, sync,
1164 				      AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
1165 	if (r)
1166 		goto error_free;
1167 
1168 	amdgpu_res_first(pages_addr ? NULL : res, offset,
1169 			 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
1170 	while (cursor.remaining) {
1171 		uint64_t tmp, num_entries, addr;
1172 
1173 		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
1174 		if (pages_addr) {
1175 			bool contiguous = true;
1176 
1177 			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
1178 				uint64_t pfn = cursor.start >> PAGE_SHIFT;
1179 				uint64_t count;
1180 
1181 				contiguous = pages_addr[pfn + 1] ==
1182 					pages_addr[pfn] + PAGE_SIZE;
1183 
1184 				tmp = num_entries /
1185 					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
1186 				for (count = 2; count < tmp; ++count) {
1187 					uint64_t idx = pfn + count;
1188 
1189 					if (contiguous != (pages_addr[idx] ==
1190 					    pages_addr[idx - 1] + PAGE_SIZE))
1191 						break;
1192 				}
1193 				if (!contiguous)
1194 					count--;
1195 				num_entries = count *
1196 					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
1197 			}
1198 
1199 			if (!contiguous) {
1200 				addr = cursor.start;
1201 				params.pages_addr = pages_addr;
1202 			} else {
1203 				addr = pages_addr[cursor.start >> PAGE_SHIFT];
1204 				params.pages_addr = NULL;
1205 			}
1206 
1207 		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
1208 			addr = vram_base + cursor.start;
1209 		} else {
1210 			addr = 0;
1211 		}
1212 
1213 		tmp = start + num_entries;
1214 		r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
1215 		if (r)
1216 			goto error_free;
1217 
1218 		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
1219 		start = tmp;
1220 	}
1221 
1222 	r = vm->update_funcs->commit(&params, fence);
1223 	if (r)
1224 		goto error_free;
1225 
1226 	if (params.needs_flush) {
1227 		amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
1228 		tlb_cb = NULL;
1229 	}
1230 
1231 	amdgpu_vm_pt_free_list(adev, &params);
1232 
1233 error_free:
1234 	kfree(tlb_cb);
1235 	amdgpu_vm_eviction_unlock(vm);
1236 	drm_dev_exit(idx);
1237 	return r;
1238 }
1239 
1240 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
1241 			  struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
1242 {
1243 	spin_lock(&vm->stats_lock);
1244 	memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
1245 	spin_unlock(&vm->stats_lock);
1246 }
1247 
1248 /**
1249  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1250  *
1251  * @adev: amdgpu_device pointer
1252  * @bo_va: requested BO and VM object
1253  * @clear: if true clear the entries
1254  *
1255  * Fill in the page table entries for @bo_va.
1256  *
1257  * Returns:
1258  * 0 for success, -EINVAL for failure.
1259  */
1260 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
1261 			bool clear)
1262 {
1263 	struct amdgpu_bo *bo = bo_va->base.bo;
1264 	struct amdgpu_vm *vm = bo_va->base.vm;
1265 	struct amdgpu_bo_va_mapping *mapping;
1266 	struct dma_fence **last_update;
1267 	dma_addr_t *pages_addr = NULL;
1268 	struct ttm_resource *mem;
1269 	struct amdgpu_sync sync;
1270 	bool flush_tlb = clear;
1271 	uint64_t vram_base;
1272 	uint64_t flags;
1273 	bool uncached;
1274 	int r;
1275 
1276 	amdgpu_sync_create(&sync);
1277 	if (clear) {
1278 		mem = NULL;
1279 
1280 		/* Implicitly sync to command submissions in the same VM before
1281 		 * unmapping.
1282 		 */
1283 		r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
1284 				     AMDGPU_SYNC_EQ_OWNER, vm);
1285 		if (r)
1286 			goto error_free;
1287 		if (bo) {
1288 			r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
1289 			if (r)
1290 				goto error_free;
1291 		}
1292 	} else if (!bo) {
1293 		mem = NULL;
1294 
1295 		/* PRT map operations don't need to sync to anything. */
1296 
1297 	} else {
1298 		struct drm_gem_object *obj = &bo->tbo.base;
1299 
1300 		if (drm_gem_is_imported(obj) && bo_va->is_xgmi) {
1301 			struct dma_buf *dma_buf = obj->import_attach->dmabuf;
1302 			struct drm_gem_object *gobj = dma_buf->priv;
1303 			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
1304 
1305 			if (abo->tbo.resource &&
1306 			    abo->tbo.resource->mem_type == TTM_PL_VRAM)
1307 				bo = gem_to_amdgpu_bo(gobj);
1308 		}
1309 		mem = bo->tbo.resource;
1310 		if (mem && (mem->mem_type == TTM_PL_TT ||
1311 			    mem->mem_type == AMDGPU_PL_PREEMPT))
1312 			pages_addr = bo->tbo.ttm->dma_address;
1313 
1314 		/* Implicitly sync to moving fences before mapping anything */
1315 		r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
1316 				     AMDGPU_SYNC_EXPLICIT, vm);
1317 		if (r)
1318 			goto error_free;
1319 	}
1320 
1321 	if (bo) {
1322 		struct amdgpu_device *bo_adev;
1323 
1324 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1325 
1326 		if (amdgpu_bo_encrypted(bo))
1327 			flags |= AMDGPU_PTE_TMZ;
1328 
1329 		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
1330 		vram_base = bo_adev->vm_manager.vram_base_offset;
1331 		uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
1332 	} else {
1333 		flags = 0x0;
1334 		vram_base = 0;
1335 		uncached = false;
1336 	}
1337 
1338 	if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
1339 		last_update = &vm->last_update;
1340 	else
1341 		last_update = &bo_va->last_pt_update;
1342 
1343 	if (!clear && bo_va->base.moved) {
1344 		flush_tlb = true;
1345 		list_splice_init(&bo_va->valids, &bo_va->invalids);
1346 
1347 	} else if (bo_va->cleared != clear) {
1348 		list_splice_init(&bo_va->valids, &bo_va->invalids);
1349 	}
1350 
1351 	list_for_each_entry(mapping, &bo_va->invalids, list) {
1352 		uint64_t update_flags = flags;
1353 
1354 		/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
1355 		 * but in case of something, we filter the flags in first place
1356 		 */
1357 		if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
1358 			update_flags &= ~AMDGPU_PTE_READABLE;
1359 		if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
1360 			update_flags &= ~AMDGPU_PTE_WRITEABLE;
1361 
1362 		/* Apply ASIC specific mapping flags */
1363 		amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
1364 				      &update_flags);
1365 
1366 		trace_amdgpu_vm_bo_update(mapping);
1367 
1368 		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
1369 					   !uncached, &sync, mapping->start,
1370 					   mapping->last, update_flags,
1371 					   mapping->offset, vram_base, mem,
1372 					   pages_addr, last_update);
1373 		if (r)
1374 			goto error_free;
1375 	}
1376 
1377 	/* If the BO is not in its preferred location add it back to
1378 	 * the evicted list so that it gets validated again on the
1379 	 * next command submission.
1380 	 */
1381 	if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
1382 		if (bo->tbo.resource &&
1383 		    !(bo->preferred_domains &
1384 		      amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
1385 			amdgpu_vm_bo_evicted(&bo_va->base);
1386 		else
1387 			amdgpu_vm_bo_idle(&bo_va->base);
1388 	} else {
1389 		amdgpu_vm_bo_idle(&bo_va->base);
1390 	}
1391 
1392 	list_splice_init(&bo_va->invalids, &bo_va->valids);
1393 	bo_va->cleared = clear;
1394 	bo_va->base.moved = false;
1395 
1396 	if (trace_amdgpu_vm_bo_mapping_enabled()) {
1397 		list_for_each_entry(mapping, &bo_va->valids, list)
1398 			trace_amdgpu_vm_bo_mapping(mapping);
1399 	}
1400 
1401 error_free:
1402 	amdgpu_sync_free(&sync);
1403 	return r;
1404 }
1405 
1406 /**
1407  * amdgpu_vm_update_prt_state - update the global PRT state
1408  *
1409  * @adev: amdgpu_device pointer
1410  */
1411 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1412 {
1413 	unsigned long flags;
1414 	bool enable;
1415 
1416 	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1417 	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1418 	adev->gmc.gmc_funcs->set_prt(adev, enable);
1419 	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1420 }
1421 
1422 /**
1423  * amdgpu_vm_prt_get - add a PRT user
1424  *
1425  * @adev: amdgpu_device pointer
1426  */
1427 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1428 {
1429 	if (!adev->gmc.gmc_funcs->set_prt)
1430 		return;
1431 
1432 	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1433 		amdgpu_vm_update_prt_state(adev);
1434 }
1435 
1436 /**
1437  * amdgpu_vm_prt_put - drop a PRT user
1438  *
1439  * @adev: amdgpu_device pointer
1440  */
1441 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1442 {
1443 	if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1444 		amdgpu_vm_update_prt_state(adev);
1445 }
1446 
1447 /**
1448  * amdgpu_vm_prt_cb - callback for updating the PRT status
1449  *
1450  * @fence: fence for the callback
1451  * @_cb: the callback function
1452  */
1453 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1454 {
1455 	struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1456 
1457 	amdgpu_vm_prt_put(cb->adev);
1458 	kfree(cb);
1459 }
1460 
1461 /**
1462  * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1463  *
1464  * @adev: amdgpu_device pointer
1465  * @fence: fence for the callback
1466  */
1467 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1468 				 struct dma_fence *fence)
1469 {
1470 	struct amdgpu_prt_cb *cb;
1471 
1472 	if (!adev->gmc.gmc_funcs->set_prt)
1473 		return;
1474 
1475 	cb = kmalloc_obj(struct amdgpu_prt_cb);
1476 	if (!cb) {
1477 		/* Last resort when we are OOM */
1478 		if (fence)
1479 			dma_fence_wait(fence, false);
1480 
1481 		amdgpu_vm_prt_put(adev);
1482 	} else {
1483 		cb->adev = adev;
1484 		if (!fence || dma_fence_add_callback(fence, &cb->cb,
1485 						     amdgpu_vm_prt_cb))
1486 			amdgpu_vm_prt_cb(fence, &cb->cb);
1487 	}
1488 }
1489 
1490 /**
1491  * amdgpu_vm_free_mapping - free a mapping
1492  *
1493  * @adev: amdgpu_device pointer
1494  * @vm: requested vm
1495  * @mapping: mapping to be freed
1496  * @fence: fence of the unmap operation
1497  *
1498  * Free a mapping and make sure we decrease the PRT usage count if applicable.
1499  */
1500 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1501 				   struct amdgpu_vm *vm,
1502 				   struct amdgpu_bo_va_mapping *mapping,
1503 				   struct dma_fence *fence)
1504 {
1505 	if (mapping->flags & AMDGPU_VM_PAGE_PRT)
1506 		amdgpu_vm_add_prt_cb(adev, fence);
1507 	kfree(mapping);
1508 }
1509 
1510 /**
1511  * amdgpu_vm_prt_fini - finish all prt mappings
1512  *
1513  * @adev: amdgpu_device pointer
1514  * @vm: requested vm
1515  *
1516  * Register a cleanup callback to disable PRT support after VM dies.
1517  */
1518 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1519 {
1520 	struct dma_resv *resv = vm->root.bo->tbo.base.resv;
1521 	struct dma_resv_iter cursor;
1522 	struct dma_fence *fence;
1523 
1524 	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
1525 		/* Add a callback for each fence in the reservation object */
1526 		amdgpu_vm_prt_get(adev);
1527 		amdgpu_vm_add_prt_cb(adev, fence);
1528 	}
1529 }
1530 
1531 /**
1532  * amdgpu_vm_clear_freed - clear freed BOs in the PT
1533  *
1534  * @adev: amdgpu_device pointer
1535  * @vm: requested vm
1536  * @fence: optional resulting fence (unchanged if no work needed to be done
1537  * or if an error occurred)
1538  *
1539  * Make sure all freed BOs are cleared in the PT.
1540  * PTs have to be reserved and mutex must be locked!
1541  *
1542  * Returns:
1543  * 0 for success.
1544  *
1545  */
1546 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1547 			  struct amdgpu_vm *vm,
1548 			  struct dma_fence **fence)
1549 {
1550 	struct amdgpu_bo_va_mapping *mapping;
1551 	struct dma_fence *f = NULL;
1552 	struct amdgpu_sync sync;
1553 	int r;
1554 
1555 
1556 	/*
1557 	 * Implicitly sync to command submissions in the same VM before
1558 	 * unmapping.
1559 	 */
1560 	amdgpu_sync_create(&sync);
1561 	r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
1562 			     AMDGPU_SYNC_EQ_OWNER, vm);
1563 	if (r)
1564 		goto error_free;
1565 
1566 	while (!list_empty(&vm->freed)) {
1567 		mapping = list_first_entry(&vm->freed,
1568 			struct amdgpu_bo_va_mapping, list);
1569 		list_del(&mapping->list);
1570 
1571 		r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
1572 					   &sync, mapping->start, mapping->last,
1573 					   0, 0, 0, NULL, NULL, &f);
1574 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
1575 		if (r) {
1576 			dma_fence_put(f);
1577 			goto error_free;
1578 		}
1579 	}
1580 
1581 	if (fence && f) {
1582 		dma_fence_put(*fence);
1583 		*fence = f;
1584 	} else {
1585 		dma_fence_put(f);
1586 	}
1587 
1588 error_free:
1589 	amdgpu_sync_free(&sync);
1590 	return r;
1591 
1592 }
1593 
/**
 * amdgpu_vm_handle_moved - handle moved BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @ticket: optional reservation ticket used to reserve the VM
 *
 * Make sure all BOs which are moved are updated in the PTs: first the
 * entries on vm->always_valid.moved, then the ones on vm->individual.moved.
 *
 * Returns:
 * 0 for success, otherwise the error returned by amdgpu_vm_bo_update().
 *
 * PTs have to be reserved!
 */
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
			   struct amdgpu_vm *vm,
			   struct ww_acquire_ctx *ticket)
{
	struct amdgpu_bo_va *bo_va, *tmp;
	struct dma_resv *resv;
	struct amdgpu_bo *bo;
	bool clear, unlock;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->always_valid.moved,
				 base.vm_status) {
		/* Per VM BOs never need to be cleared in the page tables */
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;
	}

	/*
	 * individual_lock is only held while looking at the list; it is
	 * dropped around the update and taken again before the next check.
	 * NOTE(review): the loop ends only if amdgpu_vm_bo_update() takes
	 * bo_va off individual.moved — presumably via amdgpu_vm_bo_idle(),
	 * confirm.
	 */
	spin_lock(&vm->individual_lock);
	while (!list_empty(&vm->individual.moved)) {
		bo_va = list_first_entry(&vm->individual.moved,
					 typeof(*bo_va), base.vm_status);
		bo = bo_va->base.bo;
		resv = bo->tbo.base.resv;
		spin_unlock(&vm->individual_lock);

		/* Try to reserve the BO to avoid clearing its ptes */
		if (!adev->debug_vm && !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
		    dma_resv_trylock(resv)) {
			clear = false;
			unlock = true;
		/* The caller is already holding the reservation lock */
		} else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
			clear = false;
			unlock = false;
		/* Somebody else is using the BO right now */
		} else {
			clear = true;
			unlock = false;
		}

		r = amdgpu_vm_bo_update(adev, bo_va, clear);

		/* Only release the reservation taken by the trylock above */
		if (unlock)
			dma_resv_unlock(resv);
		/* individual_lock is not held here, a plain return is fine */
		if (r)
			return r;

		/* Remember evicted DMABuf imports in compute VMs for later
		 * validation
		 */
		if (vm->is_compute_context &&
		    drm_gem_is_imported(&bo_va->base.bo->tbo.base) &&
		    (!bo_va->base.bo->tbo.resource ||
		     bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
			amdgpu_vm_bo_evicted(&bo_va->base);

		spin_lock(&vm->individual_lock);
	}
	spin_unlock(&vm->individual_lock);

	return 0;
}
1671 
1672 /**
1673  * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
1674  *
1675  * @adev: amdgpu_device pointer
1676  * @vm: requested vm
1677  * @flush_type: flush type
1678  * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
1679  *
1680  * Flush TLB if needed for a compute VM.
1681  *
1682  * Returns:
1683  * 0 for success.
1684  */
1685 int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
1686 				struct amdgpu_vm *vm,
1687 				uint32_t flush_type,
1688 				uint32_t xcc_mask)
1689 {
1690 	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
1691 	bool all_hub = false;
1692 	int xcc = 0, r = 0;
1693 
1694 	WARN_ON_ONCE(!vm->is_compute_context);
1695 
1696 	/*
1697 	 * It can be that we race and lose here, but that is extremely unlikely
1698 	 * and the worst thing which could happen is that we flush the changes
1699 	 * into the TLB once more which is harmless.
1700 	 */
1701 	if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
1702 		return 0;
1703 
1704 	if (adev->family == AMDGPU_FAMILY_AI ||
1705 	    adev->family == AMDGPU_FAMILY_RV)
1706 		all_hub = true;
1707 
1708 	for_each_inst(xcc, xcc_mask) {
1709 		r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
1710 						   all_hub, xcc);
1711 		if (r)
1712 			break;
1713 	}
1714 	return r;
1715 }
1716 
1717 /**
1718  * amdgpu_vm_bo_add - add a bo to a specific vm
1719  *
1720  * @adev: amdgpu_device pointer
1721  * @vm: requested vm
1722  * @bo: amdgpu buffer object
1723  *
1724  * Add @bo into the requested vm.
1725  * Add @bo to the list of bos associated with the vm
1726  *
1727  * Returns:
1728  * Newly added bo_va or NULL for failure
1729  *
1730  * Object has to be reserved!
1731  */
1732 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1733 				      struct amdgpu_vm *vm,
1734 				      struct amdgpu_bo *bo)
1735 {
1736 	struct amdgpu_bo_va *bo_va;
1737 
1738 	amdgpu_vm_assert_locked(vm);
1739 
1740 	bo_va = kzalloc_obj(struct amdgpu_bo_va);
1741 	if (bo_va == NULL) {
1742 		return NULL;
1743 	}
1744 	amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1745 
1746 	bo_va->ref_count = 1;
1747 	bo_va->last_pt_update = dma_fence_get_stub();
1748 	INIT_LIST_HEAD(&bo_va->valids);
1749 	INIT_LIST_HEAD(&bo_va->invalids);
1750 
1751 	if (!bo)
1752 		return bo_va;
1753 
1754 	dma_resv_assert_held(bo->tbo.base.resv);
1755 	if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
1756 		bo_va->is_xgmi = true;
1757 		/* Power up XGMI if it can be potentially used */
1758 		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
1759 	}
1760 
1761 	return bo_va;
1762 }
1763 
1764 
1765 /**
1766  * amdgpu_vm_bo_insert_map - insert a new mapping
1767  *
1768  * @adev: amdgpu_device pointer
1769  * @bo_va: bo_va to store the address
1770  * @mapping: the mapping to insert
1771  *
1772  * Insert a new mapping into all structures.
1773  */
1774 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1775 				    struct amdgpu_bo_va *bo_va,
1776 				    struct amdgpu_bo_va_mapping *mapping)
1777 {
1778 	struct amdgpu_vm *vm = bo_va->base.vm;
1779 	struct amdgpu_bo *bo = bo_va->base.bo;
1780 
1781 	mapping->bo_va = bo_va;
1782 	list_add(&mapping->list, &bo_va->invalids);
1783 	amdgpu_vm_it_insert(mapping, &vm->va);
1784 
1785 	if (mapping->flags & AMDGPU_VM_PAGE_PRT)
1786 		amdgpu_vm_prt_get(adev);
1787 
1788 	if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
1789 		amdgpu_vm_bo_moved(&bo_va->base);
1790 
1791 	trace_amdgpu_vm_bo_map(bo_va, mapping);
1792 }
1793 
1794 /* Validate operation parameters to prevent potential abuse */
1795 static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
1796 					  struct amdgpu_bo *bo,
1797 					  uint64_t saddr,
1798 					  uint64_t offset,
1799 					  uint64_t size)
1800 {
1801 	uint64_t tmp, lpfn;
1802 
1803 	if (saddr & AMDGPU_GPU_PAGE_MASK
1804 	    || offset & AMDGPU_GPU_PAGE_MASK
1805 	    || size & AMDGPU_GPU_PAGE_MASK)
1806 		return -EINVAL;
1807 
1808 	if (check_add_overflow(saddr, size, &tmp)
1809 	    || check_add_overflow(offset, size, &tmp)
1810 	    || size == 0 /* which also leads to end < begin */)
1811 		return -EINVAL;
1812 
1813 	/* make sure object fit at this offset */
1814 	if (bo && offset + size > amdgpu_bo_size(bo))
1815 		return -EINVAL;
1816 
1817 	/* Ensure last pfn not exceed max_pfn */
1818 	lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
1819 	if (lpfn >= adev->vm_manager.max_pfn)
1820 		return -EINVAL;
1821 
1822 	return 0;
1823 }
1824 
1825 /**
1826  * amdgpu_vm_bo_map - map bo inside a vm
1827  *
1828  * @adev: amdgpu_device pointer
1829  * @bo_va: bo_va to store the address
1830  * @saddr: where to map the BO
1831  * @offset: requested offset in the BO
1832  * @size: BO size in bytes
1833  * @flags: attributes of pages (read/write/valid/etc.)
1834  *
1835  * Add a mapping of the BO at the specefied addr into the VM.
1836  *
1837  * Returns:
1838  * 0 for success, error for failure.
1839  *
1840  * Object has to be reserved and unreserved outside!
1841  */
1842 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1843 		     struct amdgpu_bo_va *bo_va,
1844 		     uint64_t saddr, uint64_t offset,
1845 		     uint64_t size, uint32_t flags)
1846 {
1847 	struct amdgpu_bo_va_mapping *mapping, *tmp;
1848 	struct amdgpu_bo *bo = bo_va->base.bo;
1849 	struct amdgpu_vm *vm = bo_va->base.vm;
1850 	uint64_t eaddr;
1851 	int r;
1852 
1853 	r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
1854 	if (r)
1855 		return r;
1856 
1857 	saddr /= AMDGPU_GPU_PAGE_SIZE;
1858 	eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
1859 
1860 	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1861 	if (tmp) {
1862 		/* bo and tmp overlap, invalid addr */
1863 		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1864 			"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1865 			tmp->start, tmp->last + 1);
1866 		return -EINVAL;
1867 	}
1868 
1869 	mapping = kmalloc_obj(*mapping);
1870 	if (!mapping)
1871 		return -ENOMEM;
1872 
1873 	mapping->start = saddr;
1874 	mapping->last = eaddr;
1875 	mapping->offset = offset;
1876 	mapping->flags = flags;
1877 
1878 	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1879 
1880 	return 0;
1881 }
1882 
1883 /**
1884  * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1885  *
1886  * @adev: amdgpu_device pointer
1887  * @bo_va: bo_va to store the address
1888  * @saddr: where to map the BO
1889  * @offset: requested offset in the BO
1890  * @size: BO size in bytes
1891  * @flags: attributes of pages (read/write/valid/etc.)
1892  *
1893  * Add a mapping of the BO at the specefied addr into the VM. Replace existing
1894  * mappings as we do so.
1895  *
1896  * Returns:
1897  * 0 for success, error for failure.
1898  *
1899  * Object has to be reserved and unreserved outside!
1900  */
1901 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1902 			     struct amdgpu_bo_va *bo_va,
1903 			     uint64_t saddr, uint64_t offset,
1904 			     uint64_t size, uint32_t flags)
1905 {
1906 	struct amdgpu_bo_va_mapping *mapping;
1907 	struct amdgpu_bo *bo = bo_va->base.bo;
1908 	uint64_t eaddr;
1909 	int r;
1910 
1911 	r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
1912 	if (r)
1913 		return r;
1914 
1915 	/* Allocate all the needed memory */
1916 	mapping = kmalloc_obj(*mapping);
1917 	if (!mapping)
1918 		return -ENOMEM;
1919 
1920 	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
1921 	if (r) {
1922 		kfree(mapping);
1923 		return r;
1924 	}
1925 
1926 	saddr /= AMDGPU_GPU_PAGE_SIZE;
1927 	eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
1928 
1929 	mapping->start = saddr;
1930 	mapping->last = eaddr;
1931 	mapping->offset = offset;
1932 	mapping->flags = flags;
1933 
1934 	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1935 
1936 	return 0;
1937 }
1938 
1939 /**
1940  * amdgpu_vm_bo_unmap - remove bo mapping from vm
1941  *
1942  * @adev: amdgpu_device pointer
1943  * @bo_va: bo_va to remove the address from
1944  * @saddr: where to the BO is mapped
1945  *
1946  * Remove a mapping of the BO at the specefied addr from the VM.
1947  *
1948  * Returns:
1949  * 0 for success, error for failure.
1950  *
1951  * Object has to be reserved and unreserved outside!
1952  */
1953 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1954 		       struct amdgpu_bo_va *bo_va,
1955 		       uint64_t saddr)
1956 {
1957 	struct amdgpu_bo_va_mapping *mapping;
1958 	struct amdgpu_vm *vm = bo_va->base.vm;
1959 	bool valid = true;
1960 
1961 	saddr /= AMDGPU_GPU_PAGE_SIZE;
1962 
1963 	list_for_each_entry(mapping, &bo_va->valids, list) {
1964 		if (mapping->start == saddr)
1965 			break;
1966 	}
1967 
1968 	if (&mapping->list == &bo_va->valids) {
1969 		valid = false;
1970 
1971 		list_for_each_entry(mapping, &bo_va->invalids, list) {
1972 			if (mapping->start == saddr)
1973 				break;
1974 		}
1975 
1976 		if (&mapping->list == &bo_va->invalids)
1977 			return -ENOENT;
1978 	}
1979 
1980 	/* It's unlikely to happen that the mapping userq hasn't been idled
1981 	 * during user requests GEM unmap IOCTL except for forcing the unmap
1982 	 * from user space.
1983 	 */
1984 	if (unlikely(bo_va->userq_va_mapped))
1985 		amdgpu_userq_gem_va_unmap_validate(adev, mapping);
1986 
1987 	list_del(&mapping->list);
1988 	amdgpu_vm_it_remove(mapping, &vm->va);
1989 	mapping->bo_va = NULL;
1990 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1991 
1992 	if (valid)
1993 		list_add(&mapping->list, &vm->freed);
1994 	else
1995 		amdgpu_vm_free_mapping(adev, vm, mapping,
1996 				       bo_va->last_pt_update);
1997 
1998 	return 0;
1999 }
2000 
2001 /**
2002  * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2003  *
2004  * @adev: amdgpu_device pointer
2005  * @vm: VM structure to use
2006  * @saddr: start of the range
2007  * @size: size of the range
2008  *
2009  * Remove all mappings in a range, split them as appropriate.
2010  *
2011  * Returns:
2012  * 0 for success, error for failure.
2013  */
2014 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2015 				struct amdgpu_vm *vm,
2016 				uint64_t saddr, uint64_t size)
2017 {
2018 	struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2019 	LIST_HEAD(removed);
2020 	uint64_t eaddr;
2021 	int r;
2022 
2023 	r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
2024 	if (r)
2025 		return r;
2026 
2027 	saddr /= AMDGPU_GPU_PAGE_SIZE;
2028 	eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
2029 
2030 	/* Allocate all the needed memory */
2031 	before = kzalloc_obj(*before);
2032 	if (!before)
2033 		return -ENOMEM;
2034 	INIT_LIST_HEAD(&before->list);
2035 
2036 	after = kzalloc_obj(*after);
2037 	if (!after) {
2038 		kfree(before);
2039 		return -ENOMEM;
2040 	}
2041 	INIT_LIST_HEAD(&after->list);
2042 
2043 	/* Now gather all removed mappings */
2044 	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2045 	while (tmp) {
2046 		/* Remember mapping split at the start */
2047 		if (tmp->start < saddr) {
2048 			before->start = tmp->start;
2049 			before->last = saddr - 1;
2050 			before->offset = tmp->offset;
2051 			before->flags = tmp->flags;
2052 			before->bo_va = tmp->bo_va;
2053 			list_add(&before->list, &tmp->bo_va->invalids);
2054 		}
2055 
2056 		/* Remember mapping split at the end */
2057 		if (tmp->last > eaddr) {
2058 			after->start = eaddr + 1;
2059 			after->last = tmp->last;
2060 			after->offset = tmp->offset;
2061 			after->offset += (after->start - tmp->start) << PAGE_SHIFT;
2062 			after->flags = tmp->flags;
2063 			after->bo_va = tmp->bo_va;
2064 			list_add(&after->list, &tmp->bo_va->invalids);
2065 		}
2066 
2067 		list_del(&tmp->list);
2068 		list_add(&tmp->list, &removed);
2069 
2070 		tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2071 	}
2072 
2073 	/* And free them up */
2074 	list_for_each_entry_safe(tmp, next, &removed, list) {
2075 		amdgpu_vm_it_remove(tmp, &vm->va);
2076 		list_del(&tmp->list);
2077 
2078 		if (tmp->start < saddr)
2079 		    tmp->start = saddr;
2080 		if (tmp->last > eaddr)
2081 		    tmp->last = eaddr;
2082 
2083 		tmp->bo_va = NULL;
2084 		list_add(&tmp->list, &vm->freed);
2085 		trace_amdgpu_vm_bo_unmap(NULL, tmp);
2086 	}
2087 
2088 	/* Insert partial mapping before the range */
2089 	if (!list_empty(&before->list)) {
2090 		struct amdgpu_bo *bo = before->bo_va->base.bo;
2091 
2092 		amdgpu_vm_it_insert(before, &vm->va);
2093 		if (before->flags & AMDGPU_VM_PAGE_PRT)
2094 			amdgpu_vm_prt_get(adev);
2095 
2096 		if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
2097 		    !before->bo_va->base.moved)
2098 			amdgpu_vm_bo_moved(&before->bo_va->base);
2099 	} else {
2100 		kfree(before);
2101 	}
2102 
2103 	/* Insert partial mapping after the range */
2104 	if (!list_empty(&after->list)) {
2105 		struct amdgpu_bo *bo = after->bo_va->base.bo;
2106 
2107 		amdgpu_vm_it_insert(after, &vm->va);
2108 		if (after->flags & AMDGPU_VM_PAGE_PRT)
2109 			amdgpu_vm_prt_get(adev);
2110 
2111 		if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
2112 		    !after->bo_va->base.moved)
2113 			amdgpu_vm_bo_moved(&after->bo_va->base);
2114 	} else {
2115 		kfree(after);
2116 	}
2117 
2118 	return 0;
2119 }
2120 
2121 /**
2122  * amdgpu_vm_bo_lookup_mapping - find mapping by address
2123  *
2124  * @vm: the requested VM
2125  * @addr: the address
2126  *
2127  * Find a mapping by it's address.
2128  *
2129  * Returns:
2130  * The amdgpu_bo_va_mapping matching for addr or NULL
2131  *
2132  */
2133 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2134 							 uint64_t addr)
2135 {
2136 	return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2137 }
2138 
2139 /**
2140  * amdgpu_vm_bo_trace_cs - trace all reserved mappings
2141  *
2142  * @vm: the requested vm
2143  * @ticket: CS ticket
2144  *
2145  * Trace all mappings of BOs reserved during a command submission.
2146  */
2147 void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
2148 {
2149 	struct amdgpu_bo_va_mapping *mapping;
2150 
2151 	if (!trace_amdgpu_vm_bo_cs_enabled())
2152 		return;
2153 
2154 	for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
2155 	     mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
2156 		if (mapping->bo_va && mapping->bo_va->base.bo) {
2157 			struct amdgpu_bo *bo;
2158 
2159 			bo = mapping->bo_va->base.bo;
2160 			if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
2161 			    ticket)
2162 				continue;
2163 		}
2164 
2165 		trace_amdgpu_vm_bo_cs(mapping);
2166 	}
2167 }
2168 
2169 /**
2170  * amdgpu_vm_bo_del - remove a bo from a specific vm
2171  *
2172  * @adev: amdgpu_device pointer
2173  * @bo_va: requested bo_va
2174  *
2175  * Remove @bo_va->bo from the requested vm.
2176  *
2177  * Object have to be reserved!
2178  */
2179 void amdgpu_vm_bo_del(struct amdgpu_device *adev,
2180 		      struct amdgpu_bo_va *bo_va)
2181 {
2182 	struct amdgpu_bo_va_mapping *mapping, *next;
2183 	struct amdgpu_bo *bo = bo_va->base.bo;
2184 	struct amdgpu_vm *vm = bo_va->base.vm;
2185 	struct amdgpu_vm_bo_base **base;
2186 
2187 	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
2188 
2189 	if (bo) {
2190 		dma_resv_assert_held(bo->tbo.base.resv);
2191 		if (amdgpu_vm_is_bo_always_valid(vm, bo))
2192 			ttm_bo_set_bulk_move(&bo->tbo, NULL);
2193 
2194 		for (base = &bo_va->base.bo->vm_bo; *base;
2195 		     base = &(*base)->next) {
2196 			if (*base != &bo_va->base)
2197 				continue;
2198 
2199 			amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
2200 			*base = bo_va->base.next;
2201 			break;
2202 		}
2203 	}
2204 
2205 	spin_lock(&vm->individual_lock);
2206 	list_del(&bo_va->base.vm_status);
2207 	spin_unlock(&vm->individual_lock);
2208 
2209 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2210 		list_del(&mapping->list);
2211 		amdgpu_vm_it_remove(mapping, &vm->va);
2212 		mapping->bo_va = NULL;
2213 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2214 		list_add(&mapping->list, &vm->freed);
2215 	}
2216 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2217 		list_del(&mapping->list);
2218 		amdgpu_vm_it_remove(mapping, &vm->va);
2219 		amdgpu_vm_free_mapping(adev, vm, mapping,
2220 				       bo_va->last_pt_update);
2221 	}
2222 
2223 	dma_fence_put(bo_va->last_pt_update);
2224 
2225 	if (bo && bo_va->is_xgmi)
2226 		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
2227 
2228 	kfree(bo_va);
2229 }
2230 
2231 /**
2232  * amdgpu_vm_evictable - check if we can evict a VM
2233  *
2234  * @bo: A page table of the VM.
2235  *
2236  * Check if it is possible to evict a VM.
2237  */
2238 bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
2239 {
2240 	struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
2241 
2242 	/* Page tables of a destroyed VM can go away immediately */
2243 	if (!bo_base || !bo_base->vm)
2244 		return true;
2245 
2246 	/* Don't evict VM page tables while they are busy */
2247 	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
2248 		return false;
2249 
2250 	/* Try to block ongoing updates */
2251 	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
2252 		return false;
2253 
2254 	/* Don't evict VM page tables while they are updated */
2255 	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
2256 		amdgpu_vm_eviction_unlock(bo_base->vm);
2257 		return false;
2258 	}
2259 
2260 	bo_base->vm->evicting = true;
2261 	amdgpu_vm_eviction_unlock(bo_base->vm);
2262 	return true;
2263 }
2264 
2265 /**
2266  * amdgpu_vm_bo_invalidate - mark the bo as invalid
2267  *
2268  * @bo: amdgpu buffer object
2269  * @evicted: is the BO evicted
2270  *
2271  * Mark @bo as invalid.
2272  */
2273 void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
2274 {
2275 	struct amdgpu_vm_bo_base *bo_base;
2276 
2277 	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
2278 		struct amdgpu_vm *vm = bo_base->vm;
2279 
2280 		if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
2281 			amdgpu_vm_bo_evicted(bo_base);
2282 			continue;
2283 		}
2284 
2285 		if (bo_base->moved)
2286 			continue;
2287 		amdgpu_vm_bo_moved(bo_base);
2288 	}
2289 }
2290 
2291 /**
2292  * amdgpu_vm_bo_move - handle BO move
2293  *
2294  * @bo: amdgpu buffer object
2295  * @new_mem: the new placement of the BO move
2296  * @evicted: is the BO evicted
2297  *
2298  * Update the memory stats for the new placement and mark @bo as invalid.
2299  */
2300 void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
2301 		       bool evicted)
2302 {
2303 	struct amdgpu_vm_bo_base *bo_base;
2304 
2305 	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
2306 		struct amdgpu_vm *vm = bo_base->vm;
2307 
2308 		spin_lock(&vm->stats_lock);
2309 		amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
2310 		amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
2311 		spin_unlock(&vm->stats_lock);
2312 	}
2313 
2314 	amdgpu_vm_bo_invalidate(bo, evicted);
2315 }
2316 
2317 /**
2318  * amdgpu_vm_get_block_size - calculate VM page table size as power of two
2319  *
2320  * @vm_size: VM size
2321  *
2322  * Returns:
2323  * VM page table as power of two
2324  */
2325 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2326 {
2327 	/* Total bits covered by PD + PTs */
2328 	unsigned bits = ilog2(vm_size) + 18;
2329 
2330 	/* Make sure the PD is 4K in size up to 8GB address space.
2331 	   Above that split equal between PD and PTs */
2332 	if (vm_size <= 8)
2333 		return (bits - 9);
2334 	else
2335 		return ((bits + 3) / 2);
2336 }
2337 
2338 /**
2339  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2340  *
2341  * @adev: amdgpu_device pointer
2342  * @min_vm_size: the minimum vm size in GB if it's set auto
2343  * @fragment_size_default: Default PTE fragment size
2344  * @max_level: max VMPT level
2345  * @max_bits: max address space size in bits
2346  *
2347  */
2348 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
2349 			   uint32_t fragment_size_default, unsigned max_level,
2350 			   unsigned max_bits)
2351 {
2352 	unsigned int max_size = 1 << (max_bits - 30);
2353 	unsigned int vm_size;
2354 	uint64_t tmp;
2355 
2356 	/* adjust vm size first */
2357 	if (amdgpu_vm_size != -1) {
2358 		vm_size = amdgpu_vm_size;
2359 		if (vm_size > max_size) {
2360 			dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2361 				 amdgpu_vm_size, max_size);
2362 			vm_size = max_size;
2363 		}
2364 	} else {
2365 		struct sysinfo si;
2366 		unsigned int phys_ram_gb;
2367 
2368 		/* Optimal VM size depends on the amount of physical
2369 		 * RAM available. Underlying requirements and
2370 		 * assumptions:
2371 		 *
2372 		 *  - Need to map system memory and VRAM from all GPUs
2373 		 *     - VRAM from other GPUs not known here
2374 		 *     - Assume VRAM <= system memory
2375 		 *  - On GFX8 and older, VM space can be segmented for
2376 		 *    different MTYPEs
2377 		 *  - Need to allow room for fragmentation, guard pages etc.
2378 		 *
2379 		 * This adds up to a rough guess of system memory x3.
2380 		 * Round up to power of two to maximize the available
2381 		 * VM size with the given page table size.
2382 		 */
2383 		si_meminfo(&si);
2384 		phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
2385 			       (1 << 30) - 1) >> 30;
2386 		vm_size = roundup_pow_of_two(
2387 			clamp(phys_ram_gb * 3, min_vm_size, max_size));
2388 	}
2389 
2390 	adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2391 	adev->vm_manager.max_level = max_level;
2392 
2393 	tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2394 	if (amdgpu_vm_block_size != -1)
2395 		tmp >>= amdgpu_vm_block_size - 9;
2396 	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2397 	adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
2398 	switch (adev->vm_manager.num_level) {
2399 	case 4:
2400 		adev->vm_manager.root_level = AMDGPU_VM_PDB3;
2401 		break;
2402 	case 3:
2403 		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2404 		break;
2405 	case 2:
2406 		adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2407 		break;
2408 	case 1:
2409 		adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2410 		break;
2411 	default:
2412 		dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2413 	}
2414 	/* block size depends on vm size and hw setup*/
2415 	if (amdgpu_vm_block_size != -1)
2416 		adev->vm_manager.block_size =
2417 			min((unsigned)amdgpu_vm_block_size, max_bits
2418 			    - AMDGPU_GPU_PAGE_SHIFT
2419 			    - 9 * adev->vm_manager.num_level);
2420 	else if (adev->vm_manager.num_level > 1)
2421 		adev->vm_manager.block_size = 9;
2422 	else
2423 		adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2424 
2425 	if (amdgpu_vm_fragment_size == -1)
2426 		adev->vm_manager.fragment_size = fragment_size_default;
2427 	else
2428 		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2429 
2430 	dev_info(
2431 		adev->dev,
2432 		"vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2433 		vm_size, adev->vm_manager.num_level + 1,
2434 		adev->vm_manager.block_size, adev->vm_manager.fragment_size);
2435 }
2436 
2437 /**
2438  * amdgpu_vm_wait_idle - wait for the VM to become idle
2439  *
2440  * @vm: VM object to wait for
2441  * @timeout: timeout to wait for VM to become idle
2442  */
2443 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
2444 {
2445 	timeout = drm_sched_entity_flush(&vm->immediate, timeout);
2446 	if (timeout <= 0)
2447 		return timeout;
2448 
2449 	return drm_sched_entity_flush(&vm->delayed, timeout);
2450 }
2451 
2452 static void amdgpu_vm_destroy_task_info(struct kref *kref)
2453 {
2454 	struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount);
2455 
2456 	kfree(ti);
2457 }
2458 
2459 static inline struct amdgpu_vm *
2460 amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
2461 {
2462 	struct amdgpu_vm *vm;
2463 	unsigned long flags;
2464 
2465 	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
2466 	vm = xa_load(&adev->vm_manager.pasids, pasid);
2467 	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
2468 
2469 	return vm;
2470 }
2471 
2472 /**
2473  * amdgpu_vm_put_task_info - reference down the vm task_info ptr
2474  *
2475  * @task_info: task_info struct under discussion.
2476  *
2477  * frees the vm task_info ptr at the last put
2478  */
2479 void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
2480 {
2481 	if (task_info)
2482 		kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
2483 }
2484 
2485 /**
2486  * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
2487  *
2488  * @vm: VM to get info from
2489  *
2490  * Returns the reference counted task_info structure, which must be
2491  * referenced down with amdgpu_vm_put_task_info.
2492  */
2493 struct amdgpu_task_info *
2494 amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
2495 {
2496 	struct amdgpu_task_info *ti = NULL;
2497 
2498 	if (vm) {
2499 		ti = vm->task_info;
2500 		kref_get(&vm->task_info->refcount);
2501 	}
2502 
2503 	return ti;
2504 }
2505 
2506 /**
2507  * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
2508  *
2509  * @adev: drm device pointer
2510  * @pasid: PASID identifier for VM
2511  *
2512  * Returns the reference counted task_info structure, which must be
2513  * referenced down with amdgpu_vm_put_task_info.
2514  */
2515 struct amdgpu_task_info *
2516 amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
2517 {
2518 	return amdgpu_vm_get_task_info_vm(
2519 			amdgpu_vm_get_vm_from_pasid(adev, pasid));
2520 }
2521 
2522 static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
2523 {
2524 	vm->task_info = kzalloc_obj(struct amdgpu_task_info);
2525 	if (!vm->task_info)
2526 		return -ENOMEM;
2527 
2528 	kref_init(&vm->task_info->refcount);
2529 	return 0;
2530 }
2531 
2532 /**
2533  * amdgpu_vm_set_task_info - Sets VMs task info.
2534  *
2535  * @vm: vm for which to set the info
2536  */
2537 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
2538 {
2539 	if (!vm->task_info)
2540 		return;
2541 
2542 	if (vm->task_info->task.pid == current->pid)
2543 		return;
2544 
2545 	vm->task_info->task.pid = current->pid;
2546 	get_task_comm(vm->task_info->task.comm, current);
2547 
2548 	vm->task_info->tgid = current->tgid;
2549 	get_task_comm(vm->task_info->process_name, current->group_leader);
2550 }
2551 
2552 /**
2553  * amdgpu_vm_init - initialize a vm instance
2554  *
2555  * @adev: amdgpu_device pointer
2556  * @vm: requested vm
2557  * @xcp_id: GPU partition selection id
2558  * @pasid: the pasid the VM is using on this GPU
2559  *
2560  * Init @vm fields.
2561  *
2562  * Returns:
2563  * 0 for success, error for failure.
2564  */
2565 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2566 		   int32_t xcp_id, uint32_t pasid)
2567 {
2568 	struct amdgpu_bo *root_bo;
2569 	struct amdgpu_bo_vm *root;
2570 	int r, i;
2571 
2572 	vm->va = RB_ROOT_CACHED;
2573 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2574 		vm->reserved_vmid[i] = NULL;
2575 
2576 	amdgpu_vm_bo_status_init(&vm->kernel);
2577 	amdgpu_vm_bo_status_init(&vm->always_valid);
2578 	spin_lock_init(&vm->individual_lock);
2579 	amdgpu_vm_bo_status_init(&vm->individual);
2580 	INIT_LIST_HEAD(&vm->freed);
2581 	INIT_KFIFO(vm->faults);
2582 	spin_lock_init(&vm->stats_lock);
2583 
2584 	r = amdgpu_vm_init_entities(adev, vm);
2585 	if (r)
2586 		return r;
2587 
2588 	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
2589 
2590 	vm->is_compute_context = false;
2591 	vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev);
2592 
2593 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2594 				    AMDGPU_VM_USE_CPU_FOR_GFX);
2595 
2596 	dev_dbg(adev->dev, "VM update mode is %s\n",
2597 		vm->use_cpu_for_update ? "CPU" : "SDMA");
2598 	WARN_ONCE((vm->use_cpu_for_update &&
2599 		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
2600 		  "CPU update of VM recommended only for large BAR system\n");
2601 
2602 	if (vm->use_cpu_for_update)
2603 		vm->update_funcs = &amdgpu_vm_cpu_funcs;
2604 	else
2605 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
2606 
2607 	vm->last_update = dma_fence_get_stub();
2608 	vm->last_unlocked = dma_fence_get_stub();
2609 	vm->last_tlb_flush = dma_fence_get_stub();
2610 	vm->generation = amdgpu_vm_generation(adev, NULL);
2611 
2612 	mutex_init(&vm->eviction_lock);
2613 	vm->evicting = false;
2614 	vm->tlb_fence_context = dma_fence_context_alloc(1);
2615 
2616 	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
2617 				false, &root, xcp_id);
2618 	if (r)
2619 		goto error_free_delayed;
2620 
2621 	root_bo = amdgpu_bo_ref(&root->bo);
2622 	r = amdgpu_bo_reserve(root_bo, true);
2623 	if (r) {
2624 		amdgpu_bo_unref(&root_bo);
2625 		goto error_free_delayed;
2626 	}
2627 
2628 	amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
2629 	r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
2630 	if (r)
2631 		goto error_free_root;
2632 
2633 	r = amdgpu_vm_pt_clear(adev, vm, root, false);
2634 	if (r)
2635 		goto error_free_root;
2636 
2637 	r = amdgpu_vm_create_task_info(vm);
2638 	if (r)
2639 		dev_dbg(adev->dev, "Failed to create task info for VM\n");
2640 
2641 	/* Store new PASID in XArray (if non-zero) */
2642 	if (pasid != 0) {
2643 		r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
2644 		if (r < 0)
2645 			goto error_free_root;
2646 
2647 		vm->pasid = pasid;
2648 	}
2649 
2650 	amdgpu_bo_unreserve(vm->root.bo);
2651 	amdgpu_bo_unref(&root_bo);
2652 
2653 	return 0;
2654 
2655 error_free_root:
2656 	/* If PASID was partially set, erase it from XArray before failing */
2657 	if (vm->pasid != 0) {
2658 		xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
2659 		vm->pasid = 0;
2660 	}
2661 	amdgpu_vm_pt_free_root(adev, vm);
2662 	amdgpu_bo_unreserve(vm->root.bo);
2663 	amdgpu_bo_unref(&root_bo);
2664 
2665 error_free_delayed:
2666 	dma_fence_put(vm->last_tlb_flush);
2667 	dma_fence_put(vm->last_unlocked);
2668 	ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
2669 	amdgpu_vm_fini_entities(vm);
2670 
2671 	return r;
2672 }
2673 
2674 /**
2675  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2676  *
2677  * @adev: amdgpu_device pointer
2678  * @vm: requested vm
2679  *
2680  * This only works on GFX VMs that don't have any BOs added and no
2681  * page tables allocated yet.
2682  *
2683  * Changes the following VM parameters:
2684  * - use_cpu_for_update
2685  * - pte_supports_ats
2686  *
2687  * Reinitializes the page directory to reflect the changed ATS
2688  * setting.
2689  *
2690  * Returns:
2691  * 0 for success, -errno for errors.
2692  */
2693 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2694 {
2695 	int r;
2696 
2697 	r = amdgpu_bo_reserve(vm->root.bo, true);
2698 	if (r)
2699 		return r;
2700 
2701 	/* Update VM state */
2702 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2703 				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2704 	dev_dbg(adev->dev, "VM update mode is %s\n",
2705 		vm->use_cpu_for_update ? "CPU" : "SDMA");
2706 	WARN_ONCE((vm->use_cpu_for_update &&
2707 		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
2708 		  "CPU update of VM recommended only for large BAR system\n");
2709 
2710 	if (vm->use_cpu_for_update) {
2711 		/* Sync with last SDMA update/clear before switching to CPU */
2712 		r = amdgpu_bo_sync_wait(vm->root.bo,
2713 					AMDGPU_FENCE_OWNER_UNDEFINED, true);
2714 		if (r)
2715 			goto unreserve_bo;
2716 
2717 		vm->update_funcs = &amdgpu_vm_cpu_funcs;
2718 		r = amdgpu_vm_pt_map_tables(adev, vm);
2719 		if (r)
2720 			goto unreserve_bo;
2721 
2722 	} else {
2723 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
2724 	}
2725 
2726 	dma_fence_put(vm->last_update);
2727 	vm->last_update = dma_fence_get_stub();
2728 	vm->is_compute_context = true;
2729 	vm->need_tlb_fence = true;
2730 
2731 unreserve_bo:
2732 	amdgpu_bo_unreserve(vm->root.bo);
2733 	return r;
2734 }
2735 
2736 static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
2737 {
2738 	for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
2739 		if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
2740 		      vm->stats[i].evicted == 0))
2741 			return false;
2742 	}
2743 	return true;
2744 }
2745 
2746 /**
2747  * amdgpu_vm_fini - tear down a vm instance
2748  *
2749  * @adev: amdgpu_device pointer
2750  * @vm: requested vm
2751  *
2752  * Tear down @vm.
2753  * Unbind the VM and remove all bos from the vm bo list
2754  */
2755 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2756 {
2757 	struct amdgpu_bo_va_mapping *mapping, *tmp;
2758 	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2759 	struct amdgpu_bo *root;
2760 	unsigned long flags;
2761 	int i;
2762 
2763 	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2764 
2765 	root = amdgpu_bo_ref(vm->root.bo);
2766 	amdgpu_bo_reserve(root, true);
2767 	/* Remove PASID mapping before destroying VM */
2768 	if (vm->pasid != 0) {
2769 		xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
2770 		vm->pasid = 0;
2771 	}
2772 	dma_fence_wait(vm->last_unlocked, false);
2773 	dma_fence_put(vm->last_unlocked);
2774 	dma_fence_wait(vm->last_tlb_flush, false);
2775 	/* Make sure that all fence callbacks have completed */
2776 	dma_fence_lock_irqsave(vm->last_tlb_flush, flags);
2777 	dma_fence_unlock_irqrestore(vm->last_tlb_flush, flags);
2778 	dma_fence_put(vm->last_tlb_flush);
2779 
2780 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2781 		if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
2782 			amdgpu_vm_prt_fini(adev, vm);
2783 			prt_fini_needed = false;
2784 		}
2785 
2786 		list_del(&mapping->list);
2787 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2788 	}
2789 
2790 	amdgpu_vm_pt_free_root(adev, vm);
2791 	amdgpu_bo_unreserve(root);
2792 	amdgpu_bo_unref(&root);
2793 	WARN_ON(vm->root.bo);
2794 
2795 	amdgpu_vm_fini_entities(vm);
2796 
2797 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2798 		dev_err(adev->dev, "still active bo inside vm\n");
2799 	}
2800 	rbtree_postorder_for_each_entry_safe(mapping, tmp,
2801 					     &vm->va.rb_root, rb) {
2802 		/* Don't remove the mapping here, we don't want to trigger a
2803 		 * rebalance and the tree is about to be destroyed anyway.
2804 		 */
2805 		list_del(&mapping->list);
2806 		kfree(mapping);
2807 	}
2808 
2809 	dma_fence_put(vm->last_update);
2810 
2811 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
2812 		amdgpu_vmid_free_reserved(adev, vm, i);
2813 	}
2814 
2815 	ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
2816 
2817 	if (!amdgpu_vm_stats_is_zero(vm)) {
2818 		struct amdgpu_task_info *ti = vm->task_info;
2819 
2820 		dev_warn(adev->dev,
2821 			 "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
2822 			 ti->process_name, ti->task.pid, ti->task.comm, ti->tgid);
2823 	}
2824 
2825 	amdgpu_vm_put_task_info(vm->task_info);
2826 }
2827 
2828 /**
2829  * amdgpu_vm_manager_init - init the VM manager
2830  *
2831  * @adev: amdgpu_device pointer
2832  *
2833  * Initialize the VM manager structures
2834  */
2835 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2836 {
2837 	/* Concurrent flushes are only possible starting with Vega10 and
2838 	 * are broken on Navi10 and Navi14.
2839 	 */
2840 	adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
2841 					      adev->asic_type == CHIP_NAVI10 ||
2842 					      adev->asic_type == CHIP_NAVI14);
2843 	amdgpu_vmid_mgr_init(adev);
2844 
2845 	spin_lock_init(&adev->vm_manager.prt_lock);
2846 	atomic_set(&adev->vm_manager.num_prt_users, 0);
2847 
2848 	/* If not overridden by the user, by default, only in large BAR systems
2849 	 * Compute VM tables will be updated by CPU
2850 	 */
2851 #ifdef CONFIG_X86_64
2852 	if (amdgpu_vm_update_mode == -1) {
2853 		/* For asic with VF MMIO access protection
2854 		 * avoid using CPU for VM table updates
2855 		 */
2856 		if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
2857 		    !amdgpu_sriov_vf_mmio_access_protection(adev))
2858 			adev->vm_manager.vm_update_mode =
2859 				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2860 		else
2861 			adev->vm_manager.vm_update_mode = 0;
2862 	} else
2863 		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2864 #else
2865 	adev->vm_manager.vm_update_mode = 0;
2866 #endif
2867 
2868 	xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
2869 }
2870 
2871 /**
2872  * amdgpu_vm_manager_fini - cleanup VM manager
2873  *
2874  * @adev: amdgpu_device pointer
2875  *
2876  * Cleanup the VM manager and free resources.
2877  */
2878 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2879 {
2880 	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
2881 	xa_destroy(&adev->vm_manager.pasids);
2882 
2883 	amdgpu_vmid_mgr_fini(adev);
2884 	amdgpu_pasid_mgr_cleanup();
2885 }
2886 
2887 /**
2888  * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
2889  *
2890  * @dev: drm device pointer
2891  * @data: drm_amdgpu_vm
2892  * @filp: drm file pointer
2893  *
2894  * Returns:
2895  * 0 for success, -errno for errors.
2896  */
2897 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2898 {
2899 	union drm_amdgpu_vm *args = data;
2900 	struct amdgpu_device *adev = drm_to_adev(dev);
2901 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
2902 	struct amdgpu_vm *vm = &fpriv->vm;
2903 
2904 	/* No valid flags defined yet */
2905 	if (args->in.flags)
2906 		return -EINVAL;
2907 
2908 	switch (args->in.op) {
2909 	case AMDGPU_VM_OP_RESERVE_VMID:
2910 		/* We only have requirement to reserve vmid from gfxhub */
2911 		return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
2912 	case AMDGPU_VM_OP_UNRESERVE_VMID:
2913 		amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
2914 		break;
2915 	default:
2916 		return -EINVAL;
2917 	}
2918 
2919 	return 0;
2920 }
2921 
2922 /**
2923  * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible.
2924  * @adev: amdgpu device pointer
2925  * @root: root BO of the VM
2926  * @pasid: PASID of the VM
2927  * The caller needs to unreserve and unref the root bo on success.
2928  */
2929 struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
2930 					  struct amdgpu_bo **root, u32 pasid)
2931 {
2932 	unsigned long irqflags;
2933 	struct amdgpu_vm *vm;
2934 	int r;
2935 
2936 	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2937 	vm = xa_load(&adev->vm_manager.pasids, pasid);
2938 	*root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
2939 	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2940 
2941 	if (!*root)
2942 		return NULL;
2943 
2944 	r = amdgpu_bo_reserve(*root, true);
2945 	if (r)
2946 		goto error_unref;
2947 
2948 	/* Double check that the VM still exists */
2949 	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2950 	vm = xa_load(&adev->vm_manager.pasids, pasid);
2951 	if (vm && vm->root.bo != *root)
2952 		vm = NULL;
2953 	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2954 	if (!vm)
2955 		goto error_unlock;
2956 
2957 	return vm;
2958 error_unlock:
2959 	amdgpu_bo_unreserve(*root);
2960 
2961 error_unref:
2962 	amdgpu_bo_unref(root);
2963 	return NULL;
2964 }
2965 
2966 /**
2967  * amdgpu_vm_handle_fault - graceful handling of VM faults.
2968  * @adev: amdgpu device pointer
2969  * @pasid: PASID of the VM
2970  * @ts: Timestamp of the fault
2971  * @vmid: VMID, only used for GFX 9.4.3.
2972  * @node_id: Node_id received in IH cookie. Only applicable for
2973  *           GFX 9.4.3.
2974  * @addr: Address of the fault
2975  * @write_fault: true is write fault, false is read fault
2976  *
2977  * Try to gracefully handle a VM fault. Return true if the fault was handled and
2978  * shouldn't be reported any more.
2979  */
2980 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
2981 			    u32 vmid, u32 node_id, uint64_t addr,
2982 			    uint64_t ts, bool write_fault)
2983 {
2984 	bool is_compute_context = false;
2985 	struct amdgpu_bo *root;
2986 	uint64_t value, flags;
2987 	struct amdgpu_vm *vm;
2988 	int r;
2989 
2990 	vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
2991 	if (!vm)
2992 		return false;
2993 
2994 	is_compute_context = vm->is_compute_context;
2995 
2996 	if (is_compute_context) {
2997 		/* Unreserve root since svm_range_restore_pages might try to reserve it. */
2998 		/* TODO: rework svm_range_restore_pages so that this isn't necessary. */
2999 		amdgpu_bo_unreserve(root);
3000 
3001 		if (!svm_range_restore_pages(adev, pasid, vmid,
3002 					     node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
3003 			amdgpu_bo_unref(&root);
3004 			return true;
3005 		}
3006 		amdgpu_bo_unref(&root);
3007 
3008 		/* Re-acquire the VM lock, could be that the VM was freed in between. */
3009 		vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
3010 		if (!vm)
3011 			return false;
3012 	}
3013 
3014 	addr /= AMDGPU_GPU_PAGE_SIZE;
3015 	flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
3016 		AMDGPU_PTE_SYSTEM;
3017 
3018 	if (is_compute_context) {
3019 		/* Intentionally setting invalid PTE flag
3020 		 * combination to force a no-retry-fault
3021 		 */
3022 		flags = AMDGPU_VM_NORETRY_FLAGS;
3023 		value = 0;
3024 	} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
3025 		/* Redirect the access to the dummy page */
3026 		value = adev->dummy_page_addr;
3027 		flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
3028 			AMDGPU_PTE_WRITEABLE;
3029 
3030 	} else {
3031 		/* Let the hw retry silently on the PTE */
3032 		value = 0;
3033 	}
3034 
3035 	r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
3036 	if (r) {
3037 		pr_debug("failed %d to reserve fence slot\n", r);
3038 		goto error_unlock;
3039 	}
3040 
3041 	r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
3042 				   NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
3043 	if (r)
3044 		goto error_unlock;
3045 
3046 	r = amdgpu_vm_update_pdes(adev, vm, true);
3047 
3048 error_unlock:
3049 	amdgpu_bo_unreserve(root);
3050 	if (r < 0)
3051 		dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
3052 
3053 	amdgpu_bo_unref(&root);
3054 
3055 	return false;
3056 }
3057 
3058 #if defined(CONFIG_DEBUG_FS)
3059 
3060 /* print the debug info for a specific set of status lists */
3061 static void amdgpu_debugfs_vm_bo_status_info(struct seq_file *m,
3062 					     struct amdgpu_vm_bo_status *lists)
3063 {
3064 	struct amdgpu_vm_bo_base *base;
3065 	unsigned int id;
3066 
3067 	id = 0;
3068 	seq_puts(m, "\tEvicted BOs:\n");
3069 	list_for_each_entry(base, &lists->evicted, vm_status) {
3070 		if (!base->bo)
3071 			continue;
3072 
3073 		amdgpu_bo_print_info(id++, base->bo, m);
3074 	}
3075 
3076 	id = 0;
3077 	seq_puts(m, "\tMoved BOs:\n");
3078 	list_for_each_entry(base, &lists->moved, vm_status) {
3079 		if (!base->bo)
3080 			continue;
3081 
3082 		amdgpu_bo_print_info(id++, base->bo, m);
3083 	}
3084 
3085 	id = 0;
3086 	seq_puts(m, "\tIdle BOs:\n");
3087 	list_for_each_entry(base, &lists->moved, vm_status) {
3088 		if (!base->bo)
3089 			continue;
3090 
3091 		amdgpu_bo_print_info(id++, base->bo, m);
3092 	}
3093 }
3094 
3095 /**
3096  * amdgpu_debugfs_vm_bo_info  - print BO info for the VM
3097  *
3098  * @vm: Requested VM for printing BO info
3099  * @m: debugfs file
3100  *
3101  * Print BO information in debugfs file for the VM
3102  */
3103 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
3104 {
3105 	amdgpu_vm_assert_locked(vm);
3106 
3107 	seq_puts(m, "\tKernel PT/PDs:\n");
3108 	amdgpu_debugfs_vm_bo_status_info(m, &vm->kernel);
3109 
3110 	seq_puts(m, "\tPer VM BOs:\n");
3111 	amdgpu_debugfs_vm_bo_status_info(m, &vm->always_valid);
3112 
3113 	seq_puts(m, "\tIndividual BOs:\n");
3114 	spin_lock(&vm->individual_lock);
3115 	amdgpu_debugfs_vm_bo_status_info(m, &vm->individual);
3116 	spin_unlock(&vm->individual_lock);
3117 }
3118 #endif
3119 
3120 /**
3121  * amdgpu_vm_update_fault_cache - update cached fault into.
3122  * @adev: amdgpu device pointer
3123  * @pasid: PASID of the VM
3124  * @addr: Address of the fault
3125  * @status: GPUVM fault status register
3126  * @vmhub: which vmhub got the fault
3127  *
3128  * Cache the fault info for later use by userspace in debugging.
3129  */
3130 void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
3131 				  unsigned int pasid,
3132 				  uint64_t addr,
3133 				  uint32_t status,
3134 				  unsigned int vmhub)
3135 {
3136 	struct amdgpu_vm *vm;
3137 	unsigned long flags;
3138 
3139 	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
3140 
3141 	vm = xa_load(&adev->vm_manager.pasids, pasid);
3142 	/* Don't update the fault cache if status is 0.  In the multiple
3143 	 * fault case, subsequent faults will return a 0 status which is
3144 	 * useless for userspace and replaces the useful fault status, so
3145 	 * only update if status is non-0.
3146 	 */
3147 	if (vm && status) {
3148 		vm->fault_info.addr = addr;
3149 		vm->fault_info.status = status;
3150 		/*
3151 		 * Update the fault information globally for later usage
3152 		 * when vm could be stale or freed.
3153 		 */
3154 		adev->vm_manager.fault_info.addr = addr;
3155 		adev->vm_manager.fault_info.vmhub = vmhub;
3156 		adev->vm_manager.fault_info.status = status;
3157 
3158 		if (AMDGPU_IS_GFXHUB(vmhub)) {
3159 			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
3160 			vm->fault_info.vmhub |=
3161 				(vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
3162 		} else if (AMDGPU_IS_MMHUB0(vmhub)) {
3163 			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
3164 			vm->fault_info.vmhub |=
3165 				(vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
3166 		} else if (AMDGPU_IS_MMHUB1(vmhub)) {
3167 			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
3168 			vm->fault_info.vmhub |=
3169 				(vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
3170 		} else {
3171 			WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
3172 		}
3173 	}
3174 	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
3175 }
3176 
3177 void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
3178 			       struct amdgpu_task_info *task_info)
3179 {
3180 	dev_err(adev->dev,
3181 		" Process %s pid %d thread %s pid %d\n",
3182 		task_info->process_name, task_info->tgid,
3183 		task_info->task.comm, task_info->task.pid);
3184 }
3185 
3186 void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev,
3187 				   const struct amdgpu_vm_pte_funcs *vm_pte_funcs)
3188 {
3189 	struct drm_gpu_scheduler *sched;
3190 	int i;
3191 
3192 	for (i = 0; i < adev->sdma.num_instances; i++) {
3193 		if (adev->sdma.has_page_queue)
3194 			sched = &adev->sdma.instance[i].page.sched;
3195 		else
3196 			sched = &adev->sdma.instance[i].ring.sched;
3197 		adev->vm_manager.vm_pte_scheds[i] = sched;
3198 	}
3199 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
3200 	adev->vm_manager.vm_pte_funcs = vm_pte_funcs;
3201 }
3202