xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c (revision 99676aed1fec109d62822e21a06760eb098dc5f4)
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 
29 #include <linux/dma-fence-array.h>
30 #include <linux/interval_tree_generic.h>
31 #include <linux/idr.h>
32 #include <linux/dma-buf.h>
33 
34 #include <drm/amdgpu_drm.h>
35 #include <drm/drm_drv.h>
36 #include <drm/ttm/ttm_tt.h>
37 #include <drm/drm_exec.h>
38 #include "amdgpu.h"
39 #include "amdgpu_vm.h"
40 #include "amdgpu_trace.h"
41 #include "amdgpu_amdkfd.h"
42 #include "amdgpu_gmc.h"
43 #include "amdgpu_xgmi.h"
44 #include "amdgpu_dma_buf.h"
45 #include "amdgpu_res_cursor.h"
46 #include "kfd_svm.h"
47 
48 /**
49  * DOC: GPUVM
50  *
51  * GPUVM is the MMU functionality provided on the GPU.
52  * GPUVM is similar to the legacy GART on older asics, however
53  * rather than there being a single global GART table
54  * for the entire GPU, there can be multiple GPUVM page tables active
55  * at any given time.  The GPUVM page tables can contain a mix of
56  * VRAM pages and system pages (both memory and MMIO) and system pages
57  * can be mapped as snooped (cached system pages) or unsnooped
58  * (uncached system pages).
59  *
60  * Each active GPUVM has an ID associated with it and there is a page table
61  * linked with each VMID.  When executing a command buffer,
62  * the kernel tells the engine what VMID to use for that command
63  * buffer.  VMIDs are allocated dynamically as commands are submitted.
64  * The userspace drivers maintain their own address space and the kernel
65  * sets up their page tables accordingly when they submit their
66  * command buffers and a VMID is assigned.
67  * The hardware supports up to 16 active GPUVMs at any given time.
68  *
69  * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
70  * on the ASIC family.  GPUVM supports RWX attributes on each page as well
71  * as other features such as encryption and caching attributes.
72  *
73  * VMID 0 is special.  It is the GPUVM used for the kernel driver.  In
74  * addition to an aperture managed by a page table, VMID 0 also has
75  * several other apertures.  There is an aperture for direct access to VRAM
76  * and there is a legacy AGP aperture which just forwards accesses directly
77  * to the matching system physical addresses (or IOVAs when an IOMMU is
78  * present).  These apertures provide direct access to these memories without
79  * incurring the overhead of a page table.  VMID 0 is used by the kernel
80  * driver for tasks like memory management.
81  *
82  * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
83  * For user applications, each application can have their own unique GPUVM
84  * address space.  The application manages the address space and the kernel
85  * driver manages the GPUVM page tables for each process.  If a GPU client
86  * accesses an invalid page, it will generate a GPU page fault, similar to
87  * accessing an invalid page on a CPU.
88  */
89 
90 #define START(node) ((node)->start)
91 #define LAST(node) ((node)->last)
92 
93 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
94 		     START, LAST, static, amdgpu_vm_it)
95 
96 #undef START
97 #undef LAST
98 
99 /**
100  * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
101  */
102 struct amdgpu_prt_cb {
103 
104 	/**
105 	 * @adev: amdgpu device
106 	 */
107 	struct amdgpu_device *adev;
108 
109 	/**
110 	 * @cb: callback
111 	 */
112 	struct dma_fence_cb cb;
113 };
114 
115 /**
116  * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
117  */
118 struct amdgpu_vm_tlb_seq_struct {
119 	/**
120 	 * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
121 	 */
122 	struct amdgpu_vm *vm;
123 
124 	/**
125 	 * @cb: callback
126 	 */
127 	struct dma_fence_cb cb;
128 };
129 
130 /**
131  * amdgpu_vm_assert_locked - check if VM is correctly locked
132  * @vm: the VM which schould be tested
133  *
134  * Asserts that the VM root PD is locked.
135  */
136 static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
137 {
138 	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
139 }
140 
141 /* Initialize the amdgpu_vm_bo_status object */
142 static void amdgpu_vm_bo_status_init(struct amdgpu_vm_bo_status *lists)
143 {
144 	INIT_LIST_HEAD(&lists->evicted);
145 	INIT_LIST_HEAD(&lists->moved);
146 	INIT_LIST_HEAD(&lists->idle);
147 }
148 
149 /*
150  * Make sure we have the lock to modify the vm_bo status and return the object
151  * with the status lists.
152  */
153 static struct amdgpu_vm_bo_status *
154 amdgpu_vm_bo_lock_lists(struct amdgpu_vm_bo_base *vm_bo)
155 {
156 	struct amdgpu_vm *vm = vm_bo->vm;
157 	struct amdgpu_bo *bo = vm_bo->bo;
158 
159 	if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
160 		/* No extra locking needed, protected by the root PD resv lock */
161 		amdgpu_vm_assert_locked(vm);
162 
163 		if (bo->tbo.type == ttm_bo_type_kernel)
164 			return &vm->kernel;
165 
166 		return &vm->always_valid;
167 	}
168 
169 	spin_lock(&vm_bo->vm->individual_lock);
170 	return &vm->individual;
171 }
172 
173 /* Eventually unlock the status list lock again */
174 static void amdgpu_vm_bo_unlock_lists(struct amdgpu_vm_bo_base *vm_bo)
175 {
176 	if (amdgpu_vm_is_bo_always_valid(vm_bo->vm, vm_bo->bo))
177 		amdgpu_vm_assert_locked(vm_bo->vm);
178 	else
179 		spin_unlock(&vm_bo->vm->individual_lock);
180 }
181 
182 /**
183  * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
184  *
185  * @vm: VM to test against.
186  * @bo: BO to be tested.
187  *
188  * Returns true if the BO shares the dma_resv object with the root PD and is
189  * always guaranteed to be valid inside the VM.
190  */
191 bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
192 {
193 	return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
194 }
195 
196 /**
197  * amdgpu_vm_bo_evicted - vm_bo is evicted
198  *
199  * @vm_bo: vm_bo which is evicted
200  *
201  * State for vm_bo objects meaning the underlying BO was evicted and need to
202  * move in place again.
203  */
204 static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
205 {
206 	struct amdgpu_vm_bo_status *lists;
207 
208 	lists = amdgpu_vm_bo_lock_lists(vm_bo);
209 	vm_bo->moved = true;
210 	list_move(&vm_bo->vm_status, &lists->evicted);
211 	amdgpu_vm_bo_unlock_lists(vm_bo);
212 }
213 /**
214  * amdgpu_vm_bo_moved - vm_bo is moved
215  *
216  * @vm_bo: vm_bo which is moved
217  *
218  * State for vm_bo objects meaning the underlying BO was moved but the new
219  * location not yet reflected in the page tables.
220  */
221 static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
222 {
223 	struct amdgpu_vm_bo_status *lists;
224 	struct amdgpu_bo *bo = vm_bo->bo;
225 
226 	/*
227 	 * The root PD doesn't have a parent PDE and goes directly into the
228 	 * idle state.
229 	 */
230 	lists = amdgpu_vm_bo_lock_lists(vm_bo);
231 	if (bo && bo->tbo.type == ttm_bo_type_kernel && !bo->parent) {
232 		vm_bo->moved = false;
233 		list_move(&vm_bo->vm_status, &lists->idle);
234 	} else {
235 		vm_bo->moved = true;
236 		list_move(&vm_bo->vm_status, &lists->moved);
237 	}
238 	amdgpu_vm_bo_unlock_lists(vm_bo);
239 }
240 
241 /**
242  * amdgpu_vm_bo_idle - vm_bo is idle
243  *
244  * @vm_bo: vm_bo which is now idle
245  *
246  * State for vm_bo objects meaning we are done with the state machine and no
247  * further action is necessary.
248  */
249 static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
250 {
251 	struct amdgpu_vm_bo_status *lists;
252 
253 	lists = amdgpu_vm_bo_lock_lists(vm_bo);
254 	if (!amdgpu_vm_is_bo_always_valid(vm_bo->vm, vm_bo->bo))
255 		vm_bo->moved = false;
256 	list_move(&vm_bo->vm_status, &lists->idle);
257 	amdgpu_vm_bo_unlock_lists(vm_bo);
258 }
259 
260 /**
261  * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
262  * @vm: the VM which state machine to reset
263  *
264  * Move all vm_bo object in the VM into a state where their location will be
265  * updated in the page tables again.
266  */
267 static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
268 {
269 	amdgpu_vm_assert_locked(vm);
270 	list_splice_init(&vm->kernel.idle, &vm->kernel.moved);
271 	list_splice_init(&vm->always_valid.idle, &vm->always_valid.moved);
272 
273 	spin_lock(&vm->individual_lock);
274 	list_splice_init(&vm->individual.idle, &vm->individual.moved);
275 	spin_unlock(&vm->individual_lock);
276 }
277 
278 /**
279  * amdgpu_vm_update_shared - helper to update shared memory stat
280  * @base: base structure for tracking BO usage in a VM
281  *
282  * Takes the vm stats_lock and updates the shared memory stat. If the basic
283  * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called
284  * as well.
285  */
286 static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
287 {
288 	struct amdgpu_vm *vm = base->vm;
289 	struct amdgpu_bo *bo = base->bo;
290 	uint64_t size = amdgpu_bo_size(bo);
291 	uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
292 	bool shared;
293 
294 	dma_resv_assert_held(bo->tbo.base.resv);
295 	spin_lock(&vm->stats_lock);
296 	shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
297 	if (base->shared != shared) {
298 		base->shared = shared;
299 		if (shared) {
300 			vm->stats[bo_memtype].drm.shared += size;
301 			vm->stats[bo_memtype].drm.private -= size;
302 		} else {
303 			vm->stats[bo_memtype].drm.shared -= size;
304 			vm->stats[bo_memtype].drm.private += size;
305 		}
306 	}
307 	spin_unlock(&vm->stats_lock);
308 }
309 
310 /**
311  * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
312  * @bo: amdgpu buffer object
313  *
314  * Update the per VM stats for all the vm if needed from private to shared or
315  * vice versa.
316  */
317 void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
318 {
319 	struct amdgpu_vm_bo_base *base;
320 
321 	for (base = bo->vm_bo; base; base = base->next)
322 		amdgpu_vm_update_shared(base);
323 }
324 
325 /**
326  * amdgpu_vm_update_stats_locked - helper to update normal memory stat
327  * @base: base structure for tracking BO usage in a VM
328  * @res:  the ttm_resource to use for the purpose of accounting, may or may not
329  *        be bo->tbo.resource
330  * @sign: if we should add (+1) or subtract (-1) from the stat
331  *
332  * Caller need to have the vm stats_lock held. Useful for when multiple update
333  * need to happen at the same time.
334  */
335 static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
336 					  struct ttm_resource *res, int sign)
337 {
338 	struct amdgpu_vm *vm = base->vm;
339 	struct amdgpu_bo *bo = base->bo;
340 	int64_t size = sign * amdgpu_bo_size(bo);
341 	uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
342 
343 	/* For drm-total- and drm-shared-, BO are accounted by their preferred
344 	 * placement, see also amdgpu_bo_mem_stats_placement.
345 	 */
346 	if (base->shared)
347 		vm->stats[bo_memtype].drm.shared += size;
348 	else
349 		vm->stats[bo_memtype].drm.private += size;
350 
351 	if (res && res->mem_type < __AMDGPU_PL_NUM) {
352 		uint32_t res_memtype = res->mem_type;
353 
354 		vm->stats[res_memtype].drm.resident += size;
355 		/* BO only count as purgeable if it is resident,
356 		 * since otherwise there's nothing to purge.
357 		 */
358 		if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
359 			vm->stats[res_memtype].drm.purgeable += size;
360 		if (!(bo->preferred_domains &
361 		      amdgpu_mem_type_to_domain(res_memtype)))
362 			vm->stats[bo_memtype].evicted += size;
363 	}
364 }
365 
366 /**
367  * amdgpu_vm_update_stats - helper to update normal memory stat
368  * @base: base structure for tracking BO usage in a VM
369  * @res:  the ttm_resource to use for the purpose of accounting, may or may not
370  *        be bo->tbo.resource
371  * @sign: if we should add (+1) or subtract (-1) from the stat
372  *
373  * Updates the basic memory stat when bo is added/deleted/moved.
374  */
375 void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
376 			    struct ttm_resource *res, int sign)
377 {
378 	struct amdgpu_vm *vm = base->vm;
379 
380 	spin_lock(&vm->stats_lock);
381 	amdgpu_vm_update_stats_locked(base, res, sign);
382 	spin_unlock(&vm->stats_lock);
383 }
384 
385 /**
386  * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
387  *
388  * @base: base structure for tracking BO usage in a VM
389  * @vm: vm to which bo is to be added
390  * @bo: amdgpu buffer object
391  *
392  * Initialize a bo_va_base structure and add it to the appropriate lists
393  *
394  */
395 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
396 			    struct amdgpu_vm *vm, struct amdgpu_bo *bo)
397 {
398 	base->vm = vm;
399 	base->bo = bo;
400 	base->next = NULL;
401 	INIT_LIST_HEAD(&base->vm_status);
402 
403 	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
404 	if (!bo)
405 		return;
406 
407 	base->next = bo->vm_bo;
408 	bo->vm_bo = base;
409 
410 	spin_lock(&vm->stats_lock);
411 	base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
412 	amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
413 	spin_unlock(&vm->stats_lock);
414 
415 	if (!amdgpu_vm_is_bo_always_valid(vm, bo)) {
416 		amdgpu_vm_bo_idle(base);
417 		return;
418 	}
419 
420 	ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
421 
422 	/*
423 	 * When a per VM isn't in the desired domain put it into the evicted
424 	 * state to make sure that it gets validated on the next best occasion.
425 	 */
426 	if (bo->preferred_domains &
427 	    amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
428 		amdgpu_vm_bo_moved(base);
429 	else
430 		amdgpu_vm_bo_evicted(base);
431 }
432 
433 /**
434  * amdgpu_vm_lock_pd - lock PD in drm_exec
435  *
436  * @vm: vm providing the BOs
437  * @exec: drm execution context
438  * @num_fences: number of extra fences to reserve
439  *
440  * Lock the VM root PD in the DRM execution context.
441  */
442 int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
443 		      unsigned int num_fences)
444 {
445 	/* We need at least two fences for the VM PD/PT updates */
446 	return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
447 				    2 + num_fences);
448 }
449 
450 /**
451  * amdgpu_vm_lock_individual - lock all BOs on the individual idle list
452  * @vm: vm providing the BOs
453  * @exec: drm execution context
454  * @num_fences: number of extra fences to reserve
455  *
456  * Lock the BOs on the individual idle list in the DRM execution context.
457  */
458 int amdgpu_vm_lock_individual(struct amdgpu_vm *vm, struct drm_exec *exec,
459 			      unsigned int num_fences)
460 {
461 	struct list_head *prev = &vm->individual.idle;
462 	struct amdgpu_bo_va *bo_va;
463 	struct amdgpu_bo *bo;
464 	int ret;
465 
466 	/* We can only trust prev->next while holding the lock */
467 	spin_lock(&vm->individual_lock);
468 	while (!list_is_head(prev->next, &vm->individual.idle)) {
469 		bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
470 
471 		bo = bo_va->base.bo;
472 		if (bo) {
473 			amdgpu_bo_ref(bo);
474 			spin_unlock(&vm->individual_lock);
475 
476 			ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
477 			amdgpu_bo_unref(&bo);
478 			if (unlikely(ret))
479 				return ret;
480 
481 			spin_lock(&vm->individual_lock);
482 		}
483 		prev = prev->next;
484 	}
485 	spin_unlock(&vm->individual_lock);
486 
487 	return 0;
488 }
489 
490 /**
491  * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
492  *
493  * @adev: amdgpu device pointer
494  * @vm: vm providing the BOs
495  *
496  * Move all BOs to the end of LRU and remember their positions to put them
497  * together.
498  */
499 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
500 				struct amdgpu_vm *vm)
501 {
502 	spin_lock(&adev->mman.bdev.lru_lock);
503 	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
504 	spin_unlock(&adev->mman.bdev.lru_lock);
505 }
506 
507 /* Create scheduler entities for page table updates */
508 static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
509 				   struct amdgpu_vm *vm)
510 {
511 	int r;
512 
513 	r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
514 				  adev->vm_manager.vm_pte_scheds,
515 				  adev->vm_manager.vm_pte_num_scheds, NULL);
516 	if (r)
517 		goto error;
518 
519 	return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
520 				     adev->vm_manager.vm_pte_scheds,
521 				     adev->vm_manager.vm_pte_num_scheds, NULL);
522 
523 error:
524 	drm_sched_entity_destroy(&vm->immediate);
525 	return r;
526 }
527 
528 /* Destroy the entities for page table updates again */
529 static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
530 {
531 	drm_sched_entity_destroy(&vm->immediate);
532 	drm_sched_entity_destroy(&vm->delayed);
533 }
534 
535 /**
536  * amdgpu_vm_generation - return the page table re-generation counter
537  * @adev: the amdgpu_device
538  * @vm: optional VM to check, might be NULL
539  *
540  * Returns a page table re-generation token to allow checking if submissions
541  * are still valid to use this VM. The VM parameter might be NULL in which case
542  * just the VRAM lost counter will be used.
543  */
544 uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
545 {
546 	uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
547 
548 	if (!vm)
549 		return result;
550 
551 	result += lower_32_bits(vm->generation);
552 	/* Add one if the page tables will be re-generated on next CS */
553 	if (drm_sched_entity_error(&vm->delayed))
554 		++result;
555 
556 	return result;
557 }
558 
559 /**
560  * amdgpu_vm_validate - validate evicted BOs tracked in the VM
561  *
562  * @adev: amdgpu device pointer
563  * @vm: vm providing the BOs
564  * @ticket: optional reservation ticket used to reserve the VM
565  * @validate: callback to do the validation
566  * @param: parameter for the validation callback
567  *
568  * Validate the page table BOs and per-VM BOs on command submission if
569  * necessary. If a ticket is given, also try to validate evicted user queue
570  * BOs. They must already be reserved with the given ticket.
571  *
572  * Returns:
573  * Validation result.
574  */
575 int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
576 		       struct ww_acquire_ctx *ticket,
577 		       int (*validate)(void *p, struct amdgpu_bo *bo),
578 		       void *param)
579 {
580 	uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
581 	struct amdgpu_vm_bo_base *bo_base, *tmp;
582 	int r;
583 
584 	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
585 	if (vm->generation != new_vm_generation) {
586 		vm->generation = new_vm_generation;
587 		amdgpu_vm_bo_reset_state_machine(vm);
588 		amdgpu_vm_fini_entities(vm);
589 		r = amdgpu_vm_init_entities(adev, vm);
590 		if (r)
591 			return r;
592 	}
593 
594 	list_for_each_entry_safe(bo_base, tmp, &vm->kernel.evicted, vm_status) {
595 		r = validate(param, bo_base->bo);
596 		if (r)
597 			return r;
598 
599 		vm->update_funcs->map_table(to_amdgpu_bo_vm(bo_base->bo));
600 		amdgpu_vm_bo_moved(bo_base);
601 	}
602 
603 	/*
604 	 * As soon as all page tables are in place we can start updating them
605 	 * again.
606 	 */
607 	amdgpu_vm_eviction_lock(vm);
608 	vm->evicting = false;
609 	amdgpu_vm_eviction_unlock(vm);
610 
611 	list_for_each_entry_safe(bo_base, tmp, &vm->always_valid.evicted,
612 				 vm_status) {
613 		r = validate(param, bo_base->bo);
614 		if (r)
615 			return r;
616 
617 		amdgpu_vm_bo_moved(bo_base);
618 	}
619 
620 	if (!ticket)
621 		return 0;
622 
623 	spin_lock(&vm->individual_lock);
624 restart:
625 	list_for_each_entry(bo_base, &vm->individual.evicted, vm_status) {
626 		struct amdgpu_bo *bo = bo_base->bo;
627 
628 		if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket)
629 			continue;
630 
631 		spin_unlock(&vm->individual_lock);
632 
633 		r = validate(param, bo);
634 		if (r)
635 			return r;
636 
637 		amdgpu_vm_bo_moved(bo_base);
638 
639 		/* It's a bit inefficient to always jump back to the start, but
640 		 * we would need to re-structure the KFD for properly fixing
641 		 * that.
642 		 */
643 		spin_lock(&vm->individual_lock);
644 		goto restart;
645 	}
646 	spin_unlock(&vm->individual_lock);
647 
648 	return 0;
649 }
650 
651 /**
652  * amdgpu_vm_ready - check VM is ready for updates
653  *
654  * @vm: VM to check
655  *
656  * Check if all VM PDs/PTs are ready for updates
657  *
658  * Returns:
659  * True if VM is not evicting and all VM entities are not stopped
660  */
661 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
662 {
663 	bool ret;
664 
665 	amdgpu_vm_assert_locked(vm);
666 
667 	amdgpu_vm_eviction_lock(vm);
668 	ret = !vm->evicting;
669 	amdgpu_vm_eviction_unlock(vm);
670 
671 	ret &= list_empty(&vm->kernel.evicted);
672 
673 	spin_lock(&vm->immediate.lock);
674 	ret &= !vm->immediate.stopped;
675 	spin_unlock(&vm->immediate.lock);
676 
677 	spin_lock(&vm->delayed.lock);
678 	ret &= !vm->delayed.stopped;
679 	spin_unlock(&vm->delayed.lock);
680 
681 	return ret;
682 }
683 
684 /**
685  * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
686  *
687  * @adev: amdgpu_device pointer
688  */
689 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
690 {
691 	const struct amdgpu_ip_block *ip_block;
692 	bool has_compute_vm_bug;
693 	struct amdgpu_ring *ring;
694 	int i;
695 
696 	has_compute_vm_bug = false;
697 
698 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
699 	if (ip_block) {
700 		/* Compute has a VM bug for GFX version < 7.
701 		   Compute has a VM bug for GFX 8 MEC firmware version < 673.*/
702 		if (ip_block->version->major <= 7)
703 			has_compute_vm_bug = true;
704 		else if (ip_block->version->major == 8)
705 			if (adev->gfx.mec_fw_version < 673)
706 				has_compute_vm_bug = true;
707 	}
708 
709 	for (i = 0; i < adev->num_rings; i++) {
710 		ring = adev->rings[i];
711 		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
712 			/* only compute rings */
713 			ring->has_compute_vm_bug = has_compute_vm_bug;
714 		else
715 			ring->has_compute_vm_bug = false;
716 	}
717 }
718 
719 /**
720  * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
721  *
722  * @ring: ring on which the job will be submitted
723  * @job: job to submit
724  *
725  * Returns:
726  * True if sync is needed.
727  */
728 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
729 				  struct amdgpu_job *job)
730 {
731 	struct amdgpu_device *adev = ring->adev;
732 	unsigned vmhub = ring->vm_hub;
733 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
734 
735 	if (job->vmid == 0)
736 		return false;
737 
738 	if (job->vm_needs_flush || ring->has_compute_vm_bug)
739 		return true;
740 
741 	if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
742 		return true;
743 
744 	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
745 		return true;
746 
747 	return false;
748 }
749 
750 /**
751  * amdgpu_vm_flush - hardware flush the vm
752  *
753  * @ring: ring to use for flush
754  * @job:  related job
755  * @need_pipe_sync: is pipe sync needed
756  *
757  * Emit a VM flush when it is necessary.
758  */
759 void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
760 		     bool need_pipe_sync)
761 {
762 	struct amdgpu_device *adev = ring->adev;
763 	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
764 	unsigned vmhub = ring->vm_hub;
765 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
766 	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
767 	bool spm_update_needed = job->spm_update_needed;
768 	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
769 		job->gds_switch_needed;
770 	bool vm_flush_needed = job->vm_needs_flush;
771 	bool cleaner_shader_needed = false;
772 	bool pasid_mapping_needed = false;
773 	struct dma_fence *fence = NULL;
774 	unsigned int patch = 0;
775 
776 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
777 		gds_switch_needed = true;
778 		vm_flush_needed = true;
779 		pasid_mapping_needed = true;
780 		spm_update_needed = true;
781 	}
782 
783 	mutex_lock(&id_mgr->lock);
784 	if (id->pasid != job->pasid || !id->pasid_mapping ||
785 	    !dma_fence_is_signaled(id->pasid_mapping))
786 		pasid_mapping_needed = true;
787 	mutex_unlock(&id_mgr->lock);
788 
789 	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
790 	vm_flush_needed &= !!ring->funcs->emit_vm_flush  &&
791 			job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
792 	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
793 		ring->funcs->emit_wreg;
794 
795 	cleaner_shader_needed = job->run_cleaner_shader &&
796 		adev->gfx.enable_cleaner_shader &&
797 		ring->funcs->emit_cleaner_shader && job->base.s_fence &&
798 		&job->base.s_fence->scheduled == isolation->spearhead;
799 
800 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
801 	    !cleaner_shader_needed)
802 		return;
803 
804 	amdgpu_ring_ib_begin(ring);
805 
806 	/* There is no matching insert_end for this on purpose for the vm flush.
807 	 * The IB portion of the submission has both.  Having multiple
808 	 * insert_start sequences is ok, but you can only have one insert_end
809 	 * per submission based on the way VCN FW works.  For JPEG
810 	 * you can as many insert_start and insert_end sequences as you like as
811 	 * long as the rest of the packets come between start and end sequences.
812 	 */
813 	if (ring->funcs->insert_start)
814 		ring->funcs->insert_start(ring);
815 
816 	if (ring->funcs->init_cond_exec)
817 		patch = amdgpu_ring_init_cond_exec(ring,
818 						   ring->cond_exe_gpu_addr);
819 
820 	if (need_pipe_sync)
821 		amdgpu_ring_emit_pipeline_sync(ring);
822 
823 	if (cleaner_shader_needed)
824 		ring->funcs->emit_cleaner_shader(ring);
825 
826 	if (vm_flush_needed) {
827 		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
828 		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
829 	}
830 
831 	if (pasid_mapping_needed)
832 		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
833 
834 	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
835 		adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid);
836 
837 	if (ring->funcs->emit_gds_switch &&
838 	    gds_switch_needed) {
839 		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
840 					    job->gds_size, job->gws_base,
841 					    job->gws_size, job->oa_base,
842 					    job->oa_size);
843 	}
844 
845 	if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
846 		amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
847 		fence = &job->hw_vm_fence->base;
848 		/* get a ref for the job */
849 		dma_fence_get(fence);
850 	}
851 
852 	if (vm_flush_needed) {
853 		mutex_lock(&id_mgr->lock);
854 		dma_fence_put(id->last_flush);
855 		id->last_flush = dma_fence_get(fence);
856 		id->current_gpu_reset_count =
857 			atomic_read(&adev->gpu_reset_counter);
858 		mutex_unlock(&id_mgr->lock);
859 	}
860 
861 	if (pasid_mapping_needed) {
862 		mutex_lock(&id_mgr->lock);
863 		id->pasid = job->pasid;
864 		dma_fence_put(id->pasid_mapping);
865 		id->pasid_mapping = dma_fence_get(fence);
866 		mutex_unlock(&id_mgr->lock);
867 	}
868 
869 	/*
870 	 * Make sure that all other submissions wait for the cleaner shader to
871 	 * finish before we push them to the HW.
872 	 */
873 	if (cleaner_shader_needed) {
874 		trace_amdgpu_cleaner_shader(ring, fence);
875 		mutex_lock(&adev->enforce_isolation_mutex);
876 		dma_fence_put(isolation->spearhead);
877 		isolation->spearhead = dma_fence_get(fence);
878 		mutex_unlock(&adev->enforce_isolation_mutex);
879 	}
880 	dma_fence_put(fence);
881 
882 	amdgpu_ring_patch_cond_exec(ring, patch);
883 
884 	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
885 	if (ring->funcs->emit_switch_buffer) {
886 		amdgpu_ring_emit_switch_buffer(ring);
887 		amdgpu_ring_emit_switch_buffer(ring);
888 	}
889 
890 	amdgpu_ring_ib_end(ring);
891 }
892 
893 /**
894  * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
895  *
896  * @vm: requested vm
897  * @bo: requested buffer object
898  *
899  * Find @bo inside the requested vm.
900  * Search inside the @bos vm list for the requested vm
901  * Returns the found bo_va or NULL if none is found
902  *
903  * Object has to be reserved!
904  *
905  * Returns:
906  * Found bo_va or NULL.
907  */
908 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
909 				       struct amdgpu_bo *bo)
910 {
911 	struct amdgpu_vm_bo_base *base;
912 
913 	for (base = bo->vm_bo; base; base = base->next) {
914 		if (base->vm != vm)
915 			continue;
916 
917 		return container_of(base, struct amdgpu_bo_va, base);
918 	}
919 	return NULL;
920 }
921 
922 /**
923  * amdgpu_vm_map_gart - Resolve gart mapping of addr
924  *
925  * @pages_addr: optional DMA address to use for lookup
926  * @addr: the unmapped addr
927  *
928  * Look up the physical address of the page that the pte resolves
929  * to.
930  *
931  * Returns:
932  * The pointer for the page table entry.
933  */
934 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
935 {
936 	uint64_t result;
937 
938 	/* page table offset */
939 	result = pages_addr[addr >> PAGE_SHIFT];
940 
941 	/* in case cpu page size != gpu page size*/
942 	result |= addr & (~PAGE_MASK);
943 
944 	result &= 0xFFFFFFFFFFFFF000ULL;
945 
946 	return result;
947 }
948 
949 /**
950  * amdgpu_vm_update_pdes - make sure that all directories are valid
951  *
952  * @adev: amdgpu_device pointer
953  * @vm: requested vm
954  * @immediate: submit immediately to the paging queue
955  *
956  * Makes sure all directories are up to date.
957  *
958  * Returns:
959  * 0 for success, error for failure.
960  */
961 int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
962 			  struct amdgpu_vm *vm, bool immediate)
963 {
964 	struct amdgpu_vm_update_params params;
965 	struct amdgpu_vm_bo_base *entry, *tmp;
966 	bool flush_tlb_needed = false;
967 	int r, idx;
968 
969 	amdgpu_vm_assert_locked(vm);
970 
971 	if (list_empty(&vm->kernel.moved))
972 		return 0;
973 
974 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
975 		return -ENODEV;
976 
977 	memset(&params, 0, sizeof(params));
978 	params.adev = adev;
979 	params.vm = vm;
980 	params.immediate = immediate;
981 
982 	r = vm->update_funcs->prepare(&params, NULL,
983 				      AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
984 	if (r)
985 		goto error;
986 
987 	list_for_each_entry(entry, &vm->kernel.moved, vm_status) {
988 		/* vm_flush_needed after updating moved PDEs */
989 		flush_tlb_needed |= entry->moved;
990 
991 		r = amdgpu_vm_pde_update(&params, entry);
992 		if (r)
993 			goto error;
994 	}
995 
996 	r = vm->update_funcs->commit(&params, &vm->last_update);
997 	if (r)
998 		goto error;
999 
1000 	if (flush_tlb_needed)
1001 		atomic64_inc(&vm->tlb_seq);
1002 
1003 	list_for_each_entry_safe(entry, tmp, &vm->kernel.moved, vm_status)
1004 		amdgpu_vm_bo_idle(entry);
1005 
1006 error:
1007 	drm_dev_exit(idx);
1008 	return r;
1009 }
1010 
1011 /**
1012  * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
1013  * @fence: unused
1014  * @cb: the callback structure
1015  *
1016  * Increments the tlb sequence to make sure that future CS execute a VM flush.
1017  */
1018 static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
1019 				 struct dma_fence_cb *cb)
1020 {
1021 	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
1022 
1023 	tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
1024 	atomic64_inc(&tlb_cb->vm->tlb_seq);
1025 	kfree(tlb_cb);
1026 }
1027 
1028 /**
1029  * amdgpu_vm_tlb_flush - prepare TLB flush
1030  *
1031  * @params: parameters for update
1032  * @fence: input fence to sync TLB flush with
1033  * @tlb_cb: the callback structure
1034  *
1035  * Increments the tlb sequence to make sure that future CS execute a VM flush.
1036  */
1037 static void
1038 amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
1039 		    struct dma_fence **fence,
1040 		    struct amdgpu_vm_tlb_seq_struct *tlb_cb)
1041 {
1042 	struct amdgpu_vm *vm = params->vm;
1043 
1044 	tlb_cb->vm = vm;
1045 	if (!fence || !*fence) {
1046 		amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
1047 		return;
1048 	}
1049 
1050 	if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
1051 				    amdgpu_vm_tlb_seq_cb)) {
1052 		dma_fence_put(vm->last_tlb_flush);
1053 		vm->last_tlb_flush = dma_fence_get(*fence);
1054 	} else {
1055 		amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
1056 	}
1057 
1058 	/* Prepare a TLB flush fence to be attached to PTs */
1059 	/* The check for need_tlb_fence should be dropped once we
1060 	 * sort out the issues with KIQ/MES TLB invalidation timeouts.
1061 	 */
1062 	if (!params->unlocked && vm->need_tlb_fence) {
1063 		amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
1064 
1065 		/* Makes sure no PD/PT is freed before the flush */
1066 		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
1067 				   DMA_RESV_USAGE_BOOKKEEP);
1068 	}
1069 }
1070 
/**
 * amdgpu_vm_update_range - update a range in the vm page table
 *
 * @adev: amdgpu_device pointer to use for commands
 * @vm: the VM to update the range
 * @immediate: immediate submission in a page fault
 * @unlocked: unlocked invalidation during MM callback
 * @flush_tlb: trigger tlb invalidation after update completed
 * @allow_override: change MTYPE for local NUMA nodes
 * @sync: fences we need to sync to
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @offset: offset into nodes and pages_addr
 * @vram_base: base for vram mappings
 * @res: ttm_resource to map
 * @pages_addr: DMA addresses to use for mapping
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 *
 * Returns:
 * 0 for success, negative error code for failure: -ENODEV when the device
 * could not be entered, -ENOMEM when the TLB callback structure could not be
 * allocated, -EBUSY while the VM is being evicted, otherwise the error
 * reported by the prepare/update/commit steps.
 */
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			   bool immediate, bool unlocked, bool flush_tlb,
			   bool allow_override, struct amdgpu_sync *sync,
			   uint64_t start, uint64_t last, uint64_t flags,
			   uint64_t offset, uint64_t vram_base,
			   struct ttm_resource *res, dma_addr_t *pages_addr,
			   struct dma_fence **fence)
{
	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
	struct amdgpu_vm_update_params params;
	struct amdgpu_res_cursor cursor;
	int r, idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return -ENODEV;

	/* Allocated up front; handed over to amdgpu_vm_tlb_flush() on success
	 * or freed at error_free when it was not consumed.
	 */
	tlb_cb = kmalloc_obj(*tlb_cb);
	if (!tlb_cb) {
		drm_dev_exit(idx);
		return -ENOMEM;
	}

	/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
	 * heavy-weight flush TLB unconditionally.
	 */
	flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
		     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);

	/*
	 * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
	 */
	flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;
	params.pages_addr = pages_addr;
	params.unlocked = unlocked;
	params.needs_flush = flush_tlb;
	params.override_pte = allow_override && adev->gmc.override_pte;
	INIT_LIST_HEAD(&params.tlb_flush_waitlist);

	/* Held until error_free; no update is started while evicting */
	amdgpu_vm_eviction_lock(vm);
	if (vm->evicting) {
		r = -EBUSY;
		goto error_free;
	}

	/* A locked update attaches a still pending last_unlocked fence to the
	 * root BO and replaces vm->last_unlocked with the stub fence.
	 */
	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
		struct dma_fence *tmp = dma_fence_get_stub();

		amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true);
		swap(vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	}

	r = vm->update_funcs->prepare(&params, sync,
				      AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
	if (r)
		goto error_free;

	/* With pages_addr the cursor is set up without a resource (NULL) */
	amdgpu_res_first(pages_addr ? NULL : res, offset,
			 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
	while (cursor.remaining) {
		uint64_t tmp, num_entries, addr;

		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
		if (pages_addr) {
			bool contiguous = true;

			/* More than one CPU page: limit this step to the
			 * leading run of pages that are either all DMA
			 * contiguous or all not contiguous.
			 */
			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
				uint64_t pfn = cursor.start >> PAGE_SHIFT;
				uint64_t count;

				/* The first pair decides which kind of run this is */
				contiguous = pages_addr[pfn + 1] ==
					pages_addr[pfn] + PAGE_SIZE;

				tmp = num_entries /
					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
				for (count = 2; count < tmp; ++count) {
					uint64_t idx = pfn + count;

					/* stop where the contiguity changes */
					if (contiguous != (pages_addr[idx] ==
					    pages_addr[idx - 1] + PAGE_SIZE))
						break;
				}
				/* a non-contiguous run gives up its last page */
				if (!contiguous)
					count--;
				num_entries = count *
					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
			}

			if (!contiguous) {
				/* keep the table so addresses are looked up per page */
				addr = cursor.start;
				params.pages_addr = pages_addr;
			} else {
				/* one linear range starting at the first DMA address */
				addr = pages_addr[cursor.start >> PAGE_SHIFT];
				params.pages_addr = NULL;
			}

		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
			addr = vram_base + cursor.start;
		} else {
			/* neither valid nor PRT: entries get address 0 */
			addr = 0;
		}

		tmp = start + num_entries;
		r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
		if (r)
			goto error_free;

		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
		start = tmp;
	}

	r = vm->update_funcs->commit(&params, fence);
	if (r)
		goto error_free;

	if (params.needs_flush) {
		amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
		/* ownership moved to the flush path, don't free it below */
		tlb_cb = NULL;
	}

	amdgpu_vm_pt_free_list(adev, &params);

error_free:
	kfree(tlb_cb);
	amdgpu_vm_eviction_unlock(vm);
	drm_dev_exit(idx);
	return r;
}
1228 
1229 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
1230 			  struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
1231 {
1232 	spin_lock(&vm->stats_lock);
1233 	memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
1234 	spin_unlock(&vm->stats_lock);
1235 }
1236 
1237 /**
1238  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1239  *
1240  * @adev: amdgpu_device pointer
1241  * @bo_va: requested BO and VM object
1242  * @clear: if true clear the entries
1243  *
1244  * Fill in the page table entries for @bo_va.
1245  *
1246  * Returns:
1247  * 0 for success, -EINVAL for failure.
1248  */
1249 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
1250 			bool clear)
1251 {
1252 	struct amdgpu_bo *bo = bo_va->base.bo;
1253 	struct amdgpu_vm *vm = bo_va->base.vm;
1254 	struct amdgpu_bo_va_mapping *mapping;
1255 	struct dma_fence **last_update;
1256 	dma_addr_t *pages_addr = NULL;
1257 	struct ttm_resource *mem;
1258 	struct amdgpu_sync sync;
1259 	bool flush_tlb = clear;
1260 	uint64_t vram_base;
1261 	uint64_t flags;
1262 	bool uncached;
1263 	int r;
1264 
1265 	amdgpu_sync_create(&sync);
1266 	if (clear) {
1267 		mem = NULL;
1268 
1269 		/* Implicitly sync to command submissions in the same VM before
1270 		 * unmapping.
1271 		 */
1272 		r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
1273 				     AMDGPU_SYNC_EQ_OWNER, vm);
1274 		if (r)
1275 			goto error_free;
1276 		if (bo) {
1277 			r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
1278 			if (r)
1279 				goto error_free;
1280 		}
1281 	} else if (!bo) {
1282 		mem = NULL;
1283 
1284 		/* PRT map operations don't need to sync to anything. */
1285 
1286 	} else {
1287 		struct drm_gem_object *obj = &bo->tbo.base;
1288 
1289 		if (drm_gem_is_imported(obj) && bo_va->is_xgmi) {
1290 			struct dma_buf *dma_buf = obj->import_attach->dmabuf;
1291 			struct drm_gem_object *gobj = dma_buf->priv;
1292 			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
1293 
1294 			if (abo->tbo.resource &&
1295 			    abo->tbo.resource->mem_type == TTM_PL_VRAM)
1296 				bo = gem_to_amdgpu_bo(gobj);
1297 		}
1298 		mem = bo->tbo.resource;
1299 		if (mem && (mem->mem_type == TTM_PL_TT ||
1300 			    mem->mem_type == AMDGPU_PL_PREEMPT))
1301 			pages_addr = bo->tbo.ttm->dma_address;
1302 
1303 		/* Implicitly sync to moving fences before mapping anything */
1304 		r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
1305 				     AMDGPU_SYNC_EXPLICIT, vm);
1306 		if (r)
1307 			goto error_free;
1308 	}
1309 
1310 	if (bo) {
1311 		struct amdgpu_device *bo_adev;
1312 
1313 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1314 
1315 		if (amdgpu_bo_encrypted(bo))
1316 			flags |= AMDGPU_PTE_TMZ;
1317 
1318 		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
1319 		vram_base = bo_adev->vm_manager.vram_base_offset;
1320 		uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
1321 	} else {
1322 		flags = 0x0;
1323 		vram_base = 0;
1324 		uncached = false;
1325 	}
1326 
1327 	if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
1328 		last_update = &vm->last_update;
1329 	else
1330 		last_update = &bo_va->last_pt_update;
1331 
1332 	if (!clear && bo_va->base.moved) {
1333 		flush_tlb = true;
1334 		list_splice_init(&bo_va->valids, &bo_va->invalids);
1335 
1336 	} else if (bo_va->cleared != clear) {
1337 		list_splice_init(&bo_va->valids, &bo_va->invalids);
1338 	}
1339 
1340 	list_for_each_entry(mapping, &bo_va->invalids, list) {
1341 		uint64_t update_flags = flags;
1342 
1343 		/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
1344 		 * but in case of something, we filter the flags in first place
1345 		 */
1346 		if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
1347 			update_flags &= ~AMDGPU_PTE_READABLE;
1348 		if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
1349 			update_flags &= ~AMDGPU_PTE_WRITEABLE;
1350 
1351 		/* Apply ASIC specific mapping flags */
1352 		amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
1353 				      &update_flags);
1354 
1355 		trace_amdgpu_vm_bo_update(mapping);
1356 
1357 		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
1358 					   !uncached, &sync, mapping->start,
1359 					   mapping->last, update_flags,
1360 					   mapping->offset, vram_base, mem,
1361 					   pages_addr, last_update);
1362 		if (r)
1363 			goto error_free;
1364 	}
1365 
1366 	/* If the BO is not in its preferred location add it back to
1367 	 * the evicted list so that it gets validated again on the
1368 	 * next command submission.
1369 	 */
1370 	if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
1371 		if (bo->tbo.resource &&
1372 		    !(bo->preferred_domains &
1373 		      amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
1374 			amdgpu_vm_bo_evicted(&bo_va->base);
1375 		else
1376 			amdgpu_vm_bo_idle(&bo_va->base);
1377 	} else {
1378 		amdgpu_vm_bo_idle(&bo_va->base);
1379 	}
1380 
1381 	list_splice_init(&bo_va->invalids, &bo_va->valids);
1382 	bo_va->cleared = clear;
1383 	bo_va->base.moved = false;
1384 
1385 	if (trace_amdgpu_vm_bo_mapping_enabled()) {
1386 		list_for_each_entry(mapping, &bo_va->valids, list)
1387 			trace_amdgpu_vm_bo_mapping(mapping);
1388 	}
1389 
1390 error_free:
1391 	amdgpu_sync_free(&sync);
1392 	return r;
1393 }
1394 
1395 /**
1396  * amdgpu_vm_update_prt_state - update the global PRT state
1397  *
1398  * @adev: amdgpu_device pointer
1399  */
1400 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1401 {
1402 	unsigned long flags;
1403 	bool enable;
1404 
1405 	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1406 	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1407 	adev->gmc.gmc_funcs->set_prt(adev, enable);
1408 	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1409 }
1410 
1411 /**
1412  * amdgpu_vm_prt_get - add a PRT user
1413  *
1414  * @adev: amdgpu_device pointer
1415  */
1416 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1417 {
1418 	if (!adev->gmc.gmc_funcs->set_prt)
1419 		return;
1420 
1421 	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1422 		amdgpu_vm_update_prt_state(adev);
1423 }
1424 
1425 /**
1426  * amdgpu_vm_prt_put - drop a PRT user
1427  *
1428  * @adev: amdgpu_device pointer
1429  */
1430 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1431 {
1432 	if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1433 		amdgpu_vm_update_prt_state(adev);
1434 }
1435 
1436 /**
1437  * amdgpu_vm_prt_cb - callback for updating the PRT status
1438  *
1439  * @fence: fence for the callback
1440  * @_cb: the callback function
1441  */
1442 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1443 {
1444 	struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1445 
1446 	amdgpu_vm_prt_put(cb->adev);
1447 	kfree(cb);
1448 }
1449 
1450 /**
1451  * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1452  *
1453  * @adev: amdgpu_device pointer
1454  * @fence: fence for the callback
1455  */
1456 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1457 				 struct dma_fence *fence)
1458 {
1459 	struct amdgpu_prt_cb *cb;
1460 
1461 	if (!adev->gmc.gmc_funcs->set_prt)
1462 		return;
1463 
1464 	cb = kmalloc_obj(struct amdgpu_prt_cb);
1465 	if (!cb) {
1466 		/* Last resort when we are OOM */
1467 		if (fence)
1468 			dma_fence_wait(fence, false);
1469 
1470 		amdgpu_vm_prt_put(adev);
1471 	} else {
1472 		cb->adev = adev;
1473 		if (!fence || dma_fence_add_callback(fence, &cb->cb,
1474 						     amdgpu_vm_prt_cb))
1475 			amdgpu_vm_prt_cb(fence, &cb->cb);
1476 	}
1477 }
1478 
1479 /**
1480  * amdgpu_vm_free_mapping - free a mapping
1481  *
1482  * @adev: amdgpu_device pointer
1483  * @vm: requested vm
1484  * @mapping: mapping to be freed
1485  * @fence: fence of the unmap operation
1486  *
1487  * Free a mapping and make sure we decrease the PRT usage count if applicable.
1488  */
1489 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1490 				   struct amdgpu_vm *vm,
1491 				   struct amdgpu_bo_va_mapping *mapping,
1492 				   struct dma_fence *fence)
1493 {
1494 	if (mapping->flags & AMDGPU_VM_PAGE_PRT)
1495 		amdgpu_vm_add_prt_cb(adev, fence);
1496 	kfree(mapping);
1497 }
1498 
1499 /**
1500  * amdgpu_vm_prt_fini - finish all prt mappings
1501  *
1502  * @adev: amdgpu_device pointer
1503  * @vm: requested vm
1504  *
1505  * Register a cleanup callback to disable PRT support after VM dies.
1506  */
1507 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1508 {
1509 	struct dma_resv *resv = vm->root.bo->tbo.base.resv;
1510 	struct dma_resv_iter cursor;
1511 	struct dma_fence *fence;
1512 
1513 	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
1514 		/* Add a callback for each fence in the reservation object */
1515 		amdgpu_vm_prt_get(adev);
1516 		amdgpu_vm_add_prt_cb(adev, fence);
1517 	}
1518 }
1519 
1520 /**
1521  * amdgpu_vm_clear_freed - clear freed BOs in the PT
1522  *
1523  * @adev: amdgpu_device pointer
1524  * @vm: requested vm
1525  * @fence: optional resulting fence (unchanged if no work needed to be done
1526  * or if an error occurred)
1527  *
1528  * Make sure all freed BOs are cleared in the PT.
1529  * PTs have to be reserved and mutex must be locked!
1530  *
1531  * Returns:
1532  * 0 for success.
1533  *
1534  */
1535 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1536 			  struct amdgpu_vm *vm,
1537 			  struct dma_fence **fence)
1538 {
1539 	struct amdgpu_bo_va_mapping *mapping;
1540 	struct dma_fence *f = NULL;
1541 	struct amdgpu_sync sync;
1542 	int r;
1543 
1544 
1545 	/*
1546 	 * Implicitly sync to command submissions in the same VM before
1547 	 * unmapping.
1548 	 */
1549 	amdgpu_sync_create(&sync);
1550 	r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
1551 			     AMDGPU_SYNC_EQ_OWNER, vm);
1552 	if (r)
1553 		goto error_free;
1554 
1555 	while (!list_empty(&vm->freed)) {
1556 		mapping = list_first_entry(&vm->freed,
1557 			struct amdgpu_bo_va_mapping, list);
1558 		list_del(&mapping->list);
1559 
1560 		r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
1561 					   &sync, mapping->start, mapping->last,
1562 					   0, 0, 0, NULL, NULL, &f);
1563 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
1564 		if (r) {
1565 			dma_fence_put(f);
1566 			goto error_free;
1567 		}
1568 	}
1569 
1570 	if (fence && f) {
1571 		dma_fence_put(*fence);
1572 		*fence = f;
1573 	} else {
1574 		dma_fence_put(f);
1575 	}
1576 
1577 error_free:
1578 	amdgpu_sync_free(&sync);
1579 	return r;
1580 
1581 }
1582 
/**
 * amdgpu_vm_handle_moved - handle moved BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @ticket: optional reservation ticket used to reserve the VM
 *
 * Make sure all BOs which are moved are updated in the PTs. The entries of
 * vm->always_valid.moved are processed first, followed by the entries of
 * vm->individual.moved.
 *
 * Returns:
 * 0 for success, otherwise the error returned by amdgpu_vm_bo_update().
 *
 * PTs have to be reserved!
 */
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
			   struct amdgpu_vm *vm,
			   struct ww_acquire_ctx *ticket)
{
	struct amdgpu_bo_va *bo_va, *tmp;
	struct dma_resv *resv;
	bool clear, unlock;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->always_valid.moved,
				 base.vm_status) {
		/* Per VM BOs never need to be cleared in the page tables */
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;
	}

	/* individual_lock is only held while looking at the list; it is
	 * dropped around the update itself and re-taken at the loop end.
	 * NOTE(review): the loop only terminates if amdgpu_vm_bo_update()
	 * takes the entry off individual.moved - presumably via the
	 * idle/evicted state change, confirm.
	 */
	spin_lock(&vm->individual_lock);
	while (!list_empty(&vm->individual.moved)) {
		bo_va = list_first_entry(&vm->individual.moved,
					 typeof(*bo_va), base.vm_status);
		resv = bo_va->base.bo->tbo.base.resv;
		spin_unlock(&vm->individual_lock);

		/* Try to reserve the BO to avoid clearing its ptes */
		if (!adev->debug_vm && dma_resv_trylock(resv)) {
			clear = false;
			unlock = true;
		/* The caller is already holding the reservation lock */
		} else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
			clear = false;
			unlock = false;
		/* Somebody else is using the BO right now */
		} else {
			clear = true;
			unlock = false;
		}

		r = amdgpu_vm_bo_update(adev, bo_va, clear);

		/* only release what the trylock above acquired */
		if (unlock)
			dma_resv_unlock(resv);
		/* individual_lock is not held here, returning is safe */
		if (r)
			return r;

		/* Remember evicted DMABuf imports in compute VMs for later
		 * validation
		 */
		if (vm->is_compute_context &&
		    drm_gem_is_imported(&bo_va->base.bo->tbo.base) &&
		    (!bo_va->base.bo->tbo.resource ||
		     bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
			amdgpu_vm_bo_evicted(&bo_va->base);

		spin_lock(&vm->individual_lock);
	}
	spin_unlock(&vm->individual_lock);

	return 0;
}
1657 
1658 /**
1659  * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
1660  *
1661  * @adev: amdgpu_device pointer
1662  * @vm: requested vm
1663  * @flush_type: flush type
1664  * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
1665  *
1666  * Flush TLB if needed for a compute VM.
1667  *
1668  * Returns:
1669  * 0 for success.
1670  */
1671 int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
1672 				struct amdgpu_vm *vm,
1673 				uint32_t flush_type,
1674 				uint32_t xcc_mask)
1675 {
1676 	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
1677 	bool all_hub = false;
1678 	int xcc = 0, r = 0;
1679 
1680 	WARN_ON_ONCE(!vm->is_compute_context);
1681 
1682 	/*
1683 	 * It can be that we race and lose here, but that is extremely unlikely
1684 	 * and the worst thing which could happen is that we flush the changes
1685 	 * into the TLB once more which is harmless.
1686 	 */
1687 	if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
1688 		return 0;
1689 
1690 	if (adev->family == AMDGPU_FAMILY_AI ||
1691 	    adev->family == AMDGPU_FAMILY_RV)
1692 		all_hub = true;
1693 
1694 	for_each_inst(xcc, xcc_mask) {
1695 		r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
1696 						   all_hub, xcc);
1697 		if (r)
1698 			break;
1699 	}
1700 	return r;
1701 }
1702 
1703 /**
1704  * amdgpu_vm_bo_add - add a bo to a specific vm
1705  *
1706  * @adev: amdgpu_device pointer
1707  * @vm: requested vm
1708  * @bo: amdgpu buffer object
1709  *
1710  * Add @bo into the requested vm.
1711  * Add @bo to the list of bos associated with the vm
1712  *
1713  * Returns:
1714  * Newly added bo_va or NULL for failure
1715  *
1716  * Object has to be reserved!
1717  */
1718 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1719 				      struct amdgpu_vm *vm,
1720 				      struct amdgpu_bo *bo)
1721 {
1722 	struct amdgpu_bo_va *bo_va;
1723 
1724 	amdgpu_vm_assert_locked(vm);
1725 
1726 	bo_va = kzalloc_obj(struct amdgpu_bo_va);
1727 	if (bo_va == NULL) {
1728 		return NULL;
1729 	}
1730 	amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1731 
1732 	bo_va->ref_count = 1;
1733 	bo_va->last_pt_update = dma_fence_get_stub();
1734 	INIT_LIST_HEAD(&bo_va->valids);
1735 	INIT_LIST_HEAD(&bo_va->invalids);
1736 
1737 	if (!bo)
1738 		return bo_va;
1739 
1740 	dma_resv_assert_held(bo->tbo.base.resv);
1741 	if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
1742 		bo_va->is_xgmi = true;
1743 		/* Power up XGMI if it can be potentially used */
1744 		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
1745 	}
1746 
1747 	return bo_va;
1748 }
1749 
1750 
1751 /**
1752  * amdgpu_vm_bo_insert_map - insert a new mapping
1753  *
1754  * @adev: amdgpu_device pointer
1755  * @bo_va: bo_va to store the address
1756  * @mapping: the mapping to insert
1757  *
1758  * Insert a new mapping into all structures.
1759  */
1760 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1761 				    struct amdgpu_bo_va *bo_va,
1762 				    struct amdgpu_bo_va_mapping *mapping)
1763 {
1764 	struct amdgpu_vm *vm = bo_va->base.vm;
1765 	struct amdgpu_bo *bo = bo_va->base.bo;
1766 
1767 	mapping->bo_va = bo_va;
1768 	list_add(&mapping->list, &bo_va->invalids);
1769 	amdgpu_vm_it_insert(mapping, &vm->va);
1770 
1771 	if (mapping->flags & AMDGPU_VM_PAGE_PRT)
1772 		amdgpu_vm_prt_get(adev);
1773 
1774 	if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
1775 		amdgpu_vm_bo_moved(&bo_va->base);
1776 
1777 	trace_amdgpu_vm_bo_map(bo_va, mapping);
1778 }
1779 
1780 /* Validate operation parameters to prevent potential abuse */
1781 static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
1782 					  struct amdgpu_bo *bo,
1783 					  uint64_t saddr,
1784 					  uint64_t offset,
1785 					  uint64_t size)
1786 {
1787 	uint64_t tmp, lpfn;
1788 
1789 	if (saddr & AMDGPU_GPU_PAGE_MASK
1790 	    || offset & AMDGPU_GPU_PAGE_MASK
1791 	    || size & AMDGPU_GPU_PAGE_MASK)
1792 		return -EINVAL;
1793 
1794 	if (check_add_overflow(saddr, size, &tmp)
1795 	    || check_add_overflow(offset, size, &tmp)
1796 	    || size == 0 /* which also leads to end < begin */)
1797 		return -EINVAL;
1798 
1799 	/* make sure object fit at this offset */
1800 	if (bo && offset + size > amdgpu_bo_size(bo))
1801 		return -EINVAL;
1802 
1803 	/* Ensure last pfn not exceed max_pfn */
1804 	lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
1805 	if (lpfn >= adev->vm_manager.max_pfn)
1806 		return -EINVAL;
1807 
1808 	return 0;
1809 }
1810 
1811 /**
1812  * amdgpu_vm_bo_map - map bo inside a vm
1813  *
1814  * @adev: amdgpu_device pointer
1815  * @bo_va: bo_va to store the address
1816  * @saddr: where to map the BO
1817  * @offset: requested offset in the BO
1818  * @size: BO size in bytes
1819  * @flags: attributes of pages (read/write/valid/etc.)
1820  *
1821  * Add a mapping of the BO at the specefied addr into the VM.
1822  *
1823  * Returns:
1824  * 0 for success, error for failure.
1825  *
1826  * Object has to be reserved and unreserved outside!
1827  */
1828 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1829 		     struct amdgpu_bo_va *bo_va,
1830 		     uint64_t saddr, uint64_t offset,
1831 		     uint64_t size, uint32_t flags)
1832 {
1833 	struct amdgpu_bo_va_mapping *mapping, *tmp;
1834 	struct amdgpu_bo *bo = bo_va->base.bo;
1835 	struct amdgpu_vm *vm = bo_va->base.vm;
1836 	uint64_t eaddr;
1837 	int r;
1838 
1839 	r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
1840 	if (r)
1841 		return r;
1842 
1843 	saddr /= AMDGPU_GPU_PAGE_SIZE;
1844 	eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
1845 
1846 	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1847 	if (tmp) {
1848 		/* bo and tmp overlap, invalid addr */
1849 		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1850 			"0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1851 			tmp->start, tmp->last + 1);
1852 		return -EINVAL;
1853 	}
1854 
1855 	mapping = kmalloc_obj(*mapping);
1856 	if (!mapping)
1857 		return -ENOMEM;
1858 
1859 	mapping->start = saddr;
1860 	mapping->last = eaddr;
1861 	mapping->offset = offset;
1862 	mapping->flags = flags;
1863 
1864 	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1865 
1866 	return 0;
1867 }
1868 
1869 /**
1870  * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1871  *
1872  * @adev: amdgpu_device pointer
1873  * @bo_va: bo_va to store the address
1874  * @saddr: where to map the BO
1875  * @offset: requested offset in the BO
1876  * @size: BO size in bytes
1877  * @flags: attributes of pages (read/write/valid/etc.)
1878  *
1879  * Add a mapping of the BO at the specefied addr into the VM. Replace existing
1880  * mappings as we do so.
1881  *
1882  * Returns:
1883  * 0 for success, error for failure.
1884  *
1885  * Object has to be reserved and unreserved outside!
1886  */
1887 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1888 			     struct amdgpu_bo_va *bo_va,
1889 			     uint64_t saddr, uint64_t offset,
1890 			     uint64_t size, uint32_t flags)
1891 {
1892 	struct amdgpu_bo_va_mapping *mapping;
1893 	struct amdgpu_bo *bo = bo_va->base.bo;
1894 	uint64_t eaddr;
1895 	int r;
1896 
1897 	r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
1898 	if (r)
1899 		return r;
1900 
1901 	/* Allocate all the needed memory */
1902 	mapping = kmalloc_obj(*mapping);
1903 	if (!mapping)
1904 		return -ENOMEM;
1905 
1906 	r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
1907 	if (r) {
1908 		kfree(mapping);
1909 		return r;
1910 	}
1911 
1912 	saddr /= AMDGPU_GPU_PAGE_SIZE;
1913 	eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
1914 
1915 	mapping->start = saddr;
1916 	mapping->last = eaddr;
1917 	mapping->offset = offset;
1918 	mapping->flags = flags;
1919 
1920 	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1921 
1922 	return 0;
1923 }
1924 
1925 /**
1926  * amdgpu_vm_bo_unmap - remove bo mapping from vm
1927  *
1928  * @adev: amdgpu_device pointer
1929  * @bo_va: bo_va to remove the address from
1930  * @saddr: where to the BO is mapped
1931  *
1932  * Remove a mapping of the BO at the specefied addr from the VM.
1933  *
1934  * Returns:
1935  * 0 for success, error for failure.
1936  *
1937  * Object has to be reserved and unreserved outside!
1938  */
1939 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1940 		       struct amdgpu_bo_va *bo_va,
1941 		       uint64_t saddr)
1942 {
1943 	struct amdgpu_bo_va_mapping *mapping;
1944 	struct amdgpu_vm *vm = bo_va->base.vm;
1945 	bool valid = true;
1946 
1947 	saddr /= AMDGPU_GPU_PAGE_SIZE;
1948 
1949 	list_for_each_entry(mapping, &bo_va->valids, list) {
1950 		if (mapping->start == saddr)
1951 			break;
1952 	}
1953 
1954 	if (&mapping->list == &bo_va->valids) {
1955 		valid = false;
1956 
1957 		list_for_each_entry(mapping, &bo_va->invalids, list) {
1958 			if (mapping->start == saddr)
1959 				break;
1960 		}
1961 
1962 		if (&mapping->list == &bo_va->invalids)
1963 			return -ENOENT;
1964 	}
1965 
1966 	/* It's unlikely to happen that the mapping userq hasn't been idled
1967 	 * during user requests GEM unmap IOCTL except for forcing the unmap
1968 	 * from user space.
1969 	 */
1970 	if (unlikely(bo_va->userq_va_mapped))
1971 		amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr);
1972 
1973 	list_del(&mapping->list);
1974 	amdgpu_vm_it_remove(mapping, &vm->va);
1975 	mapping->bo_va = NULL;
1976 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1977 
1978 	if (valid)
1979 		list_add(&mapping->list, &vm->freed);
1980 	else
1981 		amdgpu_vm_free_mapping(adev, vm, mapping,
1982 				       bo_va->last_pt_update);
1983 
1984 	return 0;
1985 }
1986 
1987 /**
1988  * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
1989  *
1990  * @adev: amdgpu_device pointer
1991  * @vm: VM structure to use
1992  * @saddr: start of the range
1993  * @size: size of the range
1994  *
1995  * Remove all mappings in a range, split them as appropriate.
1996  *
1997  * Returns:
1998  * 0 for success, error for failure.
1999  */
2000 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2001 				struct amdgpu_vm *vm,
2002 				uint64_t saddr, uint64_t size)
2003 {
2004 	struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2005 	LIST_HEAD(removed);
2006 	uint64_t eaddr;
2007 	int r;
2008 
2009 	r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
2010 	if (r)
2011 		return r;
2012 
2013 	saddr /= AMDGPU_GPU_PAGE_SIZE;
2014 	eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
2015 
2016 	/* Allocate all the needed memory */
2017 	before = kzalloc_obj(*before);
2018 	if (!before)
2019 		return -ENOMEM;
2020 	INIT_LIST_HEAD(&before->list);
2021 
2022 	after = kzalloc_obj(*after);
2023 	if (!after) {
2024 		kfree(before);
2025 		return -ENOMEM;
2026 	}
2027 	INIT_LIST_HEAD(&after->list);
2028 
2029 	/* Now gather all removed mappings */
2030 	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2031 	while (tmp) {
2032 		/* Remember mapping split at the start */
2033 		if (tmp->start < saddr) {
2034 			before->start = tmp->start;
2035 			before->last = saddr - 1;
2036 			before->offset = tmp->offset;
2037 			before->flags = tmp->flags;
2038 			before->bo_va = tmp->bo_va;
2039 			list_add(&before->list, &tmp->bo_va->invalids);
2040 		}
2041 
2042 		/* Remember mapping split at the end */
2043 		if (tmp->last > eaddr) {
2044 			after->start = eaddr + 1;
2045 			after->last = tmp->last;
2046 			after->offset = tmp->offset;
2047 			after->offset += (after->start - tmp->start) << PAGE_SHIFT;
2048 			after->flags = tmp->flags;
2049 			after->bo_va = tmp->bo_va;
2050 			list_add(&after->list, &tmp->bo_va->invalids);
2051 		}
2052 
2053 		list_del(&tmp->list);
2054 		list_add(&tmp->list, &removed);
2055 
2056 		tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2057 	}
2058 
2059 	/* And free them up */
2060 	list_for_each_entry_safe(tmp, next, &removed, list) {
2061 		amdgpu_vm_it_remove(tmp, &vm->va);
2062 		list_del(&tmp->list);
2063 
2064 		if (tmp->start < saddr)
2065 		    tmp->start = saddr;
2066 		if (tmp->last > eaddr)
2067 		    tmp->last = eaddr;
2068 
2069 		tmp->bo_va = NULL;
2070 		list_add(&tmp->list, &vm->freed);
2071 		trace_amdgpu_vm_bo_unmap(NULL, tmp);
2072 	}
2073 
2074 	/* Insert partial mapping before the range */
2075 	if (!list_empty(&before->list)) {
2076 		struct amdgpu_bo *bo = before->bo_va->base.bo;
2077 
2078 		amdgpu_vm_it_insert(before, &vm->va);
2079 		if (before->flags & AMDGPU_VM_PAGE_PRT)
2080 			amdgpu_vm_prt_get(adev);
2081 
2082 		if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
2083 		    !before->bo_va->base.moved)
2084 			amdgpu_vm_bo_moved(&before->bo_va->base);
2085 	} else {
2086 		kfree(before);
2087 	}
2088 
2089 	/* Insert partial mapping after the range */
2090 	if (!list_empty(&after->list)) {
2091 		struct amdgpu_bo *bo = after->bo_va->base.bo;
2092 
2093 		amdgpu_vm_it_insert(after, &vm->va);
2094 		if (after->flags & AMDGPU_VM_PAGE_PRT)
2095 			amdgpu_vm_prt_get(adev);
2096 
2097 		if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
2098 		    !after->bo_va->base.moved)
2099 			amdgpu_vm_bo_moved(&after->bo_va->base);
2100 	} else {
2101 		kfree(after);
2102 	}
2103 
2104 	return 0;
2105 }
2106 
2107 /**
2108  * amdgpu_vm_bo_lookup_mapping - find mapping by address
2109  *
2110  * @vm: the requested VM
2111  * @addr: the address
2112  *
2113  * Find a mapping by it's address.
2114  *
2115  * Returns:
2116  * The amdgpu_bo_va_mapping matching for addr or NULL
2117  *
2118  */
2119 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2120 							 uint64_t addr)
2121 {
2122 	return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2123 }
2124 
2125 /**
2126  * amdgpu_vm_bo_trace_cs - trace all reserved mappings
2127  *
2128  * @vm: the requested vm
2129  * @ticket: CS ticket
2130  *
2131  * Trace all mappings of BOs reserved during a command submission.
2132  */
2133 void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
2134 {
2135 	struct amdgpu_bo_va_mapping *mapping;
2136 
2137 	if (!trace_amdgpu_vm_bo_cs_enabled())
2138 		return;
2139 
2140 	for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
2141 	     mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
2142 		if (mapping->bo_va && mapping->bo_va->base.bo) {
2143 			struct amdgpu_bo *bo;
2144 
2145 			bo = mapping->bo_va->base.bo;
2146 			if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
2147 			    ticket)
2148 				continue;
2149 		}
2150 
2151 		trace_amdgpu_vm_bo_cs(mapping);
2152 	}
2153 }
2154 
2155 /**
2156  * amdgpu_vm_bo_del - remove a bo from a specific vm
2157  *
2158  * @adev: amdgpu_device pointer
2159  * @bo_va: requested bo_va
2160  *
2161  * Remove @bo_va->bo from the requested vm.
2162  *
2163  * Object have to be reserved!
2164  */
2165 void amdgpu_vm_bo_del(struct amdgpu_device *adev,
2166 		      struct amdgpu_bo_va *bo_va)
2167 {
2168 	struct amdgpu_bo_va_mapping *mapping, *next;
2169 	struct amdgpu_bo *bo = bo_va->base.bo;
2170 	struct amdgpu_vm *vm = bo_va->base.vm;
2171 	struct amdgpu_vm_bo_base **base;
2172 
2173 	dma_resv_assert_held(vm->root.bo->tbo.base.resv);
2174 
2175 	if (bo) {
2176 		dma_resv_assert_held(bo->tbo.base.resv);
2177 		if (amdgpu_vm_is_bo_always_valid(vm, bo))
2178 			ttm_bo_set_bulk_move(&bo->tbo, NULL);
2179 
2180 		for (base = &bo_va->base.bo->vm_bo; *base;
2181 		     base = &(*base)->next) {
2182 			if (*base != &bo_va->base)
2183 				continue;
2184 
2185 			amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
2186 			*base = bo_va->base.next;
2187 			break;
2188 		}
2189 	}
2190 
2191 	spin_lock(&vm->individual_lock);
2192 	list_del(&bo_va->base.vm_status);
2193 	spin_unlock(&vm->individual_lock);
2194 
2195 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2196 		list_del(&mapping->list);
2197 		amdgpu_vm_it_remove(mapping, &vm->va);
2198 		mapping->bo_va = NULL;
2199 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2200 		list_add(&mapping->list, &vm->freed);
2201 	}
2202 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2203 		list_del(&mapping->list);
2204 		amdgpu_vm_it_remove(mapping, &vm->va);
2205 		amdgpu_vm_free_mapping(adev, vm, mapping,
2206 				       bo_va->last_pt_update);
2207 	}
2208 
2209 	dma_fence_put(bo_va->last_pt_update);
2210 
2211 	if (bo && bo_va->is_xgmi)
2212 		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
2213 
2214 	kfree(bo_va);
2215 }
2216 
2217 /**
2218  * amdgpu_vm_evictable - check if we can evict a VM
2219  *
2220  * @bo: A page table of the VM.
2221  *
2222  * Check if it is possible to evict a VM.
2223  */
2224 bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
2225 {
2226 	struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
2227 
2228 	/* Page tables of a destroyed VM can go away immediately */
2229 	if (!bo_base || !bo_base->vm)
2230 		return true;
2231 
2232 	/* Don't evict VM page tables while they are busy */
2233 	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
2234 		return false;
2235 
2236 	/* Try to block ongoing updates */
2237 	if (!amdgpu_vm_eviction_trylock(bo_base->vm))
2238 		return false;
2239 
2240 	/* Don't evict VM page tables while they are updated */
2241 	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
2242 		amdgpu_vm_eviction_unlock(bo_base->vm);
2243 		return false;
2244 	}
2245 
2246 	bo_base->vm->evicting = true;
2247 	amdgpu_vm_eviction_unlock(bo_base->vm);
2248 	return true;
2249 }
2250 
2251 /**
2252  * amdgpu_vm_bo_invalidate - mark the bo as invalid
2253  *
2254  * @bo: amdgpu buffer object
2255  * @evicted: is the BO evicted
2256  *
2257  * Mark @bo as invalid.
2258  */
2259 void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
2260 {
2261 	struct amdgpu_vm_bo_base *bo_base;
2262 
2263 	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
2264 		struct amdgpu_vm *vm = bo_base->vm;
2265 
2266 		if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
2267 			amdgpu_vm_bo_evicted(bo_base);
2268 			continue;
2269 		}
2270 
2271 		if (bo_base->moved)
2272 			continue;
2273 		amdgpu_vm_bo_moved(bo_base);
2274 	}
2275 }
2276 
2277 /**
2278  * amdgpu_vm_bo_move - handle BO move
2279  *
2280  * @bo: amdgpu buffer object
2281  * @new_mem: the new placement of the BO move
2282  * @evicted: is the BO evicted
2283  *
2284  * Update the memory stats for the new placement and mark @bo as invalid.
2285  */
2286 void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
2287 		       bool evicted)
2288 {
2289 	struct amdgpu_vm_bo_base *bo_base;
2290 
2291 	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
2292 		struct amdgpu_vm *vm = bo_base->vm;
2293 
2294 		spin_lock(&vm->stats_lock);
2295 		amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
2296 		amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
2297 		spin_unlock(&vm->stats_lock);
2298 	}
2299 
2300 	amdgpu_vm_bo_invalidate(bo, evicted);
2301 }
2302 
2303 /**
2304  * amdgpu_vm_get_block_size - calculate VM page table size as power of two
2305  *
2306  * @vm_size: VM size
2307  *
2308  * Returns:
2309  * VM page table as power of two
2310  */
2311 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2312 {
2313 	/* Total bits covered by PD + PTs */
2314 	unsigned bits = ilog2(vm_size) + 18;
2315 
2316 	/* Make sure the PD is 4K in size up to 8GB address space.
2317 	   Above that split equal between PD and PTs */
2318 	if (vm_size <= 8)
2319 		return (bits - 9);
2320 	else
2321 		return ((bits + 3) / 2);
2322 }
2323 
2324 /**
2325  * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2326  *
2327  * @adev: amdgpu_device pointer
2328  * @min_vm_size: the minimum vm size in GB if it's set auto
2329  * @fragment_size_default: Default PTE fragment size
2330  * @max_level: max VMPT level
2331  * @max_bits: max address space size in bits
2332  *
2333  */
2334 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
2335 			   uint32_t fragment_size_default, unsigned max_level,
2336 			   unsigned max_bits)
2337 {
2338 	unsigned int max_size = 1 << (max_bits - 30);
2339 	unsigned int vm_size;
2340 	uint64_t tmp;
2341 
2342 	/* adjust vm size first */
2343 	if (amdgpu_vm_size != -1) {
2344 		vm_size = amdgpu_vm_size;
2345 		if (vm_size > max_size) {
2346 			dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2347 				 amdgpu_vm_size, max_size);
2348 			vm_size = max_size;
2349 		}
2350 	} else {
2351 		struct sysinfo si;
2352 		unsigned int phys_ram_gb;
2353 
2354 		/* Optimal VM size depends on the amount of physical
2355 		 * RAM available. Underlying requirements and
2356 		 * assumptions:
2357 		 *
2358 		 *  - Need to map system memory and VRAM from all GPUs
2359 		 *     - VRAM from other GPUs not known here
2360 		 *     - Assume VRAM <= system memory
2361 		 *  - On GFX8 and older, VM space can be segmented for
2362 		 *    different MTYPEs
2363 		 *  - Need to allow room for fragmentation, guard pages etc.
2364 		 *
2365 		 * This adds up to a rough guess of system memory x3.
2366 		 * Round up to power of two to maximize the available
2367 		 * VM size with the given page table size.
2368 		 */
2369 		si_meminfo(&si);
2370 		phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
2371 			       (1 << 30) - 1) >> 30;
2372 		vm_size = roundup_pow_of_two(
2373 			clamp(phys_ram_gb * 3, min_vm_size, max_size));
2374 	}
2375 
2376 	adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2377 	adev->vm_manager.max_level = max_level;
2378 
2379 	tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2380 	if (amdgpu_vm_block_size != -1)
2381 		tmp >>= amdgpu_vm_block_size - 9;
2382 	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2383 	adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
2384 	switch (adev->vm_manager.num_level) {
2385 	case 4:
2386 		adev->vm_manager.root_level = AMDGPU_VM_PDB3;
2387 		break;
2388 	case 3:
2389 		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2390 		break;
2391 	case 2:
2392 		adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2393 		break;
2394 	case 1:
2395 		adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2396 		break;
2397 	default:
2398 		dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2399 	}
2400 	/* block size depends on vm size and hw setup*/
2401 	if (amdgpu_vm_block_size != -1)
2402 		adev->vm_manager.block_size =
2403 			min((unsigned)amdgpu_vm_block_size, max_bits
2404 			    - AMDGPU_GPU_PAGE_SHIFT
2405 			    - 9 * adev->vm_manager.num_level);
2406 	else if (adev->vm_manager.num_level > 1)
2407 		adev->vm_manager.block_size = 9;
2408 	else
2409 		adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2410 
2411 	if (amdgpu_vm_fragment_size == -1)
2412 		adev->vm_manager.fragment_size = fragment_size_default;
2413 	else
2414 		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2415 
2416 	dev_info(
2417 		adev->dev,
2418 		"vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2419 		vm_size, adev->vm_manager.num_level + 1,
2420 		adev->vm_manager.block_size, adev->vm_manager.fragment_size);
2421 }
2422 
2423 /**
2424  * amdgpu_vm_wait_idle - wait for the VM to become idle
2425  *
2426  * @vm: VM object to wait for
2427  * @timeout: timeout to wait for VM to become idle
2428  */
2429 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
2430 {
2431 	timeout = drm_sched_entity_flush(&vm->immediate, timeout);
2432 	if (timeout <= 0)
2433 		return timeout;
2434 
2435 	return drm_sched_entity_flush(&vm->delayed, timeout);
2436 }
2437 
2438 static void amdgpu_vm_destroy_task_info(struct kref *kref)
2439 {
2440 	struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount);
2441 
2442 	kfree(ti);
2443 }
2444 
2445 static inline struct amdgpu_vm *
2446 amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
2447 {
2448 	struct amdgpu_vm *vm;
2449 	unsigned long flags;
2450 
2451 	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
2452 	vm = xa_load(&adev->vm_manager.pasids, pasid);
2453 	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
2454 
2455 	return vm;
2456 }
2457 
2458 /**
2459  * amdgpu_vm_put_task_info - reference down the vm task_info ptr
2460  *
2461  * @task_info: task_info struct under discussion.
2462  *
2463  * frees the vm task_info ptr at the last put
2464  */
2465 void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
2466 {
2467 	if (task_info)
2468 		kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
2469 }
2470 
2471 /**
2472  * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
2473  *
2474  * @vm: VM to get info from
2475  *
2476  * Returns the reference counted task_info structure, which must be
2477  * referenced down with amdgpu_vm_put_task_info.
2478  */
2479 struct amdgpu_task_info *
2480 amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
2481 {
2482 	struct amdgpu_task_info *ti = NULL;
2483 
2484 	if (vm) {
2485 		ti = vm->task_info;
2486 		kref_get(&vm->task_info->refcount);
2487 	}
2488 
2489 	return ti;
2490 }
2491 
2492 /**
2493  * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
2494  *
2495  * @adev: drm device pointer
2496  * @pasid: PASID identifier for VM
2497  *
2498  * Returns the reference counted task_info structure, which must be
2499  * referenced down with amdgpu_vm_put_task_info.
2500  */
2501 struct amdgpu_task_info *
2502 amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
2503 {
2504 	return amdgpu_vm_get_task_info_vm(
2505 			amdgpu_vm_get_vm_from_pasid(adev, pasid));
2506 }
2507 
2508 static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
2509 {
2510 	vm->task_info = kzalloc_obj(struct amdgpu_task_info);
2511 	if (!vm->task_info)
2512 		return -ENOMEM;
2513 
2514 	kref_init(&vm->task_info->refcount);
2515 	return 0;
2516 }
2517 
2518 /**
2519  * amdgpu_vm_set_task_info - Sets VMs task info.
2520  *
2521  * @vm: vm for which to set the info
2522  */
2523 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
2524 {
2525 	if (!vm->task_info)
2526 		return;
2527 
2528 	if (vm->task_info->task.pid == current->pid)
2529 		return;
2530 
2531 	vm->task_info->task.pid = current->pid;
2532 	get_task_comm(vm->task_info->task.comm, current);
2533 
2534 	vm->task_info->tgid = current->tgid;
2535 	get_task_comm(vm->task_info->process_name, current->group_leader);
2536 }
2537 
2538 /**
2539  * amdgpu_vm_init - initialize a vm instance
2540  *
2541  * @adev: amdgpu_device pointer
2542  * @vm: requested vm
2543  * @xcp_id: GPU partition selection id
2544  * @pasid: the pasid the VM is using on this GPU
2545  *
2546  * Init @vm fields.
2547  *
2548  * Returns:
2549  * 0 for success, error for failure.
2550  */
2551 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2552 		   int32_t xcp_id, uint32_t pasid)
2553 {
2554 	struct amdgpu_bo *root_bo;
2555 	struct amdgpu_bo_vm *root;
2556 	int r, i;
2557 
2558 	vm->va = RB_ROOT_CACHED;
2559 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2560 		vm->reserved_vmid[i] = NULL;
2561 
2562 	amdgpu_vm_bo_status_init(&vm->kernel);
2563 	amdgpu_vm_bo_status_init(&vm->always_valid);
2564 	spin_lock_init(&vm->individual_lock);
2565 	amdgpu_vm_bo_status_init(&vm->individual);
2566 	INIT_LIST_HEAD(&vm->freed);
2567 	INIT_KFIFO(vm->faults);
2568 	spin_lock_init(&vm->stats_lock);
2569 
2570 	r = amdgpu_vm_init_entities(adev, vm);
2571 	if (r)
2572 		return r;
2573 
2574 	ttm_lru_bulk_move_init(&vm->lru_bulk_move);
2575 
2576 	vm->is_compute_context = false;
2577 	vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev);
2578 
2579 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2580 				    AMDGPU_VM_USE_CPU_FOR_GFX);
2581 
2582 	dev_dbg(adev->dev, "VM update mode is %s\n",
2583 		vm->use_cpu_for_update ? "CPU" : "SDMA");
2584 	WARN_ONCE((vm->use_cpu_for_update &&
2585 		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
2586 		  "CPU update of VM recommended only for large BAR system\n");
2587 
2588 	if (vm->use_cpu_for_update)
2589 		vm->update_funcs = &amdgpu_vm_cpu_funcs;
2590 	else
2591 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
2592 
2593 	vm->last_update = dma_fence_get_stub();
2594 	vm->last_unlocked = dma_fence_get_stub();
2595 	vm->last_tlb_flush = dma_fence_get_stub();
2596 	vm->generation = amdgpu_vm_generation(adev, NULL);
2597 
2598 	mutex_init(&vm->eviction_lock);
2599 	vm->evicting = false;
2600 	vm->tlb_fence_context = dma_fence_context_alloc(1);
2601 
2602 	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
2603 				false, &root, xcp_id);
2604 	if (r)
2605 		goto error_free_delayed;
2606 
2607 	root_bo = amdgpu_bo_ref(&root->bo);
2608 	r = amdgpu_bo_reserve(root_bo, true);
2609 	if (r) {
2610 		amdgpu_bo_unref(&root_bo);
2611 		goto error_free_delayed;
2612 	}
2613 
2614 	amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
2615 	r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
2616 	if (r)
2617 		goto error_free_root;
2618 
2619 	r = amdgpu_vm_pt_clear(adev, vm, root, false);
2620 	if (r)
2621 		goto error_free_root;
2622 
2623 	r = amdgpu_vm_create_task_info(vm);
2624 	if (r)
2625 		dev_dbg(adev->dev, "Failed to create task info for VM\n");
2626 
2627 	/* Store new PASID in XArray (if non-zero) */
2628 	if (pasid != 0) {
2629 		r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
2630 		if (r < 0)
2631 			goto error_free_root;
2632 
2633 		vm->pasid = pasid;
2634 	}
2635 
2636 	amdgpu_bo_unreserve(vm->root.bo);
2637 	amdgpu_bo_unref(&root_bo);
2638 
2639 	return 0;
2640 
2641 error_free_root:
2642 	/* If PASID was partially set, erase it from XArray before failing */
2643 	if (vm->pasid != 0) {
2644 		xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
2645 		vm->pasid = 0;
2646 	}
2647 	amdgpu_vm_pt_free_root(adev, vm);
2648 	amdgpu_bo_unreserve(vm->root.bo);
2649 	amdgpu_bo_unref(&root_bo);
2650 
2651 error_free_delayed:
2652 	dma_fence_put(vm->last_tlb_flush);
2653 	dma_fence_put(vm->last_unlocked);
2654 	ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
2655 	amdgpu_vm_fini_entities(vm);
2656 
2657 	return r;
2658 }
2659 
2660 /**
2661  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2662  *
2663  * @adev: amdgpu_device pointer
2664  * @vm: requested vm
2665  *
2666  * This only works on GFX VMs that don't have any BOs added and no
2667  * page tables allocated yet.
2668  *
2669  * Changes the following VM parameters:
2670  * - use_cpu_for_update
2671  * - pte_supports_ats
2672  *
2673  * Reinitializes the page directory to reflect the changed ATS
2674  * setting.
2675  *
2676  * Returns:
2677  * 0 for success, -errno for errors.
2678  */
2679 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2680 {
2681 	int r;
2682 
2683 	r = amdgpu_bo_reserve(vm->root.bo, true);
2684 	if (r)
2685 		return r;
2686 
2687 	/* Update VM state */
2688 	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2689 				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2690 	dev_dbg(adev->dev, "VM update mode is %s\n",
2691 		vm->use_cpu_for_update ? "CPU" : "SDMA");
2692 	WARN_ONCE((vm->use_cpu_for_update &&
2693 		   !amdgpu_gmc_vram_full_visible(&adev->gmc)),
2694 		  "CPU update of VM recommended only for large BAR system\n");
2695 
2696 	if (vm->use_cpu_for_update) {
2697 		/* Sync with last SDMA update/clear before switching to CPU */
2698 		r = amdgpu_bo_sync_wait(vm->root.bo,
2699 					AMDGPU_FENCE_OWNER_UNDEFINED, true);
2700 		if (r)
2701 			goto unreserve_bo;
2702 
2703 		vm->update_funcs = &amdgpu_vm_cpu_funcs;
2704 		r = amdgpu_vm_pt_map_tables(adev, vm);
2705 		if (r)
2706 			goto unreserve_bo;
2707 
2708 	} else {
2709 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
2710 	}
2711 
2712 	dma_fence_put(vm->last_update);
2713 	vm->last_update = dma_fence_get_stub();
2714 	vm->is_compute_context = true;
2715 	vm->need_tlb_fence = true;
2716 
2717 unreserve_bo:
2718 	amdgpu_bo_unreserve(vm->root.bo);
2719 	return r;
2720 }
2721 
2722 static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
2723 {
2724 	for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
2725 		if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
2726 		      vm->stats[i].evicted == 0))
2727 			return false;
2728 	}
2729 	return true;
2730 }
2731 
2732 /**
2733  * amdgpu_vm_fini - tear down a vm instance
2734  *
2735  * @adev: amdgpu_device pointer
2736  * @vm: requested vm
2737  *
2738  * Tear down @vm.
2739  * Unbind the VM and remove all bos from the vm bo list
2740  */
2741 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2742 {
2743 	struct amdgpu_bo_va_mapping *mapping, *tmp;
2744 	bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2745 	struct amdgpu_bo *root;
2746 	unsigned long flags;
2747 	int i;
2748 
2749 	amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2750 
2751 	root = amdgpu_bo_ref(vm->root.bo);
2752 	amdgpu_bo_reserve(root, true);
2753 	/* Remove PASID mapping before destroying VM */
2754 	if (vm->pasid != 0) {
2755 		xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
2756 		vm->pasid = 0;
2757 	}
2758 	dma_fence_wait(vm->last_unlocked, false);
2759 	dma_fence_put(vm->last_unlocked);
2760 	dma_fence_wait(vm->last_tlb_flush, false);
2761 	/* Make sure that all fence callbacks have completed */
2762 	dma_fence_lock_irqsave(vm->last_tlb_flush, flags);
2763 	dma_fence_unlock_irqrestore(vm->last_tlb_flush, flags);
2764 	dma_fence_put(vm->last_tlb_flush);
2765 
2766 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2767 		if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
2768 			amdgpu_vm_prt_fini(adev, vm);
2769 			prt_fini_needed = false;
2770 		}
2771 
2772 		list_del(&mapping->list);
2773 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2774 	}
2775 
2776 	amdgpu_vm_pt_free_root(adev, vm);
2777 	amdgpu_bo_unreserve(root);
2778 	amdgpu_bo_unref(&root);
2779 	WARN_ON(vm->root.bo);
2780 
2781 	amdgpu_vm_fini_entities(vm);
2782 
2783 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2784 		dev_err(adev->dev, "still active bo inside vm\n");
2785 	}
2786 	rbtree_postorder_for_each_entry_safe(mapping, tmp,
2787 					     &vm->va.rb_root, rb) {
2788 		/* Don't remove the mapping here, we don't want to trigger a
2789 		 * rebalance and the tree is about to be destroyed anyway.
2790 		 */
2791 		list_del(&mapping->list);
2792 		kfree(mapping);
2793 	}
2794 
2795 	dma_fence_put(vm->last_update);
2796 
2797 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
2798 		amdgpu_vmid_free_reserved(adev, vm, i);
2799 	}
2800 
2801 	ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
2802 
2803 	if (!amdgpu_vm_stats_is_zero(vm)) {
2804 		struct amdgpu_task_info *ti = vm->task_info;
2805 
2806 		dev_warn(adev->dev,
2807 			 "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
2808 			 ti->process_name, ti->task.pid, ti->task.comm, ti->tgid);
2809 	}
2810 
2811 	amdgpu_vm_put_task_info(vm->task_info);
2812 }
2813 
2814 /**
2815  * amdgpu_vm_manager_init - init the VM manager
2816  *
2817  * @adev: amdgpu_device pointer
2818  *
2819  * Initialize the VM manager structures
2820  */
2821 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2822 {
2823 	/* Concurrent flushes are only possible starting with Vega10 and
2824 	 * are broken on Navi10 and Navi14.
2825 	 */
2826 	adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
2827 					      adev->asic_type == CHIP_NAVI10 ||
2828 					      adev->asic_type == CHIP_NAVI14);
2829 	amdgpu_vmid_mgr_init(adev);
2830 
2831 	spin_lock_init(&adev->vm_manager.prt_lock);
2832 	atomic_set(&adev->vm_manager.num_prt_users, 0);
2833 
2834 	/* If not overridden by the user, by default, only in large BAR systems
2835 	 * Compute VM tables will be updated by CPU
2836 	 */
2837 #ifdef CONFIG_X86_64
2838 	if (amdgpu_vm_update_mode == -1) {
2839 		/* For asic with VF MMIO access protection
2840 		 * avoid using CPU for VM table updates
2841 		 */
2842 		if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
2843 		    !amdgpu_sriov_vf_mmio_access_protection(adev))
2844 			adev->vm_manager.vm_update_mode =
2845 				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2846 		else
2847 			adev->vm_manager.vm_update_mode = 0;
2848 	} else
2849 		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2850 #else
2851 	adev->vm_manager.vm_update_mode = 0;
2852 #endif
2853 
2854 	xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
2855 }
2856 
2857 /**
2858  * amdgpu_vm_manager_fini - cleanup VM manager
2859  *
2860  * @adev: amdgpu_device pointer
2861  *
2862  * Cleanup the VM manager and free resources.
2863  */
2864 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2865 {
2866 	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
2867 	xa_destroy(&adev->vm_manager.pasids);
2868 
2869 	amdgpu_vmid_mgr_fini(adev);
2870 	amdgpu_pasid_mgr_cleanup();
2871 }
2872 
2873 /**
2874  * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
2875  *
2876  * @dev: drm device pointer
2877  * @data: drm_amdgpu_vm
2878  * @filp: drm file pointer
2879  *
2880  * Returns:
2881  * 0 for success, -errno for errors.
2882  */
2883 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2884 {
2885 	union drm_amdgpu_vm *args = data;
2886 	struct amdgpu_device *adev = drm_to_adev(dev);
2887 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
2888 	struct amdgpu_vm *vm = &fpriv->vm;
2889 
2890 	/* No valid flags defined yet */
2891 	if (args->in.flags)
2892 		return -EINVAL;
2893 
2894 	switch (args->in.op) {
2895 	case AMDGPU_VM_OP_RESERVE_VMID:
2896 		/* We only have requirement to reserve vmid from gfxhub */
2897 		return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
2898 	case AMDGPU_VM_OP_UNRESERVE_VMID:
2899 		amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
2900 		break;
2901 	default:
2902 		return -EINVAL;
2903 	}
2904 
2905 	return 0;
2906 }
2907 
2908 /**
2909  * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible.
2910  * @adev: amdgpu device pointer
2911  * @root: root BO of the VM
2912  * @pasid: PASID of the VM
2913  * The caller needs to unreserve and unref the root bo on success.
2914  */
2915 struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev,
2916 					  struct amdgpu_bo **root, u32 pasid)
2917 {
2918 	unsigned long irqflags;
2919 	struct amdgpu_vm *vm;
2920 	int r;
2921 
2922 	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2923 	vm = xa_load(&adev->vm_manager.pasids, pasid);
2924 	*root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL;
2925 	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2926 
2927 	if (!*root)
2928 		return NULL;
2929 
2930 	r = amdgpu_bo_reserve(*root, true);
2931 	if (r)
2932 		goto error_unref;
2933 
2934 	/* Double check that the VM still exists */
2935 	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2936 	vm = xa_load(&adev->vm_manager.pasids, pasid);
2937 	if (vm && vm->root.bo != *root)
2938 		vm = NULL;
2939 	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2940 	if (!vm)
2941 		goto error_unlock;
2942 
2943 	return vm;
2944 error_unlock:
2945 	amdgpu_bo_unreserve(*root);
2946 
2947 error_unref:
2948 	amdgpu_bo_unref(root);
2949 	return NULL;
2950 }
2951 
2952 /**
2953  * amdgpu_vm_handle_fault - graceful handling of VM faults.
2954  * @adev: amdgpu device pointer
2955  * @pasid: PASID of the VM
2956  * @ts: Timestamp of the fault
2957  * @vmid: VMID, only used for GFX 9.4.3.
2958  * @node_id: Node_id received in IH cookie. Only applicable for
2959  *           GFX 9.4.3.
2960  * @addr: Address of the fault
2961  * @write_fault: true is write fault, false is read fault
2962  *
2963  * Try to gracefully handle a VM fault. Return true if the fault was handled and
2964  * shouldn't be reported any more.
2965  */
2966 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
2967 			    u32 vmid, u32 node_id, uint64_t addr,
2968 			    uint64_t ts, bool write_fault)
2969 {
2970 	bool is_compute_context = false;
2971 	struct amdgpu_bo *root;
2972 	uint64_t value, flags;
2973 	struct amdgpu_vm *vm;
2974 	int r;
2975 
2976 	vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
2977 	if (!vm)
2978 		return false;
2979 
2980 	is_compute_context = vm->is_compute_context;
2981 
2982 	if (is_compute_context) {
2983 		/* Unreserve root since svm_range_restore_pages might try to reserve it. */
2984 		/* TODO: rework svm_range_restore_pages so that this isn't necessary. */
2985 		amdgpu_bo_unreserve(root);
2986 
2987 		if (!svm_range_restore_pages(adev, pasid, vmid,
2988 					     node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
2989 			amdgpu_bo_unref(&root);
2990 			return true;
2991 		}
2992 		amdgpu_bo_unref(&root);
2993 
2994 		/* Re-acquire the VM lock, could be that the VM was freed in between. */
2995 		vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
2996 		if (!vm)
2997 			return false;
2998 	}
2999 
3000 	addr /= AMDGPU_GPU_PAGE_SIZE;
3001 	flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
3002 		AMDGPU_PTE_SYSTEM;
3003 
3004 	if (is_compute_context) {
3005 		/* Intentionally setting invalid PTE flag
3006 		 * combination to force a no-retry-fault
3007 		 */
3008 		flags = AMDGPU_VM_NORETRY_FLAGS;
3009 		value = 0;
3010 	} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
3011 		/* Redirect the access to the dummy page */
3012 		value = adev->dummy_page_addr;
3013 		flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
3014 			AMDGPU_PTE_WRITEABLE;
3015 
3016 	} else {
3017 		/* Let the hw retry silently on the PTE */
3018 		value = 0;
3019 	}
3020 
3021 	r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
3022 	if (r) {
3023 		pr_debug("failed %d to reserve fence slot\n", r);
3024 		goto error_unlock;
3025 	}
3026 
3027 	r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
3028 				   NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
3029 	if (r)
3030 		goto error_unlock;
3031 
3032 	r = amdgpu_vm_update_pdes(adev, vm, true);
3033 
3034 error_unlock:
3035 	amdgpu_bo_unreserve(root);
3036 	if (r < 0)
3037 		dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
3038 
3039 	amdgpu_bo_unref(&root);
3040 
3041 	return false;
3042 }
3043 
3044 #if defined(CONFIG_DEBUG_FS)
3045 
3046 /* print the debug info for a specific set of status lists */
3047 static void amdgpu_debugfs_vm_bo_status_info(struct seq_file *m,
3048 					     struct amdgpu_vm_bo_status *lists)
3049 {
3050 	struct amdgpu_vm_bo_base *base;
3051 	unsigned int id;
3052 
3053 	id = 0;
3054 	seq_puts(m, "\tEvicted BOs:\n");
3055 	list_for_each_entry(base, &lists->evicted, vm_status) {
3056 		if (!base->bo)
3057 			continue;
3058 
3059 		amdgpu_bo_print_info(id++, base->bo, m);
3060 	}
3061 
3062 	id = 0;
3063 	seq_puts(m, "\tMoved BOs:\n");
3064 	list_for_each_entry(base, &lists->moved, vm_status) {
3065 		if (!base->bo)
3066 			continue;
3067 
3068 		amdgpu_bo_print_info(id++, base->bo, m);
3069 	}
3070 
3071 	id = 0;
3072 	seq_puts(m, "\tIdle BOs:\n");
3073 	list_for_each_entry(base, &lists->moved, vm_status) {
3074 		if (!base->bo)
3075 			continue;
3076 
3077 		amdgpu_bo_print_info(id++, base->bo, m);
3078 	}
3079 }
3080 
3081 /**
3082  * amdgpu_debugfs_vm_bo_info  - print BO info for the VM
3083  *
3084  * @vm: Requested VM for printing BO info
3085  * @m: debugfs file
3086  *
3087  * Print BO information in debugfs file for the VM
3088  */
3089 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
3090 {
3091 	amdgpu_vm_assert_locked(vm);
3092 
3093 	seq_puts(m, "\tKernel PT/PDs:\n");
3094 	amdgpu_debugfs_vm_bo_status_info(m, &vm->kernel);
3095 
3096 	seq_puts(m, "\tPer VM BOs:\n");
3097 	amdgpu_debugfs_vm_bo_status_info(m, &vm->always_valid);
3098 
3099 	seq_puts(m, "\tIndividual BOs:\n");
3100 	spin_lock(&vm->individual_lock);
3101 	amdgpu_debugfs_vm_bo_status_info(m, &vm->individual);
3102 	spin_unlock(&vm->individual_lock);
3103 }
3104 #endif
3105 
3106 /**
3107  * amdgpu_vm_update_fault_cache - update cached fault into.
3108  * @adev: amdgpu device pointer
3109  * @pasid: PASID of the VM
3110  * @addr: Address of the fault
3111  * @status: GPUVM fault status register
3112  * @vmhub: which vmhub got the fault
3113  *
3114  * Cache the fault info for later use by userspace in debugging.
3115  */
3116 void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
3117 				  unsigned int pasid,
3118 				  uint64_t addr,
3119 				  uint32_t status,
3120 				  unsigned int vmhub)
3121 {
3122 	struct amdgpu_vm *vm;
3123 	unsigned long flags;
3124 
3125 	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
3126 
3127 	vm = xa_load(&adev->vm_manager.pasids, pasid);
3128 	/* Don't update the fault cache if status is 0.  In the multiple
3129 	 * fault case, subsequent faults will return a 0 status which is
3130 	 * useless for userspace and replaces the useful fault status, so
3131 	 * only update if status is non-0.
3132 	 */
3133 	if (vm && status) {
3134 		vm->fault_info.addr = addr;
3135 		vm->fault_info.status = status;
3136 		/*
3137 		 * Update the fault information globally for later usage
3138 		 * when vm could be stale or freed.
3139 		 */
3140 		adev->vm_manager.fault_info.addr = addr;
3141 		adev->vm_manager.fault_info.vmhub = vmhub;
3142 		adev->vm_manager.fault_info.status = status;
3143 
3144 		if (AMDGPU_IS_GFXHUB(vmhub)) {
3145 			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
3146 			vm->fault_info.vmhub |=
3147 				(vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
3148 		} else if (AMDGPU_IS_MMHUB0(vmhub)) {
3149 			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
3150 			vm->fault_info.vmhub |=
3151 				(vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
3152 		} else if (AMDGPU_IS_MMHUB1(vmhub)) {
3153 			vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
3154 			vm->fault_info.vmhub |=
3155 				(vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
3156 		} else {
3157 			WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
3158 		}
3159 	}
3160 	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
3161 }
3162 
3163 void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
3164 			       struct amdgpu_task_info *task_info)
3165 {
3166 	dev_err(adev->dev,
3167 		" Process %s pid %d thread %s pid %d\n",
3168 		task_info->process_name, task_info->tgid,
3169 		task_info->task.comm, task_info->task.pid);
3170 }
3171 
3172 void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev,
3173 				   const struct amdgpu_vm_pte_funcs *vm_pte_funcs)
3174 {
3175 	struct drm_gpu_scheduler *sched;
3176 	int i;
3177 
3178 	for (i = 0; i < adev->sdma.num_instances; i++) {
3179 		if (adev->sdma.has_page_queue)
3180 			sched = &adev->sdma.instance[i].page.sched;
3181 		else
3182 			sched = &adev->sdma.instance[i].ring.sched;
3183 		adev->vm_manager.vm_pte_scheds[i] = sched;
3184 	}
3185 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
3186 	adev->vm_manager.vm_pte_funcs = vm_pte_funcs;
3187 }
3188