1 /*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28
29 #include <linux/dma-fence-array.h>
30 #include <linux/interval_tree_generic.h>
31 #include <linux/idr.h>
32 #include <linux/dma-buf.h>
33
34 #include <drm/amdgpu_drm.h>
35 #include <drm/drm_drv.h>
36 #include <drm/ttm/ttm_tt.h>
37 #include <drm/drm_exec.h>
38 #include "amdgpu.h"
39 #include "amdgpu_vm.h"
40 #include "amdgpu_trace.h"
41 #include "amdgpu_amdkfd.h"
42 #include "amdgpu_gmc.h"
43 #include "amdgpu_xgmi.h"
44 #include "amdgpu_dma_buf.h"
45 #include "amdgpu_res_cursor.h"
46 #include "kfd_svm.h"
47
48 /**
49 * DOC: GPUVM
50 *
51 * GPUVM is the MMU functionality provided on the GPU.
52 * GPUVM is similar to the legacy GART on older ASICs; however,
53 * rather than there being a single global GART table
54 * for the entire GPU, there can be multiple GPUVM page tables active
55 * at any given time. The GPUVM page tables can contain a mix
56 * of VRAM pages and system pages (both memory and MMIO), and system pages
57 * can be mapped as snooped (cached system pages) or unsnooped
58 * (uncached system pages).
59 *
60 * Each active GPUVM has an ID associated with it and there is a page table
61 * linked with each VMID. When executing a command buffer,
62 * the kernel tells the engine what VMID to use for that command
63 * buffer. VMIDs are allocated dynamically as commands are submitted.
64 * The userspace drivers maintain their own address space and the kernel
65 * sets up their page tables accordingly when they submit their
66 * command buffers and a VMID is assigned.
67 * The hardware supports up to 16 active GPUVMs at any given time.
68 *
69 * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
70 * on the ASIC family. GPUVM supports RWX attributes on each page as well
71 * as other features such as encryption and caching attributes.
72 *
73 * VMID 0 is special. It is the GPUVM used for the kernel driver. In
74 * addition to an aperture managed by a page table, VMID 0 also has
75 * several other apertures. There is an aperture for direct access to VRAM
76 * and there is a legacy AGP aperture which just forwards accesses directly
77 * to the matching system physical addresses (or IOVAs when an IOMMU is
78 * present). These apertures provide direct access to these memories without
79 * incurring the overhead of a page table. VMID 0 is used by the kernel
80 * driver for tasks like memory management.
81 *
82 * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
83 * For user applications, each application can have its own unique GPUVM
84 * address space. The application manages the address space and the kernel
85 * driver manages the GPUVM page tables for each process. If a GPU client
86 * accesses an invalid page, it will generate a GPU page fault, similar to
87 * accessing an invalid page on a CPU.
88 */
89
90 #define START(node) ((node)->start)
91 #define LAST(node) ((node)->last)
92
93 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
94 START, LAST, static, amdgpu_vm_it)
95
96 #undef START
97 #undef LAST
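/*
 * The INTERVAL_TREE_DEFINE() above generates the amdgpu_vm_it_insert(),
 * amdgpu_vm_it_remove(), amdgpu_vm_it_iter_first() and amdgpu_vm_it_iter_next()
 * helpers used below to track mappings by GPU page range. A minimal
 * illustrative sketch (not driver code) of rejecting an overlapping range
 * before inserting a new mapping, mirroring what amdgpu_vm_bo_map() does:
 *
 *	if (amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr))
 *		return -EINVAL;
 *	mapping->start = saddr;
 *	mapping->last = eaddr;
 *	amdgpu_vm_it_insert(mapping, &vm->va);
 */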
98
99 /**
100 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
101 */
102 struct amdgpu_prt_cb {
103
104 /**
105 * @adev: amdgpu device
106 */
107 struct amdgpu_device *adev;
108
109 /**
110 * @cb: callback
111 */
112 struct dma_fence_cb cb;
113 };
114
115 /**
116 * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence
117 */
118 struct amdgpu_vm_tlb_seq_struct {
119 /**
120 * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
121 */
122 struct amdgpu_vm *vm;
123
124 /**
125 * @cb: callback
126 */
127 struct dma_fence_cb cb;
128 };
129
130 /**
131 * amdgpu_vm_assert_locked - check if VM is correctly locked
132 * @vm: the VM which should be tested
133 *
134 * Asserts that the VM root PD is locked.
135 */
136 static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm)
137 {
138 dma_resv_assert_held(vm->root.bo->tbo.base.resv);
139 }
140
141 /**
142 * amdgpu_vm_bo_evicted - vm_bo is evicted
143 *
144 * @vm_bo: vm_bo which is evicted
145 *
146 * State for PDs/PTs and per VM BOs which are not at the location they should
147 * be.
148 */
149 static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
150 {
151 struct amdgpu_vm *vm = vm_bo->vm;
152 struct amdgpu_bo *bo = vm_bo->bo;
153
154 vm_bo->moved = true;
155 amdgpu_vm_assert_locked(vm);
156 spin_lock(&vm_bo->vm->status_lock);
157 if (bo->tbo.type == ttm_bo_type_kernel)
158 list_move(&vm_bo->vm_status, &vm->evicted);
159 else
160 list_move_tail(&vm_bo->vm_status, &vm->evicted);
161 spin_unlock(&vm_bo->vm->status_lock);
162 }
163 /**
164 * amdgpu_vm_bo_moved - vm_bo is moved
165 *
166 * @vm_bo: vm_bo which is moved
167 *
168 * State for per VM BOs which are moved, but that change is not yet reflected
169 * in the page tables.
170 */
171 static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
172 {
173 amdgpu_vm_assert_locked(vm_bo->vm);
174 spin_lock(&vm_bo->vm->status_lock);
175 list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
176 spin_unlock(&vm_bo->vm->status_lock);
177 }
178
179 /**
180 * amdgpu_vm_bo_idle - vm_bo is idle
181 *
182 * @vm_bo: vm_bo which is now idle
183 *
184 * State for PDs/PTs and per VM BOs which have gone through the state machine
185 * and are now idle.
186 */
187 static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
188 {
189 amdgpu_vm_assert_locked(vm_bo->vm);
190 spin_lock(&vm_bo->vm->status_lock);
191 list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
192 spin_unlock(&vm_bo->vm->status_lock);
193 vm_bo->moved = false;
194 }
195
196 /**
197 * amdgpu_vm_bo_invalidated - vm_bo is invalidated
198 *
199 * @vm_bo: vm_bo which is now invalidated
200 *
201 * State for normal BOs which are invalidated and that change is not yet reflected
202 * in the PTs.
203 */
204 static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
205 {
206 spin_lock(&vm_bo->vm->status_lock);
207 list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
208 spin_unlock(&vm_bo->vm->status_lock);
209 }
210
211 /**
212 * amdgpu_vm_bo_evicted_user - vm_bo is evicted
213 *
214 * @vm_bo: vm_bo which is evicted
215 *
216 * State for BOs used by user mode queues which are not at the location they
217 * should be.
218 */
219 static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo)
220 {
221 vm_bo->moved = true;
222 spin_lock(&vm_bo->vm->status_lock);
223 list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user);
224 spin_unlock(&vm_bo->vm->status_lock);
225 }
226
227 /**
228 * amdgpu_vm_bo_relocated - vm_bo is relocated
229 *
230 * @vm_bo: vm_bo which is relocated
231 *
232 * State for PDs/PTs which need to update their parent PD.
233 * For the root PD, just move to idle state.
234 */
235 static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
236 {
237 amdgpu_vm_assert_locked(vm_bo->vm);
238 if (vm_bo->bo->parent) {
239 spin_lock(&vm_bo->vm->status_lock);
240 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
241 spin_unlock(&vm_bo->vm->status_lock);
242 } else {
243 amdgpu_vm_bo_idle(vm_bo);
244 }
245 }
246
247 /**
248 * amdgpu_vm_bo_done - vm_bo is done
249 *
250 * @vm_bo: vm_bo which is now done
251 *
252 * State for normal BOs which are invalidated and that change has been updated
253 * in the PTs.
254 */
255 static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
256 {
257 amdgpu_vm_assert_locked(vm_bo->vm);
258 spin_lock(&vm_bo->vm->status_lock);
259 list_move(&vm_bo->vm_status, &vm_bo->vm->done);
260 spin_unlock(&vm_bo->vm->status_lock);
261 }
262
263 /**
264 * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine
265 * @vm: the VM which state machine to reset
266 *
267 * Move all vm_bo objects in the VM into a state where they will be updated
268 * again during validation.
269 */
270 static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm)
271 {
272 struct amdgpu_vm_bo_base *vm_bo, *tmp;
273
274 amdgpu_vm_assert_locked(vm);
275
276 spin_lock(&vm->status_lock);
277 list_splice_init(&vm->done, &vm->invalidated);
278 list_for_each_entry(vm_bo, &vm->invalidated, vm_status)
279 vm_bo->moved = true;
280
281 list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) {
282 struct amdgpu_bo *bo = vm_bo->bo;
283
284 vm_bo->moved = true;
285 if (!bo || bo->tbo.type != ttm_bo_type_kernel)
286 list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
287 else if (bo->parent)
288 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
289 }
290 spin_unlock(&vm->status_lock);
291 }
292
293 /**
294 * amdgpu_vm_update_shared - helper to update shared memory stat
295 * @base: base structure for tracking BO usage in a VM
296 *
297 * Takes the vm status_lock and updates the shared memory stat. If the basic
298 * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats needs to be called
299 * as well.
300 */
301 static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base)
302 {
303 struct amdgpu_vm *vm = base->vm;
304 struct amdgpu_bo *bo = base->bo;
305 uint64_t size = amdgpu_bo_size(bo);
306 uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
307 bool shared;
308
309 dma_resv_assert_held(bo->tbo.base.resv);
310 spin_lock(&vm->status_lock);
311 shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
312 if (base->shared != shared) {
313 base->shared = shared;
314 if (shared) {
315 vm->stats[bo_memtype].drm.shared += size;
316 vm->stats[bo_memtype].drm.private -= size;
317 } else {
318 vm->stats[bo_memtype].drm.shared -= size;
319 vm->stats[bo_memtype].drm.private += size;
320 }
321 }
322 spin_unlock(&vm->status_lock);
323 }
324
325 /**
326 * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared
327 * @bo: amdgpu buffer object
328 *
329 * Update the per-VM stats for all VMs this BO belongs to when it changes
330 * from private to shared or vice versa.
331 */
332 void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo)
333 {
334 struct amdgpu_vm_bo_base *base;
335
336 for (base = bo->vm_bo; base; base = base->next)
337 amdgpu_vm_update_shared(base);
338 }
339
340 /**
341 * amdgpu_vm_update_stats_locked - helper to update normal memory stat
342 * @base: base structure for tracking BO usage in a VM
343 * @res: the ttm_resource to use for the purpose of accounting, may or may not
344 * be bo->tbo.resource
345 * @sign: if we should add (+1) or subtract (-1) from the stat
346 *
347 * Caller needs to hold the vm status_lock. Useful when multiple updates
348 * need to happen at the same time.
349 */
350 static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base,
351 struct ttm_resource *res, int sign)
352 {
353 struct amdgpu_vm *vm = base->vm;
354 struct amdgpu_bo *bo = base->bo;
355 int64_t size = sign * amdgpu_bo_size(bo);
356 uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo);
357
358 /* For drm-total- and drm-shared-, BOs are accounted by their preferred
359 * placement, see also amdgpu_bo_mem_stats_placement.
360 */
361 if (base->shared)
362 vm->stats[bo_memtype].drm.shared += size;
363 else
364 vm->stats[bo_memtype].drm.private += size;
365
366 if (res && res->mem_type < __AMDGPU_PL_NUM) {
367 uint32_t res_memtype = res->mem_type;
368
369 vm->stats[res_memtype].drm.resident += size;
370 /* A BO only counts as purgeable if it is resident,
371 * since otherwise there's nothing to purge.
372 */
373 if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE)
374 vm->stats[res_memtype].drm.purgeable += size;
375 if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype)))
376 vm->stats[bo_memtype].evicted += size;
377 }
378 }
379
380 /**
381 * amdgpu_vm_update_stats - helper to update normal memory stat
382 * @base: base structure for tracking BO usage in a VM
383 * @res: the ttm_resource to use for the purpose of accounting, may or may not
384 * be bo->tbo.resource
385 * @sign: if we should add (+1) or subtract (-1) from the stat
386 *
387 * Updates the basic memory stat when bo is added/deleted/moved.
388 */
389 void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base,
390 struct ttm_resource *res, int sign)
391 {
392 struct amdgpu_vm *vm = base->vm;
393
394 spin_lock(&vm->status_lock);
395 amdgpu_vm_update_stats_locked(base, res, sign);
396 spin_unlock(&vm->status_lock);
397 }
398
399 /**
400 * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
401 *
402 * @base: base structure for tracking BO usage in a VM
403 * @vm: vm to which bo is to be added
404 * @bo: amdgpu buffer object
405 *
406 * Initialize a bo_va_base structure and add it to the appropriate lists
407 *
408 */
409 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
410 struct amdgpu_vm *vm, struct amdgpu_bo *bo)
411 {
412 base->vm = vm;
413 base->bo = bo;
414 base->next = NULL;
415 INIT_LIST_HEAD(&base->vm_status);
416
417 if (!bo)
418 return;
419 base->next = bo->vm_bo;
420 bo->vm_bo = base;
421
422 spin_lock(&vm->status_lock);
423 base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base);
424 amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1);
425 spin_unlock(&vm->status_lock);
426
427 if (!amdgpu_vm_is_bo_always_valid(vm, bo))
428 return;
429
430 dma_resv_assert_held(vm->root.bo->tbo.base.resv);
431
432 ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
433 if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
434 amdgpu_vm_bo_relocated(base);
435 else
436 amdgpu_vm_bo_idle(base);
437
438 if (bo->preferred_domains &
439 amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
440 return;
441
442 /*
443 * We checked all the prerequisites, but it looks like this per-VM BO
444 * is currently evicted. Add the BO to the evicted list to make sure it
445 * is validated on next VM use to avoid a fault.
446 */
447 amdgpu_vm_bo_evicted(base);
448 }
449
450 /**
451 * amdgpu_vm_lock_pd - lock PD in drm_exec
452 *
453 * @vm: vm providing the BOs
454 * @exec: drm execution context
455 * @num_fences: number of extra fences to reserve
456 *
457 * Lock the VM root PD in the DRM execution context.
458 */
459 int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
460 unsigned int num_fences)
461 {
462 /* We need at least two fences for the VM PD/PT updates */
463 return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
464 2 + num_fences);
465 }
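/*
 * Usage note: callers typically take this lock inside a drm_exec retry loop.
 * A minimal, hedged sketch (error handling trimmed, "vm" provided by the
 * caller; real users additionally lock the GEM objects they work on):
 *
 *	struct drm_exec exec;
 *
 *	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
 *	drm_exec_until_all_locked(&exec) {
 *		r = amdgpu_vm_lock_pd(vm, &exec, 0);
 *		drm_exec_retry_on_contention(&exec);
 *		if (unlikely(r))
 *			break;
 *	}
 *	... validate/update the VM while the root PD stays locked ...
 *	drm_exec_fini(&exec);
 */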
466
467 /**
468 * amdgpu_vm_lock_done_list - lock all BOs on the done list
469 * @vm: vm providing the BOs
470 * @exec: drm execution context
471 * @num_fences: number of extra fences to reserve
472 *
473 * Lock the BOs on the done list in the DRM execution context.
474 */
475 int amdgpu_vm_lock_done_list(struct amdgpu_vm *vm, struct drm_exec *exec,
476 unsigned int num_fences)
477 {
478 struct list_head *prev = &vm->done;
479 struct amdgpu_bo_va *bo_va;
480 struct amdgpu_bo *bo;
481 int ret;
482
483 /* We can only trust prev->next while holding the lock */
484 spin_lock(&vm->status_lock);
485 while (!list_is_head(prev->next, &vm->done)) {
486 bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status);
487 spin_unlock(&vm->status_lock);
488
489 bo = bo_va->base.bo;
490 if (bo) {
491 ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1);
492 if (unlikely(ret))
493 return ret;
494 }
495 spin_lock(&vm->status_lock);
496 prev = prev->next;
497 }
498 spin_unlock(&vm->status_lock);
499
500 return 0;
501 }
502
503 /**
504 * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
505 *
506 * @adev: amdgpu device pointer
507 * @vm: vm providing the BOs
508 *
509 * Move all BOs to the end of LRU and remember their positions to put them
510 * together.
511 */
512 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
513 struct amdgpu_vm *vm)
514 {
515 spin_lock(&adev->mman.bdev.lru_lock);
516 ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
517 spin_unlock(&adev->mman.bdev.lru_lock);
518 }
519
520 /* Create scheduler entities for page table updates */
521 static int amdgpu_vm_init_entities(struct amdgpu_device *adev,
522 struct amdgpu_vm *vm)
523 {
524 int r;
525
526 r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
527 adev->vm_manager.vm_pte_scheds,
528 adev->vm_manager.vm_pte_num_scheds, NULL);
529 if (r)
530 goto error;
531
532 return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
533 adev->vm_manager.vm_pte_scheds,
534 adev->vm_manager.vm_pte_num_scheds, NULL);
535
536 error:
537 drm_sched_entity_destroy(&vm->immediate);
538 return r;
539 }
540
541 /* Destroy the entities for page table updates again */
542 static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm)
543 {
544 drm_sched_entity_destroy(&vm->immediate);
545 drm_sched_entity_destroy(&vm->delayed);
546 }
547
548 /**
549 * amdgpu_vm_generation - return the page table re-generation counter
550 * @adev: the amdgpu_device
551 * @vm: optional VM to check, might be NULL
552 *
553 * Returns a page table re-generation token to allow checking if submissions
554 * are still valid to use this VM. The VM parameter might be NULL in which case
555 * just the VRAM lost counter will be used.
556 */
557 uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
558 {
559 uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32;
560
561 if (!vm)
562 return result;
563
564 result += lower_32_bits(vm->generation);
565 /* Add one if the page tables will be re-generated on next CS */
566 if (drm_sched_entity_error(&vm->delayed))
567 ++result;
568
569 return result;
570 }
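/*
 * Usage note (illustrative sketch only): callers sample the token when they
 * prepare work and compare it again before submission; any difference means
 * VRAM was lost or the page tables were re-generated, so the prepared page
 * table updates must be redone. amdgpu_vm_validate() below uses it this way:
 *
 *	uint64_t gen = amdgpu_vm_generation(adev, vm);
 *	... prepare page table updates ...
 *	if (gen != amdgpu_vm_generation(adev, vm))
 *		... restart, the prepared state is stale ...
 */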
571
572 /**
573 * amdgpu_vm_validate - validate evicted BOs tracked in the VM
574 *
575 * @adev: amdgpu device pointer
576 * @vm: vm providing the BOs
577 * @ticket: optional reservation ticket used to reserve the VM
578 * @validate: callback to do the validation
579 * @param: parameter for the validation callback
580 *
581 * Validate the page table BOs and per-VM BOs on command submission if
582 * necessary. If a ticket is given, also try to validate evicted user queue
583 * BOs. They must already be reserved with the given ticket.
584 *
585 * Returns:
586 * Validation result.
587 */
588 int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
589 struct ww_acquire_ctx *ticket,
590 int (*validate)(void *p, struct amdgpu_bo *bo),
591 void *param)
592 {
593 uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
594 struct amdgpu_vm_bo_base *bo_base;
595 struct amdgpu_bo *bo;
596 int r;
597
598 if (vm->generation != new_vm_generation) {
599 vm->generation = new_vm_generation;
600 amdgpu_vm_bo_reset_state_machine(vm);
601 amdgpu_vm_fini_entities(vm);
602 r = amdgpu_vm_init_entities(adev, vm);
603 if (r)
604 return r;
605 }
606
607 spin_lock(&vm->status_lock);
608 while (!list_empty(&vm->evicted)) {
609 bo_base = list_first_entry(&vm->evicted,
610 struct amdgpu_vm_bo_base,
611 vm_status);
612 spin_unlock(&vm->status_lock);
613
614 bo = bo_base->bo;
615
616 r = validate(param, bo);
617 if (r)
618 return r;
619
620 if (bo->tbo.type != ttm_bo_type_kernel) {
621 amdgpu_vm_bo_moved(bo_base);
622 } else {
623 vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
624 amdgpu_vm_bo_relocated(bo_base);
625 }
626 spin_lock(&vm->status_lock);
627 }
628 while (ticket && !list_empty(&vm->evicted_user)) {
629 bo_base = list_first_entry(&vm->evicted_user,
630 struct amdgpu_vm_bo_base,
631 vm_status);
632 spin_unlock(&vm->status_lock);
633
634 bo = bo_base->bo;
635 dma_resv_assert_held(bo->tbo.base.resv);
636
637 r = validate(param, bo);
638 if (r)
639 return r;
640
641 amdgpu_vm_bo_invalidated(bo_base);
642
643 spin_lock(&vm->status_lock);
644 }
645 spin_unlock(&vm->status_lock);
646
647 amdgpu_vm_eviction_lock(vm);
648 vm->evicting = false;
649 amdgpu_vm_eviction_unlock(vm);
650
651 return 0;
652 }
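/*
 * The @validate callback is provided by the caller and usually just moves the
 * BO back into one of its allowed placements. A hedged sketch of such a
 * callback ("example_validate" is a made-up name; the CS and KFD paths use
 * their own variants with extra placement logic):
 *
 *	static int example_validate(void *param, struct amdgpu_bo *bo)
 *	{
 *		struct ttm_operation_ctx ctx = { true, false };
 *
 *		amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
 *		return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 *	}
 *
 *	r = amdgpu_vm_validate(adev, vm, NULL, example_validate, NULL);
 */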
653
654 /**
655 * amdgpu_vm_ready - check VM is ready for updates
656 *
657 * @vm: VM to check
658 *
659 * Check if all VM PDs/PTs are ready for updates
660 *
661 * Returns:
662 * True if VM is not evicting and all VM entities are not stopped
663 */
664 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
665 {
666 bool ret;
667
668 amdgpu_vm_assert_locked(vm);
669
670 amdgpu_vm_eviction_lock(vm);
671 ret = !vm->evicting;
672 amdgpu_vm_eviction_unlock(vm);
673
674 spin_lock(&vm->status_lock);
675 ret &= list_empty(&vm->evicted);
676 spin_unlock(&vm->status_lock);
677
678 spin_lock(&vm->immediate.lock);
679 ret &= !vm->immediate.stopped;
680 spin_unlock(&vm->immediate.lock);
681
682 spin_lock(&vm->delayed.lock);
683 ret &= !vm->delayed.stopped;
684 spin_unlock(&vm->delayed.lock);
685
686 return ret;
687 }
688
689 /**
690 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
691 *
692 * @adev: amdgpu_device pointer
693 */
694 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
695 {
696 const struct amdgpu_ip_block *ip_block;
697 bool has_compute_vm_bug;
698 struct amdgpu_ring *ring;
699 int i;
700
701 has_compute_vm_bug = false;
702
703 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
704 if (ip_block) {
705 /* Compute has a VM bug for GFX version < 7.
706 * Compute has a VM bug for GFX 8 MEC firmware version < 673. */
707 if (ip_block->version->major <= 7)
708 has_compute_vm_bug = true;
709 else if (ip_block->version->major == 8)
710 if (adev->gfx.mec_fw_version < 673)
711 has_compute_vm_bug = true;
712 }
713
714 for (i = 0; i < adev->num_rings; i++) {
715 ring = adev->rings[i];
716 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
717 /* only compute rings */
718 ring->has_compute_vm_bug = has_compute_vm_bug;
719 else
720 ring->has_compute_vm_bug = false;
721 }
722 }
723
724 /**
725 * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
726 *
727 * @ring: ring on which the job will be submitted
728 * @job: job to submit
729 *
730 * Returns:
731 * True if sync is needed.
732 */
733 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
734 struct amdgpu_job *job)
735 {
736 struct amdgpu_device *adev = ring->adev;
737 unsigned vmhub = ring->vm_hub;
738 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
739
740 if (job->vmid == 0)
741 return false;
742
743 if (job->vm_needs_flush || ring->has_compute_vm_bug)
744 return true;
745
746 if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
747 return true;
748
749 if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
750 return true;
751
752 return false;
753 }
754
755 /**
756 * amdgpu_vm_flush - hardware flush the vm
757 *
758 * @ring: ring to use for flush
759 * @job: related job
760 * @need_pipe_sync: is pipe sync needed
761 *
762 * Emit a VM flush when it is necessary.
763 *
764 * Returns:
765 * 0 on success, errno otherwise.
766 */
767 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
768 bool need_pipe_sync)
769 {
770 struct amdgpu_device *adev = ring->adev;
771 struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
772 unsigned vmhub = ring->vm_hub;
773 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
774 struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
775 bool spm_update_needed = job->spm_update_needed;
776 bool gds_switch_needed = ring->funcs->emit_gds_switch &&
777 job->gds_switch_needed;
778 bool vm_flush_needed = job->vm_needs_flush;
779 bool cleaner_shader_needed = false;
780 bool pasid_mapping_needed = false;
781 struct dma_fence *fence = NULL;
782 struct amdgpu_fence *af;
783 unsigned int patch;
784 int r;
785
786 if (amdgpu_vmid_had_gpu_reset(adev, id)) {
787 gds_switch_needed = true;
788 vm_flush_needed = true;
789 pasid_mapping_needed = true;
790 spm_update_needed = true;
791 }
792
793 mutex_lock(&id_mgr->lock);
794 if (id->pasid != job->pasid || !id->pasid_mapping ||
795 !dma_fence_is_signaled(id->pasid_mapping))
796 pasid_mapping_needed = true;
797 mutex_unlock(&id_mgr->lock);
798
799 gds_switch_needed &= !!ring->funcs->emit_gds_switch;
800 vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
801 job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
802 pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
803 ring->funcs->emit_wreg;
804
805 cleaner_shader_needed = job->run_cleaner_shader &&
806 adev->gfx.enable_cleaner_shader &&
807 ring->funcs->emit_cleaner_shader && job->base.s_fence &&
808 &job->base.s_fence->scheduled == isolation->spearhead;
809
810 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
811 !cleaner_shader_needed)
812 return 0;
813
814 amdgpu_ring_ib_begin(ring);
815 if (ring->funcs->init_cond_exec)
816 patch = amdgpu_ring_init_cond_exec(ring,
817 ring->cond_exe_gpu_addr);
818
819 if (need_pipe_sync)
820 amdgpu_ring_emit_pipeline_sync(ring);
821
822 if (cleaner_shader_needed)
823 ring->funcs->emit_cleaner_shader(ring);
824
825 if (vm_flush_needed) {
826 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
827 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
828 }
829
830 if (pasid_mapping_needed)
831 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
832
833 if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
834 adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid);
835
836 if (ring->funcs->emit_gds_switch &&
837 gds_switch_needed) {
838 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
839 job->gds_size, job->gws_base,
840 job->gws_size, job->oa_base,
841 job->oa_size);
842 }
843
844 if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
845 r = amdgpu_fence_emit(ring, &fence, NULL, 0);
846 if (r)
847 return r;
848 /* this is part of the job's context */
849 af = container_of(fence, struct amdgpu_fence, base);
850 af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0;
851 }
852
853 if (vm_flush_needed) {
854 mutex_lock(&id_mgr->lock);
855 dma_fence_put(id->last_flush);
856 id->last_flush = dma_fence_get(fence);
857 id->current_gpu_reset_count =
858 atomic_read(&adev->gpu_reset_counter);
859 mutex_unlock(&id_mgr->lock);
860 }
861
862 if (pasid_mapping_needed) {
863 mutex_lock(&id_mgr->lock);
864 id->pasid = job->pasid;
865 dma_fence_put(id->pasid_mapping);
866 id->pasid_mapping = dma_fence_get(fence);
867 mutex_unlock(&id_mgr->lock);
868 }
869
870 /*
871 * Make sure that all other submissions wait for the cleaner shader to
872 * finish before we push them to the HW.
873 */
874 if (cleaner_shader_needed) {
875 trace_amdgpu_cleaner_shader(ring, fence);
876 mutex_lock(&adev->enforce_isolation_mutex);
877 dma_fence_put(isolation->spearhead);
878 isolation->spearhead = dma_fence_get(fence);
879 mutex_unlock(&adev->enforce_isolation_mutex);
880 }
881 dma_fence_put(fence);
882
883 amdgpu_ring_patch_cond_exec(ring, patch);
884
885 /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
886 if (ring->funcs->emit_switch_buffer) {
887 amdgpu_ring_emit_switch_buffer(ring);
888 amdgpu_ring_emit_switch_buffer(ring);
889 }
890
891 amdgpu_ring_ib_end(ring);
892 return 0;
893 }
894
895 /**
896 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
897 *
898 * @vm: requested vm
899 * @bo: requested buffer object
900 *
901 * Find @bo inside the requested vm.
902 * Search inside the @bo's vm list for the requested vm.
903 * Returns the found bo_va or NULL if none is found.
904 *
905 * Object has to be reserved!
906 *
907 * Returns:
908 * Found bo_va or NULL.
909 */
910 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
911 struct amdgpu_bo *bo)
912 {
913 struct amdgpu_vm_bo_base *base;
914
915 for (base = bo->vm_bo; base; base = base->next) {
916 if (base->vm != vm)
917 continue;
918
919 return container_of(base, struct amdgpu_bo_va, base);
920 }
921 return NULL;
922 }
923
924 /**
925 * amdgpu_vm_map_gart - Resolve gart mapping of addr
926 *
927 * @pages_addr: optional DMA address to use for lookup
928 * @addr: the unmapped addr
929 *
930 * Look up the physical address of the page that the pte resolves
931 * to.
932 *
933 * Returns:
934 * The pointer for the page table entry.
935 */
936 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
937 {
938 uint64_t result;
939
940 /* page table offset */
941 result = pages_addr[addr >> PAGE_SHIFT];
942
943 /* in case cpu page size != gpu page size */
944 result |= addr & (~PAGE_MASK);
945
946 result &= 0xFFFFFFFFFFFFF000ULL;
947
948 return result;
949 }
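/*
 * Worked example (illustrative, assuming 64K CPU pages and 4K GPU pages):
 * for addr = 0x2d000, pages_addr[0x2d000 >> PAGE_SHIFT] = pages_addr[2]
 * yields the DMA address of the CPU page, addr & ~PAGE_MASK = 0xd000 keeps
 * the offset of the GPU page inside that CPU page, and the final mask clears
 * everything below 4K so the result is a valid, 4K-aligned PTE address.
 */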
950
951 /**
952 * amdgpu_vm_update_pdes - make sure that all directories are valid
953 *
954 * @adev: amdgpu_device pointer
955 * @vm: requested vm
956 * @immediate: submit immediately to the paging queue
957 *
958 * Makes sure all directories are up to date.
959 *
960 * Returns:
961 * 0 for success, error for failure.
962 */
963 int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
964 struct amdgpu_vm *vm, bool immediate)
965 {
966 struct amdgpu_vm_update_params params;
967 struct amdgpu_vm_bo_base *entry;
968 bool flush_tlb_needed = false;
969 LIST_HEAD(relocated);
970 int r, idx;
971
972 amdgpu_vm_assert_locked(vm);
973
974 spin_lock(&vm->status_lock);
975 list_splice_init(&vm->relocated, &relocated);
976 spin_unlock(&vm->status_lock);
977
978 if (list_empty(&relocated))
979 return 0;
980
981 if (!drm_dev_enter(adev_to_drm(adev), &idx))
982 return -ENODEV;
983
984 memset(¶ms, 0, sizeof(params));
985 params.adev = adev;
986 params.vm = vm;
987 params.immediate = immediate;
988
989 r = vm->update_funcs->prepare(¶ms, NULL,
990 AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES);
991 if (r)
992 goto error;
993
994 list_for_each_entry(entry, &relocated, vm_status) {
995 /* vm_flush_needed after updating moved PDEs */
996 flush_tlb_needed |= entry->moved;
997
998 r = amdgpu_vm_pde_update(&params, entry);
999 if (r)
1000 goto error;
1001 }
1002
1003 r = vm->update_funcs->commit(&params, &vm->last_update);
1004 if (r)
1005 goto error;
1006
1007 if (flush_tlb_needed)
1008 atomic64_inc(&vm->tlb_seq);
1009
1010 while (!list_empty(&relocated)) {
1011 entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
1012 vm_status);
1013 amdgpu_vm_bo_idle(entry);
1014 }
1015
1016 error:
1017 drm_dev_exit(idx);
1018 return r;
1019 }
1020
1021 /**
1022 * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
1023 * @fence: unused
1024 * @cb: the callback structure
1025 *
1026 * Increments the tlb sequence to make sure that future CS execute a VM flush.
1027 */
1028 static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
1029 struct dma_fence_cb *cb)
1030 {
1031 struct amdgpu_vm_tlb_seq_struct *tlb_cb;
1032
1033 tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
1034 atomic64_inc(&tlb_cb->vm->tlb_seq);
1035 kfree(tlb_cb);
1036 }
1037
1038 /**
1039 * amdgpu_vm_tlb_flush - prepare TLB flush
1040 *
1041 * @params: parameters for update
1042 * @fence: input fence to sync TLB flush with
1043 * @tlb_cb: the callback structure
1044 *
1045 * Increments the tlb sequence to make sure that future CS execute a VM flush.
1046 */
1047 static void
1048 amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
1049 struct dma_fence **fence,
1050 struct amdgpu_vm_tlb_seq_struct *tlb_cb)
1051 {
1052 struct amdgpu_vm *vm = params->vm;
1053
1054 tlb_cb->vm = vm;
1055 if (!fence || !*fence) {
1056 amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
1057 return;
1058 }
1059
1060 if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
1061 amdgpu_vm_tlb_seq_cb)) {
1062 dma_fence_put(vm->last_tlb_flush);
1063 vm->last_tlb_flush = dma_fence_get(*fence);
1064 } else {
1065 amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
1066 }
1067
1068 /* Prepare a TLB flush fence to be attached to PTs */
1069 if (!params->unlocked && vm->is_compute_context) {
1070 amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
1071
1072 /* Makes sure no PD/PT is freed before the flush */
1073 dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
1074 DMA_RESV_USAGE_BOOKKEEP);
1075 }
1076 }
1077
1078 /**
1079 * amdgpu_vm_update_range - update a range in the vm page table
1080 *
1081 * @adev: amdgpu_device pointer to use for commands
1082 * @vm: the VM to update the range
1083 * @immediate: immediate submission in a page fault
1084 * @unlocked: unlocked invalidation during MM callback
1085 * @flush_tlb: trigger tlb invalidation after update completed
1086 * @allow_override: change MTYPE for local NUMA nodes
1087 * @sync: fences we need to sync to
1088 * @start: start of mapped range
1089 * @last: last mapped entry
1090 * @flags: flags for the entries
1091 * @offset: offset into nodes and pages_addr
1092 * @vram_base: base for vram mappings
1093 * @res: ttm_resource to map
1094 * @pages_addr: DMA addresses to use for mapping
1095 * @fence: optional resulting fence
1096 *
1097 * Fill in the page table entries between @start and @last.
1098 *
1099 * Returns:
1100 * 0 for success, negative error code for failure.
1101 */
1102 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
1103 bool immediate, bool unlocked, bool flush_tlb,
1104 bool allow_override, struct amdgpu_sync *sync,
1105 uint64_t start, uint64_t last, uint64_t flags,
1106 uint64_t offset, uint64_t vram_base,
1107 struct ttm_resource *res, dma_addr_t *pages_addr,
1108 struct dma_fence **fence)
1109 {
1110 struct amdgpu_vm_tlb_seq_struct *tlb_cb;
1111 struct amdgpu_vm_update_params params;
1112 struct amdgpu_res_cursor cursor;
1113 int r, idx;
1114
1115 if (!drm_dev_enter(adev_to_drm(adev), &idx))
1116 return -ENODEV;
1117
1118 tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
1119 if (!tlb_cb) {
1120 drm_dev_exit(idx);
1121 return -ENOMEM;
1122 }
1123
1124 /* On Vega20+XGMI, PTEs can get inadvertently cached in the L2 texture
1125 * cache, so do a heavy-weight TLB flush unconditionally.
1126 */
1127 flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
1128 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0);
1129
1130 /*
1131 * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
1132 */
1133 flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0);
1134
1135 memset(¶ms, 0, sizeof(params));
1136 params.adev = adev;
1137 params.vm = vm;
1138 params.immediate = immediate;
1139 params.pages_addr = pages_addr;
1140 params.unlocked = unlocked;
1141 params.needs_flush = flush_tlb;
1142 params.allow_override = allow_override;
1143 INIT_LIST_HEAD(¶ms.tlb_flush_waitlist);
1144
1145 amdgpu_vm_eviction_lock(vm);
1146 if (vm->evicting) {
1147 r = -EBUSY;
1148 goto error_free;
1149 }
1150
1151 if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
1152 struct dma_fence *tmp = dma_fence_get_stub();
1153
1154 amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true);
1155 swap(vm->last_unlocked, tmp);
1156 dma_fence_put(tmp);
1157 }
1158
1159 r = vm->update_funcs->prepare(¶ms, sync,
1160 AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE);
1161 if (r)
1162 goto error_free;
1163
1164 amdgpu_res_first(pages_addr ? NULL : res, offset,
1165 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
1166 while (cursor.remaining) {
1167 uint64_t tmp, num_entries, addr;
1168
1169 num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
1170 if (pages_addr) {
1171 bool contiguous = true;
1172
1173 if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
1174 uint64_t pfn = cursor.start >> PAGE_SHIFT;
1175 uint64_t count;
1176
1177 contiguous = pages_addr[pfn + 1] ==
1178 pages_addr[pfn] + PAGE_SIZE;
1179
1180 tmp = num_entries /
1181 AMDGPU_GPU_PAGES_IN_CPU_PAGE;
1182 for (count = 2; count < tmp; ++count) {
1183 uint64_t idx = pfn + count;
1184
1185 if (contiguous != (pages_addr[idx] ==
1186 pages_addr[idx - 1] + PAGE_SIZE))
1187 break;
1188 }
1189 if (!contiguous)
1190 count--;
1191 num_entries = count *
1192 AMDGPU_GPU_PAGES_IN_CPU_PAGE;
1193 }
1194
1195 if (!contiguous) {
1196 addr = cursor.start;
1197 params.pages_addr = pages_addr;
1198 } else {
1199 addr = pages_addr[cursor.start >> PAGE_SHIFT];
1200 params.pages_addr = NULL;
1201 }
1202
1203 } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
1204 addr = vram_base + cursor.start;
1205 } else {
1206 addr = 0;
1207 }
1208
1209 tmp = start + num_entries;
1210 r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
1211 if (r)
1212 goto error_free;
1213
1214 amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
1215 start = tmp;
1216 }
1217
1218 r = vm->update_funcs->commit(¶ms, fence);
1219 if (r)
1220 goto error_free;
1221
1222 if (params.needs_flush) {
1223 amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb);
1224 tlb_cb = NULL;
1225 }
1226
1227 amdgpu_vm_pt_free_list(adev, &params);
1228
1229 error_free:
1230 kfree(tlb_cb);
1231 amdgpu_vm_eviction_unlock(vm);
1232 drm_dev_exit(idx);
1233 return r;
1234 }
1235
1236 void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
1237 struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM])
1238 {
1239 spin_lock(&vm->status_lock);
1240 memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM);
1241 spin_unlock(&vm->status_lock);
1242 }
1243
1244 /**
1245 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
1246 *
1247 * @adev: amdgpu_device pointer
1248 * @bo_va: requested BO and VM object
1249 * @clear: if true clear the entries
1250 *
1251 * Fill in the page table entries for @bo_va.
1252 *
1253 * Returns:
1254 * 0 for success, -EINVAL for failure.
1255 */
1256 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
1257 bool clear)
1258 {
1259 struct amdgpu_bo *bo = bo_va->base.bo;
1260 struct amdgpu_vm *vm = bo_va->base.vm;
1261 struct amdgpu_bo_va_mapping *mapping;
1262 struct dma_fence **last_update;
1263 dma_addr_t *pages_addr = NULL;
1264 struct ttm_resource *mem;
1265 struct amdgpu_sync sync;
1266 bool flush_tlb = clear;
1267 uint64_t vram_base;
1268 uint64_t flags;
1269 bool uncached;
1270 int r;
1271
1272 amdgpu_sync_create(&sync);
1273 if (clear) {
1274 mem = NULL;
1275
1276 /* Implicitly sync to command submissions in the same VM before
1277 * unmapping.
1278 */
1279 r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
1280 AMDGPU_SYNC_EQ_OWNER, vm);
1281 if (r)
1282 goto error_free;
1283 if (bo) {
1284 r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
1285 if (r)
1286 goto error_free;
1287 }
1288 } else if (!bo) {
1289 mem = NULL;
1290
1291 /* PRT map operations don't need to sync to anything. */
1292
1293 } else {
1294 struct drm_gem_object *obj = &bo->tbo.base;
1295
1296 if (drm_gem_is_imported(obj) && bo_va->is_xgmi) {
1297 struct dma_buf *dma_buf = obj->import_attach->dmabuf;
1298 struct drm_gem_object *gobj = dma_buf->priv;
1299 struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
1300
1301 if (abo->tbo.resource &&
1302 abo->tbo.resource->mem_type == TTM_PL_VRAM)
1303 bo = gem_to_amdgpu_bo(gobj);
1304 }
1305 mem = bo->tbo.resource;
1306 if (mem && (mem->mem_type == TTM_PL_TT ||
1307 mem->mem_type == AMDGPU_PL_PREEMPT))
1308 pages_addr = bo->tbo.ttm->dma_address;
1309
1310 /* Implicitly sync to moving fences before mapping anything */
1311 r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
1312 AMDGPU_SYNC_EXPLICIT, vm);
1313 if (r)
1314 goto error_free;
1315 }
1316
1317 if (bo) {
1318 struct amdgpu_device *bo_adev;
1319
1320 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
1321
1322 if (amdgpu_bo_encrypted(bo))
1323 flags |= AMDGPU_PTE_TMZ;
1324
1325 bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
1326 vram_base = bo_adev->vm_manager.vram_base_offset;
1327 uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0;
1328 } else {
1329 flags = 0x0;
1330 vram_base = 0;
1331 uncached = false;
1332 }
1333
1334 if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
1335 last_update = &vm->last_update;
1336 else
1337 last_update = &bo_va->last_pt_update;
1338
1339 if (!clear && bo_va->base.moved) {
1340 flush_tlb = true;
1341 list_splice_init(&bo_va->valids, &bo_va->invalids);
1342
1343 } else if (bo_va->cleared != clear) {
1344 list_splice_init(&bo_va->valids, &bo_va->invalids);
1345 }
1346
1347 list_for_each_entry(mapping, &bo_va->invalids, list) {
1348 uint64_t update_flags = flags;
1349
1350 /* Normally bo_va->flags only contains the READABLE and WRITEABLE bits
1351 * here, but filter the flags first just in case.
1352 */
1353 if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
1354 update_flags &= ~AMDGPU_PTE_READABLE;
1355 if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
1356 update_flags &= ~AMDGPU_PTE_WRITEABLE;
1357
1358 /* Apply ASIC specific mapping flags */
1359 amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
1360 &update_flags);
1361
1362 trace_amdgpu_vm_bo_update(mapping);
1363
1364 r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
1365 !uncached, &sync, mapping->start,
1366 mapping->last, update_flags,
1367 mapping->offset, vram_base, mem,
1368 pages_addr, last_update);
1369 if (r)
1370 goto error_free;
1371 }
1372
1373 /* If the BO is not in its preferred location add it back to
1374 * the evicted list so that it gets validated again on the
1375 * next command submission.
1376 */
1377 if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
1378 if (bo->tbo.resource &&
1379 !(bo->preferred_domains &
1380 amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)))
1381 amdgpu_vm_bo_evicted(&bo_va->base);
1382 else
1383 amdgpu_vm_bo_idle(&bo_va->base);
1384 } else {
1385 amdgpu_vm_bo_done(&bo_va->base);
1386 }
1387
1388 list_splice_init(&bo_va->invalids, &bo_va->valids);
1389 bo_va->cleared = clear;
1390 bo_va->base.moved = false;
1391
1392 if (trace_amdgpu_vm_bo_mapping_enabled()) {
1393 list_for_each_entry(mapping, &bo_va->valids, list)
1394 trace_amdgpu_vm_bo_mapping(mapping);
1395 }
1396
1397 error_free:
1398 amdgpu_sync_free(&sync);
1399 return r;
1400 }
1401
1402 /**
1403 * amdgpu_vm_update_prt_state - update the global PRT state
1404 *
1405 * @adev: amdgpu_device pointer
1406 */
1407 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
1408 {
1409 unsigned long flags;
1410 bool enable;
1411
1412 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
1413 enable = !!atomic_read(&adev->vm_manager.num_prt_users);
1414 adev->gmc.gmc_funcs->set_prt(adev, enable);
1415 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
1416 }
1417
1418 /**
1419 * amdgpu_vm_prt_get - add a PRT user
1420 *
1421 * @adev: amdgpu_device pointer
1422 */
1423 static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
1424 {
1425 if (!adev->gmc.gmc_funcs->set_prt)
1426 return;
1427
1428 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
1429 amdgpu_vm_update_prt_state(adev);
1430 }
1431
1432 /**
1433 * amdgpu_vm_prt_put - drop a PRT user
1434 *
1435 * @adev: amdgpu_device pointer
1436 */
1437 static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
1438 {
1439 if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
1440 amdgpu_vm_update_prt_state(adev);
1441 }
1442
1443 /**
1444 * amdgpu_vm_prt_cb - callback for updating the PRT status
1445 *
1446 * @fence: fence for the callback
1447 * @_cb: the callback function
1448 */
1449 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
1450 {
1451 struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
1452
1453 amdgpu_vm_prt_put(cb->adev);
1454 kfree(cb);
1455 }
1456
1457 /**
1458 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
1459 *
1460 * @adev: amdgpu_device pointer
1461 * @fence: fence for the callback
1462 */
1463 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
1464 struct dma_fence *fence)
1465 {
1466 struct amdgpu_prt_cb *cb;
1467
1468 if (!adev->gmc.gmc_funcs->set_prt)
1469 return;
1470
1471 cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
1472 if (!cb) {
1473 /* Last resort when we are OOM */
1474 if (fence)
1475 dma_fence_wait(fence, false);
1476
1477 amdgpu_vm_prt_put(adev);
1478 } else {
1479 cb->adev = adev;
1480 if (!fence || dma_fence_add_callback(fence, &cb->cb,
1481 amdgpu_vm_prt_cb))
1482 amdgpu_vm_prt_cb(fence, &cb->cb);
1483 }
1484 }
1485
1486 /**
1487 * amdgpu_vm_free_mapping - free a mapping
1488 *
1489 * @adev: amdgpu_device pointer
1490 * @vm: requested vm
1491 * @mapping: mapping to be freed
1492 * @fence: fence of the unmap operation
1493 *
1494 * Free a mapping and make sure we decrease the PRT usage count if applicable.
1495 */
1496 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
1497 struct amdgpu_vm *vm,
1498 struct amdgpu_bo_va_mapping *mapping,
1499 struct dma_fence *fence)
1500 {
1501 if (mapping->flags & AMDGPU_VM_PAGE_PRT)
1502 amdgpu_vm_add_prt_cb(adev, fence);
1503 kfree(mapping);
1504 }
1505
1506 /**
1507 * amdgpu_vm_prt_fini - finish all prt mappings
1508 *
1509 * @adev: amdgpu_device pointer
1510 * @vm: requested vm
1511 *
1512 * Register a cleanup callback to disable PRT support after VM dies.
1513 */
1514 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
1515 {
1516 struct dma_resv *resv = vm->root.bo->tbo.base.resv;
1517 struct dma_resv_iter cursor;
1518 struct dma_fence *fence;
1519
1520 dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
1521 /* Add a callback for each fence in the reservation object */
1522 amdgpu_vm_prt_get(adev);
1523 amdgpu_vm_add_prt_cb(adev, fence);
1524 }
1525 }
1526
1527 /**
1528 * amdgpu_vm_clear_freed - clear freed BOs in the PT
1529 *
1530 * @adev: amdgpu_device pointer
1531 * @vm: requested vm
1532 * @fence: optional resulting fence (unchanged if no work needed to be done
1533 * or if an error occurred)
1534 *
1535 * Make sure all freed BOs are cleared in the PT.
1536 * PTs have to be reserved and mutex must be locked!
1537 *
1538 * Returns:
1539 * 0 for success.
1540 *
1541 */
1542 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
1543 struct amdgpu_vm *vm,
1544 struct dma_fence **fence)
1545 {
1546 struct amdgpu_bo_va_mapping *mapping;
1547 struct dma_fence *f = NULL;
1548 struct amdgpu_sync sync;
1549 int r;
1550
1551
1552 /*
1553 * Implicitly sync to command submissions in the same VM before
1554 * unmapping.
1555 */
1556 amdgpu_sync_create(&sync);
1557 r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
1558 AMDGPU_SYNC_EQ_OWNER, vm);
1559 if (r)
1560 goto error_free;
1561
1562 while (!list_empty(&vm->freed)) {
1563 mapping = list_first_entry(&vm->freed,
1564 struct amdgpu_bo_va_mapping, list);
1565 list_del(&mapping->list);
1566
1567 r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
1568 &sync, mapping->start, mapping->last,
1569 0, 0, 0, NULL, NULL, &f);
1570 amdgpu_vm_free_mapping(adev, vm, mapping, f);
1571 if (r) {
1572 dma_fence_put(f);
1573 goto error_free;
1574 }
1575 }
1576
1577 if (fence && f) {
1578 dma_fence_put(*fence);
1579 *fence = f;
1580 } else {
1581 dma_fence_put(f);
1582 }
1583
1584 error_free:
1585 amdgpu_sync_free(&sync);
1586 return r;
1587
1588 }
1589
1590 /**
1591 * amdgpu_vm_handle_moved - handle moved BOs in the PT
1592 *
1593 * @adev: amdgpu_device pointer
1594 * @vm: requested vm
1595 * @ticket: optional reservation ticket used to reserve the VM
1596 *
1597 * Make sure all BOs which are moved are updated in the PTs.
1598 *
1599 * Returns:
1600 * 0 for success.
1601 *
1602 * PTs have to be reserved!
1603 */
1604 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
1605 struct amdgpu_vm *vm,
1606 struct ww_acquire_ctx *ticket)
1607 {
1608 struct amdgpu_bo_va *bo_va;
1609 struct dma_resv *resv;
1610 bool clear, unlock;
1611 int r;
1612
1613 spin_lock(&vm->status_lock);
1614 while (!list_empty(&vm->moved)) {
1615 bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
1616 base.vm_status);
1617 spin_unlock(&vm->status_lock);
1618
1619 /* Per VM BOs never need to be cleared in the page tables */
1620 r = amdgpu_vm_bo_update(adev, bo_va, false);
1621 if (r)
1622 return r;
1623 spin_lock(&vm->status_lock);
1624 }
1625
1626 while (!list_empty(&vm->invalidated)) {
1627 bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
1628 base.vm_status);
1629 resv = bo_va->base.bo->tbo.base.resv;
1630 spin_unlock(&vm->status_lock);
1631
1632 /* Try to reserve the BO to avoid clearing its ptes */
1633 if (!adev->debug_vm && dma_resv_trylock(resv)) {
1634 clear = false;
1635 unlock = true;
1636 /* The caller is already holding the reservation lock */
1637 } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
1638 clear = false;
1639 unlock = false;
1640 /* Somebody else is using the BO right now */
1641 } else {
1642 clear = true;
1643 unlock = false;
1644 }
1645
1646 r = amdgpu_vm_bo_update(adev, bo_va, clear);
1647
1648 if (unlock)
1649 dma_resv_unlock(resv);
1650 if (r)
1651 return r;
1652
1653 /* Remember evicted DMABuf imports in compute VMs for later
1654 * validation
1655 */
1656 if (vm->is_compute_context &&
1657 drm_gem_is_imported(&bo_va->base.bo->tbo.base) &&
1658 (!bo_va->base.bo->tbo.resource ||
1659 bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM))
1660 amdgpu_vm_bo_evicted_user(&bo_va->base);
1661
1662 spin_lock(&vm->status_lock);
1663 }
1664 spin_unlock(&vm->status_lock);
1665
1666 return 0;
1667 }
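/*
 * Rough sketch of how the helpers above are typically driven by a submission
 * path while the VM is locked (exact ordering and error handling live in the
 * CS/userq code, this only shows the general shape):
 *
 *	amdgpu_vm_clear_freed(adev, vm, NULL);
 *	amdgpu_vm_handle_moved(adev, vm, ticket);
 *	amdgpu_vm_update_pdes(adev, vm, false);
 */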
1668
1669 /**
1670 * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
1671 *
1672 * @adev: amdgpu_device pointer
1673 * @vm: requested vm
1674 * @flush_type: flush type
1675 * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush.
1676 *
1677 * Flush TLB if needed for a compute VM.
1678 *
1679 * Returns:
1680 * 0 for success.
1681 */
1682 int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
1683 struct amdgpu_vm *vm,
1684 uint32_t flush_type,
1685 uint32_t xcc_mask)
1686 {
1687 uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
1688 bool all_hub = false;
1689 int xcc = 0, r = 0;
1690
1691 WARN_ON_ONCE(!vm->is_compute_context);
1692
1693 /*
1694 * It can be that we race and lose here, but that is extremely unlikely
1695 * and the worst thing which could happen is that we flush the changes
1696 * into the TLB once more which is harmless.
1697 */
1698 if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
1699 return 0;
1700
1701 if (adev->family == AMDGPU_FAMILY_AI ||
1702 adev->family == AMDGPU_FAMILY_RV)
1703 all_hub = true;
1704
1705 for_each_inst(xcc, xcc_mask) {
1706 r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
1707 all_hub, xcc);
1708 if (r)
1709 break;
1710 }
1711 return r;
1712 }
1713
1714 /**
1715 * amdgpu_vm_bo_add - add a bo to a specific vm
1716 *
1717 * @adev: amdgpu_device pointer
1718 * @vm: requested vm
1719 * @bo: amdgpu buffer object
1720 *
1721 * Add @bo into the requested vm.
1722 * Add @bo to the list of bos associated with the vm
1723 *
1724 * Returns:
1725 * Newly added bo_va or NULL for failure
1726 *
1727 * Object has to be reserved!
1728 */
1729 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
1730 struct amdgpu_vm *vm,
1731 struct amdgpu_bo *bo)
1732 {
1733 struct amdgpu_bo_va *bo_va;
1734
1735 bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
1736 if (bo_va == NULL) {
1737 return NULL;
1738 }
1739 amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
1740
1741 bo_va->ref_count = 1;
1742 bo_va->last_pt_update = dma_fence_get_stub();
1743 INIT_LIST_HEAD(&bo_va->valids);
1744 INIT_LIST_HEAD(&bo_va->invalids);
1745
1746 if (!bo)
1747 return bo_va;
1748
1749 dma_resv_assert_held(bo->tbo.base.resv);
1750 if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
1751 bo_va->is_xgmi = true;
1752 /* Power up XGMI if it can be potentially used */
1753 amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
1754 }
1755
1756 return bo_va;
1757 }
1758
1759
1760 /**
1761 * amdgpu_vm_bo_insert_map - insert a new mapping
1762 *
1763 * @adev: amdgpu_device pointer
1764 * @bo_va: bo_va to store the address
1765 * @mapping: the mapping to insert
1766 *
1767 * Insert a new mapping into all structures.
1768 */
1769 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
1770 struct amdgpu_bo_va *bo_va,
1771 struct amdgpu_bo_va_mapping *mapping)
1772 {
1773 struct amdgpu_vm *vm = bo_va->base.vm;
1774 struct amdgpu_bo *bo = bo_va->base.bo;
1775
1776 mapping->bo_va = bo_va;
1777 list_add(&mapping->list, &bo_va->invalids);
1778 amdgpu_vm_it_insert(mapping, &vm->va);
1779
1780 if (mapping->flags & AMDGPU_VM_PAGE_PRT)
1781 amdgpu_vm_prt_get(adev);
1782
1783 if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
1784 amdgpu_vm_bo_moved(&bo_va->base);
1785
1786 trace_amdgpu_vm_bo_map(bo_va, mapping);
1787 }
1788
1789 /* Validate operation parameters to prevent potential abuse */
1790 static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
1791 struct amdgpu_bo *bo,
1792 uint64_t saddr,
1793 uint64_t offset,
1794 uint64_t size)
1795 {
1796 uint64_t tmp, lpfn;
1797
1798 if (saddr & AMDGPU_GPU_PAGE_MASK
1799 || offset & AMDGPU_GPU_PAGE_MASK
1800 || size & AMDGPU_GPU_PAGE_MASK)
1801 return -EINVAL;
1802
1803 if (check_add_overflow(saddr, size, &tmp)
1804 || check_add_overflow(offset, size, &tmp)
1805 || size == 0 /* which also leads to end < begin */)
1806 return -EINVAL;
1807
1808 	/* make sure the object fits at this offset */
1809 if (bo && offset + size > amdgpu_bo_size(bo))
1810 return -EINVAL;
1811
1812 	/* Ensure the last pfn does not exceed max_pfn */
1813 lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
1814 if (lpfn >= adev->vm_manager.max_pfn)
1815 return -EINVAL;
1816
1817 return 0;
1818 }
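
/*
 * Illustrative sketch only (not called by the driver): with 4 KiB GPU pages
 * the checks above behave roughly like this, assuming @bo is at least
 * 0x2000 bytes large:
 *
 *	amdgpu_vm_verify_parameters(adev, bo, 0x100000, 0, 0x2000); // 0
 *	amdgpu_vm_verify_parameters(adev, bo, 0x100800, 0, 0x2000); // -EINVAL, saddr not page aligned
 *	amdgpu_vm_verify_parameters(adev, bo, 0x100000, 0, 0);      // -EINVAL, empty range
 */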
1819
1820 /**
1821 * amdgpu_vm_bo_map - map bo inside a vm
1822 *
1823 * @adev: amdgpu_device pointer
1824 * @bo_va: bo_va to store the address
1825 * @saddr: where to map the BO
1826 * @offset: requested offset in the BO
1827 * @size: BO size in bytes
1828 * @flags: attributes of pages (read/write/valid/etc.)
1829 *
1830 * Add a mapping of the BO at the specified addr into the VM.
1831 *
1832 * Returns:
1833 * 0 for success, error for failure.
1834 *
1835 * Object has to be reserved and unreserved outside!
1836 */
1837 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
1838 struct amdgpu_bo_va *bo_va,
1839 uint64_t saddr, uint64_t offset,
1840 uint64_t size, uint32_t flags)
1841 {
1842 struct amdgpu_bo_va_mapping *mapping, *tmp;
1843 struct amdgpu_bo *bo = bo_va->base.bo;
1844 struct amdgpu_vm *vm = bo_va->base.vm;
1845 uint64_t eaddr;
1846 int r;
1847
1848 r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
1849 if (r)
1850 return r;
1851
1852 saddr /= AMDGPU_GPU_PAGE_SIZE;
1853 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
1854
1855 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
1856 if (tmp) {
1857 /* bo and tmp overlap, invalid addr */
1858 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
1859 "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
1860 tmp->start, tmp->last + 1);
1861 return -EINVAL;
1862 }
1863
1864 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1865 if (!mapping)
1866 return -ENOMEM;
1867
1868 mapping->start = saddr;
1869 mapping->last = eaddr;
1870 mapping->offset = offset;
1871 mapping->flags = flags;
1872
1873 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1874
1875 return 0;
1876 }
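
/*
 * Minimal usage sketch (assumptions: "bo" and "vm" are an already created
 * amdgpu_bo/amdgpu_vm pair, 0x400000 is a free, page aligned VA, and the
 * AMDGPU_VM_PAGE_* flags come from amdgpu_drm.h):
 *
 *	struct amdgpu_bo_va *bo_va;
 *	int r;
 *
 *	r = amdgpu_bo_reserve(bo, true);
 *	if (r)
 *		return r;
 *	bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *	if (!bo_va) {
 *		r = -ENOMEM;
 *		goto unreserve;
 *	}
 *	r = amdgpu_vm_bo_map(adev, bo_va, 0x400000, 0, amdgpu_bo_size(bo),
 *			     AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE);
 * unreserve:
 *	amdgpu_bo_unreserve(bo);
 */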
1877
1878 /**
1879 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
1880 *
1881 * @adev: amdgpu_device pointer
1882 * @bo_va: bo_va to store the address
1883 * @saddr: where to map the BO
1884 * @offset: requested offset in the BO
1885 * @size: BO size in bytes
1886 * @flags: attributes of pages (read/write/valid/etc.)
1887 *
1888 * Add a mapping of the BO at the specified addr into the VM. Replace existing
1889 * mappings as we do so.
1890 *
1891 * Returns:
1892 * 0 for success, error for failure.
1893 *
1894 * Object has to be reserved and unreserved outside!
1895 */
1896 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
1897 struct amdgpu_bo_va *bo_va,
1898 uint64_t saddr, uint64_t offset,
1899 uint64_t size, uint32_t flags)
1900 {
1901 struct amdgpu_bo_va_mapping *mapping;
1902 struct amdgpu_bo *bo = bo_va->base.bo;
1903 uint64_t eaddr;
1904 int r;
1905
1906 r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
1907 if (r)
1908 return r;
1909
1910 /* Allocate all the needed memory */
1911 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
1912 if (!mapping)
1913 return -ENOMEM;
1914
1915 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
1916 if (r) {
1917 kfree(mapping);
1918 return r;
1919 }
1920
1921 saddr /= AMDGPU_GPU_PAGE_SIZE;
1922 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
1923
1924 mapping->start = saddr;
1925 mapping->last = eaddr;
1926 mapping->offset = offset;
1927 mapping->flags = flags;
1928
1929 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
1930
1931 return 0;
1932 }
1933
1934 /**
1935 * amdgpu_vm_bo_unmap - remove bo mapping from vm
1936 *
1937 * @adev: amdgpu_device pointer
1938 * @bo_va: bo_va to remove the address from
1939 * @saddr: where the BO is mapped
1940 *
1941 * Remove a mapping of the BO at the specified addr from the VM.
1942 *
1943 * Returns:
1944 * 0 for success, error for failure.
1945 *
1946 * Object has to be reserved and unreserved outside!
1947 */
1948 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
1949 struct amdgpu_bo_va *bo_va,
1950 uint64_t saddr)
1951 {
1952 struct amdgpu_bo_va_mapping *mapping;
1953 struct amdgpu_vm *vm = bo_va->base.vm;
1954 bool valid = true;
1955
1956 saddr /= AMDGPU_GPU_PAGE_SIZE;
1957
1958 list_for_each_entry(mapping, &bo_va->valids, list) {
1959 if (mapping->start == saddr)
1960 break;
1961 }
1962
1963 if (&mapping->list == &bo_va->valids) {
1964 valid = false;
1965
1966 list_for_each_entry(mapping, &bo_va->invalids, list) {
1967 if (mapping->start == saddr)
1968 break;
1969 }
1970
1971 if (&mapping->list == &bo_va->invalids)
1972 return -ENOENT;
1973 }
1974
1975 list_del(&mapping->list);
1976 amdgpu_vm_it_remove(mapping, &vm->va);
1977 mapping->bo_va = NULL;
1978 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
1979
1980 if (valid)
1981 list_add(&mapping->list, &vm->freed);
1982 else
1983 amdgpu_vm_free_mapping(adev, vm, mapping,
1984 bo_va->last_pt_update);
1985
1986 return 0;
1987 }
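
/*
 * Sketch of the matching teardown for the mapping example above (assumes the
 * same "bo" and "bo_va" and the byte based start address used for the map
 * call):
 *
 *	r = amdgpu_bo_reserve(bo, true);
 *	if (r)
 *		return r;
 *	r = amdgpu_vm_bo_unmap(adev, bo_va, 0x400000);
 *	amdgpu_bo_unreserve(bo);
 */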
1988
1989 /**
1990 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
1991 *
1992 * @adev: amdgpu_device pointer
1993 * @vm: VM structure to use
1994 * @saddr: start of the range
1995 * @size: size of the range
1996 *
1997 * Remove all mappings in a range, split them as appropriate.
1998 *
1999 * Returns:
2000 * 0 for success, error for failure.
2001 */
2002 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2003 struct amdgpu_vm *vm,
2004 uint64_t saddr, uint64_t size)
2005 {
2006 struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2007 LIST_HEAD(removed);
2008 uint64_t eaddr;
2009 int r;
2010
2011 r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
2012 if (r)
2013 return r;
2014
2015 saddr /= AMDGPU_GPU_PAGE_SIZE;
2016 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
2017
2018 /* Allocate all the needed memory */
2019 before = kzalloc(sizeof(*before), GFP_KERNEL);
2020 if (!before)
2021 return -ENOMEM;
2022 INIT_LIST_HEAD(&before->list);
2023
2024 after = kzalloc(sizeof(*after), GFP_KERNEL);
2025 if (!after) {
2026 kfree(before);
2027 return -ENOMEM;
2028 }
2029 INIT_LIST_HEAD(&after->list);
2030
2031 /* Now gather all removed mappings */
2032 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2033 while (tmp) {
2034 /* Remember mapping split at the start */
2035 if (tmp->start < saddr) {
2036 before->start = tmp->start;
2037 before->last = saddr - 1;
2038 before->offset = tmp->offset;
2039 before->flags = tmp->flags;
2040 before->bo_va = tmp->bo_va;
2041 list_add(&before->list, &tmp->bo_va->invalids);
2042 }
2043
2044 /* Remember mapping split at the end */
2045 if (tmp->last > eaddr) {
2046 after->start = eaddr + 1;
2047 after->last = tmp->last;
2048 after->offset = tmp->offset;
2049 after->offset += (after->start - tmp->start) << PAGE_SHIFT;
2050 after->flags = tmp->flags;
2051 after->bo_va = tmp->bo_va;
2052 list_add(&after->list, &tmp->bo_va->invalids);
2053 }
2054
2055 list_del(&tmp->list);
2056 list_add(&tmp->list, &removed);
2057
2058 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2059 }
2060
2061 /* And free them up */
2062 list_for_each_entry_safe(tmp, next, &removed, list) {
2063 amdgpu_vm_it_remove(tmp, &vm->va);
2064 list_del(&tmp->list);
2065
2066 if (tmp->start < saddr)
2067 tmp->start = saddr;
2068 if (tmp->last > eaddr)
2069 tmp->last = eaddr;
2070
2071 tmp->bo_va = NULL;
2072 list_add(&tmp->list, &vm->freed);
2073 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2074 }
2075
2076 /* Insert partial mapping before the range */
2077 if (!list_empty(&before->list)) {
2078 struct amdgpu_bo *bo = before->bo_va->base.bo;
2079
2080 amdgpu_vm_it_insert(before, &vm->va);
2081 if (before->flags & AMDGPU_PTE_PRT_FLAG(adev))
2082 amdgpu_vm_prt_get(adev);
2083
2084 if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
2085 !before->bo_va->base.moved)
2086 amdgpu_vm_bo_moved(&before->bo_va->base);
2087 } else {
2088 kfree(before);
2089 }
2090
2091 /* Insert partial mapping after the range */
2092 if (!list_empty(&after->list)) {
2093 struct amdgpu_bo *bo = after->bo_va->base.bo;
2094
2095 amdgpu_vm_it_insert(after, &vm->va);
2096 if (after->flags & AMDGPU_PTE_PRT_FLAG(adev))
2097 amdgpu_vm_prt_get(adev);
2098
2099 if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
2100 !after->bo_va->base.moved)
2101 amdgpu_vm_bo_moved(&after->bo_va->base);
2102 } else {
2103 kfree(after);
2104 }
2105
2106 return 0;
2107 }
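
/*
 * Worked example (illustration only): if a single mapping covers GPU pages
 * 0x100..0x1ff and the caller clears the byte range corresponding to pages
 * 0x140..0x17f, the code above queues pages 0x140..0x17f to vm->freed and
 * re-inserts two remainders, "before" = 0x100..0x13f and
 * "after" = 0x180..0x1ff:
 *
 *	r = amdgpu_vm_bo_clear_mappings(adev, vm,
 *					0x140ULL << AMDGPU_GPU_PAGE_SHIFT,
 *					0x40ULL << AMDGPU_GPU_PAGE_SHIFT);
 */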
2108
2109 /**
2110 * amdgpu_vm_bo_lookup_mapping - find mapping by address
2111 *
2112 * @vm: the requested VM
2113 * @addr: the address
2114 *
2115 * Find a mapping by its address.
2116 *
2117 * Returns:
2118 * The amdgpu_bo_va_mapping matching for addr or NULL
2119 *
2120 */
2121 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2122 uint64_t addr)
2123 {
2124 return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2125 }
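
/*
 * Usage sketch: the interval tree is indexed in GPU page units, so byte
 * addresses need to be converted first (assumption: "va" is a byte address
 * inside the VM):
 *
 *	struct amdgpu_bo_va_mapping *mapping;
 *	struct amdgpu_bo *bo = NULL;
 *
 *	mapping = amdgpu_vm_bo_lookup_mapping(vm, va / AMDGPU_GPU_PAGE_SIZE);
 *	if (mapping && mapping->bo_va)
 *		bo = mapping->bo_va->base.bo;
 */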
2126
2127 /**
2128 * amdgpu_vm_bo_trace_cs - trace all reserved mappings
2129 *
2130 * @vm: the requested vm
2131 * @ticket: CS ticket
2132 *
2133 * Trace all mappings of BOs reserved during a command submission.
2134 */
2135 void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
2136 {
2137 struct amdgpu_bo_va_mapping *mapping;
2138
2139 if (!trace_amdgpu_vm_bo_cs_enabled())
2140 return;
2141
2142 for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
2143 mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
2144 if (mapping->bo_va && mapping->bo_va->base.bo) {
2145 struct amdgpu_bo *bo;
2146
2147 bo = mapping->bo_va->base.bo;
2148 if (dma_resv_locking_ctx(bo->tbo.base.resv) !=
2149 ticket)
2150 continue;
2151 }
2152
2153 trace_amdgpu_vm_bo_cs(mapping);
2154 }
2155 }
2156
2157 /**
2158 * amdgpu_vm_bo_del - remove a bo from a specific vm
2159 *
2160 * @adev: amdgpu_device pointer
2161 * @bo_va: requested bo_va
2162 *
2163 * Remove @bo_va->bo from the requested vm.
2164 *
2165 * Object has to be reserved!
2166 */
2167 void amdgpu_vm_bo_del(struct amdgpu_device *adev,
2168 struct amdgpu_bo_va *bo_va)
2169 {
2170 struct amdgpu_bo_va_mapping *mapping, *next;
2171 struct amdgpu_bo *bo = bo_va->base.bo;
2172 struct amdgpu_vm *vm = bo_va->base.vm;
2173 struct amdgpu_vm_bo_base **base;
2174
2175 dma_resv_assert_held(vm->root.bo->tbo.base.resv);
2176
2177 if (bo) {
2178 dma_resv_assert_held(bo->tbo.base.resv);
2179 if (amdgpu_vm_is_bo_always_valid(vm, bo))
2180 ttm_bo_set_bulk_move(&bo->tbo, NULL);
2181
2182 for (base = &bo_va->base.bo->vm_bo; *base;
2183 base = &(*base)->next) {
2184 if (*base != &bo_va->base)
2185 continue;
2186
2187 amdgpu_vm_update_stats(*base, bo->tbo.resource, -1);
2188 *base = bo_va->base.next;
2189 break;
2190 }
2191 }
2192
2193 spin_lock(&vm->status_lock);
2194 list_del(&bo_va->base.vm_status);
2195 spin_unlock(&vm->status_lock);
2196
2197 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2198 list_del(&mapping->list);
2199 amdgpu_vm_it_remove(mapping, &vm->va);
2200 mapping->bo_va = NULL;
2201 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2202 list_add(&mapping->list, &vm->freed);
2203 }
2204 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2205 list_del(&mapping->list);
2206 amdgpu_vm_it_remove(mapping, &vm->va);
2207 amdgpu_vm_free_mapping(adev, vm, mapping,
2208 bo_va->last_pt_update);
2209 }
2210
2211 dma_fence_put(bo_va->last_pt_update);
2212
2213 if (bo && bo_va->is_xgmi)
2214 amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
2215
2216 kfree(bo_va);
2217 }
2218
2219 /**
2220 * amdgpu_vm_evictable - check if we can evict a VM
2221 *
2222 * @bo: A page table of the VM.
2223 *
2224 * Check if it is possible to evict a VM.
2225 */
2226 bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
2227 {
2228 struct amdgpu_vm_bo_base *bo_base = bo->vm_bo;
2229
2230 /* Page tables of a destroyed VM can go away immediately */
2231 if (!bo_base || !bo_base->vm)
2232 return true;
2233
2234 /* Don't evict VM page tables while they are busy */
2235 if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
2236 return false;
2237
2238 /* Try to block ongoing updates */
2239 if (!amdgpu_vm_eviction_trylock(bo_base->vm))
2240 return false;
2241
2242 /* Don't evict VM page tables while they are updated */
2243 if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
2244 amdgpu_vm_eviction_unlock(bo_base->vm);
2245 return false;
2246 }
2247
2248 bo_base->vm->evicting = true;
2249 amdgpu_vm_eviction_unlock(bo_base->vm);
2250 return true;
2251 }
2252
2253 /**
2254 * amdgpu_vm_bo_invalidate - mark the bo as invalid
2255 *
2256 * @bo: amdgpu buffer object
2257 * @evicted: is the BO evicted
2258 *
2259 * Mark @bo as invalid.
2260 */
2261 void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted)
2262 {
2263 struct amdgpu_vm_bo_base *bo_base;
2264
2265 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
2266 struct amdgpu_vm *vm = bo_base->vm;
2267
2268 if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
2269 amdgpu_vm_bo_evicted(bo_base);
2270 continue;
2271 }
2272
2273 if (bo_base->moved)
2274 continue;
2275 bo_base->moved = true;
2276
2277 if (bo->tbo.type == ttm_bo_type_kernel)
2278 amdgpu_vm_bo_relocated(bo_base);
2279 else if (amdgpu_vm_is_bo_always_valid(vm, bo))
2280 amdgpu_vm_bo_moved(bo_base);
2281 else
2282 amdgpu_vm_bo_invalidated(bo_base);
2283 }
2284 }
2285
2286 /**
2287 * amdgpu_vm_bo_move - handle BO move
2288 *
2289 * @bo: amdgpu buffer object
2290 * @new_mem: the new placement of the BO move
2291 * @evicted: is the BO evicted
2292 *
2293 * Update the memory stats for the new placement and mark @bo as invalid.
2294 */
2295 void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem,
2296 bool evicted)
2297 {
2298 struct amdgpu_vm_bo_base *bo_base;
2299
2300 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
2301 struct amdgpu_vm *vm = bo_base->vm;
2302
2303 spin_lock(&vm->status_lock);
2304 amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1);
2305 amdgpu_vm_update_stats_locked(bo_base, new_mem, +1);
2306 spin_unlock(&vm->status_lock);
2307 }
2308
2309 amdgpu_vm_bo_invalidate(bo, evicted);
2310 }
2311
2312 /**
2313 * amdgpu_vm_get_block_size - calculate VM page table size as power of two
2314 *
2315 * @vm_size: VM size
2316 *
2317 * Returns:
2318 * VM page table size as a power of two
2319 */
2320 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2321 {
2322 /* Total bits covered by PD + PTs */
2323 unsigned bits = ilog2(vm_size) + 18;
2324
2325 	/* Make sure the PD is 4K in size up to 8GB address space.
2326 	 * Above that, split equally between PD and PTs */
2327 if (vm_size <= 8)
2328 return (bits - 9);
2329 else
2330 return ((bits + 3) / 2);
2331 }
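
/*
 * Worked example: for a 256 GB VM, vm_size = 256 and the address space spans
 * ilog2(256) + 18 = 26 bits of GPU pages; since 256 > 8 the split is
 * (26 + 3) / 2 = 14, i.e. each page table covers 2^14 pages (64 MiB with
 * 4 KiB pages) and the page directory covers the remaining 12 bits
 * (4096 entries). For an 8 GB VM the result is 21 - 9 = 12, which keeps the
 * PD at exactly 512 entries (4 KiB).
 */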
2332
2333 /**
2334 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2335 *
2336 * @adev: amdgpu_device pointer
2337 * @min_vm_size: the minimum vm size in GB if it's set auto
2338 * @fragment_size_default: Default PTE fragment size
2339 * @max_level: max VMPT level
2340 * @max_bits: max address space size in bits
2341 *
2342 */
2343 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
2344 uint32_t fragment_size_default, unsigned max_level,
2345 unsigned max_bits)
2346 {
2347 unsigned int max_size = 1 << (max_bits - 30);
2348 unsigned int vm_size;
2349 uint64_t tmp;
2350
2351 /* adjust vm size first */
2352 if (amdgpu_vm_size != -1) {
2353 vm_size = amdgpu_vm_size;
2354 if (vm_size > max_size) {
2355 dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
2356 amdgpu_vm_size, max_size);
2357 vm_size = max_size;
2358 }
2359 } else {
2360 struct sysinfo si;
2361 unsigned int phys_ram_gb;
2362
2363 /* Optimal VM size depends on the amount of physical
2364 * RAM available. Underlying requirements and
2365 * assumptions:
2366 *
2367 * - Need to map system memory and VRAM from all GPUs
2368 * - VRAM from other GPUs not known here
2369 * - Assume VRAM <= system memory
2370 * - On GFX8 and older, VM space can be segmented for
2371 * different MTYPEs
2372 * - Need to allow room for fragmentation, guard pages etc.
2373 *
2374 * This adds up to a rough guess of system memory x3.
2375 * Round up to power of two to maximize the available
2376 * VM size with the given page table size.
2377 */
2378 si_meminfo(&si);
2379 phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
2380 (1 << 30) - 1) >> 30;
2381 vm_size = roundup_pow_of_two(
2382 clamp(phys_ram_gb * 3, min_vm_size, max_size));
2383 }
2384
2385 adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
2386
2387 tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
2388 if (amdgpu_vm_block_size != -1)
2389 tmp >>= amdgpu_vm_block_size - 9;
2390 tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
2391 adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp);
2392 switch (adev->vm_manager.num_level) {
2393 case 3:
2394 adev->vm_manager.root_level = AMDGPU_VM_PDB2;
2395 break;
2396 case 2:
2397 adev->vm_manager.root_level = AMDGPU_VM_PDB1;
2398 break;
2399 case 1:
2400 adev->vm_manager.root_level = AMDGPU_VM_PDB0;
2401 break;
2402 default:
2403 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
2404 }
2405 	/* block size depends on vm size and hw setup */
2406 if (amdgpu_vm_block_size != -1)
2407 adev->vm_manager.block_size =
2408 min((unsigned)amdgpu_vm_block_size, max_bits
2409 - AMDGPU_GPU_PAGE_SHIFT
2410 - 9 * adev->vm_manager.num_level);
2411 else if (adev->vm_manager.num_level > 1)
2412 adev->vm_manager.block_size = 9;
2413 else
2414 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);
2415
2416 if (amdgpu_vm_fragment_size == -1)
2417 adev->vm_manager.fragment_size = fragment_size_default;
2418 else
2419 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2420
2421 dev_info(
2422 adev->dev,
2423 "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
2424 vm_size, adev->vm_manager.num_level + 1,
2425 adev->vm_manager.block_size, adev->vm_manager.fragment_size);
2426 }
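
/*
 * Example (assuming the standard amdgpu module parameters): the automatic
 * sizing above can be overridden on the kernel command line, e.g.
 *
 *	amdgpu.vm_size=1024 amdgpu.vm_block_size=9 amdgpu.vm_fragment_size=9
 *
 * which requests a 1024 GB address space; max_pfn then becomes
 * 1024 << 18 = 2^28 GPU pages (one GB holds 2^30 / 2^12 = 2^18 pages of 4 KiB).
 */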
2427
2428 /**
2429 * amdgpu_vm_wait_idle - wait for the VM to become idle
2430 *
2431 * @vm: VM object to wait for
2432 * @timeout: timeout to wait for VM to become idle
2433 */
2434 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
2435 {
2436 timeout = drm_sched_entity_flush(&vm->immediate, timeout);
2437 if (timeout <= 0)
2438 return timeout;
2439
2440 return drm_sched_entity_flush(&vm->delayed, timeout);
2441 }
2442
2443 static void amdgpu_vm_destroy_task_info(struct kref *kref)
2444 {
2445 struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount);
2446
2447 kfree(ti);
2448 }
2449
2450 static inline struct amdgpu_vm *
2451 amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid)
2452 {
2453 struct amdgpu_vm *vm;
2454 unsigned long flags;
2455
2456 xa_lock_irqsave(&adev->vm_manager.pasids, flags);
2457 vm = xa_load(&adev->vm_manager.pasids, pasid);
2458 xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
2459
2460 return vm;
2461 }
2462
2463 /**
2464 * amdgpu_vm_put_task_info - reference down the vm task_info ptr
2465 *
2466 * @task_info: task_info struct under discussion.
2467 *
2468 * frees the vm task_info ptr at the last put
2469 */
2470 void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info)
2471 {
2472 if (task_info)
2473 kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info);
2474 }
2475
2476 /**
2477 * amdgpu_vm_get_task_info_vm - Extracts task info for a vm.
2478 *
2479 * @vm: VM to get info from
2480 *
2481 * Returns the reference counted task_info structure, which must be
2482 * referenced down with amdgpu_vm_put_task_info.
2483 */
2484 struct amdgpu_task_info *
2485 amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm)
2486 {
2487 struct amdgpu_task_info *ti = NULL;
2488
2489 if (vm) {
2490 ti = vm->task_info;
2491 kref_get(&vm->task_info->refcount);
2492 }
2493
2494 return ti;
2495 }
2496
2497 /**
2498 * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID.
2499 *
2500 * @adev: amdgpu device pointer
2501 * @pasid: PASID identifier for VM
2502 *
2503 * Returns the reference counted task_info structure, which must be
2504 * referenced down with amdgpu_vm_put_task_info.
2505 */
2506 struct amdgpu_task_info *
2507 amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid)
2508 {
2509 return amdgpu_vm_get_task_info_vm(
2510 amdgpu_vm_get_vm_from_pasid(adev, pasid));
2511 }
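
/*
 * Typical get/put pattern (sketch; assumes a fault handler that only needs
 * the process name for a log message):
 *
 *	struct amdgpu_task_info *ti;
 *
 *	ti = amdgpu_vm_get_task_info_pasid(adev, pasid);
 *	if (ti) {
 *		dev_info(adev->dev, "fault from process %s\n", ti->process_name);
 *		amdgpu_vm_put_task_info(ti);
 *	}
 */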
2512
2513 static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm)
2514 {
2515 vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL);
2516 if (!vm->task_info)
2517 return -ENOMEM;
2518
2519 kref_init(&vm->task_info->refcount);
2520 return 0;
2521 }
2522
2523 /**
2524 * amdgpu_vm_set_task_info - Sets VMs task info.
2525 *
2526 * @vm: vm for which to set the info
2527 */
2528 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
2529 {
2530 if (!vm->task_info)
2531 return;
2532
2533 if (vm->task_info->task.pid == current->pid)
2534 return;
2535
2536 vm->task_info->task.pid = current->pid;
2537 get_task_comm(vm->task_info->task.comm, current);
2538
2539 if (current->group_leader->mm != current->mm)
2540 return;
2541
2542 vm->task_info->tgid = current->group_leader->pid;
2543 get_task_comm(vm->task_info->process_name, current->group_leader);
2544 }
2545
2546 /**
2547 * amdgpu_vm_init - initialize a vm instance
2548 *
2549 * @adev: amdgpu_device pointer
2550 * @vm: requested vm
2551 * @xcp_id: GPU partition selection id
2552 * @pasid: the pasid the VM is using on this GPU
2553 *
2554 * Init @vm fields.
2555 *
2556 * Returns:
2557 * 0 for success, error for failure.
2558 */
2559 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2560 int32_t xcp_id, uint32_t pasid)
2561 {
2562 struct amdgpu_bo *root_bo;
2563 struct amdgpu_bo_vm *root;
2564 int r, i;
2565
2566 vm->va = RB_ROOT_CACHED;
2567 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2568 vm->reserved_vmid[i] = NULL;
2569 INIT_LIST_HEAD(&vm->evicted);
2570 INIT_LIST_HEAD(&vm->evicted_user);
2571 INIT_LIST_HEAD(&vm->relocated);
2572 INIT_LIST_HEAD(&vm->moved);
2573 INIT_LIST_HEAD(&vm->idle);
2574 INIT_LIST_HEAD(&vm->invalidated);
2575 spin_lock_init(&vm->status_lock);
2576 INIT_LIST_HEAD(&vm->freed);
2577 INIT_LIST_HEAD(&vm->done);
2578 INIT_KFIFO(vm->faults);
2579
2580 r = amdgpu_vm_init_entities(adev, vm);
2581 if (r)
2582 return r;
2583
2584 ttm_lru_bulk_move_init(&vm->lru_bulk_move);
2585
2586 vm->is_compute_context = false;
2587
2588 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2589 AMDGPU_VM_USE_CPU_FOR_GFX);
2590
2591 dev_dbg(adev->dev, "VM update mode is %s\n",
2592 vm->use_cpu_for_update ? "CPU" : "SDMA");
2593 WARN_ONCE((vm->use_cpu_for_update &&
2594 !amdgpu_gmc_vram_full_visible(&adev->gmc)),
2595 "CPU update of VM recommended only for large BAR system\n");
2596
2597 if (vm->use_cpu_for_update)
2598 vm->update_funcs = &amdgpu_vm_cpu_funcs;
2599 else
2600 vm->update_funcs = &amdgpu_vm_sdma_funcs;
2601
2602 vm->last_update = dma_fence_get_stub();
2603 vm->last_unlocked = dma_fence_get_stub();
2604 vm->last_tlb_flush = dma_fence_get_stub();
2605 vm->generation = amdgpu_vm_generation(adev, NULL);
2606
2607 mutex_init(&vm->eviction_lock);
2608 vm->evicting = false;
2609 vm->tlb_fence_context = dma_fence_context_alloc(1);
2610
2611 r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
2612 false, &root, xcp_id);
2613 if (r)
2614 goto error_free_delayed;
2615
2616 root_bo = amdgpu_bo_ref(&root->bo);
2617 r = amdgpu_bo_reserve(root_bo, true);
2618 if (r) {
2619 amdgpu_bo_unref(&root_bo);
2620 goto error_free_delayed;
2621 }
2622
2623 amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
2624 r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
2625 if (r)
2626 goto error_free_root;
2627
2628 r = amdgpu_vm_pt_clear(adev, vm, root, false);
2629 if (r)
2630 goto error_free_root;
2631
2632 r = amdgpu_vm_create_task_info(vm);
2633 if (r)
2634 dev_dbg(adev->dev, "Failed to create task info for VM\n");
2635
2636 /* Store new PASID in XArray (if non-zero) */
2637 if (pasid != 0) {
2638 r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL));
2639 if (r < 0)
2640 goto error_free_root;
2641
2642 vm->pasid = pasid;
2643 }
2644
2645 amdgpu_bo_unreserve(vm->root.bo);
2646 amdgpu_bo_unref(&root_bo);
2647
2648 return 0;
2649
2650 error_free_root:
2651 /* If PASID was partially set, erase it from XArray before failing */
2652 if (vm->pasid != 0) {
2653 xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
2654 vm->pasid = 0;
2655 }
2656 amdgpu_vm_pt_free_root(adev, vm);
2657 amdgpu_bo_unreserve(vm->root.bo);
2658 amdgpu_bo_unref(&root_bo);
2659
2660 error_free_delayed:
2661 dma_fence_put(vm->last_tlb_flush);
2662 dma_fence_put(vm->last_unlocked);
2663 ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
2664 amdgpu_vm_fini_entities(vm);
2665
2666 return r;
2667 }
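
/*
 * Lifetime sketch (illustration, not the actual driver open path; assumes a
 * PASID was already allocated, e.g. with amdgpu_pasid_alloc(), and partition
 * id 0):
 *
 *	r = amdgpu_vm_init(adev, vm, 0, pasid);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_vm_fini(adev, vm);
 */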
2668
2669 /**
2670 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
2671 *
2672 * @adev: amdgpu_device pointer
2673 * @vm: requested vm
2674 *
2675 * This only works on GFX VMs that don't have any BOs added and no
2676 * page tables allocated yet.
2677 *
2678 * Changes the following VM parameters:
2679 * - use_cpu_for_update
2680 * - pte_supports_ats
2681 *
2682 * Reinitializes the page directory to reflect the changed ATS
2683 * setting.
2684 *
2685 * Returns:
2686 * 0 for success, -errno for errors.
2687 */
2688 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2689 {
2690 int r;
2691
2692 r = amdgpu_bo_reserve(vm->root.bo, true);
2693 if (r)
2694 return r;
2695
2696 /* Update VM state */
2697 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2698 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2699 dev_dbg(adev->dev, "VM update mode is %s\n",
2700 vm->use_cpu_for_update ? "CPU" : "SDMA");
2701 WARN_ONCE((vm->use_cpu_for_update &&
2702 !amdgpu_gmc_vram_full_visible(&adev->gmc)),
2703 "CPU update of VM recommended only for large BAR system\n");
2704
2705 if (vm->use_cpu_for_update) {
2706 /* Sync with last SDMA update/clear before switching to CPU */
2707 r = amdgpu_bo_sync_wait(vm->root.bo,
2708 AMDGPU_FENCE_OWNER_UNDEFINED, true);
2709 if (r)
2710 goto unreserve_bo;
2711
2712 vm->update_funcs = &amdgpu_vm_cpu_funcs;
2713 r = amdgpu_vm_pt_map_tables(adev, vm);
2714 if (r)
2715 goto unreserve_bo;
2716
2717 } else {
2718 vm->update_funcs = &amdgpu_vm_sdma_funcs;
2719 }
2720
2721 dma_fence_put(vm->last_update);
2722 vm->last_update = dma_fence_get_stub();
2723 vm->is_compute_context = true;
2724
2725 unreserve_bo:
2726 amdgpu_bo_unreserve(vm->root.bo);
2727 return r;
2728 }
2729
2730 static bool amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm)
2731 {
2732 for (int i = 0; i < __AMDGPU_PL_NUM; ++i) {
2733 if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) &&
2734 vm->stats[i].evicted == 0))
2735 return false;
2736 }
2737 return true;
2738 }
2739
2740 /**
2741 * amdgpu_vm_fini - tear down a vm instance
2742 *
2743 * @adev: amdgpu_device pointer
2744 * @vm: requested vm
2745 *
2746 * Tear down @vm.
2747 * Unbind the VM and remove all bos from the vm bo list
2748 */
2749 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2750 {
2751 struct amdgpu_bo_va_mapping *mapping, *tmp;
2752 bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
2753 struct amdgpu_bo *root;
2754 unsigned long flags;
2755 int i;
2756
2757 amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
2758
2759 root = amdgpu_bo_ref(vm->root.bo);
2760 amdgpu_bo_reserve(root, true);
2761 /* Remove PASID mapping before destroying VM */
2762 if (vm->pasid != 0) {
2763 xa_erase_irq(&adev->vm_manager.pasids, vm->pasid);
2764 vm->pasid = 0;
2765 }
2766 dma_fence_wait(vm->last_unlocked, false);
2767 dma_fence_put(vm->last_unlocked);
2768 dma_fence_wait(vm->last_tlb_flush, false);
2769 /* Make sure that all fence callbacks have completed */
2770 spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
2771 spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
2772 dma_fence_put(vm->last_tlb_flush);
2773
2774 list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
2775 if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
2776 amdgpu_vm_prt_fini(adev, vm);
2777 prt_fini_needed = false;
2778 }
2779
2780 list_del(&mapping->list);
2781 amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
2782 }
2783
2784 amdgpu_vm_pt_free_root(adev, vm);
2785 amdgpu_bo_unreserve(root);
2786 amdgpu_bo_unref(&root);
2787 WARN_ON(vm->root.bo);
2788
2789 amdgpu_vm_fini_entities(vm);
2790
2791 if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
2792 dev_err(adev->dev, "still active bo inside vm\n");
2793 }
2794 rbtree_postorder_for_each_entry_safe(mapping, tmp,
2795 &vm->va.rb_root, rb) {
2796 /* Don't remove the mapping here, we don't want to trigger a
2797 * rebalance and the tree is about to be destroyed anyway.
2798 */
2799 list_del(&mapping->list);
2800 kfree(mapping);
2801 }
2802
2803 dma_fence_put(vm->last_update);
2804
2805 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
2806 amdgpu_vmid_free_reserved(adev, vm, i);
2807 }
2808
2809 ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move);
2810
2811 if (!amdgpu_vm_stats_is_zero(vm)) {
2812 struct amdgpu_task_info *ti = vm->task_info;
2813
2814 dev_warn(adev->dev,
2815 "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n",
2816 ti->process_name, ti->task.pid, ti->task.comm, ti->tgid);
2817 }
2818
2819 amdgpu_vm_put_task_info(vm->task_info);
2820 }
2821
2822 /**
2823 * amdgpu_vm_manager_init - init the VM manager
2824 *
2825 * @adev: amdgpu_device pointer
2826 *
2827 * Initialize the VM manager structures
2828 */
2829 void amdgpu_vm_manager_init(struct amdgpu_device *adev)
2830 {
2831 unsigned i;
2832
2833 /* Concurrent flushes are only possible starting with Vega10 and
2834 * are broken on Navi10 and Navi14.
2835 */
2836 adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 ||
2837 adev->asic_type == CHIP_NAVI10 ||
2838 adev->asic_type == CHIP_NAVI14);
2839 amdgpu_vmid_mgr_init(adev);
2840
2841 adev->vm_manager.fence_context =
2842 dma_fence_context_alloc(AMDGPU_MAX_RINGS);
2843 for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
2844 adev->vm_manager.seqno[i] = 0;
2845
2846 spin_lock_init(&adev->vm_manager.prt_lock);
2847 atomic_set(&adev->vm_manager.num_prt_users, 0);
2848
2849 	/* Unless overridden by the user, compute VM page tables are only
2850 	 * updated by the CPU on large BAR systems
2851 */
2852 #ifdef CONFIG_X86_64
2853 if (amdgpu_vm_update_mode == -1) {
2854 /* For asic with VF MMIO access protection
2855 * avoid using CPU for VM table updates
2856 */
2857 if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
2858 !amdgpu_sriov_vf_mmio_access_protection(adev))
2859 adev->vm_manager.vm_update_mode =
2860 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
2861 else
2862 adev->vm_manager.vm_update_mode = 0;
2863 } else
2864 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
2865 #else
2866 adev->vm_manager.vm_update_mode = 0;
2867 #endif
2868
2869 xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
2870 }
2871
2872 /**
2873 * amdgpu_vm_manager_fini - cleanup VM manager
2874 *
2875 * @adev: amdgpu_device pointer
2876 *
2877 * Cleanup the VM manager and free resources.
2878 */
2879 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2880 {
2881 WARN_ON(!xa_empty(&adev->vm_manager.pasids));
2882 xa_destroy(&adev->vm_manager.pasids);
2883
2884 amdgpu_vmid_mgr_fini(adev);
2885 }
2886
2887 /**
2888 * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
2889 *
2890 * @dev: drm device pointer
2891 * @data: drm_amdgpu_vm
2892 * @filp: drm file pointer
2893 *
2894 * Returns:
2895 * 0 for success, -errno for errors.
2896 */
2897 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2898 {
2899 union drm_amdgpu_vm *args = data;
2900 struct amdgpu_device *adev = drm_to_adev(dev);
2901 struct amdgpu_fpriv *fpriv = filp->driver_priv;
2902 struct amdgpu_vm *vm = &fpriv->vm;
2903
2904 /* No valid flags defined yet */
2905 if (args->in.flags)
2906 return -EINVAL;
2907
2908 switch (args->in.op) {
2909 case AMDGPU_VM_OP_RESERVE_VMID:
2910 		/* We only need to reserve a VMID from the gfxhub */
2911 amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
2912 break;
2913 case AMDGPU_VM_OP_UNRESERVE_VMID:
2914 amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
2915 break;
2916 default:
2917 return -EINVAL;
2918 }
2919
2920 return 0;
2921 }
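
/*
 * Userspace sketch (assumes libdrm and the uapi definitions from
 * amdgpu_drm.h; error handling omitted):
 *
 *	union drm_amdgpu_vm args = {
 *		.in.op = AMDGPU_VM_OP_RESERVE_VMID,
 *		.in.flags = 0,
 *	};
 *
 *	drmCommandWriteRead(fd, DRM_AMDGPU_VM, &args, sizeof(args));
 */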
2922
2923 /**
2924 * amdgpu_vm_handle_fault - graceful handling of VM faults.
2925 * @adev: amdgpu device pointer
2926 * @pasid: PASID of the VM
2927 * @ts: Timestamp of the fault
2928 * @vmid: VMID, only used for GFX 9.4.3.
2929 * @node_id: Node_id received in IH cookie. Only applicable for
2930 * GFX 9.4.3.
2931 * @addr: Address of the fault
2932 * @write_fault: true for a write fault, false for a read fault
2933 *
2934 * Try to gracefully handle a VM fault. Return true if the fault was handled and
2935 * shouldn't be reported any more.
2936 */
2937 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
2938 u32 vmid, u32 node_id, uint64_t addr, uint64_t ts,
2939 bool write_fault)
2940 {
2941 bool is_compute_context = false;
2942 struct amdgpu_bo *root;
2943 unsigned long irqflags;
2944 uint64_t value, flags;
2945 struct amdgpu_vm *vm;
2946 int r;
2947
2948 xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2949 vm = xa_load(&adev->vm_manager.pasids, pasid);
2950 if (vm) {
2951 root = amdgpu_bo_ref(vm->root.bo);
2952 is_compute_context = vm->is_compute_context;
2953 } else {
2954 root = NULL;
2955 }
2956 xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2957
2958 if (!root)
2959 return false;
2960
2961 addr /= AMDGPU_GPU_PAGE_SIZE;
2962
2963 if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
2964 node_id, addr, ts, write_fault)) {
2965 amdgpu_bo_unref(&root);
2966 return true;
2967 }
2968
2969 r = amdgpu_bo_reserve(root, true);
2970 if (r)
2971 goto error_unref;
2972
2973 /* Double check that the VM still exists */
2974 xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
2975 vm = xa_load(&adev->vm_manager.pasids, pasid);
2976 if (vm && vm->root.bo != root)
2977 vm = NULL;
2978 xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
2979 if (!vm)
2980 goto error_unlock;
2981
2982 flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
2983 AMDGPU_PTE_SYSTEM;
2984
2985 if (is_compute_context) {
2986 /* Intentionally setting invalid PTE flag
2987 * combination to force a no-retry-fault
2988 */
2989 flags = AMDGPU_VM_NORETRY_FLAGS;
2990 value = 0;
2991 } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
2992 /* Redirect the access to the dummy page */
2993 value = adev->dummy_page_addr;
2994 flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
2995 AMDGPU_PTE_WRITEABLE;
2996
2997 } else {
2998 /* Let the hw retry silently on the PTE */
2999 value = 0;
3000 }
3001
3002 r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
3003 if (r) {
3004 pr_debug("failed %d to reserve fence slot\n", r);
3005 goto error_unlock;
3006 }
3007
3008 r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
3009 NULL, addr, addr, flags, value, 0, NULL, NULL, NULL);
3010 if (r)
3011 goto error_unlock;
3012
3013 r = amdgpu_vm_update_pdes(adev, vm, true);
3014
3015 error_unlock:
3016 amdgpu_bo_unreserve(root);
3017 if (r < 0)
3018 dev_err(adev->dev, "Can't handle page fault (%d)\n", r);
3019
3020 error_unref:
3021 amdgpu_bo_unref(&root);
3022
3023 return false;
3024 }
3025
3026 #if defined(CONFIG_DEBUG_FS)
3027 /**
3028 * amdgpu_debugfs_vm_bo_info - print BO info for the VM
3029 *
3030 * @vm: Requested VM for printing BO info
3031 * @m: debugfs file
3032 *
3033 * Print BO information in debugfs file for the VM
3034 */
3035 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
3036 {
3037 struct amdgpu_bo_va *bo_va, *tmp;
3038 u64 total_idle = 0;
3039 u64 total_evicted = 0;
3040 u64 total_relocated = 0;
3041 u64 total_moved = 0;
3042 u64 total_invalidated = 0;
3043 u64 total_done = 0;
3044 unsigned int total_idle_objs = 0;
3045 unsigned int total_evicted_objs = 0;
3046 unsigned int total_relocated_objs = 0;
3047 unsigned int total_moved_objs = 0;
3048 unsigned int total_invalidated_objs = 0;
3049 unsigned int total_done_objs = 0;
3050 unsigned int id = 0;
3051
3052 amdgpu_vm_assert_locked(vm);
3053
3054 spin_lock(&vm->status_lock);
3055 seq_puts(m, "\tIdle BOs:\n");
3056 list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
3057 if (!bo_va->base.bo)
3058 continue;
3059 total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
3060 }
3061 total_idle_objs = id;
3062 id = 0;
3063
3064 seq_puts(m, "\tEvicted BOs:\n");
3065 list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
3066 if (!bo_va->base.bo)
3067 continue;
3068 total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
3069 }
3070 total_evicted_objs = id;
3071 id = 0;
3072
3073 seq_puts(m, "\tRelocated BOs:\n");
3074 list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
3075 if (!bo_va->base.bo)
3076 continue;
3077 total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
3078 }
3079 total_relocated_objs = id;
3080 id = 0;
3081
3082 seq_puts(m, "\tMoved BOs:\n");
3083 list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
3084 if (!bo_va->base.bo)
3085 continue;
3086 total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
3087 }
3088 total_moved_objs = id;
3089 id = 0;
3090
3091 seq_puts(m, "\tInvalidated BOs:\n");
3092 list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
3093 if (!bo_va->base.bo)
3094 continue;
3095 total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
3096 }
3097 total_invalidated_objs = id;
3098 id = 0;
3099
3100 seq_puts(m, "\tDone BOs:\n");
3101 list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
3102 if (!bo_va->base.bo)
3103 continue;
3104 total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
3105 }
3106 spin_unlock(&vm->status_lock);
3107 total_done_objs = id;
3108
3109 seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
3110 total_idle_objs);
3111 seq_printf(m, "\tTotal evicted size: %12lld\tobjs:\t%d\n", total_evicted,
3112 total_evicted_objs);
3113 seq_printf(m, "\tTotal relocated size: %12lld\tobjs:\t%d\n", total_relocated,
3114 total_relocated_objs);
3115 seq_printf(m, "\tTotal moved size: %12lld\tobjs:\t%d\n", total_moved,
3116 total_moved_objs);
3117 seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated,
3118 total_invalidated_objs);
3119 seq_printf(m, "\tTotal done size: %12lld\tobjs:\t%d\n", total_done,
3120 total_done_objs);
3121 }
3122 #endif
3123
3124 /**
3125 * amdgpu_vm_update_fault_cache - update cached fault info.
3126 * @adev: amdgpu device pointer
3127 * @pasid: PASID of the VM
3128 * @addr: Address of the fault
3129 * @status: GPUVM fault status register
3130 * @vmhub: which vmhub got the fault
3131 *
3132 * Cache the fault info for later use by userspace in debugging.
3133 */
3134 void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
3135 unsigned int pasid,
3136 uint64_t addr,
3137 uint32_t status,
3138 unsigned int vmhub)
3139 {
3140 struct amdgpu_vm *vm;
3141 unsigned long flags;
3142
3143 xa_lock_irqsave(&adev->vm_manager.pasids, flags);
3144
3145 vm = xa_load(&adev->vm_manager.pasids, pasid);
3146 /* Don't update the fault cache if status is 0. In the multiple
3147 * fault case, subsequent faults will return a 0 status which is
3148 * useless for userspace and replaces the useful fault status, so
3149 * only update if status is non-0.
3150 */
3151 if (vm && status) {
3152 vm->fault_info.addr = addr;
3153 vm->fault_info.status = status;
3154 /*
3155 * Update the fault information globally for later usage
3156 * when vm could be stale or freed.
3157 */
3158 adev->vm_manager.fault_info.addr = addr;
3159 adev->vm_manager.fault_info.vmhub = vmhub;
3160 adev->vm_manager.fault_info.status = status;
3161
3162 if (AMDGPU_IS_GFXHUB(vmhub)) {
3163 vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
3164 vm->fault_info.vmhub |=
3165 (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT;
3166 } else if (AMDGPU_IS_MMHUB0(vmhub)) {
3167 vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0;
3168 vm->fault_info.vmhub |=
3169 (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT;
3170 } else if (AMDGPU_IS_MMHUB1(vmhub)) {
3171 vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1;
3172 vm->fault_info.vmhub |=
3173 (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT;
3174 } else {
3175 WARN_ONCE(1, "Invalid vmhub %u\n", vmhub);
3176 }
3177 }
3178 xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
3179 }
3180
3181 /**
3182 * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
3183 *
3184 * @vm: VM to test against.
3185 * @bo: BO to be tested.
3186 *
3187 * Returns true if the BO shares the dma_resv object with the root PD and is
3188 * always guaranteed to be valid inside the VM.
3189 */
3190 bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
3191 {
3192 return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
3193 }
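
/*
 * A BO becomes "always valid" by sharing the reservation object of the root
 * page directory at creation time. Rough sketch (assumption: amdgpu_bo_param
 * based creation as used elsewhere in the driver; other fields omitted):
 *
 *	struct amdgpu_bo_param bp = {
 *		.size = size,
 *		.byte_align = PAGE_SIZE,
 *		.domain = AMDGPU_GEM_DOMAIN_VRAM,
 *		.type = ttm_bo_type_device,
 *		.resv = vm->root.bo->tbo.base.resv,
 *	};
 *
 *	r = amdgpu_bo_create(adev, &bp, &bo);
 */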
3194
3195 void amdgpu_vm_print_task_info(struct amdgpu_device *adev,
3196 struct amdgpu_task_info *task_info)
3197 {
3198 dev_err(adev->dev,
3199 " Process %s pid %d thread %s pid %d\n",
3200 task_info->process_name, task_info->tgid,
3201 task_info->task.comm, task_info->task.pid);
3202 }
3203