1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 29 #include <linux/dma-fence-array.h> 30 #include <linux/interval_tree_generic.h> 31 #include <linux/idr.h> 32 #include <linux/dma-buf.h> 33 34 #include <drm/amdgpu_drm.h> 35 #include <drm/drm_drv.h> 36 #include <drm/ttm/ttm_tt.h> 37 #include <drm/drm_exec.h> 38 #include "amdgpu.h" 39 #include "amdgpu_vm.h" 40 #include "amdgpu_trace.h" 41 #include "amdgpu_amdkfd.h" 42 #include "amdgpu_gmc.h" 43 #include "amdgpu_xgmi.h" 44 #include "amdgpu_dma_buf.h" 45 #include "amdgpu_res_cursor.h" 46 #include "kfd_svm.h" 47 48 /** 49 * DOC: GPUVM 50 * 51 * GPUVM is the MMU functionality provided on the GPU. 
 * GPUVM is similar to the legacy GART on older asics, however
 * rather than there being a single global GART table
 * for the entire GPU, there can be multiple GPUVM page tables active
 * at any given time.  The GPUVM page tables can contain a mix of
 * VRAM pages and system pages (both memory and MMIO) and system pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 *
 * Each active GPUVM has an ID associated with it and there is a page table
 * linked with each VMID.  When executing a command buffer,
 * the kernel tells the engine what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * The hardware supports up to 16 active GPUVMs at any given time.
 *
 * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
 * on the ASIC family.  GPUVM supports RWX attributes on each page as well
 * as other features such as encryption and caching attributes.
 *
 * VMID 0 is special.  It is the GPUVM used for the kernel driver.  In
 * addition to an aperture managed by a page table, VMID 0 also has
 * several other apertures.  There is an aperture for direct access to VRAM
 * and there is a legacy AGP aperture which just forwards accesses directly
 * to the matching system physical addresses (or IOVAs when an IOMMU is
 * present).  These apertures provide direct access to these memories without
 * incurring the overhead of a page table.  VMID 0 is used by the kernel
 * driver for tasks like memory management.
 *
 * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
 * For user applications, each application can have their own unique GPUVM
 * address space.
The application manages the address space and the kernel 85 * driver manages the GPUVM page tables for each process. If an GPU client 86 * accesses an invalid page, it will generate a GPU page fault, similar to 87 * accessing an invalid page on a CPU. 88 */ 89 90 #define START(node) ((node)->start) 91 #define LAST(node) ((node)->last) 92 93 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, 94 START, LAST, static, amdgpu_vm_it) 95 96 #undef START 97 #undef LAST 98 99 /** 100 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback 101 */ 102 struct amdgpu_prt_cb { 103 104 /** 105 * @adev: amdgpu device 106 */ 107 struct amdgpu_device *adev; 108 109 /** 110 * @cb: callback 111 */ 112 struct dma_fence_cb cb; 113 }; 114 115 /** 116 * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence 117 */ 118 struct amdgpu_vm_tlb_seq_struct { 119 /** 120 * @vm: pointer to the amdgpu_vm structure to set the fence sequence on 121 */ 122 struct amdgpu_vm *vm; 123 124 /** 125 * @cb: callback 126 */ 127 struct dma_fence_cb cb; 128 }; 129 130 /** 131 * amdgpu_vm_assert_locked - check if VM is correctly locked 132 * @vm: the VM which schould be tested 133 * 134 * Asserts that the VM root PD is locked. 135 */ 136 static void amdgpu_vm_assert_locked(struct amdgpu_vm *vm) 137 { 138 dma_resv_assert_held(vm->root.bo->tbo.base.resv); 139 } 140 141 /* Initialize the amdgpu_vm_bo_status object */ 142 static void amdgpu_vm_bo_status_init(struct amdgpu_vm_bo_status *lists) 143 { 144 INIT_LIST_HEAD(&lists->evicted); 145 INIT_LIST_HEAD(&lists->moved); 146 INIT_LIST_HEAD(&lists->idle); 147 } 148 149 /* 150 * Make sure we have the lock to modify the vm_bo status and return the object 151 * with the status lists. 
152 */ 153 static struct amdgpu_vm_bo_status * 154 amdgpu_vm_bo_lock_lists(struct amdgpu_vm_bo_base *vm_bo) 155 { 156 struct amdgpu_vm *vm = vm_bo->vm; 157 struct amdgpu_bo *bo = vm_bo->bo; 158 159 if (amdgpu_vm_is_bo_always_valid(vm, bo)) { 160 /* No extra locking needed, protected by the root PD resv lock */ 161 amdgpu_vm_assert_locked(vm); 162 163 if (bo->tbo.type == ttm_bo_type_kernel) 164 return &vm->kernel; 165 166 return &vm->always_valid; 167 } 168 169 spin_lock(&vm_bo->vm->individual_lock); 170 return &vm->individual; 171 } 172 173 /* Eventually unlock the status list lock again */ 174 static void amdgpu_vm_bo_unlock_lists(struct amdgpu_vm_bo_base *vm_bo) 175 { 176 if (amdgpu_vm_is_bo_always_valid(vm_bo->vm, vm_bo->bo)) 177 amdgpu_vm_assert_locked(vm_bo->vm); 178 else 179 spin_unlock(&vm_bo->vm->individual_lock); 180 } 181 182 /** 183 * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid 184 * 185 * @vm: VM to test against. 186 * @bo: BO to be tested. 187 * 188 * Returns true if the BO shares the dma_resv object with the root PD and is 189 * always guaranteed to be valid inside the VM. 190 */ 191 bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) 192 { 193 return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; 194 } 195 196 /** 197 * amdgpu_vm_bo_evicted - vm_bo is evicted 198 * 199 * @vm_bo: vm_bo which is evicted 200 * 201 * State for vm_bo objects meaning the underlying BO was evicted and need to 202 * move in place again. 
203 */ 204 static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) 205 { 206 struct amdgpu_vm_bo_status *lists; 207 208 lists = amdgpu_vm_bo_lock_lists(vm_bo); 209 vm_bo->moved = true; 210 list_move(&vm_bo->vm_status, &lists->evicted); 211 amdgpu_vm_bo_unlock_lists(vm_bo); 212 } 213 /** 214 * amdgpu_vm_bo_moved - vm_bo is moved 215 * 216 * @vm_bo: vm_bo which is moved 217 * 218 * State for vm_bo objects meaning the underlying BO was moved but the new 219 * location not yet reflected in the page tables. 220 */ 221 static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) 222 { 223 struct amdgpu_vm_bo_status *lists; 224 struct amdgpu_bo *bo = vm_bo->bo; 225 226 /* 227 * The root PD doesn't have a parent PDE and goes directly into the 228 * idle state. 229 */ 230 lists = amdgpu_vm_bo_lock_lists(vm_bo); 231 if (bo && bo->tbo.type == ttm_bo_type_kernel && !bo->parent) { 232 vm_bo->moved = false; 233 list_move(&vm_bo->vm_status, &lists->idle); 234 } else { 235 vm_bo->moved = true; 236 list_move(&vm_bo->vm_status, &lists->moved); 237 } 238 amdgpu_vm_bo_unlock_lists(vm_bo); 239 } 240 241 /** 242 * amdgpu_vm_bo_idle - vm_bo is idle 243 * 244 * @vm_bo: vm_bo which is now idle 245 * 246 * State for vm_bo objects meaning we are done with the state machine and no 247 * further action is necessary. 248 */ 249 static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) 250 { 251 struct amdgpu_vm_bo_status *lists; 252 253 lists = amdgpu_vm_bo_lock_lists(vm_bo); 254 if (!amdgpu_vm_is_bo_always_valid(vm_bo->vm, vm_bo->bo)) 255 vm_bo->moved = false; 256 list_move(&vm_bo->vm_status, &lists->idle); 257 amdgpu_vm_bo_unlock_lists(vm_bo); 258 } 259 260 /** 261 * amdgpu_vm_bo_reset_state_machine - reset the vm_bo state machine 262 * @vm: the VM which state machine to reset 263 * 264 * Move all vm_bo object in the VM into a state where their location will be 265 * updated in the page tables again. 
266 */ 267 static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) 268 { 269 amdgpu_vm_assert_locked(vm); 270 list_splice_init(&vm->kernel.idle, &vm->kernel.moved); 271 list_splice_init(&vm->always_valid.idle, &vm->always_valid.moved); 272 273 spin_lock(&vm->individual_lock); 274 list_splice_init(&vm->individual.idle, &vm->individual.moved); 275 spin_unlock(&vm->individual_lock); 276 } 277 278 /** 279 * amdgpu_vm_update_shared - helper to update shared memory stat 280 * @base: base structure for tracking BO usage in a VM 281 * 282 * Takes the vm stats_lock and updates the shared memory stat. If the basic 283 * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called 284 * as well. 285 */ 286 static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) 287 { 288 struct amdgpu_vm *vm = base->vm; 289 struct amdgpu_bo *bo = base->bo; 290 uint64_t size = amdgpu_bo_size(bo); 291 uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); 292 bool shared; 293 294 dma_resv_assert_held(bo->tbo.base.resv); 295 spin_lock(&vm->stats_lock); 296 shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); 297 if (base->shared != shared) { 298 base->shared = shared; 299 if (shared) { 300 vm->stats[bo_memtype].drm.shared += size; 301 vm->stats[bo_memtype].drm.private -= size; 302 } else { 303 vm->stats[bo_memtype].drm.shared -= size; 304 vm->stats[bo_memtype].drm.private += size; 305 } 306 } 307 spin_unlock(&vm->stats_lock); 308 } 309 310 /** 311 * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared 312 * @bo: amdgpu buffer object 313 * 314 * Update the per VM stats for all the vm if needed from private to shared or 315 * vice versa. 
316 */ 317 void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo) 318 { 319 struct amdgpu_vm_bo_base *base; 320 321 for (base = bo->vm_bo; base; base = base->next) 322 amdgpu_vm_update_shared(base); 323 } 324 325 /** 326 * amdgpu_vm_update_stats_locked - helper to update normal memory stat 327 * @base: base structure for tracking BO usage in a VM 328 * @res: the ttm_resource to use for the purpose of accounting, may or may not 329 * be bo->tbo.resource 330 * @sign: if we should add (+1) or subtract (-1) from the stat 331 * 332 * Caller need to have the vm stats_lock held. Useful for when multiple update 333 * need to happen at the same time. 334 */ 335 static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base, 336 struct ttm_resource *res, int sign) 337 { 338 struct amdgpu_vm *vm = base->vm; 339 struct amdgpu_bo *bo = base->bo; 340 int64_t size = sign * amdgpu_bo_size(bo); 341 uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); 342 343 /* For drm-total- and drm-shared-, BO are accounted by their preferred 344 * placement, see also amdgpu_bo_mem_stats_placement. 345 */ 346 if (base->shared) 347 vm->stats[bo_memtype].drm.shared += size; 348 else 349 vm->stats[bo_memtype].drm.private += size; 350 351 if (res && res->mem_type < __AMDGPU_PL_NUM) { 352 uint32_t res_memtype = res->mem_type; 353 354 vm->stats[res_memtype].drm.resident += size; 355 /* BO only count as purgeable if it is resident, 356 * since otherwise there's nothing to purge. 
357 */ 358 if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) 359 vm->stats[res_memtype].drm.purgeable += size; 360 if (!(bo->preferred_domains & 361 amdgpu_mem_type_to_domain(res_memtype))) 362 vm->stats[bo_memtype].evicted += size; 363 } 364 } 365 366 /** 367 * amdgpu_vm_update_stats - helper to update normal memory stat 368 * @base: base structure for tracking BO usage in a VM 369 * @res: the ttm_resource to use for the purpose of accounting, may or may not 370 * be bo->tbo.resource 371 * @sign: if we should add (+1) or subtract (-1) from the stat 372 * 373 * Updates the basic memory stat when bo is added/deleted/moved. 374 */ 375 void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, 376 struct ttm_resource *res, int sign) 377 { 378 struct amdgpu_vm *vm = base->vm; 379 380 spin_lock(&vm->stats_lock); 381 amdgpu_vm_update_stats_locked(base, res, sign); 382 spin_unlock(&vm->stats_lock); 383 } 384 385 /** 386 * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm 387 * 388 * @base: base structure for tracking BO usage in a VM 389 * @vm: vm to which bo is to be added 390 * @bo: amdgpu buffer object 391 * 392 * Initialize a bo_va_base structure and add it to the appropriate lists 393 * 394 */ 395 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, 396 struct amdgpu_vm *vm, struct amdgpu_bo *bo) 397 { 398 base->vm = vm; 399 base->bo = bo; 400 base->next = NULL; 401 INIT_LIST_HEAD(&base->vm_status); 402 403 dma_resv_assert_held(vm->root.bo->tbo.base.resv); 404 if (!bo) 405 return; 406 407 base->next = bo->vm_bo; 408 bo->vm_bo = base; 409 410 spin_lock(&vm->stats_lock); 411 base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); 412 amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); 413 spin_unlock(&vm->stats_lock); 414 415 if (!amdgpu_vm_is_bo_always_valid(vm, bo)) { 416 amdgpu_vm_bo_idle(base); 417 return; 418 } 419 420 ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move); 421 422 /* 423 * When a per VM isn't in 
the desired domain put it into the evicted 424 * state to make sure that it gets validated on the next best occasion. 425 */ 426 if (bo->preferred_domains & 427 amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type)) 428 amdgpu_vm_bo_moved(base); 429 else 430 amdgpu_vm_bo_evicted(base); 431 } 432 433 /** 434 * amdgpu_vm_lock_pd - lock PD in drm_exec 435 * 436 * @vm: vm providing the BOs 437 * @exec: drm execution context 438 * @num_fences: number of extra fences to reserve 439 * 440 * Lock the VM root PD in the DRM execution context. 441 */ 442 int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, 443 unsigned int num_fences) 444 { 445 /* We need at least two fences for the VM PD/PT updates */ 446 return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base, 447 2 + num_fences); 448 } 449 450 /** 451 * amdgpu_vm_lock_individual - lock all BOs on the individual idle list 452 * @vm: vm providing the BOs 453 * @exec: drm execution context 454 * @num_fences: number of extra fences to reserve 455 * 456 * Lock the BOs on the individual idle list in the DRM execution context. 
457 */ 458 int amdgpu_vm_lock_individual(struct amdgpu_vm *vm, struct drm_exec *exec, 459 unsigned int num_fences) 460 { 461 struct list_head *prev = &vm->individual.idle; 462 struct amdgpu_bo_va *bo_va; 463 struct amdgpu_bo *bo; 464 int ret; 465 466 /* We can only trust prev->next while holding the lock */ 467 spin_lock(&vm->individual_lock); 468 while (!list_is_head(prev->next, &vm->individual.idle)) { 469 bo_va = list_entry(prev->next, typeof(*bo_va), base.vm_status); 470 471 bo = bo_va->base.bo; 472 if (bo) { 473 amdgpu_bo_ref(bo); 474 spin_unlock(&vm->individual_lock); 475 476 ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 1); 477 amdgpu_bo_unref(&bo); 478 if (unlikely(ret)) 479 return ret; 480 481 spin_lock(&vm->individual_lock); 482 } 483 prev = prev->next; 484 } 485 spin_unlock(&vm->individual_lock); 486 487 return 0; 488 } 489 490 /** 491 * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU 492 * 493 * @adev: amdgpu device pointer 494 * @vm: vm providing the BOs 495 * 496 * Move all BOs to the end of LRU and remember their positions to put them 497 * together. 
498 */ 499 void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, 500 struct amdgpu_vm *vm) 501 { 502 spin_lock(&adev->mman.bdev.lru_lock); 503 ttm_lru_bulk_move_tail(&vm->lru_bulk_move); 504 spin_unlock(&adev->mman.bdev.lru_lock); 505 } 506 507 /* Create scheduler entities for page table updates */ 508 static int amdgpu_vm_init_entities(struct amdgpu_device *adev, 509 struct amdgpu_vm *vm) 510 { 511 int r; 512 513 r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, 514 adev->vm_manager.vm_pte_scheds, 515 adev->vm_manager.vm_pte_num_scheds, NULL); 516 if (r) 517 goto error; 518 519 return drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, 520 adev->vm_manager.vm_pte_scheds, 521 adev->vm_manager.vm_pte_num_scheds, NULL); 522 523 error: 524 drm_sched_entity_destroy(&vm->immediate); 525 return r; 526 } 527 528 /* Destroy the entities for page table updates again */ 529 static void amdgpu_vm_fini_entities(struct amdgpu_vm *vm) 530 { 531 drm_sched_entity_destroy(&vm->immediate); 532 drm_sched_entity_destroy(&vm->delayed); 533 } 534 535 /** 536 * amdgpu_vm_generation - return the page table re-generation counter 537 * @adev: the amdgpu_device 538 * @vm: optional VM to check, might be NULL 539 * 540 * Returns a page table re-generation token to allow checking if submissions 541 * are still valid to use this VM. The VM parameter might be NULL in which case 542 * just the VRAM lost counter will be used. 
543 */ 544 uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) 545 { 546 uint64_t result = (u64)atomic_read(&adev->vram_lost_counter) << 32; 547 548 if (!vm) 549 return result; 550 551 result += lower_32_bits(vm->generation); 552 /* Add one if the page tables will be re-generated on next CS */ 553 if (drm_sched_entity_error(&vm->delayed)) 554 ++result; 555 556 return result; 557 } 558 559 /** 560 * amdgpu_vm_validate - validate evicted BOs tracked in the VM 561 * 562 * @adev: amdgpu device pointer 563 * @vm: vm providing the BOs 564 * @ticket: optional reservation ticket used to reserve the VM 565 * @validate: callback to do the validation 566 * @param: parameter for the validation callback 567 * 568 * Validate the page table BOs and per-VM BOs on command submission if 569 * necessary. If a ticket is given, also try to validate evicted user queue 570 * BOs. They must already be reserved with the given ticket. 571 * 572 * Returns: 573 * Validation result. 574 */ 575 int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, 576 struct ww_acquire_ctx *ticket, 577 int (*validate)(void *p, struct amdgpu_bo *bo), 578 void *param) 579 { 580 uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm); 581 struct amdgpu_vm_bo_base *bo_base, *tmp; 582 int r; 583 584 dma_resv_assert_held(vm->root.bo->tbo.base.resv); 585 if (vm->generation != new_vm_generation) { 586 vm->generation = new_vm_generation; 587 amdgpu_vm_bo_reset_state_machine(vm); 588 amdgpu_vm_fini_entities(vm); 589 r = amdgpu_vm_init_entities(adev, vm); 590 if (r) 591 return r; 592 } 593 594 list_for_each_entry_safe(bo_base, tmp, &vm->kernel.evicted, vm_status) { 595 r = validate(param, bo_base->bo); 596 if (r) 597 return r; 598 599 vm->update_funcs->map_table(to_amdgpu_bo_vm(bo_base->bo)); 600 amdgpu_vm_bo_moved(bo_base); 601 } 602 603 /* 604 * As soon as all page tables are in place we can start updating them 605 * again. 
606 */ 607 amdgpu_vm_eviction_lock(vm); 608 vm->evicting = false; 609 amdgpu_vm_eviction_unlock(vm); 610 611 list_for_each_entry_safe(bo_base, tmp, &vm->always_valid.evicted, 612 vm_status) { 613 r = validate(param, bo_base->bo); 614 if (r) 615 return r; 616 617 amdgpu_vm_bo_moved(bo_base); 618 } 619 620 if (!ticket) 621 return 0; 622 623 spin_lock(&vm->individual_lock); 624 restart: 625 list_for_each_entry(bo_base, &vm->individual.evicted, vm_status) { 626 struct amdgpu_bo *bo = bo_base->bo; 627 628 if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) 629 continue; 630 631 spin_unlock(&vm->individual_lock); 632 633 r = validate(param, bo); 634 if (r) 635 return r; 636 637 amdgpu_vm_bo_moved(bo_base); 638 639 /* It's a bit inefficient to always jump back to the start, but 640 * we would need to re-structure the KFD for properly fixing 641 * that. 642 */ 643 spin_lock(&vm->individual_lock); 644 goto restart; 645 } 646 spin_unlock(&vm->individual_lock); 647 648 return 0; 649 } 650 651 /** 652 * amdgpu_vm_ready - check VM is ready for updates 653 * 654 * @vm: VM to check 655 * 656 * Check if all VM PDs/PTs are ready for updates 657 * 658 * Returns: 659 * True if VM is not evicting and all VM entities are not stopped 660 */ 661 bool amdgpu_vm_ready(struct amdgpu_vm *vm) 662 { 663 bool ret; 664 665 amdgpu_vm_assert_locked(vm); 666 667 amdgpu_vm_eviction_lock(vm); 668 ret = !vm->evicting; 669 amdgpu_vm_eviction_unlock(vm); 670 671 ret &= list_empty(&vm->kernel.evicted); 672 673 spin_lock(&vm->immediate.lock); 674 ret &= !vm->immediate.stopped; 675 spin_unlock(&vm->immediate.lock); 676 677 spin_lock(&vm->delayed.lock); 678 ret &= !vm->delayed.stopped; 679 spin_unlock(&vm->delayed.lock); 680 681 return ret; 682 } 683 684 /** 685 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug 686 * 687 * @adev: amdgpu_device pointer 688 */ 689 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) 690 { 691 const struct amdgpu_ip_block *ip_block; 692 bool 
has_compute_vm_bug; 693 struct amdgpu_ring *ring; 694 int i; 695 696 has_compute_vm_bug = false; 697 698 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 699 if (ip_block) { 700 /* Compute has a VM bug for GFX version < 7. 701 Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ 702 if (ip_block->version->major <= 7) 703 has_compute_vm_bug = true; 704 else if (ip_block->version->major == 8) 705 if (adev->gfx.mec_fw_version < 673) 706 has_compute_vm_bug = true; 707 } 708 709 for (i = 0; i < adev->num_rings; i++) { 710 ring = adev->rings[i]; 711 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) 712 /* only compute rings */ 713 ring->has_compute_vm_bug = has_compute_vm_bug; 714 else 715 ring->has_compute_vm_bug = false; 716 } 717 } 718 719 /** 720 * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job. 721 * 722 * @ring: ring on which the job will be submitted 723 * @job: job to submit 724 * 725 * Returns: 726 * True if sync is needed. 727 */ 728 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, 729 struct amdgpu_job *job) 730 { 731 struct amdgpu_device *adev = ring->adev; 732 unsigned vmhub = ring->vm_hub; 733 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 734 735 if (job->vmid == 0) 736 return false; 737 738 if (job->vm_needs_flush || ring->has_compute_vm_bug) 739 return true; 740 741 if (ring->funcs->emit_gds_switch && job->gds_switch_needed) 742 return true; 743 744 if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid])) 745 return true; 746 747 return false; 748 } 749 750 /** 751 * amdgpu_vm_flush - hardware flush the vm 752 * 753 * @ring: ring to use for flush 754 * @job: related job 755 * @need_pipe_sync: is pipe sync needed 756 * 757 * Emit a VM flush when it is necessary. 
758 */ 759 void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, 760 bool need_pipe_sync) 761 { 762 struct amdgpu_device *adev = ring->adev; 763 struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; 764 unsigned vmhub = ring->vm_hub; 765 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 766 struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; 767 bool spm_update_needed = job->spm_update_needed; 768 bool gds_switch_needed = ring->funcs->emit_gds_switch && 769 job->gds_switch_needed; 770 bool vm_flush_needed = job->vm_needs_flush; 771 bool cleaner_shader_needed = false; 772 bool pasid_mapping_needed = false; 773 struct dma_fence *fence = NULL; 774 unsigned int patch = 0; 775 776 if (amdgpu_vmid_had_gpu_reset(adev, id)) { 777 gds_switch_needed = true; 778 vm_flush_needed = true; 779 pasid_mapping_needed = true; 780 spm_update_needed = true; 781 } 782 783 mutex_lock(&id_mgr->lock); 784 if (id->pasid != job->pasid || !id->pasid_mapping || 785 !dma_fence_is_signaled(id->pasid_mapping)) 786 pasid_mapping_needed = true; 787 mutex_unlock(&id_mgr->lock); 788 789 gds_switch_needed &= !!ring->funcs->emit_gds_switch; 790 vm_flush_needed &= !!ring->funcs->emit_vm_flush && 791 job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; 792 pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && 793 ring->funcs->emit_wreg; 794 795 cleaner_shader_needed = job->run_cleaner_shader && 796 adev->gfx.enable_cleaner_shader && 797 ring->funcs->emit_cleaner_shader && job->base.s_fence && 798 &job->base.s_fence->scheduled == isolation->spearhead; 799 800 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && 801 !cleaner_shader_needed) 802 return; 803 804 amdgpu_ring_ib_begin(ring); 805 806 /* There is no matching insert_end for this on purpose for the vm flush. 807 * The IB portion of the submission has both. 
Having multiple 808 * insert_start sequences is ok, but you can only have one insert_end 809 * per submission based on the way VCN FW works. For JPEG 810 * you can as many insert_start and insert_end sequences as you like as 811 * long as the rest of the packets come between start and end sequences. 812 */ 813 if (ring->funcs->insert_start) 814 ring->funcs->insert_start(ring); 815 816 if (ring->funcs->init_cond_exec) 817 patch = amdgpu_ring_init_cond_exec(ring, 818 ring->cond_exe_gpu_addr); 819 820 if (need_pipe_sync) 821 amdgpu_ring_emit_pipeline_sync(ring); 822 823 if (cleaner_shader_needed) 824 ring->funcs->emit_cleaner_shader(ring); 825 826 if (vm_flush_needed) { 827 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); 828 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); 829 } 830 831 if (pasid_mapping_needed) 832 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); 833 834 if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) 835 adev->gfx.rlc.funcs->update_spm_vmid(adev, ring->xcc_id, ring, job->vmid); 836 837 if (ring->funcs->emit_gds_switch && 838 gds_switch_needed) { 839 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, 840 job->gds_size, job->gws_base, 841 job->gws_size, job->oa_base, 842 job->oa_size); 843 } 844 845 if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { 846 amdgpu_fence_emit(ring, job->hw_vm_fence, 0); 847 fence = &job->hw_vm_fence->base; 848 /* get a ref for the job */ 849 dma_fence_get(fence); 850 } 851 852 if (vm_flush_needed) { 853 mutex_lock(&id_mgr->lock); 854 dma_fence_put(id->last_flush); 855 id->last_flush = dma_fence_get(fence); 856 id->current_gpu_reset_count = 857 atomic_read(&adev->gpu_reset_counter); 858 mutex_unlock(&id_mgr->lock); 859 } 860 861 if (pasid_mapping_needed) { 862 mutex_lock(&id_mgr->lock); 863 id->pasid = job->pasid; 864 dma_fence_put(id->pasid_mapping); 865 id->pasid_mapping = dma_fence_get(fence); 866 mutex_unlock(&id_mgr->lock); 867 } 868 869 /* 
870 * Make sure that all other submissions wait for the cleaner shader to 871 * finish before we push them to the HW. 872 */ 873 if (cleaner_shader_needed) { 874 trace_amdgpu_cleaner_shader(ring, fence); 875 mutex_lock(&adev->enforce_isolation_mutex); 876 dma_fence_put(isolation->spearhead); 877 isolation->spearhead = dma_fence_get(fence); 878 mutex_unlock(&adev->enforce_isolation_mutex); 879 } 880 dma_fence_put(fence); 881 882 amdgpu_ring_patch_cond_exec(ring, patch); 883 884 /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ 885 if (ring->funcs->emit_switch_buffer) { 886 amdgpu_ring_emit_switch_buffer(ring); 887 amdgpu_ring_emit_switch_buffer(ring); 888 } 889 890 amdgpu_ring_ib_end(ring); 891 } 892 893 /** 894 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo 895 * 896 * @vm: requested vm 897 * @bo: requested buffer object 898 * 899 * Find @bo inside the requested vm. 900 * Search inside the @bos vm list for the requested vm 901 * Returns the found bo_va or NULL if none is found 902 * 903 * Object has to be reserved! 904 * 905 * Returns: 906 * Found bo_va or NULL. 907 */ 908 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, 909 struct amdgpu_bo *bo) 910 { 911 struct amdgpu_vm_bo_base *base; 912 913 for (base = bo->vm_bo; base; base = base->next) { 914 if (base->vm != vm) 915 continue; 916 917 return container_of(base, struct amdgpu_bo_va, base); 918 } 919 return NULL; 920 } 921 922 /** 923 * amdgpu_vm_map_gart - Resolve gart mapping of addr 924 * 925 * @pages_addr: optional DMA address to use for lookup 926 * @addr: the unmapped addr 927 * 928 * Look up the physical address of the page that the pte resolves 929 * to. 930 * 931 * Returns: 932 * The pointer for the page table entry. 
933 */ 934 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) 935 { 936 uint64_t result; 937 938 /* page table offset */ 939 result = pages_addr[addr >> PAGE_SHIFT]; 940 941 /* in case cpu page size != gpu page size*/ 942 result |= addr & (~PAGE_MASK); 943 944 result &= 0xFFFFFFFFFFFFF000ULL; 945 946 return result; 947 } 948 949 /** 950 * amdgpu_vm_update_pdes - make sure that all directories are valid 951 * 952 * @adev: amdgpu_device pointer 953 * @vm: requested vm 954 * @immediate: submit immediately to the paging queue 955 * 956 * Makes sure all directories are up to date. 957 * 958 * Returns: 959 * 0 for success, error for failure. 960 */ 961 int amdgpu_vm_update_pdes(struct amdgpu_device *adev, 962 struct amdgpu_vm *vm, bool immediate) 963 { 964 struct amdgpu_vm_update_params params; 965 struct amdgpu_vm_bo_base *entry, *tmp; 966 bool flush_tlb_needed = false; 967 int r, idx; 968 969 amdgpu_vm_assert_locked(vm); 970 971 if (list_empty(&vm->kernel.moved)) 972 return 0; 973 974 if (!drm_dev_enter(adev_to_drm(adev), &idx)) 975 return -ENODEV; 976 977 memset(¶ms, 0, sizeof(params)); 978 params.adev = adev; 979 params.vm = vm; 980 params.immediate = immediate; 981 982 r = vm->update_funcs->prepare(¶ms, NULL, 983 AMDGPU_KERNEL_JOB_ID_VM_UPDATE_PDES); 984 if (r) 985 goto error; 986 987 list_for_each_entry(entry, &vm->kernel.moved, vm_status) { 988 /* vm_flush_needed after updating moved PDEs */ 989 flush_tlb_needed |= entry->moved; 990 991 r = amdgpu_vm_pde_update(¶ms, entry); 992 if (r) 993 goto error; 994 } 995 996 r = vm->update_funcs->commit(¶ms, &vm->last_update); 997 if (r) 998 goto error; 999 1000 if (flush_tlb_needed) 1001 atomic64_inc(&vm->tlb_seq); 1002 1003 list_for_each_entry_safe(entry, tmp, &vm->kernel.moved, vm_status) 1004 amdgpu_vm_bo_idle(entry); 1005 1006 error: 1007 drm_dev_exit(idx); 1008 return r; 1009 } 1010 1011 /** 1012 * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence 1013 * @fence: unused 1014 * @cb: the 
callback structure 1015 * 1016 * Increments the tlb sequence to make sure that future CS execute a VM flush. 1017 */ 1018 static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, 1019 struct dma_fence_cb *cb) 1020 { 1021 struct amdgpu_vm_tlb_seq_struct *tlb_cb; 1022 1023 tlb_cb = container_of(cb, typeof(*tlb_cb), cb); 1024 atomic64_inc(&tlb_cb->vm->tlb_seq); 1025 kfree(tlb_cb); 1026 } 1027 1028 /** 1029 * amdgpu_vm_tlb_flush - prepare TLB flush 1030 * 1031 * @params: parameters for update 1032 * @fence: input fence to sync TLB flush with 1033 * @tlb_cb: the callback structure 1034 * 1035 * Increments the tlb sequence to make sure that future CS execute a VM flush. 1036 */ 1037 static void 1038 amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, 1039 struct dma_fence **fence, 1040 struct amdgpu_vm_tlb_seq_struct *tlb_cb) 1041 { 1042 struct amdgpu_vm *vm = params->vm; 1043 1044 tlb_cb->vm = vm; 1045 if (!fence || !*fence) { 1046 amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); 1047 return; 1048 } 1049 1050 if (!dma_fence_add_callback(*fence, &tlb_cb->cb, 1051 amdgpu_vm_tlb_seq_cb)) { 1052 dma_fence_put(vm->last_tlb_flush); 1053 vm->last_tlb_flush = dma_fence_get(*fence); 1054 } else { 1055 amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); 1056 } 1057 1058 /* Prepare a TLB flush fence to be attached to PTs */ 1059 /* The check for need_tlb_fence should be dropped once we 1060 * sort out the issues with KIQ/MES TLB invalidation timeouts. 
1061 */ 1062 if (!params->unlocked && vm->need_tlb_fence) { 1063 amdgpu_vm_tlb_fence_create(params->adev, vm, fence); 1064 1065 /* Makes sure no PD/PT is freed before the flush */ 1066 dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, 1067 DMA_RESV_USAGE_BOOKKEEP); 1068 } 1069 } 1070 1071 /** 1072 * amdgpu_vm_update_range - update a range in the vm page table 1073 * 1074 * @adev: amdgpu_device pointer to use for commands 1075 * @vm: the VM to update the range 1076 * @immediate: immediate submission in a page fault 1077 * @unlocked: unlocked invalidation during MM callback 1078 * @flush_tlb: trigger tlb invalidation after update completed 1079 * @allow_override: change MTYPE for local NUMA nodes 1080 * @sync: fences we need to sync to 1081 * @start: start of mapped range 1082 * @last: last mapped entry 1083 * @flags: flags for the entries 1084 * @offset: offset into nodes and pages_addr 1085 * @vram_base: base for vram mappings 1086 * @res: ttm_resource to map 1087 * @pages_addr: DMA addresses to use for mapping 1088 * @fence: optional resulting fence 1089 * 1090 * Fill in the page table entries between @start and @last. 1091 * 1092 * Returns: 1093 * 0 for success, negative erro code for failure. 
1094 */ 1095 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, 1096 bool immediate, bool unlocked, bool flush_tlb, 1097 bool allow_override, struct amdgpu_sync *sync, 1098 uint64_t start, uint64_t last, uint64_t flags, 1099 uint64_t offset, uint64_t vram_base, 1100 struct ttm_resource *res, dma_addr_t *pages_addr, 1101 struct dma_fence **fence) 1102 { 1103 struct amdgpu_vm_tlb_seq_struct *tlb_cb; 1104 struct amdgpu_vm_update_params params; 1105 struct amdgpu_res_cursor cursor; 1106 int r, idx; 1107 1108 if (!drm_dev_enter(adev_to_drm(adev), &idx)) 1109 return -ENODEV; 1110 1111 tlb_cb = kmalloc_obj(*tlb_cb); 1112 if (!tlb_cb) { 1113 drm_dev_exit(idx); 1114 return -ENOMEM; 1115 } 1116 1117 /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, 1118 * heavy-weight flush TLB unconditionally. 1119 */ 1120 flush_tlb |= adev->gmc.xgmi.num_physical_nodes && 1121 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0); 1122 1123 /* 1124 * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB 1125 */ 1126 flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0); 1127 1128 memset(¶ms, 0, sizeof(params)); 1129 params.adev = adev; 1130 params.vm = vm; 1131 params.immediate = immediate; 1132 params.pages_addr = pages_addr; 1133 params.unlocked = unlocked; 1134 params.needs_flush = flush_tlb; 1135 params.override_pte = allow_override && adev->gmc.override_pte; 1136 INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); 1137 1138 amdgpu_vm_eviction_lock(vm); 1139 if (vm->evicting) { 1140 r = -EBUSY; 1141 goto error_free; 1142 } 1143 1144 if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { 1145 struct dma_fence *tmp = dma_fence_get_stub(); 1146 1147 amdgpu_bo_fence(vm->root.bo, vm->last_unlocked, true); 1148 swap(vm->last_unlocked, tmp); 1149 dma_fence_put(tmp); 1150 } 1151 1152 r = vm->update_funcs->prepare(¶ms, sync, 1153 AMDGPU_KERNEL_JOB_ID_VM_UPDATE_RANGE); 1154 if (r) 1155 goto error_free; 1156 1157 
amdgpu_res_first(pages_addr ? NULL : res, offset, 1158 (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor); 1159 while (cursor.remaining) { 1160 uint64_t tmp, num_entries, addr; 1161 1162 num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT; 1163 if (pages_addr) { 1164 bool contiguous = true; 1165 1166 if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { 1167 uint64_t pfn = cursor.start >> PAGE_SHIFT; 1168 uint64_t count; 1169 1170 contiguous = pages_addr[pfn + 1] == 1171 pages_addr[pfn] + PAGE_SIZE; 1172 1173 tmp = num_entries / 1174 AMDGPU_GPU_PAGES_IN_CPU_PAGE; 1175 for (count = 2; count < tmp; ++count) { 1176 uint64_t idx = pfn + count; 1177 1178 if (contiguous != (pages_addr[idx] == 1179 pages_addr[idx - 1] + PAGE_SIZE)) 1180 break; 1181 } 1182 if (!contiguous) 1183 count--; 1184 num_entries = count * 1185 AMDGPU_GPU_PAGES_IN_CPU_PAGE; 1186 } 1187 1188 if (!contiguous) { 1189 addr = cursor.start; 1190 params.pages_addr = pages_addr; 1191 } else { 1192 addr = pages_addr[cursor.start >> PAGE_SHIFT]; 1193 params.pages_addr = NULL; 1194 } 1195 1196 } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) { 1197 addr = vram_base + cursor.start; 1198 } else { 1199 addr = 0; 1200 } 1201 1202 tmp = start + num_entries; 1203 r = amdgpu_vm_ptes_update(¶ms, start, tmp, addr, flags); 1204 if (r) 1205 goto error_free; 1206 1207 amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); 1208 start = tmp; 1209 } 1210 1211 r = vm->update_funcs->commit(¶ms, fence); 1212 if (r) 1213 goto error_free; 1214 1215 if (params.needs_flush) { 1216 amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb); 1217 tlb_cb = NULL; 1218 } 1219 1220 amdgpu_vm_pt_free_list(adev, ¶ms); 1221 1222 error_free: 1223 kfree(tlb_cb); 1224 amdgpu_vm_eviction_unlock(vm); 1225 drm_dev_exit(idx); 1226 return r; 1227 } 1228 1229 void amdgpu_vm_get_memory(struct amdgpu_vm *vm, 1230 struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]) 1231 { 1232 spin_lock(&vm->stats_lock); 1233 memcpy(stats, vm->stats, sizeof(*stats) * 
__AMDGPU_PL_NUM); 1234 spin_unlock(&vm->stats_lock); 1235 } 1236 1237 /** 1238 * amdgpu_vm_bo_update - update all BO mappings in the vm page table 1239 * 1240 * @adev: amdgpu_device pointer 1241 * @bo_va: requested BO and VM object 1242 * @clear: if true clear the entries 1243 * 1244 * Fill in the page table entries for @bo_va. 1245 * 1246 * Returns: 1247 * 0 for success, -EINVAL for failure. 1248 */ 1249 int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, 1250 bool clear) 1251 { 1252 struct amdgpu_bo *bo = bo_va->base.bo; 1253 struct amdgpu_vm *vm = bo_va->base.vm; 1254 struct amdgpu_bo_va_mapping *mapping; 1255 struct dma_fence **last_update; 1256 dma_addr_t *pages_addr = NULL; 1257 struct ttm_resource *mem; 1258 struct amdgpu_sync sync; 1259 bool flush_tlb = clear; 1260 uint64_t vram_base; 1261 uint64_t flags; 1262 bool uncached; 1263 int r; 1264 1265 amdgpu_sync_create(&sync); 1266 if (clear) { 1267 mem = NULL; 1268 1269 /* Implicitly sync to command submissions in the same VM before 1270 * unmapping. 1271 */ 1272 r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv, 1273 AMDGPU_SYNC_EQ_OWNER, vm); 1274 if (r) 1275 goto error_free; 1276 if (bo) { 1277 r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv); 1278 if (r) 1279 goto error_free; 1280 } 1281 } else if (!bo) { 1282 mem = NULL; 1283 1284 /* PRT map operations don't need to sync to anything. 
*/ 1285 1286 } else { 1287 struct drm_gem_object *obj = &bo->tbo.base; 1288 1289 if (drm_gem_is_imported(obj) && bo_va->is_xgmi) { 1290 struct dma_buf *dma_buf = obj->import_attach->dmabuf; 1291 struct drm_gem_object *gobj = dma_buf->priv; 1292 struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); 1293 1294 if (abo->tbo.resource && 1295 abo->tbo.resource->mem_type == TTM_PL_VRAM) 1296 bo = gem_to_amdgpu_bo(gobj); 1297 } 1298 mem = bo->tbo.resource; 1299 if (mem && (mem->mem_type == TTM_PL_TT || 1300 mem->mem_type == AMDGPU_PL_PREEMPT)) 1301 pages_addr = bo->tbo.ttm->dma_address; 1302 1303 /* Implicitly sync to moving fences before mapping anything */ 1304 r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, 1305 AMDGPU_SYNC_EXPLICIT, vm); 1306 if (r) 1307 goto error_free; 1308 } 1309 1310 if (bo) { 1311 struct amdgpu_device *bo_adev; 1312 1313 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); 1314 1315 if (amdgpu_bo_encrypted(bo)) 1316 flags |= AMDGPU_PTE_TMZ; 1317 1318 bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); 1319 vram_base = bo_adev->vm_manager.vram_base_offset; 1320 uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0; 1321 } else { 1322 flags = 0x0; 1323 vram_base = 0; 1324 uncached = false; 1325 } 1326 1327 if (clear || amdgpu_vm_is_bo_always_valid(vm, bo)) 1328 last_update = &vm->last_update; 1329 else 1330 last_update = &bo_va->last_pt_update; 1331 1332 if (!clear && bo_va->base.moved) { 1333 flush_tlb = true; 1334 list_splice_init(&bo_va->valids, &bo_va->invalids); 1335 1336 } else if (bo_va->cleared != clear) { 1337 list_splice_init(&bo_va->valids, &bo_va->invalids); 1338 } 1339 1340 list_for_each_entry(mapping, &bo_va->invalids, list) { 1341 uint64_t update_flags = flags; 1342 1343 /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here 1344 * but in case of something, we filter the flags in first place 1345 */ 1346 if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE)) 1347 update_flags &= ~AMDGPU_PTE_READABLE; 1348 if (!(mapping->flags 
& AMDGPU_VM_PAGE_WRITEABLE)) 1349 update_flags &= ~AMDGPU_PTE_WRITEABLE; 1350 1351 /* Apply ASIC specific mapping flags */ 1352 amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags, 1353 &update_flags); 1354 1355 trace_amdgpu_vm_bo_update(mapping); 1356 1357 r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, 1358 !uncached, &sync, mapping->start, 1359 mapping->last, update_flags, 1360 mapping->offset, vram_base, mem, 1361 pages_addr, last_update); 1362 if (r) 1363 goto error_free; 1364 } 1365 1366 /* If the BO is not in its preferred location add it back to 1367 * the evicted list so that it gets validated again on the 1368 * next command submission. 1369 */ 1370 if (amdgpu_vm_is_bo_always_valid(vm, bo)) { 1371 if (bo->tbo.resource && 1372 !(bo->preferred_domains & 1373 amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))) 1374 amdgpu_vm_bo_evicted(&bo_va->base); 1375 else 1376 amdgpu_vm_bo_idle(&bo_va->base); 1377 } else { 1378 amdgpu_vm_bo_idle(&bo_va->base); 1379 } 1380 1381 list_splice_init(&bo_va->invalids, &bo_va->valids); 1382 bo_va->cleared = clear; 1383 bo_va->base.moved = false; 1384 1385 if (trace_amdgpu_vm_bo_mapping_enabled()) { 1386 list_for_each_entry(mapping, &bo_va->valids, list) 1387 trace_amdgpu_vm_bo_mapping(mapping); 1388 } 1389 1390 error_free: 1391 amdgpu_sync_free(&sync); 1392 return r; 1393 } 1394 1395 /** 1396 * amdgpu_vm_update_prt_state - update the global PRT state 1397 * 1398 * @adev: amdgpu_device pointer 1399 */ 1400 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) 1401 { 1402 unsigned long flags; 1403 bool enable; 1404 1405 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); 1406 enable = !!atomic_read(&adev->vm_manager.num_prt_users); 1407 adev->gmc.gmc_funcs->set_prt(adev, enable); 1408 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); 1409 } 1410 1411 /** 1412 * amdgpu_vm_prt_get - add a PRT user 1413 * 1414 * @adev: amdgpu_device pointer 1415 */ 1416 static void amdgpu_vm_prt_get(struct 
amdgpu_device *adev) 1417 { 1418 if (!adev->gmc.gmc_funcs->set_prt) 1419 return; 1420 1421 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) 1422 amdgpu_vm_update_prt_state(adev); 1423 } 1424 1425 /** 1426 * amdgpu_vm_prt_put - drop a PRT user 1427 * 1428 * @adev: amdgpu_device pointer 1429 */ 1430 static void amdgpu_vm_prt_put(struct amdgpu_device *adev) 1431 { 1432 if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0) 1433 amdgpu_vm_update_prt_state(adev); 1434 } 1435 1436 /** 1437 * amdgpu_vm_prt_cb - callback for updating the PRT status 1438 * 1439 * @fence: fence for the callback 1440 * @_cb: the callback function 1441 */ 1442 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) 1443 { 1444 struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb); 1445 1446 amdgpu_vm_prt_put(cb->adev); 1447 kfree(cb); 1448 } 1449 1450 /** 1451 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status 1452 * 1453 * @adev: amdgpu_device pointer 1454 * @fence: fence for the callback 1455 */ 1456 static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, 1457 struct dma_fence *fence) 1458 { 1459 struct amdgpu_prt_cb *cb; 1460 1461 if (!adev->gmc.gmc_funcs->set_prt) 1462 return; 1463 1464 cb = kmalloc_obj(struct amdgpu_prt_cb); 1465 if (!cb) { 1466 /* Last resort when we are OOM */ 1467 if (fence) 1468 dma_fence_wait(fence, false); 1469 1470 amdgpu_vm_prt_put(adev); 1471 } else { 1472 cb->adev = adev; 1473 if (!fence || dma_fence_add_callback(fence, &cb->cb, 1474 amdgpu_vm_prt_cb)) 1475 amdgpu_vm_prt_cb(fence, &cb->cb); 1476 } 1477 } 1478 1479 /** 1480 * amdgpu_vm_free_mapping - free a mapping 1481 * 1482 * @adev: amdgpu_device pointer 1483 * @vm: requested vm 1484 * @mapping: mapping to be freed 1485 * @fence: fence of the unmap operation 1486 * 1487 * Free a mapping and make sure we decrease the PRT usage count if applicable. 
1488 */ 1489 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, 1490 struct amdgpu_vm *vm, 1491 struct amdgpu_bo_va_mapping *mapping, 1492 struct dma_fence *fence) 1493 { 1494 if (mapping->flags & AMDGPU_VM_PAGE_PRT) 1495 amdgpu_vm_add_prt_cb(adev, fence); 1496 kfree(mapping); 1497 } 1498 1499 /** 1500 * amdgpu_vm_prt_fini - finish all prt mappings 1501 * 1502 * @adev: amdgpu_device pointer 1503 * @vm: requested vm 1504 * 1505 * Register a cleanup callback to disable PRT support after VM dies. 1506 */ 1507 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 1508 { 1509 struct dma_resv *resv = vm->root.bo->tbo.base.resv; 1510 struct dma_resv_iter cursor; 1511 struct dma_fence *fence; 1512 1513 dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { 1514 /* Add a callback for each fence in the reservation object */ 1515 amdgpu_vm_prt_get(adev); 1516 amdgpu_vm_add_prt_cb(adev, fence); 1517 } 1518 } 1519 1520 /** 1521 * amdgpu_vm_clear_freed - clear freed BOs in the PT 1522 * 1523 * @adev: amdgpu_device pointer 1524 * @vm: requested vm 1525 * @fence: optional resulting fence (unchanged if no work needed to be done 1526 * or if an error occurred) 1527 * 1528 * Make sure all freed BOs are cleared in the PT. 1529 * PTs have to be reserved and mutex must be locked! 1530 * 1531 * Returns: 1532 * 0 for success. 1533 * 1534 */ 1535 int amdgpu_vm_clear_freed(struct amdgpu_device *adev, 1536 struct amdgpu_vm *vm, 1537 struct dma_fence **fence) 1538 { 1539 struct amdgpu_bo_va_mapping *mapping; 1540 struct dma_fence *f = NULL; 1541 struct amdgpu_sync sync; 1542 int r; 1543 1544 1545 /* 1546 * Implicitly sync to command submissions in the same VM before 1547 * unmapping. 
1548 */ 1549 amdgpu_sync_create(&sync); 1550 r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv, 1551 AMDGPU_SYNC_EQ_OWNER, vm); 1552 if (r) 1553 goto error_free; 1554 1555 while (!list_empty(&vm->freed)) { 1556 mapping = list_first_entry(&vm->freed, 1557 struct amdgpu_bo_va_mapping, list); 1558 list_del(&mapping->list); 1559 1560 r = amdgpu_vm_update_range(adev, vm, false, false, true, false, 1561 &sync, mapping->start, mapping->last, 1562 0, 0, 0, NULL, NULL, &f); 1563 amdgpu_vm_free_mapping(adev, vm, mapping, f); 1564 if (r) { 1565 dma_fence_put(f); 1566 goto error_free; 1567 } 1568 } 1569 1570 if (fence && f) { 1571 dma_fence_put(*fence); 1572 *fence = f; 1573 } else { 1574 dma_fence_put(f); 1575 } 1576 1577 error_free: 1578 amdgpu_sync_free(&sync); 1579 return r; 1580 1581 } 1582 1583 /** 1584 * amdgpu_vm_handle_moved - handle moved BOs in the PT 1585 * 1586 * @adev: amdgpu_device pointer 1587 * @vm: requested vm 1588 * @ticket: optional reservation ticket used to reserve the VM 1589 * 1590 * Make sure all BOs which are moved are updated in the PTs. 1591 * 1592 * Returns: 1593 * 0 for success. 1594 * 1595 * PTs have to be reserved! 
1596 */ 1597 int amdgpu_vm_handle_moved(struct amdgpu_device *adev, 1598 struct amdgpu_vm *vm, 1599 struct ww_acquire_ctx *ticket) 1600 { 1601 struct amdgpu_bo_va *bo_va, *tmp; 1602 struct dma_resv *resv; 1603 bool clear, unlock; 1604 int r; 1605 1606 list_for_each_entry_safe(bo_va, tmp, &vm->always_valid.moved, 1607 base.vm_status) { 1608 /* Per VM BOs never need to bo cleared in the page tables */ 1609 r = amdgpu_vm_bo_update(adev, bo_va, false); 1610 if (r) 1611 return r; 1612 } 1613 1614 spin_lock(&vm->individual_lock); 1615 while (!list_empty(&vm->individual.moved)) { 1616 bo_va = list_first_entry(&vm->individual.moved, 1617 typeof(*bo_va), base.vm_status); 1618 resv = bo_va->base.bo->tbo.base.resv; 1619 spin_unlock(&vm->individual_lock); 1620 1621 /* Try to reserve the BO to avoid clearing its ptes */ 1622 if (!adev->debug_vm && dma_resv_trylock(resv)) { 1623 clear = false; 1624 unlock = true; 1625 /* The caller is already holding the reservation lock */ 1626 } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { 1627 clear = false; 1628 unlock = false; 1629 /* Somebody else is using the BO right now */ 1630 } else { 1631 clear = true; 1632 unlock = false; 1633 } 1634 1635 r = amdgpu_vm_bo_update(adev, bo_va, clear); 1636 1637 if (unlock) 1638 dma_resv_unlock(resv); 1639 if (r) 1640 return r; 1641 1642 /* Remember evicted DMABuf imports in compute VMs for later 1643 * validation 1644 */ 1645 if (vm->is_compute_context && 1646 drm_gem_is_imported(&bo_va->base.bo->tbo.base) && 1647 (!bo_va->base.bo->tbo.resource || 1648 bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) 1649 amdgpu_vm_bo_evicted(&bo_va->base); 1650 1651 spin_lock(&vm->individual_lock); 1652 } 1653 spin_unlock(&vm->individual_lock); 1654 1655 return 0; 1656 } 1657 1658 /** 1659 * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM 1660 * 1661 * @adev: amdgpu_device pointer 1662 * @vm: requested vm 1663 * @flush_type: flush type 1664 * @xcc_mask: mask of XCCs that belong to the 
compute partition in need of a TLB flush. 1665 * 1666 * Flush TLB if needed for a compute VM. 1667 * 1668 * Returns: 1669 * 0 for success. 1670 */ 1671 int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, 1672 struct amdgpu_vm *vm, 1673 uint32_t flush_type, 1674 uint32_t xcc_mask) 1675 { 1676 uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); 1677 bool all_hub = false; 1678 int xcc = 0, r = 0; 1679 1680 WARN_ON_ONCE(!vm->is_compute_context); 1681 1682 /* 1683 * It can be that we race and lose here, but that is extremely unlikely 1684 * and the worst thing which could happen is that we flush the changes 1685 * into the TLB once more which is harmless. 1686 */ 1687 if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq) 1688 return 0; 1689 1690 if (adev->family == AMDGPU_FAMILY_AI || 1691 adev->family == AMDGPU_FAMILY_RV) 1692 all_hub = true; 1693 1694 for_each_inst(xcc, xcc_mask) { 1695 r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type, 1696 all_hub, xcc); 1697 if (r) 1698 break; 1699 } 1700 return r; 1701 } 1702 1703 /** 1704 * amdgpu_vm_bo_add - add a bo to a specific vm 1705 * 1706 * @adev: amdgpu_device pointer 1707 * @vm: requested vm 1708 * @bo: amdgpu buffer object 1709 * 1710 * Add @bo into the requested vm. 1711 * Add @bo to the list of bos associated with the vm 1712 * 1713 * Returns: 1714 * Newly added bo_va or NULL for failure 1715 * 1716 * Object has to be reserved! 
1717 */ 1718 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, 1719 struct amdgpu_vm *vm, 1720 struct amdgpu_bo *bo) 1721 { 1722 struct amdgpu_bo_va *bo_va; 1723 1724 amdgpu_vm_assert_locked(vm); 1725 1726 bo_va = kzalloc_obj(struct amdgpu_bo_va); 1727 if (bo_va == NULL) { 1728 return NULL; 1729 } 1730 amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); 1731 1732 bo_va->ref_count = 1; 1733 bo_va->last_pt_update = dma_fence_get_stub(); 1734 INIT_LIST_HEAD(&bo_va->valids); 1735 INIT_LIST_HEAD(&bo_va->invalids); 1736 1737 if (!bo) 1738 return bo_va; 1739 1740 dma_resv_assert_held(bo->tbo.base.resv); 1741 if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) { 1742 bo_va->is_xgmi = true; 1743 /* Power up XGMI if it can be potentially used */ 1744 amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20); 1745 } 1746 1747 return bo_va; 1748 } 1749 1750 1751 /** 1752 * amdgpu_vm_bo_insert_map - insert a new mapping 1753 * 1754 * @adev: amdgpu_device pointer 1755 * @bo_va: bo_va to store the address 1756 * @mapping: the mapping to insert 1757 * 1758 * Insert a new mapping into all structures. 
1759 */ 1760 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, 1761 struct amdgpu_bo_va *bo_va, 1762 struct amdgpu_bo_va_mapping *mapping) 1763 { 1764 struct amdgpu_vm *vm = bo_va->base.vm; 1765 struct amdgpu_bo *bo = bo_va->base.bo; 1766 1767 mapping->bo_va = bo_va; 1768 list_add(&mapping->list, &bo_va->invalids); 1769 amdgpu_vm_it_insert(mapping, &vm->va); 1770 1771 if (mapping->flags & AMDGPU_VM_PAGE_PRT) 1772 amdgpu_vm_prt_get(adev); 1773 1774 if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved) 1775 amdgpu_vm_bo_moved(&bo_va->base); 1776 1777 trace_amdgpu_vm_bo_map(bo_va, mapping); 1778 } 1779 1780 /* Validate operation parameters to prevent potential abuse */ 1781 static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, 1782 struct amdgpu_bo *bo, 1783 uint64_t saddr, 1784 uint64_t offset, 1785 uint64_t size) 1786 { 1787 uint64_t tmp, lpfn; 1788 1789 if (saddr & AMDGPU_GPU_PAGE_MASK 1790 || offset & AMDGPU_GPU_PAGE_MASK 1791 || size & AMDGPU_GPU_PAGE_MASK) 1792 return -EINVAL; 1793 1794 if (check_add_overflow(saddr, size, &tmp) 1795 || check_add_overflow(offset, size, &tmp) 1796 || size == 0 /* which also leads to end < begin */) 1797 return -EINVAL; 1798 1799 /* make sure object fit at this offset */ 1800 if (bo && offset + size > amdgpu_bo_size(bo)) 1801 return -EINVAL; 1802 1803 /* Ensure last pfn not exceed max_pfn */ 1804 lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; 1805 if (lpfn >= adev->vm_manager.max_pfn) 1806 return -EINVAL; 1807 1808 return 0; 1809 } 1810 1811 /** 1812 * amdgpu_vm_bo_map - map bo inside a vm 1813 * 1814 * @adev: amdgpu_device pointer 1815 * @bo_va: bo_va to store the address 1816 * @saddr: where to map the BO 1817 * @offset: requested offset in the BO 1818 * @size: BO size in bytes 1819 * @flags: attributes of pages (read/write/valid/etc.) 1820 * 1821 * Add a mapping of the BO at the specefied addr into the VM. 1822 * 1823 * Returns: 1824 * 0 for success, error for failure. 
1825 * 1826 * Object has to be reserved and unreserved outside! 1827 */ 1828 int amdgpu_vm_bo_map(struct amdgpu_device *adev, 1829 struct amdgpu_bo_va *bo_va, 1830 uint64_t saddr, uint64_t offset, 1831 uint64_t size, uint32_t flags) 1832 { 1833 struct amdgpu_bo_va_mapping *mapping, *tmp; 1834 struct amdgpu_bo *bo = bo_va->base.bo; 1835 struct amdgpu_vm *vm = bo_va->base.vm; 1836 uint64_t eaddr; 1837 int r; 1838 1839 r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); 1840 if (r) 1841 return r; 1842 1843 saddr /= AMDGPU_GPU_PAGE_SIZE; 1844 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; 1845 1846 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 1847 if (tmp) { 1848 /* bo and tmp overlap, invalid addr */ 1849 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " 1850 "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, 1851 tmp->start, tmp->last + 1); 1852 return -EINVAL; 1853 } 1854 1855 mapping = kmalloc_obj(*mapping); 1856 if (!mapping) 1857 return -ENOMEM; 1858 1859 mapping->start = saddr; 1860 mapping->last = eaddr; 1861 mapping->offset = offset; 1862 mapping->flags = flags; 1863 1864 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 1865 1866 return 0; 1867 } 1868 1869 /** 1870 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings 1871 * 1872 * @adev: amdgpu_device pointer 1873 * @bo_va: bo_va to store the address 1874 * @saddr: where to map the BO 1875 * @offset: requested offset in the BO 1876 * @size: BO size in bytes 1877 * @flags: attributes of pages (read/write/valid/etc.) 1878 * 1879 * Add a mapping of the BO at the specefied addr into the VM. Replace existing 1880 * mappings as we do so. 1881 * 1882 * Returns: 1883 * 0 for success, error for failure. 1884 * 1885 * Object has to be reserved and unreserved outside! 
1886 */ 1887 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, 1888 struct amdgpu_bo_va *bo_va, 1889 uint64_t saddr, uint64_t offset, 1890 uint64_t size, uint32_t flags) 1891 { 1892 struct amdgpu_bo_va_mapping *mapping; 1893 struct amdgpu_bo *bo = bo_va->base.bo; 1894 uint64_t eaddr; 1895 int r; 1896 1897 r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); 1898 if (r) 1899 return r; 1900 1901 /* Allocate all the needed memory */ 1902 mapping = kmalloc_obj(*mapping); 1903 if (!mapping) 1904 return -ENOMEM; 1905 1906 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); 1907 if (r) { 1908 kfree(mapping); 1909 return r; 1910 } 1911 1912 saddr /= AMDGPU_GPU_PAGE_SIZE; 1913 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; 1914 1915 mapping->start = saddr; 1916 mapping->last = eaddr; 1917 mapping->offset = offset; 1918 mapping->flags = flags; 1919 1920 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 1921 1922 return 0; 1923 } 1924 1925 /** 1926 * amdgpu_vm_bo_unmap - remove bo mapping from vm 1927 * 1928 * @adev: amdgpu_device pointer 1929 * @bo_va: bo_va to remove the address from 1930 * @saddr: where to the BO is mapped 1931 * 1932 * Remove a mapping of the BO at the specefied addr from the VM. 1933 * 1934 * Returns: 1935 * 0 for success, error for failure. 1936 * 1937 * Object has to be reserved and unreserved outside! 
1938 */ 1939 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, 1940 struct amdgpu_bo_va *bo_va, 1941 uint64_t saddr) 1942 { 1943 struct amdgpu_bo_va_mapping *mapping; 1944 struct amdgpu_vm *vm = bo_va->base.vm; 1945 bool valid = true; 1946 1947 saddr /= AMDGPU_GPU_PAGE_SIZE; 1948 1949 list_for_each_entry(mapping, &bo_va->valids, list) { 1950 if (mapping->start == saddr) 1951 break; 1952 } 1953 1954 if (&mapping->list == &bo_va->valids) { 1955 valid = false; 1956 1957 list_for_each_entry(mapping, &bo_va->invalids, list) { 1958 if (mapping->start == saddr) 1959 break; 1960 } 1961 1962 if (&mapping->list == &bo_va->invalids) 1963 return -ENOENT; 1964 } 1965 1966 /* It's unlikely to happen that the mapping userq hasn't been idled 1967 * during user requests GEM unmap IOCTL except for forcing the unmap 1968 * from user space. 1969 */ 1970 if (unlikely(bo_va->userq_va_mapped)) 1971 amdgpu_userq_gem_va_unmap_validate(adev, mapping, saddr); 1972 1973 list_del(&mapping->list); 1974 amdgpu_vm_it_remove(mapping, &vm->va); 1975 mapping->bo_va = NULL; 1976 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1977 1978 if (valid) 1979 list_add(&mapping->list, &vm->freed); 1980 else 1981 amdgpu_vm_free_mapping(adev, vm, mapping, 1982 bo_va->last_pt_update); 1983 1984 return 0; 1985 } 1986 1987 /** 1988 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range 1989 * 1990 * @adev: amdgpu_device pointer 1991 * @vm: VM structure to use 1992 * @saddr: start of the range 1993 * @size: size of the range 1994 * 1995 * Remove all mappings in a range, split them as appropriate. 1996 * 1997 * Returns: 1998 * 0 for success, error for failure. 
1999 */ 2000 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, 2001 struct amdgpu_vm *vm, 2002 uint64_t saddr, uint64_t size) 2003 { 2004 struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; 2005 LIST_HEAD(removed); 2006 uint64_t eaddr; 2007 int r; 2008 2009 r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); 2010 if (r) 2011 return r; 2012 2013 saddr /= AMDGPU_GPU_PAGE_SIZE; 2014 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; 2015 2016 /* Allocate all the needed memory */ 2017 before = kzalloc_obj(*before); 2018 if (!before) 2019 return -ENOMEM; 2020 INIT_LIST_HEAD(&before->list); 2021 2022 after = kzalloc_obj(*after); 2023 if (!after) { 2024 kfree(before); 2025 return -ENOMEM; 2026 } 2027 INIT_LIST_HEAD(&after->list); 2028 2029 /* Now gather all removed mappings */ 2030 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 2031 while (tmp) { 2032 /* Remember mapping split at the start */ 2033 if (tmp->start < saddr) { 2034 before->start = tmp->start; 2035 before->last = saddr - 1; 2036 before->offset = tmp->offset; 2037 before->flags = tmp->flags; 2038 before->bo_va = tmp->bo_va; 2039 list_add(&before->list, &tmp->bo_va->invalids); 2040 } 2041 2042 /* Remember mapping split at the end */ 2043 if (tmp->last > eaddr) { 2044 after->start = eaddr + 1; 2045 after->last = tmp->last; 2046 after->offset = tmp->offset; 2047 after->offset += (after->start - tmp->start) << PAGE_SHIFT; 2048 after->flags = tmp->flags; 2049 after->bo_va = tmp->bo_va; 2050 list_add(&after->list, &tmp->bo_va->invalids); 2051 } 2052 2053 list_del(&tmp->list); 2054 list_add(&tmp->list, &removed); 2055 2056 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr); 2057 } 2058 2059 /* And free them up */ 2060 list_for_each_entry_safe(tmp, next, &removed, list) { 2061 amdgpu_vm_it_remove(tmp, &vm->va); 2062 list_del(&tmp->list); 2063 2064 if (tmp->start < saddr) 2065 tmp->start = saddr; 2066 if (tmp->last > eaddr) 2067 tmp->last = eaddr; 2068 2069 tmp->bo_va = NULL; 2070 
list_add(&tmp->list, &vm->freed); 2071 trace_amdgpu_vm_bo_unmap(NULL, tmp); 2072 } 2073 2074 /* Insert partial mapping before the range */ 2075 if (!list_empty(&before->list)) { 2076 struct amdgpu_bo *bo = before->bo_va->base.bo; 2077 2078 amdgpu_vm_it_insert(before, &vm->va); 2079 if (before->flags & AMDGPU_VM_PAGE_PRT) 2080 amdgpu_vm_prt_get(adev); 2081 2082 if (amdgpu_vm_is_bo_always_valid(vm, bo) && 2083 !before->bo_va->base.moved) 2084 amdgpu_vm_bo_moved(&before->bo_va->base); 2085 } else { 2086 kfree(before); 2087 } 2088 2089 /* Insert partial mapping after the range */ 2090 if (!list_empty(&after->list)) { 2091 struct amdgpu_bo *bo = after->bo_va->base.bo; 2092 2093 amdgpu_vm_it_insert(after, &vm->va); 2094 if (after->flags & AMDGPU_VM_PAGE_PRT) 2095 amdgpu_vm_prt_get(adev); 2096 2097 if (amdgpu_vm_is_bo_always_valid(vm, bo) && 2098 !after->bo_va->base.moved) 2099 amdgpu_vm_bo_moved(&after->bo_va->base); 2100 } else { 2101 kfree(after); 2102 } 2103 2104 return 0; 2105 } 2106 2107 /** 2108 * amdgpu_vm_bo_lookup_mapping - find mapping by address 2109 * 2110 * @vm: the requested VM 2111 * @addr: the address 2112 * 2113 * Find a mapping by it's address. 2114 * 2115 * Returns: 2116 * The amdgpu_bo_va_mapping matching for addr or NULL 2117 * 2118 */ 2119 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, 2120 uint64_t addr) 2121 { 2122 return amdgpu_vm_it_iter_first(&vm->va, addr, addr); 2123 } 2124 2125 /** 2126 * amdgpu_vm_bo_trace_cs - trace all reserved mappings 2127 * 2128 * @vm: the requested vm 2129 * @ticket: CS ticket 2130 * 2131 * Trace all mappings of BOs reserved during a command submission. 
2132 */ 2133 void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) 2134 { 2135 struct amdgpu_bo_va_mapping *mapping; 2136 2137 if (!trace_amdgpu_vm_bo_cs_enabled()) 2138 return; 2139 2140 for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping; 2141 mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) { 2142 if (mapping->bo_va && mapping->bo_va->base.bo) { 2143 struct amdgpu_bo *bo; 2144 2145 bo = mapping->bo_va->base.bo; 2146 if (dma_resv_locking_ctx(bo->tbo.base.resv) != 2147 ticket) 2148 continue; 2149 } 2150 2151 trace_amdgpu_vm_bo_cs(mapping); 2152 } 2153 } 2154 2155 /** 2156 * amdgpu_vm_bo_del - remove a bo from a specific vm 2157 * 2158 * @adev: amdgpu_device pointer 2159 * @bo_va: requested bo_va 2160 * 2161 * Remove @bo_va->bo from the requested vm. 2162 * 2163 * Object have to be reserved! 2164 */ 2165 void amdgpu_vm_bo_del(struct amdgpu_device *adev, 2166 struct amdgpu_bo_va *bo_va) 2167 { 2168 struct amdgpu_bo_va_mapping *mapping, *next; 2169 struct amdgpu_bo *bo = bo_va->base.bo; 2170 struct amdgpu_vm *vm = bo_va->base.vm; 2171 struct amdgpu_vm_bo_base **base; 2172 2173 dma_resv_assert_held(vm->root.bo->tbo.base.resv); 2174 2175 if (bo) { 2176 dma_resv_assert_held(bo->tbo.base.resv); 2177 if (amdgpu_vm_is_bo_always_valid(vm, bo)) 2178 ttm_bo_set_bulk_move(&bo->tbo, NULL); 2179 2180 for (base = &bo_va->base.bo->vm_bo; *base; 2181 base = &(*base)->next) { 2182 if (*base != &bo_va->base) 2183 continue; 2184 2185 amdgpu_vm_update_stats(*base, bo->tbo.resource, -1); 2186 *base = bo_va->base.next; 2187 break; 2188 } 2189 } 2190 2191 spin_lock(&vm->individual_lock); 2192 list_del(&bo_va->base.vm_status); 2193 spin_unlock(&vm->individual_lock); 2194 2195 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 2196 list_del(&mapping->list); 2197 amdgpu_vm_it_remove(mapping, &vm->va); 2198 mapping->bo_va = NULL; 2199 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 2200 list_add(&mapping->list, &vm->freed); 2201 } 2202 
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { 2203 list_del(&mapping->list); 2204 amdgpu_vm_it_remove(mapping, &vm->va); 2205 amdgpu_vm_free_mapping(adev, vm, mapping, 2206 bo_va->last_pt_update); 2207 } 2208 2209 dma_fence_put(bo_va->last_pt_update); 2210 2211 if (bo && bo_va->is_xgmi) 2212 amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN); 2213 2214 kfree(bo_va); 2215 } 2216 2217 /** 2218 * amdgpu_vm_evictable - check if we can evict a VM 2219 * 2220 * @bo: A page table of the VM. 2221 * 2222 * Check if it is possible to evict a VM. 2223 */ 2224 bool amdgpu_vm_evictable(struct amdgpu_bo *bo) 2225 { 2226 struct amdgpu_vm_bo_base *bo_base = bo->vm_bo; 2227 2228 /* Page tables of a destroyed VM can go away immediately */ 2229 if (!bo_base || !bo_base->vm) 2230 return true; 2231 2232 /* Don't evict VM page tables while they are busy */ 2233 if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP)) 2234 return false; 2235 2236 /* Try to block ongoing updates */ 2237 if (!amdgpu_vm_eviction_trylock(bo_base->vm)) 2238 return false; 2239 2240 /* Don't evict VM page tables while they are updated */ 2241 if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) { 2242 amdgpu_vm_eviction_unlock(bo_base->vm); 2243 return false; 2244 } 2245 2246 bo_base->vm->evicting = true; 2247 amdgpu_vm_eviction_unlock(bo_base->vm); 2248 return true; 2249 } 2250 2251 /** 2252 * amdgpu_vm_bo_invalidate - mark the bo as invalid 2253 * 2254 * @bo: amdgpu buffer object 2255 * @evicted: is the BO evicted 2256 * 2257 * Mark @bo as invalid. 
2258 */ 2259 void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted) 2260 { 2261 struct amdgpu_vm_bo_base *bo_base; 2262 2263 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { 2264 struct amdgpu_vm *vm = bo_base->vm; 2265 2266 if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) { 2267 amdgpu_vm_bo_evicted(bo_base); 2268 continue; 2269 } 2270 2271 if (bo_base->moved) 2272 continue; 2273 amdgpu_vm_bo_moved(bo_base); 2274 } 2275 } 2276 2277 /** 2278 * amdgpu_vm_bo_move - handle BO move 2279 * 2280 * @bo: amdgpu buffer object 2281 * @new_mem: the new placement of the BO move 2282 * @evicted: is the BO evicted 2283 * 2284 * Update the memory stats for the new placement and mark @bo as invalid. 2285 */ 2286 void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem, 2287 bool evicted) 2288 { 2289 struct amdgpu_vm_bo_base *bo_base; 2290 2291 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { 2292 struct amdgpu_vm *vm = bo_base->vm; 2293 2294 spin_lock(&vm->stats_lock); 2295 amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); 2296 amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); 2297 spin_unlock(&vm->stats_lock); 2298 } 2299 2300 amdgpu_vm_bo_invalidate(bo, evicted); 2301 } 2302 2303 /** 2304 * amdgpu_vm_get_block_size - calculate VM page table size as power of two 2305 * 2306 * @vm_size: VM size 2307 * 2308 * Returns: 2309 * VM page table as power of two 2310 */ 2311 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) 2312 { 2313 /* Total bits covered by PD + PTs */ 2314 unsigned bits = ilog2(vm_size) + 18; 2315 2316 /* Make sure the PD is 4K in size up to 8GB address space. 
2317 Above that split equal between PD and PTs */ 2318 if (vm_size <= 8) 2319 return (bits - 9); 2320 else 2321 return ((bits + 3) / 2); 2322 } 2323 2324 /** 2325 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size 2326 * 2327 * @adev: amdgpu_device pointer 2328 * @min_vm_size: the minimum vm size in GB if it's set auto 2329 * @fragment_size_default: Default PTE fragment size 2330 * @max_level: max VMPT level 2331 * @max_bits: max address space size in bits 2332 * 2333 */ 2334 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, 2335 uint32_t fragment_size_default, unsigned max_level, 2336 unsigned max_bits) 2337 { 2338 unsigned int max_size = 1 << (max_bits - 30); 2339 unsigned int vm_size; 2340 uint64_t tmp; 2341 2342 /* adjust vm size first */ 2343 if (amdgpu_vm_size != -1) { 2344 vm_size = amdgpu_vm_size; 2345 if (vm_size > max_size) { 2346 dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n", 2347 amdgpu_vm_size, max_size); 2348 vm_size = max_size; 2349 } 2350 } else { 2351 struct sysinfo si; 2352 unsigned int phys_ram_gb; 2353 2354 /* Optimal VM size depends on the amount of physical 2355 * RAM available. Underlying requirements and 2356 * assumptions: 2357 * 2358 * - Need to map system memory and VRAM from all GPUs 2359 * - VRAM from other GPUs not known here 2360 * - Assume VRAM <= system memory 2361 * - On GFX8 and older, VM space can be segmented for 2362 * different MTYPEs 2363 * - Need to allow room for fragmentation, guard pages etc. 2364 * 2365 * This adds up to a rough guess of system memory x3. 2366 * Round up to power of two to maximize the available 2367 * VM size with the given page table size. 
2368 */ 2369 si_meminfo(&si); 2370 phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + 2371 (1 << 30) - 1) >> 30; 2372 vm_size = roundup_pow_of_two( 2373 clamp(phys_ram_gb * 3, min_vm_size, max_size)); 2374 } 2375 2376 adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; 2377 adev->vm_manager.max_level = max_level; 2378 2379 tmp = roundup_pow_of_two(adev->vm_manager.max_pfn); 2380 if (amdgpu_vm_block_size != -1) 2381 tmp >>= amdgpu_vm_block_size - 9; 2382 tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; 2383 adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp); 2384 switch (adev->vm_manager.num_level) { 2385 case 4: 2386 adev->vm_manager.root_level = AMDGPU_VM_PDB3; 2387 break; 2388 case 3: 2389 adev->vm_manager.root_level = AMDGPU_VM_PDB2; 2390 break; 2391 case 2: 2392 adev->vm_manager.root_level = AMDGPU_VM_PDB1; 2393 break; 2394 case 1: 2395 adev->vm_manager.root_level = AMDGPU_VM_PDB0; 2396 break; 2397 default: 2398 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n"); 2399 } 2400 /* block size depends on vm size and hw setup*/ 2401 if (amdgpu_vm_block_size != -1) 2402 adev->vm_manager.block_size = 2403 min((unsigned)amdgpu_vm_block_size, max_bits 2404 - AMDGPU_GPU_PAGE_SHIFT 2405 - 9 * adev->vm_manager.num_level); 2406 else if (adev->vm_manager.num_level > 1) 2407 adev->vm_manager.block_size = 9; 2408 else 2409 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp); 2410 2411 if (amdgpu_vm_fragment_size == -1) 2412 adev->vm_manager.fragment_size = fragment_size_default; 2413 else 2414 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; 2415 2416 dev_info( 2417 adev->dev, 2418 "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", 2419 vm_size, adev->vm_manager.num_level + 1, 2420 adev->vm_manager.block_size, adev->vm_manager.fragment_size); 2421 } 2422 2423 /** 2424 * amdgpu_vm_wait_idle - wait for the VM to become idle 2425 * 2426 * @vm: VM object to wait for 2427 * @timeout: timeout to wait for VM to become idle 
2428 */ 2429 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) 2430 { 2431 timeout = drm_sched_entity_flush(&vm->immediate, timeout); 2432 if (timeout <= 0) 2433 return timeout; 2434 2435 return drm_sched_entity_flush(&vm->delayed, timeout); 2436 } 2437 2438 static void amdgpu_vm_destroy_task_info(struct kref *kref) 2439 { 2440 struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount); 2441 2442 kfree(ti); 2443 } 2444 2445 static inline struct amdgpu_vm * 2446 amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) 2447 { 2448 struct amdgpu_vm *vm; 2449 unsigned long flags; 2450 2451 xa_lock_irqsave(&adev->vm_manager.pasids, flags); 2452 vm = xa_load(&adev->vm_manager.pasids, pasid); 2453 xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); 2454 2455 return vm; 2456 } 2457 2458 /** 2459 * amdgpu_vm_put_task_info - reference down the vm task_info ptr 2460 * 2461 * @task_info: task_info struct under discussion. 2462 * 2463 * frees the vm task_info ptr at the last put 2464 */ 2465 void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info) 2466 { 2467 if (task_info) 2468 kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); 2469 } 2470 2471 /** 2472 * amdgpu_vm_get_task_info_vm - Extracts task info for a vm. 2473 * 2474 * @vm: VM to get info from 2475 * 2476 * Returns the reference counted task_info structure, which must be 2477 * referenced down with amdgpu_vm_put_task_info. 2478 */ 2479 struct amdgpu_task_info * 2480 amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm) 2481 { 2482 struct amdgpu_task_info *ti = NULL; 2483 2484 if (vm) { 2485 ti = vm->task_info; 2486 kref_get(&vm->task_info->refcount); 2487 } 2488 2489 return ti; 2490 } 2491 2492 /** 2493 * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID. 
2494 * 2495 * @adev: drm device pointer 2496 * @pasid: PASID identifier for VM 2497 * 2498 * Returns the reference counted task_info structure, which must be 2499 * referenced down with amdgpu_vm_put_task_info. 2500 */ 2501 struct amdgpu_task_info * 2502 amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid) 2503 { 2504 return amdgpu_vm_get_task_info_vm( 2505 amdgpu_vm_get_vm_from_pasid(adev, pasid)); 2506 } 2507 2508 static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm) 2509 { 2510 vm->task_info = kzalloc_obj(struct amdgpu_task_info); 2511 if (!vm->task_info) 2512 return -ENOMEM; 2513 2514 kref_init(&vm->task_info->refcount); 2515 return 0; 2516 } 2517 2518 /** 2519 * amdgpu_vm_set_task_info - Sets VMs task info. 2520 * 2521 * @vm: vm for which to set the info 2522 */ 2523 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) 2524 { 2525 if (!vm->task_info) 2526 return; 2527 2528 if (vm->task_info->task.pid == current->pid) 2529 return; 2530 2531 vm->task_info->task.pid = current->pid; 2532 get_task_comm(vm->task_info->task.comm, current); 2533 2534 vm->task_info->tgid = current->tgid; 2535 get_task_comm(vm->task_info->process_name, current->group_leader); 2536 } 2537 2538 /** 2539 * amdgpu_vm_init - initialize a vm instance 2540 * 2541 * @adev: amdgpu_device pointer 2542 * @vm: requested vm 2543 * @xcp_id: GPU partition selection id 2544 * @pasid: the pasid the VM is using on this GPU 2545 * 2546 * Init @vm fields. 2547 * 2548 * Returns: 2549 * 0 for success, error for failure. 
2550 */ 2551 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 2552 int32_t xcp_id, uint32_t pasid) 2553 { 2554 struct amdgpu_bo *root_bo; 2555 struct amdgpu_bo_vm *root; 2556 int r, i; 2557 2558 vm->va = RB_ROOT_CACHED; 2559 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2560 vm->reserved_vmid[i] = NULL; 2561 2562 amdgpu_vm_bo_status_init(&vm->kernel); 2563 amdgpu_vm_bo_status_init(&vm->always_valid); 2564 spin_lock_init(&vm->individual_lock); 2565 amdgpu_vm_bo_status_init(&vm->individual); 2566 INIT_LIST_HEAD(&vm->freed); 2567 INIT_KFIFO(vm->faults); 2568 spin_lock_init(&vm->stats_lock); 2569 2570 r = amdgpu_vm_init_entities(adev, vm); 2571 if (r) 2572 return r; 2573 2574 ttm_lru_bulk_move_init(&vm->lru_bulk_move); 2575 2576 vm->is_compute_context = false; 2577 vm->need_tlb_fence = amdgpu_userq_enabled(&adev->ddev); 2578 2579 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2580 AMDGPU_VM_USE_CPU_FOR_GFX); 2581 2582 dev_dbg(adev->dev, "VM update mode is %s\n", 2583 vm->use_cpu_for_update ? 
"CPU" : "SDMA"); 2584 WARN_ONCE((vm->use_cpu_for_update && 2585 !amdgpu_gmc_vram_full_visible(&adev->gmc)), 2586 "CPU update of VM recommended only for large BAR system\n"); 2587 2588 if (vm->use_cpu_for_update) 2589 vm->update_funcs = &amdgpu_vm_cpu_funcs; 2590 else 2591 vm->update_funcs = &amdgpu_vm_sdma_funcs; 2592 2593 vm->last_update = dma_fence_get_stub(); 2594 vm->last_unlocked = dma_fence_get_stub(); 2595 vm->last_tlb_flush = dma_fence_get_stub(); 2596 vm->generation = amdgpu_vm_generation(adev, NULL); 2597 2598 mutex_init(&vm->eviction_lock); 2599 vm->evicting = false; 2600 vm->tlb_fence_context = dma_fence_context_alloc(1); 2601 2602 r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, 2603 false, &root, xcp_id); 2604 if (r) 2605 goto error_free_delayed; 2606 2607 root_bo = amdgpu_bo_ref(&root->bo); 2608 r = amdgpu_bo_reserve(root_bo, true); 2609 if (r) { 2610 amdgpu_bo_unref(&root_bo); 2611 goto error_free_delayed; 2612 } 2613 2614 amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); 2615 r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); 2616 if (r) 2617 goto error_free_root; 2618 2619 r = amdgpu_vm_pt_clear(adev, vm, root, false); 2620 if (r) 2621 goto error_free_root; 2622 2623 r = amdgpu_vm_create_task_info(vm); 2624 if (r) 2625 dev_dbg(adev->dev, "Failed to create task info for VM\n"); 2626 2627 /* Store new PASID in XArray (if non-zero) */ 2628 if (pasid != 0) { 2629 r = xa_err(xa_store_irq(&adev->vm_manager.pasids, pasid, vm, GFP_KERNEL)); 2630 if (r < 0) 2631 goto error_free_root; 2632 2633 vm->pasid = pasid; 2634 } 2635 2636 amdgpu_bo_unreserve(vm->root.bo); 2637 amdgpu_bo_unref(&root_bo); 2638 2639 return 0; 2640 2641 error_free_root: 2642 /* If PASID was partially set, erase it from XArray before failing */ 2643 if (vm->pasid != 0) { 2644 xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); 2645 vm->pasid = 0; 2646 } 2647 amdgpu_vm_pt_free_root(adev, vm); 2648 amdgpu_bo_unreserve(vm->root.bo); 2649 amdgpu_bo_unref(&root_bo); 2650 2651 
error_free_delayed: 2652 dma_fence_put(vm->last_tlb_flush); 2653 dma_fence_put(vm->last_unlocked); 2654 ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); 2655 amdgpu_vm_fini_entities(vm); 2656 2657 return r; 2658 } 2659 2660 /** 2661 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM 2662 * 2663 * @adev: amdgpu_device pointer 2664 * @vm: requested vm 2665 * 2666 * This only works on GFX VMs that don't have any BOs added and no 2667 * page tables allocated yet. 2668 * 2669 * Changes the following VM parameters: 2670 * - use_cpu_for_update 2671 * - pte_supports_ats 2672 * 2673 * Reinitializes the page directory to reflect the changed ATS 2674 * setting. 2675 * 2676 * Returns: 2677 * 0 for success, -errno for errors. 2678 */ 2679 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) 2680 { 2681 int r; 2682 2683 r = amdgpu_bo_reserve(vm->root.bo, true); 2684 if (r) 2685 return r; 2686 2687 /* Update VM state */ 2688 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2689 AMDGPU_VM_USE_CPU_FOR_COMPUTE); 2690 dev_dbg(adev->dev, "VM update mode is %s\n", 2691 vm->use_cpu_for_update ? 
"CPU" : "SDMA"); 2692 WARN_ONCE((vm->use_cpu_for_update && 2693 !amdgpu_gmc_vram_full_visible(&adev->gmc)), 2694 "CPU update of VM recommended only for large BAR system\n"); 2695 2696 if (vm->use_cpu_for_update) { 2697 /* Sync with last SDMA update/clear before switching to CPU */ 2698 r = amdgpu_bo_sync_wait(vm->root.bo, 2699 AMDGPU_FENCE_OWNER_UNDEFINED, true); 2700 if (r) 2701 goto unreserve_bo; 2702 2703 vm->update_funcs = &amdgpu_vm_cpu_funcs; 2704 r = amdgpu_vm_pt_map_tables(adev, vm); 2705 if (r) 2706 goto unreserve_bo; 2707 2708 } else { 2709 vm->update_funcs = &amdgpu_vm_sdma_funcs; 2710 } 2711 2712 dma_fence_put(vm->last_update); 2713 vm->last_update = dma_fence_get_stub(); 2714 vm->is_compute_context = true; 2715 vm->need_tlb_fence = true; 2716 2717 unreserve_bo: 2718 amdgpu_bo_unreserve(vm->root.bo); 2719 return r; 2720 } 2721 2722 static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm) 2723 { 2724 for (int i = 0; i < __AMDGPU_PL_NUM; ++i) { 2725 if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) && 2726 vm->stats[i].evicted == 0)) 2727 return false; 2728 } 2729 return true; 2730 } 2731 2732 /** 2733 * amdgpu_vm_fini - tear down a vm instance 2734 * 2735 * @adev: amdgpu_device pointer 2736 * @vm: requested vm 2737 * 2738 * Tear down @vm. 
2739 * Unbind the VM and remove all bos from the vm bo list 2740 */ 2741 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 2742 { 2743 struct amdgpu_bo_va_mapping *mapping, *tmp; 2744 bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; 2745 struct amdgpu_bo *root; 2746 unsigned long flags; 2747 int i; 2748 2749 amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); 2750 2751 root = amdgpu_bo_ref(vm->root.bo); 2752 amdgpu_bo_reserve(root, true); 2753 /* Remove PASID mapping before destroying VM */ 2754 if (vm->pasid != 0) { 2755 xa_erase_irq(&adev->vm_manager.pasids, vm->pasid); 2756 vm->pasid = 0; 2757 } 2758 dma_fence_wait(vm->last_unlocked, false); 2759 dma_fence_put(vm->last_unlocked); 2760 dma_fence_wait(vm->last_tlb_flush, false); 2761 /* Make sure that all fence callbacks have completed */ 2762 dma_fence_lock_irqsave(vm->last_tlb_flush, flags); 2763 dma_fence_unlock_irqrestore(vm->last_tlb_flush, flags); 2764 dma_fence_put(vm->last_tlb_flush); 2765 2766 list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { 2767 if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) { 2768 amdgpu_vm_prt_fini(adev, vm); 2769 prt_fini_needed = false; 2770 } 2771 2772 list_del(&mapping->list); 2773 amdgpu_vm_free_mapping(adev, vm, mapping, NULL); 2774 } 2775 2776 amdgpu_vm_pt_free_root(adev, vm); 2777 amdgpu_bo_unreserve(root); 2778 amdgpu_bo_unref(&root); 2779 WARN_ON(vm->root.bo); 2780 2781 amdgpu_vm_fini_entities(vm); 2782 2783 if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { 2784 dev_err(adev->dev, "still active bo inside vm\n"); 2785 } 2786 rbtree_postorder_for_each_entry_safe(mapping, tmp, 2787 &vm->va.rb_root, rb) { 2788 /* Don't remove the mapping here, we don't want to trigger a 2789 * rebalance and the tree is about to be destroyed anyway. 
2790 */ 2791 list_del(&mapping->list); 2792 kfree(mapping); 2793 } 2794 2795 dma_fence_put(vm->last_update); 2796 2797 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) { 2798 amdgpu_vmid_free_reserved(adev, vm, i); 2799 } 2800 2801 ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); 2802 2803 if (!amdgpu_vm_stats_is_zero(vm)) { 2804 struct amdgpu_task_info *ti = vm->task_info; 2805 2806 dev_warn(adev->dev, 2807 "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n", 2808 ti->process_name, ti->task.pid, ti->task.comm, ti->tgid); 2809 } 2810 2811 amdgpu_vm_put_task_info(vm->task_info); 2812 } 2813 2814 /** 2815 * amdgpu_vm_manager_init - init the VM manager 2816 * 2817 * @adev: amdgpu_device pointer 2818 * 2819 * Initialize the VM manager structures 2820 */ 2821 void amdgpu_vm_manager_init(struct amdgpu_device *adev) 2822 { 2823 /* Concurrent flushes are only possible starting with Vega10 and 2824 * are broken on Navi10 and Navi14. 2825 */ 2826 adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 || 2827 adev->asic_type == CHIP_NAVI10 || 2828 adev->asic_type == CHIP_NAVI14); 2829 amdgpu_vmid_mgr_init(adev); 2830 2831 spin_lock_init(&adev->vm_manager.prt_lock); 2832 atomic_set(&adev->vm_manager.num_prt_users, 0); 2833 2834 /* If not overridden by the user, by default, only in large BAR systems 2835 * Compute VM tables will be updated by CPU 2836 */ 2837 #ifdef CONFIG_X86_64 2838 if (amdgpu_vm_update_mode == -1) { 2839 /* For asic with VF MMIO access protection 2840 * avoid using CPU for VM table updates 2841 */ 2842 if (amdgpu_gmc_vram_full_visible(&adev->gmc) && 2843 !amdgpu_sriov_vf_mmio_access_protection(adev)) 2844 adev->vm_manager.vm_update_mode = 2845 AMDGPU_VM_USE_CPU_FOR_COMPUTE; 2846 else 2847 adev->vm_manager.vm_update_mode = 0; 2848 } else 2849 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; 2850 #else 2851 adev->vm_manager.vm_update_mode = 0; 2852 #endif 2853 2854 xa_init_flags(&adev->vm_manager.pasids, 
XA_FLAGS_LOCK_IRQ); 2855 } 2856 2857 /** 2858 * amdgpu_vm_manager_fini - cleanup VM manager 2859 * 2860 * @adev: amdgpu_device pointer 2861 * 2862 * Cleanup the VM manager and free resources. 2863 */ 2864 void amdgpu_vm_manager_fini(struct amdgpu_device *adev) 2865 { 2866 WARN_ON(!xa_empty(&adev->vm_manager.pasids)); 2867 xa_destroy(&adev->vm_manager.pasids); 2868 2869 amdgpu_vmid_mgr_fini(adev); 2870 amdgpu_pasid_mgr_cleanup(); 2871 } 2872 2873 /** 2874 * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs. 2875 * 2876 * @dev: drm device pointer 2877 * @data: drm_amdgpu_vm 2878 * @filp: drm file pointer 2879 * 2880 * Returns: 2881 * 0 for success, -errno for errors. 2882 */ 2883 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 2884 { 2885 union drm_amdgpu_vm *args = data; 2886 struct amdgpu_device *adev = drm_to_adev(dev); 2887 struct amdgpu_fpriv *fpriv = filp->driver_priv; 2888 struct amdgpu_vm *vm = &fpriv->vm; 2889 2890 /* No valid flags defined yet */ 2891 if (args->in.flags) 2892 return -EINVAL; 2893 2894 switch (args->in.op) { 2895 case AMDGPU_VM_OP_RESERVE_VMID: 2896 /* We only have requirement to reserve vmid from gfxhub */ 2897 return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0)); 2898 case AMDGPU_VM_OP_UNRESERVE_VMID: 2899 amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0)); 2900 break; 2901 default: 2902 return -EINVAL; 2903 } 2904 2905 return 0; 2906 } 2907 2908 /** 2909 * amdgpu_vm_lock_by_pasid - return an amdgpu_vm and its root bo from a pasid, if possible. 2910 * @adev: amdgpu device pointer 2911 * @root: root BO of the VM 2912 * @pasid: PASID of the VM 2913 * The caller needs to unreserve and unref the root bo on success. 
2914 */ 2915 struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct amdgpu_device *adev, 2916 struct amdgpu_bo **root, u32 pasid) 2917 { 2918 unsigned long irqflags; 2919 struct amdgpu_vm *vm; 2920 int r; 2921 2922 xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); 2923 vm = xa_load(&adev->vm_manager.pasids, pasid); 2924 *root = vm ? amdgpu_bo_ref(vm->root.bo) : NULL; 2925 xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); 2926 2927 if (!*root) 2928 return NULL; 2929 2930 r = amdgpu_bo_reserve(*root, true); 2931 if (r) 2932 goto error_unref; 2933 2934 /* Double check that the VM still exists */ 2935 xa_lock_irqsave(&adev->vm_manager.pasids, irqflags); 2936 vm = xa_load(&adev->vm_manager.pasids, pasid); 2937 if (vm && vm->root.bo != *root) 2938 vm = NULL; 2939 xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags); 2940 if (!vm) 2941 goto error_unlock; 2942 2943 return vm; 2944 error_unlock: 2945 amdgpu_bo_unreserve(*root); 2946 2947 error_unref: 2948 amdgpu_bo_unref(root); 2949 return NULL; 2950 } 2951 2952 /** 2953 * amdgpu_vm_handle_fault - graceful handling of VM faults. 2954 * @adev: amdgpu device pointer 2955 * @pasid: PASID of the VM 2956 * @ts: Timestamp of the fault 2957 * @vmid: VMID, only used for GFX 9.4.3. 2958 * @node_id: Node_id received in IH cookie. Only applicable for 2959 * GFX 9.4.3. 2960 * @addr: Address of the fault 2961 * @write_fault: true is write fault, false is read fault 2962 * 2963 * Try to gracefully handle a VM fault. Return true if the fault was handled and 2964 * shouldn't be reported any more. 
2965 */ 2966 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, 2967 u32 vmid, u32 node_id, uint64_t addr, 2968 uint64_t ts, bool write_fault) 2969 { 2970 bool is_compute_context = false; 2971 struct amdgpu_bo *root; 2972 uint64_t value, flags; 2973 struct amdgpu_vm *vm; 2974 int r; 2975 2976 vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); 2977 if (!vm) 2978 return false; 2979 2980 is_compute_context = vm->is_compute_context; 2981 2982 if (is_compute_context) { 2983 /* Unreserve root since svm_range_restore_pages might try to reserve it. */ 2984 /* TODO: rework svm_range_restore_pages so that this isn't necessary. */ 2985 amdgpu_bo_unreserve(root); 2986 2987 if (!svm_range_restore_pages(adev, pasid, vmid, 2988 node_id, addr >> PAGE_SHIFT, ts, write_fault)) { 2989 amdgpu_bo_unref(&root); 2990 return true; 2991 } 2992 amdgpu_bo_unref(&root); 2993 2994 /* Re-acquire the VM lock, could be that the VM was freed in between. */ 2995 vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid); 2996 if (!vm) 2997 return false; 2998 } 2999 3000 addr /= AMDGPU_GPU_PAGE_SIZE; 3001 flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | 3002 AMDGPU_PTE_SYSTEM; 3003 3004 if (is_compute_context) { 3005 /* Intentionally setting invalid PTE flag 3006 * combination to force a no-retry-fault 3007 */ 3008 flags = AMDGPU_VM_NORETRY_FLAGS; 3009 value = 0; 3010 } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { 3011 /* Redirect the access to the dummy page */ 3012 value = adev->dummy_page_addr; 3013 flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | 3014 AMDGPU_PTE_WRITEABLE; 3015 3016 } else { 3017 /* Let the hw retry silently on the PTE */ 3018 value = 0; 3019 } 3020 3021 r = dma_resv_reserve_fences(root->tbo.base.resv, 1); 3022 if (r) { 3023 pr_debug("failed %d to reserve fence slot\n", r); 3024 goto error_unlock; 3025 } 3026 3027 r = amdgpu_vm_update_range(adev, vm, true, false, false, false, 3028 NULL, addr, addr, flags, value, 0, NULL, NULL, NULL); 3029 if (r) 3030 
goto error_unlock; 3031 3032 r = amdgpu_vm_update_pdes(adev, vm, true); 3033 3034 error_unlock: 3035 amdgpu_bo_unreserve(root); 3036 if (r < 0) 3037 dev_err(adev->dev, "Can't handle page fault (%d)\n", r); 3038 3039 amdgpu_bo_unref(&root); 3040 3041 return false; 3042 } 3043 3044 #if defined(CONFIG_DEBUG_FS) 3045 3046 /* print the debug info for a specific set of status lists */ 3047 static void amdgpu_debugfs_vm_bo_status_info(struct seq_file *m, 3048 struct amdgpu_vm_bo_status *lists) 3049 { 3050 struct amdgpu_vm_bo_base *base; 3051 unsigned int id; 3052 3053 id = 0; 3054 seq_puts(m, "\tEvicted BOs:\n"); 3055 list_for_each_entry(base, &lists->evicted, vm_status) { 3056 if (!base->bo) 3057 continue; 3058 3059 amdgpu_bo_print_info(id++, base->bo, m); 3060 } 3061 3062 id = 0; 3063 seq_puts(m, "\tMoved BOs:\n"); 3064 list_for_each_entry(base, &lists->moved, vm_status) { 3065 if (!base->bo) 3066 continue; 3067 3068 amdgpu_bo_print_info(id++, base->bo, m); 3069 } 3070 3071 id = 0; 3072 seq_puts(m, "\tIdle BOs:\n"); 3073 list_for_each_entry(base, &lists->moved, vm_status) { 3074 if (!base->bo) 3075 continue; 3076 3077 amdgpu_bo_print_info(id++, base->bo, m); 3078 } 3079 } 3080 3081 /** 3082 * amdgpu_debugfs_vm_bo_info - print BO info for the VM 3083 * 3084 * @vm: Requested VM for printing BO info 3085 * @m: debugfs file 3086 * 3087 * Print BO information in debugfs file for the VM 3088 */ 3089 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) 3090 { 3091 amdgpu_vm_assert_locked(vm); 3092 3093 seq_puts(m, "\tKernel PT/PDs:\n"); 3094 amdgpu_debugfs_vm_bo_status_info(m, &vm->kernel); 3095 3096 seq_puts(m, "\tPer VM BOs:\n"); 3097 amdgpu_debugfs_vm_bo_status_info(m, &vm->always_valid); 3098 3099 seq_puts(m, "\tIndividual BOs:\n"); 3100 spin_lock(&vm->individual_lock); 3101 amdgpu_debugfs_vm_bo_status_info(m, &vm->individual); 3102 spin_unlock(&vm->individual_lock); 3103 } 3104 #endif 3105 3106 /** 3107 * amdgpu_vm_update_fault_cache - update cached 
fault into. 3108 * @adev: amdgpu device pointer 3109 * @pasid: PASID of the VM 3110 * @addr: Address of the fault 3111 * @status: GPUVM fault status register 3112 * @vmhub: which vmhub got the fault 3113 * 3114 * Cache the fault info for later use by userspace in debugging. 3115 */ 3116 void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, 3117 unsigned int pasid, 3118 uint64_t addr, 3119 uint32_t status, 3120 unsigned int vmhub) 3121 { 3122 struct amdgpu_vm *vm; 3123 unsigned long flags; 3124 3125 xa_lock_irqsave(&adev->vm_manager.pasids, flags); 3126 3127 vm = xa_load(&adev->vm_manager.pasids, pasid); 3128 /* Don't update the fault cache if status is 0. In the multiple 3129 * fault case, subsequent faults will return a 0 status which is 3130 * useless for userspace and replaces the useful fault status, so 3131 * only update if status is non-0. 3132 */ 3133 if (vm && status) { 3134 vm->fault_info.addr = addr; 3135 vm->fault_info.status = status; 3136 /* 3137 * Update the fault information globally for later usage 3138 * when vm could be stale or freed. 
3139 */ 3140 adev->vm_manager.fault_info.addr = addr; 3141 adev->vm_manager.fault_info.vmhub = vmhub; 3142 adev->vm_manager.fault_info.status = status; 3143 3144 if (AMDGPU_IS_GFXHUB(vmhub)) { 3145 vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; 3146 vm->fault_info.vmhub |= 3147 (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT; 3148 } else if (AMDGPU_IS_MMHUB0(vmhub)) { 3149 vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0; 3150 vm->fault_info.vmhub |= 3151 (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT; 3152 } else if (AMDGPU_IS_MMHUB1(vmhub)) { 3153 vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1; 3154 vm->fault_info.vmhub |= 3155 (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT; 3156 } else { 3157 WARN_ONCE(1, "Invalid vmhub %u\n", vmhub); 3158 } 3159 } 3160 xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); 3161 } 3162 3163 void amdgpu_vm_print_task_info(struct amdgpu_device *adev, 3164 struct amdgpu_task_info *task_info) 3165 { 3166 dev_err(adev->dev, 3167 " Process %s pid %d thread %s pid %d\n", 3168 task_info->process_name, task_info->tgid, 3169 task_info->task.comm, task_info->task.pid); 3170 } 3171 3172 void amdgpu_sdma_set_vm_pte_scheds(struct amdgpu_device *adev, 3173 const struct amdgpu_vm_pte_funcs *vm_pte_funcs) 3174 { 3175 struct drm_gpu_scheduler *sched; 3176 int i; 3177 3178 for (i = 0; i < adev->sdma.num_instances; i++) { 3179 if (adev->sdma.has_page_queue) 3180 sched = &adev->sdma.instance[i].page.sched; 3181 else 3182 sched = &adev->sdma.instance[i].ring.sched; 3183 adev->vm_manager.vm_pte_scheds[i] = sched; 3184 } 3185 adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; 3186 adev->vm_manager.vm_pte_funcs = vm_pte_funcs; 3187 } 3188