/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/*
 * PASID manager
 *
 * PASIDs are global address space identifiers that can be shared
 * between the GPU, an IOMMU and the driver. VMs on different devices
 * may use the same PASID if they share the same address
 * space. Therefore PASIDs are allocated using a global IDA. VMs are
 * looked up from the PASID per amdgpu_device.
 */
static DEFINE_IDA(amdgpu_vm_pasid_ida);

/**
 * amdgpu_vm_alloc_pasid - Allocate a PASID
 * @bits: Maximum width of the PASID in bits, must be at least 1
 *
 * Allocates a PASID of the given width while keeping smaller PASIDs
 * available if possible.
 *
 * Returns a positive integer on success. Returns %-EINVAL if bits==0.
 * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
 * memory allocation failure.
 */
int amdgpu_vm_alloc_pasid(unsigned int bits)
{
	int pasid = -EINVAL;

	for (bits = min(bits, 31U); bits > 0; bits--) {
		pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
				       1U << (bits - 1), 1U << bits,
				       GFP_KERNEL);
		if (pasid != -ENOSPC)
			break;
	}

	return pasid;
}

/**
 * amdgpu_vm_free_pasid - Free a PASID
 * @pasid: PASID to free
 */
void amdgpu_vm_free_pasid(unsigned int pasid)
{
	ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
}

/*
 * GPUVM
 * GPUVM is similar to the legacy GART on older ASICs; however,
 * rather than there being a single global GART table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * VRAM pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
95 * The userspace drivers maintain their own address space and the kernel 96 * sets up their pages tables accordingly when they submit their 97 * command buffers and a VMID is assigned. 98 * Cayman/Trinity support up to 8 active VMs at any given time; 99 * SI supports 16. 100 */ 101 102 #define START(node) ((node)->start) 103 #define LAST(node) ((node)->last) 104 105 INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, 106 START, LAST, static, amdgpu_vm_it) 107 108 #undef START 109 #undef LAST 110 111 /* Local structure. Encapsulate some VM table update parameters to reduce 112 * the number of function parameters 113 */ 114 struct amdgpu_pte_update_params { 115 /* amdgpu device we do this update for */ 116 struct amdgpu_device *adev; 117 /* optional amdgpu_vm we do this update for */ 118 struct amdgpu_vm *vm; 119 /* address where to copy page table entries from */ 120 uint64_t src; 121 /* indirect buffer to fill with commands */ 122 struct amdgpu_ib *ib; 123 /* Function which actually does the update */ 124 void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, 125 uint64_t addr, unsigned count, uint32_t incr, 126 uint64_t flags); 127 /* The next two are used during VM update by CPU 128 * DMA addresses to use for mapping 129 * Kernel pointer of PD/PT BO that needs to be updated 130 */ 131 dma_addr_t *pages_addr; 132 void *kptr; 133 }; 134 135 /* Helper to disable partial resident texture feature from a fence callback */ 136 struct amdgpu_prt_cb { 137 struct amdgpu_device *adev; 138 struct dma_fence_cb cb; 139 }; 140 141 /** 142 * amdgpu_vm_num_entries - return the number of entries in a PD/PT 143 * 144 * @adev: amdgpu_device pointer 145 * 146 * Calculate the number of entries in a page directory or page table. 147 */ 148 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, 149 unsigned level) 150 { 151 if (level == 0) 152 /* For the root directory */ 153 return adev->vm_manager.max_pfn >> 154 (adev->vm_manager.block_size * 155 adev->vm_manager.num_level); 156 else if (level == adev->vm_manager.num_level) 157 /* For the page tables on the leaves */ 158 return AMDGPU_VM_PTE_COUNT(adev); 159 else 160 /* Everything in between */ 161 return 1 << adev->vm_manager.block_size; 162 } 163 164 /** 165 * amdgpu_vm_bo_size - returns the size of the BOs in bytes 166 * 167 * @adev: amdgpu_device pointer 168 * 169 * Calculate the size of the BO for a page directory or page table in bytes. 170 */ 171 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) 172 { 173 return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); 174 } 175 176 /** 177 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list 178 * 179 * @vm: vm providing the BOs 180 * @validated: head of validation list 181 * @entry: entry to add 182 * 183 * Add the page directory to the list of BOs to 184 * validate for command submission. 
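 *
 * A typical caller (command submission, for example) adds the PD like this
 * before reserving and validating the list; this is only an illustrative
 * sketch and the local names are made up:
 *
 *	struct amdgpu_bo_list_entry pd_entry;
 *	LIST_HEAD(validated);
 *
 *	amdgpu_vm_get_pd_bo(vm, &validated, &pd_entry);
 *
 * The entry is added with tv.shared = true because the root PD shares its
 * reservation object with all page tables of the VM.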
185 */ 186 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, 187 struct list_head *validated, 188 struct amdgpu_bo_list_entry *entry) 189 { 190 entry->robj = vm->root.base.bo; 191 entry->priority = 0; 192 entry->tv.bo = &entry->robj->tbo; 193 entry->tv.shared = true; 194 entry->user_pages = NULL; 195 list_add(&entry->tv.head, validated); 196 } 197 198 /** 199 * amdgpu_vm_validate_pt_bos - validate the page table BOs 200 * 201 * @adev: amdgpu device pointer 202 * @vm: vm providing the BOs 203 * @validate: callback to do the validation 204 * @param: parameter for the validation callback 205 * 206 * Validate the page table BOs on command submission if neccessary. 207 */ 208 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, 209 int (*validate)(void *p, struct amdgpu_bo *bo), 210 void *param) 211 { 212 struct ttm_bo_global *glob = adev->mman.bdev.glob; 213 int r; 214 215 spin_lock(&vm->status_lock); 216 while (!list_empty(&vm->evicted)) { 217 struct amdgpu_vm_bo_base *bo_base; 218 struct amdgpu_bo *bo; 219 220 bo_base = list_first_entry(&vm->evicted, 221 struct amdgpu_vm_bo_base, 222 vm_status); 223 spin_unlock(&vm->status_lock); 224 225 bo = bo_base->bo; 226 BUG_ON(!bo); 227 if (bo->parent) { 228 r = validate(param, bo); 229 if (r) 230 return r; 231 232 spin_lock(&glob->lru_lock); 233 ttm_bo_move_to_lru_tail(&bo->tbo); 234 if (bo->shadow) 235 ttm_bo_move_to_lru_tail(&bo->shadow->tbo); 236 spin_unlock(&glob->lru_lock); 237 } 238 239 if (bo->tbo.type == ttm_bo_type_kernel && 240 vm->use_cpu_for_update) { 241 r = amdgpu_bo_kmap(bo, NULL); 242 if (r) 243 return r; 244 } 245 246 spin_lock(&vm->status_lock); 247 if (bo->tbo.type != ttm_bo_type_kernel) 248 list_move(&bo_base->vm_status, &vm->moved); 249 else 250 list_move(&bo_base->vm_status, &vm->relocated); 251 } 252 spin_unlock(&vm->status_lock); 253 254 return 0; 255 } 256 257 /** 258 * amdgpu_vm_ready - check VM is ready for updates 259 * 260 * @vm: VM to check 261 * 262 * Check if all VM PDs/PTs are ready for updates 263 */ 264 bool amdgpu_vm_ready(struct amdgpu_vm *vm) 265 { 266 bool ready; 267 268 spin_lock(&vm->status_lock); 269 ready = list_empty(&vm->evicted); 270 spin_unlock(&vm->status_lock); 271 272 return ready; 273 } 274 275 /** 276 * amdgpu_vm_alloc_levels - allocate the PD/PT levels 277 * 278 * @adev: amdgpu_device pointer 279 * @vm: requested vm 280 * @saddr: start of the address range 281 * @eaddr: end of the address range 282 * 283 * Make sure the page directories and page tables are allocated 284 */ 285 static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, 286 struct amdgpu_vm *vm, 287 struct amdgpu_vm_pt *parent, 288 uint64_t saddr, uint64_t eaddr, 289 unsigned level) 290 { 291 unsigned shift = (adev->vm_manager.num_level - level) * 292 adev->vm_manager.block_size; 293 unsigned pt_idx, from, to; 294 int r; 295 u64 flags; 296 uint64_t init_value = 0; 297 298 if (!parent->entries) { 299 unsigned num_entries = amdgpu_vm_num_entries(adev, level); 300 301 parent->entries = kvmalloc_array(num_entries, 302 sizeof(struct amdgpu_vm_pt), 303 GFP_KERNEL | __GFP_ZERO); 304 if (!parent->entries) 305 return -ENOMEM; 306 memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); 307 } 308 309 from = saddr >> shift; 310 to = eaddr >> shift; 311 if (from >= amdgpu_vm_num_entries(adev, level) || 312 to >= amdgpu_vm_num_entries(adev, level)) 313 return -EINVAL; 314 315 if (to > parent->last_entry_used) 316 parent->last_entry_used = to; 317 318 ++level; 319 saddr = saddr & ((1 << shift) - 1); 320 eaddr = eaddr & 
((1 << shift) - 1); 321 322 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 323 AMDGPU_GEM_CREATE_VRAM_CLEARED; 324 if (vm->use_cpu_for_update) 325 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 326 else 327 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 328 AMDGPU_GEM_CREATE_SHADOW); 329 330 if (vm->pte_support_ats) { 331 init_value = AMDGPU_PTE_SYSTEM; 332 if (level != adev->vm_manager.num_level - 1) 333 init_value |= AMDGPU_PDE_PTE; 334 } 335 336 /* walk over the address space and allocate the page tables */ 337 for (pt_idx = from; pt_idx <= to; ++pt_idx) { 338 struct reservation_object *resv = vm->root.base.bo->tbo.resv; 339 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; 340 struct amdgpu_bo *pt; 341 342 if (!entry->base.bo) { 343 r = amdgpu_bo_create(adev, 344 amdgpu_vm_bo_size(adev, level), 345 AMDGPU_GPU_PAGE_SIZE, true, 346 AMDGPU_GEM_DOMAIN_VRAM, 347 flags, 348 NULL, resv, init_value, &pt); 349 if (r) 350 return r; 351 352 if (vm->use_cpu_for_update) { 353 r = amdgpu_bo_kmap(pt, NULL); 354 if (r) { 355 amdgpu_bo_unref(&pt); 356 return r; 357 } 358 } 359 360 /* Keep a reference to the root directory to avoid 361 * freeing them up in the wrong order. 362 */ 363 pt->parent = amdgpu_bo_ref(parent->base.bo); 364 365 entry->base.vm = vm; 366 entry->base.bo = pt; 367 list_add_tail(&entry->base.bo_list, &pt->va); 368 spin_lock(&vm->status_lock); 369 list_add(&entry->base.vm_status, &vm->relocated); 370 spin_unlock(&vm->status_lock); 371 entry->addr = 0; 372 } 373 374 if (level < adev->vm_manager.num_level) { 375 uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; 376 uint64_t sub_eaddr = (pt_idx == to) ? eaddr : 377 ((1 << shift) - 1); 378 r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, 379 sub_eaddr, level); 380 if (r) 381 return r; 382 } 383 } 384 385 return 0; 386 } 387 388 /** 389 * amdgpu_vm_alloc_pts - Allocate page tables. 390 * 391 * @adev: amdgpu_device pointer 392 * @vm: VM to allocate page tables for 393 * @saddr: Start address which needs to be allocated 394 * @size: Size from start address we need. 395 * 396 * Make sure the page tables are allocated. 397 */ 398 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, 399 struct amdgpu_vm *vm, 400 uint64_t saddr, uint64_t size) 401 { 402 uint64_t last_pfn; 403 uint64_t eaddr; 404 405 /* validate the parameters */ 406 if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) 407 return -EINVAL; 408 409 eaddr = saddr + size - 1; 410 last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; 411 if (last_pfn >= adev->vm_manager.max_pfn) { 412 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", 413 last_pfn, adev->vm_manager.max_pfn); 414 return -EINVAL; 415 } 416 417 saddr /= AMDGPU_GPU_PAGE_SIZE; 418 eaddr /= AMDGPU_GPU_PAGE_SIZE; 419 420 return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); 421 } 422 423 /** 424 * amdgpu_vm_had_gpu_reset - check if reset occured since last use 425 * 426 * @adev: amdgpu_device pointer 427 * @id: VMID structure 428 * 429 * Check if GPU reset occured since last use of the VMID. 
430 */ 431 static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, 432 struct amdgpu_vm_id *id) 433 { 434 return id->current_gpu_reset_count != 435 atomic_read(&adev->gpu_reset_counter); 436 } 437 438 static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) 439 { 440 return !!vm->reserved_vmid[vmhub]; 441 } 442 443 /* idr_mgr->lock must be held */ 444 static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, 445 struct amdgpu_ring *ring, 446 struct amdgpu_sync *sync, 447 struct dma_fence *fence, 448 struct amdgpu_job *job) 449 { 450 struct amdgpu_device *adev = ring->adev; 451 unsigned vmhub = ring->funcs->vmhub; 452 uint64_t fence_context = adev->fence_context + ring->idx; 453 struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; 454 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 455 struct dma_fence *updates = sync->last_vm_update; 456 int r = 0; 457 struct dma_fence *flushed, *tmp; 458 bool needs_flush = vm->use_cpu_for_update; 459 460 flushed = id->flushed_updates; 461 if ((amdgpu_vm_had_gpu_reset(adev, id)) || 462 (atomic64_read(&id->owner) != vm->client_id) || 463 (job->vm_pd_addr != id->pd_gpu_addr) || 464 (updates && (!flushed || updates->context != flushed->context || 465 dma_fence_is_later(updates, flushed))) || 466 (!id->last_flush || (id->last_flush->context != fence_context && 467 !dma_fence_is_signaled(id->last_flush)))) { 468 needs_flush = true; 469 /* to prevent one context starved by another context */ 470 id->pd_gpu_addr = 0; 471 tmp = amdgpu_sync_peek_fence(&id->active, ring); 472 if (tmp) { 473 r = amdgpu_sync_fence(adev, sync, tmp); 474 return r; 475 } 476 } 477 478 /* Good we can use this VMID. Remember this submission as 479 * user of the VMID. 480 */ 481 r = amdgpu_sync_fence(ring->adev, &id->active, fence); 482 if (r) 483 goto out; 484 485 if (updates && (!flushed || updates->context != flushed->context || 486 dma_fence_is_later(updates, flushed))) { 487 dma_fence_put(id->flushed_updates); 488 id->flushed_updates = dma_fence_get(updates); 489 } 490 id->pd_gpu_addr = job->vm_pd_addr; 491 atomic64_set(&id->owner, vm->client_id); 492 job->vm_needs_flush = needs_flush; 493 if (needs_flush) { 494 dma_fence_put(id->last_flush); 495 id->last_flush = NULL; 496 } 497 job->vm_id = id - id_mgr->ids; 498 trace_amdgpu_vm_grab_id(vm, ring, job); 499 out: 500 return r; 501 } 502 503 /** 504 * amdgpu_vm_grab_id - allocate the next free VMID 505 * 506 * @vm: vm to allocate id for 507 * @ring: ring we want to submit job to 508 * @sync: sync object where we add dependencies 509 * @fence: fence protecting ID from reuse 510 * 511 * Allocate an id for the vm, adding fences to the sync obj as necessary. 
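 *
 * Roughly, the strategy is (informal sketch of the code below, not a
 * contract):
 *  1. If the VM has a reserved VMID for this hub, use it.
 *  2. Otherwise try to reuse a VMID already assigned to this VM (same
 *     owner and page directory address); before Vega10 this is only done
 *     when it would not require another flush.
 *  3. Otherwise take an idle VMID from the LRU list; if none is idle, the
 *     active fences of all VMIDs are added to @sync so the submission
 *     waits until one becomes available.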
512 */ 513 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, 514 struct amdgpu_sync *sync, struct dma_fence *fence, 515 struct amdgpu_job *job) 516 { 517 struct amdgpu_device *adev = ring->adev; 518 unsigned vmhub = ring->funcs->vmhub; 519 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 520 uint64_t fence_context = adev->fence_context + ring->idx; 521 struct dma_fence *updates = sync->last_vm_update; 522 struct amdgpu_vm_id *id, *idle; 523 struct dma_fence **fences; 524 unsigned i; 525 int r = 0; 526 527 mutex_lock(&id_mgr->lock); 528 if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { 529 r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); 530 mutex_unlock(&id_mgr->lock); 531 return r; 532 } 533 fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); 534 if (!fences) { 535 mutex_unlock(&id_mgr->lock); 536 return -ENOMEM; 537 } 538 /* Check if we have an idle VMID */ 539 i = 0; 540 list_for_each_entry(idle, &id_mgr->ids_lru, list) { 541 fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); 542 if (!fences[i]) 543 break; 544 ++i; 545 } 546 547 /* If we can't find a idle VMID to use, wait till one becomes available */ 548 if (&idle->list == &id_mgr->ids_lru) { 549 u64 fence_context = adev->vm_manager.fence_context + ring->idx; 550 unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; 551 struct dma_fence_array *array; 552 unsigned j; 553 554 for (j = 0; j < i; ++j) 555 dma_fence_get(fences[j]); 556 557 array = dma_fence_array_create(i, fences, fence_context, 558 seqno, true); 559 if (!array) { 560 for (j = 0; j < i; ++j) 561 dma_fence_put(fences[j]); 562 kfree(fences); 563 r = -ENOMEM; 564 goto error; 565 } 566 567 568 r = amdgpu_sync_fence(ring->adev, sync, &array->base); 569 dma_fence_put(&array->base); 570 if (r) 571 goto error; 572 573 mutex_unlock(&id_mgr->lock); 574 return 0; 575 576 } 577 kfree(fences); 578 579 job->vm_needs_flush = vm->use_cpu_for_update; 580 /* Check if we can use a VMID already assigned to this VM */ 581 list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { 582 struct dma_fence *flushed; 583 bool needs_flush = vm->use_cpu_for_update; 584 585 /* Check all the prerequisites to using this VMID */ 586 if (amdgpu_vm_had_gpu_reset(adev, id)) 587 continue; 588 589 if (atomic64_read(&id->owner) != vm->client_id) 590 continue; 591 592 if (job->vm_pd_addr != id->pd_gpu_addr) 593 continue; 594 595 if (!id->last_flush || 596 (id->last_flush->context != fence_context && 597 !dma_fence_is_signaled(id->last_flush))) 598 needs_flush = true; 599 600 flushed = id->flushed_updates; 601 if (updates && (!flushed || dma_fence_is_later(updates, flushed))) 602 needs_flush = true; 603 604 /* Concurrent flushes are only possible starting with Vega10 */ 605 if (adev->asic_type < CHIP_VEGA10 && needs_flush) 606 continue; 607 608 /* Good we can use this VMID. Remember this submission as 609 * user of the VMID. 610 */ 611 r = amdgpu_sync_fence(ring->adev, &id->active, fence); 612 if (r) 613 goto error; 614 615 if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { 616 dma_fence_put(id->flushed_updates); 617 id->flushed_updates = dma_fence_get(updates); 618 } 619 620 if (needs_flush) 621 goto needs_flush; 622 else 623 goto no_flush_needed; 624 625 }; 626 627 /* Still no ID to use? 
Then use the idle one found earlier */ 628 id = idle; 629 630 /* Remember this submission as user of the VMID */ 631 r = amdgpu_sync_fence(ring->adev, &id->active, fence); 632 if (r) 633 goto error; 634 635 id->pd_gpu_addr = job->vm_pd_addr; 636 dma_fence_put(id->flushed_updates); 637 id->flushed_updates = dma_fence_get(updates); 638 atomic64_set(&id->owner, vm->client_id); 639 640 needs_flush: 641 job->vm_needs_flush = true; 642 dma_fence_put(id->last_flush); 643 id->last_flush = NULL; 644 645 no_flush_needed: 646 list_move_tail(&id->list, &id_mgr->ids_lru); 647 648 job->vm_id = id - id_mgr->ids; 649 trace_amdgpu_vm_grab_id(vm, ring, job); 650 651 error: 652 mutex_unlock(&id_mgr->lock); 653 return r; 654 } 655 656 static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, 657 struct amdgpu_vm *vm, 658 unsigned vmhub) 659 { 660 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 661 662 mutex_lock(&id_mgr->lock); 663 if (vm->reserved_vmid[vmhub]) { 664 list_add(&vm->reserved_vmid[vmhub]->list, 665 &id_mgr->ids_lru); 666 vm->reserved_vmid[vmhub] = NULL; 667 atomic_dec(&id_mgr->reserved_vmid_num); 668 } 669 mutex_unlock(&id_mgr->lock); 670 } 671 672 static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, 673 struct amdgpu_vm *vm, 674 unsigned vmhub) 675 { 676 struct amdgpu_vm_id_manager *id_mgr; 677 struct amdgpu_vm_id *idle; 678 int r = 0; 679 680 id_mgr = &adev->vm_manager.id_mgr[vmhub]; 681 mutex_lock(&id_mgr->lock); 682 if (vm->reserved_vmid[vmhub]) 683 goto unlock; 684 if (atomic_inc_return(&id_mgr->reserved_vmid_num) > 685 AMDGPU_VM_MAX_RESERVED_VMID) { 686 DRM_ERROR("Over limitation of reserved vmid\n"); 687 atomic_dec(&id_mgr->reserved_vmid_num); 688 r = -EINVAL; 689 goto unlock; 690 } 691 /* Select the first entry VMID */ 692 idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); 693 list_del_init(&idle->list); 694 vm->reserved_vmid[vmhub] = idle; 695 mutex_unlock(&id_mgr->lock); 696 697 return 0; 698 unlock: 699 mutex_unlock(&id_mgr->lock); 700 return r; 701 } 702 703 /** 704 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug 705 * 706 * @adev: amdgpu_device pointer 707 */ 708 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) 709 { 710 const struct amdgpu_ip_block *ip_block; 711 bool has_compute_vm_bug; 712 struct amdgpu_ring *ring; 713 int i; 714 715 has_compute_vm_bug = false; 716 717 ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 718 if (ip_block) { 719 /* Compute has a VM bug for GFX version < 7. 
	   Compute has a VM bug for GFX 8 MEC firmware version < 673. */
		if (ip_block->version->major <= 7)
			has_compute_vm_bug = true;
		else if (ip_block->version->major == 8)
			if (adev->gfx.mec_fw_version < 673)
				has_compute_vm_bug = true;
	}

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			/* only compute rings */
			ring->has_compute_vm_bug = has_compute_vm_bug;
		else
			ring->has_compute_vm_bug = false;
	}
}

bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
				  struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vm_id *id;
	bool gds_switch_needed;
	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;

	if (job->vm_id == 0)
		return false;
	id = &id_mgr->ids[job->vm_id];
	gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);

	if (amdgpu_vm_had_gpu_reset(adev, id))
		return true;

	return vm_flush_needed || gds_switch_needed;
}

static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
{
	return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @job: job which carries the VMID, page directory address and GDS setup
 * @need_pipe_sync: true if a pipeline sync must be emitted before the flush
 *
 * Emit a VM flush when it is necessary.
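 *
 * Roughly, the command stream emitted here looks like the following, with
 * parts skipped when they are not needed (informal sketch):
 *
 *	init COND_EXEC               (if the ring supports it)
 *	pipeline sync                (if @need_pipe_sync)
 *	VM flush + fence             (if the VMID needs flushing)
 *	GDS/GWS/OA switch            (if the GDS configuration changed)
 *	patch COND_EXEC
 *	2x SWITCH_BUFFER             (cannot be skipped by COND_EXEC)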
778 */ 779 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) 780 { 781 struct amdgpu_device *adev = ring->adev; 782 unsigned vmhub = ring->funcs->vmhub; 783 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 784 struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id]; 785 bool gds_switch_needed = ring->funcs->emit_gds_switch && ( 786 id->gds_base != job->gds_base || 787 id->gds_size != job->gds_size || 788 id->gws_base != job->gws_base || 789 id->gws_size != job->gws_size || 790 id->oa_base != job->oa_base || 791 id->oa_size != job->oa_size); 792 bool vm_flush_needed = job->vm_needs_flush; 793 unsigned patch_offset = 0; 794 int r; 795 796 if (amdgpu_vm_had_gpu_reset(adev, id)) { 797 gds_switch_needed = true; 798 vm_flush_needed = true; 799 } 800 801 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) 802 return 0; 803 804 if (ring->funcs->init_cond_exec) 805 patch_offset = amdgpu_ring_init_cond_exec(ring); 806 807 if (need_pipe_sync) 808 amdgpu_ring_emit_pipeline_sync(ring); 809 810 if (ring->funcs->emit_vm_flush && vm_flush_needed) { 811 struct dma_fence *fence; 812 813 trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); 814 amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); 815 816 r = amdgpu_fence_emit(ring, &fence); 817 if (r) 818 return r; 819 820 mutex_lock(&id_mgr->lock); 821 dma_fence_put(id->last_flush); 822 id->last_flush = fence; 823 id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); 824 mutex_unlock(&id_mgr->lock); 825 } 826 827 if (ring->funcs->emit_gds_switch && gds_switch_needed) { 828 id->gds_base = job->gds_base; 829 id->gds_size = job->gds_size; 830 id->gws_base = job->gws_base; 831 id->gws_size = job->gws_size; 832 id->oa_base = job->oa_base; 833 id->oa_size = job->oa_size; 834 amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base, 835 job->gds_size, job->gws_base, 836 job->gws_size, job->oa_base, 837 job->oa_size); 838 } 839 840 if (ring->funcs->patch_cond_exec) 841 amdgpu_ring_patch_cond_exec(ring, patch_offset); 842 843 /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ 844 if (ring->funcs->emit_switch_buffer) { 845 amdgpu_ring_emit_switch_buffer(ring); 846 amdgpu_ring_emit_switch_buffer(ring); 847 } 848 return 0; 849 } 850 851 /** 852 * amdgpu_vm_reset_id - reset VMID to zero 853 * 854 * @adev: amdgpu device structure 855 * @vm_id: vmid number to use 856 * 857 * Reset saved GDW, GWS and OA to force switch on next flush. 
858 */ 859 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, 860 unsigned vmid) 861 { 862 struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 863 struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; 864 865 atomic64_set(&id->owner, 0); 866 id->gds_base = 0; 867 id->gds_size = 0; 868 id->gws_base = 0; 869 id->gws_size = 0; 870 id->oa_base = 0; 871 id->oa_size = 0; 872 } 873 874 /** 875 * amdgpu_vm_reset_all_id - reset VMID to zero 876 * 877 * @adev: amdgpu device structure 878 * 879 * Reset VMID to force flush on next use 880 */ 881 void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev) 882 { 883 unsigned i, j; 884 885 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 886 struct amdgpu_vm_id_manager *id_mgr = 887 &adev->vm_manager.id_mgr[i]; 888 889 for (j = 1; j < id_mgr->num_ids; ++j) 890 amdgpu_vm_reset_id(adev, i, j); 891 } 892 } 893 894 /** 895 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo 896 * 897 * @vm: requested vm 898 * @bo: requested buffer object 899 * 900 * Find @bo inside the requested vm. 901 * Search inside the @bos vm list for the requested vm 902 * Returns the found bo_va or NULL if none is found 903 * 904 * Object has to be reserved! 905 */ 906 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, 907 struct amdgpu_bo *bo) 908 { 909 struct amdgpu_bo_va *bo_va; 910 911 list_for_each_entry(bo_va, &bo->va, base.bo_list) { 912 if (bo_va->base.vm == vm) { 913 return bo_va; 914 } 915 } 916 return NULL; 917 } 918 919 /** 920 * amdgpu_vm_do_set_ptes - helper to call the right asic function 921 * 922 * @params: see amdgpu_pte_update_params definition 923 * @pe: addr of the page entry 924 * @addr: dst addr to write into pe 925 * @count: number of page entries to update 926 * @incr: increase next addr by incr bytes 927 * @flags: hw access flags 928 * 929 * Traces the parameters and calls the right asic functions 930 * to setup the page table using the DMA. 931 */ 932 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, 933 uint64_t pe, uint64_t addr, 934 unsigned count, uint32_t incr, 935 uint64_t flags) 936 { 937 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); 938 939 if (count < 3) { 940 amdgpu_vm_write_pte(params->adev, params->ib, pe, 941 addr | flags, count, incr); 942 943 } else { 944 amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, 945 count, incr, flags); 946 } 947 } 948 949 /** 950 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART 951 * 952 * @params: see amdgpu_pte_update_params definition 953 * @pe: addr of the page entry 954 * @addr: dst addr to write into pe 955 * @count: number of page entries to update 956 * @incr: increase next addr by incr bytes 957 * @flags: hw access flags 958 * 959 * Traces the parameters and calls the DMA function to copy the PTEs. 960 */ 961 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, 962 uint64_t pe, uint64_t addr, 963 unsigned count, uint32_t incr, 964 uint64_t flags) 965 { 966 uint64_t src = (params->src + (addr >> 12) * 8); 967 968 969 trace_amdgpu_vm_copy_ptes(pe, src, count); 970 971 amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); 972 } 973 974 /** 975 * amdgpu_vm_map_gart - Resolve gart mapping of addr 976 * 977 * @pages_addr: optional DMA address to use for lookup 978 * @addr: the unmapped addr 979 * 980 * Look up the physical address of the page that the pte resolves 981 * to and return the pointer for the page table entry. 
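 *
 * Illustrative example, assuming 64 KiB CPU pages and 4 KiB GPU pages:
 *
 *	addr = 0x25000
 *	result = pages_addr[0x25000 >> PAGE_SHIFT]  (DMA address of the page)
 *	result |= 0x5000                            (GPU page offset inside
 *	                                             the CPU page)
 *
 * The low 12 bits are masked off at the end, so with 4 KiB CPU pages the
 * OR is effectively a no-op.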
 */
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}

/**
 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 *
 * @params: see amdgpu_pte_update_params definition
 * @pe: kmap addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Write count number of PT/PD entries directly.
 */
static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	unsigned int i;
	uint64_t value;

	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	for (i = 0; i < count; i++) {
		value = params->pages_addr ?
			amdgpu_vm_map_gart(params->pages_addr, addr) :
			addr;
		amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
					i, value, flags);
		addr += incr;
	}
}

static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			     void *owner)
{
	struct amdgpu_sync sync;
	int r;

	amdgpu_sync_create(&sync);
	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
	r = amdgpu_sync_wait(&sync, true);
	amdgpu_sync_free(&sync);

	return r;
}

/*
 * amdgpu_vm_update_level - update a single level in the hierarchy
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @parent: parent directory
 *
 * Makes sure all entries in @parent are up to date.
 * Returns 0 for success, error for failure.
 */
static int amdgpu_vm_update_level(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt *parent)
{
	struct amdgpu_bo *shadow;
	struct amdgpu_ring *ring = NULL;
	uint64_t pd_addr, shadow_addr = 0;
	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
	unsigned count = 0, pt_idx, ndw = 0;
	struct amdgpu_job *job;
	struct amdgpu_pte_update_params params;
	struct dma_fence *fence = NULL;
	uint32_t incr;

	int r;

	if (!parent->entries)
		return 0;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	shadow = parent->base.bo->shadow;

	if (vm->use_cpu_for_update) {
		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
		if (unlikely(r))
			return r;

		params.func = amdgpu_vm_cpu_set_ptes;
	} else {
		ring = container_of(vm->entity.sched, struct amdgpu_ring,
				    sched);

		/* padding, etc. */
		ndw = 64;

		/* assume the worst case */
		ndw += parent->last_entry_used * 6;

		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);

		if (shadow) {
			shadow_addr = amdgpu_bo_gpu_offset(shadow);
			ndw *= 2;
		} else {
			shadow_addr = 0;
		}

		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
		if (r)
			return r;

		params.ib = &job->ibs[0];
		params.func = amdgpu_vm_do_set_ptes;
	}


	/* walk over the address space and update the directory */
	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
		struct amdgpu_bo *bo = entry->base.bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		spin_lock(&vm->status_lock);
		list_del_init(&entry->base.vm_status);
		spin_unlock(&vm->status_lock);

		pt = amdgpu_bo_gpu_offset(bo);
		pt = amdgpu_gart_get_vm_pde(adev, pt);
		/* Don't update huge pages here */
		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
			continue;

		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;

		pde = pd_addr + pt_idx * 8;
		incr = amdgpu_bo_size(bo);
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt) ||
		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {

			if (count) {
				if (shadow)
					params.func(&params,
						    last_shadow,
						    last_pt, count,
						    incr,
						    AMDGPU_PTE_VALID);

				params.func(&params, last_pde,
					    last_pt, count, incr,
					    AMDGPU_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_shadow = shadow_addr + pt_idx * 8;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count) {
		if (vm->root.base.bo->shadow)
			params.func(&params, last_shadow, last_pt,
				    count, incr, AMDGPU_PTE_VALID);

		params.func(&params, last_pde, last_pt,
			    count, incr, AMDGPU_PTE_VALID);
	}

	if (!vm->use_cpu_for_update) {
		if (params.ib->length_dw == 0) {
			amdgpu_job_free(job);
		} else {
			amdgpu_ring_pad_ib(ring, params.ib);
			amdgpu_sync_resv(adev, &job->sync,
					 parent->base.bo->tbo.resv,
					 AMDGPU_FENCE_OWNER_VM);
			if (shadow)
				amdgpu_sync_resv(adev, &job->sync,
						 shadow->tbo.resv,
						 AMDGPU_FENCE_OWNER_VM);

			WARN_ON(params.ib->length_dw > ndw);
			r = amdgpu_job_submit(job, ring, &vm->entity,
					      AMDGPU_FENCE_OWNER_VM, &fence);
			if (r)
				goto error_free;

			amdgpu_bo_fence(parent->base.bo, fence, true);
			dma_fence_put(vm->last_update);
			vm->last_update = fence;
		}
	}

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

/*
 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 *
 * @parent: parent PD
 *
 * Mark all PD levels as invalid after an error.
 */
static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
				       struct amdgpu_vm_pt *parent)
{
	unsigned pt_idx;

	/*
	 * Recurse into the subdirectories. This recursion is harmless because
	 * we only have a maximum of 5 layers.
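	 *
	 * Each entry that still has a BO gets its cached address reset to
	 * ~0ULL and is put back on the &vm->relocated list, so the directory
	 * entries are written again once the VM is updated after the error.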
1218 */ 1219 for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { 1220 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; 1221 1222 if (!entry->base.bo) 1223 continue; 1224 1225 entry->addr = ~0ULL; 1226 spin_lock(&vm->status_lock); 1227 if (list_empty(&entry->base.vm_status)) 1228 list_add(&entry->base.vm_status, &vm->relocated); 1229 spin_unlock(&vm->status_lock); 1230 amdgpu_vm_invalidate_level(vm, entry); 1231 } 1232 } 1233 1234 /* 1235 * amdgpu_vm_update_directories - make sure that all directories are valid 1236 * 1237 * @adev: amdgpu_device pointer 1238 * @vm: requested vm 1239 * 1240 * Makes sure all directories are up to date. 1241 * Returns 0 for success, error for failure. 1242 */ 1243 int amdgpu_vm_update_directories(struct amdgpu_device *adev, 1244 struct amdgpu_vm *vm) 1245 { 1246 int r; 1247 1248 spin_lock(&vm->status_lock); 1249 while (!list_empty(&vm->relocated)) { 1250 struct amdgpu_vm_bo_base *bo_base; 1251 struct amdgpu_bo *bo; 1252 1253 bo_base = list_first_entry(&vm->relocated, 1254 struct amdgpu_vm_bo_base, 1255 vm_status); 1256 spin_unlock(&vm->status_lock); 1257 1258 bo = bo_base->bo->parent; 1259 if (bo) { 1260 struct amdgpu_vm_bo_base *parent; 1261 struct amdgpu_vm_pt *pt; 1262 1263 parent = list_first_entry(&bo->va, 1264 struct amdgpu_vm_bo_base, 1265 bo_list); 1266 pt = container_of(parent, struct amdgpu_vm_pt, base); 1267 1268 r = amdgpu_vm_update_level(adev, vm, pt); 1269 if (r) { 1270 amdgpu_vm_invalidate_level(vm, &vm->root); 1271 return r; 1272 } 1273 spin_lock(&vm->status_lock); 1274 } else { 1275 spin_lock(&vm->status_lock); 1276 list_del_init(&bo_base->vm_status); 1277 } 1278 } 1279 spin_unlock(&vm->status_lock); 1280 1281 if (vm->use_cpu_for_update) { 1282 /* Flush HDP */ 1283 mb(); 1284 amdgpu_gart_flush_gpu_tlb(adev, 0); 1285 } 1286 1287 return r; 1288 } 1289 1290 /** 1291 * amdgpu_vm_find_entry - find the entry for an address 1292 * 1293 * @p: see amdgpu_pte_update_params definition 1294 * @addr: virtual address in question 1295 * @entry: resulting entry or NULL 1296 * @parent: parent entry 1297 * 1298 * Find the vm_pt entry and it's parent for the given address. 1299 */ 1300 void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, 1301 struct amdgpu_vm_pt **entry, 1302 struct amdgpu_vm_pt **parent) 1303 { 1304 unsigned idx, level = p->adev->vm_manager.num_level; 1305 1306 *parent = NULL; 1307 *entry = &p->vm->root; 1308 while ((*entry)->entries) { 1309 idx = addr >> (p->adev->vm_manager.block_size * level--); 1310 idx %= amdgpu_bo_size((*entry)->base.bo) / 8; 1311 *parent = *entry; 1312 *entry = &(*entry)->entries[idx]; 1313 } 1314 1315 if (level) 1316 *entry = NULL; 1317 } 1318 1319 /** 1320 * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages 1321 * 1322 * @p: see amdgpu_pte_update_params definition 1323 * @entry: vm_pt entry to check 1324 * @parent: parent entry 1325 * @nptes: number of PTEs updated with this operation 1326 * @dst: destination address where the PTEs should point to 1327 * @flags: access flags fro the PTEs 1328 * 1329 * Check if we can update the PD with a huge page. 
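 *
 * Roughly: on Vega10 and later, when a single update covers a whole page
 * table (AMDGPU_VM_PTE_COUNT entries), is not copied through the GART and
 * maps one contiguous, valid range, the PDE is written with the target
 * address plus AMDGPU_PDE_PTE so the page table walk stops one level early.
 * In all other cases the PDE simply points at the page table again.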
1330 */ 1331 static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, 1332 struct amdgpu_vm_pt *entry, 1333 struct amdgpu_vm_pt *parent, 1334 unsigned nptes, uint64_t dst, 1335 uint64_t flags) 1336 { 1337 bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); 1338 uint64_t pd_addr, pde; 1339 1340 /* In the case of a mixed PT the PDE must point to it*/ 1341 if (p->adev->asic_type < CHIP_VEGA10 || 1342 nptes != AMDGPU_VM_PTE_COUNT(p->adev) || 1343 p->src || 1344 !(flags & AMDGPU_PTE_VALID)) { 1345 1346 dst = amdgpu_bo_gpu_offset(entry->base.bo); 1347 dst = amdgpu_gart_get_vm_pde(p->adev, dst); 1348 flags = AMDGPU_PTE_VALID; 1349 } else { 1350 /* Set the huge page flag to stop scanning at this PDE */ 1351 flags |= AMDGPU_PDE_PTE; 1352 } 1353 1354 if (entry->addr == (dst | flags)) 1355 return; 1356 1357 entry->addr = (dst | flags); 1358 1359 if (use_cpu_update) { 1360 /* In case a huge page is replaced with a system 1361 * memory mapping, p->pages_addr != NULL and 1362 * amdgpu_vm_cpu_set_ptes would try to translate dst 1363 * through amdgpu_vm_map_gart. But dst is already a 1364 * GPU address (of the page table). Disable 1365 * amdgpu_vm_map_gart temporarily. 1366 */ 1367 dma_addr_t *tmp; 1368 1369 tmp = p->pages_addr; 1370 p->pages_addr = NULL; 1371 1372 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); 1373 pde = pd_addr + (entry - parent->entries) * 8; 1374 amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); 1375 1376 p->pages_addr = tmp; 1377 } else { 1378 if (parent->base.bo->shadow) { 1379 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); 1380 pde = pd_addr + (entry - parent->entries) * 8; 1381 amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); 1382 } 1383 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); 1384 pde = pd_addr + (entry - parent->entries) * 8; 1385 amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); 1386 } 1387 } 1388 1389 /** 1390 * amdgpu_vm_update_ptes - make sure that page tables are valid 1391 * 1392 * @params: see amdgpu_pte_update_params definition 1393 * @vm: requested vm 1394 * @start: start of GPU address range 1395 * @end: end of GPU address range 1396 * @dst: destination address to map to, the next dst inside the function 1397 * @flags: mapping flags 1398 * 1399 * Update the page tables in the range @start - @end. 1400 * Returns 0 for success, -EINVAL for failure. 
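 *
 * The walk processes at most one page table per iteration. Illustrative
 * example, assuming 512 PTEs per page table:
 *
 *	start = 1000, end = 1600
 *	1st iteration: nptes = 512 - (1000 & 511) = 24   -> addr = 1024
 *	2nd iteration: nptes = 512                       -> addr = 1536
 *	3rd iteration: nptes = 1600 - 1536        = 64   -> done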
1401 */ 1402 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, 1403 uint64_t start, uint64_t end, 1404 uint64_t dst, uint64_t flags) 1405 { 1406 struct amdgpu_device *adev = params->adev; 1407 const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; 1408 1409 uint64_t addr, pe_start; 1410 struct amdgpu_bo *pt; 1411 unsigned nptes; 1412 bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); 1413 1414 /* walk over the address space and update the page tables */ 1415 for (addr = start; addr < end; addr += nptes, 1416 dst += nptes * AMDGPU_GPU_PAGE_SIZE) { 1417 struct amdgpu_vm_pt *entry, *parent; 1418 1419 amdgpu_vm_get_entry(params, addr, &entry, &parent); 1420 if (!entry) 1421 return -ENOENT; 1422 1423 if ((addr & ~mask) == (end & ~mask)) 1424 nptes = end - addr; 1425 else 1426 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); 1427 1428 amdgpu_vm_handle_huge_pages(params, entry, parent, 1429 nptes, dst, flags); 1430 /* We don't need to update PTEs for huge pages */ 1431 if (entry->addr & AMDGPU_PDE_PTE) 1432 continue; 1433 1434 pt = entry->base.bo; 1435 if (use_cpu_update) { 1436 pe_start = (unsigned long)amdgpu_bo_kptr(pt); 1437 } else { 1438 if (pt->shadow) { 1439 pe_start = amdgpu_bo_gpu_offset(pt->shadow); 1440 pe_start += (addr & mask) * 8; 1441 params->func(params, pe_start, dst, nptes, 1442 AMDGPU_GPU_PAGE_SIZE, flags); 1443 } 1444 pe_start = amdgpu_bo_gpu_offset(pt); 1445 } 1446 1447 pe_start += (addr & mask) * 8; 1448 params->func(params, pe_start, dst, nptes, 1449 AMDGPU_GPU_PAGE_SIZE, flags); 1450 } 1451 1452 return 0; 1453 } 1454 1455 /* 1456 * amdgpu_vm_frag_ptes - add fragment information to PTEs 1457 * 1458 * @params: see amdgpu_pte_update_params definition 1459 * @vm: requested vm 1460 * @start: first PTE to handle 1461 * @end: last PTE to handle 1462 * @dst: addr those PTEs should point to 1463 * @flags: hw mapping flags 1464 * Returns 0 for success, -EINVAL for failure. 1465 */ 1466 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, 1467 uint64_t start, uint64_t end, 1468 uint64_t dst, uint64_t flags) 1469 { 1470 /** 1471 * The MC L1 TLB supports variable sized pages, based on a fragment 1472 * field in the PTE. When this field is set to a non-zero value, page 1473 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE 1474 * flags are considered valid for all PTEs within the fragment range 1475 * and corresponding mappings are assumed to be physically contiguous. 1476 * 1477 * The L1 TLB can store a single PTE for the whole fragment, 1478 * significantly increasing the space available for translation 1479 * caching. This leads to large improvements in throughput when the 1480 * TLB is under pressure. 1481 * 1482 * The L2 TLB distributes small and large fragments into two 1483 * asymmetric partitions. The large fragment cache is significantly 1484 * larger. Thus, we try to use large fragments wherever possible. 1485 * Userspace can support this by aligning virtual base address and 1486 * allocation size to the fragment size. 
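	 *
	 * Illustrative example, assuming a fragment size of 9 (2 MiB with
	 * 4 KiB GPU pages): when both the start address and the size of a
	 * mapping are 2 MiB aligned, ffs(start) - 1 and fls64(end - start) - 1
	 * are both at least 9, so the whole range is written with
	 * AMDGPU_PTE_FRAG(9) and can be cached with far fewer TLB entries.
	 * Unaligned head and tail portions fall back to smaller fragments.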
	 */
	unsigned max_frag = params->adev->vm_manager.fragment_size;
	int r;

	/* system pages are not contiguous */
	if (params->src || !(flags & AMDGPU_PTE_VALID))
		return amdgpu_vm_update_ptes(params, start, end, dst, flags);

	while (start != end) {
		uint64_t frag_flags, frag_end;
		unsigned frag;

		/* This intentionally wraps around if no bit is set */
		frag = min((unsigned)ffs(start) - 1,
			   (unsigned)fls64(end - start) - 1);
		if (frag >= max_frag) {
			frag_flags = AMDGPU_PTE_FRAG(max_frag);
			frag_end = end & ~((1ULL << max_frag) - 1);
		} else {
			frag_flags = AMDGPU_PTE_FRAG(frag);
			frag_end = start + (1 << frag);
		}

		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
					  flags | frag_flags);
		if (r)
			return r;

		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
		start = frag_end;
	}

	return 0;
}

/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @exclusive: fence we need to sync to
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 * Returns 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				       struct dma_fence *exclusive,
				       dma_addr_t *pages_addr,
				       struct amdgpu_vm *vm,
				       uint64_t start, uint64_t last,
				       uint64_t flags, uint64_t addr,
				       struct dma_fence **fence)
{
	struct amdgpu_ring *ring;
	void *owner = AMDGPU_FENCE_OWNER_VM;
	unsigned nptes, ncmds, ndw;
	struct amdgpu_job *job;
	struct amdgpu_pte_update_params params;
	struct dma_fence *f = NULL;
	int r;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;

	/* sync to everything on unmapping */
	if (!(flags & AMDGPU_PTE_VALID))
		owner = AMDGPU_FENCE_OWNER_UNDEFINED;

	if (vm->use_cpu_for_update) {
		/* params.src is used as flag to indicate system memory */
		if (pages_addr)
			params.src = ~0;

		/* Wait for PT BOs to be free. PTs share the same resv. object
		 * as the root PD BO
		 */
		r = amdgpu_vm_wait_pd(adev, vm, owner);
		if (unlikely(r))
			return r;

		params.func = amdgpu_vm_cpu_set_ptes;
		params.pages_addr = pages_addr;
		return amdgpu_vm_frag_ptes(&params, start, last + 1,
					   addr, flags);
	}

	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

	nptes = last - start + 1;

	/*
	 * reserve space for two commands every (1 << BLOCK_SIZE)
	 * entries or 2k dwords (whatever is smaller)
	 *
	 * The second command is for the shadow pagetables.
	 */
	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;

	/* padding, etc. */
	ndw = 64;

	/* one PDE write for each huge page */
	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;

	if (pages_addr) {
		/* copy commands needed */
		ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;

		/* and also PTEs */
		ndw += nptes * 2;

		params.func = amdgpu_vm_do_copy_ptes;

	} else {
		/* set page commands needed */
		ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;

		/* extra commands for begin/end fragments */
		ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
			* adev->vm_manager.fragment_size;

		params.func = amdgpu_vm_do_set_ptes;
	}

	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
	if (r)
		return r;

	params.ib = &job->ibs[0];

	if (pages_addr) {
		uint64_t *pte;
		unsigned i;

		/* Put the PTEs at the end of the IB. */
		i = ndw - nptes * 2;
		pte = (uint64_t *)&(job->ibs->ptr[i]);
		params.src = job->ibs->gpu_addr + i * 4;

		for (i = 0; i < nptes; ++i) {
			pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
						    AMDGPU_GPU_PAGE_SIZE);
			pte[i] |= flags;
		}
		addr = 0;
	}

	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
	if (r)
		goto error_free;

	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
			     owner);
	if (r)
		goto error_free;

	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
	if (r)
		goto error_free;

	r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
	if (r)
		goto error_free;

	amdgpu_ring_pad_ib(ring, params.ib);
	WARN_ON(params.ib->length_dw > ndw);
	r = amdgpu_job_submit(job, ring, &vm->entity,
			      AMDGPU_FENCE_OWNER_VM, &f);
	if (r)
		goto error_free;

	amdgpu_bo_fence(vm->root.base.bo, f, true);
	dma_fence_put(*fence);
	*fence = f;
	return 0;

error_free:
	amdgpu_job_free(job);
	amdgpu_vm_invalidate_level(vm, &vm->root);
	return r;
}

/**
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
 * @exclusive: fence we need to sync to
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @flags: HW flags for the mapping
 * @nodes: array of drm_mm_nodes with the MC addresses
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
 * into an SDMA IB.
 * Returns 0 for success, -EINVAL for failure.
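 *
 * Besides the IB size limit, the mapping is also split at drm_mm node
 * boundaries, and updates that are copied through the GART are limited to
 * 16K page table entries each so the scratch area in the IB stays small.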
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
				      struct dma_fence *exclusive,
				      dma_addr_t *pages_addr,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo_va_mapping *mapping,
				      uint64_t flags,
				      struct drm_mm_node *nodes,
				      struct dma_fence **fence)
{
	uint64_t pfn, start = mapping->start;
	int r;

	/* normally bo_va->flags only contains the READABLE and WRITEABLE
	 * bits, but filter the flags here again just in case
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	flags &= ~AMDGPU_PTE_EXECUTABLE;
	flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	flags &= ~AMDGPU_PTE_MTYPE_MASK;
	flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);

	if ((mapping->flags & AMDGPU_PTE_PRT) &&
	    (adev->asic_type >= CHIP_VEGA10)) {
		flags |= AMDGPU_PTE_PRT;
		flags &= ~AMDGPU_PTE_VALID;
	}

	trace_amdgpu_vm_bo_update(mapping);

	pfn = mapping->offset >> PAGE_SHIFT;
	if (nodes) {
		while (pfn >= nodes->size) {
			pfn -= nodes->size;
			++nodes;
		}
	}

	do {
		uint64_t max_entries;
		uint64_t addr, last;

		if (nodes) {
			addr = nodes->start << PAGE_SHIFT;
			max_entries = (nodes->size - pfn) *
				(PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
		} else {
			addr = 0;
			max_entries = S64_MAX;
		}

		if (pages_addr) {
			max_entries = min(max_entries, 16ull * 1024ull);
			addr = 0;
		} else if (flags & AMDGPU_PTE_VALID) {
			addr += adev->vm_manager.vram_base_offset;
		}
		addr += pfn << PAGE_SHIFT;

		last = min((uint64_t)mapping->last, start + max_entries - 1);
		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
						start, last, flags, addr,
						fence);
		if (r)
			return r;

		pfn += last - start + 1;
		if (nodes && nodes->size == pfn) {
			pfn = 0;
			++nodes;
		}
		start = last + 1;

	} while (unlikely(start != mapping->last + 1));

	return 0;
}

/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @clear: if true clear the entries
 *
 * Fill in the page table entries for @bo_va.
 * Returns 0 for success, -EINVAL for failure.
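 *
 * When @clear is set the range is written without looking at the current
 * backing store of the BO. The fence of the update is tracked in
 * vm->last_update for per-VM BOs (and when clearing), otherwise in
 * bo_va->last_pt_update.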
1783 */ 1784 int amdgpu_vm_bo_update(struct amdgpu_device *adev, 1785 struct amdgpu_bo_va *bo_va, 1786 bool clear) 1787 { 1788 struct amdgpu_bo *bo = bo_va->base.bo; 1789 struct amdgpu_vm *vm = bo_va->base.vm; 1790 struct amdgpu_bo_va_mapping *mapping; 1791 dma_addr_t *pages_addr = NULL; 1792 struct ttm_mem_reg *mem; 1793 struct drm_mm_node *nodes; 1794 struct dma_fence *exclusive, **last_update; 1795 uint64_t flags; 1796 int r; 1797 1798 if (clear || !bo_va->base.bo) { 1799 mem = NULL; 1800 nodes = NULL; 1801 exclusive = NULL; 1802 } else { 1803 struct ttm_dma_tt *ttm; 1804 1805 mem = &bo_va->base.bo->tbo.mem; 1806 nodes = mem->mm_node; 1807 if (mem->mem_type == TTM_PL_TT) { 1808 ttm = container_of(bo_va->base.bo->tbo.ttm, 1809 struct ttm_dma_tt, ttm); 1810 pages_addr = ttm->dma_address; 1811 } 1812 exclusive = reservation_object_get_excl(bo->tbo.resv); 1813 } 1814 1815 if (bo) 1816 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); 1817 else 1818 flags = 0x0; 1819 1820 if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) 1821 last_update = &vm->last_update; 1822 else 1823 last_update = &bo_va->last_pt_update; 1824 1825 if (!clear && bo_va->base.moved) { 1826 bo_va->base.moved = false; 1827 list_splice_init(&bo_va->valids, &bo_va->invalids); 1828 1829 } else if (bo_va->cleared != clear) { 1830 list_splice_init(&bo_va->valids, &bo_va->invalids); 1831 } 1832 1833 list_for_each_entry(mapping, &bo_va->invalids, list) { 1834 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, 1835 mapping, flags, nodes, 1836 last_update); 1837 if (r) 1838 return r; 1839 } 1840 1841 if (vm->use_cpu_for_update) { 1842 /* Flush HDP */ 1843 mb(); 1844 amdgpu_gart_flush_gpu_tlb(adev, 0); 1845 } 1846 1847 spin_lock(&vm->status_lock); 1848 list_del_init(&bo_va->base.vm_status); 1849 spin_unlock(&vm->status_lock); 1850 1851 list_splice_init(&bo_va->invalids, &bo_va->valids); 1852 bo_va->cleared = clear; 1853 1854 if (trace_amdgpu_vm_bo_mapping_enabled()) { 1855 list_for_each_entry(mapping, &bo_va->valids, list) 1856 trace_amdgpu_vm_bo_mapping(mapping); 1857 } 1858 1859 return 0; 1860 } 1861 1862 /** 1863 * amdgpu_vm_update_prt_state - update the global PRT state 1864 */ 1865 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) 1866 { 1867 unsigned long flags; 1868 bool enable; 1869 1870 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); 1871 enable = !!atomic_read(&adev->vm_manager.num_prt_users); 1872 adev->gart.gart_funcs->set_prt(adev, enable); 1873 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); 1874 } 1875 1876 /** 1877 * amdgpu_vm_prt_get - add a PRT user 1878 */ 1879 static void amdgpu_vm_prt_get(struct amdgpu_device *adev) 1880 { 1881 if (!adev->gart.gart_funcs->set_prt) 1882 return; 1883 1884 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) 1885 amdgpu_vm_update_prt_state(adev); 1886 } 1887 1888 /** 1889 * amdgpu_vm_prt_put - drop a PRT user 1890 */ 1891 static void amdgpu_vm_prt_put(struct amdgpu_device *adev) 1892 { 1893 if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0) 1894 amdgpu_vm_update_prt_state(adev); 1895 } 1896 1897 /** 1898 * amdgpu_vm_prt_cb - callback for updating the PRT status 1899 */ 1900 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) 1901 { 1902 struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb); 1903 1904 amdgpu_vm_prt_put(cb->adev); 1905 kfree(cb); 1906 } 1907 1908 /** 1909 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status 1910 */ 1911 static 
void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, 1912 struct dma_fence *fence) 1913 { 1914 struct amdgpu_prt_cb *cb; 1915 1916 if (!adev->gart.gart_funcs->set_prt) 1917 return; 1918 1919 cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); 1920 if (!cb) { 1921 /* Last resort when we are OOM */ 1922 if (fence) 1923 dma_fence_wait(fence, false); 1924 1925 amdgpu_vm_prt_put(adev); 1926 } else { 1927 cb->adev = adev; 1928 if (!fence || dma_fence_add_callback(fence, &cb->cb, 1929 amdgpu_vm_prt_cb)) 1930 amdgpu_vm_prt_cb(fence, &cb->cb); 1931 } 1932 } 1933 1934 /** 1935 * amdgpu_vm_free_mapping - free a mapping 1936 * 1937 * @adev: amdgpu_device pointer 1938 * @vm: requested vm 1939 * @mapping: mapping to be freed 1940 * @fence: fence of the unmap operation 1941 * 1942 * Free a mapping and make sure we decrease the PRT usage count if applicable. 1943 */ 1944 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, 1945 struct amdgpu_vm *vm, 1946 struct amdgpu_bo_va_mapping *mapping, 1947 struct dma_fence *fence) 1948 { 1949 if (mapping->flags & AMDGPU_PTE_PRT) 1950 amdgpu_vm_add_prt_cb(adev, fence); 1951 kfree(mapping); 1952 } 1953 1954 /** 1955 * amdgpu_vm_prt_fini - finish all prt mappings 1956 * 1957 * @adev: amdgpu_device pointer 1958 * @vm: requested vm 1959 * 1960 * Register a cleanup callback to disable PRT support after VM dies. 1961 */ 1962 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 1963 { 1964 struct reservation_object *resv = vm->root.base.bo->tbo.resv; 1965 struct dma_fence *excl, **shared; 1966 unsigned i, shared_count; 1967 int r; 1968 1969 r = reservation_object_get_fences_rcu(resv, &excl, 1970 &shared_count, &shared); 1971 if (r) { 1972 /* Not enough memory to grab the fence list, as last resort 1973 * block for all the fences to complete. 1974 */ 1975 reservation_object_wait_timeout_rcu(resv, true, false, 1976 MAX_SCHEDULE_TIMEOUT); 1977 return; 1978 } 1979 1980 /* Add a callback for each fence in the reservation object */ 1981 amdgpu_vm_prt_get(adev); 1982 amdgpu_vm_add_prt_cb(adev, excl); 1983 1984 for (i = 0; i < shared_count; ++i) { 1985 amdgpu_vm_prt_get(adev); 1986 amdgpu_vm_add_prt_cb(adev, shared[i]); 1987 } 1988 1989 kfree(shared); 1990 } 1991 1992 /** 1993 * amdgpu_vm_clear_freed - clear freed BOs in the PT 1994 * 1995 * @adev: amdgpu_device pointer 1996 * @vm: requested vm 1997 * @fence: optional resulting fence (unchanged if no work needed to be done 1998 * or if an error occurred) 1999 * 2000 * Make sure all freed BOs are cleared in the PT. 2001 * Returns 0 for success. 2002 * 2003 * PTs have to be reserved and mutex must be locked! 
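 *
 * Each freed mapping is rewritten with the default PTE value (currently
 * AMDGPU_PTE_SYSTEM when ATS is supported, 0 otherwise) before the mapping
 * itself is released. The fence of the last update, if any, is returned
 * through @fence.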
2004 */
2005 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
2006 struct amdgpu_vm *vm,
2007 struct dma_fence **fence)
2008 {
2009 struct amdgpu_bo_va_mapping *mapping;
2010 struct dma_fence *f = NULL;
2011 int r;
2012 uint64_t init_pte_value = 0;
2013
2014 while (!list_empty(&vm->freed)) {
2015 mapping = list_first_entry(&vm->freed,
2016 struct amdgpu_bo_va_mapping, list);
2017 list_del(&mapping->list);
2018
2019 if (vm->pte_support_ats)
2020 init_pte_value = AMDGPU_PTE_SYSTEM;
2021
2022 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
2023 mapping->start, mapping->last,
2024 init_pte_value, 0, &f);
2025 amdgpu_vm_free_mapping(adev, vm, mapping, f);
2026 if (r) {
2027 dma_fence_put(f);
2028 return r;
2029 }
2030 }
2031
2032 if (fence && f) {
2033 dma_fence_put(*fence);
2034 *fence = f;
2035 } else {
2036 dma_fence_put(f);
2037 }
2038
2039 return 0;
2040
2041 }
2042
2043 /**
2044 * amdgpu_vm_handle_moved - handle moved BOs in the PT
2045 *
2046 * @adev: amdgpu_device pointer
2047 * @vm: requested vm
2049 *
2050 * Make sure all BOs which are moved are updated in the PTs.
2051 * Returns 0 for success.
2052 *
2053 * PTs have to be reserved!
2054 */
2055 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
2056 struct amdgpu_vm *vm)
2057 {
2058 bool clear;
2059 int r = 0;
2060
2061 spin_lock(&vm->status_lock);
2062 while (!list_empty(&vm->moved)) {
2063 struct amdgpu_bo_va *bo_va;
2064
2065 bo_va = list_first_entry(&vm->moved,
2066 struct amdgpu_bo_va, base.vm_status);
2067 spin_unlock(&vm->status_lock);
2068
2069 /* Per VM BOs never need to be cleared in the page tables */
2070 clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
2071
2072 r = amdgpu_vm_bo_update(adev, bo_va, clear);
2073 if (r)
2074 return r;
2075
2076 spin_lock(&vm->status_lock);
2077 }
2078 spin_unlock(&vm->status_lock);
2079
2080 return r;
2081 }
2082
2083 /**
2084 * amdgpu_vm_bo_add - add a bo to a specific vm
2085 *
2086 * @adev: amdgpu_device pointer
2087 * @vm: requested vm
2088 * @bo: amdgpu buffer object
2089 *
2090 * Add @bo into the requested vm.
2091 * Add @bo to the list of bos associated with the vm.
2092 * Returns newly added bo_va or NULL for failure.
2093 *
2094 * Object has to be reserved!
2095 */
2096 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
2097 struct amdgpu_vm *vm,
2098 struct amdgpu_bo *bo)
2099 {
2100 struct amdgpu_bo_va *bo_va;
2101
2102 bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
2103 if (bo_va == NULL) {
2104 return NULL;
2105 }
2106 bo_va->base.vm = vm;
2107 bo_va->base.bo = bo;
2108 INIT_LIST_HEAD(&bo_va->base.bo_list);
2109 INIT_LIST_HEAD(&bo_va->base.vm_status);
2110
2111 bo_va->ref_count = 1;
2112 INIT_LIST_HEAD(&bo_va->valids);
2113 INIT_LIST_HEAD(&bo_va->invalids);
2114
2115 if (bo)
2116 list_add_tail(&bo_va->base.bo_list, &bo->va);
2117
2118 return bo_va;
2119 }
2120
2121
2122 /**
2123 * amdgpu_vm_bo_insert_map - insert a new mapping
2124 *
2125 * @adev: amdgpu_device pointer
2126 * @bo_va: bo_va to store the address
2127 * @mapping: the mapping to insert
2128 *
2129 * Insert a new mapping into all structures.
2130 */
2131 static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
2132 struct amdgpu_bo_va *bo_va,
2133 struct amdgpu_bo_va_mapping *mapping)
2134 {
2135 struct amdgpu_vm *vm = bo_va->base.vm;
2136 struct amdgpu_bo *bo = bo_va->base.bo;
2137
2138 mapping->bo_va = bo_va;
2139 list_add(&mapping->list, &bo_va->invalids);
2140 amdgpu_vm_it_insert(mapping, &vm->va);
2141
2142 if (mapping->flags & AMDGPU_PTE_PRT)
2143 amdgpu_vm_prt_get(adev);
2144
2145 if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2146 spin_lock(&vm->status_lock);
2147 if (list_empty(&bo_va->base.vm_status))
2148 list_add(&bo_va->base.vm_status, &vm->moved);
2149 spin_unlock(&vm->status_lock);
2150 }
2151 trace_amdgpu_vm_bo_map(bo_va, mapping);
2152 }
2153
2154 /**
2155 * amdgpu_vm_bo_map - map bo inside a vm
2156 *
2157 * @adev: amdgpu_device pointer
2158 * @bo_va: bo_va to store the address
2159 * @saddr: where to map the BO
2160 * @offset: requested offset in the BO
* @size: size of the mapping in bytes
2161 * @flags: attributes of pages (read/write/valid/etc.)
2162 *
2163 * Add a mapping of the BO at the specified addr into the VM.
2164 * Returns 0 for success, error for failure.
2165 *
2166 * Object has to be reserved and unreserved outside!
2167 */
2168 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
2169 struct amdgpu_bo_va *bo_va,
2170 uint64_t saddr, uint64_t offset,
2171 uint64_t size, uint64_t flags)
2172 {
2173 struct amdgpu_bo_va_mapping *mapping, *tmp;
2174 struct amdgpu_bo *bo = bo_va->base.bo;
2175 struct amdgpu_vm *vm = bo_va->base.vm;
2176 uint64_t eaddr;
2177
2178 /* validate the parameters */
2179 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
2180 size == 0 || size & AMDGPU_GPU_PAGE_MASK)
2181 return -EINVAL;
2182
2183 /* make sure object fit at this offset */
2184 eaddr = saddr + size - 1;
2185 if (saddr >= eaddr ||
2186 (bo && offset + size > amdgpu_bo_size(bo)))
2187 return -EINVAL;
2188
2189 saddr /= AMDGPU_GPU_PAGE_SIZE;
2190 eaddr /= AMDGPU_GPU_PAGE_SIZE;
2191
2192 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2193 if (tmp) {
2194 /* bo and tmp overlap, invalid addr */
2195 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
2196 "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr,
2197 tmp->start, tmp->last + 1);
2198 return -EINVAL;
2199 }
2200
2201 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
2202 if (!mapping)
2203 return -ENOMEM;
2204
2205 mapping->start = saddr;
2206 mapping->last = eaddr;
2207 mapping->offset = offset;
2208 mapping->flags = flags;
2209
2210 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2211
2212 return 0;
2213 }
2214
2215 /**
2216 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings
2217 *
2218 * @adev: amdgpu_device pointer
2219 * @bo_va: bo_va to store the address
2220 * @saddr: where to map the BO
2221 * @offset: requested offset in the BO
* @size: size of the mapping in bytes
2222 * @flags: attributes of pages (read/write/valid/etc.)
2223 *
2224 * Add a mapping of the BO at the specified addr into the VM. Replace existing
2225 * mappings as we do so.
2226 * Returns 0 for success, error for failure.
2227 *
2228 * Object has to be reserved and unreserved outside!
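 *
 * A minimal usage sketch (illustrative only; saddr and size must be GPU page
 * aligned, the PTE flag names are taken from amdgpu_vm.h, and the follow-up
 * amdgpu_vm_bo_update() call is just one possible way to apply the change):
 *
 *	r = amdgpu_vm_bo_replace_map(adev, bo_va, saddr, 0, size,
 *				     AMDGPU_PTE_READABLE |
 *				     AMDGPU_PTE_WRITEABLE);
 *	if (!r)
 *		r = amdgpu_vm_bo_update(adev, bo_va, false);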
2229 */
2230 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
2231 struct amdgpu_bo_va *bo_va,
2232 uint64_t saddr, uint64_t offset,
2233 uint64_t size, uint64_t flags)
2234 {
2235 struct amdgpu_bo_va_mapping *mapping;
2236 struct amdgpu_bo *bo = bo_va->base.bo;
2237 uint64_t eaddr;
2238 int r;
2239
2240 /* validate the parameters */
2241 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
2242 size == 0 || size & AMDGPU_GPU_PAGE_MASK)
2243 return -EINVAL;
2244
2245 /* make sure object fit at this offset */
2246 eaddr = saddr + size - 1;
2247 if (saddr >= eaddr ||
2248 (bo && offset + size > amdgpu_bo_size(bo)))
2249 return -EINVAL;
2250
2251 /* Allocate all the needed memory */
2252 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
2253 if (!mapping)
2254 return -ENOMEM;
2255
2256 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size);
2257 if (r) {
2258 kfree(mapping);
2259 return r;
2260 }
2261
2262 saddr /= AMDGPU_GPU_PAGE_SIZE;
2263 eaddr /= AMDGPU_GPU_PAGE_SIZE;
2264
2265 mapping->start = saddr;
2266 mapping->last = eaddr;
2267 mapping->offset = offset;
2268 mapping->flags = flags;
2269
2270 amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
2271
2272 return 0;
2273 }
2274
2275 /**
2276 * amdgpu_vm_bo_unmap - remove bo mapping from vm
2277 *
2278 * @adev: amdgpu_device pointer
2279 * @bo_va: bo_va to remove the address from
2280 * @saddr: where the BO is mapped
2281 *
2282 * Remove a mapping of the BO at the specified addr from the VM.
2283 * Returns 0 for success, error for failure.
2284 *
2285 * Object has to be reserved and unreserved outside!
2286 */
2287 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
2288 struct amdgpu_bo_va *bo_va,
2289 uint64_t saddr)
2290 {
2291 struct amdgpu_bo_va_mapping *mapping;
2292 struct amdgpu_vm *vm = bo_va->base.vm;
2293 bool valid = true;
2294
2295 saddr /= AMDGPU_GPU_PAGE_SIZE;
2296
2297 list_for_each_entry(mapping, &bo_va->valids, list) {
2298 if (mapping->start == saddr)
2299 break;
2300 }
2301
2302 if (&mapping->list == &bo_va->valids) {
2303 valid = false;
2304
2305 list_for_each_entry(mapping, &bo_va->invalids, list) {
2306 if (mapping->start == saddr)
2307 break;
2308 }
2309
2310 if (&mapping->list == &bo_va->invalids)
2311 return -ENOENT;
2312 }
2313
2314 list_del(&mapping->list);
2315 amdgpu_vm_it_remove(mapping, &vm->va);
2316 mapping->bo_va = NULL;
2317 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2318
2319 if (valid)
2320 list_add(&mapping->list, &vm->freed);
2321 else
2322 amdgpu_vm_free_mapping(adev, vm, mapping,
2323 bo_va->last_pt_update);
2324
2325 return 0;
2326 }
2327
2328 /**
2329 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range
2330 *
2331 * @adev: amdgpu_device pointer
2332 * @vm: VM structure to use
2333 * @saddr: start of the range
2334 * @size: size of the range
2335 *
2336 * Remove all mappings in a range, split them as appropriate.
2337 * Returns 0 for success, error for failure.
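 *
 * For example (illustrative numbers only): clearing GPU pages 0x100-0x1ff
 * out of an existing mapping covering 0x000-0x2ff leaves a "before" mapping
 * for 0x000-0x0ff and an "after" mapping for 0x200-0x2ff, while the
 * overlapping middle part is moved to &vm->freed so that a later
 * amdgpu_vm_clear_freed() call can actually clear the page tables.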
2338 */
2339 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
2340 struct amdgpu_vm *vm,
2341 uint64_t saddr, uint64_t size)
2342 {
2343 struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
2344 LIST_HEAD(removed);
2345 uint64_t eaddr;
2346
2347 eaddr = saddr + size - 1;
2348 saddr /= AMDGPU_GPU_PAGE_SIZE;
2349 eaddr /= AMDGPU_GPU_PAGE_SIZE;
2350
2351 /* Allocate all the needed memory */
2352 before = kzalloc(sizeof(*before), GFP_KERNEL);
2353 if (!before)
2354 return -ENOMEM;
2355 INIT_LIST_HEAD(&before->list);
2356
2357 after = kzalloc(sizeof(*after), GFP_KERNEL);
2358 if (!after) {
2359 kfree(before);
2360 return -ENOMEM;
2361 }
2362 INIT_LIST_HEAD(&after->list);
2363
2364 /* Now gather all removed mappings */
2365 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
2366 while (tmp) {
2367 /* Remember mapping split at the start */
2368 if (tmp->start < saddr) {
2369 before->start = tmp->start;
2370 before->last = saddr - 1;
2371 before->offset = tmp->offset;
2372 before->flags = tmp->flags;
2373 list_add(&before->list, &tmp->list);
2374 }
2375
2376 /* Remember mapping split at the end */
2377 if (tmp->last > eaddr) {
2378 after->start = eaddr + 1;
2379 after->last = tmp->last;
2380 after->offset = tmp->offset;
2381 after->offset += after->start - tmp->start;
2382 after->flags = tmp->flags;
2383 list_add(&after->list, &tmp->list);
2384 }
2385
2386 list_del(&tmp->list);
2387 list_add(&tmp->list, &removed);
2388
2389 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
2390 }
2391
2392 /* And free them up */
2393 list_for_each_entry_safe(tmp, next, &removed, list) {
2394 amdgpu_vm_it_remove(tmp, &vm->va);
2395 list_del(&tmp->list);
2396
2397 if (tmp->start < saddr)
2398 tmp->start = saddr;
2399 if (tmp->last > eaddr)
2400 tmp->last = eaddr;
2401
2402 tmp->bo_va = NULL;
2403 list_add(&tmp->list, &vm->freed);
2404 trace_amdgpu_vm_bo_unmap(NULL, tmp);
2405 }
2406
2407 /* Insert partial mapping before the range */
2408 if (!list_empty(&before->list)) {
2409 amdgpu_vm_it_insert(before, &vm->va);
2410 if (before->flags & AMDGPU_PTE_PRT)
2411 amdgpu_vm_prt_get(adev);
2412 } else {
2413 kfree(before);
2414 }
2415
2416 /* Insert partial mapping after the range */
2417 if (!list_empty(&after->list)) {
2418 amdgpu_vm_it_insert(after, &vm->va);
2419 if (after->flags & AMDGPU_PTE_PRT)
2420 amdgpu_vm_prt_get(adev);
2421 } else {
2422 kfree(after);
2423 }
2424
2425 return 0;
2426 }
2427
2428 /**
2429 * amdgpu_vm_bo_lookup_mapping - find mapping by address
2430 *
2431 * @vm: the requested VM
* @addr: the address to look up
2432 *
2433 * Find a mapping by its address.
2434 */
2435 struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
2436 uint64_t addr)
2437 {
2438 return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
2439 }
2440
2441 /**
2442 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
2443 *
2444 * @adev: amdgpu_device pointer
2445 * @bo_va: requested bo_va
2446 *
2447 * Remove @bo_va->bo from the requested vm.
2448 *
2449 * Object has to be reserved!
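 *
 * Note that mappings which were still valid are only moved to &vm->freed
 * here, so a typical teardown (illustrative sketch) also flushes them:
 *
 *	amdgpu_vm_bo_rmv(adev, bo_va);
 *	r = amdgpu_vm_clear_freed(adev, vm, NULL);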
2450 */
2451 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
2452 struct amdgpu_bo_va *bo_va)
2453 {
2454 struct amdgpu_bo_va_mapping *mapping, *next;
2455 struct amdgpu_vm *vm = bo_va->base.vm;
2456
2457 list_del(&bo_va->base.bo_list);
2458
2459 spin_lock(&vm->status_lock);
2460 list_del(&bo_va->base.vm_status);
2461 spin_unlock(&vm->status_lock);
2462
2463 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
2464 list_del(&mapping->list);
2465 amdgpu_vm_it_remove(mapping, &vm->va);
2466 mapping->bo_va = NULL;
2467 trace_amdgpu_vm_bo_unmap(bo_va, mapping);
2468 list_add(&mapping->list, &vm->freed);
2469 }
2470 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
2471 list_del(&mapping->list);
2472 amdgpu_vm_it_remove(mapping, &vm->va);
2473 amdgpu_vm_free_mapping(adev, vm, mapping,
2474 bo_va->last_pt_update);
2475 }
2476
2477 dma_fence_put(bo_va->last_pt_update);
2478 kfree(bo_va);
2479 }
2480
2481 /**
2482 * amdgpu_vm_bo_invalidate - mark the bo as invalid
2483 *
2484 * @adev: amdgpu_device pointer
2486 * @bo: amdgpu buffer object
* @evicted: whether the BO is being evicted
2487 *
2488 * Mark @bo as invalid.
2489 */
2490 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
2491 struct amdgpu_bo *bo, bool evicted)
2492 {
2493 struct amdgpu_vm_bo_base *bo_base;
2494
2495 list_for_each_entry(bo_base, &bo->va, bo_list) {
2496 struct amdgpu_vm *vm = bo_base->vm;
2497
2498 bo_base->moved = true;
2499 if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
2500 spin_lock(&bo_base->vm->status_lock);
2501 if (bo->tbo.type == ttm_bo_type_kernel)
2502 list_move(&bo_base->vm_status, &vm->evicted);
2503 else
2504 list_move_tail(&bo_base->vm_status,
2505 &vm->evicted);
2506 spin_unlock(&bo_base->vm->status_lock);
2507 continue;
2508 }
2509
2510 if (bo->tbo.type == ttm_bo_type_kernel) {
2511 spin_lock(&bo_base->vm->status_lock);
2512 if (list_empty(&bo_base->vm_status))
2513 list_add(&bo_base->vm_status, &vm->relocated);
2514 spin_unlock(&bo_base->vm->status_lock);
2515 continue;
2516 }
2517
2518 spin_lock(&bo_base->vm->status_lock);
2519 if (list_empty(&bo_base->vm_status))
2520 list_add(&bo_base->vm_status, &vm->moved);
2521 spin_unlock(&bo_base->vm->status_lock);
2522 }
2523 }
2524
2525 static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
2526 {
2527 /* Total bits covered by PD + PTs */
2528 unsigned bits = ilog2(vm_size) + 18;
2529
2530 /* Make sure the PD is 4K in size up to 8GB address space.
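(For example, with vm_size = 8 and a single page table level: bits =
ilog2(8) + 18 = 21, the block size is 21 - 9 = 12 and the root PD then has
2^(21 - 12) = 512 entries, i.e. 4KB; the single level here is only an
illustrative assumption.)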
2531 Above that split equally between PD and PTs */
2532 if (vm_size <= 8)
2533 return (bits - 9);
2534 else
2535 return ((bits + 3) / 2);
2536 }
2537
2538 /**
2539 * amdgpu_vm_set_fragment_size - adjust fragment size in PTE
2540 *
2541 * @adev: amdgpu_device pointer
2542 * @fragment_size_default: the default fragment size if it's set to auto
2543 */
2544 void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default)
2545 {
2546 if (amdgpu_vm_fragment_size == -1)
2547 adev->vm_manager.fragment_size = fragment_size_default;
2548 else
2549 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;
2550 }
2551
2552 /**
2553 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
2554 *
2555 * @adev: amdgpu_device pointer
2556 * @vm_size: the default vm size if it's set to auto
* @fragment_size_default: the default fragment size if it's set to auto
2557 */
2558 void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default)
2559 {
2560 /* adjust vm size first */
2561 if (amdgpu_vm_size == -1)
2562 adev->vm_manager.vm_size = vm_size;
2563 else
2564 adev->vm_manager.vm_size = amdgpu_vm_size;
2565
2566 /* block size depends on vm size */
2567 if (amdgpu_vm_block_size == -1)
2568 adev->vm_manager.block_size =
2569 amdgpu_vm_get_block_size(adev->vm_manager.vm_size);
2570 else
2571 adev->vm_manager.block_size = amdgpu_vm_block_size;
2572
2573 amdgpu_vm_set_fragment_size(adev, fragment_size_default);
2574
2575 DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n",
2576 adev->vm_manager.vm_size, adev->vm_manager.block_size,
2577 adev->vm_manager.fragment_size);
2578 }
2579
2580 /**
2581 * amdgpu_vm_init - initialize a vm instance
2582 *
2583 * @adev: amdgpu_device pointer
2584 * @vm: requested vm
2585 * @vm_context: Indicates if it is a GFX or Compute context
* @pasid: PASID the VM should use, or 0 if none
2586 *
2587 * Init @vm fields.
2588 */
2589 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2590 int vm_context, unsigned int pasid)
2591 {
2592 const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
2593 AMDGPU_VM_PTE_COUNT(adev) * 8);
2594 unsigned ring_instance;
2595 struct amdgpu_ring *ring;
2596 struct amd_sched_rq *rq;
2597 int r, i;
2598 u64 flags;
2599 uint64_t init_pde_value = 0;
2600
2601 vm->va = RB_ROOT;
2602 vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
2603 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
2604 vm->reserved_vmid[i] = NULL;
2605 spin_lock_init(&vm->status_lock);
2606 INIT_LIST_HEAD(&vm->evicted);
2607 INIT_LIST_HEAD(&vm->relocated);
2608 INIT_LIST_HEAD(&vm->moved);
2609 INIT_LIST_HEAD(&vm->freed);
2610
2611 /* create scheduler entity for page table updates */
2612
2613 ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
2614 ring_instance %= adev->vm_manager.vm_pte_num_rings;
2615 ring = adev->vm_manager.vm_pte_rings[ring_instance];
2616 rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
2617 r = amd_sched_entity_init(&ring->sched, &vm->entity,
2618 rq, amdgpu_sched_jobs);
2619 if (r)
2620 return r;
2621
2622 vm->pte_support_ats = false;
2623
2624 if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
2625 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2626 AMDGPU_VM_USE_CPU_FOR_COMPUTE);
2627
2628 if (adev->asic_type == CHIP_RAVEN) {
2629 vm->pte_support_ats = true;
2630 init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
2631 }
2632 } else
2633 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
2634 AMDGPU_VM_USE_CPU_FOR_GFX);
2635 DRM_DEBUG_DRIVER("VM update mode is %s\n",
2636 vm->use_cpu_for_update ?
"CPU" : "SDMA"); 2637 WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), 2638 "CPU update of VM recommended only for large BAR system\n"); 2639 vm->last_update = NULL; 2640 2641 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 2642 AMDGPU_GEM_CREATE_VRAM_CLEARED; 2643 if (vm->use_cpu_for_update) 2644 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 2645 else 2646 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 2647 AMDGPU_GEM_CREATE_SHADOW); 2648 2649 r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, 2650 AMDGPU_GEM_DOMAIN_VRAM, 2651 flags, 2652 NULL, NULL, init_pde_value, &vm->root.base.bo); 2653 if (r) 2654 goto error_free_sched_entity; 2655 2656 vm->root.base.vm = vm; 2657 list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); 2658 INIT_LIST_HEAD(&vm->root.base.vm_status); 2659 2660 if (vm->use_cpu_for_update) { 2661 r = amdgpu_bo_reserve(vm->root.base.bo, false); 2662 if (r) 2663 goto error_free_root; 2664 2665 r = amdgpu_bo_kmap(vm->root.base.bo, NULL); 2666 amdgpu_bo_unreserve(vm->root.base.bo); 2667 if (r) 2668 goto error_free_root; 2669 } 2670 2671 if (pasid) { 2672 unsigned long flags; 2673 2674 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 2675 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, 2676 GFP_ATOMIC); 2677 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 2678 if (r < 0) 2679 goto error_free_root; 2680 2681 vm->pasid = pasid; 2682 } 2683 2684 INIT_KFIFO(vm->faults); 2685 2686 return 0; 2687 2688 error_free_root: 2689 amdgpu_bo_unref(&vm->root.base.bo->shadow); 2690 amdgpu_bo_unref(&vm->root.base.bo); 2691 vm->root.base.bo = NULL; 2692 2693 error_free_sched_entity: 2694 amd_sched_entity_fini(&ring->sched, &vm->entity); 2695 2696 return r; 2697 } 2698 2699 /** 2700 * amdgpu_vm_free_levels - free PD/PT levels 2701 * 2702 * @level: PD/PT starting level to free 2703 * 2704 * Free the page directory or page table level and all sub levels. 2705 */ 2706 static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) 2707 { 2708 unsigned i; 2709 2710 if (level->base.bo) { 2711 list_del(&level->base.bo_list); 2712 list_del(&level->base.vm_status); 2713 amdgpu_bo_unref(&level->base.bo->shadow); 2714 amdgpu_bo_unref(&level->base.bo); 2715 } 2716 2717 if (level->entries) 2718 for (i = 0; i <= level->last_entry_used; i++) 2719 amdgpu_vm_free_levels(&level->entries[i]); 2720 2721 kvfree(level->entries); 2722 } 2723 2724 /** 2725 * amdgpu_vm_fini - tear down a vm instance 2726 * 2727 * @adev: amdgpu_device pointer 2728 * @vm: requested vm 2729 * 2730 * Tear down @vm. 
2731 * Unbind the VM and remove all bos from the vm bo list 2732 */ 2733 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 2734 { 2735 struct amdgpu_bo_va_mapping *mapping, *tmp; 2736 bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; 2737 u64 fault; 2738 int i; 2739 2740 /* Clear pending page faults from IH when the VM is destroyed */ 2741 while (kfifo_get(&vm->faults, &fault)) 2742 amdgpu_ih_clear_fault(adev, fault); 2743 2744 if (vm->pasid) { 2745 unsigned long flags; 2746 2747 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 2748 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); 2749 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 2750 } 2751 2752 amd_sched_entity_fini(vm->entity.sched, &vm->entity); 2753 2754 if (!RB_EMPTY_ROOT(&vm->va)) { 2755 dev_err(adev->dev, "still active bo inside vm\n"); 2756 } 2757 rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { 2758 list_del(&mapping->list); 2759 amdgpu_vm_it_remove(mapping, &vm->va); 2760 kfree(mapping); 2761 } 2762 list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { 2763 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { 2764 amdgpu_vm_prt_fini(adev, vm); 2765 prt_fini_needed = false; 2766 } 2767 2768 list_del(&mapping->list); 2769 amdgpu_vm_free_mapping(adev, vm, mapping, NULL); 2770 } 2771 2772 amdgpu_vm_free_levels(&vm->root); 2773 dma_fence_put(vm->last_update); 2774 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2775 amdgpu_vm_free_reserved_vmid(adev, vm, i); 2776 } 2777 2778 /** 2779 * amdgpu_vm_manager_init - init the VM manager 2780 * 2781 * @adev: amdgpu_device pointer 2782 * 2783 * Initialize the VM manager structures 2784 */ 2785 void amdgpu_vm_manager_init(struct amdgpu_device *adev) 2786 { 2787 unsigned i, j; 2788 2789 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { 2790 struct amdgpu_vm_id_manager *id_mgr = 2791 &adev->vm_manager.id_mgr[i]; 2792 2793 mutex_init(&id_mgr->lock); 2794 INIT_LIST_HEAD(&id_mgr->ids_lru); 2795 atomic_set(&id_mgr->reserved_vmid_num, 0); 2796 2797 /* skip over VMID 0, since it is the system VM */ 2798 for (j = 1; j < id_mgr->num_ids; ++j) { 2799 amdgpu_vm_reset_id(adev, i, j); 2800 amdgpu_sync_create(&id_mgr->ids[i].active); 2801 list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); 2802 } 2803 } 2804 2805 adev->vm_manager.fence_context = 2806 dma_fence_context_alloc(AMDGPU_MAX_RINGS); 2807 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 2808 adev->vm_manager.seqno[i] = 0; 2809 2810 atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); 2811 atomic64_set(&adev->vm_manager.client_counter, 0); 2812 spin_lock_init(&adev->vm_manager.prt_lock); 2813 atomic_set(&adev->vm_manager.num_prt_users, 0); 2814 2815 /* If not overridden by the user, by default, only in large BAR systems 2816 * Compute VM tables will be updated by CPU 2817 */ 2818 #ifdef CONFIG_X86_64 2819 if (amdgpu_vm_update_mode == -1) { 2820 if (amdgpu_vm_is_large_bar(adev)) 2821 adev->vm_manager.vm_update_mode = 2822 AMDGPU_VM_USE_CPU_FOR_COMPUTE; 2823 else 2824 adev->vm_manager.vm_update_mode = 0; 2825 } else 2826 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; 2827 #else 2828 adev->vm_manager.vm_update_mode = 0; 2829 #endif 2830 2831 idr_init(&adev->vm_manager.pasid_idr); 2832 spin_lock_init(&adev->vm_manager.pasid_lock); 2833 } 2834 2835 /** 2836 * amdgpu_vm_manager_fini - cleanup VM manager 2837 * 2838 * @adev: amdgpu_device pointer 2839 * 2840 * Cleanup the VM manager and free resources. 
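 *
 * This is expected to run only after every VM has been destroyed with
 * amdgpu_vm_fini(); a PASID mapping that is still registered trips the
 * WARN_ON() below.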
2841 */
2842 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
2843 {
2844 unsigned i, j;
2845
2846 WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
2847 idr_destroy(&adev->vm_manager.pasid_idr);
2848
2849 for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
2850 struct amdgpu_vm_id_manager *id_mgr =
2851 &adev->vm_manager.id_mgr[i];
2852
2853 mutex_destroy(&id_mgr->lock);
2854 for (j = 0; j < AMDGPU_NUM_VM; ++j) {
2855 struct amdgpu_vm_id *id = &id_mgr->ids[j];
2856
2857 amdgpu_sync_free(&id->active);
2858 dma_fence_put(id->flushed_updates);
2859 dma_fence_put(id->last_flush);
2860 }
2861 }
2862 }
2863
2864 int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
2865 {
2866 union drm_amdgpu_vm *args = data;
2867 struct amdgpu_device *adev = dev->dev_private;
2868 struct amdgpu_fpriv *fpriv = filp->driver_priv;
2869 int r;
2870
2871 switch (args->in.op) {
2872 case AMDGPU_VM_OP_RESERVE_VMID:
2873 /* currently we only need to reserve a VMID from the GFX hub */
2874 r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm,
2875 AMDGPU_GFXHUB);
2876 if (r)
2877 return r;
2878 break;
2879 case AMDGPU_VM_OP_UNRESERVE_VMID:
2880 amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB);
2881 break;
2882 default:
2883 return -EINVAL;
2884 }
2885
2886 return 0;
2887 }
2888
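/*
 * Example: rough per-process GPUVM lifecycle as driven by the rest of the
 * driver (illustrative sketch only; error handling, locking and BO
 * reservation are omitted, and the context/PTE flag names are the ones
 * defined in amdgpu.h and amdgpu_vm.h at the time of writing):
 *
 *	r = amdgpu_vm_init(adev, vm, AMDGPU_VM_CONTEXT_GFX, pasid);
 *
 *	bo_va = amdgpu_vm_bo_add(adev, vm, bo);
 *	r = amdgpu_vm_bo_map(adev, bo_va, saddr, 0, size,
 *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);
 *	r = amdgpu_vm_bo_update(adev, bo_va, false);
 *
 *	... command submission using the mapped range ...
 *
 *	r = amdgpu_vm_bo_unmap(adev, bo_va, saddr);
 *	amdgpu_vm_bo_rmv(adev, bo_va);
 *	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 *	amdgpu_vm_fini(adev, vm);
 */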