/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy GART on older ASICs; however, rather
 * than there being a single global GART table for the entire GPU, there
 * are multiple VM page tables active at any given time. The VM page
 * tables can contain a mix of VRAM pages and system memory pages, and
 * system memory pages can be mapped as snooped (cached system pages) or
 * unsnooped (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

/* Special value indicating that no flush is necessary */
#define AMDGPU_VM_NO_FLUSH (~0ll)

/* Local structure. Encapsulates some VM table update parameters to reduce
 * the number of function parameters
 */
struct amdgpu_vm_update_params {
	/* address where to copy page table entries from */
	uint64_t src;
	/* DMA addresses to use for mapping */
	dma_addr_t *pages_addr;
	/* indirect buffer to fill with commands */
	struct amdgpu_ib *ib;
};

/**
 * amdgpu_vm_num_pdes - return the number of page directory entries
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the number of page directory entries.
 */
static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
{
	return adev->vm_manager.max_pfn >> amdgpu_vm_block_size;
}

/**
 * amdgpu_vm_directory_size - returns the size of the page directory in bytes
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the size of the page directory in bytes.
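 *
 * The directory holds amdgpu_vm_num_pdes() entries of 8 bytes each,
 * page-aligned. Illustrative example (actual numbers depend on the
 * configured max_pfn and amdgpu_vm_block_size): a 4 GB address space
 * (max_pfn = 0x100000) with a block size of 9 gives 2048 PDEs, i.e. a
 * 16 KB page directory.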
 */
static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_pdes(adev) * 8);
}

/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
	entry->robj = vm->page_directory;
	entry->priority = 0;
	entry->tv.bo = &vm->page_directory->tbo;
	entry->tv.shared = true;
	entry->user_pages = NULL;
	list_add(&entry->tv.head, validated);
}

/**
 * amdgpu_vm_get_pt_bos - add the VM PT BOs to a duplicates list
 *
 * @vm: vm providing the BOs
 * @duplicates: head of duplicates list
 *
 * Add the page tables to the BO duplicates list
 * for command submission.
 */
void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
{
	unsigned i;

	/* add the vm page table to the list */
	for (i = 0; i <= vm->max_pde_used; ++i) {
		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;

		if (!entry->robj)
			continue;

		list_add(&entry->tv.head, duplicates);
	}
}

/**
 * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
 *
 * @adev: amdgpu device instance
 * @vm: vm providing the BOs
 *
 * Move the PT BOs to the tail of the LRU.
 */
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm)
{
	struct ttm_bo_global *glob = adev->mman.bdev.glob;
	unsigned i;

	spin_lock(&glob->lru_lock);
	for (i = 0; i <= vm->max_pde_used; ++i) {
		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;

		if (!entry->robj)
			continue;

		ttm_bo_move_to_lru_tail(&entry->robj->tbo);
	}
	spin_unlock(&glob->lru_lock);
}

/**
 * amdgpu_vm_grab_id - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @vm_id: resulting VMID
 * @vm_pd_addr: resulting page directory address, or AMDGPU_VM_NO_FLUSH
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
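 *
 * First try to reuse a VMID this VM already owns on @ring: the ID must
 * still belong to this client, point at the same page directory address,
 * have no flush pending from another ring and already cover the latest
 * page table updates. If no such ID exists, the least recently used
 * (ideally idle) ID from the manager's LRU list is taken over instead,
 * which forces a flush.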
 */
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		      struct amdgpu_sync *sync, struct fence *fence,
		      unsigned *vm_id, uint64_t *vm_pd_addr)
{
	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
	struct amdgpu_device *adev = ring->adev;
	struct fence *updates = sync->last_vm_update;
	struct amdgpu_vm_id *id;
	unsigned i = ring->idx;
	int r;

	mutex_lock(&adev->vm_manager.lock);

	/* Check if we can use a VMID already assigned to this VM */
	do {
		struct fence *flushed;

		id = vm->ids[i++];
		if (i == AMDGPU_MAX_RINGS)
			i = 0;

		/* Check all the prerequisites to using this VMID */
		if (!id)
			continue;

		if (atomic64_read(&id->owner) != vm->client_id)
			continue;

		if (pd_addr != id->pd_gpu_addr)
			continue;

		if (id->last_user != ring &&
		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
			continue;

		flushed = id->flushed_updates;
		if (updates && (!flushed || fence_is_later(updates, flushed)))
			continue;

		/* Good we can use this VMID */
		if (id->last_user == ring) {
			r = amdgpu_sync_fence(ring->adev, sync,
					      id->first);
			if (r)
				goto error;
		}

		/* And remember this submission as user of the VMID */
		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
		if (r)
			goto error;

		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
		vm->ids[ring->idx] = id;

		*vm_id = id - adev->vm_manager.ids;
		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);

		mutex_unlock(&adev->vm_manager.lock);
		return 0;

	} while (i != ring->idx);

	id = list_first_entry(&adev->vm_manager.ids_lru,
			      struct amdgpu_vm_id,
			      list);

	if (!amdgpu_sync_is_idle(&id->active)) {
		struct list_head *head = &adev->vm_manager.ids_lru;
		struct amdgpu_vm_id *tmp;

		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
					 list) {
			if (amdgpu_sync_is_idle(&id->active)) {
				list_move(&id->list, head);
				head = &id->list;
			}
		}
		id = list_first_entry(&adev->vm_manager.ids_lru,
				      struct amdgpu_vm_id,
				      list);
	}

	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
	if (r)
		goto error;

	fence_put(id->first);
	id->first = fence_get(fence);

	fence_put(id->last_flush);
	id->last_flush = NULL;

	fence_put(id->flushed_updates);
	id->flushed_updates = fence_get(updates);

	id->pd_gpu_addr = pd_addr;

	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
	id->last_user = ring;
	atomic64_set(&id->owner, vm->client_id);
	vm->ids[ring->idx] = id;

	*vm_id = id - adev->vm_manager.ids;
	*vm_pd_addr = pd_addr;
	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);

error:
	mutex_unlock(&adev->vm_manager.lock);
	return r;
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @vm_id: vmid number to use
 * @pd_addr: address of the page directory
 *
 * Emit a VM flush when it is necessary.
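 *
 * The flush itself is skipped when @pd_addr is AMDGPU_VM_NO_FLUSH; a
 * GDS/GWS/OA switch is only emitted when the values saved for @vm_id
 * differ from the ones passed in.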
 */
int amdgpu_vm_flush(struct amdgpu_ring *ring,
		    unsigned vm_id, uint64_t pd_addr,
		    uint32_t gds_base, uint32_t gds_size,
		    uint32_t gws_base, uint32_t gws_size,
		    uint32_t oa_base, uint32_t oa_size,
		    bool vmid_switch)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != gds_base ||
		id->gds_size != gds_size ||
		id->gws_base != gws_base ||
		id->gws_size != gws_size ||
		id->oa_base != oa_base ||
		id->oa_size != oa_size);
	int r;

	if (ring->funcs->emit_pipeline_sync && (
	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch))
		amdgpu_ring_emit_pipeline_sync(ring);

	if (ring->funcs->emit_vm_flush &&
	    pd_addr != AMDGPU_VM_NO_FLUSH) {
		struct fence *fence;

		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);

		mutex_lock(&adev->vm_manager.lock);
		if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
			r = amdgpu_fence_emit(ring, &fence);
			if (r) {
				mutex_unlock(&adev->vm_manager.lock);
				return r;
			}
			fence_put(id->last_flush);
			id->last_flush = fence;
		}
		mutex_unlock(&adev->vm_manager.lock);
	}

	if (gds_switch_needed) {
		id->gds_base = gds_base;
		id->gds_size = gds_size;
		id->gws_base = gws_base;
		id->gws_size = gws_size;
		id->oa_base = oa_base;
		id->oa_size = oa_size;
		amdgpu_ring_emit_gds_switch(ring, vm_id,
					    gds_base, gds_size,
					    gws_base, gws_size,
					    oa_base, oa_size);
	}

	return 0;
}

/**
 * amdgpu_vm_reset_id - reset VMID to zero
 *
 * @adev: amdgpu device structure
 * @vm_id: vmid number to reset
 *
 * Reset saved GDS, GWS and OA to force a switch on the next flush.
 */
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
{
	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];

	id->gds_base = 0;
	id->gds_size = 0;
	id->gws_base = 0;
	id->gws_size = 0;
	id->oa_base = 0;
	id->oa_size = 0;
}

/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bo's va list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * amdgpu_vm_update_pages - helper to call the right asic function
 *
 * @adev: amdgpu_device pointer
 * @vm_update_params: see amdgpu_vm_update_params definition
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
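 *
 * The backend used depends on the update parameters: entries are copied
 * from @vm_update_params->src when it is set, written one by one (with a
 * pages_addr lookup) when DMA addresses are provided, written directly
 * for very small counts, and generated with set_pte_pde otherwise.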
 */
static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
				   struct amdgpu_vm_update_params
					*vm_update_params,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint32_t flags)
{
	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);

	if (vm_update_params->src) {
		amdgpu_vm_copy_pte(adev, vm_update_params->ib,
			pe, (vm_update_params->src + (addr >> 12) * 8), count);

	} else if (vm_update_params->pages_addr) {
		amdgpu_vm_write_pte(adev, vm_update_params->ib,
			vm_update_params->pages_addr,
			pe, addr, count, incr, flags);

	} else if (count < 3) {
		amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr,
				    count, incr, flags);

	} else {
		amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr,
				      count, incr, flags);
	}
}

/**
 * amdgpu_vm_clear_bo - initially clear the page dir/table
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: bo to clear
 *
 * The BO has to be reserved before calling this.
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_bo *bo)
{
	struct amdgpu_ring *ring;
	struct fence *fence = NULL;
	struct amdgpu_job *job;
	struct amdgpu_vm_update_params vm_update_params;
	unsigned entries;
	uint64_t addr;
	int r;

	memset(&vm_update_params, 0, sizeof(vm_update_params));
	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = amdgpu_bo_gpu_offset(bo);
	entries = amdgpu_bo_size(bo) / 8;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto error;

	vm_update_params.ib = &job->ibs[0];
	amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries,
			       0, 0);
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);

	WARN_ON(job->ibs[0].length_dw > 64);
	r = amdgpu_job_submit(job, ring, &vm->entity,
			      AMDGPU_FENCE_OWNER_VM, &fence);
	if (r)
		goto error_free;

	amdgpu_bo_fence(bo, fence, true);
	fence_put(fence);
	return 0;

error_free:
	amdgpu_job_free(job);

error:
	return r;
}

/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to and return the pointer for the page table entry.
 */
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	if (pages_addr) {
		/* page table offset */
		result = pages_addr[addr >> PAGE_SHIFT];

		/* in case cpu page size != gpu page size*/
		result |= addr & (~PAGE_MASK);

	} else {
		/* No mapping required */
		result = addr;
	}

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}

/**
 * amdgpu_vm_update_page_directory - make sure that the page directory is valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory.
 * Returns 0 for success, error for failure.
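 *
 * Each PDE is 8 bytes and points at one page table BO of
 * AMDGPU_VM_PTE_COUNT entries; runs of PDEs whose page tables are laid
 * out contiguously in VRAM are batched into a single update call.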
 */
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
				    struct amdgpu_vm *vm)
{
	struct amdgpu_ring *ring;
	struct amdgpu_bo *pd = vm->page_directory;
	uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct amdgpu_job *job;
	struct amdgpu_vm_update_params vm_update_params;
	struct fence *fence = NULL;

	int r;

	memset(&vm_update_params, 0, sizeof(vm_update_params));
	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
	if (r)
		return r;

	vm_update_params.ib = &job->ibs[0];

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct amdgpu_bo *bo = vm->page_tables[pt_idx].entry.robj;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = amdgpu_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				amdgpu_vm_update_pages(adev, &vm_update_params,
						       last_pde, last_pt,
						       count, incr,
						       AMDGPU_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		amdgpu_vm_update_pages(adev, &vm_update_params,
				       last_pde, last_pt,
				       count, incr, AMDGPU_PTE_VALID);

	if (vm_update_params.ib->length_dw != 0) {
		amdgpu_ring_pad_ib(ring, vm_update_params.ib);
		amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
				 AMDGPU_FENCE_OWNER_VM);
		WARN_ON(vm_update_params.ib->length_dw > ndw);
		r = amdgpu_job_submit(job, ring, &vm->entity,
				      AMDGPU_FENCE_OWNER_VM, &fence);
		if (r)
			goto error_free;

		amdgpu_bo_fence(pd, fence, true);
		fence_put(vm->page_directory_fence);
		vm->page_directory_fence = fence_get(fence);
		fence_put(fence);

	} else {
		amdgpu_job_free(job);
	}

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vm_frag_ptes - add fragment information to PTEs
 *
 * @adev: amdgpu_device pointer
 * @vm_update_params: see amdgpu_vm_update_params definition
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 */
static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
				struct amdgpu_vm_update_params
					*vm_update_params,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
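	 *
	 * For example (illustrative): a 64KB fragment corresponds to a
	 * fragment value of 4, so 16 consecutive 4KB PTEs are served by a
	 * single L1 TLB entry.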
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */

	/* SI and newer are optimized for 64KB */
	uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
	uint64_t frag_align = 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* Abort early if there isn't anything to do */
	if (pe_start == pe_end)
		return;

	/* system pages are non-contiguous */
	if (vm_update_params->src || vm_update_params->pages_addr ||
	    !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		amdgpu_vm_update_pages(adev, vm_update_params, pe_start,
				       addr, count, AMDGPU_GPU_PAGE_SIZE,
				       flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr,
				       count, AMDGPU_GPU_PAGE_SIZE, flags);
		addr += AMDGPU_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count,
			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += AMDGPU_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr,
				       count, AMDGPU_GPU_PAGE_SIZE, flags);
	}
}

/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @adev: amdgpu_device pointer
 * @vm_update_params: see amdgpu_vm_update_params definition
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
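 *
 * Runs of PTEs that end up contiguous in the page table BOs are merged
 * and handed to amdgpu_vm_frag_ptes() so that fragment flags can be
 * applied to the aligned middle part of each run.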
 */
static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
				  struct amdgpu_vm_update_params
					*vm_update_params,
				  struct amdgpu_vm *vm,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;

	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
		unsigned nptes;
		uint64_t pe_start;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);

		pe_start = amdgpu_bo_gpu_offset(pt);
		pe_start += (addr & mask) * 8;

		if (last_pe_end != pe_start) {

			amdgpu_vm_frag_ptes(adev, vm_update_params,
					    last_pe_start, last_pe_end,
					    last_dst, flags);

			last_pe_start = pe_start;
			last_pe_end = pe_start + 8 * nptes;
			last_dst = dst;
		} else {
			last_pe_end += 8 * nptes;
		}

		addr += nptes;
		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
	}

	amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start,
			    last_pe_end, last_dst, flags);
}

/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @src: address where to copy page table entries from
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 * Returns 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				       uint64_t src,
				       dma_addr_t *pages_addr,
				       struct amdgpu_vm *vm,
				       uint64_t start, uint64_t last,
				       uint32_t flags, uint64_t addr,
				       struct fence **fence)
{
	struct amdgpu_ring *ring;
	void *owner = AMDGPU_FENCE_OWNER_VM;
	unsigned nptes, ncmds, ndw;
	struct amdgpu_job *job;
	struct amdgpu_vm_update_params vm_update_params;
	struct fence *f = NULL;
	int r;

	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
	memset(&vm_update_params, 0, sizeof(vm_update_params));
	vm_update_params.src = src;
	vm_update_params.pages_addr = pages_addr;

	/* sync to everything on unmapping */
	if (!(flags & AMDGPU_PTE_VALID))
		owner = AMDGPU_FENCE_OWNER_UNDEFINED;

	nptes = last - start + 1;

	/*
	 * reserve space for one command every (1 << BLOCK_SIZE)
	 *  entries or 2k dwords (whatever is smaller)
	 */
	ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1;
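
	/*
	 * Illustrative example (the actual block size is a module
	 * parameter): with amdgpu_vm_block_size = 10 and nptes = 16384,
	 * this reserves ncmds = 17 commands on top of the fixed 64 dwords
	 * of padding below.
	 */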

	/* padding, etc. */
	ndw = 64;

	if (vm_update_params.src) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (vm_update_params.pages_addr) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
	if (r)
		return r;

	vm_update_params.ib = &job->ibs[0];

	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
			     owner);
	if (r)
		goto error_free;

	r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
	if (r)
		goto error_free;

	amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start,
			      last + 1, addr, flags);

	amdgpu_ring_pad_ib(ring, vm_update_params.ib);
	WARN_ON(vm_update_params.ib->length_dw > ndw);
	r = amdgpu_job_submit(job, ring, &vm->entity,
			      AMDGPU_FENCE_OWNER_VM, &f);
	if (r)
		goto error_free;

	amdgpu_bo_fence(vm->page_directory, f, true);
	if (fence) {
		fence_put(*fence);
		*fence = fence_get(f);
	}
	fence_put(f);
	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
 * @gtt_flags: flags as they are used for GTT
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @addr: addr to set the area to
 * @flags: HW flags for the mapping
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
 * into a SDMA IB.
 * Returns 0 for success, -EINVAL for failure.
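 *
 * Each chunk covers at most 64MB of address space (16384 GPU pages), so
 * a 200MB mapping, for example, is written with four separate updates.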
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
				      uint32_t gtt_flags,
				      dma_addr_t *pages_addr,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo_va_mapping *mapping,
				      uint32_t flags, uint64_t addr,
				      struct fence **fence)
{
	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;

	uint64_t src = 0, start = mapping->it.start;
	int r;

	/* normally bo_va->flags only contains the READABLE and WRITEABLE
	 * bits, but just in case we filter the flags here first
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	trace_amdgpu_vm_bo_update(mapping);

	if (pages_addr) {
		if (flags == gtt_flags)
			src = adev->gart.table_addr + (addr >> 12) * 8;
		addr = 0;
	}
	addr += mapping->offset;

	if (!pages_addr || src)
		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
						   start, mapping->it.last,
						   flags, addr, fence);

	while (start != mapping->it.last + 1) {
		uint64_t last;

		last = min((uint64_t)mapping->it.last, start + max_size - 1);
		r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
						start, last, flags, addr,
						fence);
		if (r)
			return r;

		start = last + 1;
		addr += max_size * AMDGPU_GPU_PAGE_SIZE;
	}

	return 0;
}

/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @mem: ttm mem, or NULL to clear the mappings
 *
 * Fill in the page table entries for @bo_va.
 * Returns 0 for success, -EINVAL for failure.
 *
 * Objects have to be reserved and the mutex must be locked!
 */
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
			struct amdgpu_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct amdgpu_vm *vm = bo_va->vm;
	struct amdgpu_bo_va_mapping *mapping;
	dma_addr_t *pages_addr = NULL;
	uint32_t gtt_flags, flags;
	uint64_t addr;
	int r;

	if (mem) {
		struct ttm_dma_tt *ttm;

		addr = (u64)mem->start << PAGE_SHIFT;
		switch (mem->mem_type) {
		case TTM_PL_TT:
			ttm = container_of(bo_va->bo->tbo.ttm, struct
					   ttm_dma_tt, ttm);
			pages_addr = ttm->dma_address;
			break;

		case TTM_PL_VRAM:
			addr += adev->vm_manager.vram_base_offset;
			break;

		default:
			break;
		}
	} else {
		addr = 0;
	}
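
	/* If the BO is managed by this device, PTEs can later be copied
	 * straight out of the GART table instead of being rebuilt (see the
	 * src path in amdgpu_vm_bo_split_mapping); a gtt_flags of 0
	 * disables that shortcut for foreign BOs.
	 */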
	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
	gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;

	spin_lock(&vm->status_lock);
	if (!list_empty(&bo_va->vm_status))
		list_splice_init(&bo_va->valids, &bo_va->invalids);
	spin_unlock(&vm->status_lock);

	list_for_each_entry(mapping, &bo_va->invalids, list) {
		r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
					       mapping, flags, addr,
					       &bo_va->last_pt_update);
		if (r)
			return r;
	}

	if (trace_amdgpu_vm_bo_mapping_enabled()) {
		list_for_each_entry(mapping, &bo_va->valids, list)
			trace_amdgpu_vm_bo_mapping(mapping);

		list_for_each_entry(mapping, &bo_va->invalids, list)
			trace_amdgpu_vm_bo_mapping(mapping);
	}

	spin_lock(&vm->status_lock);
	list_splice_init(&bo_va->invalids, &bo_va->valids);
	list_del_init(&bo_va->vm_status);
	if (!mem)
		list_add(&bo_va->vm_status, &vm->cleared);
	spin_unlock(&vm->status_lock);

	return 0;
}

/**
 * amdgpu_vm_clear_freed - clear freed BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and the mutex must be locked!
 */
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
			  struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping;
	int r;

	while (!list_empty(&vm->freed)) {
		mapping = list_first_entry(&vm->freed,
			struct amdgpu_bo_va_mapping, list);
		list_del(&mapping->list);

		r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
					       0, 0, NULL);
		kfree(mapping);
		if (r)
			return r;
	}
	return 0;
}

/**
 * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @sync: sync object to add the last page table update fence to
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and the mutex must be locked!
 */
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
			     struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo_va *bo_va = NULL;
	int r = 0;

	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->invalidated)) {
		bo_va = list_first_entry(&vm->invalidated,
			struct amdgpu_bo_va, vm_status);
		spin_unlock(&vm->status_lock);

		r = amdgpu_vm_bo_update(adev, bo_va, NULL);
		if (r)
			return r;

		spin_lock(&vm->status_lock);
	}
	spin_unlock(&vm->status_lock);

	if (bo_va)
		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);

	return r;
}

/**
 * amdgpu_vm_bo_add - add a bo to a specific vm
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: amdgpu buffer object
 *
 * Add @bo into the requested vm.
 * Add @bo to the list of bos associated with the vm.
 * Returns the newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
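 *
 * The returned bo_va starts with a reference count of one and empty
 * valid/invalid mapping lists; mappings are added afterwards with
 * amdgpu_vm_bo_map().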
 */
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->valids);
	INIT_LIST_HEAD(&bo_va->invalids);
	INIT_LIST_HEAD(&bo_va->vm_status);

	list_add_tail(&bo_va->bo_list, &bo->va);

	return bo_va;
}

/**
 * amdgpu_vm_bo_map - map bo inside a vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add a mapping of the BO at the specified addr into the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
		     struct amdgpu_bo_va *bo_va,
		     uint64_t saddr, uint64_t offset,
		     uint64_t size, uint32_t flags)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_vm *vm = bo_va->vm;
	struct interval_tree_node *it;
	unsigned last_pfn, pt_idx;
	uint64_t eaddr;
	int r;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	/* make sure object fit at this offset */
	eaddr = saddr + size - 1;
	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
		return -EINVAL;

	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
	if (last_pfn >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
			last_pfn, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
	if (it) {
		struct amdgpu_bo_va_mapping *tmp;
		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
		/* bo and tmp overlap, invalid addr */
		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
			"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
			tmp->it.start, tmp->it.last + 1);
		r = -EINVAL;
		goto error;
	}

	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping) {
		r = -ENOMEM;
		goto error;
	}

	INIT_LIST_HEAD(&mapping->list);
	mapping->it.start = saddr;
	mapping->it.last = eaddr;
	mapping->offset = offset;
	mapping->flags = flags;

	list_add(&mapping->list, &bo_va->invalids);
	interval_tree_insert(&mapping->it, &vm->va);

	/* Make sure the page tables are allocated */
	saddr >>= amdgpu_vm_block_size;
	eaddr >>= amdgpu_vm_block_size;

	BUG_ON(eaddr >= amdgpu_vm_num_pdes(adev));

	if (eaddr > vm->max_pde_used)
		vm->max_pde_used = eaddr;

	/* walk over the address space and allocate the page tables */
	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
		struct reservation_object *resv = vm->page_directory->tbo.resv;
		struct amdgpu_bo_list_entry *entry;
		struct amdgpu_bo *pt;

		entry = &vm->page_tables[pt_idx].entry;
		if (entry->robj)
			continue;
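
		/* Page tables are allocated on demand: each page table BO
		 * is AMDGPU_VM_PTE_COUNT entries (8 bytes each) and backs
		 * AMDGPU_VM_PTE_COUNT GPU pages of address space.
		 */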
		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
				     AMDGPU_GPU_PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_VRAM,
				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
				     NULL, resv, &pt);
		if (r)
			goto error_free;

		/* Keep a reference to the page directory so that it is
		 * not freed before its page tables.
		 */
		pt->parent = amdgpu_bo_ref(vm->page_directory);

		r = amdgpu_vm_clear_bo(adev, vm, pt);
		if (r) {
			amdgpu_bo_unref(&pt);
			goto error_free;
		}

		entry->robj = pt;
		entry->priority = 0;
		entry->tv.bo = &entry->robj->tbo;
		entry->tv.shared = true;
		entry->user_pages = NULL;
		vm->page_tables[pt_idx].addr = 0;
	}

	return 0;

error_free:
	list_del(&mapping->list);
	interval_tree_remove(&mapping->it, &vm->va);
	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
	kfree(mapping);

error:
	return r;
}

/**
 * amdgpu_vm_bo_unmap - remove bo mapping from vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to remove the address from
 * @saddr: where the BO is mapped
 *
 * Remove a mapping of the BO at the specified addr from the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
		       struct amdgpu_bo_va *bo_va,
		       uint64_t saddr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_vm *vm = bo_va->vm;
	bool valid = true;

	saddr /= AMDGPU_GPU_PAGE_SIZE;

	list_for_each_entry(mapping, &bo_va->valids, list) {
		if (mapping->it.start == saddr)
			break;
	}

	if (&mapping->list == &bo_va->valids) {
		valid = false;

		list_for_each_entry(mapping, &bo_va->invalids, list) {
			if (mapping->it.start == saddr)
				break;
		}

		if (&mapping->list == &bo_va->invalids)
			return -ENOENT;
	}

	list_del(&mapping->list);
	interval_tree_remove(&mapping->it, &vm->va);
	trace_amdgpu_vm_bo_unmap(bo_va, mapping);

	if (valid)
		list_add(&mapping->list, &vm->freed);
	else
		kfree(mapping);

	return 0;
}

/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm.
 *
 * Object has to be reserved!
 */
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va)
{
	struct amdgpu_bo_va_mapping *mapping, *next;
	struct amdgpu_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	spin_lock(&vm->status_lock);
	list_del(&bo_va->vm_status);
	spin_unlock(&vm->status_lock);

	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
		list_del(&mapping->list);
		interval_tree_remove(&mapping->it, &vm->va);
		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
		list_add(&mapping->list, &vm->freed);
	}
	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
		list_del(&mapping->list);
		interval_tree_remove(&mapping->it, &vm->va);
		kfree(mapping);
	}

	fence_put(bo_va->last_pt_update);
	kfree(bo_va);
}

/**
 * amdgpu_vm_bo_invalidate - mark the bo as invalid
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 *
 * Mark @bo as invalid.
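 *
 * Every bo_va attached to @bo is put on its VM's invalidated list, so
 * the mappings are rewritten on the next amdgpu_vm_clear_invalids() call.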
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
			     struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		spin_lock(&bo_va->vm->status_lock);
		if (list_empty(&bo_va->vm_status))
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
		spin_unlock(&bo_va->vm->status_lock);
	}
}

/**
 * amdgpu_vm_init - initialize a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Init @vm fields.
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
		AMDGPU_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries;
	unsigned ring_instance;
	struct amdgpu_ring *ring;
	struct amd_sched_rq *rq;
	int i, r;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		vm->ids[i] = NULL;
	vm->va = RB_ROOT;
	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
	spin_lock_init(&vm->status_lock);
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->cleared);
	INIT_LIST_HEAD(&vm->freed);

	pd_size = amdgpu_vm_directory_size(adev);
	pd_entries = amdgpu_vm_num_pdes(adev);

	/* allocate page table array */
	vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	/* create scheduler entity for page table updates */
	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
	ring_instance %= adev->vm_manager.vm_pte_num_rings;
	ring = adev->vm_manager.vm_pte_rings[ring_instance];
	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
	r = amd_sched_entity_init(&ring->sched, &vm->entity,
				  rq, amdgpu_sched_jobs);
	if (r)
		return r;

	vm->page_directory_fence = NULL;

	r = amdgpu_bo_create(adev, pd_size, align, true,
			     AMDGPU_GEM_DOMAIN_VRAM,
			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
			     NULL, NULL, &vm->page_directory);
	if (r)
		goto error_free_sched_entity;

	r = amdgpu_bo_reserve(vm->page_directory, false);
	if (r)
		goto error_free_page_directory;

	r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
	amdgpu_bo_unreserve(vm->page_directory);
	if (r)
		goto error_free_page_directory;

	return 0;

error_free_page_directory:
	amdgpu_bo_unref(&vm->page_directory);
	vm->page_directory = NULL;

error_free_sched_entity:
	amd_sched_entity_fini(&ring->sched, &vm->entity);

	return r;
}

/**
 * amdgpu_vm_fini - tear down a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Tear down @vm.
 * Unbind the VM and remove all bos from the vm bo list
 */
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping, *tmp;
	int i;

	amd_sched_entity_fini(vm->entity.sched, &vm->entity);

	if (!RB_EMPTY_ROOT(&vm->va)) {
		dev_err(adev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) {
		list_del(&mapping->list);
		interval_tree_remove(&mapping->it, &vm->va);
		kfree(mapping);
	}
	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
		list_del(&mapping->list);
		kfree(mapping);
	}

	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
		amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
	drm_free_large(vm->page_tables);

	amdgpu_bo_unref(&vm->page_directory);
	fence_put(vm->page_directory_fence);
}

/**
 * amdgpu_vm_manager_init - init the VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures
 */
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
	unsigned i;

	INIT_LIST_HEAD(&adev->vm_manager.ids_lru);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < adev->vm_manager.num_ids; ++i) {
		amdgpu_vm_reset_id(adev, i);
		amdgpu_sync_create(&adev->vm_manager.ids[i].active);
		list_add_tail(&adev->vm_manager.ids[i].list,
			      &adev->vm_manager.ids_lru);
	}

	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
	atomic64_set(&adev->vm_manager.client_counter, 0);
}

/**
 * amdgpu_vm_manager_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
	unsigned i;

	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];

		fence_put(adev->vm_manager.ids[i].first);
		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
		fence_put(id->flushed_updates);
	}
}