/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy GART on older ASICs, however
 * rather than there being a single global GART table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * VRAM pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
        return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
        return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}

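/*
 * Illustrative sizing example (assumed numbers, not hardware defaults; the
 * real values depend on the vm_size/vm_block_size module parameters): with
 * vm_manager.max_pfn = 1 << 20 (a 4 GB address space of 4 KB pages) and
 * radeon_vm_block_size = 9, radeon_vm_num_pdes() yields 1 << 11 = 2048 page
 * directory entries, so radeon_vm_directory_size() returns 2048 * 8 = 16 KB,
 * rounded up to the GPU page size.
 */
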
/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
        int r;

        if (!rdev->vm_manager.enabled) {
                r = radeon_asic_vm_init(rdev);
                if (r)
                        return r;

                rdev->vm_manager.enabled = true;
        }
        return 0;
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
        int i;

        if (!rdev->vm_manager.enabled)
                return;

        for (i = 0; i < RADEON_NUM_VM; ++i)
                radeon_fence_unref(&rdev->vm_manager.active[i]);
        radeon_asic_vm_fini(rdev);
        rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
                                         struct radeon_vm *vm,
                                         struct list_head *head)
{
        struct radeon_bo_list *list;
        unsigned i, idx;

        list = kvmalloc_array(vm->max_pde_used + 2,
                              sizeof(struct radeon_bo_list), GFP_KERNEL);
        if (!list)
                return NULL;

        /* add the vm page table to the list */
        list[0].robj = vm->page_directory;
        list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
        list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
        list[0].shared = true;
        list[0].tiling_flags = 0;
        list_add(&list[0].list, head);

        for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
                if (!vm->page_tables[i].bo)
                        continue;

                list[idx].robj = vm->page_tables[i].bo;
                list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
                list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
                list[idx].shared = true;
                list[idx].tiling_flags = 0;
                list_add(&list[idx++].list, head);
        }

        return list;
}

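/*
 * VMID lifecycle sketch (illustrative only; the actual call sites live in
 * the IB scheduling path, not in this file): a command submission typically
 * grabs an ID, syncs to the fence returned for it, flushes the page
 * directory on the target ring, and finally publishes its own fence for the
 * ID once the IB has been emitted, roughly:
 *
 *      vm_id_fence = radeon_vm_grab_id(rdev, vm, ring);
 *      radeon_sync_fence(&ib->sync, vm_id_fence);
 *      radeon_vm_flush(rdev, vm, ring, ib->sync.last_vm_update);
 *      ...emit and schedule the IB...
 *      radeon_vm_fence(rdev, vm, ib->fence);
 */
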
/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
                                       struct radeon_vm *vm, int ring)
{
        struct radeon_fence *best[RADEON_NUM_RINGS] = {};
        struct radeon_vm_id *vm_id = &vm->ids[ring];

        unsigned choices[2] = {};
        unsigned i;

        /* check if the id is still valid */
        if (vm_id->id && vm_id->last_id_use &&
            vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
                return NULL;

        /* we definitely need to flush */
        vm_id->pd_gpu_addr = ~0ll;

        /* skip over VMID 0, since it is the system VM */
        for (i = 1; i < rdev->vm_manager.nvm; ++i) {
                struct radeon_fence *fence = rdev->vm_manager.active[i];

                if (fence == NULL) {
                        /* found a free one */
                        vm_id->id = i;
                        trace_radeon_vm_grab_id(i, ring);
                        return NULL;
                }

                if (radeon_fence_is_earlier(fence, best[fence->ring])) {
                        best[fence->ring] = fence;
                        choices[fence->ring == ring ? 0 : 1] = i;
                }
        }

        for (i = 0; i < 2; ++i) {
                if (choices[i]) {
                        vm_id->id = choices[i];
                        trace_radeon_vm_grab_id(choices[i], ring);
                        return rdev->vm_manager.active[choices[i]];
                }
        }

        /* should never happen */
        BUG();
        return NULL;
}

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 * @updates: last vm update that is waited for
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
                     struct radeon_vm *vm,
                     int ring, struct radeon_fence *updates)
{
        uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
        struct radeon_vm_id *vm_id = &vm->ids[ring];

        if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
            radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {

                trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
                radeon_fence_unref(&vm_id->flushed_updates);
                vm_id->flushed_updates = radeon_fence_ref(updates);
                vm_id->pd_gpu_addr = pd_addr;
                radeon_ring_vm_flush(rdev, &rdev->ring[ring],
                                     vm_id->id, vm_id->pd_gpu_addr);
        }
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
                     struct radeon_vm *vm,
                     struct radeon_fence *fence)
{
        unsigned vm_id = vm->ids[fence->ring].id;

        radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
        rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);

        radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
        vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
                                       struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                if (bo_va->vm == vm)
                        return bo_va;
        }
        return NULL;
}

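/*
 * Per-BO mapping lifecycle (sketch, using only functions from this file):
 * radeon_vm_bo_add() creates the bo_va, radeon_vm_bo_set_addr() reserves the
 * virtual address range and allocates the page tables backing it,
 * radeon_vm_bo_update() writes the actual PTEs at command submission time,
 * and radeon_vm_bo_rmv() removes the mapping again, deferring the PTE clear
 * to radeon_vm_clear_freed().
 */
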
/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
                                      struct radeon_vm *vm,
                                      struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
        if (bo_va == NULL)
                return NULL;

        bo_va->vm = vm;
        bo_va->bo = bo;
        bo_va->it.start = 0;
        bo_va->it.last = 0;
        bo_va->flags = 0;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
        INIT_LIST_HEAD(&bo_va->vm_status);

        mutex_lock(&vm->mutex);
        list_add_tail(&bo_va->bo_list, &bo->va);
        mutex_unlock(&vm->mutex);

        return bo_va;
}

/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to set up the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
                                struct radeon_ib *ib,
                                uint64_t pe,
                                uint64_t addr, unsigned count,
                                uint32_t incr, uint32_t flags)
{
        trace_radeon_vm_set_page(pe, addr, count, incr, flags);

        if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
                uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
                radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

        } else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
                radeon_asic_vm_write_pages(rdev, ib, pe, addr,
                                           count, incr, flags);

        } else {
                radeon_asic_vm_set_pages(rdev, ib, pe, addr,
                                         count, incr, flags);
        }
}

/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
                              struct radeon_bo *bo)
{
        struct ttm_operation_ctx ctx = { true, false };
        struct radeon_ib ib;
        unsigned entries;
        uint64_t addr;
        int r;

        r = radeon_bo_reserve(bo, false);
        if (r)
                return r;

        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (r)
                goto error_unreserve;

        addr = radeon_bo_gpu_offset(bo);
        entries = radeon_bo_size(bo) / 8;

        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
        if (r)
                goto error_unreserve;

        ib.length_dw = 0;

        radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
        radeon_asic_vm_pad_ib(rdev, &ib);
        WARN_ON(ib.length_dw > 64);

        r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r)
                goto error_free;

        ib.fence->is_vm_update = true;
        radeon_bo_fence(bo, ib.fence, false);

error_free:
        radeon_ib_free(rdev, &ib);

error_unreserve:
        radeon_bo_unreserve(bo);
        return r;
}

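/*
 * Address-to-page-table math used below (illustrative numbers, assuming
 * radeon_vm_block_size = 9): offsets are first converted to GPU page
 * numbers (soffset / RADEON_GPU_PAGE_SIZE) and then shifted right by the
 * block size to get page directory indices.  One page table therefore
 * covers 1 << 9 = 512 PTEs, i.e. 2 MB of virtual address space with 4 KB
 * pages, so e.g. a 6 MB buffer mapped at VA 0x400000 touches pt_idx 2..4.
 */
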
/**
 * radeon_vm_bo_set_addr - set bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and gets unreserved by this function!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                          struct radeon_bo_va *bo_va,
                          uint64_t soffset,
                          uint32_t flags)
{
        uint64_t size = radeon_bo_size(bo_va->bo);
        struct radeon_vm *vm = bo_va->vm;
        unsigned last_pfn, pt_idx;
        uint64_t eoffset;
        int r;

        if (soffset) {
                /* make sure object fits at this offset */
                eoffset = soffset + size - 1;
                if (soffset >= eoffset) {
                        r = -EINVAL;
                        goto error_unreserve;
                }

                last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
                if (last_pfn >= rdev->vm_manager.max_pfn) {
                        dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n",
                                last_pfn, rdev->vm_manager.max_pfn);
                        r = -EINVAL;
                        goto error_unreserve;
                }

        } else {
                eoffset = last_pfn = 0;
        }

        mutex_lock(&vm->mutex);
        soffset /= RADEON_GPU_PAGE_SIZE;
        eoffset /= RADEON_GPU_PAGE_SIZE;
        if (soffset || eoffset) {
                struct interval_tree_node *it;
                it = interval_tree_iter_first(&vm->va, soffset, eoffset);
                if (it && it != &bo_va->it) {
                        struct radeon_bo_va *tmp;
                        tmp = container_of(it, struct radeon_bo_va, it);
                        /* bo and tmp overlap, invalid offset */
                        dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
                                "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
                                soffset, tmp->bo, tmp->it.start, tmp->it.last);
                        mutex_unlock(&vm->mutex);
                        r = -EINVAL;
                        goto error_unreserve;
                }
        }

        if (bo_va->it.start || bo_va->it.last) {
                /* add a clone of the bo_va to clear the old address */
                struct radeon_bo_va *tmp;
                tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
                if (!tmp) {
                        mutex_unlock(&vm->mutex);
                        r = -ENOMEM;
                        goto error_unreserve;
                }
                tmp->it.start = bo_va->it.start;
                tmp->it.last = bo_va->it.last;
                tmp->vm = vm;
                tmp->bo = radeon_bo_ref(bo_va->bo);

                interval_tree_remove(&bo_va->it, &vm->va);
                spin_lock(&vm->status_lock);
                bo_va->it.start = 0;
                bo_va->it.last = 0;
                list_del_init(&bo_va->vm_status);
                list_add(&tmp->vm_status, &vm->freed);
                spin_unlock(&vm->status_lock);
        }

        if (soffset || eoffset) {
                spin_lock(&vm->status_lock);
                bo_va->it.start = soffset;
                bo_va->it.last = eoffset;
                list_add(&bo_va->vm_status, &vm->cleared);
                spin_unlock(&vm->status_lock);
                interval_tree_insert(&bo_va->it, &vm->va);
        }

        bo_va->flags = flags;

        soffset >>= radeon_vm_block_size;
        eoffset >>= radeon_vm_block_size;

        BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

        if (eoffset > vm->max_pde_used)
                vm->max_pde_used = eoffset;

        radeon_bo_unreserve(bo_va->bo);

        /* walk over the address space and allocate the page tables */
        for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
                struct radeon_bo *pt;

                if (vm->page_tables[pt_idx].bo)
                        continue;

                /* drop mutex to allocate and clear page table */
                mutex_unlock(&vm->mutex);

                r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
                                     RADEON_GPU_PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_VRAM, 0,
                                     NULL, NULL, &pt);
                if (r)
                        return r;

                r = radeon_vm_clear_bo(rdev, pt);
                if (r) {
                        radeon_bo_unref(&pt);
                        return r;
                }

                /* acquire mutex again */
                mutex_lock(&vm->mutex);
                if (vm->page_tables[pt_idx].bo) {
                        /* someone else allocated the pt in the meantime */
                        mutex_unlock(&vm->mutex);
                        radeon_bo_unref(&pt);
                        mutex_lock(&vm->mutex);
                        continue;
                }

                vm->page_tables[pt_idx].addr = 0;
                vm->page_tables[pt_idx].bo = pt;
        }

        mutex_unlock(&vm->mutex);
        return 0;

error_unreserve:
        radeon_bo_unreserve(bo_va->bo);
        return r;
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
        uint64_t result;

        /* page table offset */
        result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT];
        result &= ~RADEON_GPU_PAGE_MASK;

        return result;
}

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
        uint32_t hw_flags = 0;

        hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
        hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
        hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
        if (flags & RADEON_VM_PAGE_SYSTEM) {
                hw_flags |= R600_PTE_SYSTEM;
                hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
        }
        return hw_flags;
}

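/*
 * The page directory update below batches contiguous runs of PDEs whose
 * page tables are also laid out contiguously in VRAM into a single
 * radeon_vm_set_pages() call; as an illustration, 16 page tables allocated
 * back to back result in one command covering 16 PDEs instead of 16
 * separate writes.
 */
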
/**
 * radeon_vm_update_page_directory - make sure that page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
                                    struct radeon_vm *vm)
{
        struct radeon_bo *pd = vm->page_directory;
        uint64_t pd_addr = radeon_bo_gpu_offset(pd);
        uint32_t incr = RADEON_VM_PTE_COUNT * 8;
        uint64_t last_pde = ~0, last_pt = ~0;
        unsigned count = 0, pt_idx, ndw;
        struct radeon_ib ib;
        int r;

        /* padding, etc. */
        ndw = 64;

        /* assume the worst case */
        ndw += vm->max_pde_used * 6;

        /* update too big for an IB */
        if (ndw > 0xfffff)
                return -ENOMEM;

        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
        if (r)
                return r;
        ib.length_dw = 0;

        /* walk over the address space and update the page directory */
        for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
                struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
                uint64_t pde, pt;

                if (bo == NULL)
                        continue;

                pt = radeon_bo_gpu_offset(bo);
                if (vm->page_tables[pt_idx].addr == pt)
                        continue;
                vm->page_tables[pt_idx].addr = pt;

                pde = pd_addr + pt_idx * 8;
                if (((last_pde + 8 * count) != pde) ||
                    ((last_pt + incr * count) != pt)) {

                        if (count) {
                                radeon_vm_set_pages(rdev, &ib, last_pde,
                                                    last_pt, count, incr,
                                                    R600_PTE_VALID);
                        }

                        count = 1;
                        last_pde = pde;
                        last_pt = pt;
                } else {
                        ++count;
                }
        }

        if (count)
                radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
                                    incr, R600_PTE_VALID);

        if (ib.length_dw != 0) {
                radeon_asic_vm_pad_ib(rdev, &ib);

                radeon_sync_resv(rdev, &ib.sync, pd->tbo.base.resv, true);
                WARN_ON(ib.length_dw > ndw);
                r = radeon_ib_schedule(rdev, &ib, NULL, false);
                if (r) {
                        radeon_ib_free(rdev, &ib);
                        return r;
                }
                ib.fence->is_vm_update = true;
                radeon_bo_fence(pd, ib.fence, false);
        }
        radeon_ib_free(rdev, &ib);

        return 0;
}

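/*
 * Fragment sizing example (illustrative): a PTE is 8 bytes, so the SI+
 * fragment alignment of 0x80 bytes used below corresponds to 16 PTEs, i.e.
 * 64 KB of virtual address space, while the 0x200 bytes used on
 * Cayman/Aruba corresponds to 64 PTEs or 256 KB.  Mappings whose virtual
 * base address and size are aligned to the fragment size can be written
 * entirely with fragment PTEs and benefit the most.
 */
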
/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
                                struct radeon_ib *ib,
                                uint64_t pe_start, uint64_t pe_end,
                                uint64_t addr, uint32_t flags)
{
        /**
         * The MC L1 TLB supports variable sized pages, based on a fragment
         * field in the PTE. When this field is set to a non-zero value, page
         * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
         * flags are considered valid for all PTEs within the fragment range
         * and corresponding mappings are assumed to be physically contiguous.
         *
         * The L1 TLB can store a single PTE for the whole fragment,
         * significantly increasing the space available for translation
         * caching. This leads to large improvements in throughput when the
         * TLB is under pressure.
         *
         * The L2 TLB distributes small and large fragments into two
         * asymmetric partitions. The large fragment cache is significantly
         * larger. Thus, we try to use large fragments wherever possible.
         * Userspace can support this by aligning virtual base address and
         * allocation size to the fragment size.
         */

        /* NI is optimized for 256KB fragments, SI and newer for 64KB */
        uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) ||
                               (rdev->family == CHIP_ARUBA)) ?
                R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
        uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) ||
                               (rdev->family == CHIP_ARUBA)) ? 0x200 : 0x80;

        uint64_t frag_start = ALIGN(pe_start, frag_align);
        uint64_t frag_end = pe_end & ~(frag_align - 1);

        unsigned count;

        /* system pages are not contiguous */
        if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
            (frag_start >= frag_end)) {

                count = (pe_end - pe_start) / 8;
                radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
                                    RADEON_GPU_PAGE_SIZE, flags);
                return;
        }

        /* handle the 4K area at the beginning */
        if (pe_start != frag_start) {
                count = (frag_start - pe_start) / 8;
                radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
                                    RADEON_GPU_PAGE_SIZE, flags);
                addr += RADEON_GPU_PAGE_SIZE * count;
        }

        /* handle the area in the middle */
        count = (frag_end - frag_start) / 8;
        radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
                            RADEON_GPU_PAGE_SIZE, flags | frag_flags);

        /* handle the 4K area at the end */
        if (frag_end != pe_end) {
                addr += RADEON_GPU_PAGE_SIZE * count;
                count = (pe_end - frag_end) / 8;
                radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
                                    RADEON_GPU_PAGE_SIZE, flags);
        }
}

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to use for the update
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_update_ptes(struct radeon_device *rdev,
                                 struct radeon_vm *vm,
                                 struct radeon_ib *ib,
                                 uint64_t start, uint64_t end,
                                 uint64_t dst, uint32_t flags)
{
        uint64_t mask = RADEON_VM_PTE_COUNT - 1;
        uint64_t last_pte = ~0, last_dst = ~0;
        unsigned count = 0;
        uint64_t addr;

        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; ) {
                uint64_t pt_idx = addr >> radeon_vm_block_size;
                struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
                unsigned nptes;
                uint64_t pte;
                int r;

                radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true);
                r = dma_resv_reserve_fences(pt->tbo.base.resv, 1);
                if (r)
                        return r;

                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
                else
                        nptes = RADEON_VM_PTE_COUNT - (addr & mask);

                pte = radeon_bo_gpu_offset(pt);
                pte += (addr & mask) * 8;

                if ((last_pte + 8 * count) != pte) {

                        if (count) {
                                radeon_vm_frag_ptes(rdev, ib, last_pte,
                                                    last_pte + 8 * count,
                                                    last_dst, flags);
                        }

                        count = nptes;
                        last_pte = pte;
                        last_dst = dst;
                } else {
                        count += nptes;
                }

                addr += nptes;
                dst += nptes * RADEON_GPU_PAGE_SIZE;
        }

        if (count) {
                radeon_vm_frag_ptes(rdev, ib, last_pte,
                                    last_pte + 8 * count,
                                    last_dst, flags);
        }

        return 0;
}

/**
 * radeon_vm_fence_pts - fence page tables after an update
 *
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @fence: fence to use
 *
 * Fence the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_fence_pts(struct radeon_vm *vm,
                                uint64_t start, uint64_t end,
                                struct radeon_fence *fence)
{
        unsigned i;

        start >>= radeon_vm_block_size;
        end = (end - 1) >> radeon_vm_block_size;

        for (i = start; i <= end; ++i)
                radeon_bo_fence(vm->page_tables[i].bo, fence, true);
}

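/*
 * IB sizing in radeon_vm_bo_update() below (rough upper bounds derived from
 * the code itself, shown only as an illustration): 64 dwords of padding
 * plus, depending on the PTE type, about 7 dwords per copy command for GART
 * pages, 4 dwords of header plus 2 dwords per PTE for system pages, or 10
 * dwords per set-pages command (plus two extra commands for the fragment
 * begin/end) for VRAM pages.
 */
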
/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: radeon buffer virtual address object
 * @mem: ttm resource the BO is placed in, or NULL to clear the mapping
 *
 * Fill in the page table entries for @bo_va (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
                        struct radeon_bo_va *bo_va,
                        struct ttm_resource *mem)
{
        struct radeon_vm *vm = bo_va->vm;
        struct radeon_ib ib;
        unsigned nptes, ncmds, ndw;
        uint64_t addr;
        uint32_t flags;
        int r;

        if (!bo_va->it.start) {
                dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
                        bo_va->bo, vm);
                return -EINVAL;
        }

        spin_lock(&vm->status_lock);
        if (mem) {
                if (list_empty(&bo_va->vm_status)) {
                        spin_unlock(&vm->status_lock);
                        return 0;
                }
                list_del_init(&bo_va->vm_status);
        } else {
                list_del(&bo_va->vm_status);
                list_add(&bo_va->vm_status, &vm->cleared);
        }
        spin_unlock(&vm->status_lock);

        bo_va->flags &= ~RADEON_VM_PAGE_VALID;
        bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
        bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
        if (bo_va->bo && radeon_ttm_tt_is_readonly(rdev, bo_va->bo->tbo.ttm))
                bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;

        if (mem) {
                addr = (u64)mem->start << PAGE_SHIFT;
                if (mem->mem_type != TTM_PL_SYSTEM)
                        bo_va->flags |= RADEON_VM_PAGE_VALID;

                if (mem->mem_type == TTM_PL_TT) {
                        bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
                        if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
                                bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

                } else {
                        addr += rdev->vm_manager.vram_base_offset;
                }
        } else {
                addr = 0;
        }

        trace_radeon_vm_bo_update(bo_va);

        nptes = bo_va->it.last - bo_va->it.start + 1;

        /* reserve space for one command every (1 << BLOCK_SIZE) entries
           or 2k dwords (whatever is smaller) */
        ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

        /* padding, etc. */
        ndw = 64;

        flags = radeon_vm_page_flags(bo_va->flags);
        if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
                /* only copy commands needed */
                ndw += ncmds * 7;

        } else if (flags & R600_PTE_SYSTEM) {
                /* header for write data commands */
                ndw += ncmds * 4;

                /* body of write data command */
                ndw += nptes * 2;

        } else {
                /* set page commands needed */
                ndw += ncmds * 10;

                /* two extra commands for begin/end of fragment */
                ndw += 2 * 10;
        }

        /* update too big for an IB */
        if (ndw > 0xfffff)
                return -ENOMEM;

        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
        if (r)
                return r;
        ib.length_dw = 0;

        if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
                unsigned i;

                for (i = 0; i < RADEON_NUM_RINGS; ++i)
                        radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
        }

        r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
                                  bo_va->it.last + 1, addr,
                                  radeon_vm_page_flags(bo_va->flags));
        if (r) {
                radeon_ib_free(rdev, &ib);
                return r;
        }

        radeon_asic_vm_pad_ib(rdev, &ib);
        WARN_ON(ib.length_dw > ndw);

        r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_ib_free(rdev, &ib);
                return r;
        }
        ib.fence->is_vm_update = true;
        radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
        radeon_fence_unref(&bo_va->last_pt_update);
        bo_va->last_pt_update = radeon_fence_ref(ib.fence);
        radeon_ib_free(rdev, &ib);

        return 0;
}

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
                          struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va;
        int r = 0;

        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->freed)) {
                bo_va = list_first_entry(&vm->freed,
                                         struct radeon_bo_va, vm_status);
                spin_unlock(&vm->status_lock);

                r = radeon_vm_bo_update(rdev, bo_va, NULL);
                radeon_bo_unref(&bo_va->bo);
                radeon_fence_unref(&bo_va->last_pt_update);
                spin_lock(&vm->status_lock);
                list_del(&bo_va->vm_status);
                kfree(bo_va);
                if (r)
                        break;
        }
        spin_unlock(&vm->status_lock);
        return r;
}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_invalids(struct radeon_device *rdev,
                             struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va;
        int r;

        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->invalidated)) {
                bo_va = list_first_entry(&vm->invalidated,
                                         struct radeon_bo_va, vm_status);
                spin_unlock(&vm->status_lock);

                r = radeon_vm_bo_update(rdev, bo_va, NULL);
                if (r)
                        return r;

                spin_lock(&vm->status_lock);
        }
        spin_unlock(&vm->status_lock);

        return 0;
}

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
 */
void radeon_vm_bo_rmv(struct radeon_device *rdev,
                      struct radeon_bo_va *bo_va)
{
        struct radeon_vm *vm = bo_va->vm;

        list_del(&bo_va->bo_list);

        mutex_lock(&vm->mutex);
        if (bo_va->it.start || bo_va->it.last)
                interval_tree_remove(&bo_va->it, &vm->va);

        spin_lock(&vm->status_lock);
        list_del(&bo_va->vm_status);
        if (bo_va->it.start || bo_va->it.last) {
                bo_va->bo = radeon_bo_ref(bo_va->bo);
                list_add(&bo_va->vm_status, &vm->freed);
        } else {
                radeon_fence_unref(&bo_va->last_pt_update);
                kfree(bo_va);
        }
        spin_unlock(&vm->status_lock);

        mutex_unlock(&vm->mutex);
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
                             struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                spin_lock(&bo_va->vm->status_lock);
                if (list_empty(&bo_va->vm_status) &&
                    (bo_va->it.start || bo_va->it.last))
                        list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
                spin_unlock(&bo_va->vm->status_lock);
        }
}

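/*
 * A rough summary of the per-VM status lists initialized below (derived
 * from how this file uses them): "invalidated" holds mappings whose BO has
 * moved and whose PTEs are therefore stale, "freed" holds mappings that
 * were removed and still need their PTEs cleared, and "cleared" holds
 * mappings whose VA range currently contains no valid PTEs (newly set
 * addresses or unmapped BOs).
 */
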
/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 */
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
        const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
                                   RADEON_VM_PTE_COUNT * 8);
        unsigned pd_size, pd_entries, pts_size;
        int i, r;

        vm->ib_bo_va = NULL;
        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                vm->ids[i].id = 0;
                vm->ids[i].flushed_updates = NULL;
                vm->ids[i].last_id_use = NULL;
        }
        mutex_init(&vm->mutex);
        vm->va = RB_ROOT_CACHED;
        spin_lock_init(&vm->status_lock);
        INIT_LIST_HEAD(&vm->invalidated);
        INIT_LIST_HEAD(&vm->freed);
        INIT_LIST_HEAD(&vm->cleared);

        pd_size = radeon_vm_directory_size(rdev);
        pd_entries = radeon_vm_num_pdes(rdev);

        /* allocate page table array */
        pts_size = pd_entries * sizeof(struct radeon_vm_pt);
        vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                return -ENOMEM;
        }

        r = radeon_bo_create(rdev, pd_size, align, true,
                             RADEON_GEM_DOMAIN_VRAM, 0, NULL,
                             NULL, &vm->page_directory);
        if (r) {
                kfree(vm->page_tables);
                vm->page_tables = NULL;
                return r;
        }
        r = radeon_vm_clear_bo(rdev, vm->page_directory);
        if (r) {
                radeon_bo_unref(&vm->page_directory);
                vm->page_directory = NULL;
                kfree(vm->page_tables);
                vm->page_tables = NULL;
                return r;
        }

        return 0;
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va, *tmp;
        int i, r;

        if (!RB_EMPTY_ROOT(&vm->va.rb_root))
                dev_err(rdev->dev, "still active bo inside vm\n");

        rbtree_postorder_for_each_entry_safe(bo_va, tmp,
                                             &vm->va.rb_root, it.rb) {
                interval_tree_remove(&bo_va->it, &vm->va);
                r = radeon_bo_reserve(bo_va->bo, false);
                if (!r) {
                        list_del_init(&bo_va->bo_list);
                        radeon_bo_unreserve(bo_va->bo);
                        radeon_fence_unref(&bo_va->last_pt_update);
                        kfree(bo_va);
                }
        }
        list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
                radeon_bo_unref(&bo_va->bo);
                radeon_fence_unref(&bo_va->last_pt_update);
                kfree(bo_va);
        }

        for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
                radeon_bo_unref(&vm->page_tables[i].bo);
        kfree(vm->page_tables);

        radeon_bo_unref(&vm->page_directory);

        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                radeon_fence_unref(&vm->ids[i].flushed_updates);
                radeon_fence_unref(&vm->ids[i].last_id_use);
        }

        mutex_destroy(&vm->mutex);
}