1 /* 2 * Copyright 2008 Advanced Micro Devices, Inc. 3 * Copyright 2008 Red Hat Inc. 4 * Copyright 2009 Jerome Glisse. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: Dave Airlie 25 * Alex Deucher 26 * Jerome Glisse 27 */ 28 29 #include <drm/radeon_drm.h> 30 #include "radeon.h" 31 #include "radeon_trace.h" 32 33 /* 34 * GPUVM 35 * GPUVM is similar to the legacy gart on older asics, however 36 * rather than there being a single global gart table 37 * for the entire GPU, there are multiple VM page tables active 38 * at any given time. The VM page tables can contain a mix 39 * vram pages and system memory pages and system memory pages 40 * can be mapped as snooped (cached system pages) or unsnooped 41 * (uncached system pages). 42 * Each VM has an ID associated with it and there is a page table 43 * associated with each VMID. 
When execting a command buffer, 44 * the kernel tells the ring what VMID to use for that command 45 * buffer. VMIDs are allocated dynamically as commands are submitted. 46 * The userspace drivers maintain their own address space and the kernel 47 * sets up their pages tables accordingly when they submit their 48 * command buffers and a VMID is assigned. 49 * Cayman/Trinity support up to 8 active VMs at any given time; 50 * SI supports 16. 51 */ 52 53 /** 54 * radeon_vm_num_pdes - return the number of page directory entries 55 * 56 * @rdev: radeon_device pointer 57 * 58 * Calculate the number of page directory entries (cayman+). 59 */ 60 static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) 61 { 62 return rdev->vm_manager.max_pfn >> radeon_vm_block_size; 63 } 64 65 /** 66 * radeon_vm_directory_size - returns the size of the page directory in bytes 67 * 68 * @rdev: radeon_device pointer 69 * 70 * Calculate the size of the page directory in bytes (cayman+). 71 */ 72 static unsigned radeon_vm_directory_size(struct radeon_device *rdev) 73 { 74 return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); 75 } 76 77 /** 78 * radeon_vm_manager_init - init the vm manager 79 * 80 * @rdev: radeon_device pointer 81 * 82 * Init the vm manager (cayman+). 83 * Returns 0 for success, error for failure. 84 */ 85 int radeon_vm_manager_init(struct radeon_device *rdev) 86 { 87 int r; 88 89 if (!rdev->vm_manager.enabled) { 90 r = radeon_asic_vm_init(rdev); 91 if (r) 92 return r; 93 94 rdev->vm_manager.enabled = true; 95 } 96 return 0; 97 } 98 99 /** 100 * radeon_vm_manager_fini - tear down the vm manager 101 * 102 * @rdev: radeon_device pointer 103 * 104 * Tear down the VM manager (cayman+). 
105 */ 106 void radeon_vm_manager_fini(struct radeon_device *rdev) 107 { 108 int i; 109 110 if (!rdev->vm_manager.enabled) 111 return; 112 113 for (i = 0; i < RADEON_NUM_VM; ++i) 114 radeon_fence_unref(&rdev->vm_manager.active[i]); 115 radeon_asic_vm_fini(rdev); 116 rdev->vm_manager.enabled = false; 117 } 118 119 /** 120 * radeon_vm_get_bos - add the vm BOs to a validation list 121 * 122 * @rdev: radeon_device pointer 123 * @vm: vm providing the BOs 124 * @head: head of validation list 125 * 126 * Add the page directory to the list of BOs to 127 * validate for command submission (cayman+). 128 */ 129 struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, 130 struct radeon_vm *vm, 131 struct list_head *head) 132 { 133 struct radeon_bo_list *list; 134 unsigned i, idx; 135 136 list = kvmalloc_objs(struct radeon_bo_list, vm->max_pde_used + 2); 137 if (!list) 138 return NULL; 139 140 /* add the vm page table to the list */ 141 list[0].robj = vm->page_directory; 142 list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM; 143 list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; 144 list[0].shared = true; 145 list[0].tiling_flags = 0; 146 list_add(&list[0].list, head); 147 148 for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { 149 if (!vm->page_tables[i].bo) 150 continue; 151 152 list[idx].robj = vm->page_tables[i].bo; 153 list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM; 154 list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; 155 list[idx].shared = true; 156 list[idx].tiling_flags = 0; 157 list_add(&list[idx++].list, head); 158 } 159 160 return list; 161 } 162 163 /** 164 * radeon_vm_grab_id - allocate the next free VMID 165 * 166 * @rdev: radeon_device pointer 167 * @vm: vm to allocate id for 168 * @ring: ring we want to submit job to 169 * 170 * Allocate an id for the vm (cayman+). 171 * Returns the fence we need to sync to (if any). 172 * 173 * Global and local mutex must be locked! 
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
                                       struct radeon_vm *vm, int ring)
{
    struct radeon_fence *best[RADEON_NUM_RINGS] = {};
    struct radeon_vm_id *vm_id = &vm->ids[ring];

    /* choices[0]: oldest id last used on @ring, choices[1]: oldest id on
     * any other ring; preferring the same ring avoids a cross-ring sync */
    unsigned choices[2] = {};
    unsigned i;

    /* check if the id is still valid, i.e. no other VM has grabbed it
     * since we last used it on this ring */
    if (vm_id->id && vm_id->last_id_use &&
        vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
        return NULL;

    /* we definitely need to flush; invalidate the cached PD address */
    vm_id->pd_gpu_addr = ~0ll;

    /* skip over VMID 0, since it is the system VM */
    for (i = 1; i < rdev->vm_manager.nvm; ++i) {
        struct radeon_fence *fence = rdev->vm_manager.active[i];

        if (fence == NULL) {
            /* found a free one */
            vm_id->id = i;
            trace_radeon_vm_grab_id(i, ring);
            return NULL;
        }

        /* remember the id protected by the oldest fence per ring */
        if (radeon_fence_is_earlier(fence, best[fence->ring])) {
            best[fence->ring] = fence;
            choices[fence->ring == ring ? 0 : 1] = i;
        }
    }

    /* no free id: steal the least recently used one, same ring first;
     * the caller must sync to the returned fence before reusing it */
    for (i = 0; i < 2; ++i) {
        if (choices[i]) {
            vm_id->id = choices[i];
            trace_radeon_vm_grab_id(choices[i], ring);
            return rdev->vm_manager.active[choices[i]];
        }
    }

    /* should never happen: there are more VMIDs than rings */
    BUG();
    return NULL;
}

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 * @updates: last vm update that is waited for
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
233 */ 234 void radeon_vm_flush(struct radeon_device *rdev, 235 struct radeon_vm *vm, 236 int ring, struct radeon_fence *updates) 237 { 238 uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); 239 struct radeon_vm_id *vm_id = &vm->ids[ring]; 240 241 if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates || 242 radeon_fence_is_earlier(vm_id->flushed_updates, updates)) { 243 244 trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id); 245 radeon_fence_unref(&vm_id->flushed_updates); 246 vm_id->flushed_updates = radeon_fence_ref(updates); 247 vm_id->pd_gpu_addr = pd_addr; 248 radeon_ring_vm_flush(rdev, &rdev->ring[ring], 249 vm_id->id, vm_id->pd_gpu_addr); 250 251 } 252 } 253 254 /** 255 * radeon_vm_fence - remember fence for vm 256 * 257 * @rdev: radeon_device pointer 258 * @vm: vm we want to fence 259 * @fence: fence to remember 260 * 261 * Fence the vm (cayman+). 262 * Set the fence used to protect page table and id. 263 * 264 * Global and local mutex must be locked! 265 */ 266 void radeon_vm_fence(struct radeon_device *rdev, 267 struct radeon_vm *vm, 268 struct radeon_fence *fence) 269 { 270 unsigned vm_id = vm->ids[fence->ring].id; 271 272 radeon_fence_unref(&rdev->vm_manager.active[vm_id]); 273 rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence); 274 275 radeon_fence_unref(&vm->ids[fence->ring].last_id_use); 276 vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence); 277 } 278 279 /** 280 * radeon_vm_bo_find - find the bo_va for a specific vm & bo 281 * 282 * @vm: requested vm 283 * @bo: requested buffer object 284 * 285 * Find @bo inside the requested vm (cayman+). 286 * Search inside the @bos vm list for the requested vm 287 * Returns the found bo_va or NULL if none is found 288 * 289 * Object has to be reserved! 
290 */ 291 struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, 292 struct radeon_bo *bo) 293 { 294 struct radeon_bo_va *bo_va; 295 296 list_for_each_entry(bo_va, &bo->va, bo_list) { 297 if (bo_va->vm == vm) 298 return bo_va; 299 300 } 301 return NULL; 302 } 303 304 /** 305 * radeon_vm_bo_add - add a bo to a specific vm 306 * 307 * @rdev: radeon_device pointer 308 * @vm: requested vm 309 * @bo: radeon buffer object 310 * 311 * Add @bo into the requested vm (cayman+). 312 * Add @bo to the list of bos associated with the vm 313 * Returns newly added bo_va or NULL for failure 314 * 315 * Object has to be reserved! 316 */ 317 struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, 318 struct radeon_vm *vm, 319 struct radeon_bo *bo) 320 { 321 struct radeon_bo_va *bo_va; 322 323 bo_va = kzalloc_obj(struct radeon_bo_va); 324 if (bo_va == NULL) 325 return NULL; 326 327 bo_va->vm = vm; 328 bo_va->bo = bo; 329 bo_va->it.start = 0; 330 bo_va->it.last = 0; 331 bo_va->flags = 0; 332 bo_va->ref_count = 1; 333 INIT_LIST_HEAD(&bo_va->bo_list); 334 INIT_LIST_HEAD(&bo_va->vm_status); 335 336 mutex_lock(&vm->mutex); 337 list_add_tail(&bo_va->bo_list, &bo->va); 338 mutex_unlock(&vm->mutex); 339 340 return bo_va; 341 } 342 343 /** 344 * radeon_vm_set_pages - helper to call the right asic function 345 * 346 * @rdev: radeon_device pointer 347 * @ib: indirect buffer to fill with commands 348 * @pe: addr of the page entry 349 * @addr: dst addr to write into pe 350 * @count: number of page entries to update 351 * @incr: increase next addr by incr bytes 352 * @flags: hw access flags 353 * 354 * Traces the parameters and calls the right asic functions 355 * to setup the page table using the DMA. 
356 */ 357 static void radeon_vm_set_pages(struct radeon_device *rdev, 358 struct radeon_ib *ib, 359 uint64_t pe, 360 uint64_t addr, unsigned count, 361 uint32_t incr, uint32_t flags) 362 { 363 trace_radeon_vm_set_page(pe, addr, count, incr, flags); 364 365 if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { 366 uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; 367 radeon_asic_vm_copy_pages(rdev, ib, pe, src, count); 368 369 } else if ((flags & R600_PTE_SYSTEM) || (count < 3)) { 370 radeon_asic_vm_write_pages(rdev, ib, pe, addr, 371 count, incr, flags); 372 373 } else { 374 radeon_asic_vm_set_pages(rdev, ib, pe, addr, 375 count, incr, flags); 376 } 377 } 378 379 /** 380 * radeon_vm_clear_bo - initially clear the page dir/table 381 * 382 * @rdev: radeon_device pointer 383 * @bo: bo to clear 384 */ 385 static int radeon_vm_clear_bo(struct radeon_device *rdev, 386 struct radeon_bo *bo) 387 { 388 struct ttm_operation_ctx ctx = { true, false }; 389 struct radeon_ib ib; 390 unsigned entries; 391 uint64_t addr; 392 int r; 393 394 r = radeon_bo_reserve(bo, false); 395 if (r) 396 return r; 397 398 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 399 if (r) 400 goto error_unreserve; 401 402 addr = radeon_bo_gpu_offset(bo); 403 entries = radeon_bo_size(bo) / 8; 404 405 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256); 406 if (r) 407 goto error_unreserve; 408 409 ib.length_dw = 0; 410 411 radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0); 412 radeon_asic_vm_pad_ib(rdev, &ib); 413 WARN_ON(ib.length_dw > 64); 414 415 r = radeon_ib_schedule(rdev, &ib, NULL, false); 416 if (r) 417 goto error_free; 418 419 ib.fence->is_vm_update = true; 420 radeon_bo_fence(bo, ib.fence, false); 421 422 error_free: 423 radeon_ib_free(rdev, &ib); 424 425 error_unreserve: 426 radeon_bo_unreserve(bo); 427 return r; 428 } 429 430 /** 431 * radeon_vm_bo_set_addr - set bos virtual address inside a vm 432 * 433 * @rdev: radeon_device pointer 434 * @bo_va: bo_va to 
 *      store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and gets unreserved by this function!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                          struct radeon_bo_va *bo_va,
                          uint64_t soffset,
                          uint32_t flags)
{
    uint64_t size = radeon_bo_size(bo_va->bo);
    struct radeon_vm *vm = bo_va->vm;
    unsigned last_pfn, pt_idx;
    uint64_t eoffset;
    int r;

    if (soffset) {
        /* make sure object fit at this offset */
        eoffset = soffset + size - 1;
        if (soffset >= eoffset) {
            /* wrapped around the address space */
            r = -EINVAL;
            goto error_unreserve;
        }

        last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
        if (last_pfn >= rdev->vm_manager.max_pfn) {
            dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n",
                    last_pfn, rdev->vm_manager.max_pfn);
            r = -EINVAL;
            goto error_unreserve;
        }

    } else {
        /* soffset == 0 means "unmap" */
        eoffset = last_pfn = 0;
    }

    mutex_lock(&vm->mutex);
    /* from here on soffset/eoffset are in GPU pages, not bytes */
    soffset /= RADEON_GPU_PAGE_SIZE;
    eoffset /= RADEON_GPU_PAGE_SIZE;
    if (soffset || eoffset) {
        /* reject ranges overlapping an existing mapping */
        struct interval_tree_node *it;
        it = interval_tree_iter_first(&vm->va, soffset, eoffset);
        if (it && it != &bo_va->it) {
            struct radeon_bo_va *tmp;
            tmp = container_of(it, struct radeon_bo_va, it);
            /* bo and tmp overlap, invalid offset */
            dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
                    "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
                    soffset, tmp->bo, tmp->it.start, tmp->it.last);
            mutex_unlock(&vm->mutex);
            r = -EINVAL;
            goto error_unreserve;
        }
    }

    if (bo_va->it.start || bo_va->it.last) {
        /* add a clone of the bo_va to clear the old address; the clone
         * on the freed list keeps a BO reference until the PTEs are
         * actually cleared */
        struct radeon_bo_va *tmp;
        tmp = kzalloc_obj(struct radeon_bo_va);
        if (!tmp) {
            mutex_unlock(&vm->mutex);
            r = -ENOMEM;
            goto error_unreserve;
        }
        tmp->it.start = bo_va->it.start;
        tmp->it.last = bo_va->it.last;
        tmp->vm = vm;
        tmp->bo = radeon_bo_ref(bo_va->bo);

        interval_tree_remove(&bo_va->it, &vm->va);
        spin_lock(&vm->status_lock);
        bo_va->it.start = 0;
        bo_va->it.last = 0;
        list_del_init(&bo_va->vm_status);
        list_add(&tmp->vm_status, &vm->freed);
        spin_unlock(&vm->status_lock);
    }

    if (soffset || eoffset) {
        /* install the new mapping; it starts out on the cleared list
         * until radeon_vm_bo_update() writes its PTEs */
        spin_lock(&vm->status_lock);
        bo_va->it.start = soffset;
        bo_va->it.last = eoffset;
        list_add(&bo_va->vm_status, &vm->cleared);
        spin_unlock(&vm->status_lock);
        interval_tree_insert(&bo_va->it, &vm->va);
    }

    bo_va->flags = flags;

    /* convert page range into a page-directory index range */
    soffset >>= radeon_vm_block_size;
    eoffset >>= radeon_vm_block_size;

    BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

    if (eoffset > vm->max_pde_used)
        vm->max_pde_used = eoffset;

    radeon_bo_unreserve(bo_va->bo);

    /* walk over the address space and allocate the page tables */
    for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
        struct radeon_bo *pt;

        if (vm->page_tables[pt_idx].bo)
            continue;

        /* drop mutex to allocate and clear page table */
        mutex_unlock(&vm->mutex);

        r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
                             RADEON_GPU_PAGE_SIZE, true,
                             RADEON_GEM_DOMAIN_VRAM, 0,
                             NULL, NULL, &pt);
        if (r)
            return r;

        r = radeon_vm_clear_bo(rdev, pt);
        if (r) {
            radeon_bo_unref(&pt);
            return r;
        }

        /* acquire mutex again */
        mutex_lock(&vm->mutex);
        if (vm->page_tables[pt_idx].bo) {
            /* someone else allocated the pt in the meantime */
            mutex_unlock(&vm->mutex);
            radeon_bo_unref(&pt);
            mutex_lock(&vm->mutex);
            continue;
        }

        vm->page_tables[pt_idx].addr = 0;
        vm->page_tables[pt_idx].bo = pt;
    }

    mutex_unlock(&vm->mutex);
    return 0;

error_unreserve:
    radeon_bo_unreserve(bo_va->bo);
    return r;
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
    uint64_t result;

    /* page table offset */
    result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT];
    /* mask off the per-page hw flag bits */
    result &= ~RADEON_GPU_PAGE_MASK;

    return result;
}

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
    uint32_t hw_flags = 0;

    hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
    hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
    hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
    if (flags & RADEON_VM_PAGE_SYSTEM) {
        /* snooping is only meaningful for system pages */
        hw_flags |= R600_PTE_SYSTEM;
        hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
    }
    return hw_flags;
}

/**
 * radeon_vm_update_page_directory - make sure that page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
                                    struct radeon_vm *vm)
{
    struct radeon_bo *pd = vm->page_directory;
    uint64_t pd_addr = radeon_bo_gpu_offset(pd);
    uint32_t incr = RADEON_VM_PTE_COUNT * 8;
    uint64_t last_pde = ~0, last_pt = ~0;
    unsigned count = 0, pt_idx, ndw;
    struct radeon_ib ib;
    int r;

    /* padding, etc. */
    ndw = 64;

    /* assume the worst case */
    ndw += vm->max_pde_used * 6;

    /* update too big for an IB */
    if (ndw > 0xfffff)
        return -ENOMEM;

    r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
    if (r)
        return r;
    ib.length_dw = 0;

    /* walk over the address space and update the page directory,
     * batching runs of consecutive PDEs pointing at consecutive
     * page tables into a single set_pages call */
    for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
        struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
        uint64_t pde, pt;

        if (bo == NULL)
            continue;

        pt = radeon_bo_gpu_offset(bo);
        if (vm->page_tables[pt_idx].addr == pt)
            continue;
        vm->page_tables[pt_idx].addr = pt;

        pde = pd_addr + pt_idx * 8;
        if (((last_pde + 8 * count) != pde) ||
            ((last_pt + incr * count) != pt)) {

            /* run broken: flush the pending batch first */
            if (count) {
                radeon_vm_set_pages(rdev, &ib, last_pde,
                                    last_pt, count, incr,
                                    R600_PTE_VALID);
            }

            count = 1;
            last_pde = pde;
            last_pt = pt;
        } else {
            ++count;
        }
    }

    /* flush the final batch */
    if (count)
        radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
                            incr, R600_PTE_VALID);

    if (ib.length_dw != 0) {
        radeon_asic_vm_pad_ib(rdev, &ib);

        /* wait for users of the directory before overwriting it */
        radeon_sync_resv(rdev, &ib.sync, pd->tbo.base.resv, true);
        WARN_ON(ib.length_dw > ndw);
        r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
            radeon_ib_free(rdev, &ib);
            return r;
        }
        ib.fence->is_vm_update = true;
        radeon_bo_fence(pd, ib.fence, false);
    }
    radeon_ib_free(rdev, &ib);

    return 0;
}

/**
 * radeon_vm_frag_ptes - add fragment information to
 *      PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
                                struct radeon_ib *ib,
                                uint64_t pe_start, uint64_t pe_end,
                                uint64_t addr, uint32_t flags)
{
    /*
     * The MC L1 TLB supports variable sized pages, based on a fragment
     * field in the PTE. When this field is set to a non-zero value, page
     * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
     * flags are considered valid for all PTEs within the fragment range
     * and corresponding mappings are assumed to be physically contiguous.
     *
     * The L1 TLB can store a single PTE for the whole fragment,
     * significantly increasing the space available for translation
     * caching. This leads to large improvements in throughput when the
     * TLB is under pressure.
     *
     * The L2 TLB distributes small and large fragments into two
     * asymmetric partitions. The large fragment cache is significantly
     * larger. Thus, we try to use large fragments wherever possible.
     * Userspace can support this by aligning virtual base address and
     * allocation size to the fragment size.
     */

    /* NI is optimized for 256KB fragments, SI and newer for 64KB */
    uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) ||
                           (rdev->family == CHIP_ARUBA)) ?
        R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
    /* alignment in PTE-table bytes: 0x200/0x80 bytes = 64/16 PTEs */
    uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) ||
                           (rdev->family == CHIP_ARUBA)) ? 0x200 : 0x80;

    uint64_t frag_start = ALIGN(pe_start, frag_align);
    uint64_t frag_end = pe_end & ~(frag_align - 1);

    unsigned count;

    /* system pages are non-contiguous, so no fragment optimization:
     * also bail out for invalid mappings or ranges too small to hold
     * a whole aligned fragment */
    if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
        (frag_start >= frag_end)) {

        count = (pe_end - pe_start) / 8;
        radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
                            RADEON_GPU_PAGE_SIZE, flags);
        return;
    }

    /* handle the 4K area at the beginning */
    if (pe_start != frag_start) {
        count = (frag_start - pe_start) / 8;
        radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
                            RADEON_GPU_PAGE_SIZE, flags);
        addr += RADEON_GPU_PAGE_SIZE * count;
    }

    /* handle the area in the middle with the fragment flag set */
    count = (frag_end - frag_start) / 8;
    radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
                        RADEON_GPU_PAGE_SIZE, flags | frag_flags);

    /* handle the 4K area at the end */
    if (frag_end != pe_end) {
        addr += RADEON_GPU_PAGE_SIZE * count;
        count = (pe_end - frag_end) / 8;
        radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
                            RADEON_GPU_PAGE_SIZE, flags);
    }
}

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to use for the update
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_update_ptes(struct radeon_device *rdev,
                                 struct radeon_vm *vm,
                                 struct radeon_ib *ib,
                                 uint64_t start, uint64_t end,
                                 uint64_t dst, uint32_t flags)
{
    /* mask selecting the PTE index within one page table */
    uint64_t mask = RADEON_VM_PTE_COUNT - 1;
    uint64_t last_pte = ~0, last_dst = ~0;
    unsigned count = 0;
    uint64_t addr;

    /* walk over the address space and update the page tables,
     * one page table at a time, merging contiguous PTE runs */
    for (addr = start; addr < end; ) {
        uint64_t pt_idx = addr >> radeon_vm_block_size;
        struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
        unsigned nptes;
        uint64_t pte;
        int r;

        /* sync to previous users of this page table */
        radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true);
        r = dma_resv_reserve_fences(pt->tbo.base.resv, 1);
        if (r)
            return r;

        /* number of PTEs to touch in this table: either up to @end
         * or up to the end of the table */
        if ((addr & ~mask) == (end & ~mask))
            nptes = end - addr;
        else
            nptes = RADEON_VM_PTE_COUNT - (addr & mask);

        pte = radeon_bo_gpu_offset(pt);
        pte += (addr & mask) * 8;

        if ((last_pte + 8 * count) != pte) {

            /* PTE addresses not contiguous: flush pending run */
            if (count) {
                radeon_vm_frag_ptes(rdev, ib, last_pte,
                                    last_pte + 8 * count,
                                    last_dst, flags);
            }

            count = nptes;
            last_pte = pte;
            last_dst = dst;
        } else {
            count += nptes;
        }

        addr += nptes;
        dst += nptes * RADEON_GPU_PAGE_SIZE;
    }

    /* flush the final run */
    if (count) {
        radeon_vm_frag_ptes(rdev, ib, last_pte,
                            last_pte + 8 * count,
                            last_dst, flags);
    }

    return 0;
}

/**
 * radeon_vm_fence_pts - fence page tables after an update
 *
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @fence: fence to use
 *
 * Fence the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_fence_pts(struct radeon_vm *vm,
                                uint64_t start, uint64_t end,
                                struct radeon_fence *fence)
{
    unsigned i;

    /* convert the page range into a page-table index range */
    start >>= radeon_vm_block_size;
    end = (end - 1) >> radeon_vm_block_size;

    for (i = start; i <= end; ++i)
        radeon_bo_fence(vm->page_tables[i].bo, fence, true);
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: radeon buffer virtual address object
 * @mem: ttm mem, or NULL to unmap the BO
 *
 * Fill in the page table entries for @bo (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object have to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
                        struct radeon_bo_va *bo_va,
                        struct ttm_resource *mem)
{
    struct radeon_vm *vm = bo_va->vm;
    struct radeon_ib ib;
    unsigned nptes, ncmds, ndw;
    uint64_t addr;
    uint32_t flags;
    int r;

    if (!bo_va->it.start) {
        dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
                bo_va->bo, vm);
        return -EINVAL;
    }

    /* move the bo_va off the invalidated/freed list it is on */
    spin_lock(&vm->status_lock);
    if (mem) {
        if (list_empty(&bo_va->vm_status)) {
            /* nothing to update */
            spin_unlock(&vm->status_lock);
            return 0;
        }
        list_del_init(&bo_va->vm_status);
    } else {
        /* unmapping: the range goes back to the cleared state */
        list_del(&bo_va->vm_status);
        list_add(&bo_va->vm_status, &vm->cleared);
    }
    spin_unlock(&vm->status_lock);

    /* recompute the hw flags from the new placement */
    bo_va->flags &= ~RADEON_VM_PAGE_VALID;
    bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
    bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
    if (bo_va->bo && radeon_ttm_tt_is_readonly(rdev, bo_va->bo->tbo.ttm))
        bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;

    if (mem) {
        addr = (u64)mem->start << PAGE_SHIFT;
        if (mem->mem_type != TTM_PL_SYSTEM)
            bo_va->flags |= RADEON_VM_PAGE_VALID;

        if (mem->mem_type == TTM_PL_TT) {
            bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
            if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
                bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

        } else {
            addr += rdev->vm_manager.vram_base_offset;
        }
    } else {
        addr = 0;
    }

    trace_radeon_vm_bo_update(bo_va);

    nptes = bo_va->it.last - bo_va->it.start + 1;

    /* reserve space for one command every (1 << BLOCK_SIZE) entries
       or 2k dwords (whatever is smaller) */
    ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

    /* padding, etc. */
    ndw = 64;

    flags = radeon_vm_page_flags(bo_va->flags);
    if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
        /* only copy commands needed */
        ndw += ncmds * 7;

    } else if (flags & R600_PTE_SYSTEM) {
        /* header for write data commands */
        ndw += ncmds * 4;

        /* body of write data command */
        ndw += nptes * 2;

    } else {
        /* set page commands needed */
        ndw += ncmds * 10;

        /* two extra commands for begin/end of fragment */
        ndw += 2 * 10;
    }

    /* update too big for an IB */
    if (ndw > 0xfffff)
        return -ENOMEM;

    r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
    if (r)
        return r;
    ib.length_dw = 0;

    if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
        /* clearing PTEs: wait for all previous users of the id so the
         * pages cannot be accessed anymore afterwards */
        unsigned i;

        for (i = 0; i < RADEON_NUM_RINGS; ++i)
            radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
    }

    r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
                              bo_va->it.last + 1, addr,
                              radeon_vm_page_flags(bo_va->flags));
    if (r) {
        radeon_ib_free(rdev, &ib);
        return r;
    }

    radeon_asic_vm_pad_ib(rdev, &ib);
    WARN_ON(ib.length_dw > ndw);

    r = radeon_ib_schedule(rdev, &ib, NULL, false);
    if (r) {
        radeon_ib_free(rdev, &ib);
        return r;
    }
    ib.fence->is_vm_update = true;
    /* protect the touched page tables with the update fence */
    radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
    radeon_fence_unref(&bo_va->last_pt_update);
    bo_va->last_pt_update = radeon_fence_ref(ib.fence);
    radeon_ib_free(rdev, &ib);

    return 0;
}

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
                          struct radeon_vm *vm)
{
    struct radeon_bo_va *bo_va;
    int r = 0;

    spin_lock(&vm->status_lock);
    while (!list_empty(&vm->freed)) {
        bo_va = list_first_entry(&vm->freed,
                                 struct radeon_bo_va, vm_status);
        /* drop the lock while submitting the clearing update */
        spin_unlock(&vm->status_lock);

        r = radeon_vm_bo_update(rdev, bo_va, NULL);
        /* drop the BO reference taken when the clone was queued */
        radeon_bo_unref(&bo_va->bo);
        radeon_fence_unref(&bo_va->last_pt_update);
        spin_lock(&vm->status_lock);
        list_del(&bo_va->vm_status);
        kfree(bo_va);
        if (r)
            break;

    }
    spin_unlock(&vm->status_lock);
    return r;

}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
1081 */ 1082 int radeon_vm_clear_invalids(struct radeon_device *rdev, 1083 struct radeon_vm *vm) 1084 { 1085 struct radeon_bo_va *bo_va; 1086 int r; 1087 1088 spin_lock(&vm->status_lock); 1089 while (!list_empty(&vm->invalidated)) { 1090 bo_va = list_first_entry(&vm->invalidated, 1091 struct radeon_bo_va, vm_status); 1092 spin_unlock(&vm->status_lock); 1093 1094 r = radeon_vm_bo_update(rdev, bo_va, NULL); 1095 if (r) 1096 return r; 1097 1098 spin_lock(&vm->status_lock); 1099 } 1100 spin_unlock(&vm->status_lock); 1101 1102 return 0; 1103 } 1104 1105 /** 1106 * radeon_vm_bo_rmv - remove a bo to a specific vm 1107 * 1108 * @rdev: radeon_device pointer 1109 * @bo_va: requested bo_va 1110 * 1111 * Remove @bo_va->bo from the requested vm (cayman+). 1112 * 1113 * Object have to be reserved! 1114 */ 1115 void radeon_vm_bo_rmv(struct radeon_device *rdev, 1116 struct radeon_bo_va *bo_va) 1117 { 1118 struct radeon_vm *vm = bo_va->vm; 1119 1120 list_del(&bo_va->bo_list); 1121 1122 mutex_lock(&vm->mutex); 1123 if (bo_va->it.start || bo_va->it.last) 1124 interval_tree_remove(&bo_va->it, &vm->va); 1125 1126 spin_lock(&vm->status_lock); 1127 list_del(&bo_va->vm_status); 1128 if (bo_va->it.start || bo_va->it.last) { 1129 bo_va->bo = radeon_bo_ref(bo_va->bo); 1130 list_add(&bo_va->vm_status, &vm->freed); 1131 } else { 1132 radeon_fence_unref(&bo_va->last_pt_update); 1133 kfree(bo_va); 1134 } 1135 spin_unlock(&vm->status_lock); 1136 1137 mutex_unlock(&vm->mutex); 1138 } 1139 1140 /** 1141 * radeon_vm_bo_invalidate - mark the bo as invalid 1142 * 1143 * @rdev: radeon_device pointer 1144 * @bo: radeon buffer object 1145 * 1146 * Mark @bo as invalid (cayman+). 
1147 */ 1148 void radeon_vm_bo_invalidate(struct radeon_device *rdev, 1149 struct radeon_bo *bo) 1150 { 1151 struct radeon_bo_va *bo_va; 1152 1153 list_for_each_entry(bo_va, &bo->va, bo_list) { 1154 spin_lock(&bo_va->vm->status_lock); 1155 if (list_empty(&bo_va->vm_status) && 1156 (bo_va->it.start || bo_va->it.last)) 1157 list_add(&bo_va->vm_status, &bo_va->vm->invalidated); 1158 spin_unlock(&bo_va->vm->status_lock); 1159 } 1160 } 1161 1162 /** 1163 * radeon_vm_init - initialize a vm instance 1164 * 1165 * @rdev: radeon_device pointer 1166 * @vm: requested vm 1167 * 1168 * Init @vm fields (cayman+). 1169 */ 1170 int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) 1171 { 1172 const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, 1173 RADEON_VM_PTE_COUNT * 8); 1174 unsigned pd_size, pd_entries, pts_size; 1175 int i, r; 1176 1177 vm->ib_bo_va = NULL; 1178 for (i = 0; i < RADEON_NUM_RINGS; ++i) { 1179 vm->ids[i].id = 0; 1180 vm->ids[i].flushed_updates = NULL; 1181 vm->ids[i].last_id_use = NULL; 1182 } 1183 mutex_init(&vm->mutex); 1184 vm->va = RB_ROOT_CACHED; 1185 spin_lock_init(&vm->status_lock); 1186 INIT_LIST_HEAD(&vm->invalidated); 1187 INIT_LIST_HEAD(&vm->freed); 1188 INIT_LIST_HEAD(&vm->cleared); 1189 1190 pd_size = radeon_vm_directory_size(rdev); 1191 pd_entries = radeon_vm_num_pdes(rdev); 1192 1193 /* allocate page table array */ 1194 pts_size = pd_entries * sizeof(struct radeon_vm_pt); 1195 vm->page_tables = kzalloc(pts_size, GFP_KERNEL); 1196 if (vm->page_tables == NULL) { 1197 DRM_ERROR("Cannot allocate memory for page table array\n"); 1198 return -ENOMEM; 1199 } 1200 1201 r = radeon_bo_create(rdev, pd_size, align, true, 1202 RADEON_GEM_DOMAIN_VRAM, 0, NULL, 1203 NULL, &vm->page_directory); 1204 if (r) { 1205 kfree(vm->page_tables); 1206 vm->page_tables = NULL; 1207 return r; 1208 } 1209 r = radeon_vm_clear_bo(rdev, vm->page_directory); 1210 if (r) { 1211 radeon_bo_unref(&vm->page_directory); 1212 vm->page_directory = NULL; 1213 
kfree(vm->page_tables); 1214 vm->page_tables = NULL; 1215 return r; 1216 } 1217 1218 return 0; 1219 } 1220 1221 /** 1222 * radeon_vm_fini - tear down a vm instance 1223 * 1224 * @rdev: radeon_device pointer 1225 * @vm: requested vm 1226 * 1227 * Tear down @vm (cayman+). 1228 * Unbind the VM and remove all bos from the vm bo list 1229 */ 1230 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) 1231 { 1232 struct radeon_bo_va *bo_va, *tmp; 1233 int i, r; 1234 1235 if (!RB_EMPTY_ROOT(&vm->va.rb_root)) 1236 dev_err(rdev->dev, "still active bo inside vm\n"); 1237 1238 rbtree_postorder_for_each_entry_safe(bo_va, tmp, 1239 &vm->va.rb_root, it.rb) { 1240 interval_tree_remove(&bo_va->it, &vm->va); 1241 r = radeon_bo_reserve(bo_va->bo, false); 1242 if (!r) { 1243 list_del_init(&bo_va->bo_list); 1244 radeon_bo_unreserve(bo_va->bo); 1245 radeon_fence_unref(&bo_va->last_pt_update); 1246 kfree(bo_va); 1247 } 1248 } 1249 list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { 1250 radeon_bo_unref(&bo_va->bo); 1251 radeon_fence_unref(&bo_va->last_pt_update); 1252 kfree(bo_va); 1253 } 1254 1255 for (i = 0; i < radeon_vm_num_pdes(rdev); i++) 1256 radeon_bo_unref(&vm->page_tables[i].bo); 1257 kfree(vm->page_tables); 1258 1259 radeon_bo_unref(&vm->page_directory); 1260 1261 for (i = 0; i < RADEON_NUM_RINGS; ++i) { 1262 radeon_fence_unref(&vm->ids[i].flushed_updates); 1263 radeon_fence_unref(&vm->ids[i].last_id_use); 1264 } 1265 1266 mutex_destroy(&vm->mutex); 1267 } 1268