// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_vm.h"
#include "amdgpu_job.h"

/*
 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt_dfs_safe
 */
struct amdgpu_vm_pt_cursor {
	uint64_t pfn;
	struct amdgpu_vm_bo_base *parent;
	struct amdgpu_vm_bo_base *entry;
	unsigned int level;
};

/**
 * amdgpu_vm_pt_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
					     unsigned int level)
{
	switch (level) {
	case AMDGPU_VM_PDB3:
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		return 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
	case AMDGPU_VM_PTB:
		return 0;
	default:
		return ~0;
	}
}

/**
 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
					     unsigned int level)
{
	unsigned int shift;

	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
			>> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;

	/* For the page tables on the leaves */
	return AMDGPU_VM_PTE_COUNT(adev);
}
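
/*
 * Editorial worked example (illustrative only; the real values depend on the
 * ASIC configuration): with vm_manager.block_size == 9, the formula above
 * yields
 *
 *	AMDGPU_VM_PTB  -> shift  0 (each PTE covers one 4K GPU page)
 *	AMDGPU_VM_PDB0 -> shift  9 (each entry covers 2^9 pages == 2MB)
 *	AMDGPU_VM_PDB1 -> shift 18
 *	AMDGPU_VM_PDB2 -> shift 27
 *	AMDGPU_VM_PDB3 -> shift 36
 *
 * A leaf PTB then holds AMDGPU_VM_PTE_COUNT() == 1 << block_size == 512
 * entries, while the root directory is sized from max_pfn rounded up to the
 * root level shift.
 */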

/**
 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The mask to extract the entry number of a PD/PT from an address.
 */
static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
					  unsigned int level)
{
	if (level <= adev->vm_manager.root_level)
		return 0xffffffff;
	else if (level != AMDGPU_VM_PTB)
		return 0x1ff;
	else
		return AMDGPU_VM_PTE_COUNT(adev) - 1;
}

/**
 * amdgpu_vm_pt_size - returns the size of the page table in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
				      unsigned int level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
}

/**
 * amdgpu_vm_pt_parent - get the parent page directory
 *
 * @pt: child page table
 *
 * Helper to get the parent entry for the child page table. NULL if we are at
 * the root page directory.
 */
static struct amdgpu_vm_bo_base *
amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
{
	struct amdgpu_bo *parent = pt->bo->parent;

	if (!parent)
		return NULL;

	return parent->vm_bo;
}

/**
 * amdgpu_vm_pt_start - start PD/PT walk
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start address of the walk
 * @cursor: state to initialize
 *
 * Initialize an amdgpu_vm_pt_cursor to start a walk.
 */
static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t start,
			       struct amdgpu_vm_pt_cursor *cursor)
{
	cursor->pfn = start;
	cursor->parent = NULL;
	cursor->entry = &vm->root;
	cursor->level = adev->vm_manager.root_level;
}

/**
 * amdgpu_vm_pt_descendant - go to child node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the child node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int mask, shift, idx;

	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
	    !cursor->entry->bo)
		return false;

	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);

	++cursor->level;
	idx = (cursor->pfn >> shift) & mask;
	cursor->parent = cursor->entry;
	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
	return true;
}

/**
 * amdgpu_vm_pt_sibling - go to sibling node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the sibling node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
				 struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned int shift, num_entries;
	struct amdgpu_bo_vm *parent;

	/* Root doesn't have a sibling */
	if (!cursor->parent)
		return false;

	/* Go to our parents and see if we got a sibling */
	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
	parent = to_amdgpu_bo_vm(cursor->parent->bo);

	if (cursor->entry == &parent->entries[num_entries - 1])
		return false;

	cursor->pfn += 1ULL << shift;
	cursor->pfn &= ~((1ULL << shift) - 1);
	++cursor->entry;
	return true;
}

/**
 * amdgpu_vm_pt_ancestor - go to parent node
 *
 * @cursor: current state
 *
 * Walk to the parent node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->parent)
		return false;

	--cursor->level;
	cursor->entry = cursor->parent;
	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
	return true;
}

/**
 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk the PD/PT tree to the next node.
 */
static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
			      struct amdgpu_vm_pt_cursor *cursor)
{
	/* First try a newborn child */
	if (amdgpu_vm_pt_descendant(adev, cursor))
		return;

	/* If that didn't work try to find a sibling */
	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
		/* No sibling, go to our parents and grandparents */
		if (!amdgpu_vm_pt_ancestor(cursor)) {
			cursor->pfn = ~0ll;
			return;
		}
	}
}

/**
 * amdgpu_vm_pt_first_dfs - start a depth-first search
 *
 * @adev: amdgpu_device structure
 * @vm: amdgpu_vm structure
 * @start: optional cursor to start with
 * @cursor: state to initialize
 *
 * Starts a depth-first traversal of the PD/PT tree.
 */
static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_vm_pt_cursor *start,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	if (start)
		*cursor = *start;
	else
		amdgpu_vm_pt_start(adev, vm, 0, cursor);

	while (amdgpu_vm_pt_descendant(adev, cursor))
		;
}

/**
 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 *
 * @start: starting point for the search
 * @entry: current entry
 *
 * Returns:
 * True when the search should continue, false otherwise.
 */
static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
				      struct amdgpu_vm_bo_base *entry)
{
	return entry && (!start || entry != start->entry);
}

/**
 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 *
 * @adev: amdgpu_device structure
 * @cursor: current state
 *
 * Move the cursor to the next node in a depth-first search.
 */
static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->entry)
		return;

	if (!cursor->parent)
		cursor->entry = NULL;
	else if (amdgpu_vm_pt_sibling(adev, cursor))
		while (amdgpu_vm_pt_descendant(adev, cursor))
			;
	else
		amdgpu_vm_pt_ancestor(cursor);
}

/*
 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 */
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)	\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),	\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
	     amdgpu_vm_pt_continue_dfs((start), (entry));		\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
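
/*
 * Editorial usage sketch (mirrors the real caller amdgpu_vm_pt_free_root()
 * below): the cursor and entry are provided by the caller, and the walk
 * visits children before their parent, so it is safe to free the entry that
 * was just returned.
 *
 *	struct amdgpu_vm_pt_cursor cursor;
 *	struct amdgpu_vm_bo_base *entry;
 *
 *	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
 *		if (entry)
 *			amdgpu_vm_pt_free(entry);
 *	}
 */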

/**
 * amdgpu_vm_pt_clear - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @vmbo: BO to clear
 * @immediate: use an immediate update
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		       struct amdgpu_bo_vm *vmbo, bool immediate)
{
	unsigned int level = adev->vm_manager.root_level;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = &vmbo->bo;
	unsigned int entries;
	struct amdgpu_bo *bo = &vmbo->bo;
	uint64_t value = 0, flags = 0;
	uint64_t addr;
	int r, idx;

	/* Figure out our place in the hierarchy */
	if (ancestor->parent) {
		++level;
		while (ancestor->parent->parent) {
			++level;
			ancestor = ancestor->parent;
		}
	}

	entries = amdgpu_bo_size(bo) / 8;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		return r;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return -ENODEV;

	r = vm->update_funcs->map_table(vmbo);
	if (r)
		goto exit;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL,
				      AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR);
	if (r)
		goto exit;

	addr = 0;

	if (adev->asic_type >= CHIP_VEGA10) {
		if (level != AMDGPU_VM_PTB) {
			/* Handle leaf PDEs as PTEs */
			flags |= AMDGPU_PDE_PTE_FLAG(adev);
			amdgpu_gmc_get_vm_pde(adev, level,
					      &value, &flags);
		} else {
			/* Workaround for fault priority problem on GMC9 */
			flags = AMDGPU_PTE_EXECUTABLE | adev->gmc.init_pte_flags;
		}
	}

	r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
				     value, flags);
	if (r)
		goto exit;

	r = vm->update_funcs->commit(&params, NULL);
exit:
	drm_dev_exit(idx);
	return r;
}

/**
 * amdgpu_vm_pt_create - create bo for PD/PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use an immediate update
 * @vmbo: pointer to the buffer object pointer
 * @xcp_id: GPU partition id
 */
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			int level, bool immediate, struct amdgpu_bo_vm **vmbo,
			int32_t xcp_id)
{
	struct amdgpu_bo_param bp;
	unsigned int num_entries;

	memset(&bp, 0, sizeof(bp));

	bp.size = amdgpu_vm_pt_size(adev, level);
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;

	if (!adev->gmc.is_app_apu)
		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	else
		bp.domain = AMDGPU_GEM_DOMAIN_GTT;

	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;

	if (level < AMDGPU_VM_PTB)
		num_entries = amdgpu_vm_pt_num_entries(adev, level);
	else
		num_entries = 0;

	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);

	if (vm->use_cpu_for_update)
		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	bp.type = ttm_bo_type_kernel;
	bp.no_wait_gpu = immediate;
	bp.xcp_id_plus1 = xcp_id + 1;

	if (vm->root.bo)
		bp.resv = vm->root.bo->tbo.base.resv;

	return amdgpu_bo_create_vm(adev, &bp, vmbo);
}
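
/*
 * Editorial note on the BO sizing above: bp.bo_ptr_size uses struct_size() so
 * that the amdgpu_bo_vm allocation has room for the trailing entries[] array,
 * i.e. roughly sizeof(struct amdgpu_bo_vm) + num_entries * sizeof(entries[0]).
 * Page tables (AMDGPU_VM_PTB) have no children and therefore get
 * num_entries == 0, while directories reserve one amdgpu_vm_bo_base per
 * possible child.
 */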

/**
 * amdgpu_vm_pt_alloc - Allocate a specific page table
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @cursor: Which page table to allocate
 * @immediate: use an immediate update
 *
 * Make sure a specific page table or directory is allocated.
 *
 * Returns:
 * 0 if the page table was allocated or was already present, negative errno
 * if an error occurred.
 */
static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_vm_pt_cursor *cursor,
			      bool immediate)
{
	struct amdgpu_vm_bo_base *entry = cursor->entry;
	struct amdgpu_bo *pt_bo;
	struct amdgpu_bo_vm *pt;
	int r;

	if (entry->bo)
		return 0;

	amdgpu_vm_eviction_unlock(vm);
	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
				vm->root.bo->xcp_id);
	amdgpu_vm_eviction_lock(vm);
	if (r)
		return r;

	/* Keep a reference to the parent directory to avoid
	 * freeing them up in the wrong order.
	 */
	pt_bo = &pt->bo;
	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
	if (r)
		goto error_free_pt;

	return 0;

error_free_pt:
	amdgpu_bo_unref(&pt_bo);
	return r;
}

/**
 * amdgpu_vm_pt_free - free one PD/PT
 *
 * @entry: PDE to free
 */
static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
{
	if (!entry->bo)
		return;

	amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1);
	entry->bo->vm_bo = NULL;
	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);

	spin_lock(&entry->vm->status_lock);
	list_del(&entry->vm_status);
	spin_unlock(&entry->vm->status_lock);
	amdgpu_bo_unref(&entry->bo);
}

/**
 * amdgpu_vm_pt_free_list - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @params: see amdgpu_vm_update_params definition
 *
 * Free the page directory objects saved in the flush list
 */
void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
			    struct amdgpu_vm_update_params *params)
{
	struct amdgpu_vm_bo_base *entry, *next;
	bool unlocked = params->unlocked;

	if (list_empty(&params->tlb_flush_waitlist))
		return;

	/*
	 * Unlocked unmaps only clear page table leaves; warn if we are asked
	 * to free page table entries here.
	 */
	WARN_ON(unlocked);

	list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
		amdgpu_vm_pt_free(entry);
}

/**
 * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
 *
 * @params: parameters for the update
 * @cursor: first PT entry to start DF search from, non-NULL
 *
 * This list will be freed after TLB flush.
 */
static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	struct amdgpu_vm_pt_cursor seek;
	struct amdgpu_vm_bo_base *entry;

	spin_lock(&params->vm->status_lock);
	for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
		if (entry && entry->bo)
			list_move(&entry->vm_status, &params->tlb_flush_waitlist);
	}

	/* enter start node now */
	list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
	spin_unlock(&params->vm->status_lock);
}
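
/*
 * Editorial note on the flush list built above: amdgpu_vm_pt_add_list() only
 * moves the page table entries onto params->tlb_flush_waitlist; the backing
 * BOs stay alive until the caller has queued the TLB flush and then calls
 * amdgpu_vm_pt_free_list(), which actually frees them. This keeps the GPU
 * from walking into already freed page tables before the flush completes.
 */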

/**
 * amdgpu_vm_pt_free_root - free root PD
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Free the root page directory and everything below it.
 */
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
		if (entry)
			amdgpu_vm_pt_free(entry);
	}
}

/**
 * amdgpu_vm_pde_update - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
			 struct amdgpu_vm_bo_base *entry)
{
	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
	struct amdgpu_bo *bo, *pbo;
	struct amdgpu_vm *vm = params->vm;
	uint64_t pde, pt, flags;
	unsigned int level;

	if (WARN_ON(!parent))
		return -EINVAL;

	bo = parent->bo;
	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
					1, 0, flags);
}
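
/*
 * Editorial example of the PDE offset math above: each PDE is 8 bytes, so for
 * the child at index N in the parent directory the update writes a single
 * 8-byte entry at byte offset N * 8, containing the child BO address and the
 * PDE flags returned by amdgpu_gmc_get_pde_for_bo().
 */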

/**
 * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags
 *
 * @adev: amdgpu_device pointer
 * @flags: pointer to PTE flags
 *
 * Update PTE no-retry flags when TF is enabled.
 */
static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev,
					       uint64_t *flags)
{
	/*
	 * Update no-retry flags with the corresponding TF
	 * no-retry combination.
	 */
	if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
		*flags &= ~AMDGPU_VM_NORETRY_FLAGS;
		*flags |= adev->gmc.noretry_flags;
	}
}

/*
 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
 *
 * Make sure to set the right flags for the PTEs at the desired level.
 */
static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
				       struct amdgpu_bo_vm *pt,
				       unsigned int level,
				       uint64_t pe, uint64_t addr,
				       unsigned int count, uint32_t incr,
				       uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;

	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE_FLAG(params->adev);
		amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);

	} else if (adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) {

		/* Workaround for fault priority problem on GMC9 */
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

	/*
	 * Update no-retry flags to use the no-retry flag combination
	 * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination
	 * does not work when TF is enabled. So, replace them with
	 * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for
	 * all cases.
	 */
	if (level == AMDGPU_VM_PTB)
		amdgpu_vm_pte_update_noretry_flags(adev, &flags);

	/* APUs mapping system memory may need different MTYPEs on different
	 * NUMA nodes. Only do this for contiguous ranges that can be assumed
	 * to be on the same NUMA node.
	 */
	if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
	    adev->gmc.gmc_funcs->override_vm_pte_flags &&
	    num_possible_nodes() > 1 && !params->pages_addr && params->allow_override)
		amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags);

	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
					 flags);
}

/**
 * amdgpu_vm_pte_fragment - get fragment for PTEs
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @flags: hw mapping flags
 * @frag: resulting fragment size
 * @frag_end: end of this fragment
 *
 * Returns the first possible fragment for the start and end address.
 */
static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
				   uint64_t start, uint64_t end, uint64_t flags,
				   unsigned int *frag, uint64_t *frag_end)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 *
	 * Starting with Vega10 the fragment size only controls the L1. The L2
	 * is now directly fed with small/huge/giant pages from the walker.
	 */
	unsigned int max_frag;

	if (params->adev->asic_type < CHIP_VEGA10)
		max_frag = params->adev->vm_manager.fragment_size;
	else
		max_frag = 31;

	/* system pages are not physically contiguous */
	if (params->pages_addr) {
		*frag = 0;
		*frag_end = end;
		return;
	}

	/* This intentionally wraps around if no bit is set */
	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1 << *frag);
	}
}
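
/*
 * Editorial fragment selection example (addresses in GPU pages, illustrative
 * values): for start == 0x200 and end == 0x600, ffs(start) - 1 == 9 and
 * fls64(end - start) - 1 == 10, so *frag == 9, i.e. a 2MB fragment on 4K
 * pages, and *frag_end == start + (1 << 9) == 0x400. The remainder of the
 * range is handled by the next call from the update loop below.
 */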

/**
 * amdgpu_vm_ptes_update - make sure that page tables are valid
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, advanced as the function walks the range
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
			  uint64_t start, uint64_t end,
			  uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
			       &frag_end);

	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		unsigned int shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!params->unlocked) {
			/* make sure that the page tables covering the
			 * address range are actually allocated
			 */
			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
					       &cursor, params->immediate);
			if (r)
				return r;
		}

		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
		if (params->unlocked) {
			/* Unlocked updates are only allowed on the leaves */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (adev->asic_type < CHIP_VEGA10 &&
			   (flags & AMDGPU_PTE_VALID)) {
			/* No huge page support before GMC v9 */
			if (cursor.level != AMDGPU_VM_PTB) {
				if (!amdgpu_vm_pt_descendant(adev, &cursor))
					return -ENOENT;
				continue;
			}
		} else if (frag < shift) {
			/* We can't use this level when the fragment size is
			 * smaller than the address shift. Go to the next
			 * child entry and try again.
			 */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (frag >= parent_shift) {
			/* If the fragment size is even larger than the parent
			 * shift we should go up one level and check it again.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;
			continue;
		}

		pt = cursor.entry->bo;
		if (!pt) {
			/* We need all PDs and PTs for mapping something, */
			if (flags & AMDGPU_PTE_VALID)
				return -ENOENT;

			/* but unmapping something can happen at a higher
			 * level.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;

			pt = cursor.entry->bo;
			shift = parent_shift;
			frag_end = max(frag_end, ALIGN(frag_start + 1,
						       1ULL << shift));
		}

		/* Looks good so far, calculate parameters for the update */
		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
		pe_start = ((cursor.pfn >> shift) & mask) * 8;

		if (cursor.level < AMDGPU_VM_PTB && params->unlocked)
			/*
			 * For unlocked updates from the MMU notifier unmapping
			 * a huge page, the leaf is a PDE entry; only clear one
			 * entry and then search again for the next PDE or PTE
			 * leaf.
			 */
			entry_end = 1ULL << shift;
		else
			entry_end = ((uint64_t)mask + 1) << shift;
		entry_end += cursor.pfn & ~(entry_end - 1);
		entry_end = min(entry_end, end);

		do {
			struct amdgpu_vm *vm = params->vm;
			uint64_t upd_end = min(entry_end, frag_end);
			unsigned int nptes = (upd_end - frag_start) >> shift;
			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);

			/* This can happen when we set higher level PDs to
			 * silent to stop fault floods.
			 */
			nptes = max(nptes, 1u);

			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
						    min(nptes, 32u), dst, incr,
						    upd_flags,
						    vm->task_info ? vm->task_info->tgid : 0,
						    vm->immediate.fence_context);
			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
						   cursor.level, pe_start, dst,
						   nptes, incr, upd_flags);

			pe_start += nptes * 8;
			dst += nptes * incr;

			frag_start = upd_end;
			if (frag_start >= frag_end) {
				/* figure out the next fragment */
				amdgpu_vm_pte_fragment(params, frag_start, end,
						       flags, &frag, &frag_end);
				if (frag < shift)
					break;
			}
		} while (frag_start < entry_end);

		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
			/* Free all child entries.
			 * Update the tables with the flags and addresses and free up subsequent
			 * tables in the case of huge pages or freed up areas.
			 * This is the maximum you can free, because all other page tables are not
			 * completely covered by the range and so potentially still in use.
			 */
			while (cursor.pfn < frag_start) {
				/* Make sure previous mapping is freed */
				if (cursor.entry->bo) {
					params->needs_flush = true;
					amdgpu_vm_pt_add_list(params, &cursor);
				}
				amdgpu_vm_pt_next(adev, &cursor);
			}

		} else if (frag >= shift) {
			/* or just move on to the next on the same level. */
			amdgpu_vm_pt_next(adev, &cursor);
		}
	}

	return 0;
}
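
/*
 * Editorial walk example (assuming a GMC9-style setup with block_size == 9):
 * mapping a contiguous, 2MB aligned, 2MB sized range gives frag == 9. The
 * walker then descends from the root until it reaches AMDGPU_VM_PDB0, where
 * frag is neither smaller than the level shift (9) nor larger than or equal
 * to the parent shift (18), so a single PDE-as-PTE ("huge page") entry is
 * written instead of 512 individual PTEs. Any previously allocated page
 * tables below that entry are moved to the TLB flush waitlist and freed once
 * the flush has been queued.
 */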

/**
 * amdgpu_vm_pt_map_tables - make the BO of the root PD CPU accessible
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 *
 * Make the root page directory and everything below it CPU accessible.
 */
int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_bo_base *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {

		struct amdgpu_bo_vm *bo;
		int r;

		if (entry->bo) {
			bo = to_amdgpu_bo_vm(entry->bo);
			r = vm->update_funcs->map_table(bo);
			if (r)
				return r;
		}
	}

	return 0;
}