1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include <drm/drm_drv.h> 25 26 #include "amdgpu.h" 27 #include "amdgpu_trace.h" 28 #include "amdgpu_vm.h" 29 #include "amdgpu_job.h" 30 31 /* 32 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt 33 */ 34 struct amdgpu_vm_pt_cursor { 35 uint64_t pfn; 36 struct amdgpu_vm_bo_base *parent; 37 struct amdgpu_vm_bo_base *entry; 38 unsigned int level; 39 }; 40 41 /** 42 * amdgpu_vm_pt_level_shift - return the addr shift for each level 43 * 44 * @adev: amdgpu_device pointer 45 * @level: VMPT level 46 * 47 * Returns: 48 * The number of bits the pfn needs to be right shifted for a level. 49 */ 50 static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev, 51 unsigned int level) 52 { 53 switch (level) { 54 case AMDGPU_VM_PDB3: 55 case AMDGPU_VM_PDB2: 56 case AMDGPU_VM_PDB1: 57 case AMDGPU_VM_PDB0: 58 return 9 * (AMDGPU_VM_PDB0 - level) + 59 adev->vm_manager.block_size; 60 case AMDGPU_VM_PTB: 61 return 0; 62 default: 63 return ~0; 64 } 65 } 66 67 /** 68 * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT 69 * 70 * @adev: amdgpu_device pointer 71 * @level: VMPT level 72 * 73 * Returns: 74 * The number of entries in a page directory or page table. 75 */ 76 static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, 77 unsigned int level) 78 { 79 unsigned int shift; 80 81 shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); 82 if (level == adev->vm_manager.root_level) 83 /* For the root directory */ 84 return round_up(adev->vm_manager.max_pfn, 1ULL << shift) 85 >> shift; 86 else if (level != AMDGPU_VM_PTB) 87 /* Everything in between */ 88 return 512; 89 90 /* For the page tables on the leaves */ 91 return AMDGPU_VM_PTE_COUNT(adev); 92 } 93 94 /** 95 * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT 96 * 97 * @adev: amdgpu_device pointer 98 * @level: VMPT level 99 * 100 * Returns: 101 * The mask to extract the entry number of a PD/PT from an address. 102 */ 103 static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev, 104 unsigned int level) 105 { 106 if (level <= adev->vm_manager.root_level) 107 return 0xffffffff; 108 else if (level != AMDGPU_VM_PTB) 109 return 0x1ff; 110 else 111 return AMDGPU_VM_PTE_COUNT(adev) - 1; 112 } 113 114 /** 115 * amdgpu_vm_pt_size - returns the size of the page table in bytes 116 * 117 * @adev: amdgpu_device pointer 118 * @level: VMPT level 119 * 120 * Returns: 121 * The size of the BO for a page directory or page table in bytes. 122 */ 123 static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev, 124 unsigned int level) 125 { 126 return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8); 127 } 128 129 /** 130 * amdgpu_vm_pt_parent - get the parent page directory 131 * 132 * @pt: child page table 133 * 134 * Helper to get the parent entry for the child page table. NULL if we are at 135 * the root page directory. 136 */ 137 static struct amdgpu_vm_bo_base * 138 amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) 139 { 140 struct amdgpu_bo *parent = pt->bo->parent; 141 142 if (!parent) 143 return NULL; 144 145 return parent->vm_bo; 146 } 147 148 /** 149 * amdgpu_vm_pt_start - start PD/PT walk 150 * 151 * @adev: amdgpu_device pointer 152 * @vm: amdgpu_vm structure 153 * @start: start address of the walk 154 * @cursor: state to initialize 155 * 156 * Initialize a amdgpu_vm_pt_cursor to start a walk. 157 */ 158 static void amdgpu_vm_pt_start(struct amdgpu_device *adev, 159 struct amdgpu_vm *vm, uint64_t start, 160 struct amdgpu_vm_pt_cursor *cursor) 161 { 162 cursor->pfn = start; 163 cursor->parent = NULL; 164 cursor->entry = &vm->root; 165 cursor->level = adev->vm_manager.root_level; 166 } 167 168 /** 169 * amdgpu_vm_pt_descendant - go to child node 170 * 171 * @adev: amdgpu_device pointer 172 * @cursor: current state 173 * 174 * Walk to the child node of the current node. 175 * Returns: 176 * True if the walk was possible, false otherwise. 177 */ 178 static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, 179 struct amdgpu_vm_pt_cursor *cursor) 180 { 181 unsigned int mask, shift, idx; 182 183 if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || 184 !cursor->entry->bo) 185 return false; 186 187 mask = amdgpu_vm_pt_entries_mask(adev, cursor->level); 188 shift = amdgpu_vm_pt_level_shift(adev, cursor->level); 189 190 ++cursor->level; 191 idx = (cursor->pfn >> shift) & mask; 192 cursor->parent = cursor->entry; 193 cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; 194 return true; 195 } 196 197 /** 198 * amdgpu_vm_pt_sibling - go to sibling node 199 * 200 * @adev: amdgpu_device pointer 201 * @cursor: current state 202 * 203 * Walk to the sibling node of the current node. 204 * Returns: 205 * True if the walk was possible, false otherwise. 206 */ 207 static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, 208 struct amdgpu_vm_pt_cursor *cursor) 209 { 210 211 unsigned int shift, num_entries; 212 struct amdgpu_bo_vm *parent; 213 214 /* Root doesn't have a sibling */ 215 if (!cursor->parent) 216 return false; 217 218 /* Go to our parents and see if we got a sibling */ 219 shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1); 220 num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1); 221 parent = to_amdgpu_bo_vm(cursor->parent->bo); 222 223 if (cursor->entry == &parent->entries[num_entries - 1]) 224 return false; 225 226 cursor->pfn += 1ULL << shift; 227 cursor->pfn &= ~((1ULL << shift) - 1); 228 ++cursor->entry; 229 return true; 230 } 231 232 /** 233 * amdgpu_vm_pt_ancestor - go to parent node 234 * 235 * @cursor: current state 236 * 237 * Walk to the parent node of the current node. 238 * Returns: 239 * True if the walk was possible, false otherwise. 240 */ 241 static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) 242 { 243 if (!cursor->parent) 244 return false; 245 246 --cursor->level; 247 cursor->entry = cursor->parent; 248 cursor->parent = amdgpu_vm_pt_parent(cursor->parent); 249 return true; 250 } 251 252 /** 253 * amdgpu_vm_pt_next - get next PD/PT in hieratchy 254 * 255 * @adev: amdgpu_device pointer 256 * @cursor: current state 257 * 258 * Walk the PD/PT tree to the next node. 259 */ 260 static void amdgpu_vm_pt_next(struct amdgpu_device *adev, 261 struct amdgpu_vm_pt_cursor *cursor) 262 { 263 /* First try a newborn child */ 264 if (amdgpu_vm_pt_descendant(adev, cursor)) 265 return; 266 267 /* If that didn't worked try to find a sibling */ 268 while (!amdgpu_vm_pt_sibling(adev, cursor)) { 269 /* No sibling, go to our parents and grandparents */ 270 if (!amdgpu_vm_pt_ancestor(cursor)) { 271 cursor->pfn = ~0ll; 272 return; 273 } 274 } 275 } 276 277 /** 278 * amdgpu_vm_pt_first_dfs - start a deep first search 279 * 280 * @adev: amdgpu_device structure 281 * @vm: amdgpu_vm structure 282 * @start: optional cursor to start with 283 * @cursor: state to initialize 284 * 285 * Starts a deep first traversal of the PD/PT tree. 286 */ 287 static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, 288 struct amdgpu_vm *vm, 289 struct amdgpu_vm_pt_cursor *start, 290 struct amdgpu_vm_pt_cursor *cursor) 291 { 292 if (start) 293 *cursor = *start; 294 else 295 amdgpu_vm_pt_start(adev, vm, 0, cursor); 296 297 while (amdgpu_vm_pt_descendant(adev, cursor)) 298 ; 299 } 300 301 /** 302 * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue 303 * 304 * @start: starting point for the search 305 * @entry: current entry 306 * 307 * Returns: 308 * True when the search should continue, false otherwise. 309 */ 310 static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, 311 struct amdgpu_vm_bo_base *entry) 312 { 313 return entry && (!start || entry != start->entry); 314 } 315 316 /** 317 * amdgpu_vm_pt_next_dfs - get the next node for a deep first search 318 * 319 * @adev: amdgpu_device structure 320 * @cursor: current state 321 * 322 * Move the cursor to the next node in a deep first search. 323 */ 324 static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, 325 struct amdgpu_vm_pt_cursor *cursor) 326 { 327 if (!cursor->entry) 328 return; 329 330 if (!cursor->parent) 331 cursor->entry = NULL; 332 else if (amdgpu_vm_pt_sibling(adev, cursor)) 333 while (amdgpu_vm_pt_descendant(adev, cursor)) 334 ; 335 else 336 amdgpu_vm_pt_ancestor(cursor); 337 } 338 339 /* 340 * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs 341 */ 342 #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ 343 for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ 344 (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ 345 amdgpu_vm_pt_continue_dfs((start), (entry)); \ 346 (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) 347 348 /** 349 * amdgpu_vm_pt_clear - initially clear the PDs/PTs 350 * 351 * @adev: amdgpu_device pointer 352 * @vm: VM to clear BO from 353 * @vmbo: BO to clear 354 * @immediate: use an immediate update 355 * 356 * Root PD needs to be reserved when calling this. 357 * 358 * Returns: 359 * 0 on success, errno otherwise. 360 */ 361 int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, 362 struct amdgpu_bo_vm *vmbo, bool immediate) 363 { 364 unsigned int level = adev->vm_manager.root_level; 365 struct ttm_operation_ctx ctx = { true, false }; 366 struct amdgpu_vm_update_params params; 367 struct amdgpu_bo *ancestor = &vmbo->bo; 368 unsigned int entries; 369 struct amdgpu_bo *bo = &vmbo->bo; 370 uint64_t value = 0, flags = 0; 371 uint64_t addr; 372 int r, idx; 373 374 /* Figure out our place in the hierarchy */ 375 if (ancestor->parent) { 376 ++level; 377 while (ancestor->parent->parent) { 378 ++level; 379 ancestor = ancestor->parent; 380 } 381 } 382 383 entries = amdgpu_bo_size(bo) / 8; 384 385 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 386 if (r) 387 return r; 388 389 if (!drm_dev_enter(adev_to_drm(adev), &idx)) 390 return -ENODEV; 391 392 r = vm->update_funcs->map_table(vmbo); 393 if (r) 394 goto exit; 395 396 memset(¶ms, 0, sizeof(params)); 397 params.adev = adev; 398 params.vm = vm; 399 params.immediate = immediate; 400 401 r = vm->update_funcs->prepare(¶ms, NULL, 402 AMDGPU_KERNEL_JOB_ID_VM_PT_CLEAR); 403 if (r) 404 goto exit; 405 406 addr = 0; 407 408 if (adev->asic_type >= CHIP_VEGA10) { 409 if (level != AMDGPU_VM_PTB) { 410 /* Handle leaf PDEs as PTEs */ 411 flags |= AMDGPU_PDE_PTE_FLAG(adev); 412 amdgpu_gmc_get_vm_pde(adev, level, 413 &value, &flags); 414 } else { 415 /* Workaround for fault priority problem on GMC9 */ 416 flags = AMDGPU_PTE_EXECUTABLE | adev->gmc.init_pte_flags; 417 } 418 } 419 420 r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, 421 value, flags); 422 if (r) 423 goto exit; 424 425 r = vm->update_funcs->commit(¶ms, NULL); 426 exit: 427 drm_dev_exit(idx); 428 return r; 429 } 430 431 /** 432 * amdgpu_vm_pt_create - create bo for PD/PT 433 * 434 * @adev: amdgpu_device pointer 435 * @vm: requesting vm 436 * @level: the page table level 437 * @immediate: use a immediate update 438 * @vmbo: pointer to the buffer object pointer 439 * @xcp_id: GPU partition id 440 */ 441 int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, 442 int level, bool immediate, struct amdgpu_bo_vm **vmbo, 443 int32_t xcp_id) 444 { 445 struct amdgpu_bo_param bp; 446 unsigned int num_entries; 447 448 memset(&bp, 0, sizeof(bp)); 449 450 bp.size = amdgpu_vm_pt_size(adev, level); 451 bp.byte_align = AMDGPU_GPU_PAGE_SIZE; 452 453 if (!adev->gmc.is_app_apu) 454 bp.domain = AMDGPU_GEM_DOMAIN_VRAM; 455 else 456 bp.domain = AMDGPU_GEM_DOMAIN_GTT; 457 458 bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); 459 bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 460 AMDGPU_GEM_CREATE_CPU_GTT_USWC; 461 462 if (level < AMDGPU_VM_PTB) 463 num_entries = amdgpu_vm_pt_num_entries(adev, level); 464 else 465 num_entries = 0; 466 467 bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); 468 469 if (vm->use_cpu_for_update) 470 bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 471 472 bp.type = ttm_bo_type_kernel; 473 bp.no_wait_gpu = immediate; 474 bp.xcp_id_plus1 = xcp_id + 1; 475 476 if (vm->root.bo) 477 bp.resv = vm->root.bo->tbo.base.resv; 478 479 return amdgpu_bo_create_vm(adev, &bp, vmbo); 480 } 481 482 /** 483 * amdgpu_vm_pt_alloc - Allocate a specific page table 484 * 485 * @adev: amdgpu_device pointer 486 * @vm: VM to allocate page tables for 487 * @cursor: Which page table to allocate 488 * @immediate: use an immediate update 489 * 490 * Make sure a specific page table or directory is allocated. 491 * 492 * Returns: 493 * 1 if page table needed to be allocated, 0 if page table was already 494 * allocated, negative errno if an error occurred. 495 */ 496 static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, 497 struct amdgpu_vm *vm, 498 struct amdgpu_vm_pt_cursor *cursor, 499 bool immediate) 500 { 501 struct amdgpu_vm_bo_base *entry = cursor->entry; 502 struct amdgpu_bo *pt_bo; 503 struct amdgpu_bo_vm *pt; 504 int r; 505 506 if (entry->bo) 507 return 0; 508 509 amdgpu_vm_eviction_unlock(vm); 510 r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt, 511 vm->root.bo->xcp_id); 512 amdgpu_vm_eviction_lock(vm); 513 if (r) 514 return r; 515 516 /* Keep a reference to the root directory to avoid 517 * freeing them up in the wrong order. 518 */ 519 pt_bo = &pt->bo; 520 pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); 521 amdgpu_vm_bo_base_init(entry, vm, pt_bo); 522 r = amdgpu_vm_pt_clear(adev, vm, pt, immediate); 523 if (r) 524 goto error_free_pt; 525 526 return 0; 527 528 error_free_pt: 529 amdgpu_bo_unref(&pt_bo); 530 return r; 531 } 532 533 /** 534 * amdgpu_vm_pt_free - free one PD/PT 535 * 536 * @entry: PDE to free 537 */ 538 static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) 539 { 540 if (!entry->bo) 541 return; 542 543 amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1); 544 entry->bo->vm_bo = NULL; 545 ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); 546 547 list_del(&entry->vm_status); 548 amdgpu_bo_unref(&entry->bo); 549 } 550 551 /** 552 * amdgpu_vm_pt_free_list - free PD/PT levels 553 * 554 * @adev: amdgpu device structure 555 * @params: see amdgpu_vm_update_params definition 556 * 557 * Free the page directory objects saved in the flush list 558 */ 559 void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, 560 struct amdgpu_vm_update_params *params) 561 { 562 struct amdgpu_vm_bo_base *entry, *next; 563 bool unlocked = params->unlocked; 564 565 if (list_empty(¶ms->tlb_flush_waitlist)) 566 return; 567 568 /* 569 * unlocked unmap clear page table leaves, warning to free the page entry. 570 */ 571 WARN_ON(unlocked); 572 573 list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status) 574 amdgpu_vm_pt_free(entry); 575 } 576 577 /** 578 * amdgpu_vm_pt_add_list - add PD/PT level to the flush list 579 * 580 * @params: parameters for the update 581 * @cursor: first PT entry to start DF search from, non NULL 582 * 583 * This list will be freed after TLB flush. 584 */ 585 static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params, 586 struct amdgpu_vm_pt_cursor *cursor) 587 { 588 struct amdgpu_vm_pt_cursor seek; 589 struct amdgpu_vm_bo_base *entry; 590 591 for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { 592 if (entry && entry->bo) 593 list_move(&entry->vm_status, ¶ms->tlb_flush_waitlist); 594 } 595 596 /* enter start node now */ 597 list_move(&cursor->entry->vm_status, ¶ms->tlb_flush_waitlist); 598 } 599 600 /** 601 * amdgpu_vm_pt_free_root - free root PD 602 * @adev: amdgpu device structure 603 * @vm: amdgpu vm structure 604 * 605 * Free the root page directory and everything below it. 606 */ 607 void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) 608 { 609 struct amdgpu_vm_pt_cursor cursor; 610 struct amdgpu_vm_bo_base *entry; 611 612 for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { 613 if (entry) 614 amdgpu_vm_pt_free(entry); 615 } 616 } 617 618 /** 619 * amdgpu_vm_pde_update - update a single level in the hierarchy 620 * 621 * @params: parameters for the update 622 * @entry: entry to update 623 * 624 * Makes sure the requested entry in parent is up to date. 625 */ 626 int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, 627 struct amdgpu_vm_bo_base *entry) 628 { 629 struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); 630 struct amdgpu_bo *bo, *pbo; 631 struct amdgpu_vm *vm = params->vm; 632 uint64_t pde, pt, flags; 633 unsigned int level; 634 635 if (WARN_ON(!parent)) 636 return -EINVAL; 637 638 bo = parent->bo; 639 for (level = 0, pbo = bo->parent; pbo; ++level) 640 pbo = pbo->parent; 641 642 level += params->adev->vm_manager.root_level; 643 amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); 644 pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; 645 return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, 646 1, 0, flags); 647 } 648 649 /** 650 * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags 651 * 652 * @adev: amdgpu_device pointer 653 * @flags: pointer to PTE flags 654 * 655 * Update PTE no-retry flags when TF is enabled. 656 */ 657 static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev, 658 uint64_t *flags) 659 { 660 /* 661 * Update no-retry flags with the corresponding TF 662 * no-retry combination. 663 */ 664 if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) { 665 *flags &= ~AMDGPU_VM_NORETRY_FLAGS; 666 *flags |= adev->gmc.noretry_flags; 667 } 668 } 669 670 /* 671 * amdgpu_vm_pte_update_flags - figure out flags for PTE updates 672 * 673 * Make sure to set the right flags for the PTEs at the desired level. 674 */ 675 static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, 676 struct amdgpu_bo_vm *pt, 677 unsigned int level, 678 uint64_t pe, uint64_t addr, 679 unsigned int count, uint32_t incr, 680 uint64_t flags) 681 { 682 struct amdgpu_device *adev = params->adev; 683 684 if (level != AMDGPU_VM_PTB) { 685 flags |= AMDGPU_PDE_PTE_FLAG(params->adev); 686 amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags); 687 688 } else if (adev->asic_type >= CHIP_VEGA10 && 689 !(flags & AMDGPU_PTE_VALID) && 690 !(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) { 691 692 /* Workaround for fault priority problem on GMC9 and GFX12, 693 * EXECUTABLE for GMC9 fault priority and init_pte_flags 694 * (e.g. AMDGPU_PTE_IS_PTE on GFX12) 695 */ 696 flags |= AMDGPU_PTE_EXECUTABLE | adev->gmc.init_pte_flags; 697 } 698 699 /* 700 * Update no-retry flags to use the no-retry flag combination 701 * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination 702 * does not work when TF is enabled. So, replace them with 703 * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for 704 * all cases. 705 */ 706 if (level == AMDGPU_VM_PTB) 707 amdgpu_vm_pte_update_noretry_flags(adev, &flags); 708 709 params->vm->update_funcs->update(params, pt, pe, addr, count, incr, 710 flags); 711 } 712 713 /** 714 * amdgpu_vm_pte_fragment - get fragment for PTEs 715 * 716 * @params: see amdgpu_vm_update_params definition 717 * @start: first PTE to handle 718 * @end: last PTE to handle 719 * @flags: hw mapping flags 720 * @frag: resulting fragment size 721 * @frag_end: end of this fragment 722 * 723 * Returns the first possible fragment for the start and end address. 724 */ 725 static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params, 726 uint64_t start, uint64_t end, uint64_t flags, 727 unsigned int *frag, uint64_t *frag_end) 728 { 729 /** 730 * The MC L1 TLB supports variable sized pages, based on a fragment 731 * field in the PTE. When this field is set to a non-zero value, page 732 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE 733 * flags are considered valid for all PTEs within the fragment range 734 * and corresponding mappings are assumed to be physically contiguous. 735 * 736 * The L1 TLB can store a single PTE for the whole fragment, 737 * significantly increasing the space available for translation 738 * caching. This leads to large improvements in throughput when the 739 * TLB is under pressure. 740 * 741 * The L2 TLB distributes small and large fragments into two 742 * asymmetric partitions. The large fragment cache is significantly 743 * larger. Thus, we try to use large fragments wherever possible. 744 * Userspace can support this by aligning virtual base address and 745 * allocation size to the fragment size. 746 * 747 * Starting with Vega10 the fragment size only controls the L1. The L2 748 * is now directly feed with small/huge/giant pages from the walker. 749 */ 750 unsigned int max_frag; 751 752 if (params->adev->asic_type < CHIP_VEGA10) 753 max_frag = params->adev->vm_manager.fragment_size; 754 else 755 max_frag = 31; 756 757 /* system pages are non continuously */ 758 if (params->pages_addr) { 759 *frag = 0; 760 *frag_end = end; 761 return; 762 } 763 764 /* This intentionally wraps around if no bit is set */ 765 *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1); 766 if (*frag >= max_frag) { 767 *frag = max_frag; 768 *frag_end = end & ~((1ULL << max_frag) - 1); 769 } else { 770 *frag_end = start + (1 << *frag); 771 } 772 } 773 774 /** 775 * amdgpu_vm_ptes_update - make sure that page tables are valid 776 * 777 * @params: see amdgpu_vm_update_params definition 778 * @start: start of GPU address range 779 * @end: end of GPU address range 780 * @dst: destination address to map to, the next dst inside the function 781 * @flags: mapping flags 782 * 783 * Update the page tables in the range @start - @end. 784 * 785 * Returns: 786 * 0 for success, -EINVAL for failure. 787 */ 788 int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, 789 uint64_t start, uint64_t end, 790 uint64_t dst, uint64_t flags) 791 { 792 struct amdgpu_device *adev = params->adev; 793 struct amdgpu_vm_pt_cursor cursor; 794 uint64_t frag_start = start, frag_end; 795 unsigned int frag; 796 int r; 797 798 /* figure out the initial fragment */ 799 amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag, 800 &frag_end); 801 802 /* walk over the address space and update the PTs */ 803 amdgpu_vm_pt_start(adev, params->vm, start, &cursor); 804 while (cursor.pfn < end) { 805 unsigned int shift, parent_shift, mask; 806 uint64_t incr, entry_end, pe_start; 807 struct amdgpu_bo *pt; 808 809 if (!params->unlocked) { 810 /* make sure that the page tables covering the 811 * address range are actually allocated 812 */ 813 r = amdgpu_vm_pt_alloc(params->adev, params->vm, 814 &cursor, params->immediate); 815 if (r) 816 return r; 817 } 818 819 shift = amdgpu_vm_pt_level_shift(adev, cursor.level); 820 parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1); 821 if (params->unlocked) { 822 /* Unlocked updates are only allowed on the leaves */ 823 if (amdgpu_vm_pt_descendant(adev, &cursor)) 824 continue; 825 } else if (adev->asic_type < CHIP_VEGA10 && 826 (flags & AMDGPU_PTE_VALID)) { 827 /* No huge page support before GMC v9 */ 828 if (cursor.level != AMDGPU_VM_PTB) { 829 if (!amdgpu_vm_pt_descendant(adev, &cursor)) 830 return -ENOENT; 831 continue; 832 } 833 } else if (frag < shift) { 834 /* We can't use this level when the fragment size is 835 * smaller than the address shift. Go to the next 836 * child entry and try again. 837 */ 838 if (amdgpu_vm_pt_descendant(adev, &cursor)) 839 continue; 840 } else if (frag >= parent_shift) { 841 /* If the fragment size is even larger than the parent 842 * shift we should go up one level and check it again. 843 */ 844 if (!amdgpu_vm_pt_ancestor(&cursor)) 845 return -EINVAL; 846 continue; 847 } 848 849 pt = cursor.entry->bo; 850 if (!pt) { 851 /* We need all PDs and PTs for mapping something, */ 852 if (flags & AMDGPU_PTE_VALID) 853 return -ENOENT; 854 855 /* but unmapping something can happen at a higher 856 * level. 857 */ 858 if (!amdgpu_vm_pt_ancestor(&cursor)) 859 return -EINVAL; 860 861 pt = cursor.entry->bo; 862 shift = parent_shift; 863 frag_end = max(frag_end, ALIGN(frag_start + 1, 864 1ULL << shift)); 865 } 866 867 /* Looks good so far, calculate parameters for the update */ 868 incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; 869 mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); 870 pe_start = ((cursor.pfn >> shift) & mask) * 8; 871 872 if (cursor.level < AMDGPU_VM_PTB && params->unlocked) 873 /* 874 * MMU notifier callback unlocked unmap huge page, leave is PDE entry, 875 * only clear one entry. Next entry search again for PDE or PTE leave. 876 */ 877 entry_end = 1ULL << shift; 878 else 879 entry_end = ((uint64_t)mask + 1) << shift; 880 entry_end += cursor.pfn & ~(entry_end - 1); 881 entry_end = min(entry_end, end); 882 883 do { 884 struct amdgpu_vm *vm = params->vm; 885 uint64_t upd_end = min(entry_end, frag_end); 886 unsigned int nptes = (upd_end - frag_start) >> shift; 887 uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); 888 889 /* This can happen when we set higher level PDs to 890 * silent to stop fault floods. 891 */ 892 nptes = max(nptes, 1u); 893 894 trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, 895 min(nptes, 32u), dst, incr, 896 upd_flags, 897 vm->task_info ? vm->task_info->tgid : 0, 898 vm->immediate.fence_context); 899 amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), 900 cursor.level, pe_start, dst, 901 nptes, incr, upd_flags); 902 903 pe_start += nptes * 8; 904 dst += nptes * incr; 905 906 frag_start = upd_end; 907 if (frag_start >= frag_end) { 908 /* figure out the next fragment */ 909 amdgpu_vm_pte_fragment(params, frag_start, end, 910 flags, &frag, &frag_end); 911 if (frag < shift) 912 break; 913 } 914 } while (frag_start < entry_end); 915 916 if (amdgpu_vm_pt_descendant(adev, &cursor)) { 917 /* Free all child entries. 918 * Update the tables with the flags and addresses and free up subsequent 919 * tables in the case of huge pages or freed up areas. 920 * This is the maximum you can free, because all other page tables are not 921 * completely covered by the range and so potentially still in use. 922 */ 923 while (cursor.pfn < frag_start) { 924 /* Make sure previous mapping is freed */ 925 if (cursor.entry->bo) { 926 params->needs_flush = true; 927 amdgpu_vm_pt_add_list(params, &cursor); 928 } 929 amdgpu_vm_pt_next(adev, &cursor); 930 } 931 932 } else if (frag >= shift) { 933 /* or just move on to the next on the same level. */ 934 amdgpu_vm_pt_next(adev, &cursor); 935 } 936 } 937 938 return 0; 939 } 940 941 /** 942 * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible 943 * @adev: amdgpu device structure 944 * @vm: amdgpu vm structure 945 * 946 * make root page directory and everything below it cpu accessible. 947 */ 948 int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm) 949 { 950 struct amdgpu_vm_pt_cursor cursor; 951 struct amdgpu_vm_bo_base *entry; 952 953 for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { 954 955 struct amdgpu_bo_vm *bo; 956 int r; 957 958 if (entry->bo) { 959 bo = to_amdgpu_bo_vm(entry->bo); 960 r = vm->update_funcs->map_table(bo); 961 if (r) 962 return r; 963 } 964 } 965 966 return 0; 967 } 968