// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include "xe_bo.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return range->base.flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return xe_svm_range_in_vram(range) && range->tile_present;
}

static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
	return gpusvm_to_vm(r->gpusvm);
}

static unsigned long xe_svm_range_start(struct xe_svm_range *range)
{
	return drm_gpusvm_range_start(&range->base);
}

static unsigned long xe_svm_range_end(struct xe_svm_range *range)
{
	return drm_gpusvm_range_end(&range->base);
}

static unsigned long xe_svm_range_size(struct xe_svm_range *range)
{
	return drm_gpusvm_range_size(&range->base);
}

#define range_debug(r__, operation__)					\
	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm,			\
	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, "	\
	       "start=0x%014lx, end=0x%014lx, size=%lu",		\
	       (operation__), range_to_vm(&(r__)->base)->usm.asid,	\
	       (r__)->base.gpusvm,					\
	       xe_svm_range_in_vram((r__)) ? 1 : 0,			\
	       xe_svm_range_has_vram_binding((r__)) ? 1 : 0,		\
	       (r__)->base.notifier_seq,				\
	       xe_svm_range_start((r__)), xe_svm_range_end((r__)),	\
	       xe_svm_range_size((r__)))

void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
	range_debug(range, operation);
}

static void *xe_svm_devm_owner(struct xe_device *xe)
{
	return xe;
}

static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
	struct xe_svm_range *range;

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return NULL;

	INIT_LIST_HEAD(&range->garbage_collector_link);
	xe_vm_get(gpusvm_to_vm(gpusvm));

	return &range->base;
}

static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
	xe_vm_put(range_to_vm(range));
	kfree(range);
}

static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
{
	return container_of(r, struct xe_svm_range, base);
}

static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	struct xe_device *xe = vm->xe;

	range_debug(range, "GARBAGE COLLECTOR ADD");

	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

	spin_lock(&vm->svm.garbage_collector.lock);
	if (list_empty(&range->garbage_collector_link))
		list_add_tail(&range->garbage_collector_link,
			      &vm->svm.garbage_collector.range_list);
	spin_unlock(&vm->svm.garbage_collector.lock);

	queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
		   &vm->svm.garbage_collector.work);
}

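/*
 * Called from the MMU notifier for each GPU SVM range overlapping the
 * invalidated CPU address range: zaps the range's PTEs on every tile with a
 * binding and returns a mask of the tiles whose TLBs still need invalidating.
 */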
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
				  const struct mmu_notifier_range *mmu_range,
				  u64 *adj_start, u64 *adj_end)
{
	struct xe_svm_range *range = to_xe_range(r);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	xe_svm_assert_in_notifier(vm);

	range_debug(range, "NOTIFIER");

	/* Skip if already unmapped or if no bindings exist */
	if (range->base.flags.unmapped || !range->tile_present)
		return 0;

	range_debug(range, "NOTIFIER - EXECUTE");

	/* Adjust invalidation to range boundaries */
	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
	*adj_end = max(xe_svm_range_end(range), mmu_range->end);

	/*
	 * XXX: Ideally we would zap PTEs in one shot in xe_svm_invalidate(),
	 * but the invalidation code can't correctly cope with sparse ranges or
	 * invalidations spanning multiple ranges.
	 */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes_range(tile, vm, range)) {
			tile_mask |= BIT(id);
			range->tile_invalidated |= BIT(id);
		}

	return tile_mask;
}

static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
				const struct mmu_notifier_range *mmu_range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	xe_svm_assert_in_notifier(vm);

	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
						   mmu_range);
}

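/*
 * GPU SVM invalidation callback, runs in MMU notifier context: clamp the
 * invalidation to the notifier bounds, zap PTEs for every overlapping range,
 * issue TLB invalidations on the primary and media GTs of each affected tile
 * and wait for them, then unmap the ranges' DMA pages and queue any unmapped
 * ranges for the garbage collector.
 */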
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      struct drm_gpusvm_notifier *notifier,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct drm_gpusvm_range *r, *first;
	struct xe_gt_tlb_invalidation_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
	u8 tile_mask = 0;
	u8 id;
	u32 fence_id = 0;
	long err;

	xe_svm_assert_in_notifier(vm);

	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
	       mmu_range->start, mmu_range->end, mmu_range->event);

	/* Adjust invalidation to notifier boundaries */
	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
	if (!first)
		return;

	/*
	 * PTs may be getting destroyed, so it is not safe to touch them, but
	 * they should already be invalidated at this point in time. Regardless,
	 * we still need to ensure any DMA mappings are unmapped here.
	 */
	if (xe_vm_is_closed(vm))
		goto range_notifier_event_end;

	/*
	 * XXX: Less than ideal to always wait on the VM's resv slots if an
	 * invalidation is not required. Could walk the range list twice to
	 * figure out if an invalidation is needed, but that is also not ideal.
	 */
	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
							       &adj_start,
							       &adj_end);
	if (!tile_mask)
		goto range_notifier_event_end;

	xe_device_wmb(xe);

	for_each_tile(tile, xe, id) {
		if (tile_mask & BIT(id)) {
			int err;

			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->media_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;
		}
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

range_notifier_event_end:
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		xe_svm_range_notifier_event_end(vm, r, mmu_range);
}

static int __xe_svm_garbage_collector(struct xe_vm *vm,
				      struct xe_svm_range *range)
{
	struct dma_fence *fence;

	range_debug(range, "GARBAGE COLLECTOR");

	xe_vm_lock(vm, false);
	fence = xe_vm_range_unbind(vm, range);
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	dma_fence_put(fence);

	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

	return 0;
}

static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return -ENOENT;

	spin_lock(&vm->svm.garbage_collector.lock);
	for (;;) {
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			drm_warn(&vm->xe->drm,
				 "Garbage collection failed: %pe\n",
				 ERR_PTR(err));
			xe_vm_kill(vm, true);
			return err;
		}

		spin_lock(&vm->svm.garbage_collector.lock);
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	return 0;
}

static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm,
					svm.garbage_collector.work);

	down_write(&vm->lock);
	xe_svm_garbage_collector(vm);
	up_write(&vm->lock);
}

static struct xe_vram_region *page_to_vr(struct page *page)
{
	return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
}

static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
{
	return container_of(vr, struct xe_tile, mem.vram);
}

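/*
 * Translate a device-private page to its device physical address (DPA): the
 * page's host physical address is offset against the region's remapped base
 * (hpa_base) and rebased onto the region's DPA base. Illustrative example
 * with made-up values: hpa_base = 0x800000000, dpa_base = 0x0 and a page at
 * host address 0x800100000 yields DPA 0x100000.
 */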
static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
				      struct page *page)
{
	u64 dpa;
	struct xe_tile *tile = vr_to_tile(vr);
	u64 pfn = page_to_pfn(page);
	u64 offset;

	xe_tile_assert(tile, is_device_private_page(page));
	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);

	offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
	dpa = vr->dpa_base + offset;

	return dpa;
}

enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir)
{
	struct xe_vram_region *vr = NULL;
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU copy
	 * for every 8M chunk in a DMA address array. Both device pages and DMA
	 * addresses may be sparsely populated. If either is NULL, a copy is
	 * triggered based on the current search state. The last GPU copy is
	 * waited on to ensure all copies are complete.
	 */

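	/*
	 * Worked example of the chunking below (illustrative, assuming a 4K
	 * PAGE_SIZE so XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE == 2048):
	 * - pages[0..2047] contiguous in VRAM: at i == 2048 'chunk' is true,
	 *   one 8M copy is issued from 'vram_addr' and 'pos' resets to i.
	 * - a discontiguity at pages[100]: 'match' is false, pages[pos..99]
	 *   are copied and a new run starts at i == 100.
	 * - the final run is flushed by the 'last' check on the last page.
	 */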
	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = page_to_vr(spage);
			tile = vr_to_tile(vr);
		}
		XE_WARN_ON(spage && page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physically contiguous on
		 * subsequent device pages.
		 */
		if (dma_addr[i] && spage) {
			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       vram_addr, (u64)dma_addr[pos], i - pos + incr);
					__fence = xe_migrate_from_vram(tile->migrate,
								       i - pos + incr,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       (u64)dma_addr[pos], vram_addr, i - pos + incr);
					__fence = xe_migrate_to_vram(tile->migrate,
								     i - pos + incr,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}

			/* Set up physical address of next device page */
			if (dma_addr[i] && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       vram_addr, (u64)dma_addr[pos], 1);
					__fence = xe_migrate_from_vram(tile->migrate, 1,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       (u64)dma_addr[pos], vram_addr, 1);
					__fence = xe_migrate_to_vram(tile->migrate, 1,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
				 unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
			      unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

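/*
 * drm_gpusvm devmem callbacks. The drm_gpusvm_devmem structure handed to the
 * GPU SVM core is embedded in struct xe_bo, so each callback recovers the
 * backing BO via container_of() (see to_xe_bo()) and works on its TTM VRAM
 * resource.
 */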
static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
{
	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);

	xe_bo_put_async(bo);
}

static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
{
	return PHYS_PFN(offset + vr->hpa_base);
}

static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
{
	return &tile->mem.vram.ttm.mm;
}

static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
				      unsigned long npages, unsigned long *pfn)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct ttm_resource *res = bo->ttm.resource;
	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
	struct drm_buddy_block *block;
	int j = 0;

	list_for_each_entry(block, blocks, link) {
		struct xe_vram_region *vr = block->private;
		struct xe_tile *tile = vr_to_tile(vr);
		struct drm_buddy *buddy = tile_to_buddy(tile);
		u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
		int i;

		for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
			pfn[j++] = block_pfn + i;
	}

	return 0;
}

static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
	.devmem_release = xe_svm_devmem_release,
	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
	.invalidate = xe_svm_invalidate,
};

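/*
 * Granularities used when creating SVM ranges on a GPU fault, ordered largest
 * to smallest; the intent is that the GPU SVM core prefers the largest chunk
 * that fits the faulting address within the CPU VMA before falling back to
 * the smaller ones.
 */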
static const unsigned long fault_chunk_sizes[] = {
	SZ_2M,
	SZ_64K,
	SZ_4K,
};

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
	int err;

	spin_lock_init(&vm->svm.garbage_collector.lock);
	INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
	INIT_WORK(&vm->svm.garbage_collector.work,
		  xe_svm_garbage_collector_work_func);

	err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
			      current->mm, xe_svm_devm_owner(vm->xe), 0,
			      vm->size, xe_modparam.svm_notifier_size * SZ_1M,
			      &gpusvm_ops, fault_chunk_sizes,
			      ARRAY_SIZE(fault_chunk_sizes));
	if (err)
		return err;

	drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);

	return 0;
}

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
 */
void xe_svm_close(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));
	flush_work(&vm->svm.garbage_collector.work);
}

/**
 * xe_svm_fini() - SVM finalize
 * @vm: The VM.
 *
 * Finalize SVM state which is embedded within the VM.
 */
void xe_svm_fini(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));

	drm_gpusvm_fini(&vm->svm.gpusvm);
}

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
				  struct xe_tile *tile)
{
	return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
}

static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
	return &tile->mem.vram;
}

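/*
 * VRAM allocation for an SVM range: create a VRAM-placed BO covering the
 * range, tag each buddy block backing the BO with the owning VRAM region (so
 * xe_svm_populate_devmem_pfn() can translate blocks to PFNs), then have the
 * GPU SVM core migrate the range's pages into that allocation.
 */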
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_svm_range *range,
			     const struct drm_gpusvm_ctx *ctx)
{
	struct mm_struct *mm = vm->svm.gpusvm.mm;
	struct xe_vram_region *vr = tile_to_vr(tile);
	struct drm_buddy_block *block;
	struct list_head *blocks;
	struct xe_bo *bo;
	ktime_t end = 0;
	int err;

	range_debug(range, "ALLOCATE VRAM");

	if (!mmget_not_zero(mm))
		return -EFAULT;
	mmap_read_lock(mm);

retry:
	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
				 xe_svm_range_size(range),
				 ttm_bo_type_device,
				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				 XE_BO_FLAG_CPU_ADDR_MIRROR);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		if (xe_vm_validate_should_retry(NULL, err, &end))
			goto retry;
		goto unlock;
	}

	drm_gpusvm_devmem_init(&bo->devmem_allocation,
			       vm->xe->drm.dev, mm,
			       &gpusvm_devmem_ops,
			       &tile->mem.vram.dpagemap,
			       xe_svm_range_size(range));

	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
	list_for_each_entry(block, blocks, link)
		block->private = vr;

	xe_bo_get(bo);
	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
					   &bo->devmem_allocation, ctx);
	if (err)
		xe_svm_devmem_release(&bo->devmem_allocation);

	xe_bo_unlock(bo);
	xe_bo_put(bo);

unlock:
	mmap_read_unlock(mm);
	mmput(mm);

	return err;
}

/**
 * xe_svm_handle_pagefault() - SVM handle page fault
 * @vm: The VM.
 * @vma: The CPU address mirror VMA.
 * @tile: The tile upon which the fault occurred.
 * @fault_addr: The GPU fault address.
 * @atomic: The fault atomic access bit.
 *
 * Create GPU bindings for a SVM page fault. Optionally migrate to device
 * memory.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_tile *tile, u64 fault_addr,
			    bool atomic)
{
	struct drm_gpusvm_ctx ctx = {
		.read_only = xe_vma_read_only(vma),
		.devmem_possible = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.check_pages_threshold = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
	};
	struct xe_svm_range *range;
	struct drm_gpusvm_range *r;
	struct drm_exec exec;
	struct dma_fence *fence;
	ktime_t end = 0;
	int err;

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

retry:
	/* Always process UNMAPs first so the view of SVM ranges is current */
	err = xe_svm_garbage_collector(vm);
	if (err)
		return err;

	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
					    xe_vma_start(vma), xe_vma_end(vma),
					    &ctx);
	if (IS_ERR(r))
		return PTR_ERR(r);

	range = to_xe_range(r);
	if (xe_svm_range_is_valid(range, tile))
		return 0;

	range_debug(range, "PAGE FAULT");

	/* XXX: Add migration policy, for now migrate range once */
	if (!range->skip_migrate && range->base.flags.migrate_devmem &&
	    xe_svm_range_size(range) >= SZ_64K) {
		range->skip_migrate = true;

		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
		if (err) {
			drm_dbg(&vm->xe->drm,
				"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
				vm->usm.asid, ERR_PTR(err));
			goto retry;
		}
	}

	range_debug(range, "GET PAGES");
	err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
	/* Corner case where CPU mappings have changed */
	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
		if (err == -EOPNOTSUPP) {
			range_debug(range, "PAGE FAULT - EVICT PAGES");
			drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
		}
		drm_dbg(&vm->xe->drm,
			"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
			vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
		range_debug(range, "PAGE FAULT - RETRY PAGES");
		goto retry;
	}
	if (err) {
		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
		goto err_out;
	}

	range_debug(range, "PAGE FAULT - BIND");

retry_bind:
	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
		drm_exec_retry_on_contention(&exec);
		if (err) {
			drm_exec_fini(&exec);
			goto err_out;
		}

		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
		if (IS_ERR(fence)) {
			drm_exec_fini(&exec);
			err = PTR_ERR(fence);
			if (err == -EAGAIN) {
				range_debug(range, "PAGE FAULT - RETRY BIND");
				goto retry;
			}
			if (xe_vm_validate_should_retry(&exec, err, &end))
				goto retry_bind;
			goto err_out;
		}
	}
	drm_exec_fini(&exec);

	if (xe_modparam.always_migrate_to_vram)
		range->skip_migrate = false;

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

err_out:

	return err;
}

/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check if an address range has SVM mappings.
 *
 * Return: True if the address range has an SVM mapping, False otherwise.
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}

/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * SVM evict BO to system memory. The GPU SVM layer ensures all device pages
 * are evicted before returning.
 *
 * Return: 0 on success, standard error code otherwise.
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
	return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
}

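/*
 * Device memory mirroring support. With CONFIG_DRM_XE_DEVMEM_MIRROR enabled,
 * xe_devm_add() remaps a tile's VRAM as MEMORY_DEVICE_PRIVATE pages and hooks
 * up the drm_pagemap ops used to map those pages for the GPU; without it,
 * xe_devm_add() is a stub and devmem_possible is never set for faults, so
 * ranges are serviced from system memory only.
 */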
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
			  struct device *dev,
			  struct page *page,
			  unsigned int order,
			  enum dma_data_direction dir)
{
	struct device *pgmap_dev = dpagemap->dev;
	enum drm_interconnect_protocol prot;
	dma_addr_t addr;

	if (pgmap_dev == dev) {
		addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
		prot = XE_INTERCONNECT_VRAM;
	} else {
		addr = DMA_MAPPING_ERROR;
		prot = 0;
	}

	return drm_pagemap_device_addr_encode(addr, prot, order, dir);
}

static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
	.device_map = xe_drm_pagemap_device_map,
};

/**
 * xe_devm_add() - Remap and provide memmap backing for device memory
 * @tile: tile that the memory region belongs to
 * @vr: vram memory region to remap
 *
 * This remaps device memory to the host physical address space and creates
 * struct pages to back the device memory.
 *
 * Return: 0 on success, standard error code otherwise.
 */
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
	struct resource *res;
	void *addr;
	int ret;

	res = devm_request_free_mem_region(dev, &iomem_resource,
					   vr->usable_size);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		return ret;
	}

	vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	vr->pagemap.range.start = res->start;
	vr->pagemap.range.end = res->end;
	vr->pagemap.nr_range = 1;
	vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
	vr->pagemap.owner = xe_svm_devm_owner(xe);
	addr = devm_memremap_pages(dev, &vr->pagemap);

	vr->dpagemap.dev = dev;
	vr->dpagemap.ops = &xe_drm_pagemap_ops;

	if (IS_ERR(addr)) {
		devm_release_mem_region(dev, res->start, resource_size(res));
		ret = PTR_ERR(addr);
		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
			tile->id, ERR_PTR(ret));
		return ret;
	}
	vr->hpa_base = res->start;

	drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
		tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
	return 0;
}
#else
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	return 0;
}
#endif