// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include "xe_bo.h"
#include "xe_gt_stats.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
        /* Not reliable without notifier lock */
        return range->base.flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
        /* Not reliable without notifier lock */
        return xe_svm_range_in_vram(range) && range->tile_present;
}

static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
        return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
        return gpusvm_to_vm(r->gpusvm);
}

static unsigned long xe_svm_range_start(struct xe_svm_range *range)
{
        return drm_gpusvm_range_start(&range->base);
}

static unsigned long xe_svm_range_end(struct xe_svm_range *range)
{
        return drm_gpusvm_range_end(&range->base);
}

static unsigned long xe_svm_range_size(struct xe_svm_range *range)
{
        return drm_gpusvm_range_size(&range->base);
}

#define range_debug(r__, operation__) \
        vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
               "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
               "start=0x%014lx, end=0x%014lx, size=%lu", \
               (operation__), range_to_vm(&(r__)->base)->usm.asid, \
               (r__)->base.gpusvm, \
               xe_svm_range_in_vram((r__)) ? 1 : 0, \
               xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
               (r__)->base.notifier_seq, \
               xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
               xe_svm_range_size((r__)))

void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
        range_debug(range, operation);
}

static void *xe_svm_devm_owner(struct xe_device *xe)
{
        return xe;
}

static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
        struct xe_svm_range *range;

        range = kzalloc(sizeof(*range), GFP_KERNEL);
        if (!range)
                return NULL;

        INIT_LIST_HEAD(&range->garbage_collector_link);
        xe_vm_get(gpusvm_to_vm(gpusvm));

        return &range->base;
}

static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
        xe_vm_put(range_to_vm(range));
        kfree(range);
}

static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
{
        return container_of(r, struct xe_svm_range, base);
}

static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
                                   const struct mmu_notifier_range *mmu_range)
{
        struct xe_device *xe = vm->xe;

        range_debug(range, "GARBAGE COLLECTOR ADD");

        drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

        spin_lock(&vm->svm.garbage_collector.lock);
        if (list_empty(&range->garbage_collector_link))
                list_add_tail(&range->garbage_collector_link,
                              &vm->svm.garbage_collector.range_list);
        spin_unlock(&vm->svm.garbage_collector.lock);

        queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
                   &vm->svm.garbage_collector.work);
}

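/*
 * Zap the GPU PTEs covered by a range on each tile with a binding, mark those
 * tiles as invalidated in the range, and return a mask of tiles whose TLBs
 * the caller still needs to invalidate. The invalidation window
 * (@adj_start, @adj_end) is widened so it covers the whole range.
 */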
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
                                  const struct mmu_notifier_range *mmu_range,
                                  u64 *adj_start, u64 *adj_end)
{
        struct xe_svm_range *range = to_xe_range(r);
        struct xe_device *xe = vm->xe;
        struct xe_tile *tile;
        u8 tile_mask = 0;
        u8 id;

        xe_svm_assert_in_notifier(vm);

        range_debug(range, "NOTIFIER");

        /* Skip if already unmapped or if no binding exists */
        if (range->base.flags.unmapped || !range->tile_present)
                return 0;

        range_debug(range, "NOTIFIER - EXECUTE");

        /* Adjust invalidation to range boundaries */
        *adj_start = min(xe_svm_range_start(range), mmu_range->start);
        *adj_end = max(xe_svm_range_end(range), mmu_range->end);

        /*
         * XXX: Ideally we would zap PTEs in one shot in xe_svm_invalidate but
         * the invalidation code can't correctly cope with sparse ranges or
         * invalidations spanning multiple ranges.
         */
        for_each_tile(tile, xe, id)
                if (xe_pt_zap_ptes_range(tile, vm, range)) {
                        tile_mask |= BIT(id);
                        range->tile_invalidated |= BIT(id);
                }

        return tile_mask;
}

static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
                                const struct mmu_notifier_range *mmu_range)
{
        struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

        xe_svm_assert_in_notifier(vm);

        drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
        if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
                xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
                                                   mmu_range);
}

static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
                              struct drm_gpusvm_notifier *notifier,
                              const struct mmu_notifier_range *mmu_range)
{
        struct xe_vm *vm = gpusvm_to_vm(gpusvm);
        struct xe_device *xe = vm->xe;
        struct xe_tile *tile;
        struct drm_gpusvm_range *r, *first;
        struct xe_gt_tlb_invalidation_fence
                fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
        u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
        u8 tile_mask = 0;
        u8 id;
        u32 fence_id = 0;
        long err;

        xe_svm_assert_in_notifier(vm);

        vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
               "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
               vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
               mmu_range->start, mmu_range->end, mmu_range->event);

        /* Adjust invalidation to notifier boundaries */
        adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
        adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

        first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
        if (!first)
                return;

        /*
         * PTs may be getting destroyed, so it is not safe to touch them here,
         * but the PTs should be invalidated at this point in time. Regardless,
         * we still need to ensure any DMA mappings are unmapped here.
         */
        if (xe_vm_is_closed(vm))
                goto range_notifier_event_end;

        /*
         * XXX: Less than ideal to always wait on the VM's resv slots if an
         * invalidation is not required. Could walk the range list twice to
         * figure out if an invalidation is needed, but that is also not ideal.
         */
        err = dma_resv_wait_timeout(xe_vm_resv(vm),
                                    DMA_RESV_USAGE_BOOKKEEP,
                                    false, MAX_SCHEDULE_TIMEOUT);
        XE_WARN_ON(err <= 0);

        r = first;
        drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
                tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
                                                               &adj_start,
                                                               &adj_end);
        if (!tile_mask)
                goto range_notifier_event_end;

        xe_device_wmb(xe);

        for_each_tile(tile, xe, id) {
                if (tile_mask & BIT(id)) {
                        int err;

                        xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
                                                          &fence[fence_id], true);

                        err = xe_gt_tlb_invalidation_range(tile->primary_gt,
                                                           &fence[fence_id],
                                                           adj_start,
                                                           adj_end,
                                                           vm->usm.asid);
                        if (WARN_ON_ONCE(err < 0))
                                goto wait;
                        ++fence_id;

                        if (!tile->media_gt)
                                continue;

                        xe_gt_tlb_invalidation_fence_init(tile->media_gt,
                                                          &fence[fence_id], true);

                        err = xe_gt_tlb_invalidation_range(tile->media_gt,
                                                           &fence[fence_id],
                                                           adj_start,
                                                           adj_end,
                                                           vm->usm.asid);
                        if (WARN_ON_ONCE(err < 0))
                                goto wait;
                        ++fence_id;
                }
        }

wait:
        for (id = 0; id < fence_id; ++id)
                xe_gt_tlb_invalidation_fence_wait(&fence[id]);

range_notifier_event_end:
        r = first;
        drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
                xe_svm_range_notifier_event_end(vm, r, mmu_range);
}

static int __xe_svm_garbage_collector(struct xe_vm *vm,
                                      struct xe_svm_range *range)
{
        struct dma_fence *fence;

        range_debug(range, "GARBAGE COLLECTOR");

        xe_vm_lock(vm, false);
        fence = xe_vm_range_unbind(vm, range);
        xe_vm_unlock(vm);
        if (IS_ERR(fence))
                return PTR_ERR(fence);
        dma_fence_put(fence);

        drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

        return 0;
}

static int xe_svm_garbage_collector(struct xe_vm *vm)
{
        struct xe_svm_range *range;
        int err;

        lockdep_assert_held_write(&vm->lock);

        if (xe_vm_is_closed_or_banned(vm))
                return -ENOENT;

        spin_lock(&vm->svm.garbage_collector.lock);
        for (;;) {
                range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
                                                 typeof(*range),
                                                 garbage_collector_link);
                if (!range)
                        break;

                list_del(&range->garbage_collector_link);
                spin_unlock(&vm->svm.garbage_collector.lock);

                err = __xe_svm_garbage_collector(vm, range);
                if (err) {
                        drm_warn(&vm->xe->drm,
                                 "Garbage collection failed: %pe\n",
                                 ERR_PTR(err));
                        xe_vm_kill(vm, true);
                        return err;
                }

                spin_lock(&vm->svm.garbage_collector.lock);
        }
        spin_unlock(&vm->svm.garbage_collector.lock);

        return 0;
}

static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
        struct xe_vm *vm = container_of(w, struct xe_vm,
                                        svm.garbage_collector.work);

        down_write(&vm->lock);
        xe_svm_garbage_collector(vm);
        up_write(&vm->lock);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)

static struct xe_vram_region *page_to_vr(struct page *page)
{
        return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
}

static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
{
        return container_of(vr, struct xe_tile, mem.vram);
}

static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
                                      struct page *page)
{
        u64 dpa;
        struct xe_tile *tile = vr_to_tile(vr);
        u64 pfn = page_to_pfn(page);
        u64 offset;

        xe_tile_assert(tile, is_device_private_page(page));
        xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);

        offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
        dpa = vr->dpa_base + offset;

        return dpa;
}

enum xe_svm_copy_dir {
        XE_SVM_COPY_TO_VRAM,
        XE_SVM_COPY_TO_SRAM,
};

static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
                       unsigned long npages, const enum xe_svm_copy_dir dir)
{
        struct xe_vram_region *vr = NULL;
        struct xe_tile *tile;
        struct dma_fence *fence = NULL;
        unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
        u64 vram_addr = XE_VRAM_ADDR_INVALID;
        int err = 0, pos = 0;
        bool sram = dir == XE_SVM_COPY_TO_SRAM;

        /*
         * This flow is complex: it locates physically contiguous device
         * pages, derives the starting physical address, and performs a
         * single GPU copy for every 8M chunk in a DMA address array. Both
         * device pages and DMA addresses may be sparsely populated. If
         * either is NULL, a copy is triggered based on the current search
         * state. The last GPU copy is waited on to ensure all copies are
         * complete.
         */

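        /*
         * Example: for three pages that all have DMA addresses, where pages
         * 0 and 1 are physically contiguous in VRAM but page 2 is not, the
         * loop below issues one two-page copy starting at page 0 when the
         * mismatch is detected on the last iteration, followed by a one-page
         * copy for page 2 via the "extra mismatched device page" path.
         */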
        for (i = 0; i < npages; ++i) {
                struct page *spage = pages[i];
                struct dma_fence *__fence;
                u64 __vram_addr;
                bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
                chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
                last = (i + 1) == npages;

                /* No CPU page and no device pages queued to copy */
                if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
                        continue;

                if (!vr && spage) {
                        vr = page_to_vr(spage);
                        tile = vr_to_tile(vr);
                }
                XE_WARN_ON(spage && page_to_vr(spage) != vr);

                /*
                 * CPU page and device page valid, capture the physical
                 * address on the first device page, then check for physical
                 * contiguity on subsequent device pages.
                 */
                if (dma_addr[i] && spage) {
                        __vram_addr = xe_vram_region_page_to_dpa(vr, spage);
                        if (vram_addr == XE_VRAM_ADDR_INVALID) {
                                vram_addr = __vram_addr;
                                pos = i;
                        }

                        match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
                }

                /*
                 * Mismatched physical address, 8M copy chunk, or last page -
                 * trigger a copy.
                 */
                if (!match || chunk || last) {
                        /*
                         * Extra page for the first copy if this is the last
                         * page and the physical address matches.
                         */
                        int incr = (match && last) ? 1 : 0;

                        if (vram_addr != XE_VRAM_ADDR_INVALID) {
                                if (sram) {
                                        vm_dbg(&tile->xe->drm,
                                               "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
                                               vram_addr, (u64)dma_addr[pos], i - pos + incr);
                                        __fence = xe_migrate_from_vram(tile->migrate,
                                                                       i - pos + incr,
                                                                       vram_addr,
                                                                       dma_addr + pos);
                                } else {
                                        vm_dbg(&tile->xe->drm,
                                               "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
                                               (u64)dma_addr[pos], vram_addr, i - pos + incr);
                                        __fence = xe_migrate_to_vram(tile->migrate,
                                                                     i - pos + incr,
                                                                     dma_addr + pos,
                                                                     vram_addr);
                                }
                                if (IS_ERR(__fence)) {
                                        err = PTR_ERR(__fence);
                                        goto err_out;
                                }

                                dma_fence_put(fence);
                                fence = __fence;
                        }

                        /* Set up the physical address of the next device page */
                        if (dma_addr[i] && spage) {
                                vram_addr = __vram_addr;
                                pos = i;
                        } else {
                                vram_addr = XE_VRAM_ADDR_INVALID;
                        }

                        /* Extra mismatched device page, copy it */
                        if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
                                if (sram) {
                                        vm_dbg(&tile->xe->drm,
                                               "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
                                               vram_addr, (u64)dma_addr[pos], 1);
                                        __fence = xe_migrate_from_vram(tile->migrate, 1,
                                                                       vram_addr,
                                                                       dma_addr + pos);
                                } else {
                                        vm_dbg(&tile->xe->drm,
                                               "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
                                               (u64)dma_addr[pos], vram_addr, 1);
                                        __fence = xe_migrate_to_vram(tile->migrate, 1,
                                                                     dma_addr + pos,
                                                                     vram_addr);
                                }
                                if (IS_ERR(__fence)) {
                                        err = PTR_ERR(__fence);
                                        goto err_out;
                                }

                                dma_fence_put(fence);
                                fence = __fence;
                        }
                }
        }

err_out:
        /* Wait for all copies to complete */
        if (fence) {
                dma_fence_wait(fence, false);
                dma_fence_put(fence);
        }

        return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
                                 unsigned long npages)
{
        return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
                              unsigned long npages)
{
        return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
{
        return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
{
        struct xe_bo *bo = to_xe_bo(devmem_allocation);

        xe_bo_put_async(bo);
}

static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
{
        return PHYS_PFN(offset + vr->hpa_base);
}

static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
{
        return &tile->mem.vram.ttm.mm;
}

static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
                                      unsigned long npages, unsigned long *pfn)
{
        struct xe_bo *bo = to_xe_bo(devmem_allocation);
        struct ttm_resource *res = bo->ttm.resource;
        struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
        struct drm_buddy_block *block;
        int j = 0;

        list_for_each_entry(block, blocks, link) {
                struct xe_vram_region *vr = block->private;
                struct xe_tile *tile = vr_to_tile(vr);
                struct drm_buddy *buddy = tile_to_buddy(tile);
                u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
                int i;

                for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
                        pfn[j++] = block_pfn + i;
        }

        return 0;
}

static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
        .devmem_release = xe_svm_devmem_release,
        .populate_devmem_pfn = xe_svm_populate_devmem_pfn,
        .copy_to_devmem = xe_svm_copy_to_devmem,
        .copy_to_ram = xe_svm_copy_to_ram,
};

#endif

static const struct drm_gpusvm_ops gpusvm_ops = {
        .range_alloc = xe_svm_range_alloc,
        .range_free = xe_svm_range_free,
        .invalidate = xe_svm_invalidate,
};

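/*
 * Candidate sizes for a newly created SVM range around a faulting address,
 * ordered largest first so GPU SVM can fall back to a smaller chunk when a
 * larger one does not fit.
 */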
static const unsigned long fault_chunk_sizes[] = {
        SZ_2M,
        SZ_64K,
        SZ_4K,
};

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
        int err;

        spin_lock_init(&vm->svm.garbage_collector.lock);
        INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
        INIT_WORK(&vm->svm.garbage_collector.work,
                  xe_svm_garbage_collector_work_func);

        err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
                              current->mm, xe_svm_devm_owner(vm->xe), 0,
                              vm->size, xe_modparam.svm_notifier_size * SZ_1M,
                              &gpusvm_ops, fault_chunk_sizes,
                              ARRAY_SIZE(fault_chunk_sizes));
        if (err)
                return err;

        drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);

        return 0;
}

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
 */
void xe_svm_close(struct xe_vm *vm)
{
        xe_assert(vm->xe, xe_vm_is_closed(vm));
        flush_work(&vm->svm.garbage_collector.work);
}

/**
 * xe_svm_fini() - SVM finalize
 * @vm: The VM.
 *
 * Finalize SVM state which is embedded within the VM.
 */
void xe_svm_fini(struct xe_vm *vm)
{
        xe_assert(vm->xe, xe_vm_is_closed(vm));

        drm_gpusvm_fini(&vm->svm.gpusvm);
}

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
                                  struct xe_tile *tile)
{
        return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
        return &tile->mem.vram;
}

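/*
 * Back @range with VRAM on @tile: allocate a BO sized to the range, point the
 * buddy blocks at the tile's VRAM region, and ask GPU SVM to migrate the
 * backing pages into that allocation.
 */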
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
                             struct xe_svm_range *range,
                             const struct drm_gpusvm_ctx *ctx)
{
        struct mm_struct *mm = vm->svm.gpusvm.mm;
        struct xe_vram_region *vr = tile_to_vr(tile);
        struct drm_buddy_block *block;
        struct list_head *blocks;
        struct xe_bo *bo;
        ktime_t end = 0;
        int err;

        range_debug(range, "ALLOCATE VRAM");

        if (!mmget_not_zero(mm))
                return -EFAULT;
        mmap_read_lock(mm);

retry:
        bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
                                 xe_svm_range_size(range),
                                 ttm_bo_type_device,
                                 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
                                 XE_BO_FLAG_CPU_ADDR_MIRROR);
        if (IS_ERR(bo)) {
                err = PTR_ERR(bo);
                if (xe_vm_validate_should_retry(NULL, err, &end))
                        goto retry;
                goto unlock;
        }

        drm_gpusvm_devmem_init(&bo->devmem_allocation,
                               vm->xe->drm.dev, mm,
                               &gpusvm_devmem_ops,
                               &tile->mem.vram.dpagemap,
                               xe_svm_range_size(range));

        blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
        list_for_each_entry(block, blocks, link)
                block->private = vr;

        xe_bo_get(bo);
        err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
                                           &bo->devmem_allocation, ctx);
        if (err)
                xe_svm_devmem_release(&bo->devmem_allocation);

        xe_bo_unlock(bo);
        xe_bo_put(bo);

unlock:
        mmap_read_unlock(mm);
        mmput(mm);

        return err;
}
#else
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
                             struct xe_svm_range *range,
                             const struct drm_gpusvm_ctx *ctx)
{
        return -EOPNOTSUPP;
}
#endif

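/*
 * Usage sketch (the caller lives outside this file): the GT page fault
 * handler resolves the faulting GPU address to a CPU address mirror VMA,
 * takes vm->lock for write, and calls xe_svm_handle_pagefault() to
 * optionally migrate and then bind the backing pages before the faulted
 * access is retried.
 */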
/**
 * xe_svm_handle_pagefault() - SVM handle page fault
 * @vm: The VM.
 * @vma: The CPU address mirror VMA.
 * @gt: The gt on which the fault occurred.
 * @fault_addr: The GPU fault address.
 * @atomic: The fault atomic access bit.
 *
 * Create GPU bindings for an SVM page fault. Optionally migrate to device
 * memory.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
                            struct xe_gt *gt, u64 fault_addr,
                            bool atomic)
{
        struct drm_gpusvm_ctx ctx = {
                .read_only = xe_vma_read_only(vma),
                .devmem_possible = IS_DGFX(vm->xe) &&
                        IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
                .check_pages_threshold = IS_DGFX(vm->xe) &&
                        IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
        };
        struct xe_svm_range *range;
        struct drm_gpusvm_range *r;
        struct drm_exec exec;
        struct dma_fence *fence;
        struct xe_tile *tile = gt_to_tile(gt);
        ktime_t end = 0;
        int err;

        lockdep_assert_held_write(&vm->lock);
        xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

        xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);

retry:
        /* Always process UNMAPs first so the view of SVM ranges is current */
        err = xe_svm_garbage_collector(vm);
        if (err)
                return err;

        r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
                                            xe_vma_start(vma), xe_vma_end(vma),
                                            &ctx);
        if (IS_ERR(r))
                return PTR_ERR(r);

        range = to_xe_range(r);
        if (xe_svm_range_is_valid(range, tile))
                return 0;

        range_debug(range, "PAGE FAULT");

        /* XXX: Add migration policy, for now migrate range once */
        if (!range->skip_migrate && range->base.flags.migrate_devmem &&
            xe_svm_range_size(range) >= SZ_64K) {
                range->skip_migrate = true;

                err = xe_svm_alloc_vram(vm, tile, range, &ctx);
                if (err) {
                        drm_dbg(&vm->xe->drm,
                                "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
                                vm->usm.asid, ERR_PTR(err));
                        goto retry;
                }
        }

        range_debug(range, "GET PAGES");
        err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
        /* Corner case where CPU mappings have changed */
        if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
                if (err == -EOPNOTSUPP) {
                        range_debug(range, "PAGE FAULT - EVICT PAGES");
                        drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
                }
                drm_dbg(&vm->xe->drm,
                        "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
                        vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
                range_debug(range, "PAGE FAULT - RETRY PAGES");
                goto retry;
        }
        if (err) {
                range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
                goto err_out;
        }

        range_debug(range, "PAGE FAULT - BIND");

retry_bind:
        drm_exec_init(&exec, 0, 0);
        drm_exec_until_all_locked(&exec) {
                err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
                drm_exec_retry_on_contention(&exec);
                if (err) {
                        drm_exec_fini(&exec);
                        goto err_out;
                }

                fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
                if (IS_ERR(fence)) {
                        drm_exec_fini(&exec);
                        err = PTR_ERR(fence);
                        if (err == -EAGAIN) {
                                range_debug(range, "PAGE FAULT - RETRY BIND");
                                goto retry;
                        }
                        if (xe_vm_validate_should_retry(&exec, err, &end))
                                goto retry_bind;
                        goto err_out;
                }
        }
        drm_exec_fini(&exec);

        if (xe_modparam.always_migrate_to_vram)
                range->skip_migrate = false;

        dma_fence_wait(fence, false);
        dma_fence_put(fence);

err_out:

        return err;
}

/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check if an address range has SVM mappings.
 *
 * Return: True if the address range has an SVM mapping, false otherwise.
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
        return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}

/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * SVM evict BO to system memory. The GPU SVM layer ensures all device pages
 * are evicted before returning.
 *
 * Return: 0 on success, standard error code otherwise.
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
        return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)

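/*
 * drm_pagemap hook: translate a device-private page into an address the
 * mapping device can use. Local VRAM pages resolve to a device physical
 * address over XE_INTERCONNECT_VRAM; requests from a device other than the
 * owning one are reported as a mapping error.
 */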
static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
                          struct device *dev,
                          struct page *page,
                          unsigned int order,
                          enum dma_data_direction dir)
{
        struct device *pgmap_dev = dpagemap->dev;
        enum drm_interconnect_protocol prot;
        dma_addr_t addr;

        if (pgmap_dev == dev) {
                addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
                prot = XE_INTERCONNECT_VRAM;
        } else {
                addr = DMA_MAPPING_ERROR;
                prot = 0;
        }

        return drm_pagemap_device_addr_encode(addr, prot, order, dir);
}

static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
        .device_map = xe_drm_pagemap_device_map,
};

/**
 * xe_devm_add() - Remap and provide memmap backing for device memory
 * @tile: tile that the memory region belongs to
 * @vr: vram memory region to remap
 *
 * This remaps device memory to the host physical address space and creates
 * struct pages to back the device memory.
 *
 * Return: 0 on success, standard error code otherwise.
 */
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
        struct xe_device *xe = tile_to_xe(tile);
        struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
        struct resource *res;
        void *addr;
        int ret;

        res = devm_request_free_mem_region(dev, &iomem_resource,
                                           vr->usable_size);
        if (IS_ERR(res)) {
                ret = PTR_ERR(res);
                return ret;
        }

        vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
        vr->pagemap.range.start = res->start;
        vr->pagemap.range.end = res->end;
        vr->pagemap.nr_range = 1;
        vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
        vr->pagemap.owner = xe_svm_devm_owner(xe);
        addr = devm_memremap_pages(dev, &vr->pagemap);

        vr->dpagemap.dev = dev;
        vr->dpagemap.ops = &xe_drm_pagemap_ops;

        if (IS_ERR(addr)) {
                devm_release_mem_region(dev, res->start, resource_size(res));
                ret = PTR_ERR(addr);
                drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
                        tile->id, ERR_PTR(ret));
                return ret;
        }
        vr->hpa_base = res->start;

        drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
                tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
        return 0;
}
#else
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
        return 0;
}
#endif

/**
 * xe_svm_flush() - SVM flush
 * @vm: The VM.
 *
 * Flush all SVM actions.
 */
void xe_svm_flush(struct xe_vm *vm)
{
        if (xe_vm_in_fault_mode(vm))
                flush_work(&vm->svm.garbage_collector.work);
}