// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include "xe_bo.h"
#include "xe_gt_stats.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return range->base.flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return xe_svm_range_in_vram(range) && range->tile_present;
}

static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
	return gpusvm_to_vm(r->gpusvm);
}

static unsigned long xe_svm_range_start(struct xe_svm_range *range)
{
	return drm_gpusvm_range_start(&range->base);
}

static unsigned long xe_svm_range_end(struct xe_svm_range *range)
{
	return drm_gpusvm_range_end(&range->base);
}

static unsigned long xe_svm_range_size(struct xe_svm_range *range)
{
	return drm_gpusvm_range_size(&range->base);
}

#define range_debug(r__, operation__) \
	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
	       "start=0x%014lx, end=0x%014lx, size=%lu", \
	       (operation__), range_to_vm(&(r__)->base)->usm.asid, \
	       (r__)->base.gpusvm, \
	       xe_svm_range_in_vram((r__)) ? 1 : 0, \
	       xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
	       (r__)->base.notifier_seq, \
	       xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
	       xe_svm_range_size((r__)))

/**
 * xe_svm_range_debug() - SVM range debug printout
 * @range: SVM range
 * @operation: The operation being logged
 *
 * Non-static wrapper around range_debug() so the same debug output can be
 * emitted from outside this file.
 */
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
	range_debug(range, operation);
}

static void *xe_svm_devm_owner(struct xe_device *xe)
{
	return xe;
}

static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
	struct xe_svm_range *range;

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&range->garbage_collector_link);
	xe_vm_get(gpusvm_to_vm(gpusvm));

	return &range->base;
}

static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
	xe_vm_put(range_to_vm(range));
	kfree(range);
}

static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
{
	return container_of(r, struct xe_svm_range, base);
}

static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	struct xe_device *xe = vm->xe;

	range_debug(range, "GARBAGE COLLECTOR ADD");

	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

	spin_lock(&vm->svm.garbage_collector.lock);
	if (list_empty(&range->garbage_collector_link))
		list_add_tail(&range->garbage_collector_link,
			      &vm->svm.garbage_collector.range_list);
	spin_unlock(&vm->svm.garbage_collector.lock);

	queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
		   &vm->svm.garbage_collector.work);
}
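/*
 * Note on the flow above: the range is only marked unmapped and queued here;
 * the actual unbind and removal happen later in xe_svm_garbage_collector(),
 * which runs from the page-fault workqueue with vm->lock held for write,
 * rather than directly from MMU-notifier context.
 */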
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
				  const struct mmu_notifier_range *mmu_range,
				  u64 *adj_start, u64 *adj_end)
{
	struct xe_svm_range *range = to_xe_range(r);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	xe_svm_assert_in_notifier(vm);

	range_debug(range, "NOTIFIER");

	/* Skip if already unmapped or if no bindings exist */
	if (range->base.flags.unmapped || !range->tile_present)
		return 0;

	range_debug(range, "NOTIFIER - EXECUTE");

	/* Adjust invalidation to range boundaries */
	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
	*adj_end = max(xe_svm_range_end(range), mmu_range->end);

	/*
	 * XXX: Ideally we would zap PTEs in one shot in xe_svm_invalidate but
	 * the invalidation code can't correctly cope with sparse ranges or
	 * invalidations spanning multiple ranges.
	 */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes_range(tile, vm, range)) {
			tile_mask |= BIT(id);
			range->tile_invalidated |= BIT(id);
		}

	return tile_mask;
}

static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
				const struct mmu_notifier_range *mmu_range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	xe_svm_assert_in_notifier(vm);

	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
						   mmu_range);
}
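/*
 * Illustration of the widening done above in
 * xe_svm_range_notifier_event_begin() (addresses are hypothetical): if the
 * CPU invalidates [0x201000, 0x202000) but the backing SVM range spans
 * [0x200000, 0x400000), adj_start/adj_end are expanded to the full range so
 * the GPU TLB invalidation issued below covers every PTE that was zapped.
 */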
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      struct drm_gpusvm_notifier *notifier,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct drm_gpusvm_range *r, *first;
	struct xe_gt_tlb_invalidation_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
	u8 tile_mask = 0;
	u8 id;
	u32 fence_id = 0;
	long err;

	xe_svm_assert_in_notifier(vm);

	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
	       mmu_range->start, mmu_range->end, mmu_range->event);

	/* Adjust invalidation to notifier boundaries */
	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
	if (!first)
		return;

	/*
	 * PTs may be getting destroyed so it is not safe to touch them, but
	 * the PTs should already be invalidated at this point in time.
	 * Regardless, we still need to ensure any DMA mappings are unmapped
	 * here.
	 */
	if (xe_vm_is_closed(vm))
		goto range_notifier_event_end;

	/*
	 * XXX: Less than ideal to always wait on the VM's resv slots if an
	 * invalidation is not required. Could walk the range list twice to
	 * figure out if an invalidation is needed, but that is also not ideal.
	 */
	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
							       &adj_start,
							       &adj_end);
	if (!tile_mask)
		goto range_notifier_event_end;

	xe_device_wmb(xe);

	for_each_tile(tile, xe, id) {
		if (tile_mask & BIT(id)) {
			int err;

			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->media_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;
		}
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

range_notifier_event_end:
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		xe_svm_range_notifier_event_end(vm, r, mmu_range);
}

static int __xe_svm_garbage_collector(struct xe_vm *vm,
				      struct xe_svm_range *range)
{
	struct dma_fence *fence;

	range_debug(range, "GARBAGE COLLECTOR");

	xe_vm_lock(vm, false);
	fence = xe_vm_range_unbind(vm, range);
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	dma_fence_put(fence);

	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

	return 0;
}

static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return -ENOENT;

	spin_lock(&vm->svm.garbage_collector.lock);
	for (;;) {
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			drm_warn(&vm->xe->drm,
				 "Garbage collection failed: %pe\n",
				 ERR_PTR(err));
			xe_vm_kill(vm, true);
			return err;
		}

		spin_lock(&vm->svm.garbage_collector.lock);
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	return 0;
}

static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm,
					svm.garbage_collector.work);

	down_write(&vm->lock);
	xe_svm_garbage_collector(vm);
	up_write(&vm->lock);
}

static struct xe_vram_region *page_to_vr(struct page *page)
{
	return container_of(page->pgmap, struct xe_vram_region, pagemap);
}

static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
{
	return container_of(vr, struct xe_tile, mem.vram);
}

static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
				      struct page *page)
{
	u64 dpa;
	struct xe_tile *tile = vr_to_tile(vr);
	u64 pfn = page_to_pfn(page);
	u64 offset;

	xe_tile_assert(tile, is_device_private_page(page));
	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);

	offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
	dpa = vr->dpa_base + offset;

	return dpa;
}
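/*
 * Worked example for xe_vram_region_page_to_dpa() above, with purely
 * illustrative values: assuming vr->hpa_base = 0x400000000, vr->dpa_base = 0
 * and PAGE_SHIFT = 12, a device page with pfn 0x400010 yields
 * offset = (0x400010 << 12) - 0x400000000 = 0x10000 and therefore
 * dpa = 0 + 0x10000, i.e. the page's byte offset into the remapped region
 * added to the VRAM DPA base.
 */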
enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};
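/*
 * Sketch of how xe_svm_copy() below batches work, with hypothetical numbers:
 * if pages[0..7] are physically contiguous in VRAM but pages[8] is not, the
 * loop accumulates indices 0..7 (match stays true), detects the discontinuity
 * at index 8 and emits one 8-page GPU copy starting at dma_addr[0], then
 * starts a new run at index 8. A run is also flushed when it reaches
 * XE_MIGRATE_CHUNK_SIZE (8M, i.e. 2048 4K pages) or at the last page.
 */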
static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir)
{
	struct xe_vram_region *vr = NULL;
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU
	 * copy for every 8M chunk in a DMA address array. Both device pages
	 * and DMA addresses may be sparsely populated. If either is NULL, a
	 * copy is triggered based on the current search state. The last GPU
	 * copy is waited on to ensure all copies are complete.
	 */

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = page_to_vr(spage);
			tile = vr_to_tile(vr);
		}
		XE_WARN_ON(spage && page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physically contiguous on
		 * subsequent device pages.
		 */
		if (dma_addr[i] && spage) {
			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       vram_addr, (u64)dma_addr[pos], i - pos + incr);
					__fence = xe_migrate_from_vram(tile->migrate,
								       i - pos + incr,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       (u64)dma_addr[pos], vram_addr, i - pos + incr);
					__fence = xe_migrate_to_vram(tile->migrate,
								     i - pos + incr,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}

			/* Set up the physical address of the next device page */
			if (dma_addr[i] && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       vram_addr, (u64)dma_addr[pos], 1);
					__fence = xe_migrate_from_vram(tile->migrate, 1,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       (u64)dma_addr[pos], vram_addr, 1);
					__fence = xe_migrate_to_vram(tile->migrate, 1,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
				 unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
			      unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
{
	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);

	xe_bo_put_async(bo);
}

static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
{
	return PHYS_PFN(offset + vr->hpa_base);
}

static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
{
	return &tile->mem.vram.ttm.mm;
}

static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
				      unsigned long npages, unsigned long *pfn)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct ttm_resource *res = bo->ttm.resource;
	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
	struct drm_buddy_block *block;
	int j = 0;

	list_for_each_entry(block, blocks, link) {
		struct xe_vram_region *vr = block->private;
		struct xe_tile *tile = vr_to_tile(vr);
		struct drm_buddy *buddy = tile_to_buddy(tile);
		u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
		int i;

		for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
			pfn[j++] = block_pfn + i;
	}

	return 0;
}

static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
	.devmem_release = xe_svm_devmem_release,
	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
	.invalidate = xe_svm_invalidate,
};
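/*
 * Granularities used when creating SVM ranges for a fault, listed from
 * largest to smallest; the GPU SVM core is expected to pick the largest
 * chunk that fits within, and is aligned to, the faulting VMA (see
 * drm_gpusvm_init() below, which receives this array).
 */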
static const unsigned long fault_chunk_sizes[] = {
	SZ_2M,
	SZ_64K,
	SZ_4K,
};

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
	int err;

	spin_lock_init(&vm->svm.garbage_collector.lock);
	INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
	INIT_WORK(&vm->svm.garbage_collector.work,
		  xe_svm_garbage_collector_work_func);

	err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
			      current->mm, xe_svm_devm_owner(vm->xe), 0,
			      vm->size, xe_modparam.svm_notifier_size * SZ_1M,
			      &gpusvm_ops, fault_chunk_sizes,
			      ARRAY_SIZE(fault_chunk_sizes));
	if (err)
		return err;

	drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);

	return 0;
}

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
 */
void xe_svm_close(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));
	flush_work(&vm->svm.garbage_collector.work);
}

/**
 * xe_svm_fini() - SVM finalize
 * @vm: The VM.
 *
 * Finalize SVM state which is embedded within the VM.
 */
void xe_svm_fini(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));

	drm_gpusvm_fini(&vm->svm.gpusvm);
}
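/*
 * Expected lifecycle of the SVM state above (a sketch; the actual call sites
 * live in the VM code): xe_svm_init() during VM creation, xe_svm_close() once
 * the VM has been closed (both xe_svm_close() and xe_svm_fini() assert
 * xe_vm_is_closed()), and xe_svm_fini() on final VM teardown.
 */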
static bool xe_svm_range_is_valid(struct xe_svm_range *range,
				  struct xe_tile *tile)
{
	return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
}

static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
	return &tile->mem.vram;
}

static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_svm_range *range,
			     const struct drm_gpusvm_ctx *ctx)
{
	struct mm_struct *mm = vm->svm.gpusvm.mm;
	struct xe_vram_region *vr = tile_to_vr(tile);
	struct drm_buddy_block *block;
	struct list_head *blocks;
	struct xe_bo *bo;
	ktime_t end = 0;
	int err;

	range_debug(range, "ALLOCATE VRAM");

	if (!mmget_not_zero(mm))
		return -EFAULT;
	mmap_read_lock(mm);

retry:
	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
				 xe_svm_range_size(range),
				 ttm_bo_type_device,
				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				 XE_BO_FLAG_CPU_ADDR_MIRROR);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		if (xe_vm_validate_should_retry(NULL, err, &end))
			goto retry;
		goto unlock;
	}

	drm_gpusvm_devmem_init(&bo->devmem_allocation,
			       vm->xe->drm.dev, mm,
			       &gpusvm_devmem_ops,
			       &tile->mem.vram.dpagemap,
			       xe_svm_range_size(range));

	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
	list_for_each_entry(block, blocks, link)
		block->private = vr;

	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
					   &bo->devmem_allocation, ctx);
	xe_bo_unlock(bo);
	if (err)
		xe_bo_put(bo);	/* Creation ref */

unlock:
	mmap_read_unlock(mm);
	mmput(mm);

	return err;
}
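/*
 * High-level flow of the page-fault handler below: run the garbage collector
 * so the SVM range view is current, find or insert the range covering the
 * fault, optionally migrate it to VRAM, collect the backing pages, rebind the
 * range on the faulting tile, and wait for the bind fence. Most failures fall
 * back to retrying the whole sequence.
 */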
/**
 * xe_svm_handle_pagefault() - SVM handle page fault
 * @vm: The VM.
 * @vma: The CPU address mirror VMA.
 * @gt: The gt upon which the fault occurred.
 * @fault_addr: The GPU fault address.
 * @atomic: The fault atomic access bit.
 *
 * Create GPU bindings for an SVM page fault. Optionally migrate to device
 * memory.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_gt *gt, u64 fault_addr,
			    bool atomic)
{
	struct drm_gpusvm_ctx ctx = {
		.read_only = xe_vma_read_only(vma),
		.devmem_possible = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.check_pages_threshold = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
	};
	struct xe_svm_range *range;
	struct drm_gpusvm_range *r;
	struct drm_exec exec;
	struct dma_fence *fence;
	struct xe_tile *tile = gt_to_tile(gt);
	ktime_t end = 0;
	int err;

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);

retry:
	/* Always process UNMAPs first so the view of SVM ranges is current */
	err = xe_svm_garbage_collector(vm);
	if (err)
		return err;

	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
					    xe_vma_start(vma), xe_vma_end(vma),
					    &ctx);
	if (IS_ERR(r))
		return PTR_ERR(r);

	range = to_xe_range(r);
	if (xe_svm_range_is_valid(range, tile))
		return 0;

	range_debug(range, "PAGE FAULT");

	/* XXX: Add migration policy, for now migrate range once */
	if (!range->skip_migrate && range->base.flags.migrate_devmem &&
	    xe_svm_range_size(range) >= SZ_64K) {
		range->skip_migrate = true;

		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
		if (err) {
			drm_dbg(&vm->xe->drm,
				"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
				vm->usm.asid, ERR_PTR(err));
			goto retry;
		}
	}

	range_debug(range, "GET PAGES");
	err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
	/* Corner case where CPU mappings have changed */
	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
		if (err == -EOPNOTSUPP) {
			range_debug(range, "PAGE FAULT - EVICT PAGES");
			drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
		}
		drm_dbg(&vm->xe->drm,
			"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
			vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
		range_debug(range, "PAGE FAULT - RETRY PAGES");
		goto retry;
	}
	if (err) {
		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
		goto err_out;
	}

	range_debug(range, "PAGE FAULT - BIND");

retry_bind:
	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
		drm_exec_retry_on_contention(&exec);
		if (err) {
			drm_exec_fini(&exec);
			goto err_out;
		}

		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
		if (IS_ERR(fence)) {
			drm_exec_fini(&exec);
			err = PTR_ERR(fence);
			if (err == -EAGAIN) {
				range_debug(range, "PAGE FAULT - RETRY BIND");
				goto retry;
			}
			if (xe_vm_validate_should_retry(&exec, err, &end))
				goto retry_bind;
			goto err_out;
		}
	}
	drm_exec_fini(&exec);

	if (xe_modparam.always_migrate_to_vram)
		range->skip_migrate = false;

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

err_out:
	return err;
}

/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check if an address range has SVM mappings.
 *
 * Return: True if the address range has an SVM mapping, False otherwise
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}

/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * SVM evict BO to system memory. The GPU SVM layer ensures all device pages
 * are evicted before returning.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
	return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
}
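/*
 * When device-memory mirroring is enabled, the drm_pagemap ops below describe
 * how our device pages are mapped for a consumer: if the importing device is
 * the device that owns the pagemap, the page is exposed at its device
 * physical address over XE_INTERCONNECT_VRAM; otherwise the mapping is
 * reported as failed (DMA_MAPPING_ERROR).
 */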
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
			  struct device *dev,
			  struct page *page,
			  unsigned int order,
			  enum dma_data_direction dir)
{
	struct device *pgmap_dev = dpagemap->dev;
	enum drm_interconnect_protocol prot;
	dma_addr_t addr;

	if (pgmap_dev == dev) {
		addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
		prot = XE_INTERCONNECT_VRAM;
	} else {
		addr = DMA_MAPPING_ERROR;
		prot = 0;
	}

	return drm_pagemap_device_addr_encode(addr, prot, order, dir);
}

static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
	.device_map = xe_drm_pagemap_device_map,
};

/**
 * xe_devm_add() - Remap and provide memmap backing for device memory
 * @tile: tile that the memory region belongs to
 * @vr: vram memory region to remap
 *
 * This remaps device memory into the host physical address space and creates
 * struct pages to back the device memory.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
	struct resource *res;
	void *addr;
	int ret;

	res = devm_request_free_mem_region(dev, &iomem_resource,
					   vr->usable_size);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		return ret;
	}

	vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	vr->pagemap.range.start = res->start;
	vr->pagemap.range.end = res->end;
	vr->pagemap.nr_range = 1;
	vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
	vr->pagemap.owner = xe_svm_devm_owner(xe);
	addr = devm_memremap_pages(dev, &vr->pagemap);

	vr->dpagemap.dev = dev;
	vr->dpagemap.ops = &xe_drm_pagemap_ops;

	if (IS_ERR(addr)) {
		devm_release_mem_region(dev, res->start, resource_size(res));
		ret = PTR_ERR(addr);
		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
			tile->id, ERR_PTR(ret));
		return ret;
	}
	vr->hpa_base = res->start;

	drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
		tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
	return 0;
}
#else
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	return 0;
}
#endif