// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <linux/pci-p2pdma.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_pagemap_util.h>

#include "xe_bo.h"
#include "xe_exec_queue_types.h"
#include "xe_gt_stats.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pm.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"
#include "xe_vram_types.h"

/* Identifies subclasses of struct drm_pagemap_peer */
#define XE_PEER_PAGEMAP ((void *)0ul)
#define XE_PEER_VM ((void *)1ul)

/**
 * DOC: drm_pagemap reference-counting in xe:
 *
 * In addition to the drm_pagemap internal reference counting by its zone
 * device data, the xe driver holds the following long-time references:
 *
 * - struct xe_pagemap:
 *	The xe_pagemap struct derives from struct drm_pagemap and uses its
 *	reference count.
 * - SVM-enabled VMs:
 *	SVM-enabled VMs look up and keep a reference to all xe_pagemaps on
 *	the same device.
 * - VMAs:
 *	vmas keep a reference on the drm_pagemap indicated by a gpu_madvise()
 *	call.
 *
 * In addition, all drm_pagemap or xe_pagemap pointers where lifetime cannot
 * be guaranteed by a vma reference under the vm lock should keep a reference.
 * That includes the range->pages.dpagemap pointer.
 */

static int xe_svm_get_pagemaps(struct xe_vm *vm);

/**
 * xe_svm_private_page_owner() - Return the private page owner of a VM
 * @vm: The VM whose SVM peer owner is queried.
 * @force_smem: If true, report no owner at all.
 *
 * Return: %NULL if @force_smem is true, otherwise vm->svm.peer.owner.
 */
void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
{
	return force_smem ? NULL : vm->svm.peer.owner;
}

/* Report whether @range's page flags currently say it has device-memory pages. */
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
	/*
	 * Advisory only check whether the range is currently backed by VRAM
	 * memory.
*/

	struct drm_gpusvm_pages_flags flags = {
		/* Pairs with WRITE_ONCE in drm_gpusvm.c */
		.__flags = READ_ONCE(range->base.pages.flags.__flags),
	};

	/* Decode the single snapshot taken above rather than re-reading. */
	return flags.has_devmem_pages;
}

/*
 * True when @range reports device-memory pages and has a non-zero
 * tile_present mask.
 */
static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return xe_svm_range_in_vram(range) && range->tile_present;
}

/* Map a drm_gpusvm embedded as xe_vm::svm.gpusvm back to its xe_vm. */
static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

/* Resolve the xe_vm that owns the gpusvm a range belongs to. */
static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
	return gpusvm_to_vm(r->gpusvm);
}

/*
 * Emit one vm_dbg() line describing an xe_svm_range: the operation label, the
 * VM's asid, the gpusvm pointer, the two advisory VRAM checks, the notifier
 * sequence number and the range's start/end/size. @r__ is evaluated several
 * times, so it must be free of side effects.
 */
#define range_debug(r__, operation__)					\
	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm,			\
	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
	       "start=0x%014lx, end=0x%014lx, size=%lu",		\
	       (operation__), range_to_vm(&(r__)->base)->usm.asid,	\
	       (r__)->base.gpusvm,					\
	       xe_svm_range_in_vram((r__)) ? 1 : 0,			\
	       xe_svm_range_has_vram_binding((r__)) ?
1 : 0,	\
	       (r__)->base.pages.notifier_seq,				\
	       xe_svm_range_start((r__)), xe_svm_range_end((r__)),	\
	       xe_svm_range_size((r__)))

/**
 * xe_svm_range_debug() - Print a debug line describing an SVM range
 * @range: The SVM range to describe.
 * @operation: Label placed at the start of the debug line.
 *
 * Function wrapper around the range_debug() macro.
 */
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
	range_debug(range, operation);
}

/*
 * drm_gpusvm_ops.range_alloc hook: allocate a zeroed xe_svm_range, initialise
 * its garbage-collector link and take a reference on the owning VM. The
 * reference is dropped in xe_svm_range_free().
 *
 * Returns the embedded drm_gpusvm_range, or NULL if the allocation failed.
 */
static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
	struct xe_svm_range *range;

	range = kzalloc_obj(*range);
	if (!range)
		return NULL;

	INIT_LIST_HEAD(&range->garbage_collector_link);
	xe_vm_get(gpusvm_to_vm(gpusvm));

	return &range->base;
}

/*
 * drm_gpusvm_ops.range_free hook: drop the VM reference taken at allocation
 * time and free the range.
 */
static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
	xe_vm_put(range_to_vm(range));
	kfree(range);
}

/*
 * Mark @range as unmapped, put it on the VM's garbage-collector list (unless
 * it is already linked on a list) and kick the garbage-collector work item on
 * the device's pf_wq workqueue.
 */
static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	struct xe_device *xe = vm->xe;

	range_debug(range, "GARBAGE COLLECTOR ADD");

	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

	spin_lock(&vm->svm.garbage_collector.lock);
	/* An empty link means the range has not been queued yet. */
	if (list_empty(&range->garbage_collector_link))
		list_add_tail(&range->garbage_collector_link,
			      &vm->svm.garbage_collector.range_list);
	spin_unlock(&vm->svm.garbage_collector.lock);

	queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work);
}

/* Count one SVM TLB invalidation against @gt. */
static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
{
	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1);
}

/*
 * First phase of invalidating one range from the notifier: zap the range's
 * PTEs on every tile and widen [*adj_start, *adj_end) so that it covers the
 * range.
 *
 * Returns a mask of the tiles on which PTEs were zapped; 0 if the range was
 * already unmapped or has no binding.
 */
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
				  const struct mmu_notifier_range *mmu_range,
				  u64 *adj_start, u64 *adj_end)
{
	struct xe_svm_range *range = to_xe_range(r);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	xe_svm_assert_in_notifier(vm);

	range_debug(range, "NOTIFIER");

	/* Skip if already unmapped or if no binding exists */
	if
(range->base.pages.flags.unmapped || !range->tile_present)
		return 0;

	range_debug(range, "NOTIFIER - EXECUTE");

	/*
	 * Adjust invalidation to range boundaries: min()/max() widen the
	 * window so that it covers the whole range.
	 */
	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
	*adj_end = max(xe_svm_range_end(range), mmu_range->end);

	/*
	 * XXX: Ideally would zap PTEs in one shot in xe_svm_invalidate but the
	 * invalidation code can't correctly cope with sparse ranges or
	 * invalidations spanning multiple ranges.
	 */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes_range(tile, vm, range)) {
			/*
			 * WRITE_ONCE pairs with READ_ONCE in
			 * xe_vm_has_valid_gpu_mapping()
			 */
			WRITE_ONCE(range->tile_invalidated,
				   range->tile_invalidated | BIT(id));

			/* Count the invalidation once per tile, on each of its GTs. */
			if (!(tile_mask & BIT(id))) {
				xe_svm_tlb_inval_count_stats_incr(tile->primary_gt);
				if (tile->media_gt)
					xe_svm_tlb_inval_count_stats_incr(tile->media_gt);
				tile_mask |= BIT(id);
			}
		}

	return tile_mask;
}

/*
 * Second phase of invalidating one range from the notifier: unmap the range's
 * pages and, for an MMU_NOTIFY_UNMAP event on a VM that is not closed, hand
 * the range to the garbage collector.
 */
static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
				const struct mmu_notifier_range *mmu_range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	xe_svm_assert_in_notifier(vm);

	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
						   mmu_range);
}

/* Add the microseconds elapsed since @start to @gt's SVM TLB invalidation time. */
static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
{
	s64 us_delta = xe_gt_stats_ktime_us_delta(start);

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
}

/*
 * drm_gpusvm_ops.invalidate hook: invalidate every SVM range of @notifier
 * that the MMU notifier range touches. PTEs are zapped per range, a single
 * TLB invalidation is issued for the affected tiles, and the pages of every
 * range are then unmapped.
 */
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      struct drm_gpusvm_notifier *notifier,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_tlb_inval_batch batch;
	struct xe_device *xe = vm->xe;
	struct
drm_gpusvm_range *r, *first;
	struct xe_tile *tile;
	ktime_t start = xe_gt_stats_ktime_get();
	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
	u8 tile_mask = 0, id;
	long err;

	xe_svm_assert_in_notifier(vm);

	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
	       mmu_range->start, mmu_range->end, mmu_range->event);

	/*
	 * Adjust invalidation to notifier boundaries: max()/min() clamp the
	 * window so that it does not extend past the notifier.
	 */
	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

	/* Nothing to do if no range overlaps the clamped window. */
	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
	if (!first)
		return;

	/*
	 * PTs may be getting destroyed so it is not safe to touch them, but the
	 * PTs should already be invalidated at this point in time. Regardless,
	 * we still need to ensure any dma mappings are unmapped here.
	 */
	if (xe_vm_is_closed(vm))
		goto range_notifier_event_end;

	/*
	 * XXX: Less than ideal to always wait on VM's resv slots if an
	 * invalidation is not required. Could walk range list twice to figure
	 * out if an invalidation is needed, but also not ideal.
*/
	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	/* A non-positive result is only warned about; invalidation proceeds. */
	XE_WARN_ON(err <= 0);

	/* Phase 1: zap PTEs per range and collect the tiles that need a flush. */
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
							       &adj_start,
							       &adj_end);
	if (!tile_mask)
		goto range_notifier_event_end;

	xe_device_wmb(xe);

	/* One TLB invalidation over the (possibly widened) window, then wait. */
	err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, adj_start, adj_end,
						 tile_mask, &batch);
	if (!WARN_ON_ONCE(err))
		xe_tlb_inval_batch_wait(&batch);

range_notifier_event_end:
	/* Phase 2: unmap pages for every range, also on the early-exit paths. */
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		xe_svm_range_notifier_event_end(vm, r, mmu_range);
	/* Account the elapsed time on the GTs of each tile that was flushed. */
	for_each_tile(tile, xe, id) {
		if (tile_mask & BIT(id)) {
			xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start);
			if (tile->media_gt)
				xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start);
		}
	}
}

/*
 * Garbage-collect a single range: unbind it from the VM while holding the VM
 * lock, then remove it from the gpusvm.
 *
 * Returns 0 on success, or the error encoded in the fence pointer returned by
 * xe_vm_range_unbind(); in that case the range is not removed.
 */
static int __xe_svm_garbage_collector(struct xe_vm *vm,
				      struct xe_svm_range *range)
{
	struct dma_fence *fence;

	range_debug(range, "GARBAGE COLLECTOR");

	xe_vm_lock(vm, false);
	fence = xe_vm_range_unbind(vm, range);
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	/* The fence is not waited on here; only the reference is dropped. */
	dma_fence_put(fence);

	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

	return 0;
}

/*
 * Reset @vma's memory attributes to the defaults listed below, keeping the
 * VMA's own default PAT index.
 */
static void xe_vma_set_default_attributes(struct xe_vma *vma)
{
	struct xe_vma_mem_attr default_attr = {
		.preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
		.preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
		.pat_index = vma->attr.default_pat_index,
		.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
		.purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
	};

	xe_vma_mem_attr_copy(&vma->attr, &default_attr);
}

/*
 * Reset the madvise attributes of the VMA covering [@start, @end) to their
 * defaults after a range has been garbage collected. This only applies to
 * VMAs flagged XE_VMA_MADV_AUTORESET.
 *
 * Returns 0 if nothing had to be re-created, -EINVAL if no VMA is found at
 * @start, -EAGAIN if a new CPU-address-mirror VMA was allocated (callers must
 * look the VMA up again), or the allocation error after killing the VM.
 */
static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end)
{
	struct xe_vma *vma;
bool has_default_attr;
	int err;

	vma = xe_vm_find_vma_by_addr(vm, start);
	if (!vma)
		return -EINVAL;

	/* Only VMAs that opted in to auto-reset are touched. */
	if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) {
		drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n");
		return 0;
	}

	vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx",
	       xe_vma_start(vma), xe_vma_end(vma));

	has_default_attr = xe_vma_has_default_mem_attrs(vma);

	if (has_default_attr) {
		/* Already default: continue with the whole VMA's extent. */
		start = xe_vma_start(vma);
		end = xe_vma_end(vma);
	} else if (xe_vma_start(vma) == start && xe_vma_end(vma) == end) {
		/* The VMA matches the range exactly: reset it in place. */
		xe_vma_set_default_attributes(vma);
	}

	/* May update [start, end) in place. */
	xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);

	/* The window still equals this default-attribute VMA: nothing to re-create. */
	if (xe_vma_start(vma) == start && xe_vma_end(vma) == end && has_default_attr)
		return 0;

	vm_dbg(&vm->xe->drm, "New VMA start=0x%016llx, vma_end=0x%016llx", start, end);

	err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
	if (err) {
		drm_warn(&vm->xe->drm, "New VMA MAP failed: %pe\n", ERR_PTR(err));
		xe_vm_kill(vm, true);
		return err;
	}

	/*
	 * When called from xe_svm_handle_pagefault the original VMA might have
	 * been changed; signal this so that the VMA is looked up again.
*/
	return -EAGAIN;
}

/*
 * Drain the VM's garbage-collector list: unbind and remove every queued range
 * and reset the madvise attributes of the address span it covered.
 *
 * Must be called with vm->lock held for write.
 *
 * Returns -ENOENT if the VM is closed or banned, the first hard error
 * encountered (the VM is killed if the unbind fails), -EAGAIN if any attribute
 * reset returned -EAGAIN, or 0 otherwise.
 */
static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	u64 range_start;
	u64 range_end;
	int err, ret = 0;

	lockdep_assert_held_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return -ENOENT;

	for (;;) {
		spin_lock(&vm->svm.garbage_collector.lock);
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		/* List empty: leave the loop with the spinlock still held. */
		if (!range)
			break;

		/* Capture the extent now; the range is removed further down. */
		range_start = xe_svm_range_start(range);
		range_end = xe_svm_range_end(range);

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			drm_warn(&vm->xe->drm,
				 "Garbage collection failed: %pe\n",
				 ERR_PTR(err));
			xe_vm_kill(vm, true);
			return err;
		}

		err = xe_svm_range_set_default_attr(vm, range_start, range_end);
		if (err) {
			/* -EAGAIN is remembered and reported once the list is drained. */
			if (err == -EAGAIN)
				ret = -EAGAIN;
			else
				return err;
		}
	}
	/* Pairs with the spin_lock() taken on the iteration that hit the break. */
	spin_unlock(&vm->svm.garbage_collector.lock);

	return ret;
}

/* Work item body: run the garbage collector under vm->lock held for write. */
static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm,
					svm.garbage_collector.work);

	down_write(&vm->lock);
	/* The return value is intentionally not acted upon here. */
	xe_svm_garbage_collector(vm);
	up_write(&vm->lock);
}

#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)

/* Return the VRAM region an xe_pagemap refers to. */
static struct xe_vram_region *xe_pagemap_to_vr(struct xe_pagemap *xpagemap)
{
	return xpagemap->vr;
}

/* Map a page to the xe_pagemap that embeds the page's dev_pagemap. */
static struct xe_pagemap *xe_page_to_pagemap(struct page *page)
{
	return container_of(page_pgmap(page), struct xe_pagemap, pagemap);
}

/* Map a page to the VRAM region of its xe_pagemap. */
static struct xe_vram_region *xe_page_to_vr(struct page *page)
{
	return xe_pagemap_to_vr(xe_page_to_pagemap(page));
}

/*
 * Translate a device-private page to a device physical address: the page's
 * offset from the pagemap's hpa_base, added to the region's dpa_base.
 */
static u64 xe_page_to_dpa(struct page *page)
{
	struct xe_pagemap *xpagemap =
xe_page_to_pagemap(page);
	struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
	u64 hpa_base = xpagemap->hpa_base;
	u64 pfn = page_to_pfn(page);
	u64 offset;
	u64 dpa;

	/* The page must be device-private and lie at or above hpa_base. */
	xe_assert(vr->xe, is_device_private_page(page));
	xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);

	offset = (pfn << PAGE_SHIFT) - hpa_base;
	dpa = vr->dpa_base + offset;

	return dpa;
}

/*
 * Translate a page to an address in the region's io window: the page's offset
 * within the region (dpa - dpa_base), added to the region's io_start.
 */
static u64 xe_page_to_pcie(struct page *page)
{
	struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
	struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);

	return xe_page_to_dpa(page) - vr->dpa_base + vr->io_start;
}

/* Direction of an SVM copy. */
enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

/*
 * Account @kb kilobytes copied in direction @dir on @gt. Copies of exactly
 * 4, 64 or 2048 KiB also bump the matching per-size counter; every copy bumps
 * the per-direction total.
 */
static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
				      const enum xe_svm_copy_dir dir,
				      int kb)
{
	if (dir == XE_SVM_COPY_TO_VRAM) {
		switch (kb) {
		case 4:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_KB, kb);
			break;
		case 64:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_KB, kb);
			break;
		case 2048:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_KB, kb);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
	} else {
		switch (kb) {
		case 4:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_KB, kb);
			break;
		case 64:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_KB, kb);
			break;
		case 2048:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_KB, kb);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
	}
}

/*
 * Account the microseconds elapsed since @start for a copy of @npages pages
 * in direction @dir on @gt. Page counts of exactly 1, 16 or 512 also bump the
 * 4K, 64K or 2M counter respectively; every copy bumps the per-direction
 * total.
 */
static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
				      const enum xe_svm_copy_dir dir,
				      unsigned long npages,
				      ktime_t start)
{
	s64 us_delta = xe_gt_stats_ktime_us_delta(start);

	if (dir == XE_SVM_COPY_TO_VRAM) {
		switch (npages) {
		case 1:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
us_delta);
			break;
		case 16:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
					 us_delta);
			break;
		case 512:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
					 us_delta);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
				 us_delta);
	} else {
		switch (npages) {
		case 1:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
					 us_delta);
			break;
		case 16:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
					 us_delta);
			break;
		case 512:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
					 us_delta);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US,
				 us_delta);
	}
}

/*
 * Copy between device pages (@pages) and DMA-mapped addresses (@pagemap_addr)
 * in direction @dir. Runs of physically contiguous device pages are batched
 * into GPU copies, and the last submitted copy is waited on before returning.
 *
 * @pre_migrate_fence is passed to the first copy that gets submitted. If no
 * copy was successfully submitted it is waited on here instead.
 *
 * Returns 0 on success or the error from a failed copy submission.
 */
static int xe_svm_copy(struct page **pages,
		       struct drm_pagemap_addr *pagemap_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir,
		       struct dma_fence *pre_migrate_fence)
{
	struct xe_vram_region *vr = NULL;
	struct xe_gt *gt = NULL;
	/* Assigned together with vr, on the first device page seen in the loop. */
	struct xe_device *xe;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;
	ktime_t start = xe_gt_stats_ktime_get();

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU copy
	 * for every 8M chunk in a DMA address array. Both device pages and
	 * DMA addresses may be sparsely populated. If either is NULL, a copy is
	 * triggered based on the current search state. The last GPU copy is
	 * waited on to ensure all copies are complete.
*/

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		/* pos is the index at which the current contiguous run started. */
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!pagemap_addr[i].addr && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		/* Latch the region, GT and device from the first device page. */
		if (!vr && spage) {
			vr = xe_page_to_vr(spage);
			gt = xe_migrate_exec_queue(vr->migrate)->gt;
			xe = vr->xe;
		}
		/* Every device page is expected to come from that same region. */
		XE_WARN_ON(spage && xe_page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physically contiguous on
		 * subsequent device pages.
		 */
		if (pagemap_addr[i].addr && spage) {
			__vram_addr = xe_page_to_dpa(spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
			/* Expected with contiguous memory */
			xe_assert(vr->xe, match);

			/*
			 * A higher-order entry covers several pages: skip to
			 * its last page and re-evaluate chunk/last there.
			 */
			if (pagemap_addr[i].order) {
				i += NR_PAGES(pagemap_addr[i].order) - 1;
				chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
				last = (i + 1) == npages;
			}
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ?
1 : 0;

			/* Flush the run collected so far, if there is one. */
			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				xe_svm_copy_kb_stats_incr(gt, dir,
							  (i - pos + incr) *
							  (PAGE_SIZE / SZ_1K));
				if (sram) {
					vm_dbg(&xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       vram_addr,
					       (u64)pagemap_addr[pos].addr, i - pos + incr);
					__fence = xe_migrate_from_vram(vr->migrate,
								       i - pos + incr,
								       vram_addr,
								       &pagemap_addr[pos],
								       pre_migrate_fence);
				} else {
					vm_dbg(&xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       (u64)pagemap_addr[pos].addr, vram_addr,
					       i - pos + incr);
					__fence = xe_migrate_to_vram(vr->migrate,
								     i - pos + incr,
								     &pagemap_addr[pos],
								     vram_addr,
								     pre_migrate_fence);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}
				/*
				 * This copy was handed pre_migrate_fence; clear
				 * it so later copies pass NULL and err_out does
				 * not wait on it. Only the newest copy fence is
				 * kept.
				 */
				pre_migrate_fence = NULL;
				dma_fence_put(fence);
				fence = __fence;
			}

			/* Setup physical address of next device page */
			if (pagemap_addr[i].addr && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				xe_svm_copy_kb_stats_incr(gt, dir,
							  (PAGE_SIZE / SZ_1K));
				if (sram) {
					vm_dbg(&xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       vram_addr, (u64)pagemap_addr[pos].addr, 1);
					__fence = xe_migrate_from_vram(vr->migrate, 1,
								       vram_addr,
								       &pagemap_addr[pos],
								       pre_migrate_fence);
				} else {
					vm_dbg(&xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       (u64)pagemap_addr[pos].addr, vram_addr, 1);
					__fence = xe_migrate_to_vram(vr->migrate, 1,
								     &pagemap_addr[pos],
								     vram_addr,
								     pre_migrate_fence);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}
				/* Same fence hand-off as for the batched copy above. */
				pre_migrate_fence = NULL;
				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to
complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}
	/*
	 * Still set only if no copy was successfully submitted, so nothing has
	 * waited on the pre-migrate fence yet; wait on it here.
	 */
	if (pre_migrate_fence)
		dma_fence_wait(pre_migrate_fence, false);

	/*
	 * XXX: We can't derive the GT here (or anywhere in this function), but
	 * compute always uses the primary GT so accumulate stats on the likely
	 * GT of the fault.
	 */
	if (gt)
		xe_svm_copy_us_stats_incr(gt, dir, npages, start);

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

/* drm_pagemap_devmem_ops.copy_to_devmem hook: xe_svm_copy() towards VRAM. */
static int xe_svm_copy_to_devmem(struct page **pages,
				 struct drm_pagemap_addr *pagemap_addr,
				 unsigned long npages,
				 struct dma_fence *pre_migrate_fence)
{
	return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM,
			   pre_migrate_fence);
}

/* drm_pagemap_devmem_ops.copy_to_ram hook: xe_svm_copy() towards system RAM. */
static int xe_svm_copy_to_ram(struct page **pages,
			      struct drm_pagemap_addr *pagemap_addr,
			      unsigned long npages,
			      struct dma_fence *pre_migrate_fence)
{
	return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM,
			   pre_migrate_fence);
}

/* Map a devmem allocation back to the xe_bo that embeds it. */
static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation)
{
	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

/*
 * drm_pagemap_devmem_ops.devmem_release hook: drop the allocation's
 * pre-migrate fence reference, the BO reference (asynchronously) and a
 * runtime-PM reference on the BO's device.
 */
static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	/* Look up the device first; it is used after the BO reference is dropped. */
	struct xe_device *xe = xe_bo_device(bo);

	dma_fence_put(devmem_allocation->pre_migrate_fence);
	xe_bo_put_async(bo);
	xe_pm_runtime_put(xe);
}

/* Convert an offset into the pagemap's backing memory to a PFN via hpa_base. */
static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
{
	struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);

	return PHYS_PFN(offset + xpagemap->hpa_base);
}

/* Return the buddy allocator embedded in a VRAM region's TTM manager. */
static struct gpu_buddy *vram_to_buddy(struct xe_vram_region *vram)
{
	return &vram->ttm.mm;
}

/*
 * drm_pagemap_devmem_ops.populate_devmem_pfn hook: fill @pfn with one PFN per
 * page of every buddy block backing the allocation's BO, in block-list order.
 * @npages is not consulted. Always returns 0.
 */
static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation,
				      unsigned long npages, unsigned long *pfn)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct ttm_resource *res = bo->ttm.resource;
	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
	struct xe_vram_region *vr = xe_map_resource_to_region(res);
	struct gpu_buddy *buddy = vram_to_buddy(vr);
	struct gpu_buddy_block *block;
	/* Running index into pfn[] across all blocks. */
	int j = 0;

	list_for_each_entry(block, blocks, link) {
		u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
						    gpu_buddy_block_offset(block));
		int i;

		/* Each block is contiguous: emit consecutive PFNs for its pages. */
		for (i = 0; i < gpu_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
			pfn[j++] = block_pfn + i;
	}

	return 0;
}

/* Device-memory callbacks used for xe's SVM allocations. */
static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
	.devmem_release = xe_svm_devmem_release,
	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

#else
/* Without CONFIG_DRM_XE_PAGEMAP there are no pagemaps to look up. */
static int xe_svm_get_pagemaps(struct xe_vm *vm)
{
	return 0;
}
#endif

/* GPU SVM callbacks installed by xe_svm_init() for fault-mode VMs. */
static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
	.invalidate = xe_svm_invalidate,
};

/* Chunk sizes handed to drm_gpusvm_init(), largest first. */
static const unsigned long fault_chunk_sizes[] = {
	SZ_2M,
	SZ_64K,
	SZ_4K,
};

/* Drop a reference on the drm_pagemap embedded in @xpagemap. */
static void xe_pagemap_put(struct xe_pagemap *xpagemap)
{
	drm_pagemap_put(&xpagemap->dpagemap);
}

/*
 * Drop the VM's per-tile pagemap references and clear the slots, so that
 * calling this again does not put them a second time.
 */
static void xe_svm_put_pagemaps(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	int id;

	for_each_tile(tile, xe, id) {
		struct xe_pagemap *xpagemap = vm->svm.pagemaps[id];

		if (xpagemap)
			xe_pagemap_put(xpagemap);
		vm->svm.pagemaps[id] = NULL;
	}
}

/*
 * Resolve the struct device behind a drm_pagemap_peer. The peer's private tag
 * tells whether it is embedded in an xe_pagemap or in an xe_vm.
 */
static struct device *xe_peer_to_dev(struct drm_pagemap_peer *peer)
{
	if (peer->private == XE_PEER_PAGEMAP)
		return container_of(peer, struct xe_pagemap, peer)->dpagemap.drm->dev;

return container_of(peer, struct xe_vm, svm.peer)->xe->drm.dev;
}

/*
 * Interconnect predicate passed to drm_pagemap_acquire_owner(): two peers are
 * considered connected if they resolve to the same device, or if
 * pci_p2pdma_distance() between their devices is non-negative.
 */
static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
				struct drm_pagemap_peer *peer2)
{
	struct device *dev1 = xe_peer_to_dev(peer1);
	struct device *dev2 = xe_peer_to_dev(peer2);

	if (dev1 == dev2)
		return true;

	return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0;
}

/* Owner list that SVM peers are registered on in xe_svm_init(). */
static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM. For fault-mode VMs
 * this sets up the garbage collector, acquires a pagemap owner, looks up the
 * pagemaps and initializes the gpusvm against current->mm. For all other VMs
 * the gpusvm is initialized in its "simple" form, without mm, ops or chunk
 * sizes.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
	int err;

	if (vm->flags & XE_VM_FLAG_FAULT_MODE) {
		spin_lock_init(&vm->svm.garbage_collector.lock);
		INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
		INIT_WORK(&vm->svm.garbage_collector.work,
			  xe_svm_garbage_collector_work_func);

		/* Tag the peer so xe_peer_to_dev() knows it lives in an xe_vm. */
		vm->svm.peer.private = XE_PEER_VM;
		err = drm_pagemap_acquire_owner(&vm->svm.peer, &xe_owner_list,
						xe_has_interconnect);
		if (err)
			return err;

		err = xe_svm_get_pagemaps(vm);
		if (err) {
			drm_pagemap_release_owner(&vm->svm.peer);
			return err;
		}

		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
				      current->mm, 0, vm->size,
				      xe_modparam.svm_notifier_size * SZ_1M,
				      &gpusvm_ops, fault_chunk_sizes,
				      ARRAY_SIZE(fault_chunk_sizes));
		/* NOTE(review): runs before err is checked, so also on init failure. */
		drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);

		/* Unwind in reverse order of acquisition. */
		if (err) {
			xe_svm_put_pagemaps(vm);
			drm_pagemap_release_owner(&vm->svm.peer);
			return err;
		}
	} else {
		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
				      &vm->xe->drm, NULL, 0, 0, 0, NULL,
				      NULL, 0);
	}

	return err;
}

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
930 */ 931 void xe_svm_close(struct xe_vm *vm) 932 { 933 xe_assert(vm->xe, xe_vm_is_closed(vm)); 934 disable_work_sync(&vm->svm.garbage_collector.work); 935 xe_svm_put_pagemaps(vm); 936 drm_pagemap_release_owner(&vm->svm.peer); 937 } 938 939 /** 940 * xe_svm_fini() - SVM finalize 941 * @vm: The VM. 942 * 943 * Finalize SVM state which is embedded within the VM. 944 */ 945 void xe_svm_fini(struct xe_vm *vm) 946 { 947 xe_assert(vm->xe, xe_vm_is_closed(vm)); 948 949 drm_gpusvm_fini(&vm->svm.gpusvm); 950 } 951 952 static bool xe_svm_range_has_pagemap_locked(const struct xe_svm_range *range, 953 const struct drm_pagemap *dpagemap) 954 { 955 return range->base.pages.dpagemap == dpagemap; 956 } 957 958 static bool xe_svm_range_has_pagemap(struct xe_svm_range *range, 959 const struct drm_pagemap *dpagemap) 960 { 961 struct xe_vm *vm = range_to_vm(&range->base); 962 bool ret; 963 964 xe_svm_notifier_lock(vm); 965 ret = xe_svm_range_has_pagemap_locked(range, dpagemap); 966 xe_svm_notifier_unlock(vm); 967 968 return ret; 969 } 970 971 static bool xe_svm_range_is_valid(struct xe_svm_range *range, 972 struct xe_tile *tile, 973 bool devmem_only, 974 const struct drm_pagemap *dpagemap) 975 976 { 977 return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present, 978 range->tile_invalidated) && 979 (!devmem_only || xe_svm_range_has_pagemap(range, dpagemap))); 980 } 981 982 /** xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM 983 * @vm: xe_vm pointer 984 * @range: Pointer to the SVM range structure 985 * 986 * The xe_svm_range_migrate_to_smem() checks range has pages in VRAM 987 * and migrates them to SMEM 988 */ 989 void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range) 990 { 991 if (xe_svm_range_in_vram(range)) 992 drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); 993 } 994 995 /** 996 * xe_svm_range_validate() - Check if the SVM range is valid 997 * @vm: xe_vm pointer 998 * @range: Pointer to the SVM range structure 999 * 
@tile_mask: Mask representing the tiles to be checked 1000 * @dpagemap: if !%NULL, the range is expected to be present 1001 * in device memory identified by this parameter. 1002 * 1003 * The xe_svm_range_validate() function checks if a range is 1004 * valid and located in the desired memory region. 1005 * 1006 * Return: true if the range is valid, false otherwise 1007 */ 1008 bool xe_svm_range_validate(struct xe_vm *vm, 1009 struct xe_svm_range *range, 1010 u8 tile_mask, const struct drm_pagemap *dpagemap) 1011 { 1012 bool ret; 1013 1014 xe_svm_notifier_lock(vm); 1015 1016 ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask; 1017 if (dpagemap) 1018 ret = ret && xe_svm_range_has_pagemap_locked(range, dpagemap); 1019 else 1020 ret = ret && !range->base.pages.dpagemap; 1021 1022 xe_svm_notifier_unlock(vm); 1023 1024 return ret; 1025 } 1026 1027 /** 1028 * xe_svm_find_vma_start - Find start of CPU VMA 1029 * @vm: xe_vm pointer 1030 * @start: start address 1031 * @end: end address 1032 * @vma: Pointer to struct xe_vma 1033 * 1034 * 1035 * This function searches for a cpu vma, within the specified 1036 * range [start, end] in the given VM. It adjusts the range based on the 1037 * xe_vma start and end addresses. If no cpu VMA is found, it returns ULONG_MAX. 
1038 * 1039 * Return: The starting address of the VMA within the range, 1040 * or ULONG_MAX if no VMA is found 1041 */ 1042 u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma) 1043 { 1044 return drm_gpusvm_find_vma_start(&vm->svm.gpusvm, 1045 max(start, xe_vma_start(vma)), 1046 min(end, xe_vma_end(vma))); 1047 } 1048 1049 #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) 1050 static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, 1051 unsigned long start, unsigned long end, 1052 struct mm_struct *mm, 1053 unsigned long timeslice_ms) 1054 { 1055 struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap); 1056 struct drm_pagemap_migrate_details mdetails = { 1057 .timeslice_ms = timeslice_ms, 1058 }; 1059 struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap); 1060 struct dma_fence *pre_migrate_fence = NULL; 1061 struct xe_device *xe = vr->xe; 1062 struct device *dev = xe->drm.dev; 1063 struct xe_validation_ctx vctx; 1064 struct drm_exec exec; 1065 struct xe_bo *bo; 1066 int err = 0, idx; 1067 1068 if (!drm_dev_enter(&xe->drm, &idx)) 1069 return -ENODEV; 1070 1071 xe_pm_runtime_get(xe); 1072 1073 xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { 1074 bo = xe_bo_create_locked(xe, NULL, NULL, end - start, 1075 ttm_bo_type_device, 1076 (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) | 1077 XE_BO_FLAG_CPU_ADDR_MIRROR, &exec); 1078 drm_exec_retry_on_contention(&exec); 1079 if (IS_ERR(bo)) { 1080 err = PTR_ERR(bo); 1081 xe_validation_retry_on_oom(&vctx, &err); 1082 break; 1083 } 1084 1085 /* Ensure that any clearing or async eviction will complete before migration. 
*/ 1086 if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) { 1087 err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, 1088 &pre_migrate_fence); 1089 if (err) 1090 dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, 1091 false, MAX_SCHEDULE_TIMEOUT); 1092 else if (pre_migrate_fence) 1093 dma_fence_enable_sw_signaling(pre_migrate_fence); 1094 } 1095 1096 drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, 1097 &dpagemap_devmem_ops, dpagemap, end - start, 1098 pre_migrate_fence); 1099 1100 xe_bo_get(bo); 1101 1102 /* Ensure the device has a pm ref while there are device pages active. */ 1103 xe_pm_runtime_get_noresume(xe); 1104 /* Consumes the devmem allocation ref. */ 1105 err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, 1106 start, end, &mdetails); 1107 xe_bo_unlock(bo); 1108 xe_bo_put(bo); 1109 } 1110 xe_pm_runtime_put(xe); 1111 drm_dev_exit(idx); 1112 1113 return err; 1114 } 1115 #endif 1116 1117 static bool supports_4K_migration(struct xe_device *xe) 1118 { 1119 if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 1120 return false; 1121 1122 return true; 1123 } 1124 1125 /** 1126 * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not 1127 * @range: SVM range for which migration needs to be decided 1128 * @vma: vma which has range 1129 * @dpagemap: The preferred struct drm_pagemap to migrate to. 
1130 * 1131 * Return: True for range needing migration and migration is supported else false 1132 */ 1133 bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma, 1134 const struct drm_pagemap *dpagemap) 1135 { 1136 struct xe_vm *vm = range_to_vm(&range->base); 1137 u64 range_size = xe_svm_range_size(range); 1138 1139 if (!range->base.pages.flags.migrate_devmem || !dpagemap) 1140 return false; 1141 1142 xe_assert(vm->xe, IS_DGFX(vm->xe)); 1143 1144 if (xe_svm_range_has_pagemap(range, dpagemap)) { 1145 drm_dbg(&vm->xe->drm, "Range is already in VRAM\n"); 1146 return false; 1147 } 1148 1149 if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { 1150 drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); 1151 return false; 1152 } 1153 1154 return true; 1155 } 1156 1157 #define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \ 1158 static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \ 1159 struct xe_svm_range *range) \ 1160 { \ 1161 switch (xe_svm_range_size(range)) { \ 1162 case SZ_4K: \ 1163 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \ 1164 break; \ 1165 case SZ_64K: \ 1166 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \ 1167 break; \ 1168 case SZ_2M: \ 1169 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \ 1170 break; \ 1171 } \ 1172 } \ 1173 1174 DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT) 1175 DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT) 1176 DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE) 1177 1178 #define DECL_SVM_RANGE_US_STATS(elem, stat) \ 1179 static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \ 1180 struct xe_svm_range *range, \ 1181 ktime_t start) \ 1182 { \ 1183 s64 us_delta = xe_gt_stats_ktime_us_delta(start); \ 1184 \ 1185 switch (xe_svm_range_size(range)) { \ 1186 case SZ_4K: \ 1187 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \ 1188 us_delta); \ 1189 break; \ 1190 case SZ_64K: \ 1191 
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \ 1192 us_delta); \ 1193 break; \ 1194 case SZ_2M: \ 1195 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \ 1196 us_delta); \ 1197 break; \ 1198 } \ 1199 } \ 1200 1201 DECL_SVM_RANGE_US_STATS(migrate, MIGRATE) 1202 DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES) 1203 DECL_SVM_RANGE_US_STATS(bind, BIND) 1204 DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT) 1205 1206 static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, 1207 struct xe_gt *gt, u64 fault_addr, 1208 bool need_vram) 1209 { 1210 int devmem_possible = IS_DGFX(vm->xe) && 1211 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); 1212 struct drm_gpusvm_ctx ctx = { 1213 .read_only = xe_vma_read_only(vma), 1214 .devmem_possible = devmem_possible, 1215 .check_pages_threshold = devmem_possible ? SZ_64K : 0, 1216 .devmem_only = need_vram && devmem_possible, 1217 .timeslice_ms = need_vram && devmem_possible ? 1218 vm->xe->atomic_svm_timeslice_ms : 0, 1219 }; 1220 struct xe_validation_ctx vctx; 1221 struct drm_exec exec; 1222 struct xe_svm_range *range; 1223 struct dma_fence *fence; 1224 struct drm_pagemap *dpagemap; 1225 struct xe_tile *tile = gt_to_tile(gt); 1226 int migrate_try_count = ctx.devmem_only ? 3 : 1; 1227 ktime_t start = xe_gt_stats_ktime_get(), bind_start, get_pages_start; 1228 int err; 1229 1230 lockdep_assert_held_write(&vm->lock); 1231 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); 1232 1233 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1); 1234 1235 retry: 1236 /* Always process UNMAPs first so view SVM ranges is current */ 1237 err = xe_svm_garbage_collector(vm); 1238 if (err) 1239 return err; 1240 1241 dpagemap = ctx.devmem_only ? 
xe_tile_local_pagemap(tile) : 1242 xe_vma_resolve_pagemap(vma, tile); 1243 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap); 1244 range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx); 1245 1246 if (IS_ERR(range)) 1247 return PTR_ERR(range); 1248 1249 xe_svm_range_fault_count_stats_incr(gt, range); 1250 1251 if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) { 1252 err = -EACCES; 1253 goto out; 1254 } 1255 1256 if (xe_svm_range_is_valid(range, tile, ctx.devmem_only, dpagemap)) { 1257 xe_svm_range_valid_fault_count_stats_incr(gt, range); 1258 range_debug(range, "PAGE FAULT - VALID"); 1259 goto out; 1260 } 1261 1262 range_debug(range, "PAGE FAULT"); 1263 1264 if (--migrate_try_count >= 0 && 1265 xe_svm_range_needs_migrate_to_vram(range, vma, dpagemap)) { 1266 ktime_t migrate_start = xe_gt_stats_ktime_get(); 1267 1268 xe_svm_range_migrate_count_stats_incr(gt, range); 1269 err = xe_svm_alloc_vram(range, &ctx, dpagemap); 1270 xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start); 1271 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 1272 if (err) { 1273 if (migrate_try_count || !ctx.devmem_only) { 1274 drm_dbg(&vm->xe->drm, 1275 "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n", 1276 vm->usm.asid, ERR_PTR(err)); 1277 1278 /* 1279 * In the devmem-only case, mixed mappings may 1280 * be found. The get_pages function will fix 1281 * these up to a single location, allowing the 1282 * page fault handler to make forward progress. 
1283 */ 1284 if (ctx.devmem_only) 1285 goto get_pages; 1286 else 1287 goto retry; 1288 } else { 1289 drm_err(&vm->xe->drm, 1290 "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n", 1291 vm->usm.asid, ERR_PTR(err)); 1292 return err; 1293 } 1294 } 1295 } 1296 1297 get_pages: 1298 get_pages_start = xe_gt_stats_ktime_get(); 1299 1300 range_debug(range, "GET PAGES"); 1301 err = xe_svm_range_get_pages(vm, range, &ctx); 1302 /* Corner where CPU mappings have changed */ 1303 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) { 1304 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 1305 if (migrate_try_count > 0 || !ctx.devmem_only) { 1306 drm_dbg(&vm->xe->drm, 1307 "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n", 1308 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 1309 range_debug(range, "PAGE FAULT - RETRY PAGES"); 1310 goto retry; 1311 } else { 1312 drm_err(&vm->xe->drm, 1313 "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n", 1314 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); 1315 } 1316 } 1317 if (err) { 1318 range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT"); 1319 goto out; 1320 } else if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) { 1321 drm_dbg(&vm->xe->drm, "After page collect data location is %sin \"%s\".\n", 1322 xe_svm_range_has_pagemap(range, dpagemap) ? "" : "NOT ", 1323 dpagemap ? 
dpagemap->drm->unique : "System."); 1324 } 1325 1326 xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start); 1327 range_debug(range, "PAGE FAULT - BIND"); 1328 1329 bind_start = xe_gt_stats_ktime_get(); 1330 xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) { 1331 err = xe_vm_drm_exec_lock(vm, &exec); 1332 drm_exec_retry_on_contention(&exec); 1333 1334 xe_vm_set_validation_exec(vm, &exec); 1335 fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id)); 1336 xe_vm_set_validation_exec(vm, NULL); 1337 if (IS_ERR(fence)) { 1338 drm_exec_retry_on_contention(&exec); 1339 err = PTR_ERR(fence); 1340 xe_validation_retry_on_oom(&vctx, &err); 1341 xe_svm_range_bind_us_stats_incr(gt, range, bind_start); 1342 break; 1343 } 1344 } 1345 if (err) 1346 goto err_out; 1347 1348 dma_fence_wait(fence, false); 1349 dma_fence_put(fence); 1350 xe_svm_range_bind_us_stats_incr(gt, range, bind_start); 1351 1352 out: 1353 xe_svm_range_fault_us_stats_incr(gt, range, start); 1354 return 0; 1355 1356 err_out: 1357 if (err == -EAGAIN) { 1358 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ 1359 range_debug(range, "PAGE FAULT - RETRY BIND"); 1360 goto retry; 1361 } 1362 1363 return err; 1364 } 1365 1366 /** 1367 * xe_svm_handle_pagefault() - SVM handle page fault 1368 * @vm: The VM. 1369 * @vma: The CPU address mirror VMA. 1370 * @gt: The gt upon the fault occurred. 1371 * @fault_addr: The GPU fault address. 1372 * @atomic: The fault atomic access bit. 1373 * 1374 * Create GPU bindings for a SVM page fault. Optionally migrate to device 1375 * memory. 1376 * 1377 * Return: 0 on success, negative error code on error. 
1378 */ 1379 int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, 1380 struct xe_gt *gt, u64 fault_addr, 1381 bool atomic) 1382 { 1383 int need_vram, ret; 1384 retry: 1385 need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic); 1386 if (need_vram < 0) 1387 return need_vram; 1388 1389 ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, 1390 need_vram ? true : false); 1391 if (ret == -EAGAIN) { 1392 /* 1393 * Retry once on -EAGAIN to re-lookup the VMA, as the original VMA 1394 * may have been split by xe_svm_range_set_default_attr. 1395 */ 1396 vma = xe_vm_find_vma_by_addr(vm, fault_addr); 1397 if (!vma) 1398 return -EINVAL; 1399 1400 goto retry; 1401 } 1402 return ret; 1403 } 1404 1405 /** 1406 * xe_svm_has_mapping() - SVM has mappings 1407 * @vm: The VM. 1408 * @start: Start address. 1409 * @end: End address. 1410 * 1411 * Check if an address range has SVM mappings. 1412 * 1413 * Return: True if address range has a SVM mapping, False otherwise 1414 */ 1415 bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) 1416 { 1417 return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end); 1418 } 1419 1420 /** 1421 * xe_svm_unmap_address_range - UNMAP SVM mappings and ranges 1422 * @vm: The VM 1423 * @start: start addr 1424 * @end: end addr 1425 * 1426 * This function UNMAPS svm ranges if start or end address are inside them. 
1427 */ 1428 void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end) 1429 { 1430 struct drm_gpusvm_notifier *notifier, *next; 1431 1432 lockdep_assert_held_write(&vm->lock); 1433 1434 drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) { 1435 struct drm_gpusvm_range *range, *__next; 1436 1437 drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) { 1438 if (start > drm_gpusvm_range_start(range) || 1439 end < drm_gpusvm_range_end(range)) { 1440 if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range))) 1441 drm_gpusvm_range_evict(&vm->svm.gpusvm, range); 1442 drm_gpusvm_range_get(range); 1443 __xe_svm_garbage_collector(vm, to_xe_range(range)); 1444 if (!list_empty(&to_xe_range(range)->garbage_collector_link)) { 1445 spin_lock(&vm->svm.garbage_collector.lock); 1446 list_del(&to_xe_range(range)->garbage_collector_link); 1447 spin_unlock(&vm->svm.garbage_collector.lock); 1448 } 1449 drm_gpusvm_range_put(range); 1450 } 1451 } 1452 } 1453 } 1454 1455 /** 1456 * xe_svm_bo_evict() - SVM evict BO to system memory 1457 * @bo: BO to evict 1458 * 1459 * SVM evict BO to system memory. GPU SVM layer ensures all device pages 1460 * are evicted before returning. 1461 * 1462 * Return: 0 on success standard error code otherwise 1463 */ 1464 int xe_svm_bo_evict(struct xe_bo *bo) 1465 { 1466 return drm_pagemap_evict_to_ram(&bo->devmem_allocation); 1467 } 1468 1469 /** 1470 * xe_svm_range_find_or_insert- Find or insert GPU SVM range 1471 * @vm: xe_vm pointer 1472 * @addr: address for which range needs to be found/inserted 1473 * @vma: Pointer to struct xe_vma which mirrors CPU 1474 * @ctx: GPU SVM context 1475 * 1476 * This function finds or inserts a newly allocated a SVM range based on the 1477 * address. 1478 * 1479 * Return: Pointer to the SVM range on success, ERR_PTR() on failure. 
1480 */ 1481 struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, 1482 struct xe_vma *vma, struct drm_gpusvm_ctx *ctx) 1483 { 1484 struct drm_gpusvm_range *r; 1485 1486 r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)), 1487 xe_vma_start(vma), xe_vma_end(vma), ctx); 1488 if (IS_ERR(r)) 1489 return ERR_CAST(r); 1490 1491 return to_xe_range(r); 1492 } 1493 1494 /** 1495 * xe_svm_range_get_pages() - Get pages for a SVM range 1496 * @vm: Pointer to the struct xe_vm 1497 * @range: Pointer to the xe SVM range structure 1498 * @ctx: GPU SVM context 1499 * 1500 * This function gets pages for a SVM range and ensures they are mapped for 1501 * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range. 1502 * 1503 * Return: 0 on success, negative error code on failure. 1504 */ 1505 int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, 1506 struct drm_gpusvm_ctx *ctx) 1507 { 1508 int err = 0; 1509 1510 err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx); 1511 if (err == -EOPNOTSUPP) { 1512 range_debug(range, "PAGE FAULT - EVICT PAGES"); 1513 drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base); 1514 } 1515 1516 return err; 1517 } 1518 1519 /** 1520 * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range 1521 * @vm: Pointer to the xe_vm structure 1522 * @start: Start of the input range 1523 * @end: End of the input range 1524 * 1525 * This function removes the page table entries (PTEs) associated 1526 * with the svm ranges within the given input start and end 1527 * 1528 * Return: tile_mask for which gt's need to be tlb invalidated. 
1529 */ 1530 u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end) 1531 { 1532 struct drm_gpusvm_notifier *notifier; 1533 struct xe_svm_range *range; 1534 u64 adj_start, adj_end; 1535 struct xe_tile *tile; 1536 u8 tile_mask = 0; 1537 u8 id; 1538 1539 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && 1540 lockdep_is_held_type(&vm->lock, 0)); 1541 1542 drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) { 1543 struct drm_gpusvm_range *r = NULL; 1544 1545 adj_start = max(start, drm_gpusvm_notifier_start(notifier)); 1546 adj_end = min(end, drm_gpusvm_notifier_end(notifier)); 1547 drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) { 1548 range = to_xe_range(r); 1549 for_each_tile(tile, vm->xe, id) { 1550 if (xe_pt_zap_ptes_range(tile, vm, range)) { 1551 tile_mask |= BIT(id); 1552 /* 1553 * WRITE_ONCE pairs with READ_ONCE in 1554 * xe_vm_has_valid_gpu_mapping(). 1555 * Must not fail after setting 1556 * tile_invalidated and before 1557 * TLB invalidation. 1558 */ 1559 WRITE_ONCE(range->tile_invalidated, 1560 range->tile_invalidated | BIT(id)); 1561 } 1562 } 1563 } 1564 } 1565 1566 return tile_mask; 1567 } 1568 1569 #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) 1570 1571 /** 1572 * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA 1573 * @vma: Pointer to the xe_vma structure containing memory attributes 1574 * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping 1575 * 1576 * This function determines the correct DRM pagemap to use for a given VMA. 1577 * It first checks if a valid devmem_fd is provided in the VMA's preferred 1578 * location. If the devmem_fd is negative, it returns NULL, indicating no 1579 * pagemap is available and smem to be used as preferred location. 1580 * If the devmem_fd is equal to the default faulting 1581 * GT identifier, it returns the VRAM pagemap associated with the tile. 
1582 * 1583 * Future support for multi-device configurations may use drm_pagemap_from_fd() 1584 * to resolve pagemaps from arbitrary file descriptors. 1585 * 1586 * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable. 1587 */ 1588 struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) 1589 { 1590 struct drm_pagemap *dpagemap = vma->attr.preferred_loc.dpagemap; 1591 s32 fd; 1592 1593 if (dpagemap) 1594 return dpagemap; 1595 1596 fd = (s32)vma->attr.preferred_loc.devmem_fd; 1597 1598 if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM) 1599 return NULL; 1600 1601 if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE) 1602 return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL; 1603 1604 return NULL; 1605 } 1606 1607 /** 1608 * xe_svm_alloc_vram()- Allocate device memory pages for range, 1609 * migrating existing data. 1610 * @range: SVM range 1611 * @ctx: DRM GPU SVM context 1612 * @dpagemap: The struct drm_pagemap representing the memory to allocate. 1613 * 1614 * Return: 0 on success, error code on failure. 
1615 */ 1616 int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx, 1617 struct drm_pagemap *dpagemap) 1618 { 1619 static DECLARE_RWSEM(driver_migrate_lock); 1620 struct xe_vm *vm = range_to_vm(&range->base); 1621 enum drm_gpusvm_scan_result migration_state; 1622 struct xe_device *xe = vm->xe; 1623 int err, retries = 1; 1624 bool write_locked = false; 1625 1626 xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem); 1627 range_debug(range, "ALLOCATE VRAM"); 1628 1629 migration_state = drm_gpusvm_scan_mm(&range->base, 1630 xe_svm_private_page_owner(vm, false), 1631 dpagemap->pagemap); 1632 1633 if (migration_state == DRM_GPUSVM_SCAN_EQUAL) { 1634 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) 1635 drm_dbg(dpagemap->drm, "Already migrated!\n"); 1636 return 0; 1637 } 1638 1639 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) 1640 drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n", 1641 dpagemap->drm->unique); 1642 1643 err = down_read_interruptible(&driver_migrate_lock); 1644 if (err) 1645 return err; 1646 do { 1647 err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), 1648 xe_svm_range_end(range), 1649 range->base.gpusvm->mm, 1650 ctx->timeslice_ms); 1651 1652 if (err == -EBUSY && retries) { 1653 if (!write_locked) { 1654 int lock_err; 1655 1656 up_read(&driver_migrate_lock); 1657 lock_err = down_write_killable(&driver_migrate_lock); 1658 if (lock_err) 1659 return lock_err; 1660 write_locked = true; 1661 } 1662 drm_gpusvm_range_evict(range->base.gpusvm, &range->base); 1663 } 1664 } while (err == -EBUSY && retries--); 1665 if (write_locked) 1666 up_write(&driver_migrate_lock); 1667 else 1668 up_read(&driver_migrate_lock); 1669 1670 return err; 1671 } 1672 1673 static struct drm_pagemap_addr 1674 xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, 1675 struct device *dev, 1676 struct page *page, 1677 unsigned int order, 1678 enum dma_data_direction dir) 1679 { 1680 struct device *pgmap_dev = 
dpagemap->drm->dev; 1681 enum drm_interconnect_protocol prot; 1682 dma_addr_t addr; 1683 1684 if (pgmap_dev == dev) { 1685 addr = xe_page_to_dpa(page); 1686 prot = XE_INTERCONNECT_VRAM; 1687 } else { 1688 addr = dma_map_resource(dev, 1689 xe_page_to_pcie(page), 1690 PAGE_SIZE << order, dir, 1691 DMA_ATTR_SKIP_CPU_SYNC); 1692 prot = XE_INTERCONNECT_P2P; 1693 } 1694 1695 return drm_pagemap_addr_encode(addr, prot, order, dir); 1696 } 1697 1698 static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap, 1699 struct device *dev, 1700 const struct drm_pagemap_addr *addr) 1701 { 1702 if (addr->proto != XE_INTERCONNECT_P2P) 1703 return; 1704 1705 dma_unmap_resource(dev, addr->addr, PAGE_SIZE << addr->order, 1706 addr->dir, DMA_ATTR_SKIP_CPU_SYNC); 1707 } 1708 1709 static void xe_pagemap_destroy_work(struct work_struct *work) 1710 { 1711 struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work); 1712 struct dev_pagemap *pagemap = &xpagemap->pagemap; 1713 struct drm_device *drm = xpagemap->dpagemap.drm; 1714 int idx; 1715 1716 /* 1717 * Only unmap / release if devm_ release hasn't run yet. 1718 * Otherwise the devm_ callbacks have already released, or 1719 * will do shortly. 
1720 */ 1721 if (drm_dev_enter(drm, &idx)) { 1722 devm_memunmap_pages(drm->dev, pagemap); 1723 devm_release_mem_region(drm->dev, pagemap->range.start, 1724 pagemap->range.end - pagemap->range.start + 1); 1725 drm_dev_exit(idx); 1726 } 1727 1728 drm_pagemap_release_owner(&xpagemap->peer); 1729 kfree(xpagemap); 1730 } 1731 1732 static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or_reclaim) 1733 { 1734 struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap); 1735 struct xe_device *xe = to_xe_device(dpagemap->drm); 1736 1737 if (from_atomic_or_reclaim) 1738 queue_work(xe->destroy_wq, &xpagemap->destroy_work); 1739 else 1740 xe_pagemap_destroy_work(&xpagemap->destroy_work); 1741 } 1742 1743 static const struct drm_pagemap_ops xe_drm_pagemap_ops = { 1744 .device_map = xe_drm_pagemap_device_map, 1745 .device_unmap = xe_drm_pagemap_device_unmap, 1746 .populate_mm = xe_drm_pagemap_populate_mm, 1747 .destroy = xe_pagemap_destroy, 1748 }; 1749 1750 /** 1751 * xe_pagemap_create() - Create a struct xe_pagemap object 1752 * @xe: The xe device. 1753 * @vr: Back-pointer to the struct xe_vram_region. 1754 * 1755 * Allocate and initialize a struct xe_pagemap. On successful 1756 * return, drm_pagemap_put() on the embedded struct drm_pagemap 1757 * should be used to unreference. 1758 * 1759 * Return: Pointer to a struct xe_pagemap if successful. Error pointer 1760 * on failure. 
1761 */ 1762 static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram_region *vr) 1763 { 1764 struct device *dev = xe->drm.dev; 1765 struct xe_pagemap *xpagemap; 1766 struct dev_pagemap *pagemap; 1767 struct drm_pagemap *dpagemap; 1768 struct resource *res; 1769 void *addr; 1770 int err; 1771 1772 xpagemap = kzalloc_obj(*xpagemap); 1773 if (!xpagemap) 1774 return ERR_PTR(-ENOMEM); 1775 1776 pagemap = &xpagemap->pagemap; 1777 dpagemap = &xpagemap->dpagemap; 1778 INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work); 1779 xpagemap->vr = vr; 1780 xpagemap->peer.private = XE_PEER_PAGEMAP; 1781 1782 err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops); 1783 if (err) 1784 goto out_no_dpagemap; 1785 1786 res = devm_request_free_mem_region(dev, &iomem_resource, 1787 vr->usable_size); 1788 if (IS_ERR(res)) { 1789 err = PTR_ERR(res); 1790 goto out_err; 1791 } 1792 1793 err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list, 1794 xe_has_interconnect); 1795 if (err) 1796 goto out_no_owner; 1797 1798 pagemap->type = MEMORY_DEVICE_PRIVATE; 1799 pagemap->range.start = res->start; 1800 pagemap->range.end = res->end; 1801 pagemap->nr_range = 1; 1802 pagemap->owner = xpagemap->peer.owner; 1803 pagemap->ops = drm_pagemap_pagemap_ops_get(); 1804 addr = devm_memremap_pages(dev, pagemap); 1805 if (IS_ERR(addr)) { 1806 err = PTR_ERR(addr); 1807 goto out_no_pages; 1808 } 1809 xpagemap->hpa_base = res->start; 1810 return xpagemap; 1811 1812 out_no_pages: 1813 drm_pagemap_release_owner(&xpagemap->peer); 1814 out_no_owner: 1815 devm_release_mem_region(dev, res->start, res->end - res->start + 1); 1816 out_err: 1817 drm_pagemap_put(dpagemap); 1818 return ERR_PTR(err); 1819 1820 out_no_dpagemap: 1821 kfree(xpagemap); 1822 return ERR_PTR(err); 1823 } 1824 1825 /** 1826 * xe_pagemap_find_or_create() - Find or create a struct xe_pagemap 1827 * @xe: The xe device. 1828 * @cache: The struct xe_pagemap_cache. 1829 * @vr: The VRAM region. 
1830 * 1831 * Check if there is an already used xe_pagemap for this tile, and in that case, 1832 * return it. 1833 * If not, check if there is a cached xe_pagemap for this tile, and in that case, 1834 * cancel its destruction, re-initialize it and return it. 1835 * Finally if there is no cached or already used pagemap, create one and 1836 * register it in the tile's pagemap cache. 1837 * 1838 * Note that this function is typically called from within an IOCTL, and waits are 1839 * therefore carried out interruptible if possible. 1840 * 1841 * Return: A pointer to a struct xe_pagemap if successful, Error pointer on failure. 1842 */ 1843 static struct xe_pagemap * 1844 xe_pagemap_find_or_create(struct xe_device *xe, struct drm_pagemap_cache *cache, 1845 struct xe_vram_region *vr) 1846 { 1847 struct drm_pagemap *dpagemap; 1848 struct xe_pagemap *xpagemap; 1849 int err; 1850 1851 err = drm_pagemap_cache_lock_lookup(cache); 1852 if (err) 1853 return ERR_PTR(err); 1854 1855 dpagemap = drm_pagemap_get_from_cache(cache); 1856 if (IS_ERR(dpagemap)) { 1857 xpagemap = ERR_CAST(dpagemap); 1858 } else if (!dpagemap) { 1859 xpagemap = xe_pagemap_create(xe, vr); 1860 if (IS_ERR(xpagemap)) 1861 goto out_unlock; 1862 drm_pagemap_cache_set_pagemap(cache, &xpagemap->dpagemap); 1863 } else { 1864 xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap); 1865 } 1866 1867 out_unlock: 1868 drm_pagemap_cache_unlock_lookup(cache); 1869 return xpagemap; 1870 } 1871 1872 static int xe_svm_get_pagemaps(struct xe_vm *vm) 1873 { 1874 struct xe_device *xe = vm->xe; 1875 struct xe_pagemap *xpagemap; 1876 struct xe_tile *tile; 1877 int id; 1878 1879 for_each_tile(tile, xe, id) { 1880 struct xe_vram_region *vr; 1881 1882 if (!((BIT(id) << 1) & xe->info.mem_region_mask)) 1883 continue; 1884 1885 vr = xe_tile_to_vr(tile); 1886 xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr); 1887 if (IS_ERR(xpagemap)) 1888 break; 1889 vm->svm.pagemaps[id] = xpagemap; 1890 } 1891 1892 if 
(IS_ERR(xpagemap)) { 1893 xe_svm_put_pagemaps(vm); 1894 return PTR_ERR(xpagemap); 1895 } 1896 1897 return 0; 1898 } 1899 1900 /** 1901 * xe_pagemap_shrinker_create() - Create a drm_pagemap shrinker 1902 * @xe: The xe device 1903 * 1904 * Create a drm_pagemap shrinker and register with the xe device. 1905 * 1906 * Return: %0 on success, negative error code on failure. 1907 */ 1908 int xe_pagemap_shrinker_create(struct xe_device *xe) 1909 { 1910 xe->usm.dpagemap_shrinker = drm_pagemap_shrinker_create_devm(&xe->drm); 1911 return PTR_ERR_OR_ZERO(xe->usm.dpagemap_shrinker); 1912 } 1913 1914 /** 1915 * xe_pagemap_cache_create() - Create a drm_pagemap cache 1916 * @tile: The tile to register the cache with 1917 * 1918 * Create a drm_pagemap cache and register with the tile. 1919 * 1920 * Return: %0 on success, negative error code on failure. 1921 */ 1922 int xe_pagemap_cache_create(struct xe_tile *tile) 1923 { 1924 struct xe_device *xe = tile_to_xe(tile); 1925 1926 if (IS_DGFX(xe)) { 1927 struct drm_pagemap_cache *cache = 1928 drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker); 1929 1930 if (IS_ERR(cache)) 1931 return PTR_ERR(cache); 1932 1933 tile->mem.vram->dpagemap_cache = cache; 1934 } 1935 1936 return 0; 1937 } 1938 1939 static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance) 1940 { 1941 u32 tile_id = region_instance - 1; 1942 struct xe_pagemap *xpagemap; 1943 struct xe_vram_region *vr; 1944 1945 if (tile_id >= xe->info.tile_count) 1946 return ERR_PTR(-ENOENT); 1947 1948 if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask)) 1949 return ERR_PTR(-ENOENT); 1950 1951 vr = xe_tile_to_vr(&xe->tiles[tile_id]); 1952 1953 /* Returns a reference-counted embedded struct drm_pagemap */ 1954 xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr); 1955 if (IS_ERR(xpagemap)) 1956 return ERR_CAST(xpagemap); 1957 1958 return &xpagemap->dpagemap; 1959 } 1960 1961 /** 1962 * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a 
1963 * (file_descriptor, region_instance) pair. 1964 * @fd: An fd opened against an xe device. 1965 * @region_instance: The region instance representing the device memory 1966 * on the opened xe device. 1967 * 1968 * Opens a struct drm_pagemap pointer on the 1969 * indicated device and region_instance. 1970 * 1971 * Return: A reference-counted struct drm_pagemap pointer on success, 1972 * negative error pointer on failure. 1973 */ 1974 struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance) 1975 { 1976 struct drm_pagemap *dpagemap; 1977 struct file *file; 1978 struct drm_file *fpriv; 1979 struct drm_device *drm; 1980 int idx; 1981 1982 if (fd <= 0) 1983 return ERR_PTR(-EINVAL); 1984 1985 file = fget(fd); 1986 if (!file) 1987 return ERR_PTR(-ENOENT); 1988 1989 if (!xe_is_xe_file(file)) { 1990 dpagemap = ERR_PTR(-ENOENT); 1991 goto out; 1992 } 1993 1994 fpriv = file->private_data; 1995 drm = fpriv->minor->dev; 1996 if (!drm_dev_enter(drm, &idx)) { 1997 dpagemap = ERR_PTR(-ENODEV); 1998 goto out; 1999 } 2000 2001 dpagemap = xe_devmem_open(to_xe_device(drm), region_instance); 2002 drm_dev_exit(idx); 2003 out: 2004 fput(file); 2005 return dpagemap; 2006 } 2007 2008 #else 2009 2010 int xe_pagemap_shrinker_create(struct xe_device *xe) 2011 { 2012 return 0; 2013 } 2014 2015 int xe_pagemap_cache_create(struct xe_tile *tile) 2016 { 2017 return 0; 2018 } 2019 2020 int xe_svm_alloc_vram(struct xe_svm_range *range, 2021 const struct drm_gpusvm_ctx *ctx, 2022 struct drm_pagemap *dpagemap) 2023 { 2024 return -EOPNOTSUPP; 2025 } 2026 2027 struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile) 2028 { 2029 return NULL; 2030 } 2031 2032 struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance) 2033 { 2034 return ERR_PTR(-ENOENT); 2035 } 2036 2037 #endif 2038 2039 /** 2040 * xe_svm_flush() - SVM flush 2041 * @vm: The VM. 2042 * 2043 * Flush all SVM actions. 
2044 */ 2045 void xe_svm_flush(struct xe_vm *vm) 2046 { 2047 if (xe_vm_in_fault_mode(vm)) 2048 flush_work(&vm->svm.garbage_collector.work); 2049 } 2050