// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <linux/pci-p2pdma.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_pagemap.h>
#include <drm/drm_pagemap_util.h>

#include "xe_bo.h"
#include "xe_exec_queue_types.h"
#include "xe_gt_stats.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pm.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_tile.h"
#include "xe_tlb_inval.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"
#include "xe_vram_types.h"

/* Identifies subclasses of struct drm_pagemap_peer */
#define XE_PEER_PAGEMAP ((void *)0ul)
#define XE_PEER_VM ((void *)1ul)

/**
 * DOC: drm_pagemap reference-counting in xe:
 *
 * In addition to the drm_pagemap internal reference counting by its zone
 * device data, the xe driver holds the following long-term references:
 *
 * - struct xe_pagemap:
 *   The xe_pagemap struct derives from struct drm_pagemap and uses its
 *   reference count.
 * - SVM-enabled VMs:
 *   SVM-enabled VMs look up and keep a reference to all xe_pagemaps on
 *   the same device.
 * - VMAs:
 *   vmas keep a reference on the drm_pagemap indicated by a gpu_madvise()
 *   call.
 *
 * In addition, all drm_pagemap or xe_pagemap pointers whose lifetime cannot
 * be guaranteed by a vma reference under the vm lock should keep a reference.
 * That includes the range->pages.dpagemap pointer.
 */
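/*
 * Illustrative sketch only (not a driver entry point): a helper that needs
 * range->pages.dpagemap to outlive the notifier lock would follow the rule
 * above by taking its own reference, assuming the drm_pagemap_get()/
 * drm_pagemap_put() helpers of the drm_pagemap library:
 *
 *	xe_svm_notifier_lock(vm);
 *	dpagemap = drm_pagemap_get(range->base.pages.dpagemap);
 *	xe_svm_notifier_unlock(vm);
 *	... use dpagemap ...
 *	drm_pagemap_put(dpagemap);
 */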

static int xe_svm_get_pagemaps(struct xe_vm *vm);

void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
{
	return force_smem ? NULL : vm->svm.peer.owner;
}

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
	/*
	 * Advisory-only check of whether the range is currently backed by
	 * VRAM memory.
	 */
	struct drm_gpusvm_pages_flags flags = {
		/* Pairs with WRITE_ONCE in drm_gpusvm.c */
		.__flags = READ_ONCE(range->base.pages.flags.__flags),
	};

	return flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return xe_svm_range_in_vram(range) && range->tile_present;
}

static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
	return gpusvm_to_vm(r->gpusvm);
}

#define range_debug(r__, operation__) \
	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
	       "start=0x%014lx, end=0x%014lx, size=%lu", \
	       (operation__), range_to_vm(&(r__)->base)->usm.asid, \
	       (r__)->base.gpusvm, \
	       xe_svm_range_in_vram((r__)) ? 1 : 0, \
	       xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
	       (r__)->base.pages.notifier_seq, \
	       xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
	       xe_svm_range_size((r__)))

void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
	range_debug(range, operation);
}

static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
	struct xe_svm_range *range;

	range = kzalloc_obj(*range);
	if (!range)
		return NULL;

	INIT_LIST_HEAD(&range->garbage_collector_link);
	xe_vm_get(gpusvm_to_vm(gpusvm));

	return &range->base;
}

static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
	xe_vm_put(range_to_vm(range));
	kfree(range);
}

static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	struct xe_device *xe = vm->xe;

	range_debug(range, "GARBAGE COLLECTOR ADD");

	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

	spin_lock(&vm->svm.garbage_collector.lock);
	if (list_empty(&range->garbage_collector_link))
		list_add_tail(&range->garbage_collector_link,
			      &vm->svm.garbage_collector.range_list);
	spin_unlock(&vm->svm.garbage_collector.lock);

	queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work);
}

static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
{
	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1);
}

static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
				  const struct mmu_notifier_range *mmu_range,
				  u64 *adj_start, u64 *adj_end)
{
	struct xe_svm_range *range = to_xe_range(r);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	xe_svm_assert_in_notifier(vm);

	range_debug(range, "NOTIFIER");

	/* Skip if already unmapped or if no binding exists */
	if (range->base.pages.flags.unmapped || !range->tile_present)
		return 0;

	range_debug(range, "NOTIFIER - EXECUTE");

	/* Adjust invalidation to range boundaries */
	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
	*adj_end = max(xe_svm_range_end(range), mmu_range->end);

	/*
	 * XXX: Ideally we would zap PTEs in one shot in xe_svm_invalidate(),
	 * but the invalidation code can't correctly cope with sparse ranges
	 * or invalidations spanning multiple ranges.
	 */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes_range(tile, vm, range)) {
			/*
			 * WRITE_ONCE pairs with READ_ONCE in
			 * xe_vm_has_valid_gpu_mapping()
			 */
			WRITE_ONCE(range->tile_invalidated,
				   range->tile_invalidated | BIT(id));

			if (!(tile_mask & BIT(id))) {
				xe_svm_tlb_inval_count_stats_incr(tile->primary_gt);
				if (tile->media_gt)
					xe_svm_tlb_inval_count_stats_incr(tile->media_gt);
				tile_mask |= BIT(id);
			}
		}

	return tile_mask;
}

static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
				const struct mmu_notifier_range *mmu_range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	xe_svm_assert_in_notifier(vm);

	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
						   mmu_range);
}

static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
{
	s64 us_delta = xe_gt_stats_ktime_us_delta(start);

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
}

static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      struct drm_gpusvm_notifier *notifier,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_tlb_inval_batch batch;
	struct xe_device *xe = vm->xe;
	struct drm_gpusvm_range *r, *first;
	struct xe_tile *tile;
	ktime_t start = xe_gt_stats_ktime_get();
	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
	u8 tile_mask = 0, id;
	long err;

	xe_svm_assert_in_notifier(vm);

	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
	       mmu_range->start, mmu_range->end, mmu_range->event);

	/* Adjust invalidation to notifier boundaries */
	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
	if (!first)
		return;

	/*
	 * PTs may be getting destroyed so it is not safe to touch them, but
	 * the PTs should be invalidated at this point in time. Regardless, we
	 * still need to ensure any dma mappings are unmapped here.
	 */
	if (xe_vm_is_closed(vm))
		goto range_notifier_event_end;

	/*
	 * XXX: Less than ideal to always wait on the VM's resv slots if an
	 * invalidation is not required. Could walk the range list twice to
	 * figure out if an invalidation is needed, but that is also not ideal.
	 */
	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
							       &adj_start,
							       &adj_end);
	if (!tile_mask)
		goto range_notifier_event_end;

	xe_device_wmb(xe);

	err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, adj_start, adj_end,
						 tile_mask, &batch);
	if (!WARN_ON_ONCE(err))
		xe_tlb_inval_batch_wait(&batch);

range_notifier_event_end:
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		xe_svm_range_notifier_event_end(vm, r, mmu_range);
	for_each_tile(tile, xe, id) {
		if (tile_mask & BIT(id)) {
			xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start);
			if (tile->media_gt)
				xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start);
		}
	}
}

static int __xe_svm_garbage_collector(struct xe_vm *vm,
				      struct xe_svm_range *range)
{
	struct dma_fence *fence;

	range_debug(range, "GARBAGE COLLECTOR");

	xe_vm_lock(vm, false);
	fence = xe_vm_range_unbind(vm, range);
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	dma_fence_put(fence);

	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

	return 0;
}

static void xe_vma_set_default_attributes(struct xe_vma *vma)
{
	struct xe_vma_mem_attr default_attr = {
		.preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
		.preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
		.pat_index = vma->attr.default_pat_index,
		.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
		.purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
	};

	xe_vma_mem_attr_copy(&vma->attr, &default_attr);
}

static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end)
{
	struct xe_vma *vma;
	bool has_default_attr;
	int err;

	vma = xe_vm_find_vma_by_addr(vm, start);
	if (!vma)
		return -EINVAL;

	if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) {
		drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n");
		return 0;
	}

	vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx",
	       xe_vma_start(vma), xe_vma_end(vma));

	has_default_attr = xe_vma_has_default_mem_attrs(vma);

	if (has_default_attr) {
		start = xe_vma_start(vma);
		end = xe_vma_end(vma);
	} else if (xe_vma_start(vma) == start && xe_vma_end(vma) == end) {
		xe_vma_set_default_attributes(vma);
	}

	xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);

	if (xe_vma_start(vma) == start && xe_vma_end(vma) == end && has_default_attr)
		return 0;

	vm_dbg(&vm->xe->drm, "New VMA start=0x%016llx, vma_end=0x%016llx", start, end);

	err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
	if (err) {
		drm_warn(&vm->xe->drm, "New VMA MAP failed: %pe\n", ERR_PTR(err));
		xe_vm_kill(vm, true);
		return err;
	}

	/*
	 * When called from xe_svm_handle_pagefault() the original VMA might
	 * have changed; signal this so the caller looks up the VMA again.
	 */
	return -EAGAIN;
}

static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	u64 range_start;
	u64 range_end;
	int err, ret = 0;

	lockdep_assert_held_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return -ENOENT;

	for (;;) {
		spin_lock(&vm->svm.garbage_collector.lock);
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;

		range_start = xe_svm_range_start(range);
		range_end = xe_svm_range_end(range);

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			drm_warn(&vm->xe->drm,
				 "Garbage collection failed: %pe\n",
				 ERR_PTR(err));
			xe_vm_kill(vm, true);
			return err;
		}

		err = xe_svm_range_set_default_attr(vm, range_start, range_end);
		if (err) {
			if (err == -EAGAIN)
				ret = -EAGAIN;
			else
				return err;
		}
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	return ret;
}

static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm,
					svm.garbage_collector.work);

	down_write(&vm->lock);
	xe_svm_garbage_collector(vm);
	up_write(&vm->lock);
}

#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)

static struct xe_vram_region *xe_pagemap_to_vr(struct xe_pagemap *xpagemap)
{
	return xpagemap->vr;
}

static struct xe_pagemap *xe_page_to_pagemap(struct page *page)
{
	return container_of(page_pgmap(page), struct xe_pagemap, pagemap);
}

static struct xe_vram_region *xe_page_to_vr(struct page *page)
{
	return xe_pagemap_to_vr(xe_page_to_pagemap(page));
}

static u64 xe_page_to_dpa(struct page *page)
{
	struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
	struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
	u64 hpa_base = xpagemap->hpa_base;
	u64 pfn = page_to_pfn(page);
	u64 offset;
	u64 dpa;

	xe_assert(vr->xe, is_device_private_page(page));
	xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);

	offset = (pfn << PAGE_SHIFT) - hpa_base;
	dpa = vr->dpa_base + offset;

	return dpa;
}

static u64 xe_page_to_pcie(struct page *page)
{
	struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
	struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);

	return xe_page_to_dpa(page) - vr->dpa_base + vr->io_start;
}

enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
				      const enum xe_svm_copy_dir dir,
				      int kb)
{
	if (dir == XE_SVM_COPY_TO_VRAM) {
		switch (kb) {
		case 4:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_KB, kb);
			break;
		case 64:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_KB, kb);
			break;
		case 2048:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_KB, kb);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
	} else {
		switch (kb) {
		case 4:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_KB, kb);
			break;
		case 64:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_KB, kb);
			break;
		case 2048:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_KB, kb);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
	}
}

static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
				      const enum xe_svm_copy_dir dir,
				      unsigned long npages,
				      ktime_t start)
{
	s64 us_delta = xe_gt_stats_ktime_us_delta(start);

	if (dir == XE_SVM_COPY_TO_VRAM) {
		switch (npages) {
		case 1:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
					 us_delta);
			break;
		case 16:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
					 us_delta);
			break;
		case 512:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
					 us_delta);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
				 us_delta);
	} else {
		switch (npages) {
		case 1:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
					 us_delta);
			break;
		case 16:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
					 us_delta);
			break;
		case 512:
			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
					 us_delta);
			break;
		}
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US,
				 us_delta);
	}
}

static int xe_svm_copy(struct page **pages,
		       struct drm_pagemap_addr *pagemap_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir,
		       struct dma_fence *pre_migrate_fence)
{
	struct xe_vram_region *vr = NULL;
	struct xe_gt *gt = NULL;
	struct xe_device *xe;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;
	ktime_t start = xe_gt_stats_ktime_get();

	/*
	 * This flow is complex: it locates physically contiguous device
	 * pages, derives the starting physical address, and performs a
	 * single GPU copy for every 8M chunk in a DMA address array. Both
	 * device pages and DMA addresses may be sparsely populated. If
	 * either is NULL, a copy is triggered based on the current search
	 * state. The last GPU copy is waited on to ensure all copies are
	 * complete.
	 */
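	/*
	 * Worked example (illustrative, 4K pages): with npages == 6 and
	 * device pages at DPAs [A, A + 4K, A + 8K, B, B + 4K, B + 8K], where
	 * B is not contiguous with A + 8K, the mismatch at index 3 triggers
	 * one copy for pages 0-2, and reaching the last page triggers a
	 * second copy for pages 3-5. A fully contiguous run instead hits the
	 * chunk condition every 8M, bounding each GPU copy to roughly
	 * XE_MIGRATE_CHUNK_SIZE.
	 */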

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!pagemap_addr[i].addr && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = xe_page_to_vr(spage);
			gt = xe_migrate_exec_queue(vr->migrate)->gt;
			xe = vr->xe;
		}
		XE_WARN_ON(spage && xe_page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physically contiguous on
		 * subsequent device pages.
		 */
		if (pagemap_addr[i].addr && spage) {
			__vram_addr = xe_page_to_dpa(spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
			/* Expected with contiguous memory */
			xe_assert(vr->xe, match);

			if (pagemap_addr[i].order) {
				i += NR_PAGES(pagemap_addr[i].order) - 1;
				chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
				last = (i + 1) == npages;
			}
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				xe_svm_copy_kb_stats_incr(gt, dir,
							  (i - pos + incr) *
							  (PAGE_SIZE / SZ_1K));
				if (sram) {
					vm_dbg(&xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       vram_addr,
					       (u64)pagemap_addr[pos].addr, i - pos + incr);
					__fence = xe_migrate_from_vram(vr->migrate,
								       i - pos + incr,
								       vram_addr,
								       &pagemap_addr[pos],
								       pre_migrate_fence);
				} else {
					vm_dbg(&xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       (u64)pagemap_addr[pos].addr, vram_addr,
					       i - pos + incr);
					__fence = xe_migrate_to_vram(vr->migrate,
								     i - pos + incr,
								     &pagemap_addr[pos],
								     vram_addr,
								     pre_migrate_fence);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}
				pre_migrate_fence = NULL;
				dma_fence_put(fence);
				fence = __fence;
			}

			/* Setup physical address of next device page */
			if (pagemap_addr[i].addr && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				xe_svm_copy_kb_stats_incr(gt, dir,
							  (PAGE_SIZE / SZ_1K));
				if (sram) {
					vm_dbg(&xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       vram_addr, (u64)pagemap_addr[pos].addr, 1);
					__fence = xe_migrate_from_vram(vr->migrate, 1,
								       vram_addr,
								       &pagemap_addr[pos],
								       pre_migrate_fence);
				} else {
					vm_dbg(&xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       (u64)pagemap_addr[pos].addr, vram_addr, 1);
					__fence = xe_migrate_to_vram(vr->migrate, 1,
								     &pagemap_addr[pos],
								     vram_addr,
								     pre_migrate_fence);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}
				pre_migrate_fence = NULL;
				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}
	if (pre_migrate_fence)
		dma_fence_wait(pre_migrate_fence, false);

	/*
	 * XXX: We can't derive the GT here (or anywhere in this function),
	 * but compute always uses the primary GT so accumulate stats on the
	 * likely GT of the fault.
	 */
	if (gt)
		xe_svm_copy_us_stats_incr(gt, dir, npages, start);

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages,
				 struct drm_pagemap_addr *pagemap_addr,
				 unsigned long npages,
				 struct dma_fence *pre_migrate_fence)
{
	return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM,
			   pre_migrate_fence);
}

static int xe_svm_copy_to_ram(struct page **pages,
			      struct drm_pagemap_addr *pagemap_addr,
			      unsigned long npages,
			      struct dma_fence *pre_migrate_fence)
{
	return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM,
			   pre_migrate_fence);
}

static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation)
{
	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct xe_device *xe = xe_bo_device(bo);

	dma_fence_put(devmem_allocation->pre_migrate_fence);
	xe_bo_put_async(bo);
	xe_pm_runtime_put(xe);
}

static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
{
	struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);

	return PHYS_PFN(offset + xpagemap->hpa_base);
}

static struct gpu_buddy *vram_to_buddy(struct xe_vram_region *vram)
{
	return &vram->ttm.mm;
}

static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation,
				      unsigned long npages, unsigned long *pfn)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct ttm_resource *res = bo->ttm.resource;
	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
	struct gpu_buddy_block *block;
	int j = 0;

	list_for_each_entry(block, blocks, link) {
		struct xe_vram_region *vr = block->private;
		struct gpu_buddy *buddy = vram_to_buddy(vr);
		u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
						    gpu_buddy_block_offset(block));
		int i;

		for (i = 0; i < gpu_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
			pfn[j++] = block_pfn + i;
	}

	return 0;
}

static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
	.devmem_release = xe_svm_devmem_release,
	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

#else
static int xe_svm_get_pagemaps(struct xe_vm *vm)
{
	return 0;
}
#endif

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
	.invalidate = xe_svm_invalidate,
};

static const unsigned long fault_chunk_sizes[] = {
	SZ_2M,
	SZ_64K,
	SZ_4K,
};
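/*
 * Note on chunk selection (summary of drm_gpusvm core behavior, included
 * here for context only): the core walks fault_chunk_sizes[] largest-first
 * and picks the first size whose aligned range covers the fault address and
 * fits within the CPU VMA (and passes the migration/check-pages criteria of
 * the fault context). E.g. a fault in a 2M-aligned, fully populated 2M
 * mapping yields a SZ_2M range, while a small or misaligned mapping falls
 * back to SZ_64K or SZ_4K.
 */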

static void xe_pagemap_put(struct xe_pagemap *xpagemap)
{
	drm_pagemap_put(&xpagemap->dpagemap);
}

static void xe_svm_put_pagemaps(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	int id;

	for_each_tile(tile, xe, id) {
		struct xe_pagemap *xpagemap = vm->svm.pagemaps[id];

		if (xpagemap)
			xe_pagemap_put(xpagemap);
		vm->svm.pagemaps[id] = NULL;
	}
}

static struct device *xe_peer_to_dev(struct drm_pagemap_peer *peer)
{
	if (peer->private == XE_PEER_PAGEMAP)
		return container_of(peer, struct xe_pagemap, peer)->dpagemap.drm->dev;

	return container_of(peer, struct xe_vm, svm.peer)->xe->drm.dev;
}

static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
				struct drm_pagemap_peer *peer2)
{
	struct device *dev1 = xe_peer_to_dev(peer1);
	struct device *dev2 = xe_peer_to_dev(peer2);

	if (dev1 == dev2)
		return true;

	return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0;
}

static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
	int err;

	if (vm->flags & XE_VM_FLAG_FAULT_MODE) {
		spin_lock_init(&vm->svm.garbage_collector.lock);
		INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
		INIT_WORK(&vm->svm.garbage_collector.work,
			  xe_svm_garbage_collector_work_func);

		vm->svm.peer.private = XE_PEER_VM;
		err = drm_pagemap_acquire_owner(&vm->svm.peer, &xe_owner_list,
						xe_has_interconnect);
		if (err)
			return err;

		err = xe_svm_get_pagemaps(vm);
		if (err) {
			drm_pagemap_release_owner(&vm->svm.peer);
			return err;
		}

		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
				      current->mm, 0, vm->size,
				      xe_modparam.svm_notifier_size * SZ_1M,
				      &gpusvm_ops, fault_chunk_sizes,
				      ARRAY_SIZE(fault_chunk_sizes));
		if (err) {
			xe_svm_put_pagemaps(vm);
			drm_pagemap_release_owner(&vm->svm.peer);
			return err;
		}

		drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
	} else {
		err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
				      &vm->xe->drm, NULL, 0, 0, 0, NULL,
				      NULL, 0);
	}

	return err;
}
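/*
 * Illustrative lifecycle (the actual calls are wired up from the VM code):
 * teardown mirrors initialization, with xe_svm_close() stopping the garbage
 * collector and dropping the pagemap and owner references before
 * xe_svm_fini() tears down the embedded drm_gpusvm:
 *
 *	err = xe_svm_init(vm);
 *	...
 *	xe_svm_close(vm);
 *	xe_svm_fini(vm);
 */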

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
 */
void xe_svm_close(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));
	disable_work_sync(&vm->svm.garbage_collector.work);
	xe_svm_put_pagemaps(vm);
	drm_pagemap_release_owner(&vm->svm.peer);
}

/**
 * xe_svm_fini() - SVM finalize
 * @vm: The VM.
 *
 * Finalize SVM state which is embedded within the VM.
 */
void xe_svm_fini(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));

	drm_gpusvm_fini(&vm->svm.gpusvm);
}

static bool xe_svm_range_has_pagemap_locked(const struct xe_svm_range *range,
					    const struct drm_pagemap *dpagemap)
{
	return range->base.pages.dpagemap == dpagemap;
}

static bool xe_svm_range_has_pagemap(struct xe_svm_range *range,
				     const struct drm_pagemap *dpagemap)
{
	struct xe_vm *vm = range_to_vm(&range->base);
	bool ret;

	xe_svm_notifier_lock(vm);
	ret = xe_svm_range_has_pagemap_locked(range, dpagemap);
	xe_svm_notifier_unlock(vm);

	return ret;
}

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
				  struct xe_tile *tile,
				  bool devmem_only,
				  const struct drm_pagemap *dpagemap)
{
	return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present,
					    range->tile_invalidated) &&
		(!devmem_only || xe_svm_range_has_pagemap(range, dpagemap)));
}

/**
 * xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM
 * @vm: xe_vm pointer
 * @range: Pointer to the SVM range structure
 *
 * The xe_svm_range_migrate_to_smem() function checks whether the range has
 * pages in VRAM and, if so, migrates them to SMEM.
 */
void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range)
{
	if (xe_svm_range_in_vram(range))
		drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
}

/**
 * xe_svm_range_validate() - Check if the SVM range is valid
 * @vm: xe_vm pointer
 * @range: Pointer to the SVM range structure
 * @tile_mask: Mask representing the tiles to be checked
 * @dpagemap: if !%NULL, the range is expected to be present
 * in device memory identified by this parameter.
 *
 * The xe_svm_range_validate() function checks if a range is
 * valid and located in the desired memory region.
 *
 * Return: true if the range is valid, false otherwise
 */
bool xe_svm_range_validate(struct xe_vm *vm,
			   struct xe_svm_range *range,
			   u8 tile_mask, const struct drm_pagemap *dpagemap)
{
	bool ret;

	xe_svm_notifier_lock(vm);

	ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask;
	if (dpagemap)
		ret = ret && xe_svm_range_has_pagemap_locked(range, dpagemap);
	else
		ret = ret && !range->base.pages.dpagemap;

	xe_svm_notifier_unlock(vm);

	return ret;
}

/**
 * xe_svm_find_vma_start - Find start of CPU VMA
 * @vm: xe_vm pointer
 * @start: start address
 * @end: end address
 * @vma: Pointer to struct xe_vma
 *
 * This function searches for a CPU VMA within the specified
 * range [start, end] in the given VM. It adjusts the range based on the
 * xe_vma start and end addresses. If no CPU VMA is found, it returns
 * ULONG_MAX.
 *
 * Return: The starting address of the VMA within the range,
 * or ULONG_MAX if no VMA is found
 */
u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma)
{
	return drm_gpusvm_find_vma_start(&vm->svm.gpusvm,
					 max(start, xe_vma_start(vma)),
					 min(end, xe_vma_end(vma)));
}

#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
				      unsigned long start, unsigned long end,
				      struct mm_struct *mm,
				      unsigned long timeslice_ms)
{
	struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
	struct drm_pagemap_migrate_details mdetails = {
		.timeslice_ms = timeslice_ms,
		.source_peer_migrates = 1,
	};
	struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
	struct dma_fence *pre_migrate_fence = NULL;
	struct xe_device *xe = vr->xe;
	struct device *dev = xe->drm.dev;
	struct gpu_buddy_block *block;
	struct xe_validation_ctx vctx;
	struct list_head *blocks;
	struct drm_exec exec;
	struct xe_bo *bo;
	int err = 0, idx;

	if (!drm_dev_enter(&xe->drm, &idx))
		return -ENODEV;

	xe_pm_runtime_get(xe);

	xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		bo = xe_bo_create_locked(xe, NULL, NULL, end - start,
					 ttm_bo_type_device,
					 (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
					 XE_BO_FLAG_CPU_ADDR_MIRROR, &exec);
		drm_exec_retry_on_contention(&exec);
		if (IS_ERR(bo)) {
			err = PTR_ERR(bo);
			xe_validation_retry_on_oom(&vctx, &err);
			break;
		}

		/* Ensure that any clearing or async eviction will complete before migration. */
		if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) {
			err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
						     &pre_migrate_fence);
			if (err)
				dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
						      false, MAX_SCHEDULE_TIMEOUT);
			else if (pre_migrate_fence)
				dma_fence_enable_sw_signaling(pre_migrate_fence);
		}

		drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
					&dpagemap_devmem_ops, dpagemap, end - start,
					pre_migrate_fence);

		blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
		list_for_each_entry(block, blocks, link)
			block->private = vr;

		xe_bo_get(bo);

		/* Ensure the device has a pm ref while there are device pages active. */
		xe_pm_runtime_get_noresume(xe);
		/* Consumes the devmem allocation ref. */
		err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
						    start, end, &mdetails);
		xe_bo_unlock(bo);
		xe_bo_put(bo);
	}
	xe_pm_runtime_put(xe);
	drm_dev_exit(idx);

	return err;
}
#endif

static bool supports_4K_migration(struct xe_device *xe)
{
	if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		return false;

	return true;
}

/**
 * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not
 * @range: SVM range for which migration needs to be decided
 * @vma: vma which has range
 * @dpagemap: The preferred struct drm_pagemap to migrate to.
 *
 * Return: True if the range needs migration and migration is supported,
 * false otherwise.
 */
bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
					const struct drm_pagemap *dpagemap)
{
	struct xe_vm *vm = range_to_vm(&range->base);
	u64 range_size = xe_svm_range_size(range);

	if (!range->base.pages.flags.migrate_devmem || !dpagemap)
		return false;

	xe_assert(vm->xe, IS_DGFX(vm->xe));

	if (xe_svm_range_has_pagemap(range, dpagemap)) {
		drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
		return false;
	}

	if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
		drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
		return false;
	}

	return true;
}

#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \
static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \
						   struct xe_svm_range *range) \
{ \
	switch (xe_svm_range_size(range)) { \
	case SZ_4K: \
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \
		break; \
	case SZ_64K: \
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \
		break; \
	case SZ_2M: \
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \
		break; \
	} \
}

DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT)
DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT)
DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE)

#define DECL_SVM_RANGE_US_STATS(elem, stat) \
static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \
						struct xe_svm_range *range, \
						ktime_t start) \
{ \
	s64 us_delta = xe_gt_stats_ktime_us_delta(start); \
\
	switch (xe_svm_range_size(range)) { \
	case SZ_4K: \
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \
				 us_delta); \
		break; \
	case SZ_64K: \
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \
				 us_delta); \
		break; \
	case SZ_2M: \
		xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \
				 us_delta); \
		break; \
	} \
}

DECL_SVM_RANGE_US_STATS(migrate, MIGRATE)
DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES)
DECL_SVM_RANGE_US_STATS(bind, BIND)
DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT)

static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
				     struct xe_gt *gt, u64 fault_addr,
				     bool need_vram)
{
	int devmem_possible = IS_DGFX(vm->xe) &&
		IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
	struct drm_gpusvm_ctx ctx = {
		.read_only = xe_vma_read_only(vma),
		.devmem_possible = devmem_possible,
		.check_pages_threshold = devmem_possible ? SZ_64K : 0,
		.devmem_only = need_vram && devmem_possible,
		.timeslice_ms = need_vram && devmem_possible ?
			vm->xe->atomic_svm_timeslice_ms : 0,
	};
	struct xe_validation_ctx vctx;
	struct drm_exec exec;
	struct xe_svm_range *range;
	struct dma_fence *fence;
	struct drm_pagemap *dpagemap;
	struct xe_tile *tile = gt_to_tile(gt);
	int migrate_try_count = ctx.devmem_only ? 3 : 1;
	ktime_t start = xe_gt_stats_ktime_get(), bind_start, get_pages_start;
	int err;

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);

retry:
	/* Always process UNMAPs first so the view of SVM ranges is current */
	err = xe_svm_garbage_collector(vm);
	if (err)
		return err;

	dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) :
		xe_vma_resolve_pagemap(vma, tile);
	ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
	range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);

	if (IS_ERR(range))
		return PTR_ERR(range);

	xe_svm_range_fault_count_stats_incr(gt, range);

	if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) {
		err = -EACCES;
		goto out;
	}

	if (xe_svm_range_is_valid(range, tile, ctx.devmem_only, dpagemap)) {
		xe_svm_range_valid_fault_count_stats_incr(gt, range);
		range_debug(range, "PAGE FAULT - VALID");
		goto out;
	}

	range_debug(range, "PAGE FAULT");

	if (--migrate_try_count >= 0 &&
	    xe_svm_range_needs_migrate_to_vram(range, vma, dpagemap)) {
		ktime_t migrate_start = xe_gt_stats_ktime_get();

		xe_svm_range_migrate_count_stats_incr(gt, range);
		err = xe_svm_alloc_vram(range, &ctx, dpagemap);
		xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (err) {
			if (migrate_try_count || !ctx.devmem_only) {
				drm_dbg(&vm->xe->drm,
					"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));

				/*
				 * In the devmem-only case, mixed mappings may
				 * be found. The get_pages function will fix
				 * these up to a single location, allowing the
				 * page fault handler to make forward progress.
				 */
				if (ctx.devmem_only)
					goto get_pages;
				else
					goto retry;
			} else {
				drm_err(&vm->xe->drm,
					"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));
				return err;
			}
		}
	}

get_pages:
	get_pages_start = xe_gt_stats_ktime_get();

	range_debug(range, "GET PAGES");
	err = xe_svm_range_get_pages(vm, range, &ctx);
	/* Corner case where CPU mappings have changed */
	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (migrate_try_count > 0 || !ctx.devmem_only) {
			drm_dbg(&vm->xe->drm,
				"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
			range_debug(range, "PAGE FAULT - RETRY PAGES");
			goto retry;
		} else {
			drm_err(&vm->xe->drm,
				"Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
		}
	}
	if (err) {
		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
		goto out;
	} else if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
		drm_dbg(&vm->xe->drm, "After page collect data location is %sin \"%s\".\n",
			xe_svm_range_has_pagemap(range, dpagemap) ? "" : "NOT ",
			dpagemap ? dpagemap->drm->unique : "System.");
	}

	xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
	range_debug(range, "PAGE FAULT - BIND");

	bind_start = xe_gt_stats_ktime_get();
	xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);

		xe_vm_set_validation_exec(vm, &exec);
		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			drm_exec_retry_on_contention(&exec);
			err = PTR_ERR(fence);
			xe_validation_retry_on_oom(&vctx, &err);
			xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
			break;
		}
	}
	if (err)
		goto err_out;

	dma_fence_wait(fence, false);
	dma_fence_put(fence);
	xe_svm_range_bind_us_stats_incr(gt, range, bind_start);

out:
	xe_svm_range_fault_us_stats_incr(gt, range, start);
	return err;

err_out:
	if (err == -EAGAIN) {
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		range_debug(range, "PAGE FAULT - RETRY BIND");
		goto retry;
	}

	return err;
}

/**
 * xe_svm_handle_pagefault() - SVM handle page fault
 * @vm: The VM.
 * @vma: The CPU address mirror VMA.
 * @gt: The gt upon which the fault occurred.
 * @fault_addr: The GPU fault address.
 * @atomic: The fault atomic access bit.
 *
 * Create GPU bindings for a SVM page fault. Optionally migrate to device
 * memory.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_gt *gt, u64 fault_addr,
			    bool atomic)
{
	int need_vram, ret;

retry:
	need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
	if (need_vram < 0)
		return need_vram;

	ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr,
					need_vram ? true : false);
	if (ret == -EAGAIN) {
		/*
		 * Retry on -EAGAIN to re-lookup the VMA, as the original VMA
		 * may have been split by xe_svm_range_set_default_attr().
		 */
		vma = xe_vm_find_vma_by_addr(vm, fault_addr);
		if (!vma)
			return -EINVAL;

		goto retry;
	}
	return ret;
}
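/*
 * Fault-handling flow at a glance (summary of the code above, no additional
 * behavior implied):
 *
 *	garbage collect -> find/insert range -> migrate to VRAM (optional) ->
 *	get/map pages -> rebind -> wait on bind fence
 *
 * Any step may request a retry (e.g. -EAGAIN or a failed migration), which
 * restarts the sequence with a doubled migration timeslice.
 */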

/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check if an address range has SVM mappings.
 *
 * Return: True if the address range has a SVM mapping, False otherwise
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}

/**
 * xe_svm_unmap_address_range - UNMAP SVM mappings and ranges
 * @vm: The VM
 * @start: start addr
 * @end: end addr
 *
 * This function UNMAPs SVM ranges if the start or end address falls inside
 * them.
 */
void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpusvm_notifier *notifier, *next;

	lockdep_assert_held_write(&vm->lock);

	drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) {
		struct drm_gpusvm_range *range, *__next;

		drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) {
			if (start > drm_gpusvm_range_start(range) ||
			    end < drm_gpusvm_range_end(range)) {
				if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range)))
					drm_gpusvm_range_evict(&vm->svm.gpusvm, range);
				drm_gpusvm_range_get(range);
				__xe_svm_garbage_collector(vm, to_xe_range(range));
				if (!list_empty(&to_xe_range(range)->garbage_collector_link)) {
					spin_lock(&vm->svm.garbage_collector.lock);
					list_del(&to_xe_range(range)->garbage_collector_link);
					spin_unlock(&vm->svm.garbage_collector.lock);
				}
				drm_gpusvm_range_put(range);
			}
		}
	}
}

/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * SVM evict BO to system memory. The GPU SVM layer ensures all device pages
 * are evicted before returning.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
	return drm_pagemap_evict_to_ram(&bo->devmem_allocation);
}

/**
 * xe_svm_range_find_or_insert() - Find or insert GPU SVM range
 * @vm: xe_vm pointer
 * @addr: address for which the range needs to be found/inserted
 * @vma: Pointer to struct xe_vma which mirrors CPU
 * @ctx: GPU SVM context
 *
 * This function finds or inserts a newly allocated SVM range based on the
 * address.
 *
 * Return: Pointer to the SVM range on success, ERR_PTR() on failure.
 */
struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
						 struct xe_vma *vma, struct drm_gpusvm_ctx *ctx)
{
	struct drm_gpusvm_range *r;

	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)),
					    xe_vma_start(vma), xe_vma_end(vma), ctx);
	if (IS_ERR(r))
		return ERR_CAST(r);

	return to_xe_range(r);
}

/**
 * xe_svm_range_get_pages() - Get pages for a SVM range
 * @vm: Pointer to the struct xe_vm
 * @range: Pointer to the xe SVM range structure
 * @ctx: GPU SVM context
 *
 * This function gets pages for a SVM range and ensures they are mapped for
 * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
			   struct drm_gpusvm_ctx *ctx)
{
	int err = 0;

	err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx);
	if (err == -EOPNOTSUPP) {
		range_debug(range, "PAGE FAULT - EVICT PAGES");
		drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
	}

	return err;
}

/**
 * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range
 * @vm: Pointer to the xe_vm structure
 * @start: Start of the input range
 * @end: End of the input range
 *
 * This function removes the page table entries (PTEs) associated
 * with the svm ranges within the given input start and end.
 *
 * Return: tile_mask for which gt's need to be tlb invalidated.
 */
u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
{
	struct drm_gpusvm_notifier *notifier;
	struct xe_svm_range *range;
	u64 adj_start, adj_end;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
		       lockdep_is_held_type(&vm->lock, 0));

	drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) {
		struct drm_gpusvm_range *r = NULL;

		adj_start = max(start, drm_gpusvm_notifier_start(notifier));
		adj_end = min(end, drm_gpusvm_notifier_end(notifier));
		drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) {
			range = to_xe_range(r);
			for_each_tile(tile, vm->xe, id) {
				if (xe_pt_zap_ptes_range(tile, vm, range)) {
					tile_mask |= BIT(id);
					/*
					 * WRITE_ONCE pairs with READ_ONCE in
					 * xe_vm_has_valid_gpu_mapping().
					 * Must not fail after setting
					 * tile_invalidated and before
					 * TLB invalidation.
					 */
					WRITE_ONCE(range->tile_invalidated,
						   range->tile_invalidated | BIT(id));
				}
			}
		}
	}

	return tile_mask;
}

#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)

/**
 * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA
 * @vma: Pointer to the xe_vma structure containing memory attributes
 * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping
 *
 * This function determines the correct DRM pagemap to use for a given VMA.
 * It first checks if a valid devmem_fd is provided in the VMA's preferred
 * location. If the devmem_fd is negative, it returns NULL, indicating no
 * pagemap is available and that smem is to be used as the preferred location.
 * If the devmem_fd equals the default faulting
 * GT identifier, it returns the VRAM pagemap associated with the tile.
 *
 * Future support for multi-device configurations may use drm_pagemap_from_fd()
 * to resolve pagemaps from arbitrary file descriptors.
 *
 * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable.
 */
struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
{
	struct drm_pagemap *dpagemap = vma->attr.preferred_loc.dpagemap;
	s32 fd;

	if (dpagemap)
		return dpagemap;

	fd = (s32)vma->attr.preferred_loc.devmem_fd;

	if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
		return NULL;

	if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
		return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL;

	return NULL;
}
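/*
 * Example outcomes (illustrative only): DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM
 * keeps faulting pages in system memory; DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE
 * on a discrete device resolves to the faulting tile's local VRAM pagemap;
 * any other devmem_fd value currently also resolves to NULL here, unless
 * vma->attr.preferred_loc.dpagemap was already set up (e.g. by madvise), in
 * which case that pagemap takes precedence.
 */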

/**
 * xe_svm_alloc_vram() - Allocate device memory pages for range,
 * migrating existing data.
 * @range: SVM range
 * @ctx: DRM GPU SVM context
 * @dpagemap: The struct drm_pagemap representing the memory to allocate.
 *
 * Return: 0 on success, error code on failure.
 */
int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
		      struct drm_pagemap *dpagemap)
{
	static DECLARE_RWSEM(driver_migrate_lock);
	struct xe_vm *vm = range_to_vm(&range->base);
	enum drm_gpusvm_scan_result migration_state;
	struct xe_device *xe = vm->xe;
	int err, retries = 1;
	bool write_locked = false;

	xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
	range_debug(range, "ALLOCATE VRAM");

	migration_state = drm_gpusvm_scan_mm(&range->base,
					     xe_svm_private_page_owner(vm, false),
					     dpagemap->pagemap);

	if (migration_state == DRM_GPUSVM_SCAN_EQUAL) {
		if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
			drm_dbg(dpagemap->drm, "Already migrated!\n");
		return 0;
	}

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
		drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n",
			dpagemap->drm->unique);

	err = down_read_interruptible(&driver_migrate_lock);
	if (err)
		return err;

	do {
		err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
					      xe_svm_range_end(range),
					      range->base.gpusvm->mm,
					      ctx->timeslice_ms);

		if (err == -EBUSY && retries) {
			if (!write_locked) {
				int lock_err;

				up_read(&driver_migrate_lock);
				lock_err = down_write_killable(&driver_migrate_lock);
				if (lock_err)
					return lock_err;
				write_locked = true;
			}
			drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
		}
	} while (err == -EBUSY && retries--);

	if (write_locked)
		up_write(&driver_migrate_lock);
	else
		up_read(&driver_migrate_lock);

	return err;
}
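/*
 * Note on the device_map path below (explanatory only, mirroring the code):
 * when the mapping device owns the pages, the returned address is simply the
 * device-physical address (DPA) with protocol XE_INTERCONNECT_VRAM. For a
 * peer device, the page's PCIe address is mapped with dma_map_resource() and
 * advertised as XE_INTERCONNECT_P2P, which is also the only case
 * device_unmap needs to undo.
 */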

static struct drm_pagemap_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
			  struct device *dev,
			  struct page *page,
			  unsigned int order,
			  enum dma_data_direction dir)
{
	struct device *pgmap_dev = dpagemap->drm->dev;
	enum drm_interconnect_protocol prot;
	dma_addr_t addr;

	if (pgmap_dev == dev) {
		addr = xe_page_to_dpa(page);
		prot = XE_INTERCONNECT_VRAM;
	} else {
		addr = dma_map_resource(dev,
					xe_page_to_pcie(page),
					PAGE_SIZE << order, dir,
					DMA_ATTR_SKIP_CPU_SYNC);
		prot = XE_INTERCONNECT_P2P;
	}

	return drm_pagemap_addr_encode(addr, prot, order, dir);
}

static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap,
					struct device *dev,
					const struct drm_pagemap_addr *addr)
{
	if (addr->proto != XE_INTERCONNECT_P2P)
		return;

	dma_unmap_resource(dev, addr->addr, PAGE_SIZE << addr->order,
			   addr->dir, DMA_ATTR_SKIP_CPU_SYNC);
}

static void xe_pagemap_destroy_work(struct work_struct *work)
{
	struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
	struct dev_pagemap *pagemap = &xpagemap->pagemap;
	struct drm_device *drm = xpagemap->dpagemap.drm;
	int idx;

	/*
	 * Only unmap / release if the devm_ release hasn't run yet.
	 * Otherwise the devm_ callbacks have already released, or
	 * will do so shortly.
	 */
	if (drm_dev_enter(drm, &idx)) {
		devm_memunmap_pages(drm->dev, pagemap);
		devm_release_mem_region(drm->dev, pagemap->range.start,
					pagemap->range.end - pagemap->range.start + 1);
		drm_dev_exit(idx);
	}

	drm_pagemap_release_owner(&xpagemap->peer);
	kfree(xpagemap);
}

static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or_reclaim)
{
	struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
	struct xe_device *xe = to_xe_device(dpagemap->drm);

	if (from_atomic_or_reclaim)
		queue_work(xe->destroy_wq, &xpagemap->destroy_work);
	else
		xe_pagemap_destroy_work(&xpagemap->destroy_work);
}

static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
	.device_map = xe_drm_pagemap_device_map,
	.device_unmap = xe_drm_pagemap_device_unmap,
	.populate_mm = xe_drm_pagemap_populate_mm,
	.destroy = xe_pagemap_destroy,
};

/**
 * xe_pagemap_create() - Create a struct xe_pagemap object
 * @xe: The xe device.
 * @vr: Back-pointer to the struct xe_vram_region.
 *
 * Allocate and initialize a struct xe_pagemap. On successful
 * return, drm_pagemap_put() on the embedded struct drm_pagemap
 * should be used to unreference.
 *
 * Return: Pointer to a struct xe_pagemap if successful. Error pointer
 * on failure.
 */
static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram_region *vr)
{
	struct device *dev = xe->drm.dev;
	struct xe_pagemap *xpagemap;
	struct dev_pagemap *pagemap;
	struct drm_pagemap *dpagemap;
	struct resource *res;
	void *addr;
	int err;

	xpagemap = kzalloc_obj(*xpagemap);
	if (!xpagemap)
		return ERR_PTR(-ENOMEM);

	pagemap = &xpagemap->pagemap;
	dpagemap = &xpagemap->dpagemap;
	INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work);
	xpagemap->vr = vr;
	xpagemap->peer.private = XE_PEER_PAGEMAP;

	err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops);
	if (err)
		goto out_no_dpagemap;

	res = devm_request_free_mem_region(dev, &iomem_resource,
					   vr->usable_size);
	if (IS_ERR(res)) {
		err = PTR_ERR(res);
		goto out_err;
	}

	err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list,
					xe_has_interconnect);
	if (err)
		goto out_no_owner;

	pagemap->type = MEMORY_DEVICE_PRIVATE;
	pagemap->range.start = res->start;
	pagemap->range.end = res->end;
	pagemap->nr_range = 1;
	pagemap->owner = xpagemap->peer.owner;
	pagemap->ops = drm_pagemap_pagemap_ops_get();
	addr = devm_memremap_pages(dev, pagemap);
	if (IS_ERR(addr)) {
		err = PTR_ERR(addr);
		goto out_no_pages;
	}
	xpagemap->hpa_base = res->start;
	return xpagemap;

out_no_pages:
	drm_pagemap_release_owner(&xpagemap->peer);
out_no_owner:
	devm_release_mem_region(dev, res->start, res->end - res->start + 1);
out_err:
	drm_pagemap_put(dpagemap);
	return ERR_PTR(err);

out_no_dpagemap:
	kfree(xpagemap);
	return ERR_PTR(err);
}

/**
 * xe_pagemap_find_or_create() - Find or create a struct xe_pagemap
 * @xe: The xe device.
 * @cache: The struct drm_pagemap_cache.
 * @vr: The VRAM region.
 *
 * Check if there is an already used xe_pagemap for this tile, and in that
 * case, return it.
 * If not, check if there is a cached xe_pagemap for this tile, and in that
 * case, cancel its destruction, re-initialize it and return it.
 * Finally, if there is no cached or already used pagemap, create one and
 * register it in the tile's pagemap cache.
 *
 * Note that this function is typically called from within an IOCTL, and
 * waits are therefore carried out interruptibly if possible.
 *
 * Return: A pointer to a struct xe_pagemap if successful, error pointer
 * on failure.
 */
static struct xe_pagemap *
xe_pagemap_find_or_create(struct xe_device *xe, struct drm_pagemap_cache *cache,
			  struct xe_vram_region *vr)
{
	struct drm_pagemap *dpagemap;
	struct xe_pagemap *xpagemap;
	int err;

	err = drm_pagemap_cache_lock_lookup(cache);
	if (err)
		return ERR_PTR(err);

	dpagemap = drm_pagemap_get_from_cache(cache);
	if (IS_ERR(dpagemap)) {
		xpagemap = ERR_CAST(dpagemap);
	} else if (!dpagemap) {
		xpagemap = xe_pagemap_create(xe, vr);
		if (IS_ERR(xpagemap))
			goto out_unlock;
		drm_pagemap_cache_set_pagemap(cache, &xpagemap->dpagemap);
	} else {
		xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
	}

out_unlock:
	drm_pagemap_cache_unlock_lookup(cache);
	return xpagemap;
}

static int xe_svm_get_pagemaps(struct xe_vm *vm)
{
	struct xe_device *xe = vm->xe;
	struct xe_pagemap *xpagemap = NULL;
	struct xe_tile *tile;
	int id;

	for_each_tile(tile, xe, id) {
		struct xe_vram_region *vr;

		if (!((BIT(id) << 1) & xe->info.mem_region_mask))
			continue;

		vr = xe_tile_to_vr(tile);
		xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
		if (IS_ERR(xpagemap))
			break;
		vm->svm.pagemaps[id] = xpagemap;
	}

	if (IS_ERR(xpagemap)) {
		xe_svm_put_pagemaps(vm);
		return PTR_ERR(xpagemap);
	}

	return 0;
}

/**
 * xe_pagemap_shrinker_create() - Create a drm_pagemap shrinker
 * @xe: The xe device
 *
 * Create a drm_pagemap shrinker and register it with the xe device.
 *
 * Return: %0 on success, negative error code on failure.
 */
int xe_pagemap_shrinker_create(struct xe_device *xe)
{
	xe->usm.dpagemap_shrinker = drm_pagemap_shrinker_create_devm(&xe->drm);
	return PTR_ERR_OR_ZERO(xe->usm.dpagemap_shrinker);
}

/**
 * xe_pagemap_cache_create() - Create a drm_pagemap cache
 * @tile: The tile to register the cache with
 *
 * Create a drm_pagemap cache and register it with the tile.
 *
 * Return: %0 on success, negative error code on failure.
 */
int xe_pagemap_cache_create(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);

	if (IS_DGFX(xe)) {
		struct drm_pagemap_cache *cache =
			drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker);

		if (IS_ERR(cache))
			return PTR_ERR(cache);

		tile->mem.vram->dpagemap_cache = cache;
	}

	return 0;
}

static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance)
{
	u32 tile_id = region_instance - 1;
	struct xe_pagemap *xpagemap;
	struct xe_vram_region *vr;

	if (tile_id >= xe->info.tile_count)
		return ERR_PTR(-ENOENT);

	if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask))
		return ERR_PTR(-ENOENT);

	vr = xe_tile_to_vr(&xe->tiles[tile_id]);

	/* Returns a reference-counted embedded struct drm_pagemap */
	xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
	if (IS_ERR(xpagemap))
		return ERR_CAST(xpagemap);

	return &xpagemap->dpagemap;
}

/**
 * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a
 * (file_descriptor, region_instance) pair.
 * @fd: An fd opened against an xe device.
 * @region_instance: The region instance representing the device memory
 * on the opened xe device.
 *
 * Opens a struct drm_pagemap pointer on the
 * indicated device and region_instance.
 *
 * Return: A reference-counted struct drm_pagemap pointer on success,
 * negative error pointer on failure.
 */
struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
{
	struct drm_pagemap *dpagemap;
	struct file *file;
	struct drm_file *fpriv;
	struct drm_device *drm;
	int idx;

	if (fd <= 0)
		return ERR_PTR(-EINVAL);

	file = fget(fd);
	if (!file)
		return ERR_PTR(-ENOENT);

	if (!xe_is_xe_file(file)) {
		dpagemap = ERR_PTR(-ENOENT);
		goto out;
	}

	fpriv = file->private_data;
	drm = fpriv->minor->dev;
	if (!drm_dev_enter(drm, &idx)) {
		dpagemap = ERR_PTR(-ENODEV);
		goto out;
	}

	dpagemap = xe_devmem_open(to_xe_device(drm), region_instance);
	drm_dev_exit(idx);
out:
	fput(file);
	return dpagemap;
}
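/*
 * Illustrative call sequence (sketch only; "args" is a made-up stand-in for
 * a madvise-style ioctl payload carrying an fd and region instance):
 *
 *	dpagemap = xe_drm_pagemap_from_fd(args->fd, args->region_instance);
 *	if (IS_ERR(dpagemap))
 *		return PTR_ERR(dpagemap);
 *
 * The returned reference can then be transferred to
 * vma->attr.preferred_loc.dpagemap; per the DOC section above, the vma
 * later drops it with drm_pagemap_put().
 */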

#else

int xe_pagemap_shrinker_create(struct xe_device *xe)
{
	return 0;
}

int xe_pagemap_cache_create(struct xe_tile *tile)
{
	return 0;
}

int xe_svm_alloc_vram(struct xe_svm_range *range,
		      const struct drm_gpusvm_ctx *ctx,
		      struct drm_pagemap *dpagemap)
{
	return -EOPNOTSUPP;
}

struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
{
	return NULL;
}

struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
{
	return ERR_PTR(-ENOENT);
}

#endif

/**
 * xe_svm_flush() - SVM flush
 * @vm: The VM.
 *
 * Flush all SVM actions.
 */
void xe_svm_flush(struct xe_vm *vm)
{
	if (xe_vm_in_fault_mode(vm))
		flush_work(&vm->svm.garbage_collector.work);
}