1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2024 Intel Corporation
4 */
5
6 #include <linux/pci-p2pdma.h>
7
8 #include <drm/drm_drv.h>
9 #include <drm/drm_managed.h>
10 #include <drm/drm_pagemap.h>
11 #include <drm/drm_pagemap_util.h>
12
13 #include "xe_bo.h"
14 #include "xe_exec_queue_types.h"
15 #include "xe_gt_stats.h"
16 #include "xe_migrate.h"
17 #include "xe_module.h"
18 #include "xe_pm.h"
19 #include "xe_pt.h"
20 #include "xe_svm.h"
21 #include "xe_tile.h"
22 #include "xe_tlb_inval.h"
23 #include "xe_ttm_vram_mgr.h"
24 #include "xe_vm.h"
25 #include "xe_vm_types.h"
26 #include "xe_vram_types.h"
27
28 /* Identifies subclasses of struct drm_pagemap_peer */
29 #define XE_PEER_PAGEMAP ((void *)0ul)
30 #define XE_PEER_VM ((void *)1ul)
31
32 /**
33 * DOC: drm_pagemap reference-counting in xe:
34 *
35 * In addition to the drm_pagemap internal reference counting by its zone
36 * device data, the xe driver holds the following long-term references:
37 *
38 * - struct xe_pagemap:
39 * The xe_pagemap struct derives from struct drm_pagemap and uses its
40 * reference count.
41 * - SVM-enabled VMs:
42 * SVM-enabled VMs look up and keep a reference to all xe_pagemaps on
43 * the same device.
44 * - VMAs:
45 * VMAs keep a reference to the drm_pagemap indicated by a gpu_madvise()
46 * call.
47 *
48 * In addition, all drm_pagemap or xe_pagemap pointers whose lifetime cannot
49 * be guaranteed by a vma reference under the vm lock should hold a reference.
50 * That includes the range->pages.dpagemap pointer.
51 */
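
/*
 * Illustrative sketch only (not part of the driver): a caller that stashes a
 * drm_pagemap pointer beyond what a vma reference under the vm lock can
 * guarantee should hold its own reference, assuming the usual
 * drm_pagemap_get() / drm_pagemap_put() pair from drm_pagemap.h:
 *
 *	struct drm_pagemap *dpagemap = drm_pagemap_get(range->base.pages.dpagemap);
 *
 *	... use dpagemap independently of the vma / range lifetime ...
 *
 *	drm_pagemap_put(dpagemap);
 */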
52
53 static int xe_svm_get_pagemaps(struct xe_vm *vm);
54
55 void *xe_svm_private_page_owner(struct xe_vm *vm, bool force_smem)
56 {
57 return force_smem ? NULL : vm->svm.peer.owner;
58 }
59
60 static bool xe_svm_range_in_vram(struct xe_svm_range *range)
61 {
62 /*
63 * Advisory-only check of whether the range is currently backed by VRAM
64 * memory.
65 */
66
67 struct drm_gpusvm_pages_flags flags = {
68 /* Pairs with WRITE_ONCE in drm_gpusvm.c */
69 .__flags = READ_ONCE(range->base.pages.flags.__flags),
70 };
71
72 return flags.has_devmem_pages;
73 }
74
75 static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
76 {
77 /* Not reliable without notifier lock */
78 return xe_svm_range_in_vram(range) && range->tile_present;
79 }
80
81 static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
82 {
83 return container_of(gpusvm, struct xe_vm, svm.gpusvm);
84 }
85
86 static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
87 {
88 return gpusvm_to_vm(r->gpusvm);
89 }
90
91 #define range_debug(r__, operation__) \
92 vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
93 "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
94 "start=0x%014lx, end=0x%014lx, size=%lu", \
95 (operation__), range_to_vm(&(r__)->base)->usm.asid, \
96 (r__)->base.gpusvm, \
97 xe_svm_range_in_vram((r__)) ? 1 : 0, \
98 xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
99 (r__)->base.pages.notifier_seq, \
100 xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
101 xe_svm_range_size((r__)))
102
103 void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
104 {
105 range_debug(range, operation);
106 }
107
108 static struct drm_gpusvm_range *
109 xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
110 {
111 struct xe_svm_range *range;
112
113 range = kzalloc_obj(*range);
114 if (!range)
115 return NULL;
116
117 INIT_LIST_HEAD(&range->garbage_collector_link);
118 xe_vm_get(gpusvm_to_vm(gpusvm));
119
120 return &range->base;
121 }
122
123 static void xe_svm_range_free(struct drm_gpusvm_range *range)
124 {
125 xe_vm_put(range_to_vm(range));
126 kfree(range);
127 }
128
129 static void
130 xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
131 const struct mmu_notifier_range *mmu_range)
132 {
133 struct xe_device *xe = vm->xe;
134
135 range_debug(range, "GARBAGE COLLECTOR ADD");
136
137 drm_gpusvm_range_set_unmapped(&range->base, mmu_range);
138
139 spin_lock(&vm->svm.garbage_collector.lock);
140 if (list_empty(&range->garbage_collector_link))
141 list_add_tail(&range->garbage_collector_link,
142 &vm->svm.garbage_collector.range_list);
143 spin_unlock(&vm->svm.garbage_collector.lock);
144
145 queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work);
146 }
147
148 static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
149 {
150 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1);
151 }
152
153 static u8
154 xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
155 const struct mmu_notifier_range *mmu_range,
156 u64 *adj_start, u64 *adj_end)
157 {
158 struct xe_svm_range *range = to_xe_range(r);
159 struct xe_device *xe = vm->xe;
160 struct xe_tile *tile;
161 u8 tile_mask = 0;
162 u8 id;
163
164 xe_svm_assert_in_notifier(vm);
165
166 range_debug(range, "NOTIFIER");
167
168 /* Skip if already unmapped or if no binding exists */
169 if (range->base.pages.flags.unmapped || !range->tile_present)
170 return 0;
171
172 range_debug(range, "NOTIFIER - EXECUTE");
173
174 /* Adjust invalidation to range boundaries */
175 *adj_start = min(xe_svm_range_start(range), mmu_range->start);
176 *adj_end = max(xe_svm_range_end(range), mmu_range->end);
177
178 /*
179 * XXX: Ideally would zap PTEs in one shot in xe_svm_invalidate but the
180 * invalidation code can't correctly cope with sparse ranges or
181 * invalidations spanning multiple ranges.
182 */
183 for_each_tile(tile, xe, id)
184 if (xe_pt_zap_ptes_range(tile, vm, range)) {
185 /*
186 * WRITE_ONCE pairs with READ_ONCE in
187 * xe_vm_has_valid_gpu_mapping()
188 */
189 WRITE_ONCE(range->tile_invalidated,
190 range->tile_invalidated | BIT(id));
191
192 if (!(tile_mask & BIT(id))) {
193 xe_svm_tlb_inval_count_stats_incr(tile->primary_gt);
194 if (tile->media_gt)
195 xe_svm_tlb_inval_count_stats_incr(tile->media_gt);
196 tile_mask |= BIT(id);
197 }
198 }
199
200 return tile_mask;
201 }
202
203 static void
204 xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
205 const struct mmu_notifier_range *mmu_range)
206 {
207 struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
208
209 xe_svm_assert_in_notifier(vm);
210
211 drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
212 if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
213 xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
214 mmu_range);
215 }
216
217 static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
218 {
219 s64 us_delta = xe_gt_stats_ktime_us_delta(start);
220
221 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
222 }
223
224 static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
225 struct drm_gpusvm_notifier *notifier,
226 const struct mmu_notifier_range *mmu_range)
227 {
228 struct xe_vm *vm = gpusvm_to_vm(gpusvm);
229 struct xe_tlb_inval_batch batch;
230 struct xe_device *xe = vm->xe;
231 struct drm_gpusvm_range *r, *first;
232 struct xe_tile *tile;
233 ktime_t start = xe_gt_stats_ktime_get();
234 u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
235 u8 tile_mask = 0, id;
236 long err;
237
238 xe_svm_assert_in_notifier(vm);
239
240 vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
241 "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
242 vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
243 mmu_range->start, mmu_range->end, mmu_range->event);
244
245 /* Adjust invalidation to notifier boundaries */
246 adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
247 adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);
248
249 first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
250 if (!first)
251 return;
252
253 /*
254 * PTs may be getting destroyed so it is not safe to touch them, but the
255 * PTs should be invalidated at this point in time. Regardless, we still
256 * need to ensure any DMA mappings are unmapped here.
257 */
258 if (xe_vm_is_closed(vm))
259 goto range_notifier_event_end;
260
261 /*
262 * XXX: Less than ideal to always wait on VM's resv slots if an
263 * invalidation is not required. Could walk the range list twice to figure
264 * out if an invalidation is needed, but that is also not ideal.
265 */
266 err = dma_resv_wait_timeout(xe_vm_resv(vm),
267 DMA_RESV_USAGE_BOOKKEEP,
268 false, MAX_SCHEDULE_TIMEOUT);
269 XE_WARN_ON(err <= 0);
270
271 r = first;
272 drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
273 tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
274 &adj_start,
275 &adj_end);
276 if (!tile_mask)
277 goto range_notifier_event_end;
278
279 xe_device_wmb(xe);
280
281 err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, adj_start, adj_end,
282 tile_mask, &batch);
283 if (!WARN_ON_ONCE(err))
284 xe_tlb_inval_batch_wait(&batch);
285
286 range_notifier_event_end:
287 r = first;
288 drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
289 xe_svm_range_notifier_event_end(vm, r, mmu_range);
290 for_each_tile(tile, xe, id) {
291 if (tile_mask & BIT(id)) {
292 xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start);
293 if (tile->media_gt)
294 xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start);
295 }
296 }
297 }
298
299 static int __xe_svm_garbage_collector(struct xe_vm *vm,
300 struct xe_svm_range *range)
301 {
302 struct dma_fence *fence;
303
304 range_debug(range, "GARBAGE COLLECTOR");
305
306 xe_vm_lock(vm, false);
307 fence = xe_vm_range_unbind(vm, range);
308 xe_vm_unlock(vm);
309 if (IS_ERR(fence))
310 return PTR_ERR(fence);
311 dma_fence_put(fence);
312
313 drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);
314
315 return 0;
316 }
317
318 static void xe_vma_set_default_attributes(struct xe_vma *vma)
319 {
320 struct xe_vma_mem_attr default_attr = {
321 .preferred_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
322 .preferred_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
323 .pat_index = vma->attr.default_pat_index,
324 .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
325 .purgeable_state = XE_MADV_PURGEABLE_WILLNEED,
326 };
327
328 xe_vma_mem_attr_copy(&vma->attr, &default_attr);
329 }
330
331 static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 start, u64 end)
332 {
333 struct xe_vma *vma;
334 bool has_default_attr;
335 int err;
336
337 vma = xe_vm_find_vma_by_addr(vm, start);
338 if (!vma)
339 return -EINVAL;
340
341 if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) {
342 drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n");
343 return 0;
344 }
345
346 vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx",
347 xe_vma_start(vma), xe_vma_end(vma));
348
349 has_default_attr = xe_vma_has_default_mem_attrs(vma);
350
351 if (has_default_attr) {
352 start = xe_vma_start(vma);
353 end = xe_vma_end(vma);
354 } else if (xe_vma_start(vma) == start && xe_vma_end(vma) == end) {
355 xe_vma_set_default_attributes(vma);
356 }
357
358 xe_vm_find_cpu_addr_mirror_vma_range(vm, &start, &end);
359
360 if (xe_vma_start(vma) == start && xe_vma_end(vma) == end && has_default_attr)
361 return 0;
362
363 vm_dbg(&vm->xe->drm, "New VMA start=0x%016llx, vma_end=0x%016llx", start, end);
364
365 err = xe_vm_alloc_cpu_addr_mirror_vma(vm, start, end - start);
366 if (err) {
367 drm_warn(&vm->xe->drm, "New VMA MAP failed: %pe\n", ERR_PTR(err));
368 xe_vm_kill(vm, true);
369 return err;
370 }
371
372 /*
373 * When called from xe_svm_handle_pagefault() the original VMA might have
374 * changed; signal this so the caller looks up the VMA again.
375 */
376 return -EAGAIN;
377 }
378
379 static int xe_svm_garbage_collector(struct xe_vm *vm)
380 {
381 struct xe_svm_range *range;
382 u64 range_start;
383 u64 range_end;
384 int err, ret = 0;
385
386 lockdep_assert_held_write(&vm->lock);
387
388 if (xe_vm_is_closed_or_banned(vm))
389 return -ENOENT;
390
391 for (;;) {
392 spin_lock(&vm->svm.garbage_collector.lock);
393 range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
394 typeof(*range),
395 garbage_collector_link);
396 if (!range)
397 break;
398
399 range_start = xe_svm_range_start(range);
400 range_end = xe_svm_range_end(range);
401
402 list_del(&range->garbage_collector_link);
403 spin_unlock(&vm->svm.garbage_collector.lock);
404
405 err = __xe_svm_garbage_collector(vm, range);
406 if (err) {
407 drm_warn(&vm->xe->drm,
408 "Garbage collection failed: %pe\n",
409 ERR_PTR(err));
410 xe_vm_kill(vm, true);
411 return err;
412 }
413
414 err = xe_svm_range_set_default_attr(vm, range_start, range_end);
415 if (err) {
416 if (err == -EAGAIN)
417 ret = -EAGAIN;
418 else
419 return err;
420 }
421 }
422 spin_unlock(&vm->svm.garbage_collector.lock);
423
424 return ret;
425 }
426
427 static void xe_svm_garbage_collector_work_func(struct work_struct *w)
428 {
429 struct xe_vm *vm = container_of(w, struct xe_vm,
430 svm.garbage_collector.work);
431
432 down_write(&vm->lock);
433 xe_svm_garbage_collector(vm);
434 up_write(&vm->lock);
435 }
436
437 #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
438
439 static struct xe_vram_region *xe_pagemap_to_vr(struct xe_pagemap *xpagemap)
440 {
441 return xpagemap->vr;
442 }
443
444 static struct xe_pagemap *xe_page_to_pagemap(struct page *page)
445 {
446 return container_of(page_pgmap(page), struct xe_pagemap, pagemap);
447 }
448
449 static struct xe_vram_region *xe_page_to_vr(struct page *page)
450 {
451 return xe_pagemap_to_vr(xe_page_to_pagemap(page));
452 }
453
454 static u64 xe_page_to_dpa(struct page *page)
455 {
456 struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
457 struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
458 u64 hpa_base = xpagemap->hpa_base;
459 u64 pfn = page_to_pfn(page);
460 u64 offset;
461 u64 dpa;
462
463 xe_assert(vr->xe, is_device_private_page(page));
464 xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= hpa_base);
465
466 offset = (pfn << PAGE_SHIFT) - hpa_base;
467 dpa = vr->dpa_base + offset;
468
469 return dpa;
470 }
471
472 static u64 xe_page_to_pcie(struct page *page)
473 {
474 struct xe_pagemap *xpagemap = xe_page_to_pagemap(page);
475 struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
476
477 return xe_page_to_dpa(page) - vr->dpa_base + vr->io_start;
478 }
479
480 enum xe_svm_copy_dir {
481 XE_SVM_COPY_TO_VRAM,
482 XE_SVM_COPY_TO_SRAM,
483 };
484
485 static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
486 const enum xe_svm_copy_dir dir,
487 int kb)
488 {
489 if (dir == XE_SVM_COPY_TO_VRAM) {
490 switch (kb) {
491 case 4:
492 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_KB, kb);
493 break;
494 case 64:
495 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_KB, kb);
496 break;
497 case 2048:
498 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_KB, kb);
499 break;
500 }
501 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
502 } else {
503 switch (kb) {
504 case 4:
505 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_KB, kb);
506 break;
507 case 64:
508 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_KB, kb);
509 break;
510 case 2048:
511 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_KB, kb);
512 break;
513 }
514 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
515 }
516 }
517
518 static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
519 const enum xe_svm_copy_dir dir,
520 unsigned long npages,
521 ktime_t start)
522 {
523 s64 us_delta = xe_gt_stats_ktime_us_delta(start);
524
525 if (dir == XE_SVM_COPY_TO_VRAM) {
526 switch (npages) {
527 case 1:
528 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
529 us_delta);
530 break;
531 case 16:
532 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
533 us_delta);
534 break;
535 case 512:
536 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
537 us_delta);
538 break;
539 }
540 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
541 us_delta);
542 } else {
543 switch (npages) {
544 case 1:
545 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
546 us_delta);
547 break;
548 case 16:
549 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
550 us_delta);
551 break;
552 case 512:
553 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
554 us_delta);
555 break;
556 }
557 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US,
558 us_delta);
559 }
560 }
561
562 static int xe_svm_copy(struct page **pages,
563 struct drm_pagemap_addr *pagemap_addr,
564 unsigned long npages, const enum xe_svm_copy_dir dir,
565 struct dma_fence *pre_migrate_fence)
566 {
567 struct xe_vram_region *vr = NULL;
568 struct xe_gt *gt = NULL;
569 struct xe_device *xe;
570 struct dma_fence *fence = NULL;
571 unsigned long i;
572 #define XE_VRAM_ADDR_INVALID ~0x0ull
573 u64 vram_addr = XE_VRAM_ADDR_INVALID;
574 int err = 0, pos = 0;
575 bool sram = dir == XE_SVM_COPY_TO_SRAM;
576 ktime_t start = xe_gt_stats_ktime_get();
577
578 /*
579 * This flow is complex: it locates physically contiguous device pages,
580 * derives the starting physical address, and performs a single GPU copy
581 * for every 8M chunk in a DMA address array. Both device pages and
582 * DMA addresses may be sparsely populated. If either is NULL, a copy is
583 * triggered based on the current search state. The last GPU copy is
584 * waited on to ensure all copies are complete.
585 */
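
/*
 * Worked example (illustrative only): with 4K pages, a fully populated and
 * physically contiguous VRAM run of 4096 pages is cut at
 * XE_MIGRATE_CHUNK_SIZE (8M, i.e. 2048 pages), so two GPU copies are
 * issued. A hole (NULL page or zero DMA address) or a physical
 * discontinuity ends the current run early and triggers a copy of the
 * pages accumulated so far.
 */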
586
587 for (i = 0; i < npages; ++i) {
588 struct page *spage = pages[i];
589 struct dma_fence *__fence;
590 u64 __vram_addr;
591 bool match = false, chunk, last;
592
593 #define XE_MIGRATE_CHUNK_SIZE SZ_8M
594 chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
595 last = (i + 1) == npages;
596
597 /* No CPU page and no device pages queued to copy */
598 if (!pagemap_addr[i].addr && vram_addr == XE_VRAM_ADDR_INVALID)
599 continue;
600
601 if (!vr && spage) {
602 vr = xe_page_to_vr(spage);
603 gt = xe_migrate_exec_queue(vr->migrate)->gt;
604 xe = vr->xe;
605 }
606 XE_WARN_ON(spage && xe_page_to_vr(spage) != vr);
607
608 /*
609 * CPU page and device page valid, capture physical address on
610 * first device page, check if physically contiguous on subsequent
611 * device pages.
612 */
613 if (pagemap_addr[i].addr && spage) {
614 __vram_addr = xe_page_to_dpa(spage);
615 if (vram_addr == XE_VRAM_ADDR_INVALID) {
616 vram_addr = __vram_addr;
617 pos = i;
618 }
619
620 match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
621 /* Expected with contiguous memory */
622 xe_assert(vr->xe, match);
623
624 if (pagemap_addr[i].order) {
625 i += NR_PAGES(pagemap_addr[i].order) - 1;
626 chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
627 last = (i + 1) == npages;
628 }
629 }
630
631 /*
632 * Mismatched physical address, 8M copy chunk, or last page -
633 * trigger a copy.
634 */
635 if (!match || chunk || last) {
636 /*
637 * Extra page for first copy if last page and matching
638 * physical address.
639 */
640 int incr = (match && last) ? 1 : 0;
641
642 if (vram_addr != XE_VRAM_ADDR_INVALID) {
643 xe_svm_copy_kb_stats_incr(gt, dir,
644 (i - pos + incr) *
645 (PAGE_SIZE / SZ_1K));
646 if (sram) {
647 vm_dbg(&xe->drm,
648 "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
649 vram_addr,
650 (u64)pagemap_addr[pos].addr, i - pos + incr);
651 __fence = xe_migrate_from_vram(vr->migrate,
652 i - pos + incr,
653 vram_addr,
654 &pagemap_addr[pos],
655 pre_migrate_fence);
656 } else {
657 vm_dbg(&xe->drm,
658 "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
659 (u64)pagemap_addr[pos].addr, vram_addr,
660 i - pos + incr);
661 __fence = xe_migrate_to_vram(vr->migrate,
662 i - pos + incr,
663 &pagemap_addr[pos],
664 vram_addr,
665 pre_migrate_fence);
666 }
667 if (IS_ERR(__fence)) {
668 err = PTR_ERR(__fence);
669 goto err_out;
670 }
671 pre_migrate_fence = NULL;
672 dma_fence_put(fence);
673 fence = __fence;
674 }
675
676 /* Setup physical address of next device page */
677 if (pagemap_addr[i].addr && spage) {
678 vram_addr = __vram_addr;
679 pos = i;
680 } else {
681 vram_addr = XE_VRAM_ADDR_INVALID;
682 }
683
684 /* Extra mismatched device page, copy it */
685 if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
686 xe_svm_copy_kb_stats_incr(gt, dir,
687 (PAGE_SIZE / SZ_1K));
688 if (sram) {
689 vm_dbg(&xe->drm,
690 "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
691 vram_addr, (u64)pagemap_addr[pos].addr, 1);
692 __fence = xe_migrate_from_vram(vr->migrate, 1,
693 vram_addr,
694 &pagemap_addr[pos],
695 pre_migrate_fence);
696 } else {
697 vm_dbg(&xe->drm,
698 "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
699 (u64)pagemap_addr[pos].addr, vram_addr, 1);
700 __fence = xe_migrate_to_vram(vr->migrate, 1,
701 &pagemap_addr[pos],
702 vram_addr,
703 pre_migrate_fence);
704 }
705 if (IS_ERR(__fence)) {
706 err = PTR_ERR(__fence);
707 goto err_out;
708 }
709 pre_migrate_fence = NULL;
710 dma_fence_put(fence);
711 fence = __fence;
712 }
713 }
714 }
715
716 err_out:
717 /* Wait for all copies to complete */
718 if (fence) {
719 dma_fence_wait(fence, false);
720 dma_fence_put(fence);
721 }
722 if (pre_migrate_fence)
723 dma_fence_wait(pre_migrate_fence, false);
724
725 /*
726 * XXX: We can't derive the GT here (or anywhere in this function), but
727 * compute always uses the primary GT, so accumulate stats on the likely
728 * GT of the fault.
729 */
730 if (gt)
731 xe_svm_copy_us_stats_incr(gt, dir, npages, start);
732
733 return err;
734 #undef XE_MIGRATE_CHUNK_SIZE
735 #undef XE_VRAM_ADDR_INVALID
736 }
737
738 static int xe_svm_copy_to_devmem(struct page **pages,
739 struct drm_pagemap_addr *pagemap_addr,
740 unsigned long npages,
741 struct dma_fence *pre_migrate_fence)
742 {
743 return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM,
744 pre_migrate_fence);
745 }
746
747 static int xe_svm_copy_to_ram(struct page **pages,
748 struct drm_pagemap_addr *pagemap_addr,
749 unsigned long npages,
750 struct dma_fence *pre_migrate_fence)
751 {
752 return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM,
753 pre_migrate_fence);
754 }
755
756 static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation)
757 {
758 return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
759 }
760
761 static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
762 {
763 struct xe_bo *bo = to_xe_bo(devmem_allocation);
764 struct xe_device *xe = xe_bo_device(bo);
765
766 dma_fence_put(devmem_allocation->pre_migrate_fence);
767 xe_bo_put_async(bo);
768 xe_pm_runtime_put(xe);
769 }
770
771 static u64 block_offset_to_pfn(struct drm_pagemap *dpagemap, u64 offset)
772 {
773 struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
774
775 return PHYS_PFN(offset + xpagemap->hpa_base);
776 }
777
778 static struct gpu_buddy *vram_to_buddy(struct xe_vram_region *vram)
779 {
780 return &vram->ttm.mm;
781 }
782
783 static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation,
784 unsigned long npages, unsigned long *pfn)
785 {
786 struct xe_bo *bo = to_xe_bo(devmem_allocation);
787 struct ttm_resource *res = bo->ttm.resource;
788 struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
789 struct gpu_buddy_block *block;
790 int j = 0;
791
792 list_for_each_entry(block, blocks, link) {
793 struct xe_vram_region *vr = block->private;
794 struct gpu_buddy *buddy = vram_to_buddy(vr);
795 u64 block_pfn = block_offset_to_pfn(devmem_allocation->dpagemap,
796 gpu_buddy_block_offset(block));
797 int i;
798
799 for (i = 0; i < gpu_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
800 pfn[j++] = block_pfn + i;
801 }
802
803 return 0;
804 }
805
806 static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
807 .devmem_release = xe_svm_devmem_release,
808 .populate_devmem_pfn = xe_svm_populate_devmem_pfn,
809 .copy_to_devmem = xe_svm_copy_to_devmem,
810 .copy_to_ram = xe_svm_copy_to_ram,
811 };
812
813 #else
814 static int xe_svm_get_pagemaps(struct xe_vm *vm)
815 {
816 return 0;
817 }
818 #endif
819
820 static const struct drm_gpusvm_ops gpusvm_ops = {
821 .range_alloc = xe_svm_range_alloc,
822 .range_free = xe_svm_range_free,
823 .invalidate = xe_svm_invalidate,
824 };
825
826 static const unsigned long fault_chunk_sizes[] = {
827 SZ_2M,
828 SZ_64K,
829 SZ_4K,
830 };
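
/*
 * Note (assumption about the drm_gpusvm core, illustrative only): the chunk
 * sizes above are tried largest first when a range is created for a fault,
 * so a suitably aligned fault address fully covered by the CPU VMA gets a
 * 2M range, falling back to 64K and finally 4K.
 */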
831
832 static void xe_pagemap_put(struct xe_pagemap *xpagemap)
833 {
834 drm_pagemap_put(&xpagemap->dpagemap);
835 }
836
837 static void xe_svm_put_pagemaps(struct xe_vm *vm)
838 {
839 struct xe_device *xe = vm->xe;
840 struct xe_tile *tile;
841 int id;
842
843 for_each_tile(tile, xe, id) {
844 struct xe_pagemap *xpagemap = vm->svm.pagemaps[id];
845
846 if (xpagemap)
847 xe_pagemap_put(xpagemap);
848 vm->svm.pagemaps[id] = NULL;
849 }
850 }
851
852 static struct device *xe_peer_to_dev(struct drm_pagemap_peer *peer)
853 {
854 if (peer->private == XE_PEER_PAGEMAP)
855 return container_of(peer, struct xe_pagemap, peer)->dpagemap.drm->dev;
856
857 return container_of(peer, struct xe_vm, svm.peer)->xe->drm.dev;
858 }
859
860 static bool xe_has_interconnect(struct drm_pagemap_peer *peer1,
861 struct drm_pagemap_peer *peer2)
862 {
863 struct device *dev1 = xe_peer_to_dev(peer1);
864 struct device *dev2 = xe_peer_to_dev(peer2);
865
866 if (dev1 == dev2)
867 return true;
868
869 return pci_p2pdma_distance(to_pci_dev(dev1), dev2, true) >= 0;
870 }
871
872 static DRM_PAGEMAP_OWNER_LIST_DEFINE(xe_owner_list);
873
874 /**
875 * xe_svm_init() - SVM initialize
876 * @vm: The VM.
877 *
878 * Initialize SVM state which is embedded within the VM.
879 *
880 * Return: 0 on success, negative error code on error.
881 */
882 int xe_svm_init(struct xe_vm *vm)
883 {
884 int err;
885
886 if (vm->flags & XE_VM_FLAG_FAULT_MODE) {
887 spin_lock_init(&vm->svm.garbage_collector.lock);
888 INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
889 INIT_WORK(&vm->svm.garbage_collector.work,
890 xe_svm_garbage_collector_work_func);
891
892 vm->svm.peer.private = XE_PEER_VM;
893 err = drm_pagemap_acquire_owner(&vm->svm.peer, &xe_owner_list,
894 xe_has_interconnect);
895 if (err)
896 return err;
897
898 err = xe_svm_get_pagemaps(vm);
899 if (err) {
900 drm_pagemap_release_owner(&vm->svm.peer);
901 return err;
902 }
903
904 err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
905 current->mm, 0, vm->size,
906 xe_modparam.svm_notifier_size * SZ_1M,
907 &gpusvm_ops, fault_chunk_sizes,
908 ARRAY_SIZE(fault_chunk_sizes));
909 drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
910
911 if (err) {
912 xe_svm_put_pagemaps(vm);
913 drm_pagemap_release_owner(&vm->svm.peer);
914 return err;
915 }
916 } else {
917 err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
918 &vm->xe->drm, NULL, 0, 0, 0, NULL,
919 NULL, 0);
920 }
921
922 return err;
923 }
924
925 /**
926 * xe_svm_close() - SVM close
927 * @vm: The VM.
928 *
929 * Close SVM state (i.e., stop and flush all SVM actions).
930 */
931 void xe_svm_close(struct xe_vm *vm)
932 {
933 xe_assert(vm->xe, xe_vm_is_closed(vm));
934 disable_work_sync(&vm->svm.garbage_collector.work);
935 xe_svm_put_pagemaps(vm);
936 drm_pagemap_release_owner(&vm->svm.peer);
937 }
938
939 /**
940 * xe_svm_fini() - SVM finalize
941 * @vm: The VM.
942 *
943 * Finalize SVM state which is embedded within the VM.
944 */
945 void xe_svm_fini(struct xe_vm *vm)
946 {
947 xe_assert(vm->xe, xe_vm_is_closed(vm));
948
949 drm_gpusvm_fini(&vm->svm.gpusvm);
950 }
951
952 static bool xe_svm_range_has_pagemap_locked(const struct xe_svm_range *range,
953 const struct drm_pagemap *dpagemap)
954 {
955 return range->base.pages.dpagemap == dpagemap;
956 }
957
958 static bool xe_svm_range_has_pagemap(struct xe_svm_range *range,
959 const struct drm_pagemap *dpagemap)
960 {
961 struct xe_vm *vm = range_to_vm(&range->base);
962 bool ret;
963
964 xe_svm_notifier_lock(vm);
965 ret = xe_svm_range_has_pagemap_locked(range, dpagemap);
966 xe_svm_notifier_unlock(vm);
967
968 return ret;
969 }
970
971 static bool xe_svm_range_is_valid(struct xe_svm_range *range,
972 struct xe_tile *tile,
973 bool devmem_only,
974 const struct drm_pagemap *dpagemap)
975
976 {
977 return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present,
978 range->tile_invalidated) &&
979 (!devmem_only || xe_svm_range_has_pagemap(range, dpagemap)));
980 }
981
982 /** xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM
983 * @vm: xe_vm pointer
984 * @range: Pointer to the SVM range structure
985 *
986 * The xe_svm_range_migrate_to_smem() function checks whether the range has
987 * pages in VRAM and, if so, migrates them to SMEM.
988 */
989 void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range)
990 {
991 if (xe_svm_range_in_vram(range))
992 drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
993 }
994
995 /**
996 * xe_svm_range_validate() - Check if the SVM range is valid
997 * @vm: xe_vm pointer
998 * @range: Pointer to the SVM range structure
999 * @tile_mask: Mask representing the tiles to be checked
1000 * @dpagemap: if !%NULL, the range is expected to be present
1001 * in device memory identified by this parameter.
1002 *
1003 * The xe_svm_range_validate() function checks if a range is
1004 * valid and located in the desired memory region.
1005 *
1006 * Return: true if the range is valid, false otherwise
1007 */
1008 bool xe_svm_range_validate(struct xe_vm *vm,
1009 struct xe_svm_range *range,
1010 u8 tile_mask, const struct drm_pagemap *dpagemap)
1011 {
1012 bool ret;
1013
1014 xe_svm_notifier_lock(vm);
1015
1016 ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask;
1017 if (dpagemap)
1018 ret = ret && xe_svm_range_has_pagemap_locked(range, dpagemap);
1019 else
1020 ret = ret && !range->base.pages.dpagemap;
1021
1022 xe_svm_notifier_unlock(vm);
1023
1024 return ret;
1025 }
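
/*
 * Worked example for the check above (illustrative only): with
 * tile_mask = BIT(0) | BIT(1), tile_present = 0x3 and tile_invalidated = 0x2,
 * (tile_present & ~tile_invalidated & tile_mask) evaluates to 0x1, which does
 * not equal tile_mask, so the range is reported as not valid and the caller
 * must revalidate it.
 */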
1026
1027 /**
1028 * xe_svm_find_vma_start - Find start of CPU VMA
1029 * @vm: xe_vm pointer
1030 * @start: start address
1031 * @end: end address
1032 * @vma: Pointer to struct xe_vma
1033 *
1034 *
1035 * This function searches for a CPU VMA within the specified
1036 * range [start, end] in the given VM. It adjusts the range based on the
1037 * xe_vma start and end addresses. If no CPU VMA is found, it returns ULONG_MAX.
1038 *
1039 * Return: The starting address of the VMA within the range,
1040 * or ULONG_MAX if no VMA is found
1041 */
1042 u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma)
1043 {
1044 return drm_gpusvm_find_vma_start(&vm->svm.gpusvm,
1045 max(start, xe_vma_start(vma)),
1046 min(end, xe_vma_end(vma)));
1047 }
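
/*
 * Usage sketch (illustrative only, hypothetical caller): walking CPU VMAs
 * backing a madvise range, where ULONG_MAX terminates the walk:
 *
 *	u64 addr = xe_svm_find_vma_start(vm, start, end, vma);
 *
 *	// ULONG_MAX means no CPU VMA backs [start, end)
 *	if (addr == ULONG_MAX)
 *		return;
 */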
1048
1049 #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
1050 static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
1051 unsigned long start, unsigned long end,
1052 struct mm_struct *mm,
1053 unsigned long timeslice_ms)
1054 {
1055 struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
1056 struct drm_pagemap_migrate_details mdetails = {
1057 .timeslice_ms = timeslice_ms,
1058 .source_peer_migrates = 1,
1059 };
1060 struct xe_vram_region *vr = xe_pagemap_to_vr(xpagemap);
1061 struct dma_fence *pre_migrate_fence = NULL;
1062 struct xe_device *xe = vr->xe;
1063 struct device *dev = xe->drm.dev;
1064 struct gpu_buddy_block *block;
1065 struct xe_validation_ctx vctx;
1066 struct list_head *blocks;
1067 struct drm_exec exec;
1068 struct xe_bo *bo;
1069 int err = 0, idx;
1070
1071 if (!drm_dev_enter(&xe->drm, &idx))
1072 return -ENODEV;
1073
1074 xe_pm_runtime_get(xe);
1075
1076 xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
1077 bo = xe_bo_create_locked(xe, NULL, NULL, end - start,
1078 ttm_bo_type_device,
1079 (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
1080 XE_BO_FLAG_CPU_ADDR_MIRROR, &exec);
1081 drm_exec_retry_on_contention(&exec);
1082 if (IS_ERR(bo)) {
1083 err = PTR_ERR(bo);
1084 xe_validation_retry_on_oom(&vctx, &err);
1085 break;
1086 }
1087
1088 /* Ensure that any clearing or async eviction will complete before migration. */
1089 if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) {
1090 err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
1091 &pre_migrate_fence);
1092 if (err)
1093 dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
1094 false, MAX_SCHEDULE_TIMEOUT);
1095 else if (pre_migrate_fence)
1096 dma_fence_enable_sw_signaling(pre_migrate_fence);
1097 }
1098
1099 drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
1100 &dpagemap_devmem_ops, dpagemap, end - start,
1101 pre_migrate_fence);
1102
1103 blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
1104 list_for_each_entry(block, blocks, link)
1105 block->private = vr;
1106
1107 xe_bo_get(bo);
1108
1109 /* Ensure the device has a pm ref while there are device pages active. */
1110 xe_pm_runtime_get_noresume(xe);
1111 /* Consumes the devmem allocation ref. */
1112 err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
1113 start, end, &mdetails);
1114 xe_bo_unlock(bo);
1115 xe_bo_put(bo);
1116 }
1117 xe_pm_runtime_put(xe);
1118 drm_dev_exit(idx);
1119
1120 return err;
1121 }
1122 #endif
1123
1124 static bool supports_4K_migration(struct xe_device *xe)
1125 {
1126 if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
1127 return false;
1128
1129 return true;
1130 }
1131
1132 /**
1133 * xe_svm_range_needs_migrate_to_vram() - Check whether an SVM range needs migration to VRAM
1134 * @range: SVM range for which migration needs to be decided
1135 * @vma: vma which has range
1136 * @dpagemap: The preferred struct drm_pagemap to migrate to.
1137 *
1138 * Return: True if the range needs migration and migration is supported, false otherwise
1139 */
1140 bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
1141 const struct drm_pagemap *dpagemap)
1142 {
1143 struct xe_vm *vm = range_to_vm(&range->base);
1144 u64 range_size = xe_svm_range_size(range);
1145
1146 if (!range->base.pages.flags.migrate_devmem || !dpagemap)
1147 return false;
1148
1149 xe_assert(vm->xe, IS_DGFX(vm->xe));
1150
1151 if (xe_svm_range_has_pagemap(range, dpagemap)) {
1152 drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
1153 return false;
1154 }
1155
1156 if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
1157 drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
1158 return false;
1159 }
1160
1161 return true;
1162 }
1163
1164 #define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \
1165 static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \
1166 struct xe_svm_range *range) \
1167 { \
1168 switch (xe_svm_range_size(range)) { \
1169 case SZ_4K: \
1170 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \
1171 break; \
1172 case SZ_64K: \
1173 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \
1174 break; \
1175 case SZ_2M: \
1176 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \
1177 break; \
1178 } \
1179 } \
1180
1181 DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT)
1182 DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT)
1183 DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE)
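
/*
 * For reference (illustrative expansion only),
 * DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT) above generates roughly:
 *
 *	static void xe_svm_range_fault_count_stats_incr(struct xe_gt *gt,
 *							struct xe_svm_range *range)
 *	{
 *		switch (xe_svm_range_size(range)) {
 *		case SZ_4K:
 *			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT, 1);
 *			break;
 *		case SZ_64K:
 *			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT, 1);
 *			break;
 *		case SZ_2M:
 *			xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT, 1);
 *			break;
 *		}
 *	}
 */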
1184
1185 #define DECL_SVM_RANGE_US_STATS(elem, stat) \
1186 static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \
1187 struct xe_svm_range *range, \
1188 ktime_t start) \
1189 { \
1190 s64 us_delta = xe_gt_stats_ktime_us_delta(start); \
1191 \
1192 switch (xe_svm_range_size(range)) { \
1193 case SZ_4K: \
1194 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \
1195 us_delta); \
1196 break; \
1197 case SZ_64K: \
1198 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \
1199 us_delta); \
1200 break; \
1201 case SZ_2M: \
1202 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \
1203 us_delta); \
1204 break; \
1205 } \
1206 } \
1207
1208 DECL_SVM_RANGE_US_STATS(migrate, MIGRATE)
1209 DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES)
1210 DECL_SVM_RANGE_US_STATS(bind, BIND)
1211 DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT)
1212
1213 static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
1214 struct xe_gt *gt, u64 fault_addr,
1215 bool need_vram)
1216 {
1217 int devmem_possible = IS_DGFX(vm->xe) &&
1218 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
1219 struct drm_gpusvm_ctx ctx = {
1220 .read_only = xe_vma_read_only(vma),
1221 .devmem_possible = devmem_possible,
1222 .check_pages_threshold = devmem_possible ? SZ_64K : 0,
1223 .devmem_only = need_vram && devmem_possible,
1224 .timeslice_ms = need_vram && devmem_possible ?
1225 vm->xe->atomic_svm_timeslice_ms : 0,
1226 };
1227 struct xe_validation_ctx vctx;
1228 struct drm_exec exec;
1229 struct xe_svm_range *range;
1230 struct dma_fence *fence;
1231 struct drm_pagemap *dpagemap;
1232 struct xe_tile *tile = gt_to_tile(gt);
1233 int migrate_try_count = ctx.devmem_only ? 3 : 1;
1234 ktime_t start = xe_gt_stats_ktime_get(), bind_start, get_pages_start;
1235 int err;
1236
1237 lockdep_assert_held_write(&vm->lock);
1238 xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
1239
1240 xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
1241
1242 retry:
1243 /* Always process UNMAPs first so the view of SVM ranges is current */
1244 err = xe_svm_garbage_collector(vm);
1245 if (err)
1246 return err;
1247
1248 dpagemap = ctx.devmem_only ? xe_tile_local_pagemap(tile) :
1249 xe_vma_resolve_pagemap(vma, tile);
1250 ctx.device_private_page_owner = xe_svm_private_page_owner(vm, !dpagemap);
1251 range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
1252
1253 if (IS_ERR(range))
1254 return PTR_ERR(range);
1255
1256 xe_svm_range_fault_count_stats_incr(gt, range);
1257
1258 if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) {
1259 err = -EACCES;
1260 goto out;
1261 }
1262
1263 if (xe_svm_range_is_valid(range, tile, ctx.devmem_only, dpagemap)) {
1264 xe_svm_range_valid_fault_count_stats_incr(gt, range);
1265 range_debug(range, "PAGE FAULT - VALID");
1266 goto out;
1267 }
1268
1269 range_debug(range, "PAGE FAULT");
1270
1271 if (--migrate_try_count >= 0 &&
1272 xe_svm_range_needs_migrate_to_vram(range, vma, dpagemap)) {
1273 ktime_t migrate_start = xe_gt_stats_ktime_get();
1274
1275 xe_svm_range_migrate_count_stats_incr(gt, range);
1276 err = xe_svm_alloc_vram(range, &ctx, dpagemap);
1277 xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
1278 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
1279 if (err) {
1280 if (migrate_try_count || !ctx.devmem_only) {
1281 drm_dbg(&vm->xe->drm,
1282 "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
1283 vm->usm.asid, ERR_PTR(err));
1284
1285 /*
1286 * In the devmem-only case, mixed mappings may
1287 * be found. The get_pages function will fix
1288 * these up to a single location, allowing the
1289 * page fault handler to make forward progress.
1290 */
1291 if (ctx.devmem_only)
1292 goto get_pages;
1293 else
1294 goto retry;
1295 } else {
1296 drm_err(&vm->xe->drm,
1297 "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
1298 vm->usm.asid, ERR_PTR(err));
1299 return err;
1300 }
1301 }
1302 }
1303
1304 get_pages:
1305 get_pages_start = xe_gt_stats_ktime_get();
1306
1307 range_debug(range, "GET PAGES");
1308 err = xe_svm_range_get_pages(vm, range, &ctx);
1309 /* Corner case where CPU mappings have changed */
1310 if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
1311 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
1312 if (migrate_try_count > 0 || !ctx.devmem_only) {
1313 drm_dbg(&vm->xe->drm,
1314 "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
1315 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
1316 range_debug(range, "PAGE FAULT - RETRY PAGES");
1317 goto retry;
1318 } else {
1319 drm_err(&vm->xe->drm,
1320 "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
1321 vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
1322 }
1323 }
1324 if (err) {
1325 range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
1326 goto out;
1327 } else if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) {
1328 drm_dbg(&vm->xe->drm, "After page collect data location is %sin \"%s\".\n",
1329 xe_svm_range_has_pagemap(range, dpagemap) ? "" : "NOT ",
1330 dpagemap ? dpagemap->drm->unique : "System.");
1331 }
1332
1333 xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
1334 range_debug(range, "PAGE FAULT - BIND");
1335
1336 bind_start = xe_gt_stats_ktime_get();
1337 xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) {
1338 err = xe_vm_drm_exec_lock(vm, &exec);
1339 drm_exec_retry_on_contention(&exec);
1340
1341 xe_vm_set_validation_exec(vm, &exec);
1342 fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
1343 xe_vm_set_validation_exec(vm, NULL);
1344 if (IS_ERR(fence)) {
1345 drm_exec_retry_on_contention(&exec);
1346 err = PTR_ERR(fence);
1347 xe_validation_retry_on_oom(&vctx, &err);
1348 xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
1349 break;
1350 }
1351 }
1352 if (err)
1353 goto err_out;
1354
1355 dma_fence_wait(fence, false);
1356 dma_fence_put(fence);
1357 xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
1358
1359 out:
1360 xe_svm_range_fault_us_stats_incr(gt, range, start);
1361 return 0;
1362
1363 err_out:
1364 if (err == -EAGAIN) {
1365 ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
1366 range_debug(range, "PAGE FAULT - RETRY BIND");
1367 goto retry;
1368 }
1369
1370 return err;
1371 }
1372
1373 /**
1374 * xe_svm_handle_pagefault() - SVM handle page fault
1375 * @vm: The VM.
1376 * @vma: The CPU address mirror VMA.
1377 * @gt: The GT on which the fault occurred.
1378 * @fault_addr: The GPU fault address.
1379 * @atomic: The fault atomic access bit.
1380 *
1381 * Create GPU bindings for a SVM page fault. Optionally migrate to device
1382 * memory.
1383 *
1384 * Return: 0 on success, negative error code on error.
1385 */
1386 int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
1387 struct xe_gt *gt, u64 fault_addr,
1388 bool atomic)
1389 {
1390 int need_vram, ret;
1391 retry:
1392 need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
1393 if (need_vram < 0)
1394 return need_vram;
1395
1396 ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr,
1397 need_vram ? true : false);
1398 if (ret == -EAGAIN) {
1399 /*
1400 * Retry once on -EAGAIN to re-lookup the VMA, as the original VMA
1401 * may have been split by xe_svm_range_set_default_attr.
1402 */
1403 vma = xe_vm_find_vma_by_addr(vm, fault_addr);
1404 if (!vma)
1405 return -EINVAL;
1406
1407 goto retry;
1408 }
1409 return ret;
1410 }
1411
1412 /**
1413 * xe_svm_has_mapping() - SVM has mappings
1414 * @vm: The VM.
1415 * @start: Start address.
1416 * @end: End address.
1417 *
1418 * Check if an address range has SVM mappings.
1419 *
1420 * Return: True if address range has a SVM mapping, False otherwise
1421 */
1422 bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
1423 {
1424 return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
1425 }
1426
1427 /**
1428 * xe_svm_unmap_address_range - UNMAP SVM mappings and ranges
1429 * @vm: The VM
1430 * @start: start addr
1431 * @end: end addr
1432 *
1433 * This function unmaps SVM ranges if the start or end address falls inside them.
1434 */
1435 void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end)
1436 {
1437 struct drm_gpusvm_notifier *notifier, *next;
1438
1439 lockdep_assert_held_write(&vm->lock);
1440
1441 drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) {
1442 struct drm_gpusvm_range *range, *__next;
1443
1444 drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) {
1445 if (start > drm_gpusvm_range_start(range) ||
1446 end < drm_gpusvm_range_end(range)) {
1447 if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range)))
1448 drm_gpusvm_range_evict(&vm->svm.gpusvm, range);
1449 drm_gpusvm_range_get(range);
1450 __xe_svm_garbage_collector(vm, to_xe_range(range));
1451 if (!list_empty(&to_xe_range(range)->garbage_collector_link)) {
1452 spin_lock(&vm->svm.garbage_collector.lock);
1453 list_del(&to_xe_range(range)->garbage_collector_link);
1454 spin_unlock(&vm->svm.garbage_collector.lock);
1455 }
1456 drm_gpusvm_range_put(range);
1457 }
1458 }
1459 }
1460 }
1461
1462 /**
1463 * xe_svm_bo_evict() - SVM evict BO to system memory
1464 * @bo: BO to evict
1465 *
1466 * SVM evict BO to system memory. GPU SVM layer ensures all device pages
1467 * are evicted before returning.
1468 *
1469 * Return: 0 on success, negative error code otherwise
1470 */
1471 int xe_svm_bo_evict(struct xe_bo *bo)
1472 {
1473 return drm_pagemap_evict_to_ram(&bo->devmem_allocation);
1474 }
1475
1476 /**
1477 * xe_svm_range_find_or_insert() - Find or insert GPU SVM range
1478 * @vm: xe_vm pointer
1479 * @addr: address for which range needs to be found/inserted
1480 * @vma: Pointer to struct xe_vma which mirrors CPU
1481 * @ctx: GPU SVM context
1482 *
1483 * This function finds or inserts a newly allocated SVM range based on the
1484 * address.
1485 *
1486 * Return: Pointer to the SVM range on success, ERR_PTR() on failure.
1487 */
1488 struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
1489 struct xe_vma *vma, struct drm_gpusvm_ctx *ctx)
1490 {
1491 struct drm_gpusvm_range *r;
1492
1493 r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)),
1494 xe_vma_start(vma), xe_vma_end(vma), ctx);
1495 if (IS_ERR(r))
1496 return ERR_CAST(r);
1497
1498 return to_xe_range(r);
1499 }
1500
1501 /**
1502 * xe_svm_range_get_pages() - Get pages for a SVM range
1503 * @vm: Pointer to the struct xe_vm
1504 * @range: Pointer to the xe SVM range structure
1505 * @ctx: GPU SVM context
1506 *
1507 * This function gets pages for a SVM range and ensures they are mapped for
1508 * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range.
1509 *
1510 * Return: 0 on success, negative error code on failure.
1511 */
1512 int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
1513 struct drm_gpusvm_ctx *ctx)
1514 {
1515 int err = 0;
1516
1517 err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx);
1518 if (err == -EOPNOTSUPP) {
1519 range_debug(range, "PAGE FAULT - EVICT PAGES");
1520 drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
1521 }
1522
1523 return err;
1524 }
1525
1526 /**
1527 * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range
1528 * @vm: Pointer to the xe_vm structure
1529 * @start: Start of the input range
1530 * @end: End of the input range
1531 *
1532 * This function removes the page table entries (PTEs) associated
1533 * with the SVM ranges within the given input start and end.
1534 *
1535 * Return: tile_mask indicating which GTs need to be TLB invalidated.
1536 */
1537 u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
1538 {
1539 struct drm_gpusvm_notifier *notifier;
1540 struct xe_svm_range *range;
1541 u64 adj_start, adj_end;
1542 struct xe_tile *tile;
1543 u8 tile_mask = 0;
1544 u8 id;
1545
1546 lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
1547 lockdep_is_held_type(&vm->lock, 0));
1548
1549 drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) {
1550 struct drm_gpusvm_range *r = NULL;
1551
1552 adj_start = max(start, drm_gpusvm_notifier_start(notifier));
1553 adj_end = min(end, drm_gpusvm_notifier_end(notifier));
1554 drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) {
1555 range = to_xe_range(r);
1556 for_each_tile(tile, vm->xe, id) {
1557 if (xe_pt_zap_ptes_range(tile, vm, range)) {
1558 tile_mask |= BIT(id);
1559 /*
1560 * WRITE_ONCE pairs with READ_ONCE in
1561 * xe_vm_has_valid_gpu_mapping().
1562 * Must not fail after setting
1563 * tile_invalidated and before
1564 * TLB invalidation.
1565 */
1566 WRITE_ONCE(range->tile_invalidated,
1567 range->tile_invalidated | BIT(id));
1568 }
1569 }
1570 }
1571 }
1572
1573 return tile_mask;
1574 }
1575
1576 #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
1577
1578 /**
1579 * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA
1580 * @vma: Pointer to the xe_vma structure containing memory attributes
1581 * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping
1582 *
1583 * This function determines the correct DRM pagemap to use for a given VMA.
1584 * It first checks if a valid devmem_fd is provided in the VMA's preferred
1585 * location. If the devmem_fd is negative, it returns NULL, indicating no
1586 * pagemap is available and system memory is to be used as the preferred location.
1587 * If the devmem_fd is equal to the default faulting
1588 * GT identifier, it returns the VRAM pagemap associated with the tile.
1589 *
1590 * Future support for multi-device configurations may use drm_pagemap_from_fd()
1591 * to resolve pagemaps from arbitrary file descriptors.
1592 *
1593 * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable.
1594 */
1595 struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
1596 {
1597 struct drm_pagemap *dpagemap = vma->attr.preferred_loc.dpagemap;
1598 s32 fd;
1599
1600 if (dpagemap)
1601 return dpagemap;
1602
1603 fd = (s32)vma->attr.preferred_loc.devmem_fd;
1604
1605 if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
1606 return NULL;
1607
1608 if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
1609 return IS_DGFX(tile_to_xe(tile)) ? xe_tile_local_pagemap(tile) : NULL;
1610
1611 return NULL;
1612 }
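
/*
 * Resolution summary (derived from the function above, illustrative only):
 *
 *	attr.preferred_loc.dpagemap set                  -> that pagemap
 *	devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM -> NULL (system memory)
 *	devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE -> local VRAM pagemap on
 *	                                                    dGFX, NULL otherwise
 *	any other value                                  -> NULL
 */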
1613
1614 /**
1615 * xe_svm_alloc_vram() - Allocate device memory pages for range,
1616 * migrating existing data.
1617 * @range: SVM range
1618 * @ctx: DRM GPU SVM context
1619 * @dpagemap: The struct drm_pagemap representing the memory to allocate.
1620 *
1621 * Return: 0 on success, error code on failure.
1622 */
1623 int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
1624 struct drm_pagemap *dpagemap)
1625 {
1626 static DECLARE_RWSEM(driver_migrate_lock);
1627 struct xe_vm *vm = range_to_vm(&range->base);
1628 enum drm_gpusvm_scan_result migration_state;
1629 struct xe_device *xe = vm->xe;
1630 int err, retries = 1;
1631 bool write_locked = false;
1632
1633 xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
1634 range_debug(range, "ALLOCATE VRAM");
1635
1636 migration_state = drm_gpusvm_scan_mm(&range->base,
1637 xe_svm_private_page_owner(vm, false),
1638 dpagemap->pagemap);
1639
1640 if (migration_state == DRM_GPUSVM_SCAN_EQUAL) {
1641 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
1642 drm_dbg(dpagemap->drm, "Already migrated!\n");
1643 return 0;
1644 }
1645
1646 if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM))
1647 drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n",
1648 dpagemap->drm->unique);
1649
1650 err = down_read_interruptible(&driver_migrate_lock);
1651 if (err)
1652 return err;
1653 do {
1654 err = drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
1655 xe_svm_range_end(range),
1656 range->base.gpusvm->mm,
1657 ctx->timeslice_ms);
1658
1659 if (err == -EBUSY && retries) {
1660 if (!write_locked) {
1661 int lock_err;
1662
1663 up_read(&driver_migrate_lock);
1664 lock_err = down_write_killable(&driver_migrate_lock);
1665 if (lock_err)
1666 return lock_err;
1667 write_locked = true;
1668 }
1669 drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
1670 }
1671 } while (err == -EBUSY && retries--);
1672 if (write_locked)
1673 up_write(&driver_migrate_lock);
1674 else
1675 up_read(&driver_migrate_lock);
1676
1677 return err;
1678 }
1679
1680 static struct drm_pagemap_addr
1681 xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
1682 struct device *dev,
1683 struct page *page,
1684 unsigned int order,
1685 enum dma_data_direction dir)
1686 {
1687 struct device *pgmap_dev = dpagemap->drm->dev;
1688 enum drm_interconnect_protocol prot;
1689 dma_addr_t addr;
1690
1691 if (pgmap_dev == dev) {
1692 addr = xe_page_to_dpa(page);
1693 prot = XE_INTERCONNECT_VRAM;
1694 } else {
1695 addr = dma_map_resource(dev,
1696 xe_page_to_pcie(page),
1697 PAGE_SIZE << order, dir,
1698 DMA_ATTR_SKIP_CPU_SYNC);
1699 prot = XE_INTERCONNECT_P2P;
1700 }
1701
1702 return drm_pagemap_addr_encode(addr, prot, order, dir);
1703 }
1704
1705 static void xe_drm_pagemap_device_unmap(struct drm_pagemap *dpagemap,
1706 struct device *dev,
1707 const struct drm_pagemap_addr *addr)
1708 {
1709 if (addr->proto != XE_INTERCONNECT_P2P)
1710 return;
1711
1712 dma_unmap_resource(dev, addr->addr, PAGE_SIZE << addr->order,
1713 addr->dir, DMA_ATTR_SKIP_CPU_SYNC);
1714 }
1715
1716 static void xe_pagemap_destroy_work(struct work_struct *work)
1717 {
1718 struct xe_pagemap *xpagemap = container_of(work, typeof(*xpagemap), destroy_work);
1719 struct dev_pagemap *pagemap = &xpagemap->pagemap;
1720 struct drm_device *drm = xpagemap->dpagemap.drm;
1721 int idx;
1722
1723 /*
1724 	 * Only unmap / release if the devm_ managed release hasn't run yet.
1725 	 * Otherwise the devm_ callbacks have already released the resources,
1726 	 * or will do so shortly.
1727 */
1728 if (drm_dev_enter(drm, &idx)) {
1729 devm_memunmap_pages(drm->dev, pagemap);
1730 devm_release_mem_region(drm->dev, pagemap->range.start,
1731 pagemap->range.end - pagemap->range.start + 1);
1732 drm_dev_exit(idx);
1733 }
1734
1735 drm_pagemap_release_owner(&xpagemap->peer);
1736 kfree(xpagemap);
1737 }
1738
1739 static void xe_pagemap_destroy(struct drm_pagemap *dpagemap, bool from_atomic_or_reclaim)
1740 {
1741 struct xe_pagemap *xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
1742 struct xe_device *xe = to_xe_device(dpagemap->drm);
1743
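	/*
	 * The teardown in xe_pagemap_destroy_work() may sleep, so defer it to
	 * a worker when the final reference is dropped from atomic or reclaim
	 * context.
	 */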
1744 if (from_atomic_or_reclaim)
1745 queue_work(xe->destroy_wq, &xpagemap->destroy_work);
1746 else
1747 xe_pagemap_destroy_work(&xpagemap->destroy_work);
1748 }
1749
1750 static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
1751 .device_map = xe_drm_pagemap_device_map,
1752 .device_unmap = xe_drm_pagemap_device_unmap,
1753 .populate_mm = xe_drm_pagemap_populate_mm,
1754 .destroy = xe_pagemap_destroy,
1755 };
1756
1757 /**
1758 * xe_pagemap_create() - Create a struct xe_pagemap object
1759 * @xe: The xe device.
1760 * @vr: Back-pointer to the struct xe_vram_region.
1761 *
1762 * Allocate and initialize a struct xe_pagemap. On successful
1763 * return, drm_pagemap_put() on the embedded struct drm_pagemap
1764 * should be used to unreference.
1765 *
1766 * Return: Pointer to a struct xe_pagemap if successful. Error pointer
1767 * on failure.
1768 */
1769 static struct xe_pagemap *xe_pagemap_create(struct xe_device *xe, struct xe_vram_region *vr)
1770 {
1771 struct device *dev = xe->drm.dev;
1772 struct xe_pagemap *xpagemap;
1773 struct dev_pagemap *pagemap;
1774 struct drm_pagemap *dpagemap;
1775 struct resource *res;
1776 void *addr;
1777 int err;
1778
1779 xpagemap = kzalloc_obj(*xpagemap);
1780 if (!xpagemap)
1781 return ERR_PTR(-ENOMEM);
1782
1783 pagemap = &xpagemap->pagemap;
1784 dpagemap = &xpagemap->dpagemap;
1785 INIT_WORK(&xpagemap->destroy_work, xe_pagemap_destroy_work);
1786 xpagemap->vr = vr;
1787 xpagemap->peer.private = XE_PEER_PAGEMAP;
1788
1789 err = drm_pagemap_init(dpagemap, pagemap, &xe->drm, &xe_drm_pagemap_ops);
1790 if (err)
1791 goto out_no_dpagemap;
1792
1793 res = devm_request_free_mem_region(dev, &iomem_resource,
1794 vr->usable_size);
1795 if (IS_ERR(res)) {
1796 err = PTR_ERR(res);
1797 goto out_err;
1798 }
1799
1800 err = drm_pagemap_acquire_owner(&xpagemap->peer, &xe_owner_list,
1801 xe_has_interconnect);
1802 if (err)
1803 goto out_no_owner;
1804
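	/*
	 * Register the VRAM-backed range as device-private memory so that the
	 * core MM can migrate pages to and from it via the drm_pagemap page ops.
	 */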
1805 pagemap->type = MEMORY_DEVICE_PRIVATE;
1806 pagemap->range.start = res->start;
1807 pagemap->range.end = res->end;
1808 pagemap->nr_range = 1;
1809 pagemap->owner = xpagemap->peer.owner;
1810 pagemap->ops = drm_pagemap_pagemap_ops_get();
1811 addr = devm_memremap_pages(dev, pagemap);
1812 if (IS_ERR(addr)) {
1813 err = PTR_ERR(addr);
1814 goto out_no_pages;
1815 }
1816 xpagemap->hpa_base = res->start;
1817 return xpagemap;
1818
1819 out_no_pages:
1820 drm_pagemap_release_owner(&xpagemap->peer);
1821 out_no_owner:
1822 devm_release_mem_region(dev, res->start, res->end - res->start + 1);
1823 out_err:
1824 drm_pagemap_put(dpagemap);
1825 return ERR_PTR(err);
1826
1827 out_no_dpagemap:
1828 kfree(xpagemap);
1829 return ERR_PTR(err);
1830 }
1831
1832 /**
1833 * xe_pagemap_find_or_create() - Find or create a struct xe_pagemap
1834 * @xe: The xe device.
1835 * @cache: The struct xe_pagemap_cache.
1836 * @vr: The VRAM region.
1837 *
1838 * Check if there is an already used xe_pagemap for this tile, and in that case,
1839 * return it.
1840 * If not, check if there is a cached xe_pagemap for this tile, and in that case,
1841 * cancel its destruction, re-initialize it and return it.
1842 * Finally, if there is no cached or already used pagemap, create one and
1843 * register it in the tile's pagemap cache.
1844 *
1845 * Note that this function is typically called from within an IOCTL, so waits
1846 * are carried out interruptibly where possible.
1847 *
1848 * Return: A pointer to a struct xe_pagemap if successful. Error pointer on failure.
1849 */
1850 static struct xe_pagemap *
1851 xe_pagemap_find_or_create(struct xe_device *xe, struct drm_pagemap_cache *cache,
1852 struct xe_vram_region *vr)
1853 {
1854 struct drm_pagemap *dpagemap;
1855 struct xe_pagemap *xpagemap;
1856 int err;
1857
1858 err = drm_pagemap_cache_lock_lookup(cache);
1859 if (err)
1860 return ERR_PTR(err);
1861
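	/*
	 * A cache hit returns an already registered pagemap (cancelling any
	 * pending destruction of it); a NULL return means nothing is cached
	 * and a new pagemap must be created.
	 */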
1862 dpagemap = drm_pagemap_get_from_cache(cache);
1863 if (IS_ERR(dpagemap)) {
1864 xpagemap = ERR_CAST(dpagemap);
1865 } else if (!dpagemap) {
1866 xpagemap = xe_pagemap_create(xe, vr);
1867 if (IS_ERR(xpagemap))
1868 goto out_unlock;
1869 drm_pagemap_cache_set_pagemap(cache, &xpagemap->dpagemap);
1870 } else {
1871 xpagemap = container_of(dpagemap, typeof(*xpagemap), dpagemap);
1872 }
1873
1874 out_unlock:
1875 drm_pagemap_cache_unlock_lookup(cache);
1876 return xpagemap;
1877 }
1878
1879 static int xe_svm_get_pagemaps(struct xe_vm *vm)
1880 {
1881 struct xe_device *xe = vm->xe;
1882 struct xe_pagemap *xpagemap = NULL;
1883 struct xe_tile *tile;
1884 int id;
1885
1886 for_each_tile(tile, xe, id) {
1887 struct xe_vram_region *vr;
1888
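		/*
		 * Bit 0 of mem_region_mask is system memory; VRAM regions
		 * occupy the following bits, one per tile. Skip tiles
		 * without VRAM.
		 */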
1889 if (!((BIT(id) << 1) & xe->info.mem_region_mask))
1890 continue;
1891
1892 vr = xe_tile_to_vr(tile);
1893 xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
1894 if (IS_ERR(xpagemap))
1895 break;
1896 vm->svm.pagemaps[id] = xpagemap;
1897 }
1898
1899 if (IS_ERR(xpagemap)) {
1900 xe_svm_put_pagemaps(vm);
1901 return PTR_ERR(xpagemap);
1902 }
1903
1904 return 0;
1905 }
1906
1907 /**
1908 * xe_pagemap_shrinker_create() - Create a drm_pagemap shrinker
1909 * @xe: The xe device
1910 *
1911 * Create a drm_pagemap shrinker and register with the xe device.
1912 *
1913 * Return: %0 on success, negative error code on failure.
1914 */
1915 int xe_pagemap_shrinker_create(struct xe_device *xe)
1916 {
1917 xe->usm.dpagemap_shrinker = drm_pagemap_shrinker_create_devm(&xe->drm);
1918 return PTR_ERR_OR_ZERO(xe->usm.dpagemap_shrinker);
1919 }
1920
1921 /**
1922 * xe_pagemap_cache_create() - Create a drm_pagemap cache
1923 * @tile: The tile to register the cache with
1924 *
1925 * Create a drm_pagemap cache and register with the tile.
1926 *
1927 * Return: %0 on success, negative error code on failure.
1928 */
1929 int xe_pagemap_cache_create(struct xe_tile *tile)
1930 {
1931 struct xe_device *xe = tile_to_xe(tile);
1932
1933 if (IS_DGFX(xe)) {
1934 struct drm_pagemap_cache *cache =
1935 drm_pagemap_cache_create_devm(xe->usm.dpagemap_shrinker);
1936
1937 if (IS_ERR(cache))
1938 return PTR_ERR(cache);
1939
1940 tile->mem.vram->dpagemap_cache = cache;
1941 }
1942
1943 return 0;
1944 }
1945
1946 static struct drm_pagemap *xe_devmem_open(struct xe_device *xe, u32 region_instance)
1947 {
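	/* VRAM region instances are 1-based per tile; convert to the owning tile index. */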
1948 u32 tile_id = region_instance - 1;
1949 struct xe_pagemap *xpagemap;
1950 struct xe_vram_region *vr;
1951
1952 if (tile_id >= xe->info.tile_count)
1953 return ERR_PTR(-ENOENT);
1954
1955 if (!((BIT(tile_id) << 1) & xe->info.mem_region_mask))
1956 return ERR_PTR(-ENOENT);
1957
1958 vr = xe_tile_to_vr(&xe->tiles[tile_id]);
1959
1960 /* Returns a reference-counted embedded struct drm_pagemap */
1961 xpagemap = xe_pagemap_find_or_create(xe, vr->dpagemap_cache, vr);
1962 if (IS_ERR(xpagemap))
1963 return ERR_CAST(xpagemap);
1964
1965 return &xpagemap->dpagemap;
1966 }
1967
1968 /**
1969 * xe_drm_pagemap_from_fd() - Return a drm_pagemap pointer from a
1970 * (file_descriptor, region_instance) pair.
1971 * @fd: An fd opened against an xe device.
1972 * @region_instance: The region instance representing the device memory
1973 * on the opened xe device.
1974 *
1975 * Looks up the struct drm_pagemap of the indicated device and
1976 * region_instance and returns a reference to it.
1977 *
1978 * Return: A reference-counted struct drm_pagemap pointer on success,
1979 * negative error pointer on failure.
1980 */
1981 struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
1982 {
1983 struct drm_pagemap *dpagemap;
1984 struct file *file;
1985 struct drm_file *fpriv;
1986 struct drm_device *drm;
1987 int idx;
1988
1989 if (fd <= 0)
1990 return ERR_PTR(-EINVAL);
1991
1992 file = fget(fd);
1993 if (!file)
1994 return ERR_PTR(-ENOENT);
1995
1996 if (!xe_is_xe_file(file)) {
1997 dpagemap = ERR_PTR(-ENOENT);
1998 goto out;
1999 }
2000
2001 fpriv = file->private_data;
2002 drm = fpriv->minor->dev;
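	/* drm_dev_enter() fails if the device has been unplugged since the fd was opened. */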
2003 if (!drm_dev_enter(drm, &idx)) {
2004 dpagemap = ERR_PTR(-ENODEV);
2005 goto out;
2006 }
2007
2008 dpagemap = xe_devmem_open(to_xe_device(drm), region_instance);
2009 drm_dev_exit(idx);
2010 out:
2011 fput(file);
2012 return dpagemap;
2013 }
2014
2015 #else
2016
2017 int xe_pagemap_shrinker_create(struct xe_device *xe)
2018 {
2019 return 0;
2020 }
2021
2022 int xe_pagemap_cache_create(struct xe_tile *tile)
2023 {
2024 return 0;
2025 }
2026
2027 int xe_svm_alloc_vram(struct xe_svm_range *range,
2028 const struct drm_gpusvm_ctx *ctx,
2029 struct drm_pagemap *dpagemap)
2030 {
2031 return -EOPNOTSUPP;
2032 }
2033
2034 struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
2035 {
2036 return NULL;
2037 }
2038
2039 struct drm_pagemap *xe_drm_pagemap_from_fd(int fd, u32 region_instance)
2040 {
2041 return ERR_PTR(-ENOENT);
2042 }
2043
2044 #endif
2045
2046 /**
2047 * xe_svm_flush() - SVM flush
2048 * @vm: The VM.
2049 *
2050 * Flush all SVM actions.
2051 */
2052 void xe_svm_flush(struct xe_vm *vm)
2053 {
2054 if (xe_vm_in_fault_mode(vm))
2055 flush_work(&vm->svm.garbage_collector.work);
2056 }
2057