// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include "xe_bo.h"
#include "xe_gt_stats.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
	/*
	 * Advisory-only check of whether the range is currently backed by
	 * VRAM memory.
	 */

	struct drm_gpusvm_range_flags flags = {
		/* Pairs with WRITE_ONCE in drm_gpusvm.c */
		.__flags = READ_ONCE(range->base.flags.__flags),
	};

	return flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return xe_svm_range_in_vram(range) && range->tile_present;
}

static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
	return gpusvm_to_vm(r->gpusvm);
}

static unsigned long xe_svm_range_start(struct xe_svm_range *range)
{
	return drm_gpusvm_range_start(&range->base);
}

static unsigned long xe_svm_range_end(struct xe_svm_range *range)
{
	return drm_gpusvm_range_end(&range->base);
}

static unsigned long xe_svm_range_size(struct xe_svm_range *range)
{
	return drm_gpusvm_range_size(&range->base);
}

#define range_debug(r__, operation__) \
	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
	       "start=0x%014lx, end=0x%014lx, size=%lu", \
	       (operation__), range_to_vm(&(r__)->base)->usm.asid, \
	       (r__)->base.gpusvm, \
	       xe_svm_range_in_vram((r__)) ? 1 : 0, \
	       xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
	       (r__)->base.notifier_seq, \
	       xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
	       xe_svm_range_size((r__)))

void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
	range_debug(range, operation);
}

static void *xe_svm_devm_owner(struct xe_device *xe)
{
	return xe;
}

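/*
 * Each SVM range holds a reference on its VM, taken here and dropped in
 * xe_svm_range_free().
 */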
static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
	struct xe_svm_range *range;

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return NULL;

	INIT_LIST_HEAD(&range->garbage_collector_link);
	xe_vm_get(gpusvm_to_vm(gpusvm));

	return &range->base;
}

static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
	xe_vm_put(range_to_vm(range));
	kfree(range);
}

static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
{
	return container_of(r, struct xe_svm_range, base);
}

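/*
 * Mark the range as unmapped and queue it for deferred teardown on the page
 * fault workqueue; the actual unbind happens in the garbage collector worker.
 */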
static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	struct xe_device *xe = vm->xe;

	range_debug(range, "GARBAGE COLLECTOR ADD");

	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

	spin_lock(&vm->svm.garbage_collector.lock);
	if (list_empty(&range->garbage_collector_link))
		list_add_tail(&range->garbage_collector_link,
			      &vm->svm.garbage_collector.range_list);
	spin_unlock(&vm->svm.garbage_collector.lock);

	queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
		   &vm->svm.garbage_collector.work);
}

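/*
 * Zap the PTEs of a range that has a GPU binding, widen the invalidation
 * window to the range boundaries, and return a mask of tiles requiring a TLB
 * invalidation.
 */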
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
				  const struct mmu_notifier_range *mmu_range,
				  u64 *adj_start, u64 *adj_end)
{
	struct xe_svm_range *range = to_xe_range(r);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	xe_svm_assert_in_notifier(vm);

	range_debug(range, "NOTIFIER");

	/* Skip if already unmapped or if no binding exists */
	if (range->base.flags.unmapped || !range->tile_present)
		return 0;

	range_debug(range, "NOTIFIER - EXECUTE");

	/* Adjust invalidation to range boundaries */
	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
	*adj_end = max(xe_svm_range_end(range), mmu_range->end);

	/*
	 * XXX: Ideally we would zap PTEs in one shot in xe_svm_invalidate but
	 * the invalidation code can't correctly cope with sparse ranges or
	 * invalidations spanning multiple ranges.
	 */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes_range(tile, vm, range)) {
			tile_mask |= BIT(id);
			range->tile_invalidated |= BIT(id);
		}

	return tile_mask;
}

static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
				const struct mmu_notifier_range *mmu_range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	xe_svm_assert_in_notifier(vm);

	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
						   mmu_range);
}

static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      struct drm_gpusvm_notifier *notifier,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct drm_gpusvm_range *r, *first;
	struct xe_gt_tlb_invalidation_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
	u8 tile_mask = 0;
	u8 id;
	u32 fence_id = 0;
	long err;

	xe_svm_assert_in_notifier(vm);

	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
	       mmu_range->start, mmu_range->end, mmu_range->event);

	/* Adjust invalidation to notifier boundaries */
	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
	if (!first)
		return;

	/*
	 * PTs may be getting destroyed so it is not safe to touch them, but
	 * the PTs should already be invalidated at this point in time.
	 * Regardless, we still need to ensure any DMA mappings are unmapped
	 * here.
	 */
	if (xe_vm_is_closed(vm))
		goto range_notifier_event_end;

	/*
	 * XXX: Less than ideal to always wait on the VM's resv slots if an
	 * invalidation is not required. Could walk the range list twice to
	 * figure out if an invalidation is needed, but that is also not ideal.
	 */
	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
							       &adj_start,
							       &adj_end);
	if (!tile_mask)
		goto range_notifier_event_end;

	xe_device_wmb(xe);

	for_each_tile(tile, xe, id) {
		if (tile_mask & BIT(id)) {
			int err;

			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->media_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;
		}
	}

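	/* Wait for all issued TLB invalidations before completing the notifier */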
wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

range_notifier_event_end:
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		xe_svm_range_notifier_event_end(vm, r, mmu_range);
}

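/*
 * Unbind a single range's GPU mappings with the VM's dma-resv held, then
 * remove the range from GPU SVM tracking.
 */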
static int __xe_svm_garbage_collector(struct xe_vm *vm,
				      struct xe_svm_range *range)
{
	struct dma_fence *fence;

	range_debug(range, "GARBAGE COLLECTOR");

	xe_vm_lock(vm, false);
	fence = xe_vm_range_unbind(vm, range);
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	dma_fence_put(fence);

	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

	return 0;
}

static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return -ENOENT;

	spin_lock(&vm->svm.garbage_collector.lock);
	for (;;) {
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			drm_warn(&vm->xe->drm,
				 "Garbage collection failed: %pe\n",
				 ERR_PTR(err));
			xe_vm_kill(vm, true);
			return err;
		}

		spin_lock(&vm->svm.garbage_collector.lock);
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	return 0;
}

static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm,
					svm.garbage_collector.work);

	down_write(&vm->lock);
	xe_svm_garbage_collector(vm);
	up_write(&vm->lock);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)

static struct xe_vram_region *page_to_vr(struct page *page)
{
	return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
}

static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
{
	return container_of(vr, struct xe_tile, mem.vram);
}

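/*
 * Translate a device-private page to its device physical address (DPA)
 * within the VRAM region, using the page's offset from the region's host
 * physical base.
 */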
static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
				      struct page *page)
{
	u64 dpa;
	struct xe_tile *tile = vr_to_tile(vr);
	u64 pfn = page_to_pfn(page);
	u64 offset;

	xe_tile_assert(tile, is_device_private_page(page));
	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);

	offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
	dpa = vr->dpa_base + offset;

	return dpa;
}

enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir)
{
	struct xe_vram_region *vr = NULL;
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU
	 * copy for every 8M chunk in a DMA address array. Both device pages
	 * and DMA addresses may be sparsely populated. If either is NULL, a
	 * copy is triggered based on the current search state. The last GPU
	 * copy is waited on to ensure all copies are complete.
	 */

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = page_to_vr(spage);
			tile = vr_to_tile(vr);
		}
		XE_WARN_ON(spage && page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physically contiguous on
		 * subsequent device pages.
		 */
		if (dma_addr[i] && spage) {
			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       vram_addr, (u64)dma_addr[pos], i - pos + incr);
					__fence = xe_migrate_from_vram(tile->migrate,
								       i - pos + incr,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       (u64)dma_addr[pos], vram_addr, i - pos + incr);
					__fence = xe_migrate_to_vram(tile->migrate,
								     i - pos + incr,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}

			/* Setup physical address of next device page */
			if (dma_addr[i] && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       vram_addr, (u64)dma_addr[pos], 1);
					__fence = xe_migrate_from_vram(tile->migrate, 1,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       (u64)dma_addr[pos], vram_addr, 1);
					__fence = xe_migrate_to_vram(tile->migrate, 1,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
				 unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
			      unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
{
	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);

	xe_bo_put_async(bo);
}

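/* Convert a buddy block offset within the VRAM region to a host PFN */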
static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
{
	return PHYS_PFN(offset + vr->hpa_base);
}

static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
{
	return &tile->mem.vram.ttm.mm;
}

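/*
 * Walk the buddy blocks backing the BO and emit one PFN entry per backing
 * page.
 */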
static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
				      unsigned long npages, unsigned long *pfn)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct ttm_resource *res = bo->ttm.resource;
	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
	struct drm_buddy_block *block;
	int j = 0;

	list_for_each_entry(block, blocks, link) {
		struct xe_vram_region *vr = block->private;
		struct xe_tile *tile = vr_to_tile(vr);
		struct drm_buddy *buddy = tile_to_buddy(tile);
		u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
		int i;

		for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
			pfn[j++] = block_pfn + i;
	}

	return 0;
}

static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
	.devmem_release = xe_svm_devmem_release,
	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

#endif

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
	.invalidate = xe_svm_invalidate,
};

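/* Fault chunk sizes passed to GPU SVM, ordered largest to smallest */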
static const unsigned long fault_chunk_sizes[] = {
	SZ_2M,
	SZ_64K,
	SZ_4K,
};

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
	int err;

	spin_lock_init(&vm->svm.garbage_collector.lock);
	INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
	INIT_WORK(&vm->svm.garbage_collector.work,
		  xe_svm_garbage_collector_work_func);

	err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
			      current->mm, xe_svm_devm_owner(vm->xe), 0,
			      vm->size, xe_modparam.svm_notifier_size * SZ_1M,
			      &gpusvm_ops, fault_chunk_sizes,
			      ARRAY_SIZE(fault_chunk_sizes));
	if (err)
		return err;

	drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);

	return 0;
}

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
 */
void xe_svm_close(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));
	flush_work(&vm->svm.garbage_collector.work);
}

/**
 * xe_svm_fini() - SVM finalize
 * @vm: The VM.
 *
 * Finalize SVM state which is embedded within the VM.
 */
void xe_svm_fini(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));

	drm_gpusvm_fini(&vm->svm.gpusvm);
}

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
				  struct xe_tile *tile,
				  bool devmem_only)
{
	/*
	 * Advisory-only check of whether the range currently has a valid
	 * mapping; READ_ONCE pairs with WRITE_ONCE in xe_pt.c
	 */
	return ((READ_ONCE(range->tile_present) &
		 ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
	       (!devmem_only || xe_svm_range_in_vram(range));
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
	return &tile->mem.vram;
}

static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_svm_range *range,
			     const struct drm_gpusvm_ctx *ctx)
{
	struct mm_struct *mm = vm->svm.gpusvm.mm;
	struct xe_vram_region *vr = tile_to_vr(tile);
	struct drm_buddy_block *block;
	struct list_head *blocks;
	struct xe_bo *bo;
	ktime_t end = 0;
	int err;

	range_debug(range, "ALLOCATE VRAM");

	if (!mmget_not_zero(mm))
		return -EFAULT;
	mmap_read_lock(mm);

retry:
	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
				 xe_svm_range_size(range),
				 ttm_bo_type_device,
				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				 XE_BO_FLAG_CPU_ADDR_MIRROR);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		if (xe_vm_validate_should_retry(NULL, err, &end))
			goto retry;
		goto unlock;
	}

	drm_gpusvm_devmem_init(&bo->devmem_allocation,
			       vm->xe->drm.dev, mm,
			       &gpusvm_devmem_ops,
			       &tile->mem.vram.dpagemap,
			       xe_svm_range_size(range));

	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
	list_for_each_entry(block, blocks, link)
		block->private = vr;

	xe_bo_get(bo);
	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
					   &bo->devmem_allocation, ctx);
	if (err)
		xe_svm_devmem_release(&bo->devmem_allocation);

	xe_bo_unlock(bo);
	xe_bo_put(bo);

unlock:
	mmap_read_unlock(mm);
	mmput(mm);

	return err;
}
#else
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_svm_range *range,
			     const struct drm_gpusvm_ctx *ctx)
{
	return -EOPNOTSUPP;
}
#endif

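/* Platforms that require 64K VRAM pages cannot migrate 4K-sized ranges */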
static bool supports_4K_migration(struct xe_device *xe)
{
	if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		return false;

	return true;
}

static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
					       struct xe_vma *vma)
{
	struct xe_vm *vm = range_to_vm(&range->base);
	u64 range_size = xe_svm_range_size(range);

	if (!range->base.flags.migrate_devmem)
		return false;

	if (xe_svm_range_in_vram(range)) {
		drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
		return false;
	}

	if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
		drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
		return false;
	}

	return true;
}

/**
 * xe_svm_handle_pagefault() - SVM handle page fault
 * @vm: The VM.
 * @vma: The CPU address mirror VMA.
 * @gt: The gt on which the fault occurred.
 * @fault_addr: The GPU fault address.
 * @atomic: The fault atomic access bit.
 *
 * Create GPU bindings for an SVM page fault. Optionally migrate to device
 * memory.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_gt *gt, u64 fault_addr,
			    bool atomic)
{
	struct drm_gpusvm_ctx ctx = {
		.read_only = xe_vma_read_only(vma),
		.devmem_possible = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.check_pages_threshold = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
		.devmem_only = atomic && IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
	};
	struct xe_svm_range *range;
	struct drm_gpusvm_range *r;
	struct drm_exec exec;
	struct dma_fence *fence;
	int migrate_try_count = ctx.devmem_only ? 3 : 1;
	struct xe_tile *tile = gt_to_tile(gt);
	ktime_t end = 0;
	int err;

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);

retry:
	/* Always process UNMAPs first so the view of SVM ranges is current */
	err = xe_svm_garbage_collector(vm);
	if (err)
		return err;

	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
					    xe_vma_start(vma), xe_vma_end(vma),
					    &ctx);
	if (IS_ERR(r))
		return PTR_ERR(r);

	if (ctx.devmem_only && !r->flags.migrate_devmem)
		return -EACCES;

	range = to_xe_range(r);
	if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
		return 0;

	range_debug(range, "PAGE FAULT");

	if (--migrate_try_count >= 0 &&
	    xe_svm_range_needs_migrate_to_vram(range, vma)) {
		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (err) {
			if (migrate_try_count || !ctx.devmem_only) {
				drm_dbg(&vm->xe->drm,
					"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));
				goto retry;
			} else {
				drm_err(&vm->xe->drm,
					"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));
				return err;
			}
		}
	}

	range_debug(range, "GET PAGES");
	err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
	/* Corner case where CPU mappings have changed */
	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (migrate_try_count > 0 || !ctx.devmem_only) {
			if (err == -EOPNOTSUPP) {
				range_debug(range, "PAGE FAULT - EVICT PAGES");
				drm_gpusvm_range_evict(&vm->svm.gpusvm,
						       &range->base);
			}
			drm_dbg(&vm->xe->drm,
				"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
			range_debug(range, "PAGE FAULT - RETRY PAGES");
			goto retry;
		} else {
			drm_err(&vm->xe->drm,
				"Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
		}
	}
	if (err) {
		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
		goto err_out;
	}

	range_debug(range, "PAGE FAULT - BIND");

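	/*
	 * Bind the range with the VM's dma-resv held; -EAGAIN restarts the
	 * whole fault, while a validation retry restarts only the bind.
	 */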
retry_bind:
	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
		drm_exec_retry_on_contention(&exec);
		if (err) {
			drm_exec_fini(&exec);
			goto err_out;
		}

		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
		if (IS_ERR(fence)) {
			drm_exec_fini(&exec);
			err = PTR_ERR(fence);
			if (err == -EAGAIN) {
				ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
				range_debug(range, "PAGE FAULT - RETRY BIND");
				goto retry;
			}
			if (xe_vm_validate_should_retry(&exec, err, &end))
				goto retry_bind;
			goto err_out;
		}
	}
	drm_exec_fini(&exec);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

err_out:

	return err;
}

/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check if an address range has SVM mappings.
 *
 * Return: True if the address range has an SVM mapping, false otherwise
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}

/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * Evict a BO to system memory. The GPU SVM layer ensures all device pages
 * are evicted before returning.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
	return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)

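/*
 * Map a device-private page for the requesting device. Only same-device
 * access over the VRAM interconnect is supported; any other device gets a
 * DMA mapping error.
 */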
static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
			  struct device *dev,
			  struct page *page,
			  unsigned int order,
			  enum dma_data_direction dir)
{
	struct device *pgmap_dev = dpagemap->dev;
	enum drm_interconnect_protocol prot;
	dma_addr_t addr;

	if (pgmap_dev == dev) {
		addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
		prot = XE_INTERCONNECT_VRAM;
	} else {
		addr = DMA_MAPPING_ERROR;
		prot = 0;
	}

	return drm_pagemap_device_addr_encode(addr, prot, order, dir);
}

static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
	.device_map = xe_drm_pagemap_device_map,
};

/**
 * xe_devm_add() - Remap and provide memmap backing for device memory
 * @tile: tile that the memory region belongs to
 * @vr: vram memory region to remap
 *
 * This remaps device memory into the host physical address space and creates
 * struct pages to back the device memory.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
	struct resource *res;
	void *addr;
	int ret;

	res = devm_request_free_mem_region(dev, &iomem_resource,
					   vr->usable_size);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		return ret;
	}

	vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	vr->pagemap.range.start = res->start;
	vr->pagemap.range.end = res->end;
	vr->pagemap.nr_range = 1;
	vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
	vr->pagemap.owner = xe_svm_devm_owner(xe);
	addr = devm_memremap_pages(dev, &vr->pagemap);

	vr->dpagemap.dev = dev;
	vr->dpagemap.ops = &xe_drm_pagemap_ops;

	if (IS_ERR(addr)) {
		devm_release_mem_region(dev, res->start, resource_size(res));
		ret = PTR_ERR(addr);
		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
			tile->id, ERR_PTR(ret));
		return ret;
	}
	vr->hpa_base = res->start;

	drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
		tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
	return 0;
}
#else
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	return 0;
}
#endif

/**
 * xe_svm_flush() - SVM flush
 * @vm: The VM.
 *
 * Flush all SVM actions.
 */
void xe_svm_flush(struct xe_vm *vm)
{
	if (xe_vm_in_fault_mode(vm))
		flush_work(&vm->svm.garbage_collector.work);
}
