xref: /linux/drivers/gpu/drm/xe/xe_svm.c (revision 28472374291c380c22f40deec07a90d09bcbffb6)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2024 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 #include "xe_gt_stats.h"
8 #include "xe_gt_tlb_invalidation.h"
9 #include "xe_migrate.h"
10 #include "xe_module.h"
11 #include "xe_pt.h"
12 #include "xe_svm.h"
13 #include "xe_ttm_vram_mgr.h"
14 #include "xe_vm.h"
15 #include "xe_vm_types.h"
16 
17 static bool xe_svm_range_in_vram(struct xe_svm_range *range)
18 {
19 	/* Not reliable without notifier lock */
20 	return range->base.flags.has_devmem_pages;
21 }
22 
23 static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
24 {
25 	/* Not reliable without notifier lock */
26 	return xe_svm_range_in_vram(range) && range->tile_present;
27 }
28 
29 static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
30 {
31 	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
32 }
33 
34 static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
35 {
36 	return gpusvm_to_vm(r->gpusvm);
37 }
38 
39 static unsigned long xe_svm_range_start(struct xe_svm_range *range)
40 {
41 	return drm_gpusvm_range_start(&range->base);
42 }
43 
44 static unsigned long xe_svm_range_end(struct xe_svm_range *range)
45 {
46 	return drm_gpusvm_range_end(&range->base);
47 }
48 
49 static unsigned long xe_svm_range_size(struct xe_svm_range *range)
50 {
51 	return drm_gpusvm_range_size(&range->base);
52 }
53 
54 #define range_debug(r__, operation__)					\
55 	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm,			\
56 	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
57 	       "start=0x%014lx, end=0x%014lx, size=%lu",		\
58 	       (operation__), range_to_vm(&(r__)->base)->usm.asid,	\
59 	       (r__)->base.gpusvm,					\
60 	       xe_svm_range_in_vram((r__)) ? 1 : 0,			\
61 	       xe_svm_range_has_vram_binding((r__)) ? 1 : 0,		\
62 	       (r__)->base.notifier_seq,				\
63 	       xe_svm_range_start((r__)), xe_svm_range_end((r__)),	\
64 	       xe_svm_range_size((r__)))
65 
66 void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
67 {
68 	range_debug(range, operation);
69 }
70 
71 static void *xe_svm_devm_owner(struct xe_device *xe)
72 {
73 	return xe;
74 }
75 
76 static struct drm_gpusvm_range *
77 xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
78 {
79 	struct xe_svm_range *range;
80 
81 	range = kzalloc(sizeof(*range), GFP_KERNEL);
82 	if (!range)
83 		return NULL;
84 
85 	INIT_LIST_HEAD(&range->garbage_collector_link);
86 	xe_vm_get(gpusvm_to_vm(gpusvm));
87 
88 	return &range->base;
89 }
90 
91 static void xe_svm_range_free(struct drm_gpusvm_range *range)
92 {
93 	xe_vm_put(range_to_vm(range));
94 	kfree(range);
95 }
96 
97 static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
98 {
99 	return container_of(r, struct xe_svm_range, base);
100 }
101 
102 static void
103 xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
104 				   const struct mmu_notifier_range *mmu_range)
105 {
106 	struct xe_device *xe = vm->xe;
107 
108 	range_debug(range, "GARBAGE COLLECTOR ADD");
109 
110 	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);
111 
112 	spin_lock(&vm->svm.garbage_collector.lock);
113 	if (list_empty(&range->garbage_collector_link))
114 		list_add_tail(&range->garbage_collector_link,
115 			      &vm->svm.garbage_collector.range_list);
116 	spin_unlock(&vm->svm.garbage_collector.lock);
117 
118 	queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
119 		   &vm->svm.garbage_collector.work);
120 }
121 
122 static u8
123 xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
124 				  const struct mmu_notifier_range *mmu_range,
125 				  u64 *adj_start, u64 *adj_end)
126 {
127 	struct xe_svm_range *range = to_xe_range(r);
128 	struct xe_device *xe = vm->xe;
129 	struct xe_tile *tile;
130 	u8 tile_mask = 0;
131 	u8 id;
132 
133 	xe_svm_assert_in_notifier(vm);
134 
135 	range_debug(range, "NOTIFIER");
136 
137 	/* Skip if already unmapped or if no binding exist */
138 	/* Skip if already unmapped or if no bindings exist */
139 		return 0;
140 
141 	range_debug(range, "NOTIFIER - EXECUTE");
142 
143 	/* Adjust invalidation to range boundaries */
144 	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
145 	*adj_end = max(xe_svm_range_end(range), mmu_range->end);
146 
147 	/*
148 	 * XXX: Ideally would zap PTEs in one shot in xe_svm_invalidate but the
149 	 * invalidation code can't correctly cope with sparse ranges or
150 	 * invalidations spanning multiple ranges.
151 	 */
152 	for_each_tile(tile, xe, id)
153 		if (xe_pt_zap_ptes_range(tile, vm, range)) {
154 			tile_mask |= BIT(id);
155 			range->tile_invalidated |= BIT(id);
156 		}
157 
158 	return tile_mask;
159 }
160 
161 static void
162 xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
163 				const struct mmu_notifier_range *mmu_range)
164 {
165 	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
166 
167 	xe_svm_assert_in_notifier(vm);
168 
169 	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
170 	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
171 		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
172 						   mmu_range);
173 }
174 
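/*
 * Invalidation overview (see xe_svm_invalidate() below): the mmu_notifier
 * range is clamped to the notifier boundaries, the VM's bookkeep resv slots
 * are waited on, PTEs are zapped per tile for every overlapping range, and
 * TLB invalidations are issued and waited on for each tile's primary and
 * media GTs before the ranges' pages are unmapped (and, on MMU_NOTIFY_UNMAP,
 * queued for the garbage collector).
 */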
175 static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
176 			      struct drm_gpusvm_notifier *notifier,
177 			      const struct mmu_notifier_range *mmu_range)
178 {
179 	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
180 	struct xe_device *xe = vm->xe;
181 	struct xe_tile *tile;
182 	struct drm_gpusvm_range *r, *first;
183 	struct xe_gt_tlb_invalidation_fence
184 		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
185 	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
186 	u8 tile_mask = 0;
187 	u8 id;
188 	u32 fence_id = 0;
189 	long err;
190 
191 	xe_svm_assert_in_notifier(vm);
192 
193 	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
194 	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
195 	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
196 	       mmu_range->start, mmu_range->end, mmu_range->event);
197 
198 	/* Adjust invalidation to notifier boundaries */
199 	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
200 	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);
201 
202 	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
203 	if (!first)
204 		return;
205 
206 	/*
207 	 * The PTs may be getting destroyed, so it is not safe to touch them, but
208 	 * they should already be invalidated at this point in time. Regardless,
209 	 * we still need to ensure any DMA mappings are unmapped here.
210 	 */
211 	if (xe_vm_is_closed(vm))
212 		goto range_notifier_event_end;
213 
214 	/*
215 	 * XXX: Less than ideal to always wait on VM's resv slots if an
216 	 * invalidation is not required. Could walk range list twice to figure
217 	 * out if an invalidation is needed, but that is also not ideal.
218 	 */
219 	err = dma_resv_wait_timeout(xe_vm_resv(vm),
220 				    DMA_RESV_USAGE_BOOKKEEP,
221 				    false, MAX_SCHEDULE_TIMEOUT);
222 	XE_WARN_ON(err <= 0);
223 
224 	r = first;
225 	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
226 		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
227 							       &adj_start,
228 							       &adj_end);
229 	if (!tile_mask)
230 		goto range_notifier_event_end;
231 
232 	xe_device_wmb(xe);
233 
234 	for_each_tile(tile, xe, id) {
235 		if (tile_mask & BIT(id)) {
236 			int err;
237 
238 			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
239 							  &fence[fence_id], true);
240 
241 			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
242 							   &fence[fence_id],
243 							   adj_start,
244 							   adj_end,
245 							   vm->usm.asid);
246 			if (WARN_ON_ONCE(err < 0))
247 				goto wait;
248 			++fence_id;
249 
250 			if (!tile->media_gt)
251 				continue;
252 
253 			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
254 							  &fence[fence_id], true);
255 
256 			err = xe_gt_tlb_invalidation_range(tile->media_gt,
257 							   &fence[fence_id],
258 							   adj_start,
259 							   adj_end,
260 							   vm->usm.asid);
261 			if (WARN_ON_ONCE(err < 0))
262 				goto wait;
263 			++fence_id;
264 		}
265 	}
266 
267 wait:
268 	for (id = 0; id < fence_id; ++id)
269 		xe_gt_tlb_invalidation_fence_wait(&fence[id]);
270 
271 range_notifier_event_end:
272 	r = first;
273 	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
274 		xe_svm_range_notifier_event_end(vm, r, mmu_range);
275 }
276 
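/*
 * Garbage-collector design note: ranges are only flagged and queued from the
 * MMU notifier (xe_svm_garbage_collector_add_range()); the actual unbind and
 * range removal happen later under the write side of vm->lock, either from
 * the worker below or at the start of page-fault handling, presumably because
 * that locking and unbind work cannot be done safely from notifier context.
 */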
277 static int __xe_svm_garbage_collector(struct xe_vm *vm,
278 				      struct xe_svm_range *range)
279 {
280 	struct dma_fence *fence;
281 
282 	range_debug(range, "GARBAGE COLLECTOR");
283 
284 	xe_vm_lock(vm, false);
285 	fence = xe_vm_range_unbind(vm, range);
286 	xe_vm_unlock(vm);
287 	if (IS_ERR(fence))
288 		return PTR_ERR(fence);
289 	dma_fence_put(fence);
290 
291 	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);
292 
293 	return 0;
294 }
295 
296 static int xe_svm_garbage_collector(struct xe_vm *vm)
297 {
298 	struct xe_svm_range *range;
299 	int err;
300 
301 	lockdep_assert_held_write(&vm->lock);
302 
303 	if (xe_vm_is_closed_or_banned(vm))
304 		return -ENOENT;
305 
306 	spin_lock(&vm->svm.garbage_collector.lock);
307 	for (;;) {
308 		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
309 						 typeof(*range),
310 						 garbage_collector_link);
311 		if (!range)
312 			break;
313 
314 		list_del(&range->garbage_collector_link);
315 		spin_unlock(&vm->svm.garbage_collector.lock);
316 
317 		err = __xe_svm_garbage_collector(vm, range);
318 		if (err) {
319 			drm_warn(&vm->xe->drm,
320 				 "Garbage collection failed: %pe\n",
321 				 ERR_PTR(err));
322 			xe_vm_kill(vm, true);
323 			return err;
324 		}
325 
326 		spin_lock(&vm->svm.garbage_collector.lock);
327 	}
328 	spin_unlock(&vm->svm.garbage_collector.lock);
329 
330 	return 0;
331 }
332 
333 static void xe_svm_garbage_collector_work_func(struct work_struct *w)
334 {
335 	struct xe_vm *vm = container_of(w, struct xe_vm,
336 					svm.garbage_collector.work);
337 
338 	down_write(&vm->lock);
339 	xe_svm_garbage_collector(vm);
340 	up_write(&vm->lock);
341 }
342 
343 #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
344 
345 static struct xe_vram_region *page_to_vr(struct page *page)
346 {
347 	return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
348 }
349 
350 static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
351 {
352 	return container_of(vr, struct xe_tile, mem.vram);
353 }
354 
355 static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
356 				      struct page *page)
357 {
358 	u64 dpa;
359 	struct xe_tile *tile = vr_to_tile(vr);
360 	u64 pfn = page_to_pfn(page);
361 	u64 offset;
362 
363 	xe_tile_assert(tile, is_device_private_page(page));
364 	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);
365 
366 	offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
367 	dpa = vr->dpa_base + offset;
368 
369 	return dpa;
370 }
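/*
 * Worked example with made-up values: if vr->hpa_base is 0x200000000,
 * vr->dpa_base is 0 and the page's host physical address is 0x200003000,
 * the resulting device physical address (DPA) is 0x3000.
 */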
371 
372 enum xe_svm_copy_dir {
373 	XE_SVM_COPY_TO_VRAM,
374 	XE_SVM_COPY_TO_SRAM,
375 };
376 
377 static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
378 		       unsigned long npages, const enum xe_svm_copy_dir dir)
379 {
380 	struct xe_vram_region *vr = NULL;
381 	struct xe_tile *tile;
382 	struct dma_fence *fence = NULL;
383 	unsigned long i;
384 #define XE_VRAM_ADDR_INVALID	~0x0ull
385 	u64 vram_addr = XE_VRAM_ADDR_INVALID;
386 	int err = 0, pos = 0;
387 	bool sram = dir == XE_SVM_COPY_TO_SRAM;
388 
389 	/*
390 	 * This flow is complex: it locates physically contiguous device pages,
391 	 * derives the starting physical address, and performs a single GPU copy
392 	 * for every 8M chunk in a DMA address array. Both device pages and
393 	 * DMA addresses may be sparsely populated. If either is NULL, a copy is
394 	 * triggered based on the current search state. The last GPU copy is
395 	 * waited on to ensure all copies are complete.
396 	 */
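	/*
	 * Worked example, assuming 4 KiB pages: XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE
	 * is 2048, so a copy is flushed once 2048 pages have accumulated since
	 * 'pos', when the device pages stop being physically contiguous, or when
	 * the last page is reached.
	 */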
397 
398 	for (i = 0; i < npages; ++i) {
399 		struct page *spage = pages[i];
400 		struct dma_fence *__fence;
401 		u64 __vram_addr;
402 		bool match = false, chunk, last;
403 
404 #define XE_MIGRATE_CHUNK_SIZE	SZ_8M
405 		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
406 		last = (i + 1) == npages;
407 
408 		/* No CPU page and no device pages queued to copy */
409 		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
410 			continue;
411 
412 		if (!vr && spage) {
413 			vr = page_to_vr(spage);
414 			tile = vr_to_tile(vr);
415 		}
416 		XE_WARN_ON(spage && page_to_vr(spage) != vr);
417 
418 		/*
419 		 * CPU page and device page valid, capture physical address on
420 		 * first device page, check if physically contiguous on subsequent
421 		 * device pages.
422 		 */
423 		if (dma_addr[i] && spage) {
424 			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
425 			if (vram_addr == XE_VRAM_ADDR_INVALID) {
426 				vram_addr = __vram_addr;
427 				pos = i;
428 			}
429 
430 			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
431 		}
432 
433 		/*
434 		 * Mismatched physical address, 8M copy chunk, or last page -
435 		 * trigger a copy.
436 		 */
437 		if (!match || chunk || last) {
438 			/*
439 			 * Include an extra page in the first copy if this is the
440 			 * last page and its physical address matches.
441 			 */
442 			int incr = (match && last) ? 1 : 0;
443 
444 			if (vram_addr != XE_VRAM_ADDR_INVALID) {
445 				if (sram) {
446 					vm_dbg(&tile->xe->drm,
447 					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
448 					       vram_addr, (u64)dma_addr[pos], i - pos + incr);
449 					__fence = xe_migrate_from_vram(tile->migrate,
450 								       i - pos + incr,
451 								       vram_addr,
452 								       dma_addr + pos);
453 				} else {
454 					vm_dbg(&tile->xe->drm,
455 					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
456 					       (u64)dma_addr[pos], vram_addr, i - pos + incr);
457 					__fence = xe_migrate_to_vram(tile->migrate,
458 								     i - pos + incr,
459 								     dma_addr + pos,
460 								     vram_addr);
461 				}
462 				if (IS_ERR(__fence)) {
463 					err = PTR_ERR(__fence);
464 					goto err_out;
465 				}
466 
467 				dma_fence_put(fence);
468 				fence = __fence;
469 			}
470 
471 			/* Set up the physical address of the next device page */
472 			if (dma_addr[i] && spage) {
473 				vram_addr = __vram_addr;
474 				pos = i;
475 			} else {
476 				vram_addr = XE_VRAM_ADDR_INVALID;
477 			}
478 
479 			/* Extra mismatched device page, copy it */
480 			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
481 				if (sram) {
482 					vm_dbg(&tile->xe->drm,
483 					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
484 					       vram_addr, (u64)dma_addr[pos], 1);
485 					__fence = xe_migrate_from_vram(tile->migrate, 1,
486 								       vram_addr,
487 								       dma_addr + pos);
488 				} else {
489 					vm_dbg(&tile->xe->drm,
490 					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
491 					       (u64)dma_addr[pos], vram_addr, 1);
492 					__fence = xe_migrate_to_vram(tile->migrate, 1,
493 								     dma_addr + pos,
494 								     vram_addr);
495 				}
496 				if (IS_ERR(__fence)) {
497 					err = PTR_ERR(__fence);
498 					goto err_out;
499 				}
500 
501 				dma_fence_put(fence);
502 				fence = __fence;
503 			}
504 		}
505 	}
506 
507 err_out:
508 	/* Wait for all copies to complete */
509 	if (fence) {
510 		dma_fence_wait(fence, false);
511 		dma_fence_put(fence);
512 	}
513 
514 	return err;
515 #undef XE_MIGRATE_CHUNK_SIZE
516 #undef XE_VRAM_ADDR_INVALID
517 }
518 
519 static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
520 				 unsigned long npages)
521 {
522 	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
523 }
524 
525 static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
526 			      unsigned long npages)
527 {
528 	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
529 }
530 
531 static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
532 {
533 	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
534 }
535 
536 static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
537 {
538 	struct xe_bo *bo = to_xe_bo(devmem_allocation);
539 
540 	xe_bo_put_async(bo);
541 }
542 
543 static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
544 {
545 	return PHYS_PFN(offset + vr->hpa_base);
546 }
547 
548 static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
549 {
550 	return &tile->mem.vram.ttm.mm;
551 }
552 
553 static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
554 				      unsigned long npages, unsigned long *pfn)
555 {
556 	struct xe_bo *bo = to_xe_bo(devmem_allocation);
557 	struct ttm_resource *res = bo->ttm.resource;
558 	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
559 	struct drm_buddy_block *block;
560 	int j = 0;
561 
562 	list_for_each_entry(block, blocks, link) {
563 		struct xe_vram_region *vr = block->private;
564 		struct xe_tile *tile = vr_to_tile(vr);
565 		struct drm_buddy *buddy = tile_to_buddy(tile);
566 		u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
567 		int i;
568 
569 		for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
570 			pfn[j++] = block_pfn + i;
571 	}
572 
573 	return 0;
574 }
575 
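/*
 * Callbacks invoked by the GPU SVM device-memory layer: releasing a device
 * memory allocation, reporting the PFNs backing it, and copying pages to and
 * from device memory.
 */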
576 static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
577 	.devmem_release = xe_svm_devmem_release,
578 	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
579 	.copy_to_devmem = xe_svm_copy_to_devmem,
580 	.copy_to_ram = xe_svm_copy_to_ram,
581 };
582 
583 #endif
584 
585 static const struct drm_gpusvm_ops gpusvm_ops = {
586 	.range_alloc = xe_svm_range_alloc,
587 	.range_free = xe_svm_range_free,
588 	.invalidate = xe_svm_invalidate,
589 };
590 
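/*
 * Candidate SVM range sizes, largest first; the GPU SVM core is expected to
 * pick the largest chunk that fits the faulting CPU VMA and its alignment.
 */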
591 static const unsigned long fault_chunk_sizes[] = {
592 	SZ_2M,
593 	SZ_64K,
594 	SZ_4K,
595 };
596 
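/*
 * Lifecycle sketch (assuming the usual VM flow): xe_svm_init() is called at
 * VM creation, xe_svm_close() once the VM has been closed (it flushes the
 * garbage collector), and xe_svm_fini() at VM destruction.
 */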
597 /**
598  * xe_svm_init() - SVM initialize
599  * @vm: The VM.
600  *
601  * Initialize SVM state which is embedded within the VM.
602  *
603  * Return: 0 on success, negative error code on error.
604  */
605 int xe_svm_init(struct xe_vm *vm)
606 {
607 	int err;
608 
609 	spin_lock_init(&vm->svm.garbage_collector.lock);
610 	INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
611 	INIT_WORK(&vm->svm.garbage_collector.work,
612 		  xe_svm_garbage_collector_work_func);
613 
614 	err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
615 			      current->mm, xe_svm_devm_owner(vm->xe), 0,
616 			      vm->size, xe_modparam.svm_notifier_size * SZ_1M,
617 			      &gpusvm_ops, fault_chunk_sizes,
618 			      ARRAY_SIZE(fault_chunk_sizes));
619 	if (err)
620 		return err;
621 
622 	drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
623 
624 	return 0;
625 }
626 
627 /**
628  * xe_svm_close() - SVM close
629  * @vm: The VM.
630  *
631  * Close SVM state (i.e., stop and flush all SVM actions).
632  */
633 void xe_svm_close(struct xe_vm *vm)
634 {
635 	xe_assert(vm->xe, xe_vm_is_closed(vm));
636 	flush_work(&vm->svm.garbage_collector.work);
637 }
638 
639 /**
640  * xe_svm_fini() - SVM finalize
641  * @vm: The VM.
642  *
643  * Finalize SVM state which is embedded within the VM.
644  */
645 void xe_svm_fini(struct xe_vm *vm)
646 {
647 	xe_assert(vm->xe, xe_vm_is_closed(vm));
648 
649 	drm_gpusvm_fini(&vm->svm.gpusvm);
650 }
651 
652 static bool xe_svm_range_is_valid(struct xe_svm_range *range,
653 				  struct xe_tile *tile)
654 {
655 	return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
656 }
657 
658 #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
659 static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
660 {
661 	return &tile->mem.vram;
662 }
663 
664 static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
665 			     struct xe_svm_range *range,
666 			     const struct drm_gpusvm_ctx *ctx)
667 {
668 	struct mm_struct *mm = vm->svm.gpusvm.mm;
669 	struct xe_vram_region *vr = tile_to_vr(tile);
670 	struct drm_buddy_block *block;
671 	struct list_head *blocks;
672 	struct xe_bo *bo;
673 	ktime_t end = 0;
674 	int err;
675 
676 	range_debug(range, "ALLOCATE VRAM");
677 
678 	if (!mmget_not_zero(mm))
679 		return -EFAULT;
680 	mmap_read_lock(mm);
681 
682 retry:
683 	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
684 				 xe_svm_range_size(range),
685 				 ttm_bo_type_device,
686 				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
687 				 XE_BO_FLAG_CPU_ADDR_MIRROR);
688 	if (IS_ERR(bo)) {
689 		err = PTR_ERR(bo);
690 		if (xe_vm_validate_should_retry(NULL, err, &end))
691 			goto retry;
692 		goto unlock;
693 	}
694 
695 	drm_gpusvm_devmem_init(&bo->devmem_allocation,
696 			       vm->xe->drm.dev, mm,
697 			       &gpusvm_devmem_ops,
698 			       &tile->mem.vram.dpagemap,
699 			       xe_svm_range_size(range));
700 
701 	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
702 	list_for_each_entry(block, blocks, link)
703 		block->private = vr;
704 
705 	xe_bo_get(bo);
706 	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
707 					   &bo->devmem_allocation, ctx);
708 	if (err)
709 		xe_svm_devmem_release(&bo->devmem_allocation);
710 
711 	xe_bo_unlock(bo);
712 	xe_bo_put(bo);
713 
714 unlock:
715 	mmap_read_unlock(mm);
716 	mmput(mm);
717 
718 	return err;
719 }
720 #else
721 static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
722 			     struct xe_svm_range *range,
723 			     const struct drm_gpusvm_ctx *ctx)
724 {
725 	return -EOPNOTSUPP;
726 }
727 #endif
728 
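/*
 * Page-fault flow sketch (see xe_svm_handle_pagefault() below): run the
 * garbage collector so unmapped ranges are removed, find or insert the SVM
 * range covering the fault, optionally migrate it to VRAM, collect its pages
 * and rebind it on the faulting tile; -EAGAIN and changed CPU mappings
 * restart the sequence from the top.
 */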
729 /**
730  * xe_svm_handle_pagefault() - SVM handle page fault
731  * @vm: The VM.
732  * @vma: The CPU address mirror VMA.
733  * @gt: The gt upon which the fault occurred.
734  * @fault_addr: The GPU fault address.
735  * @atomic: The fault atomic access bit.
736  *
737  * Create GPU bindings for an SVM page fault. Optionally migrate to device
738  * memory.
739  *
740  * Return: 0 on success, negative error code on error.
741  */
742 int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
743 			    struct xe_gt *gt, u64 fault_addr,
744 			    bool atomic)
745 {
746 	struct drm_gpusvm_ctx ctx = {
747 		.read_only = xe_vma_read_only(vma),
748 		.devmem_possible = IS_DGFX(vm->xe) &&
749 			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
750 		.check_pages_threshold = IS_DGFX(vm->xe) &&
751 			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
752 	};
753 	struct xe_svm_range *range;
754 	struct drm_gpusvm_range *r;
755 	struct drm_exec exec;
756 	struct dma_fence *fence;
757 	struct xe_tile *tile = gt_to_tile(gt);
758 	ktime_t end = 0;
759 	int err;
760 
761 	lockdep_assert_held_write(&vm->lock);
762 	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
763 
764 	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
765 
766 retry:
767 	/* Always process UNMAPs first so the view of SVM ranges is current */
768 	err = xe_svm_garbage_collector(vm);
769 	if (err)
770 		return err;
771 
772 	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
773 					    xe_vma_start(vma), xe_vma_end(vma),
774 					    &ctx);
775 	if (IS_ERR(r))
776 		return PTR_ERR(r);
777 
778 	range = to_xe_range(r);
779 	if (xe_svm_range_is_valid(range, tile))
780 		return 0;
781 
782 	range_debug(range, "PAGE FAULT");
783 
784 	/* XXX: Add migration policy, for now migrate range once */
785 	if (!range->skip_migrate && range->base.flags.migrate_devmem &&
786 	    xe_svm_range_size(range) >= SZ_64K) {
787 		range->skip_migrate = true;
788 
789 		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
790 		if (err) {
791 			drm_dbg(&vm->xe->drm,
792 				"VRAM allocation failed, falling back to "
793 				"retrying fault, asid=%u, errno=%pe\n",
794 				vm->usm.asid, ERR_PTR(err));
795 			goto retry;
796 		}
797 	}
798 
799 	range_debug(range, "GET PAGES");
800 	err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
801 	/* Corner case where the CPU mappings have changed */
802 	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
803 		if (err == -EOPNOTSUPP) {
804 			range_debug(range, "PAGE FAULT - EVICT PAGES");
805 			drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
806 		}
807 		drm_dbg(&vm->xe->drm,
808 			"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
809 			vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
810 		range_debug(range, "PAGE FAULT - RETRY PAGES");
811 		goto retry;
812 	}
813 	if (err) {
814 		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
815 		goto err_out;
816 	}
817 
818 	range_debug(range, "PAGE FAULT - BIND");
819 
820 retry_bind:
821 	drm_exec_init(&exec, 0, 0);
822 	drm_exec_until_all_locked(&exec) {
823 		err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
824 		drm_exec_retry_on_contention(&exec);
825 		if (err) {
826 			drm_exec_fini(&exec);
827 			goto err_out;
828 		}
829 
830 		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
831 		if (IS_ERR(fence)) {
832 			drm_exec_fini(&exec);
833 			err = PTR_ERR(fence);
834 			if (err == -EAGAIN) {
835 				range_debug(range, "PAGE FAULT - RETRY BIND");
836 				goto retry;
837 			}
838 			if (xe_vm_validate_should_retry(&exec, err, &end))
839 				goto retry_bind;
840 			goto err_out;
841 		}
842 	}
843 	drm_exec_fini(&exec);
844 
845 	if (xe_modparam.always_migrate_to_vram)
846 		range->skip_migrate = false;
847 
848 	dma_fence_wait(fence, false);
849 	dma_fence_put(fence);
850 
851 err_out:
852 
853 	return err;
854 }
855 
856 /**
857  * xe_svm_has_mapping() - SVM has mappings
858  * @vm: The VM.
859  * @start: Start address.
860  * @end: End address.
861  *
862  * Check if an address range has SVM mappings.
863  *
864  * Return: True if the address range has an SVM mapping, False otherwise
865  */
866 bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
867 {
868 	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
869 }
870 
871 /**
872  * xe_svm_bo_evict() - SVM evict BO to system memory
873  * @bo: BO to evict
874  *
875  * Evict a BO to system memory. The GPU SVM layer ensures all device pages
876  * are evicted before returning.
877  *
878  * Return: 0 on success, standard error code otherwise
879  */
880 int xe_svm_bo_evict(struct xe_bo *bo)
881 {
882 	return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
883 }
884 
885 #if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
886 
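/*
 * Only local mappings of device-private pages are supported here: when the
 * pagemap's device matches the mapping device, the page's VRAM device
 * physical address is returned with the XE_INTERCONNECT_VRAM protocol;
 * otherwise DMA_MAPPING_ERROR is reported.
 */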
887 static struct drm_pagemap_device_addr
888 xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
889 			  struct device *dev,
890 			  struct page *page,
891 			  unsigned int order,
892 			  enum dma_data_direction dir)
893 {
894 	struct device *pgmap_dev = dpagemap->dev;
895 	enum drm_interconnect_protocol prot;
896 	dma_addr_t addr;
897 
898 	if (pgmap_dev == dev) {
899 		addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
900 		prot = XE_INTERCONNECT_VRAM;
901 	} else {
902 		addr = DMA_MAPPING_ERROR;
903 		prot = 0;
904 	}
905 
906 	return drm_pagemap_device_addr_encode(addr, prot, order, dir);
907 }
908 
909 static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
910 	.device_map = xe_drm_pagemap_device_map,
911 };
912 
913 /**
914  * xe_devm_add() - Remap and provide memmap backing for device memory
915  * @tile: tile that the memory region belongs to
916  * @vr: vram memory region to remap
917  *
918  * This remaps device memory into the host physical address space and creates
919  * struct pages to back the device memory.
920  *
921  * Return: 0 on success, standard error code otherwise
922  */
923 int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
924 {
925 	struct xe_device *xe = tile_to_xe(tile);
926 	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
927 	struct resource *res;
928 	void *addr;
929 	int ret;
930 
931 	res = devm_request_free_mem_region(dev, &iomem_resource,
932 					   vr->usable_size);
933 	if (IS_ERR(res)) {
934 		ret = PTR_ERR(res);
935 		return ret;
936 	}
937 
938 	vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
939 	vr->pagemap.range.start = res->start;
940 	vr->pagemap.range.end = res->end;
941 	vr->pagemap.nr_range = 1;
942 	vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
943 	vr->pagemap.owner = xe_svm_devm_owner(xe);
944 	addr = devm_memremap_pages(dev, &vr->pagemap);
945 
946 	vr->dpagemap.dev = dev;
947 	vr->dpagemap.ops = &xe_drm_pagemap_ops;
948 
949 	if (IS_ERR(addr)) {
950 		devm_release_mem_region(dev, res->start, resource_size(res));
951 		ret = PTR_ERR(addr);
952 		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
953 			tile->id, ERR_PTR(ret));
954 		return ret;
955 	}
956 	vr->hpa_base = res->start;
957 
958 	drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
959 		tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
960 	return 0;
961 }
962 #else
963 int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
964 {
965 	return 0;
966 }
967 #endif
968 
969 /**
970  * xe_svm_flush() - SVM flush
971  * @vm: The VM.
972  *
973  * Flush all SVM actions.
974  */
975 void xe_svm_flush(struct xe_vm *vm)
976 {
977 	if (xe_vm_in_fault_mode(vm))
978 		flush_work(&vm->svm.garbage_collector.work);
979 }
980