// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include "xe_bo.h"
#include "xe_gt_stats.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
#include "xe_pt.h"
#include "xe_svm.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"

static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
	/*
	 * Advisory-only check of whether the range is currently backed by
	 * VRAM memory.
	 */

	struct drm_gpusvm_range_flags flags = {
		/* Pairs with WRITE_ONCE in drm_gpusvm.c */
		.__flags = READ_ONCE(range->base.flags.__flags),
	};

	return flags.has_devmem_pages;
}

static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
{
	/* Not reliable without notifier lock */
	return xe_svm_range_in_vram(range) && range->tile_present;
}

static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
{
	return container_of(gpusvm, struct xe_vm, svm.gpusvm);
}

static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
{
	return gpusvm_to_vm(r->gpusvm);
}

static unsigned long xe_svm_range_start(struct xe_svm_range *range)
{
	return drm_gpusvm_range_start(&range->base);
}

static unsigned long xe_svm_range_end(struct xe_svm_range *range)
{
	return drm_gpusvm_range_end(&range->base);
}

static unsigned long xe_svm_range_size(struct xe_svm_range *range)
{
	return drm_gpusvm_range_size(&range->base);
}

#define range_debug(r__, operation__)					\
	vm_dbg(&range_to_vm(&(r__)->base)->xe->drm,			\
	       "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
	       "start=0x%014lx, end=0x%014lx, size=%lu",		\
	       (operation__), range_to_vm(&(r__)->base)->usm.asid,	\
	       (r__)->base.gpusvm,					\
	       xe_svm_range_in_vram((r__)) ? 1 : 0,			\
	       xe_svm_range_has_vram_binding((r__)) ? 1 : 0,		\
	       (r__)->base.notifier_seq,				\
	       xe_svm_range_start((r__)), xe_svm_range_end((r__)),	\
	       xe_svm_range_size((r__)))

void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
	range_debug(range, operation);
}

static void *xe_svm_devm_owner(struct xe_device *xe)
{
	return xe;
}

static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
	struct xe_svm_range *range;

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return NULL;

	INIT_LIST_HEAD(&range->garbage_collector_link);
	xe_vm_get(gpusvm_to_vm(gpusvm));

	return &range->base;
}

static void xe_svm_range_free(struct drm_gpusvm_range *range)
{
	xe_vm_put(range_to_vm(range));
	kfree(range);
}

static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
{
	return container_of(r, struct xe_svm_range, base);
}

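/*
 * Ranges invalidated by an MMU_NOTIFY_UNMAP event cannot be torn down from
 * within notifier context, so they are queued on the VM's garbage collector
 * list here and removed later, under vm->lock, by the garbage collector work
 * item (or on the page-fault path before servicing the next fault).
 */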
static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
				   const struct mmu_notifier_range *mmu_range)
{
	struct xe_device *xe = vm->xe;

	range_debug(range, "GARBAGE COLLECTOR ADD");

	drm_gpusvm_range_set_unmapped(&range->base, mmu_range);

	spin_lock(&vm->svm.garbage_collector.lock);
	if (list_empty(&range->garbage_collector_link))
		list_add_tail(&range->garbage_collector_link,
			      &vm->svm.garbage_collector.range_list);
	spin_unlock(&vm->svm.garbage_collector.lock);

	queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
		   &vm->svm.garbage_collector.work);
}

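/*
 * Returns a mask of tiles whose PTEs were zapped for this range; the caller
 * (xe_svm_invalidate()) uses the mask to decide which GTs need a TLB
 * invalidation. A zero mask means the range was already unmapped or never
 * had a GPU binding.
 */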
static u8
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
				  const struct mmu_notifier_range *mmu_range,
				  u64 *adj_start, u64 *adj_end)
{
	struct xe_svm_range *range = to_xe_range(r);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	u8 tile_mask = 0;
	u8 id;

	xe_svm_assert_in_notifier(vm);

	range_debug(range, "NOTIFIER");

	/* Skip if already unmapped or if no bindings exist */
	if (range->base.flags.unmapped || !range->tile_present)
		return 0;

	range_debug(range, "NOTIFIER - EXECUTE");

	/* Adjust invalidation to range boundaries */
	*adj_start = min(xe_svm_range_start(range), mmu_range->start);
	*adj_end = max(xe_svm_range_end(range), mmu_range->end);

	/*
	 * XXX: Ideally would zap PTEs in one shot in xe_svm_invalidate but the
	 * invalidation code can't correctly cope with sparse ranges or
	 * invalidations spanning multiple ranges.
	 */
	for_each_tile(tile, xe, id)
		if (xe_pt_zap_ptes_range(tile, vm, range)) {
			tile_mask |= BIT(id);
			range->tile_invalidated |= BIT(id);
		}

	return tile_mask;
}

static void
xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
				const struct mmu_notifier_range *mmu_range)
{
	struct drm_gpusvm_ctx ctx = { .in_notifier = true, };

	xe_svm_assert_in_notifier(vm);

	drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
	if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
		xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
						   mmu_range);
}

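/*
 * MMU notifier invalidation callback. The flow is: clamp the invalidation to
 * the notifier boundaries, wait on the VM's resv bookkeeping slots, zap PTEs
 * for every affected range on every tile, issue TLB invalidations on each
 * tile's primary (and, if present, media) GT, and finally unmap the DMA
 * mappings for the affected ranges.
 */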
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
			      struct drm_gpusvm_notifier *notifier,
			      const struct mmu_notifier_range *mmu_range)
{
	struct xe_vm *vm = gpusvm_to_vm(gpusvm);
	struct xe_device *xe = vm->xe;
	struct xe_tile *tile;
	struct drm_gpusvm_range *r, *first;
	struct xe_gt_tlb_invalidation_fence
		fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
	u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
	u8 tile_mask = 0;
	u8 id;
	u32 fence_id = 0;
	long err;

	xe_svm_assert_in_notifier(vm);

	vm_dbg(&gpusvm_to_vm(gpusvm)->xe->drm,
	       "INVALIDATE: asid=%u, gpusvm=%p, seqno=%lu, start=0x%016lx, end=0x%016lx, event=%d",
	       vm->usm.asid, gpusvm, notifier->notifier.invalidate_seq,
	       mmu_range->start, mmu_range->end, mmu_range->event);

	/* Adjust invalidation to notifier boundaries */
	adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
	adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);

	first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
	if (!first)
		return;

	/*
	 * PTs may be getting destroyed so it is not safe to touch them, but
	 * the PTs should already be invalidated at this point in time.
	 * Regardless, we still need to ensure any DMA mappings are unmapped
	 * here.
	 */
	if (xe_vm_is_closed(vm))
		goto range_notifier_event_end;

	/*
	 * XXX: Less than ideal to always wait on VM's resv slots if an
	 * invalidation is not required. Could walk the range list twice to
	 * figure out if an invalidation is needed, but that is also not ideal.
	 */
	err = dma_resv_wait_timeout(xe_vm_resv(vm),
				    DMA_RESV_USAGE_BOOKKEEP,
				    false, MAX_SCHEDULE_TIMEOUT);
	XE_WARN_ON(err <= 0);

	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
							       &adj_start,
							       &adj_end);
	if (!tile_mask)
		goto range_notifier_event_end;

	xe_device_wmb(xe);

	for_each_tile(tile, xe, id) {
		if (tile_mask & BIT(id)) {
			int err;

			xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->primary_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;

			if (!tile->media_gt)
				continue;

			xe_gt_tlb_invalidation_fence_init(tile->media_gt,
							  &fence[fence_id], true);

			err = xe_gt_tlb_invalidation_range(tile->media_gt,
							   &fence[fence_id],
							   adj_start,
							   adj_end,
							   vm->usm.asid);
			if (WARN_ON_ONCE(err < 0))
				goto wait;
			++fence_id;
		}
	}

wait:
	for (id = 0; id < fence_id; ++id)
		xe_gt_tlb_invalidation_fence_wait(&fence[id]);

range_notifier_event_end:
	r = first;
	drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
		xe_svm_range_notifier_event_end(vm, r, mmu_range);
}

static int __xe_svm_garbage_collector(struct xe_vm *vm,
				      struct xe_svm_range *range)
{
	struct dma_fence *fence;

	range_debug(range, "GARBAGE COLLECTOR");

	xe_vm_lock(vm, false);
	fence = xe_vm_range_unbind(vm, range);
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);
	dma_fence_put(fence);

	drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);

	return 0;
}

static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	int err;

	lockdep_assert_held_write(&vm->lock);

	if (xe_vm_is_closed_or_banned(vm))
		return -ENOENT;

	spin_lock(&vm->svm.garbage_collector.lock);
	for (;;) {
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			drm_warn(&vm->xe->drm,
				 "Garbage collection failed: %pe\n",
				 ERR_PTR(err));
			xe_vm_kill(vm, true);
			return err;
		}

		spin_lock(&vm->svm.garbage_collector.lock);
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	return 0;
}

static void xe_svm_garbage_collector_work_func(struct work_struct *w)
{
	struct xe_vm *vm = container_of(w, struct xe_vm,
					svm.garbage_collector.work);

	down_write(&vm->lock);
	xe_svm_garbage_collector(vm);
	up_write(&vm->lock);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)

static struct xe_vram_region *page_to_vr(struct page *page)
{
	return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
}

static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
{
	return container_of(vr, struct xe_tile, mem.vram);
}

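/*
 * Convert a device-private page to its device physical address (DPA): the
 * page's host physical address is offset from the region's remapped base
 * (hpa_base), and that offset is applied to the region's DPA base.
 */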
static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
				      struct page *page)
{
	u64 dpa;
	struct xe_tile *tile = vr_to_tile(vr);
	u64 pfn = page_to_pfn(page);
	u64 offset;

	xe_tile_assert(tile, is_device_private_page(page));
	xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);

	offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
	dpa = vr->dpa_base + offset;

	return dpa;
}

enum xe_svm_copy_dir {
	XE_SVM_COPY_TO_VRAM,
	XE_SVM_COPY_TO_SRAM,
};

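/*
 * Illustrative example of the batching below (assuming 4K pages): for four
 * pages where pages 0-2 are physically contiguous in VRAM and page 3 is not,
 * the loop issues one 3-page copy starting at page 0 and a second 1-page
 * copy for page 3. If all four pages are contiguous, a single 4-page copy is
 * issued when the last page is reached.
 */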
static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
		       unsigned long npages, const enum xe_svm_copy_dir dir)
{
	struct xe_vram_region *vr = NULL;
	struct xe_tile *tile;
	struct dma_fence *fence = NULL;
	unsigned long i;
#define XE_VRAM_ADDR_INVALID	~0x0ull
	u64 vram_addr = XE_VRAM_ADDR_INVALID;
	int err = 0, pos = 0;
	bool sram = dir == XE_SVM_COPY_TO_SRAM;

	/*
	 * This flow is complex: it locates physically contiguous device pages,
	 * derives the starting physical address, and performs a single GPU
	 * copy for every 8M chunk in a DMA address array. Both device pages
	 * and DMA addresses may be sparsely populated. If either is NULL, a
	 * copy is triggered based on the current search state. The last GPU
	 * copy is waited on to ensure all copies are complete.
	 */

	for (i = 0; i < npages; ++i) {
		struct page *spage = pages[i];
		struct dma_fence *__fence;
		u64 __vram_addr;
		bool match = false, chunk, last;

#define XE_MIGRATE_CHUNK_SIZE	SZ_8M
		chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
		last = (i + 1) == npages;

		/* No CPU page and no device pages queued to copy */
		if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
			continue;

		if (!vr && spage) {
			vr = page_to_vr(spage);
			tile = vr_to_tile(vr);
		}
		XE_WARN_ON(spage && page_to_vr(spage) != vr);

		/*
		 * CPU page and device page valid, capture physical address on
		 * first device page, check if physically contiguous on
		 * subsequent device pages.
		 */
		if (dma_addr[i] && spage) {
			__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
			if (vram_addr == XE_VRAM_ADDR_INVALID) {
				vram_addr = __vram_addr;
				pos = i;
			}

			match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
		}

		/*
		 * Mismatched physical address, 8M copy chunk, or last page -
		 * trigger a copy.
		 */
		if (!match || chunk || last) {
			/*
			 * Extra page for first copy if last page and matching
			 * physical address.
			 */
			int incr = (match && last) ? 1 : 0;

			if (vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       vram_addr, (u64)dma_addr[pos], i - pos + incr);
					__fence = xe_migrate_from_vram(tile->migrate,
								       i - pos + incr,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
					       (u64)dma_addr[pos], vram_addr, i - pos + incr);
					__fence = xe_migrate_to_vram(tile->migrate,
								     i - pos + incr,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}

			/* Setup physical address of next device page */
			if (dma_addr[i] && spage) {
				vram_addr = __vram_addr;
				pos = i;
			} else {
				vram_addr = XE_VRAM_ADDR_INVALID;
			}

			/* Extra mismatched device page, copy it */
			if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
				if (sram) {
					vm_dbg(&tile->xe->drm,
					       "COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       vram_addr, (u64)dma_addr[pos], 1);
					__fence = xe_migrate_from_vram(tile->migrate, 1,
								       vram_addr,
								       dma_addr + pos);
				} else {
					vm_dbg(&tile->xe->drm,
					       "COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
					       (u64)dma_addr[pos], vram_addr, 1);
					__fence = xe_migrate_to_vram(tile->migrate, 1,
								     dma_addr + pos,
								     vram_addr);
				}
				if (IS_ERR(__fence)) {
					err = PTR_ERR(__fence);
					goto err_out;
				}

				dma_fence_put(fence);
				fence = __fence;
			}
		}
	}

err_out:
	/* Wait for all copies to complete */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}

	return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}

static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
				 unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
}

static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
			      unsigned long npages)
{
	return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
}

static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
{
	return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}

static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);

	xe_bo_put_async(bo);
}

static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
{
	return PHYS_PFN(offset + vr->hpa_base);
}

static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
{
	return &tile->mem.vram.ttm.mm;
}

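/*
 * Walk the buddy blocks backing the BO and emit one PFN per page, in block
 * order, into the caller-provided array. The GPU SVM core uses these PFNs
 * when migrating pages into this allocation.
 */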
static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
				      unsigned long npages, unsigned long *pfn)
{
	struct xe_bo *bo = to_xe_bo(devmem_allocation);
	struct ttm_resource *res = bo->ttm.resource;
	struct list_head *blocks = &to_xe_ttm_vram_mgr_resource(res)->blocks;
	struct drm_buddy_block *block;
	int j = 0;

	list_for_each_entry(block, blocks, link) {
		struct xe_vram_region *vr = block->private;
		struct xe_tile *tile = vr_to_tile(vr);
		struct drm_buddy *buddy = tile_to_buddy(tile);
		u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
		int i;

		for (i = 0; i < drm_buddy_block_size(buddy, block) >> PAGE_SHIFT; ++i)
			pfn[j++] = block_pfn + i;
	}

	return 0;
}

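/*
 * Device memory callbacks invoked by the DRM GPU SVM layer when migrating
 * ranges to/from VRAM and when releasing a devmem allocation.
 */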
static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
	.devmem_release = xe_svm_devmem_release,
	.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
	.copy_to_devmem = xe_svm_copy_to_devmem,
	.copy_to_ram = xe_svm_copy_to_ram,
};

#endif

static const struct drm_gpusvm_ops gpusvm_ops = {
	.range_alloc = xe_svm_range_alloc,
	.range_free = xe_svm_range_free,
	.invalidate = xe_svm_invalidate,
};

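/*
 * Chunk sizes the GPU SVM core may use when sizing ranges around a faulting
 * address, listed largest to smallest.
 */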
static const unsigned long fault_chunk_sizes[] = {
	SZ_2M,
	SZ_64K,
	SZ_4K,
};

/**
 * xe_svm_init() - SVM initialize
 * @vm: The VM.
 *
 * Initialize SVM state which is embedded within the VM.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_init(struct xe_vm *vm)
{
	int err;

	spin_lock_init(&vm->svm.garbage_collector.lock);
	INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
	INIT_WORK(&vm->svm.garbage_collector.work,
		  xe_svm_garbage_collector_work_func);

	err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
			      current->mm, xe_svm_devm_owner(vm->xe), 0,
			      vm->size, xe_modparam.svm_notifier_size * SZ_1M,
			      &gpusvm_ops, fault_chunk_sizes,
			      ARRAY_SIZE(fault_chunk_sizes));
	if (err)
		return err;

	drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);

	return 0;
}

/**
 * xe_svm_close() - SVM close
 * @vm: The VM.
 *
 * Close SVM state (i.e., stop and flush all SVM actions).
 */
void xe_svm_close(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));
	flush_work(&vm->svm.garbage_collector.work);
}

/**
 * xe_svm_fini() - SVM finalize
 * @vm: The VM.
 *
 * Finalize SVM state which is embedded within the VM.
 */
void xe_svm_fini(struct xe_vm *vm)
{
	xe_assert(vm->xe, xe_vm_is_closed(vm));

	drm_gpusvm_fini(&vm->svm.gpusvm);
}

static bool xe_svm_range_is_valid(struct xe_svm_range *range,
				  struct xe_tile *tile,
				  bool devmem_only)
{
	/*
	 * Advisory-only check of whether the range currently has a valid
	 * mapping; READ_ONCE pairs with WRITE_ONCE in xe_pt.c.
	 */
	return ((READ_ONCE(range->tile_present) &
		 ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
		(!devmem_only || xe_svm_range_in_vram(range));
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
{
	return &tile->mem.vram;
}

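/*
 * Back the range with VRAM: create a VRAM BO covering the range, register it
 * as a devmem allocation with the GPU SVM core, tag the buddy blocks with the
 * owning VRAM region (used later by xe_svm_populate_devmem_pfn()), and then
 * migrate the range's pages into the allocation. The mmap read lock is held
 * across the migration.
 */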
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_svm_range *range,
			     const struct drm_gpusvm_ctx *ctx)
{
	struct mm_struct *mm = vm->svm.gpusvm.mm;
	struct xe_vram_region *vr = tile_to_vr(tile);
	struct drm_buddy_block *block;
	struct list_head *blocks;
	struct xe_bo *bo;
	ktime_t end = 0;
	int err;

	range_debug(range, "ALLOCATE VRAM");

	if (!mmget_not_zero(mm))
		return -EFAULT;
	mmap_read_lock(mm);

retry:
	bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
				 xe_svm_range_size(range),
				 ttm_bo_type_device,
				 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				 XE_BO_FLAG_CPU_ADDR_MIRROR);
	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		if (xe_vm_validate_should_retry(NULL, err, &end))
			goto retry;
		goto unlock;
	}

	drm_gpusvm_devmem_init(&bo->devmem_allocation,
			       vm->xe->drm.dev, mm,
			       &gpusvm_devmem_ops,
			       &tile->mem.vram.dpagemap,
			       xe_svm_range_size(range));

	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
	list_for_each_entry(block, blocks, link)
		block->private = vr;

	xe_bo_get(bo);
	err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
					   &bo->devmem_allocation, ctx);
	if (err)
		xe_svm_devmem_release(&bo->devmem_allocation);

	xe_bo_unlock(bo);
	xe_bo_put(bo);

unlock:
	mmap_read_unlock(mm);
	mmput(mm);

	return err;
}
#else
static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
			     struct xe_svm_range *range,
			     const struct drm_gpusvm_ctx *ctx)
{
	return -EOPNOTSUPP;
}
#endif

static bool supports_4K_migration(struct xe_device *xe)
{
	if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		return false;

	return true;
}

static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
					       struct xe_vma *vma)
{
	struct xe_vm *vm = range_to_vm(&range->base);
	u64 range_size = xe_svm_range_size(range);

	if (!range->base.flags.migrate_devmem)
		return false;

	if (xe_svm_range_in_vram(range)) {
		drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
		return false;
	}

	if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
		drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
		return false;
	}

	return true;
}

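/*
 * Page-fault flow: run the garbage collector so the SVM range view is
 * current, find or insert a range for the faulting address, optionally
 * migrate the range to VRAM (with a bounded number of retries), collect the
 * pages, and finally (re)bind the range, waiting on the bind fence before
 * returning.
 */
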
/**
 * xe_svm_handle_pagefault() - SVM handle page fault
 * @vm: The VM.
 * @vma: The CPU address mirror VMA.
 * @gt: The gt upon which the fault occurred.
 * @fault_addr: The GPU fault address.
 * @atomic: The fault atomic access bit.
 *
 * Create GPU bindings for a SVM page fault. Optionally migrate to device
 * memory.
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_gt *gt, u64 fault_addr,
			    bool atomic)
{
	struct drm_gpusvm_ctx ctx = {
		.read_only = xe_vma_read_only(vma),
		.devmem_possible = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.check_pages_threshold = IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
		.devmem_only = atomic && IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
		.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
	};
	struct xe_svm_range *range;
	struct drm_gpusvm_range *r;
	struct drm_exec exec;
	struct dma_fence *fence;
	int migrate_try_count = ctx.devmem_only ? 3 : 1;
	struct xe_tile *tile = gt_to_tile(gt);
	ktime_t end = 0;
	int err;

	lockdep_assert_held_write(&vm->lock);
	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);

retry:
	/* Always process UNMAPs first so the view of SVM ranges is current */
	err = xe_svm_garbage_collector(vm);
	if (err)
		return err;

	r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
					    xe_vma_start(vma), xe_vma_end(vma),
					    &ctx);
	if (IS_ERR(r))
		return PTR_ERR(r);

	if (ctx.devmem_only && !r->flags.migrate_devmem)
		return -EACCES;

	range = to_xe_range(r);
	if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
		return 0;

	range_debug(range, "PAGE FAULT");

	if (--migrate_try_count >= 0 &&
	    xe_svm_range_needs_migrate_to_vram(range, vma)) {
		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (err) {
			if (migrate_try_count || !ctx.devmem_only) {
				drm_dbg(&vm->xe->drm,
					"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));
				goto retry;
			} else {
				drm_err(&vm->xe->drm,
					"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
					vm->usm.asid, ERR_PTR(err));
				return err;
			}
		}
	}

	range_debug(range, "GET PAGES");
	err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
	/* Corner case where CPU mappings have changed */
	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
		if (migrate_try_count > 0 || !ctx.devmem_only) {
			if (err == -EOPNOTSUPP) {
				range_debug(range, "PAGE FAULT - EVICT PAGES");
				drm_gpusvm_range_evict(&vm->svm.gpusvm,
						       &range->base);
			}
			drm_dbg(&vm->xe->drm,
				"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
			range_debug(range, "PAGE FAULT - RETRY PAGES");
			goto retry;
		} else {
			drm_err(&vm->xe->drm,
				"Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
		}
	}
	if (err) {
		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
		goto err_out;
	}

	range_debug(range, "PAGE FAULT - BIND");

retry_bind:
	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
		drm_exec_retry_on_contention(&exec);
		if (err) {
			drm_exec_fini(&exec);
			goto err_out;
		}

		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
		if (IS_ERR(fence)) {
			drm_exec_fini(&exec);
			err = PTR_ERR(fence);
			if (err == -EAGAIN) {
				ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
				range_debug(range, "PAGE FAULT - RETRY BIND");
				goto retry;
			}
			if (xe_vm_validate_should_retry(&exec, err, &end))
				goto retry_bind;
			goto err_out;
		}
	}
	drm_exec_fini(&exec);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

err_out:

	return err;
}

/**
 * xe_svm_has_mapping() - SVM has mappings
 * @vm: The VM.
 * @start: Start address.
 * @end: End address.
 *
 * Check if an address range has SVM mappings.
 *
 * Return: True if address range has a SVM mapping, False otherwise
 */
bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
{
	return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}

/**
 * xe_svm_bo_evict() - SVM evict BO to system memory
 * @bo: BO to evict
 *
 * SVM evict BO to system memory. GPU SVM layer ensures all device pages
 * are evicted before returning.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_svm_bo_evict(struct xe_bo *bo)
{
	return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
}

#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)

static struct drm_pagemap_device_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
			  struct device *dev,
			  struct page *page,
			  unsigned int order,
			  enum dma_data_direction dir)
{
	struct device *pgmap_dev = dpagemap->dev;
	enum drm_interconnect_protocol prot;
	dma_addr_t addr;

	if (pgmap_dev == dev) {
		addr = xe_vram_region_page_to_dpa(page_to_vr(page), page);
		prot = XE_INTERCONNECT_VRAM;
	} else {
		addr = DMA_MAPPING_ERROR;
		prot = 0;
	}

	return drm_pagemap_device_addr_encode(addr, prot, order, dir);
}

static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
	.device_map = xe_drm_pagemap_device_map,
};

/**
 * xe_devm_add() - Remap and provide memmap backing for device memory
 * @tile: tile that the memory region belongs to
 * @vr: vram memory region to remap
 *
 * This remaps device memory to the host physical address space and creates
 * struct pages to back the device memory.
 *
 * Return: 0 on success, standard error code otherwise
 */
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct device *dev = &to_pci_dev(xe->drm.dev)->dev;
	struct resource *res;
	void *addr;
	int ret;

	res = devm_request_free_mem_region(dev, &iomem_resource,
					   vr->usable_size);
	if (IS_ERR(res)) {
		ret = PTR_ERR(res);
		return ret;
	}

	vr->pagemap.type = MEMORY_DEVICE_PRIVATE;
	vr->pagemap.range.start = res->start;
	vr->pagemap.range.end = res->end;
	vr->pagemap.nr_range = 1;
	vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
	vr->pagemap.owner = xe_svm_devm_owner(xe);
	addr = devm_memremap_pages(dev, &vr->pagemap);

	vr->dpagemap.dev = dev;
	vr->dpagemap.ops = &xe_drm_pagemap_ops;

	if (IS_ERR(addr)) {
		devm_release_mem_region(dev, res->start, resource_size(res));
		ret = PTR_ERR(addr);
		drm_err(&xe->drm, "Failed to remap tile %d memory, errno %pe\n",
			tile->id, ERR_PTR(ret));
		return ret;
	}
	vr->hpa_base = res->start;

	drm_dbg(&xe->drm, "Added tile %d memory [%llx-%llx] to devm, remapped to %pr\n",
		tile->id, vr->io_start, vr->io_start + vr->usable_size, res);
	return 0;
}
#else
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
	return 0;
}
#endif

/**
 * xe_svm_flush() - SVM flush
 * @vm: The VM.
 *
 * Flush all SVM actions.
 */
void xe_svm_flush(struct xe_vm *vm)
{
	if (xe_vm_in_fault_mode(vm))
		flush_work(&vm->svm.garbage_collector.work);
}