1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_backup.h>
14 #include <drm/ttm/ttm_device.h>
15 #include <drm/ttm/ttm_placement.h>
16 #include <drm/ttm/ttm_tt.h>
17 #include <uapi/drm/xe_drm.h>
18 
19 #include "xe_device.h"
20 #include "xe_dma_buf.h"
21 #include "xe_drm_client.h"
22 #include "xe_ggtt.h"
23 #include "xe_gt.h"
24 #include "xe_map.h"
25 #include "xe_migrate.h"
26 #include "xe_pm.h"
27 #include "xe_preempt_fence.h"
28 #include "xe_res_cursor.h"
29 #include "xe_shrinker.h"
30 #include "xe_trace_bo.h"
31 #include "xe_ttm_stolen_mgr.h"
32 #include "xe_vm.h"
33 
34 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
35 	[XE_PL_SYSTEM] = "system",
36 	[XE_PL_TT] = "gtt",
37 	[XE_PL_VRAM0] = "vram0",
38 	[XE_PL_VRAM1] = "vram1",
39 	[XE_PL_STOLEN] = "stolen"
40 };
41 
42 static const struct ttm_place sys_placement_flags = {
43 	.fpfn = 0,
44 	.lpfn = 0,
45 	.mem_type = XE_PL_SYSTEM,
46 	.flags = 0,
47 };
48 
49 static struct ttm_placement sys_placement = {
50 	.num_placement = 1,
51 	.placement = &sys_placement_flags,
52 };
53 
54 static const struct ttm_place tt_placement_flags[] = {
55 	{
56 		.fpfn = 0,
57 		.lpfn = 0,
58 		.mem_type = XE_PL_TT,
59 		.flags = TTM_PL_FLAG_DESIRED,
60 	},
61 	{
62 		.fpfn = 0,
63 		.lpfn = 0,
64 		.mem_type = XE_PL_SYSTEM,
65 		.flags = TTM_PL_FLAG_FALLBACK,
66 	}
67 };
68 
69 static struct ttm_placement tt_placement = {
70 	.num_placement = 2,
71 	.placement = tt_placement_flags,
72 };
73 
74 bool mem_type_is_vram(u32 mem_type)
75 {
76 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
77 }
78 
79 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
80 {
81 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
82 }
83 
84 static bool resource_is_vram(struct ttm_resource *res)
85 {
86 	return mem_type_is_vram(res->mem_type);
87 }
88 
89 bool xe_bo_is_vram(struct xe_bo *bo)
90 {
91 	return resource_is_vram(bo->ttm.resource) ||
92 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
93 }
94 
95 bool xe_bo_is_stolen(struct xe_bo *bo)
96 {
97 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
98 }
99 
100 /**
101  * xe_bo_has_single_placement - check if BO is placed only in one memory location
102  * @bo: The BO
103  *
104  * This function checks whether a given BO is placed in only one memory location.
105  *
106  * Returns: true if the BO is placed in a single memory location, false otherwise.
107  *
108  */
109 bool xe_bo_has_single_placement(struct xe_bo *bo)
110 {
111 	return bo->placement.num_placement == 1;
112 }
113 
114 /**
115  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
116  * @bo: The BO
117  *
118  * The stolen memory is accessed through the PCI BAR for both DGFX and some
119  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
120  *
121  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
122  */
123 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
124 {
125 	return xe_bo_is_stolen(bo) &&
126 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
127 }
128 
129 static bool xe_bo_is_user(struct xe_bo *bo)
130 {
131 	return bo->flags & XE_BO_FLAG_USER;
132 }
133 
134 static struct xe_migrate *
135 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
136 {
137 	struct xe_tile *tile;
138 
139 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
140 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
141 	return tile->migrate;
142 }
143 
144 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
145 {
146 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
147 	struct ttm_resource_manager *mgr;
148 
149 	xe_assert(xe, resource_is_vram(res));
150 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
151 	return to_xe_ttm_vram_mgr(mgr)->vram;
152 }
153 
154 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
155 			   u32 bo_flags, u32 *c)
156 {
157 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
158 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
159 
160 		bo->placements[*c] = (struct ttm_place) {
161 			.mem_type = XE_PL_TT,
162 		};
163 		*c += 1;
164 	}
165 }
166 
167 static bool force_contiguous(u32 bo_flags)
168 {
169 	/*
170 	 * For eviction / restore on suspend / resume objects pinned in VRAM
171 	 * must be contiguous, also only contiguous BOs support xe_bo_vmap.
172 	 */
173 	return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
174 }
175 
176 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
177 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
178 {
179 	struct ttm_place place = { .mem_type = mem_type };
180 	struct xe_mem_region *vram;
181 	u64 io_size;
182 
183 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
184 
185 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
186 	xe_assert(xe, vram && vram->usable_size);
187 	io_size = vram->io_size;
188 
189 	if (force_contiguous(bo_flags))
190 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
191 
192 	if (io_size < vram->usable_size) {
193 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
194 			place.fpfn = 0;
195 			place.lpfn = io_size >> PAGE_SHIFT;
196 		} else {
197 			place.flags |= TTM_PL_FLAG_TOPDOWN;
198 		}
199 	}
200 	places[*c] = place;
201 	*c += 1;
202 }
203 
204 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
205 			 u32 bo_flags, u32 *c)
206 {
207 	if (bo_flags & XE_BO_FLAG_VRAM0)
208 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
209 	if (bo_flags & XE_BO_FLAG_VRAM1)
210 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
211 }
212 
213 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
214 			   u32 bo_flags, u32 *c)
215 {
216 	if (bo_flags & XE_BO_FLAG_STOLEN) {
217 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
218 
219 		bo->placements[*c] = (struct ttm_place) {
220 			.mem_type = XE_PL_STOLEN,
221 			.flags = force_contiguous(bo_flags) ?
222 				TTM_PL_FLAG_CONTIGUOUS : 0,
223 		};
224 		*c += 1;
225 	}
226 }
227 
228 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
229 				       u32 bo_flags)
230 {
231 	u32 c = 0;
232 
233 	try_add_vram(xe, bo, bo_flags, &c);
234 	try_add_system(xe, bo, bo_flags, &c);
235 	try_add_stolen(xe, bo, bo_flags, &c);
236 
237 	if (!c)
238 		return -EINVAL;
239 
240 	bo->placement = (struct ttm_placement) {
241 		.num_placement = c,
242 		.placement = bo->placements,
243 	};
244 
245 	return 0;
246 }
247 
248 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
249 			      u32 bo_flags)
250 {
251 	xe_bo_assert_held(bo);
252 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
253 }
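
/*
 * Illustrative sketch (not driver code): how bo_flags map to the placement
 * list built above. A hypothetical BO created with
 * XE_BO_FLAG_VRAM0 | XE_BO_FLAG_SYSTEM ends up with two entries:
 *
 *	bo->placements[0].mem_type == XE_PL_VRAM0;	// preferred
 *	bo->placements[1].mem_type == XE_PL_TT;		// fallback
 *	bo->placement.num_placement == 2;
 *
 * Note that XE_BO_FLAG_SYSTEM adds an XE_PL_TT placement, not XE_PL_SYSTEM.
 */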
254 
255 static void xe_evict_flags(struct ttm_buffer_object *tbo,
256 			   struct ttm_placement *placement)
257 {
258 	if (!xe_bo_is_xe_bo(tbo)) {
259 		/* Don't handle scatter gather BOs */
260 		if (tbo->type == ttm_bo_type_sg) {
261 			placement->num_placement = 0;
262 			return;
263 		}
264 
265 		*placement = sys_placement;
266 		return;
267 	}
268 
269 	/*
270 	 * For xe, sg bos that are evicted to system just trigger a
271 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
272 	 */
273 	switch (tbo->resource->mem_type) {
274 	case XE_PL_VRAM0:
275 	case XE_PL_VRAM1:
276 	case XE_PL_STOLEN:
277 		*placement = tt_placement;
278 		break;
279 	case XE_PL_TT:
280 	default:
281 		*placement = sys_placement;
282 		break;
283 	}
284 }
285 
286 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */
287 struct xe_ttm_tt {
288 	struct ttm_tt ttm;
289 	/** @xe: The xe device */
290 	struct xe_device *xe;
291 	struct sg_table sgt;
292 	struct sg_table *sg;
293 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
294 	bool purgeable;
295 };
296 
297 static int xe_tt_map_sg(struct ttm_tt *tt)
298 {
299 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
300 	unsigned long num_pages = tt->num_pages;
301 	int ret;
302 
303 	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
304 		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
305 
306 	if (xe_tt->sg)
307 		return 0;
308 
309 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
310 						num_pages, 0,
311 						(u64)num_pages << PAGE_SHIFT,
312 						xe_sg_segment_size(xe_tt->xe->drm.dev),
313 						GFP_KERNEL);
314 	if (ret)
315 		return ret;
316 
317 	xe_tt->sg = &xe_tt->sgt;
318 	ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
319 			      DMA_ATTR_SKIP_CPU_SYNC);
320 	if (ret) {
321 		sg_free_table(xe_tt->sg);
322 		xe_tt->sg = NULL;
323 		return ret;
324 	}
325 
326 	return 0;
327 }
328 
329 static void xe_tt_unmap_sg(struct ttm_tt *tt)
330 {
331 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
332 
333 	if (xe_tt->sg) {
334 		dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
335 				  DMA_BIDIRECTIONAL, 0);
336 		sg_free_table(xe_tt->sg);
337 		xe_tt->sg = NULL;
338 	}
339 }
340 
341 struct sg_table *xe_bo_sg(struct xe_bo *bo)
342 {
343 	struct ttm_tt *tt = bo->ttm.ttm;
344 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
345 
346 	return xe_tt->sg;
347 }
348 
349 /*
350  * Account ttm pages against the device shrinker's shrinkable and
351  * purgeable counts.
352  */
353 static void xe_ttm_tt_account_add(struct ttm_tt *tt)
354 {
355 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
356 
357 	if (xe_tt->purgeable)
358 		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages);
359 	else
360 		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0);
361 }
362 
363 static void xe_ttm_tt_account_subtract(struct ttm_tt *tt)
364 {
365 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
366 
367 	if (xe_tt->purgeable)
368 		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages);
369 	else
370 		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0);
371 }
372 
373 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
374 				       u32 page_flags)
375 {
376 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
377 	struct xe_device *xe = xe_bo_device(bo);
378 	struct xe_ttm_tt *xe_tt;
379 	struct ttm_tt *tt;
380 	unsigned long extra_pages;
381 	enum ttm_caching caching = ttm_cached;
382 	int err;
383 
384 	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
385 	if (!xe_tt)
386 		return NULL;
387 
388 	tt = &xe_tt->ttm;
389 	xe_tt->xe = xe;
390 
391 	extra_pages = 0;
392 	if (xe_bo_needs_ccs_pages(bo))
393 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
394 					   PAGE_SIZE);
395 
396 	/*
397 	 * DGFX system memory is always WB / ttm_cached, since
398 	 * other caching modes are only supported on x86. DGFX
399 	 * GPU system memory accesses are always coherent with the
400 	 * CPU.
401 	 */
402 	if (!IS_DGFX(xe)) {
403 		switch (bo->cpu_caching) {
404 		case DRM_XE_GEM_CPU_CACHING_WC:
405 			caching = ttm_write_combined;
406 			break;
407 		default:
408 			caching = ttm_cached;
409 			break;
410 		}
411 
412 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
413 
414 		/*
415 		 * Display scanout is always non-coherent with the CPU cache.
416 		 *
417 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
418 		 * non-coherent and require a CPU:WC mapping.
419 		 */
420 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
421 		    (xe->info.graphics_verx100 >= 1270 &&
422 		     bo->flags & XE_BO_FLAG_PAGETABLE))
423 			caching = ttm_write_combined;
424 	}
425 
426 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
427 		/*
428 		 * Valid only for internally-created buffers, for
429 		 * which cpu_caching is never initialized.
430 		 */
431 		xe_assert(xe, bo->cpu_caching == 0);
432 		caching = ttm_uncached;
433 	}
434 
435 	if (ttm_bo->type != ttm_bo_type_sg)
436 		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
437 
438 	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
439 	if (err) {
440 		kfree(xe_tt);
441 		return NULL;
442 	}
443 
444 	if (ttm_bo->type != ttm_bo_type_sg) {
445 		err = ttm_tt_setup_backup(tt);
446 		if (err) {
447 			ttm_tt_fini(tt);
448 			kfree(xe_tt);
449 			return NULL;
450 		}
451 	}
452 
453 	return tt;
454 }
455 
456 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
457 			      struct ttm_operation_ctx *ctx)
458 {
459 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
460 	int err;
461 
462 	/*
463 	 * dma-bufs are not populated with pages, and the dma-
464 	 * addresses are set up when moved to XE_PL_TT.
465 	 */
466 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
467 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
468 		return 0;
469 
470 	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
471 		err = ttm_tt_restore(ttm_dev, tt, ctx);
472 	} else {
473 		ttm_tt_clear_backed_up(tt);
474 		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
475 	}
476 	if (err)
477 		return err;
478 
479 	xe_tt->purgeable = false;
480 	xe_ttm_tt_account_add(tt);
481 
482 	return 0;
483 }
484 
485 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
486 {
487 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
488 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
489 		return;
490 
491 	xe_tt_unmap_sg(tt);
492 
493 	ttm_pool_free(&ttm_dev->pool, tt);
494 	xe_ttm_tt_account_subtract(tt);
495 }
496 
497 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
498 {
499 	ttm_tt_fini(tt);
500 	kfree(tt);
501 }
502 
503 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
504 {
505 	struct xe_ttm_vram_mgr_resource *vres =
506 		to_xe_ttm_vram_mgr_resource(mem);
507 
508 	return vres->used_visible_size == mem->size;
509 }
510 
511 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
512 				 struct ttm_resource *mem)
513 {
514 	struct xe_device *xe = ttm_to_xe_device(bdev);
515 
516 	switch (mem->mem_type) {
517 	case XE_PL_SYSTEM:
518 	case XE_PL_TT:
519 		return 0;
520 	case XE_PL_VRAM0:
521 	case XE_PL_VRAM1: {
522 		struct xe_mem_region *vram = res_to_mem_region(mem);
523 
524 		if (!xe_ttm_resource_visible(mem))
525 			return -EINVAL;
526 
527 		mem->bus.offset = mem->start << PAGE_SHIFT;
528 
529 		if (vram->mapping &&
530 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
531 			mem->bus.addr = (u8 __force *)vram->mapping +
532 				mem->bus.offset;
533 
534 		mem->bus.offset += vram->io_start;
535 		mem->bus.is_iomem = true;
536 
537 #if !IS_ENABLED(CONFIG_X86)
538 		mem->bus.caching = ttm_write_combined;
539 #endif
540 		return 0;
541 	} case XE_PL_STOLEN:
542 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
543 	default:
544 		return -EINVAL;
545 	}
546 }
547 
548 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
549 				const struct ttm_operation_ctx *ctx)
550 {
551 	struct dma_resv_iter cursor;
552 	struct dma_fence *fence;
553 	struct drm_gem_object *obj = &bo->ttm.base;
554 	struct drm_gpuvm_bo *vm_bo;
555 	bool idle = false;
556 	int ret = 0;
557 
558 	dma_resv_assert_held(bo->ttm.base.resv);
559 
560 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
561 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
562 				    DMA_RESV_USAGE_BOOKKEEP);
563 		dma_resv_for_each_fence_unlocked(&cursor, fence)
564 			dma_fence_enable_sw_signaling(fence);
565 		dma_resv_iter_end(&cursor);
566 	}
567 
568 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
569 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
570 		struct drm_gpuva *gpuva;
571 
572 		if (!xe_vm_in_fault_mode(vm)) {
573 			drm_gpuvm_bo_evict(vm_bo, true);
574 			continue;
575 		}
576 
577 		if (!idle) {
578 			long timeout;
579 
580 			if (ctx->no_wait_gpu &&
581 			    !dma_resv_test_signaled(bo->ttm.base.resv,
582 						    DMA_RESV_USAGE_BOOKKEEP))
583 				return -EBUSY;
584 
585 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
586 							DMA_RESV_USAGE_BOOKKEEP,
587 							ctx->interruptible,
588 							MAX_SCHEDULE_TIMEOUT);
589 			if (!timeout)
590 				return -ETIME;
591 			if (timeout < 0)
592 				return timeout;
593 
594 			idle = true;
595 		}
596 
597 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
598 			struct xe_vma *vma = gpuva_to_vma(gpuva);
599 
600 			trace_xe_vma_evict(vma);
601 			ret = xe_vm_invalidate_vma(vma);
602 			if (XE_WARN_ON(ret))
603 				return ret;
604 		}
605 	}
606 
607 	return ret;
608 }
609 
610 /*
611  * The dma-buf map_attachment() / unmap_attachment() calls are hooked up here.
612  * Note that unmapping the attachment is deferred to the next
613  * map_attachment time, or to bo destroy (after idling), whichever comes first.
614  * This is to avoid syncing before unmap_attachment(), assuming that the
615  * caller relies on idling the reservation object before moving the
616  * backing store out. Should that assumption not hold, then we will be able
617  * to unconditionally call unmap_attachment() when moving out to system.
618  */
619 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
620 			     struct ttm_resource *new_res)
621 {
622 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
623 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
624 					       ttm);
625 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
626 	struct sg_table *sg;
627 
628 	xe_assert(xe, attach);
629 	xe_assert(xe, ttm_bo->ttm);
630 
631 	if (new_res->mem_type == XE_PL_SYSTEM)
632 		goto out;
633 
634 	if (ttm_bo->sg) {
635 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
636 		ttm_bo->sg = NULL;
637 	}
638 
639 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
640 	if (IS_ERR(sg))
641 		return PTR_ERR(sg);
642 
643 	ttm_bo->sg = sg;
644 	xe_tt->sg = sg;
645 
646 out:
647 	ttm_bo_move_null(ttm_bo, new_res);
648 
649 	return 0;
650 }
651 
652 /**
653  * xe_bo_move_notify - Notify subsystems of a pending move
654  * @bo: The buffer object
655  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
656  *
657  * This function notifies subsystems of an upcoming buffer move.
658  * Upon receiving such a notification, subsystems should schedule
659  * halting access to the underlying pages and optionally add a fence
660  * to the buffer object's dma_resv object, that signals when access is
661  * stopped. The caller will wait on all dma_resv fences before
662  * starting the move.
663  *
664  * A subsystem may commence access to the object after obtaining
665  * bindings to the new backing memory under the object lock.
666  *
667  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
668  * negative error code on error.
669  */
670 static int xe_bo_move_notify(struct xe_bo *bo,
671 			     const struct ttm_operation_ctx *ctx)
672 {
673 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
674 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
675 	struct ttm_resource *old_mem = ttm_bo->resource;
676 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
677 	int ret;
678 
679 	/*
680 	 * If this starts to call into many components, consider
681 	 * using a notification chain here.
682 	 */
683 
684 	if (xe_bo_is_pinned(bo))
685 		return -EINVAL;
686 
687 	xe_bo_vunmap(bo);
688 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
689 	if (ret)
690 		return ret;
691 
692 	/* Don't call move_notify() for imported dma-bufs. */
693 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
694 		dma_buf_move_notify(ttm_bo->base.dma_buf);
695 
696 	/*
697 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
698 	 * so if we moved from VRAM make sure to unlink this from the userfault
699 	 * tracking.
700 	 */
701 	if (mem_type_is_vram(old_mem_type)) {
702 		mutex_lock(&xe->mem_access.vram_userfault.lock);
703 		if (!list_empty(&bo->vram_userfault_link))
704 			list_del_init(&bo->vram_userfault_link);
705 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
706 	}
707 
708 	return 0;
709 }
710 
711 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
712 		      struct ttm_operation_ctx *ctx,
713 		      struct ttm_resource *new_mem,
714 		      struct ttm_place *hop)
715 {
716 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
717 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
718 	struct ttm_resource *old_mem = ttm_bo->resource;
719 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
720 	struct ttm_tt *ttm = ttm_bo->ttm;
721 	struct xe_migrate *migrate = NULL;
722 	struct dma_fence *fence;
723 	bool move_lacks_source;
724 	bool tt_has_data;
725 	bool needs_clear;
726 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
727 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
728 	int ret = 0;
729 
730 	/* Bo creation path, moving to system or TT. */
731 	if ((!old_mem && ttm) && !handle_system_ccs) {
732 		if (new_mem->mem_type == XE_PL_TT)
733 			ret = xe_tt_map_sg(ttm);
734 		if (!ret)
735 			ttm_bo_move_null(ttm_bo, new_mem);
736 		goto out;
737 	}
738 
739 	if (ttm_bo->type == ttm_bo_type_sg) {
740 		ret = xe_bo_move_notify(bo, ctx);
741 		if (!ret)
742 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
743 		return ret;
744 	}
745 
746 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
747 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
748 
749 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
750 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
751 
752 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
753 		(!ttm && ttm_bo->type == ttm_bo_type_device);
754 
755 	if (new_mem->mem_type == XE_PL_TT) {
756 		ret = xe_tt_map_sg(ttm);
757 		if (ret)
758 			goto out;
759 	}
760 
761 	if (move_lacks_source && !needs_clear) {
762 		ttm_bo_move_null(ttm_bo, new_mem);
763 		goto out;
764 	}
765 
766 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
767 		ttm_bo_move_null(ttm_bo, new_mem);
768 		goto out;
769 	}
770 
771 	/*
772 	 * Failed multi-hop where the old_mem is still marked as
773 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
774 	 */
775 	if (old_mem_type == XE_PL_TT &&
776 	    new_mem->mem_type == XE_PL_TT) {
777 		ttm_bo_move_null(ttm_bo, new_mem);
778 		goto out;
779 	}
780 
781 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
782 		ret = xe_bo_move_notify(bo, ctx);
783 		if (ret)
784 			goto out;
785 	}
786 
787 	if (old_mem_type == XE_PL_TT &&
788 	    new_mem->mem_type == XE_PL_SYSTEM) {
789 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
790 						     DMA_RESV_USAGE_BOOKKEEP,
791 						     false,
792 						     MAX_SCHEDULE_TIMEOUT);
793 		if (timeout < 0) {
794 			ret = timeout;
795 			goto out;
796 		}
797 
798 		if (!handle_system_ccs) {
799 			ttm_bo_move_null(ttm_bo, new_mem);
800 			goto out;
801 		}
802 	}
803 
804 	if (!move_lacks_source &&
805 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
806 	     (mem_type_is_vram(old_mem_type) &&
807 	      new_mem->mem_type == XE_PL_SYSTEM))) {
808 		hop->fpfn = 0;
809 		hop->lpfn = 0;
810 		hop->mem_type = XE_PL_TT;
811 		hop->flags = TTM_PL_FLAG_TEMPORARY;
812 		ret = -EMULTIHOP;
813 		goto out;
814 	}
815 
816 	if (bo->tile)
817 		migrate = bo->tile->migrate;
818 	else if (resource_is_vram(new_mem))
819 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
820 	else if (mem_type_is_vram(old_mem_type))
821 		migrate = mem_type_to_migrate(xe, old_mem_type);
822 	else
823 		migrate = xe->tiles[0].migrate;
824 
825 	xe_assert(xe, migrate);
826 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
827 	if (xe_rpm_reclaim_safe(xe)) {
828 		/*
829 		 * We might be called through swapout in the validation path of
830 		 * another TTM device, so acquire rpm here.
831 		 */
832 		xe_pm_runtime_get(xe);
833 	} else {
834 		drm_WARN_ON(&xe->drm, handle_system_ccs);
835 		xe_pm_runtime_get_noresume(xe);
836 	}
837 
838 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
839 		/*
840 		 * Kernel memory that is pinned should only be moved on suspend
841 		 * / resume. Some of the pinned memory is required for the
842 		 * device to resume / use the GPU to move other evicted memory
843 		 * (user memory) around. This could likely be optimized further
844 		 * by finding the minimum set of pinned memory required for
845 		 * resume, but for simplicity we do a memcpy for all pinned
846 		 * memory.
847 		 */
848 		ret = xe_bo_vmap(bo);
849 		if (!ret) {
850 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
851 
852 			/* Create a new VMAP once the kernel BO is back in VRAM */
853 			if (!ret && resource_is_vram(new_mem)) {
854 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
855 				void __iomem *new_addr = vram->mapping +
856 					(new_mem->start << PAGE_SHIFT);
857 
858 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
859 					ret = -EINVAL;
860 					xe_pm_runtime_put(xe);
861 					goto out;
862 				}
863 
864 				xe_assert(xe, new_mem->start ==
865 					  bo->placements->fpfn);
866 
867 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
868 			}
869 		}
870 	} else {
871 		if (move_lacks_source) {
872 			u32 flags = 0;
873 
874 			if (mem_type_is_vram(new_mem->mem_type))
875 				flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
876 			else if (handle_system_ccs)
877 				flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
878 
879 			fence = xe_migrate_clear(migrate, bo, new_mem, flags);
880 		} else {
881 			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
882 						new_mem, handle_system_ccs);
883 		}
884 		if (IS_ERR(fence)) {
885 			ret = PTR_ERR(fence);
886 			xe_pm_runtime_put(xe);
887 			goto out;
888 		}
889 		if (!move_lacks_source) {
890 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
891 							true, new_mem);
892 			if (ret) {
893 				dma_fence_wait(fence, false);
894 				ttm_bo_move_null(ttm_bo, new_mem);
895 				ret = 0;
896 			}
897 		} else {
898 			/*
899 			 * ttm_bo_move_accel_cleanup() may blow up if
900 			 * bo->resource == NULL, so just attach the
901 			 * fence and set the new resource.
902 			 */
903 			dma_resv_add_fence(ttm_bo->base.resv, fence,
904 					   DMA_RESV_USAGE_KERNEL);
905 			ttm_bo_move_null(ttm_bo, new_mem);
906 		}
907 
908 		dma_fence_put(fence);
909 	}
910 
911 	xe_pm_runtime_put(xe);
912 
913 out:
914 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
915 	    ttm_bo->ttm) {
916 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
917 						     DMA_RESV_USAGE_KERNEL,
918 						     false,
919 						     MAX_SCHEDULE_TIMEOUT);
920 		if (timeout < 0)
921 			ret = timeout;
922 
923 		xe_tt_unmap_sg(ttm_bo->ttm);
924 	}
925 
926 	return ret;
927 }
928 
929 static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
930 			       struct ttm_buffer_object *bo,
931 			       unsigned long *scanned)
932 {
933 	long lret;
934 
935 	/* Fake move to system, without copying data. */
936 	if (bo->resource->mem_type != XE_PL_SYSTEM) {
937 		struct ttm_resource *new_resource;
938 
939 		lret = ttm_bo_wait_ctx(bo, ctx);
940 		if (lret)
941 			return lret;
942 
943 		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
944 		if (lret)
945 			return lret;
946 
947 		xe_tt_unmap_sg(bo->ttm);
948 		ttm_bo_move_null(bo, new_resource);
949 	}
950 
951 	*scanned += bo->ttm->num_pages;
952 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
953 			     {.purge = true,
954 			      .writeback = false,
955 			      .allow_move = false});
956 
957 	if (lret > 0)
958 		xe_ttm_tt_account_subtract(bo->ttm);
959 
960 	return lret;
961 }
962 
963 /**
964  * xe_bo_shrink() - Try to shrink an xe bo.
965  * @ctx: The struct ttm_operation_ctx used for shrinking.
966  * @bo: The TTM buffer object whose pages to shrink.
967  * @flags: Flags governing the shrink behaviour.
968  * @scanned: Pointer to a counter of the number of pages
969  * attempted to shrink.
970  *
971  * Try to shrink or purge a bo, and if it succeeds, unmap its dma.
972  * Note that we also need to be able to handle non-xe bos
973  * (ghost bos), but only if the struct ttm_tt is embedded in
974  * a struct xe_ttm_tt. When the function attempts to shrink
975  * the pages of a buffer object, the value pointed to by @scanned
976  * is updated.
977  *
978  * Return: The number of pages shrunken or purged, or negative error
979  * code on failure.
980  */
981 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
982 		  const struct xe_bo_shrink_flags flags,
983 		  unsigned long *scanned)
984 {
985 	struct ttm_tt *tt = bo->ttm;
986 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
987 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
988 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
989 	struct xe_device *xe = xe_tt->xe;
990 	bool needs_rpm;
991 	long lret = 0L;
992 
993 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
994 	    (flags.purge && !xe_tt->purgeable))
995 		return -EBUSY;
996 
997 	if (!ttm_bo_eviction_valuable(bo, &place))
998 		return -EBUSY;
999 
1000 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1001 		return xe_bo_shrink_purge(ctx, bo, scanned);
1002 
1003 	if (xe_tt->purgeable) {
1004 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1005 			lret = xe_bo_move_notify(xe_bo, ctx);
1006 		if (!lret)
1007 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1008 		goto out_unref;
1009 	}
1010 
1011 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1012 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1013 		     xe_bo_needs_ccs_pages(xe_bo));
1014 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1015 		goto out_unref;
1016 
1017 	*scanned += tt->num_pages;
1018 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1019 			     {.purge = false,
1020 			      .writeback = flags.writeback,
1021 			      .allow_move = true});
1022 	if (needs_rpm)
1023 		xe_pm_runtime_put(xe);
1024 
1025 	if (lret > 0)
1026 		xe_ttm_tt_account_subtract(tt);
1027 
1028 out_unref:
1029 	xe_bo_put(xe_bo);
1030 
1031 	return lret;
1032 }
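
/*
 * Illustrative sketch of a caller (assumptions: "bo" is a resv-locked
 * ttm_buffer_object handed out by a TTM LRU walk and "nr_freed" is a
 * hypothetical accumulator; this is not the driver's actual shrinker loop):
 *
 *	struct ttm_operation_ctx ctx = { .gfp_retry_mayfail = true };
 *	struct xe_bo_shrink_flags flags = { .writeback = true };
 *	unsigned long scanned = 0;
 *	long freed;
 *
 *	freed = xe_bo_shrink(&ctx, bo, flags, &scanned);
 *	if (freed > 0)
 *		nr_freed += freed;
 */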
1033 
1034 /**
1035  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1036  * @bo: The buffer object to move.
1037  *
1038  * On successful completion, the object memory will be moved to system memory.
1039  *
1040  * This is needed for special handling of pinned VRAM objects during
1041  * suspend-resume.
1042  *
1043  * Return: 0 on success. Negative error code on failure.
1044  */
1045 int xe_bo_evict_pinned(struct xe_bo *bo)
1046 {
1047 	struct ttm_place place = {
1048 		.mem_type = XE_PL_TT,
1049 	};
1050 	struct ttm_placement placement = {
1051 		.placement = &place,
1052 		.num_placement = 1,
1053 	};
1054 	struct ttm_operation_ctx ctx = {
1055 		.interruptible = false,
1056 		.gfp_retry_mayfail = true,
1057 	};
1058 	struct ttm_resource *new_mem;
1059 	int ret;
1060 
1061 	xe_bo_assert_held(bo);
1062 
1063 	if (WARN_ON(!bo->ttm.resource))
1064 		return -EINVAL;
1065 
1066 	if (WARN_ON(!xe_bo_is_pinned(bo)))
1067 		return -EINVAL;
1068 
1069 	if (!xe_bo_is_vram(bo))
1070 		return 0;
1071 
1072 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
1073 	if (ret)
1074 		return ret;
1075 
1076 	if (!bo->ttm.ttm) {
1077 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
1078 		if (!bo->ttm.ttm) {
1079 			ret = -ENOMEM;
1080 			goto err_res_free;
1081 		}
1082 	}
1083 
1084 	ret = ttm_bo_populate(&bo->ttm, &ctx);
1085 	if (ret)
1086 		goto err_res_free;
1087 
1088 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1089 	if (ret)
1090 		goto err_res_free;
1091 
1092 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
1093 	if (ret)
1094 		goto err_res_free;
1095 
1096 	return 0;
1097 
1098 err_res_free:
1099 	ttm_resource_free(&bo->ttm, &new_mem);
1100 	return ret;
1101 }
1102 
1103 /**
1104  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1105  * @bo: The buffer object to move.
1106  *
1107  * On successful completion, the object memory will be moved back to VRAM.
1108  *
1109  * This is needed for special handling of pinned VRAM objects during
1110  * suspend-resume.
1111  *
1112  * Return: 0 on success. Negative error code on failure.
1113  */
1114 int xe_bo_restore_pinned(struct xe_bo *bo)
1115 {
1116 	struct ttm_operation_ctx ctx = {
1117 		.interruptible = false,
1118 		.gfp_retry_mayfail = false,
1119 	};
1120 	struct ttm_resource *new_mem;
1121 	struct ttm_place *place = &bo->placements[0];
1122 	int ret;
1123 
1124 	xe_bo_assert_held(bo);
1125 
1126 	if (WARN_ON(!bo->ttm.resource))
1127 		return -EINVAL;
1128 
1129 	if (WARN_ON(!xe_bo_is_pinned(bo)))
1130 		return -EINVAL;
1131 
1132 	if (WARN_ON(xe_bo_is_vram(bo)))
1133 		return -EINVAL;
1134 
1135 	if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
1136 		return -EINVAL;
1137 
1138 	if (!mem_type_is_vram(place->mem_type))
1139 		return 0;
1140 
1141 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
1142 	if (ret)
1143 		return ret;
1144 
1145 	ret = ttm_bo_populate(&bo->ttm, &ctx);
1146 	if (ret)
1147 		goto err_res_free;
1148 
1149 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1150 	if (ret)
1151 		goto err_res_free;
1152 
1153 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
1154 	if (ret)
1155 		goto err_res_free;
1156 
1157 	return 0;
1158 
1159 err_res_free:
1160 	ttm_resource_free(&bo->ttm, &new_mem);
1161 	return ret;
1162 }
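
/*
 * Illustrative sketch (hypothetical suspend/resume caller, error handling
 * abbreviated): xe_bo_evict_pinned() and xe_bo_restore_pinned() are meant
 * to be used as a pair around suspend, with the bo lock held:
 *
 *	int err;
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_evict_pinned(bo);
 *	xe_bo_unlock(bo);
 *	...			// device suspends, later resumes
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_restore_pinned(bo);
 *	xe_bo_unlock(bo);
 */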
1163 
1164 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1165 				       unsigned long page_offset)
1166 {
1167 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1168 	struct xe_res_cursor cursor;
1169 	struct xe_mem_region *vram;
1170 
1171 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1172 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1173 
1174 	vram = res_to_mem_region(ttm_bo->resource);
1175 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1176 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1177 }
1178 
1179 static void __xe_bo_vunmap(struct xe_bo *bo);
1180 
1181 /*
1182  * TODO: Move this function to TTM so we don't rely on how TTM does its
1183  * locking, thereby abusing TTM internals.
1184  */
1185 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1186 {
1187 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1188 	bool locked;
1189 
1190 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1191 
1192 	/*
1193 	 * We can typically only race with TTM trylocking under the
1194 	 * lru_lock, which will immediately be unlocked again since
1195 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1196 	 * always succeed here, as long as we hold the lru lock.
1197 	 */
1198 	spin_lock(&ttm_bo->bdev->lru_lock);
1199 	locked = dma_resv_trylock(ttm_bo->base.resv);
1200 	spin_unlock(&ttm_bo->bdev->lru_lock);
1201 	xe_assert(xe, locked);
1202 
1203 	return locked;
1204 }
1205 
1206 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1207 {
1208 	struct dma_resv_iter cursor;
1209 	struct dma_fence *fence;
1210 	struct dma_fence *replacement = NULL;
1211 	struct xe_bo *bo;
1212 
1213 	if (!xe_bo_is_xe_bo(ttm_bo))
1214 		return;
1215 
1216 	bo = ttm_to_xe_bo(ttm_bo);
1217 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1218 
1219 	/*
1220 	 * Corner case where TTM fails to allocate memory and this BO's resv
1221 	 * still points to the VM's resv.
1222 	 */
1223 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1224 		return;
1225 
1226 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1227 		return;
1228 
1229 	/*
1230 	 * Scrub the preempt fences if any. The unbind fence is already
1231 	 * attached to the resv.
1232 	 * TODO: Don't do this for external bos once we scrub them after
1233 	 * unbind.
1234 	 */
1235 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1236 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1237 		if (xe_fence_is_xe_preempt(fence) &&
1238 		    !dma_fence_is_signaled(fence)) {
1239 			if (!replacement)
1240 				replacement = dma_fence_get_stub();
1241 
1242 			dma_resv_replace_fences(ttm_bo->base.resv,
1243 						fence->context,
1244 						replacement,
1245 						DMA_RESV_USAGE_BOOKKEEP);
1246 		}
1247 	}
1248 	dma_fence_put(replacement);
1249 
1250 	dma_resv_unlock(ttm_bo->base.resv);
1251 }
1252 
1253 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1254 {
1255 	if (!xe_bo_is_xe_bo(ttm_bo))
1256 		return;
1257 
1258 	/*
1259 	 * Object is idle and about to be destroyed. Release the
1260 	 * dma-buf attachment.
1261 	 */
1262 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1263 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1264 						       struct xe_ttm_tt, ttm);
1265 
1266 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1267 					 DMA_BIDIRECTIONAL);
1268 		ttm_bo->sg = NULL;
1269 		xe_tt->sg = NULL;
1270 	}
1271 }
1272 
1273 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1274 {
1275 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1276 
1277 	if (ttm_bo->ttm) {
1278 		struct ttm_placement place = {};
1279 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1280 
1281 		drm_WARN_ON(&xe->drm, ret);
1282 	}
1283 }
1284 
1285 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1286 {
1287 	struct ttm_operation_ctx ctx = {
1288 		.interruptible = false,
1289 		.gfp_retry_mayfail = false,
1290 	};
1291 
1292 	if (ttm_bo->ttm) {
1293 		struct xe_ttm_tt *xe_tt =
1294 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1295 
1296 		if (xe_tt->purgeable)
1297 			xe_ttm_bo_purge(ttm_bo, &ctx);
1298 	}
1299 }
1300 
1301 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1302 				unsigned long offset, void *buf, int len,
1303 				int write)
1304 {
1305 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1306 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1307 	struct iosys_map vmap;
1308 	struct xe_res_cursor cursor;
1309 	struct xe_mem_region *vram;
1310 	int bytes_left = len;
1311 
1312 	xe_bo_assert_held(bo);
1313 	xe_device_assert_mem_access(xe);
1314 
1315 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1316 		return -EIO;
1317 
1318 	/* FIXME: Use GPU for non-visible VRAM */
1319 	if (!xe_ttm_resource_visible(ttm_bo->resource))
1320 		return -EIO;
1321 
1322 	vram = res_to_mem_region(ttm_bo->resource);
1323 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1324 		     bo->size - (offset & PAGE_MASK), &cursor);
1325 
1326 	do {
1327 		unsigned long page_offset = (offset & ~PAGE_MASK);
1328 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1329 
1330 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1331 					  cursor.start);
1332 		if (write)
1333 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1334 		else
1335 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1336 
1337 		buf += byte_count;
1338 		offset += byte_count;
1339 		bytes_left -= byte_count;
1340 		if (bytes_left)
1341 			xe_res_next(&cursor, PAGE_SIZE);
1342 	} while (bytes_left);
1343 
1344 	return len;
1345 }
1346 
1347 const struct ttm_device_funcs xe_ttm_funcs = {
1348 	.ttm_tt_create = xe_ttm_tt_create,
1349 	.ttm_tt_populate = xe_ttm_tt_populate,
1350 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1351 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1352 	.evict_flags = xe_evict_flags,
1353 	.move = xe_bo_move,
1354 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1355 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1356 	.access_memory = xe_ttm_access_memory,
1357 	.release_notify = xe_ttm_bo_release_notify,
1358 	.eviction_valuable = ttm_bo_eviction_valuable,
1359 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1360 	.swap_notify = xe_ttm_bo_swap_notify,
1361 };
1362 
1363 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1364 {
1365 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1366 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1367 	struct xe_tile *tile;
1368 	u8 id;
1369 
1370 	if (bo->ttm.base.import_attach)
1371 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1372 	drm_gem_object_release(&bo->ttm.base);
1373 
1374 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1375 
1376 	for_each_tile(tile, xe, id)
1377 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1378 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1379 
1380 #ifdef CONFIG_PROC_FS
1381 	if (bo->client)
1382 		xe_drm_client_remove_bo(bo);
1383 #endif
1384 
1385 	if (bo->vm && xe_bo_is_user(bo))
1386 		xe_vm_put(bo->vm);
1387 
1388 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1389 	if (!list_empty(&bo->vram_userfault_link))
1390 		list_del(&bo->vram_userfault_link);
1391 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1392 
1393 	kfree(bo);
1394 }
1395 
1396 static void xe_gem_object_free(struct drm_gem_object *obj)
1397 {
1398 	/* Our BO reference counting scheme works as follows:
1399 	 *
1400 	 * The gem object kref is typically used throughout the driver,
1401 	 * and the gem object holds a ttm_buffer_object refcount, so
1402 	 * that when the last gem object reference is put, which is when
1403 	 * we end up in this function, we also put that ttm_buffer_object
1404 	 * refcount. Anything using gem interfaces is then no longer
1405 	 * allowed to access the object in a way that requires a gem
1406 	 * refcount, including locking the object.
1407 	 *
1408 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1409 	 * refcount directly if needed.
1410 	 */
1411 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1412 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1413 }
1414 
1415 static void xe_gem_object_close(struct drm_gem_object *obj,
1416 				struct drm_file *file_priv)
1417 {
1418 	struct xe_bo *bo = gem_to_xe_bo(obj);
1419 
1420 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1421 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1422 
1423 		xe_bo_lock(bo, false);
1424 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1425 		xe_bo_unlock(bo);
1426 	}
1427 }
1428 
1429 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1430 {
1431 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1432 	struct drm_device *ddev = tbo->base.dev;
1433 	struct xe_device *xe = to_xe_device(ddev);
1434 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1435 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1436 	vm_fault_t ret;
1437 	int idx;
1438 
1439 	if (needs_rpm)
1440 		xe_pm_runtime_get(xe);
1441 
1442 	ret = ttm_bo_vm_reserve(tbo, vmf);
1443 	if (ret)
1444 		goto out;
1445 
1446 	if (drm_dev_enter(ddev, &idx)) {
1447 		trace_xe_bo_cpu_fault(bo);
1448 
1449 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1450 					       TTM_BO_VM_NUM_PREFAULT);
1451 		drm_dev_exit(idx);
1452 	} else {
1453 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1454 	}
1455 
1456 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1457 		goto out;
1458 	/*
1459 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1460 	 */
1461 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1462 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1463 		if (list_empty(&bo->vram_userfault_link))
1464 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1465 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1466 	}
1467 
1468 	dma_resv_unlock(tbo->base.resv);
1469 out:
1470 	if (needs_rpm)
1471 		xe_pm_runtime_put(xe);
1472 
1473 	return ret;
1474 }
1475 
1476 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1477 			   void *buf, int len, int write)
1478 {
1479 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1480 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1481 	struct xe_device *xe = xe_bo_device(bo);
1482 	int ret;
1483 
1484 	xe_pm_runtime_get(xe);
1485 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1486 	xe_pm_runtime_put(xe);
1487 
1488 	return ret;
1489 }
1490 
1491 /**
1492  * xe_bo_read() - Read from an xe_bo
1493  * @bo: The buffer object to read from.
1494  * @offset: The byte offset to start reading from.
1495  * @dst: Location to store the data read.
1496  * @size: Size in bytes for the read.
1497  *
1498  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1499  *
1500  * Return: Zero on success, or negative error.
1501  */
1502 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1503 {
1504 	int ret;
1505 
1506 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1507 	if (ret >= 0 && ret != size)
1508 		ret = -EIO;
1509 	else if (ret == size)
1510 		ret = 0;
1511 
1512 	return ret;
1513 }
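
/*
 * Illustrative sketch (hypothetical caller): reading a small header out of
 * a BO with xe_bo_read(). "bo" and "hdr" are assumptions, not driver state.
 *
 *	u32 hdr;
 *	int err = xe_bo_read(bo, 0, &hdr, sizeof(hdr));
 *
 *	if (err)
 *		return err;
 */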
1514 
1515 static const struct vm_operations_struct xe_gem_vm_ops = {
1516 	.fault = xe_gem_fault,
1517 	.open = ttm_bo_vm_open,
1518 	.close = ttm_bo_vm_close,
1519 	.access = xe_bo_vm_access,
1520 };
1521 
1522 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1523 	.free = xe_gem_object_free,
1524 	.close = xe_gem_object_close,
1525 	.mmap = drm_gem_ttm_mmap,
1526 	.export = xe_gem_prime_export,
1527 	.vm_ops = &xe_gem_vm_ops,
1528 };
1529 
1530 /**
1531  * xe_bo_alloc - Allocate storage for a struct xe_bo
1532  *
1533  * This function is intended to allocate storage to be used for input
1534  * to __xe_bo_create_locked(), in the case where a pointer to the bo to be
1535  * created is needed before the call to __xe_bo_create_locked().
1536  * If __xe_bo_create_locked() ends up never being called, then the
1537  * storage allocated with this function needs to be freed using
1538  * xe_bo_free().
1539  *
1540  * Return: A pointer to an uninitialized struct xe_bo on success,
1541  * ERR_PTR(-ENOMEM) on error.
1542  */
1543 struct xe_bo *xe_bo_alloc(void)
1544 {
1545 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1546 
1547 	if (!bo)
1548 		return ERR_PTR(-ENOMEM);
1549 
1550 	return bo;
1551 }
1552 
1553 /**
1554  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1555  * @bo: The buffer object storage.
1556  *
1557  * Refer to xe_bo_alloc() documentation for valid use-cases.
1558  */
1559 void xe_bo_free(struct xe_bo *bo)
1560 {
1561 	kfree(bo);
1562 }
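
/*
 * Illustrative sketch (hypothetical error path) of the pairing described in
 * the xe_bo_alloc() kernel-doc: storage that is never handed to
 * ___xe_bo_create_locked() must be released with xe_bo_free().
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	if (setup_failed) {		// hypothetical condition
 *		xe_bo_free(bo);
 *		return -EINVAL;
 *	}
 *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
 *				    cpu_caching, type, flags);
 */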
1563 
1564 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1565 				     struct xe_tile *tile, struct dma_resv *resv,
1566 				     struct ttm_lru_bulk_move *bulk, size_t size,
1567 				     u16 cpu_caching, enum ttm_bo_type type,
1568 				     u32 flags)
1569 {
1570 	struct ttm_operation_ctx ctx = {
1571 		.interruptible = true,
1572 		.no_wait_gpu = false,
1573 		.gfp_retry_mayfail = true,
1574 	};
1575 	struct ttm_placement *placement;
1576 	uint32_t alignment;
1577 	size_t aligned_size;
1578 	int err;
1579 
1580 	/* Only kernel objects should set a tile */
1581 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1582 
1583 	if (XE_WARN_ON(!size)) {
1584 		xe_bo_free(bo);
1585 		return ERR_PTR(-EINVAL);
1586 	}
1587 
1588 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT to also be set */
1589 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1590 		return ERR_PTR(-EINVAL);
1591 
1592 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1593 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1594 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1595 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1596 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1597 
1598 		aligned_size = ALIGN(size, align);
1599 		if (type != ttm_bo_type_device)
1600 			size = ALIGN(size, align);
1601 		flags |= XE_BO_FLAG_INTERNAL_64K;
1602 		alignment = align >> PAGE_SHIFT;
1603 	} else {
1604 		aligned_size = ALIGN(size, SZ_4K);
1605 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1606 		alignment = SZ_4K >> PAGE_SHIFT;
1607 	}
1608 
1609 	if (type == ttm_bo_type_device && aligned_size != size)
1610 		return ERR_PTR(-EINVAL);
1611 
1612 	if (!bo) {
1613 		bo = xe_bo_alloc();
1614 		if (IS_ERR(bo))
1615 			return bo;
1616 	}
1617 
1618 	bo->ccs_cleared = false;
1619 	bo->tile = tile;
1620 	bo->size = size;
1621 	bo->flags = flags;
1622 	bo->cpu_caching = cpu_caching;
1623 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1624 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1625 	INIT_LIST_HEAD(&bo->pinned_link);
1626 #ifdef CONFIG_PROC_FS
1627 	INIT_LIST_HEAD(&bo->client_link);
1628 #endif
1629 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1630 
1631 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1632 
1633 	if (resv) {
1634 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1635 		ctx.resv = resv;
1636 	}
1637 
1638 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1639 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1640 		if (WARN_ON(err)) {
1641 			xe_ttm_bo_destroy(&bo->ttm);
1642 			return ERR_PTR(err);
1643 		}
1644 	}
1645 
1646 	/* Defer populating type_sg bos */
1647 	placement = (type == ttm_bo_type_sg ||
1648 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1649 		&bo->placement;
1650 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1651 				   placement, alignment,
1652 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1653 	if (err)
1654 		return ERR_PTR(err);
1655 
1656 	/*
1657 	 * The VRAM pages underneath are potentially still being accessed by the
1658 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1659 	 * sure to add any corresponding move/clear fences into the object's
1660 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1661 	 *
1662 	 * For KMD internal buffers we don't care about GPU clearing, however we
1663 	 * still need to handle async evictions, where the VRAM is still being
1664 	 * accessed by the GPU. Most internal callers are not expecting this,
1665 	 * since they are missing the required synchronisation before accessing
1666 	 * the memory. To keep things simple just sync wait any kernel fences
1667 	 * here, if the buffer is designated KMD internal.
1668 	 *
1669 	 * For normal userspace objects we should already have the required
1670 	 * pipelining or sync waiting elsewhere, since we already have to deal
1671 	 * with things like async GPU clearing.
1672 	 */
1673 	if (type == ttm_bo_type_kernel) {
1674 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1675 						     DMA_RESV_USAGE_KERNEL,
1676 						     ctx.interruptible,
1677 						     MAX_SCHEDULE_TIMEOUT);
1678 
1679 		if (timeout < 0) {
1680 			if (!resv)
1681 				dma_resv_unlock(bo->ttm.base.resv);
1682 			xe_bo_put(bo);
1683 			return ERR_PTR(timeout);
1684 		}
1685 	}
1686 
1687 	bo->created = true;
1688 	if (bulk)
1689 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1690 	else
1691 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1692 
1693 	return bo;
1694 }
1695 
1696 static int __xe_bo_fixed_placement(struct xe_device *xe,
1697 				   struct xe_bo *bo,
1698 				   u32 flags,
1699 				   u64 start, u64 end, u64 size)
1700 {
1701 	struct ttm_place *place = bo->placements;
1702 
1703 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1704 		return -EINVAL;
1705 
1706 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1707 	place->fpfn = start >> PAGE_SHIFT;
1708 	place->lpfn = end >> PAGE_SHIFT;
1709 
1710 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1711 	case XE_BO_FLAG_VRAM0:
1712 		place->mem_type = XE_PL_VRAM0;
1713 		break;
1714 	case XE_BO_FLAG_VRAM1:
1715 		place->mem_type = XE_PL_VRAM1;
1716 		break;
1717 	case XE_BO_FLAG_STOLEN:
1718 		place->mem_type = XE_PL_STOLEN;
1719 		break;
1720 
1721 	default:
1722 		/* 0 or multiple of the above set */
1723 		return -EINVAL;
1724 	}
1725 
1726 	bo->placement = (struct ttm_placement) {
1727 		.num_placement = 1,
1728 		.placement = place,
1729 	};
1730 
1731 	return 0;
1732 }
1733 
1734 static struct xe_bo *
1735 __xe_bo_create_locked(struct xe_device *xe,
1736 		      struct xe_tile *tile, struct xe_vm *vm,
1737 		      size_t size, u64 start, u64 end,
1738 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
1739 		      u64 alignment)
1740 {
1741 	struct xe_bo *bo = NULL;
1742 	int err;
1743 
1744 	if (vm)
1745 		xe_vm_assert_held(vm);
1746 
1747 	if (start || end != ~0ULL) {
1748 		bo = xe_bo_alloc();
1749 		if (IS_ERR(bo))
1750 			return bo;
1751 
1752 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1753 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1754 		if (err) {
1755 			xe_bo_free(bo);
1756 			return ERR_PTR(err);
1757 		}
1758 	}
1759 
1760 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1761 				    vm && !xe_vm_in_fault_mode(vm) &&
1762 				    flags & XE_BO_FLAG_USER ?
1763 				    &vm->lru_bulk_move : NULL, size,
1764 				    cpu_caching, type, flags);
1765 	if (IS_ERR(bo))
1766 		return bo;
1767 
1768 	bo->min_align = alignment;
1769 
1770 	/*
1771 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
1772 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1773 	 * will keep a reference to the vm, and avoid circular references
1774 	 * by having all the vm's bo references released at vm close
1775 	 * time.
1776 	 */
1777 	if (vm && xe_bo_is_user(bo))
1778 		xe_vm_get(vm);
1779 	bo->vm = vm;
1780 
1781 	if (bo->flags & XE_BO_FLAG_GGTT) {
1782 		struct xe_tile *t;
1783 		u8 id;
1784 
1785 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
1786 			if (!tile && flags & XE_BO_FLAG_STOLEN)
1787 				tile = xe_device_get_root_tile(xe);
1788 
1789 			xe_assert(xe, tile);
1790 		}
1791 
1792 		for_each_tile(t, xe, id) {
1793 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
1794 				continue;
1795 
1796 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
1797 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
1798 							   start + bo->size, U64_MAX);
1799 			} else {
1800 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
1801 			}
1802 			if (err)
1803 				goto err_unlock_put_bo;
1804 		}
1805 	}
1806 
1807 	return bo;
1808 
1809 err_unlock_put_bo:
1810 	__xe_bo_unset_bulk_move(bo);
1811 	xe_bo_unlock_vm_held(bo);
1812 	xe_bo_put(bo);
1813 	return ERR_PTR(err);
1814 }
1815 
1816 struct xe_bo *
1817 xe_bo_create_locked_range(struct xe_device *xe,
1818 			  struct xe_tile *tile, struct xe_vm *vm,
1819 			  size_t size, u64 start, u64 end,
1820 			  enum ttm_bo_type type, u32 flags, u64 alignment)
1821 {
1822 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
1823 				     flags, alignment);
1824 }
1825 
1826 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1827 				  struct xe_vm *vm, size_t size,
1828 				  enum ttm_bo_type type, u32 flags)
1829 {
1830 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
1831 				     flags, 0);
1832 }
1833 
1834 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1835 				struct xe_vm *vm, size_t size,
1836 				u16 cpu_caching,
1837 				u32 flags)
1838 {
1839 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1840 						 cpu_caching, ttm_bo_type_device,
1841 						 flags | XE_BO_FLAG_USER, 0);
1842 	if (!IS_ERR(bo))
1843 		xe_bo_unlock_vm_held(bo);
1844 
1845 	return bo;
1846 }
1847 
1848 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1849 			   struct xe_vm *vm, size_t size,
1850 			   enum ttm_bo_type type, u32 flags)
1851 {
1852 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1853 
1854 	if (!IS_ERR(bo))
1855 		xe_bo_unlock_vm_held(bo);
1856 
1857 	return bo;
1858 }
1859 
1860 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1861 				      struct xe_vm *vm,
1862 				      size_t size, u64 offset,
1863 				      enum ttm_bo_type type, u32 flags)
1864 {
1865 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
1866 					       type, flags, 0);
1867 }
1868 
1869 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
1870 					      struct xe_tile *tile,
1871 					      struct xe_vm *vm,
1872 					      size_t size, u64 offset,
1873 					      enum ttm_bo_type type, u32 flags,
1874 					      u64 alignment)
1875 {
1876 	struct xe_bo *bo;
1877 	int err;
1878 	u64 start = offset == ~0ull ? 0 : offset;
1879 	u64 end = offset == ~0ull ? offset : start + size;
1880 
1881 	if (flags & XE_BO_FLAG_STOLEN &&
1882 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1883 		flags |= XE_BO_FLAG_GGTT;
1884 
1885 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1886 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
1887 				       alignment);
1888 	if (IS_ERR(bo))
1889 		return bo;
1890 
1891 	err = xe_bo_pin(bo);
1892 	if (err)
1893 		goto err_put;
1894 
1895 	err = xe_bo_vmap(bo);
1896 	if (err)
1897 		goto err_unpin;
1898 
1899 	xe_bo_unlock_vm_held(bo);
1900 
1901 	return bo;
1902 
1903 err_unpin:
1904 	xe_bo_unpin(bo);
1905 err_put:
1906 	xe_bo_unlock_vm_held(bo);
1907 	xe_bo_put(bo);
1908 	return ERR_PTR(err);
1909 }
1910 
1911 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1912 				   struct xe_vm *vm, size_t size,
1913 				   enum ttm_bo_type type, u32 flags)
1914 {
1915 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1916 }
1917 
1918 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1919 				     const void *data, size_t size,
1920 				     enum ttm_bo_type type, u32 flags)
1921 {
1922 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1923 						ALIGN(size, PAGE_SIZE),
1924 						type, flags);
1925 	if (IS_ERR(bo))
1926 		return bo;
1927 
1928 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1929 
1930 	return bo;
1931 }
1932 
1933 static void __xe_bo_unpin_map_no_vm(void *arg)
1934 {
1935 	xe_bo_unpin_map_no_vm(arg);
1936 }
1937 
1938 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1939 					   size_t size, u32 flags)
1940 {
1941 	struct xe_bo *bo;
1942 	int ret;
1943 
1944 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1945 	if (IS_ERR(bo))
1946 		return bo;
1947 
1948 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
1949 	if (ret)
1950 		return ERR_PTR(ret);
1951 
1952 	return bo;
1953 }
1954 
1955 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1956 					     const void *data, size_t size, u32 flags)
1957 {
1958 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1959 
1960 	if (IS_ERR(bo))
1961 		return bo;
1962 
1963 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1964 
1965 	return bo;
1966 }
1967 
1968 /**
1969  * xe_managed_bo_reinit_in_vram - Replace a managed system memory BO with a VRAM copy
1970  * @xe: xe device
1971  * @tile: Tile where the new buffer will be created
1972  * @src: Managed buffer object allocated in system memory
1973  *
1974  * Replace a managed src buffer object allocated in system memory with a new
1975  * one allocated in VRAM, copying the data between them.
1976  * The buffer object in VRAM will not have the same GGTT address; the caller
1977  * is responsible for making sure that any old references to it are updated.
1978  *
1979  * Returns 0 for success, negative error code otherwise.
1980  */
1981 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1982 {
1983 	struct xe_bo *bo;
1984 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
1985 
1986 	dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
1987 
1988 	xe_assert(xe, IS_DGFX(xe));
1989 	xe_assert(xe, !(*src)->vmap.is_iomem);
1990 
1991 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
1992 					    (*src)->size, dst_flags);
1993 	if (IS_ERR(bo))
1994 		return PTR_ERR(bo);
1995 
1996 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
1997 	*src = bo;
1998 
1999 	return 0;
2000 }
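
/*
 * Illustrative sketch, not an in-tree caller: a managed BO first created in
 * system memory can be re-homed to VRAM later on. The data, size and tile
 * identifiers below are assumptions made for the example.
 *
 *	bo = xe_managed_bo_create_from_data(xe, tile, data, size,
 *					    XE_BO_FLAG_SYSTEM |
 *					    XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	err = xe_managed_bo_reinit_in_vram(xe, tile, &bo);
 *	if (err)
 *		return err;
 *
 * On success bo points at the VRAM copy and any cached GGTT address is stale,
 * as noted in the kerneldoc above.
 */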
2001 
2002 /*
2003  * XXX: This is in the VM bind data path, likely should calculate this once and
2004  * store, with a recalculation if the BO is moved.
2005  */
2006 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2007 {
2008 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2009 
2010 	if (res->mem_type == XE_PL_STOLEN)
2011 		return xe_ttm_stolen_gpu_offset(xe);
2012 
2013 	return res_to_mem_region(res)->dpa_base;
2014 }
2015 
2016 /**
2017  * xe_bo_pin_external - pin an external BO
2018  * @bo: buffer object to be pinned
2019  *
2020  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2021  * BO. Unlike xe_bo_pin(), this function has its own set of asserts and code
2022  * to ensure evict / restore on suspend / resume.
2023  *
2024  * Returns 0 for success, negative error code otherwise.
2025  */
2026 int xe_bo_pin_external(struct xe_bo *bo)
2027 {
2028 	struct xe_device *xe = xe_bo_device(bo);
2029 	int err;
2030 
2031 	xe_assert(xe, !bo->vm);
2032 	xe_assert(xe, xe_bo_is_user(bo));
2033 
2034 	if (!xe_bo_is_pinned(bo)) {
2035 		err = xe_bo_validate(bo, NULL, false);
2036 		if (err)
2037 			return err;
2038 
2039 		if (xe_bo_is_vram(bo)) {
2040 			spin_lock(&xe->pinned.lock);
2041 			list_add_tail(&bo->pinned_link,
2042 				      &xe->pinned.external_vram);
2043 			spin_unlock(&xe->pinned.lock);
2044 		}
2045 	}
2046 
2047 	ttm_bo_pin(&bo->ttm);
2048 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2049 		xe_ttm_tt_account_subtract(bo->ttm.ttm);
2050 
2051 	/*
2052 	 * FIXME: If we always use the reserve / unreserve functions for locking
2053 	 * we do not need this.
2054 	 */
2055 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2056 
2057 	return 0;
2058 }
2059 
2060 int xe_bo_pin(struct xe_bo *bo)
2061 {
2062 	struct ttm_place *place = &bo->placements[0];
2063 	struct xe_device *xe = xe_bo_device(bo);
2064 	int err;
2065 
2066 	/* We currently don't expect user BO to be pinned */
2067 	xe_assert(xe, !xe_bo_is_user(bo));
2068 
2069 	/* Pinned object must be in GGTT or have pinned flag */
2070 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
2071 				   XE_BO_FLAG_GGTT));
2072 
2073 	/*
2074 	 * There is no reason we can't support pinning imported dma-bufs; we just
2075 	 * don't expect to pin an imported dma-buf.
2076 	 */
2077 	xe_assert(xe, !bo->ttm.base.import_attach);
2078 
2079 	/* We only expect at most 1 pin */
2080 	xe_assert(xe, !xe_bo_is_pinned(bo));
2081 
2082 	err = xe_bo_validate(bo, NULL, false);
2083 	if (err)
2084 		return err;
2085 
2086 	/*
2087 	 * For pinned objects on DGFX which are also in VRAM, we expect these to
2088 	 * be in contiguous VRAM memory. This is required for eviction / restore
2089 	 * during suspend / resume (force restore to the same physical address).
2090 	 */
2091 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
2092 	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
2093 		if (mem_type_is_vram(place->mem_type)) {
2094 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
2095 
2096 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
2097 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
2098 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
2099 		}
2100 	}
2101 
2102 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2103 		spin_lock(&xe->pinned.lock);
2104 		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
2105 		spin_unlock(&xe->pinned.lock);
2106 	}
2107 
2108 	ttm_bo_pin(&bo->ttm);
2109 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2110 		xe_ttm_tt_account_subtract(bo->ttm.ttm);
2111 
2112 	/*
2113 	 * FIXME: If we always use the reserve / unreserve functions for locking
2114 	 * we do not need this.
2115 	 */
2116 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2117 
2118 	return 0;
2119 }
2120 
2121 /**
2122  * xe_bo_unpin_external - unpin an external BO
2123  * @bo: buffer object to be unpinned
2124  *
2125  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2126  * BO. Unlike xe_bo_unpin(), this function has its own set of asserts and code
2127  * to ensure evict / restore on suspend / resume.
2130  */
2131 void xe_bo_unpin_external(struct xe_bo *bo)
2132 {
2133 	struct xe_device *xe = xe_bo_device(bo);
2134 
2135 	xe_assert(xe, !bo->vm);
2136 	xe_assert(xe, xe_bo_is_pinned(bo));
2137 	xe_assert(xe, xe_bo_is_user(bo));
2138 
2139 	spin_lock(&xe->pinned.lock);
2140 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
2141 		list_del_init(&bo->pinned_link);
2142 	spin_unlock(&xe->pinned.lock);
2143 
2144 	ttm_bo_unpin(&bo->ttm);
2145 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2146 		xe_ttm_tt_account_add(bo->ttm.ttm);
2147 
2148 	/*
2149 	 * FIXME: If we always use the reserve / unreserve functions for locking
2150 	 * we do not need this.
2151 	 */
2152 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2153 }
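
/*
 * Hedged pairing sketch for the two external pin helpers; the bo and the
 * surrounding context are assumptions, not taken from an in-tree caller. Both
 * helpers are expected to run with the BO's dma_resv held.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	err = xe_bo_pin_external(bo);
 *	xe_bo_unlock(bo);
 *	if (err)
 *		return err;
 *
 *	... use the exported, pinned BO ...
 *
 *	xe_bo_lock(bo, false);
 *	xe_bo_unpin_external(bo);
 *	xe_bo_unlock(bo);
 */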
2154 
2155 void xe_bo_unpin(struct xe_bo *bo)
2156 {
2157 	struct ttm_place *place = &bo->placements[0];
2158 	struct xe_device *xe = xe_bo_device(bo);
2159 
2160 	xe_assert(xe, !bo->ttm.base.import_attach);
2161 	xe_assert(xe, xe_bo_is_pinned(bo));
2162 
2163 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2164 		spin_lock(&xe->pinned.lock);
2165 		xe_assert(xe, !list_empty(&bo->pinned_link));
2166 		list_del_init(&bo->pinned_link);
2167 		spin_unlock(&xe->pinned.lock);
2168 	}
2169 	ttm_bo_unpin(&bo->ttm);
2170 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2171 		xe_ttm_tt_account_add(bo->ttm.ttm);
2172 }
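
/*
 * Minimal pairing sketch for kernel BOs (bo is an assumed caller-provided
 * object): xe_bo_pin() and xe_bo_unpin() are called with the BO lock held and
 * must balance. Most users go through the xe_bo_create_pin_map*() helpers
 * above rather than open-coding this.
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_pin(bo);
 *	xe_bo_unlock(bo);
 *	if (err)
 *		return err;
 *	...
 *	xe_bo_lock(bo, false);
 *	xe_bo_unpin(bo);
 *	xe_bo_unlock(bo);
 */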
2173 
2174 /**
2175  * xe_bo_validate() - Make sure the bo is in an allowed placement
2176  * @bo: The bo
2177  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2178  *      NULL. Used together with @allow_res_evict.
2179  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2180  *                   reservation object.
2181  *
2182  * Make sure the bo is in allowed placement, migrating it if necessary. If
2183  * needed, other bos will be evicted. If bos selected for eviction share
2184  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
2185  * set to true; otherwise they will be bypassed.
2186  *
2187  * Return: 0 on success, negative error code on failure. May return
2188  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2189  */
2190 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
2191 {
2192 	struct ttm_operation_ctx ctx = {
2193 		.interruptible = true,
2194 		.no_wait_gpu = false,
2195 		.gfp_retry_mayfail = true,
2196 	};
2197 
2198 	if (vm) {
2199 		lockdep_assert_held(&vm->lock);
2200 		xe_vm_assert_held(vm);
2201 
2202 		ctx.allow_res_evict = allow_res_evict;
2203 		ctx.resv = xe_vm_resv(vm);
2204 	}
2205 
2206 	trace_xe_bo_validate(bo);
2207 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2208 }
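
/*
 * Usage sketch with an assumed standalone bo (own reservation object, no
 * shared vm resv): validation runs under the BO lock, mirroring what
 * xe_bo_pin_external() above does internally. When a @vm is passed instead,
 * the vm locks must be held as described in the kerneldoc.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	err = xe_bo_validate(bo, NULL, false);
 *	xe_bo_unlock(bo);
 */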
2209 
2210 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2211 {
2212 	if (bo->destroy == &xe_ttm_bo_destroy)
2213 		return true;
2214 
2215 	return false;
2216 }
2217 
2218 /*
2219  * Resolve a BO address. There is no assert to check if the proper lock is held
2220  * so it should only be used in cases where it is not fatal to get the wrong
2221  * address, such as printing debug information, but not in cases where memory is
2222  * written based on this result.
2223  */
2224 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2225 {
2226 	struct xe_device *xe = xe_bo_device(bo);
2227 	struct xe_res_cursor cur;
2228 	u64 page;
2229 
2230 	xe_assert(xe, page_size <= PAGE_SIZE);
2231 	page = offset >> PAGE_SHIFT;
2232 	offset &= (PAGE_SIZE - 1);
2233 
2234 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2235 		xe_assert(xe, bo->ttm.ttm);
2236 
2237 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2238 				page_size, &cur);
2239 		return xe_res_dma(&cur) + offset;
2240 	} else {
2243 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2244 			     page_size, &cur);
2245 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2246 	}
2247 }
2248 
2249 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2250 {
2251 	if (!READ_ONCE(bo->ttm.pin_count))
2252 		xe_bo_assert_held(bo);
2253 	return __xe_bo_addr(bo, offset, page_size);
2254 }
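
/*
 * Debug-only sketch, per the comment above __xe_bo_addr(): resolve and print
 * the DMA address of the first page. The drm_dbg() call site is an assumption.
 *
 *	dma_addr_t addr = xe_bo_addr(bo, 0, PAGE_SIZE);
 *
 *	drm_dbg(&xe->drm, "bo %p first page at %pad\n", bo, &addr);
 */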
2255 
2256 int xe_bo_vmap(struct xe_bo *bo)
2257 {
2258 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2259 	void *virtual;
2260 	bool is_iomem;
2261 	int ret;
2262 
2263 	xe_bo_assert_held(bo);
2264 
2265 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2266 			!force_contiguous(bo->flags)))
2267 		return -EINVAL;
2268 
2269 	if (!iosys_map_is_null(&bo->vmap))
2270 		return 0;
2271 
2272 	/*
2273 	 * We use this more or less deprecated interface for now since
2274 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2275 	 * single page bos, which is done here.
2276 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2277 	 * to use struct iosys_map.
2278 	 */
2279 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
2280 	if (ret)
2281 		return ret;
2282 
2283 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
2284 	if (is_iomem)
2285 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
2286 	else
2287 		iosys_map_set_vaddr(&bo->vmap, virtual);
2288 
2289 	return 0;
2290 }
2291 
2292 static void __xe_bo_vunmap(struct xe_bo *bo)
2293 {
2294 	if (!iosys_map_is_null(&bo->vmap)) {
2295 		iosys_map_clear(&bo->vmap);
2296 		ttm_bo_kunmap(&bo->kmap);
2297 	}
2298 }
2299 
2300 void xe_bo_vunmap(struct xe_bo *bo)
2301 {
2302 	xe_bo_assert_held(bo);
2303 	__xe_bo_vunmap(bo);
2304 }
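
/*
 * Sketch of the expected vmap lifetime, assuming bo was created with
 * XE_BO_FLAG_NEEDS_CPU_ACCESS plus a contiguous-forcing flag and is locked;
 * data and len are placeholders.
 *
 *	err = xe_bo_vmap(bo);
 *	if (err)
 *		return err;
 *	xe_map_memcpy_to(xe, &bo->vmap, 0, data, len);
 *	xe_bo_vunmap(bo);
 */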
2305 
2306 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2307 			struct drm_file *file)
2308 {
2309 	struct xe_device *xe = to_xe_device(dev);
2310 	struct xe_file *xef = to_xe_file(file);
2311 	struct drm_xe_gem_create *args = data;
2312 	struct xe_vm *vm = NULL;
2313 	struct xe_bo *bo;
2314 	unsigned int bo_flags;
2315 	u32 handle;
2316 	int err;
2317 
2318 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2319 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2320 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2321 		return -EINVAL;
2322 
2323 	/* at least one valid memory placement must be specified */
2324 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2325 			 !args->placement))
2326 		return -EINVAL;
2327 
2328 	if (XE_IOCTL_DBG(xe, args->flags &
2329 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2330 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2331 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2332 		return -EINVAL;
2333 
2334 	if (XE_IOCTL_DBG(xe, args->handle))
2335 		return -EINVAL;
2336 
2337 	if (XE_IOCTL_DBG(xe, !args->size))
2338 		return -EINVAL;
2339 
2340 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2341 		return -EINVAL;
2342 
2343 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2344 		return -EINVAL;
2345 
2346 	bo_flags = 0;
2347 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2348 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2349 
2350 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2351 		bo_flags |= XE_BO_FLAG_SCANOUT;
2352 
2353 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2354 
2355 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2356 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2357 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2358 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2359 	    IS_ALIGNED(args->size, SZ_64K))
2360 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2361 
2362 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2363 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2364 			return -EINVAL;
2365 
2366 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2367 	}
2368 
2369 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2370 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2371 		return -EINVAL;
2372 
2373 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2374 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2375 		return -EINVAL;
2376 
2377 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2378 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2379 		return -EINVAL;
2380 
2381 	if (args->vm_id) {
2382 		vm = xe_vm_lookup(xef, args->vm_id);
2383 		if (XE_IOCTL_DBG(xe, !vm))
2384 			return -ENOENT;
2385 		err = xe_vm_lock(vm, true);
2386 		if (err)
2387 			goto out_vm;
2388 	}
2389 
2390 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2391 			       bo_flags);
2392 
2393 	if (vm)
2394 		xe_vm_unlock(vm);
2395 
2396 	if (IS_ERR(bo)) {
2397 		err = PTR_ERR(bo);
2398 		goto out_vm;
2399 	}
2400 
2401 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2402 	if (err)
2403 		goto out_bulk;
2404 
2405 	args->handle = handle;
2406 	goto out_put;
2407 
2408 out_bulk:
2409 	if (vm && !xe_vm_in_fault_mode(vm)) {
2410 		xe_vm_lock(vm, false);
2411 		__xe_bo_unset_bulk_move(bo);
2412 		xe_vm_unlock(vm);
2413 	}
2414 out_put:
2415 	xe_bo_put(bo);
2416 out_vm:
2417 	if (vm)
2418 		xe_vm_put(vm);
2419 
2420 	return err;
2421 }
2422 
2423 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2424 			     struct drm_file *file)
2425 {
2426 	struct xe_device *xe = to_xe_device(dev);
2427 	struct drm_xe_gem_mmap_offset *args = data;
2428 	struct drm_gem_object *gem_obj;
2429 
2430 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2431 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2432 		return -EINVAL;
2433 
2434 	if (XE_IOCTL_DBG(xe, args->flags))
2435 		return -EINVAL;
2436 
2437 	gem_obj = drm_gem_object_lookup(file, args->handle);
2438 	if (XE_IOCTL_DBG(xe, !gem_obj))
2439 		return -ENOENT;
2440 
2441 	/* The mmap offset was set up at BO allocation time. */
2442 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2443 
2444 	xe_bo_put(gem_to_xe_bo(gem_obj));
2445 	return 0;
2446 }
2447 
2448 /**
2449  * xe_bo_lock() - Lock the buffer object's dma_resv object
2450  * @bo: The struct xe_bo whose lock is to be taken
2451  * @intr: Whether to perform any wait interruptible
2452  *
2453  * Locks the buffer object's dma_resv object. If the buffer object is
2454  * pointing to a shared dma_resv object, that shared lock is locked.
2455  *
2456  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2457  * contended lock was interrupted. If @intr is set to false, the
2458  * function always returns 0.
2459  */
2460 int xe_bo_lock(struct xe_bo *bo, bool intr)
2461 {
2462 	if (intr)
2463 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2464 
2465 	dma_resv_lock(bo->ttm.base.resv, NULL);
2466 
2467 	return 0;
2468 }
2469 
2470 /**
2471  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2472  * @bo: The struct xe_bo whose lock is to be released.
2473  *
2474  * Unlock a buffer object lock that was locked by xe_bo_lock().
2475  */
2476 void xe_bo_unlock(struct xe_bo *bo)
2477 {
2478 	dma_resv_unlock(bo->ttm.base.resv);
2479 }
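
/*
 * Minimal lock/unlock sketch: only the interruptible variant can fail, and
 * then only with -EINTR per the kerneldoc above.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	... access or validate the BO under its reservation ...
 *	xe_bo_unlock(bo);
 */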
2480 
2481 /**
2482  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2483  * @bo: The buffer object to migrate
2484  * @mem_type: The TTM memory type intended to migrate to
2485  *
2486  * Check whether the buffer object supports migration to the
2487  * given memory type. Note that pinning may affect the ability to migrate as
2488  * returned by this function.
2489  *
2490  * This function is primarily intended as a helper for checking the
2491  * possibility to migrate buffer objects and can be called without
2492  * the object lock held.
2493  *
2494  * Return: true if migration is possible, false otherwise.
2495  */
2496 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2497 {
2498 	unsigned int cur_place;
2499 
2500 	if (bo->ttm.type == ttm_bo_type_kernel)
2501 		return true;
2502 
2503 	if (bo->ttm.type == ttm_bo_type_sg)
2504 		return false;
2505 
2506 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2507 	     cur_place++) {
2508 		if (bo->placements[cur_place].mem_type == mem_type)
2509 			return true;
2510 	}
2511 
2512 	return false;
2513 }
2514 
2515 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2516 {
2517 	memset(place, 0, sizeof(*place));
2518 	place->mem_type = mem_type;
2519 }
2520 
2521 /**
2522  * xe_bo_migrate - Migrate an object to the desired region id
2523  * @bo: The buffer object to migrate.
2524  * @mem_type: The TTM region type to migrate to.
2525  *
2526  * Attempt to migrate the buffer object to the desired memory region. The
2527  * buffer object may not be pinned, and must be locked.
2528  * On successful completion, the object memory type will be updated,
2529  * but an async migration task may not have completed yet. To wait for the
2530  * migration to complete, wait for the object's kernel fences to signal
2531  * with the object lock held.
2532  *
2533  * Return: 0 on success. Negative error code on failure. In particular may
2534  * return -EINTR or -ERESTARTSYS if signal pending.
2535  */
2536 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2537 {
2538 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2539 	struct ttm_operation_ctx ctx = {
2540 		.interruptible = true,
2541 		.no_wait_gpu = false,
2542 		.gfp_retry_mayfail = true,
2543 	};
2544 	struct ttm_placement placement;
2545 	struct ttm_place requested;
2546 
2547 	xe_bo_assert_held(bo);
2548 
2549 	if (bo->ttm.resource->mem_type == mem_type)
2550 		return 0;
2551 
2552 	if (xe_bo_is_pinned(bo))
2553 		return -EBUSY;
2554 
2555 	if (!xe_bo_can_migrate(bo, mem_type))
2556 		return -EINVAL;
2557 
2558 	xe_place_from_ttm_type(mem_type, &requested);
2559 	placement.num_placement = 1;
2560 	placement.placement = &requested;
2561 
2562 	/*
2563 	 * Stolen needs to be handled like below VRAM handling if we ever need
2564 	 * to support it.
2565 	 */
2566 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2567 
2568 	if (mem_type_is_vram(mem_type)) {
2569 		u32 c = 0;
2570 
2571 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2572 	}
2573 
2574 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2575 }
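
/*
 * Migration sketch under stated assumptions (bo is unpinned and provided by
 * the caller): migrate to GTT, then wait on the kernel fences with the lock
 * held to ensure the async copy has finished, as the kerneldoc above notes.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	err = xe_bo_migrate(bo, XE_PL_TT);
 *	if (!err)
 *		dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 *				      true, MAX_SCHEDULE_TIMEOUT);
 *	xe_bo_unlock(bo);
 */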
2576 
2577 /**
2578  * xe_bo_evict - Evict an object to its evict placement
2579  * @bo: The buffer object to migrate.
2580  * @force_alloc: Set force_alloc in ttm_operation_ctx
2581  *
2582  * On successful completion, the object memory will be moved to its evict
2583  * placement. This function blocks until the object has been fully moved.
2584  *
2585  * Return: 0 on success. Negative error code on failure.
2586  */
2587 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2588 {
2589 	struct ttm_operation_ctx ctx = {
2590 		.interruptible = false,
2591 		.no_wait_gpu = false,
2592 		.force_alloc = force_alloc,
2593 		.gfp_retry_mayfail = true,
2594 	};
2595 	struct ttm_placement placement;
2596 	int ret;
2597 
2598 	xe_evict_flags(&bo->ttm, &placement);
2599 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2600 	if (ret)
2601 		return ret;
2602 
2603 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2604 			      false, MAX_SCHEDULE_TIMEOUT);
2605 
2606 	return 0;
2607 }
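
/*
 * Brief sketch (assumed caller-provided bo): force a blocking eviction with
 * the BO lock held, e.g. ahead of a suspend-style flow.
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_evict(bo, true);
 *	xe_bo_unlock(bo);
 */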
2608 
2609 /**
2610  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2611  * placed in system memory.
2612  * @bo: The xe_bo
2613  *
2614  * Return: true if extra pages need to be allocated, false otherwise.
2615  */
2616 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2617 {
2618 	struct xe_device *xe = xe_bo_device(bo);
2619 
2620 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
2621 		return false;
2622 
2623 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2624 		return false;
2625 
2626 	/* On discrete GPUs, if the GPU can access this buffer from
2627 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2628 	 * can't be used since there's no CCS storage associated with
2629 	 * non-VRAM addresses.
2630 	 */
2631 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
2632 		return false;
2633 
2634 	return true;
2635 }
2636 
2637 /**
2638  * __xe_bo_release_dummy() - Dummy kref release function
2639  * @kref: The embedded struct kref.
2640  *
2641  * Dummy release function for xe_bo_put_deferred(). Keep off.
2642  */
2643 void __xe_bo_release_dummy(struct kref *kref)
2644 {
2645 }
2646 
2647 /**
2648  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2649  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2650  *
2651  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2652  * The @deferred list can be either an onstack local list or a global
2653  * shared list used by a workqueue.
2654  */
2655 void xe_bo_put_commit(struct llist_head *deferred)
2656 {
2657 	struct llist_node *freed;
2658 	struct xe_bo *bo, *next;
2659 
2660 	if (!deferred)
2661 		return;
2662 
2663 	freed = llist_del_all(deferred);
2664 	if (!freed)
2665 		return;
2666 
2667 	llist_for_each_entry_safe(bo, next, freed, freed)
2668 		drm_gem_object_free(&bo->ttm.base.refcount);
2669 }
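
/*
 * Deferred-put sketch: xe_bo_put_deferred() (declared in xe_bo.h) queues the
 * final free on @deferred so the sleeping part can be batched later; the
 * calling contexts shown are assumptions.
 *
 *	LLIST_HEAD(deferred);
 *
 *	... from a context that must not sleep ...
 *	xe_bo_put_deferred(bo, &deferred);
 *
 *	... later, from a context that may sleep ...
 *	xe_bo_put_commit(&deferred);
 */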
2670 
2671 void xe_bo_put(struct xe_bo *bo)
2672 {
2673 	struct xe_tile *tile;
2674 	u8 id;
2675 
2676 	might_sleep();
2677 	if (bo) {
2678 #ifdef CONFIG_PROC_FS
2679 		if (bo->client)
2680 			might_lock(&bo->client->bos_lock);
2681 #endif
2682 		for_each_tile(tile, xe_bo_device(bo), id)
2683 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
2684 				might_lock(&bo->ggtt_node[id]->ggtt->lock);
2685 		drm_gem_object_put(&bo->ttm.base);
2686 	}
2687 }
2688 
2689 /**
2690  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2691  * @file_priv: The drm file to create the handle for
2692  * @dev: The drm device
2693  * @args: The dumb buffer arguments; pitch, size and handle are returned here
2694  *
2695  * See dumb_create() hook in include/drm/drm_drv.h
2696  *
2697  * Return: 0 on success, negative error code on failure.
2698  */
2699 int xe_bo_dumb_create(struct drm_file *file_priv,
2700 		      struct drm_device *dev,
2701 		      struct drm_mode_create_dumb *args)
2702 {
2703 	struct xe_device *xe = to_xe_device(dev);
2704 	struct xe_bo *bo;
2705 	uint32_t handle;
2706 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2707 	int err;
2708 	u32 page_size = max_t(u32, PAGE_SIZE,
2709 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2710 
2711 	args->pitch = ALIGN(args->width * cpp, 64);
2712 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2713 			   page_size);
2714 
2715 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2716 			       DRM_XE_GEM_CPU_CACHING_WC,
2717 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2718 			       XE_BO_FLAG_SCANOUT |
2719 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
2720 	if (IS_ERR(bo))
2721 		return PTR_ERR(bo);
2722 
2723 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2724 	/* drop reference from allocate - handle holds it now */
2725 	drm_gem_object_put(&bo->ttm.base);
2726 	if (!err)
2727 		args->handle = handle;
2728 	return err;
2729 }
2730 
2731 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2732 {
2733 	struct ttm_buffer_object *tbo = &bo->ttm;
2734 	struct ttm_device *bdev = tbo->bdev;
2735 
2736 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2737 
2738 	list_del_init(&bo->vram_userfault_link);
2739 }
2740 
2741 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2742 #include "tests/xe_bo.c"
2743 #endif
2744