xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 1697398555f69b31e939e070b304292513d4c9ff)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_gem_ttm_helper.h>
13 #include <drm/drm_managed.h>
14 #include <drm/ttm/ttm_backup.h>
15 #include <drm/ttm/ttm_device.h>
16 #include <drm/ttm/ttm_placement.h>
17 #include <drm/ttm/ttm_tt.h>
18 #include <uapi/drm/xe_drm.h>
19 
20 #include <kunit/static_stub.h>
21 
22 #include <trace/events/gpu_mem.h>
23 
24 #include "xe_device.h"
25 #include "xe_dma_buf.h"
26 #include "xe_drm_client.h"
27 #include "xe_ggtt.h"
28 #include "xe_gt.h"
29 #include "xe_map.h"
30 #include "xe_migrate.h"
31 #include "xe_pm.h"
32 #include "xe_preempt_fence.h"
33 #include "xe_pxp.h"
34 #include "xe_res_cursor.h"
35 #include "xe_shrinker.h"
36 #include "xe_sriov_vf_ccs.h"
37 #include "xe_trace_bo.h"
38 #include "xe_ttm_stolen_mgr.h"
39 #include "xe_vm.h"
40 #include "xe_vram_types.h"
41 
42 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
43 	[XE_PL_SYSTEM] = "system",
44 	[XE_PL_TT] = "gtt",
45 	[XE_PL_VRAM0] = "vram0",
46 	[XE_PL_VRAM1] = "vram1",
47 	[XE_PL_STOLEN] = "stolen"
48 };
49 
50 static const struct ttm_place sys_placement_flags = {
51 	.fpfn = 0,
52 	.lpfn = 0,
53 	.mem_type = XE_PL_SYSTEM,
54 	.flags = 0,
55 };
56 
57 static struct ttm_placement sys_placement = {
58 	.num_placement = 1,
59 	.placement = &sys_placement_flags,
60 };
61 
62 static struct ttm_placement purge_placement;
63 
64 static const struct ttm_place tt_placement_flags[] = {
65 	{
66 		.fpfn = 0,
67 		.lpfn = 0,
68 		.mem_type = XE_PL_TT,
69 		.flags = TTM_PL_FLAG_DESIRED,
70 	},
71 	{
72 		.fpfn = 0,
73 		.lpfn = 0,
74 		.mem_type = XE_PL_SYSTEM,
75 		.flags = TTM_PL_FLAG_FALLBACK,
76 	}
77 };
78 
79 static struct ttm_placement tt_placement = {
80 	.num_placement = 2,
81 	.placement = tt_placement_flags,
82 };
83 
84 bool mem_type_is_vram(u32 mem_type)
85 {
86 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
87 }
88 
89 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
90 {
91 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
92 }
93 
94 static bool resource_is_vram(struct ttm_resource *res)
95 {
96 	return mem_type_is_vram(res->mem_type);
97 }
98 
99 bool xe_bo_is_vram(struct xe_bo *bo)
100 {
101 	return resource_is_vram(bo->ttm.resource) ||
102 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
103 }
104 
105 bool xe_bo_is_stolen(struct xe_bo *bo)
106 {
107 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
108 }
109 
110 /**
111  * xe_bo_has_single_placement - check if BO is placed only in one memory location
112  * @bo: The BO
113  *
114  * This function checks whether a given BO is placed in only one memory location.
115  *
116  * Returns: true if the BO is placed in a single memory location, false otherwise.
117  *
118  */
119 bool xe_bo_has_single_placement(struct xe_bo *bo)
120 {
121 	return bo->placement.num_placement == 1;
122 }
123 
124 /**
125  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
126  * @bo: The BO
127  *
128  * The stolen memory is accessed through the PCI BAR for both DGFX and some
129  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
130  *
131  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
132  */
133 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
134 {
135 	return xe_bo_is_stolen(bo) &&
136 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
137 }
138 
139 /**
140  * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
141  * @bo: The BO
142  *
143  * Check if a given bo is bound through VM_BIND. This requires the
144  * reservation lock for the BO to be held.
145  *
146  * Return: true if the BO is bound through VM_BIND, false otherwise.
147  */
148 bool xe_bo_is_vm_bound(struct xe_bo *bo)
149 {
150 	xe_bo_assert_held(bo);
151 
152 	return !list_empty(&bo->ttm.base.gpuva.list);
153 }
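
/*
 * Minimal usage sketch for xe_bo_is_vm_bound() (informal, not taken from a
 * real caller): the BO reservation must be held around the call.
 *
 *	xe_bo_lock(bo, false);
 *	bound = xe_bo_is_vm_bound(bo);
 *	xe_bo_unlock(bo);
 */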
154 
155 static bool xe_bo_is_user(struct xe_bo *bo)
156 {
157 	return bo->flags & XE_BO_FLAG_USER;
158 }
159 
160 static struct xe_migrate *
161 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
162 {
163 	struct xe_tile *tile;
164 
165 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
166 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
167 	return tile->migrate;
168 }
169 
170 static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
171 {
172 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
173 	struct ttm_resource_manager *mgr;
174 	struct xe_ttm_vram_mgr *vram_mgr;
175 
176 	xe_assert(xe, resource_is_vram(res));
177 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
178 	vram_mgr = to_xe_ttm_vram_mgr(mgr);
179 
180 	return container_of(vram_mgr, struct xe_vram_region, ttm);
181 }
182 
183 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
184 			   u32 bo_flags, u32 *c)
185 {
186 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
187 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
188 
189 		bo->placements[*c] = (struct ttm_place) {
190 			.mem_type = XE_PL_TT,
191 		};
192 		*c += 1;
193 	}
194 }
195 
196 static bool force_contiguous(u32 bo_flags)
197 {
198 	if (bo_flags & XE_BO_FLAG_STOLEN)
199 		return true; /* users expect this */
200 	else if (bo_flags & XE_BO_FLAG_PINNED &&
201 		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
202 		return true; /* needs vmap */
203 	else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
204 		return true;
205 
206 	/*
207 	 * For eviction / restore on suspend / resume, objects pinned in VRAM
208 	 * must be contiguous. Also, only contiguous BOs support xe_bo_vmap().
209 	 */
210 	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
211 	       bo_flags & XE_BO_FLAG_PINNED;
212 }
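
/*
 * Informal example of the rule above: a BO created with XE_BO_FLAG_PINNED |
 * XE_BO_FLAG_NEEDS_CPU_ACCESS gets TTM_PL_FLAG_CONTIGUOUS in its VRAM
 * placement, while a plain XE_BO_FLAG_VRAM0 allocation does not and may be
 * backed by multiple non-contiguous VRAM blocks.
 */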
213 
214 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
215 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
216 {
217 	struct ttm_place place = { .mem_type = mem_type };
218 	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
219 	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);
220 
221 	struct xe_vram_region *vram;
222 	u64 io_size;
223 
224 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
225 
226 	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
227 	xe_assert(xe, vram && vram->usable_size);
228 	io_size = vram->io_size;
229 
230 	if (force_contiguous(bo_flags))
231 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
232 
233 	if (io_size < vram->usable_size) {
234 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
235 			place.fpfn = 0;
236 			place.lpfn = io_size >> PAGE_SHIFT;
237 		} else {
238 			place.flags |= TTM_PL_FLAG_TOPDOWN;
239 		}
240 	}
241 	places[*c] = place;
242 	*c += 1;
243 }
244 
245 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
246 			 u32 bo_flags, u32 *c)
247 {
248 	if (bo_flags & XE_BO_FLAG_VRAM0)
249 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
250 	if (bo_flags & XE_BO_FLAG_VRAM1)
251 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
252 }
253 
254 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
255 			   u32 bo_flags, u32 *c)
256 {
257 	if (bo_flags & XE_BO_FLAG_STOLEN) {
258 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
259 
260 		bo->placements[*c] = (struct ttm_place) {
261 			.mem_type = XE_PL_STOLEN,
262 			.flags = force_contiguous(bo_flags) ?
263 				TTM_PL_FLAG_CONTIGUOUS : 0,
264 		};
265 		*c += 1;
266 	}
267 }
268 
269 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
270 				       u32 bo_flags)
271 {
272 	u32 c = 0;
273 
274 	try_add_vram(xe, bo, bo_flags, &c);
275 	try_add_system(xe, bo, bo_flags, &c);
276 	try_add_stolen(xe, bo, bo_flags, &c);
277 
278 	if (!c)
279 		return -EINVAL;
280 
281 	bo->placement = (struct ttm_placement) {
282 		.num_placement = c,
283 		.placement = bo->placements,
284 	};
285 
286 	return 0;
287 }
288 
289 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
290 			      u32 bo_flags)
291 {
292 	xe_bo_assert_held(bo);
293 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
294 }
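
/*
 * Worked example (informal): with bo_flags = XE_BO_FLAG_VRAM0 |
 * XE_BO_FLAG_SYSTEM, the placement list built above becomes
 * { XE_PL_VRAM0, XE_PL_TT }: VRAM0 is tried first and the TT placement
 * serves as the fallback when the VRAM allocation fails.
 */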
295 
296 static void xe_evict_flags(struct ttm_buffer_object *tbo,
297 			   struct ttm_placement *placement)
298 {
299 	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
300 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
301 	struct xe_bo *bo;
302 
303 	if (!xe_bo_is_xe_bo(tbo)) {
304 		/* Don't handle scatter gather BOs */
305 		if (tbo->type == ttm_bo_type_sg) {
306 			placement->num_placement = 0;
307 			return;
308 		}
309 
310 		*placement = device_unplugged ? purge_placement : sys_placement;
311 		return;
312 	}
313 
314 	bo = ttm_to_xe_bo(tbo);
315 	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
316 		*placement = sys_placement;
317 		return;
318 	}
319 
320 	if (device_unplugged && !tbo->base.dma_buf) {
321 		*placement = purge_placement;
322 		return;
323 	}
324 
325 	/*
326 	 * For xe, sg bos that are evicted to system just trigger a
327 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
328 	 */
329 	switch (tbo->resource->mem_type) {
330 	case XE_PL_VRAM0:
331 	case XE_PL_VRAM1:
332 	case XE_PL_STOLEN:
333 		*placement = tt_placement;
334 		break;
335 	case XE_PL_TT:
336 	default:
337 		*placement = sys_placement;
338 		break;
339 	}
340 }
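
/*
 * Informal summary of the eviction chain selected above: BOs in VRAM or
 * stolen are first evicted to XE_PL_TT (with XE_PL_SYSTEM as fallback),
 * BOs already in TT go to XE_PL_SYSTEM, CPU-address-mirror BOs always go
 * straight to system, and non dma-buf BOs are simply purged once the
 * device has been unplugged.
 */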
341 
342 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */
343 struct xe_ttm_tt {
344 	struct ttm_tt ttm;
345 	struct sg_table sgt;
346 	struct sg_table *sg;
347 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
348 	bool purgeable;
349 };
350 
351 static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
352 {
353 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
354 	unsigned long num_pages = tt->num_pages;
355 	int ret;
356 
357 	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
358 		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
359 
360 	if (xe_tt->sg)
361 		return 0;
362 
363 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
364 						num_pages, 0,
365 						(u64)num_pages << PAGE_SHIFT,
366 						xe_sg_segment_size(xe->drm.dev),
367 						GFP_KERNEL);
368 	if (ret)
369 		return ret;
370 
371 	xe_tt->sg = &xe_tt->sgt;
372 	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
373 			      DMA_ATTR_SKIP_CPU_SYNC);
374 	if (ret) {
375 		sg_free_table(xe_tt->sg);
376 		xe_tt->sg = NULL;
377 		return ret;
378 	}
379 
380 	return 0;
381 }
382 
383 static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
384 {
385 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
386 
387 	if (xe_tt->sg) {
388 		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
389 				  DMA_BIDIRECTIONAL, 0);
390 		sg_free_table(xe_tt->sg);
391 		xe_tt->sg = NULL;
392 	}
393 }
394 
395 struct sg_table *xe_bo_sg(struct xe_bo *bo)
396 {
397 	struct ttm_tt *tt = bo->ttm.ttm;
398 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
399 
400 	return xe_tt->sg;
401 }
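
/*
 * Informal usage note for xe_bo_sg(): the sg table only exists once
 * xe_tt_map_sg() has run (e.g. after the BO has been validated to
 * XE_PL_TT); before that the function returns NULL, so callers are
 * expected to check the result:
 *
 *	struct sg_table *sgt = xe_bo_sg(bo);
 *
 *	if (sgt)
 *		// walk sgt to obtain the DMA addresses
 */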
402 
403 /*
404  * Account ttm pages against the device shrinker's shrinkable and
405  * purgeable counts.
406  */
407 static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
408 {
409 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
410 
411 	if (xe_tt->purgeable)
412 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
413 	else
414 		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
415 }
416 
417 static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
418 {
419 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
420 
421 	if (xe_tt->purgeable)
422 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
423 	else
424 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
425 }
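
/*
 * Informal example of the accounting above: populating a non-purgeable tt of
 * N pages adds N to the shrinker's shrinkable count, while a tt already
 * marked purgeable would add N to the purgeable count instead. The
 * unpopulate and shrink paths subtract the same amount again via
 * xe_ttm_tt_account_subtract().
 */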
426 
427 static void update_global_total_pages(struct ttm_device *ttm_dev,
428 				      long num_pages)
429 {
430 #if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
431 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
432 	u64 global_total_pages =
433 		atomic64_add_return(num_pages, &xe->global_total_pages);
434 
435 	trace_gpu_mem_total(xe->drm.primary->index, 0,
436 			    global_total_pages << PAGE_SHIFT);
437 #endif
438 }
439 
440 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
441 				       u32 page_flags)
442 {
443 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
444 	struct xe_device *xe = xe_bo_device(bo);
445 	struct xe_ttm_tt *xe_tt;
446 	struct ttm_tt *tt;
447 	unsigned long extra_pages;
448 	enum ttm_caching caching = ttm_cached;
449 	int err;
450 
451 	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
452 	if (!xe_tt)
453 		return NULL;
454 
455 	tt = &xe_tt->ttm;
456 
457 	extra_pages = 0;
458 	if (xe_bo_needs_ccs_pages(bo))
459 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
460 					   PAGE_SIZE);
461 
462 	/*
463 	 * DGFX system memory is always WB / ttm_cached, since
464 	 * other caching modes are only supported on x86. DGFX
465 	 * GPU system memory accesses are always coherent with the
466 	 * CPU.
467 	 */
468 	if (!IS_DGFX(xe)) {
469 		switch (bo->cpu_caching) {
470 		case DRM_XE_GEM_CPU_CACHING_WC:
471 			caching = ttm_write_combined;
472 			break;
473 		default:
474 			caching = ttm_cached;
475 			break;
476 		}
477 
478 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
479 
480 		/*
481 		 * Display scanout is always non-coherent with the CPU cache.
482 		 *
483 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
484 		 * non-coherent and require a CPU:WC mapping.
485 		 */
486 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
487 		    (xe->info.graphics_verx100 >= 1270 &&
488 		     bo->flags & XE_BO_FLAG_PAGETABLE))
489 			caching = ttm_write_combined;
490 	}
491 
492 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
493 		/*
494 		 * Valid only for internally-created buffers, for
495 		 * which cpu_caching is never initialized.
496 		 */
497 		xe_assert(xe, bo->cpu_caching == 0);
498 		caching = ttm_uncached;
499 	}
500 
501 	if (ttm_bo->type != ttm_bo_type_sg)
502 		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
503 
504 	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
505 	if (err) {
506 		kfree(xe_tt);
507 		return NULL;
508 	}
509 
510 	if (ttm_bo->type != ttm_bo_type_sg) {
511 		err = ttm_tt_setup_backup(tt);
512 		if (err) {
513 			ttm_tt_fini(tt);
514 			kfree(xe_tt);
515 			return NULL;
516 		}
517 	}
518 
519 	return tt;
520 }
521 
522 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
523 			      struct ttm_operation_ctx *ctx)
524 {
525 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
526 	int err;
527 
528 	/*
529 	 * dma-bufs are not populated with pages, and the DMA
530 	 * addresses are set up when moved to XE_PL_TT.
531 	 */
532 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
533 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
534 		return 0;
535 
536 	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
537 		err = ttm_tt_restore(ttm_dev, tt, ctx);
538 	} else {
539 		ttm_tt_clear_backed_up(tt);
540 		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
541 	}
542 	if (err)
543 		return err;
544 
545 	xe_tt->purgeable = false;
546 	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
547 	update_global_total_pages(ttm_dev, tt->num_pages);
548 
549 	return 0;
550 }
551 
552 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
553 {
554 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
555 
556 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
557 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
558 		return;
559 
560 	xe_tt_unmap_sg(xe, tt);
561 
562 	ttm_pool_free(&ttm_dev->pool, tt);
563 	xe_ttm_tt_account_subtract(xe, tt);
564 	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
565 }
566 
567 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
568 {
569 	ttm_tt_fini(tt);
570 	kfree(tt);
571 }
572 
573 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
574 {
575 	struct xe_ttm_vram_mgr_resource *vres =
576 		to_xe_ttm_vram_mgr_resource(mem);
577 
578 	return vres->used_visible_size == mem->size;
579 }
580 
581 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
582 				 struct ttm_resource *mem)
583 {
584 	struct xe_device *xe = ttm_to_xe_device(bdev);
585 
586 	switch (mem->mem_type) {
587 	case XE_PL_SYSTEM:
588 	case XE_PL_TT:
589 		return 0;
590 	case XE_PL_VRAM0:
591 	case XE_PL_VRAM1: {
592 		struct xe_vram_region *vram = res_to_mem_region(mem);
593 
594 		if (!xe_ttm_resource_visible(mem))
595 			return -EINVAL;
596 
597 		mem->bus.offset = mem->start << PAGE_SHIFT;
598 
599 		if (vram->mapping &&
600 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
601 			mem->bus.addr = (u8 __force *)vram->mapping +
602 				mem->bus.offset;
603 
604 		mem->bus.offset += vram->io_start;
605 		mem->bus.is_iomem = true;
606 
607 #if !IS_ENABLED(CONFIG_X86)
608 		mem->bus.caching = ttm_write_combined;
609 #endif
610 		return 0;
611 	} case XE_PL_STOLEN:
612 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
613 	default:
614 		return -EINVAL;
615 	}
616 }
617 
618 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
619 				const struct ttm_operation_ctx *ctx)
620 {
621 	struct dma_resv_iter cursor;
622 	struct dma_fence *fence;
623 	struct drm_gem_object *obj = &bo->ttm.base;
624 	struct drm_gpuvm_bo *vm_bo;
625 	bool idle = false;
626 	int ret = 0;
627 
628 	dma_resv_assert_held(bo->ttm.base.resv);
629 
630 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
631 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
632 				    DMA_RESV_USAGE_BOOKKEEP);
633 		dma_resv_for_each_fence_unlocked(&cursor, fence)
634 			dma_fence_enable_sw_signaling(fence);
635 		dma_resv_iter_end(&cursor);
636 	}
637 
638 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
639 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
640 		struct drm_gpuva *gpuva;
641 
642 		if (!xe_vm_in_fault_mode(vm)) {
643 			drm_gpuvm_bo_evict(vm_bo, true);
644 			continue;
645 		}
646 
647 		if (!idle) {
648 			long timeout;
649 
650 			if (ctx->no_wait_gpu &&
651 			    !dma_resv_test_signaled(bo->ttm.base.resv,
652 						    DMA_RESV_USAGE_BOOKKEEP))
653 				return -EBUSY;
654 
655 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
656 							DMA_RESV_USAGE_BOOKKEEP,
657 							ctx->interruptible,
658 							MAX_SCHEDULE_TIMEOUT);
659 			if (!timeout)
660 				return -ETIME;
661 			if (timeout < 0)
662 				return timeout;
663 
664 			idle = true;
665 		}
666 
667 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
668 			struct xe_vma *vma = gpuva_to_vma(gpuva);
669 
670 			trace_xe_vma_evict(vma);
671 			ret = xe_vm_invalidate_vma(vma);
672 			if (XE_WARN_ON(ret))
673 				return ret;
674 		}
675 	}
676 
677 	return ret;
678 }
679 
680 /*
681  * The dma-buf map_attachment() / unmap_attachment() calls are hooked up here.
682  * Note that unmapping the attachment is deferred to the next
683  * map_attachment time, or to bo destroy (after idling), whichever comes first.
684  * This is to avoid syncing before unmap_attachment(), assuming that the
685  * caller relies on idling the reservation object before moving the
686  * backing store out. Should that assumption not hold, then we will be able
687  * to unconditionally call unmap_attachment() when moving out to system.
688  */
689 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
690 			     struct ttm_resource *new_res)
691 {
692 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
693 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
694 					       ttm);
695 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
696 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
697 	struct sg_table *sg;
698 
699 	xe_assert(xe, attach);
700 	xe_assert(xe, ttm_bo->ttm);
701 
702 	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
703 	    ttm_bo->sg) {
704 		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
705 				      false, MAX_SCHEDULE_TIMEOUT);
706 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
707 		ttm_bo->sg = NULL;
708 	}
709 
710 	if (new_res->mem_type == XE_PL_SYSTEM)
711 		goto out;
712 
713 	if (ttm_bo->sg) {
714 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
715 		ttm_bo->sg = NULL;
716 	}
717 
718 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
719 	if (IS_ERR(sg))
720 		return PTR_ERR(sg);
721 
722 	ttm_bo->sg = sg;
723 	xe_tt->sg = sg;
724 
725 out:
726 	ttm_bo_move_null(ttm_bo, new_res);
727 
728 	return 0;
729 }
730 
731 /**
732  * xe_bo_move_notify - Notify subsystems of a pending move
733  * @bo: The buffer object
734  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
735  *
736  * This function notifies subsystems of an upcoming buffer move.
737  * Upon receiving such a notification, subsystems should schedule
738  * halting access to the underlying pages and optionally add a fence
739  * to the buffer object's dma_resv object, that signals when access is
740  * stopped. The caller will wait on all dma_resv fences before
741  * starting the move.
742  *
743  * A subsystem may commence access to the object after obtaining
744  * bindings to the new backing memory under the object lock.
745  *
746  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
747  * negative error code on error.
748  */
749 static int xe_bo_move_notify(struct xe_bo *bo,
750 			     const struct ttm_operation_ctx *ctx)
751 {
752 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
753 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
754 	struct ttm_resource *old_mem = ttm_bo->resource;
755 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
756 	int ret;
757 
758 	/*
759 	 * If this starts to call into many components, consider
760 	 * using a notification chain here.
761 	 */
762 
763 	if (xe_bo_is_pinned(bo))
764 		return -EINVAL;
765 
766 	xe_bo_vunmap(bo);
767 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
768 	if (ret)
769 		return ret;
770 
771 	/* Don't call move_notify() for imported dma-bufs. */
772 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
773 		dma_buf_move_notify(ttm_bo->base.dma_buf);
774 
775 	/*
776 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
777 	 * so if we moved from VRAM make sure to unlink this from the userfault
778 	 * tracking.
779 	 */
780 	if (mem_type_is_vram(old_mem_type)) {
781 		mutex_lock(&xe->mem_access.vram_userfault.lock);
782 		if (!list_empty(&bo->vram_userfault_link))
783 			list_del_init(&bo->vram_userfault_link);
784 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
785 	}
786 
787 	return 0;
788 }
789 
790 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
791 		      struct ttm_operation_ctx *ctx,
792 		      struct ttm_resource *new_mem,
793 		      struct ttm_place *hop)
794 {
795 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
796 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
797 	struct ttm_resource *old_mem = ttm_bo->resource;
798 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
799 	struct ttm_tt *ttm = ttm_bo->ttm;
800 	struct xe_migrate *migrate = NULL;
801 	struct dma_fence *fence;
802 	bool move_lacks_source;
803 	bool tt_has_data;
804 	bool needs_clear;
805 	bool handle_system_ccs = !IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
806 				 ttm && ttm_tt_is_populated(ttm);
807 	int ret = 0;
808 
809 	/* Bo creation path, moving to system or TT. */
810 	if ((!old_mem && ttm) && !handle_system_ccs) {
811 		if (new_mem->mem_type == XE_PL_TT)
812 			ret = xe_tt_map_sg(xe, ttm);
813 		if (!ret)
814 			ttm_bo_move_null(ttm_bo, new_mem);
815 		goto out;
816 	}
817 
818 	if (ttm_bo->type == ttm_bo_type_sg) {
819 		if (new_mem->mem_type == XE_PL_SYSTEM)
820 			ret = xe_bo_move_notify(bo, ctx);
821 		if (!ret)
822 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
823 		return ret;
824 	}
825 
826 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));
827 
828 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
829 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
830 
831 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
832 		(!ttm && ttm_bo->type == ttm_bo_type_device);
833 
834 	if (new_mem->mem_type == XE_PL_TT) {
835 		ret = xe_tt_map_sg(xe, ttm);
836 		if (ret)
837 			goto out;
838 	}
839 
840 	if (move_lacks_source && !needs_clear) {
841 		ttm_bo_move_null(ttm_bo, new_mem);
842 		goto out;
843 	}
844 
845 	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
846 	    new_mem->mem_type == XE_PL_SYSTEM) {
847 		ret = xe_svm_bo_evict(bo);
848 		if (!ret) {
849 			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
850 			ttm_bo_move_null(ttm_bo, new_mem);
851 		} else {
852 			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
853 				ERR_PTR(ret));
854 		}
855 
856 		goto out;
857 	}
858 
859 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
860 		ttm_bo_move_null(ttm_bo, new_mem);
861 		goto out;
862 	}
863 
864 	/*
865 	 * Failed multi-hop where the old_mem is still marked as
866 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
867 	 */
868 	if (old_mem_type == XE_PL_TT &&
869 	    new_mem->mem_type == XE_PL_TT) {
870 		ttm_bo_move_null(ttm_bo, new_mem);
871 		goto out;
872 	}
873 
874 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
875 		ret = xe_bo_move_notify(bo, ctx);
876 		if (ret)
877 			goto out;
878 	}
879 
880 	if (old_mem_type == XE_PL_TT &&
881 	    new_mem->mem_type == XE_PL_SYSTEM) {
882 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
883 						     DMA_RESV_USAGE_BOOKKEEP,
884 						     false,
885 						     MAX_SCHEDULE_TIMEOUT);
886 		if (timeout < 0) {
887 			ret = timeout;
888 			goto out;
889 		}
890 
891 		if (!handle_system_ccs) {
892 			ttm_bo_move_null(ttm_bo, new_mem);
893 			goto out;
894 		}
895 	}
896 
897 	if (!move_lacks_source &&
898 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
899 	     (mem_type_is_vram(old_mem_type) &&
900 	      new_mem->mem_type == XE_PL_SYSTEM))) {
901 		hop->fpfn = 0;
902 		hop->lpfn = 0;
903 		hop->mem_type = XE_PL_TT;
904 		hop->flags = TTM_PL_FLAG_TEMPORARY;
905 		ret = -EMULTIHOP;
906 		goto out;
907 	}
908 
909 	if (bo->tile)
910 		migrate = bo->tile->migrate;
911 	else if (resource_is_vram(new_mem))
912 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
913 	else if (mem_type_is_vram(old_mem_type))
914 		migrate = mem_type_to_migrate(xe, old_mem_type);
915 	else
916 		migrate = xe->tiles[0].migrate;
917 
918 	xe_assert(xe, migrate);
919 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
920 	if (xe_rpm_reclaim_safe(xe)) {
921 		/*
922 		 * We might be called through swapout in the validation path of
923 		 * another TTM device, so acquire rpm here.
924 		 */
925 		xe_pm_runtime_get(xe);
926 	} else {
927 		drm_WARN_ON(&xe->drm, handle_system_ccs);
928 		xe_pm_runtime_get_noresume(xe);
929 	}
930 
931 	if (move_lacks_source) {
932 		u32 flags = 0;
933 
934 		if (mem_type_is_vram(new_mem->mem_type))
935 			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
936 		else if (handle_system_ccs)
937 			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
938 
939 		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
940 	} else {
941 		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
942 					handle_system_ccs);
943 	}
944 	if (IS_ERR(fence)) {
945 		ret = PTR_ERR(fence);
946 		xe_pm_runtime_put(xe);
947 		goto out;
948 	}
949 	if (!move_lacks_source) {
950 		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
951 						new_mem);
952 		if (ret) {
953 			dma_fence_wait(fence, false);
954 			ttm_bo_move_null(ttm_bo, new_mem);
955 			ret = 0;
956 		}
957 	} else {
958 		/*
959 		 * ttm_bo_move_accel_cleanup() may blow up if
960 		 * bo->resource == NULL, so just attach the
961 		 * fence and set the new resource.
962 		 */
963 		dma_resv_add_fence(ttm_bo->base.resv, fence,
964 				   DMA_RESV_USAGE_KERNEL);
965 		ttm_bo_move_null(ttm_bo, new_mem);
966 	}
967 
968 	dma_fence_put(fence);
969 	xe_pm_runtime_put(xe);
970 
971 	/*
972 	 * CCS metadata is migrated from TT -> SMEM, so detach the
973 	 * BBs from the BO as they are no longer needed.
974 	 */
975 	if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT &&
976 	    new_mem->mem_type == XE_PL_SYSTEM)
977 		xe_sriov_vf_ccs_detach_bo(bo);
978 
979 	if (IS_SRIOV_VF(xe) &&
980 	    ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
981 	     (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
982 	    handle_system_ccs)
983 		ret = xe_sriov_vf_ccs_attach_bo(bo);
984 
985 out:
986 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
987 	    ttm_bo->ttm) {
988 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
989 						     DMA_RESV_USAGE_KERNEL,
990 						     false,
991 						     MAX_SCHEDULE_TIMEOUT);
992 		if (timeout < 0)
993 			ret = timeout;
994 
995 		if (IS_VF_CCS_BB_VALID(xe, bo))
996 			xe_sriov_vf_ccs_detach_bo(bo);
997 
998 		xe_tt_unmap_sg(xe, ttm_bo->ttm);
999 	}
1000 
1001 	return ret;
1002 }
1003 
1004 static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
1005 			       struct ttm_buffer_object *bo,
1006 			       unsigned long *scanned)
1007 {
1008 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1009 	long lret;
1010 
1011 	/* Fake move to system, without copying data. */
1012 	if (bo->resource->mem_type != XE_PL_SYSTEM) {
1013 		struct ttm_resource *new_resource;
1014 
1015 		lret = ttm_bo_wait_ctx(bo, ctx);
1016 		if (lret)
1017 			return lret;
1018 
1019 		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
1020 		if (lret)
1021 			return lret;
1022 
1023 		xe_tt_unmap_sg(xe, bo->ttm);
1024 		ttm_bo_move_null(bo, new_resource);
1025 	}
1026 
1027 	*scanned += bo->ttm->num_pages;
1028 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1029 			     {.purge = true,
1030 			      .writeback = false,
1031 			      .allow_move = false});
1032 
1033 	if (lret > 0)
1034 		xe_ttm_tt_account_subtract(xe, bo->ttm);
1035 
1036 	return lret;
1037 }
1038 
1039 static bool
1040 xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1041 {
1042 	struct drm_gpuvm_bo *vm_bo;
1043 
1044 	if (!ttm_bo_eviction_valuable(bo, place))
1045 		return false;
1046 
1047 	if (!xe_bo_is_xe_bo(bo))
1048 		return true;
1049 
1050 	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1051 		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1052 			return false;
1053 	}
1054 
1055 	return true;
1056 }
1057 
1058 /**
1059  * xe_bo_shrink() - Try to shrink an xe bo.
1060  * @ctx: The struct ttm_operation_ctx used for shrinking.
1061  * @bo: The TTM buffer object whose pages to shrink.
1062  * @flags: Flags governing the shrink behaviour.
1063  * @scanned: Pointer to a counter of the number of pages
1064  * attempted to shrink.
1065  *
1066  * Try to shrink or purge a bo, and if that succeeds, unmap its DMA.
1067  * Note that we also need to be able to handle non-xe bos
1068  * (ghost bos), but only if their struct ttm_tt is embedded in
1069  * a struct xe_ttm_tt. When the function attempts to shrink
1070  * the pages of a buffer object, the value pointed to by @scanned
1071  * is updated.
1072  *
1073  * Return: The number of pages shrunk or purged, or a negative error
1074  * code on failure.
1075  */
1076 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
1077 		  const struct xe_bo_shrink_flags flags,
1078 		  unsigned long *scanned)
1079 {
1080 	struct ttm_tt *tt = bo->ttm;
1081 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
1082 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
1083 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
1084 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1085 	bool needs_rpm;
1086 	long lret = 0L;
1087 
1088 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
1089 	    (flags.purge && !xe_tt->purgeable))
1090 		return -EBUSY;
1091 
1092 	if (!xe_bo_eviction_valuable(bo, &place))
1093 		return -EBUSY;
1094 
1095 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1096 		return xe_bo_shrink_purge(ctx, bo, scanned);
1097 
1098 	if (xe_tt->purgeable) {
1099 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1100 			lret = xe_bo_move_notify(xe_bo, ctx);
1101 		if (!lret)
1102 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1103 		goto out_unref;
1104 	}
1105 
1106 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1107 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1108 		     xe_bo_needs_ccs_pages(xe_bo));
1109 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1110 		goto out_unref;
1111 
1112 	*scanned += tt->num_pages;
1113 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1114 			     {.purge = false,
1115 			      .writeback = flags.writeback,
1116 			      .allow_move = true});
1117 	if (needs_rpm)
1118 		xe_pm_runtime_put(xe);
1119 
1120 	if (lret > 0)
1121 		xe_ttm_tt_account_subtract(xe, tt);
1122 
1123 out_unref:
1124 	xe_bo_put(xe_bo);
1125 
1126 	return lret;
1127 }
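
/*
 * Minimal caller sketch for xe_bo_shrink(), assuming a locked @bo and a
 * populated @ctx are provided by an LRU walk (see xe_shrinker.c for the
 * real caller):
 *
 *	struct xe_bo_shrink_flags sflags = { .purge = false, .writeback = true };
 *	unsigned long scanned = 0;
 *	long freed = xe_bo_shrink(ctx, bo, sflags, &scanned);
 *
 *	if (freed > 0)
 *		nr_freed += freed;	// pages actually released
 */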
1128 
1129 /**
1130  * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1131  * up in system memory.
1132  * @bo: The buffer object to prepare.
1133  *
1134  * On successful completion, the object backup pages are allocated. Expectation
1135  * is that this is called from the PM notifier, prior to suspend/hibernation.
1136  *
1137  * Return: 0 on success. Negative error code on failure.
1138  */
1139 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1140 {
1141 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1142 	struct xe_bo *backup;
1143 	int ret = 0;
1144 
1145 	xe_bo_lock(bo, false);
1146 
1147 	xe_assert(xe, !bo->backup_obj);
1148 
1149 	/*
1150 	 * Since this is called from the PM notifier we might have raced with
1151 	 * someone unpinning this after we dropped the pinned list lock and
1152 	 * before we grabbed the bo lock above.
1153 	 */
1154 	if (!xe_bo_is_pinned(bo))
1155 		goto out_unlock_bo;
1156 
1157 	if (!xe_bo_is_vram(bo))
1158 		goto out_unlock_bo;
1159 
1160 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1161 		goto out_unlock_bo;
1162 
1163 	backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
1164 					DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1165 					XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1166 					XE_BO_FLAG_PINNED);
1167 	if (IS_ERR(backup)) {
1168 		ret = PTR_ERR(backup);
1169 		goto out_unlock_bo;
1170 	}
1171 
1172 	backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1173 	ttm_bo_pin(&backup->ttm);
1174 	bo->backup_obj = backup;
1175 
1176 out_unlock_bo:
1177 	xe_bo_unlock(bo);
1178 	return ret;
1179 }
1180 
1181 /**
1182  * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1183  * @bo: The buffer object to undo the prepare for.
1184  *
1185  * Always returns 0. The backup object is removed, if still present. Expectation
1186  * is that this is called from the PM notifier when undoing the prepare step.
1187  *
1188  * Return: Always returns 0.
1189  */
1190 int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1191 {
1192 	xe_bo_lock(bo, false);
1193 	if (bo->backup_obj) {
1194 		ttm_bo_unpin(&bo->backup_obj->ttm);
1195 		xe_bo_put(bo->backup_obj);
1196 		bo->backup_obj = NULL;
1197 	}
1198 	xe_bo_unlock(bo);
1199 
1200 	return 0;
1201 }
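
/*
 * Informal pairing sketch for the two notifier helpers above (the actual
 * caller lives in the PM notifier code and is not shown here): on
 * PM_HIBERNATION_PREPARE / PM_SUSPEND_PREPARE each pinned VRAM BO would be
 * passed to xe_bo_notifier_prepare_pinned() so backups exist before memory
 * gets tight, and on the matching PM_POST_* event
 * xe_bo_notifier_unprepare_pinned() would drop any backup that ended up
 * unused.
 */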
1202 
1203 /**
1204  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1205  * @bo: The buffer object to move.
1206  *
1207  * On successful completion, the object memory will be moved to system memory.
1208  *
1209  * This is needed for special handling of pinned VRAM objects during
1210  * suspend-resume.
1211  *
1212  * Return: 0 on success. Negative error code on failure.
1213  */
1214 int xe_bo_evict_pinned(struct xe_bo *bo)
1215 {
1216 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1217 	struct xe_bo *backup = bo->backup_obj;
1218 	bool backup_created = false;
1219 	bool unmap = false;
1220 	int ret = 0;
1221 
1222 	xe_bo_lock(bo, false);
1223 
1224 	if (WARN_ON(!bo->ttm.resource)) {
1225 		ret = -EINVAL;
1226 		goto out_unlock_bo;
1227 	}
1228 
1229 	if (WARN_ON(!xe_bo_is_pinned(bo))) {
1230 		ret = -EINVAL;
1231 		goto out_unlock_bo;
1232 	}
1233 
1234 	if (!xe_bo_is_vram(bo))
1235 		goto out_unlock_bo;
1236 
1237 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1238 		goto out_unlock_bo;
1239 
1240 	if (!backup) {
1241 		backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv,
1242 						NULL, xe_bo_size(bo),
1243 						DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1244 						XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1245 						XE_BO_FLAG_PINNED);
1246 		if (IS_ERR(backup)) {
1247 			ret = PTR_ERR(backup);
1248 			goto out_unlock_bo;
1249 		}
1250 		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1251 		backup_created = true;
1252 	}
1253 
1254 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1255 		struct xe_migrate *migrate;
1256 		struct dma_fence *fence;
1257 
1258 		if (bo->tile)
1259 			migrate = bo->tile->migrate;
1260 		else
1261 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1262 
1263 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1264 		if (ret)
1265 			goto out_backup;
1266 
1267 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1268 		if (ret)
1269 			goto out_backup;
1270 
1271 		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
1272 					backup->ttm.resource, false);
1273 		if (IS_ERR(fence)) {
1274 			ret = PTR_ERR(fence);
1275 			goto out_backup;
1276 		}
1277 
1278 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1279 				   DMA_RESV_USAGE_KERNEL);
1280 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1281 				   DMA_RESV_USAGE_KERNEL);
1282 		dma_fence_put(fence);
1283 	} else {
1284 		ret = xe_bo_vmap(backup);
1285 		if (ret)
1286 			goto out_backup;
1287 
1288 		if (iosys_map_is_null(&bo->vmap)) {
1289 			ret = xe_bo_vmap(bo);
1290 			if (ret)
1291 				goto out_backup;
1292 			unmap = true;
1293 		}
1294 
1295 		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
1296 				   xe_bo_size(bo));
1297 	}
1298 
1299 	if (!bo->backup_obj)
1300 		bo->backup_obj = backup;
1301 
1302 out_backup:
1303 	xe_bo_vunmap(backup);
1304 	if (ret && backup_created)
1305 		xe_bo_put(backup);
1306 out_unlock_bo:
1307 	if (unmap)
1308 		xe_bo_vunmap(bo);
1309 	xe_bo_unlock(bo);
1310 	return ret;
1311 }
1312 
1313 /**
1314  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1315  * @bo: The buffer object to move.
1316  *
1317  * On successful completion, the object memory will be moved back to VRAM.
1318  *
1319  * This is needed for special handling of pinned VRAM objects during
1320  * suspend-resume.
1321  *
1322  * Return: 0 on success. Negative error code on failure.
1323  */
1324 int xe_bo_restore_pinned(struct xe_bo *bo)
1325 {
1326 	struct ttm_operation_ctx ctx = {
1327 		.interruptible = false,
1328 		.gfp_retry_mayfail = false,
1329 	};
1330 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1331 	struct xe_bo *backup = bo->backup_obj;
1332 	bool unmap = false;
1333 	int ret;
1334 
1335 	if (!backup)
1336 		return 0;
1337 
1338 	xe_bo_lock(bo, false);
1339 
1340 	if (!xe_bo_is_pinned(backup)) {
1341 		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1342 		if (ret)
1343 			goto out_unlock_bo;
1344 	}
1345 
1346 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1347 		struct xe_migrate *migrate;
1348 		struct dma_fence *fence;
1349 
1350 		if (bo->tile)
1351 			migrate = bo->tile->migrate;
1352 		else
1353 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1354 
1355 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1356 		if (ret)
1357 			goto out_unlock_bo;
1358 
1359 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1360 		if (ret)
1361 			goto out_unlock_bo;
1362 
1363 		fence = xe_migrate_copy(migrate, backup, bo,
1364 					backup->ttm.resource, bo->ttm.resource,
1365 					false);
1366 		if (IS_ERR(fence)) {
1367 			ret = PTR_ERR(fence);
1368 			goto out_unlock_bo;
1369 		}
1370 
1371 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1372 				   DMA_RESV_USAGE_KERNEL);
1373 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1374 				   DMA_RESV_USAGE_KERNEL);
1375 		dma_fence_put(fence);
1376 	} else {
1377 		ret = xe_bo_vmap(backup);
1378 		if (ret)
1379 			goto out_unlock_bo;
1380 
1381 		if (iosys_map_is_null(&bo->vmap)) {
1382 			ret = xe_bo_vmap(bo);
1383 			if (ret)
1384 				goto out_backup;
1385 			unmap = true;
1386 		}
1387 
1388 		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
1389 				 xe_bo_size(bo));
1390 	}
1391 
1392 	bo->backup_obj = NULL;
1393 
1394 out_backup:
1395 	xe_bo_vunmap(backup);
1396 	if (!bo->backup_obj) {
1397 		if (xe_bo_is_pinned(backup))
1398 			ttm_bo_unpin(&backup->ttm);
1399 		xe_bo_put(backup);
1400 	}
1401 out_unlock_bo:
1402 	if (unmap)
1403 		xe_bo_vunmap(bo);
1404 	xe_bo_unlock(bo);
1405 	return ret;
1406 }
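
/*
 * Informal suspend/resume sketch for the two helpers above (the real callers
 * iterate the device's pinned BO lists; they are not shown here): at suspend
 * time each pinned VRAM BO is handed to xe_bo_evict_pinned(), which copies
 * its contents into the backup object, and at resume time
 * xe_bo_restore_pinned() copies the contents back and releases the backup.
 */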
1407 
1408 int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
1409 {
1410 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
1411 	struct ttm_tt *tt = ttm_bo->ttm;
1412 
1413 	if (tt) {
1414 		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
1415 
1416 		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1417 			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
1418 						 ttm_bo->sg,
1419 						 DMA_BIDIRECTIONAL);
1420 			ttm_bo->sg = NULL;
1421 			xe_tt->sg = NULL;
1422 		} else if (xe_tt->sg) {
1423 			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
1424 					  xe_tt->sg,
1425 					  DMA_BIDIRECTIONAL, 0);
1426 			sg_free_table(xe_tt->sg);
1427 			xe_tt->sg = NULL;
1428 		}
1429 	}
1430 
1431 	return 0;
1432 }
1433 
1434 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1435 				       unsigned long page_offset)
1436 {
1437 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1438 	struct xe_res_cursor cursor;
1439 	struct xe_vram_region *vram;
1440 
1441 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1442 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1443 
1444 	vram = res_to_mem_region(ttm_bo->resource);
1445 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1446 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1447 }
1448 
1449 static void __xe_bo_vunmap(struct xe_bo *bo);
1450 
1451 /*
1452  * TODO: Move this function to TTM so we don't rely on how TTM does its
1453  * locking, thereby abusing TTM internals.
1454  */
1455 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1456 {
1457 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1458 	bool locked;
1459 
1460 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1461 
1462 	/*
1463 	 * We can typically only race with TTM trylocking under the
1464 	 * lru_lock, which will immediately be unlocked again since
1465 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1466 	 * always succeed here, as long as we hold the lru lock.
1467 	 */
1468 	spin_lock(&ttm_bo->bdev->lru_lock);
1469 	locked = dma_resv_trylock(ttm_bo->base.resv);
1470 	spin_unlock(&ttm_bo->bdev->lru_lock);
1471 	xe_assert(xe, locked);
1472 
1473 	return locked;
1474 }
1475 
1476 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1477 {
1478 	struct dma_resv_iter cursor;
1479 	struct dma_fence *fence;
1480 	struct dma_fence *replacement = NULL;
1481 	struct xe_bo *bo;
1482 
1483 	if (!xe_bo_is_xe_bo(ttm_bo))
1484 		return;
1485 
1486 	bo = ttm_to_xe_bo(ttm_bo);
1487 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1488 
1489 	/*
1490 	 * Corner case where TTM fails to allocate memory and this BO's resv
1491 	 * still points to the VM's resv.
1492 	 */
1493 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1494 		return;
1495 
1496 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1497 		return;
1498 
1499 	/*
1500 	 * Scrub the preempt fences if any. The unbind fence is already
1501 	 * attached to the resv.
1502 	 * TODO: Don't do this for external bos once we scrub them after
1503 	 * unbind.
1504 	 */
1505 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1506 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1507 		if (xe_fence_is_xe_preempt(fence) &&
1508 		    !dma_fence_is_signaled(fence)) {
1509 			if (!replacement)
1510 				replacement = dma_fence_get_stub();
1511 
1512 			dma_resv_replace_fences(ttm_bo->base.resv,
1513 						fence->context,
1514 						replacement,
1515 						DMA_RESV_USAGE_BOOKKEEP);
1516 		}
1517 	}
1518 	dma_fence_put(replacement);
1519 
1520 	dma_resv_unlock(ttm_bo->base.resv);
1521 }
1522 
1523 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1524 {
1525 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1526 
1527 	if (!xe_bo_is_xe_bo(ttm_bo))
1528 		return;
1529 
1530 	if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo))
1531 		xe_sriov_vf_ccs_detach_bo(bo);
1532 
1533 	/*
1534 	 * Object is idle and about to be destroyed. Release the
1535 	 * dma-buf attachment.
1536 	 */
1537 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1538 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1539 						       struct xe_ttm_tt, ttm);
1540 
1541 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1542 					 DMA_BIDIRECTIONAL);
1543 		ttm_bo->sg = NULL;
1544 		xe_tt->sg = NULL;
1545 	}
1546 }
1547 
1548 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1549 {
1550 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1551 
1552 	if (ttm_bo->ttm) {
1553 		struct ttm_placement place = {};
1554 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1555 
1556 		drm_WARN_ON(&xe->drm, ret);
1557 	}
1558 }
1559 
1560 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1561 {
1562 	struct ttm_operation_ctx ctx = {
1563 		.interruptible = false,
1564 		.gfp_retry_mayfail = false,
1565 	};
1566 
1567 	if (ttm_bo->ttm) {
1568 		struct xe_ttm_tt *xe_tt =
1569 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1570 
1571 		if (xe_tt->purgeable)
1572 			xe_ttm_bo_purge(ttm_bo, &ctx);
1573 	}
1574 }
1575 
1576 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1577 				unsigned long offset, void *buf, int len,
1578 				int write)
1579 {
1580 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1581 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1582 	struct iosys_map vmap;
1583 	struct xe_res_cursor cursor;
1584 	struct xe_vram_region *vram;
1585 	int bytes_left = len;
1586 	int err = 0;
1587 
1588 	xe_bo_assert_held(bo);
1589 	xe_device_assert_mem_access(xe);
1590 
1591 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1592 		return -EIO;
1593 
1594 	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
1595 		struct xe_migrate *migrate =
1596 			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1597 
1598 		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1599 					       write);
1600 		goto out;
1601 	}
1602 
1603 	vram = res_to_mem_region(ttm_bo->resource);
1604 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1605 		     xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);
1606 
1607 	do {
1608 		unsigned long page_offset = (offset & ~PAGE_MASK);
1609 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1610 
1611 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1612 					  cursor.start);
1613 		if (write)
1614 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1615 		else
1616 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1617 
1618 		buf += byte_count;
1619 		offset += byte_count;
1620 		bytes_left -= byte_count;
1621 		if (bytes_left)
1622 			xe_res_next(&cursor, PAGE_SIZE);
1623 	} while (bytes_left);
1624 
1625 out:
1626 	return err ?: len;
1627 }
1628 
1629 const struct ttm_device_funcs xe_ttm_funcs = {
1630 	.ttm_tt_create = xe_ttm_tt_create,
1631 	.ttm_tt_populate = xe_ttm_tt_populate,
1632 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1633 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1634 	.evict_flags = xe_evict_flags,
1635 	.move = xe_bo_move,
1636 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1637 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1638 	.access_memory = xe_ttm_access_memory,
1639 	.release_notify = xe_ttm_bo_release_notify,
1640 	.eviction_valuable = xe_bo_eviction_valuable,
1641 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1642 	.swap_notify = xe_ttm_bo_swap_notify,
1643 };
1644 
1645 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1646 {
1647 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1648 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1649 	struct xe_tile *tile;
1650 	u8 id;
1651 
1652 	if (bo->ttm.base.import_attach)
1653 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1654 	drm_gem_object_release(&bo->ttm.base);
1655 
1656 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1657 
1658 	for_each_tile(tile, xe, id)
1659 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1660 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1661 
1662 #ifdef CONFIG_PROC_FS
1663 	if (bo->client)
1664 		xe_drm_client_remove_bo(bo);
1665 #endif
1666 
1667 	if (bo->vm && xe_bo_is_user(bo))
1668 		xe_vm_put(bo->vm);
1669 
1670 	if (bo->parent_obj)
1671 		xe_bo_put(bo->parent_obj);
1672 
1673 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1674 	if (!list_empty(&bo->vram_userfault_link))
1675 		list_del(&bo->vram_userfault_link);
1676 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1677 
1678 	kfree(bo);
1679 }
1680 
1681 static void xe_gem_object_free(struct drm_gem_object *obj)
1682 {
1683 	/* Our BO reference counting scheme works as follows:
1684 	 *
1685 	 * The gem object kref is typically used throughout the driver,
1686 	 * and the gem object holds a ttm_buffer_object refcount, so
1687 	 * that when the last gem object reference is put, which is when
1688 	 * we end up in this function, we also put that ttm_buffer_object
1689 	 * refcount. Anything using gem interfaces is then no longer
1690 	 * allowed to access the object in a way that requires a gem
1691 	 * refcount, including locking the object.
1692 	 *
1693 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1694 	 * refcount directly if needed.
1695 	 */
1696 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1697 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1698 }
1699 
1700 static void xe_gem_object_close(struct drm_gem_object *obj,
1701 				struct drm_file *file_priv)
1702 {
1703 	struct xe_bo *bo = gem_to_xe_bo(obj);
1704 
1705 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1706 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1707 
1708 		xe_bo_lock(bo, false);
1709 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1710 		xe_bo_unlock(bo);
1711 	}
1712 }
1713 
1714 static bool should_migrate_to_smem(struct xe_bo *bo)
1715 {
1716 	/*
1717 	 * NOTE: The following atomic checks are platform-specific. For example,
1718 	 * if a device supports CXL atomics, these may not be necessary or
1719 	 * may behave differently.
1720 	 */
1721 
1722 	return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
1723 	       bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
1724 }
1725 
1726 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1727 {
1728 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1729 	struct drm_device *ddev = tbo->base.dev;
1730 	struct xe_device *xe = to_xe_device(ddev);
1731 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1732 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1733 	vm_fault_t ret;
1734 	int idx, r = 0;
1735 
1736 	if (needs_rpm)
1737 		xe_pm_runtime_get(xe);
1738 
1739 	ret = ttm_bo_vm_reserve(tbo, vmf);
1740 	if (ret)
1741 		goto out;
1742 
1743 	if (drm_dev_enter(ddev, &idx)) {
1744 		trace_xe_bo_cpu_fault(bo);
1745 
1746 		if (should_migrate_to_smem(bo)) {
1747 			xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM);
1748 
1749 			r = xe_bo_migrate(bo, XE_PL_TT);
1750 			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1751 				ret = VM_FAULT_NOPAGE;
1752 			else if (r)
1753 				ret = VM_FAULT_SIGBUS;
1754 		}
1755 		if (!ret)
1756 			ret = ttm_bo_vm_fault_reserved(vmf,
1757 						       vmf->vma->vm_page_prot,
1758 						       TTM_BO_VM_NUM_PREFAULT);
1759 		drm_dev_exit(idx);
1760 
1761 		if (ret == VM_FAULT_RETRY &&
1762 		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1763 			goto out;
1764 
1765 		/*
1766 		 * ttm_bo_vm_reserve() already holds the dma_resv lock.
1767 		 */
1768 		if (ret == VM_FAULT_NOPAGE &&
1769 		    mem_type_is_vram(tbo->resource->mem_type)) {
1770 			mutex_lock(&xe->mem_access.vram_userfault.lock);
1771 			if (list_empty(&bo->vram_userfault_link))
1772 				list_add(&bo->vram_userfault_link,
1773 					 &xe->mem_access.vram_userfault.list);
1774 			mutex_unlock(&xe->mem_access.vram_userfault.lock);
1775 		}
1776 	} else {
1777 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1778 	}
1779 
1780 	dma_resv_unlock(tbo->base.resv);
1781 out:
1782 	if (needs_rpm)
1783 		xe_pm_runtime_put(xe);
1784 
1785 	return ret;
1786 }
1787 
1788 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1789 			   void *buf, int len, int write)
1790 {
1791 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1792 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1793 	struct xe_device *xe = xe_bo_device(bo);
1794 	int ret;
1795 
1796 	xe_pm_runtime_get(xe);
1797 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1798 	xe_pm_runtime_put(xe);
1799 
1800 	return ret;
1801 }
1802 
1803 /**
1804  * xe_bo_read() - Read from an xe_bo
1805  * @bo: The buffer object to read from.
1806  * @offset: The byte offset to start reading from.
1807  * @dst: Location to store the read data.
1808  * @size: Size in bytes for the read.
1809  *
1810  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1811  *
1812  * Return: Zero on success, or negative error.
1813  */
1814 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1815 {
1816 	int ret;
1817 
1818 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1819 	if (ret >= 0 && ret != size)
1820 		ret = -EIO;
1821 	else if (ret == size)
1822 		ret = 0;
1823 
1824 	return ret;
1825 }
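
/*
 * Minimal usage sketch for xe_bo_read(), assuming @bo and @offset come from
 * the caller:
 *
 *	u32 val;
 *	int err = xe_bo_read(bo, offset, &val, sizeof(val));
 *
 *	if (err)
 *		return err;
 */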
1826 
1827 static const struct vm_operations_struct xe_gem_vm_ops = {
1828 	.fault = xe_gem_fault,
1829 	.open = ttm_bo_vm_open,
1830 	.close = ttm_bo_vm_close,
1831 	.access = xe_bo_vm_access,
1832 };
1833 
1834 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1835 	.free = xe_gem_object_free,
1836 	.close = xe_gem_object_close,
1837 	.mmap = drm_gem_ttm_mmap,
1838 	.export = xe_gem_prime_export,
1839 	.vm_ops = &xe_gem_vm_ops,
1840 };
1841 
1842 /**
1843  * xe_bo_alloc - Allocate storage for a struct xe_bo
1844  *
1845  * This function is intended to allocate storage to be used for input
1846  * to __xe_bo_create_locked(), in case a pointer to the bo to be
1847  * created is needed before the call to __xe_bo_create_locked().
1848  * If __xe_bo_create_locked() ends up never being called, then the
1849  * storage allocated with this function needs to be freed using
1850  * xe_bo_free().
1851  *
1852  * Return: A pointer to an uninitialized struct xe_bo on success,
1853  * ERR_PTR(-ENOMEM) on error.
1854  */
1855 struct xe_bo *xe_bo_alloc(void)
1856 {
1857 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1858 
1859 	if (!bo)
1860 		return ERR_PTR(-ENOMEM);
1861 
1862 	return bo;
1863 }
1864 
1865 /**
1866  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1867  * @bo: The buffer object storage.
1868  *
1869  * Refer to xe_bo_alloc() documentation for valid use-cases.
1870  */
1871 void xe_bo_free(struct xe_bo *bo)
1872 {
1873 	kfree(bo);
1874 }
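
/*
 * Informal usage sketch for xe_bo_alloc() / xe_bo_free(), for the case where
 * the bo pointer is needed before __xe_bo_create_locked() runs:
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	// ... publish the pointer to whoever needs it early ...
 *	// and later either pass bo to __xe_bo_create_locked() or, if that
 *	// call never happens, release the storage with xe_bo_free(bo).
 */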
1875 
1876 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1877 				     struct xe_tile *tile, struct dma_resv *resv,
1878 				     struct ttm_lru_bulk_move *bulk, size_t size,
1879 				     u16 cpu_caching, enum ttm_bo_type type,
1880 				     u32 flags)
1881 {
1882 	struct ttm_operation_ctx ctx = {
1883 		.interruptible = true,
1884 		.no_wait_gpu = false,
1885 		.gfp_retry_mayfail = true,
1886 	};
1887 	struct ttm_placement *placement;
1888 	uint32_t alignment;
1889 	size_t aligned_size;
1890 	int err;
1891 
1892 	/* Only kernel objects should set a tile */
1893 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1894 
1895 	if (XE_WARN_ON(!size)) {
1896 		xe_bo_free(bo);
1897 		return ERR_PTR(-EINVAL);
1898 	}
1899 
1900 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also to be set */
1901 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1902 		return ERR_PTR(-EINVAL);
1903 
1904 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1905 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1906 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1907 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1908 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1909 
1910 		aligned_size = ALIGN(size, align);
1911 		if (type != ttm_bo_type_device)
1912 			size = ALIGN(size, align);
1913 		flags |= XE_BO_FLAG_INTERNAL_64K;
1914 		alignment = align >> PAGE_SHIFT;
1915 	} else {
1916 		aligned_size = ALIGN(size, SZ_4K);
1917 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1918 		alignment = SZ_4K >> PAGE_SHIFT;
1919 	}
1920 
1921 	if (type == ttm_bo_type_device && aligned_size != size)
1922 		return ERR_PTR(-EINVAL);
1923 
1924 	if (!bo) {
1925 		bo = xe_bo_alloc();
1926 		if (IS_ERR(bo))
1927 			return bo;
1928 	}
1929 
1930 	bo->ccs_cleared = false;
1931 	bo->tile = tile;
1932 	bo->flags = flags;
1933 	bo->cpu_caching = cpu_caching;
1934 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1935 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1936 	INIT_LIST_HEAD(&bo->pinned_link);
1937 #ifdef CONFIG_PROC_FS
1938 	INIT_LIST_HEAD(&bo->client_link);
1939 #endif
1940 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1941 
1942 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1943 
1944 	if (resv) {
1945 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1946 		ctx.resv = resv;
1947 	}
1948 
1949 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1950 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1951 		if (WARN_ON(err)) {
1952 			xe_ttm_bo_destroy(&bo->ttm);
1953 			return ERR_PTR(err);
1954 		}
1955 	}
1956 
1957 	/* Defer populating type_sg bos */
1958 	placement = (type == ttm_bo_type_sg ||
1959 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1960 		&bo->placement;
1961 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1962 				   placement, alignment,
1963 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1964 	if (err)
1965 		return ERR_PTR(err);
1966 
1967 	/*
1968 	 * The VRAM pages underneath are potentially still being accessed by the
1969 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1970 	 * sure to add any corresponding move/clear fences into the object's
1971 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1972 	 *
1973 	 * For KMD internal buffers we don't care about GPU clearing, however we
1974 	 * still need to handle async evictions, where the VRAM is still being
1975 	 * accessed by the GPU. Most internal callers are not expecting this,
1976 	 * since they are missing the required synchronisation before accessing
1977 	 * the memory. To keep things simple just sync wait any kernel fences
1978 	 * here, if the buffer is designated KMD internal.
1979 	 *
1980 	 * For normal userspace objects we should already have the required
1981 	 * pipelining or sync waiting elsewhere, since we already have to deal
1982 	 * with things like async GPU clearing.
1983 	 */
1984 	if (type == ttm_bo_type_kernel) {
1985 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1986 						     DMA_RESV_USAGE_KERNEL,
1987 						     ctx.interruptible,
1988 						     MAX_SCHEDULE_TIMEOUT);
1989 
1990 		if (timeout < 0) {
1991 			if (!resv)
1992 				dma_resv_unlock(bo->ttm.base.resv);
1993 			xe_bo_put(bo);
1994 			return ERR_PTR(timeout);
1995 		}
1996 	}
1997 
1998 	bo->created = true;
1999 	if (bulk)
2000 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
2001 	else
2002 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2003 
2004 	return bo;
2005 }
2006 
2007 static int __xe_bo_fixed_placement(struct xe_device *xe,
2008 				   struct xe_bo *bo,
2009 				   u32 flags,
2010 				   u64 start, u64 end, u64 size)
2011 {
2012 	struct ttm_place *place = bo->placements;
2013 
2014 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
2015 		return -EINVAL;
2016 
2017 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
2018 	place->fpfn = start >> PAGE_SHIFT;
2019 	place->lpfn = end >> PAGE_SHIFT;
2020 
2021 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
2022 	case XE_BO_FLAG_VRAM0:
2023 		place->mem_type = XE_PL_VRAM0;
2024 		break;
2025 	case XE_BO_FLAG_VRAM1:
2026 		place->mem_type = XE_PL_VRAM1;
2027 		break;
2028 	case XE_BO_FLAG_STOLEN:
2029 		place->mem_type = XE_PL_STOLEN;
2030 		break;
2031 
2032 	default:
2033 		/* 0 or multiple of the above set */
2034 		return -EINVAL;
2035 	}
2036 
2037 	bo->placement = (struct ttm_placement) {
2038 		.num_placement = 1,
2039 		.placement = place,
2040 	};
2041 
2042 	return 0;
2043 }
2044 
2045 static struct xe_bo *
2046 __xe_bo_create_locked(struct xe_device *xe,
2047 		      struct xe_tile *tile, struct xe_vm *vm,
2048 		      size_t size, u64 start, u64 end,
2049 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
2050 		      u64 alignment)
2051 {
2052 	struct xe_bo *bo = NULL;
2053 	int err;
2054 
2055 	if (vm)
2056 		xe_vm_assert_held(vm);
2057 
2058 	if (start || end != ~0ULL) {
2059 		bo = xe_bo_alloc();
2060 		if (IS_ERR(bo))
2061 			return bo;
2062 
2063 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
2064 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
2065 		if (err) {
2066 			xe_bo_free(bo);
2067 			return ERR_PTR(err);
2068 		}
2069 	}
2070 
2071 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
2072 				    vm && !xe_vm_in_fault_mode(vm) &&
2073 				    flags & XE_BO_FLAG_USER ?
2074 				    &vm->lru_bulk_move : NULL, size,
2075 				    cpu_caching, type, flags);
2076 	if (IS_ERR(bo))
2077 		return bo;
2078 
2079 	bo->min_align = alignment;
2080 
2081 	/*
2082 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
2083 	 * to ensure the shared resv doesn't disappear under the bo, the bo
2084 	 * will keep a reference to the vm, and avoid circular references
2085 	 * by having all the vm's bo references released at vm close
2086 	 * time.
2087 	 */
2088 	if (vm && xe_bo_is_user(bo))
2089 		xe_vm_get(vm);
2090 	bo->vm = vm;
2091 
2092 	if (bo->flags & XE_BO_FLAG_GGTT) {
2093 		struct xe_tile *t;
2094 		u8 id;
2095 
2096 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
2097 			if (!tile && flags & XE_BO_FLAG_STOLEN)
2098 				tile = xe_device_get_root_tile(xe);
2099 
2100 			xe_assert(xe, tile);
2101 		}
2102 
2103 		for_each_tile(t, xe, id) {
2104 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
2105 				continue;
2106 
2107 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
2108 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
2109 							   start + xe_bo_size(bo), U64_MAX);
2110 			} else {
2111 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
2112 			}
2113 			if (err)
2114 				goto err_unlock_put_bo;
2115 		}
2116 	}
2117 
2118 	trace_xe_bo_create(bo);
2119 	return bo;
2120 
2121 err_unlock_put_bo:
2122 	__xe_bo_unset_bulk_move(bo);
2123 	xe_bo_unlock_vm_held(bo);
2124 	xe_bo_put(bo);
2125 	return ERR_PTR(err);
2126 }
2127 
2128 struct xe_bo *
2129 xe_bo_create_locked_range(struct xe_device *xe,
2130 			  struct xe_tile *tile, struct xe_vm *vm,
2131 			  size_t size, u64 start, u64 end,
2132 			  enum ttm_bo_type type, u32 flags, u64 alignment)
2133 {
2134 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
2135 				     flags, alignment);
2136 }
2137 
2138 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
2139 				  struct xe_vm *vm, size_t size,
2140 				  enum ttm_bo_type type, u32 flags)
2141 {
2142 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
2143 				     flags, 0);
2144 }
2145 
2146 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
2147 				struct xe_vm *vm, size_t size,
2148 				u16 cpu_caching,
2149 				u32 flags)
2150 {
2151 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
2152 						 cpu_caching, ttm_bo_type_device,
2153 						 flags | XE_BO_FLAG_USER, 0);
2154 	if (!IS_ERR(bo))
2155 		xe_bo_unlock_vm_held(bo);
2156 
2157 	return bo;
2158 }
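
/*
 * Usage sketch (illustrative only): create an unbound (vm == NULL),
 * write-combined user BO placed in the root tile's VRAM on discrete parts.
 * The size and the exact flag combination are placeholders.
 *
 *	struct xe_bo *bo;
 *
 *	bo = xe_bo_create_user(xe, NULL, NULL, SZ_2M,
 *			       DRM_XE_GEM_CPU_CACHING_WC,
 *			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
 *			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 */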
2159 
2160 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
2161 			   struct xe_vm *vm, size_t size,
2162 			   enum ttm_bo_type type, u32 flags)
2163 {
2164 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
2165 
2166 	if (!IS_ERR(bo))
2167 		xe_bo_unlock_vm_held(bo);
2168 
2169 	return bo;
2170 }
2171 
2172 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
2173 				      struct xe_vm *vm,
2174 				      size_t size, u64 offset,
2175 				      enum ttm_bo_type type, u32 flags)
2176 {
2177 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
2178 					       type, flags, 0);
2179 }
2180 
2181 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
2182 					      struct xe_tile *tile,
2183 					      struct xe_vm *vm,
2184 					      size_t size, u64 offset,
2185 					      enum ttm_bo_type type, u32 flags,
2186 					      u64 alignment)
2187 {
2188 	struct xe_bo *bo;
2189 	int err;
2190 	u64 start = offset == ~0ull ? 0 : offset;
2191 	u64 end = offset == ~0ull ? offset : start + size;
2192 
2193 	if (flags & XE_BO_FLAG_STOLEN &&
2194 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
2195 		flags |= XE_BO_FLAG_GGTT;
2196 
2197 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
2198 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
2199 				       alignment);
2200 	if (IS_ERR(bo))
2201 		return bo;
2202 
2203 	err = xe_bo_pin(bo);
2204 	if (err)
2205 		goto err_put;
2206 
2207 	err = xe_bo_vmap(bo);
2208 	if (err)
2209 		goto err_unpin;
2210 
2211 	xe_bo_unlock_vm_held(bo);
2212 
2213 	return bo;
2214 
2215 err_unpin:
2216 	xe_bo_unpin(bo);
2217 err_put:
2218 	xe_bo_unlock_vm_held(bo);
2219 	xe_bo_put(bo);
2220 	return ERR_PTR(err);
2221 }
2222 
2223 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2224 				   struct xe_vm *vm, size_t size,
2225 				   enum ttm_bo_type type, u32 flags)
2226 {
2227 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
2228 }
2229 
2230 static void __xe_bo_unpin_map_no_vm(void *arg)
2231 {
2232 	xe_bo_unpin_map_no_vm(arg);
2233 }
2234 
2235 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2236 					   size_t size, u32 flags)
2237 {
2238 	struct xe_bo *bo;
2239 	int ret;
2240 
2241 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
2242 
2243 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
2244 	if (IS_ERR(bo))
2245 		return bo;
2246 
2247 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2248 	if (ret)
2249 		return ERR_PTR(ret);
2250 
2251 	return bo;
2252 }
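
/*
 * Usage sketch (illustrative only): create a managed, pinned and CPU-mapped
 * kernel BO and fill it through the vmap. "blob" and "blob_size" are
 * placeholders for caller-provided data; teardown happens automatically via
 * the devm action registered above.
 *
 *	struct xe_bo *bo;
 *
 *	bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(blob_size),
 *					  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *					  XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	xe_map_memcpy_to(xe, &bo->vmap, 0, blob, blob_size);
 */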
2253 
2254 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
2255 					     const void *data, size_t size, u32 flags)
2256 {
2257 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
2258 
2259 	if (IS_ERR(bo))
2260 		return bo;
2261 
2262 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
2263 
2264 	return bo;
2265 }
2266 
2267 /**
2268  * xe_managed_bo_reinit_in_vram() - Replace a managed BO with a VRAM copy
2269  * @xe: xe device
2270  * @tile: Tile where the new buffer will be created
2271  * @src: Managed buffer object allocated in system memory
2272  *
2273  * Replace a managed src buffer object allocated in system memory with a new
2274  * one allocated in vram, copying the data between them.
2275  * The buffer object in VRAM will not have the same GGTT address; the caller
2276  * is responsible for making sure that any old references to it are updated.
2277  *
2278  * Returns 0 for success, negative error code otherwise.
2279  */
2280 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
2281 {
2282 	struct xe_bo *bo;
2283 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
2284 
2285 	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
2286 				      XE_BO_FLAG_PINNED_NORESTORE);
2287 
2288 	xe_assert(xe, IS_DGFX(xe));
2289 	xe_assert(xe, !(*src)->vmap.is_iomem);
2290 
2291 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
2292 					    xe_bo_size(*src), dst_flags);
2293 	if (IS_ERR(bo))
2294 		return PTR_ERR(bo);
2295 
2296 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
2297 	*src = bo;
2298 
2299 	return 0;
2300 }
2301 
2302 /*
2303  * XXX: This is in the VM bind data path; this offset should likely be
2304  * calculated once and stored, and recalculated if the BO is moved.
2305  */
2306 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2307 {
2308 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2309 
2310 	switch (res->mem_type) {
2311 	case XE_PL_STOLEN:
2312 		return xe_ttm_stolen_gpu_offset(xe);
2313 	case XE_PL_TT:
2314 	case XE_PL_SYSTEM:
2315 		return 0;
2316 	default:
2317 		return res_to_mem_region(res)->dpa_base;
2318 	}
2319 	return 0;
2320 }
2321 
2322 /**
2323  * xe_bo_pin_external - pin an external BO
2324  * @bo: buffer object to be pinned
2325  *
2326  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2327  * BO. Unique call compared to xe_bo_pin() as this function has its own set of
2328  * asserts and code to ensure evict / restore on suspend / resume.
2329  *
2330  * Returns 0 for success, negative error code otherwise.
2331  */
2332 int xe_bo_pin_external(struct xe_bo *bo)
2333 {
2334 	struct xe_device *xe = xe_bo_device(bo);
2335 	int err;
2336 
2337 	xe_assert(xe, !bo->vm);
2338 	xe_assert(xe, xe_bo_is_user(bo));
2339 
2340 	if (!xe_bo_is_pinned(bo)) {
2341 		err = xe_bo_validate(bo, NULL, false);
2342 		if (err)
2343 			return err;
2344 
2345 		spin_lock(&xe->pinned.lock);
2346 		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
2347 		spin_unlock(&xe->pinned.lock);
2348 	}
2349 
2350 	ttm_bo_pin(&bo->ttm);
2351 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2352 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2353 
2354 	/*
2355 	 * FIXME: If we always use the reserve / unreserve functions for locking
2356 	 * we do not need this.
2357 	 */
2358 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2359 
2360 	return 0;
2361 }
2362 
2363 int xe_bo_pin(struct xe_bo *bo)
2364 {
2365 	struct ttm_place *place = &bo->placements[0];
2366 	struct xe_device *xe = xe_bo_device(bo);
2367 	int err;
2368 
2369 	/* We currently don't expect user BO to be pinned */
2370 	xe_assert(xe, !xe_bo_is_user(bo));
2371 
2372 	/* Pinned object must be in GGTT or have pinned flag */
2373 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
2374 				   XE_BO_FLAG_GGTT));
2375 
2376 	/*
2377 	 * No reason we can't support pinning imported dma-bufs we just don't
2378 	 * expect to pin an imported dma-buf.
2379 	 */
2380 	xe_assert(xe, !bo->ttm.base.import_attach);
2381 
2382 	/* We only expect at most 1 pin */
2383 	xe_assert(xe, !xe_bo_is_pinned(bo));
2384 
2385 	err = xe_bo_validate(bo, NULL, false);
2386 	if (err)
2387 		return err;
2388 
2389 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2390 		spin_lock(&xe->pinned.lock);
2391 		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
2392 			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
2393 		else
2394 			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
2395 		spin_unlock(&xe->pinned.lock);
2396 	}
2397 
2398 	ttm_bo_pin(&bo->ttm);
2399 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2400 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2401 
2402 	/*
2403 	 * FIXME: If we always use the reserve / unreserve functions for locking
2404 	 * we do not need this.
2405 	 */
2406 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2407 
2408 	return 0;
2409 }
2410 
2411 /**
2412  * xe_bo_unpin_external - unpin an external BO
2413  * @bo: buffer object to be unpinned
2414  *
2415  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2416  * BO. Unique call compared to xe_bo_unpin() as this function has its own set of
2417  * asserts and code to ensure evict / restore on suspend / resume.
2420  */
2421 void xe_bo_unpin_external(struct xe_bo *bo)
2422 {
2423 	struct xe_device *xe = xe_bo_device(bo);
2424 
2425 	xe_assert(xe, !bo->vm);
2426 	xe_assert(xe, xe_bo_is_pinned(bo));
2427 	xe_assert(xe, xe_bo_is_user(bo));
2428 
2429 	spin_lock(&xe->pinned.lock);
2430 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
2431 		list_del_init(&bo->pinned_link);
2432 	spin_unlock(&xe->pinned.lock);
2433 
2434 	ttm_bo_unpin(&bo->ttm);
2435 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2436 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2437 
2438 	/*
2439 	 * FIXME: If we always use the reserve / unreserve functions for locking
2440 	 * we do not need this.
2441 	 */
2442 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2443 }
2444 
2445 void xe_bo_unpin(struct xe_bo *bo)
2446 {
2447 	struct ttm_place *place = &bo->placements[0];
2448 	struct xe_device *xe = xe_bo_device(bo);
2449 
2450 	xe_assert(xe, !bo->ttm.base.import_attach);
2451 	xe_assert(xe, xe_bo_is_pinned(bo));
2452 
2453 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2454 		spin_lock(&xe->pinned.lock);
2455 		xe_assert(xe, !list_empty(&bo->pinned_link));
2456 		list_del_init(&bo->pinned_link);
2457 		spin_unlock(&xe->pinned.lock);
2458 
2459 		if (bo->backup_obj) {
2460 			if (xe_bo_is_pinned(bo->backup_obj))
2461 				ttm_bo_unpin(&bo->backup_obj->ttm);
2462 			xe_bo_put(bo->backup_obj);
2463 			bo->backup_obj = NULL;
2464 		}
2465 	}
2466 	ttm_bo_unpin(&bo->ttm);
2467 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2468 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2469 }
2470 
2471 /**
2472  * xe_bo_validate() - Make sure the bo is in an allowed placement
2473  * @bo: The bo
2474  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2475  *      NULL. Used together with @allow_res_evict.
2476  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2477  *                   reservation object.
2478  *
2479  * Make sure the bo is in allowed placement, migrating it if necessary. If
2480  * needed, other bos will be evicted. If bos selected for eviction share
2481  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
2482  * set to true, otherwise they will be bypassed.
2483  *
2484  * Return: 0 on success, negative error code on failure. May return
2485  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2486  */
2487 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
2488 {
2489 	struct ttm_operation_ctx ctx = {
2490 		.interruptible = true,
2491 		.no_wait_gpu = false,
2492 		.gfp_retry_mayfail = true,
2493 	};
2494 	int ret;
2495 
2496 	if (vm) {
2497 		lockdep_assert_held(&vm->lock);
2498 		xe_vm_assert_held(vm);
2499 
2500 		ctx.allow_res_evict = allow_res_evict;
2501 		ctx.resv = xe_vm_resv(vm);
2502 	}
2503 
2504 	xe_vm_set_validating(vm, allow_res_evict);
2505 	trace_xe_bo_validate(bo);
2506 	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2507 	xe_vm_clear_validating(vm, allow_res_evict);
2508 
2509 	return ret;
2510 }
2511 
2512 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2513 {
2514 	if (bo->destroy == &xe_ttm_bo_destroy)
2515 		return true;
2516 
2517 	return false;
2518 }
2519 
2520 /*
2521  * Resolve a BO address. There is no assert to check if the proper lock is held
2522  * so it should only be used in cases where it is not fatal to get the wrong
2523  * address, such as printing debug information, but not in cases where memory is
2524  * written based on this result.
2525  */
2526 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2527 {
2528 	struct xe_device *xe = xe_bo_device(bo);
2529 	struct xe_res_cursor cur;
2530 	u64 page;
2531 
2532 	xe_assert(xe, page_size <= PAGE_SIZE);
2533 	page = offset >> PAGE_SHIFT;
2534 	offset &= (PAGE_SIZE - 1);
2535 
2536 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2537 		xe_assert(xe, bo->ttm.ttm);
2538 
2539 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2540 				page_size, &cur);
2541 		return xe_res_dma(&cur) + offset;
2542 	} else {
2543 		struct xe_res_cursor cur;
2544 
2545 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2546 			     page_size, &cur);
2547 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2548 	}
2549 }
2550 
2551 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2552 {
2553 	if (!READ_ONCE(bo->ttm.pin_count))
2554 		xe_bo_assert_held(bo);
2555 	return __xe_bo_addr(bo, offset, page_size);
2556 }
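
/*
 * Usage sketch (illustrative only): look up the DMA/device address backing
 * the first page of a pinned (or locked) BO, e.g. for programming it into a
 * descriptor.
 *
 *	dma_addr_t addr = xe_bo_addr(bo, 0, PAGE_SIZE);
 */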
2557 
2558 int xe_bo_vmap(struct xe_bo *bo)
2559 {
2560 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2561 	void *virtual;
2562 	bool is_iomem;
2563 	int ret;
2564 
2565 	xe_bo_assert_held(bo);
2566 
2567 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2568 			!force_contiguous(bo->flags)))
2569 		return -EINVAL;
2570 
2571 	if (!iosys_map_is_null(&bo->vmap))
2572 		return 0;
2573 
2574 	/*
2575 	 * We use this more or less deprecated interface for now since
2576 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2577 	 * single page bos, which is done here.
2578 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2579 	 * to use struct iosys_map.
2580 	 */
2581 	ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
2582 	if (ret)
2583 		return ret;
2584 
2585 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
2586 	if (is_iomem)
2587 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
2588 	else
2589 		iosys_map_set_vaddr(&bo->vmap, virtual);
2590 
2591 	return 0;
2592 }
2593 
2594 static void __xe_bo_vunmap(struct xe_bo *bo)
2595 {
2596 	if (!iosys_map_is_null(&bo->vmap)) {
2597 		iosys_map_clear(&bo->vmap);
2598 		ttm_bo_kunmap(&bo->kmap);
2599 	}
2600 }
2601 
2602 void xe_bo_vunmap(struct xe_bo *bo)
2603 {
2604 	xe_bo_assert_held(bo);
2605 	__xe_bo_vunmap(bo);
2606 }
2607 
2608 static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
2609 {
2610 	if (value == DRM_XE_PXP_TYPE_NONE)
2611 		return 0;
2612 
2613 	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
2614 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
2615 		return -EINVAL;
2616 
2617 	return xe_pxp_key_assign(xe->pxp, bo);
2618 }
2619 
2620 typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
2621 					     struct xe_bo *bo,
2622 					     u64 value);
2623 
2624 static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
2625 	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
2626 };
2627 
2628 static int gem_create_user_ext_set_property(struct xe_device *xe,
2629 					    struct xe_bo *bo,
2630 					    u64 extension)
2631 {
2632 	u64 __user *address = u64_to_user_ptr(extension);
2633 	struct drm_xe_ext_set_property ext;
2634 	int err;
2635 	u32 idx;
2636 
2637 	err = copy_from_user(&ext, address, sizeof(ext));
2638 	if (XE_IOCTL_DBG(xe, err))
2639 		return -EFAULT;
2640 
2641 	if (XE_IOCTL_DBG(xe, ext.property >=
2642 			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
2643 	    XE_IOCTL_DBG(xe, ext.pad) ||
2644 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
2645 		return -EINVAL;
2646 
2647 	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
2648 	if (!gem_create_set_property_funcs[idx])
2649 		return -EINVAL;
2650 
2651 	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
2652 }
2653 
2654 typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
2655 					       struct xe_bo *bo,
2656 					       u64 extension);
2657 
2658 static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
2659 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
2660 };
2661 
2662 #define MAX_USER_EXTENSIONS	16
2663 static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
2664 				      u64 extensions, int ext_number)
2665 {
2666 	u64 __user *address = u64_to_user_ptr(extensions);
2667 	struct drm_xe_user_extension ext;
2668 	int err;
2669 	u32 idx;
2670 
2671 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
2672 		return -E2BIG;
2673 
2674 	err = copy_from_user(&ext, address, sizeof(ext));
2675 	if (XE_IOCTL_DBG(xe, err))
2676 		return -EFAULT;
2677 
2678 	if (XE_IOCTL_DBG(xe, ext.pad) ||
2679 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
2680 		return -EINVAL;
2681 
2682 	idx = array_index_nospec(ext.name,
2683 				 ARRAY_SIZE(gem_create_user_extension_funcs));
2684 	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
2685 	if (XE_IOCTL_DBG(xe, err))
2686 		return err;
2687 
2688 	if (ext.next_extension)
2689 		return gem_create_user_extensions(xe, bo, ext.next_extension,
2690 						  ++ext_number);
2691 
2692 	return 0;
2693 }
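
/*
 * Userspace-side sketch (illustrative only) of the extension chain parsed
 * above: a single SET_PROPERTY extension requesting a PXP-protected buffer.
 * "fd" and "vram_placement_mask" are placeholders, and the struct layouts
 * are only summarized here; uapi/drm/xe_drm.h is authoritative.
 *
 *	struct drm_xe_ext_set_property pxp = {
 *		.base.name = DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE,
 *		.value = DRM_XE_PXP_TYPE_HWDRM,
 *	};
 *	struct drm_xe_gem_create create = {
 *		.extensions = (__u64)(uintptr_t)&pxp,
 *		.size = 0x10000,
 *		.placement = vram_placement_mask,
 *		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create);
 *
 * Further extensions would be linked through pxp.base.next_extension.
 */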
2694 
2695 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2696 			struct drm_file *file)
2697 {
2698 	struct xe_device *xe = to_xe_device(dev);
2699 	struct xe_file *xef = to_xe_file(file);
2700 	struct drm_xe_gem_create *args = data;
2701 	struct xe_vm *vm = NULL;
2702 	ktime_t end = 0;
2703 	struct xe_bo *bo;
2704 	unsigned int bo_flags;
2705 	u32 handle;
2706 	int err;
2707 
2708 	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2709 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2710 		return -EINVAL;
2711 
2712 	/* at least one valid memory placement must be specified */
2713 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2714 			 !args->placement))
2715 		return -EINVAL;
2716 
2717 	if (XE_IOCTL_DBG(xe, args->flags &
2718 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2719 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2720 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2721 		return -EINVAL;
2722 
2723 	if (XE_IOCTL_DBG(xe, args->handle))
2724 		return -EINVAL;
2725 
2726 	if (XE_IOCTL_DBG(xe, !args->size))
2727 		return -EINVAL;
2728 
2729 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2730 		return -EINVAL;
2731 
2732 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2733 		return -EINVAL;
2734 
2735 	bo_flags = 0;
2736 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2737 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2738 
2739 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2740 		bo_flags |= XE_BO_FLAG_SCANOUT;
2741 
2742 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2743 
2744 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2745 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2746 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2747 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2748 	    IS_ALIGNED(args->size, SZ_64K))
2749 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2750 
2751 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2752 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2753 			return -EINVAL;
2754 
2755 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2756 	}
2757 
2758 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2759 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2760 		return -EINVAL;
2761 
2762 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2763 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2764 		return -EINVAL;
2765 
2766 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2767 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2768 		return -EINVAL;
2769 
2770 	if (args->vm_id) {
2771 		vm = xe_vm_lookup(xef, args->vm_id);
2772 		if (XE_IOCTL_DBG(xe, !vm))
2773 			return -ENOENT;
2774 	}
2775 
2776 retry:
2777 	if (vm) {
2778 		err = xe_vm_lock(vm, true);
2779 		if (err)
2780 			goto out_vm;
2781 	}
2782 
2783 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2784 			       bo_flags);
2785 
2786 	if (vm)
2787 		xe_vm_unlock(vm);
2788 
2789 	if (IS_ERR(bo)) {
2790 		err = PTR_ERR(bo);
2791 		if (xe_vm_validate_should_retry(NULL, err, &end))
2792 			goto retry;
2793 		goto out_vm;
2794 	}
2795 
2796 	if (args->extensions) {
2797 		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
2798 		if (err)
2799 			goto out_bulk;
2800 	}
2801 
2802 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2803 	if (err)
2804 		goto out_bulk;
2805 
2806 	args->handle = handle;
2807 	goto out_put;
2808 
2809 out_bulk:
2810 	if (vm && !xe_vm_in_fault_mode(vm)) {
2811 		xe_vm_lock(vm, false);
2812 		__xe_bo_unset_bulk_move(bo);
2813 		xe_vm_unlock(vm);
2814 	}
2815 out_put:
2816 	xe_bo_put(bo);
2817 out_vm:
2818 	if (vm)
2819 		xe_vm_put(vm);
2820 
2821 	return err;
2822 }
2823 
2824 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2825 			     struct drm_file *file)
2826 {
2827 	struct xe_device *xe = to_xe_device(dev);
2828 	struct drm_xe_gem_mmap_offset *args = data;
2829 	struct drm_gem_object *gem_obj;
2830 
2831 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2832 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2833 		return -EINVAL;
2834 
2835 	if (XE_IOCTL_DBG(xe, args->flags &
2836 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
2837 		return -EINVAL;
2838 
2839 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
2840 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
2841 			return -EINVAL;
2842 
2843 		if (XE_IOCTL_DBG(xe, args->handle))
2844 			return -EINVAL;
2845 
2846 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
2847 			return -EINVAL;
2848 
2849 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
2850 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
2851 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
2852 		return 0;
2853 	}
2854 
2855 	gem_obj = drm_gem_object_lookup(file, args->handle);
2856 	if (XE_IOCTL_DBG(xe, !gem_obj))
2857 		return -ENOENT;
2858 
2859 	/* The mmap offset was set up at BO allocation time. */
2860 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2861 
2862 	xe_bo_put(gem_to_xe_bo(gem_obj));
2863 	return 0;
2864 }
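
/*
 * Userspace-side sketch (illustrative only) of consuming this ioctl: fetch
 * the fake offset for a GEM handle and mmap the BO through the DRM fd.
 * "fd", "handle" and "bo_size" are placeholders.
 *
 *	struct drm_xe_gem_mmap_offset mmo = { .handle = handle };
 *	void *ptr;
 *
 *	ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
 *	ptr = mmap(NULL, bo_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, mmo.offset);
 */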
2865 
2866 /**
2867  * xe_bo_lock() - Lock the buffer object's dma_resv object
2868  * @bo: The struct xe_bo whose lock is to be taken
2869  * @intr: Whether to perform any waits interruptibly
2870  *
2871  * Locks the buffer object's dma_resv object. If the buffer object is
2872  * pointing to a shared dma_resv object, that shared lock is locked.
2873  *
2874  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2875  * contended lock was interrupted. If @intr is set to false, the
2876  * function always returns 0.
2877  */
2878 int xe_bo_lock(struct xe_bo *bo, bool intr)
2879 {
2880 	if (intr)
2881 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2882 
2883 	dma_resv_lock(bo->ttm.base.resv, NULL);
2884 
2885 	return 0;
2886 }
2887 
2888 /**
2889  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2890  * @bo: The struct xe_bo whose lock is to be released.
2891  *
2892  * Unlock a buffer object lock that was locked by xe_bo_lock().
2893  */
2894 void xe_bo_unlock(struct xe_bo *bo)
2895 {
2896 	dma_resv_unlock(bo->ttm.base.resv);
2897 }
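
/*
 * Usage sketch (illustrative only) of the lock/unlock pair above, here around
 * a validation of a BO that does not share a VM's reservation object:
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	err = xe_bo_validate(bo, NULL, false);
 *	xe_bo_unlock(bo);
 */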
2898 
2899 /**
2900  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2901  * @bo: The buffer object to migrate
2902  * @mem_type: The TTM memory type intended to migrate to
2903  *
2904  * Check whether the buffer object supports migration to the
2905  * given memory type. Note that pinning may affect the ability to migrate as
2906  * returned by this function.
2907  *
2908  * This function is primarily intended as a helper for checking the
2909  * possibility to migrate buffer objects and can be called without
2910  * the object lock held.
2911  *
2912  * Return: true if migration is possible, false otherwise.
2913  */
2914 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2915 {
2916 	unsigned int cur_place;
2917 
2918 	if (bo->ttm.type == ttm_bo_type_kernel)
2919 		return true;
2920 
2921 	if (bo->ttm.type == ttm_bo_type_sg)
2922 		return false;
2923 
2924 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2925 	     cur_place++) {
2926 		if (bo->placements[cur_place].mem_type == mem_type)
2927 			return true;
2928 	}
2929 
2930 	return false;
2931 }
2932 
2933 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2934 {
2935 	memset(place, 0, sizeof(*place));
2936 	place->mem_type = mem_type;
2937 }
2938 
2939 /**
2940  * xe_bo_migrate - Migrate an object to the desired region id
2941  * @bo: The buffer object to migrate.
2942  * @mem_type: The TTM region type to migrate to.
2943  *
2944  * Attempt to migrate the buffer object to the desired memory region. The
2945  * buffer object must not be pinned, and must be locked.
2946  * On successful completion, the object memory type will be updated,
2947  * but an async migration task may not have completed yet. To wait for
2948  * the migration to finish, wait for the object's kernel fences to signal
2949  * with the object lock held.
2950  *
2951  * Return: 0 on success. Negative error code on failure. In particular may
2952  * return -EINTR or -ERESTARTSYS if signal pending.
2953  */
2954 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2955 {
2956 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2957 	struct ttm_operation_ctx ctx = {
2958 		.interruptible = true,
2959 		.no_wait_gpu = false,
2960 		.gfp_retry_mayfail = true,
2961 	};
2962 	struct ttm_placement placement;
2963 	struct ttm_place requested;
2964 
2965 	xe_bo_assert_held(bo);
2966 
2967 	if (bo->ttm.resource->mem_type == mem_type)
2968 		return 0;
2969 
2970 	if (xe_bo_is_pinned(bo))
2971 		return -EBUSY;
2972 
2973 	if (!xe_bo_can_migrate(bo, mem_type))
2974 		return -EINVAL;
2975 
2976 	xe_place_from_ttm_type(mem_type, &requested);
2977 	placement.num_placement = 1;
2978 	placement.placement = &requested;
2979 
2980 	/*
2981 	 * Stolen needs to be handled like below VRAM handling if we ever need
2982 	 * to support it.
2983 	 */
2984 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2985 
2986 	if (mem_type_is_vram(mem_type)) {
2987 		u32 c = 0;
2988 
2989 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2990 	}
2991 
2992 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2993 }
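
/*
 * Usage sketch (illustrative only): migrate a BO to VRAM0 and, as described
 * above, wait for the kernel fences under the object lock so the move has
 * actually finished before the data is relied upon.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	err = xe_bo_migrate(bo, XE_PL_VRAM0);
 *	if (!err)
 *		dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 *				      true, MAX_SCHEDULE_TIMEOUT);
 *	xe_bo_unlock(bo);
 */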
2994 
2995 /**
2996  * xe_bo_evict - Evict an object to evict placement
2997  * @bo: The buffer object to migrate.
2998  *
2999  * On successful completion, the object memory will be moved to evict
3000  * placement. This function blocks until the object has been fully moved.
3001  *
3002  * Return: 0 on success. Negative error code on failure.
3003  */
3004 int xe_bo_evict(struct xe_bo *bo)
3005 {
3006 	struct ttm_operation_ctx ctx = {
3007 		.interruptible = false,
3008 		.no_wait_gpu = false,
3009 		.gfp_retry_mayfail = true,
3010 	};
3011 	struct ttm_placement placement;
3012 	int ret;
3013 
3014 	xe_evict_flags(&bo->ttm, &placement);
3015 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
3016 	if (ret)
3017 		return ret;
3018 
3019 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
3020 			      false, MAX_SCHEDULE_TIMEOUT);
3021 
3022 	return 0;
3023 }
3024 
3025 /**
3026  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
3027  * placed in system memory.
3028  * @bo: The xe_bo
3029  *
3030  * Return: true if extra pages need to be allocated, false otherwise.
3031  */
3032 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
3033 {
3034 	struct xe_device *xe = xe_bo_device(bo);
3035 
3036 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
3037 		return false;
3038 
3039 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
3040 		return false;
3041 
3042 	/* On discrete GPUs, if the GPU can access this buffer from
3043 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
3044 	 * can't be used since there's no CCS storage associated with
3045 	 * non-VRAM addresses.
3046 	 */
3047 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
3048 		return false;
3049 
3050 	/*
3051 	 * Compression implies coh_none, therefore we know for sure that WB
3052 	 * memory can't currently use compression, which is likely one of the
3053 	 * common cases.
3054 	 */
3055 	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
3056 		return false;
3057 
3058 	return true;
3059 }
3060 
3061 /**
3062  * __xe_bo_release_dummy() - Dummy kref release function
3063  * @kref: The embedded struct kref.
3064  *
3065  * Dummy release function for xe_bo_put_deferred(). Keep off.
3066  */
3067 void __xe_bo_release_dummy(struct kref *kref)
3068 {
3069 }
3070 
3071 /**
3072  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
3073  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
3074  *
3075  * Puts all bos whose put was deferred by xe_bo_put_deferred().
3076  * The @deferred list can be either an onstack local list or a global
3077  * shared list used by a workqueue.
3078  */
3079 void xe_bo_put_commit(struct llist_head *deferred)
3080 {
3081 	struct llist_node *freed;
3082 	struct xe_bo *bo, *next;
3083 
3084 	if (!deferred)
3085 		return;
3086 
3087 	freed = llist_del_all(deferred);
3088 	if (!freed)
3089 		return;
3090 
3091 	llist_for_each_entry_safe(bo, next, freed, freed)
3092 		drm_gem_object_free(&bo->ttm.base.refcount);
3093 }
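
/*
 * Usage sketch (illustrative only) of the deferred put machinery: queue puts
 * from a context that must not sleep and flush them later from process
 * context.
 *
 *	LLIST_HEAD(deferred);
 *
 *	xe_bo_put_deferred(bo, &deferred);	// e.g. under a spinlock
 *
 *	xe_bo_put_commit(&deferred);		// later, in sleepable context
 */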
3094 
3095 static void xe_bo_dev_work_func(struct work_struct *work)
3096 {
3097 	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);
3098 
3099 	xe_bo_put_commit(&bo_dev->async_list);
3100 }
3101 
3102 /**
3103  * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
3104  * @bo_dev: The BO dev structure
3105  */
3106 void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
3107 {
3108 	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
3109 }
3110 
3111 /**
3112  * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
3113  * @bo_dev: The BO dev structure
3114  */
3115 void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
3116 {
3117 	flush_work(&bo_dev->async_free);
3118 }
3119 
3120 void xe_bo_put(struct xe_bo *bo)
3121 {
3122 	struct xe_tile *tile;
3123 	u8 id;
3124 
3125 	might_sleep();
3126 	if (bo) {
3127 #ifdef CONFIG_PROC_FS
3128 		if (bo->client)
3129 			might_lock(&bo->client->bos_lock);
3130 #endif
3131 		for_each_tile(tile, xe_bo_device(bo), id)
3132 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
3133 				xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt);
3134 		drm_gem_object_put(&bo->ttm.base);
3135 	}
3136 }
3137 
3138 /**
3139  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
3140  * @file_priv: The drm file the handle is created for
3141  * @dev: The drm device
3142  * @args: The dumb buffer creation arguments
3143  *
3144  * See dumb_create() hook in include/drm/drm_drv.h
3145  *
3146  * Return: 0 on success, negative error code on failure
3147  */
3148 int xe_bo_dumb_create(struct drm_file *file_priv,
3149 		      struct drm_device *dev,
3150 		      struct drm_mode_create_dumb *args)
3151 {
3152 	struct xe_device *xe = to_xe_device(dev);
3153 	struct xe_bo *bo;
3154 	uint32_t handle;
3155 	int cpp = DIV_ROUND_UP(args->bpp, 8);
3156 	int err;
3157 	u32 page_size = max_t(u32, PAGE_SIZE,
3158 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
3159 
3160 	args->pitch = ALIGN(args->width * cpp, 64);
3161 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
3162 			   page_size);
3163 
3164 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
3165 			       DRM_XE_GEM_CPU_CACHING_WC,
3166 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
3167 			       XE_BO_FLAG_SCANOUT |
3168 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
3169 	if (IS_ERR(bo))
3170 		return PTR_ERR(bo);
3171 
3172 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
3173 	/* drop reference from allocate - handle holds it now */
3174 	drm_gem_object_put(&bo->ttm.base);
3175 	if (!err)
3176 		args->handle = handle;
3177 	return err;
3178 }
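
/*
 * Worked example of the size computation above (illustrative only): a
 * 1920x1080 XRGB8888 dumb buffer has cpp = 4, so
 *
 *	pitch = ALIGN(1920 * 4, 64)       = 7680 bytes
 *	size  = ALIGN(7680 * 1080, SZ_4K) = 8294400 bytes (already 4K aligned)
 *
 * On platforms with XE_VRAM_FLAGS_NEED64K the same buffer rounds up to
 * ALIGN(8294400, SZ_64K) = 8323072 bytes.
 */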
3179 
3180 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
3181 {
3182 	struct ttm_buffer_object *tbo = &bo->ttm;
3183 	struct ttm_device *bdev = tbo->bdev;
3184 
3185 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
3186 
3187 	list_del_init(&bo->vram_userfault_link);
3188 }
3189 
3190 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
3191 #include "tests/xe_bo.c"
3192 #endif
3193