xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision f6e8dc9edf963dbc99085e54f6ced6da9daa6100)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_dumb_buffers.h>
13 #include <drm/drm_gem_ttm_helper.h>
14 #include <drm/drm_managed.h>
15 #include <drm/ttm/ttm_backup.h>
16 #include <drm/ttm/ttm_device.h>
17 #include <drm/ttm/ttm_placement.h>
18 #include <drm/ttm/ttm_tt.h>
19 #include <uapi/drm/xe_drm.h>
20 
21 #include <kunit/static_stub.h>
22 
23 #include <trace/events/gpu_mem.h>
24 
25 #include "xe_device.h"
26 #include "xe_dma_buf.h"
27 #include "xe_drm_client.h"
28 #include "xe_ggtt.h"
29 #include "xe_gt.h"
30 #include "xe_map.h"
31 #include "xe_migrate.h"
32 #include "xe_pm.h"
33 #include "xe_preempt_fence.h"
34 #include "xe_pxp.h"
35 #include "xe_res_cursor.h"
36 #include "xe_shrinker.h"
37 #include "xe_sriov_vf_ccs.h"
38 #include "xe_tile.h"
39 #include "xe_trace_bo.h"
40 #include "xe_ttm_stolen_mgr.h"
41 #include "xe_vm.h"
42 #include "xe_vram_types.h"
43 
44 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
45 	[XE_PL_SYSTEM] = "system",
46 	[XE_PL_TT] = "gtt",
47 	[XE_PL_VRAM0] = "vram0",
48 	[XE_PL_VRAM1] = "vram1",
49 	[XE_PL_STOLEN] = "stolen"
50 };
51 
52 static const struct ttm_place sys_placement_flags = {
53 	.fpfn = 0,
54 	.lpfn = 0,
55 	.mem_type = XE_PL_SYSTEM,
56 	.flags = 0,
57 };
58 
59 static struct ttm_placement sys_placement = {
60 	.num_placement = 1,
61 	.placement = &sys_placement_flags,
62 };
63 
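/*
 * Zero-initialized, i.e. num_placement == 0: validating against this empty
 * placement makes TTM drop the backing store (purge), e.g. on device unplug.
 */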
64 static struct ttm_placement purge_placement;
65 
66 static const struct ttm_place tt_placement_flags[] = {
67 	{
68 		.fpfn = 0,
69 		.lpfn = 0,
70 		.mem_type = XE_PL_TT,
71 		.flags = TTM_PL_FLAG_DESIRED,
72 	},
73 	{
74 		.fpfn = 0,
75 		.lpfn = 0,
76 		.mem_type = XE_PL_SYSTEM,
77 		.flags = TTM_PL_FLAG_FALLBACK,
78 	}
79 };
80 
81 static struct ttm_placement tt_placement = {
82 	.num_placement = 2,
83 	.placement = tt_placement_flags,
84 };
85 
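/*
 * Iterate over every VRAM placement flag (XE_BO_FLAG_VRAM<n>) that is set
 * in @bo_flags__, assigning each one in turn to @bit__.
 */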
86 #define for_each_set_bo_vram_flag(bit__, bo_flags__) \
87 	for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \
88 		for_each_if(((bit__) = __bit_tmp) & (bo_flags__) & XE_BO_FLAG_VRAM_MASK)
89 
90 bool mem_type_is_vram(u32 mem_type)
91 {
92 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
93 }
94 
95 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
96 {
97 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
98 }
99 
100 static bool resource_is_vram(struct ttm_resource *res)
101 {
102 	return mem_type_is_vram(res->mem_type);
103 }
104 
105 bool xe_bo_is_vram(struct xe_bo *bo)
106 {
107 	return resource_is_vram(bo->ttm.resource) ||
108 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
109 }
110 
111 bool xe_bo_is_stolen(struct xe_bo *bo)
112 {
113 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
114 }
115 
116 /**
117  * xe_bo_has_single_placement - check if BO is placed only in one memory location
118  * @bo: The BO
119  *
120  * This function checks whether a given BO is placed in only one memory location.
121  *
122  * Returns: true if the BO is placed in a single memory location, false otherwise.
123  *
124  */
125 bool xe_bo_has_single_placement(struct xe_bo *bo)
126 {
127 	return bo->placement.num_placement == 1;
128 }
129 
130 /**
131  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
132  * @bo: The BO
133  *
134  * The stolen memory is accessed through the PCI BAR for both DGFX and some
135  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
136  *
137  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
138  */
139 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
140 {
141 	return xe_bo_is_stolen(bo) &&
142 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
143 }
144 
145 /**
146  * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
147  * @bo: The BO
148  *
149  * Check if a given bo is bound through VM_BIND. This requires the
150  * reservation lock for the BO to be held.
151  *
152  * Returns: boolean
153  */
154 bool xe_bo_is_vm_bound(struct xe_bo *bo)
155 {
156 	xe_bo_assert_held(bo);
157 
158 	return !list_empty(&bo->ttm.base.gpuva.list);
159 }
160 
161 static bool xe_bo_is_user(struct xe_bo *bo)
162 {
163 	return bo->flags & XE_BO_FLAG_USER;
164 }
165 
166 static struct xe_migrate *
167 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
168 {
169 	struct xe_tile *tile;
170 
171 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
172 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
173 	return tile->migrate;
174 }
175 
176 static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
177 {
178 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
179 	struct ttm_resource_manager *mgr;
180 	struct xe_ttm_vram_mgr *vram_mgr;
181 
182 	xe_assert(xe, resource_is_vram(res));
183 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
184 	vram_mgr = to_xe_ttm_vram_mgr(mgr);
185 
186 	return container_of(vram_mgr, struct xe_vram_region, ttm);
187 }
188 
189 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
190 			   u32 bo_flags, u32 *c)
191 {
192 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
193 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
194 
195 		bo->placements[*c] = (struct ttm_place) {
196 			.mem_type = XE_PL_TT,
197 			.flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
198 			TTM_PL_FLAG_FALLBACK : 0,
199 		};
200 		*c += 1;
201 	}
202 }
203 
204 static bool force_contiguous(u32 bo_flags)
205 {
206 	if (bo_flags & XE_BO_FLAG_STOLEN)
207 		return true; /* users expect this */
208 	else if (bo_flags & XE_BO_FLAG_PINNED &&
209 		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
210 		return true; /* needs vmap */
211 	else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
212 		return true;
213 
214 	/*
215 	 * For eviction / restore on suspend / resume, objects pinned in VRAM
216 	 * must be contiguous. Also, only contiguous BOs support xe_bo_vmap.
217 	 */
218 	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
219 	       bo_flags & XE_BO_FLAG_PINNED;
220 }
221 
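/* Convert a single XE_BO_FLAG_VRAM<n> placement flag into its tile id <n>. */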
222 static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag)
223 {
224 	xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK);
225 	xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0);
226 
227 	return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1;
228 }
229 
230 static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 bo_flags, u32 vram_flag,
231 					   enum ttm_bo_type type)
232 {
233 	u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag);
234 
235 	xe_assert(xe, tile_id < xe->info.tile_count);
236 
237 	if (type == ttm_bo_type_kernel && !(bo_flags & XE_BO_FLAG_FORCE_USER_VRAM))
238 		return xe->tiles[tile_id].mem.kernel_vram->placement;
239 	else
240 		return xe->tiles[tile_id].mem.vram->placement;
241 }
242 
243 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
244 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
245 {
246 	struct ttm_place place = { .mem_type = mem_type };
247 	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
248 	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);
249 
250 	struct xe_vram_region *vram;
251 	u64 io_size;
252 
253 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
254 
255 	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
256 	xe_assert(xe, vram && vram->usable_size);
257 	io_size = vram->io_size;
258 
259 	if (force_contiguous(bo_flags))
260 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
261 
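	/*
	 * Small-BAR: only part of VRAM is CPU-accessible. BOs needing CPU
	 * access are constrained to the io-mappable range, while others get
	 * TTM_PL_FLAG_TOPDOWN so they tend to stay out of that range.
	 */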
262 	if (io_size < vram->usable_size) {
263 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
264 			place.fpfn = 0;
265 			place.lpfn = io_size >> PAGE_SHIFT;
266 		} else {
267 			place.flags |= TTM_PL_FLAG_TOPDOWN;
268 		}
269 	}
270 	places[*c] = place;
271 	*c += 1;
272 }
273 
274 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
275 			 u32 bo_flags, enum ttm_bo_type type, u32 *c)
276 {
277 	u32 vram_flag;
278 
279 	for_each_set_bo_vram_flag(vram_flag, bo_flags) {
280 		u32 pl = bo_vram_flags_to_vram_placement(xe, bo_flags, vram_flag, type);
281 
282 		add_vram(xe, bo, bo->placements, bo_flags, pl, c);
283 	}
284 }
285 
286 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
287 			   u32 bo_flags, u32 *c)
288 {
289 	if (bo_flags & XE_BO_FLAG_STOLEN) {
290 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
291 
292 		bo->placements[*c] = (struct ttm_place) {
293 			.mem_type = XE_PL_STOLEN,
294 			.flags = force_contiguous(bo_flags) ?
295 				TTM_PL_FLAG_CONTIGUOUS : 0,
296 		};
297 		*c += 1;
298 	}
299 }
300 
301 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
302 				       u32 bo_flags, enum ttm_bo_type type)
303 {
304 	u32 c = 0;
305 
306 	try_add_vram(xe, bo, bo_flags, type, &c);
307 	try_add_system(xe, bo, bo_flags, &c);
308 	try_add_stolen(xe, bo, bo_flags, &c);
309 
310 	if (!c)
311 		return -EINVAL;
312 
313 	bo->placement = (struct ttm_placement) {
314 		.num_placement = c,
315 		.placement = bo->placements,
316 	};
317 
318 	return 0;
319 }
320 
321 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
322 			      u32 bo_flags, enum ttm_bo_type type)
323 {
324 	xe_bo_assert_held(bo);
325 	return __xe_bo_placement_for_flags(xe, bo, bo_flags, type);
326 }
327 
328 static void xe_evict_flags(struct ttm_buffer_object *tbo,
329 			   struct ttm_placement *placement)
330 {
331 	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
332 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
333 	struct xe_bo *bo;
334 
335 	if (!xe_bo_is_xe_bo(tbo)) {
336 		/* Don't handle scatter gather BOs */
337 		if (tbo->type == ttm_bo_type_sg) {
338 			placement->num_placement = 0;
339 			return;
340 		}
341 
342 		*placement = device_unplugged ? purge_placement : sys_placement;
343 		return;
344 	}
345 
346 	bo = ttm_to_xe_bo(tbo);
347 	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
348 		*placement = sys_placement;
349 		return;
350 	}
351 
352 	if (device_unplugged && !tbo->base.dma_buf) {
353 		*placement = purge_placement;
354 		return;
355 	}
356 
357 	/*
358 	 * For xe, sg bos that are evicted to system just trigger a
359 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
360 	 */
361 	switch (tbo->resource->mem_type) {
362 	case XE_PL_VRAM0:
363 	case XE_PL_VRAM1:
364 	case XE_PL_STOLEN:
365 		*placement = tt_placement;
366 		break;
367 	case XE_PL_TT:
368 	default:
369 		*placement = sys_placement;
370 		break;
371 	}
372 }
373 
374 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */
375 struct xe_ttm_tt {
376 	struct ttm_tt ttm;
377 	struct sg_table sgt;
378 	struct sg_table *sg;
379 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
380 	bool purgeable;
381 };
382 
383 static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
384 {
385 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
386 	unsigned long num_pages = tt->num_pages;
387 	int ret;
388 
389 	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
390 		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
391 
392 	if (xe_tt->sg)
393 		return 0;
394 
395 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
396 						num_pages, 0,
397 						(u64)num_pages << PAGE_SHIFT,
398 						xe_sg_segment_size(xe->drm.dev),
399 						GFP_KERNEL);
400 	if (ret)
401 		return ret;
402 
403 	xe_tt->sg = &xe_tt->sgt;
404 	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
405 			      DMA_ATTR_SKIP_CPU_SYNC);
406 	if (ret) {
407 		sg_free_table(xe_tt->sg);
408 		xe_tt->sg = NULL;
409 		return ret;
410 	}
411 
412 	return 0;
413 }
414 
415 static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
416 {
417 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
418 
419 	if (xe_tt->sg) {
420 		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
421 				  DMA_BIDIRECTIONAL, 0);
422 		sg_free_table(xe_tt->sg);
423 		xe_tt->sg = NULL;
424 	}
425 }
426 
427 struct sg_table *xe_bo_sg(struct xe_bo *bo)
428 {
429 	struct ttm_tt *tt = bo->ttm.ttm;
430 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
431 
432 	return xe_tt->sg;
433 }
434 
435 /*
436  * Account ttm pages against the device shrinker's shrinkable and
437  * purgeable counts.
438  */
439 static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
440 {
441 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
442 
443 	if (xe_tt->purgeable)
444 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
445 	else
446 		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
447 }
448 
449 static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
450 {
451 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
452 
453 	if (xe_tt->purgeable)
454 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
455 	else
456 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
457 }
458 
459 static void update_global_total_pages(struct ttm_device *ttm_dev,
460 				      long num_pages)
461 {
462 #if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
463 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
464 	u64 global_total_pages =
465 		atomic64_add_return(num_pages, &xe->global_total_pages);
466 
467 	trace_gpu_mem_total(xe->drm.primary->index, 0,
468 			    global_total_pages << PAGE_SHIFT);
469 #endif
470 }
471 
472 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
473 				       u32 page_flags)
474 {
475 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
476 	struct xe_device *xe = xe_bo_device(bo);
477 	struct xe_ttm_tt *xe_tt;
478 	struct ttm_tt *tt;
479 	unsigned long extra_pages;
480 	enum ttm_caching caching = ttm_cached;
481 	int err;
482 
483 	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
484 	if (!xe_tt)
485 		return NULL;
486 
487 	tt = &xe_tt->ttm;
488 
489 	extra_pages = 0;
490 	if (xe_bo_needs_ccs_pages(bo))
491 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
492 					   PAGE_SIZE);
493 
494 	/*
495 	 * DGFX system memory is always WB / ttm_cached, since
496 	 * other caching modes are only supported on x86. DGFX
497 	 * GPU system memory accesses are always coherent with the
498 	 * CPU.
499 	 */
500 	if (!IS_DGFX(xe)) {
501 		switch (bo->cpu_caching) {
502 		case DRM_XE_GEM_CPU_CACHING_WC:
503 			caching = ttm_write_combined;
504 			break;
505 		default:
506 			caching = ttm_cached;
507 			break;
508 		}
509 
510 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
511 
512 		/*
513 		 * Display scanout is always non-coherent with the CPU cache.
514 		 *
515 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
516 		 * non-coherent and require a CPU:WC mapping.
517 		 */
518 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
519 		    (xe->info.graphics_verx100 >= 1270 &&
520 		     bo->flags & XE_BO_FLAG_PAGETABLE))
521 			caching = ttm_write_combined;
522 	}
523 
524 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
525 		/*
526 		 * Valid only for internally-created buffers, for
527 		 * which cpu_caching is never initialized.
528 		 */
529 		xe_assert(xe, bo->cpu_caching == 0);
530 		caching = ttm_uncached;
531 	}
532 
533 	if (ttm_bo->type != ttm_bo_type_sg)
534 		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
535 
536 	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
537 	if (err) {
538 		kfree(xe_tt);
539 		return NULL;
540 	}
541 
542 	if (ttm_bo->type != ttm_bo_type_sg) {
543 		err = ttm_tt_setup_backup(tt);
544 		if (err) {
545 			ttm_tt_fini(tt);
546 			kfree(xe_tt);
547 			return NULL;
548 		}
549 	}
550 
551 	return tt;
552 }
553 
554 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
555 			      struct ttm_operation_ctx *ctx)
556 {
557 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
558 	int err;
559 
560 	/*
561 	 * dma-bufs are not populated with pages, and the dma-
562 	 * addresses are set up when moved to XE_PL_TT.
563 	 */
564 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
565 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
566 		return 0;
567 
568 	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
569 		err = ttm_tt_restore(ttm_dev, tt, ctx);
570 	} else {
571 		ttm_tt_clear_backed_up(tt);
572 		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
573 	}
574 	if (err)
575 		return err;
576 
577 	xe_tt->purgeable = false;
578 	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
579 	update_global_total_pages(ttm_dev, tt->num_pages);
580 
581 	return 0;
582 }
583 
584 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
585 {
586 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
587 
588 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
589 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
590 		return;
591 
592 	xe_tt_unmap_sg(xe, tt);
593 
594 	ttm_pool_free(&ttm_dev->pool, tt);
595 	xe_ttm_tt_account_subtract(xe, tt);
596 	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
597 }
598 
599 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
600 {
601 	ttm_tt_fini(tt);
602 	kfree(tt);
603 }
604 
605 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
606 {
607 	struct xe_ttm_vram_mgr_resource *vres =
608 		to_xe_ttm_vram_mgr_resource(mem);
609 
610 	return vres->used_visible_size == mem->size;
611 }
612 
613 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
614 				 struct ttm_resource *mem)
615 {
616 	struct xe_device *xe = ttm_to_xe_device(bdev);
617 
618 	switch (mem->mem_type) {
619 	case XE_PL_SYSTEM:
620 	case XE_PL_TT:
621 		return 0;
622 	case XE_PL_VRAM0:
623 	case XE_PL_VRAM1: {
624 		struct xe_vram_region *vram = res_to_mem_region(mem);
625 
626 		if (!xe_ttm_resource_visible(mem))
627 			return -EINVAL;
628 
629 		mem->bus.offset = mem->start << PAGE_SHIFT;
630 
631 		if (vram->mapping &&
632 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
633 			mem->bus.addr = (u8 __force *)vram->mapping +
634 				mem->bus.offset;
635 
636 		mem->bus.offset += vram->io_start;
637 		mem->bus.is_iomem = true;
638 
639 #if  !IS_ENABLED(CONFIG_X86)
640 		mem->bus.caching = ttm_write_combined;
641 #endif
642 		return 0;
643 	} case XE_PL_STOLEN:
644 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
645 	default:
646 		return -EINVAL;
647 	}
648 }
649 
650 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
651 				const struct ttm_operation_ctx *ctx)
652 {
653 	struct dma_resv_iter cursor;
654 	struct dma_fence *fence;
655 	struct drm_gem_object *obj = &bo->ttm.base;
656 	struct drm_gpuvm_bo *vm_bo;
657 	bool idle = false;
658 	int ret = 0;
659 
660 	dma_resv_assert_held(bo->ttm.base.resv);
661 
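	/*
	 * If the BO is bound, kick off signaling of all bookkeep fences
	 * (e.g. preempt fences) so that later waits on them can complete.
	 */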
662 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
663 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
664 				    DMA_RESV_USAGE_BOOKKEEP);
665 		dma_resv_for_each_fence_unlocked(&cursor, fence)
666 			dma_fence_enable_sw_signaling(fence);
667 		dma_resv_iter_end(&cursor);
668 	}
669 
670 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
671 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
672 		struct drm_gpuva *gpuva;
673 
674 		if (!xe_vm_in_fault_mode(vm)) {
675 			drm_gpuvm_bo_evict(vm_bo, true);
676 			continue;
677 		}
678 
679 		if (!idle) {
680 			long timeout;
681 
682 			if (ctx->no_wait_gpu &&
683 			    !dma_resv_test_signaled(bo->ttm.base.resv,
684 						    DMA_RESV_USAGE_BOOKKEEP))
685 				return -EBUSY;
686 
687 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
688 							DMA_RESV_USAGE_BOOKKEEP,
689 							ctx->interruptible,
690 							MAX_SCHEDULE_TIMEOUT);
691 			if (!timeout)
692 				return -ETIME;
693 			if (timeout < 0)
694 				return timeout;
695 
696 			idle = true;
697 		}
698 
699 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
700 			struct xe_vma *vma = gpuva_to_vma(gpuva);
701 
702 			trace_xe_vma_evict(vma);
703 			ret = xe_vm_invalidate_vma(vma);
704 			if (XE_WARN_ON(ret))
705 				return ret;
706 		}
707 	}
708 
709 	return ret;
710 }
711 
712 /*
713  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
714  * Note that unmapping the attachment is deferred to the next
715  * map_attachment time, or to bo destroy (after idling), whichever comes first.
716  * This is to avoid syncing before unmap_attachment(), assuming that the
717  * caller relies on idling the reservation object before moving the
718  * backing store out. Should that assumption not hold, then we will be able
719  * to unconditionally call unmap_attachment() when moving out to system.
720  */
721 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
722 			     struct ttm_resource *new_res)
723 {
724 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
725 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
726 					       ttm);
727 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
728 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
729 	struct sg_table *sg;
730 
731 	xe_assert(xe, attach);
732 	xe_assert(xe, ttm_bo->ttm);
733 
734 	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
735 	    ttm_bo->sg) {
736 		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
737 				      false, MAX_SCHEDULE_TIMEOUT);
738 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
739 		ttm_bo->sg = NULL;
740 	}
741 
742 	if (new_res->mem_type == XE_PL_SYSTEM)
743 		goto out;
744 
745 	if (ttm_bo->sg) {
746 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
747 		ttm_bo->sg = NULL;
748 	}
749 
750 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
751 	if (IS_ERR(sg))
752 		return PTR_ERR(sg);
753 
754 	ttm_bo->sg = sg;
755 	xe_tt->sg = sg;
756 
757 out:
758 	ttm_bo_move_null(ttm_bo, new_res);
759 
760 	return 0;
761 }
762 
763 /**
764  * xe_bo_move_notify - Notify subsystems of a pending move
765  * @bo: The buffer object
766  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
767  *
768  * This function notifies subsystems of an upcoming buffer move.
769  * Upon receiving such a notification, subsystems should schedule
770  * halting access to the underlying pages and optionally add a fence
771  * to the buffer object's dma_resv object, that signals when access is
772  * stopped. The caller will wait on all dma_resv fences before
773  * starting the move.
774  *
775  * A subsystem may commence access to the object after obtaining
776  * bindings to the new backing memory under the object lock.
777  *
778  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
779  * negative error code on error.
780  */
781 static int xe_bo_move_notify(struct xe_bo *bo,
782 			     const struct ttm_operation_ctx *ctx)
783 {
784 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
785 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
786 	struct ttm_resource *old_mem = ttm_bo->resource;
787 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
788 	int ret;
789 
790 	/*
791 	 * If this starts to call into many components, consider
792 	 * using a notification chain here.
793 	 */
794 
795 	if (xe_bo_is_pinned(bo))
796 		return -EINVAL;
797 
798 	xe_bo_vunmap(bo);
799 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
800 	if (ret)
801 		return ret;
802 
803 	/* Don't call move_notify() for imported dma-bufs. */
804 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
805 		dma_buf_move_notify(ttm_bo->base.dma_buf);
806 
807 	/*
808 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
809 	 * so if we moved from VRAM make sure to unlink this from the userfault
810 	 * tracking.
811 	 */
812 	if (mem_type_is_vram(old_mem_type)) {
813 		mutex_lock(&xe->mem_access.vram_userfault.lock);
814 		if (!list_empty(&bo->vram_userfault_link))
815 			list_del_init(&bo->vram_userfault_link);
816 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
817 	}
818 
819 	return 0;
820 }
821 
822 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
823 		      struct ttm_operation_ctx *ctx,
824 		      struct ttm_resource *new_mem,
825 		      struct ttm_place *hop)
826 {
827 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
828 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
829 	struct ttm_resource *old_mem = ttm_bo->resource;
830 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
831 	struct ttm_tt *ttm = ttm_bo->ttm;
832 	struct xe_migrate *migrate = NULL;
833 	struct dma_fence *fence;
834 	bool move_lacks_source;
835 	bool tt_has_data;
836 	bool needs_clear;
837 	bool handle_system_ccs = !IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
838 				  ttm && ttm_tt_is_populated(ttm);
839 	int ret = 0;
840 
841 	/* Bo creation path, moving to system or TT. */
842 	if ((!old_mem && ttm) && !handle_system_ccs) {
843 		if (new_mem->mem_type == XE_PL_TT)
844 			ret = xe_tt_map_sg(xe, ttm);
845 		if (!ret)
846 			ttm_bo_move_null(ttm_bo, new_mem);
847 		goto out;
848 	}
849 
850 	if (ttm_bo->type == ttm_bo_type_sg) {
851 		if (new_mem->mem_type == XE_PL_SYSTEM)
852 			ret = xe_bo_move_notify(bo, ctx);
853 		if (!ret)
854 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
855 		return ret;
856 	}
857 
858 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));
859 
860 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
861 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
862 
863 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
864 		(!ttm && ttm_bo->type == ttm_bo_type_device);
865 
866 	if (new_mem->mem_type == XE_PL_TT) {
867 		ret = xe_tt_map_sg(xe, ttm);
868 		if (ret)
869 			goto out;
870 	}
871 
872 	if (move_lacks_source && !needs_clear) {
873 		ttm_bo_move_null(ttm_bo, new_mem);
874 		goto out;
875 	}
876 
877 	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
878 	    new_mem->mem_type == XE_PL_SYSTEM) {
879 		ret = xe_svm_bo_evict(bo);
880 		if (!ret) {
881 			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
882 			ttm_bo_move_null(ttm_bo, new_mem);
883 		} else {
884 			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
885 				ERR_PTR(ret));
886 		}
887 
888 		goto out;
889 	}
890 
891 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
892 		ttm_bo_move_null(ttm_bo, new_mem);
893 		goto out;
894 	}
895 
896 	/*
897 	 * A failed multi-hop, where the old_mem is still marked as
898 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
899 	 */
900 	if (old_mem_type == XE_PL_TT &&
901 	    new_mem->mem_type == XE_PL_TT) {
902 		ttm_bo_move_null(ttm_bo, new_mem);
903 		goto out;
904 	}
905 
906 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
907 		ret = xe_bo_move_notify(bo, ctx);
908 		if (ret)
909 			goto out;
910 	}
911 
912 	if (old_mem_type == XE_PL_TT &&
913 	    new_mem->mem_type == XE_PL_SYSTEM) {
914 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
915 						     DMA_RESV_USAGE_BOOKKEEP,
916 						     false,
917 						     MAX_SCHEDULE_TIMEOUT);
918 		if (timeout < 0) {
919 			ret = timeout;
920 			goto out;
921 		}
922 
923 		if (!handle_system_ccs) {
924 			ttm_bo_move_null(ttm_bo, new_mem);
925 			goto out;
926 		}
927 	}
928 
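	/*
	 * No direct VRAM <-> system copies here; request a multihop so TTM
	 * bounces the move through a temporary XE_PL_TT resource.
	 */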
929 	if (!move_lacks_source &&
930 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
931 	     (mem_type_is_vram(old_mem_type) &&
932 	      new_mem->mem_type == XE_PL_SYSTEM))) {
933 		hop->fpfn = 0;
934 		hop->lpfn = 0;
935 		hop->mem_type = XE_PL_TT;
936 		hop->flags = TTM_PL_FLAG_TEMPORARY;
937 		ret = -EMULTIHOP;
938 		goto out;
939 	}
940 
941 	if (bo->tile)
942 		migrate = bo->tile->migrate;
943 	else if (resource_is_vram(new_mem))
944 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
945 	else if (mem_type_is_vram(old_mem_type))
946 		migrate = mem_type_to_migrate(xe, old_mem_type);
947 	else
948 		migrate = xe->tiles[0].migrate;
949 
950 	xe_assert(xe, migrate);
951 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
952 	if (xe_rpm_reclaim_safe(xe)) {
953 		/*
954 		 * We might be called through swapout in the validation path of
955 		 * another TTM device, so acquire rpm here.
956 		 */
957 		xe_pm_runtime_get(xe);
958 	} else {
959 		drm_WARN_ON(&xe->drm, handle_system_ccs);
960 		xe_pm_runtime_get_noresume(xe);
961 	}
962 
963 	if (move_lacks_source) {
964 		u32 flags = 0;
965 
966 		if (mem_type_is_vram(new_mem->mem_type))
967 			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
968 		else if (handle_system_ccs)
969 			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
970 
971 		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
972 	} else {
973 		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
974 					handle_system_ccs);
975 	}
976 	if (IS_ERR(fence)) {
977 		ret = PTR_ERR(fence);
978 		xe_pm_runtime_put(xe);
979 		goto out;
980 	}
981 	if (!move_lacks_source) {
982 		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
983 						new_mem);
984 		if (ret) {
985 			dma_fence_wait(fence, false);
986 			ttm_bo_move_null(ttm_bo, new_mem);
987 			ret = 0;
988 		}
989 	} else {
990 		/*
991 		 * ttm_bo_move_accel_cleanup() may blow up if
992 		 * bo->resource == NULL, so just attach the
993 		 * fence and set the new resource.
994 		 */
995 		dma_resv_add_fence(ttm_bo->base.resv, fence,
996 				   DMA_RESV_USAGE_KERNEL);
997 		ttm_bo_move_null(ttm_bo, new_mem);
998 	}
999 
1000 	dma_fence_put(fence);
1001 	xe_pm_runtime_put(xe);
1002 
1003 	/*
1004 	 * CCS metadata is migrated from TT -> SMEM. So, let us detach the
1005 	 * BBs from the BO as they are no longer needed.
1006 	 */
1007 	if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT &&
1008 	    new_mem->mem_type == XE_PL_SYSTEM)
1009 		xe_sriov_vf_ccs_detach_bo(bo);
1010 
1011 	if (IS_VF_CCS_READY(xe) &&
1012 	    ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
1013 	     (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
1014 	    handle_system_ccs)
1015 		ret = xe_sriov_vf_ccs_attach_bo(bo);
1016 
1017 out:
1018 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
1019 	    ttm_bo->ttm) {
1020 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
1021 						     DMA_RESV_USAGE_KERNEL,
1022 						     false,
1023 						     MAX_SCHEDULE_TIMEOUT);
1024 		if (timeout < 0)
1025 			ret = timeout;
1026 
1027 		if (IS_VF_CCS_READY(xe))
1028 			xe_sriov_vf_ccs_detach_bo(bo);
1029 
1030 		xe_tt_unmap_sg(xe, ttm_bo->ttm);
1031 	}
1032 
1033 	return ret;
1034 }
1035 
1036 static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
1037 			       struct ttm_buffer_object *bo,
1038 			       unsigned long *scanned)
1039 {
1040 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1041 	long lret;
1042 
1043 	/* Fake move to system, without copying data. */
1044 	if (bo->resource->mem_type != XE_PL_SYSTEM) {
1045 		struct ttm_resource *new_resource;
1046 
1047 		lret = ttm_bo_wait_ctx(bo, ctx);
1048 		if (lret)
1049 			return lret;
1050 
1051 		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
1052 		if (lret)
1053 			return lret;
1054 
1055 		xe_tt_unmap_sg(xe, bo->ttm);
1056 		ttm_bo_move_null(bo, new_resource);
1057 	}
1058 
1059 	*scanned += bo->ttm->num_pages;
1060 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1061 			     {.purge = true,
1062 			      .writeback = false,
1063 			      .allow_move = false});
1064 
1065 	if (lret > 0)
1066 		xe_ttm_tt_account_subtract(xe, bo->ttm);
1067 
1068 	return lret;
1069 }
1070 
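/*
 * Never consider a BO for eviction while one of the VMs it is bound to is
 * in the middle of validating; otherwise defer to TTM's default policy.
 */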
1071 static bool
1072 xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1073 {
1074 	struct drm_gpuvm_bo *vm_bo;
1075 
1076 	if (!ttm_bo_eviction_valuable(bo, place))
1077 		return false;
1078 
1079 	if (!xe_bo_is_xe_bo(bo))
1080 		return true;
1081 
1082 	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1083 		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1084 			return false;
1085 	}
1086 
1087 	return true;
1088 }
1089 
1090 /**
1091  * xe_bo_shrink() - Try to shrink an xe bo.
1092  * @ctx: The struct ttm_operation_ctx used for shrinking.
1093  * @bo: The TTM buffer object whose pages to shrink.
1094  * @flags: Flags governing the shrink behaviour.
1095  * @scanned: Pointer to a counter of the number of pages
1096  * attempted to shrink.
1097  *
1098  * Try to shrink or purge a bo, and if it succeeds, unmap dma.
1099  * Note that we need to be able to handle non-xe bos as well
1100  * (ghost bos), but only if the struct ttm_tt is embedded in
1101  * a struct xe_ttm_tt. When the function attempts to shrink
1102  * the pages of a buffer object, the value pointed to by @scanned
1103  * is updated.
1104  *
1105  * Return: The number of pages shrunken or purged, or negative error
1106  * code on failure.
1107  */
1108 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
1109 		  const struct xe_bo_shrink_flags flags,
1110 		  unsigned long *scanned)
1111 {
1112 	struct ttm_tt *tt = bo->ttm;
1113 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
1114 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
1115 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
1116 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1117 	bool needs_rpm;
1118 	long lret = 0L;
1119 
1120 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
1121 	    (flags.purge && !xe_tt->purgeable))
1122 		return -EBUSY;
1123 
1124 	if (!xe_bo_eviction_valuable(bo, &place))
1125 		return -EBUSY;
1126 
1127 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1128 		return xe_bo_shrink_purge(ctx, bo, scanned);
1129 
1130 	if (xe_tt->purgeable) {
1131 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1132 			lret = xe_bo_move_notify(xe_bo, ctx);
1133 		if (!lret)
1134 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1135 		goto out_unref;
1136 	}
1137 
1138 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1139 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1140 		     xe_bo_needs_ccs_pages(xe_bo));
1141 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1142 		goto out_unref;
1143 
1144 	*scanned += tt->num_pages;
1145 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1146 			     {.purge = false,
1147 			      .writeback = flags.writeback,
1148 			      .allow_move = true});
1149 	if (needs_rpm)
1150 		xe_pm_runtime_put(xe);
1151 
1152 	if (lret > 0)
1153 		xe_ttm_tt_account_subtract(xe, tt);
1154 
1155 out_unref:
1156 	xe_bo_put(xe_bo);
1157 
1158 	return lret;
1159 }
1160 
1161 /**
1162  * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1163  * up in system memory.
1164  * @bo: The buffer object to prepare.
1165  *
1166  * On successful completion, the object backup pages are allocated. Expectation
1167  * is that this is called from the PM notifier, prior to suspend/hibernation.
1168  *
1169  * Return: 0 on success. Negative error code on failure.
1170  */
1171 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1172 {
1173 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1174 	struct xe_validation_ctx ctx;
1175 	struct drm_exec exec;
1176 	struct xe_bo *backup;
1177 	int ret = 0;
1178 
1179 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
1180 		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
1181 		drm_exec_retry_on_contention(&exec);
1182 		xe_assert(xe, !ret);
1183 		xe_assert(xe, !bo->backup_obj);
1184 
1185 		/*
1186 		 * Since this is called from the PM notifier, we might have raced with
1187 		 * someone unpinning this after we dropped the pinned list lock and
1188 		 * before grabbing the above bo lock.
1189 		 */
1190 		if (!xe_bo_is_pinned(bo))
1191 			break;
1192 
1193 		if (!xe_bo_is_vram(bo))
1194 			break;
1195 
1196 		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1197 			break;
1198 
1199 		backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
1200 					   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1201 					   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1202 					   XE_BO_FLAG_PINNED, &exec);
1203 		if (IS_ERR(backup)) {
1204 			drm_exec_retry_on_contention(&exec);
1205 			ret = PTR_ERR(backup);
1206 			xe_validation_retry_on_oom(&ctx, &ret);
1207 			break;
1208 		}
1209 
1210 		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1211 		ttm_bo_pin(&backup->ttm);
1212 		bo->backup_obj = backup;
1213 	}
1214 
1215 	return ret;
1216 }
1217 
1218 /**
1219  * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1220  * @bo: The buffer object to undo the prepare for.
1221  *
1222  * Always returns 0. The backup object is removed, if still present. Expectation
1223  * is that this is called from the PM notifier when undoing the prepare step.
1224  *
1225  * Return: Always returns 0.
1226  */
1227 int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1228 {
1229 	xe_bo_lock(bo, false);
1230 	if (bo->backup_obj) {
1231 		ttm_bo_unpin(&bo->backup_obj->ttm);
1232 		xe_bo_put(bo->backup_obj);
1233 		bo->backup_obj = NULL;
1234 	}
1235 	xe_bo_unlock(bo);
1236 
1237 	return 0;
1238 }
1239 
1240 static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup)
1241 {
1242 	struct xe_device *xe = xe_bo_device(bo);
1243 	bool unmap = false;
1244 	int ret = 0;
1245 
1246 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1247 		struct xe_migrate *migrate;
1248 		struct dma_fence *fence;
1249 
1250 		if (bo->tile)
1251 			migrate = bo->tile->migrate;
1252 		else
1253 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1254 
1255 		xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv);
1256 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1257 		if (ret)
1258 			goto out_backup;
1259 
1260 		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
1261 					backup->ttm.resource, false);
1262 		if (IS_ERR(fence)) {
1263 			ret = PTR_ERR(fence);
1264 			goto out_backup;
1265 		}
1266 
1267 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1268 				   DMA_RESV_USAGE_KERNEL);
1269 		dma_fence_put(fence);
1270 	} else {
1271 		ret = xe_bo_vmap(backup);
1272 		if (ret)
1273 			goto out_backup;
1274 
1275 		if (iosys_map_is_null(&bo->vmap)) {
1276 			ret = xe_bo_vmap(bo);
1277 			if (ret)
1278 				goto out_vunmap;
1279 			unmap = true;
1280 		}
1281 
1282 		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
1283 				   xe_bo_size(bo));
1284 	}
1285 
1286 	if (!bo->backup_obj)
1287 		bo->backup_obj = backup;
1288 out_vunmap:
1289 	xe_bo_vunmap(backup);
1290 out_backup:
1291 	if (unmap)
1292 		xe_bo_vunmap(bo);
1293 
1294 	return ret;
1295 }
1296 
1297 /**
1298  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1299  * @bo: The buffer object to move.
1300  *
1301  * On successful completion, the object memory will be moved to system memory.
1302  *
1303  * This is needed for special handling of pinned VRAM objects during
1304  * suspend-resume.
1305  *
1306  * Return: 0 on success. Negative error code on failure.
1307  */
1308 int xe_bo_evict_pinned(struct xe_bo *bo)
1309 {
1310 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1311 	struct xe_validation_ctx ctx;
1312 	struct drm_exec exec;
1313 	struct xe_bo *backup = bo->backup_obj;
1314 	bool backup_created = false;
1315 	int ret = 0;
1316 
1317 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
1318 		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
1319 		drm_exec_retry_on_contention(&exec);
1320 		xe_assert(xe, !ret);
1321 
1322 		if (WARN_ON(!bo->ttm.resource)) {
1323 			ret = -EINVAL;
1324 			break;
1325 		}
1326 
1327 		if (WARN_ON(!xe_bo_is_pinned(bo))) {
1328 			ret = -EINVAL;
1329 			break;
1330 		}
1331 
1332 		if (!xe_bo_is_vram(bo))
1333 			break;
1334 
1335 		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1336 			break;
1337 
1338 		if (!backup) {
1339 			backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL,
1340 						   xe_bo_size(bo),
1341 						   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1342 						   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1343 						   XE_BO_FLAG_PINNED, &exec);
1344 			if (IS_ERR(backup)) {
1345 				drm_exec_retry_on_contention(&exec);
1346 				ret = PTR_ERR(backup);
1347 				xe_validation_retry_on_oom(&ctx, &ret);
1348 				break;
1349 			}
1350 			backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1351 			backup_created = true;
1352 		}
1353 
1354 		ret = xe_bo_evict_pinned_copy(bo, backup);
1355 	}
1356 
1357 	if (ret && backup_created)
1358 		xe_bo_put(backup);
1359 
1360 	return ret;
1361 }
1362 
1363 /**
1364  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1365  * @bo: The buffer object to move.
1366  *
1367  * On successful completion, the object memory will be moved back to VRAM.
1368  *
1369  * This is needed for special handling of pinned VRAM objects during
1370  * suspend-resume.
1371  *
1372  * Return: 0 on success. Negative error code on failure.
1373  */
1374 int xe_bo_restore_pinned(struct xe_bo *bo)
1375 {
1376 	struct ttm_operation_ctx ctx = {
1377 		.interruptible = false,
1378 		.gfp_retry_mayfail = false,
1379 	};
1380 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1381 	struct xe_bo *backup = bo->backup_obj;
1382 	bool unmap = false;
1383 	int ret;
1384 
1385 	if (!backup)
1386 		return 0;
1387 
1388 	xe_bo_lock(bo, false);
1389 
1390 	if (!xe_bo_is_pinned(backup)) {
1391 		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1392 		if (ret)
1393 			goto out_unlock_bo;
1394 	}
1395 
1396 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1397 		struct xe_migrate *migrate;
1398 		struct dma_fence *fence;
1399 
1400 		if (bo->tile)
1401 			migrate = bo->tile->migrate;
1402 		else
1403 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1404 
1405 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1406 		if (ret)
1407 			goto out_unlock_bo;
1408 
1409 		fence = xe_migrate_copy(migrate, backup, bo,
1410 					backup->ttm.resource, bo->ttm.resource,
1411 					false);
1412 		if (IS_ERR(fence)) {
1413 			ret = PTR_ERR(fence);
1414 			goto out_unlock_bo;
1415 		}
1416 
1417 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1418 				   DMA_RESV_USAGE_KERNEL);
1419 		dma_fence_put(fence);
1420 	} else {
1421 		ret = xe_bo_vmap(backup);
1422 		if (ret)
1423 			goto out_unlock_bo;
1424 
1425 		if (iosys_map_is_null(&bo->vmap)) {
1426 			ret = xe_bo_vmap(bo);
1427 			if (ret)
1428 				goto out_backup;
1429 			unmap = true;
1430 		}
1431 
1432 		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
1433 				 xe_bo_size(bo));
1434 	}
1435 
1436 	bo->backup_obj = NULL;
1437 
1438 out_backup:
1439 	xe_bo_vunmap(backup);
1440 	if (!bo->backup_obj) {
1441 		if (xe_bo_is_pinned(backup))
1442 			ttm_bo_unpin(&backup->ttm);
1443 		xe_bo_put(backup);
1444 	}
1445 out_unlock_bo:
1446 	if (unmap)
1447 		xe_bo_vunmap(bo);
1448 	xe_bo_unlock(bo);
1449 	return ret;
1450 }
1451 
1452 int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
1453 {
1454 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
1455 	struct ttm_tt *tt = ttm_bo->ttm;
1456 
1457 	if (tt) {
1458 		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
1459 
1460 		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1461 			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
1462 						 ttm_bo->sg,
1463 						 DMA_BIDIRECTIONAL);
1464 			ttm_bo->sg = NULL;
1465 			xe_tt->sg = NULL;
1466 		} else if (xe_tt->sg) {
1467 			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
1468 					  xe_tt->sg,
1469 					  DMA_BIDIRECTIONAL, 0);
1470 			sg_free_table(xe_tt->sg);
1471 			xe_tt->sg = NULL;
1472 		}
1473 	}
1474 
1475 	return 0;
1476 }
1477 
1478 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1479 				       unsigned long page_offset)
1480 {
1481 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1482 	struct xe_res_cursor cursor;
1483 	struct xe_vram_region *vram;
1484 
1485 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1486 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1487 
1488 	vram = res_to_mem_region(ttm_bo->resource);
1489 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1490 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1491 }
1492 
1493 static void __xe_bo_vunmap(struct xe_bo *bo);
1494 
1495 /*
1496  * TODO: Move this function to TTM so we don't rely on how TTM does its
1497  * locking, thereby abusing TTM internals.
1498  */
1499 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1500 {
1501 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1502 	bool locked;
1503 
1504 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1505 
1506 	/*
1507 	 * We can typically only race with TTM trylocking under the
1508 	 * lru_lock, which will immediately be unlocked again since
1509 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1510 	 * always succeed here, as long as we hold the lru lock.
1511 	 */
1512 	spin_lock(&ttm_bo->bdev->lru_lock);
1513 	locked = dma_resv_trylock(ttm_bo->base.resv);
1514 	spin_unlock(&ttm_bo->bdev->lru_lock);
1515 	xe_assert(xe, locked);
1516 
1517 	return locked;
1518 }
1519 
1520 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1521 {
1522 	struct dma_resv_iter cursor;
1523 	struct dma_fence *fence;
1524 	struct dma_fence *replacement = NULL;
1525 	struct xe_bo *bo;
1526 
1527 	if (!xe_bo_is_xe_bo(ttm_bo))
1528 		return;
1529 
1530 	bo = ttm_to_xe_bo(ttm_bo);
1531 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1532 
1533 	/*
1534 	 * Corner case where TTM fails to allocate memory and this BO's resv
1535 	 * still points to the VM's resv.
1536 	 */
1537 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1538 		return;
1539 
1540 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1541 		return;
1542 
1543 	/*
1544 	 * Scrub the preempt fences if any. The unbind fence is already
1545 	 * attached to the resv.
1546 	 * TODO: Don't do this for external bos once we scrub them after
1547 	 * unbind.
1548 	 */
1549 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1550 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1551 		if (xe_fence_is_xe_preempt(fence) &&
1552 		    !dma_fence_is_signaled(fence)) {
1553 			if (!replacement)
1554 				replacement = dma_fence_get_stub();
1555 
1556 			dma_resv_replace_fences(ttm_bo->base.resv,
1557 						fence->context,
1558 						replacement,
1559 						DMA_RESV_USAGE_BOOKKEEP);
1560 		}
1561 	}
1562 	dma_fence_put(replacement);
1563 
1564 	dma_resv_unlock(ttm_bo->base.resv);
1565 }
1566 
1567 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1568 {
1569 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1570 
1571 	if (!xe_bo_is_xe_bo(ttm_bo))
1572 		return;
1573 
1574 	if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev)))
1575 		xe_sriov_vf_ccs_detach_bo(bo);
1576 
1577 	/*
1578 	 * Object is idle and about to be destroyed. Release the
1579 	 * dma-buf attachment.
1580 	 */
1581 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1582 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1583 						       struct xe_ttm_tt, ttm);
1584 
1585 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1586 					 DMA_BIDIRECTIONAL);
1587 		ttm_bo->sg = NULL;
1588 		xe_tt->sg = NULL;
1589 	}
1590 }
1591 
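/* Purge the backing store of @ttm_bo by validating it against an empty placement. */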
1592 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1593 {
1594 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1595 
1596 	if (ttm_bo->ttm) {
1597 		struct ttm_placement place = {};
1598 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1599 
1600 		drm_WARN_ON(&xe->drm, ret);
1601 	}
1602 }
1603 
1604 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1605 {
1606 	struct ttm_operation_ctx ctx = {
1607 		.interruptible = false,
1608 		.gfp_retry_mayfail = false,
1609 	};
1610 
1611 	if (ttm_bo->ttm) {
1612 		struct xe_ttm_tt *xe_tt =
1613 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1614 
1615 		if (xe_tt->purgeable)
1616 			xe_ttm_bo_purge(ttm_bo, &ctx);
1617 	}
1618 }
1619 
1620 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1621 				unsigned long offset, void *buf, int len,
1622 				int write)
1623 {
1624 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1625 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1626 	struct iosys_map vmap;
1627 	struct xe_res_cursor cursor;
1628 	struct xe_vram_region *vram;
1629 	int bytes_left = len;
1630 	int err = 0;
1631 
1632 	xe_bo_assert_held(bo);
1633 	xe_device_assert_mem_access(xe);
1634 
1635 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1636 		return -EIO;
1637 
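	/*
	 * Go through the migration engine for non-CPU-visible VRAM or large
	 * transfers; otherwise memcpy via the CPU mapping of VRAM below.
	 */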
1638 	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
1639 		struct xe_migrate *migrate =
1640 			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1641 
1642 		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1643 					       write);
1644 		goto out;
1645 	}
1646 
1647 	vram = res_to_mem_region(ttm_bo->resource);
1648 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1649 		     xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);
1650 
1651 	do {
1652 		unsigned long page_offset = (offset & ~PAGE_MASK);
1653 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1654 
1655 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1656 					  cursor.start);
1657 		if (write)
1658 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1659 		else
1660 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1661 
1662 		buf += byte_count;
1663 		offset += byte_count;
1664 		bytes_left -= byte_count;
1665 		if (bytes_left)
1666 			xe_res_next(&cursor, PAGE_SIZE);
1667 	} while (bytes_left);
1668 
1669 out:
1670 	return err ?: len;
1671 }
1672 
1673 const struct ttm_device_funcs xe_ttm_funcs = {
1674 	.ttm_tt_create = xe_ttm_tt_create,
1675 	.ttm_tt_populate = xe_ttm_tt_populate,
1676 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1677 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1678 	.evict_flags = xe_evict_flags,
1679 	.move = xe_bo_move,
1680 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1681 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1682 	.access_memory = xe_ttm_access_memory,
1683 	.release_notify = xe_ttm_bo_release_notify,
1684 	.eviction_valuable = xe_bo_eviction_valuable,
1685 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1686 	.swap_notify = xe_ttm_bo_swap_notify,
1687 };
1688 
1689 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1690 {
1691 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1692 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1693 	struct xe_tile *tile;
1694 	u8 id;
1695 
1696 	if (bo->ttm.base.import_attach)
1697 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1698 	drm_gem_object_release(&bo->ttm.base);
1699 
1700 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1701 
1702 	for_each_tile(tile, xe, id)
1703 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1704 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1705 
1706 #ifdef CONFIG_PROC_FS
1707 	if (bo->client)
1708 		xe_drm_client_remove_bo(bo);
1709 #endif
1710 
1711 	if (bo->vm && xe_bo_is_user(bo))
1712 		xe_vm_put(bo->vm);
1713 
1714 	if (bo->parent_obj)
1715 		xe_bo_put(bo->parent_obj);
1716 
1717 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1718 	if (!list_empty(&bo->vram_userfault_link))
1719 		list_del(&bo->vram_userfault_link);
1720 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1721 
1722 	kfree(bo);
1723 }
1724 
1725 static void xe_gem_object_free(struct drm_gem_object *obj)
1726 {
1727 	/* Our BO reference counting scheme works as follows:
1728 	 *
1729 	 * The gem object kref is typically used throughout the driver,
1730 	 * and the gem object holds a ttm_buffer_object refcount, so
1731 	 * that when the last gem object reference is put, which is when
1732 	 * we end up in this function, we put also that ttm_buffer_object
1733 	 * we end up in this function, we also put that ttm_buffer_object
1734 	 * allowed to access the object in a way that requires a gem
1735 	 * refcount, including locking the object.
1736 	 *
1737 	 * driver ttm callbacks is allowed to use the ttm_buffer_object
1738 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1739 	 */
1740 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1741 	ttm_bo_fini(container_of(obj, struct ttm_buffer_object, base));
1742 }
1743 
1744 static void xe_gem_object_close(struct drm_gem_object *obj,
1745 				struct drm_file *file_priv)
1746 {
1747 	struct xe_bo *bo = gem_to_xe_bo(obj);
1748 
1749 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1750 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1751 
1752 		xe_bo_lock(bo, false);
1753 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1754 		xe_bo_unlock(bo);
1755 	}
1756 }
1757 
1758 static bool should_migrate_to_smem(struct xe_bo *bo)
1759 {
1760 	/*
1761 	 * NOTE: The following atomic checks are platform-specific. For example,
1762 	 * if a device supports CXL atomics, these may not be necessary or
1763 	 * may behave differently.
1764 	 */
1765 
1766 	return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
1767 	       bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
1768 }
1769 
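/*
 * Wait for kernel-usage fences on the BO's resv to signal, or just poll them
 * when ctx->no_wait_gpu is set. Returns 0 when idle, -EBUSY when not idle,
 * or a negative error code if the wait was interrupted.
 */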
1770 static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
1771 {
1772 	long lerr;
1773 
1774 	if (ctx->no_wait_gpu)
1775 		return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
1776 			0 : -EBUSY;
1777 
1778 	lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
1779 				     ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
1780 	if (lerr < 0)
1781 		return lerr;
1782 	if (lerr == 0)
1783 		return -EBUSY;
1784 
1785 	return 0;
1786 }
1787 
1788 /* Populate the bo if swapped out, or migrate if the access mode requires that. */
1789 static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
1790 			       struct drm_exec *exec)
1791 {
1792 	struct ttm_buffer_object *tbo = &bo->ttm;
1793 	int err = 0;
1794 
1795 	if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
1796 		err = xe_bo_wait_usage_kernel(bo, ctx);
1797 		if (!err)
1798 			err = ttm_bo_populate(&bo->ttm, ctx);
1799 	} else if (should_migrate_to_smem(bo)) {
1800 		xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
1801 		err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
1802 	}
1803 
1804 	return err;
1805 }
1806 
1807 /* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
1808 static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
1809 {
1810 	vm_fault_t ret;
1811 
1812 	trace_xe_bo_cpu_fault(bo);
1813 
1814 	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1815 				       TTM_BO_VM_NUM_PREFAULT);
1816 	/*
1817 	 * By the time TTM is actually called to insert PTEs, no blocking conditions
1818 	 * should remain; otherwise TTM may drop locks and return VM_FAULT_RETRY.
1819 	 */
1820 	xe_assert(xe, ret != VM_FAULT_RETRY);
1821 
1822 	if (ret == VM_FAULT_NOPAGE &&
1823 	    mem_type_is_vram(bo->ttm.resource->mem_type)) {
1824 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1825 		if (list_empty(&bo->vram_userfault_link))
1826 			list_add(&bo->vram_userfault_link,
1827 				 &xe->mem_access.vram_userfault.list);
1828 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1829 	}
1830 
1831 	return ret;
1832 }
1833 
1834 static vm_fault_t xe_err_to_fault_t(int err)
1835 {
1836 	switch (err) {
1837 	case 0:
1838 	case -EINTR:
1839 	case -ERESTARTSYS:
1840 	case -EAGAIN:
1841 		return VM_FAULT_NOPAGE;
1842 	case -ENOMEM:
1843 	case -ENOSPC:
1844 		return VM_FAULT_OOM;
1845 	default:
1846 		break;
1847 	}
1848 	return VM_FAULT_SIGBUS;
1849 }
1850 
1851 static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
1852 {
1853 	dma_resv_assert_held(tbo->base.resv);
1854 
1855 	return tbo->ttm &&
1856 		(tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
1857 		TTM_TT_FLAG_EXTERNAL;
1858 }
1859 
1860 static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
1861 					   struct xe_bo *bo, bool needs_rpm)
1862 {
1863 	struct ttm_buffer_object *tbo = &bo->ttm;
1864 	vm_fault_t ret = VM_FAULT_RETRY;
1865 	struct xe_validation_ctx ctx;
1866 	struct ttm_operation_ctx tctx = {
1867 		.interruptible = true,
1868 		.no_wait_gpu = true,
1869 		.gfp_retry_mayfail = true,
1870 
1871 	};
1872 	int err;
1873 
1874 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1875 		return VM_FAULT_RETRY;
1876 
1877 	err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
1878 				     (struct xe_val_flags) {
1879 					     .interruptible = true,
1880 					     .no_block = true
1881 				     });
1882 	if (err)
1883 		goto out_pm;
1884 
1885 	if (!dma_resv_trylock(tbo->base.resv))
1886 		goto out_validation;
1887 
1888 	if (xe_ttm_bo_is_imported(tbo)) {
1889 		ret = VM_FAULT_SIGBUS;
1890 		drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
1891 		goto out_unlock;
1892 	}
1893 
1894 	err = xe_bo_fault_migrate(bo, &tctx, NULL);
1895 	if (err) {
1896 		/* Keep VM_FAULT_RETRY for -ENOMEM, -ENOSPC and -EBUSY; convert other errors. */
1897 		if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
1898 			ret = xe_err_to_fault_t(err);
1899 		goto out_unlock;
1900 	}
1901 
1902 	if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
1903 		ret = __xe_bo_cpu_fault(vmf, xe, bo);
1904 
1905 out_unlock:
1906 	dma_resv_unlock(tbo->base.resv);
1907 out_validation:
1908 	xe_validation_ctx_fini(&ctx);
1909 out_pm:
1910 	if (needs_rpm)
1911 		xe_pm_runtime_put(xe);
1912 
1913 	return ret;
1914 }
1915 
1916 static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
1917 {
1918 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1919 	struct drm_device *ddev = tbo->base.dev;
1920 	struct xe_device *xe = to_xe_device(ddev);
1921 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1922 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1923 	bool retry_after_wait = false;
1924 	struct xe_validation_ctx ctx;
1925 	struct drm_exec exec;
1926 	vm_fault_t ret;
1927 	int err = 0;
1928 	int idx;
1929 
1930 	if (!drm_dev_enter(&xe->drm, &idx))
1931 		return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1932 
1933 	ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
1934 	if (ret != VM_FAULT_RETRY)
1935 		goto out;
1936 
1937 	if (fault_flag_allow_retry_first(vmf->flags)) {
1938 		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
1939 			goto out;
1940 		retry_after_wait = true;
1941 		xe_bo_get(bo);
1942 		mmap_read_unlock(vmf->vma->vm_mm);
1943 	} else {
1944 		ret = VM_FAULT_NOPAGE;
1945 	}
1946 
1947 	/*
1948 	 * The fastpath failed and we were not required to return and retry immediately.
1949 	 * We're now running in one of two modes:
1950 	 *
1951 	 * 1) retry_after_wait == true: The mmap_read_lock() has been dropped, and here we
1952 	 * only resolve the blocking waits; the fault itself can't be resolved without that
1953 	 * lock. The aim is that the retried fault then hits the fastpath, although that may
1954 	 * still fail since the bo lock is dropped in between.
1955 	 *
1956 	 * 2) retry_after_wait == false: The fastpath failed, typically even after
1957 	 * a retry. Do whatever's necessary to resolve the fault.
1958 	 *
1959 	 * This construct is recommended to avoid excessive waits under the mmap_lock.
1960 	 */
1961 
1962 	if (needs_rpm)
1963 		xe_pm_runtime_get(xe);
1964 
1965 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1966 			    err) {
1967 		struct ttm_operation_ctx tctx = {
1968 			.interruptible = true,
1969 			.no_wait_gpu = false,
1970 			.gfp_retry_mayfail = retry_after_wait,
1971 		};
1972 
1973 		err = drm_exec_lock_obj(&exec, &tbo->base);
1974 		drm_exec_retry_on_contention(&exec);
1975 		if (err)
1976 			break;
1977 
1978 		if (xe_ttm_bo_is_imported(tbo)) {
1979 			err = -EFAULT;
1980 			drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
1981 			break;
1982 		}
1983 
1984 		err = xe_bo_fault_migrate(bo, &tctx, &exec);
1985 		if (err) {
1986 			drm_exec_retry_on_contention(&exec);
1987 			xe_validation_retry_on_oom(&ctx, &err);
1988 			break;
1989 		}
1990 
1991 		err = xe_bo_wait_usage_kernel(bo, &tctx);
1992 		if (err)
1993 			break;
1994 
1995 		if (!retry_after_wait)
1996 			ret = __xe_bo_cpu_fault(vmf, xe, bo);
1997 	}
1998 	/* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
1999 	if (err && !retry_after_wait)
2000 		ret = xe_err_to_fault_t(err);
2001 
2002 	if (needs_rpm)
2003 		xe_pm_runtime_put(xe);
2004 
2005 	if (retry_after_wait)
2006 		xe_bo_put(bo);
2007 out:
2008 	drm_dev_exit(idx);
2009 
2010 	return ret;
2011 }
2012 
2013 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
2014 			   void *buf, int len, int write)
2015 {
2016 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
2017 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
2018 	struct xe_device *xe = xe_bo_device(bo);
2019 	int ret;
2020 
2021 	xe_pm_runtime_get(xe);
2022 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
2023 	xe_pm_runtime_put(xe);
2024 
2025 	return ret;
2026 }
2027 
2028 /**
2029  * xe_bo_read() - Read from an xe_bo
2030  * @bo: The buffer object to read from.
2031  * @offset: The byte offset to start reading from.
2032  * @dst: Location to store the read.
2033  * @size: Size in bytes for the read.
2034  *
2035  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
2036  *
2037  * Return: Zero on success, or negative error.
2038  */
2039 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
2040 {
2041 	int ret;
2042 
2043 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
2044 	if (ret >= 0 && ret != size)
2045 		ret = -EIO;
2046 	else if (ret == size)
2047 		ret = 0;
2048 
2049 	return ret;
2050 }
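
/*
 * Illustrative usage sketch (not part of the driver): pull a small,
 * made-up header structure out of a bo with xe_bo_read(). The struct
 * and the wrapper function below are hypothetical example names.
 */
struct example_bo_hdr {
	u32 magic;
	u32 version;
};

static __maybe_unused int example_read_bo_hdr(struct xe_bo *bo,
					      struct example_bo_hdr *hdr)
{
	/* Zero on a complete read, negative error code otherwise. */
	return xe_bo_read(bo, 0, hdr, sizeof(*hdr));
}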
2051 
2052 static const struct vm_operations_struct xe_gem_vm_ops = {
2053 	.fault = xe_bo_cpu_fault,
2054 	.open = ttm_bo_vm_open,
2055 	.close = ttm_bo_vm_close,
2056 	.access = xe_bo_vm_access,
2057 };
2058 
2059 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
2060 	.free = xe_gem_object_free,
2061 	.close = xe_gem_object_close,
2062 	.mmap = drm_gem_ttm_mmap,
2063 	.export = xe_gem_prime_export,
2064 	.vm_ops = &xe_gem_vm_ops,
2065 };
2066 
2067 /**
2068  * xe_bo_alloc - Allocate storage for a struct xe_bo
2069  *
2070  * This function is intended to allocate storage to be used for input
2071  * to __xe_bo_create_locked(), in case a pointer to the bo to be
2072  * created is needed before the call to __xe_bo_create_locked().
2073  * If __xe_bo_create_locked() ends up never being called, the
2074  * storage allocated with this function needs to be freed using
2075  * xe_bo_free().
2076  *
2077  * Return: A pointer to an uninitialized struct xe_bo on success,
2078  * ERR_PTR(-ENOMEM) on error.
2079  */
2080 struct xe_bo *xe_bo_alloc(void)
2081 {
2082 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
2083 
2084 	if (!bo)
2085 		return ERR_PTR(-ENOMEM);
2086 
2087 	return bo;
2088 }
2089 
2090 /**
2091  * xe_bo_free - Free storage allocated using xe_bo_alloc()
2092  * @bo: The buffer object storage.
2093  *
2094  * Refer to xe_bo_alloc() documentation for valid use-cases.
2095  */
2096 void xe_bo_free(struct xe_bo *bo)
2097 {
2098 	kfree(bo);
2099 }
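
/*
 * Illustrative sketch (not part of the driver) of the pattern described in
 * the kernel-doc above: allocate the bo storage up front so that a pointer
 * to the future bo exists early, and free it with xe_bo_free() if the init
 * step never happens. The helper name, the example size and flags, and the
 * assumption that @exec is a caller-provided validation transaction are all
 * made up for the example.
 */
static __maybe_unused struct xe_bo *
example_prealloc_bo(struct xe_device *xe, struct drm_exec *exec, bool do_init)
{
	struct xe_bo *bo = xe_bo_alloc();

	if (IS_ERR(bo))
		return bo;

	if (!do_init) {
		/* xe_bo_init_locked() was never called; release the storage. */
		xe_bo_free(bo);
		return NULL;
	}

	/* On success the bo is returned with its dma-resv locked. */
	return xe_bo_init_locked(xe, bo, NULL, NULL, NULL, SZ_4K, 0,
				 ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM, exec);
}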
2100 
2101 /**
2102  * xe_bo_init_locked() - Initialize or create an xe_bo.
2103  * @xe: The xe device.
2104  * @bo: An already allocated buffer object or NULL
2105  * if the function should allocate a new one.
2106  * @tile: The tile to select for migration of this bo, and the tile used for
2107  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2108  * @resv: Pointer to a locked shared reservation object to use for this bo,
2109  * or NULL for the xe_bo to use its own.
2110  * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
2111  * @size: The storage size to use for the bo.
2112  * @cpu_caching: The cpu caching used for system memory backing store.
2113  * @type: The TTM buffer object type.
2114  * @flags: XE_BO_FLAG_ flags.
2115  * @exec: The drm_exec transaction to use for exhaustive eviction.
2116  *
2117  * Initialize or create an xe buffer object. On failure, any allocated buffer
2118  * object passed in @bo will have been unreferenced.
2119  *
2120  * Return: The buffer object on success. Negative error pointer on failure.
2121  */
2122 struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
2123 				struct xe_tile *tile, struct dma_resv *resv,
2124 				struct ttm_lru_bulk_move *bulk, size_t size,
2125 				u16 cpu_caching, enum ttm_bo_type type,
2126 				u32 flags, struct drm_exec *exec)
2127 {
2128 	struct ttm_operation_ctx ctx = {
2129 		.interruptible = true,
2130 		.no_wait_gpu = false,
2131 		.gfp_retry_mayfail = true,
2132 	};
2133 	struct ttm_placement *placement;
2134 	uint32_t alignment;
2135 	size_t aligned_size;
2136 	int err;
2137 
2138 	/* Only kernel objects should set a tile */
2139 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
2140 
2141 	if (XE_WARN_ON(!size)) {
2142 		xe_bo_free(bo);
2143 		return ERR_PTR(-EINVAL);
2144 	}
2145 
2146 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
2147 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
2148 		return ERR_PTR(-EINVAL);
2149 
2150 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
2151 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
2152 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
2153 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
2154 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
2155 
2156 		aligned_size = ALIGN(size, align);
2157 		if (type != ttm_bo_type_device)
2158 			size = ALIGN(size, align);
2159 		flags |= XE_BO_FLAG_INTERNAL_64K;
2160 		alignment = align >> PAGE_SHIFT;
2161 	} else {
2162 		aligned_size = ALIGN(size, SZ_4K);
2163 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
2164 		alignment = SZ_4K >> PAGE_SHIFT;
2165 	}
2166 
2167 	if (type == ttm_bo_type_device && aligned_size != size)
2168 		return ERR_PTR(-EINVAL);
2169 
2170 	if (!bo) {
2171 		bo = xe_bo_alloc();
2172 		if (IS_ERR(bo))
2173 			return bo;
2174 	}
2175 
2176 	bo->ccs_cleared = false;
2177 	bo->tile = tile;
2178 	bo->flags = flags;
2179 	bo->cpu_caching = cpu_caching;
2180 	bo->ttm.base.funcs = &xe_gem_object_funcs;
2181 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
2182 	INIT_LIST_HEAD(&bo->pinned_link);
2183 #ifdef CONFIG_PROC_FS
2184 	INIT_LIST_HEAD(&bo->client_link);
2185 #endif
2186 	INIT_LIST_HEAD(&bo->vram_userfault_link);
2187 
2188 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
2189 
2190 	if (resv) {
2191 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
2192 		ctx.resv = resv;
2193 	}
2194 
2195 	xe_validation_assert_exec(xe, exec, &bo->ttm.base);
2196 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
2197 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type);
2198 		if (WARN_ON(err)) {
2199 			xe_ttm_bo_destroy(&bo->ttm);
2200 			return ERR_PTR(err);
2201 		}
2202 	}
2203 
2204 	/* Defer populating type_sg bos */
2205 	placement = (type == ttm_bo_type_sg ||
2206 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
2207 		&bo->placement;
2208 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
2209 				   placement, alignment,
2210 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
2211 	if (err)
2212 		return ERR_PTR(err);
2213 
2214 	/*
2215 	 * The VRAM pages underneath are potentially still being accessed by the
2216 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
2217 	 * sure to add any corresponding move/clear fences into the objects
2218 	 * sure to add any corresponding move/clear fences into the object's
2219 	 *
2220 	 * For KMD internal buffers we don't care about GPU clearing, however we
2221 	 * still need to handle async evictions, where the VRAM is still being
2222 	 * accessed by the GPU. Most internal callers are not expecting this,
2223 	 * since they are missing the required synchronisation before accessing
2224 	 * the memory. To keep things simple just sync wait any kernel fences
2225 	 * here, if the buffer is designated KMD internal.
2226 	 *
2227 	 * For normal userspace objects we should already have the required
2228 	 * pipelining or sync waiting elsewhere, since we already have to deal
2229 	 * with things like async GPU clearing.
2230 	 */
2231 	if (type == ttm_bo_type_kernel) {
2232 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
2233 						     DMA_RESV_USAGE_KERNEL,
2234 						     ctx.interruptible,
2235 						     MAX_SCHEDULE_TIMEOUT);
2236 
2237 		if (timeout < 0) {
2238 			if (!resv)
2239 				dma_resv_unlock(bo->ttm.base.resv);
2240 			xe_bo_put(bo);
2241 			return ERR_PTR(timeout);
2242 		}
2243 	}
2244 
2245 	bo->created = true;
2246 	if (bulk)
2247 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
2248 	else
2249 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2250 
2251 	return bo;
2252 }
2253 
2254 static int __xe_bo_fixed_placement(struct xe_device *xe,
2255 				   struct xe_bo *bo, enum ttm_bo_type type,
2256 				   u32 flags,
2257 				   u64 start, u64 end, u64 size)
2258 {
2259 	struct ttm_place *place = bo->placements;
2260 	u32 vram_flag, vram_stolen_flags;
2261 
2262 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
2263 		return -EINVAL;
2264 
2265 	vram_flag = flags & XE_BO_FLAG_VRAM_MASK;
2266 	vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag;
2267 
2268 	/* check if more than one VRAM/STOLEN flag is set */
2269 	if (hweight32(vram_stolen_flags) > 1)
2270 		return -EINVAL;
2271 
2272 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
2273 	place->fpfn = start >> PAGE_SHIFT;
2274 	place->lpfn = end >> PAGE_SHIFT;
2275 
2276 	if (flags & XE_BO_FLAG_STOLEN)
2277 		place->mem_type = XE_PL_STOLEN;
2278 	else
2279 		place->mem_type = bo_vram_flags_to_vram_placement(xe, flags, vram_flag, type);
2280 
2281 	bo->placement = (struct ttm_placement) {
2282 		.num_placement = 1,
2283 		.placement = place,
2284 	};
2285 
2286 	return 0;
2287 }
2288 
2289 static struct xe_bo *
2290 __xe_bo_create_locked(struct xe_device *xe,
2291 		      struct xe_tile *tile, struct xe_vm *vm,
2292 		      size_t size, u64 start, u64 end,
2293 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
2294 		      u64 alignment, struct drm_exec *exec)
2295 {
2296 	struct xe_bo *bo = NULL;
2297 	int err;
2298 
2299 	if (vm)
2300 		xe_vm_assert_held(vm);
2301 
2302 	if (start || end != ~0ULL) {
2303 		bo = xe_bo_alloc();
2304 		if (IS_ERR(bo))
2305 			return bo;
2306 
2307 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
2308 		err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size);
2309 		if (err) {
2310 			xe_bo_free(bo);
2311 			return ERR_PTR(err);
2312 		}
2313 	}
2314 
2315 	bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
2316 			       vm && !xe_vm_in_fault_mode(vm) &&
2317 			       flags & XE_BO_FLAG_USER ?
2318 			       &vm->lru_bulk_move : NULL, size,
2319 			       cpu_caching, type, flags, exec);
2320 	if (IS_ERR(bo))
2321 		return bo;
2322 
2323 	bo->min_align = alignment;
2324 
2325 	/*
2326 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
2327 	 * to ensure the shared resv doesn't disappear under the bo, the bo
2328 	 * will keep a reference to the vm, and avoid circular references
2329 	 * by having all the vm's bo references released at vm close
2330 	 * time.
2331 	 */
2332 	if (vm && xe_bo_is_user(bo))
2333 		xe_vm_get(vm);
2334 	bo->vm = vm;
2335 
2336 	if (bo->flags & XE_BO_FLAG_GGTT) {
2337 		struct xe_tile *t;
2338 		u8 id;
2339 
2340 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
2341 			if (!tile && flags & XE_BO_FLAG_STOLEN)
2342 				tile = xe_device_get_root_tile(xe);
2343 
2344 			xe_assert(xe, tile);
2345 		}
2346 
2347 		for_each_tile(t, xe, id) {
2348 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
2349 				continue;
2350 
2351 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
2352 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
2353 							   start + xe_bo_size(bo), U64_MAX,
2354 							   exec);
2355 			} else {
2356 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec);
2357 			}
2358 			if (err)
2359 				goto err_unlock_put_bo;
2360 		}
2361 	}
2362 
2363 	trace_xe_bo_create(bo);
2364 	return bo;
2365 
2366 err_unlock_put_bo:
2367 	__xe_bo_unset_bulk_move(bo);
2368 	xe_bo_unlock_vm_held(bo);
2369 	xe_bo_put(bo);
2370 	return ERR_PTR(err);
2371 }
2372 
2373 /**
2374  * xe_bo_create_locked() - Create a BO
2375  * @xe: The xe device.
2376  * @tile: The tile to select for migration of this bo, and the tile used for
2377  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2378  * @vm: The local vm or NULL for external objects.
2379  * @size: The storage size to use for the bo.
2380  * @type: The TTM buffer object type.
2381  * @flags: XE_BO_FLAG_ flags.
2382  * @exec: The drm_exec transaction to use for exhaustive eviction.
2383  *
2384  * Create a locked xe BO with no range or alignment restrictions.
2385  *
2386  * Return: The buffer object on success. Negative error pointer on failure.
2387  */
2388 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
2389 				  struct xe_vm *vm, size_t size,
2390 				  enum ttm_bo_type type, u32 flags,
2391 				  struct drm_exec *exec)
2392 {
2393 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
2394 				     flags, 0, exec);
2395 }
2396 
2397 static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
2398 				       size_t size, u16 cpu_caching,
2399 				       enum ttm_bo_type type, u32 flags,
2400 				       u64 alignment, bool intr)
2401 {
2402 	struct xe_validation_ctx ctx;
2403 	struct drm_exec exec;
2404 	struct xe_bo *bo;
2405 	int ret = 0;
2406 
2407 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
2408 			    ret) {
2409 		bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
2410 					   cpu_caching, type, flags, alignment, &exec);
2411 		drm_exec_retry_on_contention(&exec);
2412 		if (IS_ERR(bo)) {
2413 			ret = PTR_ERR(bo);
2414 			xe_validation_retry_on_oom(&ctx, &ret);
2415 		} else {
2416 			xe_bo_unlock(bo);
2417 		}
2418 	}
2419 
2420 	return ret ? ERR_PTR(ret) : bo;
2421 }
2422 
2423 /**
2424  * xe_bo_create_user() - Create a user BO
2425  * @xe: The xe device.
2426  * @vm: The local vm or NULL for external objects.
2427  * @size: The storage size to use for the bo.
2428  * @cpu_caching: The caching mode to be used for system backing store.
2429  * @flags: XE_BO_FLAG_ flags.
2430  * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
2431  * if such a transaction should be initiated by the call.
2432  *
2433  * Create a bo on behalf of user-space.
2434  *
2435  * Return: The buffer object on success. Negative error pointer on failure.
2436  */
2437 struct xe_bo *xe_bo_create_user(struct xe_device *xe,
2438 				struct xe_vm *vm, size_t size,
2439 				u16 cpu_caching,
2440 				u32 flags, struct drm_exec *exec)
2441 {
2442 	struct xe_bo *bo;
2443 
2444 	flags |= XE_BO_FLAG_USER;
2445 
2446 	if (vm || exec) {
2447 		xe_assert(xe, exec);
2448 		bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
2449 					   cpu_caching, ttm_bo_type_device,
2450 					   flags, 0, exec);
2451 		if (!IS_ERR(bo))
2452 			xe_bo_unlock_vm_held(bo);
2453 	} else {
2454 		bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
2455 				       ttm_bo_type_device, flags, 0, true);
2456 	}
2457 
2458 	return bo;
2459 }
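
/*
 * Illustrative sketch (not part of the driver): create a 1 MiB write-combined
 * user bo with no vm attached, passing a NULL @exec so that
 * xe_bo_create_user() starts its own validation transaction internally.
 * The helper name and the chosen size/placement are example values only.
 */
static __maybe_unused struct xe_bo *example_create_user_bo(struct xe_device *xe)
{
	return xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
				 XE_BO_FLAG_SYSTEM, NULL);
}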
2460 
2461 /**
2462  * xe_bo_create_pin_range_novm() - Create and pin a BO with range options.
2463  * @xe: The xe device.
2464  * @tile: The tile to select for migration of this bo, and the tile used for
2465  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2466  * @size: The storage size to use for the bo.
2467  * @start: Start of fixed VRAM range or 0.
2468  * @end: End of fixed VRAM range or ~0ULL.
2469  * @type: The TTM buffer object type.
2470  * @flags: XE_BO_FLAG_ flags.
2471  *
2472  * Create and pin an Xe BO with range options. If @start and @end indicate
2473  * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement
2474  * only.
2475  *
2476  * Return: The buffer object on success. Negative error pointer on failure.
2477  */
2478 struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
2479 					  size_t size, u64 start, u64 end,
2480 					  enum ttm_bo_type type, u32 flags)
2481 {
2482 	struct xe_validation_ctx ctx;
2483 	struct drm_exec exec;
2484 	struct xe_bo *bo;
2485 	int err = 0;
2486 
2487 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
2488 		bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end,
2489 					   0, type, flags, 0, &exec);
2490 		if (IS_ERR(bo)) {
2491 			drm_exec_retry_on_contention(&exec);
2492 			err = PTR_ERR(bo);
2493 			xe_validation_retry_on_oom(&ctx, &err);
2494 			break;
2495 		}
2496 
2497 		err = xe_bo_pin(bo, &exec);
2498 		xe_bo_unlock(bo);
2499 		if (err) {
2500 			xe_bo_put(bo);
2501 			drm_exec_retry_on_contention(&exec);
2502 			xe_validation_retry_on_oom(&ctx, &err);
2503 			break;
2504 		}
2505 	}
2506 
2507 	return err ? ERR_PTR(err) : bo;
2508 }
2509 
2510 static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
2511 						     struct xe_tile *tile,
2512 						     struct xe_vm *vm,
2513 						     size_t size, u64 offset,
2514 						     enum ttm_bo_type type, u32 flags,
2515 						     u64 alignment, struct drm_exec *exec)
2516 {
2517 	struct xe_bo *bo;
2518 	int err;
2519 	u64 start = offset == ~0ull ? 0 : offset;
2520 	u64 end = offset == ~0ull ? ~0ull : start + size;
2521 
2522 	if (flags & XE_BO_FLAG_STOLEN &&
2523 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
2524 		flags |= XE_BO_FLAG_GGTT;
2525 
2526 	bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
2527 				   flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
2528 				   alignment, exec);
2529 	if (IS_ERR(bo))
2530 		return bo;
2531 
2532 	err = xe_bo_pin(bo, exec);
2533 	if (err)
2534 		goto err_put;
2535 
2536 	err = xe_bo_vmap(bo);
2537 	if (err)
2538 		goto err_unpin;
2539 
2540 	xe_bo_unlock_vm_held(bo);
2541 
2542 	return bo;
2543 
2544 err_unpin:
2545 	xe_bo_unpin(bo);
2546 err_put:
2547 	xe_bo_unlock_vm_held(bo);
2548 	xe_bo_put(bo);
2549 	return ERR_PTR(err);
2550 }
2551 
2552 /**
2553  * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset
2554  * @xe: The xe device.
2555  * @tile: The tile to select for migration of this bo, and the tile used for
2556  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2557  * @size: The storage size to use for the bo.
2558  * @offset: Optional VRAM offset or %~0ull for don't care.
2559  * @type: The TTM buffer object type.
2560  * @flags: XE_BO_FLAG_ flags.
2561  * @alignment: GGTT alignment.
2562  * @intr: Whether to execute any waits for backing store interruptible.
2563  *
2564  * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment
2565  * options. The bo will be external and not associated with a VM.
2566  *
2567  * Return: The buffer object on success. Negative error pointer on failure.
2568  * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
2569  * to true on entry.
2570  */
2571 struct xe_bo *
2572 xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
2573 			     size_t size, u64 offset, enum ttm_bo_type type, u32 flags,
2574 			     u64 alignment, bool intr)
2575 {
2576 	struct xe_validation_ctx ctx;
2577 	struct drm_exec exec;
2578 	struct xe_bo *bo;
2579 	int ret = 0;
2580 
2581 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
2582 			    ret) {
2583 		bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset,
2584 						     type, flags, alignment, &exec);
2585 		if (IS_ERR(bo)) {
2586 			drm_exec_retry_on_contention(&exec);
2587 			ret = PTR_ERR(bo);
2588 			xe_validation_retry_on_oom(&ctx, &ret);
2589 		}
2590 	}
2591 
2592 	return ret ? ERR_PTR(ret) : bo;
2593 }
2594 
2595 /**
2596  * xe_bo_create_pin_map() - Create pinned and mapped bo
2597  * @xe: The xe device.
2598  * @tile: The tile to select for migration of this bo, and the tile used for
2599  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2600  * @vm: The vm to associate the buffer object with. The vm's resv must be locked
2601  * with the transaction represented by @exec.
2602  * @size: The storage size to use for the bo.
2603  * @type: The TTM buffer object type.
2604  * @flags: XE_BO_FLAG_ flags.
2605  * @exec: The drm_exec transaction to use for exhaustive eviction, and
2606  * previously used for locking @vm's resv.
2607  *
2608  * Create a pinned and mapped bo. The bo will be associated with @vm if @vm
2609  * is non-NULL, and external otherwise.
2610  *
2611  * Return: The buffer object on success. Negative error pointer on failure.
2612  * In particular, the function may return ERR_PTR(%-EINTR) if @exec was
2613  * configured for interruptible locking.
2614  */
2615 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2616 				   struct xe_vm *vm, size_t size,
2617 				   enum ttm_bo_type type, u32 flags,
2618 				   struct drm_exec *exec)
2619 {
2620 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags,
2621 					       0, exec);
2622 }
2623 
2624 /**
2625  * xe_bo_create_pin_map_novm() - Create pinned and mapped bo
2626  * @xe: The xe device.
2627  * @tile: The tile to select for migration of this bo, and the tile used for
2628  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2629  * @size: The storage size to use for the bo.
2630  * @type: The TTM buffer object type.
2631  * @flags: XE_BO_FLAG_ flags.
2632  * @intr: Whether to execute any waits for backing store interruptible.
2633  *
2634  * Create a pinned and mapped bo. The bo will be external and not associated
2635  * with a VM.
2636  *
2637  * Return: The buffer object on success. Negative error pointer on failure.
2638  * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
2639  * to true on entry.
2640  */
2641 struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
2642 					size_t size, enum ttm_bo_type type, u32 flags,
2643 					bool intr)
2644 {
2645 	return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr);
2646 }
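
/*
 * Illustrative sketch (not part of the driver): create a pinned and mapped
 * kernel bo without a vm, fill it through the CPU mapping and tear it down
 * again with xe_bo_unpin_map_no_vm(). The helper name, size, payload and
 * flag selection are example values only.
 */
static __maybe_unused int example_pin_map_scratch(struct xe_device *xe,
						  struct xe_tile *tile)
{
	static const u32 payload = 0xc0ffee;
	struct xe_bo *bo;

	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT, true);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	xe_map_memcpy_to(xe, &bo->vmap, 0, &payload, sizeof(payload));

	xe_bo_unpin_map_no_vm(bo);

	return 0;
}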
2647 
2648 static void __xe_bo_unpin_map_no_vm(void *arg)
2649 {
2650 	xe_bo_unpin_map_no_vm(arg);
2651 }
2652 
2653 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2654 					   size_t size, u32 flags)
2655 {
2656 	struct xe_bo *bo;
2657 	int ret;
2658 
2659 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
2660 	bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true);
2661 	if (IS_ERR(bo))
2662 		return bo;
2663 
2664 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2665 	if (ret)
2666 		return ERR_PTR(ret);
2667 
2668 	return bo;
2669 }
2670 
2671 void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo)
2672 {
2673 	devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2674 }
2675 
2676 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
2677 					     const void *data, size_t size, u32 flags)
2678 {
2679 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
2680 
2681 	if (IS_ERR(bo))
2682 		return bo;
2683 
2684 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
2685 
2686 	return bo;
2687 }
2688 
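
/*
 * Illustrative sketch (not part of the driver): upload a caller-provided blob
 * into a devm-managed, pinned and mapped bo. The bo is torn down automatically
 * on driver removal, or earlier via xe_managed_bo_unpin_map_no_vm(). The
 * helper name and flag selection are example values only.
 */
static __maybe_unused struct xe_bo *
example_upload_blob(struct xe_device *xe, struct xe_tile *tile,
		    const void *blob, size_t size)
{
	return xe_managed_bo_create_from_data(xe, tile, blob, size,
					      XE_BO_FLAG_VRAM_IF_DGFX(tile) |
					      XE_BO_FLAG_GGTT);
}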
2689 /**
2690  * xe_managed_bo_reinit_in_vram() - Replace a managed bo with a VRAM copy
2691  * @xe: xe device
2692  * @tile: Tile where the new buffer will be created
2693  * @src: Managed buffer object allocated in system memory
2694  *
2695  * Replace a managed src buffer object allocated in system memory with a new
2696  * one allocated in vram, copying the data between them.
2697  * Buffer object in VRAM is not going to have the same GGTT address, the caller
2698  * is responsible for making sure that any old references to it are updated.
2699  *
2700  * Returns 0 for success, negative error code otherwise.
2701  */
2702 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
2703 {
2704 	struct xe_bo *bo;
2705 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
2706 
2707 	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
2708 				      XE_BO_FLAG_PINNED_NORESTORE);
2709 
2710 	xe_assert(xe, IS_DGFX(xe));
2711 	xe_assert(xe, !(*src)->vmap.is_iomem);
2712 
2713 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
2714 					    xe_bo_size(*src), dst_flags);
2715 	if (IS_ERR(bo))
2716 		return PTR_ERR(bo);
2717 
2718 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
2719 	*src = bo;
2720 
2721 	return 0;
2722 }
2723 
2724 /*
2725  * XXX: This is in the VM bind data path, likely should calculate this once and
2726  * store, with a recalculation if the BO is moved.
2727  */
2728 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2729 {
2730 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2731 
2732 	switch (res->mem_type) {
2733 	case XE_PL_STOLEN:
2734 		return xe_ttm_stolen_gpu_offset(xe);
2735 	case XE_PL_TT:
2736 	case XE_PL_SYSTEM:
2737 		return 0;
2738 	default:
2739 		return res_to_mem_region(res)->dpa_base;
2740 	}
2741 	return 0;
2742 }
2743 
2744 /**
2745  * xe_bo_pin_external - pin an external BO
2746  * @bo: buffer object to be pinned
2747  * @in_place: Pin in current placement, don't attempt to migrate.
2748  * @exec: The drm_exec transaction to use for exhaustive eviction.
2749  *
2750  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2751  * BO. Unique call compared to xe_bo_pin as this function has its own set of
2752  * asserts and code to ensure evict / restore on suspend / resume.
2753  *
2754  * Returns 0 for success, negative error code otherwise.
2755  */
2756 int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec)
2757 {
2758 	struct xe_device *xe = xe_bo_device(bo);
2759 	int err;
2760 
2761 	xe_assert(xe, !bo->vm);
2762 	xe_assert(xe, xe_bo_is_user(bo));
2763 
2764 	if (!xe_bo_is_pinned(bo)) {
2765 		if (!in_place) {
2766 			err = xe_bo_validate(bo, NULL, false, exec);
2767 			if (err)
2768 				return err;
2769 		}
2770 
2771 		spin_lock(&xe->pinned.lock);
2772 		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
2773 		spin_unlock(&xe->pinned.lock);
2774 	}
2775 
2776 	ttm_bo_pin(&bo->ttm);
2777 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2778 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2779 
2780 	/*
2781 	 * FIXME: If we always use the reserve / unreserve functions for locking
2782 	 * we do not need this.
2783 	 */
2784 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2785 
2786 	return 0;
2787 }
2788 
2789 /**
2790  * xe_bo_pin() - Pin a kernel bo after potentially migrating it
2791  * @bo: The kernel bo to pin.
2792  * @exec: The drm_exec transaction to use for exhaustive eviction.
2793  *
2794  * Attempts to migrate a bo to @bo->placement. If that succeeds,
2795  * pins the bo.
2796  *
2797  * Return: %0 on success, negative error code on migration failure.
2798  */
2799 int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec)
2800 {
2801 	struct ttm_place *place = &bo->placements[0];
2802 	struct xe_device *xe = xe_bo_device(bo);
2803 	int err;
2804 
2805 	/* We currently don't expect user BOs to be pinned */
2806 	xe_assert(xe, !xe_bo_is_user(bo));
2807 
2808 	/* Pinned object must be in GGTT or have pinned flag */
2809 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
2810 				   XE_BO_FLAG_GGTT));
2811 
2812 	/*
2813 	 * No reason we can't support pinning imported dma-bufs; we just don't
2814 	 * expect to pin an imported dma-buf.
2815 	 */
2816 	xe_assert(xe, !bo->ttm.base.import_attach);
2817 
2818 	/* We only expect at most 1 pin */
2819 	xe_assert(xe, !xe_bo_is_pinned(bo));
2820 
2821 	err = xe_bo_validate(bo, NULL, false, exec);
2822 	if (err)
2823 		return err;
2824 
2825 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2826 		spin_lock(&xe->pinned.lock);
2827 		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
2828 			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
2829 		else
2830 			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
2831 		spin_unlock(&xe->pinned.lock);
2832 	}
2833 
2834 	ttm_bo_pin(&bo->ttm);
2835 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2836 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2837 
2838 	/*
2839 	 * FIXME: If we always use the reserve / unreserve functions for locking
2840 	 * we do not need this.
2841 	 */
2842 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2843 
2844 	return 0;
2845 }
2846 
2847 /**
2848  * xe_bo_unpin_external - unpin an external BO
2849  * @bo: buffer object to be unpinned
2850  *
2851  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2852  * BO. Unique call compared to xe_bo_unpin as this function has its own set of
2853  * asserts and code to ensure evict / restore on suspend / resume.
2856  */
2857 void xe_bo_unpin_external(struct xe_bo *bo)
2858 {
2859 	struct xe_device *xe = xe_bo_device(bo);
2860 
2861 	xe_assert(xe, !bo->vm);
2862 	xe_assert(xe, xe_bo_is_pinned(bo));
2863 	xe_assert(xe, xe_bo_is_user(bo));
2864 
2865 	spin_lock(&xe->pinned.lock);
2866 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
2867 		list_del_init(&bo->pinned_link);
2868 	spin_unlock(&xe->pinned.lock);
2869 
2870 	ttm_bo_unpin(&bo->ttm);
2871 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2872 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2873 
2874 	/*
2875 	 * FIXME: If we always use the reserve / unreserve functions for locking
2876 	 * we do not need this.
2877 	 */
2878 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2879 }
2880 
2881 void xe_bo_unpin(struct xe_bo *bo)
2882 {
2883 	struct ttm_place *place = &bo->placements[0];
2884 	struct xe_device *xe = xe_bo_device(bo);
2885 
2886 	xe_assert(xe, !bo->ttm.base.import_attach);
2887 	xe_assert(xe, xe_bo_is_pinned(bo));
2888 
2889 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2890 		spin_lock(&xe->pinned.lock);
2891 		xe_assert(xe, !list_empty(&bo->pinned_link));
2892 		list_del_init(&bo->pinned_link);
2893 		spin_unlock(&xe->pinned.lock);
2894 
2895 		if (bo->backup_obj) {
2896 			if (xe_bo_is_pinned(bo->backup_obj))
2897 				ttm_bo_unpin(&bo->backup_obj->ttm);
2898 			xe_bo_put(bo->backup_obj);
2899 			bo->backup_obj = NULL;
2900 		}
2901 	}
2902 	ttm_bo_unpin(&bo->ttm);
2903 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2904 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2905 }
2906 
2907 /**
2908  * xe_bo_validate() - Make sure the bo is in an allowed placement
2909  * @bo: The bo.
2910  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2911  *      NULL. Used together with @allow_res_evict.
2912  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2913  *                   reservation object.
2914  * @exec: The drm_exec transaction to use for exhaustive eviction.
2915  *
2916  * Make sure the bo is in allowed placement, migrating it if necessary. If
2917  * needed, other bos will be evicted. If bos selected for eviction share
2918  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
2919  * set to true, otherwise they will be bypassed.
2920  *
2921  * Return: 0 on success, negative error code on failure. May return
2922  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2923  */
2924 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
2925 		   struct drm_exec *exec)
2926 {
2927 	struct ttm_operation_ctx ctx = {
2928 		.interruptible = true,
2929 		.no_wait_gpu = false,
2930 		.gfp_retry_mayfail = true,
2931 	};
2932 	int ret;
2933 
2934 	if (xe_bo_is_pinned(bo))
2935 		return 0;
2936 
2937 	if (vm) {
2938 		lockdep_assert_held(&vm->lock);
2939 		xe_vm_assert_held(vm);
2940 
2941 		ctx.allow_res_evict = allow_res_evict;
2942 		ctx.resv = xe_vm_resv(vm);
2943 	}
2944 
2945 	xe_vm_set_validating(vm, allow_res_evict);
2946 	trace_xe_bo_validate(bo);
2947 	xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
2948 	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2949 	xe_vm_clear_validating(vm, allow_res_evict);
2950 
2951 	return ret;
2952 }
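
/*
 * Illustrative sketch (not part of the driver): validate a vm-attached bo
 * from within an ongoing drm_exec transaction. The caller is assumed to hold
 * @vm's vm->lock and to have locked the vm's dma-resv through @exec, as the
 * kernel-doc above requires; the helper name is made up for the example.
 */
static __maybe_unused int example_validate_vm_bo(struct xe_vm *vm, struct xe_bo *bo,
						 struct drm_exec *exec)
{
	/* Allow evicting other bos sharing the vm's reservation object. */
	return xe_bo_validate(bo, vm, true, exec);
}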
2953 
2954 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2955 {
2956 	if (bo->destroy == &xe_ttm_bo_destroy)
2957 		return true;
2958 
2959 	return false;
2960 }
2961 
2962 /*
2963  * Resolve a BO address. There is no assert to check if the proper lock is held
2964  * so it should only be used in cases where it is not fatal to get the wrong
2965  * address, such as printing debug information, but not in cases where memory is
2966  * written based on this result.
2967  */
2968 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2969 {
2970 	struct xe_device *xe = xe_bo_device(bo);
2971 	struct xe_res_cursor cur;
2972 	u64 page;
2973 
2974 	xe_assert(xe, page_size <= PAGE_SIZE);
2975 	page = offset >> PAGE_SHIFT;
2976 	offset &= (PAGE_SIZE - 1);
2977 
2978 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2979 		xe_assert(xe, bo->ttm.ttm);
2980 
2981 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2982 				page_size, &cur);
2983 		return xe_res_dma(&cur) + offset;
2984 	} else {
2985 		struct xe_res_cursor cur;
2986 
2987 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2988 			     page_size, &cur);
2989 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2990 	}
2991 }
2992 
2993 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2994 {
2995 	if (!READ_ONCE(bo->ttm.pin_count))
2996 		xe_bo_assert_held(bo);
2997 	return __xe_bo_addr(bo, offset, page_size);
2998 }
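
/*
 * Illustrative sketch (not part of the driver): look up the DMA/GPU address
 * of the first page of a bo. xe_bo_addr() expects the bo to be pinned or its
 * dma-resv to be held; the helper name is made up for the example.
 */
static __maybe_unused dma_addr_t example_bo_start_addr(struct xe_bo *bo)
{
	return xe_bo_addr(bo, 0, PAGE_SIZE);
}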
2999 
3000 int xe_bo_vmap(struct xe_bo *bo)
3001 {
3002 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
3003 	void *virtual;
3004 	bool is_iomem;
3005 	int ret;
3006 
3007 	xe_bo_assert_held(bo);
3008 
3009 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
3010 			!force_contiguous(bo->flags)))
3011 		return -EINVAL;
3012 
3013 	if (!iosys_map_is_null(&bo->vmap))
3014 		return 0;
3015 
3016 	/*
3017 	 * We use this more or less deprecated interface for now since
3018 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
3019 	 * single page bos, which is done here.
3020 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
3021 	 * to use struct iosys_map.
3022 	 */
3023 	ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
3024 	if (ret)
3025 		return ret;
3026 
3027 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
3028 	if (is_iomem)
3029 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
3030 	else
3031 		iosys_map_set_vaddr(&bo->vmap, virtual);
3032 
3033 	return 0;
3034 }
3035 
3036 static void __xe_bo_vunmap(struct xe_bo *bo)
3037 {
3038 	if (!iosys_map_is_null(&bo->vmap)) {
3039 		iosys_map_clear(&bo->vmap);
3040 		ttm_bo_kunmap(&bo->kmap);
3041 	}
3042 }
3043 
3044 void xe_bo_vunmap(struct xe_bo *bo)
3045 {
3046 	xe_bo_assert_held(bo);
3047 	__xe_bo_vunmap(bo);
3048 }
3049 
3050 static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
3051 {
3052 	if (value == DRM_XE_PXP_TYPE_NONE)
3053 		return 0;
3054 
3055 	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
3056 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
3057 		return -EINVAL;
3058 
3059 	return xe_pxp_key_assign(xe->pxp, bo);
3060 }
3061 
3062 typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
3063 					     struct xe_bo *bo,
3064 					     u64 value);
3065 
3066 static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
3067 	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
3068 };
3069 
3070 static int gem_create_user_ext_set_property(struct xe_device *xe,
3071 					    struct xe_bo *bo,
3072 					    u64 extension)
3073 {
3074 	u64 __user *address = u64_to_user_ptr(extension);
3075 	struct drm_xe_ext_set_property ext;
3076 	int err;
3077 	u32 idx;
3078 
3079 	err = copy_from_user(&ext, address, sizeof(ext));
3080 	if (XE_IOCTL_DBG(xe, err))
3081 		return -EFAULT;
3082 
3083 	if (XE_IOCTL_DBG(xe, ext.property >=
3084 			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
3085 	    XE_IOCTL_DBG(xe, ext.pad) ||
3086 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
3087 		return -EINVAL;
3088 
3089 	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
3090 	if (!gem_create_set_property_funcs[idx])
3091 		return -EINVAL;
3092 
3093 	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
3094 }
3095 
3096 typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
3097 					       struct xe_bo *bo,
3098 					       u64 extension);
3099 
3100 static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
3101 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
3102 };
3103 
3104 #define MAX_USER_EXTENSIONS	16
3105 static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
3106 				      u64 extensions, int ext_number)
3107 {
3108 	u64 __user *address = u64_to_user_ptr(extensions);
3109 	struct drm_xe_user_extension ext;
3110 	int err;
3111 	u32 idx;
3112 
3113 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
3114 		return -E2BIG;
3115 
3116 	err = copy_from_user(&ext, address, sizeof(ext));
3117 	if (XE_IOCTL_DBG(xe, err))
3118 		return -EFAULT;
3119 
3120 	if (XE_IOCTL_DBG(xe, ext.pad) ||
3121 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
3122 		return -EINVAL;
3123 
3124 	idx = array_index_nospec(ext.name,
3125 				 ARRAY_SIZE(gem_create_user_extension_funcs));
3126 	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
3127 	if (XE_IOCTL_DBG(xe, err))
3128 		return err;
3129 
3130 	if (ext.next_extension)
3131 		return gem_create_user_extensions(xe, bo, ext.next_extension,
3132 						  ++ext_number);
3133 
3134 	return 0;
3135 }
3136 
3137 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
3138 			struct drm_file *file)
3139 {
3140 	struct xe_device *xe = to_xe_device(dev);
3141 	struct xe_file *xef = to_xe_file(file);
3142 	struct drm_xe_gem_create *args = data;
3143 	struct xe_validation_ctx ctx;
3144 	struct drm_exec exec;
3145 	struct xe_vm *vm = NULL;
3146 	struct xe_bo *bo;
3147 	unsigned int bo_flags;
3148 	u32 handle;
3149 	int err;
3150 
3151 	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
3152 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3153 		return -EINVAL;
3154 
3155 	/* at least one valid memory placement must be specified */
3156 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
3157 			 !args->placement))
3158 		return -EINVAL;
3159 
3160 	if (XE_IOCTL_DBG(xe, args->flags &
3161 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
3162 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
3163 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
3164 		return -EINVAL;
3165 
3166 	if (XE_IOCTL_DBG(xe, args->handle))
3167 		return -EINVAL;
3168 
3169 	if (XE_IOCTL_DBG(xe, !args->size))
3170 		return -EINVAL;
3171 
3172 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
3173 		return -EINVAL;
3174 
3175 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
3176 		return -EINVAL;
3177 
3178 	bo_flags = 0;
3179 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
3180 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
3181 
3182 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
3183 		bo_flags |= XE_BO_FLAG_SCANOUT;
3184 
3185 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
3186 
3187 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
3188 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
3189 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
3190 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
3191 	    IS_ALIGNED(args->size, SZ_64K))
3192 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
3193 
3194 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
3195 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
3196 			return -EINVAL;
3197 
3198 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
3199 	}
3200 
3201 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
3202 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
3203 		return -EINVAL;
3204 
3205 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
3206 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
3207 		return -EINVAL;
3208 
3209 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
3210 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
3211 		return -EINVAL;
3212 
3213 	if (args->vm_id) {
3214 		vm = xe_vm_lookup(xef, args->vm_id);
3215 		if (XE_IOCTL_DBG(xe, !vm))
3216 			return -ENOENT;
3217 	}
3218 
3219 	err = 0;
3220 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
3221 			    err) {
3222 		if (vm) {
3223 			err = xe_vm_drm_exec_lock(vm, &exec);
3224 			drm_exec_retry_on_contention(&exec);
3225 			if (err)
3226 				break;
3227 		}
3228 		bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
3229 				       bo_flags, &exec);
3230 		drm_exec_retry_on_contention(&exec);
3231 		if (IS_ERR(bo)) {
3232 			err = PTR_ERR(bo);
3233 			xe_validation_retry_on_oom(&ctx, &err);
3234 			break;
3235 		}
3236 	}
3237 	if (err)
3238 		goto out_vm;
3239 
3240 	if (args->extensions) {
3241 		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
3242 		if (err)
3243 			goto out_bulk;
3244 	}
3245 
3246 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
3247 	if (err)
3248 		goto out_bulk;
3249 
3250 	args->handle = handle;
3251 	goto out_put;
3252 
3253 out_bulk:
3254 	if (vm && !xe_vm_in_fault_mode(vm)) {
3255 		xe_vm_lock(vm, false);
3256 		__xe_bo_unset_bulk_move(bo);
3257 		xe_vm_unlock(vm);
3258 	}
3259 out_put:
3260 	xe_bo_put(bo);
3261 out_vm:
3262 	if (vm)
3263 		xe_vm_put(vm);
3264 
3265 	return err;
3266 }
3267 
3268 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
3269 			     struct drm_file *file)
3270 {
3271 	struct xe_device *xe = to_xe_device(dev);
3272 	struct drm_xe_gem_mmap_offset *args = data;
3273 	struct drm_gem_object *gem_obj;
3274 
3275 	if (XE_IOCTL_DBG(xe, args->extensions) ||
3276 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3277 		return -EINVAL;
3278 
3279 	if (XE_IOCTL_DBG(xe, args->flags &
3280 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
3281 		return -EINVAL;
3282 
3283 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
3284 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
3285 			return -EINVAL;
3286 
3287 		if (XE_IOCTL_DBG(xe, args->handle))
3288 			return -EINVAL;
3289 
3290 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
3291 			return -EINVAL;
3292 
3293 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
3294 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
3295 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
3296 		return 0;
3297 	}
3298 
3299 	gem_obj = drm_gem_object_lookup(file, args->handle);
3300 	if (XE_IOCTL_DBG(xe, !gem_obj))
3301 		return -ENOENT;
3302 
3303 	/* The mmap offset was set up at BO allocation time. */
3304 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
3305 
3306 	xe_bo_put(gem_to_xe_bo(gem_obj));
3307 	return 0;
3308 }
3309 
3310 /**
3311  * xe_bo_lock() - Lock the buffer object's dma_resv object
3312  * @bo: The struct xe_bo whose lock is to be taken
3313  * @intr: Whether to perform any wait interruptible
3314  *
3315  * Locks the buffer object's dma_resv object. If the buffer object is
3316  * pointing to a shared dma_resv object, that shared lock is locked.
3317  *
3318  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3319  * contended lock was interrupted. If @intr is set to false, the
3320  * function always returns 0.
3321  */
3322 int xe_bo_lock(struct xe_bo *bo, bool intr)
3323 {
3324 	if (intr)
3325 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
3326 
3327 	dma_resv_lock(bo->ttm.base.resv, NULL);
3328 
3329 	return 0;
3330 }
3331 
3332 /**
3333  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
3334  * @bo: The struct xe_bo whose lock is to be released.
3335  *
3336  * Unlock a buffer object lock that was locked by xe_bo_lock().
3337  */
3338 void xe_bo_unlock(struct xe_bo *bo)
3339 {
3340 	dma_resv_unlock(bo->ttm.base.resv);
3341 }
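
/*
 * Illustrative sketch (not part of the driver): take the bo's dma-resv lock
 * interruptibly around xe_bo_vmap(), which must be called with the lock held.
 * Assumes the bo was created with XE_BO_FLAG_NEEDS_CPU_ACCESS and a contiguous
 * placement; the helper name is made up for the example.
 */
static __maybe_unused int example_locked_vmap(struct xe_bo *bo)
{
	int err;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	err = xe_bo_vmap(bo);
	xe_bo_unlock(bo);

	return err;
}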
3342 
3343 /**
3344  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
3345  * @bo: The buffer object to migrate
3346  * @mem_type: The TTM memory type intended to migrate to
3347  *
3348  * Check whether the buffer object supports migration to the
3349  * given memory type. Note that pinning may affect the ability to migrate as
3350  * returned by this function.
3351  *
3352  * This function is primarily intended as a helper for checking the
3353  * possibility to migrate buffer objects and can be called without
3354  * the object lock held.
3355  *
3356  * Return: true if migration is possible, false otherwise.
3357  */
3358 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
3359 {
3360 	unsigned int cur_place;
3361 
3362 	if (bo->ttm.type == ttm_bo_type_kernel)
3363 		return true;
3364 
3365 	if (bo->ttm.type == ttm_bo_type_sg)
3366 		return false;
3367 
3368 	for (cur_place = 0; cur_place < bo->placement.num_placement;
3369 	     cur_place++) {
3370 		if (bo->placements[cur_place].mem_type == mem_type)
3371 			return true;
3372 	}
3373 
3374 	return false;
3375 }
3376 
3377 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
3378 {
3379 	memset(place, 0, sizeof(*place));
3380 	place->mem_type = mem_type;
3381 }
3382 
3383 /**
3384  * xe_bo_migrate - Migrate an object to the desired region id
3385  * @bo: The buffer object to migrate.
3386  * @mem_type: The TTM region type to migrate to.
3387  * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
3388  * a default interruptible ctx is to be used.
3389  * @exec: The drm_exec transaction to use for exhaustive eviction.
3390  *
3391  * Attempt to migrate the buffer object to the desired memory region. The
3392  * buffer object may not be pinned, and must be locked.
3393  * On successful completion, the object memory type will be updated,
3394  * but an async migration task may not have completed yet. To wait for
3395  * completion, wait for the object's kernel fences to signal with the
3396  * object lock held.
3397  *
3398  * Return: 0 on success. Negative error code on failure. In particular may
3399  * return -EINTR or -ERESTARTSYS if signal pending.
3400  */
3401 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
3402 		  struct drm_exec *exec)
3403 {
3404 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
3405 	struct ttm_operation_ctx ctx = {
3406 		.interruptible = true,
3407 		.no_wait_gpu = false,
3408 		.gfp_retry_mayfail = true,
3409 	};
3410 	struct ttm_placement placement;
3411 	struct ttm_place requested;
3412 
3413 	xe_bo_assert_held(bo);
3414 	tctx = tctx ? tctx : &ctx;
3415 
3416 	if (bo->ttm.resource->mem_type == mem_type)
3417 		return 0;
3418 
3419 	if (xe_bo_is_pinned(bo))
3420 		return -EBUSY;
3421 
3422 	if (!xe_bo_can_migrate(bo, mem_type))
3423 		return -EINVAL;
3424 
3425 	xe_place_from_ttm_type(mem_type, &requested);
3426 	placement.num_placement = 1;
3427 	placement.placement = &requested;
3428 
3429 	/*
3430 	 * Stolen would need the same handling as VRAM below if we ever need
3431 	 * to support it.
3432 	 */
3433 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
3434 
3435 	if (mem_type_is_vram(mem_type)) {
3436 		u32 c = 0;
3437 
3438 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
3439 	}
3440 
3441 	if (!tctx->no_wait_gpu)
3442 		xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
3443 	return ttm_bo_validate(&bo->ttm, &placement, tctx);
3444 }
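
/*
 * Illustrative sketch (not part of the driver): migrate a bo to GTT and then
 * wait for the asynchronous move to complete, as the kernel-doc above
 * describes, by waiting on the DMA_RESV_USAGE_KERNEL fences with the object
 * lock still held. The helper name is made up; @exec is assumed to be the
 * caller's ongoing drm_exec transaction.
 */
static __maybe_unused int example_migrate_to_tt(struct xe_bo *bo,
						struct drm_exec *exec)
{
	long timeout;
	int err;

	xe_bo_assert_held(bo);

	err = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
	if (err)
		return err;

	timeout = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
					true, MAX_SCHEDULE_TIMEOUT);
	if (timeout < 0)
		return timeout;

	return timeout ? 0 : -ETIME;
}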
3445 
3446 /**
3447  * xe_bo_evict - Evict an object to evict placement
3448  * @bo: The buffer object to migrate.
3449  * @exec: The drm_exec transaction to use for exhaustive eviction.
3450  *
3451  * On successful completion, the object memory will be moved to evict
3452  * placement. This function blocks until the object has been fully moved.
3453  *
3454  * Return: 0 on success. Negative error code on failure.
3455  */
3456 int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec)
3457 {
3458 	struct ttm_operation_ctx ctx = {
3459 		.interruptible = false,
3460 		.no_wait_gpu = false,
3461 		.gfp_retry_mayfail = true,
3462 	};
3463 	struct ttm_placement placement;
3464 	int ret;
3465 
3466 	xe_evict_flags(&bo->ttm, &placement);
3467 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
3468 	if (ret)
3469 		return ret;
3470 
3471 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
3472 			      false, MAX_SCHEDULE_TIMEOUT);
3473 
3474 	return 0;
3475 }
3476 
3477 /**
3478  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
3479  * placed in system memory.
3480  * @bo: The xe_bo
3481  *
3482  * Return: true if extra pages need to be allocated, false otherwise.
3483  */
3484 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
3485 {
3486 	struct xe_device *xe = xe_bo_device(bo);
3487 
3488 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
3489 		return false;
3490 
3491 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
3492 		return false;
3493 
3494 	/* On discrete GPUs, if the GPU can access this buffer from
3495 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
3496 	 * can't be used since there's no CCS storage associated with
3497 	 * non-VRAM addresses.
3498 	 */
3499 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
3500 		return false;
3501 
3502 	/*
3503 	 * Compression implies coh_none, therefore we know for sure that WB
3504 	 * memory can't currently use compression, which is likely one of the
3505 	 * common cases.
3506 	 */
3507 	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
3508 		return false;
3509 
3510 	return true;
3511 }
3512 
3513 /**
3514  * __xe_bo_release_dummy() - Dummy kref release function
3515  * @kref: The embedded struct kref.
3516  *
3517  * Dummy release function for xe_bo_put_deferred(). Keep off.
3518  */
3519 void __xe_bo_release_dummy(struct kref *kref)
3520 {
3521 }
3522 
3523 /**
3524  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
3525  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
3526  *
3527  * Puts all bos whose put was deferred by xe_bo_put_deferred().
3528  * The @deferred list can be either an onstack local list or a global
3529  * shared list used by a workqueue.
3530  */
3531 void xe_bo_put_commit(struct llist_head *deferred)
3532 {
3533 	struct llist_node *freed;
3534 	struct xe_bo *bo, *next;
3535 
3536 	if (!deferred)
3537 		return;
3538 
3539 	freed = llist_del_all(deferred);
3540 	if (!freed)
3541 		return;
3542 
3543 	llist_for_each_entry_safe(bo, next, freed, freed)
3544 		drm_gem_object_free(&bo->ttm.base.refcount);
3545 }
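
/*
 * Illustrative sketch (not part of the driver): defer the final puts of a
 * batch of bos and commit them later from sleepable context. Assumes the
 * xe_bo_put_deferred(bo, deferred) helper declared in xe_bo.h; the helper
 * name below is made up for the example.
 */
static __maybe_unused void example_put_bo_array(struct xe_bo **bos, int count)
{
	struct llist_head deferred;
	int i;

	init_llist_head(&deferred);

	for (i = 0; i < count; i++)
		xe_bo_put_deferred(bos[i], &deferred);

	/* Now actually free the bos; xe_bo_put_commit() may sleep. */
	xe_bo_put_commit(&deferred);
}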
3546 
3547 static void xe_bo_dev_work_func(struct work_struct *work)
3548 {
3549 	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);
3550 
3551 	xe_bo_put_commit(&bo_dev->async_list);
3552 }
3553 
3554 /**
3555  * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
3556  * @bo_dev: The BO dev structure
3557  */
3558 void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
3559 {
3560 	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
3561 }
3562 
3563 /**
3564  * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
3565  * @bo_dev: The BO dev structure
3566  */
3567 void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
3568 {
3569 	flush_work(&bo_dev->async_free);
3570 }
3571 
3572 void xe_bo_put(struct xe_bo *bo)
3573 {
3574 	struct xe_tile *tile;
3575 	u8 id;
3576 
3577 	might_sleep();
3578 	if (bo) {
3579 #ifdef CONFIG_PROC_FS
3580 		if (bo->client)
3581 			might_lock(&bo->client->bos_lock);
3582 #endif
3583 		for_each_tile(tile, xe_bo_device(bo), id)
3584 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
3585 				xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt);
3586 		drm_gem_object_put(&bo->ttm.base);
3587 	}
3588 }
3589 
3590 /**
3591  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
3592  * @file_priv: ...
3593  * @dev: ...
3594  * @args: ...
3595  *
3596  * See dumb_create() hook in include/drm/drm_drv.h
3597  *
3598  * Return: ...
3599  */
3600 int xe_bo_dumb_create(struct drm_file *file_priv,
3601 		      struct drm_device *dev,
3602 		      struct drm_mode_create_dumb *args)
3603 {
3604 	struct xe_device *xe = to_xe_device(dev);
3605 	struct xe_bo *bo;
3606 	uint32_t handle;
3607 	int err;
3608 	u32 page_size = max_t(u32, PAGE_SIZE,
3609 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
3610 
3611 	err = drm_mode_size_dumb(dev, args, SZ_64, page_size);
3612 	if (err)
3613 		return err;
3614 
3615 	bo = xe_bo_create_user(xe, NULL, args->size,
3616 			       DRM_XE_GEM_CPU_CACHING_WC,
3617 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
3618 			       XE_BO_FLAG_SCANOUT |
3619 			       XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
3620 	if (IS_ERR(bo))
3621 		return PTR_ERR(bo);
3622 
3623 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
3624 	/* drop reference from allocate - handle holds it now */
3625 	drm_gem_object_put(&bo->ttm.base);
3626 	if (!err)
3627 		args->handle = handle;
3628 	return err;
3629 }
3630 
3631 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
3632 {
3633 	struct ttm_buffer_object *tbo = &bo->ttm;
3634 	struct ttm_device *bdev = tbo->bdev;
3635 
3636 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
3637 
3638 	list_del_init(&bo->vram_userfault_link);
3639 }
3640 
3641 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
3642 #include "tests/xe_bo.c"
3643 #endif
3644