xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 536a2ead3a8f2048643da3e8340a4d73fdf71903)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_dumb_buffers.h>
13 #include <drm/drm_gem_ttm_helper.h>
14 #include <drm/drm_managed.h>
15 #include <drm/ttm/ttm_backup.h>
16 #include <drm/ttm/ttm_device.h>
17 #include <drm/ttm/ttm_placement.h>
18 #include <drm/ttm/ttm_tt.h>
19 #include <uapi/drm/xe_drm.h>
20 
21 #include <kunit/static_stub.h>
22 
23 #include <trace/events/gpu_mem.h>
24 
25 #include "xe_device.h"
26 #include "xe_dma_buf.h"
27 #include "xe_drm_client.h"
28 #include "xe_ggtt.h"
29 #include "xe_map.h"
30 #include "xe_migrate.h"
31 #include "xe_pat.h"
32 #include "xe_pm.h"
33 #include "xe_preempt_fence.h"
34 #include "xe_pxp.h"
35 #include "xe_res_cursor.h"
36 #include "xe_shrinker.h"
37 #include "xe_sriov_vf_ccs.h"
38 #include "xe_tile.h"
39 #include "xe_trace_bo.h"
40 #include "xe_ttm_stolen_mgr.h"
41 #include "xe_vm.h"
42 #include "xe_vram_types.h"
43 
/* Human-readable names for TTM placements, indexed by XE_PL_* memory type. */
const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
	[XE_PL_SYSTEM] = "system",
	[XE_PL_TT] = "gtt",
	[XE_PL_VRAM0] = "vram0",
	[XE_PL_VRAM1] = "vram1",
	[XE_PL_STOLEN] = "stolen"
};
51 
/* Single unrestricted placement in plain system memory. */
static const struct ttm_place sys_placement_flags = {
	.fpfn = 0,
	.lpfn = 0,
	.mem_type = XE_PL_SYSTEM,
	.flags = 0,
};

/* Placement used when evicting BOs to system memory. */
static struct ttm_placement sys_placement = {
	.num_placement = 1,
	.placement = &sys_placement_flags,
};

/*
 * Zero-initialized (empty) placement, handed to TTM to drop the backing
 * store entirely; used from xe_evict_flags() when the device is unplugged.
 */
static struct ttm_placement purge_placement;
65 
/* Prefer XE_PL_TT, but allow TTM to fall back to plain system memory. */
static const struct ttm_place tt_placement_flags[] = {
	{
		.fpfn = 0,
		.lpfn = 0,
		.mem_type = XE_PL_TT,
		.flags = TTM_PL_FLAG_DESIRED,
	},
	{
		.fpfn = 0,
		.lpfn = 0,
		.mem_type = XE_PL_SYSTEM,
		.flags = TTM_PL_FLAG_FALLBACK,
	}
};

/* Placement used when evicting VRAM/stolen BOs; see xe_evict_flags(). */
static struct ttm_placement tt_placement = {
	.num_placement = 2,
	.placement = tt_placement_flags,
};
85 
/*
 * for_each_set_bo_vram_flag() - Iterate over every XE_BO_FLAG_VRAMn bit set
 * in @bo_flags__, assigning the current single-bit flag to @bit__ on each
 * iteration. Bits outside XE_BO_FLAG_VRAM_MASK are skipped.
 */
#define for_each_set_bo_vram_flag(bit__, bo_flags__) \
	for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \
		for_each_if(((bit__) = __bit_tmp) & (bo_flags__) & XE_BO_FLAG_VRAM_MASK)
89 
90 bool mem_type_is_vram(u32 mem_type)
91 {
92 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
93 }
94 
95 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
96 {
97 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
98 }
99 
/* Whether @res currently lives in a (non-stolen) VRAM placement. */
static bool resource_is_vram(struct ttm_resource *res)
{
	return mem_type_is_vram(res->mem_type);
}
104 
105 bool xe_bo_is_vram(struct xe_bo *bo)
106 {
107 	return resource_is_vram(bo->ttm.resource) ||
108 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
109 }
110 
/* Whether the BO currently resides in the stolen-memory placement. */
bool xe_bo_is_stolen(struct xe_bo *bo)
{
	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
}
115 
116 /**
117  * xe_bo_has_single_placement - check if BO is placed only in one memory location
118  * @bo: The BO
119  *
120  * This function checks whether a given BO is placed in only one memory location.
121  *
122  * Returns: true if the BO is placed in a single memory location, false otherwise.
123  *
124  */
125 bool xe_bo_has_single_placement(struct xe_bo *bo)
126 {
127 	return bo->placement.num_placement == 1;
128 }
129 
130 /**
131  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
132  * @bo: The BO
133  *
134  * The stolen memory is accessed through the PCI BAR for both DGFX and some
135  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
136  *
137  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
138  */
139 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
140 {
141 	return xe_bo_is_stolen(bo) &&
142 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
143 }
144 
145 /**
146  * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
147  * @bo: The BO
148  *
149  * Check if a given bo is bound through VM_BIND. This requires the
150  * reservation lock for the BO to be held.
151  *
152  * Returns: boolean
153  */
154 bool xe_bo_is_vm_bound(struct xe_bo *bo)
155 {
156 	xe_bo_assert_held(bo);
157 
158 	return !list_empty(&bo->ttm.base.gpuva.list);
159 }
160 
161 static bool xe_bo_is_user(struct xe_bo *bo)
162 {
163 	return bo->flags & XE_BO_FLAG_USER;
164 }
165 
166 static struct xe_migrate *
167 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
168 {
169 	struct xe_tile *tile;
170 
171 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
172 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
173 	return tile->migrate;
174 }
175 
176 static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
177 {
178 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
179 	struct ttm_resource_manager *mgr;
180 	struct xe_ttm_vram_mgr *vram_mgr;
181 
182 	xe_assert(xe, resource_is_vram(res));
183 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
184 	vram_mgr = to_xe_ttm_vram_mgr(mgr);
185 
186 	return container_of(vram_mgr, struct xe_vram_region, ttm);
187 }
188 
189 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
190 			   u32 bo_flags, u32 *c)
191 {
192 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
193 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
194 
195 		bo->placements[*c] = (struct ttm_place) {
196 			.mem_type = XE_PL_TT,
197 			.flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
198 			TTM_PL_FLAG_FALLBACK : 0,
199 		};
200 		*c += 1;
201 	}
202 }
203 
204 static bool force_contiguous(u32 bo_flags)
205 {
206 	if (bo_flags & XE_BO_FLAG_STOLEN)
207 		return true; /* users expect this */
208 	else if (bo_flags & XE_BO_FLAG_PINNED &&
209 		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
210 		return true; /* needs vmap */
211 	else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
212 		return true;
213 
214 	/*
215 	 * For eviction / restore on suspend / resume objects pinned in VRAM
216 	 * must be contiguous, also only contiguous BOs support xe_bo_vmap.
217 	 */
218 	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
219 	       bo_flags & XE_BO_FLAG_PINNED;
220 }
221 
/* Map a single XE_BO_FLAG_VRAMn bit to its tile index n. */
static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag)
{
	/* Must be a VRAM flag... */
	xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK);
	/* ...with exactly one bit set. */
	xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0);

	/* The flag's bit position relative to XE_BO_FLAG_VRAM0 is the tile id. */
	return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1;
}
229 
230 static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 bo_flags, u32 vram_flag,
231 					   enum ttm_bo_type type)
232 {
233 	u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag);
234 
235 	xe_assert(xe, tile_id < xe->info.tile_count);
236 
237 	if (type == ttm_bo_type_kernel && !(bo_flags & XE_BO_FLAG_FORCE_USER_VRAM))
238 		return xe->tiles[tile_id].mem.kernel_vram->placement;
239 	else
240 		return xe->tiles[tile_id].mem.vram->placement;
241 }
242 
/*
 * add_vram() - Append a VRAM placement for @mem_type to @places.
 * @xe: The xe device.
 * @bo: The BO the placement list belongs to.
 * @places: The placement array to append to.
 * @bo_flags: XE_BO_FLAG_* creation flags of the BO.
 * @mem_type: The XE_PL_VRAMn memory type to add.
 * @c: In/out count of valid entries in @places.
 */
static void add_vram(struct xe_device *xe, struct xe_bo *bo,
		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
{
	struct ttm_place place = { .mem_type = mem_type };
	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);

	struct xe_vram_region *vram;
	u64 io_size;

	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));

	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
	xe_assert(xe, vram && vram->usable_size);
	io_size = vram->io_size;

	if (force_contiguous(bo_flags))
		place.flags |= TTM_PL_FLAG_CONTIGUOUS;

	/*
	 * Only part of VRAM is CPU-accessible (presumably a small-BAR
	 * configuration -- TODO confirm): confine BOs needing CPU access
	 * to the accessible range, and steer everything else away from
	 * it via top-down allocation.
	 */
	if (io_size < vram->usable_size) {
		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
			place.fpfn = 0;
			place.lpfn = io_size >> PAGE_SHIFT;
		} else {
			place.flags |= TTM_PL_FLAG_TOPDOWN;
		}
	}
	places[*c] = place;
	*c += 1;
}
273 
/* Add one VRAM placement per XE_BO_FLAG_VRAMn bit set in @bo_flags. */
static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
			 u32 bo_flags, enum ttm_bo_type type, u32 *c)
{
	u32 vram_flag;

	for_each_set_bo_vram_flag(vram_flag, bo_flags) {
		u32 pl = bo_vram_flags_to_vram_placement(xe, bo_flags, vram_flag, type);

		add_vram(xe, bo, bo->placements, bo_flags, pl, c);
	}
}
285 
286 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
287 			   u32 bo_flags, u32 *c)
288 {
289 	if (bo_flags & XE_BO_FLAG_STOLEN) {
290 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
291 
292 		bo->placements[*c] = (struct ttm_place) {
293 			.mem_type = XE_PL_STOLEN,
294 			.flags = force_contiguous(bo_flags) ?
295 				TTM_PL_FLAG_CONTIGUOUS : 0,
296 		};
297 		*c += 1;
298 	}
299 }
300 
301 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
302 				       u32 bo_flags, enum ttm_bo_type type)
303 {
304 	u32 c = 0;
305 
306 	try_add_vram(xe, bo, bo_flags, type, &c);
307 	try_add_system(xe, bo, bo_flags, &c);
308 	try_add_stolen(xe, bo, bo_flags, &c);
309 
310 	if (!c)
311 		return -EINVAL;
312 
313 	bo->placement = (struct ttm_placement) {
314 		.num_placement = c,
315 		.placement = bo->placements,
316 	};
317 
318 	return 0;
319 }
320 
321 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
322 			      u32 bo_flags, enum ttm_bo_type type)
323 {
324 	xe_bo_assert_held(bo);
325 	return __xe_bo_placement_for_flags(xe, bo, bo_flags, type);
326 }
327 
/* TTM callback: choose the placement to evict @tbo to. */
static void xe_evict_flags(struct ttm_buffer_object *tbo,
			   struct ttm_placement *placement)
{
	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
	struct xe_bo *bo;

	if (!xe_bo_is_xe_bo(tbo)) {
		/* Don't handle scatter gather BOs */
		if (tbo->type == ttm_bo_type_sg) {
			placement->num_placement = 0;
			return;
		}

		/* On unplug, drop the backing store instead of migrating it. */
		*placement = device_unplugged ? purge_placement : sys_placement;
		return;
	}

	bo = ttm_to_xe_bo(tbo);
	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
		*placement = sys_placement;
		return;
	}

	/* Exported (dma-buf) BOs keep their pages even on unplug. */
	if (device_unplugged && !tbo->base.dma_buf) {
		*placement = purge_placement;
		return;
	}

	/*
	 * For xe, sg bos that are evicted to system just triggers a
	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
	 */
	switch (tbo->resource->mem_type) {
	case XE_PL_VRAM0:
	case XE_PL_VRAM1:
	case XE_PL_STOLEN:
		/* VRAM/stolen first evicts to TT, with system as fallback. */
		*placement = tt_placement;
		break;
	case XE_PL_TT:
	default:
		*placement = sys_placement;
		break;
	}
}
373 
/* struct xe_ttm_tt - Subclassed ttm_tt for xe */
struct xe_ttm_tt {
	struct ttm_tt ttm;
	/** @sgt: Storage for the locally built scatter-gather table. */
	struct sg_table sgt;
	/**
	 * @sg: The mapped sg table, or NULL when unmapped. Points at @sgt
	 * when built locally by xe_tt_map_sg(), or at the attachment's
	 * table for imported dma-bufs (see xe_bo_move_dmabuf()).
	 */
	struct sg_table *sg;
	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
	bool purgeable;
};
382 
/*
 * Build and DMA-map a scatter-gather table for the pages of @tt.
 * No-op if a table is already mapped. On failure the table is freed
 * again and @xe_tt->sg is left NULL.
 */
static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
	unsigned long num_pages = tt->num_pages;
	int ret;

	/* External TTs may only be mapped here when also CPU-mappable. */
	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));

	if (xe_tt->sg)
		return 0;

	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
						num_pages, 0,
						(u64)num_pages << PAGE_SHIFT,
						xe_sg_segment_size(xe->drm.dev),
						GFP_KERNEL);
	if (ret)
		return ret;

	xe_tt->sg = &xe_tt->sgt;
	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
			      DMA_ATTR_SKIP_CPU_SYNC);
	if (ret) {
		sg_free_table(xe_tt->sg);
		xe_tt->sg = NULL;
		return ret;
	}

	return 0;
}
414 
415 static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
416 {
417 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
418 
419 	if (xe_tt->sg) {
420 		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
421 				  DMA_BIDIRECTIONAL, 0);
422 		sg_free_table(xe_tt->sg);
423 		xe_tt->sg = NULL;
424 	}
425 }
426 
427 struct sg_table *xe_bo_sg(struct xe_bo *bo)
428 {
429 	struct ttm_tt *tt = bo->ttm.ttm;
430 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
431 
432 	return xe_tt->sg;
433 }
434 
435 /*
436  * Account ttm pages against the device shrinker's shrinkable and
437  * purgeable counts.
438  */
439 static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
440 {
441 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
442 
443 	if (xe_tt->purgeable)
444 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
445 	else
446 		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
447 }
448 
449 static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
450 {
451 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
452 
453 	if (xe_tt->purgeable)
454 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
455 	else
456 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
457 }
458 
/*
 * Adjust the device-wide total of allocated ttm pages by @num_pages
 * (may be negative) and emit the gpu_mem_total tracepoint.
 * Compiled out when CONFIG_TRACE_GPU_MEM is disabled.
 */
static void update_global_total_pages(struct ttm_device *ttm_dev,
				      long num_pages)
{
#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
	u64 global_total_pages =
		atomic64_add_return(num_pages, &xe->global_total_pages);

	trace_gpu_mem_total(xe->drm.primary->index, 0,
			    global_total_pages << PAGE_SHIFT);
#endif
}
471 
/*
 * TTM backend hook: allocate and initialize the ttm_tt for @ttm_bo.
 * Picks the CPU caching mode, reserves extra pages for CCS metadata
 * where needed, and sets up backup storage for shrinker support.
 * Returns NULL on failure, per TTM convention.
 */
static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
				       u32 page_flags)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct xe_device *xe = xe_bo_device(bo);
	struct xe_ttm_tt *xe_tt;
	struct ttm_tt *tt;
	unsigned long extra_pages;
	enum ttm_caching caching = ttm_cached;
	int err;

	xe_tt = kzalloc_obj(*xe_tt);
	if (!xe_tt)
		return NULL;

	tt = &xe_tt->ttm;

	/* Reserve room for compression metadata when CCS pages are needed. */
	extra_pages = 0;
	if (xe_bo_needs_ccs_pages(bo))
		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
					   PAGE_SIZE);

	/*
	 * DGFX system memory is always WB / ttm_cached, since
	 * other caching modes are only supported on x86. DGFX
	 * GPU system memory accesses are always coherent with the
	 * CPU.
	 */
	if (!IS_DGFX(xe)) {
		switch (bo->cpu_caching) {
		case DRM_XE_GEM_CPU_CACHING_WC:
			caching = ttm_write_combined;
			break;
		default:
			caching = ttm_cached;
			break;
		}

		/* Userspace-created BOs must have a caching mode set. */
		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);

		/*
		 * For Xe_LPG and beyond up to NVL-P (excluding), PPGTT PTE
		 * lookups are also non-coherent and require a CPU:WC mapping.
		 */
		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_FORCE_WC) ||
		    (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE))
			caching = ttm_write_combined;
	}

	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
		/*
		 * Valid only for internally-created buffers only, for
		 * which cpu_caching is never initialized.
		 */
		xe_assert(xe, bo->cpu_caching == 0);
		caching = ttm_uncached;
	}

	/* Non-sg TTs are shrinker-managed: mark them external-mappable. */
	if (ttm_bo->type != ttm_bo_type_sg)
		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;

	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
	if (err) {
		kfree(xe_tt);
		return NULL;
	}

	/* Set up backup storage so shrunken content can be restored later. */
	if (ttm_bo->type != ttm_bo_type_sg) {
		err = ttm_tt_setup_backup(tt);
		if (err) {
			ttm_tt_fini(tt);
			kfree(xe_tt);
			return NULL;
		}
	}

	return tt;
}
550 
/* TTM backend hook: allocate or restore the pages backing @tt. */
static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
			      struct ttm_operation_ctx *ctx)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
	int err;

	/*
	 * dma-bufs are not populated with pages, and the dma-
	 * addresses are set up when moved to XE_PL_TT.
	 */
	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
		return 0;

	/* Restore backed-up (shrunken) content, unless it was purgeable. */
	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
		err = ttm_tt_restore(ttm_dev, tt, ctx);
	} else {
		ttm_tt_clear_backed_up(tt);
		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
	}
	if (err)
		return err;

	/* Freshly populated pages hold valid content: account as shrinkable. */
	xe_tt->purgeable = false;
	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
	update_global_total_pages(ttm_dev, tt->num_pages);

	return 0;
}
580 
/* TTM backend hook: release the pages backing @tt and undo accounting. */
static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
	struct xe_device *xe = ttm_to_xe_device(ttm_dev);

	/* dma-buf TTs were never populated here; nothing to release. */
	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
		return;

	/* Drop the DMA mapping before the pages are returned to the pool. */
	xe_tt_unmap_sg(xe, tt);

	ttm_pool_free(&ttm_dev->pool, tt);
	xe_ttm_tt_account_subtract(xe, tt);
	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
}
595 
/* TTM backend hook: finalize and free a ttm_tt from xe_ttm_tt_create(). */
static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
	/* tt is the first member of struct xe_ttm_tt, so this frees the whole object. */
	ttm_tt_fini(tt);
	kfree(tt);
}
601 
602 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
603 {
604 	struct xe_ttm_vram_mgr_resource *vres =
605 		to_xe_ttm_vram_mgr_resource(mem);
606 
607 	return vres->used_visible_size == mem->size;
608 }
609 
610 /**
611  * xe_bo_is_visible_vram - check if BO is placed entirely in visible VRAM.
612  * @bo: The BO
613  *
614  * This function checks whether a given BO resides entirely in memory visible from the CPU
615  *
616  * Returns: true if the BO is entirely visible, false otherwise.
617  *
618  */
619 bool xe_bo_is_visible_vram(struct xe_bo *bo)
620 {
621 	if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo)))
622 		return false;
623 
624 	return xe_ttm_resource_visible(bo->ttm.resource);
625 }
626 
627 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
628 				 struct ttm_resource *mem)
629 {
630 	struct xe_device *xe = ttm_to_xe_device(bdev);
631 
632 	switch (mem->mem_type) {
633 	case XE_PL_SYSTEM:
634 	case XE_PL_TT:
635 		return 0;
636 	case XE_PL_VRAM0:
637 	case XE_PL_VRAM1: {
638 		struct xe_vram_region *vram = res_to_mem_region(mem);
639 
640 		if (!xe_ttm_resource_visible(mem))
641 			return -EINVAL;
642 
643 		mem->bus.offset = mem->start << PAGE_SHIFT;
644 
645 		if (vram->mapping &&
646 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
647 			mem->bus.addr = (u8 __force *)vram->mapping +
648 				mem->bus.offset;
649 
650 		mem->bus.offset += vram->io_start;
651 		mem->bus.is_iomem = true;
652 
653 #if  !IS_ENABLED(CONFIG_X86)
654 		mem->bus.caching = ttm_write_combined;
655 #endif
656 		return 0;
657 	} case XE_PL_STOLEN:
658 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
659 	default:
660 		return -EINVAL;
661 	}
662 }
663 
/*
 * Notify every VM mapping @bo that its backing store is about to move:
 * mark the gpuvm mappings evicted and/or invalidate the VMAs, waiting
 * for outstanding GPU work where invalidation requires it.
 */
static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
				const struct ttm_operation_ctx *ctx)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	struct drm_gem_object *obj = &bo->ttm.base;
	struct drm_gpuvm_bo *vm_bo;
	bool idle = false;
	int ret = 0;

	dma_resv_assert_held(bo->ttm.base.resv);

	/* Kick off fence signaling early to shorten the wait further down. */
	if (!list_empty(&bo->ttm.base.gpuva.list)) {
		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
				    DMA_RESV_USAGE_BOOKKEEP);
		dma_resv_for_each_fence_unlocked(&cursor, fence)
			dma_fence_enable_sw_signaling(fence);
		dma_resv_iter_end(&cursor);
	}

	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
		struct drm_gpuva *gpuva;

		if (!xe_vm_in_fault_mode(vm)) {
			drm_gpuvm_bo_evict(vm_bo, true);
			/*
			 * L2 cache may not be flushed, so ensure that is done in
			 * xe_vm_invalidate_vma() below
			 */
			if (!xe_device_is_l2_flush_optimized(xe))
				continue;
		}

		/* Wait for the BO to go idle once, before any invalidation. */
		if (!idle) {
			long timeout;

			if (ctx->no_wait_gpu &&
			    !dma_resv_test_signaled(bo->ttm.base.resv,
						    DMA_RESV_USAGE_BOOKKEEP))
				return -EBUSY;

			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
							DMA_RESV_USAGE_BOOKKEEP,
							ctx->interruptible,
							MAX_SCHEDULE_TIMEOUT);
			if (!timeout)
				return -ETIME;
			if (timeout < 0)
				return timeout;

			idle = true;
		}

		/* Invalidate every VMA of this VM that maps the BO. */
		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
			struct xe_vma *vma = gpuva_to_vma(gpuva);

			trace_xe_vma_evict(vma);
			ret = xe_vm_invalidate_vma(vma);
			if (XE_WARN_ON(ret))
				return ret;
		}
	}

	return ret;
}
730 
/*
 * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
 * Note that unmapping the attachment is deferred to the next
 * map_attachment time, or to bo destroy (after idling) whichever comes first.
 * This is to avoid syncing before unmap_attachment(), assuming that the
 * caller relies on idling the reservation object before moving the
 * backing store out. Should that assumption not hold, then we will be able
 * to unconditionally call unmap_attachment() when moving out to system.
 */
static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
			     struct ttm_resource *new_res)
{
	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
					       ttm);
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
	struct sg_table *sg;

	xe_assert(xe, attach);
	xe_assert(xe, ttm_bo->ttm);

	/*
	 * On unplug, unmap the attachment right away (after idling)
	 * instead of deferring, since no further moves will follow.
	 */
	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
	    ttm_bo->sg) {
		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);
		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
		ttm_bo->sg = NULL;
	}

	/* Moving to system: leave any existing mapping for deferred unmap. */
	if (new_res->mem_type == XE_PL_SYSTEM)
		goto out;

	/* Deferred unmap of the previous attachment mapping. */
	if (ttm_bo->sg) {
		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
		ttm_bo->sg = NULL;
	}

	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sg))
		return PTR_ERR(sg);

	ttm_bo->sg = sg;
	xe_tt->sg = sg;

out:
	ttm_bo_move_null(ttm_bo, new_res);

	return 0;
}
781 
/**
 * xe_bo_move_notify - Notify subsystems of a pending move
 * @bo: The buffer object
 * @ctx: The struct ttm_operation_ctx controlling locking and waits.
 *
 * This function notifies subsystems of an upcoming buffer move.
 * Upon receiving such a notification, subsystems should schedule
 * halting access to the underlying pages and optionally add a fence
 * to the buffer object's dma_resv object, that signals when access is
 * stopped. The caller will wait on all dma_resv fences before
 * starting the move.
 *
 * A subsystem may commence access to the object after obtaining
 * bindings to the new backing memory under the object lock.
 *
 * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
 * negative error code on error.
 */
static int xe_bo_move_notify(struct xe_bo *bo,
			     const struct ttm_operation_ctx *ctx)
{
	struct ttm_buffer_object *ttm_bo = &bo->ttm;
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	struct ttm_resource *old_mem = ttm_bo->resource;
	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
	int ret;

	/*
	 * If this starts to call into many components, consider
	 * using a notification chain here.
	 */

	/* Pinned BOs must never be moved. */
	if (xe_bo_is_pinned(bo))
		return -EINVAL;

	/* Drop any CPU vmap of the old backing store. */
	xe_bo_vunmap(bo);
	ret = xe_bo_trigger_rebind(xe, bo, ctx);
	if (ret)
		return ret;

	/* Don't call move_notify() for imported dma-bufs. */
	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
		dma_buf_invalidate_mappings(ttm_bo->base.dma_buf);

	/*
	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
	 * so if we moved from VRAM make sure to unlink this from the userfault
	 * tracking.
	 */
	if (mem_type_is_vram(old_mem_type)) {
		mutex_lock(&xe->mem_access.vram_userfault.lock);
		if (!list_empty(&bo->vram_userfault_link))
			list_del_init(&bo->vram_userfault_link);
		mutex_unlock(&xe->mem_access.vram_userfault.lock);
	}

	return 0;
}
840 
841 /**
842  * xe_bo_set_purgeable_shrinker() - Update shrinker accounting for purgeable state
843  * @bo: Buffer object
844  * @new_state: New purgeable state being set
845  *
846  * Transfers pages between shrinkable and purgeable buckets when the BO
847  * purgeable state changes. Called automatically from xe_bo_set_purgeable_state().
848  */
849 static void xe_bo_set_purgeable_shrinker(struct xe_bo *bo,
850 					 enum xe_madv_purgeable_state new_state)
851 {
852 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
853 	struct ttm_tt *tt = ttm_bo->ttm;
854 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
855 	struct xe_ttm_tt *xe_tt;
856 	long tt_pages;
857 
858 	xe_bo_assert_held(bo);
859 
860 	if (!tt || !ttm_tt_is_populated(tt))
861 		return;
862 
863 	xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
864 	tt_pages = tt->num_pages;
865 
866 	if (!xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_DONTNEED) {
867 		xe_tt->purgeable = true;
868 		/* Transfer pages from shrinkable to purgeable count */
869 		xe_shrinker_mod_pages(xe->mem.shrinker, -tt_pages, tt_pages);
870 	} else if (xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_WILLNEED) {
871 		xe_tt->purgeable = false;
872 		/* Transfer pages from purgeable to shrinkable count */
873 		xe_shrinker_mod_pages(xe->mem.shrinker, tt_pages, -tt_pages);
874 	}
875 }
876 
877 /**
878  * xe_bo_set_purgeable_state() - Set BO purgeable state with validation
879  * @bo: Buffer object
880  * @new_state: New purgeable state
881  *
882  * Sets the purgeable state with lockdep assertions and validates state
883  * transitions. Once a BO is PURGED, it cannot transition to any other state.
884  * Invalid transitions are caught with xe_assert(). Shrinker page accounting
885  * is updated automatically.
886  */
887 void xe_bo_set_purgeable_state(struct xe_bo *bo,
888 			       enum xe_madv_purgeable_state new_state)
889 {
890 	struct xe_device *xe = xe_bo_device(bo);
891 
892 	xe_bo_assert_held(bo);
893 
894 	/* Validate state is one of the known values */
895 	xe_assert(xe, new_state == XE_MADV_PURGEABLE_WILLNEED ||
896 		  new_state == XE_MADV_PURGEABLE_DONTNEED ||
897 		  new_state == XE_MADV_PURGEABLE_PURGED);
898 
899 	/* Once purged, always purged - cannot transition out */
900 	xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED &&
901 			new_state != XE_MADV_PURGEABLE_PURGED));
902 
903 	bo->madv_purgeable = new_state;
904 	xe_bo_set_purgeable_shrinker(bo, new_state);
905 }
906 
/**
 * xe_ttm_bo_purge() - Purge buffer object backing store
 * @ttm_bo: The TTM buffer object to purge
 * @ctx: TTM operation context
 *
 * This function purges the backing store of a BO marked as DONTNEED and
 * triggers rebind to invalidate stale GPU mappings. For fault-mode VMs,
 * this zaps the PTEs. The next GPU access will trigger a page fault and
 * perform NULL rebind (scratch pages or clear PTEs based on VM config).
 *
 * Return: 0 on success, negative error code on failure
 */
static int xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	/* Empty placement: validating against it drops the backing store. */
	struct ttm_placement place = {};
	int ret;

	xe_bo_assert_held(bo);

	/* No pages allocated yet: nothing to purge. */
	if (!ttm_bo->ttm)
		return 0;

	/* Only purge BOs userspace explicitly marked DONTNEED. */
	if (!xe_bo_madv_is_dontneed(bo))
		return 0;

	/*
	 * Use the standard pre-move hook so we share the same cleanup/invalidate
	 * path as migrations: drop any CPU vmap and schedule the necessary GPU
	 * unbind/rebind work.
	 *
	 * This must be called before ttm_bo_validate() frees the pages.
	 * May fail in no-wait contexts (fault/shrinker) or if the BO is
	 * pinned. Keep state unchanged on failure so we don't end up "PURGED"
	 * with stale mappings.
	 */
	ret = xe_bo_move_notify(bo, ctx);
	if (ret)
		return ret;

	ret = ttm_bo_validate(ttm_bo, &place, ctx);
	if (ret)
		return ret;

	/* Commit the state transition only once invalidation was queued */
	xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_PURGED);

	return 0;
}
956 
957 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
958 		      struct ttm_operation_ctx *ctx,
959 		      struct ttm_resource *new_mem,
960 		      struct ttm_place *hop)
961 {
962 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
963 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
964 	struct ttm_resource *old_mem = ttm_bo->resource;
965 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
966 	struct ttm_tt *ttm = ttm_bo->ttm;
967 	struct xe_migrate *migrate = NULL;
968 	struct dma_fence *fence;
969 	bool move_lacks_source;
970 	bool tt_has_data;
971 	bool needs_clear;
972 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
973 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
974 	int ret = 0;
975 
976 	/*
977 	 * Purge only non-shared BOs explicitly marked DONTNEED by userspace.
978 	 * The move_notify callback will handle invalidation asynchronously.
979 	 */
980 	if (evict && xe_bo_madv_is_dontneed(bo)) {
981 		ret = xe_ttm_bo_purge(ttm_bo, ctx);
982 		if (ret)
983 			return ret;
984 
985 		/* Free the unused eviction destination resource */
986 		ttm_resource_free(ttm_bo, &new_mem);
987 		return 0;
988 	}
989 
990 	/* Bo creation path, moving to system or TT. */
991 	if ((!old_mem && ttm) && !handle_system_ccs) {
992 		if (new_mem->mem_type == XE_PL_TT)
993 			ret = xe_tt_map_sg(xe, ttm);
994 		if (!ret)
995 			ttm_bo_move_null(ttm_bo, new_mem);
996 		goto out;
997 	}
998 
999 	if (ttm_bo->type == ttm_bo_type_sg) {
1000 		if (new_mem->mem_type == XE_PL_SYSTEM)
1001 			ret = xe_bo_move_notify(bo, ctx);
1002 		if (!ret)
1003 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
1004 		return ret;
1005 	}
1006 
1007 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));
1008 
1009 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
1010 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
1011 
1012 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
1013 		(!ttm && ttm_bo->type == ttm_bo_type_device);
1014 
1015 	if (new_mem->mem_type == XE_PL_TT) {
1016 		ret = xe_tt_map_sg(xe, ttm);
1017 		if (ret)
1018 			goto out;
1019 	}
1020 
1021 	if ((move_lacks_source && !needs_clear)) {
1022 		ttm_bo_move_null(ttm_bo, new_mem);
1023 		goto out;
1024 	}
1025 
1026 	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
1027 	    new_mem->mem_type == XE_PL_SYSTEM) {
1028 		ret = xe_svm_bo_evict(bo);
1029 		if (!ret) {
1030 			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
1031 			ttm_bo_move_null(ttm_bo, new_mem);
1032 		} else {
1033 			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
1034 				ERR_PTR(ret));
1035 		}
1036 
1037 		goto out;
1038 	}
1039 
1040 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
1041 		ttm_bo_move_null(ttm_bo, new_mem);
1042 		goto out;
1043 	}
1044 
1045 	/*
1046 	 * Failed multi-hop where the old_mem is still marked as
1047 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
1048 	 */
1049 	if (old_mem_type == XE_PL_TT &&
1050 	    new_mem->mem_type == XE_PL_TT) {
1051 		ttm_bo_move_null(ttm_bo, new_mem);
1052 		goto out;
1053 	}
1054 
1055 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
1056 		ret = xe_bo_move_notify(bo, ctx);
1057 		if (ret)
1058 			goto out;
1059 	}
1060 
1061 	if (old_mem_type == XE_PL_TT &&
1062 	    new_mem->mem_type == XE_PL_SYSTEM) {
1063 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
1064 						     DMA_RESV_USAGE_BOOKKEEP,
1065 						     false,
1066 						     MAX_SCHEDULE_TIMEOUT);
1067 		if (timeout < 0) {
1068 			ret = timeout;
1069 			goto out;
1070 		}
1071 
1072 		if (!handle_system_ccs) {
1073 			ttm_bo_move_null(ttm_bo, new_mem);
1074 			goto out;
1075 		}
1076 	}
1077 
1078 	if (!move_lacks_source &&
1079 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
1080 	     (mem_type_is_vram(old_mem_type) &&
1081 	      new_mem->mem_type == XE_PL_SYSTEM))) {
1082 		hop->fpfn = 0;
1083 		hop->lpfn = 0;
1084 		hop->mem_type = XE_PL_TT;
1085 		hop->flags = TTM_PL_FLAG_TEMPORARY;
1086 		ret = -EMULTIHOP;
1087 		goto out;
1088 	}
1089 
1090 	if (bo->tile)
1091 		migrate = bo->tile->migrate;
1092 	else if (resource_is_vram(new_mem))
1093 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
1094 	else if (mem_type_is_vram(old_mem_type))
1095 		migrate = mem_type_to_migrate(xe, old_mem_type);
1096 	else
1097 		migrate = xe->tiles[0].migrate;
1098 
1099 	xe_assert(xe, migrate);
1100 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
1101 	if (xe_rpm_reclaim_safe(xe)) {
1102 		/*
1103 		 * We might be called through swapout in the validation path of
1104 		 * another TTM device, so acquire rpm here.
1105 		 */
1106 		xe_pm_runtime_get(xe);
1107 	} else {
1108 		drm_WARN_ON(&xe->drm, handle_system_ccs);
1109 		xe_pm_runtime_get_noresume(xe);
1110 	}
1111 
1112 	if (move_lacks_source) {
1113 		u32 flags = 0;
1114 
1115 		if (mem_type_is_vram(new_mem->mem_type))
1116 			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
1117 		else if (handle_system_ccs)
1118 			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
1119 
1120 		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
1121 	} else {
1122 		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
1123 					handle_system_ccs);
1124 	}
1125 	if (IS_ERR(fence)) {
1126 		ret = PTR_ERR(fence);
1127 		xe_pm_runtime_put(xe);
1128 		goto out;
1129 	}
1130 	if (!move_lacks_source) {
1131 		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
1132 						new_mem);
1133 		if (ret) {
1134 			dma_fence_wait(fence, false);
1135 			ttm_bo_move_null(ttm_bo, new_mem);
1136 			ret = 0;
1137 		}
1138 	} else {
1139 		/*
1140 		 * ttm_bo_move_accel_cleanup() may blow up if
1141 		 * bo->resource == NULL, so just attach the
1142 		 * fence and set the new resource.
1143 		 */
1144 		dma_resv_add_fence(ttm_bo->base.resv, fence,
1145 				   DMA_RESV_USAGE_KERNEL);
1146 		ttm_bo_move_null(ttm_bo, new_mem);
1147 	}
1148 
1149 	dma_fence_put(fence);
1150 	xe_pm_runtime_put(xe);
1151 
1152 	/*
1153 	 * CCS meta data is migrated from TT -> SMEM. So, let us detach the
1154 	 * BBs from BO as it is no longer needed.
1155 	 */
1156 	if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT &&
1157 	    new_mem->mem_type == XE_PL_SYSTEM)
1158 		xe_sriov_vf_ccs_detach_bo(bo);
1159 
1160 	if (IS_VF_CCS_READY(xe) &&
1161 	    ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
1162 	     (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
1163 	    handle_system_ccs)
1164 		ret = xe_sriov_vf_ccs_attach_bo(bo);
1165 
1166 out:
1167 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
1168 	    ttm_bo->ttm) {
1169 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
1170 						     DMA_RESV_USAGE_KERNEL,
1171 						     false,
1172 						     MAX_SCHEDULE_TIMEOUT);
1173 		if (timeout < 0)
1174 			ret = timeout;
1175 
1176 		if (IS_VF_CCS_READY(xe))
1177 			xe_sriov_vf_ccs_detach_bo(bo);
1178 
1179 		xe_tt_unmap_sg(xe, ttm_bo->ttm);
1180 	}
1181 
1182 	return ret;
1183 }
1184 
/*
 * Purge helper for the shrinker: drop the bo's backing store without
 * copying any data out first. Also used for ghost (non-xe) bos.
 *
 * Return: Number of pages purged on success, negative error code on failure.
 */
static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
			       struct ttm_buffer_object *bo,
			       unsigned long *scanned)
{
	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
	struct ttm_tt *tt = bo->ttm;
	long lret;

	/* Fake move to system, without copying data. */
	if (bo->resource->mem_type != XE_PL_SYSTEM) {
		struct ttm_resource *new_resource;

		/* Wait for idle before tearing down the old placement. */
		lret = ttm_bo_wait_ctx(bo, ctx);
		if (lret)
			return lret;

		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
		if (lret)
			return lret;

		/* Old placement's dma mappings are no longer needed. */
		xe_tt_unmap_sg(xe, bo->ttm);
		ttm_bo_move_null(bo, new_resource);
	}

	/* Count pages as scanned even if the shrink below fails. */
	*scanned += bo->ttm->num_pages;
	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
			     {.purge = true,
			      .writeback = false,
			      .allow_move = false});

	/* Positive return means pages were released: update accounting. */
	if (lret > 0) {
		xe_ttm_tt_account_subtract(xe, bo->ttm);
		update_global_total_pages(bo->bdev, -(long)tt->num_pages);
	}

	return lret;
}
1222 
1223 static bool
1224 xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1225 {
1226 	struct drm_gpuvm_bo *vm_bo;
1227 
1228 	if (!ttm_bo_eviction_valuable(bo, place))
1229 		return false;
1230 
1231 	if (!xe_bo_is_xe_bo(bo))
1232 		return true;
1233 
1234 	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1235 		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1236 			return false;
1237 	}
1238 
1239 	return true;
1240 }
1241 
1242 /**
1243  * xe_bo_shrink() - Try to shrink an xe bo.
1244  * @ctx: The struct ttm_operation_ctx used for shrinking.
1245  * @bo: The TTM buffer object whose pages to shrink.
1246  * @flags: Flags governing the shrink behaviour.
1247  * @scanned: Pointer to a counter of the number of pages
1248  * attempted to shrink.
1249  *
1250  * Try to shrink- or purge a bo, and if it succeeds, unmap dma.
1251  * Note that we need to be able to handle also non xe bos
1252  * (ghost bos), but only if the struct ttm_tt is embedded in
1253  * a struct xe_ttm_tt. When the function attempts to shrink
1254  * the pages of a buffer object, The value pointed to by @scanned
1255  * is updated.
1256  *
1257  * Return: The number of pages shrunken or purged, or negative error
1258  * code on failure.
1259  */
1260 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
1261 		  const struct xe_bo_shrink_flags flags,
1262 		  unsigned long *scanned)
1263 {
1264 	struct ttm_tt *tt = bo->ttm;
1265 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
1266 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
1267 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
1268 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1269 	bool needs_rpm;
1270 	long lret = 0L;
1271 
1272 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
1273 	    (flags.purge && !xe_tt->purgeable))
1274 		return -EBUSY;
1275 
1276 	if (!xe_bo_eviction_valuable(bo, &place))
1277 		return -EBUSY;
1278 
1279 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1280 		return xe_bo_shrink_purge(ctx, bo, scanned);
1281 
1282 	if (xe_tt->purgeable) {
1283 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1284 			lret = xe_bo_move_notify(xe_bo, ctx);
1285 		if (!lret)
1286 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1287 		if (lret > 0 && xe_bo_madv_is_dontneed(xe_bo))
1288 			xe_bo_set_purgeable_state(xe_bo,
1289 						  XE_MADV_PURGEABLE_PURGED);
1290 		goto out_unref;
1291 	}
1292 
1293 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1294 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1295 		     xe_bo_needs_ccs_pages(xe_bo));
1296 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1297 		goto out_unref;
1298 
1299 	*scanned += tt->num_pages;
1300 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1301 			     {.purge = false,
1302 			      .writeback = flags.writeback,
1303 			      .allow_move = true});
1304 	if (needs_rpm)
1305 		xe_pm_runtime_put(xe);
1306 
1307 	if (lret > 0) {
1308 		xe_ttm_tt_account_subtract(xe, tt);
1309 		update_global_total_pages(bo->bdev, -(long)tt->num_pages);
1310 	}
1311 
1312 out_unref:
1313 	xe_bo_put(xe_bo);
1314 
1315 	return lret;
1316 }
1317 
1318 /**
1319  * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1320  * up in system memory.
1321  * @bo: The buffer object to prepare.
1322  *
1323  * On successful completion, the object backup pages are allocated. Expectation
1324  * is that this is called from the PM notifier, prior to suspend/hibernation.
1325  *
1326  * Return: 0 on success. Negative error code on failure.
1327  */
1328 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1329 {
1330 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1331 	struct xe_validation_ctx ctx;
1332 	struct drm_exec exec;
1333 	struct xe_bo *backup;
1334 	int ret = 0;
1335 
1336 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
1337 		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
1338 		drm_exec_retry_on_contention(&exec);
1339 		xe_assert(xe, !ret);
1340 		xe_assert(xe, !bo->backup_obj);
1341 
1342 		/*
1343 		 * Since this is called from the PM notifier we might have raced with
1344 		 * someone unpinning this after we dropped the pinned list lock and
1345 		 * grabbing the above bo lock.
1346 		 */
1347 		if (!xe_bo_is_pinned(bo))
1348 			break;
1349 
1350 		if (!xe_bo_is_vram(bo))
1351 			break;
1352 
1353 		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1354 			break;
1355 
1356 		backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
1357 					   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1358 					   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1359 					   XE_BO_FLAG_PINNED, &exec);
1360 		if (IS_ERR(backup)) {
1361 			drm_exec_retry_on_contention(&exec);
1362 			ret = PTR_ERR(backup);
1363 			xe_validation_retry_on_oom(&ctx, &ret);
1364 			break;
1365 		}
1366 
1367 		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1368 		ttm_bo_pin(&backup->ttm);
1369 		bo->backup_obj = backup;
1370 	}
1371 
1372 	return ret;
1373 }
1374 
1375 /**
1376  * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1377  * @bo: The buffer object to undo the prepare for.
1378  *
1379  * Always returns 0. The backup object is removed, if still present. Expectation
1380  * it that this called from the PM notifier when undoing the prepare step.
1381  *
1382  * Return: Always returns 0.
1383  */
1384 int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1385 {
1386 	xe_bo_lock(bo, false);
1387 	if (bo->backup_obj) {
1388 		ttm_bo_unpin(&bo->backup_obj->ttm);
1389 		xe_bo_put(bo->backup_obj);
1390 		bo->backup_obj = NULL;
1391 	}
1392 	xe_bo_unlock(bo);
1393 
1394 	return 0;
1395 }
1396 
/*
 * Copy the contents of @bo into its system-memory @backup object, either
 * with the GPU (blit) for user / late-restore objects, or with a CPU
 * memcpy otherwise. On success, @backup is recorded as bo->backup_obj
 * if not already set. Both objects are expected to share the same resv
 * and to be locked by the caller.
 */
static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup)
{
	struct xe_device *xe = xe_bo_device(bo);
	bool unmap = false;
	int ret = 0;

	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
		/* GPU copy path. */
		struct xe_migrate *migrate;
		struct dma_fence *fence;

		if (bo->tile)
			migrate = bo->tile->migrate;
		else
			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);

		xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv);
		/* Reserve a fence slot before issuing the copy. */
		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
		if (ret)
			goto out_backup;

		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
					backup->ttm.resource, false);
		if (IS_ERR(fence)) {
			ret = PTR_ERR(fence);
			goto out_backup;
		}

		dma_resv_add_fence(bo->ttm.base.resv, fence,
				   DMA_RESV_USAGE_KERNEL);
		dma_fence_put(fence);
	} else {
		/* CPU copy path via vmaps. */
		ret = xe_bo_vmap(backup);
		if (ret)
			goto out_backup;

		/* Map @bo only if not already mapped; unmap again below. */
		if (iosys_map_is_null(&bo->vmap)) {
			ret = xe_bo_vmap(bo);
			if (ret)
				goto out_vunmap;
			unmap = true;
		}

		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
				   xe_bo_size(bo));
	}

	if (!bo->backup_obj)
		bo->backup_obj = backup;
out_vunmap:
	/*
	 * NOTE(review): also reached on the GPU-copy path where @backup was
	 * never vmapped - presumably xe_bo_vunmap() is a no-op then; confirm.
	 */
	xe_bo_vunmap(backup);
out_backup:
	if (unmap)
		xe_bo_vunmap(bo);

	return ret;
}
1453 
1454 /**
1455  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1456  * @bo: The buffer object to move.
1457  *
1458  * On successful completion, the object memory will be moved to system memory.
1459  *
1460  * This is needed to for special handling of pinned VRAM object during
1461  * suspend-resume.
1462  *
1463  * Return: 0 on success. Negative error code on failure.
1464  */
1465 int xe_bo_evict_pinned(struct xe_bo *bo)
1466 {
1467 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1468 	struct xe_validation_ctx ctx;
1469 	struct drm_exec exec;
1470 	struct xe_bo *backup = bo->backup_obj;
1471 	bool backup_created = false;
1472 	int ret = 0;
1473 
1474 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
1475 		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
1476 		drm_exec_retry_on_contention(&exec);
1477 		xe_assert(xe, !ret);
1478 
1479 		if (WARN_ON(!bo->ttm.resource)) {
1480 			ret = -EINVAL;
1481 			break;
1482 		}
1483 
1484 		if (WARN_ON(!xe_bo_is_pinned(bo))) {
1485 			ret = -EINVAL;
1486 			break;
1487 		}
1488 
1489 		if (!xe_bo_is_vram(bo))
1490 			break;
1491 
1492 		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1493 			break;
1494 
1495 		if (!backup) {
1496 			backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL,
1497 						   xe_bo_size(bo),
1498 						   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1499 						   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1500 						   XE_BO_FLAG_PINNED, &exec);
1501 			if (IS_ERR(backup)) {
1502 				drm_exec_retry_on_contention(&exec);
1503 				ret = PTR_ERR(backup);
1504 				xe_validation_retry_on_oom(&ctx, &ret);
1505 				break;
1506 			}
1507 			backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1508 			backup_created = true;
1509 		}
1510 
1511 		ret = xe_bo_evict_pinned_copy(bo, backup);
1512 	}
1513 
1514 	if (ret && backup_created)
1515 		xe_bo_put(backup);
1516 
1517 	return ret;
1518 }
1519 
1520 /**
1521  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1522  * @bo: The buffer object to move.
1523  *
1524  * On successful completion, the object memory will be moved back to VRAM.
1525  *
1526  * This is needed to for special handling of pinned VRAM object during
1527  * suspend-resume.
1528  *
1529  * Return: 0 on success. Negative error code on failure.
1530  */
1531 int xe_bo_restore_pinned(struct xe_bo *bo)
1532 {
1533 	struct ttm_operation_ctx ctx = {
1534 		.interruptible = false,
1535 		.gfp_retry_mayfail = false,
1536 	};
1537 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1538 	struct xe_bo *backup = bo->backup_obj;
1539 	bool unmap = false;
1540 	int ret;
1541 
1542 	if (!backup)
1543 		return 0;
1544 
1545 	xe_bo_lock(bo, false);
1546 
1547 	if (!xe_bo_is_pinned(backup)) {
1548 		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1549 		if (ret)
1550 			goto out_unlock_bo;
1551 	}
1552 
1553 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1554 		struct xe_migrate *migrate;
1555 		struct dma_fence *fence;
1556 
1557 		if (bo->tile)
1558 			migrate = bo->tile->migrate;
1559 		else
1560 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1561 
1562 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1563 		if (ret)
1564 			goto out_unlock_bo;
1565 
1566 		fence = xe_migrate_copy(migrate, backup, bo,
1567 					backup->ttm.resource, bo->ttm.resource,
1568 					false);
1569 		if (IS_ERR(fence)) {
1570 			ret = PTR_ERR(fence);
1571 			goto out_unlock_bo;
1572 		}
1573 
1574 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1575 				   DMA_RESV_USAGE_KERNEL);
1576 		dma_fence_put(fence);
1577 	} else {
1578 		ret = xe_bo_vmap(backup);
1579 		if (ret)
1580 			goto out_unlock_bo;
1581 
1582 		if (iosys_map_is_null(&bo->vmap)) {
1583 			ret = xe_bo_vmap(bo);
1584 			if (ret)
1585 				goto out_backup;
1586 			unmap = true;
1587 		}
1588 
1589 		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
1590 				 xe_bo_size(bo));
1591 	}
1592 
1593 	bo->backup_obj = NULL;
1594 
1595 out_backup:
1596 	xe_bo_vunmap(backup);
1597 	if (!bo->backup_obj) {
1598 		if (xe_bo_is_pinned(backup))
1599 			ttm_bo_unpin(&backup->ttm);
1600 		xe_bo_put(backup);
1601 	}
1602 out_unlock_bo:
1603 	if (unmap)
1604 		xe_bo_vunmap(bo);
1605 	xe_bo_unlock(bo);
1606 	return ret;
1607 }
1608 
/**
 * xe_bo_dma_unmap_pinned() - Tear down the dma mappings of a pinned bo.
 * @bo: The buffer object whose mappings to tear down.
 *
 * Releases either the dma-buf attachment mapping (imported objects) or the
 * locally created sg-table dma mapping, if present.
 *
 * Return: Always returns 0.
 */
int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
{
	struct ttm_buffer_object *ttm_bo = &bo->ttm;
	struct ttm_tt *tt = ttm_bo->ttm;

	if (tt) {
		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);

		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
			/* Imported dma-buf: unmap through the attachment. */
			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
						 ttm_bo->sg,
						 DMA_BIDIRECTIONAL);
			ttm_bo->sg = NULL;
			xe_tt->sg = NULL;
		} else if (xe_tt->sg) {
			/* Locally mapped sg-table: unmap and free it. */
			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
					  xe_tt->sg,
					  DMA_BIDIRECTIONAL, 0);
			sg_free_table(xe_tt->sg);
			xe_tt->sg = NULL;
		}
	}

	return 0;
}
1634 
1635 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1636 				       unsigned long page_offset)
1637 {
1638 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1639 	struct xe_res_cursor cursor;
1640 	struct xe_vram_region *vram;
1641 
1642 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1643 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1644 
1645 	vram = res_to_mem_region(ttm_bo->resource);
1646 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1647 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1648 }
1649 
static void __xe_bo_vunmap(struct xe_bo *bo);

/*
 * TODO: Move this function to TTM so we don't rely on how TTM does its
 * locking, thereby abusing TTM internals.
 */
static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
{
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	bool locked;

	/* Must only be called once the bo refcount has dropped to zero. */
	xe_assert(xe, !kref_read(&ttm_bo->kref));

	/*
	 * We can typically only race with TTM trylocking under the
	 * lru_lock, which will immediately be unlocked again since
	 * the ttm_bo refcount is zero at this point. So trylocking *should*
	 * always succeed here, as long as we hold the lru lock.
	 */
	spin_lock(&ttm_bo->bdev->lru_lock);
	locked = dma_resv_trylock(&ttm_bo->base._resv);
	spin_unlock(&ttm_bo->bdev->lru_lock);
	xe_assert(xe, locked);

	return locked;
}
1676 
/*
 * TTM release_notify callback: called when the bo refcount has reached
 * zero. Replaces any unsignaled preempt fences on the resv with a signaled
 * stub so they can't hold up destruction.
 */
static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	struct dma_fence *replacement = NULL;
	struct xe_bo *bo;

	/* Ghost objects need no scrubbing. */
	if (!xe_bo_is_xe_bo(ttm_bo))
		return;

	bo = ttm_to_xe_bo(ttm_bo);
	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));

	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
		return;

	/*
	 * Scrub the preempt fences if any. The unbind fence is already
	 * attached to the resv.
	 * TODO: Don't do this for external bos once we scrub them after
	 * unbind.
	 */
	dma_resv_for_each_fence(&cursor, &ttm_bo->base._resv,
				DMA_RESV_USAGE_BOOKKEEP, fence) {
		if (xe_fence_is_xe_preempt(fence) &&
		    !dma_fence_is_signaled(fence)) {
			/* Stub fence is allocated once and reused per context. */
			if (!replacement)
				replacement = dma_fence_get_stub();

			dma_resv_replace_fences(&ttm_bo->base._resv,
						fence->context,
						replacement,
						DMA_RESV_USAGE_BOOKKEEP);
		}
	}
	dma_fence_put(replacement);

	dma_resv_unlock(&ttm_bo->base._resv);
}
1716 
/*
 * TTM delete_mem_notify callback: the object is idle and about to lose its
 * backing store. Detach VF CCS state and release any dma-buf attachment
 * mapping.
 */
static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);

	if (!xe_bo_is_xe_bo(ttm_bo))
		return;

	if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev)))
		xe_sriov_vf_ccs_detach_bo(bo);

	/*
	 * Object is idle and about to be destroyed. Release the
	 * dma-buf attachment.
	 */
	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
						       struct xe_ttm_tt, ttm);

		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
					 DMA_BIDIRECTIONAL);
		ttm_bo->sg = NULL;
		xe_tt->sg = NULL;
	}
}
1741 
1742 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1743 {
1744 	struct ttm_operation_ctx ctx = {
1745 		.interruptible = false,
1746 		.gfp_retry_mayfail = false,
1747 	};
1748 
1749 	if (ttm_bo->ttm) {
1750 		struct xe_ttm_tt *xe_tt =
1751 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1752 
1753 		if (xe_tt->purgeable)
1754 			xe_ttm_bo_purge(ttm_bo, &ctx);
1755 	}
1756 }
1757 
/*
 * TTM access_memory callback: read from or write to a VRAM-resident bo on
 * behalf of the CPU (e.g. ptrace/debugger access). Non-visible VRAM or
 * large transfers go through the migration engine; otherwise the copy is
 * done page by page through the io mapping.
 *
 * Return: @len on success, negative error code on failure.
 */
static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
				unsigned long offset, void *buf, int len,
				int write)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	struct iosys_map vmap;
	struct xe_res_cursor cursor;
	struct xe_vram_region *vram;
	int bytes_left = len;
	int err = 0;

	xe_bo_assert_held(bo);
	xe_device_assert_mem_access(xe);

	/* Only VRAM placements are handled here. */
	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
		return -EIO;

	/* Use the blitter for non-CPU-visible VRAM or big transfers. */
	if (!xe_bo_is_visible_vram(bo) || len >= SZ_16K) {
		struct xe_migrate *migrate =
			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);

		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
					       write);
		goto out;
	}

	/* Walk the resource from the page containing @offset. */
	vram = res_to_mem_region(ttm_bo->resource);
	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
		     xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);

	do {
		unsigned long page_offset = (offset & ~PAGE_MASK);
		/* Never cross a page boundary in one chunk. */
		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);

		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
					  cursor.start);
		if (write)
			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
		else
			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);

		buf += byte_count;
		offset += byte_count;
		bytes_left -= byte_count;
		if (bytes_left)
			xe_res_next(&cursor, PAGE_SIZE);
	} while (bytes_left);

out:
	return err ?: len;
}
1810 
/* TTM device callbacks implemented by the xe driver. */
const struct ttm_device_funcs xe_ttm_funcs = {
	.ttm_tt_create = xe_ttm_tt_create,
	.ttm_tt_populate = xe_ttm_tt_populate,
	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
	.ttm_tt_destroy = xe_ttm_tt_destroy,
	.evict_flags = xe_evict_flags,
	.move = xe_bo_move,
	.io_mem_reserve = xe_ttm_io_mem_reserve,
	.io_mem_pfn = xe_ttm_io_mem_pfn,
	.access_memory = xe_ttm_access_memory,
	.release_notify = xe_ttm_bo_release_notify,
	.eviction_valuable = xe_bo_eviction_valuable,
	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
	.swap_notify = xe_ttm_bo_swap_notify,
};
1826 
/*
 * Final destructor for an xe bo: releases GEM/prime state, removes GGTT
 * mappings, drops client/VM/parent references and frees the object.
 */
static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	struct xe_tile *tile;
	u8 id;

	if (bo->ttm.base.import_attach)
		drm_prime_gem_destroy(&bo->ttm.base, NULL);
	drm_gem_object_release(&bo->ttm.base);

	/* All GPU VM bindings must be gone by now. */
	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));

	for_each_tile(tile, xe, id)
		if (bo->ggtt_node[id])
			xe_ggtt_remove_bo(tile->mem.ggtt, bo);

#ifdef CONFIG_PROC_FS
	if (bo->client)
		xe_drm_client_remove_bo(bo);
#endif

	if (bo->vm && xe_bo_is_user(bo))
		xe_vm_put(bo->vm);

	/* Drop the parent reference taken when this bo was a backup object. */
	if (bo->parent_obj)
		xe_bo_put(bo->parent_obj);

	/* Make sure a pending runtime-suspend PTE zap can't see the bo. */
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	if (!list_empty(&bo->vram_userfault_link))
		list_del(&bo->vram_userfault_link);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);

	kfree(bo);
}
1862 
/* GEM free callback: drops the ttm_buffer_object reference held by GEM. */
static void xe_gem_object_free(struct drm_gem_object *obj)
{
	/* Our BO reference counting scheme works as follows:
	 *
	 * The gem object kref is typically used throughout the driver,
	 * and the gem object holds a ttm_buffer_object refcount, so
	 * that when the last gem object reference is put, which is when
	 * we end up in this function, we put also that ttm_buffer_object
	 * refcount. Anything using gem interfaces is then no longer
	 * allowed to access the object in a way that requires a gem
	 * refcount, including locking the object.
	 *
	 * driver ttm callbacks is allowed to use the ttm_buffer_object
	 * refcount directly if needed.
	 */
	__xe_bo_vunmap(gem_to_xe_bo(obj));
	ttm_bo_fini(container_of(obj, struct ttm_buffer_object, base));
}
1881 
1882 static void xe_gem_object_close(struct drm_gem_object *obj,
1883 				struct drm_file *file_priv)
1884 {
1885 	struct xe_bo *bo = gem_to_xe_bo(obj);
1886 
1887 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1888 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1889 
1890 		xe_bo_lock(bo, false);
1891 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1892 		xe_bo_unlock(bo);
1893 	}
1894 }
1895 
1896 static bool should_migrate_to_smem(struct xe_bo *bo)
1897 {
1898 	/*
1899 	 * NOTE: The following atomic checks are platform-specific. For example,
1900 	 * if a device supports CXL atomics, these may not be necessary or
1901 	 * may behave differently.
1902 	 */
1903 
1904 	return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
1905 	       bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
1906 }
1907 
1908 static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
1909 {
1910 	long lerr;
1911 
1912 	if (ctx->no_wait_gpu)
1913 		return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
1914 			0 : -EBUSY;
1915 
1916 	lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
1917 				     ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
1918 	if (lerr < 0)
1919 		return lerr;
1920 	if (lerr == 0)
1921 		return -EBUSY;
1922 
1923 	return 0;
1924 }
1925 
/* Populate the bo if swapped out, or migrate if the access mode requires that. */
static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
			       struct drm_exec *exec)
{
	struct ttm_buffer_object *tbo = &bo->ttm;
	int err = 0;

	if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
		/* System-memory-backed placement: just (re)populate pages. */
		err = xe_bo_wait_usage_kernel(bo, ctx);
		if (!err)
			err = ttm_bo_populate(&bo->ttm, ctx);
	} else if (should_migrate_to_smem(bo)) {
		/* CPU/global atomics demand a system-memory placement. */
		xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
		err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
	}

	return err;
}
1944 
/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
{
	vm_fault_t ret;

	trace_xe_bo_cpu_fault(bo);

	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
				       TTM_BO_VM_NUM_PREFAULT);
	/*
	 * When TTM is actually called to insert PTEs, ensure no blocking conditions
	 * remain, in which case TTM may drop locks and return VM_FAULT_RETRY.
	 */
	xe_assert(xe, ret != VM_FAULT_RETRY);

	/* Track VRAM userspace mappings so they can be zapped on rpm suspend. */
	if (ret == VM_FAULT_NOPAGE &&
	    mem_type_is_vram(bo->ttm.resource->mem_type)) {
		mutex_lock(&xe->mem_access.vram_userfault.lock);
		if (list_empty(&bo->vram_userfault_link))
			list_add(&bo->vram_userfault_link,
				 &xe->mem_access.vram_userfault.list);
		mutex_unlock(&xe->mem_access.vram_userfault.lock);
	}

	return ret;
}
1971 
1972 static vm_fault_t xe_err_to_fault_t(int err)
1973 {
1974 	switch (err) {
1975 	case 0:
1976 	case -EINTR:
1977 	case -ERESTARTSYS:
1978 	case -EAGAIN:
1979 		return VM_FAULT_NOPAGE;
1980 	case -ENOMEM:
1981 	case -ENOSPC:
1982 		return VM_FAULT_OOM;
1983 	default:
1984 		break;
1985 	}
1986 	return VM_FAULT_SIGBUS;
1987 }
1988 
1989 static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
1990 {
1991 	dma_resv_assert_held(tbo->base.resv);
1992 
1993 	return tbo->ttm &&
1994 		(tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
1995 		TTM_TT_FLAG_EXTERNAL;
1996 }
1997 
/*
 * Non-blocking CPU fault fastpath: only trylocks and non-waiting operations
 * are used, so this runs entirely under the mmap lock. Any condition that
 * would require blocking makes it return VM_FAULT_RETRY, and the caller
 * falls back to the slowpath.
 */
static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
					   struct xe_bo *bo, bool needs_rpm)
{
	struct ttm_buffer_object *tbo = &bo->ttm;
	vm_fault_t ret = VM_FAULT_RETRY;
	struct xe_validation_ctx ctx;
	struct ttm_operation_ctx tctx = {
		.interruptible = true,
		.no_wait_gpu = true,
		.gfp_retry_mayfail = true,

	};
	int err;

	/* Don't wake the device from the fastpath; retry via slowpath. */
	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
		return VM_FAULT_RETRY;

	err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
				     (struct xe_val_flags) {
					     .interruptible = true,
					     .no_block = true
				     });
	if (err)
		goto out_pm;

	/* Trylock only: blocking here would stall under the mmap lock. */
	if (!dma_resv_trylock(tbo->base.resv))
		goto out_validation;

	/*
	 * Reject CPU faults to purgeable BOs. DONTNEED BOs can be purged
	 * at any time, and purged BOs have no backing store. Either case
	 * is undefined behavior for CPU access.
	 */
	if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) {
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	if (xe_ttm_bo_is_imported(tbo)) {
		ret = VM_FAULT_SIGBUS;
		drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
		goto out_unlock;
	}

	err = xe_bo_fault_migrate(bo, &tctx, NULL);
	if (err) {
		/* Return VM_FAULT_RETRY on these errors. */
		if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
			ret = xe_err_to_fault_t(err);
		goto out_unlock;
	}

	/* Only insert PTEs if no kernel fences would make TTM block. */
	if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
		ret = __xe_bo_cpu_fault(vmf, xe, bo);

out_unlock:
	dma_resv_unlock(tbo->base.resv);
out_validation:
	xe_validation_ctx_fini(&ctx);
out_pm:
	if (needs_rpm)
		xe_pm_runtime_put(xe);

	return ret;
}
2063 
/*
 * CPU page-fault handler for xe BOs.
 *
 * First attempts a non-blocking fastpath. If that asks for a retry, any
 * blocking work (migration, kernel-fence waits) is resolved with the
 * mmap lock dropped where the fault flags permit, and the fault is then
 * retried by the core mm.
 */
static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
{
	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
	struct drm_device *ddev = tbo->base.dev;
	struct xe_device *xe = to_xe_device(ddev);
	struct xe_bo *bo = ttm_to_xe_bo(tbo);
	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
	bool retry_after_wait = false;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	vm_fault_t ret;
	int err = 0;
	int idx;

	/* Wedged or unplugged device: map a dummy page instead of real backing. */
	if (xe_device_wedged(xe) || !drm_dev_enter(&xe->drm, &idx))
		return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);

	ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
	if (ret != VM_FAULT_RETRY)
		goto out;

	if (fault_flag_allow_retry_first(vmf->flags)) {
		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
			goto out;
		/* Hold a bo reference across the wait; the mmap lock is dropped. */
		retry_after_wait = true;
		xe_bo_get(bo);
		mmap_read_unlock(vmf->vma->vm_mm);
	} else {
		ret = VM_FAULT_NOPAGE;
	}

	/*
	 * The fastpath failed and we were not required to return and retry immediately.
	 * We're now running in one of two modes:
	 *
	 * 1) retry_after_wait == true: The mmap_read_lock() is dropped, and we're trying
	 * to resolve blocking waits. But we can't resolve the fault since the
	 * mmap_read_lock() is dropped. After retrying the fault, the aim is that the fastpath
	 * should succeed. But it may fail since we drop the bo lock.
	 *
	 * 2) retry_after_wait == false: The fastpath failed, typically even after
	 * a retry. Do whatever's necessary to resolve the fault.
	 *
	 * This construct is recommended to avoid excessive waits under the mmap_lock.
	 */

	if (needs_rpm)
		xe_pm_runtime_get(xe);

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		struct ttm_operation_ctx tctx = {
			.interruptible = true,
			.no_wait_gpu = false,
			.gfp_retry_mayfail = retry_after_wait,
		};

		err = drm_exec_lock_obj(&exec, &tbo->base);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		/*
		 * Reject CPU faults to purgeable BOs. DONTNEED BOs can be
		 * purged at any time, and purged BOs have no backing store.
		 */
		if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) {
			err = -EFAULT;
			break;
		}

		if (xe_ttm_bo_is_imported(tbo)) {
			err = -EFAULT;
			drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
			break;
		}

		err = xe_bo_fault_migrate(bo, &tctx, &exec);
		if (err) {
			drm_exec_retry_on_contention(&exec);
			xe_validation_retry_on_oom(&ctx, &err);
			break;
		}

		err = xe_bo_wait_usage_kernel(bo, &tctx);
		if (err)
			break;

		/* With the mmap lock still held we may resolve the fault here. */
		if (!retry_after_wait)
			ret = __xe_bo_cpu_fault(vmf, xe, bo);
	}
	/* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
	if (err && !retry_after_wait)
		ret = xe_err_to_fault_t(err);

	if (needs_rpm)
		xe_pm_runtime_put(xe);

	if (retry_after_wait)
		xe_bo_put(bo);
out:
	drm_dev_exit(idx);

	return ret;
}
2169 
2170 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
2171 			   void *buf, int len, int write)
2172 {
2173 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
2174 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
2175 	struct xe_device *xe = xe_bo_device(bo);
2176 
2177 	guard(xe_pm_runtime)(xe);
2178 	return ttm_bo_vm_access(vma, addr, buf, len, write);
2179 }
2180 
2181 /**
2182  * xe_bo_read() - Read from an xe_bo
2183  * @bo: The buffer object to read from.
2184  * @offset: The byte offset to start reading from.
2185  * @dst: Location to store the read.
2186  * @size: Size in bytes for the read.
2187  *
2188  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
2189  *
2190  * Return: Zero on success, or negative error.
2191  */
2192 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
2193 {
2194 	int ret;
2195 
2196 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
2197 	if (ret >= 0 && ret != size)
2198 		ret = -EIO;
2199 	else if (ret == size)
2200 		ret = 0;
2201 
2202 	return ret;
2203 }
2204 
/* CPU mmap callbacks; open/close are the stock TTM vma refcount helpers. */
static const struct vm_operations_struct xe_gem_vm_ops = {
	.fault = xe_bo_cpu_fault,
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
	.access = xe_bo_vm_access,
};
2211 
2212 static int xe_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
2213 {
2214 	struct xe_bo *bo = gem_to_xe_bo(obj);
2215 	int err = 0;
2216 
2217 	/*
2218 	 * Reject mmap of purgeable BOs. DONTNEED BOs can be purged
2219 	 * at any time, making CPU access undefined behavior. Purged BOs have
2220 	 * no backing store and are permanently invalid.
2221 	 */
2222 	err = xe_bo_lock(bo, true);
2223 	if (err)
2224 		return err;
2225 
2226 	if (xe_bo_madv_is_dontneed(bo))
2227 		err = -EBUSY;
2228 	else if (xe_bo_is_purged(bo))
2229 		err = -EINVAL;
2230 	xe_bo_unlock(bo);
2231 	if (err)
2232 		return err;
2233 
2234 	return drm_gem_ttm_mmap(obj, vma);
2235 }
2236 
/* GEM object callbacks wiring xe BOs into the DRM core. */
static const struct drm_gem_object_funcs xe_gem_object_funcs = {
	.free = xe_gem_object_free,
	.close = xe_gem_object_close,
	.mmap = xe_gem_object_mmap,
	.export = xe_gem_prime_export,
	.vm_ops = &xe_gem_vm_ops,
};
2244 
2245 /**
2246  * xe_bo_alloc - Allocate storage for a struct xe_bo
2247  *
2248  * This function is intended to allocate storage to be used for input
2249  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
2250  * created is needed before the call to __xe_bo_create_locked().
2251  * If __xe_bo_create_locked ends up never to be called, then the
2252  * storage allocated with this function needs to be freed using
2253  * xe_bo_free().
2254  *
2255  * Return: A pointer to an uninitialized struct xe_bo on success,
2256  * ERR_PTR(-ENOMEM) on error.
2257  */
2258 struct xe_bo *xe_bo_alloc(void)
2259 {
2260 	struct xe_bo *bo = kzalloc_obj(*bo);
2261 
2262 	if (!bo)
2263 		return ERR_PTR(-ENOMEM);
2264 
2265 	return bo;
2266 }
2267 
/**
 * xe_bo_free - Free storage allocated using xe_bo_alloc()
 * @bo: The buffer object storage.
 *
 * Refer to xe_bo_alloc() documentation for valid use-cases.
 * Passing NULL is harmless since kfree(NULL) is a no-op.
 */
void xe_bo_free(struct xe_bo *bo)
{
	kfree(bo);
}
2278 
/**
 * xe_bo_init_locked() - Initialize or create an xe_bo.
 * @xe: The xe device.
 * @bo: An already allocated buffer object or NULL
 * if the function should allocate a new one.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @resv: Pointer to a locked shared reservation object to use for this bo,
 * or NULL for the xe_bo to use its own.
 * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
 * @size: The storage size to use for the bo.
 * @cpu_caching: The cpu caching used for system memory backing store.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Initialize or create an xe buffer object. On failure, any allocated buffer
 * object passed in @bo will have been unreferenced.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
				struct xe_tile *tile, struct dma_resv *resv,
				struct ttm_lru_bulk_move *bulk, size_t size,
				u16 cpu_caching, enum ttm_bo_type type,
				u32 flags, struct drm_exec *exec)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.gfp_retry_mayfail = true,
	};
	struct ttm_placement *placement;
	uint32_t alignment;
	size_t aligned_size;
	int err;

	/* Only kernel objects should set GT */
	xe_assert(xe, !tile || type == ttm_bo_type_kernel);

	if (XE_WARN_ON(!size)) {
		xe_bo_free(bo);
		return ERR_PTR(-EINVAL);
	}

	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
		return ERR_PTR(-EINVAL);

	/*
	 * Pick the backing-store alignment: 64K or 2M for VRAM/stolen
	 * placements that require it, 4K otherwise.
	 */
	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;

		/* Kernel objects are silently rounded up; device objects must already fit. */
		aligned_size = ALIGN(size, align);
		if (type != ttm_bo_type_device)
			size = ALIGN(size, align);
		flags |= XE_BO_FLAG_INTERNAL_64K;
		alignment = align >> PAGE_SHIFT;
	} else {
		aligned_size = ALIGN(size, SZ_4K);
		flags &= ~XE_BO_FLAG_INTERNAL_64K;
		alignment = SZ_4K >> PAGE_SHIFT;
	}

	/* Userspace-visible objects may not have an unaligned size. */
	if (type == ttm_bo_type_device && aligned_size != size)
		return ERR_PTR(-EINVAL);

	if (!bo) {
		bo = xe_bo_alloc();
		if (IS_ERR(bo))
			return bo;
	}

	bo->ccs_cleared = false;
	bo->tile = tile;
	bo->flags = flags;
	bo->cpu_caching = cpu_caching;
	bo->ttm.base.funcs = &xe_gem_object_funcs;
	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
	INIT_LIST_HEAD(&bo->pinned_link);
#ifdef CONFIG_PROC_FS
	INIT_LIST_HEAD(&bo->client_link);
#endif
	INIT_LIST_HEAD(&bo->vram_userfault_link);

	/* Initialize purge advisory state */
	bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED;

	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);

	/* A shared resv implies eviction within that resv unless opted out. */
	if (resv) {
		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
		ctx.resv = resv;
	}

	xe_validation_assert_exec(xe, exec, &bo->ttm.base);
	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
		err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type);
		if (WARN_ON(err)) {
			xe_ttm_bo_destroy(&bo->ttm);
			return ERR_PTR(err);
		}
	}

	/* Defer populating type_sg bos */
	placement = (type == ttm_bo_type_sg ||
		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
		&bo->placement;
	/*
	 * NOTE(review): on failure ttm_bo_init_reserved() is expected to
	 * invoke the destroy callback (xe_ttm_bo_destroy), freeing @bo, so
	 * no explicit cleanup is done here — confirm against TTM's contract.
	 */
	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
				   placement, alignment,
				   &ctx, NULL, resv, xe_ttm_bo_destroy);
	if (err)
		return ERR_PTR(err);

	/*
	 * The VRAM pages underneath are potentially still being accessed by the
	 * GPU, as per async GPU clearing and async evictions. However TTM makes
	 * sure to add any corresponding move/clear fences into the objects
	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
	 *
	 * For KMD internal buffers we don't care about GPU clearing, however we
	 * still need to handle async evictions, where the VRAM is still being
	 * accessed by the GPU. Most internal callers are not expecting this,
	 * since they are missing the required synchronisation before accessing
	 * the memory. To keep things simple just sync wait any kernel fences
	 * here, if the buffer is designated KMD internal.
	 *
	 * For normal userspace objects we should already have the required
	 * pipelining or sync waiting elsewhere, since we already have to deal
	 * with things like async GPU clearing.
	 */
	if (type == ttm_bo_type_kernel) {
		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
						     DMA_RESV_USAGE_KERNEL,
						     ctx.interruptible,
						     MAX_SCHEDULE_TIMEOUT);

		if (timeout < 0) {
			/* With a shared resv, the caller owns the lock. */
			if (!resv)
				dma_resv_unlock(bo->ttm.base.resv);
			xe_bo_put(bo);
			return ERR_PTR(timeout);
		}
	}

	bo->created = true;
	if (bulk)
		ttm_bo_set_bulk_move(&bo->ttm, bulk);
	else
		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

	return bo;
}
2434 
2435 static int __xe_bo_fixed_placement(struct xe_device *xe,
2436 				   struct xe_bo *bo, enum ttm_bo_type type,
2437 				   u32 flags,
2438 				   u64 start, u64 end, u64 size)
2439 {
2440 	struct ttm_place *place = bo->placements;
2441 	u32 vram_flag, vram_stolen_flags;
2442 
2443 	/*
2444 	 * to allow fixed placement in GGTT of a VF, post-migration fixups would have to
2445 	 * include selecting a new fixed offset and shifting the page ranges for it
2446 	 */
2447 	xe_assert(xe, !IS_SRIOV_VF(xe) || !(bo->flags & XE_BO_FLAG_GGTT));
2448 
2449 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
2450 		return -EINVAL;
2451 
2452 	vram_flag = flags & XE_BO_FLAG_VRAM_MASK;
2453 	vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag;
2454 
2455 	/* check if more than one VRAM/STOLEN flag is set */
2456 	if (hweight32(vram_stolen_flags) > 1)
2457 		return -EINVAL;
2458 
2459 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
2460 	place->fpfn = start >> PAGE_SHIFT;
2461 	place->lpfn = end >> PAGE_SHIFT;
2462 
2463 	if (flags & XE_BO_FLAG_STOLEN)
2464 		place->mem_type = XE_PL_STOLEN;
2465 	else
2466 		place->mem_type = bo_vram_flags_to_vram_placement(xe, flags, vram_flag, type);
2467 
2468 	bo->placement = (struct ttm_placement) {
2469 		.num_placement = 1,
2470 		.placement = place,
2471 	};
2472 
2473 	return 0;
2474 }
2475 
/*
 * Common creation path: optional fixed placement, bo init, vm association
 * and GGTT insertion. Returns the bo locked (vm resv held for vm-private
 * bos), or a negative error pointer with everything unwound.
 */
static struct xe_bo *
__xe_bo_create_locked(struct xe_device *xe,
		      struct xe_tile *tile, struct xe_vm *vm,
		      size_t size, u64 start, u64 end,
		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
		      u64 alignment, struct drm_exec *exec)
{
	struct xe_bo *bo = NULL;
	int err;

	if (vm)
		xe_vm_assert_held(vm);

	/* A restricted range requires preallocating so the placement can be fixed. */
	if (start || end != ~0ULL) {
		bo = xe_bo_alloc();
		if (IS_ERR(bo))
			return bo;

		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
		err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size);
		if (err) {
			xe_bo_free(bo);
			return ERR_PTR(err);
		}
	}

	bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
			       vm && !xe_vm_in_fault_mode(vm) &&
			       flags & XE_BO_FLAG_USER ?
			       &vm->lru_bulk_move : NULL, size,
			       cpu_caching, type, flags, exec);
	if (IS_ERR(bo))
		return bo;

	bo->min_align = alignment;

	/*
	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
	 * to ensure the shared resv doesn't disappear under the bo, the bo
	 * will keep a reference to the vm, and avoid circular references
	 * by having all the vm's bo references released at vm close
	 * time.
	 */
	if (vm && xe_bo_is_user(bo))
		xe_vm_get(vm);
	bo->vm = vm;

	if (bo->flags & XE_BO_FLAG_GGTT) {
		struct xe_tile *t;
		u8 id;

		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
			if (!tile && flags & XE_BO_FLAG_STOLEN)
				tile = xe_device_get_root_tile(xe);

			xe_assert(xe, tile);
		}

		for_each_tile(t, xe, id) {
			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
				continue;

			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
				/*
				 * NOTE(review): the range passed here is
				 * (start + size, U64_MAX) — confirm this
				 * matches xe_ggtt_insert_bo_at()'s expected
				 * start/end semantics.
				 */
				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
							   start + xe_bo_size(bo), U64_MAX,
							   exec);
			} else {
				err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec);
			}
			if (err)
				goto err_unlock_put_bo;
		}
	}

	trace_xe_bo_create(bo);
	return bo;

err_unlock_put_bo:
	__xe_bo_unset_bulk_move(bo);
	xe_bo_unlock_vm_held(bo);
	xe_bo_put(bo);
	return ERR_PTR(err);
}
2559 
2560 /**
2561  * xe_bo_create_locked() - Create a BO
2562  * @xe: The xe device.
2563  * @tile: The tile to select for migration of this bo, and the tile used for
2564  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2565  * @vm: The local vm or NULL for external objects.
2566  * @size: The storage size to use for the bo.
2567  * @type: The TTM buffer object type.
2568  * @flags: XE_BO_FLAG_ flags.
2569  * @exec: The drm_exec transaction to use for exhaustive eviction.
2570  *
2571  * Create a locked xe BO with no range- nor alignment restrictions.
2572  *
2573  * Return: The buffer object on success. Negative error pointer on failure.
2574  */
2575 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
2576 				  struct xe_vm *vm, size_t size,
2577 				  enum ttm_bo_type type, u32 flags,
2578 				  struct drm_exec *exec)
2579 {
2580 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
2581 				     flags, 0, exec);
2582 }
2583 
/*
 * Create a bo outside of any vm, running the validation transaction
 * internally. The guard retries on eviction contention and OOM. On
 * success the bo is returned unlocked.
 */
static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
				       size_t size, u16 cpu_caching,
				       enum ttm_bo_type type, u32 flags,
				       u64 alignment, bool intr)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *bo;
	int ret = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
			    ret) {
		bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
					   cpu_caching, type, flags, alignment, &exec);
		drm_exec_retry_on_contention(&exec);
		if (IS_ERR(bo)) {
			ret = PTR_ERR(bo);
			xe_validation_retry_on_oom(&ctx, &ret);
		} else {
			xe_bo_unlock(bo);
		}
	}

	/* ret != 0 only after the guard has exhausted its retries. */
	return ret ? ERR_PTR(ret) : bo;
}
2609 
2610 /**
2611  * xe_bo_create_user() - Create a user BO
2612  * @xe: The xe device.
2613  * @vm: The local vm or NULL for external objects.
2614  * @size: The storage size to use for the bo.
2615  * @cpu_caching: The caching mode to be used for system backing store.
2616  * @flags: XE_BO_FLAG_ flags.
2617  * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
2618  * if such a transaction should be initiated by the call.
2619  *
2620  * Create a bo on behalf of user-space.
2621  *
2622  * Return: The buffer object on success. Negative error pointer on failure.
2623  */
2624 struct xe_bo *xe_bo_create_user(struct xe_device *xe,
2625 				struct xe_vm *vm, size_t size,
2626 				u16 cpu_caching,
2627 				u32 flags, struct drm_exec *exec)
2628 {
2629 	struct xe_bo *bo;
2630 
2631 	flags |= XE_BO_FLAG_USER;
2632 
2633 	if (vm || exec) {
2634 		xe_assert(xe, exec);
2635 		bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
2636 					   cpu_caching, ttm_bo_type_device,
2637 					   flags, 0, exec);
2638 		if (!IS_ERR(bo))
2639 			xe_bo_unlock_vm_held(bo);
2640 	} else {
2641 		bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
2642 				       ttm_bo_type_device, flags, 0, true);
2643 	}
2644 
2645 	return bo;
2646 }
2647 
/**
 * xe_bo_create_pin_range_novm() - Create and pin a BO with range options.
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @size: The storage size to use for the bo.
 * @start: Start of fixed VRAM range or 0.
 * @end: End of fixed VRAM range or ~0ULL.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 *
 * Create an Xe BO with range- and options. If @start and @end indicate
 * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement
 * only.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
					  size_t size, u64 start, u64 end,
					  enum ttm_bo_type type, u32 flags)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *bo;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end,
					   0, type, flags, 0, &exec);
		if (IS_ERR(bo)) {
			drm_exec_retry_on_contention(&exec);
			err = PTR_ERR(bo);
			xe_validation_retry_on_oom(&ctx, &err);
			break;
		}

		err = xe_bo_pin(bo, &exec);
		xe_bo_unlock(bo);
		if (err) {
			/* Drop the bo before the guard potentially retries. */
			xe_bo_put(bo);
			drm_exec_retry_on_contention(&exec);
			xe_validation_retry_on_oom(&ctx, &err);
			break;
		}
	}

	return err ? ERR_PTR(err) : bo;
}
2696 
/*
 * Create a bo at an optional fixed VRAM offset (@offset == ~0ull means
 * "anywhere"), pin it, and vmap it. CPU access is always requested since
 * the mapping is the point; stolen memory needing GGTT-based CPU access
 * additionally gets XE_BO_FLAG_GGTT. Returned unlocked on success.
 */
static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
						     struct xe_tile *tile,
						     struct xe_vm *vm,
						     size_t size, u64 offset,
						     enum ttm_bo_type type, u32 flags,
						     u64 alignment, struct drm_exec *exec)
{
	struct xe_bo *bo;
	int err;
	u64 start = offset == ~0ull ? 0 : offset;
	u64 end = offset == ~0ull ? ~0ull : start + size;

	if (flags & XE_BO_FLAG_STOLEN &&
	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		flags |= XE_BO_FLAG_GGTT;

	bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
				   flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
				   alignment, exec);
	if (IS_ERR(bo))
		return bo;

	err = xe_bo_pin(bo, exec);
	if (err)
		goto err_put;

	err = xe_bo_vmap(bo);
	if (err)
		goto err_unpin;

	xe_bo_unlock_vm_held(bo);

	return bo;

err_unpin:
	xe_bo_unpin(bo);
err_put:
	xe_bo_unlock_vm_held(bo);
	xe_bo_put(bo);
	return ERR_PTR(err);
}
2738 
/**
 * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @size: The storage size to use for the bo.
 * @offset: Optional VRAM offset or %~0ull for don't care.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @alignment: GGTT alignment.
 * @intr: Whether to execute any waits for backing store interruptible.
 *
 * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment
 * options. The bo will be external and not associated with a VM. The internal
 * validation transaction retries on eviction contention and OOM.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
 * to true on entry.
 */
struct xe_bo *
xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
			     size_t size, u64 offset, enum ttm_bo_type type, u32 flags,
			     u64 alignment, bool intr)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *bo;
	int ret = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
			    ret) {
		bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset,
						     type, flags, alignment, &exec);
		if (IS_ERR(bo)) {
			drm_exec_retry_on_contention(&exec);
			ret = PTR_ERR(bo);
			xe_validation_retry_on_oom(&ctx, &ret);
		}
	}

	return ret ? ERR_PTR(ret) : bo;
}
2781 
/**
 * xe_bo_create_pin_map() - Create pinned and mapped bo
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @vm: The vm to associate the buffer object with. The vm's resv must be locked
 * with the transaction represented by @exec.
 * @size: The storage size to use for the bo.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @exec: The drm_exec transaction to use for exhaustive eviction, and
 * previously used for locking @vm's resv.
 *
 * Create a pinned and mapped bo, associated with @vm if non-NULL.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 * In particular, the function may return ERR_PTR(%-EINTR) if @exec was
 * configured for interruptible locking.
 */
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
				   struct xe_vm *vm, size_t size,
				   enum ttm_bo_type type, u32 flags,
				   struct drm_exec *exec)
{
	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags,
					       0, exec);
}
2810 
/**
 * xe_bo_create_pin_map_novm() - Create pinned and mapped bo
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @size: The storage size to use for the bo.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @intr: Whether to execute any waits for backing store interruptible.
 *
 * Create a pinned and mapped bo. The bo will be external and not associated
 * with a VM. Convenience wrapper around xe_bo_create_pin_map_at_novm() with
 * no fixed VRAM offset and no GGTT alignment restriction.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
 * to true on entry.
 */
struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
					size_t size, enum ttm_bo_type type, u32 flags,
					bool intr)
{
	return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr);
}
2834 
/* devm action adapter: void * argument for devm_add_action_or_reset(). */
static void __xe_bo_unpin_map_no_vm(void *arg)
{
	xe_bo_unpin_map_no_vm(arg);
}
2839 
/**
 * xe_managed_bo_create_pin_map() - Create a driver-managed pinned and mapped bo
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any.
 * @size: The storage size to use for the bo.
 * @flags: XE_BO_FLAG_ flags.
 *
 * Create a kernel bo that is pinned and mapped, and register a devm action
 * so it is automatically unpinned, unmapped and released on driver teardown.
 * If registering the action fails, the bo is released immediately by
 * devm_add_action_or_reset().
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
					   size_t size, u32 flags)
{
	struct xe_bo *bo;
	int ret;

	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
	bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true);
	if (IS_ERR(bo))
		return bo;

	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
	if (ret)
		return ERR_PTR(ret);

	return bo;
}
2857 
/**
 * xe_managed_bo_unpin_map_no_vm() - Release a managed bo early
 * @bo: Buffer object created with xe_managed_bo_create_pin_map().
 *
 * Runs (and removes) the devm release action now instead of waiting for
 * driver teardown.
 */
void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo)
{
	devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo);
}
2862 
/**
 * xe_managed_bo_create_from_data() - Create a managed bo with initial contents
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any.
 * @data: The bytes to copy into the new bo's kernel mapping.
 * @size: Number of bytes in @data; the bo size is this rounded up to PAGE_SIZE.
 * @flags: XE_BO_FLAG_ flags.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
					     const void *data, size_t size, u32 flags)
{
	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);

	if (IS_ERR(bo))
		return bo;

	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);

	return bo;
}
2875 
/**
 * xe_managed_bo_reinit_in_vram
 * @xe: xe device
 * @tile: Tile where the new buffer will be created
 * @src: Managed buffer object allocated in system memory
 *
 * Replace a managed src buffer object allocated in system memory with a new
 * one allocated in vram, copying the data between them.
 * Buffer object in VRAM is not going to have the same GGTT address, the caller
 * is responsible for making sure that any old references to it are updated.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
{
	struct xe_bo *bo;
	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;

	/* Carry over only the flags that affect restore/invalidate behavior. */
	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
				      XE_BO_FLAG_PINNED_NORESTORE);

	xe_assert(xe, IS_DGFX(xe));
	/* The source must be CPU-addressable directly, not via iomem. */
	xe_assert(xe, !(*src)->vmap.is_iomem);

	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
					    xe_bo_size(*src), dst_flags);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	/* Release the old managed bo via its devm action, then swap pointers. */
	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
	*src = bo;

	return 0;
}
2910 
2911 /*
2912  * XXX: This is in the VM bind data path, likely should calculate this once and
2913  * store, with a recalculation if the BO is moved.
2914  */
2915 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2916 {
2917 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2918 
2919 	switch (res->mem_type) {
2920 	case XE_PL_STOLEN:
2921 		return xe_ttm_stolen_gpu_offset(xe);
2922 	case XE_PL_TT:
2923 	case XE_PL_SYSTEM:
2924 		return 0;
2925 	default:
2926 		return res_to_mem_region(res)->dpa_base;
2927 	}
2928 	return 0;
2929 }
2930 
/**
 * xe_bo_pin_external - pin an external BO
 * @bo: buffer object to be pinned
 * @in_place: Pin in current placement, don't attempt to migrate.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 * BO. Unique call compared to xe_bo_pin as this function has it own set of
 * asserts and code to ensure evict / restore on suspend / resume.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec)
{
	struct xe_device *xe = xe_bo_device(bo);
	int err;

	xe_assert(xe, !bo->vm);
	xe_assert(xe, xe_bo_is_user(bo));

	/* Only the first pin validates and joins the suspend/resume tracking list. */
	if (!xe_bo_is_pinned(bo)) {
		if (!in_place) {
			err = xe_bo_validate(bo, NULL, false, exec);
			if (err)
				return err;
		}

		spin_lock(&xe->pinned.lock);
		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
		spin_unlock(&xe->pinned.lock);
	}

	ttm_bo_pin(&bo->ttm);
	/* Pinned pages no longer count towards the shrinkable TT accounting. */
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);

	/*
	 * FIXME: If we always use the reserve / unreserve functions for locking
	 * we do not need this.
	 */
	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

	return 0;
}
2975 
/**
 * xe_bo_pin() - Pin a kernel bo after potentially migrating it
 * @bo: The kernel bo to pin.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Attempts to migrate a bo to @bo->placement. If that succeeds,
 * pins the bo.
 *
 * Return: %0 on success, negative error code on migration failure.
 */
int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec)
{
	struct ttm_place *place = &bo->placements[0];
	struct xe_device *xe = xe_bo_device(bo);
	int err;

	/* We currently don't expect user BO to be pinned */
	xe_assert(xe, !xe_bo_is_user(bo));

	/* Pinned object must be in GGTT or have pinned flag */
	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
				   XE_BO_FLAG_GGTT));

	/*
	 * No reason we can't support pinning imported dma-bufs we just don't
	 * expect to pin an imported dma-buf.
	 */
	xe_assert(xe, !bo->ttm.base.import_attach);

	/* We only expect at most 1 pin */
	xe_assert(xe, !xe_bo_is_pinned(bo));

	/* Migrate to the preferred placement before pinning. */
	err = xe_bo_validate(bo, NULL, false, exec);
	if (err)
		return err;

	/*
	 * VRAM- or GGTT-backed pinned bos must be tracked so their contents
	 * can be evicted/restored across suspend/resume.
	 */
	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
		spin_lock(&xe->pinned.lock);
		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
		else
			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
		spin_unlock(&xe->pinned.lock);
	}

	ttm_bo_pin(&bo->ttm);
	/* Pinned pages no longer count towards the shrinkable TT accounting. */
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);

	/*
	 * FIXME: If we always use the reserve / unreserve functions for locking
	 * we do not need this.
	 */
	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

	return 0;
}
3033 
3034 /**
3035  * xe_bo_unpin_external - unpin an external BO
3036  * @bo: buffer object to be unpinned
3037  *
3038  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
3039  * BO. Unique call compared to xe_bo_unpin as this function has it own set of
3040  * asserts and code to ensure evict / restore on suspend / resume.
3041  *
3042  * Returns 0 for success, negative error code otherwise.
3043  */
3044 void xe_bo_unpin_external(struct xe_bo *bo)
3045 {
3046 	struct xe_device *xe = xe_bo_device(bo);
3047 
3048 	xe_assert(xe, !bo->vm);
3049 	xe_assert(xe, xe_bo_is_pinned(bo));
3050 	xe_assert(xe, xe_bo_is_user(bo));
3051 
3052 	spin_lock(&xe->pinned.lock);
3053 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
3054 		list_del_init(&bo->pinned_link);
3055 	spin_unlock(&xe->pinned.lock);
3056 
3057 	ttm_bo_unpin(&bo->ttm);
3058 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
3059 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
3060 
3061 	/*
3062 	 * FIXME: If we always use the reserve / unreserve functions for locking
3063 	 * we do not need this.
3064 	 */
3065 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
3066 }
3067 
/**
 * xe_bo_unpin() - Unpin a kernel bo
 * @bo: The kernel bo to unpin.
 *
 * Counterpart to xe_bo_pin(): drops the TTM pin, removes the bo from the
 * device's pinned-object tracking and releases any suspend / resume backup
 * object created while pinned.
 */
void xe_bo_unpin(struct xe_bo *bo)
{
	struct ttm_place *place = &bo->placements[0];
	struct xe_device *xe = xe_bo_device(bo);

	xe_assert(xe, !bo->ttm.base.import_attach);
	xe_assert(xe, xe_bo_is_pinned(bo));

	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
		spin_lock(&xe->pinned.lock);
		xe_assert(xe, !list_empty(&bo->pinned_link));
		list_del_init(&bo->pinned_link);
		spin_unlock(&xe->pinned.lock);

		/* Release the suspend / resume backup object, if any. */
		if (bo->backup_obj) {
			if (xe_bo_is_pinned(bo->backup_obj))
				ttm_bo_unpin(&bo->backup_obj->ttm);
			xe_bo_put(bo->backup_obj);
			bo->backup_obj = NULL;
		}
	}
	ttm_bo_unpin(&bo->ttm);
	/* Pages re-enter the TT page accounting once unpinned. */
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
}
3093 
3094 /**
3095  * xe_bo_validate() - Make sure the bo is in an allowed placement
3096  * @bo: The bo,
3097  * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or
3098  *      NULL. Used together with @allow_res_evict.
3099  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
3100  *                   reservation object.
3101  * @exec: The drm_exec transaction to use for exhaustive eviction.
3102  *
3103  * Make sure the bo is in allowed placement, migrating it if necessary. If
3104  * needed, other bos will be evicted. If bos selected for eviction shares
3105  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
3106  * set to true, otherwise they will be bypassed.
3107  *
3108  * Return: 0 on success, negative error code on failure. May return
3109  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
3110  */
3111 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
3112 		   struct drm_exec *exec)
3113 {
3114 	struct ttm_operation_ctx ctx = {
3115 		.interruptible = true,
3116 		.no_wait_gpu = false,
3117 		.gfp_retry_mayfail = true,
3118 	};
3119 	int ret;
3120 
3121 	if (xe_bo_is_pinned(bo))
3122 		return 0;
3123 
3124 	if (vm) {
3125 		lockdep_assert_held(&vm->lock);
3126 		xe_vm_assert_held(vm);
3127 
3128 		ctx.allow_res_evict = allow_res_evict;
3129 		ctx.resv = xe_vm_resv(vm);
3130 	}
3131 
3132 	xe_vm_set_validating(vm, allow_res_evict);
3133 	trace_xe_bo_validate(bo);
3134 	xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
3135 	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
3136 	xe_vm_clear_validating(vm, allow_res_evict);
3137 
3138 	return ret;
3139 }
3140 
3141 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
3142 {
3143 	if (bo->destroy == &xe_ttm_bo_destroy)
3144 		return true;
3145 
3146 	return false;
3147 }
3148 
3149 /*
3150  * Resolve a BO address. There is no assert to check if the proper lock is held
3151  * so it should only be used in cases where it is not fatal to get the wrong
3152  * address, such as printing debug information, but not in cases where memory is
3153  * written based on this result.
3154  */
3155 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
3156 {
3157 	struct xe_device *xe = xe_bo_device(bo);
3158 	struct xe_res_cursor cur;
3159 	u64 page;
3160 
3161 	xe_assert(xe, page_size <= PAGE_SIZE);
3162 	page = offset >> PAGE_SHIFT;
3163 	offset &= (PAGE_SIZE - 1);
3164 
3165 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
3166 		xe_assert(xe, bo->ttm.ttm);
3167 
3168 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
3169 				page_size, &cur);
3170 		return xe_res_dma(&cur) + offset;
3171 	} else {
3172 		struct xe_res_cursor cur;
3173 
3174 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
3175 			     page_size, &cur);
3176 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
3177 	}
3178 }
3179 
/* As __xe_bo_addr(), but asserts the bo is locked unless it is pinned. */
dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
{
	/* Unpinned bos may move; require the reservation lock for a stable address. */
	if (!READ_ONCE(bo->ttm.pin_count))
		xe_bo_assert_held(bo);
	return __xe_bo_addr(bo, offset, page_size);
}
3186 
/**
 * xe_bo_vmap() - Map the whole bo into the kernel address space
 * @bo: The bo to map. Must be locked, marked for CPU access and
 *      physically contiguous.
 *
 * Populates @bo->vmap (backed by @bo->kmap) so the bo contents can be
 * accessed through the iosys_map interface. A no-op if already mapped.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_bo_vmap(struct xe_bo *bo)
{
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	void *virtual;
	bool is_iomem;
	int ret;

	xe_bo_assert_held(bo);

	/* Mapping requires CPU access and a contiguous physical backing. */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
			!force_contiguous(bo->flags)))
		return -EINVAL;

	if (!iosys_map_is_null(&bo->vmap))
		return 0;

	/*
	 * We use this more or less deprecated interface for now since
	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
	 * single page bos, which is done here.
	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
	 * to use struct iosys_map.
	 */
	ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
	if (ret)
		return ret;

	/* Record whether the mapping is I/O memory so accessors use the right ops. */
	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (is_iomem)
		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
	else
		iosys_map_set_vaddr(&bo->vmap, virtual);

	return 0;
}
3222 
3223 static void __xe_bo_vunmap(struct xe_bo *bo)
3224 {
3225 	if (!iosys_map_is_null(&bo->vmap)) {
3226 		iosys_map_clear(&bo->vmap);
3227 		ttm_bo_kunmap(&bo->kmap);
3228 	}
3229 }
3230 
/**
 * xe_bo_vunmap() - Tear down a mapping created by xe_bo_vmap()
 * @bo: The bo to unmap. Must be locked.
 */
void xe_bo_vunmap(struct xe_bo *bo)
{
	xe_bo_assert_held(bo);
	__xe_bo_vunmap(bo);
}
3236 
/* Handle the PXP_TYPE gem-create property: assign a PXP key if requested. */
static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
{
	/* NONE means no PXP protection was requested; nothing to do. */
	if (value == DRM_XE_PXP_TYPE_NONE)
		return 0;

	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
		return -EINVAL;

	return xe_pxp_key_assign(xe->pxp, bo);
}
3248 
/* Handler signature for one DRM_XE_GEM_CREATE_SET_PROPERTY_* property. */
typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
					     struct xe_bo *bo,
					     u64 value);

/* Dispatch table indexed by DRM_XE_GEM_CREATE_SET_PROPERTY_* values. */
static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
};
3256 
/* Decode and dispatch a single gem-create set_property extension entry. */
static int gem_create_user_ext_set_property(struct xe_device *xe,
					    struct xe_bo *bo,
					    u64 extension)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	/*
	 * NOTE(review): the last check compares ext.property against the
	 * extension *name* define rather than a property define; this only
	 * accepts property 0, which works while both defines are 0 —
	 * confirm intent.
	 */
	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
		return -EINVAL;

	/* Clamp the table index against speculative execution. */
	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
	if (!gem_create_set_property_funcs[idx])
		return -EINVAL;

	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
}
3282 
/* Handler signature for one DRM_XE_GEM_CREATE_EXTENSION_* user extension. */
typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
					       struct xe_bo *bo,
					       u64 extension);

/* Dispatch table indexed by DRM_XE_GEM_CREATE_EXTENSION_* values. */
static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
};
3290 
#define MAX_USER_EXTENSIONS	16
/*
 * Walk the user-supplied extension chain, dispatching each entry by name.
 * The chain is walked recursively; depth is bounded by MAX_USER_EXTENSIONS.
 */
static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
				      u64 extensions, int ext_number)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
		return -EINVAL;

	/* Clamp the table index against speculative execution. */
	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(gem_create_user_extension_funcs));
	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	/* Follow the chain to the next extension, if any. */
	if (ext.next_extension)
		return gem_create_user_extensions(xe, bo, ext.next_extension,
						  ++ext_number);

	return 0;
}
3323 
/**
 * xe_gem_create_ioctl() - Handle DRM_IOCTL_XE_GEM_CREATE
 * @dev: The drm device.
 * @data: Pointer to the struct drm_xe_gem_create ioctl arguments.
 * @file: The drm file issuing the ioctl.
 *
 * Validates the userspace arguments, creates a user bo (optionally tied
 * to a vm) and returns a gem handle for it in @data->handle.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_gem_create_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_gem_create *args = data;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm = NULL;
	struct xe_bo *bo;
	unsigned int bo_flags;
	u32 handle;
	int err;

	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	/* at least one valid memory placement must be specified */
	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
			 !args->placement))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags &
			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
			   DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->handle))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !args->size))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
		return -EINVAL;

	/* size must be page-aligned */
	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
		return -EINVAL;

	bo_flags = 0;
	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
		bo_flags |= XE_BO_FLAG_DEFER_BACKING;

	/*
	 * Display scanout is always non-coherent with the CPU cache.
	 */
	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
		bo_flags |= XE_BO_FLAG_FORCE_WC;

	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) {
		/* NO_COMPRESSION is only meaningful on Xe2+. */
		if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20))
			return -EOPNOTSUPP;
		bo_flags |= XE_BO_FLAG_NO_COMPRESSION;
	}

	/* Translate the uapi placement mask into XE_BO_FLAG_* placement bits. */
	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);

	/* CCS formats need physical placement at a 64K alignment in VRAM. */
	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
	    (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) &&
	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
	    IS_ALIGNED(args->size, SZ_64K))
		bo_flags |= XE_BO_FLAG_NEEDS_64K;

	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
		/* Visible-VRAM only makes sense together with a VRAM placement. */
		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
			return -EINVAL;

		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
	}

	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
		return -EINVAL;

	/* VRAM placements require WC CPU caching. */
	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
		return -EINVAL;

	/* WB caching conflicts with the forced-WC of scanout buffers. */
	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_FORCE_WC &&
			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
		return -EINVAL;

	if (args->vm_id) {
		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;
	}

	err = 0;
	/* The validation guard retries on eviction contention and OOM. */
	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		if (vm) {
			err = xe_vm_drm_exec_lock(vm, &exec);
			drm_exec_retry_on_contention(&exec);
			if (err)
				break;
		}
		bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
				       bo_flags, &exec);
		drm_exec_retry_on_contention(&exec);
		if (IS_ERR(bo)) {
			err = PTR_ERR(bo);
			xe_validation_retry_on_oom(&ctx, &err);
			break;
		}
	}
	if (err)
		goto out_vm;

	if (args->extensions) {
		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
		if (err)
			goto out_bulk;
	}

	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
	if (err)
		goto out_bulk;

	args->handle = handle;
	goto out_put;

out_bulk:
	/* Undo the vm bulk-move setup done at creation before dropping the bo. */
	if (vm && !xe_vm_in_fault_mode(vm)) {
		xe_vm_lock(vm, false);
		__xe_bo_unset_bulk_move(bo);
		xe_vm_unlock(vm);
	}
out_put:
	xe_bo_put(bo);
out_vm:
	if (vm)
		xe_vm_put(vm);

	return err;
}
3464 
/**
 * xe_gem_mmap_offset_ioctl() - Handle DRM_IOCTL_XE_GEM_MMAP_OFFSET
 * @dev: The drm device.
 * @data: Pointer to the struct drm_xe_gem_mmap_offset ioctl arguments.
 * @file: The drm file issuing the ioctl.
 *
 * Returns in @data->offset the fake offset to mmap() a gem object, or the
 * special PCI-barrier offset when that flag is set.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct drm_xe_gem_mmap_offset *args = data;
	struct drm_gem_object *gem_obj;

	if (XE_IOCTL_DBG(xe, args->extensions) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->flags &
			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
		return -EINVAL;

	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
		/* PCI-barrier mapping is dGPU-only and takes no handle. */
		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
			return -EINVAL;

		if (XE_IOCTL_DBG(xe, args->handle))
			return -EINVAL;

		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
			return -EINVAL;

		/* The fixed barrier offset must not collide with gem offsets. */
		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
		return 0;
	}

	gem_obj = drm_gem_object_lookup(file, args->handle);
	if (XE_IOCTL_DBG(xe, !gem_obj))
		return -ENOENT;

	/* The mmap offset was set up at BO allocation time. */
	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);

	xe_bo_put(gem_to_xe_bo(gem_obj));
	return 0;
}
3506 
3507 /**
3508  * xe_bo_decompress - schedule in-place decompress and install fence
3509  * @bo: buffer object (caller should hold drm_exec reservations for VM+BO)
3510  *
3511  * Schedules an in-place resolve via the migrate layer and installs the
3512  * returned dma_fence into the BO kernel reservation slot (DMA_RESV_USAGE_KERNEL).
3513  * In preempt fence mode, this operation interrupts hardware execution
3514  * which is expensive. Page fault mode is recommended for better performance.
3515  *
3516  * The resolve path only runs for VRAM-backed buffers (currently dGPU-only);
3517  * iGPU/system-memory objects fail the resource check and bypass the resolve.
3518  *
3519  * Returns 0 on success, negative errno on error.
3520  */
3521 int xe_bo_decompress(struct xe_bo *bo)
3522 {
3523 	struct xe_device *xe = xe_bo_device(bo);
3524 	struct xe_tile *tile = xe_device_get_root_tile(xe);
3525 	struct dma_fence *decomp_fence = NULL;
3526 	struct ttm_operation_ctx op_ctx = {
3527 		.interruptible = true,
3528 		.no_wait_gpu = false,
3529 		.gfp_retry_mayfail = false,
3530 	};
3531 	int err = 0;
3532 
3533 	/* Silently skip decompression for non-VRAM buffers */
3534 	if (!bo->ttm.resource || !mem_type_is_vram(bo->ttm.resource->mem_type))
3535 		return 0;
3536 
3537 	/* Notify before scheduling resolve */
3538 	err = xe_bo_move_notify(bo, &op_ctx);
3539 	if (err)
3540 		return err;
3541 
3542 	/* Reserve fence slot before scheduling */
3543 	err = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
3544 	if (err)
3545 		return err;
3546 
3547 	/* Schedule the in-place decompression */
3548 	decomp_fence = xe_migrate_resolve(tile->migrate,
3549 					  bo,
3550 					  bo->ttm.resource);
3551 
3552 	if (IS_ERR(decomp_fence))
3553 		return PTR_ERR(decomp_fence);
3554 
3555 	/* Install kernel-usage fence */
3556 	dma_resv_add_fence(bo->ttm.base.resv, decomp_fence, DMA_RESV_USAGE_KERNEL);
3557 	dma_fence_put(decomp_fence);
3558 
3559 	return 0;
3560 }
3561 
3562 /**
3563  * xe_bo_lock() - Lock the buffer object's dma_resv object
3564  * @bo: The struct xe_bo whose lock is to be taken
3565  * @intr: Whether to perform any wait interruptible
3566  *
3567  * Locks the buffer object's dma_resv object. If the buffer object is
3568  * pointing to a shared dma_resv object, that shared lock is locked.
3569  *
3570  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3571  * contended lock was interrupted. If @intr is set to false, the
3572  * function always returns 0.
3573  */
3574 int xe_bo_lock(struct xe_bo *bo, bool intr)
3575 {
3576 	if (intr)
3577 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
3578 
3579 	dma_resv_lock(bo->ttm.base.resv, NULL);
3580 
3581 	return 0;
3582 }
3583 
3584 /**
3585  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
3586  * @bo: The struct xe_bo whose lock is to be released.
3587  *
3588  * Unlock a buffer object lock that was locked by xe_bo_lock().
3589  */
3590 void xe_bo_unlock(struct xe_bo *bo)
3591 {
3592 	dma_resv_unlock(bo->ttm.base.resv);
3593 }
3594 
3595 /**
3596  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
3597  * @bo: The buffer object to migrate
3598  * @mem_type: The TTM memory type intended to migrate to
3599  *
3600  * Check whether the buffer object supports migration to the
3601  * given memory type. Note that pinning may affect the ability to migrate as
3602  * returned by this function.
3603  *
3604  * This function is primarily intended as a helper for checking the
3605  * possibility to migrate buffer objects and can be called without
3606  * the object lock held.
3607  *
3608  * Return: true if migration is possible, false otherwise.
3609  */
3610 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
3611 {
3612 	unsigned int cur_place;
3613 
3614 	if (bo->ttm.type == ttm_bo_type_kernel)
3615 		return true;
3616 
3617 	if (bo->ttm.type == ttm_bo_type_sg)
3618 		return false;
3619 
3620 	for (cur_place = 0; cur_place < bo->placement.num_placement;
3621 	     cur_place++) {
3622 		if (bo->placements[cur_place].mem_type == mem_type)
3623 			return true;
3624 	}
3625 
3626 	return false;
3627 }
3628 
3629 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
3630 {
3631 	memset(place, 0, sizeof(*place));
3632 	place->mem_type = mem_type;
3633 }
3634 
3635 /**
3636  * xe_bo_migrate - Migrate an object to the desired region id
3637  * @bo: The buffer object to migrate.
3638  * @mem_type: The TTM region type to migrate to.
3639  * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
3640  * a default interruptibe ctx is to be used.
3641  * @exec: The drm_exec transaction to use for exhaustive eviction.
3642  *
3643  * Attempt to migrate the buffer object to the desired memory region. The
3644  * buffer object may not be pinned, and must be locked.
3645  * On successful completion, the object memory type will be updated,
3646  * but an async migration task may not have completed yet, and to
3647  * accomplish that, the object's kernel fences must be signaled with
3648  * the object lock held.
3649  *
3650  * Return: 0 on success. Negative error code on failure. In particular may
3651  * return -EINTR or -ERESTARTSYS if signal pending.
3652  */
3653 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
3654 		  struct drm_exec *exec)
3655 {
3656 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
3657 	struct ttm_operation_ctx ctx = {
3658 		.interruptible = true,
3659 		.no_wait_gpu = false,
3660 		.gfp_retry_mayfail = true,
3661 	};
3662 	struct ttm_placement placement;
3663 	struct ttm_place requested;
3664 
3665 	xe_bo_assert_held(bo);
3666 	tctx = tctx ? tctx : &ctx;
3667 
3668 	if (bo->ttm.resource->mem_type == mem_type)
3669 		return 0;
3670 
3671 	if (xe_bo_is_pinned(bo))
3672 		return -EBUSY;
3673 
3674 	if (!xe_bo_can_migrate(bo, mem_type))
3675 		return -EINVAL;
3676 
3677 	xe_place_from_ttm_type(mem_type, &requested);
3678 	placement.num_placement = 1;
3679 	placement.placement = &requested;
3680 
3681 	/*
3682 	 * Stolen needs to be handled like below VRAM handling if we ever need
3683 	 * to support it.
3684 	 */
3685 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
3686 
3687 	if (mem_type_is_vram(mem_type)) {
3688 		u32 c = 0;
3689 
3690 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
3691 	}
3692 
3693 	if (!tctx->no_wait_gpu)
3694 		xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
3695 	return ttm_bo_validate(&bo->ttm, &placement, tctx);
3696 }
3697 
3698 /**
3699  * xe_bo_evict - Evict an object to evict placement
3700  * @bo: The buffer object to migrate.
3701  * @exec: The drm_exec transaction to use for exhaustive eviction.
3702  *
3703  * On successful completion, the object memory will be moved to evict
3704  * placement. This function blocks until the object has been fully moved.
3705  *
3706  * Return: 0 on success. Negative error code on failure.
3707  */
3708 int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec)
3709 {
3710 	struct ttm_operation_ctx ctx = {
3711 		.interruptible = false,
3712 		.no_wait_gpu = false,
3713 		.gfp_retry_mayfail = true,
3714 	};
3715 	struct ttm_placement placement;
3716 	int ret;
3717 
3718 	xe_evict_flags(&bo->ttm, &placement);
3719 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
3720 	if (ret)
3721 		return ret;
3722 
3723 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
3724 			      false, MAX_SCHEDULE_TIMEOUT);
3725 
3726 	return 0;
3727 }
3728 
3729 /**
3730  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
3731  * placed in system memory.
3732  * @bo: The xe_bo
3733  *
3734  * Return: true if extra pages need to be allocated, false otherwise.
3735  */
3736 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
3737 {
3738 	struct xe_device *xe = xe_bo_device(bo);
3739 
3740 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
3741 		return false;
3742 
3743 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
3744 		return false;
3745 
3746 	/* On discrete GPUs, if the GPU can access this buffer from
3747 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
3748 	 * can't be used since there's no CCS storage associated with
3749 	 * non-VRAM addresses.
3750 	 */
3751 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
3752 		return false;
3753 
3754 	/* Check if userspace explicitly requested no compression */
3755 	if (bo->flags & XE_BO_FLAG_NO_COMPRESSION)
3756 		return false;
3757 
3758 	/*
3759 	 * For WB (Write-Back) CPU caching mode, check if the device
3760 	 * supports WB compression with coherency.
3761 	 */
3762 	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB &&
3763 	    xe->pat.idx[XE_CACHE_WB_COMPRESSION] == XE_PAT_INVALID_IDX)
3764 		return false;
3765 
3766 	return true;
3767 }
3768 
3769 /**
3770  * __xe_bo_release_dummy() - Dummy kref release function
3771  * @kref: The embedded struct kref.
3772  *
3773  * Dummy release function for xe_bo_put_deferred(). Keep off.
3774  */
3775 void __xe_bo_release_dummy(struct kref *kref)
3776 {
3777 }
3778 
3779 /**
3780  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
3781  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
3782  *
3783  * Puts all bos whose put was deferred by xe_bo_put_deferred().
3784  * The @deferred list can be either an onstack local list or a global
3785  * shared list used by a workqueue.
3786  */
3787 void xe_bo_put_commit(struct llist_head *deferred)
3788 {
3789 	struct llist_node *freed;
3790 	struct xe_bo *bo, *next;
3791 
3792 	if (!deferred)
3793 		return;
3794 
3795 	freed = llist_del_all(deferred);
3796 	if (!freed)
3797 		return;
3798 
3799 	llist_for_each_entry_safe(bo, next, freed, freed)
3800 		drm_gem_object_free(&bo->ttm.base.refcount);
3801 }
3802 
/* Workqueue callback that frees all bos queued for asynchronous release. */
static void xe_bo_dev_work_func(struct work_struct *work)
{
	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);

	xe_bo_put_commit(&bo_dev->async_list);
}
3809 
3810 /**
3811  * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
3812  * @bo_dev: The BO dev structure
3813  */
3814 void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
3815 {
3816 	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
3817 }
3818 
3819 /**
3820  * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
3821  * @bo_dev: The BO dev structure
3822  */
3823 void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
3824 {
3825 	flush_work(&bo_dev->async_free);
3826 }
3827 
/**
 * xe_bo_put() - Drop a reference to a bo
 * @bo: The bo to put, may be NULL.
 *
 * May sleep; the lockdep annotations below record the locks the final
 * put can take, so lock-order issues surface on every put.
 */
void xe_bo_put(struct xe_bo *bo)
{
	struct xe_tile *tile;
	u8 id;

	might_sleep();
	if (bo) {
#ifdef CONFIG_PROC_FS
		/* Prime lockdep: the final put may take the client's bos_lock. */
		if (bo->client)
			might_lock(&bo->client->bos_lock);
#endif
		/* Likewise for the GGTT lock of each tile the bo has a node in. */
		for_each_tile(tile, xe_bo_device(bo), id)
			if (bo->ggtt_node[id])
				xe_ggtt_might_lock(tile->mem.ggtt);
		drm_gem_object_put(&bo->ttm.base);
	}
}
3845 
3846 /**
3847  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
3848  * @file_priv: ...
3849  * @dev: ...
3850  * @args: ...
3851  *
3852  * See dumb_create() hook in include/drm/drm_drv.h
3853  *
3854  * Return: ...
3855  */
3856 int xe_bo_dumb_create(struct drm_file *file_priv,
3857 		      struct drm_device *dev,
3858 		      struct drm_mode_create_dumb *args)
3859 {
3860 	struct xe_device *xe = to_xe_device(dev);
3861 	struct xe_bo *bo;
3862 	uint32_t handle;
3863 	int err;
3864 	u32 page_size = max_t(u32, PAGE_SIZE,
3865 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
3866 
3867 	err = drm_mode_size_dumb(dev, args, SZ_64, page_size);
3868 	if (err)
3869 		return err;
3870 
3871 	bo = xe_bo_create_user(xe, NULL, args->size,
3872 			       DRM_XE_GEM_CPU_CACHING_WC,
3873 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
3874 			       XE_BO_FLAG_FORCE_WC |
3875 			       XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
3876 	if (IS_ERR(bo))
3877 		return PTR_ERR(bo);
3878 
3879 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
3880 	/* drop reference from allocate - handle holds it now */
3881 	drm_gem_object_put(&bo->ttm.base);
3882 	if (!err)
3883 		args->handle = handle;
3884 	return err;
3885 }
3886 
/**
 * xe_bo_runtime_pm_release_mmap_offset() - Invalidate CPU mappings of a bo
 * @bo: The bo whose mmap range is to be unmapped.
 *
 * Unmaps the bo's vma node from all user address spaces and removes the
 * bo from the VRAM userfault tracking list, so the next CPU access
 * faults in a fresh mapping.
 */
void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
{
	struct ttm_buffer_object *tbo = &bo->ttm;
	struct ttm_device *bdev = tbo->bdev;

	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);

	list_del_init(&bo->vram_userfault_link);
}
3896 
3897 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
3898 #include "tests/xe_bo.c"
3899 #endif
3900