xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 6f17ab9a63e670bd62a287f95e3982f99eafd77e)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_gem_ttm_helper.h>
13 #include <drm/drm_managed.h>
14 #include <drm/ttm/ttm_backup.h>
15 #include <drm/ttm/ttm_device.h>
16 #include <drm/ttm/ttm_placement.h>
17 #include <drm/ttm/ttm_tt.h>
18 #include <uapi/drm/xe_drm.h>
19 
20 #include <kunit/static_stub.h>
21 
22 #include <trace/events/gpu_mem.h>
23 
24 #include "xe_device.h"
25 #include "xe_dma_buf.h"
26 #include "xe_drm_client.h"
27 #include "xe_ggtt.h"
28 #include "xe_gt.h"
29 #include "xe_map.h"
30 #include "xe_migrate.h"
31 #include "xe_pm.h"
32 #include "xe_preempt_fence.h"
33 #include "xe_pxp.h"
34 #include "xe_res_cursor.h"
35 #include "xe_shrinker.h"
36 #include "xe_sriov_vf_ccs.h"
37 #include "xe_trace_bo.h"
38 #include "xe_ttm_stolen_mgr.h"
39 #include "xe_vm.h"
40 #include "xe_vram_types.h"
41 
42 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
43 	[XE_PL_SYSTEM] = "system",
44 	[XE_PL_TT] = "gtt",
45 	[XE_PL_VRAM0] = "vram0",
46 	[XE_PL_VRAM1] = "vram1",
47 	[XE_PL_STOLEN] = "stolen"
48 };
49 
50 static const struct ttm_place sys_placement_flags = {
51 	.fpfn = 0,
52 	.lpfn = 0,
53 	.mem_type = XE_PL_SYSTEM,
54 	.flags = 0,
55 };
56 
57 static struct ttm_placement sys_placement = {
58 	.num_placement = 1,
59 	.placement = &sys_placement_flags,
60 };
61 
62 static struct ttm_placement purge_placement;
63 
64 static const struct ttm_place tt_placement_flags[] = {
65 	{
66 		.fpfn = 0,
67 		.lpfn = 0,
68 		.mem_type = XE_PL_TT,
69 		.flags = TTM_PL_FLAG_DESIRED,
70 	},
71 	{
72 		.fpfn = 0,
73 		.lpfn = 0,
74 		.mem_type = XE_PL_SYSTEM,
75 		.flags = TTM_PL_FLAG_FALLBACK,
76 	}
77 };
78 
79 static struct ttm_placement tt_placement = {
80 	.num_placement = 2,
81 	.placement = tt_placement_flags,
82 };
83 
84 bool mem_type_is_vram(u32 mem_type)
85 {
86 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
87 }
88 
89 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
90 {
91 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
92 }
93 
94 static bool resource_is_vram(struct ttm_resource *res)
95 {
96 	return mem_type_is_vram(res->mem_type);
97 }
98 
99 bool xe_bo_is_vram(struct xe_bo *bo)
100 {
101 	return resource_is_vram(bo->ttm.resource) ||
102 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
103 }
104 
105 bool xe_bo_is_stolen(struct xe_bo *bo)
106 {
107 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
108 }
109 
110 /**
111  * xe_bo_has_single_placement - check if BO is placed only in one memory location
112  * @bo: The BO
113  *
114  * This function checks whether a given BO is placed in only one memory location.
115  *
116  * Returns: true if the BO is placed in a single memory location, false otherwise.
117  *
118  */
119 bool xe_bo_has_single_placement(struct xe_bo *bo)
120 {
121 	return bo->placement.num_placement == 1;
122 }
123 
124 /**
125  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
126  * @bo: The BO
127  *
128  * The stolen memory is accessed through the PCI BAR for both DGFX and some
129  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
130  *
131  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
132  */
133 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
134 {
135 	return xe_bo_is_stolen(bo) &&
136 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
137 }
138 
139 /**
140  * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
141  * @bo: The BO
142  *
143  * Check if a given bo is bound through VM_BIND. This requires the
144  * reservation lock for the BO to be held.
145  *
146  * Returns: boolean
147  */
148 bool xe_bo_is_vm_bound(struct xe_bo *bo)
149 {
150 	xe_bo_assert_held(bo);
151 
152 	return !list_empty(&bo->ttm.base.gpuva.list);
153 }
154 
155 static bool xe_bo_is_user(struct xe_bo *bo)
156 {
157 	return bo->flags & XE_BO_FLAG_USER;
158 }
159 
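/*
 * Look up the migrate engine of the tile backing a VRAM (or stolen) memory
 * type; stolen is handled by tile 0.
 */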
160 static struct xe_migrate *
161 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
162 {
163 	struct xe_tile *tile;
164 
165 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
166 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
167 	return tile->migrate;
168 }
169 
170 static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
171 {
172 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
173 	struct ttm_resource_manager *mgr;
174 	struct xe_ttm_vram_mgr *vram_mgr;
175 
176 	xe_assert(xe, resource_is_vram(res));
177 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
178 	vram_mgr = to_xe_ttm_vram_mgr(mgr);
179 
180 	return container_of(vram_mgr, struct xe_vram_region, ttm);
181 }
182 
183 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
184 			   u32 bo_flags, u32 *c)
185 {
186 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
187 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
188 
189 		bo->placements[*c] = (struct ttm_place) {
190 			.mem_type = XE_PL_TT,
191 			.flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
192 			TTM_PL_FLAG_FALLBACK : 0,
193 		};
194 		*c += 1;
195 	}
196 }
197 
198 static bool force_contiguous(u32 bo_flags)
199 {
200 	if (bo_flags & XE_BO_FLAG_STOLEN)
201 		return true; /* users expect this */
202 	else if (bo_flags & XE_BO_FLAG_PINNED &&
203 		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
204 		return true; /* needs vmap */
205 	else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
206 		return true;
207 
208 	/*
209 	 * For eviction / restore on suspend / resume, objects pinned in VRAM
210 	 * must be contiguous. Also, only contiguous BOs support xe_bo_vmap().
211 	 */
212 	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
213 	       bo_flags & XE_BO_FLAG_PINNED;
214 }
215 
216 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
217 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
218 {
219 	struct ttm_place place = { .mem_type = mem_type };
220 	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
221 	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);
222 
223 	struct xe_vram_region *vram;
224 	u64 io_size;
225 
226 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
227 
228 	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
229 	xe_assert(xe, vram && vram->usable_size);
230 	io_size = vram->io_size;
231 
232 	if (force_contiguous(bo_flags))
233 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
234 
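	/*
	 * Small-BAR: when only part of VRAM is CPU-visible, restrict BOs that
	 * need CPU access to the visible portion, and steer everything else
	 * top-down, away from it.
	 */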
235 	if (io_size < vram->usable_size) {
236 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
237 			place.fpfn = 0;
238 			place.lpfn = io_size >> PAGE_SHIFT;
239 		} else {
240 			place.flags |= TTM_PL_FLAG_TOPDOWN;
241 		}
242 	}
243 	places[*c] = place;
244 	*c += 1;
245 }
246 
247 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
248 			 u32 bo_flags, u32 *c)
249 {
250 	if (bo_flags & XE_BO_FLAG_VRAM0)
251 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
252 	if (bo_flags & XE_BO_FLAG_VRAM1)
253 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
254 }
255 
256 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
257 			   u32 bo_flags, u32 *c)
258 {
259 	if (bo_flags & XE_BO_FLAG_STOLEN) {
260 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
261 
262 		bo->placements[*c] = (struct ttm_place) {
263 			.mem_type = XE_PL_STOLEN,
264 			.flags = force_contiguous(bo_flags) ?
265 				TTM_PL_FLAG_CONTIGUOUS : 0,
266 		};
267 		*c += 1;
268 	}
269 }
270 
271 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
272 				       u32 bo_flags)
273 {
274 	u32 c = 0;
275 
276 	try_add_vram(xe, bo, bo_flags, &c);
277 	try_add_system(xe, bo, bo_flags, &c);
278 	try_add_stolen(xe, bo, bo_flags, &c);
279 
280 	if (!c)
281 		return -EINVAL;
282 
283 	bo->placement = (struct ttm_placement) {
284 		.num_placement = c,
285 		.placement = bo->placements,
286 	};
287 
288 	return 0;
289 }
290 
291 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
292 			      u32 bo_flags)
293 {
294 	xe_bo_assert_held(bo);
295 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
296 }
297 
298 static void xe_evict_flags(struct ttm_buffer_object *tbo,
299 			   struct ttm_placement *placement)
300 {
301 	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
302 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
303 	struct xe_bo *bo;
304 
305 	if (!xe_bo_is_xe_bo(tbo)) {
306 		/* Don't handle scatter gather BOs */
307 		if (tbo->type == ttm_bo_type_sg) {
308 			placement->num_placement = 0;
309 			return;
310 		}
311 
312 		*placement = device_unplugged ? purge_placement : sys_placement;
313 		return;
314 	}
315 
316 	bo = ttm_to_xe_bo(tbo);
317 	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
318 		*placement = sys_placement;
319 		return;
320 	}
321 
322 	if (device_unplugged && !tbo->base.dma_buf) {
323 		*placement = purge_placement;
324 		return;
325 	}
326 
327 	/*
328 	 * For xe, sg bos that are evicted to system just trigger a
329 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
330 	 */
331 	switch (tbo->resource->mem_type) {
332 	case XE_PL_VRAM0:
333 	case XE_PL_VRAM1:
334 	case XE_PL_STOLEN:
335 		*placement = tt_placement;
336 		break;
337 	case XE_PL_TT:
338 	default:
339 		*placement = sys_placement;
340 		break;
341 	}
342 }
343 
344 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */
345 struct xe_ttm_tt {
346 	struct ttm_tt ttm;
347 	struct sg_table sgt;
348 	struct sg_table *sg;
349 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
350 	bool purgeable;
351 };
352 
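/* Create and DMA-map an sg table for the TT pages, unless one already exists. */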
353 static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
354 {
355 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
356 	unsigned long num_pages = tt->num_pages;
357 	int ret;
358 
359 	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
360 		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
361 
362 	if (xe_tt->sg)
363 		return 0;
364 
365 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
366 						num_pages, 0,
367 						(u64)num_pages << PAGE_SHIFT,
368 						xe_sg_segment_size(xe->drm.dev),
369 						GFP_KERNEL);
370 	if (ret)
371 		return ret;
372 
373 	xe_tt->sg = &xe_tt->sgt;
374 	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
375 			      DMA_ATTR_SKIP_CPU_SYNC);
376 	if (ret) {
377 		sg_free_table(xe_tt->sg);
378 		xe_tt->sg = NULL;
379 		return ret;
380 	}
381 
382 	return 0;
383 }
384 
385 static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
386 {
387 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
388 
389 	if (xe_tt->sg) {
390 		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
391 				  DMA_BIDIRECTIONAL, 0);
392 		sg_free_table(xe_tt->sg);
393 		xe_tt->sg = NULL;
394 	}
395 }
396 
397 struct sg_table *xe_bo_sg(struct xe_bo *bo)
398 {
399 	struct ttm_tt *tt = bo->ttm.ttm;
400 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
401 
402 	return xe_tt->sg;
403 }
404 
405 /*
406  * Account ttm pages against the device shrinker's shrinkable and
407  * purgeable counts.
408  */
409 static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
410 {
411 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
412 
413 	if (xe_tt->purgeable)
414 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
415 	else
416 		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
417 }
418 
419 static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
420 {
421 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
422 
423 	if (xe_tt->purgeable)
424 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
425 	else
426 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
427 }
428 
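/*
 * Maintain a device-wide page count and emit the gpu_mem_total tracepoint;
 * compiled in only when CONFIG_TRACE_GPU_MEM is enabled.
 */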
429 static void update_global_total_pages(struct ttm_device *ttm_dev,
430 				      long num_pages)
431 {
432 #if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
433 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
434 	u64 global_total_pages =
435 		atomic64_add_return(num_pages, &xe->global_total_pages);
436 
437 	trace_gpu_mem_total(xe->drm.primary->index, 0,
438 			    global_total_pages << PAGE_SHIFT);
439 #endif
440 }
441 
442 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
443 				       u32 page_flags)
444 {
445 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
446 	struct xe_device *xe = xe_bo_device(bo);
447 	struct xe_ttm_tt *xe_tt;
448 	struct ttm_tt *tt;
449 	unsigned long extra_pages;
450 	enum ttm_caching caching = ttm_cached;
451 	int err;
452 
453 	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
454 	if (!xe_tt)
455 		return NULL;
456 
457 	tt = &xe_tt->ttm;
458 
459 	extra_pages = 0;
460 	if (xe_bo_needs_ccs_pages(bo))
461 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
462 					   PAGE_SIZE);
463 
464 	/*
465 	 * DGFX system memory is always WB / ttm_cached, since
466 	 * other caching modes are only supported on x86. DGFX
467 	 * GPU system memory accesses are always coherent with the
468 	 * CPU.
469 	 */
470 	if (!IS_DGFX(xe)) {
471 		switch (bo->cpu_caching) {
472 		case DRM_XE_GEM_CPU_CACHING_WC:
473 			caching = ttm_write_combined;
474 			break;
475 		default:
476 			caching = ttm_cached;
477 			break;
478 		}
479 
480 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
481 
482 		/*
483 		 * Display scanout is always non-coherent with the CPU cache.
484 		 *
485 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
486 		 * non-coherent and require a CPU:WC mapping.
487 		 */
488 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
489 		    (xe->info.graphics_verx100 >= 1270 &&
490 		     bo->flags & XE_BO_FLAG_PAGETABLE))
491 			caching = ttm_write_combined;
492 	}
493 
494 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
495 		/*
496 		 * Valid only for internally-created buffers, for
497 		 * which cpu_caching is never initialized.
498 		 */
499 		xe_assert(xe, bo->cpu_caching == 0);
500 		caching = ttm_uncached;
501 	}
502 
503 	if (ttm_bo->type != ttm_bo_type_sg)
504 		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
505 
506 	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
507 	if (err) {
508 		kfree(xe_tt);
509 		return NULL;
510 	}
511 
512 	if (ttm_bo->type != ttm_bo_type_sg) {
513 		err = ttm_tt_setup_backup(tt);
514 		if (err) {
515 			ttm_tt_fini(tt);
516 			kfree(xe_tt);
517 			return NULL;
518 		}
519 	}
520 
521 	return tt;
522 }
523 
524 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
525 			      struct ttm_operation_ctx *ctx)
526 {
527 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
528 	int err;
529 
530 	/*
531 	 * dma-bufs are not populated with pages, and the dma-
532 	 * addresses are set up when moved to XE_PL_TT.
533 	 */
534 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
535 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
536 		return 0;
537 
538 	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
539 		err = ttm_tt_restore(ttm_dev, tt, ctx);
540 	} else {
541 		ttm_tt_clear_backed_up(tt);
542 		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
543 	}
544 	if (err)
545 		return err;
546 
547 	xe_tt->purgeable = false;
548 	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
549 	update_global_total_pages(ttm_dev, tt->num_pages);
550 
551 	return 0;
552 }
553 
554 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
555 {
556 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
557 
558 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
559 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
560 		return;
561 
562 	xe_tt_unmap_sg(xe, tt);
563 
564 	ttm_pool_free(&ttm_dev->pool, tt);
565 	xe_ttm_tt_account_subtract(xe, tt);
566 	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
567 }
568 
569 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
570 {
571 	ttm_tt_fini(tt);
572 	kfree(tt);
573 }
574 
575 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
576 {
577 	struct xe_ttm_vram_mgr_resource *vres =
578 		to_xe_ttm_vram_mgr_resource(mem);
579 
580 	return vres->used_visible_size == mem->size;
581 }
582 
583 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
584 				 struct ttm_resource *mem)
585 {
586 	struct xe_device *xe = ttm_to_xe_device(bdev);
587 
588 	switch (mem->mem_type) {
589 	case XE_PL_SYSTEM:
590 	case XE_PL_TT:
591 		return 0;
592 	case XE_PL_VRAM0:
593 	case XE_PL_VRAM1: {
594 		struct xe_vram_region *vram = res_to_mem_region(mem);
595 
596 		if (!xe_ttm_resource_visible(mem))
597 			return -EINVAL;
598 
599 		mem->bus.offset = mem->start << PAGE_SHIFT;
600 
601 		if (vram->mapping &&
602 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
603 			mem->bus.addr = (u8 __force *)vram->mapping +
604 				mem->bus.offset;
605 
606 		mem->bus.offset += vram->io_start;
607 		mem->bus.is_iomem = true;
608 
609 #if !IS_ENABLED(CONFIG_X86)
610 		mem->bus.caching = ttm_write_combined;
611 #endif
612 		return 0;
613 	} case XE_PL_STOLEN:
614 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
615 	default:
616 		return -EINVAL;
617 	}
618 }
619 
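/*
 * Notify all VMs with mappings of this BO that it is about to move: enable
 * software signaling on any bookkeep fences so preemption gets triggered, then
 * either mark the gpuvm_bo as evicted (dma-fence mode VMs rebind on a later
 * validation / exec) or, for fault-mode VMs, wait for idle and invalidate the
 * VMAs immediately.
 */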
620 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
621 				const struct ttm_operation_ctx *ctx)
622 {
623 	struct dma_resv_iter cursor;
624 	struct dma_fence *fence;
625 	struct drm_gem_object *obj = &bo->ttm.base;
626 	struct drm_gpuvm_bo *vm_bo;
627 	bool idle = false;
628 	int ret = 0;
629 
630 	dma_resv_assert_held(bo->ttm.base.resv);
631 
632 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
633 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
634 				    DMA_RESV_USAGE_BOOKKEEP);
635 		dma_resv_for_each_fence_unlocked(&cursor, fence)
636 			dma_fence_enable_sw_signaling(fence);
637 		dma_resv_iter_end(&cursor);
638 	}
639 
640 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
641 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
642 		struct drm_gpuva *gpuva;
643 
644 		if (!xe_vm_in_fault_mode(vm)) {
645 			drm_gpuvm_bo_evict(vm_bo, true);
646 			continue;
647 		}
648 
649 		if (!idle) {
650 			long timeout;
651 
652 			if (ctx->no_wait_gpu &&
653 			    !dma_resv_test_signaled(bo->ttm.base.resv,
654 						    DMA_RESV_USAGE_BOOKKEEP))
655 				return -EBUSY;
656 
657 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
658 							DMA_RESV_USAGE_BOOKKEEP,
659 							ctx->interruptible,
660 							MAX_SCHEDULE_TIMEOUT);
661 			if (!timeout)
662 				return -ETIME;
663 			if (timeout < 0)
664 				return timeout;
665 
666 			idle = true;
667 		}
668 
669 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
670 			struct xe_vma *vma = gpuva_to_vma(gpuva);
671 
672 			trace_xe_vma_evict(vma);
673 			ret = xe_vm_invalidate_vma(vma);
674 			if (XE_WARN_ON(ret))
675 				return ret;
676 		}
677 	}
678 
679 	return ret;
680 }
681 
682 /*
683  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
684  * Note that unmapping the attachment is deferred to the next
685  * map_attachment time, or to bo destroy (after idling), whichever comes first.
686  * This is to avoid syncing before unmap_attachment(), assuming that the
687  * caller relies on idling the reservation object before moving the
688  * backing store out. Should that assumption not hold, then we will be able
689  * to unconditionally call unmap_attachment() when moving out to system.
690  */
691 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
692 			     struct ttm_resource *new_res)
693 {
694 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
695 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
696 					       ttm);
697 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
698 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
699 	struct sg_table *sg;
700 
701 	xe_assert(xe, attach);
702 	xe_assert(xe, ttm_bo->ttm);
703 
704 	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
705 	    ttm_bo->sg) {
706 		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
707 				      false, MAX_SCHEDULE_TIMEOUT);
708 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
709 		ttm_bo->sg = NULL;
710 	}
711 
712 	if (new_res->mem_type == XE_PL_SYSTEM)
713 		goto out;
714 
715 	if (ttm_bo->sg) {
716 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
717 		ttm_bo->sg = NULL;
718 	}
719 
720 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
721 	if (IS_ERR(sg))
722 		return PTR_ERR(sg);
723 
724 	ttm_bo->sg = sg;
725 	xe_tt->sg = sg;
726 
727 out:
728 	ttm_bo_move_null(ttm_bo, new_res);
729 
730 	return 0;
731 }
732 
733 /**
734  * xe_bo_move_notify - Notify subsystems of a pending move
735  * @bo: The buffer object
736  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
737  *
738  * This function notifies subsystems of an upcoming buffer move.
739  * Upon receiving such a notification, subsystems should schedule
740  * halting access to the underlying pages and optionally add a fence
741  * to the buffer object's dma_resv object, that signals when access is
742  * stopped. The caller will wait on all dma_resv fences before
743  * starting the move.
744  *
745  * A subsystem may commence access to the object after obtaining
746  * bindings to the new backing memory under the object lock.
747  *
748  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
749  * negative error code on error.
750  */
751 static int xe_bo_move_notify(struct xe_bo *bo,
752 			     const struct ttm_operation_ctx *ctx)
753 {
754 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
755 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
756 	struct ttm_resource *old_mem = ttm_bo->resource;
757 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
758 	int ret;
759 
760 	/*
761 	 * If this starts to call into many components, consider
762 	 * using a notification chain here.
763 	 */
764 
765 	if (xe_bo_is_pinned(bo))
766 		return -EINVAL;
767 
768 	xe_bo_vunmap(bo);
769 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
770 	if (ret)
771 		return ret;
772 
773 	/* Don't call move_notify() for imported dma-bufs. */
774 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
775 		dma_buf_move_notify(ttm_bo->base.dma_buf);
776 
777 	/*
778 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
779 	 * so if we moved from VRAM make sure to unlink this from the userfault
780 	 * tracking.
781 	 */
782 	if (mem_type_is_vram(old_mem_type)) {
783 		mutex_lock(&xe->mem_access.vram_userfault.lock);
784 		if (!list_empty(&bo->vram_userfault_link))
785 			list_del_init(&bo->vram_userfault_link);
786 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
787 	}
788 
789 	return 0;
790 }
791 
792 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
793 		      struct ttm_operation_ctx *ctx,
794 		      struct ttm_resource *new_mem,
795 		      struct ttm_place *hop)
796 {
797 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
798 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
799 	struct ttm_resource *old_mem = ttm_bo->resource;
800 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
801 	struct ttm_tt *ttm = ttm_bo->ttm;
802 	struct xe_migrate *migrate = NULL;
803 	struct dma_fence *fence;
804 	bool move_lacks_source;
805 	bool tt_has_data;
806 	bool needs_clear;
807 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
808 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
809 	int ret = 0;
810 
811 	/* Bo creation path, moving to system or TT. */
812 	if ((!old_mem && ttm) && !handle_system_ccs) {
813 		if (new_mem->mem_type == XE_PL_TT)
814 			ret = xe_tt_map_sg(xe, ttm);
815 		if (!ret)
816 			ttm_bo_move_null(ttm_bo, new_mem);
817 		goto out;
818 	}
819 
820 	if (ttm_bo->type == ttm_bo_type_sg) {
821 		if (new_mem->mem_type == XE_PL_SYSTEM)
822 			ret = xe_bo_move_notify(bo, ctx);
823 		if (!ret)
824 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
825 		return ret;
826 	}
827 
828 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));
829 
830 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
831 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
832 
833 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
834 		(!ttm && ttm_bo->type == ttm_bo_type_device);
835 
836 	if (new_mem->mem_type == XE_PL_TT) {
837 		ret = xe_tt_map_sg(xe, ttm);
838 		if (ret)
839 			goto out;
840 	}
841 
842 	if ((move_lacks_source && !needs_clear)) {
843 		ttm_bo_move_null(ttm_bo, new_mem);
844 		goto out;
845 	}
846 
847 	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
848 	    new_mem->mem_type == XE_PL_SYSTEM) {
849 		ret = xe_svm_bo_evict(bo);
850 		if (!ret) {
851 			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
852 			ttm_bo_move_null(ttm_bo, new_mem);
853 		} else {
854 			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
855 				ERR_PTR(ret));
856 		}
857 
858 		goto out;
859 	}
860 
861 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
862 		ttm_bo_move_null(ttm_bo, new_mem);
863 		goto out;
864 	}
865 
866 	/*
867 	 * A failed multi-hop, where the old_mem is still marked as
868 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
869 	 */
870 	if (old_mem_type == XE_PL_TT &&
871 	    new_mem->mem_type == XE_PL_TT) {
872 		ttm_bo_move_null(ttm_bo, new_mem);
873 		goto out;
874 	}
875 
876 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
877 		ret = xe_bo_move_notify(bo, ctx);
878 		if (ret)
879 			goto out;
880 	}
881 
882 	if (old_mem_type == XE_PL_TT &&
883 	    new_mem->mem_type == XE_PL_SYSTEM) {
884 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
885 						     DMA_RESV_USAGE_BOOKKEEP,
886 						     false,
887 						     MAX_SCHEDULE_TIMEOUT);
888 		if (timeout < 0) {
889 			ret = timeout;
890 			goto out;
891 		}
892 
893 		if (!handle_system_ccs) {
894 			ttm_bo_move_null(ttm_bo, new_mem);
895 			goto out;
896 		}
897 	}
898 
899 	if (!move_lacks_source &&
900 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
901 	     (mem_type_is_vram(old_mem_type) &&
902 	      new_mem->mem_type == XE_PL_SYSTEM))) {
903 		hop->fpfn = 0;
904 		hop->lpfn = 0;
905 		hop->mem_type = XE_PL_TT;
906 		hop->flags = TTM_PL_FLAG_TEMPORARY;
907 		ret = -EMULTIHOP;
908 		goto out;
909 	}
910 
911 	if (bo->tile)
912 		migrate = bo->tile->migrate;
913 	else if (resource_is_vram(new_mem))
914 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
915 	else if (mem_type_is_vram(old_mem_type))
916 		migrate = mem_type_to_migrate(xe, old_mem_type);
917 	else
918 		migrate = xe->tiles[0].migrate;
919 
920 	xe_assert(xe, migrate);
921 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
922 	if (xe_rpm_reclaim_safe(xe)) {
923 		/*
924 		 * We might be called through swapout in the validation path of
925 		 * another TTM device, so acquire rpm here.
926 		 */
927 		xe_pm_runtime_get(xe);
928 	} else {
929 		drm_WARN_ON(&xe->drm, handle_system_ccs);
930 		xe_pm_runtime_get_noresume(xe);
931 	}
932 
933 	if (move_lacks_source) {
934 		u32 flags = 0;
935 
936 		if (mem_type_is_vram(new_mem->mem_type))
937 			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
938 		else if (handle_system_ccs)
939 			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
940 
941 		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
942 	} else {
943 		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
944 					handle_system_ccs);
945 	}
946 	if (IS_ERR(fence)) {
947 		ret = PTR_ERR(fence);
948 		xe_pm_runtime_put(xe);
949 		goto out;
950 	}
951 	if (!move_lacks_source) {
952 		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
953 						new_mem);
954 		if (ret) {
955 			dma_fence_wait(fence, false);
956 			ttm_bo_move_null(ttm_bo, new_mem);
957 			ret = 0;
958 		}
959 	} else {
960 		/*
961 		 * ttm_bo_move_accel_cleanup() may blow up if
962 		 * bo->resource == NULL, so just attach the
963 		 * fence and set the new resource.
964 		 */
965 		dma_resv_add_fence(ttm_bo->base.resv, fence,
966 				   DMA_RESV_USAGE_KERNEL);
967 		ttm_bo_move_null(ttm_bo, new_mem);
968 	}
969 
970 	dma_fence_put(fence);
971 	xe_pm_runtime_put(xe);
972 
973 	/*
974 	 * CCS metadata is migrated from TT -> SMEM. So, let us detach the
975 	 * BBs from the BO as they are no longer needed.
976 	 */
977 	if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT &&
978 	    new_mem->mem_type == XE_PL_SYSTEM)
979 		xe_sriov_vf_ccs_detach_bo(bo);
980 
981 	if (IS_SRIOV_VF(xe) &&
982 	    ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
983 	     (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
984 	    handle_system_ccs)
985 		ret = xe_sriov_vf_ccs_attach_bo(bo);
986 
987 out:
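	/*
	 * If the BO ends up in system memory (or without a resource), wait for
	 * any pending kernel moves/clears before dropping the cached sg
	 * mapping and, on VFs, the CCS backup BBs.
	 */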
988 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
989 	    ttm_bo->ttm) {
990 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
991 						     DMA_RESV_USAGE_KERNEL,
992 						     false,
993 						     MAX_SCHEDULE_TIMEOUT);
994 		if (timeout < 0)
995 			ret = timeout;
996 
997 		if (IS_VF_CCS_BB_VALID(xe, bo))
998 			xe_sriov_vf_ccs_detach_bo(bo);
999 
1000 		xe_tt_unmap_sg(xe, ttm_bo->ttm);
1001 	}
1002 
1003 	return ret;
1004 }
1005 
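/*
 * Shrinker purge helper: fake-move the BO to system placement without copying
 * data, then release its pages and adjust the shrinker accounting.
 */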
1006 static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
1007 			       struct ttm_buffer_object *bo,
1008 			       unsigned long *scanned)
1009 {
1010 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1011 	long lret;
1012 
1013 	/* Fake move to system, without copying data. */
1014 	if (bo->resource->mem_type != XE_PL_SYSTEM) {
1015 		struct ttm_resource *new_resource;
1016 
1017 		lret = ttm_bo_wait_ctx(bo, ctx);
1018 		if (lret)
1019 			return lret;
1020 
1021 		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
1022 		if (lret)
1023 			return lret;
1024 
1025 		xe_tt_unmap_sg(xe, bo->ttm);
1026 		ttm_bo_move_null(bo, new_resource);
1027 	}
1028 
1029 	*scanned += bo->ttm->num_pages;
1030 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1031 			     {.purge = true,
1032 			      .writeback = false,
1033 			      .allow_move = false});
1034 
1035 	if (lret > 0)
1036 		xe_ttm_tt_account_subtract(xe, bo->ttm);
1037 
1038 	return lret;
1039 }
1040 
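/*
 * On top of the generic TTM check, refuse eviction if any VM this BO is
 * mapped in is currently in the middle of validating.
 */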
1041 static bool
1042 xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1043 {
1044 	struct drm_gpuvm_bo *vm_bo;
1045 
1046 	if (!ttm_bo_eviction_valuable(bo, place))
1047 		return false;
1048 
1049 	if (!xe_bo_is_xe_bo(bo))
1050 		return true;
1051 
1052 	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1053 		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1054 			return false;
1055 	}
1056 
1057 	return true;
1058 }
1059 
1060 /**
1061  * xe_bo_shrink() - Try to shrink an xe bo.
1062  * @ctx: The struct ttm_operation_ctx used for shrinking.
1063  * @bo: The TTM buffer object whose pages to shrink.
1064  * @flags: Flags governing the shrink behaviour.
1065  * @scanned: Pointer to a counter of the number of pages
1066  * attempted to shrink.
1067  *
1068  * Try to shrink or purge a bo, and if it succeeds, unmap dma.
1069  * Note that we need to be able to also handle non-xe bos
1070  * (ghost bos), but only if the struct ttm_tt is embedded in
1071  * a struct xe_ttm_tt. When the function attempts to shrink
1072  * the pages of a buffer object, the value pointed to by @scanned
1073  * is updated.
1074  *
1075  * Return: The number of pages shrunken or purged, or negative error
1076  * code on failure.
1077  */
1078 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
1079 		  const struct xe_bo_shrink_flags flags,
1080 		  unsigned long *scanned)
1081 {
1082 	struct ttm_tt *tt = bo->ttm;
1083 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
1084 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
1085 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
1086 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1087 	bool needs_rpm;
1088 	long lret = 0L;
1089 
1090 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
1091 	    (flags.purge && !xe_tt->purgeable))
1092 		return -EBUSY;
1093 
1094 	if (!xe_bo_eviction_valuable(bo, &place))
1095 		return -EBUSY;
1096 
1097 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1098 		return xe_bo_shrink_purge(ctx, bo, scanned);
1099 
1100 	if (xe_tt->purgeable) {
1101 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1102 			lret = xe_bo_move_notify(xe_bo, ctx);
1103 		if (!lret)
1104 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1105 		goto out_unref;
1106 	}
1107 
1108 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1109 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1110 		     xe_bo_needs_ccs_pages(xe_bo));
1111 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1112 		goto out_unref;
1113 
1114 	*scanned += tt->num_pages;
1115 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1116 			     {.purge = false,
1117 			      .writeback = flags.writeback,
1118 			      .allow_move = true});
1119 	if (needs_rpm)
1120 		xe_pm_runtime_put(xe);
1121 
1122 	if (lret > 0)
1123 		xe_ttm_tt_account_subtract(xe, tt);
1124 
1125 out_unref:
1126 	xe_bo_put(xe_bo);
1127 
1128 	return lret;
1129 }
1130 
1131 /**
1132  * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1133  * up in system memory.
1134  * @bo: The buffer object to prepare.
1135  *
1136  * On successful completion, the object backup pages are allocated. Expectation
1137  * is that this is called from the PM notifier, prior to suspend/hibernation.
1138  *
1139  * Return: 0 on success. Negative error code on failure.
1140  */
1141 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1142 {
1143 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1144 	struct xe_bo *backup;
1145 	int ret = 0;
1146 
1147 	xe_bo_lock(bo, false);
1148 
1149 	xe_assert(xe, !bo->backup_obj);
1150 
1151 	/*
1152 	 * Since this is called from the PM notifier, we might have raced with
1153 	 * someone unpinning this after we dropped the pinned list lock and
1154 	 * before we grabbed the above bo lock.
1155 	 */
1156 	if (!xe_bo_is_pinned(bo))
1157 		goto out_unlock_bo;
1158 
1159 	if (!xe_bo_is_vram(bo))
1160 		goto out_unlock_bo;
1161 
1162 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1163 		goto out_unlock_bo;
1164 
1165 	backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
1166 					DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1167 					XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1168 					XE_BO_FLAG_PINNED);
1169 	if (IS_ERR(backup)) {
1170 		ret = PTR_ERR(backup);
1171 		goto out_unlock_bo;
1172 	}
1173 
1174 	backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1175 	ttm_bo_pin(&backup->ttm);
1176 	bo->backup_obj = backup;
1177 
1178 out_unlock_bo:
1179 	xe_bo_unlock(bo);
1180 	return ret;
1181 }
1182 
1183 /**
1184  * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1185  * @bo: The buffer object to undo the prepare for.
1186  *
1187  * Always returns 0. The backup object is removed, if still present. Expectation
1188  * is that this is called from the PM notifier when undoing the prepare step.
1189  *
1190  * Return: Always returns 0.
1191  */
1192 int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1193 {
1194 	xe_bo_lock(bo, false);
1195 	if (bo->backup_obj) {
1196 		ttm_bo_unpin(&bo->backup_obj->ttm);
1197 		xe_bo_put(bo->backup_obj);
1198 		bo->backup_obj = NULL;
1199 	}
1200 	xe_bo_unlock(bo);
1201 
1202 	return 0;
1203 }
1204 
1205 /**
1206  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1207  * @bo: The buffer object to move.
1208  *
1209  * On successful completion, the object memory will be moved to system memory.
1210  *
1211  * This is needed for special handling of pinned VRAM objects during
1212  * suspend-resume.
1213  *
1214  * Return: 0 on success. Negative error code on failure.
1215  */
1216 int xe_bo_evict_pinned(struct xe_bo *bo)
1217 {
1218 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1219 	struct xe_bo *backup = bo->backup_obj;
1220 	bool backup_created = false;
1221 	bool unmap = false;
1222 	int ret = 0;
1223 
1224 	xe_bo_lock(bo, false);
1225 
1226 	if (WARN_ON(!bo->ttm.resource)) {
1227 		ret = -EINVAL;
1228 		goto out_unlock_bo;
1229 	}
1230 
1231 	if (WARN_ON(!xe_bo_is_pinned(bo))) {
1232 		ret = -EINVAL;
1233 		goto out_unlock_bo;
1234 	}
1235 
1236 	if (!xe_bo_is_vram(bo))
1237 		goto out_unlock_bo;
1238 
1239 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1240 		goto out_unlock_bo;
1241 
1242 	if (!backup) {
1243 		backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv,
1244 						NULL, xe_bo_size(bo),
1245 						DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1246 						XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1247 						XE_BO_FLAG_PINNED);
1248 		if (IS_ERR(backup)) {
1249 			ret = PTR_ERR(backup);
1250 			goto out_unlock_bo;
1251 		}
1252 		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1253 		backup_created = true;
1254 	}
1255 
1256 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1257 		struct xe_migrate *migrate;
1258 		struct dma_fence *fence;
1259 
1260 		if (bo->tile)
1261 			migrate = bo->tile->migrate;
1262 		else
1263 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1264 
1265 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1266 		if (ret)
1267 			goto out_backup;
1268 
1269 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1270 		if (ret)
1271 			goto out_backup;
1272 
1273 		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
1274 					backup->ttm.resource, false);
1275 		if (IS_ERR(fence)) {
1276 			ret = PTR_ERR(fence);
1277 			goto out_backup;
1278 		}
1279 
1280 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1281 				   DMA_RESV_USAGE_KERNEL);
1282 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1283 				   DMA_RESV_USAGE_KERNEL);
1284 		dma_fence_put(fence);
1285 	} else {
1286 		ret = xe_bo_vmap(backup);
1287 		if (ret)
1288 			goto out_backup;
1289 
1290 		if (iosys_map_is_null(&bo->vmap)) {
1291 			ret = xe_bo_vmap(bo);
1292 			if (ret)
1293 				goto out_backup;
1294 			unmap = true;
1295 		}
1296 
1297 		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
1298 				   xe_bo_size(bo));
1299 	}
1300 
1301 	if (!bo->backup_obj)
1302 		bo->backup_obj = backup;
1303 
1304 out_backup:
1305 	xe_bo_vunmap(backup);
1306 	if (ret && backup_created)
1307 		xe_bo_put(backup);
1308 out_unlock_bo:
1309 	if (unmap)
1310 		xe_bo_vunmap(bo);
1311 	xe_bo_unlock(bo);
1312 	return ret;
1313 }
1314 
1315 /**
1316  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1317  * @bo: The buffer object to move.
1318  *
1319  * On successful completion, the object memory will be moved back to VRAM.
1320  *
1321  * This is needed for special handling of pinned VRAM objects during
1322  * suspend-resume.
1323  *
1324  * Return: 0 on success. Negative error code on failure.
1325  */
1326 int xe_bo_restore_pinned(struct xe_bo *bo)
1327 {
1328 	struct ttm_operation_ctx ctx = {
1329 		.interruptible = false,
1330 		.gfp_retry_mayfail = false,
1331 	};
1332 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1333 	struct xe_bo *backup = bo->backup_obj;
1334 	bool unmap = false;
1335 	int ret;
1336 
1337 	if (!backup)
1338 		return 0;
1339 
1340 	xe_bo_lock(bo, false);
1341 
1342 	if (!xe_bo_is_pinned(backup)) {
1343 		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1344 		if (ret)
1345 			goto out_unlock_bo;
1346 	}
1347 
1348 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1349 		struct xe_migrate *migrate;
1350 		struct dma_fence *fence;
1351 
1352 		if (bo->tile)
1353 			migrate = bo->tile->migrate;
1354 		else
1355 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1356 
1357 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1358 		if (ret)
1359 			goto out_unlock_bo;
1360 
1361 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1362 		if (ret)
1363 			goto out_unlock_bo;
1364 
1365 		fence = xe_migrate_copy(migrate, backup, bo,
1366 					backup->ttm.resource, bo->ttm.resource,
1367 					false);
1368 		if (IS_ERR(fence)) {
1369 			ret = PTR_ERR(fence);
1370 			goto out_unlock_bo;
1371 		}
1372 
1373 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1374 				   DMA_RESV_USAGE_KERNEL);
1375 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1376 				   DMA_RESV_USAGE_KERNEL);
1377 		dma_fence_put(fence);
1378 	} else {
1379 		ret = xe_bo_vmap(backup);
1380 		if (ret)
1381 			goto out_unlock_bo;
1382 
1383 		if (iosys_map_is_null(&bo->vmap)) {
1384 			ret = xe_bo_vmap(bo);
1385 			if (ret)
1386 				goto out_backup;
1387 			unmap = true;
1388 		}
1389 
1390 		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
1391 				 xe_bo_size(bo));
1392 	}
1393 
1394 	bo->backup_obj = NULL;
1395 
1396 out_backup:
1397 	xe_bo_vunmap(backup);
1398 	if (!bo->backup_obj) {
1399 		if (xe_bo_is_pinned(backup))
1400 			ttm_bo_unpin(&backup->ttm);
1401 		xe_bo_put(backup);
1402 	}
1403 out_unlock_bo:
1404 	if (unmap)
1405 		xe_bo_vunmap(bo);
1406 	xe_bo_unlock(bo);
1407 	return ret;
1408 }
1409 
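/**
 * xe_bo_dma_unmap_pinned() - Tear down the DMA mappings of a pinned BO
 * @bo: The buffer object.
 *
 * For imported dma-bufs the attachment is unmapped; otherwise the BO's own
 * sg table, if any, is DMA-unmapped and freed.
 *
 * Return: Always returns 0.
 */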
1410 int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
1411 {
1412 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
1413 	struct ttm_tt *tt = ttm_bo->ttm;
1414 
1415 	if (tt) {
1416 		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
1417 
1418 		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1419 			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
1420 						 ttm_bo->sg,
1421 						 DMA_BIDIRECTIONAL);
1422 			ttm_bo->sg = NULL;
1423 			xe_tt->sg = NULL;
1424 		} else if (xe_tt->sg) {
1425 			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
1426 					  xe_tt->sg,
1427 					  DMA_BIDIRECTIONAL, 0);
1428 			sg_free_table(xe_tt->sg);
1429 			xe_tt->sg = NULL;
1430 		}
1431 	}
1432 
1433 	return 0;
1434 }
1435 
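/*
 * Translate a page offset within the BO into an IO page frame number,
 * special-casing the stolen region.
 */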
1436 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1437 				       unsigned long page_offset)
1438 {
1439 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1440 	struct xe_res_cursor cursor;
1441 	struct xe_vram_region *vram;
1442 
1443 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1444 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1445 
1446 	vram = res_to_mem_region(ttm_bo->resource);
1447 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1448 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1449 }
1450 
1451 static void __xe_bo_vunmap(struct xe_bo *bo);
1452 
1453 /*
1454  * TODO: Move this function to TTM so we don't rely on how TTM does its
1455  * locking, thereby abusing TTM internals.
1456  */
1457 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1458 {
1459 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1460 	bool locked;
1461 
1462 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1463 
1464 	/*
1465 	 * We can typically only race with TTM trylocking under the
1466 	 * lru_lock, which will immediately be unlocked again since
1467 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1468 	 * always succeed here, as long as we hold the lru lock.
1469 	 */
1470 	spin_lock(&ttm_bo->bdev->lru_lock);
1471 	locked = dma_resv_trylock(ttm_bo->base.resv);
1472 	spin_unlock(&ttm_bo->bdev->lru_lock);
1473 	xe_assert(xe, locked);
1474 
1475 	return locked;
1476 }
1477 
1478 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1479 {
1480 	struct dma_resv_iter cursor;
1481 	struct dma_fence *fence;
1482 	struct dma_fence *replacement = NULL;
1483 	struct xe_bo *bo;
1484 
1485 	if (!xe_bo_is_xe_bo(ttm_bo))
1486 		return;
1487 
1488 	bo = ttm_to_xe_bo(ttm_bo);
1489 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1490 
1491 	/*
1492 	 * Corner case where TTM fails to allocate memory and this BO's resv
1493 	 * still points to the VM's resv.
1494 	 */
1495 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1496 		return;
1497 
1498 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1499 		return;
1500 
1501 	/*
1502 	 * Scrub the preempt fences if any. The unbind fence is already
1503 	 * attached to the resv.
1504 	 * TODO: Don't do this for external bos once we scrub them after
1505 	 * unbind.
1506 	 */
1507 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1508 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1509 		if (xe_fence_is_xe_preempt(fence) &&
1510 		    !dma_fence_is_signaled(fence)) {
1511 			if (!replacement)
1512 				replacement = dma_fence_get_stub();
1513 
1514 			dma_resv_replace_fences(ttm_bo->base.resv,
1515 						fence->context,
1516 						replacement,
1517 						DMA_RESV_USAGE_BOOKKEEP);
1518 		}
1519 	}
1520 	dma_fence_put(replacement);
1521 
1522 	dma_resv_unlock(ttm_bo->base.resv);
1523 }
1524 
1525 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1526 {
1527 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1528 
1529 	if (!xe_bo_is_xe_bo(ttm_bo))
1530 		return;
1531 
1532 	if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo))
1533 		xe_sriov_vf_ccs_detach_bo(bo);
1534 
1535 	/*
1536 	 * Object is idle and about to be destroyed. Release the
1537 	 * dma-buf attachment.
1538 	 */
1539 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1540 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1541 						       struct xe_ttm_tt, ttm);
1542 
1543 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1544 					 DMA_BIDIRECTIONAL);
1545 		ttm_bo->sg = NULL;
1546 		xe_tt->sg = NULL;
1547 	}
1548 }
1549 
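/*
 * Drop the backing store of a (purgeable) BO by validating it against an
 * empty placement list.
 */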
1550 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1551 {
1552 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1553 
1554 	if (ttm_bo->ttm) {
1555 		struct ttm_placement place = {};
1556 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1557 
1558 		drm_WARN_ON(&xe->drm, ret);
1559 	}
1560 }
1561 
1562 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1563 {
1564 	struct ttm_operation_ctx ctx = {
1565 		.interruptible = false,
1566 		.gfp_retry_mayfail = false,
1567 	};
1568 
1569 	if (ttm_bo->ttm) {
1570 		struct xe_ttm_tt *xe_tt =
1571 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1572 
1573 		if (xe_tt->purgeable)
1574 			xe_ttm_bo_purge(ttm_bo, &ctx);
1575 	}
1576 }
1577 
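/*
 * ttm access_memory() hook for VRAM-resident BOs: large or non-CPU-visible
 * accesses are handed to the migration engine, everything else is copied
 * page by page through the CPU mapping of VRAM.
 */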
1578 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1579 				unsigned long offset, void *buf, int len,
1580 				int write)
1581 {
1582 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1583 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1584 	struct iosys_map vmap;
1585 	struct xe_res_cursor cursor;
1586 	struct xe_vram_region *vram;
1587 	int bytes_left = len;
1588 	int err = 0;
1589 
1590 	xe_bo_assert_held(bo);
1591 	xe_device_assert_mem_access(xe);
1592 
1593 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1594 		return -EIO;
1595 
1596 	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
1597 		struct xe_migrate *migrate =
1598 			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1599 
1600 		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1601 					       write);
1602 		goto out;
1603 	}
1604 
1605 	vram = res_to_mem_region(ttm_bo->resource);
1606 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1607 		     xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);
1608 
1609 	do {
1610 		unsigned long page_offset = (offset & ~PAGE_MASK);
1611 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1612 
1613 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1614 					  cursor.start);
1615 		if (write)
1616 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1617 		else
1618 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1619 
1620 		buf += byte_count;
1621 		offset += byte_count;
1622 		bytes_left -= byte_count;
1623 		if (bytes_left)
1624 			xe_res_next(&cursor, PAGE_SIZE);
1625 	} while (bytes_left);
1626 
1627 out:
1628 	return err ?: len;
1629 }
1630 
1631 const struct ttm_device_funcs xe_ttm_funcs = {
1632 	.ttm_tt_create = xe_ttm_tt_create,
1633 	.ttm_tt_populate = xe_ttm_tt_populate,
1634 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1635 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1636 	.evict_flags = xe_evict_flags,
1637 	.move = xe_bo_move,
1638 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1639 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1640 	.access_memory = xe_ttm_access_memory,
1641 	.release_notify = xe_ttm_bo_release_notify,
1642 	.eviction_valuable = xe_bo_eviction_valuable,
1643 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1644 	.swap_notify = xe_ttm_bo_swap_notify,
1645 };
1646 
1647 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1648 {
1649 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1650 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1651 	struct xe_tile *tile;
1652 	u8 id;
1653 
1654 	if (bo->ttm.base.import_attach)
1655 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1656 	drm_gem_object_release(&bo->ttm.base);
1657 
1658 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1659 
1660 	for_each_tile(tile, xe, id)
1661 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1662 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1663 
1664 #ifdef CONFIG_PROC_FS
1665 	if (bo->client)
1666 		xe_drm_client_remove_bo(bo);
1667 #endif
1668 
1669 	if (bo->vm && xe_bo_is_user(bo))
1670 		xe_vm_put(bo->vm);
1671 
1672 	if (bo->parent_obj)
1673 		xe_bo_put(bo->parent_obj);
1674 
1675 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1676 	if (!list_empty(&bo->vram_userfault_link))
1677 		list_del(&bo->vram_userfault_link);
1678 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1679 
1680 	kfree(bo);
1681 }
1682 
1683 static void xe_gem_object_free(struct drm_gem_object *obj)
1684 {
1685 	/* Our BO reference counting scheme works as follows:
1686 	 *
1687 	 * The gem object kref is typically used throughout the driver,
1688 	 * and the gem object holds a ttm_buffer_object refcount, so
1689 	 * that when the last gem object reference is put, which is when
1690 	 * we end up in this function, we also put that ttm_buffer_object
1691 	 * refcount. Anything using gem interfaces is then no longer
1692 	 * allowed to access the object in a way that requires a gem
1693 	 * refcount, including locking the object.
1694 	 *
1695 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1696 	 * refcount directly if needed.
1697 	 */
1698 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1699 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1700 }
1701 
1702 static void xe_gem_object_close(struct drm_gem_object *obj,
1703 				struct drm_file *file_priv)
1704 {
1705 	struct xe_bo *bo = gem_to_xe_bo(obj);
1706 
1707 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1708 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1709 
1710 		xe_bo_lock(bo, false);
1711 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1712 		xe_bo_unlock(bo);
1713 	}
1714 }
1715 
1716 static bool should_migrate_to_smem(struct xe_bo *bo)
1717 {
1718 	/*
1719 	 * NOTE: The following atomic checks are platform-specific. For example,
1720 	 * if a device supports CXL atomics, these may not be necessary or
1721 	 * may behave differently.
1722 	 */
1723 
1724 	return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
1725 	       bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
1726 }
1727 
1728 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1729 {
1730 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1731 	struct drm_device *ddev = tbo->base.dev;
1732 	struct xe_device *xe = to_xe_device(ddev);
1733 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1734 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1735 	vm_fault_t ret;
1736 	int idx, r = 0;
1737 
1738 	if (needs_rpm)
1739 		xe_pm_runtime_get(xe);
1740 
1741 	ret = ttm_bo_vm_reserve(tbo, vmf);
1742 	if (ret)
1743 		goto out;
1744 
1745 	if (drm_dev_enter(ddev, &idx)) {
1746 		trace_xe_bo_cpu_fault(bo);
1747 
1748 		if (should_migrate_to_smem(bo)) {
1749 			xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM);
1750 
1751 			r = xe_bo_migrate(bo, XE_PL_TT);
1752 			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1753 				ret = VM_FAULT_NOPAGE;
1754 			else if (r)
1755 				ret = VM_FAULT_SIGBUS;
1756 		}
1757 		if (!ret)
1758 			ret = ttm_bo_vm_fault_reserved(vmf,
1759 						       vmf->vma->vm_page_prot,
1760 						       TTM_BO_VM_NUM_PREFAULT);
1761 		drm_dev_exit(idx);
1762 
1763 		if (ret == VM_FAULT_RETRY &&
1764 		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1765 			goto out;
1766 
1767 		/*
1768 		 * ttm_bo_vm_reserve() already has dma_resv_lock.
1769 		 */
1770 		if (ret == VM_FAULT_NOPAGE &&
1771 		    mem_type_is_vram(tbo->resource->mem_type)) {
1772 			mutex_lock(&xe->mem_access.vram_userfault.lock);
1773 			if (list_empty(&bo->vram_userfault_link))
1774 				list_add(&bo->vram_userfault_link,
1775 					 &xe->mem_access.vram_userfault.list);
1776 			mutex_unlock(&xe->mem_access.vram_userfault.lock);
1777 		}
1778 	} else {
1779 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1780 	}
1781 
1782 	dma_resv_unlock(tbo->base.resv);
1783 out:
1784 	if (needs_rpm)
1785 		xe_pm_runtime_put(xe);
1786 
1787 	return ret;
1788 }
1789 
1790 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1791 			   void *buf, int len, int write)
1792 {
1793 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1794 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1795 	struct xe_device *xe = xe_bo_device(bo);
1796 	int ret;
1797 
1798 	xe_pm_runtime_get(xe);
1799 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1800 	xe_pm_runtime_put(xe);
1801 
1802 	return ret;
1803 }
1804 
1805 /**
1806  * xe_bo_read() - Read from an xe_bo
1807  * @bo: The buffer object to read from.
1808  * @offset: The byte offset to start reading from.
1809  * @dst: Location to store the read.
1810  * @size: Size in bytes for the read.
1811  *
1812  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1813  *
1814  * Return: Zero on success, or negative error.
1815  */
1816 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1817 {
1818 	int ret;
1819 
1820 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1821 	if (ret >= 0 && ret != size)
1822 		ret = -EIO;
1823 	else if (ret == size)
1824 		ret = 0;
1825 
1826 	return ret;
1827 }
1828 
1829 static const struct vm_operations_struct xe_gem_vm_ops = {
1830 	.fault = xe_gem_fault,
1831 	.open = ttm_bo_vm_open,
1832 	.close = ttm_bo_vm_close,
1833 	.access = xe_bo_vm_access,
1834 };
1835 
1836 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1837 	.free = xe_gem_object_free,
1838 	.close = xe_gem_object_close,
1839 	.mmap = drm_gem_ttm_mmap,
1840 	.export = xe_gem_prime_export,
1841 	.vm_ops = &xe_gem_vm_ops,
1842 };
1843 
1844 /**
1845  * xe_bo_alloc - Allocate storage for a struct xe_bo
1846  *
1847  * This function is intended to allocate storage to be used for input
1848  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1849  * to __xe_bo_create_locked(), in case a pointer to the bo to be
1850  * created is needed before the call to __xe_bo_create_locked().
1851  * If __xe_bo_create_locked() ends up never being called, then the
1852  * xe_bo_free().
1853  *
1854  * Return: A pointer to an uninitialized struct xe_bo on success,
1855  * ERR_PTR(-ENOMEM) on error.
1856  */
1857 struct xe_bo *xe_bo_alloc(void)
1858 {
1859 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1860 
1861 	if (!bo)
1862 		return ERR_PTR(-ENOMEM);
1863 
1864 	return bo;
1865 }
1866 
1867 /**
1868  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1869  * @bo: The buffer object storage.
1870  *
1871  * Refer to xe_bo_alloc() documentation for valid use-cases.
1872  */
1873 void xe_bo_free(struct xe_bo *bo)
1874 {
1875 	kfree(bo);
1876 }
1877 
1878 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1879 				     struct xe_tile *tile, struct dma_resv *resv,
1880 				     struct ttm_lru_bulk_move *bulk, size_t size,
1881 				     u16 cpu_caching, enum ttm_bo_type type,
1882 				     u32 flags)
1883 {
1884 	struct ttm_operation_ctx ctx = {
1885 		.interruptible = true,
1886 		.no_wait_gpu = false,
1887 		.gfp_retry_mayfail = true,
1888 	};
1889 	struct ttm_placement *placement;
1890 	uint32_t alignment;
1891 	size_t aligned_size;
1892 	int err;
1893 
1894 	/* Only kernel objects should set a tile */
1895 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1896 
1897 	if (XE_WARN_ON(!size)) {
1898 		xe_bo_free(bo);
1899 		return ERR_PTR(-EINVAL);
1900 	}
1901 
1902 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
1903 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1904 		return ERR_PTR(-EINVAL);
1905 
1906 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1907 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1908 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1909 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1910 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1911 
1912 		aligned_size = ALIGN(size, align);
1913 		if (type != ttm_bo_type_device)
1914 			size = ALIGN(size, align);
1915 		flags |= XE_BO_FLAG_INTERNAL_64K;
1916 		alignment = align >> PAGE_SHIFT;
1917 	} else {
1918 		aligned_size = ALIGN(size, SZ_4K);
1919 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1920 		alignment = SZ_4K >> PAGE_SHIFT;
1921 	}
1922 
1923 	if (type == ttm_bo_type_device && aligned_size != size)
1924 		return ERR_PTR(-EINVAL);
1925 
1926 	if (!bo) {
1927 		bo = xe_bo_alloc();
1928 		if (IS_ERR(bo))
1929 			return bo;
1930 	}
1931 
1932 	bo->ccs_cleared = false;
1933 	bo->tile = tile;
1934 	bo->flags = flags;
1935 	bo->cpu_caching = cpu_caching;
1936 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1937 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1938 	INIT_LIST_HEAD(&bo->pinned_link);
1939 #ifdef CONFIG_PROC_FS
1940 	INIT_LIST_HEAD(&bo->client_link);
1941 #endif
1942 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1943 
1944 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1945 
1946 	if (resv) {
1947 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1948 		ctx.resv = resv;
1949 	}
1950 
1951 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1952 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1953 		if (WARN_ON(err)) {
1954 			xe_ttm_bo_destroy(&bo->ttm);
1955 			return ERR_PTR(err);
1956 		}
1957 	}
1958 
1959 	/* Defer populating type_sg bos */
1960 	placement = (type == ttm_bo_type_sg ||
1961 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1962 		&bo->placement;
1963 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1964 				   placement, alignment,
1965 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1966 	if (err)
1967 		return ERR_PTR(err);
1968 
1969 	/*
1970 	 * The VRAM pages underneath are potentially still being accessed by the
1971 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1972 	 * sure to add any corresponding move/clear fences into the objects
1973 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1974 	 *
1975 	 * For KMD internal buffers we don't care about GPU clearing, however we
1976 	 * still need to handle async evictions, where the VRAM is still being
1977 	 * accessed by the GPU. Most internal callers are not expecting this,
1978 	 * since they are missing the required synchronisation before accessing
1979 	 * the memory. To keep things simple just sync wait any kernel fences
1980 	 * here, if the buffer is designated KMD internal.
1981 	 *
1982 	 * For normal userspace objects we should already have the required
1983 	 * pipelining or sync waiting elsewhere, since we already have to deal
1984 	 * with things like async GPU clearing.
1985 	 */
1986 	if (type == ttm_bo_type_kernel) {
1987 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1988 						     DMA_RESV_USAGE_KERNEL,
1989 						     ctx.interruptible,
1990 						     MAX_SCHEDULE_TIMEOUT);
1991 
1992 		if (timeout < 0) {
1993 			if (!resv)
1994 				dma_resv_unlock(bo->ttm.base.resv);
1995 			xe_bo_put(bo);
1996 			return ERR_PTR(timeout);
1997 		}
1998 	}
1999 
2000 	bo->created = true;
2001 	if (bulk)
2002 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
2003 	else
2004 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2005 
2006 	return bo;
2007 }
2008 
2009 static int __xe_bo_fixed_placement(struct xe_device *xe,
2010 				   struct xe_bo *bo,
2011 				   u32 flags,
2012 				   u64 start, u64 end, u64 size)
2013 {
2014 	struct ttm_place *place = bo->placements;
2015 
2016 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
2017 		return -EINVAL;
2018 
2019 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
2020 	place->fpfn = start >> PAGE_SHIFT;
2021 	place->lpfn = end >> PAGE_SHIFT;
2022 
2023 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
2024 	case XE_BO_FLAG_VRAM0:
2025 		place->mem_type = XE_PL_VRAM0;
2026 		break;
2027 	case XE_BO_FLAG_VRAM1:
2028 		place->mem_type = XE_PL_VRAM1;
2029 		break;
2030 	case XE_BO_FLAG_STOLEN:
2031 		place->mem_type = XE_PL_STOLEN;
2032 		break;
2033 
2034 	default:
2035 		/* 0 or multiple of the above set */
2036 		return -EINVAL;
2037 	}
2038 
2039 	bo->placement = (struct ttm_placement) {
2040 		.num_placement = 1,
2041 		.placement = place,
2042 	};
2043 
2044 	return 0;
2045 }
2046 
2047 static struct xe_bo *
2048 __xe_bo_create_locked(struct xe_device *xe,
2049 		      struct xe_tile *tile, struct xe_vm *vm,
2050 		      size_t size, u64 start, u64 end,
2051 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
2052 		      u64 alignment)
2053 {
2054 	struct xe_bo *bo = NULL;
2055 	int err;
2056 
2057 	if (vm)
2058 		xe_vm_assert_held(vm);
2059 
2060 	if (start || end != ~0ULL) {
2061 		bo = xe_bo_alloc();
2062 		if (IS_ERR(bo))
2063 			return bo;
2064 
2065 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
2066 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
2067 		if (err) {
2068 			xe_bo_free(bo);
2069 			return ERR_PTR(err);
2070 		}
2071 	}
2072 
2073 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
2074 				    vm && !xe_vm_in_fault_mode(vm) &&
2075 				    flags & XE_BO_FLAG_USER ?
2076 				    &vm->lru_bulk_move : NULL, size,
2077 				    cpu_caching, type, flags);
2078 	if (IS_ERR(bo))
2079 		return bo;
2080 
2081 	bo->min_align = alignment;
2082 
2083 	/*
2084 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
2085 	 * to ensure the shared resv doesn't disappear under the bo, the bo
2086 	 * will keep a reference to the vm, and avoid circular references
2087 	 * by having all the vm's bo references released at vm close
2088 	 * time.
2089 	 */
2090 	if (vm && xe_bo_is_user(bo))
2091 		xe_vm_get(vm);
2092 	bo->vm = vm;
2093 
2094 	if (bo->flags & XE_BO_FLAG_GGTT) {
2095 		struct xe_tile *t;
2096 		u8 id;
2097 
2098 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
2099 			if (!tile && flags & XE_BO_FLAG_STOLEN)
2100 				tile = xe_device_get_root_tile(xe);
2101 
2102 			xe_assert(xe, tile);
2103 		}
2104 
2105 		for_each_tile(t, xe, id) {
2106 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
2107 				continue;
2108 
2109 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
2110 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
2111 							   start + xe_bo_size(bo), U64_MAX);
2112 			} else {
2113 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
2114 			}
2115 			if (err)
2116 				goto err_unlock_put_bo;
2117 		}
2118 	}
2119 
2120 	trace_xe_bo_create(bo);
2121 	return bo;
2122 
2123 err_unlock_put_bo:
2124 	__xe_bo_unset_bulk_move(bo);
2125 	xe_bo_unlock_vm_held(bo);
2126 	xe_bo_put(bo);
2127 	return ERR_PTR(err);
2128 }
2129 
2130 struct xe_bo *
2131 xe_bo_create_locked_range(struct xe_device *xe,
2132 			  struct xe_tile *tile, struct xe_vm *vm,
2133 			  size_t size, u64 start, u64 end,
2134 			  enum ttm_bo_type type, u32 flags, u64 alignment)
2135 {
2136 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
2137 				     flags, alignment);
2138 }
2139 
2140 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
2141 				  struct xe_vm *vm, size_t size,
2142 				  enum ttm_bo_type type, u32 flags)
2143 {
2144 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
2145 				     flags, 0);
2146 }
2147 
2148 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
2149 				struct xe_vm *vm, size_t size,
2150 				u16 cpu_caching,
2151 				u32 flags)
2152 {
2153 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
2154 						 cpu_caching, ttm_bo_type_device,
2155 						 flags | XE_BO_FLAG_USER, 0);
2156 	if (!IS_ERR(bo))
2157 		xe_bo_unlock_vm_held(bo);
2158 
2159 	return bo;
2160 }
2161 
2162 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
2163 			   struct xe_vm *vm, size_t size,
2164 			   enum ttm_bo_type type, u32 flags)
2165 {
2166 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
2167 
2168 	if (!IS_ERR(bo))
2169 		xe_bo_unlock_vm_held(bo);
2170 
2171 	return bo;
2172 }
2173 
2174 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
2175 				      struct xe_vm *vm,
2176 				      size_t size, u64 offset,
2177 				      enum ttm_bo_type type, u32 flags)
2178 {
2179 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
2180 					       type, flags, 0);
2181 }
2182 
2183 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
2184 					      struct xe_tile *tile,
2185 					      struct xe_vm *vm,
2186 					      size_t size, u64 offset,
2187 					      enum ttm_bo_type type, u32 flags,
2188 					      u64 alignment)
2189 {
2190 	struct xe_bo *bo;
2191 	int err;
2192 	u64 start = offset == ~0ull ? 0 : offset;
2193 	u64 end = offset == ~0ull ? offset : start + size;
2194 
2195 	if (flags & XE_BO_FLAG_STOLEN &&
2196 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
2197 		flags |= XE_BO_FLAG_GGTT;
2198 
2199 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
2200 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
2201 				       alignment);
2202 	if (IS_ERR(bo))
2203 		return bo;
2204 
2205 	err = xe_bo_pin(bo);
2206 	if (err)
2207 		goto err_put;
2208 
2209 	err = xe_bo_vmap(bo);
2210 	if (err)
2211 		goto err_unpin;
2212 
2213 	xe_bo_unlock_vm_held(bo);
2214 
2215 	return bo;
2216 
2217 err_unpin:
2218 	xe_bo_unpin(bo);
2219 err_put:
2220 	xe_bo_unlock_vm_held(bo);
2221 	xe_bo_put(bo);
2222 	return ERR_PTR(err);
2223 }
2224 
2225 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2226 				   struct xe_vm *vm, size_t size,
2227 				   enum ttm_bo_type type, u32 flags)
2228 {
2229 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
2230 }
2231 
2232 static void __xe_bo_unpin_map_no_vm(void *arg)
2233 {
2234 	xe_bo_unpin_map_no_vm(arg);
2235 }
2236 
2237 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2238 					   size_t size, u32 flags)
2239 {
2240 	struct xe_bo *bo;
2241 	int ret;
2242 
2243 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
2244 
2245 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
2246 	if (IS_ERR(bo))
2247 		return bo;
2248 
2249 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2250 	if (ret)
2251 		return ERR_PTR(ret);
2252 
2253 	return bo;
2254 }
2255 
2256 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
2257 					     const void *data, size_t size, u32 flags)
2258 {
2259 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
2260 
2261 	if (IS_ERR(bo))
2262 		return bo;
2263 
2264 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
2265 
2266 	return bo;
2267 }
2268 
2269 /**
2270  * xe_managed_bo_reinit_in_vram - Move a managed BO from system memory to VRAM
2271  * @xe: xe device
2272  * @tile: Tile where the new buffer will be created
2273  * @src: Managed buffer object allocated in system memory
2274  *
2275  * Replace a managed src buffer object allocated in system memory with a new
2276  * one allocated in vram, copying the data between them.
2277  * Buffer object in VRAM is not going to have the same GGTT address, the caller
2278  * is responsible for making sure that any old references to it are updated.
2279  *
2280  * Returns 0 for success, negative error code otherwise.
2281  */
2282 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
2283 {
2284 	struct xe_bo *bo;
2285 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
2286 
2287 	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
2288 				      XE_BO_FLAG_PINNED_NORESTORE);
2289 
2290 	xe_assert(xe, IS_DGFX(xe));
2291 	xe_assert(xe, !(*src)->vmap.is_iomem);
2292 
2293 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
2294 					    xe_bo_size(*src), dst_flags);
2295 	if (IS_ERR(bo))
2296 		return PTR_ERR(bo);
2297 
2298 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
2299 	*src = bo;
2300 
2301 	return 0;
2302 }
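
/*
 * Example (editor's illustrative sketch): a caller that created a managed,
 * pinned and mapped BO in system memory early in probe can later move its
 * contents to VRAM once VRAM is usable. The "fw" structure and its "bo" member
 * are hypothetical; only the xe_managed_bo_*() calls are real.
 *
 *	fw->bo = xe_managed_bo_create_from_data(xe, tile, data, size,
 *						XE_BO_FLAG_SYSTEM |
 *						XE_BO_FLAG_GGTT);
 *	if (IS_ERR(fw->bo))
 *		return PTR_ERR(fw->bo);
 *
 *	// ... later, once VRAM is available ...
 *
 *	err = xe_managed_bo_reinit_in_vram(xe, tile, &fw->bo);
 *	if (err)
 *		return err;
 */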
2303 
2304 /*
2305  * XXX: This is in the VM bind data path, likely should calculate this once and
2306  * store, with a recalculation if the BO is moved.
2307  */
2308 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2309 {
2310 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2311 
2312 	switch (res->mem_type) {
2313 	case XE_PL_STOLEN:
2314 		return xe_ttm_stolen_gpu_offset(xe);
2315 	case XE_PL_TT:
2316 	case XE_PL_SYSTEM:
2317 		return 0;
2318 	default:
2319 		return res_to_mem_region(res)->dpa_base;
2320 	}
2321 	return 0;
2322 }
2323 
2324 /**
2325  * xe_bo_pin_external - pin an external BO
2326  * @bo: buffer object to be pinned
2327  * @in_place: Pin in current placement, don't attempt to migrate.
2328  *
2329  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2330  * BO. Unique call compared to xe_bo_pin as this function has its own set of
2331  * asserts and code to ensure evict / restore on suspend / resume.
2332  *
2333  * Returns 0 for success, negative error code otherwise.
2334  */
2335 int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
2336 {
2337 	struct xe_device *xe = xe_bo_device(bo);
2338 	int err;
2339 
2340 	xe_assert(xe, !bo->vm);
2341 	xe_assert(xe, xe_bo_is_user(bo));
2342 
2343 	if (!xe_bo_is_pinned(bo)) {
2344 		if (!in_place) {
2345 			err = xe_bo_validate(bo, NULL, false);
2346 			if (err)
2347 				return err;
2348 		}
2349 
2350 		spin_lock(&xe->pinned.lock);
2351 		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
2352 		spin_unlock(&xe->pinned.lock);
2353 	}
2354 
2355 	ttm_bo_pin(&bo->ttm);
2356 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2357 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2358 
2359 	/*
2360 	 * FIXME: If we always use the reserve / unreserve functions for locking
2361 	 * we do not need this.
2362 	 */
2363 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2364 
2365 	return 0;
2366 }
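
/*
 * Example (editor's illustrative sketch): how a dma-buf style pin path might
 * use xe_bo_pin_external() / xe_bo_unpin_external(), assuming the caller
 * already holds the bo's dma_resv lock as TTM pinning requires. This is not
 * the driver's actual dma-buf code; see xe_dma_buf.c for that.
 *
 *	err = xe_bo_pin_external(bo, false);
 *	if (err)
 *		return err;
 *
 *	// ... export / access the backing pages ...
 *
 *	xe_bo_unpin_external(bo);
 */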
2367 
2368 int xe_bo_pin(struct xe_bo *bo)
2369 {
2370 	struct ttm_place *place = &bo->placements[0];
2371 	struct xe_device *xe = xe_bo_device(bo);
2372 	int err;
2373 
2374 	/* We currently don't expect user BOs to be pinned */
2375 	xe_assert(xe, !xe_bo_is_user(bo));
2376 
2377 	/* Pinned object must be in GGTT or have pinned flag */
2378 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
2379 				   XE_BO_FLAG_GGTT));
2380 
2381 	/*
2382 	 * No reason we can't support pinning imported dma-bufs; we just don't
2383 	 * expect to pin an imported dma-buf.
2384 	 */
2385 	xe_assert(xe, !bo->ttm.base.import_attach);
2386 
2387 	/* We only expect at most 1 pin */
2388 	xe_assert(xe, !xe_bo_is_pinned(bo));
2389 
2390 	err = xe_bo_validate(bo, NULL, false);
2391 	if (err)
2392 		return err;
2393 
2394 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2395 		spin_lock(&xe->pinned.lock);
2396 		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
2397 			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
2398 		else
2399 			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
2400 		spin_unlock(&xe->pinned.lock);
2401 	}
2402 
2403 	ttm_bo_pin(&bo->ttm);
2404 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2405 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2406 
2407 	/*
2408 	 * FIXME: If we always use the reserve / unreserve functions for locking
2409 	 * we do not need this.
2410 	 */
2411 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2412 
2413 	return 0;
2414 }
2415 
2416 /**
2417  * xe_bo_unpin_external - unpin an external BO
2418  * @bo: buffer object to be unpinned
2419  *
2420  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2421  * BO. Unique call compared to xe_bo_unpin as this function has its own set of
2422  * asserts and code to ensure evict / restore on suspend / resume.
2425  */
2426 void xe_bo_unpin_external(struct xe_bo *bo)
2427 {
2428 	struct xe_device *xe = xe_bo_device(bo);
2429 
2430 	xe_assert(xe, !bo->vm);
2431 	xe_assert(xe, xe_bo_is_pinned(bo));
2432 	xe_assert(xe, xe_bo_is_user(bo));
2433 
2434 	spin_lock(&xe->pinned.lock);
2435 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
2436 		list_del_init(&bo->pinned_link);
2437 	spin_unlock(&xe->pinned.lock);
2438 
2439 	ttm_bo_unpin(&bo->ttm);
2440 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2441 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2442 
2443 	/*
2444 	 * FIXME: If we always use the reserve / unreserve functions for locking
2445 	 * we do not need this.
2446 	 */
2447 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2448 }
2449 
2450 void xe_bo_unpin(struct xe_bo *bo)
2451 {
2452 	struct ttm_place *place = &bo->placements[0];
2453 	struct xe_device *xe = xe_bo_device(bo);
2454 
2455 	xe_assert(xe, !bo->ttm.base.import_attach);
2456 	xe_assert(xe, xe_bo_is_pinned(bo));
2457 
2458 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2459 		spin_lock(&xe->pinned.lock);
2460 		xe_assert(xe, !list_empty(&bo->pinned_link));
2461 		list_del_init(&bo->pinned_link);
2462 		spin_unlock(&xe->pinned.lock);
2463 
2464 		if (bo->backup_obj) {
2465 			if (xe_bo_is_pinned(bo->backup_obj))
2466 				ttm_bo_unpin(&bo->backup_obj->ttm);
2467 			xe_bo_put(bo->backup_obj);
2468 			bo->backup_obj = NULL;
2469 		}
2470 	}
2471 	ttm_bo_unpin(&bo->ttm);
2472 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2473 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2474 }
2475 
2476 /**
2477  * xe_bo_validate() - Make sure the bo is in an allowed placement
2478  * @bo: The bo.
2479  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2480  *      NULL. Used together with @allow_res_evict.
2481  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2482  *                   reservation object.
2483  *
2484  * Make sure the bo is in allowed placement, migrating it if necessary. If
2485  * needed, other bos will be evicted. If the bos selected for eviction share
2486  * the @vm's reservation object, they can be evicted only if @allow_res_evict is
2487  * set to true; otherwise they will be bypassed.
2488  *
2489  * Return: 0 on success, negative error code on failure. May return
2490  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2491  */
2492 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
2493 {
2494 	struct ttm_operation_ctx ctx = {
2495 		.interruptible = true,
2496 		.no_wait_gpu = false,
2497 		.gfp_retry_mayfail = true,
2498 	};
2499 	int ret;
2500 
2501 	if (xe_bo_is_pinned(bo))
2502 		return 0;
2503 
2504 	if (vm) {
2505 		lockdep_assert_held(&vm->lock);
2506 		xe_vm_assert_held(vm);
2507 
2508 		ctx.allow_res_evict = allow_res_evict;
2509 		ctx.resv = xe_vm_resv(vm);
2510 	}
2511 
2512 	xe_vm_set_validating(vm, allow_res_evict);
2513 	trace_xe_bo_validate(bo);
2514 	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2515 	xe_vm_clear_validating(vm, allow_res_evict);
2516 
2517 	return ret;
2518 }
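
/*
 * Example (editor's illustrative sketch): validating a BO that is not tied to
 * a VM. The bo's dma_resv lock must be held across the call; for VM-private
 * BOs the shared vm resv is used instead and @vm / @allow_res_evict apply.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	err = xe_bo_validate(bo, NULL, false);
 *	xe_bo_unlock(bo);
 */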
2519 
2520 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2521 {
2522 	if (bo->destroy == &xe_ttm_bo_destroy)
2523 		return true;
2524 
2525 	return false;
2526 }
2527 
2528 /*
2529  * Resolve a BO address. There is no assert to check if the proper lock is held
2530  * so it should only be used in cases where it is not fatal to get the wrong
2531  * address, such as printing debug information, but not in cases where memory is
2532  * written based on this result.
2533  */
2534 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2535 {
2536 	struct xe_device *xe = xe_bo_device(bo);
2537 	struct xe_res_cursor cur;
2538 	u64 page;
2539 
2540 	xe_assert(xe, page_size <= PAGE_SIZE);
2541 	page = offset >> PAGE_SHIFT;
2542 	offset &= (PAGE_SIZE - 1);
2543 
2544 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2545 		xe_assert(xe, bo->ttm.ttm);
2546 
2547 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2548 				page_size, &cur);
2549 		return xe_res_dma(&cur) + offset;
2550 	} else {
2551 		struct xe_res_cursor cur;
2552 
2553 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2554 			     page_size, &cur);
2555 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2556 	}
2557 }
2558 
2559 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2560 {
2561 	if (!READ_ONCE(bo->ttm.pin_count))
2562 		xe_bo_assert_held(bo);
2563 	return __xe_bo_addr(bo, offset, page_size);
2564 }
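
/*
 * Example (editor's illustrative sketch): since __xe_bo_addr() does not verify
 * locking, limit it to debug output like the below. Anything that writes
 * memory based on the address should use xe_bo_addr() with the bo held or
 * pinned instead.
 *
 *	dma_addr_t addr = __xe_bo_addr(bo, 0, PAGE_SIZE);
 *
 *	drm_dbg(&xe->drm, "bo backing starts at %pad\n", &addr);
 */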
2565 
2566 int xe_bo_vmap(struct xe_bo *bo)
2567 {
2568 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2569 	void *virtual;
2570 	bool is_iomem;
2571 	int ret;
2572 
2573 	xe_bo_assert_held(bo);
2574 
2575 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2576 			!force_contiguous(bo->flags)))
2577 		return -EINVAL;
2578 
2579 	if (!iosys_map_is_null(&bo->vmap))
2580 		return 0;
2581 
2582 	/*
2583 	 * We use this more or less deprecated interface for now since
2584 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2585 	 * single page bos, which is done here.
2586 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2587 	 * to use struct iosys_map.
2588 	 */
2589 	ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
2590 	if (ret)
2591 		return ret;
2592 
2593 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
2594 	if (is_iomem)
2595 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
2596 	else
2597 		iosys_map_set_vaddr(&bo->vmap, virtual);
2598 
2599 	return 0;
2600 }
2601 
2602 static void __xe_bo_vunmap(struct xe_bo *bo)
2603 {
2604 	if (!iosys_map_is_null(&bo->vmap)) {
2605 		iosys_map_clear(&bo->vmap);
2606 		ttm_bo_kunmap(&bo->kmap);
2607 	}
2608 }
2609 
2610 void xe_bo_vunmap(struct xe_bo *bo)
2611 {
2612 	xe_bo_assert_held(bo);
2613 	__xe_bo_vunmap(bo);
2614 }
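
/*
 * Example (editor's illustrative sketch): CPU access to a mappable BO via
 * vmap. The BO must have been created with XE_BO_FLAG_NEEDS_CPU_ACCESS and a
 * placement that forces contiguous backing (see force_contiguous()), and its
 * dma_resv lock must be held across the map / access / unmap.
 *
 *	err = xe_bo_vmap(bo);
 *	if (err)
 *		return err;
 *
 *	xe_map_memcpy_to(xe, &bo->vmap, 0, data, len);
 *	xe_bo_vunmap(bo);
 */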
2615 
2616 static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
2617 {
2618 	if (value == DRM_XE_PXP_TYPE_NONE)
2619 		return 0;
2620 
2621 	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
2622 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
2623 		return -EINVAL;
2624 
2625 	return xe_pxp_key_assign(xe->pxp, bo);
2626 }
2627 
2628 typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
2629 					     struct xe_bo *bo,
2630 					     u64 value);
2631 
2632 static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
2633 	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
2634 };
2635 
2636 static int gem_create_user_ext_set_property(struct xe_device *xe,
2637 					    struct xe_bo *bo,
2638 					    u64 extension)
2639 {
2640 	u64 __user *address = u64_to_user_ptr(extension);
2641 	struct drm_xe_ext_set_property ext;
2642 	int err;
2643 	u32 idx;
2644 
2645 	err = copy_from_user(&ext, address, sizeof(ext));
2646 	if (XE_IOCTL_DBG(xe, err))
2647 		return -EFAULT;
2648 
2649 	if (XE_IOCTL_DBG(xe, ext.property >=
2650 			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
2651 	    XE_IOCTL_DBG(xe, ext.pad) ||
2652 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
2653 		return -EINVAL;
2654 
2655 	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
2656 	if (!gem_create_set_property_funcs[idx])
2657 		return -EINVAL;
2658 
2659 	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
2660 }
2661 
2662 typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
2663 					       struct xe_bo *bo,
2664 					       u64 extension);
2665 
2666 static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
2667 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
2668 };
2669 
2670 #define MAX_USER_EXTENSIONS	16
2671 static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
2672 				      u64 extensions, int ext_number)
2673 {
2674 	u64 __user *address = u64_to_user_ptr(extensions);
2675 	struct drm_xe_user_extension ext;
2676 	int err;
2677 	u32 idx;
2678 
2679 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
2680 		return -E2BIG;
2681 
2682 	err = copy_from_user(&ext, address, sizeof(ext));
2683 	if (XE_IOCTL_DBG(xe, err))
2684 		return -EFAULT;
2685 
2686 	if (XE_IOCTL_DBG(xe, ext.pad) ||
2687 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
2688 		return -EINVAL;
2689 
2690 	idx = array_index_nospec(ext.name,
2691 				 ARRAY_SIZE(gem_create_user_extension_funcs));
2692 	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
2693 	if (XE_IOCTL_DBG(xe, err))
2694 		return err;
2695 
2696 	if (ext.next_extension)
2697 		return gem_create_user_extensions(xe, bo, ext.next_extension,
2698 						  ++ext_number);
2699 
2700 	return 0;
2701 }
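
/*
 * Example (editor's illustrative sketch of the userspace side): chaining a
 * single SET_PROPERTY extension onto DRM_IOCTL_XE_GEM_CREATE to request a PXP
 * key assignment. Field names follow uapi/drm/xe_drm.h; treat the layout there
 * as authoritative.
 *
 *	struct drm_xe_ext_set_property ext = {
 *		.base.name = DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE,
 *		.value = DRM_XE_PXP_TYPE_HWDRM,
 *	};
 *	struct drm_xe_gem_create create = {
 *		.extensions = (uintptr_t)&ext,
 *		// size, placement, cpu_caching, etc. filled in as usual
 *	};
 */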
2702 
2703 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2704 			struct drm_file *file)
2705 {
2706 	struct xe_device *xe = to_xe_device(dev);
2707 	struct xe_file *xef = to_xe_file(file);
2708 	struct drm_xe_gem_create *args = data;
2709 	struct xe_vm *vm = NULL;
2710 	ktime_t end = 0;
2711 	struct xe_bo *bo;
2712 	unsigned int bo_flags;
2713 	u32 handle;
2714 	int err;
2715 
2716 	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2717 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2718 		return -EINVAL;
2719 
2720 	/* at least one valid memory placement must be specified */
2721 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2722 			 !args->placement))
2723 		return -EINVAL;
2724 
2725 	if (XE_IOCTL_DBG(xe, args->flags &
2726 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2727 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2728 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2729 		return -EINVAL;
2730 
2731 	if (XE_IOCTL_DBG(xe, args->handle))
2732 		return -EINVAL;
2733 
2734 	if (XE_IOCTL_DBG(xe, !args->size))
2735 		return -EINVAL;
2736 
2737 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2738 		return -EINVAL;
2739 
2740 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2741 		return -EINVAL;
2742 
2743 	bo_flags = 0;
2744 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2745 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2746 
2747 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2748 		bo_flags |= XE_BO_FLAG_SCANOUT;
2749 
2750 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2751 
2752 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2753 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2754 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2755 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2756 	    IS_ALIGNED(args->size, SZ_64K))
2757 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2758 
2759 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2760 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2761 			return -EINVAL;
2762 
2763 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2764 	}
2765 
2766 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2767 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2768 		return -EINVAL;
2769 
2770 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2771 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2772 		return -EINVAL;
2773 
2774 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2775 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2776 		return -EINVAL;
2777 
2778 	if (args->vm_id) {
2779 		vm = xe_vm_lookup(xef, args->vm_id);
2780 		if (XE_IOCTL_DBG(xe, !vm))
2781 			return -ENOENT;
2782 	}
2783 
2784 retry:
2785 	if (vm) {
2786 		err = xe_vm_lock(vm, true);
2787 		if (err)
2788 			goto out_vm;
2789 	}
2790 
2791 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2792 			       bo_flags);
2793 
2794 	if (vm)
2795 		xe_vm_unlock(vm);
2796 
2797 	if (IS_ERR(bo)) {
2798 		err = PTR_ERR(bo);
2799 		if (xe_vm_validate_should_retry(NULL, err, &end))
2800 			goto retry;
2801 		goto out_vm;
2802 	}
2803 
2804 	if (args->extensions) {
2805 		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
2806 		if (err)
2807 			goto out_bulk;
2808 	}
2809 
2810 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2811 	if (err)
2812 		goto out_bulk;
2813 
2814 	args->handle = handle;
2815 	goto out_put;
2816 
2817 out_bulk:
2818 	if (vm && !xe_vm_in_fault_mode(vm)) {
2819 		xe_vm_lock(vm, false);
2820 		__xe_bo_unset_bulk_move(bo);
2821 		xe_vm_unlock(vm);
2822 	}
2823 out_put:
2824 	xe_bo_put(bo);
2825 out_vm:
2826 	if (vm)
2827 		xe_vm_put(vm);
2828 
2829 	return err;
2830 }
2831 
2832 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2833 			     struct drm_file *file)
2834 {
2835 	struct xe_device *xe = to_xe_device(dev);
2836 	struct drm_xe_gem_mmap_offset *args = data;
2837 	struct drm_gem_object *gem_obj;
2838 
2839 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2840 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2841 		return -EINVAL;
2842 
2843 	if (XE_IOCTL_DBG(xe, args->flags &
2844 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
2845 		return -EINVAL;
2846 
2847 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
2848 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
2849 			return -EINVAL;
2850 
2851 		if (XE_IOCTL_DBG(xe, args->handle))
2852 			return -EINVAL;
2853 
2854 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
2855 			return -EINVAL;
2856 
2857 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
2858 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
2859 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
2860 		return 0;
2861 	}
2862 
2863 	gem_obj = drm_gem_object_lookup(file, args->handle);
2864 	if (XE_IOCTL_DBG(xe, !gem_obj))
2865 		return -ENOENT;
2866 
2867 	/* The mmap offset was set up at BO allocation time. */
2868 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2869 
2870 	xe_bo_put(gem_to_xe_bo(gem_obj));
2871 	return 0;
2872 }
2873 
2874 /**
2875  * xe_bo_lock() - Lock the buffer object's dma_resv object
2876  * @bo: The struct xe_bo whose lock is to be taken
2877  * @intr: Whether to perform any wait interruptible
2878  *
2879  * Locks the buffer object's dma_resv object. If the buffer object is
2880  * pointing to a shared dma_resv object, that shared lock is locked.
2881  *
2882  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2883  * contended lock was interrupted. If @intr is set to false, the
2884  * function always returns 0.
2885  */
2886 int xe_bo_lock(struct xe_bo *bo, bool intr)
2887 {
2888 	if (intr)
2889 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2890 
2891 	dma_resv_lock(bo->ttm.base.resv, NULL);
2892 
2893 	return 0;
2894 }
2895 
2896 /**
2897  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2898  * @bo: The struct xe_bo whose lock is to be released.
2899  *
2900  * Unlock a buffer object lock that was locked by xe_bo_lock().
2901  */
2902 void xe_bo_unlock(struct xe_bo *bo)
2903 {
2904 	dma_resv_unlock(bo->ttm.base.resv);
2905 }
2906 
2907 /**
2908  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2909  * @bo: The buffer object to migrate
2910  * @mem_type: The TTM memory type intended to migrate to
2911  *
2912  * Check whether the buffer object supports migration to the
2913  * given memory type. Note that pinning may affect the ability to migrate as
2914  * returned by this function.
2915  *
2916  * This function is primarily intended as a helper for checking the
2917  * possibility to migrate buffer objects and can be called without
2918  * the object lock held.
2919  *
2920  * Return: true if migration is possible, false otherwise.
2921  */
2922 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2923 {
2924 	unsigned int cur_place;
2925 
2926 	if (bo->ttm.type == ttm_bo_type_kernel)
2927 		return true;
2928 
2929 	if (bo->ttm.type == ttm_bo_type_sg)
2930 		return false;
2931 
2932 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2933 	     cur_place++) {
2934 		if (bo->placements[cur_place].mem_type == mem_type)
2935 			return true;
2936 	}
2937 
2938 	return false;
2939 }
2940 
2941 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2942 {
2943 	memset(place, 0, sizeof(*place));
2944 	place->mem_type = mem_type;
2945 }
2946 
2947 /**
2948  * xe_bo_migrate - Migrate an object to the desired region id
2949  * @bo: The buffer object to migrate.
2950  * @mem_type: The TTM region type to migrate to.
2951  *
2952  * Attempt to migrate the buffer object to the desired memory region. The
2953  * buffer object may not be pinned, and must be locked.
2954  * On successful completion, the object memory type will be updated,
2955  * but an async migration task may not have completed yet, and to
2956  * accomplish that, the object's kernel fences must be signaled with
2957  * the object lock held.
2958  *
2959  * Return: 0 on success. Negative error code on failure. In particular may
2960  * return -EINTR or -ERESTARTSYS if signal pending.
2961  */
2962 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2963 {
2964 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2965 	struct ttm_operation_ctx ctx = {
2966 		.interruptible = true,
2967 		.no_wait_gpu = false,
2968 		.gfp_retry_mayfail = true,
2969 	};
2970 	struct ttm_placement placement;
2971 	struct ttm_place requested;
2972 
2973 	xe_bo_assert_held(bo);
2974 
2975 	if (bo->ttm.resource->mem_type == mem_type)
2976 		return 0;
2977 
2978 	if (xe_bo_is_pinned(bo))
2979 		return -EBUSY;
2980 
2981 	if (!xe_bo_can_migrate(bo, mem_type))
2982 		return -EINVAL;
2983 
2984 	xe_place_from_ttm_type(mem_type, &requested);
2985 	placement.num_placement = 1;
2986 	placement.placement = &requested;
2987 
2988 	/*
2989 	 * Stolen needs to be handled like below VRAM handling if we ever need
2990 	 * to support it.
2991 	 */
2992 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2993 
2994 	if (mem_type_is_vram(mem_type)) {
2995 		u32 c = 0;
2996 
2997 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2998 	}
2999 
3000 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
3001 }
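
/*
 * Example (editor's illustrative sketch): migrating a BO to system memory and
 * waiting for the async move to finish, along the lines described above.
 * Error handling is abbreviated.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	if (xe_bo_can_migrate(bo, XE_PL_TT))
 *		err = xe_bo_migrate(bo, XE_PL_TT);
 *	if (!err)
 *		dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 *				      true, MAX_SCHEDULE_TIMEOUT);
 *	xe_bo_unlock(bo);
 */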
3002 
3003 /**
3004  * xe_bo_evict - Evict an object to evict placement
3005  * @bo: The buffer object to migrate.
3006  *
3007  * On successful completion, the object memory will be moved to evict
3008  * placement. This function blocks until the object has been fully moved.
3009  *
3010  * Return: 0 on success. Negative error code on failure.
3011  */
3012 int xe_bo_evict(struct xe_bo *bo)
3013 {
3014 	struct ttm_operation_ctx ctx = {
3015 		.interruptible = false,
3016 		.no_wait_gpu = false,
3017 		.gfp_retry_mayfail = true,
3018 	};
3019 	struct ttm_placement placement;
3020 	int ret;
3021 
3022 	xe_evict_flags(&bo->ttm, &placement);
3023 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
3024 	if (ret)
3025 		return ret;
3026 
3027 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
3028 			      false, MAX_SCHEDULE_TIMEOUT);
3029 
3030 	return 0;
3031 }
3032 
3033 /**
3034  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
3035  * placed in system memory.
3036  * @bo: The xe_bo
3037  *
3038  * Return: true if extra pages need to be allocated, false otherwise.
3039  */
3040 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
3041 {
3042 	struct xe_device *xe = xe_bo_device(bo);
3043 
3044 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
3045 		return false;
3046 
3047 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
3048 		return false;
3049 
3050 	/* On discrete GPUs, if the GPU can access this buffer from
3051 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
3052 	 * can't be used since there's no CCS storage associated with
3053 	 * non-VRAM addresses.
3054 	 */
3055 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
3056 		return false;
3057 
3058 	/*
3059 	 * Compression implies coh_none, therefore we know for sure that WB
3060 	 * memory can't currently use compression, which is likely one of the
3061 	 * common cases.
3062 	 */
3063 	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
3064 		return false;
3065 
3066 	return true;
3067 }
3068 
3069 /**
3070  * __xe_bo_release_dummy() - Dummy kref release function
3071  * @kref: The embedded struct kref.
3072  *
3073  * Dummy release function for xe_bo_put_deferred(). Keep off.
3074  */
3075 void __xe_bo_release_dummy(struct kref *kref)
3076 {
3077 }
3078 
3079 /**
3080  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
3081  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
3082  *
3083  * Puts all bos whose put was deferred by xe_bo_put_deferred().
3084  * The @deferred list can be either an onstack local list or a global
3085  * shared list used by a workqueue.
3086  */
3087 void xe_bo_put_commit(struct llist_head *deferred)
3088 {
3089 	struct llist_node *freed;
3090 	struct xe_bo *bo, *next;
3091 
3092 	if (!deferred)
3093 		return;
3094 
3095 	freed = llist_del_all(deferred);
3096 	if (!freed)
3097 		return;
3098 
3099 	llist_for_each_entry_safe(bo, next, freed, freed)
3100 		drm_gem_object_free(&bo->ttm.base.refcount);
3101 }
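
/*
 * Example (editor's illustrative sketch): deferring puts from a context that
 * cannot sleep and committing them later. xe_bo_put_deferred() is the helper
 * in xe_bo.h that pairs with xe_bo_put_commit().
 *
 *	LLIST_HEAD(deferred);
 *
 *	// lockless / atomic context
 *	xe_bo_put_deferred(bo, &deferred);
 *
 *	// later, from a context that may sleep
 *	xe_bo_put_commit(&deferred);
 */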
3102 
3103 static void xe_bo_dev_work_func(struct work_struct *work)
3104 {
3105 	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);
3106 
3107 	xe_bo_put_commit(&bo_dev->async_list);
3108 }
3109 
3110 /**
3111  * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
3112  * @bo_dev: The BO dev structure
3113  */
3114 void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
3115 {
3116 	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
3117 }
3118 
3119 /**
3120  * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
3121  * @bo_dev: The BO dev structure
3122  */
3123 void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
3124 {
3125 	flush_work(&bo_dev->async_free);
3126 }
3127 
3128 void xe_bo_put(struct xe_bo *bo)
3129 {
3130 	struct xe_tile *tile;
3131 	u8 id;
3132 
3133 	might_sleep();
3134 	if (bo) {
3135 #ifdef CONFIG_PROC_FS
3136 		if (bo->client)
3137 			might_lock(&bo->client->bos_lock);
3138 #endif
3139 		for_each_tile(tile, xe_bo_device(bo), id)
3140 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
3141 				xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt);
3142 		drm_gem_object_put(&bo->ttm.base);
3143 	}
3144 }
3145 
3146 /**
3147  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
3148  * @file_priv: The DRM file from which the request originates
3149  * @dev: The DRM device
3150  * @args: The dumb buffer creation parameters
3151  *
3152  * See dumb_create() hook in include/drm/drm_drv.h
3153  *
3154  * Return: 0 on success, negative error code on failure.
3155  */
3156 int xe_bo_dumb_create(struct drm_file *file_priv,
3157 		      struct drm_device *dev,
3158 		      struct drm_mode_create_dumb *args)
3159 {
3160 	struct xe_device *xe = to_xe_device(dev);
3161 	struct xe_bo *bo;
3162 	uint32_t handle;
3163 	int cpp = DIV_ROUND_UP(args->bpp, 8);
3164 	int err;
3165 	u32 page_size = max_t(u32, PAGE_SIZE,
3166 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
3167 
3168 	args->pitch = ALIGN(args->width * cpp, 64);
3169 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
3170 			   page_size);
3171 
3172 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
3173 			       DRM_XE_GEM_CPU_CACHING_WC,
3174 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
3175 			       XE_BO_FLAG_SCANOUT |
3176 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
3177 	if (IS_ERR(bo))
3178 		return PTR_ERR(bo);
3179 
3180 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
3181 	/* drop reference from allocate - handle holds it now */
3182 	drm_gem_object_put(&bo->ttm.base);
3183 	if (!err)
3184 		args->handle = handle;
3185 	return err;
3186 }
3187 
3188 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
3189 {
3190 	struct ttm_buffer_object *tbo = &bo->ttm;
3191 	struct ttm_device *bdev = tbo->bdev;
3192 
3193 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
3194 
3195 	list_del_init(&bo->vram_userfault_link);
3196 }
3197 
3198 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
3199 #include "tests/xe_bo.c"
3200 #endif
3201