xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 52e6b198833411564e0b9ce6e96bbd3d72f961e7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_dumb_buffers.h>
13 #include <drm/drm_gem_ttm_helper.h>
14 #include <drm/drm_managed.h>
15 #include <drm/ttm/ttm_backup.h>
16 #include <drm/ttm/ttm_device.h>
17 #include <drm/ttm/ttm_placement.h>
18 #include <drm/ttm/ttm_tt.h>
19 #include <uapi/drm/xe_drm.h>
20 
21 #include <kunit/static_stub.h>
22 
23 #include <trace/events/gpu_mem.h>
24 
25 #include "xe_device.h"
26 #include "xe_dma_buf.h"
27 #include "xe_drm_client.h"
28 #include "xe_ggtt.h"
29 #include "xe_gt.h"
30 #include "xe_map.h"
31 #include "xe_migrate.h"
32 #include "xe_pm.h"
33 #include "xe_preempt_fence.h"
34 #include "xe_pxp.h"
35 #include "xe_res_cursor.h"
36 #include "xe_shrinker.h"
37 #include "xe_sriov_vf_ccs.h"
38 #include "xe_trace_bo.h"
39 #include "xe_ttm_stolen_mgr.h"
40 #include "xe_vm.h"
41 #include "xe_vram_types.h"
42 
43 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
44 	[XE_PL_SYSTEM] = "system",
45 	[XE_PL_TT] = "gtt",
46 	[XE_PL_VRAM0] = "vram0",
47 	[XE_PL_VRAM1] = "vram1",
48 	[XE_PL_STOLEN] = "stolen"
49 };
50 
51 static const struct ttm_place sys_placement_flags = {
52 	.fpfn = 0,
53 	.lpfn = 0,
54 	.mem_type = XE_PL_SYSTEM,
55 	.flags = 0,
56 };
57 
58 static struct ttm_placement sys_placement = {
59 	.num_placement = 1,
60 	.placement = &sys_placement_flags,
61 };
62 
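/*
 * Left zero-initialized, i.e. num_placement == 0. Returning this from
 * evict_flags() asks TTM to simply drop the backing store instead of
 * moving it, which is the desired behaviour once the device has been
 * unplugged.
 */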
63 static struct ttm_placement purge_placement;
64 
65 static const struct ttm_place tt_placement_flags[] = {
66 	{
67 		.fpfn = 0,
68 		.lpfn = 0,
69 		.mem_type = XE_PL_TT,
70 		.flags = TTM_PL_FLAG_DESIRED,
71 	},
72 	{
73 		.fpfn = 0,
74 		.lpfn = 0,
75 		.mem_type = XE_PL_SYSTEM,
76 		.flags = TTM_PL_FLAG_FALLBACK,
77 	}
78 };
79 
80 static struct ttm_placement tt_placement = {
81 	.num_placement = 2,
82 	.placement = tt_placement_flags,
83 };
84 
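/*
 * True for the XE_PL_VRAM0/1 placements. Stolen memory is handled
 * separately and does not count as VRAM here.
 */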
85 bool mem_type_is_vram(u32 mem_type)
86 {
87 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
88 }
89 
90 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
91 {
92 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
93 }
94 
95 static bool resource_is_vram(struct ttm_resource *res)
96 {
97 	return mem_type_is_vram(res->mem_type);
98 }
99 
100 bool xe_bo_is_vram(struct xe_bo *bo)
101 {
102 	return resource_is_vram(bo->ttm.resource) ||
103 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
104 }
105 
106 bool xe_bo_is_stolen(struct xe_bo *bo)
107 {
108 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
109 }
110 
111 /**
112  * xe_bo_has_single_placement - check if BO is placed only in one memory location
113  * @bo: The BO
114  *
115  * This function checks whether a given BO is placed in only one memory location.
116  *
117  * Returns: true if the BO is placed in a single memory location, false otherwise.
118  *
119  */
120 bool xe_bo_has_single_placement(struct xe_bo *bo)
121 {
122 	return bo->placement.num_placement == 1;
123 }
124 
125 /**
126  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
127  * @bo: The BO
128  *
129  * The stolen memory is accessed through the PCI BAR for both DGFX and some
130  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
131  *
132  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
133  */
134 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
135 {
136 	return xe_bo_is_stolen(bo) &&
137 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
138 }
139 
140 /**
141  * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
142  * @bo: The BO
143  *
144  * Check if a given bo is bound through VM_BIND. This requires the
145  * reservation lock for the BO to be held.
146  *
147  * Returns: boolean
148  */
149 bool xe_bo_is_vm_bound(struct xe_bo *bo)
150 {
151 	xe_bo_assert_held(bo);
152 
153 	return !list_empty(&bo->ttm.base.gpuva.list);
154 }
155 
156 static bool xe_bo_is_user(struct xe_bo *bo)
157 {
158 	return bo->flags & XE_BO_FLAG_USER;
159 }
160 
161 static struct xe_migrate *
162 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
163 {
164 	struct xe_tile *tile;
165 
166 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
167 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
168 	return tile->migrate;
169 }
170 
171 static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
172 {
173 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
174 	struct ttm_resource_manager *mgr;
175 	struct xe_ttm_vram_mgr *vram_mgr;
176 
177 	xe_assert(xe, resource_is_vram(res));
178 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
179 	vram_mgr = to_xe_ttm_vram_mgr(mgr);
180 
181 	return container_of(vram_mgr, struct xe_vram_region, ttm);
182 }
183 
184 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
185 			   u32 bo_flags, u32 *c)
186 {
187 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
188 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
189 
190 		bo->placements[*c] = (struct ttm_place) {
191 			.mem_type = XE_PL_TT,
192 			.flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
193 			TTM_PL_FLAG_FALLBACK : 0,
194 		};
195 		*c += 1;
196 	}
197 }
198 
199 static bool force_contiguous(u32 bo_flags)
200 {
201 	if (bo_flags & XE_BO_FLAG_STOLEN)
202 		return true; /* users expect this */
203 	else if (bo_flags & XE_BO_FLAG_PINNED &&
204 		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
205 		return true; /* needs vmap */
206 	else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
207 		return true;
208 
209 	/*
210 	 * For eviction / restore on suspend / resume, objects pinned in VRAM
211 	 * must be contiguous. Also, only contiguous BOs support xe_bo_vmap.
212 	 */
213 	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
214 	       bo_flags & XE_BO_FLAG_PINNED;
215 }
216 
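/*
 * Add a VRAM placement for @mem_type. On small-BAR configurations
 * (io_size < usable_size), BOs needing CPU access are constrained to
 * the CPU-visible range, while all other BOs are allocated top-down
 * so they tend to stay clear of the visible window.
 */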
217 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
218 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
219 {
220 	struct ttm_place place = { .mem_type = mem_type };
221 	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
222 	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);
223 
224 	struct xe_vram_region *vram;
225 	u64 io_size;
226 
227 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
228 
229 	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
230 	xe_assert(xe, vram && vram->usable_size);
231 	io_size = vram->io_size;
232 
233 	if (force_contiguous(bo_flags))
234 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
235 
236 	if (io_size < vram->usable_size) {
237 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
238 			place.fpfn = 0;
239 			place.lpfn = io_size >> PAGE_SHIFT;
240 		} else {
241 			place.flags |= TTM_PL_FLAG_TOPDOWN;
242 		}
243 	}
244 	places[*c] = place;
245 	*c += 1;
246 }
247 
248 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
249 			 u32 bo_flags, u32 *c)
250 {
251 	if (bo_flags & XE_BO_FLAG_VRAM0)
252 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
253 	if (bo_flags & XE_BO_FLAG_VRAM1)
254 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
255 }
256 
257 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
258 			   u32 bo_flags, u32 *c)
259 {
260 	if (bo_flags & XE_BO_FLAG_STOLEN) {
261 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
262 
263 		bo->placements[*c] = (struct ttm_place) {
264 			.mem_type = XE_PL_STOLEN,
265 			.flags = force_contiguous(bo_flags) ?
266 				TTM_PL_FLAG_CONTIGUOUS : 0,
267 		};
268 		*c += 1;
269 	}
270 }
271 
272 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
273 				       u32 bo_flags)
274 {
275 	u32 c = 0;
276 
277 	try_add_vram(xe, bo, bo_flags, &c);
278 	try_add_system(xe, bo, bo_flags, &c);
279 	try_add_stolen(xe, bo, bo_flags, &c);
280 
281 	if (!c)
282 		return -EINVAL;
283 
284 	bo->placement = (struct ttm_placement) {
285 		.num_placement = c,
286 		.placement = bo->placements,
287 	};
288 
289 	return 0;
290 }
291 
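/*
 * Illustrative example (not a call site in this file): a BO allowed in
 * VRAM0 with a system fallback ends up with two placements, tried in
 * array order - XE_PL_VRAM0 first, then XE_PL_TT flagged as
 * TTM_PL_FLAG_FALLBACK:
 *
 *	err = xe_bo_placement_for_flags(xe, bo,
 *					XE_BO_FLAG_VRAM0 | XE_BO_FLAG_SYSTEM);
 */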
292 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
293 			      u32 bo_flags)
294 {
295 	xe_bo_assert_held(bo);
296 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
297 }
298 
299 static void xe_evict_flags(struct ttm_buffer_object *tbo,
300 			   struct ttm_placement *placement)
301 {
302 	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
303 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
304 	struct xe_bo *bo;
305 
306 	if (!xe_bo_is_xe_bo(tbo)) {
307 		/* Don't handle scatter gather BOs */
308 		if (tbo->type == ttm_bo_type_sg) {
309 			placement->num_placement = 0;
310 			return;
311 		}
312 
313 		*placement = device_unplugged ? purge_placement : sys_placement;
314 		return;
315 	}
316 
317 	bo = ttm_to_xe_bo(tbo);
318 	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
319 		*placement = sys_placement;
320 		return;
321 	}
322 
323 	if (device_unplugged && !tbo->base.dma_buf) {
324 		*placement = purge_placement;
325 		return;
326 	}
327 
328 	/*
329 	 * For xe, sg bos that are evicted to system just trigger a
330 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
331 	 */
332 	switch (tbo->resource->mem_type) {
333 	case XE_PL_VRAM0:
334 	case XE_PL_VRAM1:
335 	case XE_PL_STOLEN:
336 		*placement = tt_placement;
337 		break;
338 	case XE_PL_TT:
339 	default:
340 		*placement = sys_placement;
341 		break;
342 	}
343 }
344 
345 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */
346 struct xe_ttm_tt {
347 	struct ttm_tt ttm;
348 	struct sg_table sgt;
349 	struct sg_table *sg;
350 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
351 	bool purgeable;
352 };
353 
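/*
 * Build an sg_table for the ttm_tt pages and DMA-map it for the
 * device. A no-op if a mapping already exists; xe_tt_unmap_sg()
 * undoes both steps.
 */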
354 static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
355 {
356 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
357 	unsigned long num_pages = tt->num_pages;
358 	int ret;
359 
360 	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
361 		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
362 
363 	if (xe_tt->sg)
364 		return 0;
365 
366 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
367 						num_pages, 0,
368 						(u64)num_pages << PAGE_SHIFT,
369 						xe_sg_segment_size(xe->drm.dev),
370 						GFP_KERNEL);
371 	if (ret)
372 		return ret;
373 
374 	xe_tt->sg = &xe_tt->sgt;
375 	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
376 			      DMA_ATTR_SKIP_CPU_SYNC);
377 	if (ret) {
378 		sg_free_table(xe_tt->sg);
379 		xe_tt->sg = NULL;
380 		return ret;
381 	}
382 
383 	return 0;
384 }
385 
386 static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
387 {
388 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
389 
390 	if (xe_tt->sg) {
391 		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
392 				  DMA_BIDIRECTIONAL, 0);
393 		sg_free_table(xe_tt->sg);
394 		xe_tt->sg = NULL;
395 	}
396 }
397 
398 struct sg_table *xe_bo_sg(struct xe_bo *bo)
399 {
400 	struct ttm_tt *tt = bo->ttm.ttm;
401 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
402 
403 	return xe_tt->sg;
404 }
405 
406 /*
407  * Account ttm pages against the device shrinker's shrinkable and
408  * purgeable counts.
409  */
410 static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
411 {
412 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
413 
414 	if (xe_tt->purgeable)
415 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
416 	else
417 		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
418 }
419 
420 static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
421 {
422 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
423 
424 	if (xe_tt->purgeable)
425 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
426 	else
427 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
428 }
429 
430 static void update_global_total_pages(struct ttm_device *ttm_dev,
431 				      long num_pages)
432 {
433 #if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
434 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
435 	u64 global_total_pages =
436 		atomic64_add_return(num_pages, &xe->global_total_pages);
437 
438 	trace_gpu_mem_total(xe->drm.primary->index, 0,
439 			    global_total_pages << PAGE_SHIFT);
440 #endif
441 }
442 
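/*
 * Allocate and initialize the subclassed struct xe_ttm_tt for @ttm_bo.
 * Extra pages are appended for CCS metadata where needed, the caching
 * mode is derived from bo->cpu_caching and the BO flags, and backup
 * storage for the shrinker is set up for non-sg objects.
 */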
443 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
444 				       u32 page_flags)
445 {
446 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
447 	struct xe_device *xe = xe_bo_device(bo);
448 	struct xe_ttm_tt *xe_tt;
449 	struct ttm_tt *tt;
450 	unsigned long extra_pages;
451 	enum ttm_caching caching = ttm_cached;
452 	int err;
453 
454 	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
455 	if (!xe_tt)
456 		return NULL;
457 
458 	tt = &xe_tt->ttm;
459 
460 	extra_pages = 0;
461 	if (xe_bo_needs_ccs_pages(bo))
462 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
463 					   PAGE_SIZE);
464 
465 	/*
466 	 * DGFX system memory is always WB / ttm_cached, since
467 	 * other caching modes are only supported on x86. DGFX
468 	 * GPU system memory accesses are always coherent with the
469 	 * CPU.
470 	 */
471 	if (!IS_DGFX(xe)) {
472 		switch (bo->cpu_caching) {
473 		case DRM_XE_GEM_CPU_CACHING_WC:
474 			caching = ttm_write_combined;
475 			break;
476 		default:
477 			caching = ttm_cached;
478 			break;
479 		}
480 
481 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
482 
483 		/*
484 		 * Display scanout is always non-coherent with the CPU cache.
485 		 *
486 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
487 		 * non-coherent and require a CPU:WC mapping.
488 		 */
489 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
490 		    (xe->info.graphics_verx100 >= 1270 &&
491 		     bo->flags & XE_BO_FLAG_PAGETABLE))
492 			caching = ttm_write_combined;
493 	}
494 
495 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
496 		/*
497 		 * Valid only for internally-created buffers, for
498 		 * which cpu_caching is never initialized.
499 		 */
500 		xe_assert(xe, bo->cpu_caching == 0);
501 		caching = ttm_uncached;
502 	}
503 
504 	if (ttm_bo->type != ttm_bo_type_sg)
505 		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
506 
507 	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
508 	if (err) {
509 		kfree(xe_tt);
510 		return NULL;
511 	}
512 
513 	if (ttm_bo->type != ttm_bo_type_sg) {
514 		err = ttm_tt_setup_backup(tt);
515 		if (err) {
516 			ttm_tt_fini(tt);
517 			kfree(xe_tt);
518 			return NULL;
519 		}
520 	}
521 
522 	return tt;
523 }
524 
525 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
526 			      struct ttm_operation_ctx *ctx)
527 {
528 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
529 	int err;
530 
531 	/*
532 	 * dma-bufs are not populated with pages, and the dma-
533 	 * addresses are set up when moved to XE_PL_TT.
534 	 */
535 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
536 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
537 		return 0;
538 
539 	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
540 		err = ttm_tt_restore(ttm_dev, tt, ctx);
541 	} else {
542 		ttm_tt_clear_backed_up(tt);
543 		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
544 	}
545 	if (err)
546 		return err;
547 
548 	xe_tt->purgeable = false;
549 	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
550 	update_global_total_pages(ttm_dev, tt->num_pages);
551 
552 	return 0;
553 }
554 
555 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
556 {
557 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
558 
559 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
560 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
561 		return;
562 
563 	xe_tt_unmap_sg(xe, tt);
564 
565 	ttm_pool_free(&ttm_dev->pool, tt);
566 	xe_ttm_tt_account_subtract(xe, tt);
567 	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
568 }
569 
570 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
571 {
572 	ttm_tt_fini(tt);
573 	kfree(tt);
574 }
575 
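/* True if the entire resource fits in the CPU-visible portion of VRAM. */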
576 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
577 {
578 	struct xe_ttm_vram_mgr_resource *vres =
579 		to_xe_ttm_vram_mgr_resource(mem);
580 
581 	return vres->used_visible_size == mem->size;
582 }
583 
584 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
585 				 struct ttm_resource *mem)
586 {
587 	struct xe_device *xe = ttm_to_xe_device(bdev);
588 
589 	switch (mem->mem_type) {
590 	case XE_PL_SYSTEM:
591 	case XE_PL_TT:
592 		return 0;
593 	case XE_PL_VRAM0:
594 	case XE_PL_VRAM1: {
595 		struct xe_vram_region *vram = res_to_mem_region(mem);
596 
597 		if (!xe_ttm_resource_visible(mem))
598 			return -EINVAL;
599 
600 		mem->bus.offset = mem->start << PAGE_SHIFT;
601 
602 		if (vram->mapping &&
603 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
604 			mem->bus.addr = (u8 __force *)vram->mapping +
605 				mem->bus.offset;
606 
607 		mem->bus.offset += vram->io_start;
608 		mem->bus.is_iomem = true;
609 
610 #if !IS_ENABLED(CONFIG_X86)
611 		mem->bus.caching = ttm_write_combined;
612 #endif
613 		return 0;
614 	} case XE_PL_STOLEN:
615 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
616 	default:
617 		return -EINVAL;
618 	}
619 }
620 
621 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
622 				const struct ttm_operation_ctx *ctx)
623 {
624 	struct dma_resv_iter cursor;
625 	struct dma_fence *fence;
626 	struct drm_gem_object *obj = &bo->ttm.base;
627 	struct drm_gpuvm_bo *vm_bo;
628 	bool idle = false;
629 	int ret = 0;
630 
631 	dma_resv_assert_held(bo->ttm.base.resv);
632 
633 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
634 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
635 				    DMA_RESV_USAGE_BOOKKEEP);
636 		dma_resv_for_each_fence_unlocked(&cursor, fence)
637 			dma_fence_enable_sw_signaling(fence);
638 		dma_resv_iter_end(&cursor);
639 	}
640 
641 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
642 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
643 		struct drm_gpuva *gpuva;
644 
645 		if (!xe_vm_in_fault_mode(vm)) {
646 			drm_gpuvm_bo_evict(vm_bo, true);
647 			continue;
648 		}
649 
650 		if (!idle) {
651 			long timeout;
652 
653 			if (ctx->no_wait_gpu &&
654 			    !dma_resv_test_signaled(bo->ttm.base.resv,
655 						    DMA_RESV_USAGE_BOOKKEEP))
656 				return -EBUSY;
657 
658 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
659 							DMA_RESV_USAGE_BOOKKEEP,
660 							ctx->interruptible,
661 							MAX_SCHEDULE_TIMEOUT);
662 			if (!timeout)
663 				return -ETIME;
664 			if (timeout < 0)
665 				return timeout;
666 
667 			idle = true;
668 		}
669 
670 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
671 			struct xe_vma *vma = gpuva_to_vma(gpuva);
672 
673 			trace_xe_vma_evict(vma);
674 			ret = xe_vm_invalidate_vma(vma);
675 			if (XE_WARN_ON(ret))
676 				return ret;
677 		}
678 	}
679 
680 	return ret;
681 }
682 
683 /*
684  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
685  * Note that unmapping the attachment is deferred to the next
686  * map_attachment time, or to bo destroy (after idling), whichever comes first.
687  * This is to avoid syncing before unmap_attachment(), assuming that the
688  * caller relies on idling the reservation object before moving the
689  * backing store out. Should that assumption not hold, then we will be able
690  * to unconditionally call unmap_attachment() when moving out to system.
691  */
692 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
693 			     struct ttm_resource *new_res)
694 {
695 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
696 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
697 					       ttm);
698 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
699 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
700 	struct sg_table *sg;
701 
702 	xe_assert(xe, attach);
703 	xe_assert(xe, ttm_bo->ttm);
704 
705 	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
706 	    ttm_bo->sg) {
707 		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
708 				      false, MAX_SCHEDULE_TIMEOUT);
709 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
710 		ttm_bo->sg = NULL;
711 	}
712 
713 	if (new_res->mem_type == XE_PL_SYSTEM)
714 		goto out;
715 
716 	if (ttm_bo->sg) {
717 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
718 		ttm_bo->sg = NULL;
719 	}
720 
721 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
722 	if (IS_ERR(sg))
723 		return PTR_ERR(sg);
724 
725 	ttm_bo->sg = sg;
726 	xe_tt->sg = sg;
727 
728 out:
729 	ttm_bo_move_null(ttm_bo, new_res);
730 
731 	return 0;
732 }
733 
734 /**
735  * xe_bo_move_notify - Notify subsystems of a pending move
736  * @bo: The buffer object
737  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
738  *
739  * This function notifies subsystems of an upcoming buffer move.
740  * Upon receiving such a notification, subsystems should schedule
741  * halting access to the underlying pages and optionally add a fence
742  * to the buffer object's dma_resv object, that signals when access is
743  * stopped. The caller will wait on all dma_resv fences before
744  * starting the move.
745  *
746  * A subsystem may commence access to the object after obtaining
747  * bindings to the new backing memory under the object lock.
748  *
749  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
750  * negative error code on error.
751  */
752 static int xe_bo_move_notify(struct xe_bo *bo,
753 			     const struct ttm_operation_ctx *ctx)
754 {
755 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
756 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
757 	struct ttm_resource *old_mem = ttm_bo->resource;
758 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
759 	int ret;
760 
761 	/*
762 	 * If this starts to call into many components, consider
763 	 * using a notification chain here.
764 	 */
765 
766 	if (xe_bo_is_pinned(bo))
767 		return -EINVAL;
768 
769 	xe_bo_vunmap(bo);
770 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
771 	if (ret)
772 		return ret;
773 
774 	/* Don't call move_notify() for imported dma-bufs. */
775 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
776 		dma_buf_move_notify(ttm_bo->base.dma_buf);
777 
778 	/*
779 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
780 	 * so if we moved from VRAM make sure to unlink this from the userfault
781 	 * tracking.
782 	 */
783 	if (mem_type_is_vram(old_mem_type)) {
784 		mutex_lock(&xe->mem_access.vram_userfault.lock);
785 		if (!list_empty(&bo->vram_userfault_link))
786 			list_del_init(&bo->vram_userfault_link);
787 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
788 	}
789 
790 	return 0;
791 }
792 
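/*
 * TTM move callback. Creation-time moves, dmabuf moves and
 * system <-> TT transitions without CCS data are handled as null
 * moves; everything else is cleared or copied via xe_migrate, with
 * system <-> VRAM transfers bouncing through XE_PL_TT (-EMULTIHOP).
 */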
793 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
794 		      struct ttm_operation_ctx *ctx,
795 		      struct ttm_resource *new_mem,
796 		      struct ttm_place *hop)
797 {
798 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
799 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
800 	struct ttm_resource *old_mem = ttm_bo->resource;
801 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
802 	struct ttm_tt *ttm = ttm_bo->ttm;
803 	struct xe_migrate *migrate = NULL;
804 	struct dma_fence *fence;
805 	bool move_lacks_source;
806 	bool tt_has_data;
807 	bool needs_clear;
808 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
809 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
810 				  ttm && ttm_tt_is_populated(ttm));
811 
812 	/* Bo creation path, moving to system or TT. */
813 	if ((!old_mem && ttm) && !handle_system_ccs) {
814 		if (new_mem->mem_type == XE_PL_TT)
815 			ret = xe_tt_map_sg(xe, ttm);
816 		if (!ret)
817 			ttm_bo_move_null(ttm_bo, new_mem);
818 		goto out;
819 	}
820 
821 	if (ttm_bo->type == ttm_bo_type_sg) {
822 		if (new_mem->mem_type == XE_PL_SYSTEM)
823 			ret = xe_bo_move_notify(bo, ctx);
824 		if (!ret)
825 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
826 		return ret;
827 	}
828 
829 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));
830 
831 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
832 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
833 
834 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
835 		(!ttm && ttm_bo->type == ttm_bo_type_device);
836 
837 	if (new_mem->mem_type == XE_PL_TT) {
838 		ret = xe_tt_map_sg(xe, ttm);
839 		if (ret)
840 			goto out;
841 	}
842 
843 	if (move_lacks_source && !needs_clear) {
844 		ttm_bo_move_null(ttm_bo, new_mem);
845 		goto out;
846 	}
847 
848 	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
849 	    new_mem->mem_type == XE_PL_SYSTEM) {
850 		ret = xe_svm_bo_evict(bo);
851 		if (!ret) {
852 			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
853 			ttm_bo_move_null(ttm_bo, new_mem);
854 		} else {
855 			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
856 				ERR_PTR(ret));
857 		}
858 
859 		goto out;
860 	}
861 
862 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
863 		ttm_bo_move_null(ttm_bo, new_mem);
864 		goto out;
865 	}
866 
867 	/*
868 	 * Failed multi-hop where the old_mem is still marked as
869 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
870 	 */
871 	if (old_mem_type == XE_PL_TT &&
872 	    new_mem->mem_type == XE_PL_TT) {
873 		ttm_bo_move_null(ttm_bo, new_mem);
874 		goto out;
875 	}
876 
877 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
878 		ret = xe_bo_move_notify(bo, ctx);
879 		if (ret)
880 			goto out;
881 	}
882 
883 	if (old_mem_type == XE_PL_TT &&
884 	    new_mem->mem_type == XE_PL_SYSTEM) {
885 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
886 						     DMA_RESV_USAGE_BOOKKEEP,
887 						     false,
888 						     MAX_SCHEDULE_TIMEOUT);
889 		if (timeout < 0) {
890 			ret = timeout;
891 			goto out;
892 		}
893 
894 		if (!handle_system_ccs) {
895 			ttm_bo_move_null(ttm_bo, new_mem);
896 			goto out;
897 		}
898 	}
899 
900 	if (!move_lacks_source &&
901 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
902 	     (mem_type_is_vram(old_mem_type) &&
903 	      new_mem->mem_type == XE_PL_SYSTEM))) {
904 		hop->fpfn = 0;
905 		hop->lpfn = 0;
906 		hop->mem_type = XE_PL_TT;
907 		hop->flags = TTM_PL_FLAG_TEMPORARY;
908 		ret = -EMULTIHOP;
909 		goto out;
910 	}
911 
912 	if (bo->tile)
913 		migrate = bo->tile->migrate;
914 	else if (resource_is_vram(new_mem))
915 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
916 	else if (mem_type_is_vram(old_mem_type))
917 		migrate = mem_type_to_migrate(xe, old_mem_type);
918 	else
919 		migrate = xe->tiles[0].migrate;
920 
921 	xe_assert(xe, migrate);
922 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
923 	if (xe_rpm_reclaim_safe(xe)) {
924 		/*
925 		 * We might be called through swapout in the validation path of
926 		 * another TTM device, so acquire rpm here.
927 		 */
928 		xe_pm_runtime_get(xe);
929 	} else {
930 		drm_WARN_ON(&xe->drm, handle_system_ccs);
931 		xe_pm_runtime_get_noresume(xe);
932 	}
933 
934 	if (move_lacks_source) {
935 		u32 flags = 0;
936 
937 		if (mem_type_is_vram(new_mem->mem_type))
938 			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
939 		else if (handle_system_ccs)
940 			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
941 
942 		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
943 	} else {
944 		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
945 					handle_system_ccs);
946 	}
947 	if (IS_ERR(fence)) {
948 		ret = PTR_ERR(fence);
949 		xe_pm_runtime_put(xe);
950 		goto out;
951 	}
952 	if (!move_lacks_source) {
953 		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
954 						new_mem);
955 		if (ret) {
956 			dma_fence_wait(fence, false);
957 			ttm_bo_move_null(ttm_bo, new_mem);
958 			ret = 0;
959 		}
960 	} else {
961 		/*
962 		 * ttm_bo_move_accel_cleanup() may blow up if
963 		 * bo->resource == NULL, so just attach the
964 		 * fence and set the new resource.
965 		 */
966 		dma_resv_add_fence(ttm_bo->base.resv, fence,
967 				   DMA_RESV_USAGE_KERNEL);
968 		ttm_bo_move_null(ttm_bo, new_mem);
969 	}
970 
971 	dma_fence_put(fence);
972 	xe_pm_runtime_put(xe);
973 
974 	/*
975 	 * CCS metadata is migrated from TT -> SMEM. So, let us detach the
976 	 * BBs from the BO as they are no longer needed.
977 	 */
978 	if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT &&
979 	    new_mem->mem_type == XE_PL_SYSTEM)
980 		xe_sriov_vf_ccs_detach_bo(bo);
981 
982 	if (IS_SRIOV_VF(xe) &&
983 	    ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
984 	     (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
985 	    handle_system_ccs)
986 		ret = xe_sriov_vf_ccs_attach_bo(bo);
987 
988 out:
989 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
990 	    ttm_bo->ttm) {
991 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
992 						     DMA_RESV_USAGE_KERNEL,
993 						     false,
994 						     MAX_SCHEDULE_TIMEOUT);
995 		if (timeout < 0)
996 			ret = timeout;
997 
998 		if (IS_VF_CCS_BB_VALID(xe, bo))
999 			xe_sriov_vf_ccs_detach_bo(bo);
1000 
1001 		xe_tt_unmap_sg(xe, ttm_bo->ttm);
1002 	}
1003 
1004 	return ret;
1005 }
1006 
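/*
 * Purge the content of a bo: fake-move it to system without copying
 * any data, then drop the pages. Returns the number of pages released
 * or a negative error code.
 */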
1007 static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
1008 			       struct ttm_buffer_object *bo,
1009 			       unsigned long *scanned)
1010 {
1011 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1012 	long lret;
1013 
1014 	/* Fake move to system, without copying data. */
1015 	if (bo->resource->mem_type != XE_PL_SYSTEM) {
1016 		struct ttm_resource *new_resource;
1017 
1018 		lret = ttm_bo_wait_ctx(bo, ctx);
1019 		if (lret)
1020 			return lret;
1021 
1022 		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
1023 		if (lret)
1024 			return lret;
1025 
1026 		xe_tt_unmap_sg(xe, bo->ttm);
1027 		ttm_bo_move_null(bo, new_resource);
1028 	}
1029 
1030 	*scanned += bo->ttm->num_pages;
1031 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1032 			     {.purge = true,
1033 			      .writeback = false,
1034 			      .allow_move = false});
1035 
1036 	if (lret > 0)
1037 		xe_ttm_tt_account_subtract(xe, bo->ttm);
1038 
1039 	return lret;
1040 }
1041 
1042 static bool
1043 xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1044 {
1045 	struct drm_gpuvm_bo *vm_bo;
1046 
1047 	if (!ttm_bo_eviction_valuable(bo, place))
1048 		return false;
1049 
1050 	if (!xe_bo_is_xe_bo(bo))
1051 		return true;
1052 
1053 	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1054 		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1055 			return false;
1056 	}
1057 
1058 	return true;
1059 }
1060 
1061 /**
1062  * xe_bo_shrink() - Try to shrink an xe bo.
1063  * @ctx: The struct ttm_operation_ctx used for shrinking.
1064  * @bo: The TTM buffer object whose pages to shrink.
1065  * @flags: Flags governing the shrink behaviour.
1066  * @scanned: Pointer to a counter of the number of pages
1067  * attempted to shrink.
1068  *
1069  * Try to shrink or purge a bo, and if it succeeds, unmap dma.
1070  * Note that we also need to be able to handle non-xe bos
1071  * (ghost bos), but only if the struct ttm_tt is embedded in
1072  * a struct xe_ttm_tt. When the function attempts to shrink
1073  * the pages of a buffer object, the value pointed to by @scanned
1074  * is updated.
1075  *
1076  * Return: The number of pages shrunken or purged, or negative error
1077  * code on failure.
1078  */
1079 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
1080 		  const struct xe_bo_shrink_flags flags,
1081 		  unsigned long *scanned)
1082 {
1083 	struct ttm_tt *tt = bo->ttm;
1084 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
1085 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
1086 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
1087 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1088 	bool needs_rpm;
1089 	long lret = 0L;
1090 
1091 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
1092 	    (flags.purge && !xe_tt->purgeable))
1093 		return -EBUSY;
1094 
1095 	if (!xe_bo_eviction_valuable(bo, &place))
1096 		return -EBUSY;
1097 
1098 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1099 		return xe_bo_shrink_purge(ctx, bo, scanned);
1100 
1101 	if (xe_tt->purgeable) {
1102 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1103 			lret = xe_bo_move_notify(xe_bo, ctx);
1104 		if (!lret)
1105 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1106 		goto out_unref;
1107 	}
1108 
1109 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1110 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1111 		     xe_bo_needs_ccs_pages(xe_bo));
1112 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1113 		goto out_unref;
1114 
1115 	*scanned += tt->num_pages;
1116 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1117 			     {.purge = false,
1118 			      .writeback = flags.writeback,
1119 			      .allow_move = true});
1120 	if (needs_rpm)
1121 		xe_pm_runtime_put(xe);
1122 
1123 	if (lret > 0)
1124 		xe_ttm_tt_account_subtract(xe, tt);
1125 
1126 out_unref:
1127 	xe_bo_put(xe_bo);
1128 
1129 	return lret;
1130 }
1131 
1132 /**
1133  * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1134  * up in system memory.
1135  * @bo: The buffer object to prepare.
1136  *
1137  * On successful completion, the object backup pages are allocated. Expectation
1138  * is that this is called from the PM notifier, prior to suspend/hibernation.
1139  *
1140  * Return: 0 on success. Negative error code on failure.
1141  */
1142 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1143 {
1144 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1145 	struct xe_bo *backup;
1146 	int ret = 0;
1147 
1148 	xe_bo_lock(bo, false);
1149 
1150 	xe_assert(xe, !bo->backup_obj);
1151 
1152 	/*
1153 	 * Since this is called from the PM notifier we might have raced with
1154 	 * someone unpinning this after we dropped the pinned list lock and
1155 	 * before grabbing the above bo lock.
1156 	 */
1157 	if (!xe_bo_is_pinned(bo))
1158 		goto out_unlock_bo;
1159 
1160 	if (!xe_bo_is_vram(bo))
1161 		goto out_unlock_bo;
1162 
1163 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1164 		goto out_unlock_bo;
1165 
1166 	backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
1167 					DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1168 					XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1169 					XE_BO_FLAG_PINNED);
1170 	if (IS_ERR(backup)) {
1171 		ret = PTR_ERR(backup);
1172 		goto out_unlock_bo;
1173 	}
1174 
1175 	backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1176 	ttm_bo_pin(&backup->ttm);
1177 	bo->backup_obj = backup;
1178 
1179 out_unlock_bo:
1180 	xe_bo_unlock(bo);
1181 	return ret;
1182 }
1183 
1184 /**
1185  * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1186  * @bo: The buffer object to undo the prepare for.
1187  *
1188  * Always returns 0. The backup object is removed, if still present. Expectation
1189  * is that this is called from the PM notifier when undoing the prepare step.
1190  *
1191  * Return: Always returns 0.
1192  */
1193 int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1194 {
1195 	xe_bo_lock(bo, false);
1196 	if (bo->backup_obj) {
1197 		ttm_bo_unpin(&bo->backup_obj->ttm);
1198 		xe_bo_put(bo->backup_obj);
1199 		bo->backup_obj = NULL;
1200 	}
1201 	xe_bo_unlock(bo);
1202 
1203 	return 0;
1204 }
1205 
1206 /**
1207  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1208  * @bo: The buffer object to move.
1209  *
1210  * On successful completion, the object memory will be moved to system memory.
1211  *
1212  * This is needed for special handling of pinned VRAM objects during
1213  * suspend-resume.
1214  *
1215  * Return: 0 on success. Negative error code on failure.
1216  */
1217 int xe_bo_evict_pinned(struct xe_bo *bo)
1218 {
1219 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1220 	struct xe_bo *backup = bo->backup_obj;
1221 	bool backup_created = false;
1222 	bool unmap = false;
1223 	int ret = 0;
1224 
1225 	xe_bo_lock(bo, false);
1226 
1227 	if (WARN_ON(!bo->ttm.resource)) {
1228 		ret = -EINVAL;
1229 		goto out_unlock_bo;
1230 	}
1231 
1232 	if (WARN_ON(!xe_bo_is_pinned(bo))) {
1233 		ret = -EINVAL;
1234 		goto out_unlock_bo;
1235 	}
1236 
1237 	if (!xe_bo_is_vram(bo))
1238 		goto out_unlock_bo;
1239 
1240 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1241 		goto out_unlock_bo;
1242 
1243 	if (!backup) {
1244 		backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv,
1245 						NULL, xe_bo_size(bo),
1246 						DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1247 						XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1248 						XE_BO_FLAG_PINNED);
1249 		if (IS_ERR(backup)) {
1250 			ret = PTR_ERR(backup);
1251 			goto out_unlock_bo;
1252 		}
1253 		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1254 		backup_created = true;
1255 	}
1256 
1257 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1258 		struct xe_migrate *migrate;
1259 		struct dma_fence *fence;
1260 
1261 		if (bo->tile)
1262 			migrate = bo->tile->migrate;
1263 		else
1264 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1265 
1266 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1267 		if (ret)
1268 			goto out_backup;
1269 
1270 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1271 		if (ret)
1272 			goto out_backup;
1273 
1274 		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
1275 					backup->ttm.resource, false);
1276 		if (IS_ERR(fence)) {
1277 			ret = PTR_ERR(fence);
1278 			goto out_backup;
1279 		}
1280 
1281 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1282 				   DMA_RESV_USAGE_KERNEL);
1283 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1284 				   DMA_RESV_USAGE_KERNEL);
1285 		dma_fence_put(fence);
1286 	} else {
1287 		ret = xe_bo_vmap(backup);
1288 		if (ret)
1289 			goto out_backup;
1290 
1291 		if (iosys_map_is_null(&bo->vmap)) {
1292 			ret = xe_bo_vmap(bo);
1293 			if (ret)
1294 				goto out_backup;
1295 			unmap = true;
1296 		}
1297 
1298 		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
1299 				   xe_bo_size(bo));
1300 	}
1301 
1302 	if (!bo->backup_obj)
1303 		bo->backup_obj = backup;
1304 
1305 out_backup:
1306 	xe_bo_vunmap(backup);
1307 	if (ret && backup_created)
1308 		xe_bo_put(backup);
1309 out_unlock_bo:
1310 	if (unmap)
1311 		xe_bo_vunmap(bo);
1312 	xe_bo_unlock(bo);
1313 	return ret;
1314 }
1315 
1316 /**
1317  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1318  * @bo: The buffer object to move.
1319  *
1320  * On successful completion, the object memory will be moved back to VRAM.
1321  *
1322  * This is needed for special handling of pinned VRAM objects during
1323  * suspend-resume.
1324  *
1325  * Return: 0 on success. Negative error code on failure.
1326  */
1327 int xe_bo_restore_pinned(struct xe_bo *bo)
1328 {
1329 	struct ttm_operation_ctx ctx = {
1330 		.interruptible = false,
1331 		.gfp_retry_mayfail = false,
1332 	};
1333 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1334 	struct xe_bo *backup = bo->backup_obj;
1335 	bool unmap = false;
1336 	int ret;
1337 
1338 	if (!backup)
1339 		return 0;
1340 
1341 	xe_bo_lock(bo, false);
1342 
1343 	if (!xe_bo_is_pinned(backup)) {
1344 		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1345 		if (ret)
1346 			goto out_unlock_bo;
1347 	}
1348 
1349 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1350 		struct xe_migrate *migrate;
1351 		struct dma_fence *fence;
1352 
1353 		if (bo->tile)
1354 			migrate = bo->tile->migrate;
1355 		else
1356 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1357 
1358 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1359 		if (ret)
1360 			goto out_unlock_bo;
1361 
1362 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1363 		if (ret)
1364 			goto out_unlock_bo;
1365 
1366 		fence = xe_migrate_copy(migrate, backup, bo,
1367 					backup->ttm.resource, bo->ttm.resource,
1368 					false);
1369 		if (IS_ERR(fence)) {
1370 			ret = PTR_ERR(fence);
1371 			goto out_unlock_bo;
1372 		}
1373 
1374 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1375 				   DMA_RESV_USAGE_KERNEL);
1376 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1377 				   DMA_RESV_USAGE_KERNEL);
1378 		dma_fence_put(fence);
1379 	} else {
1380 		ret = xe_bo_vmap(backup);
1381 		if (ret)
1382 			goto out_unlock_bo;
1383 
1384 		if (iosys_map_is_null(&bo->vmap)) {
1385 			ret = xe_bo_vmap(bo);
1386 			if (ret)
1387 				goto out_backup;
1388 			unmap = true;
1389 		}
1390 
1391 		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
1392 				 xe_bo_size(bo));
1393 	}
1394 
1395 	bo->backup_obj = NULL;
1396 
1397 out_backup:
1398 	xe_bo_vunmap(backup);
1399 	if (!bo->backup_obj) {
1400 		if (xe_bo_is_pinned(backup))
1401 			ttm_bo_unpin(&backup->ttm);
1402 		xe_bo_put(backup);
1403 	}
1404 out_unlock_bo:
1405 	if (unmap)
1406 		xe_bo_vunmap(bo);
1407 	xe_bo_unlock(bo);
1408 	return ret;
1409 }
1410 
1411 int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
1412 {
1413 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
1414 	struct ttm_tt *tt = ttm_bo->ttm;
1415 
1416 	if (tt) {
1417 		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
1418 
1419 		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1420 			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
1421 						 ttm_bo->sg,
1422 						 DMA_BIDIRECTIONAL);
1423 			ttm_bo->sg = NULL;
1424 			xe_tt->sg = NULL;
1425 		} else if (xe_tt->sg) {
1426 			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
1427 					  xe_tt->sg,
1428 					  DMA_BIDIRECTIONAL, 0);
1429 			sg_free_table(xe_tt->sg);
1430 			xe_tt->sg = NULL;
1431 		}
1432 	}
1433 
1434 	return 0;
1435 }
1436 
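/*
 * Return the PFN backing @page_offset of the BO's VRAM or stolen
 * resource, used by TTM when handling CPU faults on iomem mappings.
 */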
1437 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1438 				       unsigned long page_offset)
1439 {
1440 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1441 	struct xe_res_cursor cursor;
1442 	struct xe_vram_region *vram;
1443 
1444 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1445 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1446 
1447 	vram = res_to_mem_region(ttm_bo->resource);
1448 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1449 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1450 }
1451 
1452 static void __xe_bo_vunmap(struct xe_bo *bo);
1453 
1454 /*
1455  * TODO: Move this function to TTM so we don't rely on how TTM does its
1456  * locking, thereby abusing TTM internals.
1457  */
1458 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1459 {
1460 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1461 	bool locked;
1462 
1463 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1464 
1465 	/*
1466 	 * We can typically only race with TTM trylocking under the
1467 	 * lru_lock, which will immediately be unlocked again since
1468 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1469 	 * always succeed here, as long as we hold the lru lock.
1470 	 */
1471 	spin_lock(&ttm_bo->bdev->lru_lock);
1472 	locked = dma_resv_trylock(ttm_bo->base.resv);
1473 	spin_unlock(&ttm_bo->bdev->lru_lock);
1474 	xe_assert(xe, locked);
1475 
1476 	return locked;
1477 }
1478 
1479 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1480 {
1481 	struct dma_resv_iter cursor;
1482 	struct dma_fence *fence;
1483 	struct dma_fence *replacement = NULL;
1484 	struct xe_bo *bo;
1485 
1486 	if (!xe_bo_is_xe_bo(ttm_bo))
1487 		return;
1488 
1489 	bo = ttm_to_xe_bo(ttm_bo);
1490 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1491 
1492 	/*
1493 	 * Corner case where TTM fails to allocate memory and this BO's resv
1494 	 * still points to the VM's resv.
1495 	 */
1496 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1497 		return;
1498 
1499 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1500 		return;
1501 
1502 	/*
1503 	 * Scrub the preempt fences if any. The unbind fence is already
1504 	 * attached to the resv.
1505 	 * TODO: Don't do this for external bos once we scrub them after
1506 	 * unbind.
1507 	 */
1508 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1509 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1510 		if (xe_fence_is_xe_preempt(fence) &&
1511 		    !dma_fence_is_signaled(fence)) {
1512 			if (!replacement)
1513 				replacement = dma_fence_get_stub();
1514 
1515 			dma_resv_replace_fences(ttm_bo->base.resv,
1516 						fence->context,
1517 						replacement,
1518 						DMA_RESV_USAGE_BOOKKEEP);
1519 		}
1520 	}
1521 	dma_fence_put(replacement);
1522 
1523 	dma_resv_unlock(ttm_bo->base.resv);
1524 }
1525 
1526 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1527 {
1528 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1529 
1530 	if (!xe_bo_is_xe_bo(ttm_bo))
1531 		return;
1532 
1533 	if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo))
1534 		xe_sriov_vf_ccs_detach_bo(bo);
1535 
1536 	/*
1537 	 * Object is idle and about to be destroyed. Release the
1538 	 * dma-buf attachment.
1539 	 */
1540 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1541 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1542 						       struct xe_ttm_tt, ttm);
1543 
1544 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1545 					 DMA_BIDIRECTIONAL);
1546 		ttm_bo->sg = NULL;
1547 		xe_tt->sg = NULL;
1548 	}
1549 }
1550 
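/*
 * Validating against an empty placement makes TTM drop the bo's
 * backing store, releasing the purgeable pages.
 */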
1551 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1552 {
1553 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1554 
1555 	if (ttm_bo->ttm) {
1556 		struct ttm_placement place = {};
1557 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1558 
1559 		drm_WARN_ON(&xe->drm, ret);
1560 	}
1561 }
1562 
1563 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1564 {
1565 	struct ttm_operation_ctx ctx = {
1566 		.interruptible = false,
1567 		.gfp_retry_mayfail = false,
1568 	};
1569 
1570 	if (ttm_bo->ttm) {
1571 		struct xe_ttm_tt *xe_tt =
1572 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1573 
1574 		if (xe_tt->purgeable)
1575 			xe_ttm_bo_purge(ttm_bo, &ctx);
1576 	}
1577 }
1578 
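/*
 * Implements ttm_device_funcs::access_memory, e.g. for ptrace access
 * to BO contents. Only VRAM resources are handled here (-EIO
 * otherwise); non-visible VRAM and accesses of SZ_16K or more go
 * through the migration engine, smaller visible accesses use the
 * CPU iomem mapping.
 */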
1579 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1580 				unsigned long offset, void *buf, int len,
1581 				int write)
1582 {
1583 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1584 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1585 	struct iosys_map vmap;
1586 	struct xe_res_cursor cursor;
1587 	struct xe_vram_region *vram;
1588 	int bytes_left = len;
1589 	int err = 0;
1590 
1591 	xe_bo_assert_held(bo);
1592 	xe_device_assert_mem_access(xe);
1593 
1594 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1595 		return -EIO;
1596 
1597 	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
1598 		struct xe_migrate *migrate =
1599 			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1600 
1601 		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1602 					       write);
1603 		goto out;
1604 	}
1605 
1606 	vram = res_to_mem_region(ttm_bo->resource);
1607 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1608 		     xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);
1609 
1610 	do {
1611 		unsigned long page_offset = (offset & ~PAGE_MASK);
1612 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1613 
1614 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1615 					  cursor.start);
1616 		if (write)
1617 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1618 		else
1619 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1620 
1621 		buf += byte_count;
1622 		offset += byte_count;
1623 		bytes_left -= byte_count;
1624 		if (bytes_left)
1625 			xe_res_next(&cursor, PAGE_SIZE);
1626 	} while (bytes_left);
1627 
1628 out:
1629 	return err ?: len;
1630 }
1631 
1632 const struct ttm_device_funcs xe_ttm_funcs = {
1633 	.ttm_tt_create = xe_ttm_tt_create,
1634 	.ttm_tt_populate = xe_ttm_tt_populate,
1635 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1636 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1637 	.evict_flags = xe_evict_flags,
1638 	.move = xe_bo_move,
1639 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1640 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1641 	.access_memory = xe_ttm_access_memory,
1642 	.release_notify = xe_ttm_bo_release_notify,
1643 	.eviction_valuable = xe_bo_eviction_valuable,
1644 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1645 	.swap_notify = xe_ttm_bo_swap_notify,
1646 };
1647 
1648 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1649 {
1650 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1651 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1652 	struct xe_tile *tile;
1653 	u8 id;
1654 
1655 	if (bo->ttm.base.import_attach)
1656 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1657 	drm_gem_object_release(&bo->ttm.base);
1658 
1659 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1660 
1661 	for_each_tile(tile, xe, id)
1662 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1663 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1664 
1665 #ifdef CONFIG_PROC_FS
1666 	if (bo->client)
1667 		xe_drm_client_remove_bo(bo);
1668 #endif
1669 
1670 	if (bo->vm && xe_bo_is_user(bo))
1671 		xe_vm_put(bo->vm);
1672 
1673 	if (bo->parent_obj)
1674 		xe_bo_put(bo->parent_obj);
1675 
1676 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1677 	if (!list_empty(&bo->vram_userfault_link))
1678 		list_del(&bo->vram_userfault_link);
1679 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1680 
1681 	kfree(bo);
1682 }
1683 
1684 static void xe_gem_object_free(struct drm_gem_object *obj)
1685 {
1686 	/* Our BO reference counting scheme works as follows:
1687 	 *
1688 	 * The gem object kref is typically used throughout the driver,
1689 	 * and the gem object holds a ttm_buffer_object refcount, so
1690 	 * that when the last gem object reference is put, which is when
1691 	 * we end up in this function, we put also that ttm_buffer_object
1692 	 * refcount. Anything using gem interfaces is then no longer
1693 	 * allowed to access the object in a way that requires a gem
1694 	 * refcount, including locking the object.
1695 	 *
1696 	 * Driver ttm callbacks are allowed to use the ttm_buffer_object
1697 	 * refcount directly if needed.
1698 	 */
1699 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1700 	ttm_bo_fini(container_of(obj, struct ttm_buffer_object, base));
1701 }
1702 
1703 static void xe_gem_object_close(struct drm_gem_object *obj,
1704 				struct drm_file *file_priv)
1705 {
1706 	struct xe_bo *bo = gem_to_xe_bo(obj);
1707 
1708 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1709 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1710 
1711 		xe_bo_lock(bo, false);
1712 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1713 		xe_bo_unlock(bo);
1714 	}
1715 }
1716 
1717 static bool should_migrate_to_smem(struct xe_bo *bo)
1718 {
1719 	/*
1720 	 * NOTE: The following atomic checks are platform-specific. For example,
1721 	 * if a device supports CXL atomics, these may not be necessary or
1722 	 * may behave differently.
1723 	 */
1724 
1725 	return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
1726 	       bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
1727 }
1728 
1729 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1730 {
1731 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1732 	struct drm_device *ddev = tbo->base.dev;
1733 	struct xe_device *xe = to_xe_device(ddev);
1734 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1735 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1736 	vm_fault_t ret;
1737 	int idx, r = 0;
1738 
1739 	if (needs_rpm)
1740 		xe_pm_runtime_get(xe);
1741 
1742 	ret = ttm_bo_vm_reserve(tbo, vmf);
1743 	if (ret)
1744 		goto out;
1745 
1746 	if (drm_dev_enter(ddev, &idx)) {
1747 		trace_xe_bo_cpu_fault(bo);
1748 
1749 		if (should_migrate_to_smem(bo)) {
1750 			xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM);
1751 
1752 			r = xe_bo_migrate(bo, XE_PL_TT);
1753 			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1754 				ret = VM_FAULT_NOPAGE;
1755 			else if (r)
1756 				ret = VM_FAULT_SIGBUS;
1757 		}
1758 		if (!ret)
1759 			ret = ttm_bo_vm_fault_reserved(vmf,
1760 						       vmf->vma->vm_page_prot,
1761 						       TTM_BO_VM_NUM_PREFAULT);
1762 		drm_dev_exit(idx);
1763 
1764 		if (ret == VM_FAULT_RETRY &&
1765 		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1766 			goto out;
1767 
1768 		/*
1769 		 * ttm_bo_vm_reserve() already has dma_resv_lock.
1770 		 */
1771 		if (ret == VM_FAULT_NOPAGE &&
1772 		    mem_type_is_vram(tbo->resource->mem_type)) {
1773 			mutex_lock(&xe->mem_access.vram_userfault.lock);
1774 			if (list_empty(&bo->vram_userfault_link))
1775 				list_add(&bo->vram_userfault_link,
1776 					 &xe->mem_access.vram_userfault.list);
1777 			mutex_unlock(&xe->mem_access.vram_userfault.lock);
1778 		}
1779 	} else {
1780 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1781 	}
1782 
1783 	dma_resv_unlock(tbo->base.resv);
1784 out:
1785 	if (needs_rpm)
1786 		xe_pm_runtime_put(xe);
1787 
1788 	return ret;
1789 }
1790 
1791 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1792 			   void *buf, int len, int write)
1793 {
1794 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1795 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1796 	struct xe_device *xe = xe_bo_device(bo);
1797 	int ret;
1798 
1799 	xe_pm_runtime_get(xe);
1800 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1801 	xe_pm_runtime_put(xe);
1802 
1803 	return ret;
1804 }
1805 
1806 /**
1807  * xe_bo_read() - Read from an xe_bo
1808  * @bo: The buffer object to read from.
1809  * @offset: The byte offset to start reading from.
1810  * @dst: Location to store the read.
1811  * @size: Size in bytes for the read.
1812  *
1813  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1814  *
1815  * Return: Zero on success, or negative error.
1816  */
1817 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1818 {
1819 	int ret;
1820 
1821 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1822 	if (ret >= 0 && ret != size)
1823 		ret = -EIO;
1824 	else if (ret == size)
1825 		ret = 0;
1826 
1827 	return ret;
1828 }
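
/*
 * Hypothetical xe_bo_read() usage sketch ("bo" and "magic" are
 * illustrative only, not names from this file):
 *
 *	u32 magic;
 *	int err = xe_bo_read(bo, 0, &magic, sizeof(magic));
 *	if (err)
 *		return err;
 */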
1829 
1830 static const struct vm_operations_struct xe_gem_vm_ops = {
1831 	.fault = xe_gem_fault,
1832 	.open = ttm_bo_vm_open,
1833 	.close = ttm_bo_vm_close,
1834 	.access = xe_bo_vm_access,
1835 };
1836 
1837 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1838 	.free = xe_gem_object_free,
1839 	.close = xe_gem_object_close,
1840 	.mmap = drm_gem_ttm_mmap,
1841 	.export = xe_gem_prime_export,
1842 	.vm_ops = &xe_gem_vm_ops,
1843 };
1844 
1845 /**
1846  * xe_bo_alloc - Allocate storage for a struct xe_bo
1847  *
1848  * This function is intended to allocate storage to be used for input
1849  * to __xe_bo_create_locked(), in the case where a pointer to the bo to be
1850  * created is needed before the call to __xe_bo_create_locked().
1851  * If __xe_bo_create_locked() ends up never being called, then the
1852  * storage allocated with this function needs to be freed using
1853  * xe_bo_free().
1854  *
1855  * Return: A pointer to an uninitialized struct xe_bo on success,
1856  * ERR_PTR(-ENOMEM) on error.
1857  */
1858 struct xe_bo *xe_bo_alloc(void)
1859 {
1860 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1861 
1862 	if (!bo)
1863 		return ERR_PTR(-ENOMEM);
1864 
1865 	return bo;
1866 }
1867 
1868 /**
1869  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1870  * @bo: The buffer object storage.
1871  *
1872  * Refer to xe_bo_alloc() documentation for valid use-cases.
1873  */
1874 void xe_bo_free(struct xe_bo *bo)
1875 {
1876 	kfree(bo);
1877 }
1878 
1879 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1880 				     struct xe_tile *tile, struct dma_resv *resv,
1881 				     struct ttm_lru_bulk_move *bulk, size_t size,
1882 				     u16 cpu_caching, enum ttm_bo_type type,
1883 				     u32 flags)
1884 {
1885 	struct ttm_operation_ctx ctx = {
1886 		.interruptible = true,
1887 		.no_wait_gpu = false,
1888 		.gfp_retry_mayfail = true,
1889 	};
1890 	struct ttm_placement *placement;
1891 	uint32_t alignment;
1892 	size_t aligned_size;
1893 	int err;
1894 
1895 	/* Only kernel objects should set a tile */
1896 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1897 
1898 	if (XE_WARN_ON(!size)) {
1899 		xe_bo_free(bo);
1900 		return ERR_PTR(-EINVAL);
1901 	}
1902 
1903 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT to also be set */
1904 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1905 		return ERR_PTR(-EINVAL);
1906 
1907 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1908 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1909 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1910 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1911 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1912 
1913 		aligned_size = ALIGN(size, align);
1914 		if (type != ttm_bo_type_device)
1915 			size = ALIGN(size, align);
1916 		flags |= XE_BO_FLAG_INTERNAL_64K;
1917 		alignment = align >> PAGE_SHIFT;
1918 	} else {
1919 		aligned_size = ALIGN(size, SZ_4K);
1920 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1921 		alignment = SZ_4K >> PAGE_SHIFT;
1922 	}
1923 
1924 	if (type == ttm_bo_type_device && aligned_size != size)
1925 		return ERR_PTR(-EINVAL);
1926 
1927 	if (!bo) {
1928 		bo = xe_bo_alloc();
1929 		if (IS_ERR(bo))
1930 			return bo;
1931 	}
1932 
1933 	bo->ccs_cleared = false;
1934 	bo->tile = tile;
1935 	bo->flags = flags;
1936 	bo->cpu_caching = cpu_caching;
1937 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1938 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1939 	INIT_LIST_HEAD(&bo->pinned_link);
1940 #ifdef CONFIG_PROC_FS
1941 	INIT_LIST_HEAD(&bo->client_link);
1942 #endif
1943 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1944 
1945 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1946 
1947 	if (resv) {
1948 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1949 		ctx.resv = resv;
1950 	}
1951 
1952 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1953 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1954 		if (WARN_ON(err)) {
1955 			xe_ttm_bo_destroy(&bo->ttm);
1956 			return ERR_PTR(err);
1957 		}
1958 	}
1959 
1960 	/* Defer populating type_sg bos */
1961 	placement = (type == ttm_bo_type_sg ||
1962 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1963 		&bo->placement;
1964 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1965 				   placement, alignment,
1966 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1967 	if (err)
1968 		return ERR_PTR(err);
1969 
1970 	/*
1971 	 * The VRAM pages underneath are potentially still being accessed by the
1972 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1973 	 * sure to add any corresponding move/clear fences into the objects
1974 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1975 	 *
1976 	 * For KMD internal buffers we don't care about GPU clearing, however we
1977 	 * still need to handle async evictions, where the VRAM is still being
1978 	 * accessed by the GPU. Most internal callers are not expecting this,
1979 	 * since they are missing the required synchronisation before accessing
1980 	 * the memory. To keep things simple just sync wait any kernel fences
1981 	 * here, if the buffer is designated KMD internal.
1982 	 *
1983 	 * For normal userspace objects we should already have the required
1984 	 * pipelining or sync waiting elsewhere, since we already have to deal
1985 	 * with things like async GPU clearing.
1986 	 */
1987 	if (type == ttm_bo_type_kernel) {
1988 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1989 						     DMA_RESV_USAGE_KERNEL,
1990 						     ctx.interruptible,
1991 						     MAX_SCHEDULE_TIMEOUT);
1992 
1993 		if (timeout < 0) {
1994 			if (!resv)
1995 				dma_resv_unlock(bo->ttm.base.resv);
1996 			xe_bo_put(bo);
1997 			return ERR_PTR(timeout);
1998 		}
1999 	}
2000 
2001 	bo->created = true;
2002 	if (bulk)
2003 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
2004 	else
2005 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2006 
2007 	return bo;
2008 }
2009 
2010 static int __xe_bo_fixed_placement(struct xe_device *xe,
2011 				   struct xe_bo *bo,
2012 				   u32 flags,
2013 				   u64 start, u64 end, u64 size)
2014 {
2015 	struct ttm_place *place = bo->placements;
2016 
2017 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
2018 		return -EINVAL;
2019 
2020 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
2021 	place->fpfn = start >> PAGE_SHIFT;
2022 	place->lpfn = end >> PAGE_SHIFT;
2023 
2024 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
2025 	case XE_BO_FLAG_VRAM0:
2026 		place->mem_type = XE_PL_VRAM0;
2027 		break;
2028 	case XE_BO_FLAG_VRAM1:
2029 		place->mem_type = XE_PL_VRAM1;
2030 		break;
2031 	case XE_BO_FLAG_STOLEN:
2032 		place->mem_type = XE_PL_STOLEN;
2033 		break;
2034 
2035 	default:
2036 		/* 0 or multiple of the above set */
2037 		return -EINVAL;
2038 	}
2039 
2040 	bo->placement = (struct ttm_placement) {
2041 		.num_placement = 1,
2042 		.placement = place,
2043 	};
2044 
2045 	return 0;
2046 }
2047 
2048 static struct xe_bo *
2049 __xe_bo_create_locked(struct xe_device *xe,
2050 		      struct xe_tile *tile, struct xe_vm *vm,
2051 		      size_t size, u64 start, u64 end,
2052 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
2053 		      u64 alignment)
2054 {
2055 	struct xe_bo *bo = NULL;
2056 	int err;
2057 
2058 	if (vm)
2059 		xe_vm_assert_held(vm);
2060 
2061 	if (start || end != ~0ULL) {
2062 		bo = xe_bo_alloc();
2063 		if (IS_ERR(bo))
2064 			return bo;
2065 
2066 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
2067 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
2068 		if (err) {
2069 			xe_bo_free(bo);
2070 			return ERR_PTR(err);
2071 		}
2072 	}
2073 
2074 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
2075 				    vm && !xe_vm_in_fault_mode(vm) &&
2076 				    flags & XE_BO_FLAG_USER ?
2077 				    &vm->lru_bulk_move : NULL, size,
2078 				    cpu_caching, type, flags);
2079 	if (IS_ERR(bo))
2080 		return bo;
2081 
2082 	bo->min_align = alignment;
2083 
2084 	/*
2085 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
2086 	 * to ensure the shared resv doesn't disappear under the bo, the bo
2087 	 * will keep a reference to the vm, and avoid circular references
2088 	 * by having all the vm's bo references released at vm close
2089 	 * time.
2090 	 */
2091 	if (vm && xe_bo_is_user(bo))
2092 		xe_vm_get(vm);
2093 	bo->vm = vm;
2094 
2095 	if (bo->flags & XE_BO_FLAG_GGTT) {
2096 		struct xe_tile *t;
2097 		u8 id;
2098 
2099 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
2100 			if (!tile && flags & XE_BO_FLAG_STOLEN)
2101 				tile = xe_device_get_root_tile(xe);
2102 
2103 			xe_assert(xe, tile);
2104 		}
2105 
2106 		for_each_tile(t, xe, id) {
2107 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
2108 				continue;
2109 
2110 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
2111 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
2112 							   start + xe_bo_size(bo), U64_MAX);
2113 			} else {
2114 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
2115 			}
2116 			if (err)
2117 				goto err_unlock_put_bo;
2118 		}
2119 	}
2120 
2121 	trace_xe_bo_create(bo);
2122 	return bo;
2123 
2124 err_unlock_put_bo:
2125 	__xe_bo_unset_bulk_move(bo);
2126 	xe_bo_unlock_vm_held(bo);
2127 	xe_bo_put(bo);
2128 	return ERR_PTR(err);
2129 }
2130 
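/**
 * xe_bo_create_locked_range() - Create a BO constrained to an address range
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @vm: The VM the BO is tied to, or NULL for an external BO.
 * @size: Size in bytes of the BO.
 * @start: Start of the fixed placement range, in bytes.
 * @end: End of the fixed placement range, or ~0ULL for no restriction.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_* creation flags.
 * @alignment: Minimum alignment of the BO, or 0 for the default.
 *
 * Like xe_bo_create_locked(), but the BO placement can be restricted to the
 * [@start, @end) range of the selected VRAM / stolen region.
 *
 * Return: The created BO, locked, on success, an ERR_PTR() otherwise.
 */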
2131 struct xe_bo *
2132 xe_bo_create_locked_range(struct xe_device *xe,
2133 			  struct xe_tile *tile, struct xe_vm *vm,
2134 			  size_t size, u64 start, u64 end,
2135 			  enum ttm_bo_type type, u32 flags, u64 alignment)
2136 {
2137 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
2138 				     flags, alignment);
2139 }
2140 
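/**
 * xe_bo_create_locked() - Create a BO and return it locked
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @vm: The VM the BO is tied to, or NULL for an external BO.
 * @size: Size in bytes of the BO.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_* creation flags.
 *
 * Return: The created BO, locked, on success, an ERR_PTR() otherwise.
 */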
2141 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
2142 				  struct xe_vm *vm, size_t size,
2143 				  enum ttm_bo_type type, u32 flags)
2144 {
2145 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
2146 				     flags, 0);
2147 }
2148 
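/**
 * xe_bo_create_user() - Create a user BO
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @vm: The VM the BO is tied to, or NULL for an external BO.
 * @size: Size in bytes of the BO.
 * @cpu_caching: The requested CPU caching mode (DRM_XE_GEM_CPU_CACHING_*).
 * @flags: XE_BO_FLAG_* creation flags. XE_BO_FLAG_USER is added implicitly.
 *
 * Return: The created BO, unlocked, on success, an ERR_PTR() otherwise.
 */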
2149 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
2150 				struct xe_vm *vm, size_t size,
2151 				u16 cpu_caching,
2152 				u32 flags)
2153 {
2154 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
2155 						 cpu_caching, ttm_bo_type_device,
2156 						 flags | XE_BO_FLAG_USER, 0);
2157 	if (!IS_ERR(bo))
2158 		xe_bo_unlock_vm_held(bo);
2159 
2160 	return bo;
2161 }
2162 
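/**
 * xe_bo_create() - Create a BO
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @vm: The VM the BO is tied to, or NULL for an external BO.
 * @size: Size in bytes of the BO.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_* creation flags.
 *
 * Same as xe_bo_create_locked(), but the BO is returned unlocked.
 *
 * Return: The created BO on success, an ERR_PTR() otherwise.
 */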
2163 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
2164 			   struct xe_vm *vm, size_t size,
2165 			   enum ttm_bo_type type, u32 flags)
2166 {
2167 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
2168 
2169 	if (!IS_ERR(bo))
2170 		xe_bo_unlock_vm_held(bo);
2171 
2172 	return bo;
2173 }
2174 
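/**
 * xe_bo_create_pin_map_at() - Create a pinned and CPU-mapped BO at an offset
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @vm: The VM the BO is tied to, or NULL.
 * @size: Size in bytes of the BO.
 * @offset: Fixed placement offset, or ~0ull to place anywhere.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_* creation flags.
 *
 * Convenience wrapper around xe_bo_create_pin_map_at_aligned() using the
 * default alignment.
 *
 * Return: The created BO, pinned and vmapped, on success, an ERR_PTR()
 * otherwise.
 */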
2175 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
2176 				      struct xe_vm *vm,
2177 				      size_t size, u64 offset,
2178 				      enum ttm_bo_type type, u32 flags)
2179 {
2180 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
2181 					       type, flags, 0);
2182 }
2183 
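/**
 * xe_bo_create_pin_map_at_aligned() - Create a pinned and CPU-mapped BO
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @vm: The VM the BO is tied to, or NULL.
 * @size: Size in bytes of the BO.
 * @offset: Fixed placement offset, or ~0ull to place anywhere.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_* creation flags. CPU access and pinning are forced.
 * @alignment: Minimum alignment of the BO, or 0 for the default.
 *
 * Creates the BO, pins it with xe_bo_pin() and maps it with xe_bo_vmap().
 *
 * Return: The created BO, pinned and vmapped, on success, an ERR_PTR()
 * otherwise.
 */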
2184 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
2185 					      struct xe_tile *tile,
2186 					      struct xe_vm *vm,
2187 					      size_t size, u64 offset,
2188 					      enum ttm_bo_type type, u32 flags,
2189 					      u64 alignment)
2190 {
2191 	struct xe_bo *bo;
2192 	int err;
2193 	u64 start = offset == ~0ull ? 0 : offset;
2194 	u64 end = offset == ~0ull ? offset : start + size;
2195 
2196 	if (flags & XE_BO_FLAG_STOLEN &&
2197 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
2198 		flags |= XE_BO_FLAG_GGTT;
2199 
2200 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
2201 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
2202 				       alignment);
2203 	if (IS_ERR(bo))
2204 		return bo;
2205 
2206 	err = xe_bo_pin(bo);
2207 	if (err)
2208 		goto err_put;
2209 
2210 	err = xe_bo_vmap(bo);
2211 	if (err)
2212 		goto err_unpin;
2213 
2214 	xe_bo_unlock_vm_held(bo);
2215 
2216 	return bo;
2217 
2218 err_unpin:
2219 	xe_bo_unpin(bo);
2220 err_put:
2221 	xe_bo_unlock_vm_held(bo);
2222 	xe_bo_put(bo);
2223 	return ERR_PTR(err);
2224 }
2225 
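/**
 * xe_bo_create_pin_map() - Create a pinned and CPU-mapped BO
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @vm: The VM the BO is tied to, or NULL.
 * @size: Size in bytes of the BO.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_* creation flags.
 *
 * Same as xe_bo_create_pin_map_at(), but without a fixed placement offset.
 *
 * Return: The created BO, pinned and vmapped, on success, an ERR_PTR()
 * otherwise.
 */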
2226 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2227 				   struct xe_vm *vm, size_t size,
2228 				   enum ttm_bo_type type, u32 flags)
2229 {
2230 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
2231 }
2232 
2233 static void __xe_bo_unpin_map_no_vm(void *arg)
2234 {
2235 	xe_bo_unpin_map_no_vm(arg);
2236 }
2237 
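/**
 * xe_managed_bo_create_pin_map() - Create a drm-managed pinned and mapped BO
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @size: Size in bytes of the BO.
 * @flags: XE_BO_FLAG_* creation flags.
 *
 * The BO is automatically unpinned, unmapped and freed on driver unbind via
 * a devm action.
 *
 * Return: The created BO on success, an ERR_PTR() otherwise.
 */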
2238 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2239 					   size_t size, u32 flags)
2240 {
2241 	struct xe_bo *bo;
2242 	int ret;
2243 
2244 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
2245 
2246 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
2247 	if (IS_ERR(bo))
2248 		return bo;
2249 
2250 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2251 	if (ret)
2252 		return ERR_PTR(ret);
2253 
2254 	return bo;
2255 }
2256 
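/**
 * xe_managed_bo_create_from_data() - Create a drm-managed BO from a data blob
 * @xe: The xe device.
 * @tile: The tile to associate the BO with, or NULL.
 * @data: The data to copy into the BO.
 * @size: Size in bytes of @data.
 * @flags: XE_BO_FLAG_* creation flags.
 *
 * Creates a managed, pinned and mapped BO of at least @size bytes and copies
 * @data into it.
 *
 * Return: The created BO on success, an ERR_PTR() otherwise.
 */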
2257 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
2258 					     const void *data, size_t size, u32 flags)
2259 {
2260 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
2261 
2262 	if (IS_ERR(bo))
2263 		return bo;
2264 
2265 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
2266 
2267 	return bo;
2268 }
2269 
2270 /**
2271  * xe_managed_bo_reinit_in_vram - Replace a managed BO in system memory with a VRAM copy
2272  * @xe: xe device
2273  * @tile: Tile where the new buffer will be created
2274  * @src: Managed buffer object allocated in system memory
2275  *
2276  * Replace a managed src buffer object allocated in system memory with a new
2277  * one allocated in vram, copying the data between them.
2278  * The buffer object in VRAM will not have the same GGTT address; the caller
2279  * is responsible for making sure that any old references to it are updated.
2280  *
2281  * Returns 0 for success, negative error code otherwise.
2282  */
2283 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
2284 {
2285 	struct xe_bo *bo;
2286 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
2287 
2288 	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
2289 				      XE_BO_FLAG_PINNED_NORESTORE);
2290 
2291 	xe_assert(xe, IS_DGFX(xe));
2292 	xe_assert(xe, !(*src)->vmap.is_iomem);
2293 
2294 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
2295 					    xe_bo_size(*src), dst_flags);
2296 	if (IS_ERR(bo))
2297 		return PTR_ERR(bo);
2298 
2299 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
2300 	*src = bo;
2301 
2302 	return 0;
2303 }
2304 
2305 /*
2306  * XXX: This is in the VM bind data path, likely should calculate this once and
2307  * store, with a recalculation if the BO is moved.
2308  */
2309 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2310 {
2311 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2312 
2313 	switch (res->mem_type) {
2314 	case XE_PL_STOLEN:
2315 		return xe_ttm_stolen_gpu_offset(xe);
2316 	case XE_PL_TT:
2317 	case XE_PL_SYSTEM:
2318 		return 0;
2319 	default:
2320 		return res_to_mem_region(res)->dpa_base;
2321 	}
2322 	return 0;
2323 }
2324 
2325 /**
2326  * xe_bo_pin_external - pin an external BO
2327  * @bo: buffer object to be pinned
2328  * @in_place: Pin in current placement, don't attempt to migrate.
2329  *
2330  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2331  * BO. Unique call compared to xe_bo_pin as this function has its own set of
2332  * asserts and code to ensure evict / restore on suspend / resume.
2333  *
2334  * Returns 0 for success, negative error code otherwise.
2335  */
2336 int xe_bo_pin_external(struct xe_bo *bo, bool in_place)
2337 {
2338 	struct xe_device *xe = xe_bo_device(bo);
2339 	int err;
2340 
2341 	xe_assert(xe, !bo->vm);
2342 	xe_assert(xe, xe_bo_is_user(bo));
2343 
2344 	if (!xe_bo_is_pinned(bo)) {
2345 		if (!in_place) {
2346 			err = xe_bo_validate(bo, NULL, false);
2347 			if (err)
2348 				return err;
2349 		}
2350 
2351 		spin_lock(&xe->pinned.lock);
2352 		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
2353 		spin_unlock(&xe->pinned.lock);
2354 	}
2355 
2356 	ttm_bo_pin(&bo->ttm);
2357 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2358 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2359 
2360 	/*
2361 	 * FIXME: If we always use the reserve / unreserve functions for locking
2362 	 * we do not need this.
2363 	 */
2364 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2365 
2366 	return 0;
2367 }
2368 
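/**
 * xe_bo_pin() - Pin a kernel BO
 * @bo: The kernel BO to pin.
 *
 * Validates the BO into an allowed placement, adds it to the device's
 * pinned-object lists when placed in VRAM or mapped in the GGTT (so it can
 * be evicted / restored across suspend / resume), and takes a TTM pin.
 *
 * Return: 0 on success, negative error code otherwise.
 */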
2369 int xe_bo_pin(struct xe_bo *bo)
2370 {
2371 	struct ttm_place *place = &bo->placements[0];
2372 	struct xe_device *xe = xe_bo_device(bo);
2373 	int err;
2374 
2375 	/* We currently don't expect user BO to be pinned */
2376 	xe_assert(xe, !xe_bo_is_user(bo));
2377 
2378 	/* Pinned object must be in GGTT or have pinned flag */
2379 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
2380 				   XE_BO_FLAG_GGTT));
2381 
2382 	/*
2383 	 * No reason we can't support pinning imported dma-bufs; we just don't
2384 	 * expect to pin an imported dma-buf.
2385 	 */
2386 	xe_assert(xe, !bo->ttm.base.import_attach);
2387 
2388 	/* We only expect at most 1 pin */
2389 	xe_assert(xe, !xe_bo_is_pinned(bo));
2390 
2391 	err = xe_bo_validate(bo, NULL, false);
2392 	if (err)
2393 		return err;
2394 
2395 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2396 		spin_lock(&xe->pinned.lock);
2397 		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
2398 			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
2399 		else
2400 			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
2401 		spin_unlock(&xe->pinned.lock);
2402 	}
2403 
2404 	ttm_bo_pin(&bo->ttm);
2405 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2406 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2407 
2408 	/*
2409 	 * FIXME: If we always use the reserve / unreserve functions for locking
2410 	 * we do not need this.
2411 	 */
2412 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2413 
2414 	return 0;
2415 }
2416 
2417 /**
2418  * xe_bo_unpin_external - unpin an external BO
2419  * @bo: buffer object to be unpinned
2420  *
2421  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2422  * BO. Unique call compared to xe_bo_unpin as this function has its own set of
2423  * asserts and code to ensure evict / restore on suspend / resume.
2426  */
2427 void xe_bo_unpin_external(struct xe_bo *bo)
2428 {
2429 	struct xe_device *xe = xe_bo_device(bo);
2430 
2431 	xe_assert(xe, !bo->vm);
2432 	xe_assert(xe, xe_bo_is_pinned(bo));
2433 	xe_assert(xe, xe_bo_is_user(bo));
2434 
2435 	spin_lock(&xe->pinned.lock);
2436 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
2437 		list_del_init(&bo->pinned_link);
2438 	spin_unlock(&xe->pinned.lock);
2439 
2440 	ttm_bo_unpin(&bo->ttm);
2441 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2442 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2443 
2444 	/*
2445 	 * FIXME: If we always use the reserve / unreserve functions for locking
2446 	 * we do not need this.
2447 	 */
2448 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2449 }
2450 
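/**
 * xe_bo_unpin() - Unpin a kernel BO pinned with xe_bo_pin()
 * @bo: The BO to unpin.
 *
 * Removes the BO from the device's pinned-object lists, drops any suspend /
 * resume backup object, and releases the TTM pin.
 */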
2451 void xe_bo_unpin(struct xe_bo *bo)
2452 {
2453 	struct ttm_place *place = &bo->placements[0];
2454 	struct xe_device *xe = xe_bo_device(bo);
2455 
2456 	xe_assert(xe, !bo->ttm.base.import_attach);
2457 	xe_assert(xe, xe_bo_is_pinned(bo));
2458 
2459 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2460 		spin_lock(&xe->pinned.lock);
2461 		xe_assert(xe, !list_empty(&bo->pinned_link));
2462 		list_del_init(&bo->pinned_link);
2463 		spin_unlock(&xe->pinned.lock);
2464 
2465 		if (bo->backup_obj) {
2466 			if (xe_bo_is_pinned(bo->backup_obj))
2467 				ttm_bo_unpin(&bo->backup_obj->ttm);
2468 			xe_bo_put(bo->backup_obj);
2469 			bo->backup_obj = NULL;
2470 		}
2471 	}
2472 	ttm_bo_unpin(&bo->ttm);
2473 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2474 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2475 }
2476 
2477 /**
2478  * xe_bo_validate() - Make sure the bo is in an allowed placement
2479  * @bo: The bo.
2480  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2481  *      NULL. Used together with @allow_res_evict.
2482  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2483  *                   reservation object.
2484  *
2485  * Make sure the bo is in an allowed placement, migrating it if necessary. If
2486  * needed, other bos will be evicted. If bos selected for eviction share
2487  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
2488  * set to true, otherwise they will be bypassed.
2489  *
2490  * Return: 0 on success, negative error code on failure. May return
2491  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2492  */
2493 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
2494 {
2495 	struct ttm_operation_ctx ctx = {
2496 		.interruptible = true,
2497 		.no_wait_gpu = false,
2498 		.gfp_retry_mayfail = true,
2499 	};
2500 	int ret;
2501 
2502 	if (xe_bo_is_pinned(bo))
2503 		return 0;
2504 
2505 	if (vm) {
2506 		lockdep_assert_held(&vm->lock);
2507 		xe_vm_assert_held(vm);
2508 
2509 		ctx.allow_res_evict = allow_res_evict;
2510 		ctx.resv = xe_vm_resv(vm);
2511 	}
2512 
2513 	xe_vm_set_validating(vm, allow_res_evict);
2514 	trace_xe_bo_validate(bo);
2515 	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2516 	xe_vm_clear_validating(vm, allow_res_evict);
2517 
2518 	return ret;
2519 }
2520 
2521 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2522 {
2523 	if (bo->destroy == &xe_ttm_bo_destroy)
2524 		return true;
2525 
2526 	return false;
2527 }
2528 
2529 /*
2530  * Resolve a BO address. There is no assert to check if the proper lock is held
2531  * so it should only be used in cases where it is not fatal to get the wrong
2532  * address, such as printing debug information, but not in cases where memory is
2533  * written based on this result.
2534  */
2535 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2536 {
2537 	struct xe_device *xe = xe_bo_device(bo);
2538 	struct xe_res_cursor cur;
2539 	u64 page;
2540 
2541 	xe_assert(xe, page_size <= PAGE_SIZE);
2542 	page = offset >> PAGE_SHIFT;
2543 	offset &= (PAGE_SIZE - 1);
2544 
2545 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2546 		xe_assert(xe, bo->ttm.ttm);
2547 
2548 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2549 				page_size, &cur);
2550 		return xe_res_dma(&cur) + offset;
2551 	} else {
2552 		struct xe_res_cursor cur;
2553 
2554 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2555 			     page_size, &cur);
2556 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2557 	}
2558 }
2559 
2560 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2561 {
2562 	if (!READ_ONCE(bo->ttm.pin_count))
2563 		xe_bo_assert_held(bo);
2564 	return __xe_bo_addr(bo, offset, page_size);
2565 }
2566 
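/**
 * xe_bo_vmap() - Map the BO into kernel address space
 * @bo: The BO to map. Must have been created with XE_BO_FLAG_NEEDS_CPU_ACCESS
 *      and a contiguous placement.
 *
 * Sets up bo->vmap for CPU access using ttm_bo_kmap() under the hood.
 * Calling this on an already mapped BO is a no-op.
 *
 * Return: 0 on success, negative error code otherwise.
 */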
2567 int xe_bo_vmap(struct xe_bo *bo)
2568 {
2569 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2570 	void *virtual;
2571 	bool is_iomem;
2572 	int ret;
2573 
2574 	xe_bo_assert_held(bo);
2575 
2576 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2577 			!force_contiguous(bo->flags)))
2578 		return -EINVAL;
2579 
2580 	if (!iosys_map_is_null(&bo->vmap))
2581 		return 0;
2582 
2583 	/*
2584 	 * We use this more or less deprecated interface for now since
2585 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2586 	 * single page bos, which is done here.
2587 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2588 	 * to use struct iosys_map.
2589 	 */
2590 	ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
2591 	if (ret)
2592 		return ret;
2593 
2594 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
2595 	if (is_iomem)
2596 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
2597 	else
2598 		iosys_map_set_vaddr(&bo->vmap, virtual);
2599 
2600 	return 0;
2601 }
2602 
2603 static void __xe_bo_vunmap(struct xe_bo *bo)
2604 {
2605 	if (!iosys_map_is_null(&bo->vmap)) {
2606 		iosys_map_clear(&bo->vmap);
2607 		ttm_bo_kunmap(&bo->kmap);
2608 	}
2609 }
2610 
2611 void xe_bo_vunmap(struct xe_bo *bo)
2612 {
2613 	xe_bo_assert_held(bo);
2614 	__xe_bo_vunmap(bo);
2615 }
2616 
2617 static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
2618 {
2619 	if (value == DRM_XE_PXP_TYPE_NONE)
2620 		return 0;
2621 
2622 	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
2623 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
2624 		return -EINVAL;
2625 
2626 	return xe_pxp_key_assign(xe->pxp, bo);
2627 }
2628 
2629 typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
2630 					     struct xe_bo *bo,
2631 					     u64 value);
2632 
2633 static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
2634 	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
2635 };
2636 
2637 static int gem_create_user_ext_set_property(struct xe_device *xe,
2638 					    struct xe_bo *bo,
2639 					    u64 extension)
2640 {
2641 	u64 __user *address = u64_to_user_ptr(extension);
2642 	struct drm_xe_ext_set_property ext;
2643 	int err;
2644 	u32 idx;
2645 
2646 	err = copy_from_user(&ext, address, sizeof(ext));
2647 	if (XE_IOCTL_DBG(xe, err))
2648 		return -EFAULT;
2649 
2650 	if (XE_IOCTL_DBG(xe, ext.property >=
2651 			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
2652 	    XE_IOCTL_DBG(xe, ext.pad) ||
2653 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
2654 		return -EINVAL;
2655 
2656 	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
2657 	if (!gem_create_set_property_funcs[idx])
2658 		return -EINVAL;
2659 
2660 	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
2661 }
2662 
2663 typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
2664 					       struct xe_bo *bo,
2665 					       u64 extension);
2666 
2667 static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
2668 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
2669 };
2670 
2671 #define MAX_USER_EXTENSIONS	16
2672 static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
2673 				      u64 extensions, int ext_number)
2674 {
2675 	u64 __user *address = u64_to_user_ptr(extensions);
2676 	struct drm_xe_user_extension ext;
2677 	int err;
2678 	u32 idx;
2679 
2680 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
2681 		return -E2BIG;
2682 
2683 	err = copy_from_user(&ext, address, sizeof(ext));
2684 	if (XE_IOCTL_DBG(xe, err))
2685 		return -EFAULT;
2686 
2687 	if (XE_IOCTL_DBG(xe, ext.pad) ||
2688 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
2689 		return -EINVAL;
2690 
2691 	idx = array_index_nospec(ext.name,
2692 				 ARRAY_SIZE(gem_create_user_extension_funcs));
2693 	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
2694 	if (XE_IOCTL_DBG(xe, err))
2695 		return err;
2696 
2697 	if (ext.next_extension)
2698 		return gem_create_user_extensions(xe, bo, ext.next_extension,
2699 						  ++ext_number);
2700 
2701 	return 0;
2702 }
2703 
2704 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2705 			struct drm_file *file)
2706 {
2707 	struct xe_device *xe = to_xe_device(dev);
2708 	struct xe_file *xef = to_xe_file(file);
2709 	struct drm_xe_gem_create *args = data;
2710 	struct xe_vm *vm = NULL;
2711 	ktime_t end = 0;
2712 	struct xe_bo *bo;
2713 	unsigned int bo_flags;
2714 	u32 handle;
2715 	int err;
2716 
2717 	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2718 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2719 		return -EINVAL;
2720 
2721 	/* at least one valid memory placement must be specified */
2722 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2723 			 !args->placement))
2724 		return -EINVAL;
2725 
2726 	if (XE_IOCTL_DBG(xe, args->flags &
2727 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2728 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2729 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2730 		return -EINVAL;
2731 
2732 	if (XE_IOCTL_DBG(xe, args->handle))
2733 		return -EINVAL;
2734 
2735 	if (XE_IOCTL_DBG(xe, !args->size))
2736 		return -EINVAL;
2737 
2738 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2739 		return -EINVAL;
2740 
2741 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2742 		return -EINVAL;
2743 
2744 	bo_flags = 0;
2745 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2746 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2747 
2748 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2749 		bo_flags |= XE_BO_FLAG_SCANOUT;
2750 
2751 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2752 
2753 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2754 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2755 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2756 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2757 	    IS_ALIGNED(args->size, SZ_64K))
2758 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2759 
2760 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2761 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2762 			return -EINVAL;
2763 
2764 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2765 	}
2766 
2767 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2768 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2769 		return -EINVAL;
2770 
2771 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2772 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2773 		return -EINVAL;
2774 
2775 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2776 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2777 		return -EINVAL;
2778 
2779 	if (args->vm_id) {
2780 		vm = xe_vm_lookup(xef, args->vm_id);
2781 		if (XE_IOCTL_DBG(xe, !vm))
2782 			return -ENOENT;
2783 	}
2784 
2785 retry:
2786 	if (vm) {
2787 		err = xe_vm_lock(vm, true);
2788 		if (err)
2789 			goto out_vm;
2790 	}
2791 
2792 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2793 			       bo_flags);
2794 
2795 	if (vm)
2796 		xe_vm_unlock(vm);
2797 
2798 	if (IS_ERR(bo)) {
2799 		err = PTR_ERR(bo);
2800 		if (xe_vm_validate_should_retry(NULL, err, &end))
2801 			goto retry;
2802 		goto out_vm;
2803 	}
2804 
2805 	if (args->extensions) {
2806 		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
2807 		if (err)
2808 			goto out_bulk;
2809 	}
2810 
2811 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2812 	if (err)
2813 		goto out_bulk;
2814 
2815 	args->handle = handle;
2816 	goto out_put;
2817 
2818 out_bulk:
2819 	if (vm && !xe_vm_in_fault_mode(vm)) {
2820 		xe_vm_lock(vm, false);
2821 		__xe_bo_unset_bulk_move(bo);
2822 		xe_vm_unlock(vm);
2823 	}
2824 out_put:
2825 	xe_bo_put(bo);
2826 out_vm:
2827 	if (vm)
2828 		xe_vm_put(vm);
2829 
2830 	return err;
2831 }
2832 
2833 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2834 			     struct drm_file *file)
2835 {
2836 	struct xe_device *xe = to_xe_device(dev);
2837 	struct drm_xe_gem_mmap_offset *args = data;
2838 	struct drm_gem_object *gem_obj;
2839 
2840 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2841 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2842 		return -EINVAL;
2843 
2844 	if (XE_IOCTL_DBG(xe, args->flags &
2845 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
2846 		return -EINVAL;
2847 
2848 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
2849 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
2850 			return -EINVAL;
2851 
2852 		if (XE_IOCTL_DBG(xe, args->handle))
2853 			return -EINVAL;
2854 
2855 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
2856 			return -EINVAL;
2857 
2858 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
2859 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
2860 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
2861 		return 0;
2862 	}
2863 
2864 	gem_obj = drm_gem_object_lookup(file, args->handle);
2865 	if (XE_IOCTL_DBG(xe, !gem_obj))
2866 		return -ENOENT;
2867 
2868 	/* The mmap offset was set up at BO allocation time. */
2869 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2870 
2871 	xe_bo_put(gem_to_xe_bo(gem_obj));
2872 	return 0;
2873 }
2874 
2875 /**
2876  * xe_bo_lock() - Lock the buffer object's dma_resv object
2877  * @bo: The struct xe_bo whose lock is to be taken
2878  * @intr: Whether to perform any wait interruptible
2879  *
2880  * Locks the buffer object's dma_resv object. If the buffer object is
2881  * pointing to a shared dma_resv object, that shared lock is locked.
2882  *
2883  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2884  * contended lock was interrupted. If @intr is set to false, the
2885  * function always returns 0.
2886  */
2887 int xe_bo_lock(struct xe_bo *bo, bool intr)
2888 {
2889 	if (intr)
2890 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2891 
2892 	dma_resv_lock(bo->ttm.base.resv, NULL);
2893 
2894 	return 0;
2895 }
2896 
2897 /**
2898  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2899  * @bo: The struct xe_bo whose lock is to be released.
2900  *
2901  * Unlock a buffer object lock that was locked by xe_bo_lock().
2902  */
2903 void xe_bo_unlock(struct xe_bo *bo)
2904 {
2905 	dma_resv_unlock(bo->ttm.base.resv);
2906 }
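
/*
 * Typical usage of the lock / validate / unlock helpers above, as a minimal
 * sketch only (error handling and the validate arguments depend on the
 * caller's context):
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	err = xe_bo_validate(bo, NULL, false);
 *	xe_bo_unlock(bo);
 */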
2907 
2908 /**
2909  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2910  * @bo: The buffer object to migrate
2911  * @mem_type: The TTM memory type intended to migrate to
2912  *
2913  * Check whether the buffer object supports migration to the
2914  * given memory type. Note that pinning may affect the ability to migrate as
2915  * returned by this function.
2916  *
2917  * This function is primarily intended as a helper for checking the
2918  * possibility to migrate buffer objects and can be called without
2919  * the object lock held.
2920  *
2921  * Return: true if migration is possible, false otherwise.
2922  */
2923 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2924 {
2925 	unsigned int cur_place;
2926 
2927 	if (bo->ttm.type == ttm_bo_type_kernel)
2928 		return true;
2929 
2930 	if (bo->ttm.type == ttm_bo_type_sg)
2931 		return false;
2932 
2933 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2934 	     cur_place++) {
2935 		if (bo->placements[cur_place].mem_type == mem_type)
2936 			return true;
2937 	}
2938 
2939 	return false;
2940 }
2941 
2942 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2943 {
2944 	memset(place, 0, sizeof(*place));
2945 	place->mem_type = mem_type;
2946 }
2947 
2948 /**
2949  * xe_bo_migrate - Migrate an object to the desired region id
2950  * @bo: The buffer object to migrate.
2951  * @mem_type: The TTM region type to migrate to.
2952  *
2953  * Attempt to migrate the buffer object to the desired memory region. The
2954  * buffer object may not be pinned, and must be locked.
2955  * On successful completion, the object memory type will be updated,
2956  * but an async migration task may not have completed yet, and to
2957  * accomplish that, the object's kernel fences must be signaled with
2958  * the object lock held.
2959  *
2960  * Return: 0 on success. Negative error code on failure. In particular may
2961  * return -EINTR or -ERESTARTSYS if signal pending.
2962  */
2963 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2964 {
2965 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2966 	struct ttm_operation_ctx ctx = {
2967 		.interruptible = true,
2968 		.no_wait_gpu = false,
2969 		.gfp_retry_mayfail = true,
2970 	};
2971 	struct ttm_placement placement;
2972 	struct ttm_place requested;
2973 
2974 	xe_bo_assert_held(bo);
2975 
2976 	if (bo->ttm.resource->mem_type == mem_type)
2977 		return 0;
2978 
2979 	if (xe_bo_is_pinned(bo))
2980 		return -EBUSY;
2981 
2982 	if (!xe_bo_can_migrate(bo, mem_type))
2983 		return -EINVAL;
2984 
2985 	xe_place_from_ttm_type(mem_type, &requested);
2986 	placement.num_placement = 1;
2987 	placement.placement = &requested;
2988 
2989 	/*
2990 	 * Stolen needs to be handled like the VRAM handling below if we ever
2991 	 * need to support it.
2992 	 */
2993 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2994 
2995 	if (mem_type_is_vram(mem_type)) {
2996 		u32 c = 0;
2997 
2998 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2999 	}
3000 
3001 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
3002 }
3003 
3004 /**
3005  * xe_bo_evict - Evict an object to evict placement
3006  * @bo: The buffer object to migrate.
3007  *
3008  * On successful completion, the object memory will be moved to evict
3009  * placement. This function blocks until the object has been fully moved.
3010  *
3011  * Return: 0 on success. Negative error code on failure.
3012  */
3013 int xe_bo_evict(struct xe_bo *bo)
3014 {
3015 	struct ttm_operation_ctx ctx = {
3016 		.interruptible = false,
3017 		.no_wait_gpu = false,
3018 		.gfp_retry_mayfail = true,
3019 	};
3020 	struct ttm_placement placement;
3021 	int ret;
3022 
3023 	xe_evict_flags(&bo->ttm, &placement);
3024 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
3025 	if (ret)
3026 		return ret;
3027 
3028 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
3029 			      false, MAX_SCHEDULE_TIMEOUT);
3030 
3031 	return 0;
3032 }
3033 
3034 /**
3035  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
3036  * placed in system memory.
3037  * @bo: The xe_bo
3038  *
3039  * Return: true if extra pages need to be allocated, false otherwise.
3040  */
3041 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
3042 {
3043 	struct xe_device *xe = xe_bo_device(bo);
3044 
3045 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
3046 		return false;
3047 
3048 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
3049 		return false;
3050 
3051 	/* On discrete GPUs, if the GPU can access this buffer from
3052 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
3053 	 * can't be used since there's no CCS storage associated with
3054 	 * non-VRAM addresses.
3055 	 */
3056 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
3057 		return false;
3058 
3059 	/*
3060 	 * Compression implies coh_none, therefore we know for sure that WB
3061 	 * memory can't currently use compression, which is likely one of the
3062 	 * common cases.
3063 	 */
3064 	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
3065 		return false;
3066 
3067 	return true;
3068 }
3069 
3070 /**
3071  * __xe_bo_release_dummy() - Dummy kref release function
3072  * @kref: The embedded struct kref.
3073  *
3074  * Dummy release function for xe_bo_put_deferred(). Keep off.
3075  */
3076 void __xe_bo_release_dummy(struct kref *kref)
3077 {
3078 }
3079 
3080 /**
3081  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
3082  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
3083  *
3084  * Puts all bos whose put was deferred by xe_bo_put_deferred().
3085  * The @deferred list can be either an onstack local list or a global
3086  * shared list used by a workqueue.
3087  */
3088 void xe_bo_put_commit(struct llist_head *deferred)
3089 {
3090 	struct llist_node *freed;
3091 	struct xe_bo *bo, *next;
3092 
3093 	if (!deferred)
3094 		return;
3095 
3096 	freed = llist_del_all(deferred);
3097 	if (!freed)
3098 		return;
3099 
3100 	llist_for_each_entry_safe(bo, next, freed, freed)
3101 		drm_gem_object_free(&bo->ttm.base.refcount);
3102 }
3103 
3104 static void xe_bo_dev_work_func(struct work_struct *work)
3105 {
3106 	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);
3107 
3108 	xe_bo_put_commit(&bo_dev->async_list);
3109 }
3110 
3111 /**
3112  * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
3113  * @bo_dev: The BO dev structure
3114  */
3115 void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
3116 {
3117 	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
3118 }
3119 
3120 /**
3121  * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
3122  * @bo_dev: The BO dev structure
3123  */
3124 void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
3125 {
3126 	flush_work(&bo_dev->async_free);
3127 }
3128 
3129 void xe_bo_put(struct xe_bo *bo)
3130 {
3131 	struct xe_tile *tile;
3132 	u8 id;
3133 
3134 	might_sleep();
3135 	if (bo) {
3136 #ifdef CONFIG_PROC_FS
3137 		if (bo->client)
3138 			might_lock(&bo->client->bos_lock);
3139 #endif
3140 		for_each_tile(tile, xe_bo_device(bo), id)
3141 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
3142 				xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt);
3143 		drm_gem_object_put(&bo->ttm.base);
3144 	}
3145 }
3146 
3147 /**
3148  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
3149  * @file_priv: The drm file-private structure
3150  * @dev: The drm device
3151  * @args: The arguments for dumb buffer creation
3152  *
3153  * See dumb_create() hook in include/drm/drm_drv.h
3154  *
3155  * Return: 0 on success, negative error code otherwise.
3156  */
3157 int xe_bo_dumb_create(struct drm_file *file_priv,
3158 		      struct drm_device *dev,
3159 		      struct drm_mode_create_dumb *args)
3160 {
3161 	struct xe_device *xe = to_xe_device(dev);
3162 	struct xe_bo *bo;
3163 	uint32_t handle;
3164 	int err;
3165 	u32 page_size = max_t(u32, PAGE_SIZE,
3166 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
3167 
3168 	err = drm_mode_size_dumb(dev, args, SZ_64, page_size);
3169 	if (err)
3170 		return err;
3171 
3172 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
3173 			       DRM_XE_GEM_CPU_CACHING_WC,
3174 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
3175 			       XE_BO_FLAG_SCANOUT |
3176 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
3177 	if (IS_ERR(bo))
3178 		return PTR_ERR(bo);
3179 
3180 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
3181 	/* drop reference from allocate - handle holds it now */
3182 	drm_gem_object_put(&bo->ttm.base);
3183 	if (!err)
3184 		args->handle = handle;
3185 	return err;
3186 }
3187 
3188 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
3189 {
3190 	struct ttm_buffer_object *tbo = &bo->ttm;
3191 	struct ttm_device *bdev = tbo->bdev;
3192 
3193 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
3194 
3195 	list_del_init(&bo->vram_userfault_link);
3196 }
3197 
3198 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
3199 #include "tests/xe_bo.c"
3200 #endif
3201