xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision f86ad0ed620cb3c91ec7d5468e93ac68d727539d)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_gem_ttm_helper.h>
13 #include <drm/drm_managed.h>
14 #include <drm/ttm/ttm_backup.h>
15 #include <drm/ttm/ttm_device.h>
16 #include <drm/ttm/ttm_placement.h>
17 #include <drm/ttm/ttm_tt.h>
18 #include <uapi/drm/xe_drm.h>
19 
20 #include <kunit/static_stub.h>
21 
22 #include "xe_device.h"
23 #include "xe_dma_buf.h"
24 #include "xe_drm_client.h"
25 #include "xe_ggtt.h"
26 #include "xe_gt.h"
27 #include "xe_map.h"
28 #include "xe_migrate.h"
29 #include "xe_pm.h"
30 #include "xe_preempt_fence.h"
31 #include "xe_pxp.h"
32 #include "xe_res_cursor.h"
33 #include "xe_shrinker.h"
34 #include "xe_trace_bo.h"
35 #include "xe_ttm_stolen_mgr.h"
36 #include "xe_vm.h"
37 
38 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
39 	[XE_PL_SYSTEM] = "system",
40 	[XE_PL_TT] = "gtt",
41 	[XE_PL_VRAM0] = "vram0",
42 	[XE_PL_VRAM1] = "vram1",
43 	[XE_PL_STOLEN] = "stolen"
44 };
45 
46 static const struct ttm_place sys_placement_flags = {
47 	.fpfn = 0,
48 	.lpfn = 0,
49 	.mem_type = XE_PL_SYSTEM,
50 	.flags = 0,
51 };
52 
53 static struct ttm_placement sys_placement = {
54 	.num_placement = 1,
55 	.placement = &sys_placement_flags,
56 };
57 
58 static struct ttm_placement purge_placement;
59 
60 static const struct ttm_place tt_placement_flags[] = {
61 	{
62 		.fpfn = 0,
63 		.lpfn = 0,
64 		.mem_type = XE_PL_TT,
65 		.flags = TTM_PL_FLAG_DESIRED,
66 	},
67 	{
68 		.fpfn = 0,
69 		.lpfn = 0,
70 		.mem_type = XE_PL_SYSTEM,
71 		.flags = TTM_PL_FLAG_FALLBACK,
72 	}
73 };
74 
75 static struct ttm_placement tt_placement = {
76 	.num_placement = 2,
77 	.placement = tt_placement_flags,
78 };
79 
80 bool mem_type_is_vram(u32 mem_type)
81 {
82 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
83 }
84 
85 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
86 {
87 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
88 }
89 
90 static bool resource_is_vram(struct ttm_resource *res)
91 {
92 	return mem_type_is_vram(res->mem_type);
93 }
94 
95 bool xe_bo_is_vram(struct xe_bo *bo)
96 {
97 	return resource_is_vram(bo->ttm.resource) ||
98 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
99 }
100 
101 bool xe_bo_is_stolen(struct xe_bo *bo)
102 {
103 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
104 }
105 
106 /**
107  * xe_bo_has_single_placement - check if BO is placed only in one memory location
108  * @bo: The BO
109  *
110  * This function checks whether a given BO is placed in only one memory location.
111  *
112  * Returns: true if the BO is placed in a single memory location, false otherwise.
113  *
114  */
115 bool xe_bo_has_single_placement(struct xe_bo *bo)
116 {
117 	return bo->placement.num_placement == 1;
118 }
119 
120 /**
121  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
122  * @bo: The BO
123  *
124  * The stolen memory is accessed through the PCI BAR for both DGFX and some
125  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
126  *
127  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
128  */
129 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
130 {
131 	return xe_bo_is_stolen(bo) &&
132 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
133 }
134 
135 /**
136  * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
137  * @bo: The BO
138  *
139  * Check if a given bo is bound through VM_BIND. This requires the
140  * reservation lock for the BO to be held.
141  *
142  * Returns: boolean
143  */
144 bool xe_bo_is_vm_bound(struct xe_bo *bo)
145 {
146 	xe_bo_assert_held(bo);
147 
148 	return !list_empty(&bo->ttm.base.gpuva.list);
149 }
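
/*
 * Usage sketch (illustrative only, not part of the driver): callers are
 * expected to hold the bo's reservation lock around the check, e.g.:
 *
 *	xe_bo_lock(bo, false);
 *	if (xe_bo_is_vm_bound(bo))
 *		...;	/* at least one VM_BIND mapping exists */
 *	xe_bo_unlock(bo);
 */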
150 
151 static bool xe_bo_is_user(struct xe_bo *bo)
152 {
153 	return bo->flags & XE_BO_FLAG_USER;
154 }
155 
156 static struct xe_migrate *
157 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
158 {
159 	struct xe_tile *tile;
160 
161 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
162 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
163 	return tile->migrate;
164 }
165 
166 static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
167 {
168 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
169 	struct ttm_resource_manager *mgr;
170 	struct xe_ttm_vram_mgr *vram_mgr;
171 
172 	xe_assert(xe, resource_is_vram(res));
173 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
174 	vram_mgr = to_xe_ttm_vram_mgr(mgr);
175 
176 	return container_of(vram_mgr, struct xe_vram_region, ttm);
177 }
178 
179 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
180 			   u32 bo_flags, u32 *c)
181 {
182 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
183 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
184 
185 		bo->placements[*c] = (struct ttm_place) {
186 			.mem_type = XE_PL_TT,
187 		};
188 		*c += 1;
189 	}
190 }
191 
192 static bool force_contiguous(u32 bo_flags)
193 {
194 	if (bo_flags & XE_BO_FLAG_STOLEN)
195 		return true; /* users expect this */
196 	else if (bo_flags & XE_BO_FLAG_PINNED &&
197 		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
198 		return true; /* needs vmap */
199 
200 	/*
201 	 * For eviction / restore on suspend / resume, objects pinned in VRAM
202 	 * must be contiguous. Also, only contiguous BOs support xe_bo_vmap.
203 	 */
204 	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
205 	       bo_flags & XE_BO_FLAG_PINNED;
206 }
207 
208 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
209 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
210 {
211 	struct ttm_place place = { .mem_type = mem_type };
212 	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
213 	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);
214 
215 	struct xe_vram_region *vram;
216 	u64 io_size;
217 
218 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
219 
220 	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
221 	xe_assert(xe, vram && vram->usable_size);
222 	io_size = vram->io_size;
223 
224 	if (force_contiguous(bo_flags))
225 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
226 
227 	if (io_size < vram->usable_size) {
228 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
229 			place.fpfn = 0;
230 			place.lpfn = io_size >> PAGE_SHIFT;
231 		} else {
232 			place.flags |= TTM_PL_FLAG_TOPDOWN;
233 		}
234 	}
235 	places[*c] = place;
236 	*c += 1;
237 }
238 
239 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
240 			 u32 bo_flags, u32 *c)
241 {
242 	if (bo_flags & XE_BO_FLAG_VRAM0)
243 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
244 	if (bo_flags & XE_BO_FLAG_VRAM1)
245 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
246 }
247 
248 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
249 			   u32 bo_flags, u32 *c)
250 {
251 	if (bo_flags & XE_BO_FLAG_STOLEN) {
252 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
253 
254 		bo->placements[*c] = (struct ttm_place) {
255 			.mem_type = XE_PL_STOLEN,
256 			.flags = force_contiguous(bo_flags) ?
257 				TTM_PL_FLAG_CONTIGUOUS : 0,
258 		};
259 		*c += 1;
260 	}
261 }
262 
263 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
264 				       u32 bo_flags)
265 {
266 	u32 c = 0;
267 
268 	try_add_vram(xe, bo, bo_flags, &c);
269 	try_add_system(xe, bo, bo_flags, &c);
270 	try_add_stolen(xe, bo, bo_flags, &c);
271 
272 	if (!c)
273 		return -EINVAL;
274 
275 	bo->placement = (struct ttm_placement) {
276 		.num_placement = c,
277 		.placement = bo->placements,
278 	};
279 
280 	return 0;
281 }
282 
283 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
284 			      u32 bo_flags)
285 {
286 	xe_bo_assert_held(bo);
287 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
288 }
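
/*
 * Sketch (illustrative, mirroring how ___xe_bo_create_locked() uses the
 * internal helper): recompute placements for a locked bo and then let TTM
 * validate against them; ctx and err are assumed to be caller-provided:
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_placement_for_flags(xe, bo, bo->flags);
 *	if (!err)
 *		err = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
 *	xe_bo_unlock(bo);
 */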
289 
290 static void xe_evict_flags(struct ttm_buffer_object *tbo,
291 			   struct ttm_placement *placement)
292 {
293 	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
294 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
295 	struct xe_bo *bo;
296 
297 	if (!xe_bo_is_xe_bo(tbo)) {
298 		/* Don't handle scatter gather BOs */
299 		if (tbo->type == ttm_bo_type_sg) {
300 			placement->num_placement = 0;
301 			return;
302 		}
303 
304 		*placement = device_unplugged ? purge_placement : sys_placement;
305 		return;
306 	}
307 
308 	bo = ttm_to_xe_bo(tbo);
309 	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
310 		*placement = sys_placement;
311 		return;
312 	}
313 
314 	if (device_unplugged && !tbo->base.dma_buf) {
315 		*placement = purge_placement;
316 		return;
317 	}
318 
319 	/*
320 	 * For xe, sg bos that are evicted to system just trigger a
321 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
322 	 */
323 	switch (tbo->resource->mem_type) {
324 	case XE_PL_VRAM0:
325 	case XE_PL_VRAM1:
326 	case XE_PL_STOLEN:
327 		*placement = tt_placement;
328 		break;
329 	case XE_PL_TT:
330 	default:
331 		*placement = sys_placement;
332 		break;
333 	}
334 }
335 
336 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */
337 struct xe_ttm_tt {
338 	struct ttm_tt ttm;
339 	struct sg_table sgt;
340 	struct sg_table *sg;
341 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
342 	bool purgeable;
343 };
344 
345 static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
346 {
347 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
348 	unsigned long num_pages = tt->num_pages;
349 	int ret;
350 
351 	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
352 		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
353 
354 	if (xe_tt->sg)
355 		return 0;
356 
357 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
358 						num_pages, 0,
359 						(u64)num_pages << PAGE_SHIFT,
360 						xe_sg_segment_size(xe->drm.dev),
361 						GFP_KERNEL);
362 	if (ret)
363 		return ret;
364 
365 	xe_tt->sg = &xe_tt->sgt;
366 	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
367 			      DMA_ATTR_SKIP_CPU_SYNC);
368 	if (ret) {
369 		sg_free_table(xe_tt->sg);
370 		xe_tt->sg = NULL;
371 		return ret;
372 	}
373 
374 	return 0;
375 }
376 
377 static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
378 {
379 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
380 
381 	if (xe_tt->sg) {
382 		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
383 				  DMA_BIDIRECTIONAL, 0);
384 		sg_free_table(xe_tt->sg);
385 		xe_tt->sg = NULL;
386 	}
387 }
388 
389 struct sg_table *xe_bo_sg(struct xe_bo *bo)
390 {
391 	struct ttm_tt *tt = bo->ttm.ttm;
392 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
393 
394 	return xe_tt->sg;
395 }
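
/*
 * Illustrative sketch of consuming the returned table (assumes the bo is
 * currently in XE_PL_TT so that xe_tt_map_sg() has populated it);
 * use_dma_chunk() is a hypothetical consumer, not a driver function:
 *
 *	struct sg_table *sgt = xe_bo_sg(bo);
 *	struct scatterlist *sg;
 *	unsigned int i;
 *
 *	for_each_sgtable_dma_sg(sgt, sg, i)
 *		use_dma_chunk(sg_dma_address(sg), sg_dma_len(sg));
 */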
396 
397 /*
398  * Account ttm pages against the device shrinker's shrinkable and
399  * purgeable counts.
400  */
401 static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
402 {
403 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
404 
405 	if (xe_tt->purgeable)
406 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
407 	else
408 		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
409 }
410 
411 static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
412 {
413 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
414 
415 	if (xe_tt->purgeable)
416 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
417 	else
418 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
419 }
420 
421 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
422 				       u32 page_flags)
423 {
424 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
425 	struct xe_device *xe = xe_bo_device(bo);
426 	struct xe_ttm_tt *xe_tt;
427 	struct ttm_tt *tt;
428 	unsigned long extra_pages;
429 	enum ttm_caching caching = ttm_cached;
430 	int err;
431 
432 	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
433 	if (!xe_tt)
434 		return NULL;
435 
436 	tt = &xe_tt->ttm;
437 
438 	extra_pages = 0;
439 	if (xe_bo_needs_ccs_pages(bo))
440 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
441 					   PAGE_SIZE);
442 
443 	/*
444 	 * DGFX system memory is always WB / ttm_cached, since
445 	 * other caching modes are only supported on x86. DGFX
446 	 * GPU system memory accesses are always coherent with the
447 	 * CPU.
448 	 */
449 	if (!IS_DGFX(xe)) {
450 		switch (bo->cpu_caching) {
451 		case DRM_XE_GEM_CPU_CACHING_WC:
452 			caching = ttm_write_combined;
453 			break;
454 		default:
455 			caching = ttm_cached;
456 			break;
457 		}
458 
459 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
460 
461 		/*
462 		 * Display scanout is always non-coherent with the CPU cache.
463 		 *
464 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
465 		 * non-coherent and require a CPU:WC mapping.
466 		 */
467 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
468 		    (xe->info.graphics_verx100 >= 1270 &&
469 		     bo->flags & XE_BO_FLAG_PAGETABLE))
470 			caching = ttm_write_combined;
471 	}
472 
473 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
474 		/*
475 		 * Valid only for internally-created buffers, for
476 		 * which cpu_caching is never initialized.
477 		 */
478 		xe_assert(xe, bo->cpu_caching == 0);
479 		caching = ttm_uncached;
480 	}
481 
482 	if (ttm_bo->type != ttm_bo_type_sg)
483 		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
484 
485 	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
486 	if (err) {
487 		kfree(xe_tt);
488 		return NULL;
489 	}
490 
491 	if (ttm_bo->type != ttm_bo_type_sg) {
492 		err = ttm_tt_setup_backup(tt);
493 		if (err) {
494 			ttm_tt_fini(tt);
495 			kfree(xe_tt);
496 			return NULL;
497 		}
498 	}
499 
500 	return tt;
501 }
502 
503 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
504 			      struct ttm_operation_ctx *ctx)
505 {
506 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
507 	int err;
508 
509 	/*
510 	 * dma-bufs are not populated with pages, and the dma-
511 	 * addresses are set up when moved to XE_PL_TT.
512 	 */
513 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
514 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
515 		return 0;
516 
517 	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
518 		err = ttm_tt_restore(ttm_dev, tt, ctx);
519 	} else {
520 		ttm_tt_clear_backed_up(tt);
521 		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
522 	}
523 	if (err)
524 		return err;
525 
526 	xe_tt->purgeable = false;
527 	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
528 
529 	return 0;
530 }
531 
532 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
533 {
534 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
535 
536 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
537 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
538 		return;
539 
540 	xe_tt_unmap_sg(xe, tt);
541 
542 	ttm_pool_free(&ttm_dev->pool, tt);
543 	xe_ttm_tt_account_subtract(xe, tt);
544 }
545 
546 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
547 {
548 	ttm_tt_fini(tt);
549 	kfree(tt);
550 }
551 
552 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
553 {
554 	struct xe_ttm_vram_mgr_resource *vres =
555 		to_xe_ttm_vram_mgr_resource(mem);
556 
557 	return vres->used_visible_size == mem->size;
558 }
559 
560 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
561 				 struct ttm_resource *mem)
562 {
563 	struct xe_device *xe = ttm_to_xe_device(bdev);
564 
565 	switch (mem->mem_type) {
566 	case XE_PL_SYSTEM:
567 	case XE_PL_TT:
568 		return 0;
569 	case XE_PL_VRAM0:
570 	case XE_PL_VRAM1: {
571 		struct xe_vram_region *vram = res_to_mem_region(mem);
572 
573 		if (!xe_ttm_resource_visible(mem))
574 			return -EINVAL;
575 
576 		mem->bus.offset = mem->start << PAGE_SHIFT;
577 
578 		if (vram->mapping &&
579 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
580 			mem->bus.addr = (u8 __force *)vram->mapping +
581 				mem->bus.offset;
582 
583 		mem->bus.offset += vram->io_start;
584 		mem->bus.is_iomem = true;
585 
586 #if !IS_ENABLED(CONFIG_X86)
587 		mem->bus.caching = ttm_write_combined;
588 #endif
589 		return 0;
590 	} case XE_PL_STOLEN:
591 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
592 	default:
593 		return -EINVAL;
594 	}
595 }
596 
597 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
598 				const struct ttm_operation_ctx *ctx)
599 {
600 	struct dma_resv_iter cursor;
601 	struct dma_fence *fence;
602 	struct drm_gem_object *obj = &bo->ttm.base;
603 	struct drm_gpuvm_bo *vm_bo;
604 	bool idle = false;
605 	int ret = 0;
606 
607 	dma_resv_assert_held(bo->ttm.base.resv);
608 
609 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
610 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
611 				    DMA_RESV_USAGE_BOOKKEEP);
612 		dma_resv_for_each_fence_unlocked(&cursor, fence)
613 			dma_fence_enable_sw_signaling(fence);
614 		dma_resv_iter_end(&cursor);
615 	}
616 
617 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
618 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
619 		struct drm_gpuva *gpuva;
620 
621 		if (!xe_vm_in_fault_mode(vm)) {
622 			drm_gpuvm_bo_evict(vm_bo, true);
623 			continue;
624 		}
625 
626 		if (!idle) {
627 			long timeout;
628 
629 			if (ctx->no_wait_gpu &&
630 			    !dma_resv_test_signaled(bo->ttm.base.resv,
631 						    DMA_RESV_USAGE_BOOKKEEP))
632 				return -EBUSY;
633 
634 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
635 							DMA_RESV_USAGE_BOOKKEEP,
636 							ctx->interruptible,
637 							MAX_SCHEDULE_TIMEOUT);
638 			if (!timeout)
639 				return -ETIME;
640 			if (timeout < 0)
641 				return timeout;
642 
643 			idle = true;
644 		}
645 
646 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
647 			struct xe_vma *vma = gpuva_to_vma(gpuva);
648 
649 			trace_xe_vma_evict(vma);
650 			ret = xe_vm_invalidate_vma(vma);
651 			if (XE_WARN_ON(ret))
652 				return ret;
653 		}
654 	}
655 
656 	return ret;
657 }
658 
659 /*
660  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
661  * Note that unmapping the attachment is deferred to the next
662  * map_attachment time, or to bo destroy (after idling), whichever comes first.
663  * This is to avoid syncing before unmap_attachment(), assuming that the
664  * caller relies on idling the reservation object before moving the
665  * backing store out. Should that assumption not hold, then we will be able
666  * to unconditionally call unmap_attachment() when moving out to system.
667  */
668 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
669 			     struct ttm_resource *new_res)
670 {
671 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
672 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
673 					       ttm);
674 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
675 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
676 	struct sg_table *sg;
677 
678 	xe_assert(xe, attach);
679 	xe_assert(xe, ttm_bo->ttm);
680 
681 	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
682 	    ttm_bo->sg) {
683 		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
684 				      false, MAX_SCHEDULE_TIMEOUT);
685 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
686 		ttm_bo->sg = NULL;
687 	}
688 
689 	if (new_res->mem_type == XE_PL_SYSTEM)
690 		goto out;
691 
692 	if (ttm_bo->sg) {
693 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
694 		ttm_bo->sg = NULL;
695 	}
696 
697 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
698 	if (IS_ERR(sg))
699 		return PTR_ERR(sg);
700 
701 	ttm_bo->sg = sg;
702 	xe_tt->sg = sg;
703 
704 out:
705 	ttm_bo_move_null(ttm_bo, new_res);
706 
707 	return 0;
708 }
709 
710 /**
711  * xe_bo_move_notify - Notify subsystems of a pending move
712  * @bo: The buffer object
713  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
714  *
715  * This function notifies subsystems of an upcoming buffer move.
716  * Upon receiving such a notification, subsystems should schedule
717  * halting access to the underlying pages and optionally add a fence
718  * to the buffer object's dma_resv object that signals when access is
719  * stopped. The caller will wait on all dma_resv fences before
720  * starting the move.
721  *
722  * A subsystem may commence access to the object after obtaining
723  * bindings to the new backing memory under the object lock.
724  *
725  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
726  * negative error code on error.
727  */
728 static int xe_bo_move_notify(struct xe_bo *bo,
729 			     const struct ttm_operation_ctx *ctx)
730 {
731 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
732 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
733 	struct ttm_resource *old_mem = ttm_bo->resource;
734 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
735 	int ret;
736 
737 	/*
738 	 * If this starts to call into many components, consider
739 	 * using a notification chain here.
740 	 */
741 
742 	if (xe_bo_is_pinned(bo))
743 		return -EINVAL;
744 
745 	xe_bo_vunmap(bo);
746 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
747 	if (ret)
748 		return ret;
749 
750 	/* Don't call move_notify() for imported dma-bufs. */
751 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
752 		dma_buf_move_notify(ttm_bo->base.dma_buf);
753 
754 	/*
755 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
756 	 * so if we moved from VRAM make sure to unlink this from the userfault
757 	 * tracking.
758 	 */
759 	if (mem_type_is_vram(old_mem_type)) {
760 		mutex_lock(&xe->mem_access.vram_userfault.lock);
761 		if (!list_empty(&bo->vram_userfault_link))
762 			list_del_init(&bo->vram_userfault_link);
763 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
764 	}
765 
766 	return 0;
767 }
768 
769 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
770 		      struct ttm_operation_ctx *ctx,
771 		      struct ttm_resource *new_mem,
772 		      struct ttm_place *hop)
773 {
774 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
775 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
776 	struct ttm_resource *old_mem = ttm_bo->resource;
777 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
778 	struct ttm_tt *ttm = ttm_bo->ttm;
779 	struct xe_migrate *migrate = NULL;
780 	struct dma_fence *fence;
781 	bool move_lacks_source;
782 	bool tt_has_data;
783 	bool needs_clear;
784 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
785 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
786 	int ret = 0;
787 
788 	/* Bo creation path, moving to system or TT. */
789 	if ((!old_mem && ttm) && !handle_system_ccs) {
790 		if (new_mem->mem_type == XE_PL_TT)
791 			ret = xe_tt_map_sg(xe, ttm);
792 		if (!ret)
793 			ttm_bo_move_null(ttm_bo, new_mem);
794 		goto out;
795 	}
796 
797 	if (ttm_bo->type == ttm_bo_type_sg) {
798 		ret = xe_bo_move_notify(bo, ctx);
799 		if (!ret)
800 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
801 		return ret;
802 	}
803 
804 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
805 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
806 
807 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
808 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
809 
810 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
811 		(!ttm && ttm_bo->type == ttm_bo_type_device);
812 
813 	if (new_mem->mem_type == XE_PL_TT) {
814 		ret = xe_tt_map_sg(xe, ttm);
815 		if (ret)
816 			goto out;
817 	}
818 
819 	if ((move_lacks_source && !needs_clear)) {
820 		ttm_bo_move_null(ttm_bo, new_mem);
821 		goto out;
822 	}
823 
824 	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
825 	    new_mem->mem_type == XE_PL_SYSTEM) {
826 		ret = xe_svm_bo_evict(bo);
827 		if (!ret) {
828 			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
829 			ttm_bo_move_null(ttm_bo, new_mem);
830 		} else {
831 			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
832 				ERR_PTR(ret));
833 		}
834 
835 		goto out;
836 	}
837 
838 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
839 		ttm_bo_move_null(ttm_bo, new_mem);
840 		goto out;
841 	}
842 
843 	/*
844 	 * A failed multi-hop, where the old_mem is still marked as
845 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
846 	 */
847 	if (old_mem_type == XE_PL_TT &&
848 	    new_mem->mem_type == XE_PL_TT) {
849 		ttm_bo_move_null(ttm_bo, new_mem);
850 		goto out;
851 	}
852 
853 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
854 		ret = xe_bo_move_notify(bo, ctx);
855 		if (ret)
856 			goto out;
857 	}
858 
859 	if (old_mem_type == XE_PL_TT &&
860 	    new_mem->mem_type == XE_PL_SYSTEM) {
861 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
862 						     DMA_RESV_USAGE_BOOKKEEP,
863 						     false,
864 						     MAX_SCHEDULE_TIMEOUT);
865 		if (timeout < 0) {
866 			ret = timeout;
867 			goto out;
868 		}
869 
870 		if (!handle_system_ccs) {
871 			ttm_bo_move_null(ttm_bo, new_mem);
872 			goto out;
873 		}
874 	}
875 
876 	if (!move_lacks_source &&
877 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
878 	     (mem_type_is_vram(old_mem_type) &&
879 	      new_mem->mem_type == XE_PL_SYSTEM))) {
880 		hop->fpfn = 0;
881 		hop->lpfn = 0;
882 		hop->mem_type = XE_PL_TT;
883 		hop->flags = TTM_PL_FLAG_TEMPORARY;
884 		ret = -EMULTIHOP;
885 		goto out;
886 	}
887 
888 	if (bo->tile)
889 		migrate = bo->tile->migrate;
890 	else if (resource_is_vram(new_mem))
891 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
892 	else if (mem_type_is_vram(old_mem_type))
893 		migrate = mem_type_to_migrate(xe, old_mem_type);
894 	else
895 		migrate = xe->tiles[0].migrate;
896 
897 	xe_assert(xe, migrate);
898 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
899 	if (xe_rpm_reclaim_safe(xe)) {
900 		/*
901 		 * We might be called through swapout in the validation path of
902 		 * another TTM device, so acquire rpm here.
903 		 */
904 		xe_pm_runtime_get(xe);
905 	} else {
906 		drm_WARN_ON(&xe->drm, handle_system_ccs);
907 		xe_pm_runtime_get_noresume(xe);
908 	}
909 
910 	if (move_lacks_source) {
911 		u32 flags = 0;
912 
913 		if (mem_type_is_vram(new_mem->mem_type))
914 			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
915 		else if (handle_system_ccs)
916 			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
917 
918 		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
919 	} else {
920 		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
921 					handle_system_ccs);
922 	}
923 	if (IS_ERR(fence)) {
924 		ret = PTR_ERR(fence);
925 		xe_pm_runtime_put(xe);
926 		goto out;
927 	}
928 	if (!move_lacks_source) {
929 		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
930 						new_mem);
931 		if (ret) {
932 			dma_fence_wait(fence, false);
933 			ttm_bo_move_null(ttm_bo, new_mem);
934 			ret = 0;
935 		}
936 	} else {
937 		/*
938 		 * ttm_bo_move_accel_cleanup() may blow up if
939 		 * bo->resource == NULL, so just attach the
940 		 * fence and set the new resource.
941 		 */
942 		dma_resv_add_fence(ttm_bo->base.resv, fence,
943 				   DMA_RESV_USAGE_KERNEL);
944 		ttm_bo_move_null(ttm_bo, new_mem);
945 	}
946 
947 	dma_fence_put(fence);
948 	xe_pm_runtime_put(xe);
949 
950 out:
951 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
952 	    ttm_bo->ttm) {
953 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
954 						     DMA_RESV_USAGE_KERNEL,
955 						     false,
956 						     MAX_SCHEDULE_TIMEOUT);
957 		if (timeout < 0)
958 			ret = timeout;
959 
960 		xe_tt_unmap_sg(xe, ttm_bo->ttm);
961 	}
962 
963 	return ret;
964 }
965 
966 static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
967 			       struct ttm_buffer_object *bo,
968 			       unsigned long *scanned)
969 {
970 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
971 	long lret;
972 
973 	/* Fake move to system, without copying data. */
974 	if (bo->resource->mem_type != XE_PL_SYSTEM) {
975 		struct ttm_resource *new_resource;
976 
977 		lret = ttm_bo_wait_ctx(bo, ctx);
978 		if (lret)
979 			return lret;
980 
981 		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
982 		if (lret)
983 			return lret;
984 
985 		xe_tt_unmap_sg(xe, bo->ttm);
986 		ttm_bo_move_null(bo, new_resource);
987 	}
988 
989 	*scanned += bo->ttm->num_pages;
990 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
991 			     {.purge = true,
992 			      .writeback = false,
993 			      .allow_move = false});
994 
995 	if (lret > 0)
996 		xe_ttm_tt_account_subtract(xe, bo->ttm);
997 
998 	return lret;
999 }
1000 
1001 static bool
1002 xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1003 {
1004 	struct drm_gpuvm_bo *vm_bo;
1005 
1006 	if (!ttm_bo_eviction_valuable(bo, place))
1007 		return false;
1008 
1009 	if (!xe_bo_is_xe_bo(bo))
1010 		return true;
1011 
1012 	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1013 		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1014 			return false;
1015 	}
1016 
1017 	return true;
1018 }
1019 
1020 /**
1021  * xe_bo_shrink() - Try to shrink an xe bo.
1022  * @ctx: The struct ttm_operation_ctx used for shrinking.
1023  * @bo: The TTM buffer object whose pages to shrink.
1024  * @flags: Flags governing the shrink behaviour.
1025  * @scanned: Pointer to a counter of the number of pages
1026  * attempted to shrink.
1027  *
1028  * Try to shrink or purge a bo, and if that succeeds, unmap dma.
1029  * Note that we need to be able to handle non-xe bos as well
1030  * (ghost bos), but only if the struct ttm_tt is embedded in
1031  * a struct xe_ttm_tt. When the function attempts to shrink
1032  * the pages of a buffer object, the value pointed to by @scanned
1033  * is updated.
1034  *
1035  * Return: The number of pages shrunk or purged, or a negative error
1036  * code on failure.
1037  */
1038 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
1039 		  const struct xe_bo_shrink_flags flags,
1040 		  unsigned long *scanned)
1041 {
1042 	struct ttm_tt *tt = bo->ttm;
1043 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
1044 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
1045 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
1046 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1047 	bool needs_rpm;
1048 	long lret = 0L;
1049 
1050 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
1051 	    (flags.purge && !xe_tt->purgeable))
1052 		return -EBUSY;
1053 
1054 	if (!xe_bo_eviction_valuable(bo, &place))
1055 		return -EBUSY;
1056 
1057 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1058 		return xe_bo_shrink_purge(ctx, bo, scanned);
1059 
1060 	if (xe_tt->purgeable) {
1061 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1062 			lret = xe_bo_move_notify(xe_bo, ctx);
1063 		if (!lret)
1064 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1065 		goto out_unref;
1066 	}
1067 
1068 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1069 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1070 		     xe_bo_needs_ccs_pages(xe_bo));
1071 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1072 		goto out_unref;
1073 
1074 	*scanned += tt->num_pages;
1075 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1076 			     {.purge = false,
1077 			      .writeback = flags.writeback,
1078 			      .allow_move = true});
1079 	if (needs_rpm)
1080 		xe_pm_runtime_put(xe);
1081 
1082 	if (lret > 0)
1083 		xe_ttm_tt_account_subtract(xe, tt);
1084 
1085 out_unref:
1086 	xe_bo_put(xe_bo);
1087 
1088 	return lret;
1089 }
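
/*
 * Call sketch (illustrative only): a shrinker scan loop would typically
 * invoke this on an LRU-walked, locked object roughly as follows:
 *
 *	struct ttm_operation_ctx ctx = { .no_wait_gpu = true,
 *					 .gfp_retry_mayfail = true };
 *	unsigned long scanned = 0;
 *	long freed;
 *
 *	freed = xe_bo_shrink(&ctx, bo, (struct xe_bo_shrink_flags)
 *			     {.writeback = true}, &scanned);
 *	if (freed < 0)
 *		...;	/* e.g. -EBUSY: skip this bo and keep scanning */
 */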
1090 
1091 /**
1092  * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1093  * up in system memory.
1094  * @bo: The buffer object to prepare.
1095  *
1096  * On successful completion, the object backup pages are allocated. Expectation
1097  * is that this is called from the PM notifier, prior to suspend/hibernation.
1098  *
1099  * Return: 0 on success. Negative error code on failure.
1100  */
1101 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1102 {
1103 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1104 	struct xe_bo *backup;
1105 	int ret = 0;
1106 
1107 	xe_bo_lock(bo, false);
1108 
1109 	xe_assert(xe, !bo->backup_obj);
1110 
1111 	/*
1112 	 * Since this is called from the PM notifier we might have raced with
1113 	 * someone unpinning this after we dropped the pinned list lock and
1114 	 * before grabbing the bo lock above.
1115 	 */
1116 	if (!xe_bo_is_pinned(bo))
1117 		goto out_unlock_bo;
1118 
1119 	if (!xe_bo_is_vram(bo))
1120 		goto out_unlock_bo;
1121 
1122 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1123 		goto out_unlock_bo;
1124 
1125 	backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
1126 					DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1127 					XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1128 					XE_BO_FLAG_PINNED);
1129 	if (IS_ERR(backup)) {
1130 		ret = PTR_ERR(backup);
1131 		goto out_unlock_bo;
1132 	}
1133 
1134 	backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1135 	ttm_bo_pin(&backup->ttm);
1136 	bo->backup_obj = backup;
1137 
1138 out_unlock_bo:
1139 	xe_bo_unlock(bo);
1140 	return ret;
1141 }
1142 
1143 /**
1144  * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1145  * @bo: The buffer object to undo the prepare for.
1146  *
1147  * Always returns 0. The backup object is removed, if still present. Expectation
1148  * is that this is called from the PM notifier when undoing the prepare step.
1149  *
1150  * Return: Always returns 0.
1151  */
1152 int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1153 {
1154 	xe_bo_lock(bo, false);
1155 	if (bo->backup_obj) {
1156 		ttm_bo_unpin(&bo->backup_obj->ttm);
1157 		xe_bo_put(bo->backup_obj);
1158 		bo->backup_obj = NULL;
1159 	}
1160 	xe_bo_unlock(bo);
1161 
1162 	return 0;
1163 }
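
/*
 * Illustrative pairing (a sketch, not the driver's actual PM notifier): the
 * prepare/unprepare helpers above are meant to bracket suspend/hibernate
 * preparation, roughly:
 *
 *	case PM_HIBERNATION_PREPARE:
 *	case PM_SUSPEND_PREPARE:
 *		err = xe_bo_notifier_prepare_pinned(bo);
 *		break;
 *	case PM_POST_HIBERNATION:
 *	case PM_POST_SUSPEND:
 *		xe_bo_notifier_unprepare_pinned(bo);
 *		break;
 */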
1164 
1165 /**
1166  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1167  * @bo: The buffer object to move.
1168  *
1169  * On successful completion, the object memory will be moved to system memory.
1170  *
1171  * This is needed for special handling of pinned VRAM objects during
1172  * suspend-resume.
1173  *
1174  * Return: 0 on success. Negative error code on failure.
1175  */
1176 int xe_bo_evict_pinned(struct xe_bo *bo)
1177 {
1178 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1179 	struct xe_bo *backup = bo->backup_obj;
1180 	bool backup_created = false;
1181 	bool unmap = false;
1182 	int ret = 0;
1183 
1184 	xe_bo_lock(bo, false);
1185 
1186 	if (WARN_ON(!bo->ttm.resource)) {
1187 		ret = -EINVAL;
1188 		goto out_unlock_bo;
1189 	}
1190 
1191 	if (WARN_ON(!xe_bo_is_pinned(bo))) {
1192 		ret = -EINVAL;
1193 		goto out_unlock_bo;
1194 	}
1195 
1196 	if (!xe_bo_is_vram(bo))
1197 		goto out_unlock_bo;
1198 
1199 	if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1200 		goto out_unlock_bo;
1201 
1202 	if (!backup) {
1203 		backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
1204 						DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1205 						XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1206 						XE_BO_FLAG_PINNED);
1207 		if (IS_ERR(backup)) {
1208 			ret = PTR_ERR(backup);
1209 			goto out_unlock_bo;
1210 		}
1211 		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1212 		backup_created = true;
1213 	}
1214 
1215 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1216 		struct xe_migrate *migrate;
1217 		struct dma_fence *fence;
1218 
1219 		if (bo->tile)
1220 			migrate = bo->tile->migrate;
1221 		else
1222 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1223 
1224 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1225 		if (ret)
1226 			goto out_backup;
1227 
1228 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1229 		if (ret)
1230 			goto out_backup;
1231 
1232 		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
1233 					backup->ttm.resource, false);
1234 		if (IS_ERR(fence)) {
1235 			ret = PTR_ERR(fence);
1236 			goto out_backup;
1237 		}
1238 
1239 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1240 				   DMA_RESV_USAGE_KERNEL);
1241 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1242 				   DMA_RESV_USAGE_KERNEL);
1243 		dma_fence_put(fence);
1244 	} else {
1245 		ret = xe_bo_vmap(backup);
1246 		if (ret)
1247 			goto out_backup;
1248 
1249 		if (iosys_map_is_null(&bo->vmap)) {
1250 			ret = xe_bo_vmap(bo);
1251 			if (ret)
1252 				goto out_backup;
1253 			unmap = true;
1254 		}
1255 
1256 		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
1257 				   bo->size);
1258 	}
1259 
1260 	if (!bo->backup_obj)
1261 		bo->backup_obj = backup;
1262 
1263 out_backup:
1264 	xe_bo_vunmap(backup);
1265 	if (ret && backup_created)
1266 		xe_bo_put(backup);
1267 out_unlock_bo:
1268 	if (unmap)
1269 		xe_bo_vunmap(bo);
1270 	xe_bo_unlock(bo);
1271 	return ret;
1272 }
1273 
1274 /**
1275  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1276  * @bo: The buffer object to move.
1277  *
1278  * On successful completion, the object memory will be moved back to VRAM.
1279  *
1280  * This is needed for special handling of pinned VRAM objects during
1281  * suspend-resume.
1282  *
1283  * Return: 0 on success. Negative error code on failure.
1284  */
1285 int xe_bo_restore_pinned(struct xe_bo *bo)
1286 {
1287 	struct ttm_operation_ctx ctx = {
1288 		.interruptible = false,
1289 		.gfp_retry_mayfail = false,
1290 	};
1291 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1292 	struct xe_bo *backup = bo->backup_obj;
1293 	bool unmap = false;
1294 	int ret;
1295 
1296 	if (!backup)
1297 		return 0;
1298 
1299 	xe_bo_lock(bo, false);
1300 
1301 	if (!xe_bo_is_pinned(backup)) {
1302 		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1303 		if (ret)
1304 			goto out_unlock_bo;
1305 	}
1306 
1307 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1308 		struct xe_migrate *migrate;
1309 		struct dma_fence *fence;
1310 
1311 		if (bo->tile)
1312 			migrate = bo->tile->migrate;
1313 		else
1314 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1315 
1316 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1317 		if (ret)
1318 			goto out_unlock_bo;
1319 
1320 		ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
1321 		if (ret)
1322 			goto out_unlock_bo;
1323 
1324 		fence = xe_migrate_copy(migrate, backup, bo,
1325 					backup->ttm.resource, bo->ttm.resource,
1326 					false);
1327 		if (IS_ERR(fence)) {
1328 			ret = PTR_ERR(fence);
1329 			goto out_unlock_bo;
1330 		}
1331 
1332 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1333 				   DMA_RESV_USAGE_KERNEL);
1334 		dma_resv_add_fence(backup->ttm.base.resv, fence,
1335 				   DMA_RESV_USAGE_KERNEL);
1336 		dma_fence_put(fence);
1337 	} else {
1338 		ret = xe_bo_vmap(backup);
1339 		if (ret)
1340 			goto out_unlock_bo;
1341 
1342 		if (iosys_map_is_null(&bo->vmap)) {
1343 			ret = xe_bo_vmap(bo);
1344 			if (ret)
1345 				goto out_backup;
1346 			unmap = true;
1347 		}
1348 
1349 		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
1350 				 bo->size);
1351 	}
1352 
1353 	bo->backup_obj = NULL;
1354 
1355 out_backup:
1356 	xe_bo_vunmap(backup);
1357 	if (!bo->backup_obj) {
1358 		if (xe_bo_is_pinned(backup))
1359 			ttm_bo_unpin(&backup->ttm);
1360 		xe_bo_put(backup);
1361 	}
1362 out_unlock_bo:
1363 	if (unmap)
1364 		xe_bo_vunmap(bo);
1365 	xe_bo_unlock(bo);
1366 	return ret;
1367 }
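
/*
 * Illustrative suspend/resume flow (sketch only; real callers iterate the
 * device's pinned-bo lists rather than a single object):
 *
 *	suspend:	ret = xe_bo_evict_pinned(bo);
 *	resume:		ret = xe_bo_restore_pinned(bo);
 *
 * xe_bo_evict_pinned() skips objects not currently in VRAM, and
 * xe_bo_restore_pinned() returns early when no backup object exists.
 */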
1368 
1369 int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
1370 {
1371 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
1372 	struct ttm_tt *tt = ttm_bo->ttm;
1373 
1374 	if (tt) {
1375 		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
1376 
1377 		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1378 			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
1379 						 ttm_bo->sg,
1380 						 DMA_BIDIRECTIONAL);
1381 			ttm_bo->sg = NULL;
1382 			xe_tt->sg = NULL;
1383 		} else if (xe_tt->sg) {
1384 			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
1385 					  xe_tt->sg,
1386 					  DMA_BIDIRECTIONAL, 0);
1387 			sg_free_table(xe_tt->sg);
1388 			xe_tt->sg = NULL;
1389 		}
1390 	}
1391 
1392 	return 0;
1393 }
1394 
1395 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1396 				       unsigned long page_offset)
1397 {
1398 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1399 	struct xe_res_cursor cursor;
1400 	struct xe_vram_region *vram;
1401 
1402 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1403 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1404 
1405 	vram = res_to_mem_region(ttm_bo->resource);
1406 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1407 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1408 }
1409 
1410 static void __xe_bo_vunmap(struct xe_bo *bo);
1411 
1412 /*
1413  * TODO: Move this function to TTM so we don't rely on how TTM does its
1414  * locking, thereby abusing TTM internals.
1415  */
1416 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1417 {
1418 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1419 	bool locked;
1420 
1421 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1422 
1423 	/*
1424 	 * We can typically only race with TTM trylocking under the
1425 	 * lru_lock, which will immediately be unlocked again since
1426 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1427 	 * always succeed here, as long as we hold the lru lock.
1428 	 */
1429 	spin_lock(&ttm_bo->bdev->lru_lock);
1430 	locked = dma_resv_trylock(ttm_bo->base.resv);
1431 	spin_unlock(&ttm_bo->bdev->lru_lock);
1432 	xe_assert(xe, locked);
1433 
1434 	return locked;
1435 }
1436 
1437 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1438 {
1439 	struct dma_resv_iter cursor;
1440 	struct dma_fence *fence;
1441 	struct dma_fence *replacement = NULL;
1442 	struct xe_bo *bo;
1443 
1444 	if (!xe_bo_is_xe_bo(ttm_bo))
1445 		return;
1446 
1447 	bo = ttm_to_xe_bo(ttm_bo);
1448 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1449 
1450 	/*
1451 	 * Corner case where TTM fails to allocate memory and this BO's resv
1452 	 * still points to the VM's resv.
1453 	 */
1454 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1455 		return;
1456 
1457 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1458 		return;
1459 
1460 	/*
1461 	 * Scrub the preempt fences if any. The unbind fence is already
1462 	 * attached to the resv.
1463 	 * TODO: Don't do this for external bos once we scrub them after
1464 	 * unbind.
1465 	 */
1466 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1467 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1468 		if (xe_fence_is_xe_preempt(fence) &&
1469 		    !dma_fence_is_signaled(fence)) {
1470 			if (!replacement)
1471 				replacement = dma_fence_get_stub();
1472 
1473 			dma_resv_replace_fences(ttm_bo->base.resv,
1474 						fence->context,
1475 						replacement,
1476 						DMA_RESV_USAGE_BOOKKEEP);
1477 		}
1478 	}
1479 	dma_fence_put(replacement);
1480 
1481 	dma_resv_unlock(ttm_bo->base.resv);
1482 }
1483 
1484 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1485 {
1486 	if (!xe_bo_is_xe_bo(ttm_bo))
1487 		return;
1488 
1489 	/*
1490 	 * Object is idle and about to be destroyed. Release the
1491 	 * dma-buf attachment.
1492 	 */
1493 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1494 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1495 						       struct xe_ttm_tt, ttm);
1496 
1497 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1498 					 DMA_BIDIRECTIONAL);
1499 		ttm_bo->sg = NULL;
1500 		xe_tt->sg = NULL;
1501 	}
1502 }
1503 
1504 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1505 {
1506 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1507 
1508 	if (ttm_bo->ttm) {
1509 		struct ttm_placement place = {};
1510 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1511 
1512 		drm_WARN_ON(&xe->drm, ret);
1513 	}
1514 }
1515 
1516 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1517 {
1518 	struct ttm_operation_ctx ctx = {
1519 		.interruptible = false,
1520 		.gfp_retry_mayfail = false,
1521 	};
1522 
1523 	if (ttm_bo->ttm) {
1524 		struct xe_ttm_tt *xe_tt =
1525 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1526 
1527 		if (xe_tt->purgeable)
1528 			xe_ttm_bo_purge(ttm_bo, &ctx);
1529 	}
1530 }
1531 
1532 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1533 				unsigned long offset, void *buf, int len,
1534 				int write)
1535 {
1536 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1537 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1538 	struct iosys_map vmap;
1539 	struct xe_res_cursor cursor;
1540 	struct xe_vram_region *vram;
1541 	int bytes_left = len;
1542 	int err = 0;
1543 
1544 	xe_bo_assert_held(bo);
1545 	xe_device_assert_mem_access(xe);
1546 
1547 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1548 		return -EIO;
1549 
1550 	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
1551 		struct xe_migrate *migrate =
1552 			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1553 
1554 		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1555 					       write);
1556 		goto out;
1557 	}
1558 
1559 	vram = res_to_mem_region(ttm_bo->resource);
1560 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1561 		     bo->size - (offset & PAGE_MASK), &cursor);
1562 
1563 	do {
1564 		unsigned long page_offset = (offset & ~PAGE_MASK);
1565 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1566 
1567 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1568 					  cursor.start);
1569 		if (write)
1570 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1571 		else
1572 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1573 
1574 		buf += byte_count;
1575 		offset += byte_count;
1576 		bytes_left -= byte_count;
1577 		if (bytes_left)
1578 			xe_res_next(&cursor, PAGE_SIZE);
1579 	} while (bytes_left);
1580 
1581 out:
1582 	return err ?: len;
1583 }
1584 
1585 const struct ttm_device_funcs xe_ttm_funcs = {
1586 	.ttm_tt_create = xe_ttm_tt_create,
1587 	.ttm_tt_populate = xe_ttm_tt_populate,
1588 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1589 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1590 	.evict_flags = xe_evict_flags,
1591 	.move = xe_bo_move,
1592 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1593 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1594 	.access_memory = xe_ttm_access_memory,
1595 	.release_notify = xe_ttm_bo_release_notify,
1596 	.eviction_valuable = xe_bo_eviction_valuable,
1597 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1598 	.swap_notify = xe_ttm_bo_swap_notify,
1599 };
1600 
1601 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1602 {
1603 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1604 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1605 	struct xe_tile *tile;
1606 	u8 id;
1607 
1608 	if (bo->ttm.base.import_attach)
1609 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1610 	drm_gem_object_release(&bo->ttm.base);
1611 
1612 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1613 
1614 	for_each_tile(tile, xe, id)
1615 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1616 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1617 
1618 #ifdef CONFIG_PROC_FS
1619 	if (bo->client)
1620 		xe_drm_client_remove_bo(bo);
1621 #endif
1622 
1623 	if (bo->vm && xe_bo_is_user(bo))
1624 		xe_vm_put(bo->vm);
1625 
1626 	if (bo->parent_obj)
1627 		xe_bo_put(bo->parent_obj);
1628 
1629 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1630 	if (!list_empty(&bo->vram_userfault_link))
1631 		list_del(&bo->vram_userfault_link);
1632 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1633 
1634 	kfree(bo);
1635 }
1636 
1637 static void xe_gem_object_free(struct drm_gem_object *obj)
1638 {
1639 	/* Our BO reference counting scheme works as follows:
1640 	 *
1641 	 * The gem object kref is typically used throughout the driver,
1642 	 * and the gem object holds a ttm_buffer_object refcount, so
1643 	 * that when the last gem object reference is put, which is when
1644 	 * we end up in this function, we also put that ttm_buffer_object
1645 	 * refcount. Anything using gem interfaces is then no longer
1646 	 * allowed to access the object in a way that requires a gem
1647 	 * refcount, including locking the object.
1648 	 *
1649 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1650 	 * refcount directly if needed.
1651 	 */
1652 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1653 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1654 }
1655 
1656 static void xe_gem_object_close(struct drm_gem_object *obj,
1657 				struct drm_file *file_priv)
1658 {
1659 	struct xe_bo *bo = gem_to_xe_bo(obj);
1660 
1661 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1662 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1663 
1664 		xe_bo_lock(bo, false);
1665 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1666 		xe_bo_unlock(bo);
1667 	}
1668 }
1669 
1670 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1671 {
1672 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1673 	struct drm_device *ddev = tbo->base.dev;
1674 	struct xe_device *xe = to_xe_device(ddev);
1675 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1676 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1677 	vm_fault_t ret;
1678 	int idx;
1679 
1680 	if (needs_rpm)
1681 		xe_pm_runtime_get(xe);
1682 
1683 	ret = ttm_bo_vm_reserve(tbo, vmf);
1684 	if (ret)
1685 		goto out;
1686 
1687 	if (drm_dev_enter(ddev, &idx)) {
1688 		trace_xe_bo_cpu_fault(bo);
1689 
1690 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1691 					       TTM_BO_VM_NUM_PREFAULT);
1692 		drm_dev_exit(idx);
1693 	} else {
1694 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1695 	}
1696 
1697 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1698 		goto out;
1699 	/*
1700 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1701 	 */
1702 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1703 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1704 		if (list_empty(&bo->vram_userfault_link))
1705 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1706 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1707 	}
1708 
1709 	dma_resv_unlock(tbo->base.resv);
1710 out:
1711 	if (needs_rpm)
1712 		xe_pm_runtime_put(xe);
1713 
1714 	return ret;
1715 }
1716 
1717 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1718 			   void *buf, int len, int write)
1719 {
1720 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1721 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1722 	struct xe_device *xe = xe_bo_device(bo);
1723 	int ret;
1724 
1725 	xe_pm_runtime_get(xe);
1726 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1727 	xe_pm_runtime_put(xe);
1728 
1729 	return ret;
1730 }
1731 
1732 /**
1733  * xe_bo_read() - Read from an xe_bo
1734  * @bo: The buffer object to read from.
1735  * @offset: The byte offset to start reading from.
1736  * @dst: Location to store the read.
1737  * @size: Size in bytes for the read.
1738  *
1739  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1740  *
1741  * Return: Zero on success, or negative error.
1742  */
1743 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1744 {
1745 	int ret;
1746 
1747 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1748 	if (ret >= 0 && ret != size)
1749 		ret = -EIO;
1750 	else if (ret == size)
1751 		ret = 0;
1752 
1753 	return ret;
1754 }
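
/*
 * Example (illustrative): reading a u32 out of a bo without mapping it;
 * @offset is an assumed, caller-provided byte offset:
 *
 *	u32 val;
 *	int err = xe_bo_read(bo, offset, &val, sizeof(val));
 *
 *	if (err)
 *		...;	/* short or failed access */
 */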
1755 
1756 static const struct vm_operations_struct xe_gem_vm_ops = {
1757 	.fault = xe_gem_fault,
1758 	.open = ttm_bo_vm_open,
1759 	.close = ttm_bo_vm_close,
1760 	.access = xe_bo_vm_access,
1761 };
1762 
1763 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1764 	.free = xe_gem_object_free,
1765 	.close = xe_gem_object_close,
1766 	.mmap = drm_gem_ttm_mmap,
1767 	.export = xe_gem_prime_export,
1768 	.vm_ops = &xe_gem_vm_ops,
1769 };
1770 
1771 /**
1772  * xe_bo_alloc - Allocate storage for a struct xe_bo
1773  *
1774  * This function is intended to allocate storage to be used for input
1775  * to __xe_bo_create_locked(), for the case where a pointer to the bo to be
1776  * created is needed before the call to __xe_bo_create_locked().
1777  * If __xe_bo_create_locked() ends up never being called, then the
1778  * storage allocated with this function needs to be freed using
1779  * xe_bo_free().
1780  *
1781  * Return: A pointer to an uninitialized struct xe_bo on success,
1782  * ERR_PTR(-ENOMEM) on error.
1783  */
1784 struct xe_bo *xe_bo_alloc(void)
1785 {
1786 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1787 
1788 	if (!bo)
1789 		return ERR_PTR(-ENOMEM);
1790 
1791 	return bo;
1792 }
1793 
1794 /**
1795  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1796  * @bo: The buffer object storage.
1797  *
1798  * Refer to xe_bo_alloc() documentation for valid use-cases.
1799  */
1800 void xe_bo_free(struct xe_bo *bo)
1801 {
1802 	kfree(bo);
1803 }
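
/*
 * Usage sketch for the pre-allocation pattern described above (illustrative
 * only; the size and flags are placeholders):
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	...	/* record the pointer somewhere before creation if needed */
 *	bo = ___xe_bo_create_locked(xe, bo, NULL, NULL, NULL, SZ_4K,
 *				    DRM_XE_GEM_CPU_CACHING_WB,
 *				    ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM);
 *
 * If ___xe_bo_create_locked() ends up never being called, release the
 * storage with xe_bo_free() instead.
 */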
1804 
1805 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1806 				     struct xe_tile *tile, struct dma_resv *resv,
1807 				     struct ttm_lru_bulk_move *bulk, size_t size,
1808 				     u16 cpu_caching, enum ttm_bo_type type,
1809 				     u32 flags)
1810 {
1811 	struct ttm_operation_ctx ctx = {
1812 		.interruptible = true,
1813 		.no_wait_gpu = false,
1814 		.gfp_retry_mayfail = true,
1815 	};
1816 	struct ttm_placement *placement;
1817 	uint32_t alignment;
1818 	size_t aligned_size;
1819 	int err;
1820 
1821 	/* Only kernel objects should set a tile */
1822 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1823 
1824 	if (XE_WARN_ON(!size)) {
1825 		xe_bo_free(bo);
1826 		return ERR_PTR(-EINVAL);
1827 	}
1828 
1829 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT to also be set */
1830 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1831 		return ERR_PTR(-EINVAL);
1832 
1833 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1834 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1835 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1836 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1837 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1838 
1839 		aligned_size = ALIGN(size, align);
1840 		if (type != ttm_bo_type_device)
1841 			size = ALIGN(size, align);
1842 		flags |= XE_BO_FLAG_INTERNAL_64K;
1843 		alignment = align >> PAGE_SHIFT;
1844 	} else {
1845 		aligned_size = ALIGN(size, SZ_4K);
1846 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1847 		alignment = SZ_4K >> PAGE_SHIFT;
1848 	}
1849 
1850 	if (type == ttm_bo_type_device && aligned_size != size)
1851 		return ERR_PTR(-EINVAL);
1852 
1853 	if (!bo) {
1854 		bo = xe_bo_alloc();
1855 		if (IS_ERR(bo))
1856 			return bo;
1857 	}
1858 
1859 	bo->ccs_cleared = false;
1860 	bo->tile = tile;
1861 	bo->size = size;
1862 	bo->flags = flags;
1863 	bo->cpu_caching = cpu_caching;
1864 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1865 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1866 	INIT_LIST_HEAD(&bo->pinned_link);
1867 #ifdef CONFIG_PROC_FS
1868 	INIT_LIST_HEAD(&bo->client_link);
1869 #endif
1870 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1871 
1872 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1873 
1874 	if (resv) {
1875 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1876 		ctx.resv = resv;
1877 	}
1878 
1879 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1880 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1881 		if (WARN_ON(err)) {
1882 			xe_ttm_bo_destroy(&bo->ttm);
1883 			return ERR_PTR(err);
1884 		}
1885 	}
1886 
1887 	/* Defer populating type_sg bos */
1888 	placement = (type == ttm_bo_type_sg ||
1889 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1890 		&bo->placement;
1891 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1892 				   placement, alignment,
1893 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1894 	if (err)
1895 		return ERR_PTR(err);
1896 
1897 	/*
1898 	 * The VRAM pages underneath are potentially still being accessed by the
1899 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1900 	 * sure to add any corresponding move/clear fences into the objects
1901 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1902 	 *
1903 	 * For KMD internal buffers we don't care about GPU clearing, however we
1904 	 * still need to handle async evictions, where the VRAM is still being
1905 	 * accessed by the GPU. Most internal callers are not expecting this,
1906 	 * since they are missing the required synchronisation before accessing
1907 	 * the memory. To keep things simple just sync wait any kernel fences
1908 	 * here, if the buffer is designated KMD internal.
1909 	 *
1910 	 * For normal userspace objects we should already have the required
1911 	 * pipelining or sync waiting elsewhere, since we already have to deal
1912 	 * with things like async GPU clearing.
1913 	 */
1914 	if (type == ttm_bo_type_kernel) {
1915 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1916 						     DMA_RESV_USAGE_KERNEL,
1917 						     ctx.interruptible,
1918 						     MAX_SCHEDULE_TIMEOUT);
1919 
1920 		if (timeout < 0) {
1921 			if (!resv)
1922 				dma_resv_unlock(bo->ttm.base.resv);
1923 			xe_bo_put(bo);
1924 			return ERR_PTR(timeout);
1925 		}
1926 	}
1927 
1928 	bo->created = true;
1929 	if (bulk)
1930 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1931 	else
1932 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1933 
1934 	return bo;
1935 }
1936 
1937 static int __xe_bo_fixed_placement(struct xe_device *xe,
1938 				   struct xe_bo *bo,
1939 				   u32 flags,
1940 				   u64 start, u64 end, u64 size)
1941 {
1942 	struct ttm_place *place = bo->placements;
1943 
1944 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1945 		return -EINVAL;
1946 
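	/*
	 * Constrain the single placement to the caller-supplied [start, end)
	 * range; TTM expresses the limits in page frame numbers, and a fixed
	 * physical placement implies a contiguous allocation.
	 */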
1947 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1948 	place->fpfn = start >> PAGE_SHIFT;
1949 	place->lpfn = end >> PAGE_SHIFT;
1950 
1951 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1952 	case XE_BO_FLAG_VRAM0:
1953 		place->mem_type = XE_PL_VRAM0;
1954 		break;
1955 	case XE_BO_FLAG_VRAM1:
1956 		place->mem_type = XE_PL_VRAM1;
1957 		break;
1958 	case XE_BO_FLAG_STOLEN:
1959 		place->mem_type = XE_PL_STOLEN;
1960 		break;
1961 
1962 	default:
1963 		/* 0 or multiple of the above set */
1964 		return -EINVAL;
1965 	}
1966 
1967 	bo->placement = (struct ttm_placement) {
1968 		.num_placement = 1,
1969 		.placement = place,
1970 	};
1971 
1972 	return 0;
1973 }
1974 
1975 static struct xe_bo *
1976 __xe_bo_create_locked(struct xe_device *xe,
1977 		      struct xe_tile *tile, struct xe_vm *vm,
1978 		      size_t size, u64 start, u64 end,
1979 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
1980 		      u64 alignment)
1981 {
1982 	struct xe_bo *bo = NULL;
1983 	int err;
1984 
1985 	if (vm)
1986 		xe_vm_assert_held(vm);
1987 
1988 	if (start || end != ~0ULL) {
1989 		bo = xe_bo_alloc();
1990 		if (IS_ERR(bo))
1991 			return bo;
1992 
1993 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1994 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1995 		if (err) {
1996 			xe_bo_free(bo);
1997 			return ERR_PTR(err);
1998 		}
1999 	}
2000 
2001 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
2002 				    vm && !xe_vm_in_fault_mode(vm) &&
2003 				    flags & XE_BO_FLAG_USER ?
2004 				    &vm->lru_bulk_move : NULL, size,
2005 				    cpu_caching, type, flags);
2006 	if (IS_ERR(bo))
2007 		return bo;
2008 
2009 	bo->min_align = alignment;
2010 
2011 	/*
2012 	 * Note that instead of taking a reference to the drm_gpuvm_resv_bo(),
2013 	 * to ensure the shared resv doesn't disappear under the bo, the bo
2014 	 * will keep a reference to the vm, and avoid circular references
2015 	 * by having all the vm's bo references released at vm close
2016 	 * time.
2017 	 */
2018 	if (vm && xe_bo_is_user(bo))
2019 		xe_vm_get(vm);
2020 	bo->vm = vm;
2021 
2022 	if (bo->flags & XE_BO_FLAG_GGTT) {
2023 		struct xe_tile *t;
2024 		u8 id;
2025 
2026 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
2027 			if (!tile && flags & XE_BO_FLAG_STOLEN)
2028 				tile = xe_device_get_root_tile(xe);
2029 
2030 			xe_assert(xe, tile);
2031 		}
2032 
2033 		for_each_tile(t, xe, id) {
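		/*
		 * Map the BO into the GGTT of its own tile and of any extra
		 * tile requested via XE_BO_FLAG_GGTTx(), constrained to a
		 * fixed GGTT range when XE_BO_FLAG_FIXED_PLACEMENT is set.
		 */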
2034 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
2035 				continue;
2036 
2037 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
2038 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
2039 							   start + bo->size, U64_MAX);
2040 			} else {
2041 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
2042 			}
2043 			if (err)
2044 				goto err_unlock_put_bo;
2045 		}
2046 	}
2047 
2048 	trace_xe_bo_create(bo);
2049 	return bo;
2050 
2051 err_unlock_put_bo:
2052 	__xe_bo_unset_bulk_move(bo);
2053 	xe_bo_unlock_vm_held(bo);
2054 	xe_bo_put(bo);
2055 	return ERR_PTR(err);
2056 }
2057 
2058 struct xe_bo *
2059 xe_bo_create_locked_range(struct xe_device *xe,
2060 			  struct xe_tile *tile, struct xe_vm *vm,
2061 			  size_t size, u64 start, u64 end,
2062 			  enum ttm_bo_type type, u32 flags, u64 alignment)
2063 {
2064 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
2065 				     flags, alignment);
2066 }
2067 
2068 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
2069 				  struct xe_vm *vm, size_t size,
2070 				  enum ttm_bo_type type, u32 flags)
2071 {
2072 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
2073 				     flags, 0);
2074 }
2075 
2076 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
2077 				struct xe_vm *vm, size_t size,
2078 				u16 cpu_caching,
2079 				u32 flags)
2080 {
2081 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
2082 						 cpu_caching, ttm_bo_type_device,
2083 						 flags | XE_BO_FLAG_USER, 0);
2084 	if (!IS_ERR(bo))
2085 		xe_bo_unlock_vm_held(bo);
2086 
2087 	return bo;
2088 }
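
/*
 * Illustrative call (sketch only; the size and flags are placeholders):
 *
 *	bo = xe_bo_create_user(xe, NULL, vm, SZ_64K,
 *			       DRM_XE_GEM_CPU_CACHING_WC,
 *			       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 * The bo is returned unlocked; see xe_gem_create_ioctl() and
 * xe_bo_dumb_create() below for in-tree callers.
 */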
2089 
2090 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
2091 			   struct xe_vm *vm, size_t size,
2092 			   enum ttm_bo_type type, u32 flags)
2093 {
2094 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
2095 
2096 	if (!IS_ERR(bo))
2097 		xe_bo_unlock_vm_held(bo);
2098 
2099 	return bo;
2100 }
2101 
2102 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
2103 				      struct xe_vm *vm,
2104 				      size_t size, u64 offset,
2105 				      enum ttm_bo_type type, u32 flags)
2106 {
2107 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
2108 					       type, flags, 0);
2109 }
2110 
2111 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
2112 					      struct xe_tile *tile,
2113 					      struct xe_vm *vm,
2114 					      size_t size, u64 offset,
2115 					      enum ttm_bo_type type, u32 flags,
2116 					      u64 alignment)
2117 {
2118 	struct xe_bo *bo;
2119 	int err;
2120 	u64 start = offset == ~0ull ? 0 : offset;
2121 	u64 end = offset == ~0ull ? offset : start + size;
2122 
2123 	if (flags & XE_BO_FLAG_STOLEN &&
2124 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
2125 		flags |= XE_BO_FLAG_GGTT;
2126 
2127 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
2128 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
2129 				       alignment);
2130 	if (IS_ERR(bo))
2131 		return bo;
2132 
2133 	err = xe_bo_pin(bo);
2134 	if (err)
2135 		goto err_put;
2136 
2137 	err = xe_bo_vmap(bo);
2138 	if (err)
2139 		goto err_unpin;
2140 
2141 	xe_bo_unlock_vm_held(bo);
2142 
2143 	return bo;
2144 
2145 err_unpin:
2146 	xe_bo_unpin(bo);
2147 err_put:
2148 	xe_bo_unlock_vm_held(bo);
2149 	xe_bo_put(bo);
2150 	return ERR_PTR(err);
2151 }
2152 
2153 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2154 				   struct xe_vm *vm, size_t size,
2155 				   enum ttm_bo_type type, u32 flags)
2156 {
2157 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
2158 }
2159 
2160 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
2161 				     const void *data, size_t size,
2162 				     enum ttm_bo_type type, u32 flags)
2163 {
2164 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
2165 						ALIGN(size, PAGE_SIZE),
2166 						type, flags);
2167 	if (IS_ERR(bo))
2168 		return bo;
2169 
2170 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
2171 
2172 	return bo;
2173 }
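
/*
 * Illustrative use (sketch; "data"/"len" are placeholders): upload a small
 * CPU-provided blob into a pinned and mapped kernel BO:
 *
 *	bo = xe_bo_create_from_data(xe, tile, data, len, ttm_bo_type_kernel,
 *				    XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *				    XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 * The caller owns the pin and the mapping; see the managed variants below for
 * automatic cleanup via devm.
 */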
2174 
2175 static void __xe_bo_unpin_map_no_vm(void *arg)
2176 {
2177 	xe_bo_unpin_map_no_vm(arg);
2178 }
2179 
2180 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2181 					   size_t size, u32 flags)
2182 {
2183 	struct xe_bo *bo;
2184 	int ret;
2185 
2186 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
2187 
2188 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
2189 	if (IS_ERR(bo))
2190 		return bo;
2191 
2192 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2193 	if (ret)
2194 		return ERR_PTR(ret);
2195 
2196 	return bo;
2197 }
2198 
2199 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
2200 					     const void *data, size_t size, u32 flags)
2201 {
2202 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
2203 
2204 	if (IS_ERR(bo))
2205 		return bo;
2206 
2207 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
2208 
2209 	return bo;
2210 }
2211 
2212 /**
2213  * xe_managed_bo_reinit_in_vram - Replace a managed BO with a VRAM copy
2214  * @xe: xe device
2215  * @tile: Tile where the new buffer will be created
2216  * @src: Managed buffer object allocated in system memory
2217  *
2218  * Replace a managed src buffer object allocated in system memory with a new
2219  * one allocated in vram, copying the data between them.
2220  * The buffer object in VRAM is not going to have the same GGTT address; the
2221  * caller is responsible for making sure that any old references to it are updated.
2222  *
2223  * Returns 0 for success, negative error code otherwise.
2224  */
2225 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
2226 {
2227 	struct xe_bo *bo;
2228 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
2229 
2230 	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
2231 				      XE_BO_FLAG_PINNED_NORESTORE);
2232 
2233 	xe_assert(xe, IS_DGFX(xe));
2234 	xe_assert(xe, !(*src)->vmap.is_iomem);
2235 
2236 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
2237 					    (*src)->size, dst_flags);
2238 	if (IS_ERR(bo))
2239 		return PTR_ERR(bo);
2240 
2241 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
2242 	*src = bo;
2243 
2244 	return 0;
2245 }
2246 
2247 /*
2248  * XXX: This is in the VM bind data path, likely should calculate this once and
2249  * store, with a recalculation if the BO is moved.
2250  */
2251 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2252 {
2253 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2254 
2255 	switch (res->mem_type) {
2256 	case XE_PL_STOLEN:
2257 		return xe_ttm_stolen_gpu_offset(xe);
2258 	case XE_PL_TT:
2259 	case XE_PL_SYSTEM:
2260 		return 0;
2261 	default:
2262 		return res_to_mem_region(res)->dpa_base;
2263 	}
2265 }
2266 
2267 /**
2268  * xe_bo_pin_external - pin an external BO
2269  * @bo: buffer object to be pinned
2270  *
2271  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2272  * BO. Unique call compared to xe_bo_pin as this function has it own set of
2273  * asserts and code to ensure evict / restore on suspend / resume.
2274  *
2275  * Returns 0 for success, negative error code otherwise.
2276  */
2277 int xe_bo_pin_external(struct xe_bo *bo)
2278 {
2279 	struct xe_device *xe = xe_bo_device(bo);
2280 	int err;
2281 
2282 	xe_assert(xe, !bo->vm);
2283 	xe_assert(xe, xe_bo_is_user(bo));
2284 
2285 	if (!xe_bo_is_pinned(bo)) {
2286 		err = xe_bo_validate(bo, NULL, false);
2287 		if (err)
2288 			return err;
2289 
2290 		spin_lock(&xe->pinned.lock);
2291 		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
2292 		spin_unlock(&xe->pinned.lock);
2293 	}
2294 
2295 	ttm_bo_pin(&bo->ttm);
2296 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2297 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2298 
2299 	/*
2300 	 * FIXME: If we always use the reserve / unreserve functions for locking
2301 	 * we do not need this.
2302 	 */
2303 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2304 
2305 	return 0;
2306 }
2307 
2308 int xe_bo_pin(struct xe_bo *bo)
2309 {
2310 	struct ttm_place *place = &bo->placements[0];
2311 	struct xe_device *xe = xe_bo_device(bo);
2312 	int err;
2313 
2314 	/* We currently don't expect user BO to be pinned */
2315 	xe_assert(xe, !xe_bo_is_user(bo));
2316 
2317 	/* Pinned object must be in GGTT or have pinned flag */
2318 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
2319 				   XE_BO_FLAG_GGTT));
2320 
2321 	/*
2322 	 * No reason we can't support pinning imported dma-bufs; we just don't
2323 	 * expect to pin an imported dma-buf.
2324 	 */
2325 	xe_assert(xe, !bo->ttm.base.import_attach);
2326 
2327 	/* We only expect at most 1 pin */
2328 	xe_assert(xe, !xe_bo_is_pinned(bo));
2329 
2330 	err = xe_bo_validate(bo, NULL, false);
2331 	if (err)
2332 		return err;
2333 
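	/*
	 * Track pinned VRAM / GGTT-mapped BOs so they can be evicted on
	 * suspend and restored on resume; XE_BO_FLAG_PINNED_LATE_RESTORE
	 * selects the later of the two restore phases.
	 */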
2334 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2335 		spin_lock(&xe->pinned.lock);
2336 		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
2337 			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
2338 		else
2339 			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
2340 		spin_unlock(&xe->pinned.lock);
2341 	}
2342 
2343 	ttm_bo_pin(&bo->ttm);
2344 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2345 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2346 
2347 	/*
2348 	 * FIXME: If we always use the reserve / unreserve functions for locking
2349 	 * we do not need this.
2350 	 */
2351 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2352 
2353 	return 0;
2354 }
2355 
2356 /**
2357  * xe_bo_unpin_external - unpin an external BO
2358  * @bo: buffer object to be unpinned
2359  *
2360  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2361  * BO. Unlike xe_bo_unpin(), this function has its own set of asserts and code
2362  * to ensure evict / restore on suspend / resume.
2365  */
2366 void xe_bo_unpin_external(struct xe_bo *bo)
2367 {
2368 	struct xe_device *xe = xe_bo_device(bo);
2369 
2370 	xe_assert(xe, !bo->vm);
2371 	xe_assert(xe, xe_bo_is_pinned(bo));
2372 	xe_assert(xe, xe_bo_is_user(bo));
2373 
2374 	spin_lock(&xe->pinned.lock);
2375 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
2376 		list_del_init(&bo->pinned_link);
2377 	spin_unlock(&xe->pinned.lock);
2378 
2379 	ttm_bo_unpin(&bo->ttm);
2380 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2381 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2382 
2383 	/*
2384 	 * FIXME: If we always use the reserve / unreserve functions for locking
2385 	 * we do not need this.
2386 	 */
2387 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2388 }
2389 
2390 void xe_bo_unpin(struct xe_bo *bo)
2391 {
2392 	struct ttm_place *place = &bo->placements[0];
2393 	struct xe_device *xe = xe_bo_device(bo);
2394 
2395 	xe_assert(xe, !bo->ttm.base.import_attach);
2396 	xe_assert(xe, xe_bo_is_pinned(bo));
2397 
2398 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2399 		spin_lock(&xe->pinned.lock);
2400 		xe_assert(xe, !list_empty(&bo->pinned_link));
2401 		list_del_init(&bo->pinned_link);
2402 		spin_unlock(&xe->pinned.lock);
2403 
2404 		if (bo->backup_obj) {
2405 			if (xe_bo_is_pinned(bo->backup_obj))
2406 				ttm_bo_unpin(&bo->backup_obj->ttm);
2407 			xe_bo_put(bo->backup_obj);
2408 			bo->backup_obj = NULL;
2409 		}
2410 	}
2411 	ttm_bo_unpin(&bo->ttm);
2412 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2413 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2414 }
2415 
2416 /**
2417  * xe_bo_validate() - Make sure the bo is in an allowed placement
2418  * @bo: The bo,
2419  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2420  *      NULL. Used together with @allow_res_evict.
2421  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2422  *                   reservation object.
2423  *
2424  * Make sure the bo is in an allowed placement, migrating it if necessary. If
2425  * needed, other bos will be evicted. If bos selected for eviction share
2426  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
2427  * set to true; otherwise they will be bypassed.
2428  *
2429  * Return: 0 on success, negative error code on failure. May return
2430  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2431  */
2432 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
2433 {
2434 	struct ttm_operation_ctx ctx = {
2435 		.interruptible = true,
2436 		.no_wait_gpu = false,
2437 		.gfp_retry_mayfail = true,
2438 	};
2439 	struct pin_cookie cookie;
2440 	int ret;
2441 
2442 	if (vm) {
2443 		lockdep_assert_held(&vm->lock);
2444 		xe_vm_assert_held(vm);
2445 
2446 		ctx.allow_res_evict = allow_res_evict;
2447 		ctx.resv = xe_vm_resv(vm);
2448 	}
2449 
2450 	cookie = xe_vm_set_validating(vm, allow_res_evict);
2451 	trace_xe_bo_validate(bo);
2452 	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2453 	xe_vm_clear_validating(vm, allow_res_evict, cookie);
2454 
2455 	return ret;
2456 }
2457 
2458 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2459 {
2460 	if (bo->destroy == &xe_ttm_bo_destroy)
2461 		return true;
2462 
2463 	return false;
2464 }
2465 
2466 /*
2467  * Resolve a BO address. There is no assert to check if the proper lock is held
2468  * so it should only be used in cases where it is not fatal to get the wrong
2469  * address, such as printing debug information, but not in cases where memory is
2470  * written based on this result.
2471  */
2472 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2473 {
2474 	struct xe_device *xe = xe_bo_device(bo);
2475 	struct xe_res_cursor cur;
2476 	u64 page;
2477 
2478 	xe_assert(xe, page_size <= PAGE_SIZE);
2479 	page = offset >> PAGE_SHIFT;
2480 	offset &= (PAGE_SIZE - 1);
2481 
2482 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2483 		xe_assert(xe, bo->ttm.ttm);
2484 
2485 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2486 				page_size, &cur);
2487 		return xe_res_dma(&cur) + offset;
2488 	} else {
2489 		struct xe_res_cursor cur;
2490 
2491 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2492 			     page_size, &cur);
2493 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2494 	}
2495 }
2496 
2497 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2498 {
2499 	if (!READ_ONCE(bo->ttm.pin_count))
2500 		xe_bo_assert_held(bo);
2501 	return __xe_bo_addr(bo, offset, page_size);
2502 }
2503 
2504 int xe_bo_vmap(struct xe_bo *bo)
2505 {
2506 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2507 	void *virtual;
2508 	bool is_iomem;
2509 	int ret;
2510 
2511 	xe_bo_assert_held(bo);
2512 
2513 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2514 			!force_contiguous(bo->flags)))
2515 		return -EINVAL;
2516 
2517 	if (!iosys_map_is_null(&bo->vmap))
2518 		return 0;
2519 
2520 	/*
2521 	 * We use this more or less deprecated interface for now since
2522 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2523 	 * single page bos, which is done here.
2524 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2525 	 * to use struct iosys_map.
2526 	 */
2527 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
2528 	if (ret)
2529 		return ret;
2530 
2531 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
2532 	if (is_iomem)
2533 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
2534 	else
2535 		iosys_map_set_vaddr(&bo->vmap, virtual);
2536 
2537 	return 0;
2538 }
2539 
2540 static void __xe_bo_vunmap(struct xe_bo *bo)
2541 {
2542 	if (!iosys_map_is_null(&bo->vmap)) {
2543 		iosys_map_clear(&bo->vmap);
2544 		ttm_bo_kunmap(&bo->kmap);
2545 	}
2546 }
2547 
2548 void xe_bo_vunmap(struct xe_bo *bo)
2549 {
2550 	xe_bo_assert_held(bo);
2551 	__xe_bo_vunmap(bo);
2552 }
2553 
2554 static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
2555 {
2556 	if (value == DRM_XE_PXP_TYPE_NONE)
2557 		return 0;
2558 
2559 	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
2560 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
2561 		return -EINVAL;
2562 
2563 	return xe_pxp_key_assign(xe->pxp, bo);
2564 }
2565 
2566 typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
2567 					     struct xe_bo *bo,
2568 					     u64 value);
2569 
2570 static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
2571 	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
2572 };
2573 
2574 static int gem_create_user_ext_set_property(struct xe_device *xe,
2575 					    struct xe_bo *bo,
2576 					    u64 extension)
2577 {
2578 	u64 __user *address = u64_to_user_ptr(extension);
2579 	struct drm_xe_ext_set_property ext;
2580 	int err;
2581 	u32 idx;
2582 
2583 	err = copy_from_user(&ext, address, sizeof(ext));
2584 	if (XE_IOCTL_DBG(xe, err))
2585 		return -EFAULT;
2586 
2587 	if (XE_IOCTL_DBG(xe, ext.property >=
2588 			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
2589 	    XE_IOCTL_DBG(xe, ext.pad) ||
2590 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
2591 		return -EINVAL;
2592 
2593 	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
2594 	if (!gem_create_set_property_funcs[idx])
2595 		return -EINVAL;
2596 
2597 	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
2598 }
2599 
2600 typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
2601 					       struct xe_bo *bo,
2602 					       u64 extension);
2603 
2604 static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
2605 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
2606 };
2607 
2608 #define MAX_USER_EXTENSIONS	16
2609 static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
2610 				      u64 extensions, int ext_number)
2611 {
2612 	u64 __user *address = u64_to_user_ptr(extensions);
2613 	struct drm_xe_user_extension ext;
2614 	int err;
2615 	u32 idx;
2616 
2617 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
2618 		return -E2BIG;
2619 
2620 	err = copy_from_user(&ext, address, sizeof(ext));
2621 	if (XE_IOCTL_DBG(xe, err))
2622 		return -EFAULT;
2623 
2624 	if (XE_IOCTL_DBG(xe, ext.pad) ||
2625 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
2626 		return -EINVAL;
2627 
2628 	idx = array_index_nospec(ext.name,
2629 				 ARRAY_SIZE(gem_create_user_extension_funcs));
2630 	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
2631 	if (XE_IOCTL_DBG(xe, err))
2632 		return err;
2633 
2634 	if (ext.next_extension)
2635 		return gem_create_user_extensions(xe, bo, ext.next_extension,
2636 						  ++ext_number);
2637 
2638 	return 0;
2639 }
2640 
2641 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2642 			struct drm_file *file)
2643 {
2644 	struct xe_device *xe = to_xe_device(dev);
2645 	struct xe_file *xef = to_xe_file(file);
2646 	struct drm_xe_gem_create *args = data;
2647 	struct xe_vm *vm = NULL;
2648 	ktime_t end = 0;
2649 	struct xe_bo *bo;
2650 	unsigned int bo_flags;
2651 	u32 handle;
2652 	int err;
2653 
2654 	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2655 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2656 		return -EINVAL;
2657 
2658 	/* at least one valid memory placement must be specified */
2659 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2660 			 !args->placement))
2661 		return -EINVAL;
2662 
2663 	if (XE_IOCTL_DBG(xe, args->flags &
2664 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2665 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2666 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2667 		return -EINVAL;
2668 
2669 	if (XE_IOCTL_DBG(xe, args->handle))
2670 		return -EINVAL;
2671 
2672 	if (XE_IOCTL_DBG(xe, !args->size))
2673 		return -EINVAL;
2674 
2675 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2676 		return -EINVAL;
2677 
2678 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2679 		return -EINVAL;
2680 
2681 	bo_flags = 0;
2682 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2683 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2684 
2685 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2686 		bo_flags |= XE_BO_FLAG_SCANOUT;
2687 
2688 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2689 
2690 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2691 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2692 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2693 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2694 	    IS_ALIGNED(args->size, SZ_64K))
2695 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2696 
2697 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2698 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2699 			return -EINVAL;
2700 
2701 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2702 	}
2703 
2704 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2705 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2706 		return -EINVAL;
2707 
2708 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2709 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2710 		return -EINVAL;
2711 
2712 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2713 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2714 		return -EINVAL;
2715 
2716 	if (args->vm_id) {
2717 		vm = xe_vm_lookup(xef, args->vm_id);
2718 		if (XE_IOCTL_DBG(xe, !vm))
2719 			return -ENOENT;
2720 	}
2721 
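	/*
	 * BO creation may fail transiently, e.g. while evictions are in
	 * flight; xe_vm_validate_should_retry() decides whether the error is
	 * worth retrying and for how long.
	 */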
2722 retry:
2723 	if (vm) {
2724 		err = xe_vm_lock(vm, true);
2725 		if (err)
2726 			goto out_vm;
2727 	}
2728 
2729 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2730 			       bo_flags);
2731 
2732 	if (vm)
2733 		xe_vm_unlock(vm);
2734 
2735 	if (IS_ERR(bo)) {
2736 		err = PTR_ERR(bo);
2737 		if (xe_vm_validate_should_retry(NULL, err, &end))
2738 			goto retry;
2739 		goto out_vm;
2740 	}
2741 
2742 	if (args->extensions) {
2743 		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
2744 		if (err)
2745 			goto out_bulk;
2746 	}
2747 
2748 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2749 	if (err)
2750 		goto out_bulk;
2751 
2752 	args->handle = handle;
2753 	goto out_put;
2754 
2755 out_bulk:
2756 	if (vm && !xe_vm_in_fault_mode(vm)) {
2757 		xe_vm_lock(vm, false);
2758 		__xe_bo_unset_bulk_move(bo);
2759 		xe_vm_unlock(vm);
2760 	}
2761 out_put:
2762 	xe_bo_put(bo);
2763 out_vm:
2764 	if (vm)
2765 		xe_vm_put(vm);
2766 
2767 	return err;
2768 }
2769 
2770 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2771 			     struct drm_file *file)
2772 {
2773 	struct xe_device *xe = to_xe_device(dev);
2774 	struct drm_xe_gem_mmap_offset *args = data;
2775 	struct drm_gem_object *gem_obj;
2776 
2777 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2778 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2779 		return -EINVAL;
2780 
2781 	if (XE_IOCTL_DBG(xe, args->flags &
2782 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
2783 		return -EINVAL;
2784 
2785 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
2786 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
2787 			return -EINVAL;
2788 
2789 		if (XE_IOCTL_DBG(xe, args->handle))
2790 			return -EINVAL;
2791 
2792 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
2793 			return -EINVAL;
2794 
2795 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
2796 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
2797 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
2798 		return 0;
2799 	}
2800 
2801 	gem_obj = drm_gem_object_lookup(file, args->handle);
2802 	if (XE_IOCTL_DBG(xe, !gem_obj))
2803 		return -ENOENT;
2804 
2805 	/* The mmap offset was set up at BO allocation time. */
2806 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2807 
2808 	xe_bo_put(gem_to_xe_bo(gem_obj));
2809 	return 0;
2810 }
2811 
2812 /**
2813  * xe_bo_lock() - Lock the buffer object's dma_resv object
2814  * @bo: The struct xe_bo whose lock is to be taken
2815  * @intr: Whether to perform any wait interruptible
2816  *
2817  * Locks the buffer object's dma_resv object. If the buffer object is
2818  * pointing to a shared dma_resv object, that shared lock is locked.
2819  *
2820  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2821  * contended lock was interrupted. If @intr is set to false, the
2822  * function always returns 0.
2823  */
2824 int xe_bo_lock(struct xe_bo *bo, bool intr)
2825 {
2826 	if (intr)
2827 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2828 
2829 	dma_resv_lock(bo->ttm.base.resv, NULL);
2830 
2831 	return 0;
2832 }
2833 
2834 /**
2835  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2836  * @bo: The struct xe_bo whose lock is to be released.
2837  *
2838  * Unlock a buffer object lock that was locked by xe_bo_lock().
2839  */
2840 void xe_bo_unlock(struct xe_bo *bo)
2841 {
2842 	dma_resv_unlock(bo->ttm.base.resv);
2843 }
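
/*
 * Typical locked usage (sketch; error handling trimmed):
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	err = xe_bo_validate(bo, NULL, false);
 *
 *	xe_bo_unlock(bo);
 */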
2844 
2845 /**
2846  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2847  * @bo: The buffer object to migrate
2848  * @mem_type: The TTM memory type intended to migrate to
2849  *
2850  * Check whether the buffer object supports migration to the
2851  * given memory type. Note that pinning may affect the ability to migrate as
2852  * returned by this function.
2853  *
2854  * This function is primarily intended as a helper for checking the
2855  * possibility of migrating buffer objects and can be called without
2856  * the object lock held.
2857  *
2858  * Return: true if migration is possible, false otherwise.
2859  */
2860 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2861 {
2862 	unsigned int cur_place;
2863 
2864 	if (bo->ttm.type == ttm_bo_type_kernel)
2865 		return true;
2866 
2867 	if (bo->ttm.type == ttm_bo_type_sg)
2868 		return false;
2869 
2870 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2871 	     cur_place++) {
2872 		if (bo->placements[cur_place].mem_type == mem_type)
2873 			return true;
2874 	}
2875 
2876 	return false;
2877 }
2878 
2879 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2880 {
2881 	memset(place, 0, sizeof(*place));
2882 	place->mem_type = mem_type;
2883 }
2884 
2885 /**
2886  * xe_bo_migrate - Migrate an object to the desired region id
2887  * @bo: The buffer object to migrate.
2888  * @mem_type: The TTM region type to migrate to.
2889  *
2890  * Attempt to migrate the buffer object to the desired memory region. The
2891  * buffer object may not be pinned, and must be locked.
2892  * On successful completion, the object memory type will be updated,
2893  * but an async migration task may not have completed yet. To wait for it to
2894  * complete, wait on the object's kernel fences to signal with the object
2895  * lock held.
2896  *
2897  * Return: 0 on success. Negative error code on failure. In particular may
2898  * return -EINTR or -ERESTARTSYS if signal pending.
2899  */
2900 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2901 {
2902 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2903 	struct ttm_operation_ctx ctx = {
2904 		.interruptible = true,
2905 		.no_wait_gpu = false,
2906 		.gfp_retry_mayfail = true,
2907 	};
2908 	struct ttm_placement placement;
2909 	struct ttm_place requested;
2910 
2911 	xe_bo_assert_held(bo);
2912 
2913 	if (bo->ttm.resource->mem_type == mem_type)
2914 		return 0;
2915 
2916 	if (xe_bo_is_pinned(bo))
2917 		return -EBUSY;
2918 
2919 	if (!xe_bo_can_migrate(bo, mem_type))
2920 		return -EINVAL;
2921 
2922 	xe_place_from_ttm_type(mem_type, &requested);
2923 	placement.num_placement = 1;
2924 	placement.placement = &requested;
2925 
2926 	/*
2927 	 * Stolen needs to be handled like the VRAM handling below if we ever need
2928 	 * to support it.
2929 	 */
2930 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2931 
2932 	if (mem_type_is_vram(mem_type)) {
2933 		u32 c = 0;
2934 
2935 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2936 	}
2937 
2938 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2939 }
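
/*
 * Illustrative use (sketch): move an unpinned BO back to system memory under
 * its lock, checking first that the placement is supported:
 *
 *	xe_bo_lock(bo, false);
 *	if (xe_bo_can_migrate(bo, XE_PL_TT))
 *		err = xe_bo_migrate(bo, XE_PL_TT);
 *	xe_bo_unlock(bo);
 */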
2940 
2941 /**
2942  * xe_bo_evict - Evict an object to evict placement
2943  * @bo: The buffer object to migrate.
2944  *
2945  * On successful completion, the object memory will be moved to the evict
2946  * placement. This function blocks until the object has been fully moved.
2947  *
2948  * Return: 0 on success. Negative error code on failure.
2949  */
2950 int xe_bo_evict(struct xe_bo *bo)
2951 {
2952 	struct ttm_operation_ctx ctx = {
2953 		.interruptible = false,
2954 		.no_wait_gpu = false,
2955 		.gfp_retry_mayfail = true,
2956 	};
2957 	struct ttm_placement placement;
2958 	int ret;
2959 
2960 	xe_evict_flags(&bo->ttm, &placement);
2961 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2962 	if (ret)
2963 		return ret;
2964 
2965 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2966 			      false, MAX_SCHEDULE_TIMEOUT);
2967 
2968 	return 0;
2969 }
2970 
2971 /**
2972  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2973  * placed in system memory.
2974  * @bo: The xe_bo
2975  *
2976  * Return: true if extra pages need to be allocated, false otherwise.
2977  */
2978 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2979 {
2980 	struct xe_device *xe = xe_bo_device(bo);
2981 
2982 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
2983 		return false;
2984 
2985 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2986 		return false;
2987 
2988 	/* On discrete GPUs, if the GPU can access this buffer from
2989 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2990 	 * can't be used since there's no CCS storage associated with
2991 	 * non-VRAM addresses.
2992 	 */
2993 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
2994 		return false;
2995 
2996 	/*
2997 	 * Compression implies coh_none, therefore we know for sure that WB
2998 	 * memory can't currently use compression, which is likely one of the
2999 	 * common cases.
3000 	 */
3001 	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
3002 		return false;
3003 
3004 	return true;
3005 }
3006 
3007 /**
3008  * __xe_bo_release_dummy() - Dummy kref release function
3009  * @kref: The embedded struct kref.
3010  *
3011  * Dummy release function for xe_bo_put_deferred(). Keep off.
3012  */
3013 void __xe_bo_release_dummy(struct kref *kref)
3014 {
3015 }
3016 
3017 /**
3018  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
3019  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
3020  *
3021  * Puts all bos whose put was deferred by xe_bo_put_deferred().
3022  * The @deferred list can be either an onstack local list or a global
3023  * shared list used by a workqueue.
3024  */
3025 void xe_bo_put_commit(struct llist_head *deferred)
3026 {
3027 	struct llist_node *freed;
3028 	struct xe_bo *bo, *next;
3029 
3030 	if (!deferred)
3031 		return;
3032 
3033 	freed = llist_del_all(deferred);
3034 	if (!freed)
3035 		return;
3036 
3037 	llist_for_each_entry_safe(bo, next, freed, freed)
3038 		drm_gem_object_free(&bo->ttm.base.refcount);
3039 }
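
/*
 * Illustrative use (sketch, assuming the xe_bo_put_deferred() helper declared
 * in xe_bo.h): defer puts from contexts where sleeping is not allowed
 * (xe_bo_put() might sleep) and commit them later from process context:
 *
 *	LLIST_HEAD(deferred);
 *
 *	xe_bo_put_deferred(bo, &deferred);
 *	...
 *	xe_bo_put_commit(&deferred);
 */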
3040 
3041 static void xe_bo_dev_work_func(struct work_struct *work)
3042 {
3043 	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);
3044 
3045 	xe_bo_put_commit(&bo_dev->async_list);
3046 }
3047 
3048 /**
3049  * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
3050  * @bo_dev: The BO dev structure
3051  */
3052 void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
3053 {
3054 	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
3055 }
3056 
3057 /**
3058  * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
3059  * @bo_dev: The BO dev structure
3060  */
3061 void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
3062 {
3063 	flush_work(&bo_dev->async_free);
3064 }
3065 
3066 void xe_bo_put(struct xe_bo *bo)
3067 {
3068 	struct xe_tile *tile;
3069 	u8 id;
3070 
3071 	might_sleep();
3072 	if (bo) {
3073 #ifdef CONFIG_PROC_FS
3074 		if (bo->client)
3075 			might_lock(&bo->client->bos_lock);
3076 #endif
3077 		for_each_tile(tile, xe_bo_device(bo), id)
3078 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
3079 				xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt);
3080 		drm_gem_object_put(&bo->ttm.base);
3081 	}
3082 }
3083 
3084 /**
3085  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
3086  * @file_priv: ...
3087  * @dev: ...
3088  * @args: ...
3089  *
3090  * See dumb_create() hook in include/drm/drm_drv.h
3091  *
3092  * Return: ...
3093  */
3094 int xe_bo_dumb_create(struct drm_file *file_priv,
3095 		      struct drm_device *dev,
3096 		      struct drm_mode_create_dumb *args)
3097 {
3098 	struct xe_device *xe = to_xe_device(dev);
3099 	struct xe_bo *bo;
3100 	uint32_t handle;
3101 	int cpp = DIV_ROUND_UP(args->bpp, 8);
3102 	int err;
3103 	u32 page_size = max_t(u32, PAGE_SIZE,
3104 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
3105 
3106 	args->pitch = ALIGN(args->width * cpp, 64);
3107 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
3108 			   page_size);
3109 
3110 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
3111 			       DRM_XE_GEM_CPU_CACHING_WC,
3112 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
3113 			       XE_BO_FLAG_SCANOUT |
3114 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
3115 	if (IS_ERR(bo))
3116 		return PTR_ERR(bo);
3117 
3118 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
3119 	/* drop reference from allocate - handle holds it now */
3120 	drm_gem_object_put(&bo->ttm.base);
3121 	if (!err)
3122 		args->handle = handle;
3123 	return err;
3124 }
3125 
3126 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
3127 {
3128 	struct ttm_buffer_object *tbo = &bo->ttm;
3129 	struct ttm_device *bdev = tbo->bdev;
3130 
3131 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
3132 
3133 	list_del_init(&bo->vram_userfault_link);
3134 }
3135 
3136 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
3137 #include "tests/xe_bo.c"
3138 #endif
3139