xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision dcdd6b84d9acaa0794c29de7024cfdb20cfd7b92)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <uapi/drm/xe_drm.h>
17 
18 #include <kunit/static_stub.h>
19 
20 #include "xe_device.h"
21 #include "xe_dma_buf.h"
22 #include "xe_drm_client.h"
23 #include "xe_ggtt.h"
24 #include "xe_gt.h"
25 #include "xe_map.h"
26 #include "xe_migrate.h"
27 #include "xe_pm.h"
28 #include "xe_preempt_fence.h"
29 #include "xe_res_cursor.h"
30 #include "xe_trace_bo.h"
31 #include "xe_ttm_stolen_mgr.h"
32 #include "xe_vm.h"
33 
34 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
35 	[XE_PL_SYSTEM] = "system",
36 	[XE_PL_TT] = "gtt",
37 	[XE_PL_VRAM0] = "vram0",
38 	[XE_PL_VRAM1] = "vram1",
39 	[XE_PL_STOLEN] = "stolen"
40 };
41 
42 static const struct ttm_place sys_placement_flags = {
43 	.fpfn = 0,
44 	.lpfn = 0,
45 	.mem_type = XE_PL_SYSTEM,
46 	.flags = 0,
47 };
48 
49 static struct ttm_placement sys_placement = {
50 	.num_placement = 1,
51 	.placement = &sys_placement_flags,
52 };
53 
54 static const struct ttm_place tt_placement_flags[] = {
55 	{
56 		.fpfn = 0,
57 		.lpfn = 0,
58 		.mem_type = XE_PL_TT,
59 		.flags = TTM_PL_FLAG_DESIRED,
60 	},
61 	{
62 		.fpfn = 0,
63 		.lpfn = 0,
64 		.mem_type = XE_PL_SYSTEM,
65 		.flags = TTM_PL_FLAG_FALLBACK,
66 	}
67 };
68 
69 static struct ttm_placement tt_placement = {
70 	.num_placement = 2,
71 	.placement = tt_placement_flags,
72 };
73 
74 bool mem_type_is_vram(u32 mem_type)
75 {
76 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
77 }
78 
79 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
80 {
81 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
82 }
83 
84 static bool resource_is_vram(struct ttm_resource *res)
85 {
86 	return mem_type_is_vram(res->mem_type);
87 }
88 
89 bool xe_bo_is_vram(struct xe_bo *bo)
90 {
91 	return resource_is_vram(bo->ttm.resource) ||
92 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
93 }
94 
95 bool xe_bo_is_stolen(struct xe_bo *bo)
96 {
97 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
98 }
99 
100 /**
101  * xe_bo_has_single_placement - check if BO is placed only in one memory location
102  * @bo: The BO
103  *
104  * This function checks whether a given BO is placed in only one memory location.
105  *
106  * Returns: true if the BO is placed in a single memory location, false otherwise.
107  *
108  */
109 bool xe_bo_has_single_placement(struct xe_bo *bo)
110 {
111 	return bo->placement.num_placement == 1;
112 }
113 
114 /**
115  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
116  * @bo: The BO
117  *
118  * The stolen memory is accessed through the PCI BAR for both DGFX and some
119  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
120  *
121  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
122  */
123 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
124 {
125 	return xe_bo_is_stolen(bo) &&
126 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
127 }
128 
129 static bool xe_bo_is_user(struct xe_bo *bo)
130 {
131 	return bo->flags & XE_BO_FLAG_USER;
132 }
133 
134 static struct xe_migrate *
135 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
136 {
137 	struct xe_tile *tile;
138 
139 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
140 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
141 	return tile->migrate;
142 }
143 
144 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
145 {
146 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
147 	struct ttm_resource_manager *mgr;
148 
149 	xe_assert(xe, resource_is_vram(res));
150 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
151 	return to_xe_ttm_vram_mgr(mgr)->vram;
152 }
153 
154 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
155 			   u32 bo_flags, u32 *c)
156 {
157 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
158 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
159 
160 		bo->placements[*c] = (struct ttm_place) {
161 			.mem_type = XE_PL_TT,
162 		};
163 		*c += 1;
164 	}
165 }
166 
167 static bool force_contiguous(u32 bo_flags)
168 {
169 	/*
170 	 * For eviction / restore on suspend / resume objects pinned in VRAM
171 	 * must be contiguous, also only contiguous BOs support xe_bo_vmap.
172 	 */
173 	return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
174 }
175 
176 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
177 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
178 {
179 	struct ttm_place place = { .mem_type = mem_type };
180 	struct xe_mem_region *vram;
181 	u64 io_size;
182 
183 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
184 
185 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
186 	xe_assert(xe, vram && vram->usable_size);
187 	io_size = vram->io_size;
188 
189 	if (force_contiguous(bo_flags))
190 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
191 
192 	if (io_size < vram->usable_size) {
193 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
194 			place.fpfn = 0;
195 			place.lpfn = io_size >> PAGE_SHIFT;
196 		} else {
197 			place.flags |= TTM_PL_FLAG_TOPDOWN;
198 		}
199 	}
200 	places[*c] = place;
201 	*c += 1;
202 }
203 
204 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
205 			 u32 bo_flags, u32 *c)
206 {
207 	if (bo_flags & XE_BO_FLAG_VRAM0)
208 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
209 	if (bo_flags & XE_BO_FLAG_VRAM1)
210 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
211 }
212 
213 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
214 			   u32 bo_flags, u32 *c)
215 {
216 	if (bo_flags & XE_BO_FLAG_STOLEN) {
217 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
218 
219 		bo->placements[*c] = (struct ttm_place) {
220 			.mem_type = XE_PL_STOLEN,
221 			.flags = force_contiguous(bo_flags) ?
222 				TTM_PL_FLAG_CONTIGUOUS : 0,
223 		};
224 		*c += 1;
225 	}
226 }
227 
228 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
229 				       u32 bo_flags)
230 {
231 	u32 c = 0;
232 
233 	try_add_vram(xe, bo, bo_flags, &c);
234 	try_add_system(xe, bo, bo_flags, &c);
235 	try_add_stolen(xe, bo, bo_flags, &c);
236 
237 	if (!c)
238 		return -EINVAL;
239 
240 	bo->placement = (struct ttm_placement) {
241 		.num_placement = c,
242 		.placement = bo->placements,
243 	};
244 
245 	return 0;
246 }
247 
248 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
249 			      u32 bo_flags)
250 {
251 	xe_bo_assert_held(bo);
252 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
253 }
254 
255 static void xe_evict_flags(struct ttm_buffer_object *tbo,
256 			   struct ttm_placement *placement)
257 {
258 	if (!xe_bo_is_xe_bo(tbo)) {
259 		/* Don't handle scatter gather BOs */
260 		if (tbo->type == ttm_bo_type_sg) {
261 			placement->num_placement = 0;
262 			return;
263 		}
264 
265 		*placement = sys_placement;
266 		return;
267 	}
268 
269 	/*
270 	 * For xe, sg bos that are evicted to system just triggers a
271 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
272 	 */
273 	switch (tbo->resource->mem_type) {
274 	case XE_PL_VRAM0:
275 	case XE_PL_VRAM1:
276 	case XE_PL_STOLEN:
277 		*placement = tt_placement;
278 		break;
279 	case XE_PL_TT:
280 	default:
281 		*placement = sys_placement;
282 		break;
283 	}
284 }
285 
286 struct xe_ttm_tt {
287 	struct ttm_tt ttm;
288 	struct device *dev;
289 	struct sg_table sgt;
290 	struct sg_table *sg;
291 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
292 	bool purgeable;
293 };
294 
295 static int xe_tt_map_sg(struct ttm_tt *tt)
296 {
297 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
298 	unsigned long num_pages = tt->num_pages;
299 	int ret;
300 
301 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
302 
303 	if (xe_tt->sg)
304 		return 0;
305 
306 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
307 						num_pages, 0,
308 						(u64)num_pages << PAGE_SHIFT,
309 						xe_sg_segment_size(xe_tt->dev),
310 						GFP_KERNEL);
311 	if (ret)
312 		return ret;
313 
314 	xe_tt->sg = &xe_tt->sgt;
315 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
316 			      DMA_ATTR_SKIP_CPU_SYNC);
317 	if (ret) {
318 		sg_free_table(xe_tt->sg);
319 		xe_tt->sg = NULL;
320 		return ret;
321 	}
322 
323 	return 0;
324 }
325 
326 static void xe_tt_unmap_sg(struct ttm_tt *tt)
327 {
328 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
329 
330 	if (xe_tt->sg) {
331 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
332 				  DMA_BIDIRECTIONAL, 0);
333 		sg_free_table(xe_tt->sg);
334 		xe_tt->sg = NULL;
335 	}
336 }
337 
338 struct sg_table *xe_bo_sg(struct xe_bo *bo)
339 {
340 	struct ttm_tt *tt = bo->ttm.ttm;
341 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
342 
343 	return xe_tt->sg;
344 }
345 
346 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
347 				       u32 page_flags)
348 {
349 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
350 	struct xe_device *xe = xe_bo_device(bo);
351 	struct xe_ttm_tt *tt;
352 	unsigned long extra_pages;
353 	enum ttm_caching caching = ttm_cached;
354 	int err;
355 
356 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
357 	if (!tt)
358 		return NULL;
359 
360 	tt->dev = xe->drm.dev;
361 
362 	extra_pages = 0;
363 	if (xe_bo_needs_ccs_pages(bo))
364 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
365 					   PAGE_SIZE);
366 
367 	/*
368 	 * DGFX system memory is always WB / ttm_cached, since
369 	 * other caching modes are only supported on x86. DGFX
370 	 * GPU system memory accesses are always coherent with the
371 	 * CPU.
372 	 */
373 	if (!IS_DGFX(xe)) {
374 		switch (bo->cpu_caching) {
375 		case DRM_XE_GEM_CPU_CACHING_WC:
376 			caching = ttm_write_combined;
377 			break;
378 		default:
379 			caching = ttm_cached;
380 			break;
381 		}
382 
383 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
384 
385 		/*
386 		 * Display scanout is always non-coherent with the CPU cache.
387 		 *
388 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
389 		 * non-coherent and require a CPU:WC mapping.
390 		 */
391 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
392 		    (xe->info.graphics_verx100 >= 1270 &&
393 		     bo->flags & XE_BO_FLAG_PAGETABLE))
394 			caching = ttm_write_combined;
395 	}
396 
397 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
398 		/*
399 		 * Valid only for internally-created buffers only, for
400 		 * which cpu_caching is never initialized.
401 		 */
402 		xe_assert(xe, bo->cpu_caching == 0);
403 		caching = ttm_uncached;
404 	}
405 
406 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
407 	if (err) {
408 		kfree(tt);
409 		return NULL;
410 	}
411 
412 	return &tt->ttm;
413 }
414 
415 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
416 			      struct ttm_operation_ctx *ctx)
417 {
418 	int err;
419 
420 	/*
421 	 * dma-bufs are not populated with pages, and the dma-
422 	 * addresses are set up when moved to XE_PL_TT.
423 	 */
424 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
425 		return 0;
426 
427 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
428 	if (err)
429 		return err;
430 
431 	return err;
432 }
433 
434 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
435 {
436 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
437 		return;
438 
439 	xe_tt_unmap_sg(tt);
440 
441 	return ttm_pool_free(&ttm_dev->pool, tt);
442 }
443 
444 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
445 {
446 	ttm_tt_fini(tt);
447 	kfree(tt);
448 }
449 
450 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
451 {
452 	struct xe_ttm_vram_mgr_resource *vres =
453 		to_xe_ttm_vram_mgr_resource(mem);
454 
455 	return vres->used_visible_size == mem->size;
456 }
457 
458 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
459 				 struct ttm_resource *mem)
460 {
461 	struct xe_device *xe = ttm_to_xe_device(bdev);
462 
463 	switch (mem->mem_type) {
464 	case XE_PL_SYSTEM:
465 	case XE_PL_TT:
466 		return 0;
467 	case XE_PL_VRAM0:
468 	case XE_PL_VRAM1: {
469 		struct xe_mem_region *vram = res_to_mem_region(mem);
470 
471 		if (!xe_ttm_resource_visible(mem))
472 			return -EINVAL;
473 
474 		mem->bus.offset = mem->start << PAGE_SHIFT;
475 
476 		if (vram->mapping &&
477 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
478 			mem->bus.addr = (u8 __force *)vram->mapping +
479 				mem->bus.offset;
480 
481 		mem->bus.offset += vram->io_start;
482 		mem->bus.is_iomem = true;
483 
484 #if  !IS_ENABLED(CONFIG_X86)
485 		mem->bus.caching = ttm_write_combined;
486 #endif
487 		return 0;
488 	} case XE_PL_STOLEN:
489 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
490 	default:
491 		return -EINVAL;
492 	}
493 }
494 
495 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
496 				const struct ttm_operation_ctx *ctx)
497 {
498 	struct dma_resv_iter cursor;
499 	struct dma_fence *fence;
500 	struct drm_gem_object *obj = &bo->ttm.base;
501 	struct drm_gpuvm_bo *vm_bo;
502 	bool idle = false;
503 	int ret = 0;
504 
505 	dma_resv_assert_held(bo->ttm.base.resv);
506 
507 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
508 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
509 				    DMA_RESV_USAGE_BOOKKEEP);
510 		dma_resv_for_each_fence_unlocked(&cursor, fence)
511 			dma_fence_enable_sw_signaling(fence);
512 		dma_resv_iter_end(&cursor);
513 	}
514 
515 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
516 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
517 		struct drm_gpuva *gpuva;
518 
519 		if (!xe_vm_in_fault_mode(vm)) {
520 			drm_gpuvm_bo_evict(vm_bo, true);
521 			continue;
522 		}
523 
524 		if (!idle) {
525 			long timeout;
526 
527 			if (ctx->no_wait_gpu &&
528 			    !dma_resv_test_signaled(bo->ttm.base.resv,
529 						    DMA_RESV_USAGE_BOOKKEEP))
530 				return -EBUSY;
531 
532 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
533 							DMA_RESV_USAGE_BOOKKEEP,
534 							ctx->interruptible,
535 							MAX_SCHEDULE_TIMEOUT);
536 			if (!timeout)
537 				return -ETIME;
538 			if (timeout < 0)
539 				return timeout;
540 
541 			idle = true;
542 		}
543 
544 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
545 			struct xe_vma *vma = gpuva_to_vma(gpuva);
546 
547 			trace_xe_vma_evict(vma);
548 			ret = xe_vm_invalidate_vma(vma);
549 			if (XE_WARN_ON(ret))
550 				return ret;
551 		}
552 	}
553 
554 	return ret;
555 }
556 
557 /*
558  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
559  * Note that unmapping the attachment is deferred to the next
560  * map_attachment time, or to bo destroy (after idling) whichever comes first.
561  * This is to avoid syncing before unmap_attachment(), assuming that the
562  * caller relies on idling the reservation object before moving the
563  * backing store out. Should that assumption not hold, then we will be able
564  * to unconditionally call unmap_attachment() when moving out to system.
565  */
566 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
567 			     struct ttm_resource *new_res)
568 {
569 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
570 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
571 					       ttm);
572 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
573 	struct sg_table *sg;
574 
575 	xe_assert(xe, attach);
576 	xe_assert(xe, ttm_bo->ttm);
577 
578 	if (new_res->mem_type == XE_PL_SYSTEM)
579 		goto out;
580 
581 	if (ttm_bo->sg) {
582 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
583 		ttm_bo->sg = NULL;
584 	}
585 
586 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
587 	if (IS_ERR(sg))
588 		return PTR_ERR(sg);
589 
590 	ttm_bo->sg = sg;
591 	xe_tt->sg = sg;
592 
593 out:
594 	ttm_bo_move_null(ttm_bo, new_res);
595 
596 	return 0;
597 }
598 
599 /**
600  * xe_bo_move_notify - Notify subsystems of a pending move
601  * @bo: The buffer object
602  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
603  *
604  * This function notifies subsystems of an upcoming buffer move.
605  * Upon receiving such a notification, subsystems should schedule
606  * halting access to the underlying pages and optionally add a fence
607  * to the buffer object's dma_resv object, that signals when access is
608  * stopped. The caller will wait on all dma_resv fences before
609  * starting the move.
610  *
611  * A subsystem may commence access to the object after obtaining
612  * bindings to the new backing memory under the object lock.
613  *
614  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
615  * negative error code on error.
616  */
617 static int xe_bo_move_notify(struct xe_bo *bo,
618 			     const struct ttm_operation_ctx *ctx)
619 {
620 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
621 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
622 	struct ttm_resource *old_mem = ttm_bo->resource;
623 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
624 	int ret;
625 
626 	/*
627 	 * If this starts to call into many components, consider
628 	 * using a notification chain here.
629 	 */
630 
631 	if (xe_bo_is_pinned(bo))
632 		return -EINVAL;
633 
634 	xe_bo_vunmap(bo);
635 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
636 	if (ret)
637 		return ret;
638 
639 	/* Don't call move_notify() for imported dma-bufs. */
640 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
641 		dma_buf_move_notify(ttm_bo->base.dma_buf);
642 
643 	/*
644 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
645 	 * so if we moved from VRAM make sure to unlink this from the userfault
646 	 * tracking.
647 	 */
648 	if (mem_type_is_vram(old_mem_type)) {
649 		mutex_lock(&xe->mem_access.vram_userfault.lock);
650 		if (!list_empty(&bo->vram_userfault_link))
651 			list_del_init(&bo->vram_userfault_link);
652 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
653 	}
654 
655 	return 0;
656 }
657 
658 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
659 		      struct ttm_operation_ctx *ctx,
660 		      struct ttm_resource *new_mem,
661 		      struct ttm_place *hop)
662 {
663 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
664 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
665 	struct ttm_resource *old_mem = ttm_bo->resource;
666 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
667 	struct ttm_tt *ttm = ttm_bo->ttm;
668 	struct xe_migrate *migrate = NULL;
669 	struct dma_fence *fence;
670 	bool move_lacks_source;
671 	bool tt_has_data;
672 	bool needs_clear;
673 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
674 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
675 	int ret = 0;
676 
677 	/* Bo creation path, moving to system or TT. */
678 	if ((!old_mem && ttm) && !handle_system_ccs) {
679 		if (new_mem->mem_type == XE_PL_TT)
680 			ret = xe_tt_map_sg(ttm);
681 		if (!ret)
682 			ttm_bo_move_null(ttm_bo, new_mem);
683 		goto out;
684 	}
685 
686 	if (ttm_bo->type == ttm_bo_type_sg) {
687 		ret = xe_bo_move_notify(bo, ctx);
688 		if (!ret)
689 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
690 		return ret;
691 	}
692 
693 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
694 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
695 
696 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
697 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
698 
699 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
700 		(!ttm && ttm_bo->type == ttm_bo_type_device);
701 
702 	if (new_mem->mem_type == XE_PL_TT) {
703 		ret = xe_tt_map_sg(ttm);
704 		if (ret)
705 			goto out;
706 	}
707 
708 	if ((move_lacks_source && !needs_clear)) {
709 		ttm_bo_move_null(ttm_bo, new_mem);
710 		goto out;
711 	}
712 
713 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
714 		ttm_bo_move_null(ttm_bo, new_mem);
715 		goto out;
716 	}
717 
718 	/* Reject BO eviction if BO is bound to current VM. */
719 	if (evict && ctx->resv) {
720 		struct drm_gpuvm_bo *vm_bo;
721 
722 		drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) {
723 			struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
724 
725 			if (xe_vm_resv(vm) == ctx->resv &&
726 			    xe_vm_in_preempt_fence_mode(vm)) {
727 				ret = -EBUSY;
728 				goto out;
729 			}
730 		}
731 	}
732 
733 	/*
734 	 * Failed multi-hop where the old_mem is still marked as
735 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
736 	 */
737 	if (old_mem_type == XE_PL_TT &&
738 	    new_mem->mem_type == XE_PL_TT) {
739 		ttm_bo_move_null(ttm_bo, new_mem);
740 		goto out;
741 	}
742 
743 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
744 		ret = xe_bo_move_notify(bo, ctx);
745 		if (ret)
746 			goto out;
747 	}
748 
749 	if (old_mem_type == XE_PL_TT &&
750 	    new_mem->mem_type == XE_PL_SYSTEM) {
751 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
752 						     DMA_RESV_USAGE_BOOKKEEP,
753 						     false,
754 						     MAX_SCHEDULE_TIMEOUT);
755 		if (timeout < 0) {
756 			ret = timeout;
757 			goto out;
758 		}
759 
760 		if (!handle_system_ccs) {
761 			ttm_bo_move_null(ttm_bo, new_mem);
762 			goto out;
763 		}
764 	}
765 
766 	if (!move_lacks_source &&
767 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
768 	     (mem_type_is_vram(old_mem_type) &&
769 	      new_mem->mem_type == XE_PL_SYSTEM))) {
770 		hop->fpfn = 0;
771 		hop->lpfn = 0;
772 		hop->mem_type = XE_PL_TT;
773 		hop->flags = TTM_PL_FLAG_TEMPORARY;
774 		ret = -EMULTIHOP;
775 		goto out;
776 	}
777 
778 	if (bo->tile)
779 		migrate = bo->tile->migrate;
780 	else if (resource_is_vram(new_mem))
781 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
782 	else if (mem_type_is_vram(old_mem_type))
783 		migrate = mem_type_to_migrate(xe, old_mem_type);
784 	else
785 		migrate = xe->tiles[0].migrate;
786 
787 	xe_assert(xe, migrate);
788 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
789 	if (xe_rpm_reclaim_safe(xe)) {
790 		/*
791 		 * We might be called through swapout in the validation path of
792 		 * another TTM device, so acquire rpm here.
793 		 */
794 		xe_pm_runtime_get(xe);
795 	} else {
796 		drm_WARN_ON(&xe->drm, handle_system_ccs);
797 		xe_pm_runtime_get_noresume(xe);
798 	}
799 
800 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
801 		/*
802 		 * Kernel memory that is pinned should only be moved on suspend
803 		 * / resume, some of the pinned memory is required for the
804 		 * device to resume / use the GPU to move other evicted memory
805 		 * (user memory) around. This likely could be optimized a bit
806 		 * further where we find the minimum set of pinned memory
807 		 * required for resume but for simplity doing a memcpy for all
808 		 * pinned memory.
809 		 */
810 		ret = xe_bo_vmap(bo);
811 		if (!ret) {
812 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
813 
814 			/* Create a new VMAP once kernel BO back in VRAM */
815 			if (!ret && resource_is_vram(new_mem)) {
816 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
817 				void __iomem *new_addr = vram->mapping +
818 					(new_mem->start << PAGE_SHIFT);
819 
820 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
821 					ret = -EINVAL;
822 					xe_pm_runtime_put(xe);
823 					goto out;
824 				}
825 
826 				xe_assert(xe, new_mem->start ==
827 					  bo->placements->fpfn);
828 
829 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
830 			}
831 		}
832 	} else {
833 		if (move_lacks_source) {
834 			u32 flags = 0;
835 
836 			if (mem_type_is_vram(new_mem->mem_type))
837 				flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
838 			else if (handle_system_ccs)
839 				flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
840 
841 			fence = xe_migrate_clear(migrate, bo, new_mem, flags);
842 		}
843 		else
844 			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
845 						new_mem, handle_system_ccs);
846 		if (IS_ERR(fence)) {
847 			ret = PTR_ERR(fence);
848 			xe_pm_runtime_put(xe);
849 			goto out;
850 		}
851 		if (!move_lacks_source) {
852 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
853 							true, new_mem);
854 			if (ret) {
855 				dma_fence_wait(fence, false);
856 				ttm_bo_move_null(ttm_bo, new_mem);
857 				ret = 0;
858 			}
859 		} else {
860 			/*
861 			 * ttm_bo_move_accel_cleanup() may blow up if
862 			 * bo->resource == NULL, so just attach the
863 			 * fence and set the new resource.
864 			 */
865 			dma_resv_add_fence(ttm_bo->base.resv, fence,
866 					   DMA_RESV_USAGE_KERNEL);
867 			ttm_bo_move_null(ttm_bo, new_mem);
868 		}
869 
870 		dma_fence_put(fence);
871 	}
872 
873 	xe_pm_runtime_put(xe);
874 
875 out:
876 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
877 	    ttm_bo->ttm) {
878 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
879 						     DMA_RESV_USAGE_KERNEL,
880 						     false,
881 						     MAX_SCHEDULE_TIMEOUT);
882 		if (timeout < 0)
883 			ret = timeout;
884 
885 		xe_tt_unmap_sg(ttm_bo->ttm);
886 	}
887 
888 	return ret;
889 }
890 
891 /**
892  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
893  * @bo: The buffer object to move.
894  *
895  * On successful completion, the object memory will be moved to system memory.
896  *
897  * This is needed to for special handling of pinned VRAM object during
898  * suspend-resume.
899  *
900  * Return: 0 on success. Negative error code on failure.
901  */
902 int xe_bo_evict_pinned(struct xe_bo *bo)
903 {
904 	struct ttm_place place = {
905 		.mem_type = XE_PL_TT,
906 	};
907 	struct ttm_placement placement = {
908 		.placement = &place,
909 		.num_placement = 1,
910 	};
911 	struct ttm_operation_ctx ctx = {
912 		.interruptible = false,
913 		.gfp_retry_mayfail = true,
914 	};
915 	struct ttm_resource *new_mem;
916 	int ret;
917 
918 	xe_bo_assert_held(bo);
919 
920 	if (WARN_ON(!bo->ttm.resource))
921 		return -EINVAL;
922 
923 	if (WARN_ON(!xe_bo_is_pinned(bo)))
924 		return -EINVAL;
925 
926 	if (!xe_bo_is_vram(bo))
927 		return 0;
928 
929 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
930 	if (ret)
931 		return ret;
932 
933 	if (!bo->ttm.ttm) {
934 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
935 		if (!bo->ttm.ttm) {
936 			ret = -ENOMEM;
937 			goto err_res_free;
938 		}
939 	}
940 
941 	ret = ttm_bo_populate(&bo->ttm, &ctx);
942 	if (ret)
943 		goto err_res_free;
944 
945 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
946 	if (ret)
947 		goto err_res_free;
948 
949 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
950 	if (ret)
951 		goto err_res_free;
952 
953 	return 0;
954 
955 err_res_free:
956 	ttm_resource_free(&bo->ttm, &new_mem);
957 	return ret;
958 }
959 
960 /**
961  * xe_bo_restore_pinned() - Restore a pinned VRAM object
962  * @bo: The buffer object to move.
963  *
964  * On successful completion, the object memory will be moved back to VRAM.
965  *
966  * This is needed to for special handling of pinned VRAM object during
967  * suspend-resume.
968  *
969  * Return: 0 on success. Negative error code on failure.
970  */
971 int xe_bo_restore_pinned(struct xe_bo *bo)
972 {
973 	struct ttm_operation_ctx ctx = {
974 		.interruptible = false,
975 		.gfp_retry_mayfail = false,
976 	};
977 	struct ttm_resource *new_mem;
978 	struct ttm_place *place = &bo->placements[0];
979 	int ret;
980 
981 	xe_bo_assert_held(bo);
982 
983 	if (WARN_ON(!bo->ttm.resource))
984 		return -EINVAL;
985 
986 	if (WARN_ON(!xe_bo_is_pinned(bo)))
987 		return -EINVAL;
988 
989 	if (WARN_ON(xe_bo_is_vram(bo)))
990 		return -EINVAL;
991 
992 	if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
993 		return -EINVAL;
994 
995 	if (!mem_type_is_vram(place->mem_type))
996 		return 0;
997 
998 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
999 	if (ret)
1000 		return ret;
1001 
1002 	ret = ttm_bo_populate(&bo->ttm, &ctx);
1003 	if (ret)
1004 		goto err_res_free;
1005 
1006 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1007 	if (ret)
1008 		goto err_res_free;
1009 
1010 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
1011 	if (ret)
1012 		goto err_res_free;
1013 
1014 	return 0;
1015 
1016 err_res_free:
1017 	ttm_resource_free(&bo->ttm, &new_mem);
1018 	return ret;
1019 }
1020 
1021 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1022 				       unsigned long page_offset)
1023 {
1024 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1025 	struct xe_res_cursor cursor;
1026 	struct xe_mem_region *vram;
1027 
1028 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1029 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1030 
1031 	vram = res_to_mem_region(ttm_bo->resource);
1032 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1033 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1034 }
1035 
1036 static void __xe_bo_vunmap(struct xe_bo *bo);
1037 
1038 /*
1039  * TODO: Move this function to TTM so we don't rely on how TTM does its
1040  * locking, thereby abusing TTM internals.
1041  */
1042 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1043 {
1044 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1045 	bool locked;
1046 
1047 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1048 
1049 	/*
1050 	 * We can typically only race with TTM trylocking under the
1051 	 * lru_lock, which will immediately be unlocked again since
1052 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1053 	 * always succeed here, as long as we hold the lru lock.
1054 	 */
1055 	spin_lock(&ttm_bo->bdev->lru_lock);
1056 	locked = dma_resv_trylock(ttm_bo->base.resv);
1057 	spin_unlock(&ttm_bo->bdev->lru_lock);
1058 	xe_assert(xe, locked);
1059 
1060 	return locked;
1061 }
1062 
1063 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1064 {
1065 	struct dma_resv_iter cursor;
1066 	struct dma_fence *fence;
1067 	struct dma_fence *replacement = NULL;
1068 	struct xe_bo *bo;
1069 
1070 	if (!xe_bo_is_xe_bo(ttm_bo))
1071 		return;
1072 
1073 	bo = ttm_to_xe_bo(ttm_bo);
1074 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1075 
1076 	/*
1077 	 * Corner case where TTM fails to allocate memory and this BOs resv
1078 	 * still points the VMs resv
1079 	 */
1080 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1081 		return;
1082 
1083 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1084 		return;
1085 
1086 	/*
1087 	 * Scrub the preempt fences if any. The unbind fence is already
1088 	 * attached to the resv.
1089 	 * TODO: Don't do this for external bos once we scrub them after
1090 	 * unbind.
1091 	 */
1092 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1093 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1094 		if (xe_fence_is_xe_preempt(fence) &&
1095 		    !dma_fence_is_signaled(fence)) {
1096 			if (!replacement)
1097 				replacement = dma_fence_get_stub();
1098 
1099 			dma_resv_replace_fences(ttm_bo->base.resv,
1100 						fence->context,
1101 						replacement,
1102 						DMA_RESV_USAGE_BOOKKEEP);
1103 		}
1104 	}
1105 	dma_fence_put(replacement);
1106 
1107 	dma_resv_unlock(ttm_bo->base.resv);
1108 }
1109 
1110 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1111 {
1112 	if (!xe_bo_is_xe_bo(ttm_bo))
1113 		return;
1114 
1115 	/*
1116 	 * Object is idle and about to be destroyed. Release the
1117 	 * dma-buf attachment.
1118 	 */
1119 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1120 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1121 						       struct xe_ttm_tt, ttm);
1122 
1123 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1124 					 DMA_BIDIRECTIONAL);
1125 		ttm_bo->sg = NULL;
1126 		xe_tt->sg = NULL;
1127 	}
1128 }
1129 
1130 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1131 {
1132 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1133 
1134 	if (ttm_bo->ttm) {
1135 		struct ttm_placement place = {};
1136 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1137 
1138 		drm_WARN_ON(&xe->drm, ret);
1139 	}
1140 }
1141 
1142 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1143 {
1144 	struct ttm_operation_ctx ctx = {
1145 		.interruptible = false,
1146 		.gfp_retry_mayfail = false,
1147 	};
1148 
1149 	if (ttm_bo->ttm) {
1150 		struct xe_ttm_tt *xe_tt =
1151 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1152 
1153 		if (xe_tt->purgeable)
1154 			xe_ttm_bo_purge(ttm_bo, &ctx);
1155 	}
1156 }
1157 
1158 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1159 				unsigned long offset, void *buf, int len,
1160 				int write)
1161 {
1162 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1163 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1164 	struct iosys_map vmap;
1165 	struct xe_res_cursor cursor;
1166 	struct xe_mem_region *vram;
1167 	int bytes_left = len;
1168 
1169 	xe_bo_assert_held(bo);
1170 	xe_device_assert_mem_access(xe);
1171 
1172 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1173 		return -EIO;
1174 
1175 	/* FIXME: Use GPU for non-visible VRAM */
1176 	if (!xe_ttm_resource_visible(ttm_bo->resource))
1177 		return -EIO;
1178 
1179 	vram = res_to_mem_region(ttm_bo->resource);
1180 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1181 		     bo->size - (offset & PAGE_MASK), &cursor);
1182 
1183 	do {
1184 		unsigned long page_offset = (offset & ~PAGE_MASK);
1185 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1186 
1187 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1188 					  cursor.start);
1189 		if (write)
1190 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1191 		else
1192 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1193 
1194 		buf += byte_count;
1195 		offset += byte_count;
1196 		bytes_left -= byte_count;
1197 		if (bytes_left)
1198 			xe_res_next(&cursor, PAGE_SIZE);
1199 	} while (bytes_left);
1200 
1201 	return len;
1202 }
1203 
1204 const struct ttm_device_funcs xe_ttm_funcs = {
1205 	.ttm_tt_create = xe_ttm_tt_create,
1206 	.ttm_tt_populate = xe_ttm_tt_populate,
1207 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1208 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1209 	.evict_flags = xe_evict_flags,
1210 	.move = xe_bo_move,
1211 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1212 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1213 	.access_memory = xe_ttm_access_memory,
1214 	.release_notify = xe_ttm_bo_release_notify,
1215 	.eviction_valuable = ttm_bo_eviction_valuable,
1216 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1217 	.swap_notify = xe_ttm_bo_swap_notify,
1218 };
1219 
1220 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1221 {
1222 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1223 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1224 	struct xe_tile *tile;
1225 	u8 id;
1226 
1227 	if (bo->ttm.base.import_attach)
1228 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1229 	drm_gem_object_release(&bo->ttm.base);
1230 
1231 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1232 
1233 	for_each_tile(tile, xe, id)
1234 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1235 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1236 
1237 #ifdef CONFIG_PROC_FS
1238 	if (bo->client)
1239 		xe_drm_client_remove_bo(bo);
1240 #endif
1241 
1242 	if (bo->vm && xe_bo_is_user(bo))
1243 		xe_vm_put(bo->vm);
1244 
1245 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1246 	if (!list_empty(&bo->vram_userfault_link))
1247 		list_del(&bo->vram_userfault_link);
1248 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1249 
1250 	kfree(bo);
1251 }
1252 
1253 static void xe_gem_object_free(struct drm_gem_object *obj)
1254 {
1255 	/* Our BO reference counting scheme works as follows:
1256 	 *
1257 	 * The gem object kref is typically used throughout the driver,
1258 	 * and the gem object holds a ttm_buffer_object refcount, so
1259 	 * that when the last gem object reference is put, which is when
1260 	 * we end up in this function, we put also that ttm_buffer_object
1261 	 * refcount. Anything using gem interfaces is then no longer
1262 	 * allowed to access the object in a way that requires a gem
1263 	 * refcount, including locking the object.
1264 	 *
1265 	 * driver ttm callbacks is allowed to use the ttm_buffer_object
1266 	 * refcount directly if needed.
1267 	 */
1268 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1269 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1270 }
1271 
1272 static void xe_gem_object_close(struct drm_gem_object *obj,
1273 				struct drm_file *file_priv)
1274 {
1275 	struct xe_bo *bo = gem_to_xe_bo(obj);
1276 
1277 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1278 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1279 
1280 		xe_bo_lock(bo, false);
1281 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1282 		xe_bo_unlock(bo);
1283 	}
1284 }
1285 
1286 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1287 {
1288 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1289 	struct drm_device *ddev = tbo->base.dev;
1290 	struct xe_device *xe = to_xe_device(ddev);
1291 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1292 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1293 	vm_fault_t ret;
1294 	int idx;
1295 
1296 	if (needs_rpm)
1297 		xe_pm_runtime_get(xe);
1298 
1299 	ret = ttm_bo_vm_reserve(tbo, vmf);
1300 	if (ret)
1301 		goto out;
1302 
1303 	if (drm_dev_enter(ddev, &idx)) {
1304 		trace_xe_bo_cpu_fault(bo);
1305 
1306 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1307 					       TTM_BO_VM_NUM_PREFAULT);
1308 		drm_dev_exit(idx);
1309 	} else {
1310 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1311 	}
1312 
1313 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1314 		goto out;
1315 	/*
1316 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1317 	 */
1318 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1319 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1320 		if (list_empty(&bo->vram_userfault_link))
1321 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1322 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1323 	}
1324 
1325 	dma_resv_unlock(tbo->base.resv);
1326 out:
1327 	if (needs_rpm)
1328 		xe_pm_runtime_put(xe);
1329 
1330 	return ret;
1331 }
1332 
1333 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1334 			   void *buf, int len, int write)
1335 {
1336 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1337 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1338 	struct xe_device *xe = xe_bo_device(bo);
1339 	int ret;
1340 
1341 	xe_pm_runtime_get(xe);
1342 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1343 	xe_pm_runtime_put(xe);
1344 
1345 	return ret;
1346 }
1347 
1348 /**
1349  * xe_bo_read() - Read from an xe_bo
1350  * @bo: The buffer object to read from.
1351  * @offset: The byte offset to start reading from.
1352  * @dst: Location to store the read.
1353  * @size: Size in bytes for the read.
1354  *
1355  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1356  *
1357  * Return: Zero on success, or negative error.
1358  */
1359 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1360 {
1361 	int ret;
1362 
1363 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1364 	if (ret >= 0 && ret != size)
1365 		ret = -EIO;
1366 	else if (ret == size)
1367 		ret = 0;
1368 
1369 	return ret;
1370 }
1371 
1372 static const struct vm_operations_struct xe_gem_vm_ops = {
1373 	.fault = xe_gem_fault,
1374 	.open = ttm_bo_vm_open,
1375 	.close = ttm_bo_vm_close,
1376 	.access = xe_bo_vm_access,
1377 };
1378 
1379 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1380 	.free = xe_gem_object_free,
1381 	.close = xe_gem_object_close,
1382 	.mmap = drm_gem_ttm_mmap,
1383 	.export = xe_gem_prime_export,
1384 	.vm_ops = &xe_gem_vm_ops,
1385 };
1386 
1387 /**
1388  * xe_bo_alloc - Allocate storage for a struct xe_bo
1389  *
1390  * This function is intended to allocate storage to be used for input
1391  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1392  * created is needed before the call to __xe_bo_create_locked().
1393  * If __xe_bo_create_locked ends up never to be called, then the
1394  * storage allocated with this function needs to be freed using
1395  * xe_bo_free().
1396  *
1397  * Return: A pointer to an uninitialized struct xe_bo on success,
1398  * ERR_PTR(-ENOMEM) on error.
1399  */
1400 struct xe_bo *xe_bo_alloc(void)
1401 {
1402 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1403 
1404 	if (!bo)
1405 		return ERR_PTR(-ENOMEM);
1406 
1407 	return bo;
1408 }
1409 
1410 /**
1411  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1412  * @bo: The buffer object storage.
1413  *
1414  * Refer to xe_bo_alloc() documentation for valid use-cases.
1415  */
1416 void xe_bo_free(struct xe_bo *bo)
1417 {
1418 	kfree(bo);
1419 }
1420 
1421 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1422 				     struct xe_tile *tile, struct dma_resv *resv,
1423 				     struct ttm_lru_bulk_move *bulk, size_t size,
1424 				     u16 cpu_caching, enum ttm_bo_type type,
1425 				     u32 flags)
1426 {
1427 	struct ttm_operation_ctx ctx = {
1428 		.interruptible = true,
1429 		.no_wait_gpu = false,
1430 		.gfp_retry_mayfail = true,
1431 	};
1432 	struct ttm_placement *placement;
1433 	uint32_t alignment;
1434 	size_t aligned_size;
1435 	int err;
1436 
1437 	/* Only kernel objects should set GT */
1438 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1439 
1440 	if (XE_WARN_ON(!size)) {
1441 		xe_bo_free(bo);
1442 		return ERR_PTR(-EINVAL);
1443 	}
1444 
1445 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
1446 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1447 		return ERR_PTR(-EINVAL);
1448 
1449 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1450 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1451 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1452 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1453 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1454 
1455 		aligned_size = ALIGN(size, align);
1456 		if (type != ttm_bo_type_device)
1457 			size = ALIGN(size, align);
1458 		flags |= XE_BO_FLAG_INTERNAL_64K;
1459 		alignment = align >> PAGE_SHIFT;
1460 	} else {
1461 		aligned_size = ALIGN(size, SZ_4K);
1462 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1463 		alignment = SZ_4K >> PAGE_SHIFT;
1464 	}
1465 
1466 	if (type == ttm_bo_type_device && aligned_size != size)
1467 		return ERR_PTR(-EINVAL);
1468 
1469 	if (!bo) {
1470 		bo = xe_bo_alloc();
1471 		if (IS_ERR(bo))
1472 			return bo;
1473 	}
1474 
1475 	bo->ccs_cleared = false;
1476 	bo->tile = tile;
1477 	bo->size = size;
1478 	bo->flags = flags;
1479 	bo->cpu_caching = cpu_caching;
1480 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1481 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1482 	INIT_LIST_HEAD(&bo->pinned_link);
1483 #ifdef CONFIG_PROC_FS
1484 	INIT_LIST_HEAD(&bo->client_link);
1485 #endif
1486 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1487 
1488 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1489 
1490 	if (resv) {
1491 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1492 		ctx.resv = resv;
1493 	}
1494 
1495 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1496 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1497 		if (WARN_ON(err)) {
1498 			xe_ttm_bo_destroy(&bo->ttm);
1499 			return ERR_PTR(err);
1500 		}
1501 	}
1502 
1503 	/* Defer populating type_sg bos */
1504 	placement = (type == ttm_bo_type_sg ||
1505 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1506 		&bo->placement;
1507 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1508 				   placement, alignment,
1509 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1510 	if (err)
1511 		return ERR_PTR(err);
1512 
1513 	/*
1514 	 * The VRAM pages underneath are potentially still being accessed by the
1515 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1516 	 * sure to add any corresponding move/clear fences into the objects
1517 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1518 	 *
1519 	 * For KMD internal buffers we don't care about GPU clearing, however we
1520 	 * still need to handle async evictions, where the VRAM is still being
1521 	 * accessed by the GPU. Most internal callers are not expecting this,
1522 	 * since they are missing the required synchronisation before accessing
1523 	 * the memory. To keep things simple just sync wait any kernel fences
1524 	 * here, if the buffer is designated KMD internal.
1525 	 *
1526 	 * For normal userspace objects we should already have the required
1527 	 * pipelining or sync waiting elsewhere, since we already have to deal
1528 	 * with things like async GPU clearing.
1529 	 */
1530 	if (type == ttm_bo_type_kernel) {
1531 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1532 						     DMA_RESV_USAGE_KERNEL,
1533 						     ctx.interruptible,
1534 						     MAX_SCHEDULE_TIMEOUT);
1535 
1536 		if (timeout < 0) {
1537 			if (!resv)
1538 				dma_resv_unlock(bo->ttm.base.resv);
1539 			xe_bo_put(bo);
1540 			return ERR_PTR(timeout);
1541 		}
1542 	}
1543 
1544 	bo->created = true;
1545 	if (bulk)
1546 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1547 	else
1548 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1549 
1550 	return bo;
1551 }
1552 
1553 static int __xe_bo_fixed_placement(struct xe_device *xe,
1554 				   struct xe_bo *bo,
1555 				   u32 flags,
1556 				   u64 start, u64 end, u64 size)
1557 {
1558 	struct ttm_place *place = bo->placements;
1559 
1560 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1561 		return -EINVAL;
1562 
1563 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1564 	place->fpfn = start >> PAGE_SHIFT;
1565 	place->lpfn = end >> PAGE_SHIFT;
1566 
1567 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1568 	case XE_BO_FLAG_VRAM0:
1569 		place->mem_type = XE_PL_VRAM0;
1570 		break;
1571 	case XE_BO_FLAG_VRAM1:
1572 		place->mem_type = XE_PL_VRAM1;
1573 		break;
1574 	case XE_BO_FLAG_STOLEN:
1575 		place->mem_type = XE_PL_STOLEN;
1576 		break;
1577 
1578 	default:
1579 		/* 0 or multiple of the above set */
1580 		return -EINVAL;
1581 	}
1582 
1583 	bo->placement = (struct ttm_placement) {
1584 		.num_placement = 1,
1585 		.placement = place,
1586 	};
1587 
1588 	return 0;
1589 }
1590 
1591 static struct xe_bo *
1592 __xe_bo_create_locked(struct xe_device *xe,
1593 		      struct xe_tile *tile, struct xe_vm *vm,
1594 		      size_t size, u64 start, u64 end,
1595 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
1596 		      u64 alignment)
1597 {
1598 	struct xe_bo *bo = NULL;
1599 	int err;
1600 
1601 	if (vm)
1602 		xe_vm_assert_held(vm);
1603 
1604 	if (start || end != ~0ULL) {
1605 		bo = xe_bo_alloc();
1606 		if (IS_ERR(bo))
1607 			return bo;
1608 
1609 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1610 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1611 		if (err) {
1612 			xe_bo_free(bo);
1613 			return ERR_PTR(err);
1614 		}
1615 	}
1616 
1617 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1618 				    vm && !xe_vm_in_fault_mode(vm) &&
1619 				    flags & XE_BO_FLAG_USER ?
1620 				    &vm->lru_bulk_move : NULL, size,
1621 				    cpu_caching, type, flags);
1622 	if (IS_ERR(bo))
1623 		return bo;
1624 
1625 	bo->min_align = alignment;
1626 
1627 	/*
1628 	 * Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
1629 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1630 	 * will keep a reference to the vm, and avoid circular references
1631 	 * by having all the vm's bo refereferences released at vm close
1632 	 * time.
1633 	 */
1634 	if (vm && xe_bo_is_user(bo))
1635 		xe_vm_get(vm);
1636 	bo->vm = vm;
1637 
1638 	if (bo->flags & XE_BO_FLAG_GGTT) {
1639 		struct xe_tile *t;
1640 		u8 id;
1641 
1642 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
1643 			if (!tile && flags & XE_BO_FLAG_STOLEN)
1644 				tile = xe_device_get_root_tile(xe);
1645 
1646 			xe_assert(xe, tile);
1647 		}
1648 
1649 		for_each_tile(t, xe, id) {
1650 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
1651 				continue;
1652 
1653 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
1654 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
1655 							   start + bo->size, U64_MAX);
1656 			} else {
1657 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
1658 			}
1659 			if (err)
1660 				goto err_unlock_put_bo;
1661 		}
1662 	}
1663 
1664 	trace_xe_bo_create(bo);
1665 	return bo;
1666 
1667 err_unlock_put_bo:
1668 	__xe_bo_unset_bulk_move(bo);
1669 	xe_bo_unlock_vm_held(bo);
1670 	xe_bo_put(bo);
1671 	return ERR_PTR(err);
1672 }
1673 
1674 struct xe_bo *
1675 xe_bo_create_locked_range(struct xe_device *xe,
1676 			  struct xe_tile *tile, struct xe_vm *vm,
1677 			  size_t size, u64 start, u64 end,
1678 			  enum ttm_bo_type type, u32 flags, u64 alignment)
1679 {
1680 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
1681 				     flags, alignment);
1682 }
1683 
1684 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1685 				  struct xe_vm *vm, size_t size,
1686 				  enum ttm_bo_type type, u32 flags)
1687 {
1688 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
1689 				     flags, 0);
1690 }
1691 
1692 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1693 				struct xe_vm *vm, size_t size,
1694 				u16 cpu_caching,
1695 				u32 flags)
1696 {
1697 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1698 						 cpu_caching, ttm_bo_type_device,
1699 						 flags | XE_BO_FLAG_USER, 0);
1700 	if (!IS_ERR(bo))
1701 		xe_bo_unlock_vm_held(bo);
1702 
1703 	return bo;
1704 }
1705 
1706 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1707 			   struct xe_vm *vm, size_t size,
1708 			   enum ttm_bo_type type, u32 flags)
1709 {
1710 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1711 
1712 	if (!IS_ERR(bo))
1713 		xe_bo_unlock_vm_held(bo);
1714 
1715 	return bo;
1716 }
1717 
1718 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1719 				      struct xe_vm *vm,
1720 				      size_t size, u64 offset,
1721 				      enum ttm_bo_type type, u32 flags)
1722 {
1723 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
1724 					       type, flags, 0);
1725 }
1726 
1727 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
1728 					      struct xe_tile *tile,
1729 					      struct xe_vm *vm,
1730 					      size_t size, u64 offset,
1731 					      enum ttm_bo_type type, u32 flags,
1732 					      u64 alignment)
1733 {
1734 	struct xe_bo *bo;
1735 	int err;
1736 	u64 start = offset == ~0ull ? 0 : offset;
1737 	u64 end = offset == ~0ull ? offset : start + size;
1738 
1739 	if (flags & XE_BO_FLAG_STOLEN &&
1740 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1741 		flags |= XE_BO_FLAG_GGTT;
1742 
1743 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1744 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
1745 				       alignment);
1746 	if (IS_ERR(bo))
1747 		return bo;
1748 
1749 	err = xe_bo_pin(bo);
1750 	if (err)
1751 		goto err_put;
1752 
1753 	err = xe_bo_vmap(bo);
1754 	if (err)
1755 		goto err_unpin;
1756 
1757 	xe_bo_unlock_vm_held(bo);
1758 
1759 	return bo;
1760 
1761 err_unpin:
1762 	xe_bo_unpin(bo);
1763 err_put:
1764 	xe_bo_unlock_vm_held(bo);
1765 	xe_bo_put(bo);
1766 	return ERR_PTR(err);
1767 }
1768 
1769 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1770 				   struct xe_vm *vm, size_t size,
1771 				   enum ttm_bo_type type, u32 flags)
1772 {
1773 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1774 }
1775 
1776 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1777 				     const void *data, size_t size,
1778 				     enum ttm_bo_type type, u32 flags)
1779 {
1780 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1781 						ALIGN(size, PAGE_SIZE),
1782 						type, flags);
1783 	if (IS_ERR(bo))
1784 		return bo;
1785 
1786 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1787 
1788 	return bo;
1789 }
1790 
1791 static void __xe_bo_unpin_map_no_vm(void *arg)
1792 {
1793 	xe_bo_unpin_map_no_vm(arg);
1794 }
1795 
1796 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1797 					   size_t size, u32 flags)
1798 {
1799 	struct xe_bo *bo;
1800 	int ret;
1801 
1802 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
1803 
1804 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1805 	if (IS_ERR(bo))
1806 		return bo;
1807 
1808 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
1809 	if (ret)
1810 		return ERR_PTR(ret);
1811 
1812 	return bo;
1813 }
1814 
1815 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1816 					     const void *data, size_t size, u32 flags)
1817 {
1818 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1819 
1820 	if (IS_ERR(bo))
1821 		return bo;
1822 
1823 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1824 
1825 	return bo;
1826 }
1827 
1828 /**
1829  * xe_managed_bo_reinit_in_vram
1830  * @xe: xe device
1831  * @tile: Tile where the new buffer will be created
1832  * @src: Managed buffer object allocated in system memory
1833  *
1834  * Replace a managed src buffer object allocated in system memory with a new
1835  * one allocated in vram, copying the data between them.
1836  * Buffer object in VRAM is not going to have the same GGTT address, the caller
1837  * is responsible for making sure that any old references to it are updated.
1838  *
1839  * Returns 0 for success, negative error code otherwise.
1840  */
1841 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1842 {
1843 	struct xe_bo *bo;
1844 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
1845 
1846 	dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
1847 
1848 	xe_assert(xe, IS_DGFX(xe));
1849 	xe_assert(xe, !(*src)->vmap.is_iomem);
1850 
1851 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
1852 					    (*src)->size, dst_flags);
1853 	if (IS_ERR(bo))
1854 		return PTR_ERR(bo);
1855 
1856 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
1857 	*src = bo;
1858 
1859 	return 0;
1860 }
1861 
1862 /*
1863  * XXX: This is in the VM bind data path, likely should calculate this once and
1864  * store, with a recalculation if the BO is moved.
1865  */
1866 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1867 {
1868 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1869 
1870 	if (res->mem_type == XE_PL_STOLEN)
1871 		return xe_ttm_stolen_gpu_offset(xe);
1872 
1873 	return res_to_mem_region(res)->dpa_base;
1874 }
1875 
1876 /**
1877  * xe_bo_pin_external - pin an external BO
1878  * @bo: buffer object to be pinned
1879  *
1880  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1881  * BO. Unique call compared to xe_bo_pin as this function has it own set of
1882  * asserts and code to ensure evict / restore on suspend / resume.
1883  *
1884  * Returns 0 for success, negative error code otherwise.
1885  */
1886 int xe_bo_pin_external(struct xe_bo *bo)
1887 {
1888 	struct xe_device *xe = xe_bo_device(bo);
1889 	int err;
1890 
1891 	xe_assert(xe, !bo->vm);
1892 	xe_assert(xe, xe_bo_is_user(bo));
1893 
1894 	if (!xe_bo_is_pinned(bo)) {
1895 		err = xe_bo_validate(bo, NULL, false);
1896 		if (err)
1897 			return err;
1898 
1899 		if (xe_bo_is_vram(bo)) {
1900 			spin_lock(&xe->pinned.lock);
1901 			list_add_tail(&bo->pinned_link,
1902 				      &xe->pinned.external_vram);
1903 			spin_unlock(&xe->pinned.lock);
1904 		}
1905 	}
1906 
1907 	ttm_bo_pin(&bo->ttm);
1908 
1909 	/*
1910 	 * FIXME: If we always use the reserve / unreserve functions for locking
1911 	 * we do not need this.
1912 	 */
1913 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1914 
1915 	return 0;
1916 }
1917 
1918 int xe_bo_pin(struct xe_bo *bo)
1919 {
1920 	struct ttm_place *place = &bo->placements[0];
1921 	struct xe_device *xe = xe_bo_device(bo);
1922 	int err;
1923 
1924 	/* We currently don't expect user BO to be pinned */
1925 	xe_assert(xe, !xe_bo_is_user(bo));
1926 
1927 	/* Pinned object must be in GGTT or have pinned flag */
1928 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
1929 				   XE_BO_FLAG_GGTT));
1930 
1931 	/*
1932 	 * No reason we can't support pinning imported dma-bufs we just don't
1933 	 * expect to pin an imported dma-buf.
1934 	 */
1935 	xe_assert(xe, !bo->ttm.base.import_attach);
1936 
1937 	/* We only expect at most 1 pin */
1938 	xe_assert(xe, !xe_bo_is_pinned(bo));
1939 
1940 	err = xe_bo_validate(bo, NULL, false);
1941 	if (err)
1942 		return err;
1943 
1944 	/*
1945 	 * For pinned objects in on DGFX, which are also in vram, we expect
1946 	 * these to be in contiguous VRAM memory. Required eviction / restore
1947 	 * during suspend / resume (force restore to same physical address).
1948 	 */
1949 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1950 	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1951 		if (mem_type_is_vram(place->mem_type)) {
1952 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1953 
1954 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1955 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1956 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1957 		}
1958 	}
1959 
1960 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
1961 		spin_lock(&xe->pinned.lock);
1962 		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1963 		spin_unlock(&xe->pinned.lock);
1964 	}
1965 
1966 	ttm_bo_pin(&bo->ttm);
1967 
1968 	/*
1969 	 * FIXME: If we always use the reserve / unreserve functions for locking
1970 	 * we do not need this.
1971 	 */
1972 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1973 
1974 	return 0;
1975 }
1976 
1977 /**
1978  * xe_bo_unpin_external - unpin an external BO
1979  * @bo: buffer object to be unpinned
1980  *
1981  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1982  * BO. Unique call compared to xe_bo_unpin as this function has it own set of
1983  * asserts and code to ensure evict / restore on suspend / resume.
1984  *
1985  * Returns 0 for success, negative error code otherwise.
1986  */
1987 void xe_bo_unpin_external(struct xe_bo *bo)
1988 {
1989 	struct xe_device *xe = xe_bo_device(bo);
1990 
1991 	xe_assert(xe, !bo->vm);
1992 	xe_assert(xe, xe_bo_is_pinned(bo));
1993 	xe_assert(xe, xe_bo_is_user(bo));
1994 
1995 	spin_lock(&xe->pinned.lock);
1996 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
1997 		list_del_init(&bo->pinned_link);
1998 	spin_unlock(&xe->pinned.lock);
1999 
2000 	ttm_bo_unpin(&bo->ttm);
2001 
2002 	/*
2003 	 * FIXME: If we always use the reserve / unreserve functions for locking
2004 	 * we do not need this.
2005 	 */
2006 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2007 }
2008 
2009 void xe_bo_unpin(struct xe_bo *bo)
2010 {
2011 	struct ttm_place *place = &bo->placements[0];
2012 	struct xe_device *xe = xe_bo_device(bo);
2013 
2014 	xe_assert(xe, !bo->ttm.base.import_attach);
2015 	xe_assert(xe, xe_bo_is_pinned(bo));
2016 
2017 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2018 		spin_lock(&xe->pinned.lock);
2019 		xe_assert(xe, !list_empty(&bo->pinned_link));
2020 		list_del_init(&bo->pinned_link);
2021 		spin_unlock(&xe->pinned.lock);
2022 	}
2023 	ttm_bo_unpin(&bo->ttm);
2024 }
2025 
2026 /**
2027  * xe_bo_validate() - Make sure the bo is in an allowed placement
2028  * @bo: The bo,
2029  * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or
2030  *      NULL. Used together with @allow_res_evict.
2031  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2032  *                   reservation object.
2033  *
2034  * Make sure the bo is in allowed placement, migrating it if necessary. If
2035  * needed, other bos will be evicted. If bos selected for eviction shares
2036  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
2037  * set to true, otherwise they will be bypassed.
2038  *
2039  * Return: 0 on success, negative error code on failure. May return
2040  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2041  */
2042 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
2043 {
2044 	struct ttm_operation_ctx ctx = {
2045 		.interruptible = true,
2046 		.no_wait_gpu = false,
2047 		.gfp_retry_mayfail = true,
2048 	};
2049 
2050 	if (vm) {
2051 		lockdep_assert_held(&vm->lock);
2052 		xe_vm_assert_held(vm);
2053 
2054 		ctx.allow_res_evict = allow_res_evict;
2055 		ctx.resv = xe_vm_resv(vm);
2056 	}
2057 
2058 	trace_xe_bo_validate(bo);
2059 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2060 }
2061 
2062 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2063 {
2064 	if (bo->destroy == &xe_ttm_bo_destroy)
2065 		return true;
2066 
2067 	return false;
2068 }
2069 
2070 /*
2071  * Resolve a BO address. There is no assert to check if the proper lock is held
2072  * so it should only be used in cases where it is not fatal to get the wrong
2073  * address, such as printing debug information, but not in cases where memory is
2074  * written based on this result.
2075  */
2076 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2077 {
2078 	struct xe_device *xe = xe_bo_device(bo);
2079 	struct xe_res_cursor cur;
2080 	u64 page;
2081 
2082 	xe_assert(xe, page_size <= PAGE_SIZE);
2083 	page = offset >> PAGE_SHIFT;
2084 	offset &= (PAGE_SIZE - 1);
2085 
2086 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2087 		xe_assert(xe, bo->ttm.ttm);
2088 
2089 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2090 				page_size, &cur);
2091 		return xe_res_dma(&cur) + offset;
2092 	} else {
2093 		struct xe_res_cursor cur;
2094 
2095 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2096 			     page_size, &cur);
2097 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2098 	}
2099 }
2100 
2101 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2102 {
2103 	if (!READ_ONCE(bo->ttm.pin_count))
2104 		xe_bo_assert_held(bo);
2105 	return __xe_bo_addr(bo, offset, page_size);
2106 }
2107 
2108 int xe_bo_vmap(struct xe_bo *bo)
2109 {
2110 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2111 	void *virtual;
2112 	bool is_iomem;
2113 	int ret;
2114 
2115 	xe_bo_assert_held(bo);
2116 
2117 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2118 			!force_contiguous(bo->flags)))
2119 		return -EINVAL;
2120 
2121 	if (!iosys_map_is_null(&bo->vmap))
2122 		return 0;
2123 
2124 	/*
2125 	 * We use this more or less deprecated interface for now since
2126 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2127 	 * single page bos, which is done here.
2128 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2129 	 * to use struct iosys_map.
2130 	 */
2131 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
2132 	if (ret)
2133 		return ret;
2134 
2135 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
2136 	if (is_iomem)
2137 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
2138 	else
2139 		iosys_map_set_vaddr(&bo->vmap, virtual);
2140 
2141 	return 0;
2142 }
2143 
2144 static void __xe_bo_vunmap(struct xe_bo *bo)
2145 {
2146 	if (!iosys_map_is_null(&bo->vmap)) {
2147 		iosys_map_clear(&bo->vmap);
2148 		ttm_bo_kunmap(&bo->kmap);
2149 	}
2150 }
2151 
2152 void xe_bo_vunmap(struct xe_bo *bo)
2153 {
2154 	xe_bo_assert_held(bo);
2155 	__xe_bo_vunmap(bo);
2156 }
2157 
2158 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2159 			struct drm_file *file)
2160 {
2161 	struct xe_device *xe = to_xe_device(dev);
2162 	struct xe_file *xef = to_xe_file(file);
2163 	struct drm_xe_gem_create *args = data;
2164 	struct xe_vm *vm = NULL;
2165 	struct xe_bo *bo;
2166 	unsigned int bo_flags;
2167 	u32 handle;
2168 	int err;
2169 
2170 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2171 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2172 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2173 		return -EINVAL;
2174 
2175 	/* at least one valid memory placement must be specified */
2176 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2177 			 !args->placement))
2178 		return -EINVAL;
2179 
2180 	if (XE_IOCTL_DBG(xe, args->flags &
2181 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2182 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2183 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2184 		return -EINVAL;
2185 
2186 	if (XE_IOCTL_DBG(xe, args->handle))
2187 		return -EINVAL;
2188 
2189 	if (XE_IOCTL_DBG(xe, !args->size))
2190 		return -EINVAL;
2191 
2192 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2193 		return -EINVAL;
2194 
2195 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2196 		return -EINVAL;
2197 
2198 	bo_flags = 0;
2199 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2200 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2201 
2202 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2203 		bo_flags |= XE_BO_FLAG_SCANOUT;
2204 
2205 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2206 
2207 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2208 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2209 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2210 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2211 	    IS_ALIGNED(args->size, SZ_64K))
2212 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2213 
2214 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2215 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2216 			return -EINVAL;
2217 
2218 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2219 	}
2220 
2221 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2222 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2223 		return -EINVAL;
2224 
2225 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2226 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2227 		return -EINVAL;
2228 
2229 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2230 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2231 		return -EINVAL;
2232 
2233 	if (args->vm_id) {
2234 		vm = xe_vm_lookup(xef, args->vm_id);
2235 		if (XE_IOCTL_DBG(xe, !vm))
2236 			return -ENOENT;
2237 		err = xe_vm_lock(vm, true);
2238 		if (err)
2239 			goto out_vm;
2240 	}
2241 
2242 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2243 			       bo_flags);
2244 
2245 	if (vm)
2246 		xe_vm_unlock(vm);
2247 
2248 	if (IS_ERR(bo)) {
2249 		err = PTR_ERR(bo);
2250 		goto out_vm;
2251 	}
2252 
2253 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2254 	if (err)
2255 		goto out_bulk;
2256 
2257 	args->handle = handle;
2258 	goto out_put;
2259 
2260 out_bulk:
2261 	if (vm && !xe_vm_in_fault_mode(vm)) {
2262 		xe_vm_lock(vm, false);
2263 		__xe_bo_unset_bulk_move(bo);
2264 		xe_vm_unlock(vm);
2265 	}
2266 out_put:
2267 	xe_bo_put(bo);
2268 out_vm:
2269 	if (vm)
2270 		xe_vm_put(vm);
2271 
2272 	return err;
2273 }
2274 
2275 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2276 			     struct drm_file *file)
2277 {
2278 	struct xe_device *xe = to_xe_device(dev);
2279 	struct drm_xe_gem_mmap_offset *args = data;
2280 	struct drm_gem_object *gem_obj;
2281 
2282 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2283 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2284 		return -EINVAL;
2285 
2286 	if (XE_IOCTL_DBG(xe, args->flags &
2287 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
2288 		return -EINVAL;
2289 
2290 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
2291 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
2292 			return -EINVAL;
2293 
2294 		if (XE_IOCTL_DBG(xe, args->handle))
2295 			return -EINVAL;
2296 
2297 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
2298 			return -EINVAL;
2299 
2300 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
2301 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
2302 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
2303 		return 0;
2304 	}
2305 
2306 	gem_obj = drm_gem_object_lookup(file, args->handle);
2307 	if (XE_IOCTL_DBG(xe, !gem_obj))
2308 		return -ENOENT;
2309 
2310 	/* The mmap offset was set up at BO allocation time. */
2311 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2312 
2313 	xe_bo_put(gem_to_xe_bo(gem_obj));
2314 	return 0;
2315 }
2316 
2317 /**
2318  * xe_bo_lock() - Lock the buffer object's dma_resv object
2319  * @bo: The struct xe_bo whose lock is to be taken
2320  * @intr: Whether to perform any wait interruptible
2321  *
2322  * Locks the buffer object's dma_resv object. If the buffer object is
2323  * pointing to a shared dma_resv object, that shared lock is locked.
2324  *
2325  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2326  * contended lock was interrupted. If @intr is set to false, the
2327  * function always returns 0.
2328  */
2329 int xe_bo_lock(struct xe_bo *bo, bool intr)
2330 {
2331 	if (intr)
2332 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2333 
2334 	dma_resv_lock(bo->ttm.base.resv, NULL);
2335 
2336 	return 0;
2337 }
2338 
2339 /**
2340  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2341  * @bo: The struct xe_bo whose lock is to be released.
2342  *
2343  * Unlock a buffer object lock that was locked by xe_bo_lock().
2344  */
2345 void xe_bo_unlock(struct xe_bo *bo)
2346 {
2347 	dma_resv_unlock(bo->ttm.base.resv);
2348 }
2349 
2350 /**
2351  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2352  * @bo: The buffer object to migrate
2353  * @mem_type: The TTM memory type intended to migrate to
2354  *
2355  * Check whether the buffer object supports migration to the
2356  * given memory type. Note that pinning may affect the ability to migrate as
2357  * returned by this function.
2358  *
2359  * This function is primarily intended as a helper for checking the
2360  * possibility to migrate buffer objects and can be called without
2361  * the object lock held.
2362  *
2363  * Return: true if migration is possible, false otherwise.
2364  */
2365 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2366 {
2367 	unsigned int cur_place;
2368 
2369 	if (bo->ttm.type == ttm_bo_type_kernel)
2370 		return true;
2371 
2372 	if (bo->ttm.type == ttm_bo_type_sg)
2373 		return false;
2374 
2375 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2376 	     cur_place++) {
2377 		if (bo->placements[cur_place].mem_type == mem_type)
2378 			return true;
2379 	}
2380 
2381 	return false;
2382 }
2383 
2384 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2385 {
2386 	memset(place, 0, sizeof(*place));
2387 	place->mem_type = mem_type;
2388 }
2389 
2390 /**
2391  * xe_bo_migrate - Migrate an object to the desired region id
2392  * @bo: The buffer object to migrate.
2393  * @mem_type: The TTM region type to migrate to.
2394  *
2395  * Attempt to migrate the buffer object to the desired memory region. The
2396  * buffer object may not be pinned, and must be locked.
2397  * On successful completion, the object memory type will be updated,
2398  * but an async migration task may not have completed yet, and to
2399  * accomplish that, the object's kernel fences must be signaled with
2400  * the object lock held.
2401  *
2402  * Return: 0 on success. Negative error code on failure. In particular may
2403  * return -EINTR or -ERESTARTSYS if signal pending.
2404  */
2405 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2406 {
2407 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2408 	struct ttm_operation_ctx ctx = {
2409 		.interruptible = true,
2410 		.no_wait_gpu = false,
2411 		.gfp_retry_mayfail = true,
2412 	};
2413 	struct ttm_placement placement;
2414 	struct ttm_place requested;
2415 
2416 	xe_bo_assert_held(bo);
2417 
2418 	if (bo->ttm.resource->mem_type == mem_type)
2419 		return 0;
2420 
2421 	if (xe_bo_is_pinned(bo))
2422 		return -EBUSY;
2423 
2424 	if (!xe_bo_can_migrate(bo, mem_type))
2425 		return -EINVAL;
2426 
2427 	xe_place_from_ttm_type(mem_type, &requested);
2428 	placement.num_placement = 1;
2429 	placement.placement = &requested;
2430 
2431 	/*
2432 	 * Stolen needs to be handled like below VRAM handling if we ever need
2433 	 * to support it.
2434 	 */
2435 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2436 
2437 	if (mem_type_is_vram(mem_type)) {
2438 		u32 c = 0;
2439 
2440 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2441 	}
2442 
2443 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2444 }
2445 
2446 /**
2447  * xe_bo_evict - Evict an object to evict placement
2448  * @bo: The buffer object to migrate.
2449  * @force_alloc: Set force_alloc in ttm_operation_ctx
2450  *
2451  * On successful completion, the object memory will be moved to evict
2452  * placement. This function blocks until the object has been fully moved.
2453  *
2454  * Return: 0 on success. Negative error code on failure.
2455  */
2456 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2457 {
2458 	struct ttm_operation_ctx ctx = {
2459 		.interruptible = false,
2460 		.no_wait_gpu = false,
2461 		.force_alloc = force_alloc,
2462 		.gfp_retry_mayfail = true,
2463 	};
2464 	struct ttm_placement placement;
2465 	int ret;
2466 
2467 	xe_evict_flags(&bo->ttm, &placement);
2468 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2469 	if (ret)
2470 		return ret;
2471 
2472 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2473 			      false, MAX_SCHEDULE_TIMEOUT);
2474 
2475 	return 0;
2476 }
2477 
2478 /**
2479  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2480  * placed in system memory.
2481  * @bo: The xe_bo
2482  *
2483  * Return: true if extra pages need to be allocated, false otherwise.
2484  */
2485 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2486 {
2487 	struct xe_device *xe = xe_bo_device(bo);
2488 
2489 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
2490 		return false;
2491 
2492 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2493 		return false;
2494 
2495 	/* On discrete GPUs, if the GPU can access this buffer from
2496 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2497 	 * can't be used since there's no CCS storage associated with
2498 	 * non-VRAM addresses.
2499 	 */
2500 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
2501 		return false;
2502 
2503 	return true;
2504 }
2505 
2506 /**
2507  * __xe_bo_release_dummy() - Dummy kref release function
2508  * @kref: The embedded struct kref.
2509  *
2510  * Dummy release function for xe_bo_put_deferred(). Keep off.
2511  */
2512 void __xe_bo_release_dummy(struct kref *kref)
2513 {
2514 }
2515 
2516 /**
2517  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2518  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2519  *
2520  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2521  * The @deferred list can be either an onstack local list or a global
2522  * shared list used by a workqueue.
2523  */
2524 void xe_bo_put_commit(struct llist_head *deferred)
2525 {
2526 	struct llist_node *freed;
2527 	struct xe_bo *bo, *next;
2528 
2529 	if (!deferred)
2530 		return;
2531 
2532 	freed = llist_del_all(deferred);
2533 	if (!freed)
2534 		return;
2535 
2536 	llist_for_each_entry_safe(bo, next, freed, freed)
2537 		drm_gem_object_free(&bo->ttm.base.refcount);
2538 }
2539 
2540 void xe_bo_put(struct xe_bo *bo)
2541 {
2542 	struct xe_tile *tile;
2543 	u8 id;
2544 
2545 	might_sleep();
2546 	if (bo) {
2547 #ifdef CONFIG_PROC_FS
2548 		if (bo->client)
2549 			might_lock(&bo->client->bos_lock);
2550 #endif
2551 		for_each_tile(tile, xe_bo_device(bo), id)
2552 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
2553 				might_lock(&bo->ggtt_node[id]->ggtt->lock);
2554 		drm_gem_object_put(&bo->ttm.base);
2555 	}
2556 }
2557 
2558 /**
2559  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2560  * @file_priv: ...
2561  * @dev: ...
2562  * @args: ...
2563  *
2564  * See dumb_create() hook in include/drm/drm_drv.h
2565  *
2566  * Return: ...
2567  */
2568 int xe_bo_dumb_create(struct drm_file *file_priv,
2569 		      struct drm_device *dev,
2570 		      struct drm_mode_create_dumb *args)
2571 {
2572 	struct xe_device *xe = to_xe_device(dev);
2573 	struct xe_bo *bo;
2574 	uint32_t handle;
2575 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2576 	int err;
2577 	u32 page_size = max_t(u32, PAGE_SIZE,
2578 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2579 
2580 	args->pitch = ALIGN(args->width * cpp, 64);
2581 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2582 			   page_size);
2583 
2584 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2585 			       DRM_XE_GEM_CPU_CACHING_WC,
2586 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2587 			       XE_BO_FLAG_SCANOUT |
2588 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
2589 	if (IS_ERR(bo))
2590 		return PTR_ERR(bo);
2591 
2592 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2593 	/* drop reference from allocate - handle holds it now */
2594 	drm_gem_object_put(&bo->ttm.base);
2595 	if (!err)
2596 		args->handle = handle;
2597 	return err;
2598 }
2599 
2600 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2601 {
2602 	struct ttm_buffer_object *tbo = &bo->ttm;
2603 	struct ttm_device *bdev = tbo->bdev;
2604 
2605 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2606 
2607 	list_del_init(&bo->vram_userfault_link);
2608 }
2609 
2610 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2611 #include "tests/xe_bo.c"
2612 #endif
2613