1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_gem_ttm_helper.h>
13 #include <drm/drm_managed.h>
14 #include <drm/ttm/ttm_device.h>
15 #include <drm/ttm/ttm_placement.h>
16 #include <drm/ttm/ttm_tt.h>
17 #include <uapi/drm/xe_drm.h>
18 
19 #include <kunit/static_stub.h>
20 
21 #include "xe_device.h"
22 #include "xe_dma_buf.h"
23 #include "xe_drm_client.h"
24 #include "xe_ggtt.h"
25 #include "xe_gt.h"
26 #include "xe_map.h"
27 #include "xe_migrate.h"
28 #include "xe_pm.h"
29 #include "xe_preempt_fence.h"
30 #include "xe_pxp.h"
31 #include "xe_res_cursor.h"
32 #include "xe_trace_bo.h"
33 #include "xe_ttm_stolen_mgr.h"
34 #include "xe_vm.h"
35 
36 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
37 	[XE_PL_SYSTEM] = "system",
38 	[XE_PL_TT] = "gtt",
39 	[XE_PL_VRAM0] = "vram0",
40 	[XE_PL_VRAM1] = "vram1",
41 	[XE_PL_STOLEN] = "stolen"
42 };
43 
44 static const struct ttm_place sys_placement_flags = {
45 	.fpfn = 0,
46 	.lpfn = 0,
47 	.mem_type = XE_PL_SYSTEM,
48 	.flags = 0,
49 };
50 
51 static struct ttm_placement sys_placement = {
52 	.num_placement = 1,
53 	.placement = &sys_placement_flags,
54 };
55 
56 static const struct ttm_place tt_placement_flags[] = {
57 	{
58 		.fpfn = 0,
59 		.lpfn = 0,
60 		.mem_type = XE_PL_TT,
61 		.flags = TTM_PL_FLAG_DESIRED,
62 	},
63 	{
64 		.fpfn = 0,
65 		.lpfn = 0,
66 		.mem_type = XE_PL_SYSTEM,
67 		.flags = TTM_PL_FLAG_FALLBACK,
68 	}
69 };
70 
71 static struct ttm_placement tt_placement = {
72 	.num_placement = 2,
73 	.placement = tt_placement_flags,
74 };
75 
76 bool mem_type_is_vram(u32 mem_type)
77 {
78 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
79 }
80 
81 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
82 {
83 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
84 }
85 
86 static bool resource_is_vram(struct ttm_resource *res)
87 {
88 	return mem_type_is_vram(res->mem_type);
89 }
90 
91 bool xe_bo_is_vram(struct xe_bo *bo)
92 {
93 	return resource_is_vram(bo->ttm.resource) ||
94 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
95 }
96 
97 bool xe_bo_is_stolen(struct xe_bo *bo)
98 {
99 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
100 }
101 
102 /**
103  * xe_bo_has_single_placement - check if BO is placed only in one memory location
104  * @bo: The BO
105  *
106  * This function checks whether a given BO is placed in only one memory location.
107  *
108  * Returns: true if the BO is placed in a single memory location, false otherwise.
109  *
110  */
111 bool xe_bo_has_single_placement(struct xe_bo *bo)
112 {
113 	return bo->placement.num_placement == 1;
114 }
115 
116 /**
117  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
118  * @bo: The BO
119  *
120  * The stolen memory is accessed through the PCI BAR for both DGFX and some
121  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
122  *
123  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
124  */
125 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
126 {
127 	return xe_bo_is_stolen(bo) &&
128 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
129 }
130 
131 static bool xe_bo_is_user(struct xe_bo *bo)
132 {
133 	return bo->flags & XE_BO_FLAG_USER;
134 }
135 
136 static struct xe_migrate *
137 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
138 {
139 	struct xe_tile *tile;
140 
141 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
142 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
143 	return tile->migrate;
144 }
145 
146 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
147 {
148 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
149 	struct ttm_resource_manager *mgr;
150 
151 	xe_assert(xe, resource_is_vram(res));
152 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
153 	return to_xe_ttm_vram_mgr(mgr)->vram;
154 }
155 
156 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
157 			   u32 bo_flags, u32 *c)
158 {
159 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
160 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
161 
162 		bo->placements[*c] = (struct ttm_place) {
163 			.mem_type = XE_PL_TT,
164 		};
165 		*c += 1;
166 	}
167 }
168 
169 static bool force_contiguous(u32 bo_flags)
170 {
171 	/*
172 	 * For eviction / restore on suspend / resume, objects pinned in VRAM
173 	 * must be contiguous. Also, only contiguous BOs support xe_bo_vmap().
174 	 */
175 	return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
176 }
177 
178 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
179 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
180 {
181 	struct ttm_place place = { .mem_type = mem_type };
182 	struct xe_mem_region *vram;
183 	u64 io_size;
184 
185 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
186 
187 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
188 	xe_assert(xe, vram && vram->usable_size);
189 	io_size = vram->io_size;
190 
191 	if (force_contiguous(bo_flags))
192 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
193 
194 	if (io_size < vram->usable_size) {
195 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
196 			place.fpfn = 0;
197 			place.lpfn = io_size >> PAGE_SHIFT;
198 		} else {
199 			place.flags |= TTM_PL_FLAG_TOPDOWN;
200 		}
201 	}
202 	places[*c] = place;
203 	*c += 1;
204 }
205 
206 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
207 			 u32 bo_flags, u32 *c)
208 {
209 	if (bo_flags & XE_BO_FLAG_VRAM0)
210 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
211 	if (bo_flags & XE_BO_FLAG_VRAM1)
212 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
213 }
214 
215 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
216 			   u32 bo_flags, u32 *c)
217 {
218 	if (bo_flags & XE_BO_FLAG_STOLEN) {
219 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
220 
221 		bo->placements[*c] = (struct ttm_place) {
222 			.mem_type = XE_PL_STOLEN,
223 			.flags = force_contiguous(bo_flags) ?
224 				TTM_PL_FLAG_CONTIGUOUS : 0,
225 		};
226 		*c += 1;
227 	}
228 }
229 
230 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
231 				       u32 bo_flags)
232 {
233 	u32 c = 0;
234 
235 	try_add_vram(xe, bo, bo_flags, &c);
236 	try_add_system(xe, bo, bo_flags, &c);
237 	try_add_stolen(xe, bo, bo_flags, &c);
238 
239 	if (!c)
240 		return -EINVAL;
241 
242 	bo->placement = (struct ttm_placement) {
243 		.num_placement = c,
244 		.placement = bo->placements,
245 	};
246 
247 	return 0;
248 }
249 
250 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
251 			      u32 bo_flags)
252 {
253 	xe_bo_assert_held(bo);
254 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
255 }
256 
257 static void xe_evict_flags(struct ttm_buffer_object *tbo,
258 			   struct ttm_placement *placement)
259 {
260 	if (!xe_bo_is_xe_bo(tbo)) {
261 		/* Don't handle scatter gather BOs */
262 		if (tbo->type == ttm_bo_type_sg) {
263 			placement->num_placement = 0;
264 			return;
265 		}
266 
267 		*placement = sys_placement;
268 		return;
269 	}
270 
271 	/*
272 	 * For xe, sg bos that are evicted to system just trigger a
273 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
274 	 */
275 	switch (tbo->resource->mem_type) {
276 	case XE_PL_VRAM0:
277 	case XE_PL_VRAM1:
278 	case XE_PL_STOLEN:
279 		*placement = tt_placement;
280 		break;
281 	case XE_PL_TT:
282 	default:
283 		*placement = sys_placement;
284 		break;
285 	}
286 }
287 
288 struct xe_ttm_tt {
289 	struct ttm_tt ttm;
290 	struct device *dev;
291 	struct sg_table sgt;
292 	struct sg_table *sg;
293 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
294 	bool purgeable;
295 };
296 
297 static int xe_tt_map_sg(struct ttm_tt *tt)
298 {
299 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
300 	unsigned long num_pages = tt->num_pages;
301 	int ret;
302 
303 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
304 
305 	if (xe_tt->sg)
306 		return 0;
307 
308 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
309 						num_pages, 0,
310 						(u64)num_pages << PAGE_SHIFT,
311 						xe_sg_segment_size(xe_tt->dev),
312 						GFP_KERNEL);
313 	if (ret)
314 		return ret;
315 
316 	xe_tt->sg = &xe_tt->sgt;
317 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
318 			      DMA_ATTR_SKIP_CPU_SYNC);
319 	if (ret) {
320 		sg_free_table(xe_tt->sg);
321 		xe_tt->sg = NULL;
322 		return ret;
323 	}
324 
325 	return 0;
326 }
327 
328 static void xe_tt_unmap_sg(struct ttm_tt *tt)
329 {
330 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
331 
332 	if (xe_tt->sg) {
333 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
334 				  DMA_BIDIRECTIONAL, 0);
335 		sg_free_table(xe_tt->sg);
336 		xe_tt->sg = NULL;
337 	}
338 }
339 
340 struct sg_table *xe_bo_sg(struct xe_bo *bo)
341 {
342 	struct ttm_tt *tt = bo->ttm.ttm;
343 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
344 
345 	return xe_tt->sg;
346 }
347 
348 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
349 				       u32 page_flags)
350 {
351 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
352 	struct xe_device *xe = xe_bo_device(bo);
353 	struct xe_ttm_tt *tt;
354 	unsigned long extra_pages;
355 	enum ttm_caching caching = ttm_cached;
356 	int err;
357 
358 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
359 	if (!tt)
360 		return NULL;
361 
362 	tt->dev = xe->drm.dev;
363 
364 	extra_pages = 0;
365 	if (xe_bo_needs_ccs_pages(bo))
366 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
367 					   PAGE_SIZE);
368 
369 	/*
370 	 * DGFX system memory is always WB / ttm_cached, since
371 	 * other caching modes are only supported on x86. DGFX
372 	 * GPU system memory accesses are always coherent with the
373 	 * CPU.
374 	 */
375 	if (!IS_DGFX(xe)) {
376 		switch (bo->cpu_caching) {
377 		case DRM_XE_GEM_CPU_CACHING_WC:
378 			caching = ttm_write_combined;
379 			break;
380 		default:
381 			caching = ttm_cached;
382 			break;
383 		}
384 
385 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
386 
387 		/*
388 		 * Display scanout is always non-coherent with the CPU cache.
389 		 *
390 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
391 		 * non-coherent and require a CPU:WC mapping.
392 		 */
393 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
394 		    (xe->info.graphics_verx100 >= 1270 &&
395 		     bo->flags & XE_BO_FLAG_PAGETABLE))
396 			caching = ttm_write_combined;
397 	}
398 
399 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
400 		/*
401 		 * Valid only for internally-created buffers, for
402 		 * which cpu_caching is never initialized.
403 		 */
404 		xe_assert(xe, bo->cpu_caching == 0);
405 		caching = ttm_uncached;
406 	}
407 
408 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
409 	if (err) {
410 		kfree(tt);
411 		return NULL;
412 	}
413 
414 	return &tt->ttm;
415 }
416 
417 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
418 			      struct ttm_operation_ctx *ctx)
419 {
420 	int err;
421 
422 	/*
423 	 * dma-bufs are not populated with pages, and the dma-
424 	 * addresses are set up when moved to XE_PL_TT.
425 	 */
426 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
427 		return 0;
428 
429 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
430 	if (err)
431 		return err;
432 
433 	return err;
434 }
435 
436 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
437 {
438 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
439 		return;
440 
441 	xe_tt_unmap_sg(tt);
442 
443 	return ttm_pool_free(&ttm_dev->pool, tt);
444 }
445 
446 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
447 {
448 	ttm_tt_fini(tt);
449 	kfree(tt);
450 }
451 
452 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
453 {
454 	struct xe_ttm_vram_mgr_resource *vres =
455 		to_xe_ttm_vram_mgr_resource(mem);
456 
457 	return vres->used_visible_size == mem->size;
458 }
459 
460 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
461 				 struct ttm_resource *mem)
462 {
463 	struct xe_device *xe = ttm_to_xe_device(bdev);
464 
465 	switch (mem->mem_type) {
466 	case XE_PL_SYSTEM:
467 	case XE_PL_TT:
468 		return 0;
469 	case XE_PL_VRAM0:
470 	case XE_PL_VRAM1: {
471 		struct xe_mem_region *vram = res_to_mem_region(mem);
472 
473 		if (!xe_ttm_resource_visible(mem))
474 			return -EINVAL;
475 
476 		mem->bus.offset = mem->start << PAGE_SHIFT;
477 
478 		if (vram->mapping &&
479 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
480 			mem->bus.addr = (u8 __force *)vram->mapping +
481 				mem->bus.offset;
482 
483 		mem->bus.offset += vram->io_start;
484 		mem->bus.is_iomem = true;
485 
486 #if !IS_ENABLED(CONFIG_X86)
487 		mem->bus.caching = ttm_write_combined;
488 #endif
489 		return 0;
490 	} case XE_PL_STOLEN:
491 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
492 	default:
493 		return -EINVAL;
494 	}
495 }
496 
497 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
498 				const struct ttm_operation_ctx *ctx)
499 {
500 	struct dma_resv_iter cursor;
501 	struct dma_fence *fence;
502 	struct drm_gem_object *obj = &bo->ttm.base;
503 	struct drm_gpuvm_bo *vm_bo;
504 	bool idle = false;
505 	int ret = 0;
506 
507 	dma_resv_assert_held(bo->ttm.base.resv);
508 
509 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
510 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
511 				    DMA_RESV_USAGE_BOOKKEEP);
512 		dma_resv_for_each_fence_unlocked(&cursor, fence)
513 			dma_fence_enable_sw_signaling(fence);
514 		dma_resv_iter_end(&cursor);
515 	}
516 
517 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
518 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
519 		struct drm_gpuva *gpuva;
520 
521 		if (!xe_vm_in_fault_mode(vm)) {
522 			drm_gpuvm_bo_evict(vm_bo, true);
523 			continue;
524 		}
525 
526 		if (!idle) {
527 			long timeout;
528 
529 			if (ctx->no_wait_gpu &&
530 			    !dma_resv_test_signaled(bo->ttm.base.resv,
531 						    DMA_RESV_USAGE_BOOKKEEP))
532 				return -EBUSY;
533 
534 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
535 							DMA_RESV_USAGE_BOOKKEEP,
536 							ctx->interruptible,
537 							MAX_SCHEDULE_TIMEOUT);
538 			if (!timeout)
539 				return -ETIME;
540 			if (timeout < 0)
541 				return timeout;
542 
543 			idle = true;
544 		}
545 
546 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
547 			struct xe_vma *vma = gpuva_to_vma(gpuva);
548 
549 			trace_xe_vma_evict(vma);
550 			ret = xe_vm_invalidate_vma(vma);
551 			if (XE_WARN_ON(ret))
552 				return ret;
553 		}
554 	}
555 
556 	return ret;
557 }
558 
559 /*
560  * The dma-buf map_attachment() / unmap_attachment() calls are hooked up here.
561  * Note that unmapping the attachment is deferred to the next
562  * map_attachment time, or to bo destroy (after idling), whichever comes first.
563  * This is to avoid syncing before unmap_attachment(), assuming that the
564  * caller relies on idling the reservation object before moving the
565  * backing store out. Should that assumption not hold, then we will be able
566  * to unconditionally call unmap_attachment() when moving out to system.
567  */
568 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
569 			     struct ttm_resource *new_res)
570 {
571 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
572 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
573 					       ttm);
574 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
575 	struct sg_table *sg;
576 
577 	xe_assert(xe, attach);
578 	xe_assert(xe, ttm_bo->ttm);
579 
580 	if (new_res->mem_type == XE_PL_SYSTEM)
581 		goto out;
582 
583 	if (ttm_bo->sg) {
584 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
585 		ttm_bo->sg = NULL;
586 	}
587 
588 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
589 	if (IS_ERR(sg))
590 		return PTR_ERR(sg);
591 
592 	ttm_bo->sg = sg;
593 	xe_tt->sg = sg;
594 
595 out:
596 	ttm_bo_move_null(ttm_bo, new_res);
597 
598 	return 0;
599 }
600 
601 /**
602  * xe_bo_move_notify - Notify subsystems of a pending move
603  * @bo: The buffer object
604  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
605  *
606  * This function notifies subsystems of an upcoming buffer move.
607  * Upon receiving such a notification, subsystems should schedule
608  * halting access to the underlying pages and optionally add a fence
609  * to the buffer object's dma_resv object, that signals when access is
610  * stopped. The caller will wait on all dma_resv fences before
611  * starting the move.
612  *
613  * A subsystem may commence access to the object after obtaining
614  * bindings to the new backing memory under the object lock.
615  *
616  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
617  * negative error code on error.
618  */
619 static int xe_bo_move_notify(struct xe_bo *bo,
620 			     const struct ttm_operation_ctx *ctx)
621 {
622 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
623 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
624 	struct ttm_resource *old_mem = ttm_bo->resource;
625 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
626 	int ret;
627 
628 	/*
629 	 * If this starts to call into many components, consider
630 	 * using a notification chain here.
631 	 */
632 
633 	if (xe_bo_is_pinned(bo))
634 		return -EINVAL;
635 
636 	xe_bo_vunmap(bo);
637 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
638 	if (ret)
639 		return ret;
640 
641 	/* Don't call move_notify() for imported dma-bufs. */
642 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
643 		dma_buf_move_notify(ttm_bo->base.dma_buf);
644 
645 	/*
646 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
647 	 * so if we moved from VRAM make sure to unlink this from the userfault
648 	 * tracking.
649 	 */
650 	if (mem_type_is_vram(old_mem_type)) {
651 		mutex_lock(&xe->mem_access.vram_userfault.lock);
652 		if (!list_empty(&bo->vram_userfault_link))
653 			list_del_init(&bo->vram_userfault_link);
654 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
655 	}
656 
657 	return 0;
658 }
659 
660 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
661 		      struct ttm_operation_ctx *ctx,
662 		      struct ttm_resource *new_mem,
663 		      struct ttm_place *hop)
664 {
665 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
666 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
667 	struct ttm_resource *old_mem = ttm_bo->resource;
668 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
669 	struct ttm_tt *ttm = ttm_bo->ttm;
670 	struct xe_migrate *migrate = NULL;
671 	struct dma_fence *fence;
672 	bool move_lacks_source;
673 	bool tt_has_data;
674 	bool needs_clear;
675 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
676 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
677 	int ret = 0;
678 
679 	/* Bo creation path, moving to system or TT. */
680 	if ((!old_mem && ttm) && !handle_system_ccs) {
681 		if (new_mem->mem_type == XE_PL_TT)
682 			ret = xe_tt_map_sg(ttm);
683 		if (!ret)
684 			ttm_bo_move_null(ttm_bo, new_mem);
685 		goto out;
686 	}
687 
688 	if (ttm_bo->type == ttm_bo_type_sg) {
689 		ret = xe_bo_move_notify(bo, ctx);
690 		if (!ret)
691 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
692 		return ret;
693 	}
694 
695 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
696 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
697 
698 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
699 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
700 
701 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
702 		(!ttm && ttm_bo->type == ttm_bo_type_device);
703 
704 	if (new_mem->mem_type == XE_PL_TT) {
705 		ret = xe_tt_map_sg(ttm);
706 		if (ret)
707 			goto out;
708 	}
709 
710 	if ((move_lacks_source && !needs_clear)) {
711 		ttm_bo_move_null(ttm_bo, new_mem);
712 		goto out;
713 	}
714 
715 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
716 		ttm_bo_move_null(ttm_bo, new_mem);
717 		goto out;
718 	}
719 
720 	/* Reject BO eviction if BO is bound to current VM. */
721 	if (evict && ctx->resv) {
722 		struct drm_gpuvm_bo *vm_bo;
723 
724 		drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) {
725 			struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
726 
727 			if (xe_vm_resv(vm) == ctx->resv &&
728 			    xe_vm_in_preempt_fence_mode(vm)) {
729 				ret = -EBUSY;
730 				goto out;
731 			}
732 		}
733 	}
734 
735 	/*
736 	 * A failed multi-hop, where the old_mem is still marked as
737 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
738 	 */
739 	if (old_mem_type == XE_PL_TT &&
740 	    new_mem->mem_type == XE_PL_TT) {
741 		ttm_bo_move_null(ttm_bo, new_mem);
742 		goto out;
743 	}
744 
745 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
746 		ret = xe_bo_move_notify(bo, ctx);
747 		if (ret)
748 			goto out;
749 	}
750 
751 	if (old_mem_type == XE_PL_TT &&
752 	    new_mem->mem_type == XE_PL_SYSTEM) {
753 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
754 						     DMA_RESV_USAGE_BOOKKEEP,
755 						     false,
756 						     MAX_SCHEDULE_TIMEOUT);
757 		if (timeout < 0) {
758 			ret = timeout;
759 			goto out;
760 		}
761 
762 		if (!handle_system_ccs) {
763 			ttm_bo_move_null(ttm_bo, new_mem);
764 			goto out;
765 		}
766 	}
767 
768 	if (!move_lacks_source &&
769 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
770 	     (mem_type_is_vram(old_mem_type) &&
771 	      new_mem->mem_type == XE_PL_SYSTEM))) {
772 		hop->fpfn = 0;
773 		hop->lpfn = 0;
774 		hop->mem_type = XE_PL_TT;
775 		hop->flags = TTM_PL_FLAG_TEMPORARY;
776 		ret = -EMULTIHOP;
777 		goto out;
778 	}
779 
780 	if (bo->tile)
781 		migrate = bo->tile->migrate;
782 	else if (resource_is_vram(new_mem))
783 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
784 	else if (mem_type_is_vram(old_mem_type))
785 		migrate = mem_type_to_migrate(xe, old_mem_type);
786 	else
787 		migrate = xe->tiles[0].migrate;
788 
789 	xe_assert(xe, migrate);
790 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
791 	if (xe_rpm_reclaim_safe(xe)) {
792 		/*
793 		 * We might be called through swapout in the validation path of
794 		 * another TTM device, so acquire rpm here.
795 		 */
796 		xe_pm_runtime_get(xe);
797 	} else {
798 		drm_WARN_ON(&xe->drm, handle_system_ccs);
799 		xe_pm_runtime_get_noresume(xe);
800 	}
801 
802 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
803 		/*
804 		 * Kernel memory that is pinned should only be moved on suspend
805 		 * / resume; some of the pinned memory is required for the
806 		 * device to resume / use the GPU to move other evicted memory
807 		 * (user memory) around. This likely could be optimized a bit
808 		 * further where we find the minimum set of pinned memory
809 		 * required for resume, but for simplicity we do a memcpy for all
810 		 * pinned memory.
811 		 */
812 		ret = xe_bo_vmap(bo);
813 		if (!ret) {
814 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
815 
816 			/* Create a new VMAP once the kernel BO is back in VRAM */
817 			if (!ret && resource_is_vram(new_mem)) {
818 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
819 				void __iomem *new_addr = vram->mapping +
820 					(new_mem->start << PAGE_SHIFT);
821 
822 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
823 					ret = -EINVAL;
824 					xe_pm_runtime_put(xe);
825 					goto out;
826 				}
827 
828 				xe_assert(xe, new_mem->start ==
829 					  bo->placements->fpfn);
830 
831 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
832 			}
833 		}
834 	} else {
835 		if (move_lacks_source) {
836 			u32 flags = 0;
837 
838 			if (mem_type_is_vram(new_mem->mem_type))
839 				flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
840 			else if (handle_system_ccs)
841 				flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
842 
843 			fence = xe_migrate_clear(migrate, bo, new_mem, flags);
844 		} else {
845 			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
846 						new_mem, handle_system_ccs);
847 		}
848 		if (IS_ERR(fence)) {
849 			ret = PTR_ERR(fence);
850 			xe_pm_runtime_put(xe);
851 			goto out;
852 		}
853 		if (!move_lacks_source) {
854 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
855 							true, new_mem);
856 			if (ret) {
857 				dma_fence_wait(fence, false);
858 				ttm_bo_move_null(ttm_bo, new_mem);
859 				ret = 0;
860 			}
861 		} else {
862 			/*
863 			 * ttm_bo_move_accel_cleanup() may blow up if
864 			 * bo->resource == NULL, so just attach the
865 			 * fence and set the new resource.
866 			 */
867 			dma_resv_add_fence(ttm_bo->base.resv, fence,
868 					   DMA_RESV_USAGE_KERNEL);
869 			ttm_bo_move_null(ttm_bo, new_mem);
870 		}
871 
872 		dma_fence_put(fence);
873 	}
874 
875 	xe_pm_runtime_put(xe);
876 
877 out:
878 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
879 	    ttm_bo->ttm) {
880 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
881 						     DMA_RESV_USAGE_KERNEL,
882 						     false,
883 						     MAX_SCHEDULE_TIMEOUT);
884 		if (timeout < 0)
885 			ret = timeout;
886 
887 		xe_tt_unmap_sg(ttm_bo->ttm);
888 	}
889 
890 	return ret;
891 }
892 
893 /**
894  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
895  * @bo: The buffer object to move.
896  *
897  * On successful completion, the object memory will be moved to system memory.
898  *
899  * This is needed for special handling of pinned VRAM objects during
900  * suspend-resume.
901  *
902  * Return: 0 on success. Negative error code on failure.
903  */
904 int xe_bo_evict_pinned(struct xe_bo *bo)
905 {
906 	struct ttm_place place = {
907 		.mem_type = XE_PL_TT,
908 	};
909 	struct ttm_placement placement = {
910 		.placement = &place,
911 		.num_placement = 1,
912 	};
913 	struct ttm_operation_ctx ctx = {
914 		.interruptible = false,
915 		.gfp_retry_mayfail = true,
916 	};
917 	struct ttm_resource *new_mem;
918 	int ret;
919 
920 	xe_bo_assert_held(bo);
921 
922 	if (WARN_ON(!bo->ttm.resource))
923 		return -EINVAL;
924 
925 	if (WARN_ON(!xe_bo_is_pinned(bo)))
926 		return -EINVAL;
927 
928 	if (!xe_bo_is_vram(bo))
929 		return 0;
930 
931 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
932 	if (ret)
933 		return ret;
934 
935 	if (!bo->ttm.ttm) {
936 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
937 		if (!bo->ttm.ttm) {
938 			ret = -ENOMEM;
939 			goto err_res_free;
940 		}
941 	}
942 
943 	ret = ttm_bo_populate(&bo->ttm, &ctx);
944 	if (ret)
945 		goto err_res_free;
946 
947 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
948 	if (ret)
949 		goto err_res_free;
950 
951 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
952 	if (ret)
953 		goto err_res_free;
954 
955 	return 0;
956 
957 err_res_free:
958 	ttm_resource_free(&bo->ttm, &new_mem);
959 	return ret;
960 }
961 
962 /**
963  * xe_bo_restore_pinned() - Restore a pinned VRAM object
964  * @bo: The buffer object to move.
965  *
966  * On successful completion, the object memory will be moved back to VRAM.
967  *
968  * This is needed for special handling of pinned VRAM objects during
969  * suspend-resume.
970  *
971  * Return: 0 on success. Negative error code on failure.
972  */
973 int xe_bo_restore_pinned(struct xe_bo *bo)
974 {
975 	struct ttm_operation_ctx ctx = {
976 		.interruptible = false,
977 		.gfp_retry_mayfail = false,
978 	};
979 	struct ttm_resource *new_mem;
980 	struct ttm_place *place = &bo->placements[0];
981 	int ret;
982 
983 	xe_bo_assert_held(bo);
984 
985 	if (WARN_ON(!bo->ttm.resource))
986 		return -EINVAL;
987 
988 	if (WARN_ON(!xe_bo_is_pinned(bo)))
989 		return -EINVAL;
990 
991 	if (WARN_ON(xe_bo_is_vram(bo)))
992 		return -EINVAL;
993 
994 	if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
995 		return -EINVAL;
996 
997 	if (!mem_type_is_vram(place->mem_type))
998 		return 0;
999 
1000 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
1001 	if (ret)
1002 		return ret;
1003 
1004 	ret = ttm_bo_populate(&bo->ttm, &ctx);
1005 	if (ret)
1006 		goto err_res_free;
1007 
1008 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1009 	if (ret)
1010 		goto err_res_free;
1011 
1012 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
1013 	if (ret)
1014 		goto err_res_free;
1015 
1016 	return 0;
1017 
1018 err_res_free:
1019 	ttm_resource_free(&bo->ttm, &new_mem);
1020 	return ret;
1021 }
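
/*
 * Illustrative sketch only, not code in this file: a suspend / resume path
 * is expected to pair xe_bo_evict_pinned() and xe_bo_restore_pinned() under
 * the BO lock, roughly as below. Error handling is trimmed and the iteration
 * over the pinned BOs is assumed to live elsewhere.
 *
 *	xe_bo_lock(bo, false);
 *	ret = xe_bo_evict_pinned(bo);
 *	xe_bo_unlock(bo);
 *
 *	(device suspends and resumes)
 *
 *	xe_bo_lock(bo, false);
 *	ret = xe_bo_restore_pinned(bo);
 *	xe_bo_unlock(bo);
 */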
1022 
1023 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1024 				       unsigned long page_offset)
1025 {
1026 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1027 	struct xe_res_cursor cursor;
1028 	struct xe_mem_region *vram;
1029 
1030 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1031 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1032 
1033 	vram = res_to_mem_region(ttm_bo->resource);
1034 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1035 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1036 }
1037 
1038 static void __xe_bo_vunmap(struct xe_bo *bo);
1039 
1040 /*
1041  * TODO: Move this function to TTM so we don't rely on how TTM does its
1042  * locking, thereby abusing TTM internals.
1043  */
1044 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1045 {
1046 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1047 	bool locked;
1048 
1049 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1050 
1051 	/*
1052 	 * We can typically only race with TTM trylocking under the
1053 	 * lru_lock, which will immediately be unlocked again since
1054 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1055 	 * always succeed here, as long as we hold the lru lock.
1056 	 */
1057 	spin_lock(&ttm_bo->bdev->lru_lock);
1058 	locked = dma_resv_trylock(ttm_bo->base.resv);
1059 	spin_unlock(&ttm_bo->bdev->lru_lock);
1060 	xe_assert(xe, locked);
1061 
1062 	return locked;
1063 }
1064 
1065 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1066 {
1067 	struct dma_resv_iter cursor;
1068 	struct dma_fence *fence;
1069 	struct dma_fence *replacement = NULL;
1070 	struct xe_bo *bo;
1071 
1072 	if (!xe_bo_is_xe_bo(ttm_bo))
1073 		return;
1074 
1075 	bo = ttm_to_xe_bo(ttm_bo);
1076 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1077 
1078 	/*
1079 	 * Corner case where TTM fails to allocate memory and this BO's resv
1080 	 * still points to the VM's resv.
1081 	 */
1082 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1083 		return;
1084 
1085 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1086 		return;
1087 
1088 	/*
1089 	 * Scrub the preempt fences if any. The unbind fence is already
1090 	 * attached to the resv.
1091 	 * TODO: Don't do this for external bos once we scrub them after
1092 	 * unbind.
1093 	 */
1094 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1095 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1096 		if (xe_fence_is_xe_preempt(fence) &&
1097 		    !dma_fence_is_signaled(fence)) {
1098 			if (!replacement)
1099 				replacement = dma_fence_get_stub();
1100 
1101 			dma_resv_replace_fences(ttm_bo->base.resv,
1102 						fence->context,
1103 						replacement,
1104 						DMA_RESV_USAGE_BOOKKEEP);
1105 		}
1106 	}
1107 	dma_fence_put(replacement);
1108 
1109 	dma_resv_unlock(ttm_bo->base.resv);
1110 }
1111 
1112 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1113 {
1114 	if (!xe_bo_is_xe_bo(ttm_bo))
1115 		return;
1116 
1117 	/*
1118 	 * Object is idle and about to be destroyed. Release the
1119 	 * dma-buf attachment.
1120 	 */
1121 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1122 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1123 						       struct xe_ttm_tt, ttm);
1124 
1125 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1126 					 DMA_BIDIRECTIONAL);
1127 		ttm_bo->sg = NULL;
1128 		xe_tt->sg = NULL;
1129 	}
1130 }
1131 
1132 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1133 {
1134 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1135 
1136 	if (ttm_bo->ttm) {
1137 		struct ttm_placement place = {};
1138 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1139 
1140 		drm_WARN_ON(&xe->drm, ret);
1141 	}
1142 }
1143 
1144 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1145 {
1146 	struct ttm_operation_ctx ctx = {
1147 		.interruptible = false,
1148 		.gfp_retry_mayfail = false,
1149 	};
1150 
1151 	if (ttm_bo->ttm) {
1152 		struct xe_ttm_tt *xe_tt =
1153 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1154 
1155 		if (xe_tt->purgeable)
1156 			xe_ttm_bo_purge(ttm_bo, &ctx);
1157 	}
1158 }
1159 
1160 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1161 				unsigned long offset, void *buf, int len,
1162 				int write)
1163 {
1164 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1165 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1166 	struct iosys_map vmap;
1167 	struct xe_res_cursor cursor;
1168 	struct xe_mem_region *vram;
1169 	int bytes_left = len;
1170 
1171 	xe_bo_assert_held(bo);
1172 	xe_device_assert_mem_access(xe);
1173 
1174 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1175 		return -EIO;
1176 
1177 	/* FIXME: Use GPU for non-visible VRAM */
1178 	if (!xe_ttm_resource_visible(ttm_bo->resource))
1179 		return -EIO;
1180 
1181 	vram = res_to_mem_region(ttm_bo->resource);
1182 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1183 		     bo->size - (offset & PAGE_MASK), &cursor);
1184 
1185 	do {
1186 		unsigned long page_offset = (offset & ~PAGE_MASK);
1187 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1188 
1189 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1190 					  cursor.start);
1191 		if (write)
1192 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1193 		else
1194 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1195 
1196 		buf += byte_count;
1197 		offset += byte_count;
1198 		bytes_left -= byte_count;
1199 		if (bytes_left)
1200 			xe_res_next(&cursor, PAGE_SIZE);
1201 	} while (bytes_left);
1202 
1203 	return len;
1204 }
1205 
1206 const struct ttm_device_funcs xe_ttm_funcs = {
1207 	.ttm_tt_create = xe_ttm_tt_create,
1208 	.ttm_tt_populate = xe_ttm_tt_populate,
1209 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1210 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1211 	.evict_flags = xe_evict_flags,
1212 	.move = xe_bo_move,
1213 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1214 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1215 	.access_memory = xe_ttm_access_memory,
1216 	.release_notify = xe_ttm_bo_release_notify,
1217 	.eviction_valuable = ttm_bo_eviction_valuable,
1218 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1219 	.swap_notify = xe_ttm_bo_swap_notify,
1220 };
1221 
1222 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1223 {
1224 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1225 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1226 	struct xe_tile *tile;
1227 	u8 id;
1228 
1229 	if (bo->ttm.base.import_attach)
1230 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1231 	drm_gem_object_release(&bo->ttm.base);
1232 
1233 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1234 
1235 	for_each_tile(tile, xe, id)
1236 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1237 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1238 
1239 #ifdef CONFIG_PROC_FS
1240 	if (bo->client)
1241 		xe_drm_client_remove_bo(bo);
1242 #endif
1243 
1244 	if (bo->vm && xe_bo_is_user(bo))
1245 		xe_vm_put(bo->vm);
1246 
1247 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1248 	if (!list_empty(&bo->vram_userfault_link))
1249 		list_del(&bo->vram_userfault_link);
1250 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1251 
1252 	kfree(bo);
1253 }
1254 
1255 static void xe_gem_object_free(struct drm_gem_object *obj)
1256 {
1257 	/* Our BO reference counting scheme works as follows:
1258 	 *
1259 	 * The gem object kref is typically used throughout the driver,
1260 	 * and the gem object holds a ttm_buffer_object refcount, so
1261 	 * that when the last gem object reference is put, which is when
1262 	 * we end up in this function, we also put that ttm_buffer_object
1263 	 * refcount. Anything using gem interfaces is then no longer
1264 	 * allowed to access the object in a way that requires a gem
1265 	 * refcount, including locking the object.
1266 	 *
1267 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1268 	 * refcount directly if needed.
1269 	 */
1270 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1271 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1272 }
1273 
1274 static void xe_gem_object_close(struct drm_gem_object *obj,
1275 				struct drm_file *file_priv)
1276 {
1277 	struct xe_bo *bo = gem_to_xe_bo(obj);
1278 
1279 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1280 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1281 
1282 		xe_bo_lock(bo, false);
1283 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1284 		xe_bo_unlock(bo);
1285 	}
1286 }
1287 
1288 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1289 {
1290 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1291 	struct drm_device *ddev = tbo->base.dev;
1292 	struct xe_device *xe = to_xe_device(ddev);
1293 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1294 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1295 	vm_fault_t ret;
1296 	int idx;
1297 
1298 	if (needs_rpm)
1299 		xe_pm_runtime_get(xe);
1300 
1301 	ret = ttm_bo_vm_reserve(tbo, vmf);
1302 	if (ret)
1303 		goto out;
1304 
1305 	if (drm_dev_enter(ddev, &idx)) {
1306 		trace_xe_bo_cpu_fault(bo);
1307 
1308 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1309 					       TTM_BO_VM_NUM_PREFAULT);
1310 		drm_dev_exit(idx);
1311 	} else {
1312 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1313 	}
1314 
1315 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1316 		goto out;
1317 	/*
1318 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1319 	 */
1320 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1321 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1322 		if (list_empty(&bo->vram_userfault_link))
1323 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1324 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1325 	}
1326 
1327 	dma_resv_unlock(tbo->base.resv);
1328 out:
1329 	if (needs_rpm)
1330 		xe_pm_runtime_put(xe);
1331 
1332 	return ret;
1333 }
1334 
1335 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1336 			   void *buf, int len, int write)
1337 {
1338 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1339 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1340 	struct xe_device *xe = xe_bo_device(bo);
1341 	int ret;
1342 
1343 	xe_pm_runtime_get(xe);
1344 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1345 	xe_pm_runtime_put(xe);
1346 
1347 	return ret;
1348 }
1349 
1350 /**
1351  * xe_bo_read() - Read from an xe_bo
1352  * @bo: The buffer object to read from.
1353  * @offset: The byte offset to start reading from.
1354  * @dst: Location to store the read.
1355  * @size: Size in bytes for the read.
1356  *
1357  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1358  *
1359  * Return: Zero on success, or negative error.
1360  */
1361 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1362 {
1363 	int ret;
1364 
1365 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1366 	if (ret >= 0 && ret != size)
1367 		ret = -EIO;
1368 	else if (ret == size)
1369 		ret = 0;
1370 
1371 	return ret;
1372 }
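
/*
 * Illustrative usage sketch for xe_bo_read(), not code in this file; "bo"
 * and "offset" are placeholders:
 *
 *	u32 val;
 *	int err = xe_bo_read(bo, offset, &val, sizeof(val));
 *
 *	if (err)
 *		return err;
 */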
1373 
1374 static const struct vm_operations_struct xe_gem_vm_ops = {
1375 	.fault = xe_gem_fault,
1376 	.open = ttm_bo_vm_open,
1377 	.close = ttm_bo_vm_close,
1378 	.access = xe_bo_vm_access,
1379 };
1380 
1381 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1382 	.free = xe_gem_object_free,
1383 	.close = xe_gem_object_close,
1384 	.mmap = drm_gem_ttm_mmap,
1385 	.export = xe_gem_prime_export,
1386 	.vm_ops = &xe_gem_vm_ops,
1387 };
1388 
1389 /**
1390  * xe_bo_alloc - Allocate storage for a struct xe_bo
1391  *
1392  * This function is intended to allocate storage to be used for input
1393  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1394  * to __xe_bo_create_locked(), in the case where a pointer to the bo to be
1395  * created is needed before the call to __xe_bo_create_locked().
1396  * If __xe_bo_create_locked() ends up never being called, then the
1397  * xe_bo_free().
1398  *
1399  * Return: A pointer to an uninitialized struct xe_bo on success,
1400  * ERR_PTR(-ENOMEM) on error.
1401  */
1402 struct xe_bo *xe_bo_alloc(void)
1403 {
1404 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1405 
1406 	if (!bo)
1407 		return ERR_PTR(-ENOMEM);
1408 
1409 	return bo;
1410 }
1411 
1412 /**
1413  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1414  * @bo: The buffer object storage.
1415  *
1416  * Refer to xe_bo_alloc() documentation for valid use-cases.
1417  */
1418 void xe_bo_free(struct xe_bo *bo)
1419 {
1420 	kfree(bo);
1421 }
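
/*
 * Illustrative sketch of the xe_bo_alloc() / xe_bo_free() pattern described
 * above, not code in this file. Storage is allocated up front so a pointer
 * to the BO exists before creation; if creation is never attempted, the
 * storage must be released with xe_bo_free(). "setup_fails" below is a
 * hypothetical condition:
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	if (setup_fails) {
 *		xe_bo_free(bo);
 *		return -ENODEV;
 *	}
 *
 *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
 *				    cpu_caching, type, flags);
 */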
1422 
1423 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1424 				     struct xe_tile *tile, struct dma_resv *resv,
1425 				     struct ttm_lru_bulk_move *bulk, size_t size,
1426 				     u16 cpu_caching, enum ttm_bo_type type,
1427 				     u32 flags)
1428 {
1429 	struct ttm_operation_ctx ctx = {
1430 		.interruptible = true,
1431 		.no_wait_gpu = false,
1432 		.gfp_retry_mayfail = true,
1433 	};
1434 	struct ttm_placement *placement;
1435 	uint32_t alignment;
1436 	size_t aligned_size;
1437 	int err;
1438 
1439 	/* Only kernel objects should set a tile */
1440 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1441 
1442 	if (XE_WARN_ON(!size)) {
1443 		xe_bo_free(bo);
1444 		return ERR_PTR(-EINVAL);
1445 	}
1446 
1447 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT to also be set */
1448 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1449 		return ERR_PTR(-EINVAL);
1450 
1451 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1452 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1453 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1454 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1455 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1456 
1457 		aligned_size = ALIGN(size, align);
1458 		if (type != ttm_bo_type_device)
1459 			size = ALIGN(size, align);
1460 		flags |= XE_BO_FLAG_INTERNAL_64K;
1461 		alignment = align >> PAGE_SHIFT;
1462 	} else {
1463 		aligned_size = ALIGN(size, SZ_4K);
1464 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1465 		alignment = SZ_4K >> PAGE_SHIFT;
1466 	}
1467 
1468 	if (type == ttm_bo_type_device && aligned_size != size)
1469 		return ERR_PTR(-EINVAL);
1470 
1471 	if (!bo) {
1472 		bo = xe_bo_alloc();
1473 		if (IS_ERR(bo))
1474 			return bo;
1475 	}
1476 
1477 	bo->ccs_cleared = false;
1478 	bo->tile = tile;
1479 	bo->size = size;
1480 	bo->flags = flags;
1481 	bo->cpu_caching = cpu_caching;
1482 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1483 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1484 	INIT_LIST_HEAD(&bo->pinned_link);
1485 #ifdef CONFIG_PROC_FS
1486 	INIT_LIST_HEAD(&bo->client_link);
1487 #endif
1488 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1489 
1490 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1491 
1492 	if (resv) {
1493 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1494 		ctx.resv = resv;
1495 	}
1496 
1497 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1498 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1499 		if (WARN_ON(err)) {
1500 			xe_ttm_bo_destroy(&bo->ttm);
1501 			return ERR_PTR(err);
1502 		}
1503 	}
1504 
1505 	/* Defer populating type_sg bos */
1506 	placement = (type == ttm_bo_type_sg ||
1507 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1508 		&bo->placement;
1509 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1510 				   placement, alignment,
1511 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1512 	if (err)
1513 		return ERR_PTR(err);
1514 
1515 	/*
1516 	 * The VRAM pages underneath are potentially still being accessed by the
1517 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1518 	 * sure to add any corresponding move/clear fences into the object's
1519 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1520 	 *
1521 	 * For KMD internal buffers we don't care about GPU clearing, however we
1522 	 * still need to handle async evictions, where the VRAM is still being
1523 	 * accessed by the GPU. Most internal callers are not expecting this,
1524 	 * since they are missing the required synchronisation before accessing
1525 	 * the memory. To keep things simple just sync wait any kernel fences
1526 	 * here, if the buffer is designated KMD internal.
1527 	 *
1528 	 * For normal userspace objects we should already have the required
1529 	 * pipelining or sync waiting elsewhere, since we already have to deal
1530 	 * with things like async GPU clearing.
1531 	 */
1532 	if (type == ttm_bo_type_kernel) {
1533 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1534 						     DMA_RESV_USAGE_KERNEL,
1535 						     ctx.interruptible,
1536 						     MAX_SCHEDULE_TIMEOUT);
1537 
1538 		if (timeout < 0) {
1539 			if (!resv)
1540 				dma_resv_unlock(bo->ttm.base.resv);
1541 			xe_bo_put(bo);
1542 			return ERR_PTR(timeout);
1543 		}
1544 	}
1545 
1546 	bo->created = true;
1547 	if (bulk)
1548 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1549 	else
1550 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1551 
1552 	return bo;
1553 }
1554 
1555 static int __xe_bo_fixed_placement(struct xe_device *xe,
1556 				   struct xe_bo *bo,
1557 				   u32 flags,
1558 				   u64 start, u64 end, u64 size)
1559 {
1560 	struct ttm_place *place = bo->placements;
1561 
1562 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1563 		return -EINVAL;
1564 
1565 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1566 	place->fpfn = start >> PAGE_SHIFT;
1567 	place->lpfn = end >> PAGE_SHIFT;
1568 
1569 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1570 	case XE_BO_FLAG_VRAM0:
1571 		place->mem_type = XE_PL_VRAM0;
1572 		break;
1573 	case XE_BO_FLAG_VRAM1:
1574 		place->mem_type = XE_PL_VRAM1;
1575 		break;
1576 	case XE_BO_FLAG_STOLEN:
1577 		place->mem_type = XE_PL_STOLEN;
1578 		break;
1579 
1580 	default:
1581 		/* 0 or multiple of the above set */
1582 		return -EINVAL;
1583 	}
1584 
1585 	bo->placement = (struct ttm_placement) {
1586 		.num_placement = 1,
1587 		.placement = place,
1588 	};
1589 
1590 	return 0;
1591 }
1592 
1593 static struct xe_bo *
1594 __xe_bo_create_locked(struct xe_device *xe,
1595 		      struct xe_tile *tile, struct xe_vm *vm,
1596 		      size_t size, u64 start, u64 end,
1597 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
1598 		      u64 alignment)
1599 {
1600 	struct xe_bo *bo = NULL;
1601 	int err;
1602 
1603 	if (vm)
1604 		xe_vm_assert_held(vm);
1605 
1606 	if (start || end != ~0ULL) {
1607 		bo = xe_bo_alloc();
1608 		if (IS_ERR(bo))
1609 			return bo;
1610 
1611 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1612 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1613 		if (err) {
1614 			xe_bo_free(bo);
1615 			return ERR_PTR(err);
1616 		}
1617 	}
1618 
1619 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1620 				    vm && !xe_vm_in_fault_mode(vm) &&
1621 				    flags & XE_BO_FLAG_USER ?
1622 				    &vm->lru_bulk_move : NULL, size,
1623 				    cpu_caching, type, flags);
1624 	if (IS_ERR(bo))
1625 		return bo;
1626 
1627 	bo->min_align = alignment;
1628 
1629 	/*
1630 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
1631 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1632 	 * will keep a reference to the vm, and avoid circular references
1633 	 * by having all the vm's bo references released at vm close
1634 	 * time.
1635 	 */
1636 	if (vm && xe_bo_is_user(bo))
1637 		xe_vm_get(vm);
1638 	bo->vm = vm;
1639 
1640 	if (bo->flags & XE_BO_FLAG_GGTT) {
1641 		struct xe_tile *t;
1642 		u8 id;
1643 
1644 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
1645 			if (!tile && flags & XE_BO_FLAG_STOLEN)
1646 				tile = xe_device_get_root_tile(xe);
1647 
1648 			xe_assert(xe, tile);
1649 		}
1650 
1651 		for_each_tile(t, xe, id) {
1652 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
1653 				continue;
1654 
1655 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
1656 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
1657 							   start + bo->size, U64_MAX);
1658 			} else {
1659 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
1660 			}
1661 			if (err)
1662 				goto err_unlock_put_bo;
1663 		}
1664 	}
1665 
1666 	trace_xe_bo_create(bo);
1667 	return bo;
1668 
1669 err_unlock_put_bo:
1670 	__xe_bo_unset_bulk_move(bo);
1671 	xe_bo_unlock_vm_held(bo);
1672 	xe_bo_put(bo);
1673 	return ERR_PTR(err);
1674 }
1675 
1676 struct xe_bo *
1677 xe_bo_create_locked_range(struct xe_device *xe,
1678 			  struct xe_tile *tile, struct xe_vm *vm,
1679 			  size_t size, u64 start, u64 end,
1680 			  enum ttm_bo_type type, u32 flags, u64 alignment)
1681 {
1682 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
1683 				     flags, alignment);
1684 }
1685 
1686 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1687 				  struct xe_vm *vm, size_t size,
1688 				  enum ttm_bo_type type, u32 flags)
1689 {
1690 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
1691 				     flags, 0);
1692 }
1693 
1694 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1695 				struct xe_vm *vm, size_t size,
1696 				u16 cpu_caching,
1697 				u32 flags)
1698 {
1699 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1700 						 cpu_caching, ttm_bo_type_device,
1701 						 flags | XE_BO_FLAG_USER, 0);
1702 	if (!IS_ERR(bo))
1703 		xe_bo_unlock_vm_held(bo);
1704 
1705 	return bo;
1706 }
1707 
1708 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1709 			   struct xe_vm *vm, size_t size,
1710 			   enum ttm_bo_type type, u32 flags)
1711 {
1712 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1713 
1714 	if (!IS_ERR(bo))
1715 		xe_bo_unlock_vm_held(bo);
1716 
1717 	return bo;
1718 }
1719 
1720 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1721 				      struct xe_vm *vm,
1722 				      size_t size, u64 offset,
1723 				      enum ttm_bo_type type, u32 flags)
1724 {
1725 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
1726 					       type, flags, 0);
1727 }
1728 
1729 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
1730 					      struct xe_tile *tile,
1731 					      struct xe_vm *vm,
1732 					      size_t size, u64 offset,
1733 					      enum ttm_bo_type type, u32 flags,
1734 					      u64 alignment)
1735 {
1736 	struct xe_bo *bo;
1737 	int err;
1738 	u64 start = offset == ~0ull ? 0 : offset;
1739 	u64 end = offset == ~0ull ? offset : start + size;
1740 
1741 	if (flags & XE_BO_FLAG_STOLEN &&
1742 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1743 		flags |= XE_BO_FLAG_GGTT;
1744 
1745 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1746 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
1747 				       alignment);
1748 	if (IS_ERR(bo))
1749 		return bo;
1750 
1751 	err = xe_bo_pin(bo);
1752 	if (err)
1753 		goto err_put;
1754 
1755 	err = xe_bo_vmap(bo);
1756 	if (err)
1757 		goto err_unpin;
1758 
1759 	xe_bo_unlock_vm_held(bo);
1760 
1761 	return bo;
1762 
1763 err_unpin:
1764 	xe_bo_unpin(bo);
1765 err_put:
1766 	xe_bo_unlock_vm_held(bo);
1767 	xe_bo_put(bo);
1768 	return ERR_PTR(err);
1769 }
1770 
1771 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1772 				   struct xe_vm *vm, size_t size,
1773 				   enum ttm_bo_type type, u32 flags)
1774 {
1775 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1776 }
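
/*
 * Illustrative sketch, not code in this file: a typical use of the pin_map
 * helpers above for a small kernel-internal buffer. The flag combination is
 * just an example:
 *
 *	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
 *				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *				  XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	xe_map_wr(xe, &bo->vmap, 0, u32, 0xc0ffee);
 */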
1777 
1778 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1779 				     const void *data, size_t size,
1780 				     enum ttm_bo_type type, u32 flags)
1781 {
1782 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1783 						ALIGN(size, PAGE_SIZE),
1784 						type, flags);
1785 	if (IS_ERR(bo))
1786 		return bo;
1787 
1788 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1789 
1790 	return bo;
1791 }
1792 
1793 static void __xe_bo_unpin_map_no_vm(void *arg)
1794 {
1795 	xe_bo_unpin_map_no_vm(arg);
1796 }
1797 
1798 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1799 					   size_t size, u32 flags)
1800 {
1801 	struct xe_bo *bo;
1802 	int ret;
1803 
1804 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
1805 
1806 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1807 	if (IS_ERR(bo))
1808 		return bo;
1809 
1810 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
1811 	if (ret)
1812 		return ERR_PTR(ret);
1813 
1814 	return bo;
1815 }
1816 
1817 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1818 					     const void *data, size_t size, u32 flags)
1819 {
1820 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1821 
1822 	if (IS_ERR(bo))
1823 		return bo;
1824 
1825 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1826 
1827 	return bo;
1828 }
1829 
1830 /**
1831  * xe_managed_bo_reinit_in_vram - Replace a managed system-memory BO with a VRAM copy
1832  * @xe: xe device
1833  * @tile: Tile where the new buffer will be created
1834  * @src: Managed buffer object allocated in system memory
1835  *
1836  * Replace a managed src buffer object allocated in system memory with a new
1837  * one allocated in vram, copying the data between them.
1838  * Buffer object in VRAM is not going to have the same GGTT address, the caller
1839  * is responsible for making sure that any old references to it are updated.
1840  *
1841  * Returns 0 for success, negative error code otherwise.
1842  */
1843 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1844 {
1845 	struct xe_bo *bo;
1846 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
1847 
1848 	dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
1849 
1850 	xe_assert(xe, IS_DGFX(xe));
1851 	xe_assert(xe, !(*src)->vmap.is_iomem);
1852 
1853 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
1854 					    (*src)->size, dst_flags);
1855 	if (IS_ERR(bo))
1856 		return PTR_ERR(bo);
1857 
1858 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
1859 	*src = bo;
1860 
1861 	return 0;
1862 }
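
/*
 * Illustrative sketch, not code in this file: after an initial load into
 * system memory, a caller can migrate a managed BO to VRAM once the device
 * is ready. "uc->fw_bo" is a hypothetical field:
 *
 *	if (IS_DGFX(xe)) {
 *		err = xe_managed_bo_reinit_in_vram(xe, tile, &uc->fw_bo);
 *		if (err)
 *			return err;
 *	}
 */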
1863 
1864 /*
1865  * XXX: This is in the VM bind data path, likely should calculate this once and
1866  * store, with a recalculation if the BO is moved.
1867  */
1868 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1869 {
1870 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1871 
1872 	if (res->mem_type == XE_PL_STOLEN)
1873 		return xe_ttm_stolen_gpu_offset(xe);
1874 
1875 	return res_to_mem_region(res)->dpa_base;
1876 }
1877 
1878 /**
1879  * xe_bo_pin_external - pin an external BO
1880  * @bo: buffer object to be pinned
1881  *
1882  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1883  * BO. Unique call compared to xe_bo_pin as this function has its own set of
1884  * asserts and code to ensure evict / restore on suspend / resume.
1885  *
1886  * Returns 0 for success, negative error code otherwise.
1887  */
1888 int xe_bo_pin_external(struct xe_bo *bo)
1889 {
1890 	struct xe_device *xe = xe_bo_device(bo);
1891 	int err;
1892 
1893 	xe_assert(xe, !bo->vm);
1894 	xe_assert(xe, xe_bo_is_user(bo));
1895 
1896 	if (!xe_bo_is_pinned(bo)) {
1897 		err = xe_bo_validate(bo, NULL, false);
1898 		if (err)
1899 			return err;
1900 
1901 		if (xe_bo_is_vram(bo)) {
1902 			spin_lock(&xe->pinned.lock);
1903 			list_add_tail(&bo->pinned_link,
1904 				      &xe->pinned.external_vram);
1905 			spin_unlock(&xe->pinned.lock);
1906 		}
1907 	}
1908 
1909 	ttm_bo_pin(&bo->ttm);
1910 
1911 	/*
1912 	 * FIXME: If we always use the reserve / unreserve functions for locking
1913 	 * we do not need this.
1914 	 */
1915 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1916 
1917 	return 0;
1918 }
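/*
 * Pairing sketch for external BOs, e.g. around dma-buf export: the dma_resv
 * lock must be held across the pin (xe_bo_validate() and ttm_bo_pin() both
 * rely on it). "example_pin_exported_bo" is a hypothetical caller name.
 *
 *	static int example_pin_exported_bo(struct xe_bo *bo)
 *	{
 *		int err;
 *
 *		err = xe_bo_lock(bo, true);
 *		if (err)
 *			return err;
 *
 *		err = xe_bo_pin_external(bo);
 *		xe_bo_unlock(bo);
 *
 *		return err;
 *	}
 *
 * The release path drops the pin with xe_bo_unpin_external() under the same
 * lock.
 */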
1919 
1920 int xe_bo_pin(struct xe_bo *bo)
1921 {
1922 	struct ttm_place *place = &bo->placements[0];
1923 	struct xe_device *xe = xe_bo_device(bo);
1924 	int err;
1925 
1926 	/* We currently don't expect user BO to be pinned */
1927 	xe_assert(xe, !xe_bo_is_user(bo));
1928 
1929 	/* Pinned object must be in GGTT or have pinned flag */
1930 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
1931 				   XE_BO_FLAG_GGTT));
1932 
1933 	/*
1934 	 * No reason we can't support pinning imported dma-bufs; we just don't
1935 	 * expect to pin an imported dma-buf.
1936 	 */
1937 	xe_assert(xe, !bo->ttm.base.import_attach);
1938 
1939 	/* We only expect at most 1 pin */
1940 	xe_assert(xe, !xe_bo_is_pinned(bo));
1941 
1942 	err = xe_bo_validate(bo, NULL, false);
1943 	if (err)
1944 		return err;
1945 
1946 	/*
1947 	 * For pinned objects on DGFX that are also in VRAM, we expect them to be
1948 	 * in contiguous VRAM memory, which is required for eviction / restore
1949 	 * during suspend / resume (we force restore to the same physical address).
1950 	 */
1951 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1952 	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1953 		if (mem_type_is_vram(place->mem_type)) {
1954 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1955 
1956 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1957 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1958 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1959 		}
1960 	}
1961 
1962 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
1963 		spin_lock(&xe->pinned.lock);
1964 		list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1965 		spin_unlock(&xe->pinned.lock);
1966 	}
1967 
1968 	ttm_bo_pin(&bo->ttm);
1969 
1970 	/*
1971 	 * FIXME: If we always use the reserve / unreserve functions for locking
1972 	 * we do not need this.
1973 	 */
1974 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1975 
1976 	return 0;
1977 }
1978 
1979 /**
1980  * xe_bo_unpin_external - unpin an external BO
1981  * @bo: buffer object to be unpinned
1982  *
1983  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1984  * BO. Unlike xe_bo_unpin(), this function has its own set of asserts and code
1985  * to ensure evict / restore on suspend / resume.
1988  */
1989 void xe_bo_unpin_external(struct xe_bo *bo)
1990 {
1991 	struct xe_device *xe = xe_bo_device(bo);
1992 
1993 	xe_assert(xe, !bo->vm);
1994 	xe_assert(xe, xe_bo_is_pinned(bo));
1995 	xe_assert(xe, xe_bo_is_user(bo));
1996 
1997 	spin_lock(&xe->pinned.lock);
1998 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
1999 		list_del_init(&bo->pinned_link);
2000 	spin_unlock(&xe->pinned.lock);
2001 
2002 	ttm_bo_unpin(&bo->ttm);
2003 
2004 	/*
2005 	 * FIXME: If we always use the reserve / unreserve functions for locking
2006 	 * we do not need this.
2007 	 */
2008 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2009 }
2010 
2011 void xe_bo_unpin(struct xe_bo *bo)
2012 {
2013 	struct ttm_place *place = &bo->placements[0];
2014 	struct xe_device *xe = xe_bo_device(bo);
2015 
2016 	xe_assert(xe, !bo->ttm.base.import_attach);
2017 	xe_assert(xe, xe_bo_is_pinned(bo));
2018 
2019 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2020 		spin_lock(&xe->pinned.lock);
2021 		xe_assert(xe, !list_empty(&bo->pinned_link));
2022 		list_del_init(&bo->pinned_link);
2023 		spin_unlock(&xe->pinned.lock);
2024 	}
2025 	ttm_bo_unpin(&bo->ttm);
2026 }
2027 
2028 /**
2029  * xe_bo_validate() - Make sure the bo is in an allowed placement
2030  * @bo: The bo
2031  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2032  *      NULL. Used together with @allow_res_evict.
2033  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2034  *                   reservation object.
2035  *
2036  * Make sure the bo is in an allowed placement, migrating it if necessary. If
2037  * needed, other bos will be evicted. If bos selected for eviction share
2038  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
2039  * set to true; otherwise they will be bypassed.
2040  *
2041  * Return: 0 on success, negative error code on failure. May return
2042  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2043  */
2044 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
2045 {
2046 	struct ttm_operation_ctx ctx = {
2047 		.interruptible = true,
2048 		.no_wait_gpu = false,
2049 		.gfp_retry_mayfail = true,
2050 	};
2051 
2052 	if (vm) {
2053 		lockdep_assert_held(&vm->lock);
2054 		xe_vm_assert_held(vm);
2055 
2056 		ctx.allow_res_evict = allow_res_evict;
2057 		ctx.resv = xe_vm_resv(vm);
2058 	}
2059 
2060 	trace_xe_bo_validate(bo);
2061 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2062 }
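/*
 * Call-pattern sketch: validating a VM-private BO while the VM's dma_resv is
 * already locked, allowing other BOs that share the reservation object to be
 * evicted. "example_validate_vm_bo" is a hypothetical caller; it assumes the
 * caller already holds vm->lock and the VM's dma_resv.
 *
 *	static int example_validate_vm_bo(struct xe_vm *vm, struct xe_bo *bo)
 *	{
 *		return xe_bo_validate(bo, vm, true);
 *	}
 */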
2063 
2064 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2065 {
2066 	if (bo->destroy == &xe_ttm_bo_destroy)
2067 		return true;
2068 
2069 	return false;
2070 }
2071 
2072 /*
2073  * Resolve a BO address. There is no assert to check if the proper lock is held,
2074  * so it should only be used in cases where it is not fatal to get the wrong
2075  * address, such as printing debug information, but not in cases where memory is
2076  * written based on this result.
2077  */
2078 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2079 {
2080 	struct xe_device *xe = xe_bo_device(bo);
2081 	struct xe_res_cursor cur;
2082 	u64 page;
2083 
2084 	xe_assert(xe, page_size <= PAGE_SIZE);
2085 	page = offset >> PAGE_SHIFT;
2086 	offset &= (PAGE_SIZE - 1);
2087 
2088 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2089 		xe_assert(xe, bo->ttm.ttm);
2090 
2091 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2092 				page_size, &cur);
2093 		return xe_res_dma(&cur) + offset;
2094 	} else {
2097 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2098 			     page_size, &cur);
2099 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2100 	}
2101 }
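/*
 * Debug-only usage sketch, per the comment above: resolve a BO address without
 * any locking guarantees, e.g. for a debug print. "example_dbg_bo_addr" is a
 * hypothetical helper.
 *
 *	static void example_dbg_bo_addr(struct xe_bo *bo)
 *	{
 *		struct xe_device *xe = xe_bo_device(bo);
 *		dma_addr_t addr = __xe_bo_addr(bo, 0, PAGE_SIZE);
 *
 *		drm_dbg(&xe->drm, "bo %p: addr %pad\n", bo, &addr);
 *	}
 */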
2102 
2103 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2104 {
2105 	if (!READ_ONCE(bo->ttm.pin_count))
2106 		xe_bo_assert_held(bo);
2107 	return __xe_bo_addr(bo, offset, page_size);
2108 }
2109 
2110 int xe_bo_vmap(struct xe_bo *bo)
2111 {
2112 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2113 	void *virtual;
2114 	bool is_iomem;
2115 	int ret;
2116 
2117 	xe_bo_assert_held(bo);
2118 
2119 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2120 			!force_contiguous(bo->flags)))
2121 		return -EINVAL;
2122 
2123 	if (!iosys_map_is_null(&bo->vmap))
2124 		return 0;
2125 
2126 	/*
2127 	 * We use this more or less deprecated interface for now since
2128 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2129 	 * single page bos, which is done here.
2130 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2131 	 * to use struct iosys_map.
2132 	 */
2133 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
2134 	if (ret)
2135 		return ret;
2136 
2137 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
2138 	if (is_iomem)
2139 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
2140 	else
2141 		iosys_map_set_vaddr(&bo->vmap, virtual);
2142 
2143 	return 0;
2144 }
2145 
2146 static void __xe_bo_vunmap(struct xe_bo *bo)
2147 {
2148 	if (!iosys_map_is_null(&bo->vmap)) {
2149 		iosys_map_clear(&bo->vmap);
2150 		ttm_bo_kunmap(&bo->kmap);
2151 	}
2152 }
2153 
2154 void xe_bo_vunmap(struct xe_bo *bo)
2155 {
2156 	xe_bo_assert_held(bo);
2157 	__xe_bo_vunmap(bo);
2158 }
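/*
 * CPU-mapping sketch: xe_bo_vmap() only succeeds for BOs created with
 * XE_BO_FLAG_NEEDS_CPU_ACCESS and a forced-contiguous placement (see the
 * drm_WARN_ON() above), and the BO must be held. A hypothetical caller copying
 * data into such a BO could look like:
 *
 *	static int example_copy_to_bo(struct xe_bo *bo, const void *data,
 *				      size_t size)
 *	{
 *		struct xe_device *xe = xe_bo_device(bo);
 *		int err;
 *
 *		xe_bo_assert_held(bo);
 *
 *		err = xe_bo_vmap(bo);
 *		if (err)
 *			return err;
 *
 *		xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
 *		xe_bo_vunmap(bo);
 *
 *		return 0;
 *	}
 */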
2159 
2160 static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
2161 {
2162 	if (value == DRM_XE_PXP_TYPE_NONE)
2163 		return 0;
2164 
2165 	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
2166 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
2167 		return -EINVAL;
2168 
2169 	return xe_pxp_key_assign(xe->pxp, bo);
2170 }
2171 
2172 typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
2173 					     struct xe_bo *bo,
2174 					     u64 value);
2175 
2176 static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
2177 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type,
2178 };
2179 
2180 static int gem_create_user_ext_set_property(struct xe_device *xe,
2181 					    struct xe_bo *bo,
2182 					    u64 extension)
2183 {
2184 	u64 __user *address = u64_to_user_ptr(extension);
2185 	struct drm_xe_ext_set_property ext;
2186 	int err;
2187 	u32 idx;
2188 
2189 	err = __copy_from_user(&ext, address, sizeof(ext));
2190 	if (XE_IOCTL_DBG(xe, err))
2191 		return -EFAULT;
2192 
2193 	if (XE_IOCTL_DBG(xe, ext.property >=
2194 			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
2195 	    XE_IOCTL_DBG(xe, ext.pad) ||
2196 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
2197 		return -EINVAL;
2198 
2199 	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
2200 	if (!gem_create_set_property_funcs[idx])
2201 		return -EINVAL;
2202 
2203 	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
2204 }
2205 
2206 typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
2207 					       struct xe_bo *bo,
2208 					       u64 extension);
2209 
2210 static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
2211 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
2212 };
2213 
2214 #define MAX_USER_EXTENSIONS	16
2215 static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
2216 				      u64 extensions, int ext_number)
2217 {
2218 	u64 __user *address = u64_to_user_ptr(extensions);
2219 	struct drm_xe_user_extension ext;
2220 	int err;
2221 	u32 idx;
2222 
2223 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
2224 		return -E2BIG;
2225 
2226 	err = __copy_from_user(&ext, address, sizeof(ext));
2227 	if (XE_IOCTL_DBG(xe, err))
2228 		return -EFAULT;
2229 
2230 	if (XE_IOCTL_DBG(xe, ext.pad) ||
2231 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
2232 		return -EINVAL;
2233 
2234 	idx = array_index_nospec(ext.name,
2235 				 ARRAY_SIZE(gem_create_user_extension_funcs));
2236 	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
2237 	if (XE_IOCTL_DBG(xe, err))
2238 		return err;
2239 
2240 	if (ext.next_extension)
2241 		return gem_create_user_extensions(xe, bo, ext.next_extension,
2242 						  ++ext_number);
2243 
2244 	return 0;
2245 }
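/*
 * Userspace-side sketch of an extension chain that the two functions above
 * consume: a single SET_PROPERTY extension selecting the HWDRM PXP type.
 * Field names follow uapi/drm/xe_drm.h; the variable name is illustrative.
 *
 *	struct drm_xe_ext_set_property ext = {
 *		.base.name = DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY,
 *		.base.next_extension = 0,	// zero terminates the chain
 *		.property = DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY,
 *		.value = DRM_XE_PXP_TYPE_HWDRM,
 *	};
 *
 *	// then: gem_create.extensions = (__u64)(uintptr_t)&ext;
 */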
2246 
2247 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
2248 			struct drm_file *file)
2249 {
2250 	struct xe_device *xe = to_xe_device(dev);
2251 	struct xe_file *xef = to_xe_file(file);
2252 	struct drm_xe_gem_create *args = data;
2253 	struct xe_vm *vm = NULL;
2254 	struct xe_bo *bo;
2255 	unsigned int bo_flags;
2256 	u32 handle;
2257 	int err;
2258 
2259 	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
2260 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2261 		return -EINVAL;
2262 
2263 	/* at least one valid memory placement must be specified */
2264 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
2265 			 !args->placement))
2266 		return -EINVAL;
2267 
2268 	if (XE_IOCTL_DBG(xe, args->flags &
2269 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
2270 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
2271 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
2272 		return -EINVAL;
2273 
2274 	if (XE_IOCTL_DBG(xe, args->handle))
2275 		return -EINVAL;
2276 
2277 	if (XE_IOCTL_DBG(xe, !args->size))
2278 		return -EINVAL;
2279 
2280 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
2281 		return -EINVAL;
2282 
2283 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
2284 		return -EINVAL;
2285 
2286 	bo_flags = 0;
2287 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
2288 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
2289 
2290 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
2291 		bo_flags |= XE_BO_FLAG_SCANOUT;
2292 
2293 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
2294 
2295 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
2296 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
2297 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
2298 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
2299 	    IS_ALIGNED(args->size, SZ_64K))
2300 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
2301 
2302 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
2303 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
2304 			return -EINVAL;
2305 
2306 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
2307 	}
2308 
2309 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2310 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2311 		return -EINVAL;
2312 
2313 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
2314 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2315 		return -EINVAL;
2316 
2317 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
2318 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2319 		return -EINVAL;
2320 
2321 	if (args->vm_id) {
2322 		vm = xe_vm_lookup(xef, args->vm_id);
2323 		if (XE_IOCTL_DBG(xe, !vm))
2324 			return -ENOENT;
2325 		err = xe_vm_lock(vm, true);
2326 		if (err)
2327 			goto out_vm;
2328 	}
2329 
2330 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2331 			       bo_flags);
2332 
2333 	if (vm)
2334 		xe_vm_unlock(vm);
2335 
2336 	if (IS_ERR(bo)) {
2337 		err = PTR_ERR(bo);
2338 		goto out_vm;
2339 	}
2340 
2341 	if (args->extensions) {
2342 		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
2343 		if (err)
2344 			goto out_bulk;
2345 	}
2346 
2347 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2348 	if (err)
2349 		goto out_bulk;
2350 
2351 	args->handle = handle;
2352 	goto out_put;
2353 
2354 out_bulk:
2355 	if (vm && !xe_vm_in_fault_mode(vm)) {
2356 		xe_vm_lock(vm, false);
2357 		__xe_bo_unset_bulk_move(bo);
2358 		xe_vm_unlock(vm);
2359 	}
2360 out_put:
2361 	xe_bo_put(bo);
2362 out_vm:
2363 	if (vm)
2364 		xe_vm_put(vm);
2365 
2366 	return err;
2367 }
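/*
 * Userspace-side sketch of a minimal call that satisfies the checks above:
 * page-aligned size, at least one placement bit from the device's
 * mem_region_mask, and WC caching (required whenever a VRAM placement is
 * requested). The placement value is illustrative; real code derives it from
 * the memory-region query.
 *
 *	struct drm_xe_gem_create create = {
 *		.size = 1 << 20,	// multiple of PAGE_SIZE
 *		.placement = 1,		// bit 0: system memory (shifted onto XE_BO_FLAG_SYSTEM above)
 *		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC,
 *	};
 *
 *	// err = ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create);
 *	// on success, create.handle holds the new GEM handle
 */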
2368 
2369 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2370 			     struct drm_file *file)
2371 {
2372 	struct xe_device *xe = to_xe_device(dev);
2373 	struct drm_xe_gem_mmap_offset *args = data;
2374 	struct drm_gem_object *gem_obj;
2375 
2376 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2377 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2378 		return -EINVAL;
2379 
2380 	if (XE_IOCTL_DBG(xe, args->flags &
2381 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
2382 		return -EINVAL;
2383 
2384 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
2385 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
2386 			return -EINVAL;
2387 
2388 		if (XE_IOCTL_DBG(xe, args->handle))
2389 			return -EINVAL;
2390 
2391 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
2392 			return -EINVAL;
2393 
2394 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
2395 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
2396 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
2397 		return 0;
2398 	}
2399 
2400 	gem_obj = drm_gem_object_lookup(file, args->handle);
2401 	if (XE_IOCTL_DBG(xe, !gem_obj))
2402 		return -ENOENT;
2403 
2404 	/* The mmap offset was set up at BO allocation time. */
2405 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2406 
2407 	xe_bo_put(gem_to_xe_bo(gem_obj));
2408 	return 0;
2409 }
2410 
2411 /**
2412  * xe_bo_lock() - Lock the buffer object's dma_resv object
2413  * @bo: The struct xe_bo whose lock is to be taken
2414  * @intr: Whether to perform any wait interruptible
2415  *
2416  * Locks the buffer object's dma_resv object. If the buffer object is
2417  * pointing to a shared dma_resv object, that shared lock is locked.
2418  *
2419  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2420  * contended lock was interrupted. If @intr is set to false, the
2421  * function always returns 0.
2422  */
2423 int xe_bo_lock(struct xe_bo *bo, bool intr)
2424 {
2425 	if (intr)
2426 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2427 
2428 	dma_resv_lock(bo->ttm.base.resv, NULL);
2429 
2430 	return 0;
2431 }
2432 
2433 /**
2434  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2435  * @bo: The struct xe_bo whose lock is to be released.
2436  *
2437  * Unlock a buffer object lock that was locked by xe_bo_lock().
2438  */
2439 void xe_bo_unlock(struct xe_bo *bo)
2440 {
2441 	dma_resv_unlock(bo->ttm.base.resv);
2442 }
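/*
 * Locking sketch: take the BO's dma_resv interruptibly, bail out on signals
 * and always pair with xe_bo_unlock(). "example_with_bo_locked" is a
 * hypothetical helper.
 *
 *	static int example_with_bo_locked(struct xe_bo *bo)
 *	{
 *		int err;
 *
 *		err = xe_bo_lock(bo, true);
 *		if (err)
 *			return err;	// -EINTR on signal
 *
 *		// ... operate on the BO, e.g. xe_bo_vmap() / xe_bo_vunmap() ...
 *
 *		xe_bo_unlock(bo);
 *		return 0;
 *	}
 */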
2443 
2444 /**
2445  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2446  * @bo: The buffer object to migrate
2447  * @mem_type: The TTM memory type intended to migrate to
2448  *
2449  * Check whether the buffer object supports migration to the
2450  * given memory type. Note that pinning may affect the ability to migrate as
2451  * returned by this function.
2452  *
2453  * This function is primarily intended as a helper for checking the
2454  * possibility to migrate buffer objects and can be called without
2455  * the object lock held.
2456  *
2457  * Return: true if migration is possible, false otherwise.
2458  */
2459 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2460 {
2461 	unsigned int cur_place;
2462 
2463 	if (bo->ttm.type == ttm_bo_type_kernel)
2464 		return true;
2465 
2466 	if (bo->ttm.type == ttm_bo_type_sg)
2467 		return false;
2468 
2469 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2470 	     cur_place++) {
2471 		if (bo->placements[cur_place].mem_type == mem_type)
2472 			return true;
2473 	}
2474 
2475 	return false;
2476 }
2477 
2478 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2479 {
2480 	memset(place, 0, sizeof(*place));
2481 	place->mem_type = mem_type;
2482 }
2483 
2484 /**
2485  * xe_bo_migrate - Migrate an object to the desired region id
2486  * @bo: The buffer object to migrate.
2487  * @mem_type: The TTM region type to migrate to.
2488  *
2489  * Attempt to migrate the buffer object to the desired memory region. The
2490  * buffer object may not be pinned, and must be locked.
2491  * On successful completion, the object memory type will be updated,
2492  * but an async migration task may not have completed yet. To wait for the
2493  * migration to finish, wait for the object's kernel fences to signal with
2494  * the object lock held.
2495  *
2496  * Return: 0 on success. Negative error code on failure. In particular may
2497  * return -EINTR or -ERESTARTSYS if signal pending.
2498  */
2499 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2500 {
2501 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2502 	struct ttm_operation_ctx ctx = {
2503 		.interruptible = true,
2504 		.no_wait_gpu = false,
2505 		.gfp_retry_mayfail = true,
2506 	};
2507 	struct ttm_placement placement;
2508 	struct ttm_place requested;
2509 
2510 	xe_bo_assert_held(bo);
2511 
2512 	if (bo->ttm.resource->mem_type == mem_type)
2513 		return 0;
2514 
2515 	if (xe_bo_is_pinned(bo))
2516 		return -EBUSY;
2517 
2518 	if (!xe_bo_can_migrate(bo, mem_type))
2519 		return -EINVAL;
2520 
2521 	xe_place_from_ttm_type(mem_type, &requested);
2522 	placement.num_placement = 1;
2523 	placement.placement = &requested;
2524 
2525 	/*
2526 	 * Stolen would need to be handled like the VRAM handling below if we
2527 	 * ever need to support it.
2528 	 */
2529 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2530 
2531 	if (mem_type_is_vram(mem_type)) {
2532 		u32 c = 0;
2533 
2534 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2535 	}
2536 
2537 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2538 }
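/*
 * Migration sketch: with the BO locked and not pinned, request a move to VRAM0
 * and then wait for the async copy by waiting on the kernel fences, as the
 * kerneldoc above describes. "example_migrate_to_vram0" is a hypothetical
 * helper.
 *
 *	static int example_migrate_to_vram0(struct xe_bo *bo)
 *	{
 *		int err;
 *
 *		err = xe_bo_lock(bo, true);
 *		if (err)
 *			return err;
 *
 *		err = xe_bo_migrate(bo, XE_PL_VRAM0);
 *		if (!err)
 *			dma_resv_wait_timeout(bo->ttm.base.resv,
 *					      DMA_RESV_USAGE_KERNEL, false,
 *					      MAX_SCHEDULE_TIMEOUT);
 *
 *		xe_bo_unlock(bo);
 *		return err;
 *	}
 */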
2539 
2540 /**
2541  * xe_bo_evict - Evict an object to its evict placement
2542  * @bo: The buffer object to migrate.
2543  * @force_alloc: Set force_alloc in ttm_operation_ctx
2544  *
2545  * On successful completion, the object memory will be moved to evict
2546  * placement. This function blocks until the object has been fully moved.
2547  *
2548  * Return: 0 on success. Negative error code on failure.
2549  */
2550 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2551 {
2552 	struct ttm_operation_ctx ctx = {
2553 		.interruptible = false,
2554 		.no_wait_gpu = false,
2555 		.force_alloc = force_alloc,
2556 		.gfp_retry_mayfail = true,
2557 	};
2558 	struct ttm_placement placement;
2559 	int ret;
2560 
2561 	xe_evict_flags(&bo->ttm, &placement);
2562 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2563 	if (ret)
2564 		return ret;
2565 
2566 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2567 			      false, MAX_SCHEDULE_TIMEOUT);
2568 
2569 	return 0;
2570 }
2571 
2572 /**
2573  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2574  * placed in system memory.
2575  * @bo: The xe_bo
2576  *
2577  * Return: true if extra pages need to be allocated, false otherwise.
2578  */
2579 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2580 {
2581 	struct xe_device *xe = xe_bo_device(bo);
2582 
2583 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
2584 		return false;
2585 
2586 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2587 		return false;
2588 
2589 	/* On discrete GPUs, if the GPU can access this buffer from
2590 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2591 	 * can't be used since there's no CCS storage associated with
2592 	 * non-VRAM addresses.
2593 	 */
2594 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
2595 		return false;
2596 
2597 	return true;
2598 }
2599 
2600 /**
2601  * __xe_bo_release_dummy() - Dummy kref release function
2602  * @kref: The embedded struct kref.
2603  *
2604  * Dummy release function for xe_bo_put_deferred(); do not call it directly.
2605  */
2606 void __xe_bo_release_dummy(struct kref *kref)
2607 {
2608 }
2609 
2610 /**
2611  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2612  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2613  *
2614  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2615  * The @deferred list can be either an onstack local list or a global
2616  * shared list used by a workqueue.
2617  */
2618 void xe_bo_put_commit(struct llist_head *deferred)
2619 {
2620 	struct llist_node *freed;
2621 	struct xe_bo *bo, *next;
2622 
2623 	if (!deferred)
2624 		return;
2625 
2626 	freed = llist_del_all(deferred);
2627 	if (!freed)
2628 		return;
2629 
2630 	llist_for_each_entry_safe(bo, next, freed, freed)
2631 		drm_gem_object_free(&bo->ttm.base.refcount);
2632 }
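/*
 * Deferred-put sketch: batch the final references on a local llist and release
 * them in one go with the function above. xe_bo_put_deferred() is the
 * companion helper referenced in the kerneldoc; "bos" and "n" are illustrative.
 *
 *	LLIST_HEAD(deferred);
 *	unsigned int i;
 *
 *	for (i = 0; i < n; i++)
 *		xe_bo_put_deferred(bos[i], &deferred);
 *
 *	xe_bo_put_commit(&deferred);
 */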
2633 
2634 void xe_bo_put(struct xe_bo *bo)
2635 {
2636 	struct xe_tile *tile;
2637 	u8 id;
2638 
2639 	might_sleep();
2640 	if (bo) {
2641 #ifdef CONFIG_PROC_FS
2642 		if (bo->client)
2643 			might_lock(&bo->client->bos_lock);
2644 #endif
2645 		for_each_tile(tile, xe_bo_device(bo), id)
2646 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
2647 				might_lock(&bo->ggtt_node[id]->ggtt->lock);
2648 		drm_gem_object_put(&bo->ttm.base);
2649 	}
2650 }
2651 
2652 /**
2653  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2654  * @file_priv: ...
2655  * @dev: ...
2656  * @args: ...
2657  *
2658  * See dumb_create() hook in include/drm/drm_drv.h
2659  *
2660  * Return: ...
2661  */
2662 int xe_bo_dumb_create(struct drm_file *file_priv,
2663 		      struct drm_device *dev,
2664 		      struct drm_mode_create_dumb *args)
2665 {
2666 	struct xe_device *xe = to_xe_device(dev);
2667 	struct xe_bo *bo;
2668 	uint32_t handle;
2669 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2670 	int err;
2671 	u32 page_size = max_t(u32, PAGE_SIZE,
2672 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2673 
2674 	args->pitch = ALIGN(args->width * cpp, 64);
2675 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2676 			   page_size);
2677 
2678 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2679 			       DRM_XE_GEM_CPU_CACHING_WC,
2680 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2681 			       XE_BO_FLAG_SCANOUT |
2682 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
2683 	if (IS_ERR(bo))
2684 		return PTR_ERR(bo);
2685 
2686 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2687 	/* drop reference from allocate - handle holds it now */
2688 	drm_gem_object_put(&bo->ttm.base);
2689 	if (!err)
2690 		args->handle = handle;
2691 	return err;
2692 }
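/*
 * Worked example of the pitch/size computation above, assuming a 1920x1080
 * XRGB8888 dumb buffer on a 4K-page device: cpp = 32 / 8 = 4,
 * pitch = ALIGN(1920 * 4, 64) = 7680, size = ALIGN(7680 * 1080, 4096) =
 * 8294400 bytes (exactly 2025 pages).
 */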
2693 
2694 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2695 {
2696 	struct ttm_buffer_object *tbo = &bo->ttm;
2697 	struct ttm_device *bdev = tbo->bdev;
2698 
2699 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2700 
2701 	list_del_init(&bo->vram_userfault_link);
2702 }
2703 
2704 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2705 #include "tests/xe_bo.c"
2706 #endif
2707