xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 78c3925c048c752334873f56c3a3d1c9d53e0416)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <drm/xe_drm.h>
17 
18 #include "xe_device.h"
19 #include "xe_dma_buf.h"
20 #include "xe_drm_client.h"
21 #include "xe_ggtt.h"
22 #include "xe_gt.h"
23 #include "xe_map.h"
24 #include "xe_migrate.h"
25 #include "xe_preempt_fence.h"
26 #include "xe_res_cursor.h"
27 #include "xe_trace.h"
28 #include "xe_ttm_stolen_mgr.h"
29 #include "xe_vm.h"
30 
31 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
32 	[XE_PL_SYSTEM] = "system",
33 	[XE_PL_TT] = "gtt",
34 	[XE_PL_VRAM0] = "vram0",
35 	[XE_PL_VRAM1] = "vram1",
36 	[XE_PL_STOLEN] = "stolen"
37 };
38 
39 static const struct ttm_place sys_placement_flags = {
40 	.fpfn = 0,
41 	.lpfn = 0,
42 	.mem_type = XE_PL_SYSTEM,
43 	.flags = 0,
44 };
45 
46 static struct ttm_placement sys_placement = {
47 	.num_placement = 1,
48 	.placement = &sys_placement_flags,
49 };
50 
51 static const struct ttm_place tt_placement_flags[] = {
52 	{
53 		.fpfn = 0,
54 		.lpfn = 0,
55 		.mem_type = XE_PL_TT,
56 		.flags = TTM_PL_FLAG_DESIRED,
57 	},
58 	{
59 		.fpfn = 0,
60 		.lpfn = 0,
61 		.mem_type = XE_PL_SYSTEM,
62 		.flags = TTM_PL_FLAG_FALLBACK,
63 	}
64 };
65 
66 static struct ttm_placement tt_placement = {
67 	.num_placement = 2,
68 	.placement = tt_placement_flags,
69 };
70 
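/* Placements at or above XE_PL_VRAM0 are VRAM, with stolen explicitly excluded. */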
71 bool mem_type_is_vram(u32 mem_type)
72 {
73 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
74 }
75 
76 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
77 {
78 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
79 }
80 
81 static bool resource_is_vram(struct ttm_resource *res)
82 {
83 	return mem_type_is_vram(res->mem_type);
84 }
85 
86 bool xe_bo_is_vram(struct xe_bo *bo)
87 {
88 	return resource_is_vram(bo->ttm.resource) ||
89 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
90 }
91 
92 bool xe_bo_is_stolen(struct xe_bo *bo)
93 {
94 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
95 }
96 
97 /**
98  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
99  * @bo: The BO
100  *
101  * The stolen memory is accessed through the PCI BAR for both DGFX and some
102  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
103  *
104  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
105  */
106 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
107 {
108 	return xe_bo_is_stolen(bo) &&
109 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
110 }
111 
112 static bool xe_bo_is_user(struct xe_bo *bo)
113 {
114 	return bo->flags & XE_BO_CREATE_USER_BIT;
115 }
116 
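/*
 * Return the migrate context of the tile backing a VRAM or stolen placement;
 * stolen is serviced by the root tile (tile 0).
 */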
117 static struct xe_migrate *
118 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
119 {
120 	struct xe_tile *tile;
121 
122 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
123 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
124 	return tile->migrate;
125 }
126 
127 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
128 {
129 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
130 	struct ttm_resource_manager *mgr;
131 
132 	xe_assert(xe, resource_is_vram(res));
133 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
134 	return to_xe_ttm_vram_mgr(mgr)->vram;
135 }
136 
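/*
 * The try_add_*() helpers below build bo->placements[] from the requested
 * creation flags, with *c counting the placements added so far.
 */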
137 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
138 			   u32 bo_flags, u32 *c)
139 {
140 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
141 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
142 
143 		bo->placements[*c] = (struct ttm_place) {
144 			.mem_type = XE_PL_TT,
145 		};
146 		*c += 1;
147 
148 		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
149 			bo->props.preferred_mem_type = XE_PL_TT;
150 	}
151 }
152 
153 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
154 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
155 {
156 	struct ttm_place place = { .mem_type = mem_type };
157 	struct xe_mem_region *vram;
158 	u64 io_size;
159 
160 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
161 
162 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
163 	xe_assert(xe, vram && vram->usable_size);
164 	io_size = vram->io_size;
165 
166 	/*
167 	 * For eviction / restore on suspend / resume, objects
168 	 * pinned in VRAM must be contiguous.
169 	 */
170 	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
171 			XE_BO_CREATE_GGTT_BIT))
172 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
173 
174 	if (io_size < vram->usable_size) {
175 		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
176 			place.fpfn = 0;
177 			place.lpfn = io_size >> PAGE_SHIFT;
178 		} else {
179 			place.flags |= TTM_PL_FLAG_TOPDOWN;
180 		}
181 	}
182 	places[*c] = place;
183 	*c += 1;
184 
185 	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
186 		bo->props.preferred_mem_type = mem_type;
187 }
188 
189 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
190 			 u32 bo_flags, u32 *c)
191 {
192 	if (bo->props.preferred_gt == XE_GT1) {
193 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
194 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
195 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
196 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
197 	} else {
198 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
199 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
200 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
201 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
202 	}
203 }
204 
205 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
206 			   u32 bo_flags, u32 *c)
207 {
208 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
209 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
210 
211 		bo->placements[*c] = (struct ttm_place) {
212 			.mem_type = XE_PL_STOLEN,
213 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
214 					     XE_BO_CREATE_GGTT_BIT) ?
215 				TTM_PL_FLAG_CONTIGUOUS : 0,
216 		};
217 		*c += 1;
218 	}
219 }
220 
221 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
222 				       u32 bo_flags)
223 {
224 	u32 c = 0;
225 
226 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
227 
228 	/* The order of placements should indicate preferred location */
229 
230 	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
231 		try_add_system(xe, bo, bo_flags, &c);
232 		try_add_vram(xe, bo, bo_flags, &c);
233 	} else {
234 		try_add_vram(xe, bo, bo_flags, &c);
235 		try_add_system(xe, bo, bo_flags, &c);
236 	}
237 	try_add_stolen(xe, bo, bo_flags, &c);
238 
239 	if (!c)
240 		return -EINVAL;
241 
242 	bo->placement = (struct ttm_placement) {
243 		.num_placement = c,
244 		.placement = bo->placements,
245 	};
246 
247 	return 0;
248 }
249 
250 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
251 			      u32 bo_flags)
252 {
253 	xe_bo_assert_held(bo);
254 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
255 }
256 
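/*
 * Eviction placement: VRAM and stolen evict via tt_placement (TT with a
 * system fallback), while TT evicts to system.
 */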
257 static void xe_evict_flags(struct ttm_buffer_object *tbo,
258 			   struct ttm_placement *placement)
259 {
260 	if (!xe_bo_is_xe_bo(tbo)) {
261 		/* Don't handle scatter gather BOs */
262 		if (tbo->type == ttm_bo_type_sg) {
263 			placement->num_placement = 0;
264 			return;
265 		}
266 
267 		*placement = sys_placement;
268 		return;
269 	}
270 
271 	/*
272 	 * For xe, sg bos that are evicted to system just trigger a
273 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
274 	 */
275 	switch (tbo->resource->mem_type) {
276 	case XE_PL_VRAM0:
277 	case XE_PL_VRAM1:
278 	case XE_PL_STOLEN:
279 		*placement = tt_placement;
280 		break;
281 	case XE_PL_TT:
282 	default:
283 		*placement = sys_placement;
284 		break;
285 	}
286 }
287 
288 struct xe_ttm_tt {
289 	struct ttm_tt ttm;
290 	struct device *dev;
291 	struct sg_table sgt;
292 	struct sg_table *sg;
293 };
294 
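/* Build an sg table for the TT pages and DMA-map it, caching the result in xe_tt->sg. */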
295 static int xe_tt_map_sg(struct ttm_tt *tt)
296 {
297 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
298 	unsigned long num_pages = tt->num_pages;
299 	int ret;
300 
301 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
302 
303 	if (xe_tt->sg)
304 		return 0;
305 
306 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
307 						num_pages, 0,
308 						(u64)num_pages << PAGE_SHIFT,
309 						xe_sg_segment_size(xe_tt->dev),
310 						GFP_KERNEL);
311 	if (ret)
312 		return ret;
313 
314 	xe_tt->sg = &xe_tt->sgt;
315 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
316 			      DMA_ATTR_SKIP_CPU_SYNC);
317 	if (ret) {
318 		sg_free_table(xe_tt->sg);
319 		xe_tt->sg = NULL;
320 		return ret;
321 	}
322 
323 	return 0;
324 }
325 
326 struct sg_table *xe_bo_sg(struct xe_bo *bo)
327 {
328 	struct ttm_tt *tt = bo->ttm.ttm;
329 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
330 
331 	return xe_tt->sg;
332 }
333 
334 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
335 				       u32 page_flags)
336 {
337 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
338 	struct xe_device *xe = xe_bo_device(bo);
339 	struct xe_ttm_tt *tt;
340 	unsigned long extra_pages;
341 	enum ttm_caching caching;
342 	int err;
343 
344 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
345 	if (!tt)
346 		return NULL;
347 
348 	tt->dev = xe->drm.dev;
349 
350 	extra_pages = 0;
351 	if (xe_bo_needs_ccs_pages(bo))
352 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
353 					   PAGE_SIZE);
354 
355 	switch (bo->cpu_caching) {
356 	case DRM_XE_GEM_CPU_CACHING_WC:
357 		caching = ttm_write_combined;
358 		break;
359 	default:
360 		caching = ttm_cached;
361 		break;
362 	}
363 
364 	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
365 
366 	/*
367 	 * Display scanout is always non-coherent with the CPU cache.
368 	 *
369 	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
370 	 * require a CPU:WC mapping.
371 	 */
372 	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
373 	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
374 		caching = ttm_write_combined;
375 
376 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
377 	if (err) {
378 		kfree(tt);
379 		return NULL;
380 	}
381 
382 	return &tt->ttm;
383 }
384 
385 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
386 			      struct ttm_operation_ctx *ctx)
387 {
388 	int err;
389 
390 	/*
391 	 * dma-bufs are not populated with pages, and the dma-
392 	 * addresses are set up when moved to XE_PL_TT.
393 	 */
394 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
395 		return 0;
396 
397 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
398 	if (err)
399 		return err;
400 
401 	/* A follow-up may move this to xe_bo_move() when the BO is moved to XE_PL_TT */
402 	err = xe_tt_map_sg(tt);
403 	if (err)
404 		ttm_pool_free(&ttm_dev->pool, tt);
405 
406 	return err;
407 }
408 
409 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
410 {
411 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
412 
413 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
414 		return;
415 
416 	if (xe_tt->sg) {
417 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
418 				  DMA_BIDIRECTIONAL, 0);
419 		sg_free_table(xe_tt->sg);
420 		xe_tt->sg = NULL;
421 	}
422 
423 	return ttm_pool_free(&ttm_dev->pool, tt);
424 }
425 
426 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
427 {
428 	ttm_tt_fini(tt);
429 	kfree(tt);
430 }
431 
432 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
433 				 struct ttm_resource *mem)
434 {
435 	struct xe_device *xe = ttm_to_xe_device(bdev);
436 
437 	switch (mem->mem_type) {
438 	case XE_PL_SYSTEM:
439 	case XE_PL_TT:
440 		return 0;
441 	case XE_PL_VRAM0:
442 	case XE_PL_VRAM1: {
443 		struct xe_ttm_vram_mgr_resource *vres =
444 			to_xe_ttm_vram_mgr_resource(mem);
445 		struct xe_mem_region *vram = res_to_mem_region(mem);
446 
447 		if (vres->used_visible_size < mem->size)
448 			return -EINVAL;
449 
450 		mem->bus.offset = mem->start << PAGE_SHIFT;
451 
452 		if (vram->mapping &&
453 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
454 			mem->bus.addr = (u8 __force *)vram->mapping +
455 				mem->bus.offset;
456 
457 		mem->bus.offset += vram->io_start;
458 		mem->bus.is_iomem = true;
459 
460 #if  !defined(CONFIG_X86)
461 		mem->bus.caching = ttm_write_combined;
462 #endif
463 		return 0;
464 	} case XE_PL_STOLEN:
465 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
466 	default:
467 		return -EINVAL;
468 	}
469 }
470 
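/*
 * Notify all VMs with bindings to this BO that it is about to move: dma-fence
 * VMs have their gpuvm bo marked evicted, while fault-mode VMs wait for idle
 * and have their VMAs invalidated immediately.
 */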
471 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
472 				const struct ttm_operation_ctx *ctx)
473 {
474 	struct dma_resv_iter cursor;
475 	struct dma_fence *fence;
476 	struct drm_gem_object *obj = &bo->ttm.base;
477 	struct drm_gpuvm_bo *vm_bo;
478 	bool idle = false;
479 	int ret = 0;
480 
481 	dma_resv_assert_held(bo->ttm.base.resv);
482 
483 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
484 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
485 				    DMA_RESV_USAGE_BOOKKEEP);
486 		dma_resv_for_each_fence_unlocked(&cursor, fence)
487 			dma_fence_enable_sw_signaling(fence);
488 		dma_resv_iter_end(&cursor);
489 	}
490 
491 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
492 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
493 		struct drm_gpuva *gpuva;
494 
495 		if (!xe_vm_in_fault_mode(vm)) {
496 			drm_gpuvm_bo_evict(vm_bo, true);
497 			continue;
498 		}
499 
500 		if (!idle) {
501 			long timeout;
502 
503 			if (ctx->no_wait_gpu &&
504 			    !dma_resv_test_signaled(bo->ttm.base.resv,
505 						    DMA_RESV_USAGE_BOOKKEEP))
506 				return -EBUSY;
507 
508 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
509 							DMA_RESV_USAGE_BOOKKEEP,
510 							ctx->interruptible,
511 							MAX_SCHEDULE_TIMEOUT);
512 			if (!timeout)
513 				return -ETIME;
514 			if (timeout < 0)
515 				return timeout;
516 
517 			idle = true;
518 		}
519 
520 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
521 			struct xe_vma *vma = gpuva_to_vma(gpuva);
522 
523 			trace_xe_vma_evict(vma);
524 			ret = xe_vm_invalidate_vma(vma);
525 			if (XE_WARN_ON(ret))
526 				return ret;
527 		}
528 	}
529 
530 	return ret;
531 }
532 
533 /*
534  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
535  * Note that unmapping the attachment is deferred to the next
536  * map_attachment time, or to bo destroy (after idling), whichever comes first.
537  * This is to avoid syncing before unmap_attachment(), assuming that the
538  * caller relies on idling the reservation object before moving the
539  * backing store out. Should that assumption not hold, then we will be able
540  * to unconditionally call unmap_attachment() when moving out to system.
541  */
542 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
543 			     struct ttm_resource *new_res)
544 {
545 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
546 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
547 					       ttm);
548 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
549 	struct sg_table *sg;
550 
551 	xe_assert(xe, attach);
552 	xe_assert(xe, ttm_bo->ttm);
553 
554 	if (new_res->mem_type == XE_PL_SYSTEM)
555 		goto out;
556 
557 	if (ttm_bo->sg) {
558 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
559 		ttm_bo->sg = NULL;
560 	}
561 
562 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
563 	if (IS_ERR(sg))
564 		return PTR_ERR(sg);
565 
566 	ttm_bo->sg = sg;
567 	xe_tt->sg = sg;
568 
569 out:
570 	ttm_bo_move_null(ttm_bo, new_res);
571 
572 	return 0;
573 }
574 
575 /**
576  * xe_bo_move_notify - Notify subsystems of a pending move
577  * @bo: The buffer object
578  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
579  *
580  * This function notifies subsystems of an upcoming buffer move.
581  * Upon receiving such a notification, subsystems should schedule
582  * halting access to the underlying pages and optionally add a fence
583  * to the buffer object's dma_resv object, that signals when access is
584  * stopped. The caller will wait on all dma_resv fences before
585  * starting the move.
586  *
587  * A subsystem may commence access to the object after obtaining
588  * bindings to the new backing memory under the object lock.
589  *
590  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
591  * negative error code on error.
592  */
593 static int xe_bo_move_notify(struct xe_bo *bo,
594 			     const struct ttm_operation_ctx *ctx)
595 {
596 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
597 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
598 	struct ttm_resource *old_mem = ttm_bo->resource;
599 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
600 	int ret;
601 
602 	/*
603 	 * If this starts to call into many components, consider
604 	 * using a notification chain here.
605 	 */
606 
607 	if (xe_bo_is_pinned(bo))
608 		return -EINVAL;
609 
610 	xe_bo_vunmap(bo);
611 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
612 	if (ret)
613 		return ret;
614 
615 	/* Don't call move_notify() for imported dma-bufs. */
616 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
617 		dma_buf_move_notify(ttm_bo->base.dma_buf);
618 
619 	/*
620 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
621 	 * so if we moved from VRAM make sure to unlink this from the userfault
622 	 * tracking.
623 	 */
624 	if (mem_type_is_vram(old_mem_type)) {
625 		mutex_lock(&xe->mem_access.vram_userfault.lock);
626 		if (!list_empty(&bo->vram_userfault_link))
627 			list_del_init(&bo->vram_userfault_link);
628 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
629 	}
630 
631 	return 0;
632 }
633 
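/*
 * TTM move callback. Trivial transitions are completed with ttm_bo_move_null();
 * otherwise the move is either a CPU memcpy for pinned kernel objects or a GPU
 * clear / copy through the migrate engine, with system <-> VRAM transfers
 * bounced through XE_PL_TT via -EMULTIHOP.
 */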
634 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
635 		      struct ttm_operation_ctx *ctx,
636 		      struct ttm_resource *new_mem,
637 		      struct ttm_place *hop)
638 {
639 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
640 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
641 	struct ttm_resource *old_mem = ttm_bo->resource;
642 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
643 	struct ttm_tt *ttm = ttm_bo->ttm;
644 	struct xe_migrate *migrate = NULL;
645 	struct dma_fence *fence;
646 	bool move_lacks_source;
647 	bool tt_has_data;
648 	bool needs_clear;
649 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
650 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
651 	int ret = 0;
652 	/* Bo creation path, moving to system or TT. */
653 	if ((!old_mem && ttm) && !handle_system_ccs) {
654 		ttm_bo_move_null(ttm_bo, new_mem);
655 		return 0;
656 	}
657 
658 	if (ttm_bo->type == ttm_bo_type_sg) {
659 		ret = xe_bo_move_notify(bo, ctx);
660 		if (!ret)
661 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
662 		goto out;
663 	}
664 
665 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
666 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
667 
668 	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
669 						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
670 
671 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
672 		(!ttm && ttm_bo->type == ttm_bo_type_device);
673 
674 	if ((move_lacks_source && !needs_clear)) {
675 		ttm_bo_move_null(ttm_bo, new_mem);
676 		goto out;
677 	}
678 
679 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
680 		ttm_bo_move_null(ttm_bo, new_mem);
681 		goto out;
682 	}
683 
684 	/*
685 	 * A failed multi-hop, where the old_mem is still marked as
686 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
687 	 */
688 	if (old_mem_type == XE_PL_TT &&
689 	    new_mem->mem_type == XE_PL_TT) {
690 		ttm_bo_move_null(ttm_bo, new_mem);
691 		goto out;
692 	}
693 
694 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
695 		ret = xe_bo_move_notify(bo, ctx);
696 		if (ret)
697 			goto out;
698 	}
699 
700 	if (old_mem_type == XE_PL_TT &&
701 	    new_mem->mem_type == XE_PL_SYSTEM) {
702 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
703 						     DMA_RESV_USAGE_BOOKKEEP,
704 						     true,
705 						     MAX_SCHEDULE_TIMEOUT);
706 		if (timeout < 0) {
707 			ret = timeout;
708 			goto out;
709 		}
710 
711 		if (!handle_system_ccs) {
712 			ttm_bo_move_null(ttm_bo, new_mem);
713 			goto out;
714 		}
715 	}
716 
717 	if (!move_lacks_source &&
718 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
719 	     (mem_type_is_vram(old_mem_type) &&
720 	      new_mem->mem_type == XE_PL_SYSTEM))) {
721 		hop->fpfn = 0;
722 		hop->lpfn = 0;
723 		hop->mem_type = XE_PL_TT;
724 		hop->flags = TTM_PL_FLAG_TEMPORARY;
725 		ret = -EMULTIHOP;
726 		goto out;
727 	}
728 
729 	if (bo->tile)
730 		migrate = bo->tile->migrate;
731 	else if (resource_is_vram(new_mem))
732 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
733 	else if (mem_type_is_vram(old_mem_type))
734 		migrate = mem_type_to_migrate(xe, old_mem_type);
735 	else
736 		migrate = xe->tiles[0].migrate;
737 
738 	xe_assert(xe, migrate);
739 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
740 	xe_device_mem_access_get(xe);
741 
742 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
743 		/*
744 		 * Kernel memory that is pinned should only be moved on suspend
745 		 * / resume; some of the pinned memory is required for the
746 		 * device to resume / use the GPU to move other evicted memory
747 		 * (user memory) around. This could likely be optimized a bit
748 		 * further by finding the minimum set of pinned memory
749 		 * required for resume, but for simplicity we do a memcpy for
750 		 * all pinned memory.
751 		 */
752 		ret = xe_bo_vmap(bo);
753 		if (!ret) {
754 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
755 
756 			/* Create a new VMAP once the kernel BO is back in VRAM */
757 			if (!ret && resource_is_vram(new_mem)) {
758 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
759 				void __iomem *new_addr = vram->mapping +
760 					(new_mem->start << PAGE_SHIFT);
761 
762 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
763 					ret = -EINVAL;
764 					xe_device_mem_access_put(xe);
765 					goto out;
766 				}
767 
768 				xe_assert(xe, new_mem->start ==
769 					  bo->placements->fpfn);
770 
771 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
772 			}
773 		}
774 	} else {
775 		if (move_lacks_source)
776 			fence = xe_migrate_clear(migrate, bo, new_mem);
777 		else
778 			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
779 						new_mem, handle_system_ccs);
780 		if (IS_ERR(fence)) {
781 			ret = PTR_ERR(fence);
782 			xe_device_mem_access_put(xe);
783 			goto out;
784 		}
785 		if (!move_lacks_source) {
786 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
787 							true, new_mem);
788 			if (ret) {
789 				dma_fence_wait(fence, false);
790 				ttm_bo_move_null(ttm_bo, new_mem);
791 				ret = 0;
792 			}
793 		} else {
794 			/*
795 			 * ttm_bo_move_accel_cleanup() may blow up if
796 			 * bo->resource == NULL, so just attach the
797 			 * fence and set the new resource.
798 			 */
799 			dma_resv_add_fence(ttm_bo->base.resv, fence,
800 					   DMA_RESV_USAGE_KERNEL);
801 			ttm_bo_move_null(ttm_bo, new_mem);
802 		}
803 
804 		dma_fence_put(fence);
805 	}
806 
807 	xe_device_mem_access_put(xe);
808 
809 out:
810 	return ret;
811 
812 }
813 
814 /**
815  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
816  * @bo: The buffer object to move.
817  *
818  * On successful completion, the object memory will be moved to system memory.
819  * This function blocks until the object has been fully moved.
820  *
821  * This is needed for special handling of pinned VRAM objects during
822  * suspend-resume.
823  *
824  * Return: 0 on success. Negative error code on failure.
825  */
826 int xe_bo_evict_pinned(struct xe_bo *bo)
827 {
828 	struct ttm_place place = {
829 		.mem_type = XE_PL_TT,
830 	};
831 	struct ttm_placement placement = {
832 		.placement = &place,
833 		.num_placement = 1,
834 	};
835 	struct ttm_operation_ctx ctx = {
836 		.interruptible = false,
837 	};
838 	struct ttm_resource *new_mem;
839 	int ret;
840 
841 	xe_bo_assert_held(bo);
842 
843 	if (WARN_ON(!bo->ttm.resource))
844 		return -EINVAL;
845 
846 	if (WARN_ON(!xe_bo_is_pinned(bo)))
847 		return -EINVAL;
848 
849 	if (WARN_ON(!xe_bo_is_vram(bo)))
850 		return -EINVAL;
851 
852 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
853 	if (ret)
854 		return ret;
855 
856 	if (!bo->ttm.ttm) {
857 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
858 		if (!bo->ttm.ttm) {
859 			ret = -ENOMEM;
860 			goto err_res_free;
861 		}
862 	}
863 
864 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
865 	if (ret)
866 		goto err_res_free;
867 
868 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
869 	if (ret)
870 		goto err_res_free;
871 
872 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
873 	if (ret)
874 		goto err_res_free;
875 
876 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
877 			      false, MAX_SCHEDULE_TIMEOUT);
878 
879 	return 0;
880 
881 err_res_free:
882 	ttm_resource_free(&bo->ttm, &new_mem);
883 	return ret;
884 }
885 
886 /**
887  * xe_bo_restore_pinned() - Restore a pinned VRAM object
888  * @bo: The buffer object to move.
889  *
890  * On successful completion, the object memory will be moved back to VRAM.
891  * This function blocks until the object has been fully moved.
892  *
893  * This is needed for special handling of pinned VRAM objects during
894  * suspend-resume.
895  *
896  * Return: 0 on success. Negative error code on failure.
897  */
898 int xe_bo_restore_pinned(struct xe_bo *bo)
899 {
900 	struct ttm_operation_ctx ctx = {
901 		.interruptible = false,
902 	};
903 	struct ttm_resource *new_mem;
904 	int ret;
905 
906 	xe_bo_assert_held(bo);
907 
908 	if (WARN_ON(!bo->ttm.resource))
909 		return -EINVAL;
910 
911 	if (WARN_ON(!xe_bo_is_pinned(bo)))
912 		return -EINVAL;
913 
914 	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
915 		return -EINVAL;
916 
917 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
918 	if (ret)
919 		return ret;
920 
921 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
922 	if (ret)
923 		goto err_res_free;
924 
925 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
926 	if (ret)
927 		goto err_res_free;
928 
929 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
930 	if (ret)
931 		goto err_res_free;
932 
933 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
934 			      false, MAX_SCHEDULE_TIMEOUT);
935 
936 	return 0;
937 
938 err_res_free:
939 	ttm_resource_free(&bo->ttm, &new_mem);
940 	return ret;
941 }
942 
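/* Translate a page offset inside the BO to a CPU-accessible pfn in the VRAM or stolen BAR. */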
943 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
944 				       unsigned long page_offset)
945 {
946 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
947 	struct xe_res_cursor cursor;
948 	struct xe_mem_region *vram;
949 
950 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
951 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
952 
953 	vram = res_to_mem_region(ttm_bo->resource);
954 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
955 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
956 }
957 
958 static void __xe_bo_vunmap(struct xe_bo *bo);
959 
960 /*
961  * TODO: Move this function to TTM so we don't rely on how TTM does its
962  * locking, thereby abusing TTM internals.
963  */
964 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
965 {
966 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
967 	bool locked;
968 
969 	xe_assert(xe, !kref_read(&ttm_bo->kref));
970 
971 	/*
972 	 * We can typically only race with TTM trylocking under the
973 	 * lru_lock, which will immediately be unlocked again since
974 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
975 	 * always succeed here, as long as we hold the lru lock.
976 	 */
977 	spin_lock(&ttm_bo->bdev->lru_lock);
978 	locked = dma_resv_trylock(ttm_bo->base.resv);
979 	spin_unlock(&ttm_bo->bdev->lru_lock);
980 	xe_assert(xe, locked);
981 
982 	return locked;
983 }
984 
985 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
986 {
987 	struct dma_resv_iter cursor;
988 	struct dma_fence *fence;
989 	struct dma_fence *replacement = NULL;
990 	struct xe_bo *bo;
991 
992 	if (!xe_bo_is_xe_bo(ttm_bo))
993 		return;
994 
995 	bo = ttm_to_xe_bo(ttm_bo);
996 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
997 
998 	/*
999 	 * Corner case where TTM fails to allocate memory and this BO's resv
1000 	 * still points to the VM's resv.
1001 	 */
1002 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1003 		return;
1004 
1005 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1006 		return;
1007 
1008 	/*
1009 	 * Scrub the preempt fences if any. The unbind fence is already
1010 	 * attached to the resv.
1011 	 * TODO: Don't do this for external bos once we scrub them after
1012 	 * unbind.
1013 	 */
1014 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1015 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1016 		if (xe_fence_is_xe_preempt(fence) &&
1017 		    !dma_fence_is_signaled(fence)) {
1018 			if (!replacement)
1019 				replacement = dma_fence_get_stub();
1020 
1021 			dma_resv_replace_fences(ttm_bo->base.resv,
1022 						fence->context,
1023 						replacement,
1024 						DMA_RESV_USAGE_BOOKKEEP);
1025 		}
1026 	}
1027 	dma_fence_put(replacement);
1028 
1029 	dma_resv_unlock(ttm_bo->base.resv);
1030 }
1031 
1032 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1033 {
1034 	if (!xe_bo_is_xe_bo(ttm_bo))
1035 		return;
1036 
1037 	/*
1038 	 * Object is idle and about to be destroyed. Release the
1039 	 * dma-buf attachment.
1040 	 */
1041 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1042 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1043 						       struct xe_ttm_tt, ttm);
1044 
1045 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1046 					 DMA_BIDIRECTIONAL);
1047 		ttm_bo->sg = NULL;
1048 		xe_tt->sg = NULL;
1049 	}
1050 }
1051 
1052 const struct ttm_device_funcs xe_ttm_funcs = {
1053 	.ttm_tt_create = xe_ttm_tt_create,
1054 	.ttm_tt_populate = xe_ttm_tt_populate,
1055 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1056 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1057 	.evict_flags = xe_evict_flags,
1058 	.move = xe_bo_move,
1059 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1060 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1061 	.release_notify = xe_ttm_bo_release_notify,
1062 	.eviction_valuable = ttm_bo_eviction_valuable,
1063 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1064 };
1065 
1066 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1067 {
1068 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1069 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1070 
1071 	if (bo->ttm.base.import_attach)
1072 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1073 	drm_gem_object_release(&bo->ttm.base);
1074 
1075 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1076 
1077 	if (bo->ggtt_node.size)
1078 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1079 
1080 #ifdef CONFIG_PROC_FS
1081 	if (bo->client)
1082 		xe_drm_client_remove_bo(bo);
1083 #endif
1084 
1085 	if (bo->vm && xe_bo_is_user(bo))
1086 		xe_vm_put(bo->vm);
1087 
1088 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1089 	if (!list_empty(&bo->vram_userfault_link))
1090 		list_del(&bo->vram_userfault_link);
1091 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1092 
1093 	kfree(bo);
1094 }
1095 
1096 static void xe_gem_object_free(struct drm_gem_object *obj)
1097 {
1098 	/* Our BO reference counting scheme works as follows:
1099 	 *
1100 	 * The gem object kref is typically used throughout the driver,
1101 	 * and the gem object holds a ttm_buffer_object refcount, so
1102 	 * that when the last gem object reference is put, which is when
1103 	 * we end up in this function, we also put that ttm_buffer_object
1104 	 * refcount. Anything using gem interfaces is then no longer
1105 	 * allowed to access the object in a way that requires a gem
1106 	 * refcount, including locking the object.
1107 	 *
1108 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1109 	 * refcount directly if needed.
1110 	 */
1111 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1112 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1113 }
1114 
1115 static void xe_gem_object_close(struct drm_gem_object *obj,
1116 				struct drm_file *file_priv)
1117 {
1118 	struct xe_bo *bo = gem_to_xe_bo(obj);
1119 
1120 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1121 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1122 
1123 		xe_bo_lock(bo, false);
1124 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1125 		xe_bo_unlock(bo);
1126 	}
1127 }
1128 
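/* BOs used for CPU atomics must be migrated to system memory when the device is in fault mode. */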
1129 static bool should_migrate_to_system(struct xe_bo *bo)
1130 {
1131 	struct xe_device *xe = xe_bo_device(bo);
1132 
1133 	return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
1134 }
1135 
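/*
 * CPU fault handler: migrates CPU-atomic BOs to TT in fault mode, maps the
 * faulting pages, and tracks BOs with live VRAM CPU mappings on the
 * vram_userfault list.
 */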
1136 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1137 {
1138 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1139 	struct drm_device *ddev = tbo->base.dev;
1140 	struct xe_device *xe = to_xe_device(ddev);
1141 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1142 	bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
1143 	vm_fault_t ret;
1144 	int idx, r = 0;
1145 
1146 	if (needs_rpm)
1147 		xe_device_mem_access_get(xe);
1148 
1149 	ret = ttm_bo_vm_reserve(tbo, vmf);
1150 	if (ret)
1151 		goto out;
1152 
1153 	if (drm_dev_enter(ddev, &idx)) {
1154 		trace_xe_bo_cpu_fault(bo);
1155 
1156 		if (should_migrate_to_system(bo)) {
1157 			r = xe_bo_migrate(bo, XE_PL_TT);
1158 			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1159 				ret = VM_FAULT_NOPAGE;
1160 			else if (r)
1161 				ret = VM_FAULT_SIGBUS;
1162 		}
1163 		if (!ret)
1164 			ret = ttm_bo_vm_fault_reserved(vmf,
1165 						       vmf->vma->vm_page_prot,
1166 						       TTM_BO_VM_NUM_PREFAULT);
1167 		drm_dev_exit(idx);
1168 	} else {
1169 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1170 	}
1171 
1172 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1173 		goto out;
1174 	/*
1175 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1176 	 */
1177 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1178 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1179 		if (list_empty(&bo->vram_userfault_link))
1180 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1181 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1182 	}
1183 
1184 	dma_resv_unlock(tbo->base.resv);
1185 out:
1186 	if (needs_rpm)
1187 		xe_device_mem_access_put(xe);
1188 
1189 	return ret;
1190 }
1191 
1192 static const struct vm_operations_struct xe_gem_vm_ops = {
1193 	.fault = xe_gem_fault,
1194 	.open = ttm_bo_vm_open,
1195 	.close = ttm_bo_vm_close,
1196 	.access = ttm_bo_vm_access
1197 };
1198 
1199 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1200 	.free = xe_gem_object_free,
1201 	.close = xe_gem_object_close,
1202 	.mmap = drm_gem_ttm_mmap,
1203 	.export = xe_gem_prime_export,
1204 	.vm_ops = &xe_gem_vm_ops,
1205 };
1206 
1207 /**
1208  * xe_bo_alloc - Allocate storage for a struct xe_bo
1209  *
1210  * This function is intended to allocate storage to be used for input
1211  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1212  * created is needed before the call to __xe_bo_create_locked().
1213  * If __xe_bo_create_locked() ends up never being called, then the
1214  * storage allocated with this function needs to be freed using
1215  * xe_bo_free().
1216  *
1217  * Return: A pointer to an uninitialized struct xe_bo on success,
1218  * ERR_PTR(-ENOMEM) on error.
1219  */
1220 struct xe_bo *xe_bo_alloc(void)
1221 {
1222 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1223 
1224 	if (!bo)
1225 		return ERR_PTR(-ENOMEM);
1226 
1227 	return bo;
1228 }
1229 
1230 /**
1231  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1232  * @bo: The buffer object storage.
1233  *
1234  * Refer to xe_bo_alloc() documentation for valid use-cases.
1235  */
1236 void xe_bo_free(struct xe_bo *bo)
1237 {
1238 	kfree(bo);
1239 }
1240 
1241 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1242 				     struct xe_tile *tile, struct dma_resv *resv,
1243 				     struct ttm_lru_bulk_move *bulk, size_t size,
1244 				     u16 cpu_caching, enum ttm_bo_type type,
1245 				     u32 flags)
1246 {
1247 	struct ttm_operation_ctx ctx = {
1248 		.interruptible = true,
1249 		.no_wait_gpu = false,
1250 	};
1251 	struct ttm_placement *placement;
1252 	uint32_t alignment;
1253 	size_t aligned_size;
1254 	int err;
1255 
1256 	/* Only kernel objects should set GT */
1257 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1258 
1259 	if (XE_WARN_ON(!size)) {
1260 		xe_bo_free(bo);
1261 		return ERR_PTR(-EINVAL);
1262 	}
1263 
1264 	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
1265 	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
1266 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
1267 		aligned_size = ALIGN(size, SZ_64K);
1268 		if (type != ttm_bo_type_device)
1269 			size = ALIGN(size, SZ_64K);
1270 		flags |= XE_BO_INTERNAL_64K;
1271 		alignment = SZ_64K >> PAGE_SHIFT;
1272 
1273 	} else {
1274 		aligned_size = ALIGN(size, SZ_4K);
1275 		flags &= ~XE_BO_INTERNAL_64K;
1276 		alignment = SZ_4K >> PAGE_SHIFT;
1277 	}
1278 
1279 	if (type == ttm_bo_type_device && aligned_size != size)
1280 		return ERR_PTR(-EINVAL);
1281 
1282 	if (!bo) {
1283 		bo = xe_bo_alloc();
1284 		if (IS_ERR(bo))
1285 			return bo;
1286 	}
1287 
1288 	bo->ccs_cleared = false;
1289 	bo->tile = tile;
1290 	bo->size = size;
1291 	bo->flags = flags;
1292 	bo->cpu_caching = cpu_caching;
1293 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1294 	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
1295 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
1296 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
1297 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1298 	INIT_LIST_HEAD(&bo->pinned_link);
1299 #ifdef CONFIG_PROC_FS
1300 	INIT_LIST_HEAD(&bo->client_link);
1301 #endif
1302 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1303 
1304 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1305 
1306 	if (resv) {
1307 		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
1308 		ctx.resv = resv;
1309 	}
1310 
1311 	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
1312 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1313 		if (WARN_ON(err)) {
1314 			xe_ttm_bo_destroy(&bo->ttm);
1315 			return ERR_PTR(err);
1316 		}
1317 	}
1318 
1319 	/* Defer populating type_sg bos */
1320 	placement = (type == ttm_bo_type_sg ||
1321 		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
1322 		&bo->placement;
1323 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1324 				   placement, alignment,
1325 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1326 	if (err)
1327 		return ERR_PTR(err);
1328 
1329 	/*
1330 	 * The VRAM pages underneath are potentially still being accessed by the
1331 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1332 	 * sure to add any corresponding move/clear fences into the object's
1333 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1334 	 *
1335 	 * For KMD internal buffers we don't care about GPU clearing, however we
1336 	 * still need to handle async evictions, where the VRAM is still being
1337 	 * accessed by the GPU. Most internal callers are not expecting this,
1338 	 * since they are missing the required synchronisation before accessing
1339 	 * the memory. To keep things simple just sync wait any kernel fences
1340 	 * here, if the buffer is designated KMD internal.
1341 	 *
1342 	 * For normal userspace objects we should already have the required
1343 	 * pipelining or sync waiting elsewhere, since we already have to deal
1344 	 * with things like async GPU clearing.
1345 	 */
1346 	if (type == ttm_bo_type_kernel) {
1347 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1348 						     DMA_RESV_USAGE_KERNEL,
1349 						     ctx.interruptible,
1350 						     MAX_SCHEDULE_TIMEOUT);
1351 
1352 		if (timeout < 0) {
1353 			if (!resv)
1354 				dma_resv_unlock(bo->ttm.base.resv);
1355 			xe_bo_put(bo);
1356 			return ERR_PTR(timeout);
1357 		}
1358 	}
1359 
1360 	bo->created = true;
1361 	if (bulk)
1362 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1363 	else
1364 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1365 
1366 	return bo;
1367 }
1368 
1369 static int __xe_bo_fixed_placement(struct xe_device *xe,
1370 				   struct xe_bo *bo,
1371 				   u32 flags,
1372 				   u64 start, u64 end, u64 size)
1373 {
1374 	struct ttm_place *place = bo->placements;
1375 
1376 	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
1377 		return -EINVAL;
1378 
1379 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1380 	place->fpfn = start >> PAGE_SHIFT;
1381 	place->lpfn = end >> PAGE_SHIFT;
1382 
1383 	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
1384 	case XE_BO_CREATE_VRAM0_BIT:
1385 		place->mem_type = XE_PL_VRAM0;
1386 		break;
1387 	case XE_BO_CREATE_VRAM1_BIT:
1388 		place->mem_type = XE_PL_VRAM1;
1389 		break;
1390 	case XE_BO_CREATE_STOLEN_BIT:
1391 		place->mem_type = XE_PL_STOLEN;
1392 		break;
1393 
1394 	default:
1395 		/* 0 or multiple of the above set */
1396 		return -EINVAL;
1397 	}
1398 
1399 	bo->placement = (struct ttm_placement) {
1400 		.num_placement = 1,
1401 		.placement = place,
1402 	};
1403 
1404 	return 0;
1405 }
1406 
1407 static struct xe_bo *
1408 __xe_bo_create_locked(struct xe_device *xe,
1409 		      struct xe_tile *tile, struct xe_vm *vm,
1410 		      size_t size, u64 start, u64 end,
1411 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1412 {
1413 	struct xe_bo *bo = NULL;
1414 	int err;
1415 
1416 	if (vm)
1417 		xe_vm_assert_held(vm);
1418 
1419 	if (start || end != ~0ULL) {
1420 		bo = xe_bo_alloc();
1421 		if (IS_ERR(bo))
1422 			return bo;
1423 
1424 		flags |= XE_BO_FIXED_PLACEMENT_BIT;
1425 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1426 		if (err) {
1427 			xe_bo_free(bo);
1428 			return ERR_PTR(err);
1429 		}
1430 	}
1431 
1432 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1433 				    vm && !xe_vm_in_fault_mode(vm) &&
1434 				    flags & XE_BO_CREATE_USER_BIT ?
1435 				    &vm->lru_bulk_move : NULL, size,
1436 				    cpu_caching, type, flags);
1437 	if (IS_ERR(bo))
1438 		return bo;
1439 
1440 	/*
1441 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
1442 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1443 	 * will keep a reference to the vm, and avoid circular references
1444 	 * by having all the vm's bo references released at vm close
1445 	 * time.
1446 	 */
1447 	if (vm && xe_bo_is_user(bo))
1448 		xe_vm_get(vm);
1449 	bo->vm = vm;
1450 
1451 	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
1452 		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
1453 			tile = xe_device_get_root_tile(xe);
1454 
1455 		xe_assert(xe, tile);
1456 
1457 		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
1458 			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1459 						   start + bo->size, U64_MAX);
1460 		} else {
1461 			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1462 		}
1463 		if (err)
1464 			goto err_unlock_put_bo;
1465 	}
1466 
1467 	return bo;
1468 
1469 err_unlock_put_bo:
1470 	__xe_bo_unset_bulk_move(bo);
1471 	xe_bo_unlock_vm_held(bo);
1472 	xe_bo_put(bo);
1473 	return ERR_PTR(err);
1474 }
1475 
1476 struct xe_bo *
1477 xe_bo_create_locked_range(struct xe_device *xe,
1478 			  struct xe_tile *tile, struct xe_vm *vm,
1479 			  size_t size, u64 start, u64 end,
1480 			  enum ttm_bo_type type, u32 flags)
1481 {
1482 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1483 }
1484 
1485 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1486 				  struct xe_vm *vm, size_t size,
1487 				  enum ttm_bo_type type, u32 flags)
1488 {
1489 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1490 }
1491 
1492 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1493 				struct xe_vm *vm, size_t size,
1494 				u16 cpu_caching,
1495 				enum ttm_bo_type type,
1496 				u32 flags)
1497 {
1498 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1499 						 cpu_caching, type,
1500 						 flags | XE_BO_CREATE_USER_BIT);
1501 	if (!IS_ERR(bo))
1502 		xe_bo_unlock_vm_held(bo);
1503 
1504 	return bo;
1505 }
1506 
1507 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1508 			   struct xe_vm *vm, size_t size,
1509 			   enum ttm_bo_type type, u32 flags)
1510 {
1511 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1512 
1513 	if (!IS_ERR(bo))
1514 		xe_bo_unlock_vm_held(bo);
1515 
1516 	return bo;
1517 }
1518 
1519 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1520 				      struct xe_vm *vm,
1521 				      size_t size, u64 offset,
1522 				      enum ttm_bo_type type, u32 flags)
1523 {
1524 	struct xe_bo *bo;
1525 	int err;
1526 	u64 start = offset == ~0ull ? 0 : offset;
1527 	u64 end = offset == ~0ull ? offset : start + size;
1528 
1529 	if (flags & XE_BO_CREATE_STOLEN_BIT &&
1530 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1531 		flags |= XE_BO_CREATE_GGTT_BIT;
1532 
1533 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1534 				       flags | XE_BO_NEEDS_CPU_ACCESS);
1535 	if (IS_ERR(bo))
1536 		return bo;
1537 
1538 	err = xe_bo_pin(bo);
1539 	if (err)
1540 		goto err_put;
1541 
1542 	err = xe_bo_vmap(bo);
1543 	if (err)
1544 		goto err_unpin;
1545 
1546 	xe_bo_unlock_vm_held(bo);
1547 
1548 	return bo;
1549 
1550 err_unpin:
1551 	xe_bo_unpin(bo);
1552 err_put:
1553 	xe_bo_unlock_vm_held(bo);
1554 	xe_bo_put(bo);
1555 	return ERR_PTR(err);
1556 }
1557 
1558 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1559 				   struct xe_vm *vm, size_t size,
1560 				   enum ttm_bo_type type, u32 flags)
1561 {
1562 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1563 }
1564 
1565 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1566 				     const void *data, size_t size,
1567 				     enum ttm_bo_type type, u32 flags)
1568 {
1569 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1570 						ALIGN(size, PAGE_SIZE),
1571 						type, flags);
1572 	if (IS_ERR(bo))
1573 		return bo;
1574 
1575 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1576 
1577 	return bo;
1578 }
1579 
1580 static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
1581 {
1582 	xe_bo_unpin_map_no_vm(arg);
1583 }
1584 
1585 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1586 					   size_t size, u32 flags)
1587 {
1588 	struct xe_bo *bo;
1589 	int ret;
1590 
1591 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1592 	if (IS_ERR(bo))
1593 		return bo;
1594 
1595 	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
1596 	if (ret)
1597 		return ERR_PTR(ret);
1598 
1599 	return bo;
1600 }
1601 
1602 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1603 					     const void *data, size_t size, u32 flags)
1604 {
1605 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1606 
1607 	if (IS_ERR(bo))
1608 		return bo;
1609 
1610 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1611 
1612 	return bo;
1613 }
1614 
1615 /**
1616  * xe_managed_bo_reinit_in_vram - Replace a managed system-memory BO with a VRAM copy
1617  * @xe: xe device
1618  * @tile: Tile where the new buffer will be created
1619  * @src: Managed buffer object allocated in system memory
1620  *
1621  * Replace a managed src buffer object allocated in system memory with a new
1622  * one allocated in vram, copying the data between them.
1623  * Buffer object in VRAM is not going to have the same GGTT address, the caller
1624  * is responsible for making sure that any old references to it are updated.
1625  *
1626  * Returns 0 for success, negative error code otherwise.
1627  */
1628 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1629 {
1630 	struct xe_bo *bo;
1631 
1632 	xe_assert(xe, IS_DGFX(xe));
1633 	xe_assert(xe, !(*src)->vmap.is_iomem);
1634 
1635 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, (*src)->size,
1636 					    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
1637 					    XE_BO_CREATE_GGTT_BIT);
1638 	if (IS_ERR(bo))
1639 		return PTR_ERR(bo);
1640 
1641 	drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
1642 	*src = bo;
1643 
1644 	return 0;
1645 }
1646 
1647 /*
1648  * XXX: This is in the VM bind data path; we should likely calculate this once,
1649  * store it, and recalculate if the BO is moved.
1650  */
1651 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1652 {
1653 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1654 
1655 	if (res->mem_type == XE_PL_STOLEN)
1656 		return xe_ttm_stolen_gpu_offset(xe);
1657 
1658 	return res_to_mem_region(res)->dpa_base;
1659 }
1660 
1661 /**
1662  * xe_bo_pin_external - pin an external BO
1663  * @bo: buffer object to be pinned
1664  *
1665  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1666  * BO. Unique call compared to xe_bo_pin as this function has its own set of
1667  * asserts and code to ensure evict / restore on suspend / resume.
1668  *
1669  * Returns 0 for success, negative error code otherwise.
1670  */
1671 int xe_bo_pin_external(struct xe_bo *bo)
1672 {
1673 	struct xe_device *xe = xe_bo_device(bo);
1674 	int err;
1675 
1676 	xe_assert(xe, !bo->vm);
1677 	xe_assert(xe, xe_bo_is_user(bo));
1678 
1679 	if (!xe_bo_is_pinned(bo)) {
1680 		err = xe_bo_validate(bo, NULL, false);
1681 		if (err)
1682 			return err;
1683 
1684 		if (xe_bo_is_vram(bo)) {
1685 			spin_lock(&xe->pinned.lock);
1686 			list_add_tail(&bo->pinned_link,
1687 				      &xe->pinned.external_vram);
1688 			spin_unlock(&xe->pinned.lock);
1689 		}
1690 	}
1691 
1692 	ttm_bo_pin(&bo->ttm);
1693 
1694 	/*
1695 	 * FIXME: If we always use the reserve / unreserve functions for locking
1696 	 * we do not need this.
1697 	 */
1698 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1699 
1700 	return 0;
1701 }
1702 
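/**
 * xe_bo_pin - pin a kernel BO
 * @bo: buffer object to be pinned
 *
 * Validate the BO into one of its allowed placements and pin it there. On
 * DGFX, pinned VRAM objects are also added to the pinned list so they can be
 * evicted and restored at the same physical address across suspend / resume.
 *
 * Returns 0 for success, negative error code otherwise.
 */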
1703 int xe_bo_pin(struct xe_bo *bo)
1704 {
1705 	struct xe_device *xe = xe_bo_device(bo);
1706 	int err;
1707 
1708 	/* We currently don't expect user BO to be pinned */
1709 	xe_assert(xe, !xe_bo_is_user(bo));
1710 
1711 	/* Pinned object must be in GGTT or have pinned flag */
1712 	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
1713 				   XE_BO_CREATE_GGTT_BIT));
1714 
1715 	/*
1716 	 * No reason we can't support pinning imported dma-bufs we just don't
1717 	 * expect to pin an imported dma-buf.
1718 	 */
1719 	xe_assert(xe, !bo->ttm.base.import_attach);
1720 
1721 	/* We only expect at most 1 pin */
1722 	xe_assert(xe, !xe_bo_is_pinned(bo));
1723 
1724 	err = xe_bo_validate(bo, NULL, false);
1725 	if (err)
1726 		return err;
1727 
1728 	/*
1729 	 * Pinned objects on DGFX that are also in VRAM are expected to be in
1730 	 * contiguous VRAM memory, which is required for eviction / restore
1731 	 * during suspend / resume (forcing restore to the same physical address).
1732 	 */
1733 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1734 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1735 		struct ttm_place *place = &(bo->placements[0]);
1736 
1737 		if (mem_type_is_vram(place->mem_type)) {
1738 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1739 
1740 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1741 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1742 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1743 
1744 			spin_lock(&xe->pinned.lock);
1745 			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1746 			spin_unlock(&xe->pinned.lock);
1747 		}
1748 	}
1749 
1750 	ttm_bo_pin(&bo->ttm);
1751 
1752 	/*
1753 	 * FIXME: If we always use the reserve / unreserve functions for locking
1754 	 * we do not need this.
1755 	 */
1756 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1757 
1758 	return 0;
1759 }
1760 
1761 /**
1762  * xe_bo_unpin_external - unpin an external BO
1763  * @bo: buffer object to be unpinned
1764  *
1765  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1766  * BO. Unique call compared to xe_bo_unpin as this function has it own set of
1767  * BO. Unique call compared to xe_bo_unpin as this function has its own set of
1768  *
1771 void xe_bo_unpin_external(struct xe_bo *bo)
1772 {
1773 	struct xe_device *xe = xe_bo_device(bo);
1774 
1775 	xe_assert(xe, !bo->vm);
1776 	xe_assert(xe, xe_bo_is_pinned(bo));
1777 	xe_assert(xe, xe_bo_is_user(bo));
1778 
1779 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1780 		spin_lock(&xe->pinned.lock);
1781 		list_del_init(&bo->pinned_link);
1782 		spin_unlock(&xe->pinned.lock);
1783 	}
1784 
1785 	ttm_bo_unpin(&bo->ttm);
1786 
1787 	/*
1788 	 * FIXME: If we always use the reserve / unreserve functions for locking
1789 	 * we do not need this.
1790 	 */
1791 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1792 }
1793 
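/**
 * xe_bo_unpin - unpin a kernel BO
 * @bo: buffer object to be unpinned
 *
 * Reverse of xe_bo_pin: remove the BO from the pinned list (for DGFX VRAM
 * objects) and drop the TTM pin count.
 */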
1794 void xe_bo_unpin(struct xe_bo *bo)
1795 {
1796 	struct xe_device *xe = xe_bo_device(bo);
1797 
1798 	xe_assert(xe, !bo->ttm.base.import_attach);
1799 	xe_assert(xe, xe_bo_is_pinned(bo));
1800 
1801 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1802 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1803 		struct ttm_place *place = &(bo->placements[0]);
1804 
1805 		if (mem_type_is_vram(place->mem_type)) {
1806 			xe_assert(xe, !list_empty(&bo->pinned_link));
1807 
1808 			spin_lock(&xe->pinned.lock);
1809 			list_del_init(&bo->pinned_link);
1810 			spin_unlock(&xe->pinned.lock);
1811 		}
1812 	}
1813 
1814 	ttm_bo_unpin(&bo->ttm);
1815 }
1816 
1817 /**
1818  * xe_bo_validate() - Make sure the bo is in an allowed placement
1819  * @bo: The bo.
1820  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
1821  *      NULL. Used together with @allow_res_evict.
1822  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1823  *                   reservation object.
1824  *
1825  * Make sure the bo is in allowed placement, migrating it if necessary. If
1826  * needed, other bos will be evicted. If bos selected for eviction shares
1827  * needed, other bos will be evicted. If bos selected for eviction share
1828  * set to true, otherwise they will be bypassed.
1829  *
1830  * Return: 0 on success, negative error code on failure. May return
1831  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1832  */
1833 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1834 {
1835 	struct ttm_operation_ctx ctx = {
1836 		.interruptible = true,
1837 		.no_wait_gpu = false,
1838 	};
1839 
1840 	if (vm) {
1841 		lockdep_assert_held(&vm->lock);
1842 		xe_vm_assert_held(vm);
1843 
1844 		ctx.allow_res_evict = allow_res_evict;
1845 		ctx.resv = xe_vm_resv(vm);
1846 	}
1847 
1848 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1849 }
1850 
1851 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1852 {
1853 	if (bo->destroy == &xe_ttm_bo_destroy)
1854 		return true;
1855 
1856 	return false;
1857 }
1858 
1859 /*
1860  * Resolve a BO address. There is no assert to check if the proper lock is held
1861  * so it should only be used in cases where it is not fatal to get the wrong
1862  * address, such as printing debug information, but not in cases where memory is
1863  * written based on this result.
1864  */
1865 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1866 {
1867 	struct xe_device *xe = xe_bo_device(bo);
1868 	struct xe_res_cursor cur;
1869 	u64 page;
1870 
1871 	xe_assert(xe, page_size <= PAGE_SIZE);
1872 	page = offset >> PAGE_SHIFT;
1873 	offset &= (PAGE_SIZE - 1);
1874 
1875 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1876 		xe_assert(xe, bo->ttm.ttm);
1877 
1878 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1879 				page_size, &cur);
1880 		return xe_res_dma(&cur) + offset;
1881 	} else {
1882 		struct xe_res_cursor cur;
1883 
1884 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1885 			     page_size, &cur);
1886 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1887 	}
1888 }
1889 
1890 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1891 {
1892 	if (!READ_ONCE(bo->ttm.pin_count))
1893 		xe_bo_assert_held(bo);
1894 	return __xe_bo_addr(bo, offset, page_size);
1895 }
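
/*
 * Illustrative sketch, not part of the driver: printing the backing address of
 * a BO's first page for debugging, which is the intended use of the helpers
 * above. Assumes the BO is pinned or its dma_resv lock is held by the caller;
 * the helper name and message format are hypothetical.
 */
static void __maybe_unused example_dbg_print_bo_addr(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	dma_addr_t addr = xe_bo_addr(bo, 0, PAGE_SIZE);

	drm_dbg(&xe->drm, "bo %p first page at %pad\n", bo, &addr);
}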
1896 
1897 int xe_bo_vmap(struct xe_bo *bo)
1898 {
1899 	void *virtual;
1900 	bool is_iomem;
1901 	int ret;
1902 
1903 	xe_bo_assert_held(bo);
1904 
1905 	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
1906 		return -EINVAL;
1907 
1908 	if (!iosys_map_is_null(&bo->vmap))
1909 		return 0;
1910 
1911 	/*
1912 	 * We use this more or less deprecated interface for now since
1913 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1914 	 * single page bos, which is done here.
1915 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1916 	 * to use struct iosys_map.
1917 	 */
1918 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1919 	if (ret)
1920 		return ret;
1921 
1922 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1923 	if (is_iomem)
1924 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1925 	else
1926 		iosys_map_set_vaddr(&bo->vmap, virtual);
1927 
1928 	return 0;
1929 }
1930 
1931 static void __xe_bo_vunmap(struct xe_bo *bo)
1932 {
1933 	if (!iosys_map_is_null(&bo->vmap)) {
1934 		iosys_map_clear(&bo->vmap);
1935 		ttm_bo_kunmap(&bo->kmap);
1936 	}
1937 }
1938 
1939 void xe_bo_vunmap(struct xe_bo *bo)
1940 {
1941 	xe_bo_assert_held(bo);
1942 	__xe_bo_vunmap(bo);
1943 }
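
/*
 * Illustrative sketch, not part of the driver: CPU-writing a dword into a
 * kernel BO through its vmap. Assumes the BO was created with
 * XE_BO_NEEDS_CPU_ACCESS; xe_map_wr() from xe_map.h hides the iomem vs.
 * system-memory distinction. The helper name is hypothetical.
 */
static int __maybe_unused example_write_dword(struct xe_bo *bo, u64 offset, u32 val)
{
	struct xe_device *xe = xe_bo_device(bo);
	int err;

	xe_bo_lock(bo, false);
	err = xe_bo_vmap(bo);
	if (!err) {
		xe_map_wr(xe, &bo->vmap, offset, u32, val);
		xe_bo_vunmap(bo);
	}
	xe_bo_unlock(bo);

	return err;
}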
1944 
1945 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
1946 			struct drm_file *file)
1947 {
1948 	struct xe_device *xe = to_xe_device(dev);
1949 	struct xe_file *xef = to_xe_file(file);
1950 	struct drm_xe_gem_create *args = data;
1951 	struct xe_vm *vm = NULL;
1952 	struct xe_bo *bo;
1953 	unsigned int bo_flags;
1954 	u32 handle;
1955 	int err;
1956 
1957 	if (XE_IOCTL_DBG(xe, args->extensions) ||
1958 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
1959 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1960 		return -EINVAL;
1961 
1962 	/* at least one valid memory placement must be specified */
1963 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
1964 			 !args->placement))
1965 		return -EINVAL;
1966 
1967 	if (XE_IOCTL_DBG(xe, args->flags &
1968 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
1969 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
1970 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
1971 		return -EINVAL;
1972 
1973 	if (XE_IOCTL_DBG(xe, args->handle))
1974 		return -EINVAL;
1975 
1976 	if (XE_IOCTL_DBG(xe, !args->size))
1977 		return -EINVAL;
1978 
1979 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
1980 		return -EINVAL;
1981 
1982 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
1983 		return -EINVAL;
1984 
1985 	bo_flags = 0;
1986 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
1987 		bo_flags |= XE_BO_DEFER_BACKING;
1988 
1989 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
1990 		bo_flags |= XE_BO_SCANOUT_BIT;
1991 
1992 	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
1993 
1994 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
1995 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
1996 			return -EINVAL;
1997 
1998 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
1999 	}
2000 
2001 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
2002 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
2003 		return -EINVAL;
2004 
2005 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
2006 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
2007 		return -EINVAL;
2008 
2009 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
2010 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
2011 		return -EINVAL;
2012 
2013 	if (args->vm_id) {
2014 		vm = xe_vm_lookup(xef, args->vm_id);
2015 		if (XE_IOCTL_DBG(xe, !vm))
2016 			return -ENOENT;
2017 		err = xe_vm_lock(vm, true);
2018 		if (err)
2019 			goto out_vm;
2020 	}
2021 
2022 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
2023 			       ttm_bo_type_device, bo_flags);
2024 
2025 	if (vm)
2026 		xe_vm_unlock(vm);
2027 
2028 	if (IS_ERR(bo)) {
2029 		err = PTR_ERR(bo);
2030 		goto out_vm;
2031 	}
2032 
2033 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
2034 	if (err)
2035 		goto out_bulk;
2036 
2037 	args->handle = handle;
2038 	goto out_put;
2039 
2040 out_bulk:
2041 	if (vm && !xe_vm_in_fault_mode(vm)) {
2042 		xe_vm_lock(vm, false);
2043 		__xe_bo_unset_bulk_move(bo);
2044 		xe_vm_unlock(vm);
2045 	}
2046 out_put:
2047 	xe_bo_put(bo);
2048 out_vm:
2049 	if (vm)
2050 		xe_vm_put(vm);
2051 
2052 	return err;
2053 }
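
/*
 * Illustrative userspace sketch, not part of the driver: how a client might
 * exercise the ioctl above. Kept inside a comment since it is not kernel code;
 * the placement mask is an assumption and should really be derived from the
 * memory-regions device query.
 *
 *	struct drm_xe_gem_create create = {
 *		.size = 1 << 20,			// 1 MiB, page aligned
 *		.placement = 1 << 0,			// assumed: system memory instance 0
 *		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB, // WC is required for VRAM/scanout
 *	};
 *
 *	if (ioctl(drm_fd, DRM_IOCTL_XE_GEM_CREATE, &create) == 0)
 *		gem_handle = create.handle;
 */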
2054 
2055 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2056 			     struct drm_file *file)
2057 {
2058 	struct xe_device *xe = to_xe_device(dev);
2059 	struct drm_xe_gem_mmap_offset *args = data;
2060 	struct drm_gem_object *gem_obj;
2061 
2062 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2063 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2064 		return -EINVAL;
2065 
2066 	if (XE_IOCTL_DBG(xe, args->flags))
2067 		return -EINVAL;
2068 
2069 	gem_obj = drm_gem_object_lookup(file, args->handle);
2070 	if (XE_IOCTL_DBG(xe, !gem_obj))
2071 		return -ENOENT;
2072 
2073 	/* The mmap offset was set up at BO allocation time. */
2074 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2075 
2076 	xe_bo_put(gem_to_xe_bo(gem_obj));
2077 	return 0;
2078 }
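
/*
 * Illustrative userspace sketch, not part of the driver: turning a GEM handle
 * into a CPU mapping using the fake offset returned by the ioctl above. Kept
 * inside a comment since it is not kernel code.
 *
 *	struct drm_xe_gem_mmap_offset mmo = { .handle = gem_handle };
 *
 *	if (ioctl(drm_fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo) == 0)
 *		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			   drm_fd, mmo.offset);
 */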
2079 
2080 /**
2081  * xe_bo_lock() - Lock the buffer object's dma_resv object
2082  * @bo: The struct xe_bo whose lock is to be taken
2083  * @intr: Whether to perform any wait interruptible
2084  *
2085  * Locks the buffer object's dma_resv object. If the buffer object is
2086  * pointing to a shared dma_resv object, that shared lock is locked.
2087  *
2088  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2089  * contended lock was interrupted. If @intr is set to false, the
2090  * function always returns 0.
2091  */
2092 int xe_bo_lock(struct xe_bo *bo, bool intr)
2093 {
2094 	if (intr)
2095 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2096 
2097 	dma_resv_lock(bo->ttm.base.resv, NULL);
2098 
2099 	return 0;
2100 }
2101 
2102 /**
2103  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2104  * @bo: The struct xe_bo whose lock is to be released.
2105  *
2106  * Unlock a buffer object lock that was locked by xe_bo_lock().
2107  */
2108 void xe_bo_unlock(struct xe_bo *bo)
2109 {
2110 	dma_resv_unlock(bo->ttm.base.resv);
2111 }
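
/*
 * Illustrative sketch, not part of the driver: taking the BO lock
 * interruptibly, as an ioctl path would, and propagating -EINTR so userspace
 * can retry. The helper name is hypothetical.
 */
static int __maybe_unused example_query_mem_type(struct xe_bo *bo, u32 *mem_type)
{
	int err;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	*mem_type = bo->ttm.resource->mem_type;
	xe_bo_unlock(bo);

	return 0;
}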
2112 
2113 /**
2114  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2115  * @bo: The buffer object to migrate
2116  * @mem_type: The TTM memory type intended to migrate to
2117  *
2118  * Check whether the buffer object supports migration to the
2119  * given memory type. Note that pinning may prevent migration even when
2120  * this function returns true.
2121  *
2122  * This function is primarily intended as a helper for checking the
2123  * possibility to migrate buffer objects and can be called without
2124  * the object lock held.
2125  *
2126  * Return: true if migration is possible, false otherwise.
2127  */
2128 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2129 {
2130 	unsigned int cur_place;
2131 
2132 	if (bo->ttm.type == ttm_bo_type_kernel)
2133 		return true;
2134 
2135 	if (bo->ttm.type == ttm_bo_type_sg)
2136 		return false;
2137 
2138 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2139 	     cur_place++) {
2140 		if (bo->placements[cur_place].mem_type == mem_type)
2141 			return true;
2142 	}
2143 
2144 	return false;
2145 }
2146 
2147 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2148 {
2149 	memset(place, 0, sizeof(*place));
2150 	place->mem_type = mem_type;
2151 }
2152 
2153 /**
2154  * xe_bo_migrate - Migrate an object to the desired region id
2155  * @bo: The buffer object to migrate.
2156  * @mem_type: The TTM region type to migrate to.
2157  *
2158  * Attempt to migrate the buffer object to the desired memory region. The
2159  * buffer object may not be pinned, and must be locked.
2160  * On successful completion, the object memory type will be updated,
2161  * but an async migration task may not have completed yet, and to
2162  * accomplish that, the object's kernel fences must be signaled with
2163  * the object lock held.
2164  *
2165  * Return: 0 on success. Negative error code on failure. In particular may
2166  * return -EINTR or -ERESTARTSYS if signal pending.
2167  */
2168 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2169 {
2170 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2171 	struct ttm_operation_ctx ctx = {
2172 		.interruptible = true,
2173 		.no_wait_gpu = false,
2174 	};
2175 	struct ttm_placement placement;
2176 	struct ttm_place requested;
2177 
2178 	xe_bo_assert_held(bo);
2179 
2180 	if (bo->ttm.resource->mem_type == mem_type)
2181 		return 0;
2182 
2183 	if (xe_bo_is_pinned(bo))
2184 		return -EBUSY;
2185 
2186 	if (!xe_bo_can_migrate(bo, mem_type))
2187 		return -EINVAL;
2188 
2189 	xe_place_from_ttm_type(mem_type, &requested);
2190 	placement.num_placement = 1;
2191 	placement.placement = &requested;
2192 
2193 	/*
2194 	 * Stolen needs to be handled like the VRAM handling below if we ever
2195 	 * need to support it.
2196 	 */
2197 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2198 
2199 	if (mem_type_is_vram(mem_type)) {
2200 		u32 c = 0;
2201 
2202 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2203 	}
2204 
2205 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2206 }
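
/*
 * Illustrative sketch, not part of the driver: migrating a BO to the TT (GTT)
 * placement and waiting for the async move to complete, mirroring the fence
 * wait done by xe_bo_evict() below. The helper name is hypothetical.
 */
static int __maybe_unused example_migrate_to_tt(struct xe_bo *bo)
{
	int err;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	if (!xe_bo_can_migrate(bo, XE_PL_TT)) {
		err = -EINVAL;
		goto out_unlock;
	}

	err = xe_bo_migrate(bo, XE_PL_TT);
	if (err)
		goto out_unlock;

	/* The move may still be in flight; wait for the kernel fences. */
	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
			      false, MAX_SCHEDULE_TIMEOUT);

out_unlock:
	xe_bo_unlock(bo);
	return err;
}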
2207 
2208 /**
2209  * xe_bo_evict - Evict an object to evict placement
2210  * @bo: The buffer object to migrate.
2211  * @force_alloc: Set force_alloc in ttm_operation_ctx
2212  *
2213  * On successful completion, the object memory will be moved to the evict
2214  * placement. This function blocks until the object has been fully moved.
2215  *
2216  * Return: 0 on success. Negative error code on failure.
2217  */
2218 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2219 {
2220 	struct ttm_operation_ctx ctx = {
2221 		.interruptible = false,
2222 		.no_wait_gpu = false,
2223 		.force_alloc = force_alloc,
2224 	};
2225 	struct ttm_placement placement;
2226 	int ret;
2227 
2228 	xe_evict_flags(&bo->ttm, &placement);
2229 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2230 	if (ret)
2231 		return ret;
2232 
2233 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2234 			      false, MAX_SCHEDULE_TIMEOUT);
2235 
2236 	return 0;
2237 }
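
/*
 * Illustrative sketch, not part of the driver: forcing a BO out to its evict
 * placement under its lock. The wrapper name and the force_alloc choice are
 * hypothetical.
 */
static int __maybe_unused example_force_evict(struct xe_bo *bo)
{
	int err;

	xe_bo_lock(bo, false);
	err = xe_bo_evict(bo, true);
	xe_bo_unlock(bo);

	return err;
}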
2238 
2239 /**
2240  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2241  * placed in system memory.
2242  * @bo: The xe_bo
2243  *
2244  * Return: true if extra pages need to be allocated, false otherwise.
2245  */
2246 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2247 {
2248 	struct xe_device *xe = xe_bo_device(bo);
2249 
2250 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2251 		return false;
2252 
2253 	/* On discrete GPUs, if the GPU can access this buffer from
2254 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2255 	 * can't be used since there's no CCS storage associated with
2256 	 * non-VRAM addresses.
2257 	 */
2258 	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
2259 		return false;
2260 
2261 	return true;
2262 }
2263 
2264 /**
2265  * __xe_bo_release_dummy() - Dummy kref release function
2266  * @kref: The embedded struct kref.
2267  *
2268  * Dummy release function for xe_bo_put_deferred(). Keep off.
2269  */
2270 void __xe_bo_release_dummy(struct kref *kref)
2271 {
2272 }
2273 
2274 /**
2275  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2276  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2277  *
2278  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2279  * The @deferred list can be either an onstack local list or a global
2280  * shared list used by a workqueue.
2281  */
2282 void xe_bo_put_commit(struct llist_head *deferred)
2283 {
2284 	struct llist_node *freed;
2285 	struct xe_bo *bo, *next;
2286 
2287 	if (!deferred)
2288 		return;
2289 
2290 	freed = llist_del_all(deferred);
2291 	if (!freed)
2292 		return;
2293 
2294 	llist_for_each_entry_safe(bo, next, freed, freed)
2295 		drm_gem_object_free(&bo->ttm.base.refcount);
2296 }
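
/*
 * Illustrative sketch, not part of the driver: dropping the final references
 * to a set of BOs from a context where freeing them directly is undesirable,
 * using xe_bo_put_deferred() from xe_bo.h together with xe_bo_put_commit().
 * The helper name is hypothetical.
 */
static void __maybe_unused example_put_many(struct xe_bo **bos, unsigned int count)
{
	LLIST_HEAD(deferred);
	unsigned int i;

	for (i = 0; i < count; i++)
		xe_bo_put_deferred(bos[i], &deferred);

	/* Free the BOs whose last reference was dropped above. */
	xe_bo_put_commit(&deferred);
}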
2297 
2298 /**
2299  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2300  * @file_priv: ...
2301  * @dev: ...
2302  * @args: ...
2303  *
2304  * See dumb_create() hook in include/drm/drm_drv.h
2305  *
2306  * Return: ...
2307  */
2308 int xe_bo_dumb_create(struct drm_file *file_priv,
2309 		      struct drm_device *dev,
2310 		      struct drm_mode_create_dumb *args)
2311 {
2312 	struct xe_device *xe = to_xe_device(dev);
2313 	struct xe_bo *bo;
2314 	uint32_t handle;
2315 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2316 	int err;
2317 	u32 page_size = max_t(u32, PAGE_SIZE,
2318 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2319 
2320 	args->pitch = ALIGN(args->width * cpp, 64);
2321 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2322 			   page_size);
2323 
2324 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2325 			       DRM_XE_GEM_CPU_CACHING_WC,
2326 			       ttm_bo_type_device,
2327 			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2328 			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
2329 			       XE_BO_NEEDS_CPU_ACCESS);
2330 	if (IS_ERR(bo))
2331 		return PTR_ERR(bo);
2332 
2333 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2334 	/* drop reference from allocate - handle holds it now */
2335 	drm_gem_object_put(&bo->ttm.base);
2336 	if (!err)
2337 		args->handle = handle;
2338 	return err;
2339 }
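
/*
 * Worked example of the size computation above, for a 1920x1080, 32 bpp dumb
 * buffer on a device with 4 KiB pages:
 *	cpp   = DIV_ROUND_UP(32, 8)       = 4
 *	pitch = ALIGN(1920 * 4, 64)       = 7680
 *	size  = ALIGN(7680 * 1080, SZ_4K) = 8294400 (2025 pages)
 * With XE_VRAM_FLAGS_NEED64K the size rounds up to 8323072 (127 * SZ_64K).
 */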
2340 
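/**
 * xe_bo_runtime_pm_release_mmap_offset() - Revoke CPU mappings of a bo's mmap offset
 * @bo: The buffer object
 *
 * Unmaps any CPU mappings established through the bo's fake mmap offset and
 * removes the bo from the device's vram_userfault list. As the function name
 * suggests, this is intended for runtime PM paths where CPU access has to be
 * re-faulted later.
 */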
2341 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2342 {
2343 	struct ttm_buffer_object *tbo = &bo->ttm;
2344 	struct ttm_device *bdev = tbo->bdev;
2345 
2346 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2347 
2348 	list_del_init(&bo->vram_userfault_link);
2349 }
2350 
2351 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2352 #include "tests/xe_bo.c"
2353 #endif
2354