xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision ff9fbcafbaf13346c742c0d672a22f5ac20b9d92)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <drm/xe_drm.h>
17 
18 #include "xe_device.h"
19 #include "xe_dma_buf.h"
20 #include "xe_drm_client.h"
21 #include "xe_ggtt.h"
22 #include "xe_gt.h"
23 #include "xe_map.h"
24 #include "xe_migrate.h"
25 #include "xe_pm.h"
26 #include "xe_preempt_fence.h"
27 #include "xe_res_cursor.h"
28 #include "xe_trace.h"
29 #include "xe_ttm_stolen_mgr.h"
30 #include "xe_vm.h"
31 
32 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
33 	[XE_PL_SYSTEM] = "system",
34 	[XE_PL_TT] = "gtt",
35 	[XE_PL_VRAM0] = "vram0",
36 	[XE_PL_VRAM1] = "vram1",
37 	[XE_PL_STOLEN] = "stolen"
38 };
39 
40 static const struct ttm_place sys_placement_flags = {
41 	.fpfn = 0,
42 	.lpfn = 0,
43 	.mem_type = XE_PL_SYSTEM,
44 	.flags = 0,
45 };
46 
47 static struct ttm_placement sys_placement = {
48 	.num_placement = 1,
49 	.placement = &sys_placement_flags,
50 };
51 
52 static const struct ttm_place tt_placement_flags[] = {
53 	{
54 		.fpfn = 0,
55 		.lpfn = 0,
56 		.mem_type = XE_PL_TT,
57 		.flags = TTM_PL_FLAG_DESIRED,
58 	},
59 	{
60 		.fpfn = 0,
61 		.lpfn = 0,
62 		.mem_type = XE_PL_SYSTEM,
63 		.flags = TTM_PL_FLAG_FALLBACK,
64 	}
65 };
66 
67 static struct ttm_placement tt_placement = {
68 	.num_placement = 2,
69 	.placement = tt_placement_flags,
70 };
71 
72 bool mem_type_is_vram(u32 mem_type)
73 {
74 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
75 }
76 
77 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
78 {
79 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
80 }
81 
82 static bool resource_is_vram(struct ttm_resource *res)
83 {
84 	return mem_type_is_vram(res->mem_type);
85 }
86 
87 bool xe_bo_is_vram(struct xe_bo *bo)
88 {
89 	return resource_is_vram(bo->ttm.resource) ||
90 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
91 }
92 
93 bool xe_bo_is_stolen(struct xe_bo *bo)
94 {
95 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
96 }
97 
98 /**
99  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
100  * @bo: The BO
101  *
102  * The stolen memory is accessed through the PCI BAR for both DGFX and some
103  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
104  *
105  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
106  */
107 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
108 {
109 	return xe_bo_is_stolen(bo) &&
110 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
111 }
112 
113 static bool xe_bo_is_user(struct xe_bo *bo)
114 {
115 	return bo->flags & XE_BO_FLAG_USER;
116 }
117 
118 static struct xe_migrate *
119 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
120 {
121 	struct xe_tile *tile;
122 
123 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
124 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
125 	return tile->migrate;
126 }
127 
128 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
129 {
130 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
131 	struct ttm_resource_manager *mgr;
132 
133 	xe_assert(xe, resource_is_vram(res));
134 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
135 	return to_xe_ttm_vram_mgr(mgr)->vram;
136 }
137 
138 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
139 			   u32 bo_flags, u32 *c)
140 {
141 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
142 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
143 
144 		bo->placements[*c] = (struct ttm_place) {
145 			.mem_type = XE_PL_TT,
146 		};
147 		*c += 1;
148 	}
149 }
150 
151 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
152 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
153 {
154 	struct ttm_place place = { .mem_type = mem_type };
155 	struct xe_mem_region *vram;
156 	u64 io_size;
157 
158 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
159 
160 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
161 	xe_assert(xe, vram && vram->usable_size);
162 	io_size = vram->io_size;
163 
164 	/*
165 	 * For eviction / restore on suspend / resume, objects
166 	 * pinned in VRAM must be contiguous.
167 	 */
168 	if (bo_flags & (XE_BO_FLAG_PINNED |
169 			XE_BO_FLAG_GGTT))
170 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
171 
172 	if (io_size < vram->usable_size) {
173 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
174 			place.fpfn = 0;
175 			place.lpfn = io_size >> PAGE_SHIFT;
176 		} else {
177 			place.flags |= TTM_PL_FLAG_TOPDOWN;
178 		}
179 	}
180 	places[*c] = place;
181 	*c += 1;
182 }
183 
184 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
185 			 u32 bo_flags, u32 *c)
186 {
187 	if (bo_flags & XE_BO_FLAG_VRAM0)
188 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
189 	if (bo_flags & XE_BO_FLAG_VRAM1)
190 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
191 }
192 
193 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
194 			   u32 bo_flags, u32 *c)
195 {
196 	if (bo_flags & XE_BO_FLAG_STOLEN) {
197 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
198 
199 		bo->placements[*c] = (struct ttm_place) {
200 			.mem_type = XE_PL_STOLEN,
201 			.flags = bo_flags & (XE_BO_FLAG_PINNED |
202 					     XE_BO_FLAG_GGTT) ?
203 				TTM_PL_FLAG_CONTIGUOUS : 0,
204 		};
205 		*c += 1;
206 	}
207 }
208 
209 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
210 				       u32 bo_flags)
211 {
212 	u32 c = 0;
213 
214 	try_add_vram(xe, bo, bo_flags, &c);
215 	try_add_system(xe, bo, bo_flags, &c);
216 	try_add_stolen(xe, bo, bo_flags, &c);
217 
218 	if (!c)
219 		return -EINVAL;
220 
221 	bo->placement = (struct ttm_placement) {
222 		.num_placement = c,
223 		.placement = bo->placements,
224 	};
225 
226 	return 0;
227 }
228 
229 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
230 			      u32 bo_flags)
231 {
232 	xe_bo_assert_held(bo);
233 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
234 }
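
/*
 * Illustrative sketch (not part of the driver): how the try_add_*() helpers
 * above turn BO flags into a TTM placement list. Assuming a DGFX device, a BO
 * created with XE_BO_FLAG_VRAM0 | XE_BO_FLAG_SYSTEM ends up with:
 *
 *	bo->placements[0].mem_type == XE_PL_VRAM0	(from try_add_vram())
 *	bo->placements[1].mem_type == XE_PL_TT		(from try_add_system())
 *	bo->placement.num_placement == 2
 *
 * TTM tries the placements in array order when validating, so VRAM is
 * preferred and TT / system memory acts as the fallback.
 */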
235 
236 static void xe_evict_flags(struct ttm_buffer_object *tbo,
237 			   struct ttm_placement *placement)
238 {
239 	if (!xe_bo_is_xe_bo(tbo)) {
240 		/* Don't handle scatter gather BOs */
241 		if (tbo->type == ttm_bo_type_sg) {
242 			placement->num_placement = 0;
243 			return;
244 		}
245 
246 		*placement = sys_placement;
247 		return;
248 	}
249 
250 	/*
251 	 * For xe, sg bos that are evicted to system just trigger a
252 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
253 	 */
254 	switch (tbo->resource->mem_type) {
255 	case XE_PL_VRAM0:
256 	case XE_PL_VRAM1:
257 	case XE_PL_STOLEN:
258 		*placement = tt_placement;
259 		break;
260 	case XE_PL_TT:
261 	default:
262 		*placement = sys_placement;
263 		break;
264 	}
265 }
266 
267 struct xe_ttm_tt {
268 	struct ttm_tt ttm;
269 	struct device *dev;
270 	struct sg_table sgt;
271 	struct sg_table *sg;
272 };
273 
274 static int xe_tt_map_sg(struct ttm_tt *tt)
275 {
276 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
277 	unsigned long num_pages = tt->num_pages;
278 	int ret;
279 
280 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
281 
282 	if (xe_tt->sg)
283 		return 0;
284 
285 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
286 						num_pages, 0,
287 						(u64)num_pages << PAGE_SHIFT,
288 						xe_sg_segment_size(xe_tt->dev),
289 						GFP_KERNEL);
290 	if (ret)
291 		return ret;
292 
293 	xe_tt->sg = &xe_tt->sgt;
294 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
295 			      DMA_ATTR_SKIP_CPU_SYNC);
296 	if (ret) {
297 		sg_free_table(xe_tt->sg);
298 		xe_tt->sg = NULL;
299 		return ret;
300 	}
301 
302 	return 0;
303 }
304 
305 struct sg_table *xe_bo_sg(struct xe_bo *bo)
306 {
307 	struct ttm_tt *tt = bo->ttm.ttm;
308 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
309 
310 	return xe_tt->sg;
311 }
312 
313 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
314 				       u32 page_flags)
315 {
316 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
317 	struct xe_device *xe = xe_bo_device(bo);
318 	struct xe_ttm_tt *tt;
319 	unsigned long extra_pages;
320 	enum ttm_caching caching = ttm_cached;
321 	int err;
322 
323 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
324 	if (!tt)
325 		return NULL;
326 
327 	tt->dev = xe->drm.dev;
328 
329 	extra_pages = 0;
330 	if (xe_bo_needs_ccs_pages(bo))
331 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
332 					   PAGE_SIZE);
333 
334 	/*
335 	 * DGFX system memory is always WB / ttm_cached, since
336 	 * other caching modes are only supported on x86. DGFX
337 	 * GPU system memory accesses are always coherent with the
338 	 * CPU.
339 	 */
340 	if (!IS_DGFX(xe)) {
341 		switch (bo->cpu_caching) {
342 		case DRM_XE_GEM_CPU_CACHING_WC:
343 			caching = ttm_write_combined;
344 			break;
345 		default:
346 			caching = ttm_cached;
347 			break;
348 		}
349 
350 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
351 
352 		/*
353 		 * Display scanout is always non-coherent with the CPU cache.
354 		 *
355 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
356 		 * non-coherent and require a CPU:WC mapping.
357 		 */
358 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
359 		    (xe->info.graphics_verx100 >= 1270 &&
360 		     bo->flags & XE_BO_FLAG_PAGETABLE))
361 			caching = ttm_write_combined;
362 	}
363 
364 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
365 	if (err) {
366 		kfree(tt);
367 		return NULL;
368 	}
369 
370 	return &tt->ttm;
371 }
372 
373 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
374 			      struct ttm_operation_ctx *ctx)
375 {
376 	int err;
377 
378 	/*
379 	 * dma-bufs are not populated with pages, and the dma-
380 	 * addresses are set up when moved to XE_PL_TT.
381 	 */
382 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
383 		return 0;
384 
385 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
386 	if (err)
387 		return err;
388 
389 	/* A follow-up may move this to xe_bo_move() when the BO is moved to XE_PL_TT */
390 	err = xe_tt_map_sg(tt);
391 	if (err)
392 		ttm_pool_free(&ttm_dev->pool, tt);
393 
394 	return err;
395 }
396 
397 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
398 {
399 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
400 
401 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
402 		return;
403 
404 	if (xe_tt->sg) {
405 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
406 				  DMA_BIDIRECTIONAL, 0);
407 		sg_free_table(xe_tt->sg);
408 		xe_tt->sg = NULL;
409 	}
410 
411 	return ttm_pool_free(&ttm_dev->pool, tt);
412 }
413 
414 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
415 {
416 	ttm_tt_fini(tt);
417 	kfree(tt);
418 }
419 
420 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
421 				 struct ttm_resource *mem)
422 {
423 	struct xe_device *xe = ttm_to_xe_device(bdev);
424 
425 	switch (mem->mem_type) {
426 	case XE_PL_SYSTEM:
427 	case XE_PL_TT:
428 		return 0;
429 	case XE_PL_VRAM0:
430 	case XE_PL_VRAM1: {
431 		struct xe_ttm_vram_mgr_resource *vres =
432 			to_xe_ttm_vram_mgr_resource(mem);
433 		struct xe_mem_region *vram = res_to_mem_region(mem);
434 
435 		if (vres->used_visible_size < mem->size)
436 			return -EINVAL;
437 
438 		mem->bus.offset = mem->start << PAGE_SHIFT;
439 
440 		if (vram->mapping &&
441 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
442 			mem->bus.addr = (u8 __force *)vram->mapping +
443 				mem->bus.offset;
444 
445 		mem->bus.offset += vram->io_start;
446 		mem->bus.is_iomem = true;
447 
448 #if !defined(CONFIG_X86)
449 		mem->bus.caching = ttm_write_combined;
450 #endif
451 		return 0;
452 	} case XE_PL_STOLEN:
453 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
454 	default:
455 		return -EINVAL;
456 	}
457 }
458 
459 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
460 				const struct ttm_operation_ctx *ctx)
461 {
462 	struct dma_resv_iter cursor;
463 	struct dma_fence *fence;
464 	struct drm_gem_object *obj = &bo->ttm.base;
465 	struct drm_gpuvm_bo *vm_bo;
466 	bool idle = false;
467 	int ret = 0;
468 
469 	dma_resv_assert_held(bo->ttm.base.resv);
470 
471 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
472 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
473 				    DMA_RESV_USAGE_BOOKKEEP);
474 		dma_resv_for_each_fence_unlocked(&cursor, fence)
475 			dma_fence_enable_sw_signaling(fence);
476 		dma_resv_iter_end(&cursor);
477 	}
478 
479 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
480 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
481 		struct drm_gpuva *gpuva;
482 
483 		if (!xe_vm_in_fault_mode(vm)) {
484 			drm_gpuvm_bo_evict(vm_bo, true);
485 			continue;
486 		}
487 
488 		if (!idle) {
489 			long timeout;
490 
491 			if (ctx->no_wait_gpu &&
492 			    !dma_resv_test_signaled(bo->ttm.base.resv,
493 						    DMA_RESV_USAGE_BOOKKEEP))
494 				return -EBUSY;
495 
496 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
497 							DMA_RESV_USAGE_BOOKKEEP,
498 							ctx->interruptible,
499 							MAX_SCHEDULE_TIMEOUT);
500 			if (!timeout)
501 				return -ETIME;
502 			if (timeout < 0)
503 				return timeout;
504 
505 			idle = true;
506 		}
507 
508 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
509 			struct xe_vma *vma = gpuva_to_vma(gpuva);
510 
511 			trace_xe_vma_evict(vma);
512 			ret = xe_vm_invalidate_vma(vma);
513 			if (XE_WARN_ON(ret))
514 				return ret;
515 		}
516 	}
517 
518 	return ret;
519 }
520 
521 /*
522  * The dma-buf map_attachment() / unmap_attachment() calls are hooked up here.
523  * Note that unmapping the attachment is deferred to the next
524  * map_attachment time, or to bo destroy (after idling), whichever comes first.
525  * This is to avoid syncing before unmap_attachment(), assuming that the
526  * caller relies on idling the reservation object before moving the
527  * backing store out. Should that assumption not hold, then we will be able
528  * to unconditionally call unmap_attachment() when moving out to system.
529  */
530 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
531 			     struct ttm_resource *new_res)
532 {
533 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
534 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
535 					       ttm);
536 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
537 	struct sg_table *sg;
538 
539 	xe_assert(xe, attach);
540 	xe_assert(xe, ttm_bo->ttm);
541 
542 	if (new_res->mem_type == XE_PL_SYSTEM)
543 		goto out;
544 
545 	if (ttm_bo->sg) {
546 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
547 		ttm_bo->sg = NULL;
548 	}
549 
550 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
551 	if (IS_ERR(sg))
552 		return PTR_ERR(sg);
553 
554 	ttm_bo->sg = sg;
555 	xe_tt->sg = sg;
556 
557 out:
558 	ttm_bo_move_null(ttm_bo, new_res);
559 
560 	return 0;
561 }
562 
563 /**
564  * xe_bo_move_notify - Notify subsystems of a pending move
565  * @bo: The buffer object
566  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
567  *
568  * This function notifies subsystems of an upcoming buffer move.
569  * Upon receiving such a notification, subsystems should schedule
570  * halting access to the underlying pages and optionally add a fence
571  * to the buffer object's dma_resv object that signals when access is
572  * stopped. The caller will wait on all dma_resv fences before
573  * starting the move.
574  *
575  * A subsystem may commence access to the object after obtaining
576  * bindings to the new backing memory under the object lock.
577  *
578  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
579  * negative error code on error.
580  */
581 static int xe_bo_move_notify(struct xe_bo *bo,
582 			     const struct ttm_operation_ctx *ctx)
583 {
584 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
585 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
586 	struct ttm_resource *old_mem = ttm_bo->resource;
587 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
588 	int ret;
589 
590 	/*
591 	 * If this starts to call into many components, consider
592 	 * using a notification chain here.
593 	 */
594 
595 	if (xe_bo_is_pinned(bo))
596 		return -EINVAL;
597 
598 	xe_bo_vunmap(bo);
599 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
600 	if (ret)
601 		return ret;
602 
603 	/* Don't call move_notify() for imported dma-bufs. */
604 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
605 		dma_buf_move_notify(ttm_bo->base.dma_buf);
606 
607 	/*
608 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
609 	 * so if we moved from VRAM make sure to unlink this from the userfault
610 	 * tracking.
611 	 */
612 	if (mem_type_is_vram(old_mem_type)) {
613 		mutex_lock(&xe->mem_access.vram_userfault.lock);
614 		if (!list_empty(&bo->vram_userfault_link))
615 			list_del_init(&bo->vram_userfault_link);
616 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
617 	}
618 
619 	return 0;
620 }
621 
622 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
623 		      struct ttm_operation_ctx *ctx,
624 		      struct ttm_resource *new_mem,
625 		      struct ttm_place *hop)
626 {
627 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
628 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
629 	struct ttm_resource *old_mem = ttm_bo->resource;
630 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
631 	struct ttm_tt *ttm = ttm_bo->ttm;
632 	struct xe_migrate *migrate = NULL;
633 	struct dma_fence *fence;
634 	bool move_lacks_source;
635 	bool tt_has_data;
636 	bool needs_clear;
637 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
638 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
639 	int ret = 0;
640 	/* Bo creation path, moving to system or TT. */
641 	if ((!old_mem && ttm) && !handle_system_ccs) {
642 		ttm_bo_move_null(ttm_bo, new_mem);
643 		return 0;
644 	}
645 
646 	if (ttm_bo->type == ttm_bo_type_sg) {
647 		ret = xe_bo_move_notify(bo, ctx);
648 		if (!ret)
649 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
650 		goto out;
651 	}
652 
653 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
654 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
655 
656 	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
657 						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
658 
659 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
660 		(!ttm && ttm_bo->type == ttm_bo_type_device);
661 
662 	if ((move_lacks_source && !needs_clear)) {
663 		ttm_bo_move_null(ttm_bo, new_mem);
664 		goto out;
665 	}
666 
667 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
668 		ttm_bo_move_null(ttm_bo, new_mem);
669 		goto out;
670 	}
671 
672 	/*
673 	 * Failed multi-hop where the old_mem is still marked as
674 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
675 	 */
676 	if (old_mem_type == XE_PL_TT &&
677 	    new_mem->mem_type == XE_PL_TT) {
678 		ttm_bo_move_null(ttm_bo, new_mem);
679 		goto out;
680 	}
681 
682 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
683 		ret = xe_bo_move_notify(bo, ctx);
684 		if (ret)
685 			goto out;
686 	}
687 
688 	if (old_mem_type == XE_PL_TT &&
689 	    new_mem->mem_type == XE_PL_SYSTEM) {
690 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
691 						     DMA_RESV_USAGE_BOOKKEEP,
692 						     true,
693 						     MAX_SCHEDULE_TIMEOUT);
694 		if (timeout < 0) {
695 			ret = timeout;
696 			goto out;
697 		}
698 
699 		if (!handle_system_ccs) {
700 			ttm_bo_move_null(ttm_bo, new_mem);
701 			goto out;
702 		}
703 	}
704 
705 	if (!move_lacks_source &&
706 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
707 	     (mem_type_is_vram(old_mem_type) &&
708 	      new_mem->mem_type == XE_PL_SYSTEM))) {
709 		hop->fpfn = 0;
710 		hop->lpfn = 0;
711 		hop->mem_type = XE_PL_TT;
712 		hop->flags = TTM_PL_FLAG_TEMPORARY;
713 		ret = -EMULTIHOP;
714 		goto out;
715 	}
716 
717 	if (bo->tile)
718 		migrate = bo->tile->migrate;
719 	else if (resource_is_vram(new_mem))
720 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
721 	else if (mem_type_is_vram(old_mem_type))
722 		migrate = mem_type_to_migrate(xe, old_mem_type);
723 	else
724 		migrate = xe->tiles[0].migrate;
725 
726 	xe_assert(xe, migrate);
727 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
728 	xe_pm_runtime_get_noresume(xe);
729 
730 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
731 		/*
732 		 * Kernel memory that is pinned should only be moved on suspend
733 		 * / resume; some of the pinned memory is required for the
734 		 * device to resume / use the GPU to move other evicted memory
735 		 * (user memory) around. This could likely be optimized a bit
736 		 * further by finding the minimum set of pinned memory
737 		 * required for resume, but for simplicity we do a memcpy for
738 		 * all pinned memory.
739 		 */
740 		ret = xe_bo_vmap(bo);
741 		if (!ret) {
742 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
743 
744 			/* Create a new VMAP once the kernel BO is back in VRAM */
745 			if (!ret && resource_is_vram(new_mem)) {
746 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
747 				void __iomem *new_addr = vram->mapping +
748 					(new_mem->start << PAGE_SHIFT);
749 
750 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
751 					ret = -EINVAL;
752 					xe_pm_runtime_put(xe);
753 					goto out;
754 				}
755 
756 				xe_assert(xe, new_mem->start ==
757 					  bo->placements->fpfn);
758 
759 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
760 			}
761 		}
762 	} else {
763 		if (move_lacks_source)
764 			fence = xe_migrate_clear(migrate, bo, new_mem);
765 		else
766 			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
767 						new_mem, handle_system_ccs);
768 		if (IS_ERR(fence)) {
769 			ret = PTR_ERR(fence);
770 			xe_pm_runtime_put(xe);
771 			goto out;
772 		}
773 		if (!move_lacks_source) {
774 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
775 							true, new_mem);
776 			if (ret) {
777 				dma_fence_wait(fence, false);
778 				ttm_bo_move_null(ttm_bo, new_mem);
779 				ret = 0;
780 			}
781 		} else {
782 			/*
783 			 * ttm_bo_move_accel_cleanup() may blow up if
784 			 * bo->resource == NULL, so just attach the
785 			 * fence and set the new resource.
786 			 */
787 			dma_resv_add_fence(ttm_bo->base.resv, fence,
788 					   DMA_RESV_USAGE_KERNEL);
789 			ttm_bo_move_null(ttm_bo, new_mem);
790 		}
791 
792 		dma_fence_put(fence);
793 	}
794 
795 	xe_pm_runtime_put(xe);
796 
797 out:
798 	return ret;
799 
800 }
801 
802 /**
803  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
804  * @bo: The buffer object to move.
805  *
806  * On successful completion, the object memory will be moved to system memory.
807  *
808  * This is needed for special handling of pinned VRAM objects during
809  * suspend-resume.
810  *
811  * Return: 0 on success. Negative error code on failure.
812  */
813 int xe_bo_evict_pinned(struct xe_bo *bo)
814 {
815 	struct ttm_place place = {
816 		.mem_type = XE_PL_TT,
817 	};
818 	struct ttm_placement placement = {
819 		.placement = &place,
820 		.num_placement = 1,
821 	};
822 	struct ttm_operation_ctx ctx = {
823 		.interruptible = false,
824 	};
825 	struct ttm_resource *new_mem;
826 	int ret;
827 
828 	xe_bo_assert_held(bo);
829 
830 	if (WARN_ON(!bo->ttm.resource))
831 		return -EINVAL;
832 
833 	if (WARN_ON(!xe_bo_is_pinned(bo)))
834 		return -EINVAL;
835 
836 	if (WARN_ON(!xe_bo_is_vram(bo)))
837 		return -EINVAL;
838 
839 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
840 	if (ret)
841 		return ret;
842 
843 	if (!bo->ttm.ttm) {
844 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
845 		if (!bo->ttm.ttm) {
846 			ret = -ENOMEM;
847 			goto err_res_free;
848 		}
849 	}
850 
851 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
852 	if (ret)
853 		goto err_res_free;
854 
855 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
856 	if (ret)
857 		goto err_res_free;
858 
859 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
860 	if (ret)
861 		goto err_res_free;
862 
863 	return 0;
864 
865 err_res_free:
866 	ttm_resource_free(&bo->ttm, &new_mem);
867 	return ret;
868 }
869 
870 /**
871  * xe_bo_restore_pinned() - Restore a pinned VRAM object
872  * @bo: The buffer object to move.
873  *
874  * On successful completion, the object memory will be moved back to VRAM.
875  *
876  * This is needed for special handling of pinned VRAM objects during
877  * suspend-resume.
878  *
879  * Return: 0 on success. Negative error code on failure.
880  */
881 int xe_bo_restore_pinned(struct xe_bo *bo)
882 {
883 	struct ttm_operation_ctx ctx = {
884 		.interruptible = false,
885 	};
886 	struct ttm_resource *new_mem;
887 	int ret;
888 
889 	xe_bo_assert_held(bo);
890 
891 	if (WARN_ON(!bo->ttm.resource))
892 		return -EINVAL;
893 
894 	if (WARN_ON(!xe_bo_is_pinned(bo)))
895 		return -EINVAL;
896 
897 	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
898 		return -EINVAL;
899 
900 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
901 	if (ret)
902 		return ret;
903 
904 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
905 	if (ret)
906 		goto err_res_free;
907 
908 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
909 	if (ret)
910 		goto err_res_free;
911 
912 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
913 	if (ret)
914 		goto err_res_free;
915 
916 	return 0;
917 
918 err_res_free:
919 	ttm_resource_free(&bo->ttm, &new_mem);
920 	return ret;
921 }
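
/*
 * Minimal usage sketch (illustrative only): the pair above is intended to
 * bracket suspend / resume handling of pinned VRAM BOs, with the caller
 * holding the BO's dma_resv lock:
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_evict_pinned(bo);		// suspend: contents copied out of VRAM
 *	xe_bo_unlock(bo);
 *	...
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_restore_pinned(bo);		// resume: contents copied back to VRAM
 *	xe_bo_unlock(bo);
 *
 * The actual suspend / resume paths (see xe_bo_evict.c) are expected to walk
 * the xe->pinned lists and apply this to every relevant BO.
 */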
922 
923 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
924 				       unsigned long page_offset)
925 {
926 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
927 	struct xe_res_cursor cursor;
928 	struct xe_mem_region *vram;
929 
930 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
931 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
932 
933 	vram = res_to_mem_region(ttm_bo->resource);
934 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
935 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
936 }
937 
938 static void __xe_bo_vunmap(struct xe_bo *bo);
939 
940 /*
941  * TODO: Move this function to TTM so we don't rely on how TTM does its
942  * locking, thereby abusing TTM internals.
943  */
944 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
945 {
946 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
947 	bool locked;
948 
949 	xe_assert(xe, !kref_read(&ttm_bo->kref));
950 
951 	/*
952 	 * We can typically only race with TTM trylocking under the
953 	 * lru_lock, which will immediately be unlocked again since
954 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
955 	 * always succeed here, as long as we hold the lru lock.
956 	 */
957 	spin_lock(&ttm_bo->bdev->lru_lock);
958 	locked = dma_resv_trylock(ttm_bo->base.resv);
959 	spin_unlock(&ttm_bo->bdev->lru_lock);
960 	xe_assert(xe, locked);
961 
962 	return locked;
963 }
964 
965 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
966 {
967 	struct dma_resv_iter cursor;
968 	struct dma_fence *fence;
969 	struct dma_fence *replacement = NULL;
970 	struct xe_bo *bo;
971 
972 	if (!xe_bo_is_xe_bo(ttm_bo))
973 		return;
974 
975 	bo = ttm_to_xe_bo(ttm_bo);
976 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
977 
978 	/*
979 	 * Corner case where TTM fails to allocate memory and this BOs resv
980 	 * still points the VMs resv
981 	 */
982 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
983 		return;
984 
985 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
986 		return;
987 
988 	/*
989 	 * Scrub the preempt fences if any. The unbind fence is already
990 	 * attached to the resv.
991 	 * TODO: Don't do this for external bos once we scrub them after
992 	 * unbind.
993 	 */
994 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
995 				DMA_RESV_USAGE_BOOKKEEP, fence) {
996 		if (xe_fence_is_xe_preempt(fence) &&
997 		    !dma_fence_is_signaled(fence)) {
998 			if (!replacement)
999 				replacement = dma_fence_get_stub();
1000 
1001 			dma_resv_replace_fences(ttm_bo->base.resv,
1002 						fence->context,
1003 						replacement,
1004 						DMA_RESV_USAGE_BOOKKEEP);
1005 		}
1006 	}
1007 	dma_fence_put(replacement);
1008 
1009 	dma_resv_unlock(ttm_bo->base.resv);
1010 }
1011 
1012 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1013 {
1014 	if (!xe_bo_is_xe_bo(ttm_bo))
1015 		return;
1016 
1017 	/*
1018 	 * Object is idle and about to be destroyed. Release the
1019 	 * dma-buf attachment.
1020 	 */
1021 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1022 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1023 						       struct xe_ttm_tt, ttm);
1024 
1025 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1026 					 DMA_BIDIRECTIONAL);
1027 		ttm_bo->sg = NULL;
1028 		xe_tt->sg = NULL;
1029 	}
1030 }
1031 
1032 const struct ttm_device_funcs xe_ttm_funcs = {
1033 	.ttm_tt_create = xe_ttm_tt_create,
1034 	.ttm_tt_populate = xe_ttm_tt_populate,
1035 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1036 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1037 	.evict_flags = xe_evict_flags,
1038 	.move = xe_bo_move,
1039 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1040 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1041 	.release_notify = xe_ttm_bo_release_notify,
1042 	.eviction_valuable = ttm_bo_eviction_valuable,
1043 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1044 };
1045 
1046 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1047 {
1048 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1049 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1050 
1051 	if (bo->ttm.base.import_attach)
1052 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1053 	drm_gem_object_release(&bo->ttm.base);
1054 
1055 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1056 
1057 	if (bo->ggtt_node.size)
1058 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1059 
1060 #ifdef CONFIG_PROC_FS
1061 	if (bo->client)
1062 		xe_drm_client_remove_bo(bo);
1063 #endif
1064 
1065 	if (bo->vm && xe_bo_is_user(bo))
1066 		xe_vm_put(bo->vm);
1067 
1068 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1069 	if (!list_empty(&bo->vram_userfault_link))
1070 		list_del(&bo->vram_userfault_link);
1071 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1072 
1073 	kfree(bo);
1074 }
1075 
1076 static void xe_gem_object_free(struct drm_gem_object *obj)
1077 {
1078 	/* Our BO reference counting scheme works as follows:
1079 	 *
1080 	 * The gem object kref is typically used throughout the driver,
1081 	 * and the gem object holds a ttm_buffer_object refcount, so
1082 	 * that when the last gem object reference is put, which is when
1083 	 * we end up in this function, we also put that ttm_buffer_object
1084 	 * refcount. Anything using gem interfaces is then no longer
1085 	 * allowed to access the object in a way that requires a gem
1086 	 * refcount, including locking the object.
1087 	 *
1088 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1089 	 * refcount directly if needed.
1090 	 */
1091 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1092 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1093 }
1094 
1095 static void xe_gem_object_close(struct drm_gem_object *obj,
1096 				struct drm_file *file_priv)
1097 {
1098 	struct xe_bo *bo = gem_to_xe_bo(obj);
1099 
1100 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1101 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1102 
1103 		xe_bo_lock(bo, false);
1104 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1105 		xe_bo_unlock(bo);
1106 	}
1107 }
1108 
1109 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1110 {
1111 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1112 	struct drm_device *ddev = tbo->base.dev;
1113 	struct xe_device *xe = to_xe_device(ddev);
1114 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1115 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1116 	vm_fault_t ret;
1117 	int idx;
1118 
1119 	if (needs_rpm)
1120 		xe_pm_runtime_get(xe);
1121 
1122 	ret = ttm_bo_vm_reserve(tbo, vmf);
1123 	if (ret)
1124 		goto out;
1125 
1126 	if (drm_dev_enter(ddev, &idx)) {
1127 		trace_xe_bo_cpu_fault(bo);
1128 
1129 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1130 					       TTM_BO_VM_NUM_PREFAULT);
1131 		drm_dev_exit(idx);
1132 	} else {
1133 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1134 	}
1135 
1136 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1137 		goto out;
1138 	/*
1139 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1140 	 */
1141 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1142 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1143 		if (list_empty(&bo->vram_userfault_link))
1144 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1145 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1146 	}
1147 
1148 	dma_resv_unlock(tbo->base.resv);
1149 out:
1150 	if (needs_rpm)
1151 		xe_pm_runtime_put(xe);
1152 
1153 	return ret;
1154 }
1155 
1156 static const struct vm_operations_struct xe_gem_vm_ops = {
1157 	.fault = xe_gem_fault,
1158 	.open = ttm_bo_vm_open,
1159 	.close = ttm_bo_vm_close,
1160 	.access = ttm_bo_vm_access
1161 };
1162 
1163 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1164 	.free = xe_gem_object_free,
1165 	.close = xe_gem_object_close,
1166 	.mmap = drm_gem_ttm_mmap,
1167 	.export = xe_gem_prime_export,
1168 	.vm_ops = &xe_gem_vm_ops,
1169 };
1170 
1171 /**
1172  * xe_bo_alloc - Allocate storage for a struct xe_bo
1173  *
1174  * This function is intended to allocate storage to be used for input
1175  * to __xe_bo_create_locked(), for cases where a pointer to the bo to be
1176  * created is needed before the call to __xe_bo_create_locked().
1177  * If __xe_bo_create_locked() ends up never being called, then the
1178  * storage allocated with this function needs to be freed using
1179  * xe_bo_free().
1180  *
1181  * Return: A pointer to an uninitialized struct xe_bo on success,
1182  * ERR_PTR(-ENOMEM) on error.
1183  */
1184 struct xe_bo *xe_bo_alloc(void)
1185 {
1186 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1187 
1188 	if (!bo)
1189 		return ERR_PTR(-ENOMEM);
1190 
1191 	return bo;
1192 }
1193 
1194 /**
1195  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1196  * @bo: The buffer object storage.
1197  *
1198  * Refer to xe_bo_alloc() documentation for valid use-cases.
1199  */
1200 void xe_bo_free(struct xe_bo *bo)
1201 {
1202 	kfree(bo);
1203 }
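
/*
 * Usage sketch, condensed from the fixed-placement path in
 * __xe_bo_create_locked() below: allocate the storage first, fill in the
 * placement, then hand the storage to ___xe_bo_create_locked(). If creation
 * is never attempted, the storage must be freed with xe_bo_free():
 *
 *	bo = xe_bo_alloc();
 *	if (IS_ERR(bo))
 *		return bo;
 *
 *	err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
 *	if (err) {
 *		xe_bo_free(bo);		// create was never called
 *		return ERR_PTR(err);
 *	}
 *
 *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
 *				    cpu_caching, type, flags);
 */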
1204 
1205 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1206 				     struct xe_tile *tile, struct dma_resv *resv,
1207 				     struct ttm_lru_bulk_move *bulk, size_t size,
1208 				     u16 cpu_caching, enum ttm_bo_type type,
1209 				     u32 flags)
1210 {
1211 	struct ttm_operation_ctx ctx = {
1212 		.interruptible = true,
1213 		.no_wait_gpu = false,
1214 	};
1215 	struct ttm_placement *placement;
1216 	uint32_t alignment;
1217 	size_t aligned_size;
1218 	int err;
1219 
1220 	/* Only kernel objects should set tile */
1221 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1222 
1223 	if (XE_WARN_ON(!size)) {
1224 		xe_bo_free(bo);
1225 		return ERR_PTR(-EINVAL);
1226 	}
1227 
1228 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1229 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1230 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1231 	     (flags & XE_BO_NEEDS_64K))) {
1232 		aligned_size = ALIGN(size, SZ_64K);
1233 		if (type != ttm_bo_type_device)
1234 			size = ALIGN(size, SZ_64K);
1235 		flags |= XE_BO_FLAG_INTERNAL_64K;
1236 		alignment = SZ_64K >> PAGE_SHIFT;
1237 
1238 	} else {
1239 		aligned_size = ALIGN(size, SZ_4K);
1240 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1241 		alignment = SZ_4K >> PAGE_SHIFT;
1242 	}
1243 
1244 	if (type == ttm_bo_type_device && aligned_size != size)
1245 		return ERR_PTR(-EINVAL);
1246 
1247 	if (!bo) {
1248 		bo = xe_bo_alloc();
1249 		if (IS_ERR(bo))
1250 			return bo;
1251 	}
1252 
1253 	bo->ccs_cleared = false;
1254 	bo->tile = tile;
1255 	bo->size = size;
1256 	bo->flags = flags;
1257 	bo->cpu_caching = cpu_caching;
1258 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1259 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1260 	INIT_LIST_HEAD(&bo->pinned_link);
1261 #ifdef CONFIG_PROC_FS
1262 	INIT_LIST_HEAD(&bo->client_link);
1263 #endif
1264 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1265 
1266 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1267 
1268 	if (resv) {
1269 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1270 		ctx.resv = resv;
1271 	}
1272 
1273 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1274 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1275 		if (WARN_ON(err)) {
1276 			xe_ttm_bo_destroy(&bo->ttm);
1277 			return ERR_PTR(err);
1278 		}
1279 	}
1280 
1281 	/* Defer populating type_sg bos */
1282 	placement = (type == ttm_bo_type_sg ||
1283 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1284 		&bo->placement;
1285 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1286 				   placement, alignment,
1287 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1288 	if (err)
1289 		return ERR_PTR(err);
1290 
1291 	/*
1292 	 * The VRAM pages underneath are potentially still being accessed by the
1293 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1294 	 * sure to add any corresponding move/clear fences into the objects
1295 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1296 	 *
1297 	 * For KMD internal buffers we don't care about GPU clearing, however we
1298 	 * still need to handle async evictions, where the VRAM is still being
1299 	 * accessed by the GPU. Most internal callers are not expecting this,
1300 	 * since they are missing the required synchronisation before accessing
1301 	 * the memory. To keep things simple just sync wait any kernel fences
1302 	 * here, if the buffer is designated KMD internal.
1303 	 *
1304 	 * For normal userspace objects we should already have the required
1305 	 * pipelining or sync waiting elsewhere, since we already have to deal
1306 	 * with things like async GPU clearing.
1307 	 */
1308 	if (type == ttm_bo_type_kernel) {
1309 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1310 						     DMA_RESV_USAGE_KERNEL,
1311 						     ctx.interruptible,
1312 						     MAX_SCHEDULE_TIMEOUT);
1313 
1314 		if (timeout < 0) {
1315 			if (!resv)
1316 				dma_resv_unlock(bo->ttm.base.resv);
1317 			xe_bo_put(bo);
1318 			return ERR_PTR(timeout);
1319 		}
1320 	}
1321 
1322 	bo->created = true;
1323 	if (bulk)
1324 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1325 	else
1326 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1327 
1328 	return bo;
1329 }
1330 
1331 static int __xe_bo_fixed_placement(struct xe_device *xe,
1332 				   struct xe_bo *bo,
1333 				   u32 flags,
1334 				   u64 start, u64 end, u64 size)
1335 {
1336 	struct ttm_place *place = bo->placements;
1337 
1338 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1339 		return -EINVAL;
1340 
1341 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1342 	place->fpfn = start >> PAGE_SHIFT;
1343 	place->lpfn = end >> PAGE_SHIFT;
1344 
1345 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1346 	case XE_BO_FLAG_VRAM0:
1347 		place->mem_type = XE_PL_VRAM0;
1348 		break;
1349 	case XE_BO_FLAG_VRAM1:
1350 		place->mem_type = XE_PL_VRAM1;
1351 		break;
1352 	case XE_BO_FLAG_STOLEN:
1353 		place->mem_type = XE_PL_STOLEN;
1354 		break;
1355 
1356 	default:
1357 		/* 0 or multiple of the above set */
1358 		return -EINVAL;
1359 	}
1360 
1361 	bo->placement = (struct ttm_placement) {
1362 		.num_placement = 1,
1363 		.placement = place,
1364 	};
1365 
1366 	return 0;
1367 }
1368 
1369 static struct xe_bo *
1370 __xe_bo_create_locked(struct xe_device *xe,
1371 		      struct xe_tile *tile, struct xe_vm *vm,
1372 		      size_t size, u64 start, u64 end,
1373 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1374 {
1375 	struct xe_bo *bo = NULL;
1376 	int err;
1377 
1378 	if (vm)
1379 		xe_vm_assert_held(vm);
1380 
1381 	if (start || end != ~0ULL) {
1382 		bo = xe_bo_alloc();
1383 		if (IS_ERR(bo))
1384 			return bo;
1385 
1386 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1387 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1388 		if (err) {
1389 			xe_bo_free(bo);
1390 			return ERR_PTR(err);
1391 		}
1392 	}
1393 
1394 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1395 				    vm && !xe_vm_in_fault_mode(vm) &&
1396 				    flags & XE_BO_FLAG_USER ?
1397 				    &vm->lru_bulk_move : NULL, size,
1398 				    cpu_caching, type, flags);
1399 	if (IS_ERR(bo))
1400 		return bo;
1401 
1402 	/*
1403 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
1404 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1405 	 * will keep a reference to the vm, and avoid circular references
1406 	 * by having all the vm's bo references released at vm close
1407 	 * time.
1408 	 */
1409 	if (vm && xe_bo_is_user(bo))
1410 		xe_vm_get(vm);
1411 	bo->vm = vm;
1412 
1413 	if (bo->flags & XE_BO_FLAG_GGTT) {
1414 		if (!tile && flags & XE_BO_FLAG_STOLEN)
1415 			tile = xe_device_get_root_tile(xe);
1416 
1417 		xe_assert(xe, tile);
1418 
1419 		if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
1420 			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1421 						   start + bo->size, U64_MAX);
1422 		} else {
1423 			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1424 		}
1425 		if (err)
1426 			goto err_unlock_put_bo;
1427 	}
1428 
1429 	return bo;
1430 
1431 err_unlock_put_bo:
1432 	__xe_bo_unset_bulk_move(bo);
1433 	xe_bo_unlock_vm_held(bo);
1434 	xe_bo_put(bo);
1435 	return ERR_PTR(err);
1436 }
1437 
1438 struct xe_bo *
1439 xe_bo_create_locked_range(struct xe_device *xe,
1440 			  struct xe_tile *tile, struct xe_vm *vm,
1441 			  size_t size, u64 start, u64 end,
1442 			  enum ttm_bo_type type, u32 flags)
1443 {
1444 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1445 }
1446 
1447 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1448 				  struct xe_vm *vm, size_t size,
1449 				  enum ttm_bo_type type, u32 flags)
1450 {
1451 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1452 }
1453 
1454 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1455 				struct xe_vm *vm, size_t size,
1456 				u16 cpu_caching,
1457 				enum ttm_bo_type type,
1458 				u32 flags)
1459 {
1460 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1461 						 cpu_caching, type,
1462 						 flags | XE_BO_FLAG_USER);
1463 	if (!IS_ERR(bo))
1464 		xe_bo_unlock_vm_held(bo);
1465 
1466 	return bo;
1467 }
1468 
1469 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1470 			   struct xe_vm *vm, size_t size,
1471 			   enum ttm_bo_type type, u32 flags)
1472 {
1473 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1474 
1475 	if (!IS_ERR(bo))
1476 		xe_bo_unlock_vm_held(bo);
1477 
1478 	return bo;
1479 }
1480 
1481 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1482 				      struct xe_vm *vm,
1483 				      size_t size, u64 offset,
1484 				      enum ttm_bo_type type, u32 flags)
1485 {
1486 	struct xe_bo *bo;
1487 	int err;
1488 	u64 start = offset == ~0ull ? 0 : offset;
1489 	u64 end = offset == ~0ull ? offset : start + size;
1490 
1491 	if (flags & XE_BO_FLAG_STOLEN &&
1492 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1493 		flags |= XE_BO_FLAG_GGTT;
1494 
1495 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1496 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
1497 	if (IS_ERR(bo))
1498 		return bo;
1499 
1500 	err = xe_bo_pin(bo);
1501 	if (err)
1502 		goto err_put;
1503 
1504 	err = xe_bo_vmap(bo);
1505 	if (err)
1506 		goto err_unpin;
1507 
1508 	xe_bo_unlock_vm_held(bo);
1509 
1510 	return bo;
1511 
1512 err_unpin:
1513 	xe_bo_unpin(bo);
1514 err_put:
1515 	xe_bo_unlock_vm_held(bo);
1516 	xe_bo_put(bo);
1517 	return ERR_PTR(err);
1518 }
1519 
1520 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1521 				   struct xe_vm *vm, size_t size,
1522 				   enum ttm_bo_type type, u32 flags)
1523 {
1524 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1525 }
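
/*
 * Usage sketch (illustrative, mirrors xe_bo_create_from_data() below): a
 * kernel BO that needs both a GPU placement and a CPU mapping is typically
 * created, pinned and vmapped in one go, then accessed through bo->vmap:
 *
 *	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
 *				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *				  XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	xe_map_memcpy_to(xe, &bo->vmap, 0, data, data_size);
 *	...
 *	xe_bo_unpin_map_no_vm(bo);	// teardown counterpart
 */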
1526 
1527 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1528 				     const void *data, size_t size,
1529 				     enum ttm_bo_type type, u32 flags)
1530 {
1531 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1532 						ALIGN(size, PAGE_SIZE),
1533 						type, flags);
1534 	if (IS_ERR(bo))
1535 		return bo;
1536 
1537 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1538 
1539 	return bo;
1540 }
1541 
1542 static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
1543 {
1544 	xe_bo_unpin_map_no_vm(arg);
1545 }
1546 
1547 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1548 					   size_t size, u32 flags)
1549 {
1550 	struct xe_bo *bo;
1551 	int ret;
1552 
1553 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1554 	if (IS_ERR(bo))
1555 		return bo;
1556 
1557 	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
1558 	if (ret)
1559 		return ERR_PTR(ret);
1560 
1561 	return bo;
1562 }
1563 
1564 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1565 					     const void *data, size_t size, u32 flags)
1566 {
1567 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1568 
1569 	if (IS_ERR(bo))
1570 		return bo;
1571 
1572 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1573 
1574 	return bo;
1575 }
1576 
1577 /**
1578  * xe_managed_bo_reinit_in_vram - Replace a managed system memory BO with a VRAM copy
1579  * @xe: xe device
1580  * @tile: Tile where the new buffer will be created
1581  * @src: Managed buffer object allocated in system memory
1582  *
1583  * Replace a managed src buffer object allocated in system memory with a new
1584  * one allocated in vram, copying the data between them.
1585  * The buffer object in VRAM is not going to have the same GGTT address, so the
1586  * caller is responsible for making sure that any old references to it are updated.
1587  *
1588  * Returns 0 for success, negative error code otherwise.
1589  */
1590 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1591 {
1592 	struct xe_bo *bo;
1593 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
1594 
1595 	dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
1596 
1597 	xe_assert(xe, IS_DGFX(xe));
1598 	xe_assert(xe, !(*src)->vmap.is_iomem);
1599 
1600 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
1601 					    (*src)->size, dst_flags);
1602 	if (IS_ERR(bo))
1603 		return PTR_ERR(bo);
1604 
1605 	drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
1606 	*src = bo;
1607 
1608 	return 0;
1609 }
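
/*
 * Usage sketch (hypothetical caller, flags are assumptions): a managed BO
 * whose contents were first staged in system memory can later be migrated
 * once VRAM is usable:
 *
 *	bo = xe_managed_bo_create_from_data(xe, tile, data, size, flags);
 *	...
 *	err = xe_managed_bo_reinit_in_vram(xe, tile, &bo);
 *	// On success bo now points at the VRAM copy; the old system-memory
 *	// BO has already been released through its managed release action.
 */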
1610 
1611 /*
1612  * XXX: This is in the VM bind data path, likely should calculate this once and
1613  * store, with a recalculation if the BO is moved.
1614  */
1615 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1616 {
1617 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1618 
1619 	if (res->mem_type == XE_PL_STOLEN)
1620 		return xe_ttm_stolen_gpu_offset(xe);
1621 
1622 	return res_to_mem_region(res)->dpa_base;
1623 }
1624 
1625 /**
1626  * xe_bo_pin_external - pin an external BO
1627  * @bo: buffer object to be pinned
1628  *
1629  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1630  * BO. Unique call compared to xe_bo_pin as this function has its own set of
1631  * asserts and code to ensure evict / restore on suspend / resume.
1632  *
1633  * Returns 0 for success, negative error code otherwise.
1634  */
1635 int xe_bo_pin_external(struct xe_bo *bo)
1636 {
1637 	struct xe_device *xe = xe_bo_device(bo);
1638 	int err;
1639 
1640 	xe_assert(xe, !bo->vm);
1641 	xe_assert(xe, xe_bo_is_user(bo));
1642 
1643 	if (!xe_bo_is_pinned(bo)) {
1644 		err = xe_bo_validate(bo, NULL, false);
1645 		if (err)
1646 			return err;
1647 
1648 		if (xe_bo_is_vram(bo)) {
1649 			spin_lock(&xe->pinned.lock);
1650 			list_add_tail(&bo->pinned_link,
1651 				      &xe->pinned.external_vram);
1652 			spin_unlock(&xe->pinned.lock);
1653 		}
1654 	}
1655 
1656 	ttm_bo_pin(&bo->ttm);
1657 
1658 	/*
1659 	 * FIXME: If we always use the reserve / unreserve functions for locking
1660 	 * we do not need this.
1661 	 */
1662 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1663 
1664 	return 0;
1665 }
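
/*
 * Minimal usage sketch (illustrative only): pinning and later unpinning an
 * exported, VM-less user BO under its own lock:
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_pin_external(bo);
 *	xe_bo_unlock(bo);
 *	...
 *	xe_bo_lock(bo, false);
 *	xe_bo_unpin_external(bo);
 *	xe_bo_unlock(bo);
 */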
1666 
1667 int xe_bo_pin(struct xe_bo *bo)
1668 {
1669 	struct xe_device *xe = xe_bo_device(bo);
1670 	int err;
1671 
1672 	/* We currently don't expect user BOs to be pinned */
1673 	xe_assert(xe, !xe_bo_is_user(bo));
1674 
1675 	/* Pinned object must be in GGTT or have pinned flag */
1676 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
1677 				   XE_BO_FLAG_GGTT));
1678 
1679 	/*
1680 	 * No reason we can't support pinning imported dma-bufs we just don't
1681 	 * expect to pin an imported dma-buf.
1682 	 */
1683 	xe_assert(xe, !bo->ttm.base.import_attach);
1684 
1685 	/* We only expect at most 1 pin */
1686 	xe_assert(xe, !xe_bo_is_pinned(bo));
1687 
1688 	err = xe_bo_validate(bo, NULL, false);
1689 	if (err)
1690 		return err;
1691 
1692 	/*
1693 	 * For pinned objects on DGFX, which are also in vram, we expect these
1694 	 * to be in contiguous VRAM memory. This is required for eviction /
1695 	 * restore during suspend / resume (force restore to the same address).
1696 	 */
1697 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1698 	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1699 		struct ttm_place *place = &(bo->placements[0]);
1700 
1701 		if (mem_type_is_vram(place->mem_type)) {
1702 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1703 
1704 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1705 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1706 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1707 
1708 			spin_lock(&xe->pinned.lock);
1709 			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1710 			spin_unlock(&xe->pinned.lock);
1711 		}
1712 	}
1713 
1714 	ttm_bo_pin(&bo->ttm);
1715 
1716 	/*
1717 	 * FIXME: If we always use the reserve / unreserve functions for locking
1718 	 * we do not need this.
1719 	 */
1720 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1721 
1722 	return 0;
1723 }
1724 
1725 /**
1726  * xe_bo_unpin_external - unpin an external BO
1727  * @bo: buffer object to be unpinned
1728  *
1729  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1730  * BO. Unique call compared to xe_bo_unpin as this function has its own set of
1731  * asserts and code to ensure evict / restore on suspend / resume.
1734  */
1735 void xe_bo_unpin_external(struct xe_bo *bo)
1736 {
1737 	struct xe_device *xe = xe_bo_device(bo);
1738 
1739 	xe_assert(xe, !bo->vm);
1740 	xe_assert(xe, xe_bo_is_pinned(bo));
1741 	xe_assert(xe, xe_bo_is_user(bo));
1742 
1743 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1744 		spin_lock(&xe->pinned.lock);
1745 		list_del_init(&bo->pinned_link);
1746 		spin_unlock(&xe->pinned.lock);
1747 	}
1748 
1749 	ttm_bo_unpin(&bo->ttm);
1750 
1751 	/*
1752 	 * FIXME: If we always use the reserve / unreserve functions for locking
1753 	 * we do not need this.
1754 	 */
1755 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1756 }
1757 
1758 void xe_bo_unpin(struct xe_bo *bo)
1759 {
1760 	struct xe_device *xe = xe_bo_device(bo);
1761 
1762 	xe_assert(xe, !bo->ttm.base.import_attach);
1763 	xe_assert(xe, xe_bo_is_pinned(bo));
1764 
1765 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1766 	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1767 		struct ttm_place *place = &(bo->placements[0]);
1768 
1769 		if (mem_type_is_vram(place->mem_type)) {
1770 			xe_assert(xe, !list_empty(&bo->pinned_link));
1771 
1772 			spin_lock(&xe->pinned.lock);
1773 			list_del_init(&bo->pinned_link);
1774 			spin_unlock(&xe->pinned.lock);
1775 		}
1776 	}
1777 
1778 	ttm_bo_unpin(&bo->ttm);
1779 }
1780 
1781 /**
1782  * xe_bo_validate() - Make sure the bo is in an allowed placement
1783  * @bo: The bo,
1784  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
1785  *      NULL. Used together with @allow_res_evict.
1786  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1787  *                   reservation object.
1788  *
1789  * Make sure the bo is in allowed placement, migrating it if necessary. If
1790  * needed, other bos will be evicted. If bos selected for eviction share
1791  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
1792  * set to true, otherwise they will be bypassed.
1793  *
1794  * Return: 0 on success, negative error code on failure. May return
1795  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1796  */
1797 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1798 {
1799 	struct ttm_operation_ctx ctx = {
1800 		.interruptible = true,
1801 		.no_wait_gpu = false,
1802 	};
1803 
1804 	if (vm) {
1805 		lockdep_assert_held(&vm->lock);
1806 		xe_vm_assert_held(vm);
1807 
1808 		ctx.allow_res_evict = allow_res_evict;
1809 		ctx.resv = xe_vm_resv(vm);
1810 	}
1811 
1812 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1813 }
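
/*
 * Usage sketch (illustrative): validating a VM-private BO while the VM lock
 * and the VM's shared dma_resv are already held, allowing eviction of other
 * BOs sharing that resv:
 *
 *	// caller holds vm->lock and has called xe_vm_lock(vm, ...)
 *	err = xe_bo_validate(bo, vm, true);
 *
 * For a VM-less / external BO the call is simply:
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_validate(bo, NULL, false);
 *	xe_bo_unlock(bo);
 */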
1814 
1815 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1816 {
1817 	if (bo->destroy == &xe_ttm_bo_destroy)
1818 		return true;
1819 
1820 	return false;
1821 }
1822 
1823 /*
1824  * Resolve a BO address. There is no assert to check if the proper lock is held
1825  * so it should only be used in cases where it is not fatal to get the wrong
1826  * address, such as printing debug information, but not in cases where memory is
1827  * written based on this result.
1828  */
1829 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1830 {
1831 	struct xe_device *xe = xe_bo_device(bo);
1832 	struct xe_res_cursor cur;
1833 	u64 page;
1834 
1835 	xe_assert(xe, page_size <= PAGE_SIZE);
1836 	page = offset >> PAGE_SHIFT;
1837 	offset &= (PAGE_SIZE - 1);
1838 
1839 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1840 		xe_assert(xe, bo->ttm.ttm);
1841 
1842 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1843 				page_size, &cur);
1844 		return xe_res_dma(&cur) + offset;
1845 	} else {
1846 		struct xe_res_cursor cur;
1847 
1848 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1849 			     page_size, &cur);
1850 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1851 	}
1852 }
1853 
1854 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1855 {
1856 	if (!READ_ONCE(bo->ttm.pin_count))
1857 		xe_bo_assert_held(bo);
1858 	return __xe_bo_addr(bo, offset, page_size);
1859 }
1860 
1861 int xe_bo_vmap(struct xe_bo *bo)
1862 {
1863 	void *virtual;
1864 	bool is_iomem;
1865 	int ret;
1866 
1867 	xe_bo_assert_held(bo);
1868 
1869 	if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
1870 		return -EINVAL;
1871 
1872 	if (!iosys_map_is_null(&bo->vmap))
1873 		return 0;
1874 
1875 	/*
1876 	 * We use this more or less deprecated interface for now since
1877 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1878 	 * single page bos, which is done here.
1879 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1880 	 * to use struct iosys_map.
1881 	 */
1882 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1883 	if (ret)
1884 		return ret;
1885 
1886 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1887 	if (is_iomem)
1888 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1889 	else
1890 		iosys_map_set_vaddr(&bo->vmap, virtual);
1891 
1892 	return 0;
1893 }
1894 
1895 static void __xe_bo_vunmap(struct xe_bo *bo)
1896 {
1897 	if (!iosys_map_is_null(&bo->vmap)) {
1898 		iosys_map_clear(&bo->vmap);
1899 		ttm_bo_kunmap(&bo->kmap);
1900 	}
1901 }
1902 
1903 void xe_bo_vunmap(struct xe_bo *bo)
1904 {
1905 	xe_bo_assert_held(bo);
1906 	__xe_bo_vunmap(bo);
1907 }
1908 
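/*
 * Illustrative sketch, not part of the driver: CPU-clearing a BO through its
 * vmap. The BO must have been created with XE_BO_FLAG_NEEDS_CPU_ACCESS and
 * must be locked, as the checks and asserts above require. The helper name is
 * hypothetical.
 */
static int example_cpu_clear_bo(struct xe_bo *bo)
{
	int err;

	err = xe_bo_vmap(bo);
	if (err)
		return err;

	/* iosys_map_memset() handles both iomem and system-memory mappings. */
	iosys_map_memset(&bo->vmap, 0, 0, bo->size);

	xe_bo_vunmap(bo);

	return 0;
}
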
1909 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
1910 			struct drm_file *file)
1911 {
1912 	struct xe_device *xe = to_xe_device(dev);
1913 	struct xe_file *xef = to_xe_file(file);
1914 	struct drm_xe_gem_create *args = data;
1915 	struct xe_vm *vm = NULL;
1916 	struct xe_bo *bo;
1917 	unsigned int bo_flags;
1918 	u32 handle;
1919 	int err;
1920 
1921 	if (XE_IOCTL_DBG(xe, args->extensions) ||
1922 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
1923 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1924 		return -EINVAL;
1925 
1926 	/* at least one valid memory placement must be specified */
1927 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
1928 			 !args->placement))
1929 		return -EINVAL;
1930 
1931 	if (XE_IOCTL_DBG(xe, args->flags &
1932 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
1933 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
1934 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
1935 		return -EINVAL;
1936 
1937 	if (XE_IOCTL_DBG(xe, args->handle))
1938 		return -EINVAL;
1939 
1940 	if (XE_IOCTL_DBG(xe, !args->size))
1941 		return -EINVAL;
1942 
1943 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
1944 		return -EINVAL;
1945 
1946 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
1947 		return -EINVAL;
1948 
1949 	bo_flags = 0;
1950 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
1951 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
1952 
1953 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
1954 		bo_flags |= XE_BO_FLAG_SCANOUT;
1955 
1956 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
1957 
1958 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
1959 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
1960 			return -EINVAL;
1961 
1962 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
1963 	}
1964 
1965 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
1966 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
1967 		return -EINVAL;
1968 
1969 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
1970 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
1971 		return -EINVAL;
1972 
1973 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
1974 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
1975 		return -EINVAL;
1976 
1977 	if (args->vm_id) {
1978 		vm = xe_vm_lookup(xef, args->vm_id);
1979 		if (XE_IOCTL_DBG(xe, !vm))
1980 			return -ENOENT;
1981 		err = xe_vm_lock(vm, true);
1982 		if (err)
1983 			goto out_vm;
1984 	}
1985 
1986 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
1987 			       ttm_bo_type_device, bo_flags);
1988 
1989 	if (vm)
1990 		xe_vm_unlock(vm);
1991 
1992 	if (IS_ERR(bo)) {
1993 		err = PTR_ERR(bo);
1994 		goto out_vm;
1995 	}
1996 
1997 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
1998 	if (err)
1999 		goto out_bulk;
2000 
2001 	args->handle = handle;
2002 	goto out_put;
2003 
2004 out_bulk:
2005 	if (vm && !xe_vm_in_fault_mode(vm)) {
2006 		xe_vm_lock(vm, false);
2007 		__xe_bo_unset_bulk_move(bo);
2008 		xe_vm_unlock(vm);
2009 	}
2010 out_put:
2011 	xe_bo_put(bo);
2012 out_vm:
2013 	if (vm)
2014 		xe_vm_put(vm);
2015 
2016 	return err;
2017 }
2018 
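/*
 * Illustrative userspace-side sketch, not part of the driver: creating a GEM
 * object through DRM_IOCTL_XE_GEM_CREATE on an already-open device fd.
 * Assumes the uAPI header drm/xe_drm.h; the placement mask (bit 0 taken to be
 * system memory here) should normally come from the memory regions query
 * rather than being hard-coded.
 *
 *	struct drm_xe_gem_create create = {
 *		.size = 0x10000,
 *		.placement = 0x1,
 *		.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
 *		return -errno;
 *	// create.handle now names the new BO.
 */
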
2019 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
2020 			     struct drm_file *file)
2021 {
2022 	struct xe_device *xe = to_xe_device(dev);
2023 	struct drm_xe_gem_mmap_offset *args = data;
2024 	struct drm_gem_object *gem_obj;
2025 
2026 	if (XE_IOCTL_DBG(xe, args->extensions) ||
2027 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
2028 		return -EINVAL;
2029 
2030 	if (XE_IOCTL_DBG(xe, args->flags))
2031 		return -EINVAL;
2032 
2033 	gem_obj = drm_gem_object_lookup(file, args->handle);
2034 	if (XE_IOCTL_DBG(xe, !gem_obj))
2035 		return -ENOENT;
2036 
2037 	/* The mmap offset was set up at BO allocation time. */
2038 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2039 
2040 	xe_bo_put(gem_to_xe_bo(gem_obj));
2041 	return 0;
2042 }
2043 
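/*
 * Illustrative userspace-side sketch, not part of the driver: looking up the
 * fake mmap offset for a GEM handle and mapping the BO. Assumes an open
 * device fd plus drm/xe_drm.h and sys/mman.h.
 *
 *	struct drm_xe_gem_mmap_offset mmo = { .handle = handle };
 *	void *ptr;
 *
 *	if (ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo))
 *		return -errno;
 *
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, mmo.offset);
 *	if (ptr == MAP_FAILED)
 *		return -errno;
 */
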
2044 /**
2045  * xe_bo_lock() - Lock the buffer object's dma_resv object
2046  * @bo: The struct xe_bo whose lock is to be taken
2047  * @intr: Whether to perform any wait interruptible
2048  *
2049  * Locks the buffer object's dma_resv object. If the buffer object is
2050  * pointing to a shared dma_resv object, that shared lock is locked.
2051  *
2052  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2053  * contended lock was interrupted. If @intr is set to false, the
2054  * function always returns 0.
2055  */
2056 int xe_bo_lock(struct xe_bo *bo, bool intr)
2057 {
2058 	if (intr)
2059 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2060 
2061 	dma_resv_lock(bo->ttm.base.resv, NULL);
2062 
2063 	return 0;
2064 }
2065 
2066 /**
2067  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2068  * @bo: The struct xe_bo whose lock is to be released.
2069  *
2070  * Unlock a buffer object lock that was locked by xe_bo_lock().
2071  */
2072 void xe_bo_unlock(struct xe_bo *bo)
2073 {
2074 	dma_resv_unlock(bo->ttm.base.resv);
2075 }
2076 
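/*
 * Illustrative sketch, not part of the driver: the usual pattern for
 * operating on a BO with its dma_resv held via xe_bo_lock()/xe_bo_unlock(),
 * here with an interruptible wait for the lock. The helper name is
 * hypothetical.
 */
static int example_with_bo_locked(struct xe_bo *bo)
{
	int err;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;	/* -EINTR; cannot happen with intr == false. */

	/* ... operate on the BO with its reservation object held ... */

	xe_bo_unlock(bo);

	return 0;
}
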
2077 /**
2078  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2079  * @bo: The buffer object to migrate
2080  * @mem_type: The TTM memory type intended to migrate to
2081  *
2082  * Check whether the buffer object supports migration to the
2083  * given memory type. Note that pinning may affect the ability to migrate as
2084  * returned by this function.
2085  *
2086  * This function is primarily intended as a helper for checking the
2087  * possibility to migrate buffer objects and can be called without
2088  * the object lock held.
2089  *
2090  * Return: true if migration is possible, false otherwise.
2091  */
2092 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2093 {
2094 	unsigned int cur_place;
2095 
2096 	if (bo->ttm.type == ttm_bo_type_kernel)
2097 		return true;
2098 
2099 	if (bo->ttm.type == ttm_bo_type_sg)
2100 		return false;
2101 
2102 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2103 	     cur_place++) {
2104 		if (bo->placements[cur_place].mem_type == mem_type)
2105 			return true;
2106 	}
2107 
2108 	return false;
2109 }
2110 
2111 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2112 {
2113 	memset(place, 0, sizeof(*place));
2114 	place->mem_type = mem_type;
2115 }
2116 
2117 /**
2118  * xe_bo_migrate - Migrate an object to the desired region id
2119  * @bo: The buffer object to migrate.
2120  * @mem_type: The TTM region type to migrate to.
2121  *
2122  * Attempt to migrate the buffer object to the desired memory region. The
2123  * buffer object may not be pinned, and must be locked.
2124  * On successful completion, the object memory type will be updated,
2125  * but an async migration task may not have completed yet. To wait for
2126  * it to complete, wait for the object's kernel fences to signal while
2127  * holding the object lock.
2128  *
2129  * Return: 0 on success. Negative error code on failure. In particular may
2130  * return -EINTR or -ERESTARTSYS if signal pending.
2131  */
2132 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2133 {
2134 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2135 	struct ttm_operation_ctx ctx = {
2136 		.interruptible = true,
2137 		.no_wait_gpu = false,
2138 	};
2139 	struct ttm_placement placement;
2140 	struct ttm_place requested;
2141 
2142 	xe_bo_assert_held(bo);
2143 
2144 	if (bo->ttm.resource->mem_type == mem_type)
2145 		return 0;
2146 
2147 	if (xe_bo_is_pinned(bo))
2148 		return -EBUSY;
2149 
2150 	if (!xe_bo_can_migrate(bo, mem_type))
2151 		return -EINVAL;
2152 
2153 	xe_place_from_ttm_type(mem_type, &requested);
2154 	placement.num_placement = 1;
2155 	placement.placement = &requested;
2156 
2157 	/*
2158 	 * Stolen needs to be handled like the VRAM handling below if we ever
2159 	 * need to support migrating to it.
2160 	 */
2161 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2162 
2163 	if (mem_type_is_vram(mem_type)) {
2164 		u32 c = 0;
2165 
2166 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2167 	}
2168 
2169 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2170 }
2171 
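/*
 * Illustrative sketch, not part of the driver: migrating a BO to the TT
 * (GTT / system pages) region after checking that the placement is allowed.
 * The BO must be locked and unpinned; note that xe_bo_migrate() repeats the
 * xe_bo_can_migrate() check internally. The helper name is hypothetical.
 */
static int example_move_to_tt(struct xe_bo *bo)
{
	if (!xe_bo_can_migrate(bo, XE_PL_TT))
		return -EINVAL;

	return xe_bo_migrate(bo, XE_PL_TT);
}
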
2172 /**
2173  * xe_bo_evict - Evict an object to evict placement
2174  * @bo: The buffer object to migrate.
2175  * @force_alloc: Set force_alloc in ttm_operation_ctx
2176  *
2177  * On successful completion, the object memory will be moved to its evict
2178  * placement. This function blocks until the object has been fully moved.
2179  *
2180  * Return: 0 on success. Negative error code on failure.
2181  */
2182 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2183 {
2184 	struct ttm_operation_ctx ctx = {
2185 		.interruptible = false,
2186 		.no_wait_gpu = false,
2187 		.force_alloc = force_alloc,
2188 	};
2189 	struct ttm_placement placement;
2190 	int ret;
2191 
2192 	xe_evict_flags(&bo->ttm, &placement);
2193 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2194 	if (ret)
2195 		return ret;
2196 
2197 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2198 			      false, MAX_SCHEDULE_TIMEOUT);
2199 
2200 	return 0;
2201 }
2202 
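/*
 * Illustrative sketch, not part of the driver: forcing a BO out to its evict
 * placement, e.g. on a suspend-style path, relying on xe_bo_evict() blocking
 * until the move has fully completed. The helper name and the
 * non-interruptible locking are hypothetical.
 */
static int example_force_evict(struct xe_bo *bo)
{
	int err;

	xe_bo_lock(bo, false);
	err = xe_bo_evict(bo, true);
	xe_bo_unlock(bo);

	return err;
}
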
2203 /**
2204  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2205  * placed in system memory.
2206  * @bo: The xe_bo
2207  *
2208  * Return: true if extra pages need to be allocated, false otherwise.
2209  */
2210 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2211 {
2212 	struct xe_device *xe = xe_bo_device(bo);
2213 
2214 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
2215 		return false;
2216 
2217 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2218 		return false;
2219 
2220 	/* On discrete GPUs, if the GPU can access this buffer from
2221 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2222 	 * can't be used since there's no CCS storage associated with
2223 	 * non-VRAM addresses.
2224 	 */
2225 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
2226 		return false;
2227 
2228 	return true;
2229 }
2230 
2231 /**
2232  * __xe_bo_release_dummy() - Dummy kref release function
2233  * @kref: The embedded struct kref.
2234  *
2235  * Dummy release function for xe_bo_put_deferred(). Do not call this directly.
2236  */
2237 void __xe_bo_release_dummy(struct kref *kref)
2238 {
2239 }
2240 
2241 /**
2242  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2243  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2244  *
2245  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2246  * The @deferred list can be either an onstack local list or a global
2247  * shared list used by a workqueue.
2248  */
2249 void xe_bo_put_commit(struct llist_head *deferred)
2250 {
2251 	struct llist_node *freed;
2252 	struct xe_bo *bo, *next;
2253 
2254 	if (!deferred)
2255 		return;
2256 
2257 	freed = llist_del_all(deferred);
2258 	if (!freed)
2259 		return;
2260 
2261 	llist_for_each_entry_safe(bo, next, freed, freed)
2262 		drm_gem_object_free(&bo->ttm.base.refcount);
2263 }
2264 
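/*
 * Illustrative sketch, not part of the driver: dropping the final references
 * to several BOs where freeing them inline is undesirable, using
 * xe_bo_put_deferred() (a helper declared alongside these functions in
 * xe_bo.h) together with xe_bo_put_commit(). The caller shown here is
 * hypothetical.
 */
static void example_put_many(struct xe_bo **bos, unsigned int count)
{
	LLIST_HEAD(deferred);
	unsigned int i;

	for (i = 0; i < count; i++)
		xe_bo_put_deferred(bos[i], &deferred);

	/* Free everything whose refcount dropped to zero above. */
	xe_bo_put_commit(&deferred);
}
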
2265 /**
2266  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2267  * @file_priv: The DRM file the buffer handle is created for.
2268  * @dev: The DRM device.
2269  * @args: The dumb buffer arguments. Pitch, size and handle are filled in.
2270  *
2271  * See dumb_create() hook in include/drm/drm_drv.h
2272  *
2273  * Return: 0 on success, negative error code on failure.
2274  */
2275 int xe_bo_dumb_create(struct drm_file *file_priv,
2276 		      struct drm_device *dev,
2277 		      struct drm_mode_create_dumb *args)
2278 {
2279 	struct xe_device *xe = to_xe_device(dev);
2280 	struct xe_bo *bo;
2281 	uint32_t handle;
2282 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2283 	int err;
2284 	u32 page_size = max_t(u32, PAGE_SIZE,
2285 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2286 
2287 	args->pitch = ALIGN(args->width * cpp, 64);
2288 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2289 			   page_size);
2290 
2291 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2292 			       DRM_XE_GEM_CPU_CACHING_WC,
2293 			       ttm_bo_type_device,
2294 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2295 			       XE_BO_FLAG_SCANOUT |
2296 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
2297 	if (IS_ERR(bo))
2298 		return PTR_ERR(bo);
2299 
2300 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2301 	/* drop reference from allocate - handle holds it now */
2302 	drm_gem_object_put(&bo->ttm.base);
2303 	if (!err)
2304 		args->handle = handle;
2305 	return err;
2306 }
2307 
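/*
 * Worked example of the pitch/size computation above, illustrative only:
 * a 1920x1080 dumb buffer at 32 bpp gives cpp = 4, so
 * pitch = ALIGN(1920 * 4, 64) = 7680 bytes and, with 4K pages,
 * size = ALIGN(7680 * 1080, 4096) = 8294400 bytes. On platforms that need
 * 64K VRAM pages the same buffer rounds up to ALIGN(8294400, 65536) =
 * 8323072 bytes.
 */
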
2308 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2309 {
2310 	struct ttm_buffer_object *tbo = &bo->ttm;
2311 	struct ttm_device *bdev = tbo->bdev;
2312 
2313 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2314 
2315 	list_del_init(&bo->vram_userfault_link);
2316 }
2317 
2318 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2319 #include "tests/xe_bo.c"
2320 #endif
2321