xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 3ff78451b8e446e9a548b98a0d4dd8d24dc5780b)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <drm/xe_drm.h>
17 
18 #include "xe_device.h"
19 #include "xe_dma_buf.h"
20 #include "xe_drm_client.h"
21 #include "xe_ggtt.h"
22 #include "xe_gt.h"
23 #include "xe_map.h"
24 #include "xe_migrate.h"
25 #include "xe_pm.h"
26 #include "xe_preempt_fence.h"
27 #include "xe_res_cursor.h"
28 #include "xe_trace.h"
29 #include "xe_ttm_stolen_mgr.h"
30 #include "xe_vm.h"
31 
32 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
33 	[XE_PL_SYSTEM] = "system",
34 	[XE_PL_TT] = "gtt",
35 	[XE_PL_VRAM0] = "vram0",
36 	[XE_PL_VRAM1] = "vram1",
37 	[XE_PL_STOLEN] = "stolen"
38 };
39 
40 static const struct ttm_place sys_placement_flags = {
41 	.fpfn = 0,
42 	.lpfn = 0,
43 	.mem_type = XE_PL_SYSTEM,
44 	.flags = 0,
45 };
46 
47 static struct ttm_placement sys_placement = {
48 	.num_placement = 1,
49 	.placement = &sys_placement_flags,
50 };
51 
52 static const struct ttm_place tt_placement_flags[] = {
53 	{
54 		.fpfn = 0,
55 		.lpfn = 0,
56 		.mem_type = XE_PL_TT,
57 		.flags = TTM_PL_FLAG_DESIRED,
58 	},
59 	{
60 		.fpfn = 0,
61 		.lpfn = 0,
62 		.mem_type = XE_PL_SYSTEM,
63 		.flags = TTM_PL_FLAG_FALLBACK,
64 	}
65 };
66 
67 static struct ttm_placement tt_placement = {
68 	.num_placement = 2,
69 	.placement = tt_placement_flags,
70 };
71 
72 bool mem_type_is_vram(u32 mem_type)
73 {
74 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
75 }
76 
77 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
78 {
79 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
80 }
81 
82 static bool resource_is_vram(struct ttm_resource *res)
83 {
84 	return mem_type_is_vram(res->mem_type);
85 }
86 
87 bool xe_bo_is_vram(struct xe_bo *bo)
88 {
89 	return resource_is_vram(bo->ttm.resource) ||
90 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
91 }
92 
93 bool xe_bo_is_stolen(struct xe_bo *bo)
94 {
95 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
96 }
97 
98 /**
99  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
100  * @bo: The BO
101  *
102  * The stolen memory is accessed through the PCI BAR for both DGFX and some
103  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
104  *
105  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
106  */
107 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
108 {
109 	return xe_bo_is_stolen(bo) &&
110 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
111 }
112 
113 static bool xe_bo_is_user(struct xe_bo *bo)
114 {
115 	return bo->flags & XE_BO_FLAG_USER;
116 }
117 
118 static struct xe_migrate *
119 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
120 {
121 	struct xe_tile *tile;
122 
123 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
124 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
125 	return tile->migrate;
126 }
127 
128 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
129 {
130 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
131 	struct ttm_resource_manager *mgr;
132 
133 	xe_assert(xe, resource_is_vram(res));
134 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
135 	return to_xe_ttm_vram_mgr(mgr)->vram;
136 }
137 
138 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
139 			   u32 bo_flags, u32 *c)
140 {
141 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
142 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
143 
144 		bo->placements[*c] = (struct ttm_place) {
145 			.mem_type = XE_PL_TT,
146 		};
147 		*c += 1;
148 	}
149 }
150 
151 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
152 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
153 {
154 	struct ttm_place place = { .mem_type = mem_type };
155 	struct xe_mem_region *vram;
156 	u64 io_size;
157 
158 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
159 
160 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
161 	xe_assert(xe, vram && vram->usable_size);
162 	io_size = vram->io_size;
163 
164 	/*
165 	 * For eviction / restore on suspend / resume objects
166 	 * pinned in VRAM must be contiguous
167 	 */
168 	if (bo_flags & (XE_BO_FLAG_PINNED |
169 			XE_BO_FLAG_GGTT))
170 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
171 
172 	if (io_size < vram->usable_size) {
173 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
174 			place.fpfn = 0;
175 			place.lpfn = io_size >> PAGE_SHIFT;
176 		} else {
177 			place.flags |= TTM_PL_FLAG_TOPDOWN;
178 		}
179 	}
180 	places[*c] = place;
181 	*c += 1;
182 }
183 
184 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
185 			 u32 bo_flags, u32 *c)
186 {
187 	if (bo_flags & XE_BO_FLAG_VRAM0)
188 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
189 	if (bo_flags & XE_BO_FLAG_VRAM1)
190 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
191 }
192 
193 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
194 			   u32 bo_flags, u32 *c)
195 {
196 	if (bo_flags & XE_BO_FLAG_STOLEN) {
197 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
198 
199 		bo->placements[*c] = (struct ttm_place) {
200 			.mem_type = XE_PL_STOLEN,
201 			.flags = bo_flags & (XE_BO_FLAG_PINNED |
202 					     XE_BO_FLAG_GGTT) ?
203 				TTM_PL_FLAG_CONTIGUOUS : 0,
204 		};
205 		*c += 1;
206 	}
207 }
208 
209 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
210 				       u32 bo_flags)
211 {
212 	u32 c = 0;
213 
214 	try_add_vram(xe, bo, bo_flags, &c);
215 	try_add_system(xe, bo, bo_flags, &c);
216 	try_add_stolen(xe, bo, bo_flags, &c);
217 
218 	if (!c)
219 		return -EINVAL;
220 
221 	bo->placement = (struct ttm_placement) {
222 		.num_placement = c,
223 		.placement = bo->placements,
224 	};
225 
226 	return 0;
227 }
228 
229 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
230 			      u32 bo_flags)
231 {
232 	xe_bo_assert_held(bo);
233 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
234 }
235 
236 static void xe_evict_flags(struct ttm_buffer_object *tbo,
237 			   struct ttm_placement *placement)
238 {
239 	if (!xe_bo_is_xe_bo(tbo)) {
240 		/* Don't handle scatter gather BOs */
241 		if (tbo->type == ttm_bo_type_sg) {
242 			placement->num_placement = 0;
243 			return;
244 		}
245 
246 		*placement = sys_placement;
247 		return;
248 	}
249 
250 	/*
251 	 * For xe, sg bos that are evicted to system just triggers a
252 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
253 	 */
254 	switch (tbo->resource->mem_type) {
255 	case XE_PL_VRAM0:
256 	case XE_PL_VRAM1:
257 	case XE_PL_STOLEN:
258 		*placement = tt_placement;
259 		break;
260 	case XE_PL_TT:
261 	default:
262 		*placement = sys_placement;
263 		break;
264 	}
265 }
266 
/*
 * xe wrapper around struct ttm_tt. Code in this file converts a ttm_tt
 * pointer back to the wrapper with container_of(), and xe_ttm_tt_destroy()
 * passes the ttm_tt pointer itself to kfree(), so @ttm must stay the first
 * member.
 */
struct xe_ttm_tt {
	/* Embedded TTM object; initialized by ttm_tt_init() in xe_ttm_tt_create(). */
	struct ttm_tt ttm;
	/* Device used for dma_map_sgtable() / dma_unmap_sgtable(). */
	struct device *dev;
	/* Storage for the sg table built from the tt pages in xe_tt_map_sg(). */
	struct sg_table sgt;
	/*
	 * Current sg table, or NULL when nothing is mapped. Points at @sgt
	 * after xe_tt_map_sg(), or at the table returned by
	 * dma_buf_map_attachment() for imported dma-bufs.
	 */
	struct sg_table *sg;
};
273 
274 static int xe_tt_map_sg(struct ttm_tt *tt)
275 {
276 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
277 	unsigned long num_pages = tt->num_pages;
278 	int ret;
279 
280 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
281 
282 	if (xe_tt->sg)
283 		return 0;
284 
285 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
286 						num_pages, 0,
287 						(u64)num_pages << PAGE_SHIFT,
288 						xe_sg_segment_size(xe_tt->dev),
289 						GFP_KERNEL);
290 	if (ret)
291 		return ret;
292 
293 	xe_tt->sg = &xe_tt->sgt;
294 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
295 			      DMA_ATTR_SKIP_CPU_SYNC);
296 	if (ret) {
297 		sg_free_table(xe_tt->sg);
298 		xe_tt->sg = NULL;
299 		return ret;
300 	}
301 
302 	return 0;
303 }
304 
305 struct sg_table *xe_bo_sg(struct xe_bo *bo)
306 {
307 	struct ttm_tt *tt = bo->ttm.ttm;
308 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
309 
310 	return xe_tt->sg;
311 }
312 
313 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
314 				       u32 page_flags)
315 {
316 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
317 	struct xe_device *xe = xe_bo_device(bo);
318 	struct xe_ttm_tt *tt;
319 	unsigned long extra_pages;
320 	enum ttm_caching caching;
321 	int err;
322 
323 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
324 	if (!tt)
325 		return NULL;
326 
327 	tt->dev = xe->drm.dev;
328 
329 	extra_pages = 0;
330 	if (xe_bo_needs_ccs_pages(bo))
331 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
332 					   PAGE_SIZE);
333 
334 	switch (bo->cpu_caching) {
335 	case DRM_XE_GEM_CPU_CACHING_WC:
336 		caching = ttm_write_combined;
337 		break;
338 	default:
339 		caching = ttm_cached;
340 		break;
341 	}
342 
343 	WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
344 
345 	/*
346 	 * Display scanout is always non-coherent with the CPU cache.
347 	 *
348 	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
349 	 * require a CPU:WC mapping.
350 	 */
351 	if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
352 	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE))
353 		caching = ttm_write_combined;
354 
355 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
356 	if (err) {
357 		kfree(tt);
358 		return NULL;
359 	}
360 
361 	return &tt->ttm;
362 }
363 
364 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
365 			      struct ttm_operation_ctx *ctx)
366 {
367 	int err;
368 
369 	/*
370 	 * dma-bufs are not populated with pages, and the dma-
371 	 * addresses are set up when moved to XE_PL_TT.
372 	 */
373 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
374 		return 0;
375 
376 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
377 	if (err)
378 		return err;
379 
380 	/* A follow up may move this xe_bo_move when BO is moved to XE_PL_TT */
381 	err = xe_tt_map_sg(tt);
382 	if (err)
383 		ttm_pool_free(&ttm_dev->pool, tt);
384 
385 	return err;
386 }
387 
388 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
389 {
390 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
391 
392 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
393 		return;
394 
395 	if (xe_tt->sg) {
396 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
397 				  DMA_BIDIRECTIONAL, 0);
398 		sg_free_table(xe_tt->sg);
399 		xe_tt->sg = NULL;
400 	}
401 
402 	return ttm_pool_free(&ttm_dev->pool, tt);
403 }
404 
/*
 * TTM ttm_tt_destroy callback: finalize @tt and free its memory.
 *
 * @tt is passed straight to kfree(), so this relies on the ttm_tt being
 * the first member of the struct xe_ttm_tt allocated in xe_ttm_tt_create().
 * @ttm_dev is unused.
 */
static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
	ttm_tt_fini(tt);
	kfree(tt);
}
410 
411 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
412 				 struct ttm_resource *mem)
413 {
414 	struct xe_device *xe = ttm_to_xe_device(bdev);
415 
416 	switch (mem->mem_type) {
417 	case XE_PL_SYSTEM:
418 	case XE_PL_TT:
419 		return 0;
420 	case XE_PL_VRAM0:
421 	case XE_PL_VRAM1: {
422 		struct xe_ttm_vram_mgr_resource *vres =
423 			to_xe_ttm_vram_mgr_resource(mem);
424 		struct xe_mem_region *vram = res_to_mem_region(mem);
425 
426 		if (vres->used_visible_size < mem->size)
427 			return -EINVAL;
428 
429 		mem->bus.offset = mem->start << PAGE_SHIFT;
430 
431 		if (vram->mapping &&
432 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
433 			mem->bus.addr = (u8 __force *)vram->mapping +
434 				mem->bus.offset;
435 
436 		mem->bus.offset += vram->io_start;
437 		mem->bus.is_iomem = true;
438 
439 #if  !defined(CONFIG_X86)
440 		mem->bus.caching = ttm_write_combined;
441 #endif
442 		return 0;
443 	} case XE_PL_STOLEN:
444 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
445 	default:
446 		return -EINVAL;
447 	}
448 }
449 
450 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
451 				const struct ttm_operation_ctx *ctx)
452 {
453 	struct dma_resv_iter cursor;
454 	struct dma_fence *fence;
455 	struct drm_gem_object *obj = &bo->ttm.base;
456 	struct drm_gpuvm_bo *vm_bo;
457 	bool idle = false;
458 	int ret = 0;
459 
460 	dma_resv_assert_held(bo->ttm.base.resv);
461 
462 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
463 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
464 				    DMA_RESV_USAGE_BOOKKEEP);
465 		dma_resv_for_each_fence_unlocked(&cursor, fence)
466 			dma_fence_enable_sw_signaling(fence);
467 		dma_resv_iter_end(&cursor);
468 	}
469 
470 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
471 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
472 		struct drm_gpuva *gpuva;
473 
474 		if (!xe_vm_in_fault_mode(vm)) {
475 			drm_gpuvm_bo_evict(vm_bo, true);
476 			continue;
477 		}
478 
479 		if (!idle) {
480 			long timeout;
481 
482 			if (ctx->no_wait_gpu &&
483 			    !dma_resv_test_signaled(bo->ttm.base.resv,
484 						    DMA_RESV_USAGE_BOOKKEEP))
485 				return -EBUSY;
486 
487 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
488 							DMA_RESV_USAGE_BOOKKEEP,
489 							ctx->interruptible,
490 							MAX_SCHEDULE_TIMEOUT);
491 			if (!timeout)
492 				return -ETIME;
493 			if (timeout < 0)
494 				return timeout;
495 
496 			idle = true;
497 		}
498 
499 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
500 			struct xe_vma *vma = gpuva_to_vma(gpuva);
501 
502 			trace_xe_vma_evict(vma);
503 			ret = xe_vm_invalidate_vma(vma);
504 			if (XE_WARN_ON(ret))
505 				return ret;
506 		}
507 	}
508 
509 	return ret;
510 }
511 
512 /*
513  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
514  * Note that unmapping the attachment is deferred to the next
515  * map_attachment time, or to bo destroy (after idling) whichever comes first.
516  * This is to avoid syncing before unmap_attachment(), assuming that the
517  * caller relies on idling the reservation object before moving the
518  * backing store out. Should that assumption not hold, then we will be able
519  * to unconditionally call unmap_attachment() when moving out to system.
520  */
521 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
522 			     struct ttm_resource *new_res)
523 {
524 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
525 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
526 					       ttm);
527 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
528 	struct sg_table *sg;
529 
530 	xe_assert(xe, attach);
531 	xe_assert(xe, ttm_bo->ttm);
532 
533 	if (new_res->mem_type == XE_PL_SYSTEM)
534 		goto out;
535 
536 	if (ttm_bo->sg) {
537 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
538 		ttm_bo->sg = NULL;
539 	}
540 
541 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
542 	if (IS_ERR(sg))
543 		return PTR_ERR(sg);
544 
545 	ttm_bo->sg = sg;
546 	xe_tt->sg = sg;
547 
548 out:
549 	ttm_bo_move_null(ttm_bo, new_res);
550 
551 	return 0;
552 }
553 
554 /**
555  * xe_bo_move_notify - Notify subsystems of a pending move
556  * @bo: The buffer object
557  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
558  *
559  * This function notifies subsystems of an upcoming buffer move.
560  * Upon receiving such a notification, subsystems should schedule
561  * halting access to the underlying pages and optionally add a fence
562  * to the buffer object's dma_resv object, that signals when access is
563  * stopped. The caller will wait on all dma_resv fences before
564  * starting the move.
565  *
566  * A subsystem may commence access to the object after obtaining
567  * bindings to the new backing memory under the object lock.
568  *
569  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
570  * negative error code on error.
571  */
572 static int xe_bo_move_notify(struct xe_bo *bo,
573 			     const struct ttm_operation_ctx *ctx)
574 {
575 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
576 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
577 	struct ttm_resource *old_mem = ttm_bo->resource;
578 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
579 	int ret;
580 
581 	/*
582 	 * If this starts to call into many components, consider
583 	 * using a notification chain here.
584 	 */
585 
586 	if (xe_bo_is_pinned(bo))
587 		return -EINVAL;
588 
589 	xe_bo_vunmap(bo);
590 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
591 	if (ret)
592 		return ret;
593 
594 	/* Don't call move_notify() for imported dma-bufs. */
595 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
596 		dma_buf_move_notify(ttm_bo->base.dma_buf);
597 
598 	/*
599 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
600 	 * so if we moved from VRAM make sure to unlink this from the userfault
601 	 * tracking.
602 	 */
603 	if (mem_type_is_vram(old_mem_type)) {
604 		mutex_lock(&xe->mem_access.vram_userfault.lock);
605 		if (!list_empty(&bo->vram_userfault_link))
606 			list_del_init(&bo->vram_userfault_link);
607 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
608 	}
609 
610 	return 0;
611 }
612 
/*
 * xe_bo_move - move a buffer object to a new resource
 * @ttm_bo: The TTM buffer object to move.
 * @evict: Passed through to ttm_bo_move_accel_cleanup().
 * @ctx: Operation context controlling waits.
 * @new_mem: The destination resource.
 * @hop: Filled with a temporary XE_PL_TT placement when -EMULTIHOP is
 *       returned.
 *
 * Installed as the .move hook of xe_ttm_funcs, and also called directly by
 * xe_bo_evict_pinned() and xe_bo_restore_pinned(), which pass a NULL @hop.
 * The multi-hop branch below writes through @hop unconditionally, so those
 * callers depend on never reaching it.
 *
 * Depending on the source and destination memory types and on whether the
 * BO has any contents, the move is either a pure bookkeeping switch
 * (ttm_bo_move_null()), a CPU memcpy (pinned kernel BOs), or a GPU
 * clear/copy through the migrate context.
 *
 * Return: 0 on success, -EMULTIHOP when an intermediate hop through
 * XE_PL_TT is required, other negative error code on failure.
 */
static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
		      struct ttm_operation_ctx *ctx,
		      struct ttm_resource *new_mem,
		      struct ttm_place *hop)
{
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct ttm_resource *old_mem = ttm_bo->resource;
	/* A BO without a resource is treated as residing in system memory. */
	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
	struct ttm_tt *ttm = ttm_bo->ttm;
	struct xe_migrate *migrate = NULL;
	struct dma_fence *fence;
	bool move_lacks_source;
	bool tt_has_data;
	bool needs_clear;
	/*
	 * CCS data kept in system memory needs handling only on non-DGFX
	 * devices, for BOs that need CCS pages and have a populated tt.
	 */
	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
	int ret = 0;
	/* Bo creation path, moving to system or TT. */
	if ((!old_mem && ttm) && !handle_system_ccs) {
		ttm_bo_move_null(ttm_bo, new_mem);
		return 0;
	}

	/* Imported dma-bufs: notify, then (re)map the attachment. */
	if (ttm_bo->type == ttm_bo_type_sg) {
		ret = xe_bo_move_notify(bo, ctx);
		if (!ret)
			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
		goto out;
	}

	/* The tt holds contents if it is populated or swapped out. */
	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));

	/*
	 * With system CCS handling, there is nothing to copy only while the
	 * CCS data has not been cleared yet; otherwise there is nothing to
	 * copy when the BO is neither in VRAM nor has tt contents.
	 */
	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
						(!mem_type_is_vram(old_mem_type) && !tt_has_data);

	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
		(!ttm && ttm_bo->type == ttm_bo_type_device);

	/* Nothing to copy and no clear requested: just switch resources. */
	if ((move_lacks_source && !needs_clear)) {
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	/* System -> TT without CCS handling is a bookkeeping switch only. */
	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	/*
	 * Failed multi-hop where the old_mem is still marked as
	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
	 */
	if (old_mem_type == XE_PL_TT &&
	    new_mem->mem_type == XE_PL_TT) {
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	/* Real data is about to move: tell users of the BO first. */
	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
		ret = xe_bo_move_notify(bo, ctx);
		if (ret)
			goto out;
	}

	if (old_mem_type == XE_PL_TT &&
	    new_mem->mem_type == XE_PL_SYSTEM) {
		/*
		 * NOTE(review): this wait is always interruptible, regardless
		 * of ctx->interruptible - confirm that is intended.
		 */
		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
						     DMA_RESV_USAGE_BOOKKEEP,
						     true,
						     MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			ret = timeout;
			goto out;
		}

		if (!handle_system_ccs) {
			ttm_bo_move_null(ttm_bo, new_mem);
			goto out;
		}
	}

	/*
	 * Direct system <-> VRAM moves of existing data are not done in one
	 * step; ask TTM to hop through XE_PL_TT first. @hop is dereferenced
	 * here without a NULL check.
	 */
	if (!move_lacks_source &&
	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
	     (mem_type_is_vram(old_mem_type) &&
	      new_mem->mem_type == XE_PL_SYSTEM))) {
		hop->fpfn = 0;
		hop->lpfn = 0;
		hop->mem_type = XE_PL_TT;
		hop->flags = TTM_PL_FLAG_TEMPORARY;
		ret = -EMULTIHOP;
		goto out;
	}

	/*
	 * Pick the migrate context: the BO's own tile if it has one, else the
	 * tile of the destination VRAM, else the tile of the source VRAM,
	 * else tile 0.
	 */
	if (bo->tile)
		migrate = bo->tile->migrate;
	else if (resource_is_vram(new_mem))
		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
	else if (mem_type_is_vram(old_mem_type))
		migrate = mem_type_to_migrate(xe, old_mem_type);
	else
		migrate = xe->tiles[0].migrate;

	xe_assert(xe, migrate);
	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
	/* Every path from here to "out" drops this reference again. */
	xe_pm_runtime_get_noresume(xe);

	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
		/*
		 * Kernel memory that is pinned should only be moved on suspend
		 * / resume, some of the pinned memory is required for the
		 * device to resume / use the GPU to move other evicted memory
		 * (user memory) around. This likely could be optimized a bit
		 * further where we find the minimum set of pinned memory
		 * required for resume but for simplicity doing a memcpy for all
		 * pinned memory.
		 */
		ret = xe_bo_vmap(bo);
		if (!ret) {
			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);

			/* Create a new VMAP once kernel BO back in VRAM */
			if (!ret && resource_is_vram(new_mem)) {
				struct xe_mem_region *vram = res_to_mem_region(new_mem);
				void __iomem *new_addr = vram->mapping +
					(new_mem->start << PAGE_SHIFT);

				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
					ret = -EINVAL;
					xe_pm_runtime_put(xe);
					goto out;
				}

				xe_assert(xe, new_mem->start ==
					  bo->placements->fpfn);

				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
			}
		}
	} else {
		/* GPU path: clear the destination or copy the old contents. */
		if (move_lacks_source)
			fence = xe_migrate_clear(migrate, bo, new_mem);
		else
			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
						new_mem, handle_system_ccs);
		if (IS_ERR(fence)) {
			ret = PTR_ERR(fence);
			xe_pm_runtime_put(xe);
			goto out;
		}
		if (!move_lacks_source) {
			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
							true, new_mem);
			if (ret) {
				/*
				 * Cleanup failed: wait for the copy to finish
				 * and switch resources synchronously instead.
				 */
				dma_fence_wait(fence, false);
				ttm_bo_move_null(ttm_bo, new_mem);
				ret = 0;
			}
		} else {
			/*
			 * ttm_bo_move_accel_cleanup() may blow up if
			 * bo->resource == NULL, so just attach the
			 * fence and set the new resource.
			 */
			dma_resv_add_fence(ttm_bo->base.resv, fence,
					   DMA_RESV_USAGE_KERNEL);
			ttm_bo_move_null(ttm_bo, new_mem);
		}

		dma_fence_put(fence);
	}

	xe_pm_runtime_put(xe);

out:
	return ret;

}
792 
793 /**
794  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
795  * @bo: The buffer object to move.
796  *
797  * On successful completion, the object memory will be moved to sytem memory.
798  *
799  * This is needed to for special handling of pinned VRAM object during
800  * suspend-resume.
801  *
802  * Return: 0 on success. Negative error code on failure.
803  */
804 int xe_bo_evict_pinned(struct xe_bo *bo)
805 {
806 	struct ttm_place place = {
807 		.mem_type = XE_PL_TT,
808 	};
809 	struct ttm_placement placement = {
810 		.placement = &place,
811 		.num_placement = 1,
812 	};
813 	struct ttm_operation_ctx ctx = {
814 		.interruptible = false,
815 	};
816 	struct ttm_resource *new_mem;
817 	int ret;
818 
819 	xe_bo_assert_held(bo);
820 
821 	if (WARN_ON(!bo->ttm.resource))
822 		return -EINVAL;
823 
824 	if (WARN_ON(!xe_bo_is_pinned(bo)))
825 		return -EINVAL;
826 
827 	if (WARN_ON(!xe_bo_is_vram(bo)))
828 		return -EINVAL;
829 
830 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
831 	if (ret)
832 		return ret;
833 
834 	if (!bo->ttm.ttm) {
835 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
836 		if (!bo->ttm.ttm) {
837 			ret = -ENOMEM;
838 			goto err_res_free;
839 		}
840 	}
841 
842 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
843 	if (ret)
844 		goto err_res_free;
845 
846 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
847 	if (ret)
848 		goto err_res_free;
849 
850 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
851 	if (ret)
852 		goto err_res_free;
853 
854 	return 0;
855 
856 err_res_free:
857 	ttm_resource_free(&bo->ttm, &new_mem);
858 	return ret;
859 }
860 
861 /**
862  * xe_bo_restore_pinned() - Restore a pinned VRAM object
863  * @bo: The buffer object to move.
864  *
865  * On successful completion, the object memory will be moved back to VRAM.
866  *
867  * This is needed to for special handling of pinned VRAM object during
868  * suspend-resume.
869  *
870  * Return: 0 on success. Negative error code on failure.
871  */
872 int xe_bo_restore_pinned(struct xe_bo *bo)
873 {
874 	struct ttm_operation_ctx ctx = {
875 		.interruptible = false,
876 	};
877 	struct ttm_resource *new_mem;
878 	int ret;
879 
880 	xe_bo_assert_held(bo);
881 
882 	if (WARN_ON(!bo->ttm.resource))
883 		return -EINVAL;
884 
885 	if (WARN_ON(!xe_bo_is_pinned(bo)))
886 		return -EINVAL;
887 
888 	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
889 		return -EINVAL;
890 
891 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
892 	if (ret)
893 		return ret;
894 
895 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
896 	if (ret)
897 		goto err_res_free;
898 
899 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
900 	if (ret)
901 		goto err_res_free;
902 
903 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
904 	if (ret)
905 		goto err_res_free;
906 
907 	return 0;
908 
909 err_res_free:
910 	ttm_resource_free(&bo->ttm, &new_mem);
911 	return ret;
912 }
913 
914 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
915 				       unsigned long page_offset)
916 {
917 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
918 	struct xe_res_cursor cursor;
919 	struct xe_mem_region *vram;
920 
921 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
922 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
923 
924 	vram = res_to_mem_region(ttm_bo->resource);
925 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
926 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
927 }
928 
929 static void __xe_bo_vunmap(struct xe_bo *bo);
930 
931 /*
932  * TODO: Move this function to TTM so we don't rely on how TTM does its
933  * locking, thereby abusing TTM internals.
934  */
935 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
936 {
937 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
938 	bool locked;
939 
940 	xe_assert(xe, !kref_read(&ttm_bo->kref));
941 
942 	/*
943 	 * We can typically only race with TTM trylocking under the
944 	 * lru_lock, which will immediately be unlocked again since
945 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
946 	 * always succeed here, as long as we hold the lru lock.
947 	 */
948 	spin_lock(&ttm_bo->bdev->lru_lock);
949 	locked = dma_resv_trylock(ttm_bo->base.resv);
950 	spin_unlock(&ttm_bo->bdev->lru_lock);
951 	xe_assert(xe, locked);
952 
953 	return locked;
954 }
955 
956 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
957 {
958 	struct dma_resv_iter cursor;
959 	struct dma_fence *fence;
960 	struct dma_fence *replacement = NULL;
961 	struct xe_bo *bo;
962 
963 	if (!xe_bo_is_xe_bo(ttm_bo))
964 		return;
965 
966 	bo = ttm_to_xe_bo(ttm_bo);
967 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
968 
969 	/*
970 	 * Corner case where TTM fails to allocate memory and this BOs resv
971 	 * still points the VMs resv
972 	 */
973 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
974 		return;
975 
976 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
977 		return;
978 
979 	/*
980 	 * Scrub the preempt fences if any. The unbind fence is already
981 	 * attached to the resv.
982 	 * TODO: Don't do this for external bos once we scrub them after
983 	 * unbind.
984 	 */
985 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
986 				DMA_RESV_USAGE_BOOKKEEP, fence) {
987 		if (xe_fence_is_xe_preempt(fence) &&
988 		    !dma_fence_is_signaled(fence)) {
989 			if (!replacement)
990 				replacement = dma_fence_get_stub();
991 
992 			dma_resv_replace_fences(ttm_bo->base.resv,
993 						fence->context,
994 						replacement,
995 						DMA_RESV_USAGE_BOOKKEEP);
996 		}
997 	}
998 	dma_fence_put(replacement);
999 
1000 	dma_resv_unlock(ttm_bo->base.resv);
1001 }
1002 
1003 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1004 {
1005 	if (!xe_bo_is_xe_bo(ttm_bo))
1006 		return;
1007 
1008 	/*
1009 	 * Object is idle and about to be destroyed. Release the
1010 	 * dma-buf attachment.
1011 	 */
1012 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1013 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1014 						       struct xe_ttm_tt, ttm);
1015 
1016 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1017 					 DMA_BIDIRECTIONAL);
1018 		ttm_bo->sg = NULL;
1019 		xe_tt->sg = NULL;
1020 	}
1021 }
1022 
1023 const struct ttm_device_funcs xe_ttm_funcs = {
1024 	.ttm_tt_create = xe_ttm_tt_create,
1025 	.ttm_tt_populate = xe_ttm_tt_populate,
1026 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1027 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1028 	.evict_flags = xe_evict_flags,
1029 	.move = xe_bo_move,
1030 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1031 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1032 	.release_notify = xe_ttm_bo_release_notify,
1033 	.eviction_valuable = ttm_bo_eviction_valuable,
1034 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1035 };
1036 
1037 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1038 {
1039 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1040 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1041 
1042 	if (bo->ttm.base.import_attach)
1043 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1044 	drm_gem_object_release(&bo->ttm.base);
1045 
1046 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1047 
1048 	if (bo->ggtt_node.size)
1049 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1050 
1051 #ifdef CONFIG_PROC_FS
1052 	if (bo->client)
1053 		xe_drm_client_remove_bo(bo);
1054 #endif
1055 
1056 	if (bo->vm && xe_bo_is_user(bo))
1057 		xe_vm_put(bo->vm);
1058 
1059 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1060 	if (!list_empty(&bo->vram_userfault_link))
1061 		list_del(&bo->vram_userfault_link);
1062 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1063 
1064 	kfree(bo);
1065 }
1066 
1067 static void xe_gem_object_free(struct drm_gem_object *obj)
1068 {
1069 	/* Our BO reference counting scheme works as follows:
1070 	 *
1071 	 * The gem object kref is typically used throughout the driver,
1072 	 * and the gem object holds a ttm_buffer_object refcount, so
1073 	 * that when the last gem object reference is put, which is when
1074 	 * we end up in this function, we put also that ttm_buffer_object
1075 	 * refcount. Anything using gem interfaces is then no longer
1076 	 * allowed to access the object in a way that requires a gem
1077 	 * refcount, including locking the object.
1078 	 *
1079 	 * driver ttm callbacks is allowed to use the ttm_buffer_object
1080 	 * refcount directly if needed.
1081 	 */
1082 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1083 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1084 }
1085 
1086 static void xe_gem_object_close(struct drm_gem_object *obj,
1087 				struct drm_file *file_priv)
1088 {
1089 	struct xe_bo *bo = gem_to_xe_bo(obj);
1090 
1091 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1092 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1093 
1094 		xe_bo_lock(bo, false);
1095 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1096 		xe_bo_unlock(bo);
1097 	}
1098 }
1099 
1100 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1101 {
1102 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1103 	struct drm_device *ddev = tbo->base.dev;
1104 	struct xe_device *xe = to_xe_device(ddev);
1105 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1106 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1107 	vm_fault_t ret;
1108 	int idx;
1109 
1110 	if (needs_rpm)
1111 		xe_pm_runtime_get(xe);
1112 
1113 	ret = ttm_bo_vm_reserve(tbo, vmf);
1114 	if (ret)
1115 		goto out;
1116 
1117 	if (drm_dev_enter(ddev, &idx)) {
1118 		trace_xe_bo_cpu_fault(bo);
1119 
1120 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1121 					       TTM_BO_VM_NUM_PREFAULT);
1122 		drm_dev_exit(idx);
1123 	} else {
1124 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1125 	}
1126 
1127 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1128 		goto out;
1129 	/*
1130 	 * ttm_bo_vm_reserve() already has dma_resv_lock.
1131 	 */
1132 	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1133 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1134 		if (list_empty(&bo->vram_userfault_link))
1135 			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1136 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1137 	}
1138 
1139 	dma_resv_unlock(tbo->base.resv);
1140 out:
1141 	if (needs_rpm)
1142 		xe_pm_runtime_put(xe);
1143 
1144 	return ret;
1145 }
1146 
1147 static const struct vm_operations_struct xe_gem_vm_ops = {
1148 	.fault = xe_gem_fault,
1149 	.open = ttm_bo_vm_open,
1150 	.close = ttm_bo_vm_close,
1151 	.access = ttm_bo_vm_access
1152 };
1153 
1154 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1155 	.free = xe_gem_object_free,
1156 	.close = xe_gem_object_close,
1157 	.mmap = drm_gem_ttm_mmap,
1158 	.export = xe_gem_prime_export,
1159 	.vm_ops = &xe_gem_vm_ops,
1160 };
1161 
1162 /**
1163  * xe_bo_alloc - Allocate storage for a struct xe_bo
1164  *
1165  * This funcition is intended to allocate storage to be used for input
1166  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1167  * created is needed before the call to __xe_bo_create_locked().
1168  * If __xe_bo_create_locked ends up never to be called, then the
1169  * storage allocated with this function needs to be freed using
1170  * xe_bo_free().
1171  *
1172  * Return: A pointer to an uninitialized struct xe_bo on success,
1173  * ERR_PTR(-ENOMEM) on error.
1174  */
1175 struct xe_bo *xe_bo_alloc(void)
1176 {
1177 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1178 
1179 	if (!bo)
1180 		return ERR_PTR(-ENOMEM);
1181 
1182 	return bo;
1183 }
1184 
/**
 * xe_bo_free - Free storage allocated using xe_bo_alloc()
 * @bo: The buffer object storage. The pointer is handed straight to
 *      kfree().
 *
 * Only releases the memory itself; nothing else is torn down here.
 * Refer to xe_bo_alloc() documentation for valid use-cases.
 */
void xe_bo_free(struct xe_bo *bo)
{
	kfree(bo);
}
1195 
1196 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1197 				     struct xe_tile *tile, struct dma_resv *resv,
1198 				     struct ttm_lru_bulk_move *bulk, size_t size,
1199 				     u16 cpu_caching, enum ttm_bo_type type,
1200 				     u32 flags)
1201 {
1202 	struct ttm_operation_ctx ctx = {
1203 		.interruptible = true,
1204 		.no_wait_gpu = false,
1205 	};
1206 	struct ttm_placement *placement;
1207 	uint32_t alignment;
1208 	size_t aligned_size;
1209 	int err;
1210 
1211 	/* Only kernel objects should set GT */
1212 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1213 
1214 	if (XE_WARN_ON(!size)) {
1215 		xe_bo_free(bo);
1216 		return ERR_PTR(-EINVAL);
1217 	}
1218 
1219 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1220 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1221 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1222 	     (flags & XE_BO_NEEDS_64K))) {
1223 		aligned_size = ALIGN(size, SZ_64K);
1224 		if (type != ttm_bo_type_device)
1225 			size = ALIGN(size, SZ_64K);
1226 		flags |= XE_BO_FLAG_INTERNAL_64K;
1227 		alignment = SZ_64K >> PAGE_SHIFT;
1228 
1229 	} else {
1230 		aligned_size = ALIGN(size, SZ_4K);
1231 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
1232 		alignment = SZ_4K >> PAGE_SHIFT;
1233 	}
1234 
1235 	if (type == ttm_bo_type_device && aligned_size != size)
1236 		return ERR_PTR(-EINVAL);
1237 
1238 	if (!bo) {
1239 		bo = xe_bo_alloc();
1240 		if (IS_ERR(bo))
1241 			return bo;
1242 	}
1243 
1244 	bo->ccs_cleared = false;
1245 	bo->tile = tile;
1246 	bo->size = size;
1247 	bo->flags = flags;
1248 	bo->cpu_caching = cpu_caching;
1249 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1250 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1251 	INIT_LIST_HEAD(&bo->pinned_link);
1252 #ifdef CONFIG_PROC_FS
1253 	INIT_LIST_HEAD(&bo->client_link);
1254 #endif
1255 	INIT_LIST_HEAD(&bo->vram_userfault_link);
1256 
1257 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1258 
1259 	if (resv) {
1260 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1261 		ctx.resv = resv;
1262 	}
1263 
1264 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1265 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1266 		if (WARN_ON(err)) {
1267 			xe_ttm_bo_destroy(&bo->ttm);
1268 			return ERR_PTR(err);
1269 		}
1270 	}
1271 
1272 	/* Defer populating type_sg bos */
1273 	placement = (type == ttm_bo_type_sg ||
1274 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1275 		&bo->placement;
1276 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1277 				   placement, alignment,
1278 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1279 	if (err)
1280 		return ERR_PTR(err);
1281 
1282 	/*
1283 	 * The VRAM pages underneath are potentially still being accessed by the
1284 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1285 	 * sure to add any corresponding move/clear fences into the objects
1286 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1287 	 *
1288 	 * For KMD internal buffers we don't care about GPU clearing, however we
1289 	 * still need to handle async evictions, where the VRAM is still being
1290 	 * accessed by the GPU. Most internal callers are not expecting this,
1291 	 * since they are missing the required synchronisation before accessing
1292 	 * the memory. To keep things simple just sync wait any kernel fences
1293 	 * here, if the buffer is designated KMD internal.
1294 	 *
1295 	 * For normal userspace objects we should already have the required
1296 	 * pipelining or sync waiting elsewhere, since we already have to deal
1297 	 * with things like async GPU clearing.
1298 	 */
1299 	if (type == ttm_bo_type_kernel) {
1300 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1301 						     DMA_RESV_USAGE_KERNEL,
1302 						     ctx.interruptible,
1303 						     MAX_SCHEDULE_TIMEOUT);
1304 
1305 		if (timeout < 0) {
1306 			if (!resv)
1307 				dma_resv_unlock(bo->ttm.base.resv);
1308 			xe_bo_put(bo);
1309 			return ERR_PTR(timeout);
1310 		}
1311 	}
1312 
1313 	bo->created = true;
1314 	if (bulk)
1315 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1316 	else
1317 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1318 
1319 	return bo;
1320 }
1321 
1322 static int __xe_bo_fixed_placement(struct xe_device *xe,
1323 				   struct xe_bo *bo,
1324 				   u32 flags,
1325 				   u64 start, u64 end, u64 size)
1326 {
1327 	struct ttm_place *place = bo->placements;
1328 
1329 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1330 		return -EINVAL;
1331 
1332 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1333 	place->fpfn = start >> PAGE_SHIFT;
1334 	place->lpfn = end >> PAGE_SHIFT;
1335 
1336 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1337 	case XE_BO_FLAG_VRAM0:
1338 		place->mem_type = XE_PL_VRAM0;
1339 		break;
1340 	case XE_BO_FLAG_VRAM1:
1341 		place->mem_type = XE_PL_VRAM1;
1342 		break;
1343 	case XE_BO_FLAG_STOLEN:
1344 		place->mem_type = XE_PL_STOLEN;
1345 		break;
1346 
1347 	default:
1348 		/* 0 or multiple of the above set */
1349 		return -EINVAL;
1350 	}
1351 
1352 	bo->placement = (struct ttm_placement) {
1353 		.num_placement = 1,
1354 		.placement = place,
1355 	};
1356 
1357 	return 0;
1358 }
1359 
1360 static struct xe_bo *
1361 __xe_bo_create_locked(struct xe_device *xe,
1362 		      struct xe_tile *tile, struct xe_vm *vm,
1363 		      size_t size, u64 start, u64 end,
1364 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1365 {
1366 	struct xe_bo *bo = NULL;
1367 	int err;
1368 
1369 	if (vm)
1370 		xe_vm_assert_held(vm);
1371 
1372 	if (start || end != ~0ULL) {
1373 		bo = xe_bo_alloc();
1374 		if (IS_ERR(bo))
1375 			return bo;
1376 
1377 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1378 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1379 		if (err) {
1380 			xe_bo_free(bo);
1381 			return ERR_PTR(err);
1382 		}
1383 	}
1384 
1385 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1386 				    vm && !xe_vm_in_fault_mode(vm) &&
1387 				    flags & XE_BO_FLAG_USER ?
1388 				    &vm->lru_bulk_move : NULL, size,
1389 				    cpu_caching, type, flags);
1390 	if (IS_ERR(bo))
1391 		return bo;
1392 
1393 	/*
1394 	 * Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
1395 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1396 	 * will keep a reference to the vm, and avoid circular references
1397 	 * by having all the vm's bo refereferences released at vm close
1398 	 * time.
1399 	 */
1400 	if (vm && xe_bo_is_user(bo))
1401 		xe_vm_get(vm);
1402 	bo->vm = vm;
1403 
1404 	if (bo->flags & XE_BO_FLAG_GGTT) {
1405 		if (!tile && flags & XE_BO_FLAG_STOLEN)
1406 			tile = xe_device_get_root_tile(xe);
1407 
1408 		xe_assert(xe, tile);
1409 
1410 		if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
1411 			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1412 						   start + bo->size, U64_MAX);
1413 		} else {
1414 			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1415 		}
1416 		if (err)
1417 			goto err_unlock_put_bo;
1418 	}
1419 
1420 	return bo;
1421 
1422 err_unlock_put_bo:
1423 	__xe_bo_unset_bulk_move(bo);
1424 	xe_bo_unlock_vm_held(bo);
1425 	xe_bo_put(bo);
1426 	return ERR_PTR(err);
1427 }
1428 
1429 struct xe_bo *
1430 xe_bo_create_locked_range(struct xe_device *xe,
1431 			  struct xe_tile *tile, struct xe_vm *vm,
1432 			  size_t size, u64 start, u64 end,
1433 			  enum ttm_bo_type type, u32 flags)
1434 {
1435 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1436 }
1437 
1438 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1439 				  struct xe_vm *vm, size_t size,
1440 				  enum ttm_bo_type type, u32 flags)
1441 {
1442 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1443 }
1444 
1445 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1446 				struct xe_vm *vm, size_t size,
1447 				u16 cpu_caching,
1448 				enum ttm_bo_type type,
1449 				u32 flags)
1450 {
1451 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1452 						 cpu_caching, type,
1453 						 flags | XE_BO_FLAG_USER);
1454 	if (!IS_ERR(bo))
1455 		xe_bo_unlock_vm_held(bo);
1456 
1457 	return bo;
1458 }
1459 
1460 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1461 			   struct xe_vm *vm, size_t size,
1462 			   enum ttm_bo_type type, u32 flags)
1463 {
1464 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1465 
1466 	if (!IS_ERR(bo))
1467 		xe_bo_unlock_vm_held(bo);
1468 
1469 	return bo;
1470 }
1471 
1472 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1473 				      struct xe_vm *vm,
1474 				      size_t size, u64 offset,
1475 				      enum ttm_bo_type type, u32 flags)
1476 {
1477 	struct xe_bo *bo;
1478 	int err;
1479 	u64 start = offset == ~0ull ? 0 : offset;
1480 	u64 end = offset == ~0ull ? offset : start + size;
1481 
1482 	if (flags & XE_BO_FLAG_STOLEN &&
1483 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1484 		flags |= XE_BO_FLAG_GGTT;
1485 
1486 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1487 				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
1488 	if (IS_ERR(bo))
1489 		return bo;
1490 
1491 	err = xe_bo_pin(bo);
1492 	if (err)
1493 		goto err_put;
1494 
1495 	err = xe_bo_vmap(bo);
1496 	if (err)
1497 		goto err_unpin;
1498 
1499 	xe_bo_unlock_vm_held(bo);
1500 
1501 	return bo;
1502 
1503 err_unpin:
1504 	xe_bo_unpin(bo);
1505 err_put:
1506 	xe_bo_unlock_vm_held(bo);
1507 	xe_bo_put(bo);
1508 	return ERR_PTR(err);
1509 }
1510 
1511 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1512 				   struct xe_vm *vm, size_t size,
1513 				   enum ttm_bo_type type, u32 flags)
1514 {
1515 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1516 }
1517 
1518 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1519 				     const void *data, size_t size,
1520 				     enum ttm_bo_type type, u32 flags)
1521 {
1522 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1523 						ALIGN(size, PAGE_SIZE),
1524 						type, flags);
1525 	if (IS_ERR(bo))
1526 		return bo;
1527 
1528 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1529 
1530 	return bo;
1531 }
1532 
/* drmm release action: @arg is the bo to hand to xe_bo_unpin_map_no_vm(). */
static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
{
	struct xe_bo *bo = arg;

	xe_bo_unpin_map_no_vm(bo);
}
1537 
1538 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1539 					   size_t size, u32 flags)
1540 {
1541 	struct xe_bo *bo;
1542 	int ret;
1543 
1544 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1545 	if (IS_ERR(bo))
1546 		return bo;
1547 
1548 	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
1549 	if (ret)
1550 		return ERR_PTR(ret);
1551 
1552 	return bo;
1553 }
1554 
1555 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1556 					     const void *data, size_t size, u32 flags)
1557 {
1558 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1559 
1560 	if (IS_ERR(bo))
1561 		return bo;
1562 
1563 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1564 
1565 	return bo;
1566 }
1567 
1568 /**
1569  * xe_managed_bo_reinit_in_vram
1570  * @xe: xe device
1571  * @tile: Tile where the new buffer will be created
1572  * @src: Managed buffer object allocated in system memory
1573  *
1574  * Replace a managed src buffer object allocated in system memory with a new
1575  * one allocated in vram, copying the data between them.
1576  * Buffer object in VRAM is not going to have the same GGTT address, the caller
1577  * is responsible for making sure that any old references to it are updated.
1578  *
1579  * Returns 0 for success, negative error code otherwise.
1580  */
1581 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1582 {
1583 	struct xe_bo *bo;
1584 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
1585 
1586 	dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
1587 
1588 	xe_assert(xe, IS_DGFX(xe));
1589 	xe_assert(xe, !(*src)->vmap.is_iomem);
1590 
1591 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
1592 					    (*src)->size, dst_flags);
1593 	if (IS_ERR(bo))
1594 		return PTR_ERR(bo);
1595 
1596 	drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
1597 	*src = bo;
1598 
1599 	return 0;
1600 }
1601 
1602 /*
1603  * XXX: This is in the VM bind data path, likely should calculate this once and
1604  * store, with a recalculation if the BO is moved.
1605  */
1606 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1607 {
1608 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1609 
1610 	if (res->mem_type == XE_PL_STOLEN)
1611 		return xe_ttm_stolen_gpu_offset(xe);
1612 
1613 	return res_to_mem_region(res)->dpa_base;
1614 }
1615 
1616 /**
1617  * xe_bo_pin_external - pin an external BO
1618  * @bo: buffer object to be pinned
1619  *
1620  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1621  * BO. Unique call compared to xe_bo_pin as this function has it own set of
1622  * asserts and code to ensure evict / restore on suspend / resume.
1623  *
1624  * Returns 0 for success, negative error code otherwise.
1625  */
1626 int xe_bo_pin_external(struct xe_bo *bo)
1627 {
1628 	struct xe_device *xe = xe_bo_device(bo);
1629 	int err;
1630 
1631 	xe_assert(xe, !bo->vm);
1632 	xe_assert(xe, xe_bo_is_user(bo));
1633 
1634 	if (!xe_bo_is_pinned(bo)) {
1635 		err = xe_bo_validate(bo, NULL, false);
1636 		if (err)
1637 			return err;
1638 
1639 		if (xe_bo_is_vram(bo)) {
1640 			spin_lock(&xe->pinned.lock);
1641 			list_add_tail(&bo->pinned_link,
1642 				      &xe->pinned.external_vram);
1643 			spin_unlock(&xe->pinned.lock);
1644 		}
1645 	}
1646 
1647 	ttm_bo_pin(&bo->ttm);
1648 
1649 	/*
1650 	 * FIXME: If we always use the reserve / unreserve functions for locking
1651 	 * we do not need this.
1652 	 */
1653 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1654 
1655 	return 0;
1656 }
1657 
1658 int xe_bo_pin(struct xe_bo *bo)
1659 {
1660 	struct xe_device *xe = xe_bo_device(bo);
1661 	int err;
1662 
1663 	/* We currently don't expect user BO to be pinned */
1664 	xe_assert(xe, !xe_bo_is_user(bo));
1665 
1666 	/* Pinned object must be in GGTT or have pinned flag */
1667 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
1668 				   XE_BO_FLAG_GGTT));
1669 
1670 	/*
1671 	 * No reason we can't support pinning imported dma-bufs we just don't
1672 	 * expect to pin an imported dma-buf.
1673 	 */
1674 	xe_assert(xe, !bo->ttm.base.import_attach);
1675 
1676 	/* We only expect at most 1 pin */
1677 	xe_assert(xe, !xe_bo_is_pinned(bo));
1678 
1679 	err = xe_bo_validate(bo, NULL, false);
1680 	if (err)
1681 		return err;
1682 
1683 	/*
1684 	 * For pinned objects in on DGFX, which are also in vram, we expect
1685 	 * these to be in contiguous VRAM memory. Required eviction / restore
1686 	 * during suspend / resume (force restore to same physical address).
1687 	 */
1688 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1689 	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1690 		struct ttm_place *place = &(bo->placements[0]);
1691 
1692 		if (mem_type_is_vram(place->mem_type)) {
1693 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1694 
1695 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1696 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1697 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1698 
1699 			spin_lock(&xe->pinned.lock);
1700 			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1701 			spin_unlock(&xe->pinned.lock);
1702 		}
1703 	}
1704 
1705 	ttm_bo_pin(&bo->ttm);
1706 
1707 	/*
1708 	 * FIXME: If we always use the reserve / unreserve functions for locking
1709 	 * we do not need this.
1710 	 */
1711 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1712 
1713 	return 0;
1714 }
1715 
1716 /**
1717  * xe_bo_unpin_external - unpin an external BO
1718  * @bo: buffer object to be unpinned
1719  *
1720  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1721  * BO. Unique call compared to xe_bo_unpin as this function has it own set of
1722  * asserts and code to ensure evict / restore on suspend / resume.
1723  *
1724  * Returns 0 for success, negative error code otherwise.
1725  */
1726 void xe_bo_unpin_external(struct xe_bo *bo)
1727 {
1728 	struct xe_device *xe = xe_bo_device(bo);
1729 
1730 	xe_assert(xe, !bo->vm);
1731 	xe_assert(xe, xe_bo_is_pinned(bo));
1732 	xe_assert(xe, xe_bo_is_user(bo));
1733 
1734 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1735 		spin_lock(&xe->pinned.lock);
1736 		list_del_init(&bo->pinned_link);
1737 		spin_unlock(&xe->pinned.lock);
1738 	}
1739 
1740 	ttm_bo_unpin(&bo->ttm);
1741 
1742 	/*
1743 	 * FIXME: If we always use the reserve / unreserve functions for locking
1744 	 * we do not need this.
1745 	 */
1746 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1747 }
1748 
1749 void xe_bo_unpin(struct xe_bo *bo)
1750 {
1751 	struct xe_device *xe = xe_bo_device(bo);
1752 
1753 	xe_assert(xe, !bo->ttm.base.import_attach);
1754 	xe_assert(xe, xe_bo_is_pinned(bo));
1755 
1756 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1757 	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
1758 		struct ttm_place *place = &(bo->placements[0]);
1759 
1760 		if (mem_type_is_vram(place->mem_type)) {
1761 			xe_assert(xe, !list_empty(&bo->pinned_link));
1762 
1763 			spin_lock(&xe->pinned.lock);
1764 			list_del_init(&bo->pinned_link);
1765 			spin_unlock(&xe->pinned.lock);
1766 		}
1767 	}
1768 
1769 	ttm_bo_unpin(&bo->ttm);
1770 }
1771 
1772 /**
1773  * xe_bo_validate() - Make sure the bo is in an allowed placement
1774  * @bo: The bo,
1775  * @vm: Pointer to a the vm the bo shares a locked dma_resv object with, or
1776  *      NULL. Used together with @allow_res_evict.
1777  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1778  *                   reservation object.
1779  *
1780  * Make sure the bo is in allowed placement, migrating it if necessary. If
1781  * needed, other bos will be evicted. If bos selected for eviction shares
1782  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
1783  * set to true, otherwise they will be bypassed.
1784  *
1785  * Return: 0 on success, negative error code on failure. May return
1786  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1787  */
1788 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1789 {
1790 	struct ttm_operation_ctx ctx = {
1791 		.interruptible = true,
1792 		.no_wait_gpu = false,
1793 	};
1794 
1795 	if (vm) {
1796 		lockdep_assert_held(&vm->lock);
1797 		xe_vm_assert_held(vm);
1798 
1799 		ctx.allow_res_evict = allow_res_evict;
1800 		ctx.resv = xe_vm_resv(vm);
1801 	}
1802 
1803 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1804 }
1805 
1806 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1807 {
1808 	if (bo->destroy == &xe_ttm_bo_destroy)
1809 		return true;
1810 
1811 	return false;
1812 }
1813 
1814 /*
1815  * Resolve a BO address. There is no assert to check if the proper lock is held
1816  * so it should only be used in cases where it is not fatal to get the wrong
1817  * address, such as printing debug information, but not in cases where memory is
1818  * written based on this result.
1819  */
1820 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1821 {
1822 	struct xe_device *xe = xe_bo_device(bo);
1823 	struct xe_res_cursor cur;
1824 	u64 page;
1825 
1826 	xe_assert(xe, page_size <= PAGE_SIZE);
1827 	page = offset >> PAGE_SHIFT;
1828 	offset &= (PAGE_SIZE - 1);
1829 
1830 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1831 		xe_assert(xe, bo->ttm.ttm);
1832 
1833 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1834 				page_size, &cur);
1835 		return xe_res_dma(&cur) + offset;
1836 	} else {
1837 		struct xe_res_cursor cur;
1838 
1839 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1840 			     page_size, &cur);
1841 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1842 	}
1843 }
1844 
1845 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1846 {
1847 	if (!READ_ONCE(bo->ttm.pin_count))
1848 		xe_bo_assert_held(bo);
1849 	return __xe_bo_addr(bo, offset, page_size);
1850 }
1851 
1852 int xe_bo_vmap(struct xe_bo *bo)
1853 {
1854 	void *virtual;
1855 	bool is_iomem;
1856 	int ret;
1857 
1858 	xe_bo_assert_held(bo);
1859 
1860 	if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
1861 		return -EINVAL;
1862 
1863 	if (!iosys_map_is_null(&bo->vmap))
1864 		return 0;
1865 
1866 	/*
1867 	 * We use this more or less deprecated interface for now since
1868 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1869 	 * single page bos, which is done here.
1870 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1871 	 * to use struct iosys_map.
1872 	 */
1873 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1874 	if (ret)
1875 		return ret;
1876 
1877 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1878 	if (is_iomem)
1879 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1880 	else
1881 		iosys_map_set_vaddr(&bo->vmap, virtual);
1882 
1883 	return 0;
1884 }
1885 
1886 static void __xe_bo_vunmap(struct xe_bo *bo)
1887 {
1888 	if (!iosys_map_is_null(&bo->vmap)) {
1889 		iosys_map_clear(&bo->vmap);
1890 		ttm_bo_kunmap(&bo->kmap);
1891 	}
1892 }
1893 
/*
 * Drop the CPU mapping of @bo, if any. Unlike __xe_bo_vunmap(), this asserts
 * that the bo lock is held.
 */
void xe_bo_vunmap(struct xe_bo *bo)
{
	xe_bo_assert_held(bo);
	__xe_bo_vunmap(bo);
}
1899 
/*
 * GEM create ioctl handler: validate the arguments in @data (a struct
 * drm_xe_gem_create), create a user bo accordingly and return a handle to it
 * in args->handle.
 *
 * Return: 0 on success, -EINVAL for invalid arguments, -ENOENT if
 * args->vm_id does not name a vm of @file, or another negative error code
 * if locking the vm, creating the bo or creating the handle fails.
 */
int xe_gem_create_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_gem_create *args = data;
	struct xe_vm *vm = NULL;
	struct xe_bo *bo;
	unsigned int bo_flags;
	u32 handle;
	int err;

	/* Extensions, padding and reserved fields must all be zero. */
	if (XE_IOCTL_DBG(xe, args->extensions) ||
	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	/* at least one valid memory placement must be specified */
	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
			 !args->placement))
		return -EINVAL;

	/* Reject flags other than the three handled below. */
	if (XE_IOCTL_DBG(xe, args->flags &
			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
		return -EINVAL;

	/* The handle is an output only. */
	if (XE_IOCTL_DBG(xe, args->handle))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !args->size))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
		return -EINVAL;

	/* The size must be a whole number of pages. */
	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
		return -EINVAL;

	/* Translate the uapi flags into XE_BO_FLAG_* bits. */
	bo_flags = 0;
	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
		bo_flags |= XE_BO_FLAG_DEFER_BACKING;

	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
		bo_flags |= XE_BO_FLAG_SCANOUT;

	/* Shift the placement mask so its bit 0 lands on XE_BO_FLAG_SYSTEM. */
	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);

	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
		/* Only meaningful when a VRAM placement was requested. */
		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
			return -EINVAL;

		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
	}

	/* A caching mode is mandatory and must be within the known range. */
	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
		return -EINVAL;

	/* Anything that may be placed in VRAM must use WC caching. */
	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
		return -EINVAL;

	/* Scanout bos must not use WB caching. */
	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
		return -EINVAL;

	/* Optionally tie the bo to a vm; the vm is locked across creation. */
	if (args->vm_id) {
		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;
		err = xe_vm_lock(vm, true);
		if (err)
			goto out_vm;
	}

	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
			       ttm_bo_type_device, bo_flags);

	if (vm)
		xe_vm_unlock(vm);

	if (IS_ERR(bo)) {
		err = PTR_ERR(bo);
		goto out_vm;
	}

	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
	if (err)
		goto out_bulk;

	args->handle = handle;
	/* Success: skip the bulk-move cleanup but still drop our references. */
	goto out_put;

out_bulk:
	/* Handle creation failed: detach the bo from the vm's bulk move. */
	if (vm && !xe_vm_in_fault_mode(vm)) {
		xe_vm_lock(vm, false);
		__xe_bo_unset_bulk_move(bo);
		xe_vm_unlock(vm);
	}
out_put:
	/*
	 * Drop the reference obtained at creation (presumably the handle, if
	 * one was created, keeps the bo alive - TODO confirm).
	 */
	xe_bo_put(bo);
out_vm:
	/* Drop the reference taken by xe_vm_lookup(). */
	if (vm)
		xe_vm_put(vm);

	return err;
}
2009 
/*
 * GEM mmap-offset ioctl handler: look up the gem object named by
 * args->handle and return its mmap offset in args->offset.
 *
 * Return: 0 on success, -EINVAL if extensions, reserved fields or flags are
 * non-zero, -ENOENT if the handle does not resolve to an object.
 */
int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct drm_xe_gem_mmap_offset *args = data;
	struct drm_gem_object *gem_obj;

	if (XE_IOCTL_DBG(xe, args->extensions) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	/* No flags are accepted. */
	if (XE_IOCTL_DBG(xe, args->flags))
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file, args->handle);
	if (XE_IOCTL_DBG(xe, !gem_obj))
		return -ENOENT;

	/* The mmap offset was set up at BO allocation time. */
	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);

	/* Release the object again now that the offset has been read. */
	xe_bo_put(gem_to_xe_bo(gem_obj));
	return 0;
}
2034 
2035 /**
2036  * xe_bo_lock() - Lock the buffer object's dma_resv object
2037  * @bo: The struct xe_bo whose lock is to be taken
2038  * @intr: Whether to perform any wait interruptible
2039  *
2040  * Locks the buffer object's dma_resv object. If the buffer object is
2041  * pointing to a shared dma_resv object, that shared lock is locked.
2042  *
2043  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2044  * contended lock was interrupted. If @intr is set to false, the
2045  * function always returns 0.
2046  */
2047 int xe_bo_lock(struct xe_bo *bo, bool intr)
2048 {
2049 	if (intr)
2050 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2051 
2052 	dma_resv_lock(bo->ttm.base.resv, NULL);
2053 
2054 	return 0;
2055 }
2056 
2057 /**
2058  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2059  * @bo: The struct xe_bo whose lock is to be released.
2060  *
2061  * Unlock a buffer object lock that was locked by xe_bo_lock().
2062  */
2063 void xe_bo_unlock(struct xe_bo *bo)
2064 {
2065 	dma_resv_unlock(bo->ttm.base.resv);
2066 }
2067 
2068 /**
2069  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2070  * @bo: The buffer object to migrate
2071  * @mem_type: The TTM memory type intended to migrate to
2072  *
2073  * Check whether the buffer object supports migration to the
2074  * given memory type. Note that pinning may affect the ability to migrate as
2075  * returned by this function.
2076  *
2077  * This function is primarily intended as a helper for checking the
2078  * possibility to migrate buffer objects and can be called without
2079  * the object lock held.
2080  *
2081  * Return: true if migration is possible, false otherwise.
2082  */
2083 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2084 {
2085 	unsigned int cur_place;
2086 
2087 	if (bo->ttm.type == ttm_bo_type_kernel)
2088 		return true;
2089 
2090 	if (bo->ttm.type == ttm_bo_type_sg)
2091 		return false;
2092 
2093 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2094 	     cur_place++) {
2095 		if (bo->placements[cur_place].mem_type == mem_type)
2096 			return true;
2097 	}
2098 
2099 	return false;
2100 }
2101 
2102 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2103 {
2104 	memset(place, 0, sizeof(*place));
2105 	place->mem_type = mem_type;
2106 }
2107 
2108 /**
2109  * xe_bo_migrate - Migrate an object to the desired region id
2110  * @bo: The buffer object to migrate.
2111  * @mem_type: The TTM region type to migrate to.
2112  *
2113  * Attempt to migrate the buffer object to the desired memory region. The
2114  * buffer object may not be pinned, and must be locked.
2115  * On successful completion, the object memory type will be updated,
2116  * but an async migration task may not have completed yet, and to
2117  * accomplish that, the object's kernel fences must be signaled with
2118  * the object lock held.
2119  *
2120  * Return: 0 on success. Negative error code on failure. In particular may
2121  * return -EINTR or -ERESTARTSYS if signal pending.
2122  */
2123 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2124 {
2125 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2126 	struct ttm_operation_ctx ctx = {
2127 		.interruptible = true,
2128 		.no_wait_gpu = false,
2129 	};
2130 	struct ttm_placement placement;
2131 	struct ttm_place requested;
2132 
2133 	xe_bo_assert_held(bo);
2134 
2135 	if (bo->ttm.resource->mem_type == mem_type)
2136 		return 0;
2137 
2138 	if (xe_bo_is_pinned(bo))
2139 		return -EBUSY;
2140 
2141 	if (!xe_bo_can_migrate(bo, mem_type))
2142 		return -EINVAL;
2143 
2144 	xe_place_from_ttm_type(mem_type, &requested);
2145 	placement.num_placement = 1;
2146 	placement.placement = &requested;
2147 
2148 	/*
2149 	 * Stolen needs to be handled like below VRAM handling if we ever need
2150 	 * to support it.
2151 	 */
2152 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2153 
2154 	if (mem_type_is_vram(mem_type)) {
2155 		u32 c = 0;
2156 
2157 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2158 	}
2159 
2160 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2161 }
2162 
2163 /**
2164  * xe_bo_evict - Evict an object to evict placement
2165  * @bo: The buffer object to migrate.
2166  * @force_alloc: Set force_alloc in ttm_operation_ctx
2167  *
2168  * On successful completion, the object memory will be moved to evict
2169  * placement. Ths function blocks until the object has been fully moved.
2170  *
2171  * Return: 0 on success. Negative error code on failure.
2172  */
2173 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2174 {
2175 	struct ttm_operation_ctx ctx = {
2176 		.interruptible = false,
2177 		.no_wait_gpu = false,
2178 		.force_alloc = force_alloc,
2179 	};
2180 	struct ttm_placement placement;
2181 	int ret;
2182 
2183 	xe_evict_flags(&bo->ttm, &placement);
2184 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2185 	if (ret)
2186 		return ret;
2187 
2188 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2189 			      false, MAX_SCHEDULE_TIMEOUT);
2190 
2191 	return 0;
2192 }
2193 
2194 /**
2195  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2196  * placed in system memory.
2197  * @bo: The xe_bo
2198  *
2199  * Return: true if extra pages need to be allocated, false otherwise.
2200  */
2201 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2202 {
2203 	struct xe_device *xe = xe_bo_device(bo);
2204 
2205 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
2206 		return false;
2207 
2208 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2209 		return false;
2210 
2211 	/* On discrete GPUs, if the GPU can access this buffer from
2212 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2213 	 * can't be used since there's no CCS storage associated with
2214 	 * non-VRAM addresses.
2215 	 */
2216 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
2217 		return false;
2218 
2219 	return true;
2220 }
2221 
/**
 * __xe_bo_release_dummy() - Dummy kref release function
 * @kref: The embedded struct kref.
 *
 * No-op release callback intended for xe_bo_put_deferred() only; it is
 * not meant to be used anywhere else.
 */
void __xe_bo_release_dummy(struct kref *kref)
{
	/* Intentionally empty. */
}
2231 
2232 /**
2233  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2234  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2235  *
2236  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2237  * The @deferred list can be either an onstack local list or a global
2238  * shared list used by a workqueue.
2239  */
2240 void xe_bo_put_commit(struct llist_head *deferred)
2241 {
2242 	struct llist_node *freed;
2243 	struct xe_bo *bo, *next;
2244 
2245 	if (!deferred)
2246 		return;
2247 
2248 	freed = llist_del_all(deferred);
2249 	if (!freed)
2250 		return;
2251 
2252 	llist_for_each_entry_safe(bo, next, freed, freed)
2253 		drm_gem_object_free(&bo->ttm.base.refcount);
2254 }
2255 
2256 /**
2257  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2258  * @file_priv: ...
2259  * @dev: ...
2260  * @args: ...
2261  *
2262  * See dumb_create() hook in include/drm/drm_drv.h
2263  *
2264  * Return: ...
2265  */
2266 int xe_bo_dumb_create(struct drm_file *file_priv,
2267 		      struct drm_device *dev,
2268 		      struct drm_mode_create_dumb *args)
2269 {
2270 	struct xe_device *xe = to_xe_device(dev);
2271 	struct xe_bo *bo;
2272 	uint32_t handle;
2273 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2274 	int err;
2275 	u32 page_size = max_t(u32, PAGE_SIZE,
2276 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2277 
2278 	args->pitch = ALIGN(args->width * cpp, 64);
2279 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2280 			   page_size);
2281 
2282 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2283 			       DRM_XE_GEM_CPU_CACHING_WC,
2284 			       ttm_bo_type_device,
2285 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2286 			       XE_BO_FLAG_SCANOUT |
2287 			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
2288 	if (IS_ERR(bo))
2289 		return PTR_ERR(bo);
2290 
2291 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2292 	/* drop reference from allocate - handle holds it now */
2293 	drm_gem_object_put(&bo->ttm.base);
2294 	if (!err)
2295 		args->handle = handle;
2296 	return err;
2297 }
2298 
2299 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
2300 {
2301 	struct ttm_buffer_object *tbo = &bo->ttm;
2302 	struct ttm_device *bdev = tbo->bdev;
2303 
2304 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
2305 
2306 	list_del_init(&bo->vram_userfault_link);
2307 }
2308 
2309 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2310 #include "tests/xe_bo.c"
2311 #endif
2312