xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision e28c5efc31397af17bc5a7d55b963f59bcde0166)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <drm/xe_drm.h>
17 
18 #include "xe_device.h"
19 #include "xe_dma_buf.h"
20 #include "xe_drm_client.h"
21 #include "xe_ggtt.h"
22 #include "xe_gt.h"
23 #include "xe_map.h"
24 #include "xe_migrate.h"
25 #include "xe_preempt_fence.h"
26 #include "xe_res_cursor.h"
27 #include "xe_trace.h"
28 #include "xe_ttm_stolen_mgr.h"
29 #include "xe_vm.h"
30 
31 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
32 	[XE_PL_SYSTEM] = "system",
33 	[XE_PL_TT] = "gtt",
34 	[XE_PL_VRAM0] = "vram0",
35 	[XE_PL_VRAM1] = "vram1",
36 	[XE_PL_STOLEN] = "stolen"
37 };
38 
39 static const struct ttm_place sys_placement_flags = {
40 	.fpfn = 0,
41 	.lpfn = 0,
42 	.mem_type = XE_PL_SYSTEM,
43 	.flags = 0,
44 };
45 
46 static struct ttm_placement sys_placement = {
47 	.num_placement = 1,
48 	.placement = &sys_placement_flags,
49 	.num_busy_placement = 1,
50 	.busy_placement = &sys_placement_flags,
51 };
52 
53 static const struct ttm_place tt_placement_flags = {
54 	.fpfn = 0,
55 	.lpfn = 0,
56 	.mem_type = XE_PL_TT,
57 	.flags = 0,
58 };
59 
60 static struct ttm_placement tt_placement = {
61 	.num_placement = 1,
62 	.placement = &tt_placement_flags,
63 	.num_busy_placement = 1,
64 	.busy_placement = &sys_placement_flags,
65 };
66 
67 bool mem_type_is_vram(u32 mem_type)
68 {
69 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
70 }
71 
72 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
73 {
74 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
75 }
76 
77 static bool resource_is_vram(struct ttm_resource *res)
78 {
79 	return mem_type_is_vram(res->mem_type);
80 }
81 
82 bool xe_bo_is_vram(struct xe_bo *bo)
83 {
84 	return resource_is_vram(bo->ttm.resource) ||
85 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
86 }
87 
88 bool xe_bo_is_stolen(struct xe_bo *bo)
89 {
90 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
91 }
92 
93 /**
94  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
95  * @bo: The BO
96  *
97  * The stolen memory is accessed through the PCI BAR for both DGFX and some
98  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
99  *
100  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
101  */
102 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
103 {
104 	return xe_bo_is_stolen(bo) &&
105 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
106 }
107 
108 static bool xe_bo_is_user(struct xe_bo *bo)
109 {
110 	return bo->flags & XE_BO_CREATE_USER_BIT;
111 }
112 
113 static struct xe_migrate *
114 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
115 {
116 	struct xe_tile *tile;
117 
118 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
119 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
120 	return tile->migrate;
121 }
122 
123 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
124 {
125 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
126 	struct ttm_resource_manager *mgr;
127 
128 	xe_assert(xe, resource_is_vram(res));
129 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
130 	return to_xe_ttm_vram_mgr(mgr)->vram;
131 }
132 
133 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
134 			   u32 bo_flags, u32 *c)
135 {
136 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
137 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
138 
139 		bo->placements[*c] = (struct ttm_place) {
140 			.mem_type = XE_PL_TT,
141 		};
142 		*c += 1;
143 
144 		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
145 			bo->props.preferred_mem_type = XE_PL_TT;
146 	}
147 }
148 
149 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
150 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
151 {
152 	struct ttm_place place = { .mem_type = mem_type };
153 	struct xe_mem_region *vram;
154 	u64 io_size;
155 
156 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
157 
158 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
159 	xe_assert(xe, vram && vram->usable_size);
160 	io_size = vram->io_size;
161 
162 	/*
163 	 * For eviction / restore on suspend / resume, objects
164 	 * pinned in VRAM must be contiguous.
165 	 */
166 	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
167 			XE_BO_CREATE_GGTT_BIT))
168 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
169 
170 	if (io_size < vram->usable_size) {
171 		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
172 			place.fpfn = 0;
173 			place.lpfn = io_size >> PAGE_SHIFT;
174 		} else {
175 			place.flags |= TTM_PL_FLAG_TOPDOWN;
176 		}
177 	}
178 	places[*c] = place;
179 	*c += 1;
180 
181 	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
182 		bo->props.preferred_mem_type = mem_type;
183 }
184 
185 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
186 			 u32 bo_flags, u32 *c)
187 {
188 	if (bo->props.preferred_gt == XE_GT1) {
189 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
190 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
191 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
192 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
193 	} else {
194 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
195 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
196 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
197 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
198 	}
199 }
200 
201 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
202 			   u32 bo_flags, u32 *c)
203 {
204 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
205 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
206 
207 		bo->placements[*c] = (struct ttm_place) {
208 			.mem_type = XE_PL_STOLEN,
209 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
210 					     XE_BO_CREATE_GGTT_BIT) ?
211 				TTM_PL_FLAG_CONTIGUOUS : 0,
212 		};
213 		*c += 1;
214 	}
215 }
216 
217 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
218 				       u32 bo_flags)
219 {
220 	u32 c = 0;
221 
222 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
223 
224 	/* The order of placements should indicate preferred location */
225 
226 	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
227 		try_add_system(xe, bo, bo_flags, &c);
228 		try_add_vram(xe, bo, bo_flags, &c);
229 	} else {
230 		try_add_vram(xe, bo, bo_flags, &c);
231 		try_add_system(xe, bo, bo_flags, &c);
232 	}
233 	try_add_stolen(xe, bo, bo_flags, &c);
234 
235 	if (!c)
236 		return -EINVAL;
237 
238 	bo->placement = (struct ttm_placement) {
239 		.num_placement = c,
240 		.placement = bo->placements,
241 		.num_busy_placement = c,
242 		.busy_placement = bo->placements,
243 	};
244 
245 	return 0;
246 }
247 
248 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
249 			      u32 bo_flags)
250 {
251 	xe_bo_assert_held(bo);
252 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
253 }
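
/*
 * Illustrative sketch (not part of the driver): the placement array built
 * above is ordered by preference. For a hypothetical BO created with
 * XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_SYSTEM_BIT (and a preferred mem
 * class other than sysmem), the result is { XE_PL_VRAM0, XE_PL_TT }, so
 * validation tries VRAM first and falls back to GTT. With the bo lock held:
 *
 *	err = xe_bo_placement_for_flags(xe, bo,
 *					XE_BO_CREATE_VRAM0_BIT |
 *					XE_BO_CREATE_SYSTEM_BIT);
 *	if (err)
 *		return err;	// -EINVAL if no placement bit was set
 */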
254 
255 static void xe_evict_flags(struct ttm_buffer_object *tbo,
256 			   struct ttm_placement *placement)
257 {
258 	if (!xe_bo_is_xe_bo(tbo)) {
259 		/* Don't handle scatter gather BOs */
260 		if (tbo->type == ttm_bo_type_sg) {
261 			placement->num_placement = 0;
262 			placement->num_busy_placement = 0;
263 			return;
264 		}
265 
266 		*placement = sys_placement;
267 		return;
268 	}
269 
270 	/*
271 	 * For xe, sg bos that are evicted to system just trigger a
272 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
273 	 */
274 	switch (tbo->resource->mem_type) {
275 	case XE_PL_VRAM0:
276 	case XE_PL_VRAM1:
277 	case XE_PL_STOLEN:
278 		*placement = tt_placement;
279 		break;
280 	case XE_PL_TT:
281 	default:
282 		*placement = sys_placement;
283 		break;
284 	}
285 }
286 
287 struct xe_ttm_tt {
288 	struct ttm_tt ttm;
289 	struct device *dev;
290 	struct sg_table sgt;
291 	struct sg_table *sg;
292 };
293 
294 static int xe_tt_map_sg(struct ttm_tt *tt)
295 {
296 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
297 	unsigned long num_pages = tt->num_pages;
298 	int ret;
299 
300 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
301 
302 	if (xe_tt->sg)
303 		return 0;
304 
305 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
306 						num_pages, 0,
307 						(u64)num_pages << PAGE_SHIFT,
308 						xe_sg_segment_size(xe_tt->dev),
309 						GFP_KERNEL);
310 	if (ret)
311 		return ret;
312 
313 	xe_tt->sg = &xe_tt->sgt;
314 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
315 			      DMA_ATTR_SKIP_CPU_SYNC);
316 	if (ret) {
317 		sg_free_table(xe_tt->sg);
318 		xe_tt->sg = NULL;
319 		return ret;
320 	}
321 
322 	return 0;
323 }
324 
325 struct sg_table *xe_bo_sg(struct xe_bo *bo)
326 {
327 	struct ttm_tt *tt = bo->ttm.ttm;
328 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
329 
330 	return xe_tt->sg;
331 }
332 
333 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
334 				       u32 page_flags)
335 {
336 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
337 	struct xe_device *xe = xe_bo_device(bo);
338 	struct xe_ttm_tt *tt;
339 	unsigned long extra_pages;
340 	enum ttm_caching caching;
341 	int err;
342 
343 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
344 	if (!tt)
345 		return NULL;
346 
347 	tt->dev = xe->drm.dev;
348 
349 	extra_pages = 0;
350 	if (xe_bo_needs_ccs_pages(bo))
351 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
352 					   PAGE_SIZE);
353 
354 	switch (bo->cpu_caching) {
355 	case DRM_XE_GEM_CPU_CACHING_WC:
356 		caching = ttm_write_combined;
357 		break;
358 	default:
359 		caching = ttm_cached;
360 		break;
361 	}
362 
363 	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
364 
365 	/*
366 	 * Display scanout is always non-coherent with the CPU cache.
367 	 *
368 	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
369 	 * require a CPU:WC mapping.
370 	 */
371 	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
372 	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
373 		caching = ttm_write_combined;
374 
375 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
376 	if (err) {
377 		kfree(tt);
378 		return NULL;
379 	}
380 
381 	return &tt->ttm;
382 }
383 
384 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
385 			      struct ttm_operation_ctx *ctx)
386 {
387 	int err;
388 
389 	/*
390 	 * dma-bufs are not populated with pages, and the dma-
391 	 * addresses are set up when moved to XE_PL_TT.
392 	 */
393 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
394 		return 0;
395 
396 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
397 	if (err)
398 		return err;
399 
400 	/* A follow-up may move this to xe_bo_move() when the BO is moved to XE_PL_TT */
401 	err = xe_tt_map_sg(tt);
402 	if (err)
403 		ttm_pool_free(&ttm_dev->pool, tt);
404 
405 	return err;
406 }
407 
408 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
409 {
410 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
411 
412 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
413 		return;
414 
415 	if (xe_tt->sg) {
416 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
417 				  DMA_BIDIRECTIONAL, 0);
418 		sg_free_table(xe_tt->sg);
419 		xe_tt->sg = NULL;
420 	}
421 
422 	return ttm_pool_free(&ttm_dev->pool, tt);
423 }
424 
425 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
426 {
427 	ttm_tt_fini(tt);
428 	kfree(tt);
429 }
430 
431 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
432 				 struct ttm_resource *mem)
433 {
434 	struct xe_device *xe = ttm_to_xe_device(bdev);
435 
436 	switch (mem->mem_type) {
437 	case XE_PL_SYSTEM:
438 	case XE_PL_TT:
439 		return 0;
440 	case XE_PL_VRAM0:
441 	case XE_PL_VRAM1: {
442 		struct xe_ttm_vram_mgr_resource *vres =
443 			to_xe_ttm_vram_mgr_resource(mem);
444 		struct xe_mem_region *vram = res_to_mem_region(mem);
445 
446 		if (vres->used_visible_size < mem->size)
447 			return -EINVAL;
448 
449 		mem->bus.offset = mem->start << PAGE_SHIFT;
450 
451 		if (vram->mapping &&
452 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
453 			mem->bus.addr = (u8 __force *)vram->mapping +
454 				mem->bus.offset;
455 
456 		mem->bus.offset += vram->io_start;
457 		mem->bus.is_iomem = true;
458 
459 #if  !defined(CONFIG_X86)
460 		mem->bus.caching = ttm_write_combined;
461 #endif
462 		return 0;
463 	} case XE_PL_STOLEN:
464 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
465 	default:
466 		return -EINVAL;
467 	}
468 }
469 
470 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
471 				const struct ttm_operation_ctx *ctx)
472 {
473 	struct dma_resv_iter cursor;
474 	struct dma_fence *fence;
475 	struct drm_gem_object *obj = &bo->ttm.base;
476 	struct drm_gpuvm_bo *vm_bo;
477 	bool idle = false;
478 	int ret = 0;
479 
480 	dma_resv_assert_held(bo->ttm.base.resv);
481 
482 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
483 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
484 				    DMA_RESV_USAGE_BOOKKEEP);
485 		dma_resv_for_each_fence_unlocked(&cursor, fence)
486 			dma_fence_enable_sw_signaling(fence);
487 		dma_resv_iter_end(&cursor);
488 	}
489 
490 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
491 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
492 		struct drm_gpuva *gpuva;
493 
494 		if (!xe_vm_in_fault_mode(vm)) {
495 			drm_gpuvm_bo_evict(vm_bo, true);
496 			continue;
497 		}
498 
499 		if (!idle) {
500 			long timeout;
501 
502 			if (ctx->no_wait_gpu &&
503 			    !dma_resv_test_signaled(bo->ttm.base.resv,
504 						    DMA_RESV_USAGE_BOOKKEEP))
505 				return -EBUSY;
506 
507 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
508 							DMA_RESV_USAGE_BOOKKEEP,
509 							ctx->interruptible,
510 							MAX_SCHEDULE_TIMEOUT);
511 			if (!timeout)
512 				return -ETIME;
513 			if (timeout < 0)
514 				return timeout;
515 
516 			idle = true;
517 		}
518 
519 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
520 			struct xe_vma *vma = gpuva_to_vma(gpuva);
521 
522 			trace_xe_vma_evict(vma);
523 			ret = xe_vm_invalidate_vma(vma);
524 			if (XE_WARN_ON(ret))
525 				return ret;
526 		}
527 	}
528 
529 	return ret;
530 }
531 
532 /*
533  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
534  * Note that unmapping the attachment is deferred to the next
535  * map_attachment time, or to bo destroy (after idling), whichever comes first.
536  * This is to avoid syncing before unmap_attachment(), assuming that the
537  * caller relies on idling the reservation object before moving the
538  * backing store out. Should that assumption not hold, then we will be able
539  * to unconditionally call unmap_attachment() when moving out to system.
540  */
541 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
542 			     struct ttm_resource *new_res)
543 {
544 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
545 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
546 					       ttm);
547 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
548 	struct sg_table *sg;
549 
550 	xe_assert(xe, attach);
551 	xe_assert(xe, ttm_bo->ttm);
552 
553 	if (new_res->mem_type == XE_PL_SYSTEM)
554 		goto out;
555 
556 	if (ttm_bo->sg) {
557 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
558 		ttm_bo->sg = NULL;
559 	}
560 
561 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
562 	if (IS_ERR(sg))
563 		return PTR_ERR(sg);
564 
565 	ttm_bo->sg = sg;
566 	xe_tt->sg = sg;
567 
568 out:
569 	ttm_bo_move_null(ttm_bo, new_res);
570 
571 	return 0;
572 }
573 
574 /**
575  * xe_bo_move_notify - Notify subsystems of a pending move
576  * @bo: The buffer object
577  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
578  *
579  * This function notifies subsystems of an upcoming buffer move.
580  * Upon receiving such a notification, subsystems should schedule
581  * halting access to the underlying pages and optionally add a fence
582  * to the buffer object's dma_resv object that signals when access is
583  * stopped. The caller will wait on all dma_resv fences before
584  * starting the move.
585  *
586  * A subsystem may commence access to the object after obtaining
587  * bindings to the new backing memory under the object lock.
588  *
589  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
590  * negative error code on error.
591  */
592 static int xe_bo_move_notify(struct xe_bo *bo,
593 			     const struct ttm_operation_ctx *ctx)
594 {
595 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
596 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
597 	int ret;
598 
599 	/*
600 	 * If this starts to call into many components, consider
601 	 * using a notification chain here.
602 	 */
603 
604 	if (xe_bo_is_pinned(bo))
605 		return -EINVAL;
606 
607 	xe_bo_vunmap(bo);
608 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
609 	if (ret)
610 		return ret;
611 
612 	/* Don't call move_notify() for imported dma-bufs. */
613 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
614 		dma_buf_move_notify(ttm_bo->base.dma_buf);
615 
616 	return 0;
617 }
618 
619 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
620 		      struct ttm_operation_ctx *ctx,
621 		      struct ttm_resource *new_mem,
622 		      struct ttm_place *hop)
623 {
624 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
625 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
626 	struct ttm_resource *old_mem = ttm_bo->resource;
627 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
628 	struct ttm_tt *ttm = ttm_bo->ttm;
629 	struct xe_migrate *migrate = NULL;
630 	struct dma_fence *fence;
631 	bool move_lacks_source;
632 	bool tt_has_data;
633 	bool needs_clear;
634 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
635 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
636 	int ret = 0;
637 	/* Bo creation path, moving to system or TT. */
638 	if ((!old_mem && ttm) && !handle_system_ccs) {
639 		ttm_bo_move_null(ttm_bo, new_mem);
640 		return 0;
641 	}
642 
643 	if (ttm_bo->type == ttm_bo_type_sg) {
644 		ret = xe_bo_move_notify(bo, ctx);
645 		if (!ret)
646 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
647 		goto out;
648 	}
649 
650 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
651 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
652 
653 	move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
654 						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
655 
656 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
657 		(!ttm && ttm_bo->type == ttm_bo_type_device);
658 
659 	if ((move_lacks_source && !needs_clear)) {
660 		ttm_bo_move_null(ttm_bo, new_mem);
661 		goto out;
662 	}
663 
664 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
665 		ttm_bo_move_null(ttm_bo, new_mem);
666 		goto out;
667 	}
668 
669 	/*
670 	 * Failed multi-hop where the old_mem is still marked as
671 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
672 	 */
673 	if (old_mem_type == XE_PL_TT &&
674 	    new_mem->mem_type == XE_PL_TT) {
675 		ttm_bo_move_null(ttm_bo, new_mem);
676 		goto out;
677 	}
678 
679 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
680 		ret = xe_bo_move_notify(bo, ctx);
681 		if (ret)
682 			goto out;
683 	}
684 
685 	if (old_mem_type == XE_PL_TT &&
686 	    new_mem->mem_type == XE_PL_SYSTEM) {
687 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
688 						     DMA_RESV_USAGE_BOOKKEEP,
689 						     true,
690 						     MAX_SCHEDULE_TIMEOUT);
691 		if (timeout < 0) {
692 			ret = timeout;
693 			goto out;
694 		}
695 
696 		if (!handle_system_ccs) {
697 			ttm_bo_move_null(ttm_bo, new_mem);
698 			goto out;
699 		}
700 	}
701 
702 	if (!move_lacks_source &&
703 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
704 	     (mem_type_is_vram(old_mem_type) &&
705 	      new_mem->mem_type == XE_PL_SYSTEM))) {
706 		hop->fpfn = 0;
707 		hop->lpfn = 0;
708 		hop->mem_type = XE_PL_TT;
709 		hop->flags = TTM_PL_FLAG_TEMPORARY;
710 		ret = -EMULTIHOP;
711 		goto out;
712 	}
713 
714 	if (bo->tile)
715 		migrate = bo->tile->migrate;
716 	else if (resource_is_vram(new_mem))
717 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
718 	else if (mem_type_is_vram(old_mem_type))
719 		migrate = mem_type_to_migrate(xe, old_mem_type);
720 	else
721 		migrate = xe->tiles[0].migrate;
722 
723 	xe_assert(xe, migrate);
724 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
725 	xe_device_mem_access_get(xe);
726 
727 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
728 		/*
729 		 * Kernel memory that is pinned should only be moved on suspend
730 		 * / resume; some of the pinned memory is required for the
731 		 * device to resume / use the GPU to move other evicted memory
732 		 * (user memory) around. This could likely be optimized a bit
733 		 * further by finding the minimum set of pinned memory
734 		 * required for resume, but for simplicity we do a memcpy for
735 		 * all pinned memory.
736 		 */
737 		ret = xe_bo_vmap(bo);
738 		if (!ret) {
739 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
740 
741 			/* Create a new VMAP once the kernel BO is back in VRAM */
742 			if (!ret && resource_is_vram(new_mem)) {
743 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
744 				void __iomem *new_addr = vram->mapping +
745 					(new_mem->start << PAGE_SHIFT);
746 
747 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
748 					ret = -EINVAL;
749 					xe_device_mem_access_put(xe);
750 					goto out;
751 				}
752 
753 				xe_assert(xe, new_mem->start ==
754 					  bo->placements->fpfn);
755 
756 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
757 			}
758 		}
759 	} else {
760 		if (move_lacks_source)
761 			fence = xe_migrate_clear(migrate, bo, new_mem);
762 		else
763 			fence = xe_migrate_copy(migrate, bo, bo, old_mem,
764 						new_mem, handle_system_ccs);
765 		if (IS_ERR(fence)) {
766 			ret = PTR_ERR(fence);
767 			xe_device_mem_access_put(xe);
768 			goto out;
769 		}
770 		if (!move_lacks_source) {
771 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
772 							true, new_mem);
773 			if (ret) {
774 				dma_fence_wait(fence, false);
775 				ttm_bo_move_null(ttm_bo, new_mem);
776 				ret = 0;
777 			}
778 		} else {
779 			/*
780 			 * ttm_bo_move_accel_cleanup() may blow up if
781 			 * bo->resource == NULL, so just attach the
782 			 * fence and set the new resource.
783 			 */
784 			dma_resv_add_fence(ttm_bo->base.resv, fence,
785 					   DMA_RESV_USAGE_KERNEL);
786 			ttm_bo_move_null(ttm_bo, new_mem);
787 		}
788 
789 		dma_fence_put(fence);
790 	}
791 
792 	xe_device_mem_access_put(xe);
793 
794 out:
795 	return ret;
796 
797 }
798 
799 /**
800  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
801  * @bo: The buffer object to move.
802  *
803  * On successful completion, the object memory will be moved to system memory.
804  * This function blocks until the object has been fully moved.
805  *
806  * This is needed for special handling of pinned VRAM objects during
807  * suspend-resume.
808  *
809  * Return: 0 on success. Negative error code on failure.
810  */
811 int xe_bo_evict_pinned(struct xe_bo *bo)
812 {
813 	struct ttm_place place = {
814 		.mem_type = XE_PL_TT,
815 	};
816 	struct ttm_placement placement = {
817 		.placement = &place,
818 		.num_placement = 1,
819 	};
820 	struct ttm_operation_ctx ctx = {
821 		.interruptible = false,
822 	};
823 	struct ttm_resource *new_mem;
824 	int ret;
825 
826 	xe_bo_assert_held(bo);
827 
828 	if (WARN_ON(!bo->ttm.resource))
829 		return -EINVAL;
830 
831 	if (WARN_ON(!xe_bo_is_pinned(bo)))
832 		return -EINVAL;
833 
834 	if (WARN_ON(!xe_bo_is_vram(bo)))
835 		return -EINVAL;
836 
837 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
838 	if (ret)
839 		return ret;
840 
841 	if (!bo->ttm.ttm) {
842 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
843 		if (!bo->ttm.ttm) {
844 			ret = -ENOMEM;
845 			goto err_res_free;
846 		}
847 	}
848 
849 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
850 	if (ret)
851 		goto err_res_free;
852 
853 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
854 	if (ret)
855 		goto err_res_free;
856 
857 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
858 	if (ret)
859 		goto err_res_free;
860 
861 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
862 			      false, MAX_SCHEDULE_TIMEOUT);
863 
864 	return 0;
865 
866 err_res_free:
867 	ttm_resource_free(&bo->ttm, &new_mem);
868 	return ret;
869 }
870 
871 /**
872  * xe_bo_restore_pinned() - Restore a pinned VRAM object
873  * @bo: The buffer object to move.
874  *
875  * On successful completion, the object memory will be moved back to VRAM.
876  * This function blocks until the object has been fully moved.
877  *
878  * This is needed for special handling of pinned VRAM objects during
879  * suspend-resume.
880  *
881  * Return: 0 on success. Negative error code on failure.
882  */
883 int xe_bo_restore_pinned(struct xe_bo *bo)
884 {
885 	struct ttm_operation_ctx ctx = {
886 		.interruptible = false,
887 	};
888 	struct ttm_resource *new_mem;
889 	int ret;
890 
891 	xe_bo_assert_held(bo);
892 
893 	if (WARN_ON(!bo->ttm.resource))
894 		return -EINVAL;
895 
896 	if (WARN_ON(!xe_bo_is_pinned(bo)))
897 		return -EINVAL;
898 
899 	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
900 		return -EINVAL;
901 
902 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
903 	if (ret)
904 		return ret;
905 
906 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
907 	if (ret)
908 		goto err_res_free;
909 
910 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
911 	if (ret)
912 		goto err_res_free;
913 
914 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
915 	if (ret)
916 		goto err_res_free;
917 
918 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
919 			      false, MAX_SCHEDULE_TIMEOUT);
920 
921 	return 0;
922 
923 err_res_free:
924 	ttm_resource_free(&bo->ttm, &new_mem);
925 	return ret;
926 }
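
/*
 * Illustrative sketch (not part of the driver), with a hypothetical pinned
 * kernel BO @bo and error variable @err: xe_bo_evict_pinned() and
 * xe_bo_restore_pinned() are intended to be called in pairs around
 * suspend / resume, with the BO lock held:
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_evict_pinned(bo);		// suspend: VRAM -> system
 *	xe_bo_unlock(bo);
 *	...
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_restore_pinned(bo);		// resume: system -> VRAM
 *	xe_bo_unlock(bo);
 */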
927 
928 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
929 				       unsigned long page_offset)
930 {
931 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
932 	struct xe_res_cursor cursor;
933 	struct xe_mem_region *vram;
934 
935 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
936 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
937 
938 	vram = res_to_mem_region(ttm_bo->resource);
939 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
940 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
941 }
942 
943 static void __xe_bo_vunmap(struct xe_bo *bo);
944 
945 /*
946  * TODO: Move this function to TTM so we don't rely on how TTM does its
947  * locking, thereby abusing TTM internals.
948  */
949 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
950 {
951 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
952 	bool locked;
953 
954 	xe_assert(xe, !kref_read(&ttm_bo->kref));
955 
956 	/*
957 	 * We can typically only race with TTM trylocking under the
958 	 * lru_lock, which will immediately be unlocked again since
959 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
960 	 * always succeed here, as long as we hold the lru lock.
961 	 */
962 	spin_lock(&ttm_bo->bdev->lru_lock);
963 	locked = dma_resv_trylock(ttm_bo->base.resv);
964 	spin_unlock(&ttm_bo->bdev->lru_lock);
965 	xe_assert(xe, locked);
966 
967 	return locked;
968 }
969 
970 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
971 {
972 	struct dma_resv_iter cursor;
973 	struct dma_fence *fence;
974 	struct dma_fence *replacement = NULL;
975 	struct xe_bo *bo;
976 
977 	if (!xe_bo_is_xe_bo(ttm_bo))
978 		return;
979 
980 	bo = ttm_to_xe_bo(ttm_bo);
981 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
982 
983 	/*
984 	 * Corner case where TTM fails to allocate memory and this BO's resv
985 	 * still points to the VM's resv.
986 	 */
987 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
988 		return;
989 
990 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
991 		return;
992 
993 	/*
994 	 * Scrub the preempt fences if any. The unbind fence is already
995 	 * attached to the resv.
996 	 * TODO: Don't do this for external bos once we scrub them after
997 	 * unbind.
998 	 */
999 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1000 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1001 		if (xe_fence_is_xe_preempt(fence) &&
1002 		    !dma_fence_is_signaled(fence)) {
1003 			if (!replacement)
1004 				replacement = dma_fence_get_stub();
1005 
1006 			dma_resv_replace_fences(ttm_bo->base.resv,
1007 						fence->context,
1008 						replacement,
1009 						DMA_RESV_USAGE_BOOKKEEP);
1010 		}
1011 	}
1012 	dma_fence_put(replacement);
1013 
1014 	dma_resv_unlock(ttm_bo->base.resv);
1015 }
1016 
1017 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1018 {
1019 	if (!xe_bo_is_xe_bo(ttm_bo))
1020 		return;
1021 
1022 	/*
1023 	 * Object is idle and about to be destroyed. Release the
1024 	 * dma-buf attachment.
1025 	 */
1026 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1027 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1028 						       struct xe_ttm_tt, ttm);
1029 
1030 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1031 					 DMA_BIDIRECTIONAL);
1032 		ttm_bo->sg = NULL;
1033 		xe_tt->sg = NULL;
1034 	}
1035 }
1036 
1037 struct ttm_device_funcs xe_ttm_funcs = {
1038 	.ttm_tt_create = xe_ttm_tt_create,
1039 	.ttm_tt_populate = xe_ttm_tt_populate,
1040 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1041 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1042 	.evict_flags = xe_evict_flags,
1043 	.move = xe_bo_move,
1044 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1045 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1046 	.release_notify = xe_ttm_bo_release_notify,
1047 	.eviction_valuable = ttm_bo_eviction_valuable,
1048 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1049 };
1050 
1051 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1052 {
1053 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1054 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1055 
1056 	if (bo->ttm.base.import_attach)
1057 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1058 	drm_gem_object_release(&bo->ttm.base);
1059 
1060 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1061 
1062 	if (bo->ggtt_node.size)
1063 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1064 
1065 #ifdef CONFIG_PROC_FS
1066 	if (bo->client)
1067 		xe_drm_client_remove_bo(bo);
1068 #endif
1069 
1070 	if (bo->vm && xe_bo_is_user(bo))
1071 		xe_vm_put(bo->vm);
1072 
1073 	kfree(bo);
1074 }
1075 
1076 static void xe_gem_object_free(struct drm_gem_object *obj)
1077 {
1078 	/* Our BO reference counting scheme works as follows:
1079 	 *
1080 	 * The gem object kref is typically used throughout the driver,
1081 	 * and the gem object holds a ttm_buffer_object refcount, so
1082 	 * that when the last gem object reference is put, which is when
1083 	 * we end up in this function, we also put that ttm_buffer_object
1084 	 * refcount. Anything using gem interfaces is then no longer
1085 	 * allowed to access the object in a way that requires a gem
1086 	 * refcount, including locking the object.
1087 	 *
1088 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1089 	 * refcount directly if needed.
1090 	 */
1091 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1092 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1093 }
1094 
1095 static void xe_gem_object_close(struct drm_gem_object *obj,
1096 				struct drm_file *file_priv)
1097 {
1098 	struct xe_bo *bo = gem_to_xe_bo(obj);
1099 
1100 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1101 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1102 
1103 		xe_bo_lock(bo, false);
1104 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1105 		xe_bo_unlock(bo);
1106 	}
1107 }
1108 
1109 static bool should_migrate_to_system(struct xe_bo *bo)
1110 {
1111 	struct xe_device *xe = xe_bo_device(bo);
1112 
1113 	return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
1114 }
1115 
1116 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1117 {
1118 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1119 	struct drm_device *ddev = tbo->base.dev;
1120 	vm_fault_t ret;
1121 	int idx, r = 0;
1122 
1123 	ret = ttm_bo_vm_reserve(tbo, vmf);
1124 	if (ret)
1125 		return ret;
1126 
1127 	if (drm_dev_enter(ddev, &idx)) {
1128 		struct xe_bo *bo = ttm_to_xe_bo(tbo);
1129 
1130 		trace_xe_bo_cpu_fault(bo);
1131 
1132 		if (should_migrate_to_system(bo)) {
1133 			r = xe_bo_migrate(bo, XE_PL_TT);
1134 			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1135 				ret = VM_FAULT_NOPAGE;
1136 			else if (r)
1137 				ret = VM_FAULT_SIGBUS;
1138 		}
1139 		if (!ret)
1140 			ret = ttm_bo_vm_fault_reserved(vmf,
1141 						       vmf->vma->vm_page_prot,
1142 						       TTM_BO_VM_NUM_PREFAULT);
1143 		drm_dev_exit(idx);
1144 	} else {
1145 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1146 	}
1147 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1148 		return ret;
1149 
1150 	dma_resv_unlock(tbo->base.resv);
1151 	return ret;
1152 }
1153 
1154 static const struct vm_operations_struct xe_gem_vm_ops = {
1155 	.fault = xe_gem_fault,
1156 	.open = ttm_bo_vm_open,
1157 	.close = ttm_bo_vm_close,
1158 	.access = ttm_bo_vm_access
1159 };
1160 
1161 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1162 	.free = xe_gem_object_free,
1163 	.close = xe_gem_object_close,
1164 	.mmap = drm_gem_ttm_mmap,
1165 	.export = xe_gem_prime_export,
1166 	.vm_ops = &xe_gem_vm_ops,
1167 };
1168 
1169 /**
1170  * xe_bo_alloc - Allocate storage for a struct xe_bo
1171  *
1172  * This function is intended to allocate storage to be used for input
1173  * to __xe_bo_create_locked(), in case a pointer to the bo to be
1174  * created is needed before the call to __xe_bo_create_locked().
1175  * If __xe_bo_create_locked() ends up never being called, then the
1176  * storage allocated with this function needs to be freed using
1177  * xe_bo_free().
1178  *
1179  * Return: A pointer to an uninitialized struct xe_bo on success,
1180  * ERR_PTR(-ENOMEM) on error.
1181  */
1182 struct xe_bo *xe_bo_alloc(void)
1183 {
1184 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1185 
1186 	if (!bo)
1187 		return ERR_PTR(-ENOMEM);
1188 
1189 	return bo;
1190 }
1191 
1192 /**
1193  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1194  * @bo: The buffer object storage.
1195  *
1196  * Refer to xe_bo_alloc() documentation for valid use-cases.
1197  */
1198 void xe_bo_free(struct xe_bo *bo)
1199 {
1200 	kfree(bo);
1201 }
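
/*
 * Illustrative sketch (not part of the driver), error handling elided and
 * names such as @abort_early hypothetical: pre-allocate storage so the bo
 * pointer is known before creation, or free it again if creation is never
 * attempted:
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	if (abort_early) {	// __xe_bo_create_locked() never called
 *		xe_bo_free(bo);
 *		return 0;
 *	}
 *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
 *				    cpu_caching, type, flags);
 */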
1202 
1203 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1204 				     struct xe_tile *tile, struct dma_resv *resv,
1205 				     struct ttm_lru_bulk_move *bulk, size_t size,
1206 				     u16 cpu_caching, enum ttm_bo_type type,
1207 				     u32 flags)
1208 {
1209 	struct ttm_operation_ctx ctx = {
1210 		.interruptible = true,
1211 		.no_wait_gpu = false,
1212 	};
1213 	struct ttm_placement *placement;
1214 	uint32_t alignment;
1215 	size_t aligned_size;
1216 	int err;
1217 
1218 	/* Only kernel objects should set GT */
1219 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1220 
1221 	if (XE_WARN_ON(!size)) {
1222 		xe_bo_free(bo);
1223 		return ERR_PTR(-EINVAL);
1224 	}
1225 
1226 	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
1227 	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
1228 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
1229 		aligned_size = ALIGN(size, SZ_64K);
1230 		if (type != ttm_bo_type_device)
1231 			size = ALIGN(size, SZ_64K);
1232 		flags |= XE_BO_INTERNAL_64K;
1233 		alignment = SZ_64K >> PAGE_SHIFT;
1234 
1235 	} else {
1236 		aligned_size = ALIGN(size, SZ_4K);
1237 		flags &= ~XE_BO_INTERNAL_64K;
1238 		alignment = SZ_4K >> PAGE_SHIFT;
1239 	}
1240 
1241 	if (type == ttm_bo_type_device && aligned_size != size)
1242 		return ERR_PTR(-EINVAL);
1243 
1244 	if (!bo) {
1245 		bo = xe_bo_alloc();
1246 		if (IS_ERR(bo))
1247 			return bo;
1248 	}
1249 
1250 	bo->ccs_cleared = false;
1251 	bo->tile = tile;
1252 	bo->size = size;
1253 	bo->flags = flags;
1254 	bo->cpu_caching = cpu_caching;
1255 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1256 	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
1257 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
1258 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
1259 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1260 	INIT_LIST_HEAD(&bo->pinned_link);
1261 #ifdef CONFIG_PROC_FS
1262 	INIT_LIST_HEAD(&bo->client_link);
1263 #endif
1264 
1265 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1266 
1267 	if (resv) {
1268 		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
1269 		ctx.resv = resv;
1270 	}
1271 
1272 	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
1273 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1274 		if (WARN_ON(err)) {
1275 			xe_ttm_bo_destroy(&bo->ttm);
1276 			return ERR_PTR(err);
1277 		}
1278 	}
1279 
1280 	/* Defer populating type_sg bos */
1281 	placement = (type == ttm_bo_type_sg ||
1282 		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
1283 		&bo->placement;
1284 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1285 				   placement, alignment,
1286 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1287 	if (err)
1288 		return ERR_PTR(err);
1289 
1290 	/*
1291 	 * The VRAM pages underneath are potentially still being accessed by the
1292 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1293 	 * sure to add any corresponding move/clear fences into the object's
1294 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1295 	 *
1296 	 * For KMD internal buffers we don't care about GPU clearing, however we
1297 	 * still need to handle async evictions, where the VRAM is still being
1298 	 * accessed by the GPU. Most internal callers are not expecting this,
1299 	 * since they are missing the required synchronisation before accessing
1300 	 * the memory. To keep things simple just sync wait any kernel fences
1301 	 * here, if the buffer is designated KMD internal.
1302 	 *
1303 	 * For normal userspace objects we should already have the required
1304 	 * pipelining or sync waiting elsewhere, since we already have to deal
1305 	 * with things like async GPU clearing.
1306 	 */
1307 	if (type == ttm_bo_type_kernel) {
1308 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1309 						     DMA_RESV_USAGE_KERNEL,
1310 						     ctx.interruptible,
1311 						     MAX_SCHEDULE_TIMEOUT);
1312 
1313 		if (timeout < 0) {
1314 			if (!resv)
1315 				dma_resv_unlock(bo->ttm.base.resv);
1316 			xe_bo_put(bo);
1317 			return ERR_PTR(timeout);
1318 		}
1319 	}
1320 
1321 	bo->created = true;
1322 	if (bulk)
1323 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1324 	else
1325 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1326 
1327 	return bo;
1328 }
1329 
1330 static int __xe_bo_fixed_placement(struct xe_device *xe,
1331 				   struct xe_bo *bo,
1332 				   u32 flags,
1333 				   u64 start, u64 end, u64 size)
1334 {
1335 	struct ttm_place *place = bo->placements;
1336 
1337 	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
1338 		return -EINVAL;
1339 
1340 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1341 	place->fpfn = start >> PAGE_SHIFT;
1342 	place->lpfn = end >> PAGE_SHIFT;
1343 
1344 	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
1345 	case XE_BO_CREATE_VRAM0_BIT:
1346 		place->mem_type = XE_PL_VRAM0;
1347 		break;
1348 	case XE_BO_CREATE_VRAM1_BIT:
1349 		place->mem_type = XE_PL_VRAM1;
1350 		break;
1351 	case XE_BO_CREATE_STOLEN_BIT:
1352 		place->mem_type = XE_PL_STOLEN;
1353 		break;
1354 
1355 	default:
1356 		/* 0 or multiple of the above set */
1357 		return -EINVAL;
1358 	}
1359 
1360 	bo->placement = (struct ttm_placement) {
1361 		.num_placement = 1,
1362 		.placement = place,
1363 		.num_busy_placement = 1,
1364 		.busy_placement = place,
1365 	};
1366 
1367 	return 0;
1368 }
1369 
1370 static struct xe_bo *
1371 __xe_bo_create_locked(struct xe_device *xe,
1372 		      struct xe_tile *tile, struct xe_vm *vm,
1373 		      size_t size, u64 start, u64 end,
1374 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1375 {
1376 	struct xe_bo *bo = NULL;
1377 	int err;
1378 
1379 	if (vm)
1380 		xe_vm_assert_held(vm);
1381 
1382 	if (start || end != ~0ULL) {
1383 		bo = xe_bo_alloc();
1384 		if (IS_ERR(bo))
1385 			return bo;
1386 
1387 		flags |= XE_BO_FIXED_PLACEMENT_BIT;
1388 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1389 		if (err) {
1390 			xe_bo_free(bo);
1391 			return ERR_PTR(err);
1392 		}
1393 	}
1394 
1395 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1396 				    vm && !xe_vm_in_fault_mode(vm) &&
1397 				    flags & XE_BO_CREATE_USER_BIT ?
1398 				    &vm->lru_bulk_move : NULL, size,
1399 				    cpu_caching, type, flags);
1400 	if (IS_ERR(bo))
1401 		return bo;
1402 
1403 	/*
1404 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
1405 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1406 	 * will keep a reference to the vm, and avoid circular references
1407 	 * by having all the vm's bo references released at vm close
1408 	 * time.
1409 	 */
1410 	if (vm && xe_bo_is_user(bo))
1411 		xe_vm_get(vm);
1412 	bo->vm = vm;
1413 
1414 	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
1415 		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
1416 			tile = xe_device_get_root_tile(xe);
1417 
1418 		xe_assert(xe, tile);
1419 
1420 		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
1421 			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1422 						   start + bo->size, U64_MAX);
1423 		} else {
1424 			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1425 		}
1426 		if (err)
1427 			goto err_unlock_put_bo;
1428 	}
1429 
1430 	return bo;
1431 
1432 err_unlock_put_bo:
1433 	__xe_bo_unset_bulk_move(bo);
1434 	xe_bo_unlock_vm_held(bo);
1435 	xe_bo_put(bo);
1436 	return ERR_PTR(err);
1437 }
1438 
1439 struct xe_bo *
1440 xe_bo_create_locked_range(struct xe_device *xe,
1441 			  struct xe_tile *tile, struct xe_vm *vm,
1442 			  size_t size, u64 start, u64 end,
1443 			  enum ttm_bo_type type, u32 flags)
1444 {
1445 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1446 }
1447 
1448 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1449 				  struct xe_vm *vm, size_t size,
1450 				  enum ttm_bo_type type, u32 flags)
1451 {
1452 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1453 }
1454 
1455 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1456 				struct xe_vm *vm, size_t size,
1457 				u16 cpu_caching,
1458 				enum ttm_bo_type type,
1459 				u32 flags)
1460 {
1461 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1462 						 cpu_caching, type,
1463 						 flags | XE_BO_CREATE_USER_BIT);
1464 	if (!IS_ERR(bo))
1465 		xe_bo_unlock_vm_held(bo);
1466 
1467 	return bo;
1468 }
1469 
1470 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1471 			   struct xe_vm *vm, size_t size,
1472 			   enum ttm_bo_type type, u32 flags)
1473 {
1474 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1475 
1476 	if (!IS_ERR(bo))
1477 		xe_bo_unlock_vm_held(bo);
1478 
1479 	return bo;
1480 }
1481 
1482 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1483 				      struct xe_vm *vm,
1484 				      size_t size, u64 offset,
1485 				      enum ttm_bo_type type, u32 flags)
1486 {
1487 	struct xe_bo *bo;
1488 	int err;
1489 	u64 start = offset == ~0ull ? 0 : offset;
1490 	u64 end = offset == ~0ull ? offset : start + size;
1491 
1492 	if (flags & XE_BO_CREATE_STOLEN_BIT &&
1493 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1494 		flags |= XE_BO_CREATE_GGTT_BIT;
1495 
1496 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1497 				       flags | XE_BO_NEEDS_CPU_ACCESS);
1498 	if (IS_ERR(bo))
1499 		return bo;
1500 
1501 	err = xe_bo_pin(bo);
1502 	if (err)
1503 		goto err_put;
1504 
1505 	err = xe_bo_vmap(bo);
1506 	if (err)
1507 		goto err_unpin;
1508 
1509 	xe_bo_unlock_vm_held(bo);
1510 
1511 	return bo;
1512 
1513 err_unpin:
1514 	xe_bo_unpin(bo);
1515 err_put:
1516 	xe_bo_unlock_vm_held(bo);
1517 	xe_bo_put(bo);
1518 	return ERR_PTR(err);
1519 }
1520 
1521 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1522 				   struct xe_vm *vm, size_t size,
1523 				   enum ttm_bo_type type, u32 flags)
1524 {
1525 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1526 }
1527 
1528 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1529 				     const void *data, size_t size,
1530 				     enum ttm_bo_type type, u32 flags)
1531 {
1532 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1533 						ALIGN(size, PAGE_SIZE),
1534 						type, flags);
1535 	if (IS_ERR(bo))
1536 		return bo;
1537 
1538 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1539 
1540 	return bo;
1541 }
1542 
1543 static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
1544 {
1545 	xe_bo_unpin_map_no_vm(arg);
1546 }
1547 
1548 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1549 					   size_t size, u32 flags)
1550 {
1551 	struct xe_bo *bo;
1552 	int ret;
1553 
1554 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1555 	if (IS_ERR(bo))
1556 		return bo;
1557 
1558 	ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
1559 	if (ret)
1560 		return ERR_PTR(ret);
1561 
1562 	return bo;
1563 }
1564 
1565 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1566 					     const void *data, size_t size, u32 flags)
1567 {
1568 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1569 
1570 	if (IS_ERR(bo))
1571 		return bo;
1572 
1573 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1574 
1575 	return bo;
1576 }
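
/*
 * Illustrative sketch (not part of the driver), with hypothetical @blob /
 * @blob_size: create a drm-managed, pinned and vmapped kernel copy of a
 * data blob that is torn down automatically with the device:
 *
 *	struct xe_bo *bo;
 *
 *	bo = xe_managed_bo_create_from_data(xe, tile, blob, blob_size,
 *					    XE_BO_CREATE_SYSTEM_BIT |
 *					    XE_BO_CREATE_GGTT_BIT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 */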
1577 
1578 /*
1579  * XXX: This is in the VM bind data path; we should likely calculate this once
1580  * and store it, recalculating if the BO is moved.
1581  */
1582 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1583 {
1584 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1585 
1586 	if (res->mem_type == XE_PL_STOLEN)
1587 		return xe_ttm_stolen_gpu_offset(xe);
1588 
1589 	return res_to_mem_region(res)->dpa_base;
1590 }
1591 
1592 /**
1593  * xe_bo_pin_external - pin an external BO
1594  * @bo: buffer object to be pinned
1595  *
1596  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1597  * BO. Unique call compared to xe_bo_pin as this function has its own set of
1598  * asserts and code to ensure evict / restore on suspend / resume.
1599  *
1600  * Returns 0 for success, negative error code otherwise.
1601  */
1602 int xe_bo_pin_external(struct xe_bo *bo)
1603 {
1604 	struct xe_device *xe = xe_bo_device(bo);
1605 	int err;
1606 
1607 	xe_assert(xe, !bo->vm);
1608 	xe_assert(xe, xe_bo_is_user(bo));
1609 
1610 	if (!xe_bo_is_pinned(bo)) {
1611 		err = xe_bo_validate(bo, NULL, false);
1612 		if (err)
1613 			return err;
1614 
1615 		if (xe_bo_is_vram(bo)) {
1616 			spin_lock(&xe->pinned.lock);
1617 			list_add_tail(&bo->pinned_link,
1618 				      &xe->pinned.external_vram);
1619 			spin_unlock(&xe->pinned.lock);
1620 		}
1621 	}
1622 
1623 	ttm_bo_pin(&bo->ttm);
1624 
1625 	/*
1626 	 * FIXME: If we always use the reserve / unreserve functions for locking
1627 	 * we do not need this.
1628 	 */
1629 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1630 
1631 	return 0;
1632 }
1633 
1634 int xe_bo_pin(struct xe_bo *bo)
1635 {
1636 	struct xe_device *xe = xe_bo_device(bo);
1637 	int err;
1638 
1639 	/* We currently don't expect user BO to be pinned */
1640 	xe_assert(xe, !xe_bo_is_user(bo));
1641 
1642 	/* Pinned object must be in GGTT or have pinned flag */
1643 	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
1644 				   XE_BO_CREATE_GGTT_BIT));
1645 
1646 	/*
1647 	 * No reason we can't support pinning imported dma-bufs; we just don't
1648 	 * expect to pin an imported dma-buf.
1649 	 */
1650 	xe_assert(xe, !bo->ttm.base.import_attach);
1651 
1652 	/* We only expect at most 1 pin */
1653 	xe_assert(xe, !xe_bo_is_pinned(bo));
1654 
1655 	err = xe_bo_validate(bo, NULL, false);
1656 	if (err)
1657 		return err;
1658 
1659 	/*
1660 	 * Pinned objects on DGFX that are also in VRAM are expected to be in
1661 	 * contiguous VRAM memory, which is required for eviction / restore
1662 	 * during suspend / resume (forcing restore to the same physical address).
1663 	 */
1664 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1665 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1666 		struct ttm_place *place = &(bo->placements[0]);
1667 
1668 		if (mem_type_is_vram(place->mem_type)) {
1669 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1670 
1671 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1672 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1673 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1674 
1675 			spin_lock(&xe->pinned.lock);
1676 			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1677 			spin_unlock(&xe->pinned.lock);
1678 		}
1679 	}
1680 
1681 	ttm_bo_pin(&bo->ttm);
1682 
1683 	/*
1684 	 * FIXME: If we always use the reserve / unreserve functions for locking
1685 	 * we do not need this.
1686 	 */
1687 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1688 
1689 	return 0;
1690 }
1691 
1692 /**
1693  * xe_bo_unpin_external - unpin an external BO
1694  * @bo: buffer object to be unpinned
1695  *
1696  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1697  * BO. Unique call compared to xe_bo_unpin as this function has its own set of
1698  * asserts and code to ensure evict / restore on suspend / resume.
1701  */
1702 void xe_bo_unpin_external(struct xe_bo *bo)
1703 {
1704 	struct xe_device *xe = xe_bo_device(bo);
1705 
1706 	xe_assert(xe, !bo->vm);
1707 	xe_assert(xe, xe_bo_is_pinned(bo));
1708 	xe_assert(xe, xe_bo_is_user(bo));
1709 
1710 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1711 		spin_lock(&xe->pinned.lock);
1712 		list_del_init(&bo->pinned_link);
1713 		spin_unlock(&xe->pinned.lock);
1714 	}
1715 
1716 	ttm_bo_unpin(&bo->ttm);
1717 
1718 	/*
1719 	 * FIXME: If we always use the reserve / unreserve functions for locking
1720 	 * we do not need this.
1721 	 */
1722 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1723 }
1724 
1725 void xe_bo_unpin(struct xe_bo *bo)
1726 {
1727 	struct xe_device *xe = xe_bo_device(bo);
1728 
1729 	xe_assert(xe, !bo->ttm.base.import_attach);
1730 	xe_assert(xe, xe_bo_is_pinned(bo));
1731 
1732 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1733 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1734 		struct ttm_place *place = &(bo->placements[0]);
1735 
1736 		if (mem_type_is_vram(place->mem_type)) {
1737 			xe_assert(xe, !list_empty(&bo->pinned_link));
1738 
1739 			spin_lock(&xe->pinned.lock);
1740 			list_del_init(&bo->pinned_link);
1741 			spin_unlock(&xe->pinned.lock);
1742 		}
1743 	}
1744 
1745 	ttm_bo_unpin(&bo->ttm);
1746 }
1747 
1748 /**
1749  * xe_bo_validate() - Make sure the bo is in an allowed placement
1750  * @bo: The bo,
1751  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
1752  *      NULL. Used together with @allow_res_evict.
1753  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1754  *                   reservation object.
1755  *
1756  * Make sure the bo is in allowed placement, migrating it if necessary. If
1757  * needed, other bos will be evicted. If bos selected for eviction share
1758  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
1759  * set to true, otherwise they will be bypassed.
1760  *
1761  * Return: 0 on success, negative error code on failure. May return
1762  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1763  */
1764 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1765 {
1766 	struct ttm_operation_ctx ctx = {
1767 		.interruptible = true,
1768 		.no_wait_gpu = false,
1769 	};
1770 
1771 	if (vm) {
1772 		lockdep_assert_held(&vm->lock);
1773 		xe_vm_assert_held(vm);
1774 
1775 		ctx.allow_res_evict = allow_res_evict;
1776 		ctx.resv = xe_vm_resv(vm);
1777 	}
1778 
1779 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1780 }
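
/*
 * Illustrative sketch (not part of the driver): validating a VM-less BO
 * under its own lock (names hypothetical):
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_validate(bo, NULL, false);
 *	xe_bo_unlock(bo);
 */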
1781 
1782 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1783 {
1784 	if (bo->destroy == &xe_ttm_bo_destroy)
1785 		return true;
1786 
1787 	return false;
1788 }
1789 
1790 /*
1791  * Resolve a BO address. There is no assert to check if the proper lock is held
1792  * so it should only be used in cases where it is not fatal to get the wrong
1793  * address, such as printing debug information, but not in cases where memory is
1794  * written based on this result.
1795  */
1796 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1797 {
1798 	struct xe_device *xe = xe_bo_device(bo);
1799 	struct xe_res_cursor cur;
1800 	u64 page;
1801 
1802 	xe_assert(xe, page_size <= PAGE_SIZE);
1803 	page = offset >> PAGE_SHIFT;
1804 	offset &= (PAGE_SIZE - 1);
1805 
1806 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1807 		xe_assert(xe, bo->ttm.ttm);
1808 
1809 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1810 				page_size, &cur);
1811 		return xe_res_dma(&cur) + offset;
1812 	} else {
1813 		struct xe_res_cursor cur;
1814 
1815 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1816 			     page_size, &cur);
1817 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1818 	}
1819 }
1820 
1821 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1822 {
1823 	if (!READ_ONCE(bo->ttm.pin_count))
1824 		xe_bo_assert_held(bo);
1825 	return __xe_bo_addr(bo, offset, page_size);
1826 }
1827 
1828 int xe_bo_vmap(struct xe_bo *bo)
1829 {
1830 	void *virtual;
1831 	bool is_iomem;
1832 	int ret;
1833 
1834 	xe_bo_assert_held(bo);
1835 
1836 	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
1837 		return -EINVAL;
1838 
1839 	if (!iosys_map_is_null(&bo->vmap))
1840 		return 0;
1841 
1842 	/*
1843 	 * We use this more or less deprecated interface for now since
1844 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1845 	 * single page bos, which is done here.
1846 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1847 	 * to use struct iosys_map.
1848 	 */
1849 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1850 	if (ret)
1851 		return ret;
1852 
1853 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1854 	if (is_iomem)
1855 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1856 	else
1857 		iosys_map_set_vaddr(&bo->vmap, virtual);
1858 
1859 	return 0;
1860 }
1861 
1862 static void __xe_bo_vunmap(struct xe_bo *bo)
1863 {
1864 	if (!iosys_map_is_null(&bo->vmap)) {
1865 		iosys_map_clear(&bo->vmap);
1866 		ttm_bo_kunmap(&bo->kmap);
1867 	}
1868 }
1869 
1870 void xe_bo_vunmap(struct xe_bo *bo)
1871 {
1872 	xe_bo_assert_held(bo);
1873 	__xe_bo_vunmap(bo);
1874 }
1875 
1876 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
1877 			struct drm_file *file)
1878 {
1879 	struct xe_device *xe = to_xe_device(dev);
1880 	struct xe_file *xef = to_xe_file(file);
1881 	struct drm_xe_gem_create *args = data;
1882 	struct xe_vm *vm = NULL;
1883 	struct xe_bo *bo;
1884 	unsigned int bo_flags;
1885 	u32 handle;
1886 	int err;
1887 
1888 	if (XE_IOCTL_DBG(xe, args->extensions) ||
1889 	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
1890 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1891 		return -EINVAL;
1892 
1893 	/* at least one valid memory placement must be specified */
1894 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
1895 			 !args->placement))
1896 		return -EINVAL;
1897 
1898 	if (XE_IOCTL_DBG(xe, args->flags &
1899 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
1900 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
1901 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
1902 		return -EINVAL;
1903 
1904 	if (XE_IOCTL_DBG(xe, args->handle))
1905 		return -EINVAL;
1906 
1907 	if (XE_IOCTL_DBG(xe, !args->size))
1908 		return -EINVAL;
1909 
1910 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
1911 		return -EINVAL;
1912 
1913 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
1914 		return -EINVAL;
1915 
1916 	bo_flags = 0;
1917 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
1918 		bo_flags |= XE_BO_DEFER_BACKING;
1919 
1920 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
1921 		bo_flags |= XE_BO_SCANOUT_BIT;
1922 
1923 	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
1924 
1925 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
1926 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
1927 			return -EINVAL;
1928 
1929 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
1930 	}
1931 
1932 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
1933 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
1934 		return -EINVAL;
1935 
1936 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
1937 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
1938 		return -EINVAL;
1939 
1940 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
1941 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
1942 		return -EINVAL;
1943 
1944 	if (args->vm_id) {
1945 		vm = xe_vm_lookup(xef, args->vm_id);
1946 		if (XE_IOCTL_DBG(xe, !vm))
1947 			return -ENOENT;
1948 		err = xe_vm_lock(vm, true);
1949 		if (err)
1950 			goto out_vm;
1951 	}
1952 
1953 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
1954 			       ttm_bo_type_device, bo_flags);
1955 
1956 	if (vm)
1957 		xe_vm_unlock(vm);
1958 
1959 	if (IS_ERR(bo)) {
1960 		err = PTR_ERR(bo);
1961 		goto out_vm;
1962 	}
1963 
1964 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
1965 	if (err)
1966 		goto out_bulk;
1967 
1968 	args->handle = handle;
1969 	goto out_put;
1970 
1971 out_bulk:
1972 	if (vm && !xe_vm_in_fault_mode(vm)) {
1973 		xe_vm_lock(vm, false);
1974 		__xe_bo_unset_bulk_move(bo);
1975 		xe_vm_unlock(vm);
1976 	}
1977 out_put:
1978 	xe_bo_put(bo);
1979 out_vm:
1980 	if (vm)
1981 		xe_vm_put(vm);
1982 
1983 	return err;
1984 }
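
/*
 * Illustrative sketch (userspace side, not part of the driver): creating a BO
 * through DRM_IOCTL_XE_GEM_CREATE. Assumes an open xe DRM fd and, for brevity,
 * that bit 0 of @placement selects the system memory region; real code should
 * build the mask from DRM_XE_DEVICE_QUERY_MEM_REGIONS. The helper name is
 * hypothetical.
 *
 *	#include <errno.h>
 *	#include <xf86drm.h>
 *	#include <drm/xe_drm.h>
 *
 *	static int example_gem_create(int fd, __u64 size, __u32 *handle)
 *	{
 *		struct drm_xe_gem_create create = {
 *			.size = size,		// must be page aligned
 *			.placement = 1 << 0,	// system memory instance
 *			.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
 *		};
 *
 *		if (drmIoctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create))
 *			return -errno;
 *
 *		*handle = create.handle;
 *		return 0;
 *	}
 */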
1985 
1986 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
1987 			     struct drm_file *file)
1988 {
1989 	struct xe_device *xe = to_xe_device(dev);
1990 	struct drm_xe_gem_mmap_offset *args = data;
1991 	struct drm_gem_object *gem_obj;
1992 
1993 	if (XE_IOCTL_DBG(xe, args->extensions) ||
1994 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1995 		return -EINVAL;
1996 
1997 	if (XE_IOCTL_DBG(xe, args->flags))
1998 		return -EINVAL;
1999 
2000 	gem_obj = drm_gem_object_lookup(file, args->handle);
2001 	if (XE_IOCTL_DBG(xe, !gem_obj))
2002 		return -ENOENT;
2003 
2004 	/* The mmap offset was set up at BO allocation time. */
2005 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
2006 
2007 	xe_bo_put(gem_to_xe_bo(gem_obj));
2008 	return 0;
2009 }
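
/*
 * Illustrative sketch (userspace side, not part of the driver): CPU-mapping a
 * BO by querying its fake mmap offset and then calling mmap() on the DRM fd.
 * The helper name is hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <xf86drm.h>
 *	#include <drm/xe_drm.h>
 *
 *	static void *example_gem_mmap(int fd, __u32 handle, size_t size)
 *	{
 *		struct drm_xe_gem_mmap_offset mmo = { .handle = handle };
 *
 *		if (drmIoctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo))
 *			return MAP_FAILED;
 *
 *		return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			    fd, mmo.offset);
 *	}
 */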
2010 
2011 /**
2012  * xe_bo_lock() - Lock the buffer object's dma_resv object
2013  * @bo: The struct xe_bo whose lock is to be taken
2014  * @intr: Whether to perform any wait interruptible
2015  *
2016  * Locks the buffer object's dma_resv object. If the buffer object is
2017  * pointing to a shared dma_resv object, that shared lock is locked.
2018  *
2019  * Return: 0 on success, -EINTR if @intr is true and the wait for a
2020  * contended lock was interrupted. If @intr is set to false, the
2021  * function always returns 0.
2022  */
2023 int xe_bo_lock(struct xe_bo *bo, bool intr)
2024 {
2025 	if (intr)
2026 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
2027 
2028 	dma_resv_lock(bo->ttm.base.resv, NULL);
2029 
2030 	return 0;
2031 }
2032 
2033 /**
2034  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
2035  * @bo: The struct xe_bo whose lock is to be released.
2036  *
2037  * Unlock a buffer object lock that was locked by xe_bo_lock().
2038  */
2039 void xe_bo_unlock(struct xe_bo *bo)
2040 {
2041 	dma_resv_unlock(bo->ttm.base.resv);
2042 }
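
/*
 * Illustrative sketch, not part of the driver: the usual short, interruptible
 * critical section on a BO, here (re)validating its backing store. -EINTR from
 * xe_bo_lock() is propagated so the calling syscall can be restarted. The
 * helper name is hypothetical.
 */
static int __maybe_unused example_populate_bo(struct xe_bo *bo)
{
	int err;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	err = xe_bo_validate(bo, NULL, false);
	xe_bo_unlock(bo);

	return err;
}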
2043 
2044 /**
2045  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2046  * @bo: The buffer object to check
2047  * @mem_type: The TTM memory type intended to migrate to
2048  *
2049  * Check whether the buffer object supports migration to the
2050  * given memory type. Note that pinning may affect whether the object can
2051  * actually be migrated, regardless of what this function returns.
2052  *
2053  * This function is primarily intended as a helper for checking the
2054  * possibility to migrate buffer objects and can be called without
2055  * the object lock held.
2056  *
2057  * Return: true if migration is possible, false otherwise.
2058  */
2059 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2060 {
2061 	unsigned int cur_place;
2062 
2063 	if (bo->ttm.type == ttm_bo_type_kernel)
2064 		return true;
2065 
2066 	if (bo->ttm.type == ttm_bo_type_sg)
2067 		return false;
2068 
2069 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2070 	     cur_place++) {
2071 		if (bo->placements[cur_place].mem_type == mem_type)
2072 			return true;
2073 	}
2074 
2075 	return false;
2076 }
2077 
2078 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2079 {
2080 	memset(place, 0, sizeof(*place));
2081 	place->mem_type = mem_type;
2082 }
2083 
2084 /**
2085  * xe_bo_migrate - Migrate an object to the desired region id
2086  * @bo: The buffer object to migrate.
2087  * @mem_type: The TTM region type to migrate to.
2088  *
2089  * Attempt to migrate the buffer object to the desired memory region. The
2090  * buffer object may not be pinned, and must be locked.
2091  * On successful completion, the object memory type will be updated,
2092  * but an async migration task may not have completed yet, and to
2093  * accomplish that, the object's kernel fences must be signaled with
2094  * the object lock held.
2095  *
2096  * Return: 0 on success. Negative error code on failure. In particular may
2097  * return -EINTR or -ERESTARTSYS if signal pending.
2098  */
2099 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2100 {
2101 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2102 	struct ttm_operation_ctx ctx = {
2103 		.interruptible = true,
2104 		.no_wait_gpu = false,
2105 	};
2106 	struct ttm_placement placement;
2107 	struct ttm_place requested;
2108 
2109 	xe_bo_assert_held(bo);
2110 
2111 	if (bo->ttm.resource->mem_type == mem_type)
2112 		return 0;
2113 
2114 	if (xe_bo_is_pinned(bo))
2115 		return -EBUSY;
2116 
2117 	if (!xe_bo_can_migrate(bo, mem_type))
2118 		return -EINVAL;
2119 
2120 	xe_place_from_ttm_type(mem_type, &requested);
2121 	placement.num_placement = 1;
2122 	placement.num_busy_placement = 1;
2123 	placement.placement = &requested;
2124 	placement.busy_placement = &requested;
2125 
2126 	/*
2127 	 * Stolen would need to be handled like the VRAM handling below if we
2128 	 * ever need to support migrating to it.
2129 	 */
2130 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2131 
2132 	if (mem_type_is_vram(mem_type)) {
2133 		u32 c = 0;
2134 
2135 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2136 	}
2137 
2138 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2139 }
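
/*
 * Illustrative sketch, not part of the driver: migrating a BO to VRAM of the
 * first tile and, as described in the kernel-doc above, waiting for the async
 * copy by waiting on the object's kernel fences with the lock held. The helper
 * name is hypothetical.
 */
static int __maybe_unused example_migrate_to_vram(struct xe_bo *bo)
{
	long timeout;
	int err;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	err = xe_bo_migrate(bo, XE_PL_VRAM0);
	if (!err) {
		timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
						DMA_RESV_USAGE_KERNEL, true,
						MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0)
			err = timeout;
	}
	xe_bo_unlock(bo);

	return err;
}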
2140 
2141 /**
2142  * xe_bo_evict - Evict an object to evict placement
2143  * @bo: The buffer object to migrate.
2144  * @force_alloc: Set force_alloc in ttm_operation_ctx
2145  *
2146  * On successful completion, the object memory will be moved to the evict
2147  * placement. This function blocks until the object has been fully moved.
2148  *
2149  * Return: 0 on success. Negative error code on failure.
2150  */
2151 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2152 {
2153 	struct ttm_operation_ctx ctx = {
2154 		.interruptible = false,
2155 		.no_wait_gpu = false,
2156 		.force_alloc = force_alloc,
2157 	};
2158 	struct ttm_placement placement;
2159 	int ret;
2160 
2161 	xe_evict_flags(&bo->ttm, &placement);
2162 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2163 	if (ret)
2164 		return ret;
2165 
2166 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2167 			      false, MAX_SCHEDULE_TIMEOUT);
2168 
2169 	return 0;
2170 }
2171 
2172 /**
2173  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2174  * placed in system memory.
2175  * @bo: The xe_bo
2176  *
2177  * Return: true if extra pages need to be allocated, false otherwise.
2178  */
2179 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2180 {
2181 	struct xe_device *xe = xe_bo_device(bo);
2182 
2183 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
2184 		return false;
2185 
2186 	/* On discrete GPUs, if the GPU can access this buffer from
2187 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
2188 	 * can't be used since there's no CCS storage associated with
2189 	 * non-VRAM addresses.
2190 	 */
2191 	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
2192 		return false;
2193 
2194 	return true;
2195 }
2196 
2197 /**
2198  * __xe_bo_release_dummy() - Dummy kref release function
2199  * @kref: The embedded struct kref.
2200  *
2201  * Dummy release function for xe_bo_put_deferred(). Do not call it directly.
2202  */
2203 void __xe_bo_release_dummy(struct kref *kref)
2204 {
2205 }
2206 
2207 /**
2208  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2209  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2210  *
2211  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2212  * The @deferred list can be either an onstack local list or a global
2213  * shared list used by a workqueue.
2214  */
2215 void xe_bo_put_commit(struct llist_head *deferred)
2216 {
2217 	struct llist_node *freed;
2218 	struct xe_bo *bo, *next;
2219 
2220 	if (!deferred)
2221 		return;
2222 
2223 	freed = llist_del_all(deferred);
2224 	if (!freed)
2225 		return;
2226 
2227 	llist_for_each_entry_safe(bo, next, freed, freed)
2228 		drm_gem_object_free(&bo->ttm.base.refcount);
2229 }
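
/*
 * Illustrative sketch, not part of the driver: dropping a batch of BO
 * references from a context where taking the locks needed for the final free
 * is undesirable, by deferring the puts and committing them once. The array
 * and helper name are hypothetical.
 */
static void __maybe_unused example_put_bo_array(struct xe_bo **bos, int count)
{
	LLIST_HEAD(deferred);
	int i;

	for (i = 0; i < count; i++)
		xe_bo_put_deferred(bos[i], &deferred);

	/* Frees every BO whose refcount actually dropped to zero above. */
	xe_bo_put_commit(&deferred);
}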
2230 
2231 /**
2232  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2233  * @file_priv: The DRM file from which the dumb buffer is created
2234  * @dev: The DRM device
2235  * @args: The create-dumb arguments; pitch, size and handle are filled in
2236  *
2237  * See dumb_create() hook in include/drm/drm_drv.h
2238  *
2239  * Return: 0 on success, negative error code on failure.
2240  */
2241 int xe_bo_dumb_create(struct drm_file *file_priv,
2242 		      struct drm_device *dev,
2243 		      struct drm_mode_create_dumb *args)
2244 {
2245 	struct xe_device *xe = to_xe_device(dev);
2246 	struct xe_bo *bo;
2247 	u32 handle;
2248 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2249 	int err;
2250 	u32 page_size = max_t(u32, PAGE_SIZE,
2251 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2252 
2253 	args->pitch = ALIGN(args->width * cpp, 64);
2254 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2255 			   page_size);
2256 
2257 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2258 			       DRM_XE_GEM_CPU_CACHING_WC,
2259 			       ttm_bo_type_device,
2260 			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2261 			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
2262 			       XE_BO_NEEDS_CPU_ACCESS);
2263 	if (IS_ERR(bo))
2264 		return PTR_ERR(bo);
2265 
2266 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2267 	/* drop reference from allocate - handle holds it now */
2268 	drm_gem_object_put(&bo->ttm.base);
2269 	if (!err)
2270 		args->handle = handle;
2271 	return err;
2272 }
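
/*
 * Illustrative sketch (userspace side, not part of the driver): allocating a
 * dumb buffer through the generic DRM_IOCTL_MODE_CREATE_DUMB ioctl, which ends
 * up in xe_bo_dumb_create() above. Assumes an fd for the card (primary) node.
 * The helper name is hypothetical.
 *
 *	#include <errno.h>
 *	#include <xf86drm.h>
 *
 *	static int example_create_dumb(int fd, __u32 width, __u32 height,
 *				       struct drm_mode_create_dumb *out)
 *	{
 *		struct drm_mode_create_dumb args = {
 *			.width = width,
 *			.height = height,
 *			.bpp = 32,	// e.g. XRGB8888
 *		};
 *
 *		if (drmIoctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &args))
 *			return -errno;
 *
 *		*out = args;	// pitch, size and handle filled by the kernel
 *		return 0;
 *	}
 */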
2273 
2274 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2275 #include "tests/xe_bo.c"
2276 #endif
2277