xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 8cdcef1c2f82d207aa8b2a02298fbc17191c6261)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/ttm/ttm_device.h>
13 #include <drm/ttm/ttm_placement.h>
14 #include <drm/ttm/ttm_tt.h>
15 #include <drm/xe_drm.h>
16 
17 #include "xe_device.h"
18 #include "xe_dma_buf.h"
19 #include "xe_drm_client.h"
20 #include "xe_ggtt.h"
21 #include "xe_gt.h"
22 #include "xe_map.h"
23 #include "xe_migrate.h"
24 #include "xe_preempt_fence.h"
25 #include "xe_res_cursor.h"
26 #include "xe_trace.h"
27 #include "xe_ttm_stolen_mgr.h"
28 #include "xe_vm.h"
29 
30 static const struct ttm_place sys_placement_flags = {
31 	.fpfn = 0,
32 	.lpfn = 0,
33 	.mem_type = XE_PL_SYSTEM,
34 	.flags = 0,
35 };
36 
37 static struct ttm_placement sys_placement = {
38 	.num_placement = 1,
39 	.placement = &sys_placement_flags,
40 	.num_busy_placement = 1,
41 	.busy_placement = &sys_placement_flags,
42 };
43 
44 static const struct ttm_place tt_placement_flags = {
45 	.fpfn = 0,
46 	.lpfn = 0,
47 	.mem_type = XE_PL_TT,
48 	.flags = 0,
49 };
50 
51 static struct ttm_placement tt_placement = {
52 	.num_placement = 1,
53 	.placement = &tt_placement_flags,
54 	.num_busy_placement = 1,
55 	.busy_placement = &sys_placement_flags,
56 };
57 
58 bool mem_type_is_vram(u32 mem_type)
59 {
60 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
61 }
62 
63 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
64 {
65 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
66 }
67 
68 static bool resource_is_vram(struct ttm_resource *res)
69 {
70 	return mem_type_is_vram(res->mem_type);
71 }
72 
73 bool xe_bo_is_vram(struct xe_bo *bo)
74 {
75 	return resource_is_vram(bo->ttm.resource) ||
76 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
77 }
78 
79 bool xe_bo_is_stolen(struct xe_bo *bo)
80 {
81 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
82 }
83 
84 /**
85  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
86  * @bo: The BO
87  *
88  * The stolen memory is accessed through the PCI BAR for both DGFX and some
89  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
90  *
91  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
92  */
93 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
94 {
95 	return xe_bo_is_stolen(bo) &&
96 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
97 }
98 
99 static bool xe_bo_is_user(struct xe_bo *bo)
100 {
101 	return bo->flags & XE_BO_CREATE_USER_BIT;
102 }
103 
104 static struct xe_migrate *
105 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
106 {
107 	struct xe_tile *tile;
108 
109 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
110 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
111 	return tile->migrate;
112 }
113 
114 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
115 {
116 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
117 	struct ttm_resource_manager *mgr;
118 
119 	xe_assert(xe, resource_is_vram(res));
120 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
121 	return to_xe_ttm_vram_mgr(mgr)->vram;
122 }
123 
124 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
125 			   u32 bo_flags, u32 *c)
126 {
127 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
128 
129 	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
130 		bo->placements[*c] = (struct ttm_place) {
131 			.mem_type = XE_PL_TT,
132 		};
133 		*c += 1;
134 
135 		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
136 			bo->props.preferred_mem_type = XE_PL_TT;
137 	}
138 }
139 
140 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
141 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
142 {
143 	struct ttm_place place = { .mem_type = mem_type };
144 	struct xe_mem_region *vram;
145 	u64 io_size;
146 
147 	vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
148 	xe_assert(xe, vram && vram->usable_size);
149 	io_size = vram->io_size;
150 
151 	/*
152 	 * For eviction / restore on suspend / resume, objects
153 	 * pinned in VRAM must be contiguous.
154 	 */
155 	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
156 			XE_BO_CREATE_GGTT_BIT))
157 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
158 
159 	if (io_size < vram->usable_size) {
160 		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
161 			place.fpfn = 0;
162 			place.lpfn = io_size >> PAGE_SHIFT;
163 		} else {
164 			place.flags |= TTM_PL_FLAG_TOPDOWN;
165 		}
166 	}
167 	places[*c] = place;
168 	*c += 1;
169 
170 	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
171 		bo->props.preferred_mem_type = mem_type;
172 }
173 
174 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
175 			 u32 bo_flags, u32 *c)
176 {
177 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
178 
179 	if (bo->props.preferred_gt == XE_GT1) {
180 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
181 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
182 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
183 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
184 	} else {
185 		if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
186 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
187 		if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
188 			add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
189 	}
190 }
191 
192 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
193 			   u32 bo_flags, u32 *c)
194 {
195 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
196 
197 	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
198 		bo->placements[*c] = (struct ttm_place) {
199 			.mem_type = XE_PL_STOLEN,
200 			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
201 					     XE_BO_CREATE_GGTT_BIT) ?
202 				TTM_PL_FLAG_CONTIGUOUS : 0,
203 		};
204 		*c += 1;
205 	}
206 }
207 
208 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
209 				       u32 bo_flags)
210 {
211 	u32 c = 0;
212 
213 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
214 
215 	/* The order of placements should indicate preferred location */
216 
217 	if (bo->props.preferred_mem_class == DRM_XE_MEM_REGION_CLASS_SYSMEM) {
218 		try_add_system(xe, bo, bo_flags, &c);
219 		try_add_vram(xe, bo, bo_flags, &c);
220 	} else {
221 		try_add_vram(xe, bo, bo_flags, &c);
222 		try_add_system(xe, bo, bo_flags, &c);
223 	}
224 	try_add_stolen(xe, bo, bo_flags, &c);
225 
226 	if (!c)
227 		return -EINVAL;
228 
229 	bo->placement = (struct ttm_placement) {
230 		.num_placement = c,
231 		.placement = bo->placements,
232 		.num_busy_placement = c,
233 		.busy_placement = bo->placements,
234 	};
235 
236 	return 0;
237 }
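
/*
 * Editorial note (not part of the original file): because the placement
 * array above is filled in preference order, a BO created with
 * XE_BO_CREATE_VRAM0_BIT | XE_BO_CREATE_SYSTEM_BIT and no explicit
 * preferred_mem_class ends up with placements = { VRAM0, TT }, so TTM tries
 * VRAM0 first and only falls back to system/TT under memory pressure or
 * eviction.
 */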
238 
239 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
240 			      u32 bo_flags)
241 {
242 	xe_bo_assert_held(bo);
243 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
244 }
245 
246 static void xe_evict_flags(struct ttm_buffer_object *tbo,
247 			   struct ttm_placement *placement)
248 {
249 	struct xe_bo *bo;
250 
251 	if (!xe_bo_is_xe_bo(tbo)) {
252 		/* Don't handle scatter gather BOs */
253 		if (tbo->type == ttm_bo_type_sg) {
254 			placement->num_placement = 0;
255 			placement->num_busy_placement = 0;
256 			return;
257 		}
258 
259 		*placement = sys_placement;
260 		return;
261 	}
262 
263 	/*
264 	 * For xe, sg bos that are evicted to system just trigger a
265 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
266 	 */
267 
268 	bo = ttm_to_xe_bo(tbo);
269 	switch (tbo->resource->mem_type) {
270 	case XE_PL_VRAM0:
271 	case XE_PL_VRAM1:
272 	case XE_PL_STOLEN:
273 		*placement = tt_placement;
274 		break;
275 	case XE_PL_TT:
276 	default:
277 		*placement = sys_placement;
278 		break;
279 	}
280 }
281 
282 struct xe_ttm_tt {
283 	struct ttm_tt ttm;
284 	struct device *dev;
285 	struct sg_table sgt;
286 	struct sg_table *sg;
287 };
288 
289 static int xe_tt_map_sg(struct ttm_tt *tt)
290 {
291 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
292 	unsigned long num_pages = tt->num_pages;
293 	int ret;
294 
295 	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
296 
297 	if (xe_tt->sg)
298 		return 0;
299 
300 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
301 						num_pages, 0,
302 						(u64)num_pages << PAGE_SHIFT,
303 						xe_sg_segment_size(xe_tt->dev),
304 						GFP_KERNEL);
305 	if (ret)
306 		return ret;
307 
308 	xe_tt->sg = &xe_tt->sgt;
309 	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
310 			      DMA_ATTR_SKIP_CPU_SYNC);
311 	if (ret) {
312 		sg_free_table(xe_tt->sg);
313 		xe_tt->sg = NULL;
314 		return ret;
315 	}
316 
317 	return 0;
318 }
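
/*
 * Editorial note: xe_tt_map_sg() gives a TT-backed BO both a CPU view
 * (tt->pages) and a DMA view (xe_tt->sg). The sg table built here is what
 * xe_bo_sg() below returns, and is presumably what the VM binding / page
 * table code walks to obtain dma addresses for system-memory pages.
 */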
319 
320 struct sg_table *xe_bo_sg(struct xe_bo *bo)
321 {
322 	struct ttm_tt *tt = bo->ttm.ttm;
323 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
324 
325 	return xe_tt->sg;
326 }
327 
328 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
329 				       u32 page_flags)
330 {
331 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
332 	struct xe_device *xe = xe_bo_device(bo);
333 	struct xe_ttm_tt *tt;
334 	unsigned long extra_pages;
335 	enum ttm_caching caching;
336 	int err;
337 
338 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
339 	if (!tt)
340 		return NULL;
341 
342 	tt->dev = xe->drm.dev;
343 
344 	extra_pages = 0;
345 	if (xe_bo_needs_ccs_pages(bo))
346 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
347 					   PAGE_SIZE);
348 
349 	switch (bo->cpu_caching) {
350 	case DRM_XE_GEM_CPU_CACHING_WC:
351 		caching = ttm_write_combined;
352 		break;
353 	default:
354 		caching = ttm_cached;
355 		break;
356 	}
357 
358 	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
359 
360 	/*
361 	 * Display scanout is always non-coherent with the CPU cache.
362 	 *
363 	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
364 	 * require a CPU:WC mapping.
365 	 */
366 	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
367 	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
368 		caching = ttm_write_combined;
369 
370 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
371 	if (err) {
372 		kfree(tt);
373 		return NULL;
374 	}
375 
376 	return &tt->ttm;
377 }
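
/*
 * Editorial example of the caching selection above: user BOs follow their
 * requested cpu_caching (WB -> ttm_cached, WC -> ttm_write_combined), while
 * kernel BOs default to ttm_cached unless they are scanout buffers or, on
 * graphics version 12.70+, page-table BOs, in which case the CPU mapping is
 * forced to write-combined.
 */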
378 
379 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
380 			      struct ttm_operation_ctx *ctx)
381 {
382 	int err;
383 
384 	/*
385 	 * dma-bufs are not populated with pages, and the dma-
386 	 * addresses are set up when moved to XE_PL_TT.
387 	 */
388 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
389 		return 0;
390 
391 	err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
392 	if (err)
393 		return err;
394 
395 	/* A follow-up may move this to xe_bo_move() when the BO is moved to XE_PL_TT */
396 	err = xe_tt_map_sg(tt);
397 	if (err)
398 		ttm_pool_free(&ttm_dev->pool, tt);
399 
400 	return err;
401 }
402 
403 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
404 {
405 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
406 
407 	if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
408 		return;
409 
410 	if (xe_tt->sg) {
411 		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
412 				  DMA_BIDIRECTIONAL, 0);
413 		sg_free_table(xe_tt->sg);
414 		xe_tt->sg = NULL;
415 	}
416 
417 	return ttm_pool_free(&ttm_dev->pool, tt);
418 }
419 
420 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
421 {
422 	ttm_tt_fini(tt);
423 	kfree(tt);
424 }
425 
426 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
427 				 struct ttm_resource *mem)
428 {
429 	struct xe_device *xe = ttm_to_xe_device(bdev);
430 
431 	switch (mem->mem_type) {
432 	case XE_PL_SYSTEM:
433 	case XE_PL_TT:
434 		return 0;
435 	case XE_PL_VRAM0:
436 	case XE_PL_VRAM1: {
437 		struct xe_ttm_vram_mgr_resource *vres =
438 			to_xe_ttm_vram_mgr_resource(mem);
439 		struct xe_mem_region *vram = res_to_mem_region(mem);
440 
441 		if (vres->used_visible_size < mem->size)
442 			return -EINVAL;
443 
444 		mem->bus.offset = mem->start << PAGE_SHIFT;
445 
446 		if (vram->mapping &&
447 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
448 			mem->bus.addr = (u8 *)vram->mapping +
449 				mem->bus.offset;
450 
451 		mem->bus.offset += vram->io_start;
452 		mem->bus.is_iomem = true;
453 
454 #if !defined(CONFIG_X86)
455 		mem->bus.caching = ttm_write_combined;
456 #endif
457 		return 0;
458 	} case XE_PL_STOLEN:
459 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
460 	default:
461 		return -EINVAL;
462 	}
463 }
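
/*
 * Editorial example with made-up numbers: for a contiguous VRAM0 resource
 * starting at page 0x100 on a device with vram->io_start == 0x4000000000,
 * the code above yields bus.offset == 0x4000000000 + (0x100 << PAGE_SHIFT),
 * while bus.addr points at the matching offset inside vram->mapping so the
 * kernel can access the buffer through the BAR mapping directly.
 */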
464 
465 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
466 				const struct ttm_operation_ctx *ctx)
467 {
468 	struct dma_resv_iter cursor;
469 	struct dma_fence *fence;
470 	struct drm_gpuva *gpuva;
471 	struct drm_gem_object *obj = &bo->ttm.base;
472 	struct drm_gpuvm_bo *vm_bo;
473 	int ret = 0;
474 
475 	dma_resv_assert_held(bo->ttm.base.resv);
476 
477 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
478 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
479 				    DMA_RESV_USAGE_BOOKKEEP);
480 		dma_resv_for_each_fence_unlocked(&cursor, fence)
481 			dma_fence_enable_sw_signaling(fence);
482 		dma_resv_iter_end(&cursor);
483 	}
484 
485 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
486 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
487 			struct xe_vma *vma = gpuva_to_vma(gpuva);
488 			struct xe_vm *vm = xe_vma_vm(vma);
489 
490 			trace_xe_vma_evict(vma);
491 
492 		if (xe_vm_in_fault_mode(vm)) {
493 			/* Wait for pending binds / unbinds. */
494 			long timeout;
495 
496 			if (ctx->no_wait_gpu &&
497 			    !dma_resv_test_signaled(bo->ttm.base.resv,
498 						    DMA_RESV_USAGE_BOOKKEEP))
499 				return -EBUSY;
500 
501 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
502 							DMA_RESV_USAGE_BOOKKEEP,
503 							ctx->interruptible,
504 							MAX_SCHEDULE_TIMEOUT);
505 			if (timeout > 0) {
506 				ret = xe_vm_invalidate_vma(vma);
507 				XE_WARN_ON(ret);
508 			} else if (!timeout) {
509 				ret = -ETIME;
510 			} else {
511 				ret = timeout;
512 			}
513 
514 		} else {
515 			bool vm_resv_locked = false;
516 
517 			/*
518 			 * We need to put the vma on the vm's rebind_list,
519 			 * but need the vm resv to do so. If we can't verify
520 			 * that we indeed have it locked, put the vma on the
521 			 * vm's notifier.rebind_list instead and scoop it up later.
522 			 */
523 			if (dma_resv_trylock(xe_vm_resv(vm)))
524 				vm_resv_locked = true;
525 			else if (ctx->resv != xe_vm_resv(vm)) {
526 				spin_lock(&vm->notifier.list_lock);
527 				if (!(vma->gpuva.flags & XE_VMA_DESTROYED))
528 					list_move_tail(&vma->notifier.rebind_link,
529 						       &vm->notifier.rebind_list);
530 				spin_unlock(&vm->notifier.list_lock);
531 				continue;
532 			}
533 
534 			xe_vm_assert_held(vm);
535 			if (vma->tile_present &&
536 			    !(vma->gpuva.flags & XE_VMA_DESTROYED) &&
537 			    list_empty(&vma->combined_links.rebind))
538 				list_add_tail(&vma->combined_links.rebind,
539 					      &vm->rebind_list);
540 
541 			if (vm_resv_locked)
542 				dma_resv_unlock(xe_vm_resv(vm));
543 		}
544 		}
545 	}
546 
547 	return ret;
548 }
549 
550 /*
551  * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
552  * Note that unmapping the attachment is deferred to the next
553  * map_attachment time, or to bo destroy (after idling), whichever comes first.
554  * This is to avoid syncing before unmap_attachment(), assuming that the
555  * caller relies on idling the reservation object before moving the
556  * backing store out. Should that assumption not hold, then we will be able
557  * to unconditionally call unmap_attachment() when moving out to system.
558  */
559 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
560 			     struct ttm_resource *new_res)
561 {
562 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
563 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
564 					       ttm);
565 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
566 	struct sg_table *sg;
567 
568 	xe_assert(xe, attach);
569 	xe_assert(xe, ttm_bo->ttm);
570 
571 	if (new_res->mem_type == XE_PL_SYSTEM)
572 		goto out;
573 
574 	if (ttm_bo->sg) {
575 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
576 		ttm_bo->sg = NULL;
577 	}
578 
579 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
580 	if (IS_ERR(sg))
581 		return PTR_ERR(sg);
582 
583 	ttm_bo->sg = sg;
584 	xe_tt->sg = sg;
585 
586 out:
587 	ttm_bo_move_null(ttm_bo, new_res);
588 
589 	return 0;
590 }
591 
592 /**
593  * xe_bo_move_notify - Notify subsystems of a pending move
594  * @bo: The buffer object
595  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
596  *
597  * This function notifies subsystems of an upcoming buffer move.
598  * Upon receiving such a notification, subsystems should schedule
599  * halting access to the underlying pages and optionally add a fence
600  * to the buffer object's dma_resv object, that signals when access is
601  * stopped. The caller will wait on all dma_resv fences before
602  * starting the move.
603  *
604  * A subsystem may commence access to the object after obtaining
605  * bindings to the new backing memory under the object lock.
606  *
607  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
608  * negative error code on error.
609  */
610 static int xe_bo_move_notify(struct xe_bo *bo,
611 			     const struct ttm_operation_ctx *ctx)
612 {
613 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
614 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
615 	int ret;
616 
617 	/*
618 	 * If this starts to call into many components, consider
619 	 * using a notification chain here.
620 	 */
621 
622 	if (xe_bo_is_pinned(bo))
623 		return -EINVAL;
624 
625 	xe_bo_vunmap(bo);
626 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
627 	if (ret)
628 		return ret;
629 
630 	/* Don't call move_notify() for imported dma-bufs. */
631 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
632 		dma_buf_move_notify(ttm_bo->base.dma_buf);
633 
634 	return 0;
635 }
636 
637 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
638 		      struct ttm_operation_ctx *ctx,
639 		      struct ttm_resource *new_mem,
640 		      struct ttm_place *hop)
641 {
642 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
643 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
644 	struct ttm_resource *old_mem = ttm_bo->resource;
645 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
646 	struct ttm_tt *ttm = ttm_bo->ttm;
647 	struct xe_migrate *migrate = NULL;
648 	struct dma_fence *fence;
649 	bool move_lacks_source;
650 	bool tt_has_data;
651 	bool needs_clear;
652 	int ret = 0;
653 
654 	/* Bo creation path, moving to system or TT. No clearing required. */
655 	if (!old_mem && ttm) {
656 		ttm_bo_move_null(ttm_bo, new_mem);
657 		return 0;
658 	}
659 
660 	if (ttm_bo->type == ttm_bo_type_sg) {
661 		ret = xe_bo_move_notify(bo, ctx);
662 		if (!ret)
663 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
664 		goto out;
665 	}
666 
667 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
668 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
669 
670 	move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data;
671 
672 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
673 		(!ttm && ttm_bo->type == ttm_bo_type_device);
674 
675 	if ((move_lacks_source && !needs_clear) ||
676 	    (old_mem_type == XE_PL_SYSTEM &&
677 	     new_mem->mem_type == XE_PL_TT)) {
678 		ttm_bo_move_null(ttm_bo, new_mem);
679 		goto out;
680 	}
681 
682 	/*
683 	 * A failed multi-hop where the old_mem is still marked as
684 	 * TTM_PL_FLAG_TEMPORARY should just be a dummy move.
685 	 */
686 	if (old_mem_type == XE_PL_TT &&
687 	    new_mem->mem_type == XE_PL_TT) {
688 		ttm_bo_move_null(ttm_bo, new_mem);
689 		goto out;
690 	}
691 
692 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
693 		ret = xe_bo_move_notify(bo, ctx);
694 		if (ret)
695 			goto out;
696 	}
697 
698 	if (old_mem_type == XE_PL_TT &&
699 	    new_mem->mem_type == XE_PL_SYSTEM) {
700 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
701 						     DMA_RESV_USAGE_BOOKKEEP,
702 						     true,
703 						     MAX_SCHEDULE_TIMEOUT);
704 		if (timeout < 0) {
705 			ret = timeout;
706 			goto out;
707 		}
708 		ttm_bo_move_null(ttm_bo, new_mem);
709 		goto out;
710 	}
711 
712 	if (!move_lacks_source &&
713 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
714 	     (mem_type_is_vram(old_mem_type) &&
715 	      new_mem->mem_type == XE_PL_SYSTEM))) {
716 		hop->fpfn = 0;
717 		hop->lpfn = 0;
718 		hop->mem_type = XE_PL_TT;
719 		hop->flags = TTM_PL_FLAG_TEMPORARY;
720 		ret = -EMULTIHOP;
721 		goto out;
722 	}
723 
724 	if (bo->tile)
725 		migrate = bo->tile->migrate;
726 	else if (resource_is_vram(new_mem))
727 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
728 	else if (mem_type_is_vram(old_mem_type))
729 		migrate = mem_type_to_migrate(xe, old_mem_type);
730 
731 	xe_assert(xe, migrate);
732 
733 	trace_xe_bo_move(bo);
734 	xe_device_mem_access_get(xe);
735 
736 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
737 		/*
738 		 * Kernel memory that is pinned should only be moved on suspend
739 		 * / resume; some of the pinned memory is required for the
740 		 * device to resume / use the GPU to move other evicted memory
741 		 * (user memory) around. This likely could be optimized a bit
742 		 * further where we find the minimum set of pinned memory
743 		 * required for resume, but for simplicity we do a memcpy for all
744 		 * pinned memory.
745 		 */
746 		ret = xe_bo_vmap(bo);
747 		if (!ret) {
748 			ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
749 
750 			/* Create a new vmap once the kernel BO is back in VRAM */
751 			if (!ret && resource_is_vram(new_mem)) {
752 				struct xe_mem_region *vram = res_to_mem_region(new_mem);
753 				void *new_addr = vram->mapping +
754 					(new_mem->start << PAGE_SHIFT);
755 
756 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
757 					ret = -EINVAL;
758 					xe_device_mem_access_put(xe);
759 					goto out;
760 				}
761 
762 				xe_assert(xe, new_mem->start ==
763 					  bo->placements->fpfn);
764 
765 				iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
766 			}
767 		}
768 	} else {
769 		if (move_lacks_source)
770 			fence = xe_migrate_clear(migrate, bo, new_mem);
771 		else
772 			fence = xe_migrate_copy(migrate,
773 						bo, bo, old_mem, new_mem);
774 		if (IS_ERR(fence)) {
775 			ret = PTR_ERR(fence);
776 			xe_device_mem_access_put(xe);
777 			goto out;
778 		}
779 		if (!move_lacks_source) {
780 			ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
781 							true, new_mem);
782 			if (ret) {
783 				dma_fence_wait(fence, false);
784 				ttm_bo_move_null(ttm_bo, new_mem);
785 				ret = 0;
786 			}
787 		} else {
788 			/*
789 			 * ttm_bo_move_accel_cleanup() may blow up if
790 			 * bo->resource == NULL, so just attach the
791 			 * fence and set the new resource.
792 			 */
793 			dma_resv_add_fence(ttm_bo->base.resv, fence,
794 					   DMA_RESV_USAGE_KERNEL);
795 			ttm_bo_move_null(ttm_bo, new_mem);
796 		}
797 
798 		dma_fence_put(fence);
799 	}
800 
801 	xe_device_mem_access_put(xe);
802 
803 out:
804 	return ret;
805 
806 }
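
/*
 * Editorial summary of the move paths above: brand-new BOs, and moves with
 * neither source data nor a clearing requirement, complete with
 * ttm_bo_move_null(); system<->TT transitions only need a (possibly
 * waited-on) null move since the backing pages do not change; VRAM<->system
 * transfers are bounced through XE_PL_TT via -EMULTIHOP; pinned kernel BOs
 * are copied with the CPU (ttm_bo_move_memcpy) so they can be restored on
 * resume before the GPU is usable; everything else is cleared or copied with
 * the migrate engine (xe_migrate_clear()/xe_migrate_copy()).
 */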
807 
808 /**
809  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
810  * @bo: The buffer object to move.
811  *
812  * On successful completion, the object memory will be moved to system memory.
813  * This function blocks until the object has been fully moved.
814  *
815  * This is needed for special handling of pinned VRAM objects during
816  * suspend-resume.
817  *
818  * Return: 0 on success. Negative error code on failure.
819  */
820 int xe_bo_evict_pinned(struct xe_bo *bo)
821 {
822 	struct ttm_place place = {
823 		.mem_type = XE_PL_TT,
824 	};
825 	struct ttm_placement placement = {
826 		.placement = &place,
827 		.num_placement = 1,
828 	};
829 	struct ttm_operation_ctx ctx = {
830 		.interruptible = false,
831 	};
832 	struct ttm_resource *new_mem;
833 	int ret;
834 
835 	xe_bo_assert_held(bo);
836 
837 	if (WARN_ON(!bo->ttm.resource))
838 		return -EINVAL;
839 
840 	if (WARN_ON(!xe_bo_is_pinned(bo)))
841 		return -EINVAL;
842 
843 	if (WARN_ON(!xe_bo_is_vram(bo)))
844 		return -EINVAL;
845 
846 	ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
847 	if (ret)
848 		return ret;
849 
850 	if (!bo->ttm.ttm) {
851 		bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
852 		if (!bo->ttm.ttm) {
853 			ret = -ENOMEM;
854 			goto err_res_free;
855 		}
856 	}
857 
858 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
859 	if (ret)
860 		goto err_res_free;
861 
862 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
863 	if (ret)
864 		goto err_res_free;
865 
866 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
867 	if (ret)
868 		goto err_res_free;
869 
870 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
871 			      false, MAX_SCHEDULE_TIMEOUT);
872 
873 	return 0;
874 
875 err_res_free:
876 	ttm_resource_free(&bo->ttm, &new_mem);
877 	return ret;
878 }
879 
880 /**
881  * xe_bo_restore_pinned() - Restore a pinned VRAM object
882  * @bo: The buffer object to move.
883  *
884  * On successful completion, the object memory will be moved back to VRAM.
885  * This function blocks until the object has been fully moved.
886  *
887  * This is needed for special handling of pinned VRAM objects during
888  * suspend-resume.
889  *
890  * Return: 0 on success. Negative error code on failure.
891  */
892 int xe_bo_restore_pinned(struct xe_bo *bo)
893 {
894 	struct ttm_operation_ctx ctx = {
895 		.interruptible = false,
896 	};
897 	struct ttm_resource *new_mem;
898 	int ret;
899 
900 	xe_bo_assert_held(bo);
901 
902 	if (WARN_ON(!bo->ttm.resource))
903 		return -EINVAL;
904 
905 	if (WARN_ON(!xe_bo_is_pinned(bo)))
906 		return -EINVAL;
907 
908 	if (WARN_ON(xe_bo_is_vram(bo) || !bo->ttm.ttm))
909 		return -EINVAL;
910 
911 	ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
912 	if (ret)
913 		return ret;
914 
915 	ret = ttm_tt_populate(bo->ttm.bdev, bo->ttm.ttm, &ctx);
916 	if (ret)
917 		goto err_res_free;
918 
919 	ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
920 	if (ret)
921 		goto err_res_free;
922 
923 	ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
924 	if (ret)
925 		goto err_res_free;
926 
927 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
928 			      false, MAX_SCHEDULE_TIMEOUT);
929 
930 	return 0;
931 
932 err_res_free:
933 	ttm_resource_free(&bo->ttm, &new_mem);
934 	return ret;
935 }
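
/*
 * Editorial sketch of the assumed caller (outside this file): during suspend
 * the driver walks its pinned-BO lists and calls xe_bo_evict_pinned() on
 * each entry; on resume it calls xe_bo_restore_pinned(), which relies on the
 * fpfn/lpfn range recorded by xe_bo_pin() to put the object back at the same
 * physical VRAM offset.
 */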
936 
937 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
938 				       unsigned long page_offset)
939 {
940 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
941 	struct xe_res_cursor cursor;
942 	struct xe_mem_region *vram;
943 
944 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
945 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
946 
947 	vram = res_to_mem_region(ttm_bo->resource);
948 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
949 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
950 }
951 
952 static void __xe_bo_vunmap(struct xe_bo *bo);
953 
954 /*
955  * TODO: Move this function to TTM so we don't rely on how TTM does its
956  * locking, thereby abusing TTM internals.
957  */
958 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
959 {
960 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
961 	bool locked;
962 
963 	xe_assert(xe, !kref_read(&ttm_bo->kref));
964 
965 	/*
966 	 * We can typically only race with TTM trylocking under the
967 	 * lru_lock, which will immediately be unlocked again since
968 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
969 	 * always succeed here, as long as we hold the lru lock.
970 	 */
971 	spin_lock(&ttm_bo->bdev->lru_lock);
972 	locked = dma_resv_trylock(ttm_bo->base.resv);
973 	spin_unlock(&ttm_bo->bdev->lru_lock);
974 	xe_assert(xe, locked);
975 
976 	return locked;
977 }
978 
979 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
980 {
981 	struct dma_resv_iter cursor;
982 	struct dma_fence *fence;
983 	struct dma_fence *replacement = NULL;
984 	struct xe_bo *bo;
985 
986 	if (!xe_bo_is_xe_bo(ttm_bo))
987 		return;
988 
989 	bo = ttm_to_xe_bo(ttm_bo);
990 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
991 
992 	/*
993 	 * Corner case where TTM fails to allocate memory and this BO's resv
994 	 * still points to the VM's resv.
995 	 */
996 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
997 		return;
998 
999 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1000 		return;
1001 
1002 	/*
1003 	 * Scrub the preempt fences if any. The unbind fence is already
1004 	 * attached to the resv.
1005 	 * TODO: Don't do this for external bos once we scrub them after
1006 	 * unbind.
1007 	 */
1008 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1009 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1010 		if (xe_fence_is_xe_preempt(fence) &&
1011 		    !dma_fence_is_signaled(fence)) {
1012 			if (!replacement)
1013 				replacement = dma_fence_get_stub();
1014 
1015 			dma_resv_replace_fences(ttm_bo->base.resv,
1016 						fence->context,
1017 						replacement,
1018 						DMA_RESV_USAGE_BOOKKEEP);
1019 		}
1020 	}
1021 	dma_fence_put(replacement);
1022 
1023 	dma_resv_unlock(ttm_bo->base.resv);
1024 }
1025 
1026 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1027 {
1028 	if (!xe_bo_is_xe_bo(ttm_bo))
1029 		return;
1030 
1031 	/*
1032 	 * Object is idle and about to be destroyed. Release the
1033 	 * dma-buf attachment.
1034 	 */
1035 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1036 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1037 						       struct xe_ttm_tt, ttm);
1038 
1039 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1040 					 DMA_BIDIRECTIONAL);
1041 		ttm_bo->sg = NULL;
1042 		xe_tt->sg = NULL;
1043 	}
1044 }
1045 
1046 struct ttm_device_funcs xe_ttm_funcs = {
1047 	.ttm_tt_create = xe_ttm_tt_create,
1048 	.ttm_tt_populate = xe_ttm_tt_populate,
1049 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1050 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1051 	.evict_flags = xe_evict_flags,
1052 	.move = xe_bo_move,
1053 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1054 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1055 	.release_notify = xe_ttm_bo_release_notify,
1056 	.eviction_valuable = ttm_bo_eviction_valuable,
1057 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1058 };
1059 
1060 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1061 {
1062 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1063 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1064 
1065 	if (bo->ttm.base.import_attach)
1066 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1067 	drm_gem_object_release(&bo->ttm.base);
1068 
1069 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1070 
1071 	if (bo->ggtt_node.size)
1072 		xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo);
1073 
1074 #ifdef CONFIG_PROC_FS
1075 	if (bo->client)
1076 		xe_drm_client_remove_bo(bo);
1077 #endif
1078 
1079 	if (bo->vm && xe_bo_is_user(bo))
1080 		xe_vm_put(bo->vm);
1081 
1082 	kfree(bo);
1083 }
1084 
1085 static void xe_gem_object_free(struct drm_gem_object *obj)
1086 {
1087 	/* Our BO reference counting scheme works as follows:
1088 	 *
1089 	 * The gem object kref is typically used throughout the driver,
1090 	 * and the gem object holds a ttm_buffer_object refcount, so
1091 	 * that when the last gem object reference is put, which is when
1092 	 * we end up in this function, we also put that ttm_buffer_object
1093 	 * refcount. Anything using gem interfaces is then no longer
1094 	 * allowed to access the object in a way that requires a gem
1095 	 * refcount, including locking the object.
1096 	 *
1097 	 * Driver ttm callbacks are allowed to use the ttm_buffer_object
1098 	 * refcount directly if needed.
1099 	 */
1100 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1101 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1102 }
1103 
1104 static void xe_gem_object_close(struct drm_gem_object *obj,
1105 				struct drm_file *file_priv)
1106 {
1107 	struct xe_bo *bo = gem_to_xe_bo(obj);
1108 
1109 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1110 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1111 
1112 		xe_bo_lock(bo, false);
1113 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1114 		xe_bo_unlock(bo);
1115 	}
1116 }
1117 
1118 static bool should_migrate_to_system(struct xe_bo *bo)
1119 {
1120 	struct xe_device *xe = xe_bo_device(bo);
1121 
1122 	return xe_device_in_fault_mode(xe) && bo->props.cpu_atomic;
1123 }
1124 
1125 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1126 {
1127 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1128 	struct drm_device *ddev = tbo->base.dev;
1129 	vm_fault_t ret;
1130 	int idx, r = 0;
1131 
1132 	ret = ttm_bo_vm_reserve(tbo, vmf);
1133 	if (ret)
1134 		return ret;
1135 
1136 	if (drm_dev_enter(ddev, &idx)) {
1137 		struct xe_bo *bo = ttm_to_xe_bo(tbo);
1138 
1139 		trace_xe_bo_cpu_fault(bo);
1140 
1141 		if (should_migrate_to_system(bo)) {
1142 			r = xe_bo_migrate(bo, XE_PL_TT);
1143 			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
1144 				ret = VM_FAULT_NOPAGE;
1145 			else if (r)
1146 				ret = VM_FAULT_SIGBUS;
1147 		}
1148 		if (!ret)
1149 			ret = ttm_bo_vm_fault_reserved(vmf,
1150 						       vmf->vma->vm_page_prot,
1151 						       TTM_BO_VM_NUM_PREFAULT);
1152 		drm_dev_exit(idx);
1153 	} else {
1154 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1155 	}
1156 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1157 		return ret;
1158 
1159 	dma_resv_unlock(tbo->base.resv);
1160 	return ret;
1161 }
1162 
1163 static const struct vm_operations_struct xe_gem_vm_ops = {
1164 	.fault = xe_gem_fault,
1165 	.open = ttm_bo_vm_open,
1166 	.close = ttm_bo_vm_close,
1167 	.access = ttm_bo_vm_access
1168 };
1169 
1170 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1171 	.free = xe_gem_object_free,
1172 	.close = xe_gem_object_close,
1173 	.mmap = drm_gem_ttm_mmap,
1174 	.export = xe_gem_prime_export,
1175 	.vm_ops = &xe_gem_vm_ops,
1176 };
1177 
1178 /**
1179  * xe_bo_alloc - Allocate storage for a struct xe_bo
1180  *
1181  * This function is intended to allocate storage to be used for input
1182  * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1183  * created is needed before the call to __xe_bo_create_locked().
1184  * If __xe_bo_create_locked() ends up never being called, then the
1185  * storage allocated with this function needs to be freed using
1186  * xe_bo_free().
1187  *
1188  * Return: A pointer to an uninitialized struct xe_bo on success,
1189  * ERR_PTR(-ENOMEM) on error.
1190  */
1191 struct xe_bo *xe_bo_alloc(void)
1192 {
1193 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1194 
1195 	if (!bo)
1196 		return ERR_PTR(-ENOMEM);
1197 
1198 	return bo;
1199 }
1200 
1201 /**
1202  * xe_bo_free - Free storage allocated using xe_bo_alloc()
1203  * @bo: The buffer object storage.
1204  *
1205  * Refer to xe_bo_alloc() documentation for valid use-cases.
1206  */
1207 void xe_bo_free(struct xe_bo *bo)
1208 {
1209 	kfree(bo);
1210 }
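
/*
 * Editorial usage sketch for the pair above (hypothetical caller;
 * "early_setup_failed" is a placeholder): allocate early when the pointer is
 * needed before creation, and free only if the create call never happens:
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	if (early_setup_failed) {
 *		xe_bo_free(bo);
 *		return -EIO;
 *	}
 *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
 *				    cpu_caching, type, flags);
 */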
1211 
1212 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1213 				     struct xe_tile *tile, struct dma_resv *resv,
1214 				     struct ttm_lru_bulk_move *bulk, size_t size,
1215 				     u16 cpu_caching, enum ttm_bo_type type,
1216 				     u32 flags)
1217 {
1218 	struct ttm_operation_ctx ctx = {
1219 		.interruptible = true,
1220 		.no_wait_gpu = false,
1221 	};
1222 	struct ttm_placement *placement;
1223 	uint32_t alignment;
1224 	int err;
1225 
1226 	/* Only kernel objects should set a tile */
1227 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1228 
1229 	if (XE_WARN_ON(!size)) {
1230 		xe_bo_free(bo);
1231 		return ERR_PTR(-EINVAL);
1232 	}
1233 
1234 	if (!bo) {
1235 		bo = xe_bo_alloc();
1236 		if (IS_ERR(bo))
1237 			return bo;
1238 	}
1239 
1240 	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
1241 	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
1242 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
1243 		size = ALIGN(size, SZ_64K);
1244 		flags |= XE_BO_INTERNAL_64K;
1245 		alignment = SZ_64K >> PAGE_SHIFT;
1246 	} else {
1247 		size = ALIGN(size, PAGE_SIZE);
1248 		alignment = SZ_4K >> PAGE_SHIFT;
1249 	}
1250 
1251 	bo->tile = tile;
1252 	bo->size = size;
1253 	bo->flags = flags;
1254 	bo->cpu_caching = cpu_caching;
1255 	bo->ttm.base.funcs = &xe_gem_object_funcs;
1256 	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
1257 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
1258 	bo->props.preferred_mem_type = XE_BO_PROPS_INVALID;
1259 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1260 	INIT_LIST_HEAD(&bo->pinned_link);
1261 #ifdef CONFIG_PROC_FS
1262 	INIT_LIST_HEAD(&bo->client_link);
1263 #endif
1264 
1265 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1266 
1267 	if (resv) {
1268 		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
1269 		ctx.resv = resv;
1270 	}
1271 
1272 	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
1273 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1274 		if (WARN_ON(err)) {
1275 			xe_ttm_bo_destroy(&bo->ttm);
1276 			return ERR_PTR(err);
1277 		}
1278 	}
1279 
1280 	/* Defer populating type_sg bos */
1281 	placement = (type == ttm_bo_type_sg ||
1282 		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
1283 		&bo->placement;
1284 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1285 				   placement, alignment,
1286 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
1287 	if (err)
1288 		return ERR_PTR(err);
1289 
1290 	/*
1291 	 * The VRAM pages underneath are potentially still being accessed by the
1292 	 * GPU, as per async GPU clearing and async evictions. However TTM makes
1293 	 * sure to add any corresponding move/clear fences into the object's
1294 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1295 	 *
1296 	 * For KMD internal buffers we don't care about GPU clearing, however we
1297 	 * still need to handle async evictions, where the VRAM is still being
1298 	 * accessed by the GPU. Most internal callers are not expecting this,
1299 	 * since they are missing the required synchronisation before accessing
1300 	 * the memory. To keep things simple just sync wait any kernel fences
1301 	 * here, if the buffer is designated KMD internal.
1302 	 *
1303 	 * For normal userspace objects we should already have the required
1304 	 * pipelining or sync waiting elsewhere, since we already have to deal
1305 	 * with things like async GPU clearing.
1306 	 */
1307 	if (type == ttm_bo_type_kernel) {
1308 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1309 						     DMA_RESV_USAGE_KERNEL,
1310 						     ctx.interruptible,
1311 						     MAX_SCHEDULE_TIMEOUT);
1312 
1313 		if (timeout < 0) {
1314 			if (!resv)
1315 				dma_resv_unlock(bo->ttm.base.resv);
1316 			xe_bo_put(bo);
1317 			return ERR_PTR(timeout);
1318 		}
1319 	}
1320 
1321 	bo->created = true;
1322 	if (bulk)
1323 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
1324 	else
1325 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1326 
1327 	return bo;
1328 }
1329 
1330 static int __xe_bo_fixed_placement(struct xe_device *xe,
1331 				   struct xe_bo *bo,
1332 				   u32 flags,
1333 				   u64 start, u64 end, u64 size)
1334 {
1335 	struct ttm_place *place = bo->placements;
1336 
1337 	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
1338 		return -EINVAL;
1339 
1340 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
1341 	place->fpfn = start >> PAGE_SHIFT;
1342 	place->lpfn = end >> PAGE_SHIFT;
1343 
1344 	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
1345 	case XE_BO_CREATE_VRAM0_BIT:
1346 		place->mem_type = XE_PL_VRAM0;
1347 		break;
1348 	case XE_BO_CREATE_VRAM1_BIT:
1349 		place->mem_type = XE_PL_VRAM1;
1350 		break;
1351 	case XE_BO_CREATE_STOLEN_BIT:
1352 		place->mem_type = XE_PL_STOLEN;
1353 		break;
1354 
1355 	default:
1356 		/* 0 or multiple of the above set */
1357 		return -EINVAL;
1358 	}
1359 
1360 	bo->placement = (struct ttm_placement) {
1361 		.num_placement = 1,
1362 		.placement = place,
1363 		.num_busy_placement = 1,
1364 		.busy_placement = place,
1365 	};
1366 
1367 	return 0;
1368 }
1369 
1370 static struct xe_bo *
1371 __xe_bo_create_locked(struct xe_device *xe,
1372 		      struct xe_tile *tile, struct xe_vm *vm,
1373 		      size_t size, u64 start, u64 end,
1374 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags)
1375 {
1376 	struct xe_bo *bo = NULL;
1377 	int err;
1378 
1379 	if (vm)
1380 		xe_vm_assert_held(vm);
1381 
1382 	if (start || end != ~0ULL) {
1383 		bo = xe_bo_alloc();
1384 		if (IS_ERR(bo))
1385 			return bo;
1386 
1387 		flags |= XE_BO_FIXED_PLACEMENT_BIT;
1388 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1389 		if (err) {
1390 			xe_bo_free(bo);
1391 			return ERR_PTR(err);
1392 		}
1393 	}
1394 
1395 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1396 				    vm && !xe_vm_in_fault_mode(vm) &&
1397 				    flags & XE_BO_CREATE_USER_BIT ?
1398 				    &vm->lru_bulk_move : NULL, size,
1399 				    cpu_caching, type, flags);
1400 	if (IS_ERR(bo))
1401 		return bo;
1402 
1403 	/*
1404 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
1405 	 * to ensure the shared resv doesn't disappear under the bo, the bo
1406 	 * will keep a reference to the vm, and avoid circular references
1407 	 * by having all the vm's bo references released at vm close
1408 	 * time.
1409 	 */
1410 	if (vm && xe_bo_is_user(bo))
1411 		xe_vm_get(vm);
1412 	bo->vm = vm;
1413 
1414 	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
1415 		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
1416 			tile = xe_device_get_root_tile(xe);
1417 
1418 		xe_assert(xe, tile);
1419 
1420 		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
1421 			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
1422 						   start + bo->size, U64_MAX);
1423 		} else {
1424 			err = xe_ggtt_insert_bo(tile->mem.ggtt, bo);
1425 		}
1426 		if (err)
1427 			goto err_unlock_put_bo;
1428 	}
1429 
1430 	return bo;
1431 
1432 err_unlock_put_bo:
1433 	__xe_bo_unset_bulk_move(bo);
1434 	xe_bo_unlock_vm_held(bo);
1435 	xe_bo_put(bo);
1436 	return ERR_PTR(err);
1437 }
1438 
1439 struct xe_bo *
1440 xe_bo_create_locked_range(struct xe_device *xe,
1441 			  struct xe_tile *tile, struct xe_vm *vm,
1442 			  size_t size, u64 start, u64 end,
1443 			  enum ttm_bo_type type, u32 flags)
1444 {
1445 	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type, flags);
1446 }
1447 
1448 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1449 				  struct xe_vm *vm, size_t size,
1450 				  enum ttm_bo_type type, u32 flags)
1451 {
1452 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type, flags);
1453 }
1454 
1455 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1456 				struct xe_vm *vm, size_t size,
1457 				u16 cpu_caching,
1458 				enum ttm_bo_type type,
1459 				u32 flags)
1460 {
1461 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1462 						 cpu_caching, type,
1463 						 flags | XE_BO_CREATE_USER_BIT);
1464 	if (!IS_ERR(bo))
1465 		xe_bo_unlock_vm_held(bo);
1466 
1467 	return bo;
1468 }
1469 
1470 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1471 			   struct xe_vm *vm, size_t size,
1472 			   enum ttm_bo_type type, u32 flags)
1473 {
1474 	struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1475 
1476 	if (!IS_ERR(bo))
1477 		xe_bo_unlock_vm_held(bo);
1478 
1479 	return bo;
1480 }
1481 
1482 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1483 				      struct xe_vm *vm,
1484 				      size_t size, u64 offset,
1485 				      enum ttm_bo_type type, u32 flags)
1486 {
1487 	struct xe_bo *bo;
1488 	int err;
1489 	u64 start = offset == ~0ull ? 0 : offset;
1490 	u64 end = offset == ~0ull ? offset : start + size;
1491 
1492 	if (flags & XE_BO_CREATE_STOLEN_BIT &&
1493 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1494 		flags |= XE_BO_CREATE_GGTT_BIT;
1495 
1496 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1497 				       flags | XE_BO_NEEDS_CPU_ACCESS);
1498 	if (IS_ERR(bo))
1499 		return bo;
1500 
1501 	err = xe_bo_pin(bo);
1502 	if (err)
1503 		goto err_put;
1504 
1505 	err = xe_bo_vmap(bo);
1506 	if (err)
1507 		goto err_unpin;
1508 
1509 	xe_bo_unlock_vm_held(bo);
1510 
1511 	return bo;
1512 
1513 err_unpin:
1514 	xe_bo_unpin(bo);
1515 err_put:
1516 	xe_bo_unlock_vm_held(bo);
1517 	xe_bo_put(bo);
1518 	return ERR_PTR(err);
1519 }
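
/*
 * Editorial note on the offset parameter above: passing ~0ull means "place
 * anywhere" (start = 0, end = ~0ull and no fixed placement), while any other
 * value requests a fixed placement covering [offset, offset + size), which
 * __xe_bo_fixed_placement() turns into an exact fpfn/lpfn range.
 */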
1520 
1521 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1522 				   struct xe_vm *vm, size_t size,
1523 				   enum ttm_bo_type type, u32 flags)
1524 {
1525 	return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1526 }
1527 
1528 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1529 				     const void *data, size_t size,
1530 				     enum ttm_bo_type type, u32 flags)
1531 {
1532 	struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1533 						ALIGN(size, PAGE_SIZE),
1534 						type, flags);
1535 	if (IS_ERR(bo))
1536 		return bo;
1537 
1538 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1539 
1540 	return bo;
1541 }
1542 
1543 /*
1544  * XXX: This is in the VM bind data path, likely should calculate this once and
1545  * store, with a recalculation if the BO is moved.
1546  */
1547 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1548 {
1549 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1550 
1551 	if (res->mem_type == XE_PL_STOLEN)
1552 		return xe_ttm_stolen_gpu_offset(xe);
1553 
1554 	return res_to_mem_region(res)->dpa_base;
1555 }
1556 
1557 /**
1558  * xe_bo_pin_external - pin an external BO
1559  * @bo: buffer object to be pinned
1560  *
1561  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1562  * BO. Unique call compared to xe_bo_pin as this function has its own set of
1563  * asserts and code to ensure evict / restore on suspend / resume.
1564  *
1565  * Returns 0 for success, negative error code otherwise.
1566  */
1567 int xe_bo_pin_external(struct xe_bo *bo)
1568 {
1569 	struct xe_device *xe = xe_bo_device(bo);
1570 	int err;
1571 
1572 	xe_assert(xe, !bo->vm);
1573 	xe_assert(xe, xe_bo_is_user(bo));
1574 
1575 	if (!xe_bo_is_pinned(bo)) {
1576 		err = xe_bo_validate(bo, NULL, false);
1577 		if (err)
1578 			return err;
1579 
1580 		if (xe_bo_is_vram(bo)) {
1581 			spin_lock(&xe->pinned.lock);
1582 			list_add_tail(&bo->pinned_link,
1583 				      &xe->pinned.external_vram);
1584 			spin_unlock(&xe->pinned.lock);
1585 		}
1586 	}
1587 
1588 	ttm_bo_pin(&bo->ttm);
1589 
1590 	/*
1591 	 * FIXME: If we always use the reserve / unreserve functions for locking
1592 	 * we do not need this.
1593 	 */
1594 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1595 
1596 	return 0;
1597 }
1598 
1599 int xe_bo_pin(struct xe_bo *bo)
1600 {
1601 	struct xe_device *xe = xe_bo_device(bo);
1602 	int err;
1603 
1604 	/* We currently don't expect user BOs to be pinned */
1605 	xe_assert(xe, !xe_bo_is_user(bo));
1606 
1607 	/* Pinned object must be in GGTT or have pinned flag */
1608 	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
1609 				   XE_BO_CREATE_GGTT_BIT));
1610 
1611 	/*
1612 	 * No reason we can't support pinning imported dma-bufs we just don't
1613 	 * No reason we can't support pinning imported dma-bufs, we just don't
1614 	 */
1615 	xe_assert(xe, !bo->ttm.base.import_attach);
1616 
1617 	/* We only expect at most 1 pin */
1618 	xe_assert(xe, !xe_bo_is_pinned(bo));
1619 
1620 	err = xe_bo_validate(bo, NULL, false);
1621 	if (err)
1622 		return err;
1623 
1624 	/*
1625 	 * For pinned objects on DGFX, which are also in vram, we expect
1626 	 * these to be in contiguous VRAM memory. This is required for eviction /
1627 	 * restore during suspend / resume (force restore to the same physical address).
1628 	 */
1629 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1630 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1631 		struct ttm_place *place = &(bo->placements[0]);
1632 
1633 		if (mem_type_is_vram(place->mem_type)) {
1634 			xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);
1635 
1636 			place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
1637 				       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
1638 			place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
1639 
1640 			spin_lock(&xe->pinned.lock);
1641 			list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
1642 			spin_unlock(&xe->pinned.lock);
1643 		}
1644 	}
1645 
1646 	ttm_bo_pin(&bo->ttm);
1647 
1648 	/*
1649 	 * FIXME: If we always use the reserve / unreserve functions for locking
1650 	 * we do not need this.
1651 	 */
1652 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1653 
1654 	return 0;
1655 }
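
/*
 * Editorial example of the pinning math above (made-up numbers): a 2 MiB
 * kernel BO currently at VRAM device address 0x80200000, in a region with
 * dpa_base 0x80000000, gets place->fpfn = 0x200000 >> PAGE_SHIFT and
 * place->lpfn = fpfn + (SZ_2M >> PAGE_SHIFT), so a later restore can only
 * land on exactly the same VRAM pages.
 */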
1656 
1657 /**
1658  * xe_bo_unpin_external - unpin an external BO
1659  * @bo: buffer object to be unpinned
1660  *
1661  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
1662  * BO. Unique call compared to xe_bo_unpin as this function has its own set of
1663  * asserts and code to ensure evict / restore on suspend / resume.
1666  */
1667 void xe_bo_unpin_external(struct xe_bo *bo)
1668 {
1669 	struct xe_device *xe = xe_bo_device(bo);
1670 
1671 	xe_assert(xe, !bo->vm);
1672 	xe_assert(xe, xe_bo_is_pinned(bo));
1673 	xe_assert(xe, xe_bo_is_user(bo));
1674 
1675 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link)) {
1676 		spin_lock(&xe->pinned.lock);
1677 		list_del_init(&bo->pinned_link);
1678 		spin_unlock(&xe->pinned.lock);
1679 	}
1680 
1681 	ttm_bo_unpin(&bo->ttm);
1682 
1683 	/*
1684 	 * FIXME: If we always use the reserve / unreserve functions for locking
1685 	 * we do not need this.
1686 	 */
1687 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1688 }
1689 
1690 void xe_bo_unpin(struct xe_bo *bo)
1691 {
1692 	struct xe_device *xe = xe_bo_device(bo);
1693 
1694 	xe_assert(xe, !bo->ttm.base.import_attach);
1695 	xe_assert(xe, xe_bo_is_pinned(bo));
1696 
1697 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
1698 	    bo->flags & XE_BO_INTERNAL_TEST)) {
1699 		struct ttm_place *place = &(bo->placements[0]);
1700 
1701 		if (mem_type_is_vram(place->mem_type)) {
1702 			xe_assert(xe, !list_empty(&bo->pinned_link));
1703 
1704 			spin_lock(&xe->pinned.lock);
1705 			list_del_init(&bo->pinned_link);
1706 			spin_unlock(&xe->pinned.lock);
1707 		}
1708 	}
1709 
1710 	ttm_bo_unpin(&bo->ttm);
1711 }
1712 
1713 /**
1714  * xe_bo_validate() - Make sure the bo is in an allowed placement
1715  * @bo: The bo.
1716  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
1717  *      NULL. Used together with @allow_res_evict.
1718  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
1719  *                   reservation object.
1720  *
1721  * Make sure the bo is in allowed placement, migrating it if necessary. If
1722  * needed, other bos will be evicted. If bos selected for eviction share
1723  * the @vm's reservation object, they can be evicted iff @allow_res_evict is
1724  * set to true, otherwise they will be bypassed.
1725  *
1726  * Return: 0 on success, negative error code on failure. May return
1727  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
1728  */
1729 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
1730 {
1731 	struct ttm_operation_ctx ctx = {
1732 		.interruptible = true,
1733 		.no_wait_gpu = false,
1734 	};
1735 
1736 	if (vm) {
1737 		lockdep_assert_held(&vm->lock);
1738 		xe_vm_assert_held(vm);
1739 
1740 		ctx.allow_res_evict = allow_res_evict;
1741 		ctx.resv = xe_vm_resv(vm);
1742 	}
1743 
1744 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
1745 }
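
/*
 * Editorial usage sketch (hypothetical caller, bo already locked):
 *
 *	err = xe_bo_validate(bo, vm, false);
 *	if (err == -EINTR || err == -ERESTARTSYS)
 *		;	/* interrupted by a signal: retry or propagate */
 *	else if (err)
 *		;	/* no allowed placement could be satisfied */
 */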
1746 
1747 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
1748 {
1749 	if (bo->destroy == &xe_ttm_bo_destroy)
1750 		return true;
1751 
1752 	return false;
1753 }
1754 
1755 /*
1756  * Resolve a BO address. There is no assert to check if the proper lock is held
1757  * so it should only be used in cases where it is not fatal to get the wrong
1758  * address, such as printing debug information, but not in cases where memory is
1759  * written based on this result.
1760  */
1761 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1762 {
1763 	struct xe_device *xe = xe_bo_device(bo);
1764 	struct xe_res_cursor cur;
1765 	u64 page;
1766 
1767 	xe_assert(xe, page_size <= PAGE_SIZE);
1768 	page = offset >> PAGE_SHIFT;
1769 	offset &= (PAGE_SIZE - 1);
1770 
1771 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
1772 		xe_assert(xe, bo->ttm.ttm);
1773 
1774 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
1775 				page_size, &cur);
1776 		return xe_res_dma(&cur) + offset;
1777 	} else {
1778 		struct xe_res_cursor cur;
1779 
1780 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
1781 			     page_size, &cur);
1782 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
1783 	}
1784 }
1785 
1786 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
1787 {
1788 	if (!READ_ONCE(bo->ttm.pin_count))
1789 		xe_bo_assert_held(bo);
1790 	return __xe_bo_addr(bo, offset, page_size);
1791 }
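
/*
 * Editorial example with made-up numbers: for a contiguous VRAM BO placed at
 * VRAM offset 0x100000 in a region with dpa_base 0x80000000,
 * __xe_bo_addr(bo, 0x1234, PAGE_SIZE) returns
 * 0x80000000 + 0x100000 + 0x1234, i.e. the resource cursor position plus the
 * intra-page offset plus vram_region_gpu_offset().
 */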
1792 
1793 int xe_bo_vmap(struct xe_bo *bo)
1794 {
1795 	void *virtual;
1796 	bool is_iomem;
1797 	int ret;
1798 
1799 	xe_bo_assert_held(bo);
1800 
1801 	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
1802 		return -EINVAL;
1803 
1804 	if (!iosys_map_is_null(&bo->vmap))
1805 		return 0;
1806 
1807 	/*
1808 	 * We use this more or less deprecated interface for now since
1809 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
1810 	 * single page bos, which is done here.
1811 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
1812 	 * to use struct iosys_map.
1813 	 */
1814 	ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
1815 	if (ret)
1816 		return ret;
1817 
1818 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
1819 	if (is_iomem)
1820 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
1821 	else
1822 		iosys_map_set_vaddr(&bo->vmap, virtual);
1823 
1824 	return 0;
1825 }
1826 
1827 static void __xe_bo_vunmap(struct xe_bo *bo)
1828 {
1829 	if (!iosys_map_is_null(&bo->vmap)) {
1830 		iosys_map_clear(&bo->vmap);
1831 		ttm_bo_kunmap(&bo->kmap);
1832 	}
1833 }
1834 
1835 void xe_bo_vunmap(struct xe_bo *bo)
1836 {
1837 	xe_bo_assert_held(bo);
1838 	__xe_bo_vunmap(bo);
1839 }
1840 
1841 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
1842 			struct drm_file *file)
1843 {
1844 	struct xe_device *xe = to_xe_device(dev);
1845 	struct xe_file *xef = to_xe_file(file);
1846 	struct drm_xe_gem_create *args = data;
1847 	struct xe_vm *vm = NULL;
1848 	struct xe_bo *bo;
1849 	unsigned int bo_flags;
1850 	u32 handle;
1851 	int err;
1852 
1853 	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
1854 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1855 		return -EINVAL;
1856 
1857 	if (XE_IOCTL_DBG(xe, args->flags &
1858 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
1859 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
1860 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
1861 			   xe->info.mem_region_mask)))
1862 		return -EINVAL;
1863 
1864 	/* at least one memory type must be specified */
1865 	if (XE_IOCTL_DBG(xe, !(args->flags & xe->info.mem_region_mask)))
1866 		return -EINVAL;
1867 
1868 	if (XE_IOCTL_DBG(xe, args->handle))
1869 		return -EINVAL;
1870 
1871 	if (XE_IOCTL_DBG(xe, !args->size))
1872 		return -EINVAL;
1873 
1874 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
1875 		return -EINVAL;
1876 
1877 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
1878 		return -EINVAL;
1879 
1880 	bo_flags = 0;
1881 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
1882 		bo_flags |= XE_BO_DEFER_BACKING;
1883 
1884 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
1885 		bo_flags |= XE_BO_SCANOUT_BIT;
1886 
1887 	bo_flags |= args->flags << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
1888 
1889 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
1890 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
1891 			return -EINVAL;
1892 
1893 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
1894 	}
1895 
1896 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
1897 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
1898 		return -EINVAL;
1899 
1900 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
1901 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
1902 		return -EINVAL;
1903 
1904 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
1905 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
1906 		return -EINVAL;
1907 
1908 	if (args->vm_id) {
1909 		vm = xe_vm_lookup(xef, args->vm_id);
1910 		if (XE_IOCTL_DBG(xe, !vm))
1911 			return -ENOENT;
1912 		err = xe_vm_lock(vm, true);
1913 		if (err)
1914 			goto out_vm;
1915 	}
1916 
1917 	bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
1918 			       ttm_bo_type_device, bo_flags);
1919 
1920 	if (vm)
1921 		xe_vm_unlock(vm);
1922 
1923 	if (IS_ERR(bo)) {
1924 		err = PTR_ERR(bo);
1925 		goto out_vm;
1926 	}
1927 
1928 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
1929 	if (err)
1930 		goto out_bulk;
1931 
1932 	args->handle = handle;
1933 	goto out_put;
1934 
1935 out_bulk:
1936 	if (vm && !xe_vm_in_fault_mode(vm)) {
1937 		xe_vm_lock(vm, false);
1938 		__xe_bo_unset_bulk_move(bo);
1939 		xe_vm_unlock(vm);
1940 	}
1941 out_put:
1942 	xe_bo_put(bo);
1943 out_vm:
1944 	if (vm)
1945 		xe_vm_put(vm);
1946 
1947 	return err;
1948 }
1949 
1950 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
1951 			     struct drm_file *file)
1952 {
1953 	struct xe_device *xe = to_xe_device(dev);
1954 	struct drm_xe_gem_mmap_offset *args = data;
1955 	struct drm_gem_object *gem_obj;
1956 
1957 	if (XE_IOCTL_DBG(xe, args->extensions) ||
1958 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1959 		return -EINVAL;
1960 
1961 	if (XE_IOCTL_DBG(xe, args->flags))
1962 		return -EINVAL;
1963 
1964 	gem_obj = drm_gem_object_lookup(file, args->handle);
1965 	if (XE_IOCTL_DBG(xe, !gem_obj))
1966 		return -ENOENT;
1967 
1968 	/* The mmap offset was set up at BO allocation time. */
1969 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
1970 
1971 	xe_bo_put(gem_to_xe_bo(gem_obj));
1972 	return 0;
1973 }
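
/*
 * Userspace sketch (illustrative only): the fake offset returned by this
 * ioctl is passed straight to mmap() on the DRM fd. Error handling is
 * omitted and the variable names are made up.
 *
 *	struct drm_xe_gem_mmap_offset mmo = { .handle = handle };
 *
 *	ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
 *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   fd, mmo.offset);
 */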
1974 
1975 /**
1976  * xe_bo_lock() - Lock the buffer object's dma_resv object
1977  * @bo: The struct xe_bo whose lock is to be taken
1978  * @intr: Whether to perform any waits interruptibly
1979  *
1980  * Locks the buffer object's dma_resv object. If the buffer object is
1981  * pointing to a shared dma_resv object, that shared lock is locked.
1982  *
1983  * Return: 0 on success, -EINTR if @intr is true and the wait for a
1984  * contended lock was interrupted. If @intr is set to false, the
1985  * function always returns 0.
1986  */
1987 int xe_bo_lock(struct xe_bo *bo, bool intr)
1988 {
1989 	if (intr)
1990 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
1991 
1992 	dma_resv_lock(bo->ttm.base.resv, NULL);
1993 
1994 	return 0;
1995 }
1996 
1997 /**
1998  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
1999  * @bo: The struct xe_bo whose lock is to be released.
2000  *
2001  * Unlock a buffer object lock that was locked by xe_bo_lock().
2002  */
2003 void xe_bo_unlock(struct xe_bo *bo)
2004 {
2005 	dma_resv_unlock(bo->ttm.base.resv);
2006 }
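
/*
 * Usage sketch (illustrative only): the lock/unlock pair brackets any
 * operation that needs the BO's dma_resv held, for example xe_bo_vmap()
 * above or xe_bo_migrate() below.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	... operate on the BO ...
 *	xe_bo_unlock(bo);
 */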
2007 
2008 /**
2009  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
2010  * @bo: The buffer object to migrate
2011  * @mem_type: The TTM memory type intended to migrate to
2012  *
2013  * Check whether the buffer object supports migration to the
2014  * given memory type. Note that pinning may prevent migration even when
2015  * this function returns true.
2016  *
2017  * This function is primarily intended as a helper for checking whether
2018  * buffer objects can be migrated, and it can be called without
2019  * the object lock held.
2020  *
2021  * Return: true if migration is possible, false otherwise.
2022  */
2023 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
2024 {
2025 	unsigned int cur_place;
2026 
2027 	if (bo->ttm.type == ttm_bo_type_kernel)
2028 		return true;
2029 
2030 	if (bo->ttm.type == ttm_bo_type_sg)
2031 		return false;
2032 
2033 	for (cur_place = 0; cur_place < bo->placement.num_placement;
2034 	     cur_place++) {
2035 		if (bo->placements[cur_place].mem_type == mem_type)
2036 			return true;
2037 	}
2038 
2039 	return false;
2040 }
2041 
2042 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
2043 {
2044 	memset(place, 0, sizeof(*place));
2045 	place->mem_type = mem_type;
2046 }
2047 
2048 /**
2049  * xe_bo_migrate - Migrate an object to the desired region id
2050  * @bo: The buffer object to migrate.
2051  * @mem_type: The TTM region type to migrate to.
2052  *
2053  * Attempt to migrate the buffer object to the desired memory region. The
2054  * buffer object may not be pinned, and must be locked.
2055  * On successful completion, the object memory type will be updated,
2056  * but an async migration task may not have completed yet. To wait for
2057  * completion, wait on the object's kernel fences with the object lock
2058  * held.
2059  *
2060  * Return: 0 on success. Negative error code on failure. In particular may
2061  * return -EINTR or -ERESTARTSYS if signal pending.
2062  */
2063 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
2064 {
2065 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2066 	struct ttm_operation_ctx ctx = {
2067 		.interruptible = true,
2068 		.no_wait_gpu = false,
2069 	};
2070 	struct ttm_placement placement;
2071 	struct ttm_place requested;
2072 
2073 	xe_bo_assert_held(bo);
2074 
2075 	if (bo->ttm.resource->mem_type == mem_type)
2076 		return 0;
2077 
2078 	if (xe_bo_is_pinned(bo))
2079 		return -EBUSY;
2080 
2081 	if (!xe_bo_can_migrate(bo, mem_type))
2082 		return -EINVAL;
2083 
2084 	xe_place_from_ttm_type(mem_type, &requested);
2085 	placement.num_placement = 1;
2086 	placement.num_busy_placement = 1;
2087 	placement.placement = &requested;
2088 	placement.busy_placement = &requested;
2089 
2090 	/*
2091 	 * Stolen needs to be handled like the VRAM handling below if we ever
2092 	 * need to support it.
2093 	 */
2094 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
2095 
2096 	if (mem_type_is_vram(mem_type)) {
2097 		u32 c = 0;
2098 
2099 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
2100 	}
2101 
2102 	return ttm_bo_validate(&bo->ttm, &placement, &ctx);
2103 }
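
/*
 * Usage sketch (illustrative only): migrating a BO to VRAM of the first
 * tile and waiting for the async copy to finish, as described in the
 * kernel-doc above. XE_PL_VRAM0 is just an example placement; the call
 * fails for placements xe_bo_can_migrate() disallows.
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	err = xe_bo_migrate(bo, XE_PL_VRAM0);
 *	if (!err)
 *		dma_resv_wait_timeout(bo->ttm.base.resv,
 *				      DMA_RESV_USAGE_KERNEL, true,
 *				      MAX_SCHEDULE_TIMEOUT);
 *	xe_bo_unlock(bo);
 */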
2104 
2105 /**
2106  * xe_bo_evict - Evict an object to evict placement
2107  * @bo: The buffer object to migrate.
2108  * @force_alloc: Set force_alloc in ttm_operation_ctx
2109  *
2110  * On successful completion, the object memory will be moved to the evict
2111  * placement. This function blocks until the object has been fully moved.
2112  *
2113  * Return: 0 on success. Negative error code on failure.
2114  */
2115 int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
2116 {
2117 	struct ttm_operation_ctx ctx = {
2118 		.interruptible = false,
2119 		.no_wait_gpu = false,
2120 		.force_alloc = force_alloc,
2121 	};
2122 	struct ttm_placement placement;
2123 	int ret;
2124 
2125 	xe_evict_flags(&bo->ttm, &placement);
2126 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
2127 	if (ret)
2128 		return ret;
2129 
2130 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
2131 			      false, MAX_SCHEDULE_TIMEOUT);
2132 
2133 	return 0;
2134 }
2135 
2136 /**
2137  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
2138  * placed in system memory.
2139  * @bo: The xe_bo
2140  *
2141  * If a bo has an allowable placement in XE_PL_TT memory, it can't use
2142  * flat CCS compression, because the GPU then has no way to access the
2143  * CCS metadata using the relevant commands. In the opposite case, extra
2144  * storage for the CCS metadata needs to be allocated whenever the BO is
2145  * not resident in VRAM.
2146  *
2147  * Return: true if extra pages need to be allocated, false otherwise.
2148  */
2149 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
2150 {
2151 	return bo->ttm.type == ttm_bo_type_device &&
2152 		!(bo->flags & XE_BO_CREATE_SYSTEM_BIT) &&
2153 		(bo->flags & XE_BO_CREATE_VRAM_MASK);
2154 }
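
/*
 * Sizing sketch (illustrative only): when this returns true, the TT backing
 * store is grown by enough pages to hold the flat-CCS metadata, along the
 * lines of what the tt creation path in this file does:
 *
 *	extra_pages = 0;
 *	if (xe_bo_needs_ccs_pages(bo))
 *		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 *					   PAGE_SIZE);
 */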
2155 
2156 /**
2157  * __xe_bo_release_dummy() - Dummy kref release function
2158  * @kref: The embedded struct kref.
2159  *
2160  * Dummy release function for xe_bo_put_deferred(). Do not call directly.
2161  */
2162 void __xe_bo_release_dummy(struct kref *kref)
2163 {
2164 }
2165 
2166 /**
2167  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
2168  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
2169  *
2170  * Puts all bos whose put was deferred by xe_bo_put_deferred().
2171  * The @deferred list can be either an onstack local list or a global
2172  * shared list used by a workqueue.
2173  */
2174 void xe_bo_put_commit(struct llist_head *deferred)
2175 {
2176 	struct llist_node *freed;
2177 	struct xe_bo *bo, *next;
2178 
2179 	if (!deferred)
2180 		return;
2181 
2182 	freed = llist_del_all(deferred);
2183 	if (!freed)
2184 		return;
2185 
2186 	llist_for_each_entry_safe(bo, next, freed, freed)
2187 		drm_gem_object_free(&bo->ttm.base.refcount);
2188 }
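
/*
 * Usage sketch (illustrative only): deferring puts from a context where the
 * final free must not run, for instance under a spinlock, and committing
 * them once the lock is dropped. The lock name is made up.
 *
 *	LLIST_HEAD(deferred);
 *
 *	spin_lock(&some_lock);
 *	xe_bo_put_deferred(bo, &deferred);
 *	spin_unlock(&some_lock);
 *
 *	xe_bo_put_commit(&deferred);
 */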
2189 
2190 /**
2191  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
2192  * @file_priv: ...
2193  * @dev: ...
2194  * @args: ...
2195  *
2196  * See dumb_create() hook in include/drm/drm_drv.h
2197  *
2198  * Return: ...
2199  */
2200 int xe_bo_dumb_create(struct drm_file *file_priv,
2201 		      struct drm_device *dev,
2202 		      struct drm_mode_create_dumb *args)
2203 {
2204 	struct xe_device *xe = to_xe_device(dev);
2205 	struct xe_bo *bo;
2206 	uint32_t handle;
2207 	int cpp = DIV_ROUND_UP(args->bpp, 8);
2208 	int err;
2209 	u32 page_size = max_t(u32, PAGE_SIZE,
2210 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
2211 
2212 	args->pitch = ALIGN(args->width * cpp, 64);
2213 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
2214 			   page_size);
2215 
2216 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
2217 			       DRM_XE_GEM_CPU_CACHING_WC,
2218 			       ttm_bo_type_device,
2219 			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
2220 			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
2221 			       XE_BO_NEEDS_CPU_ACCESS);
2222 	if (IS_ERR(bo))
2223 		return PTR_ERR(bo);
2224 
2225 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
2226 	/* drop reference from allocate - handle holds it now */
2227 	drm_gem_object_put(&bo->ttm.base);
2228 	if (!err)
2229 		args->handle = handle;
2230 	return err;
2231 }
2232 
2233 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
2234 #include "tests/xe_bo.c"
2235 #endif
2236