xref: /linux/drivers/gpu/drm/xe/xe_bo.c (revision 284fc30e66e602a5df58393860f67477d6a79339)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_bo.h"
7 
8 #include <linux/dma-buf.h>
9 #include <linux/nospec.h>
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_gem_ttm_helper.h>
13 #include <drm/drm_managed.h>
14 #include <drm/ttm/ttm_backup.h>
15 #include <drm/ttm/ttm_device.h>
16 #include <drm/ttm/ttm_placement.h>
17 #include <drm/ttm/ttm_tt.h>
18 #include <uapi/drm/xe_drm.h>
19 
20 #include <kunit/static_stub.h>
21 
22 #include <trace/events/gpu_mem.h>
23 
24 #include "xe_device.h"
25 #include "xe_dma_buf.h"
26 #include "xe_drm_client.h"
27 #include "xe_ggtt.h"
28 #include "xe_gt.h"
29 #include "xe_map.h"
30 #include "xe_migrate.h"
31 #include "xe_pm.h"
32 #include "xe_preempt_fence.h"
33 #include "xe_pxp.h"
34 #include "xe_res_cursor.h"
35 #include "xe_shrinker.h"
36 #include "xe_sriov_vf_ccs.h"
37 #include "xe_trace_bo.h"
38 #include "xe_ttm_stolen_mgr.h"
39 #include "xe_vm.h"
40 #include "xe_vram_types.h"
41 
42 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES]  = {
43 	[XE_PL_SYSTEM] = "system",
44 	[XE_PL_TT] = "gtt",
45 	[XE_PL_VRAM0] = "vram0",
46 	[XE_PL_VRAM1] = "vram1",
47 	[XE_PL_STOLEN] = "stolen"
48 };
49 
50 static const struct ttm_place sys_placement_flags = {
51 	.fpfn = 0,
52 	.lpfn = 0,
53 	.mem_type = XE_PL_SYSTEM,
54 	.flags = 0,
55 };
56 
57 static struct ttm_placement sys_placement = {
58 	.num_placement = 1,
59 	.placement = &sys_placement_flags,
60 };
61 
62 static struct ttm_placement purge_placement;
63 
64 static const struct ttm_place tt_placement_flags[] = {
65 	{
66 		.fpfn = 0,
67 		.lpfn = 0,
68 		.mem_type = XE_PL_TT,
69 		.flags = TTM_PL_FLAG_DESIRED,
70 	},
71 	{
72 		.fpfn = 0,
73 		.lpfn = 0,
74 		.mem_type = XE_PL_SYSTEM,
75 		.flags = TTM_PL_FLAG_FALLBACK,
76 	}
77 };
78 
79 static struct ttm_placement tt_placement = {
80 	.num_placement = 2,
81 	.placement = tt_placement_flags,
82 };
83 
84 bool mem_type_is_vram(u32 mem_type)
85 {
86 	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
87 }
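
/*
 * Note (assumption, based on the XE_PL_* definitions in xe_bo.h): the check
 * above relies on the placement numbering keeping XE_PL_VRAM0/XE_PL_VRAM1
 * above the system and TT types, with XE_PL_STOLEN at the top of the TTM
 * range, so that "mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN"
 * matches exactly the VRAM placements.
 */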
88 
89 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
90 {
91 	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
92 }
93 
94 static bool resource_is_vram(struct ttm_resource *res)
95 {
96 	return mem_type_is_vram(res->mem_type);
97 }
98 
99 bool xe_bo_is_vram(struct xe_bo *bo)
100 {
101 	return resource_is_vram(bo->ttm.resource) ||
102 		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
103 }
104 
105 bool xe_bo_is_stolen(struct xe_bo *bo)
106 {
107 	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
108 }
109 
110 /**
111  * xe_bo_has_single_placement - check if BO is placed only in one memory location
112  * @bo: The BO
113  *
114  * This function checks whether a given BO is placed in only one memory location.
115  *
116  * Returns: true if the BO is placed in a single memory location, false otherwise.
117  *
118  */
119 bool xe_bo_has_single_placement(struct xe_bo *bo)
120 {
121 	return bo->placement.num_placement == 1;
122 }
123 
124 /**
125  * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
126  * @bo: The BO
127  *
128  * The stolen memory is accessed through the PCI BAR for both DGFX and some
129  * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
130  *
131  * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
132  */
133 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
134 {
135 	return xe_bo_is_stolen(bo) &&
136 		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
137 }
138 
139 /**
140  * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
141  * @bo: The BO
142  *
143  * Check if a given bo is bound through VM_BIND. This requires the
144  * reservation lock for the BO to be held.
145  *
146  * Returns: boolean
147  */
148 bool xe_bo_is_vm_bound(struct xe_bo *bo)
149 {
150 	xe_bo_assert_held(bo);
151 
152 	return !list_empty(&bo->ttm.base.gpuva.list);
153 }
154 
155 static bool xe_bo_is_user(struct xe_bo *bo)
156 {
157 	return bo->flags & XE_BO_FLAG_USER;
158 }
159 
160 static struct xe_migrate *
161 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
162 {
163 	struct xe_tile *tile;
164 
165 	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
166 	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
167 	return tile->migrate;
168 }
169 
170 static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
171 {
172 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
173 	struct ttm_resource_manager *mgr;
174 	struct xe_ttm_vram_mgr *vram_mgr;
175 
176 	xe_assert(xe, resource_is_vram(res));
177 	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
178 	vram_mgr = to_xe_ttm_vram_mgr(mgr);
179 
180 	return container_of(vram_mgr, struct xe_vram_region, ttm);
181 }
182 
183 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
184 			   u32 bo_flags, u32 *c)
185 {
186 	if (bo_flags & XE_BO_FLAG_SYSTEM) {
187 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
188 
189 		bo->placements[*c] = (struct ttm_place) {
190 			.mem_type = XE_PL_TT,
191 			.flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
192 			TTM_PL_FLAG_FALLBACK : 0,
193 		};
194 		*c += 1;
195 	}
196 }
197 
198 static bool force_contiguous(u32 bo_flags)
199 {
200 	if (bo_flags & XE_BO_FLAG_STOLEN)
201 		return true; /* users expect this */
202 	else if (bo_flags & XE_BO_FLAG_PINNED &&
203 		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
204 		return true; /* needs vmap */
205 	else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
206 		return true;
207 
208 	/*
209 	 * For eviction / restore on suspend / resume, objects pinned in VRAM
210 	 * must be contiguous. Also, only contiguous BOs support xe_bo_vmap.
211 	 */
212 	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
213 	       bo_flags & XE_BO_FLAG_PINNED;
214 }
215 
216 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
217 		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
218 {
219 	struct ttm_place place = { .mem_type = mem_type };
220 	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
221 	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);
222 
223 	struct xe_vram_region *vram;
224 	u64 io_size;
225 
226 	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
227 
228 	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
229 	xe_assert(xe, vram && vram->usable_size);
230 	io_size = vram->io_size;
231 
232 	if (force_contiguous(bo_flags))
233 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
234 
235 	if (io_size < vram->usable_size) {
236 		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
237 			place.fpfn = 0;
238 			place.lpfn = io_size >> PAGE_SHIFT;
239 		} else {
240 			place.flags |= TTM_PL_FLAG_TOPDOWN;
241 		}
242 	}
243 	places[*c] = place;
244 	*c += 1;
245 }
246 
247 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
248 			 u32 bo_flags, u32 *c)
249 {
250 	if (bo_flags & XE_BO_FLAG_VRAM0)
251 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
252 	if (bo_flags & XE_BO_FLAG_VRAM1)
253 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
254 }
255 
256 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
257 			   u32 bo_flags, u32 *c)
258 {
259 	if (bo_flags & XE_BO_FLAG_STOLEN) {
260 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
261 
262 		bo->placements[*c] = (struct ttm_place) {
263 			.mem_type = XE_PL_STOLEN,
264 			.flags = force_contiguous(bo_flags) ?
265 				TTM_PL_FLAG_CONTIGUOUS : 0,
266 		};
267 		*c += 1;
268 	}
269 }
270 
271 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
272 				       u32 bo_flags)
273 {
274 	u32 c = 0;
275 
276 	try_add_vram(xe, bo, bo_flags, &c);
277 	try_add_system(xe, bo, bo_flags, &c);
278 	try_add_stolen(xe, bo, bo_flags, &c);
279 
280 	if (!c)
281 		return -EINVAL;
282 
283 	bo->placement = (struct ttm_placement) {
284 		.num_placement = c,
285 		.placement = bo->placements,
286 	};
287 
288 	return 0;
289 }
290 
291 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
292 			      u32 bo_flags)
293 {
294 	xe_bo_assert_held(bo);
295 	return __xe_bo_placement_for_flags(xe, bo, bo_flags);
296 }
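
/*
 * Illustrative sketch (not code from this file): how a caller might use the
 * placement helper above. The flag names and helpers are real; the snippet
 * itself is a hypothetical example.
 *
 *	u32 flags = XE_BO_FLAG_VRAM0 | XE_BO_FLAG_SYSTEM;
 *	int err;
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_placement_for_flags(xe, bo, flags);
 *	xe_bo_unlock(bo);
 *
 * On success, bo->placement prefers VRAM0, and try_add_system() has added
 * XE_PL_TT as a TTM_PL_FLAG_FALLBACK placement.
 */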
297 
298 static void xe_evict_flags(struct ttm_buffer_object *tbo,
299 			   struct ttm_placement *placement)
300 {
301 	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
302 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
303 	struct xe_bo *bo;
304 
305 	if (!xe_bo_is_xe_bo(tbo)) {
306 		/* Don't handle scatter gather BOs */
307 		if (tbo->type == ttm_bo_type_sg) {
308 			placement->num_placement = 0;
309 			return;
310 		}
311 
312 		*placement = device_unplugged ? purge_placement : sys_placement;
313 		return;
314 	}
315 
316 	bo = ttm_to_xe_bo(tbo);
317 	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
318 		*placement = sys_placement;
319 		return;
320 	}
321 
322 	if (device_unplugged && !tbo->base.dma_buf) {
323 		*placement = purge_placement;
324 		return;
325 	}
326 
327 	/*
328 	 * For xe, sg bos that are evicted to system just trigger a
329 	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
330 	 */
331 	switch (tbo->resource->mem_type) {
332 	case XE_PL_VRAM0:
333 	case XE_PL_VRAM1:
334 	case XE_PL_STOLEN:
335 		*placement = tt_placement;
336 		break;
337 	case XE_PL_TT:
338 	default:
339 		*placement = sys_placement;
340 		break;
341 	}
342 }
343 
344 /* struct xe_ttm_tt - Subclassed ttm_tt for xe */
345 struct xe_ttm_tt {
346 	struct ttm_tt ttm;
347 	struct sg_table sgt;
348 	struct sg_table *sg;
349 	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
350 	bool purgeable;
351 };
352 
353 static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
354 {
355 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
356 	unsigned long num_pages = tt->num_pages;
357 	int ret;
358 
359 	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
360 		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
361 
362 	if (xe_tt->sg)
363 		return 0;
364 
365 	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
366 						num_pages, 0,
367 						(u64)num_pages << PAGE_SHIFT,
368 						xe_sg_segment_size(xe->drm.dev),
369 						GFP_KERNEL);
370 	if (ret)
371 		return ret;
372 
373 	xe_tt->sg = &xe_tt->sgt;
374 	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
375 			      DMA_ATTR_SKIP_CPU_SYNC);
376 	if (ret) {
377 		sg_free_table(xe_tt->sg);
378 		xe_tt->sg = NULL;
379 		return ret;
380 	}
381 
382 	return 0;
383 }
384 
385 static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
386 {
387 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
388 
389 	if (xe_tt->sg) {
390 		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
391 				  DMA_BIDIRECTIONAL, 0);
392 		sg_free_table(xe_tt->sg);
393 		xe_tt->sg = NULL;
394 	}
395 }
396 
397 struct sg_table *xe_bo_sg(struct xe_bo *bo)
398 {
399 	struct ttm_tt *tt = bo->ttm.ttm;
400 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
401 
402 	return xe_tt->sg;
403 }
404 
405 /*
406  * Account ttm pages against the device shrinker's shrinkable and
407  * purgeable counts.
408  */
409 static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
410 {
411 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
412 
413 	if (xe_tt->purgeable)
414 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
415 	else
416 		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
417 }
418 
419 static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
420 {
421 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
422 
423 	if (xe_tt->purgeable)
424 		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
425 	else
426 		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
427 }
428 
429 static void update_global_total_pages(struct ttm_device *ttm_dev,
430 				      long num_pages)
431 {
432 #if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
433 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
434 	u64 global_total_pages =
435 		atomic64_add_return(num_pages, &xe->global_total_pages);
436 
437 	trace_gpu_mem_total(xe->drm.primary->index, 0,
438 			    global_total_pages << PAGE_SHIFT);
439 #endif
440 }
441 
442 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
443 				       u32 page_flags)
444 {
445 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
446 	struct xe_device *xe = xe_bo_device(bo);
447 	struct xe_ttm_tt *xe_tt;
448 	struct ttm_tt *tt;
449 	unsigned long extra_pages;
450 	enum ttm_caching caching = ttm_cached;
451 	int err;
452 
453 	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
454 	if (!xe_tt)
455 		return NULL;
456 
457 	tt = &xe_tt->ttm;
458 
459 	extra_pages = 0;
460 	if (xe_bo_needs_ccs_pages(bo))
461 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
462 					   PAGE_SIZE);
463 
464 	/*
465 	 * DGFX system memory is always WB / ttm_cached, since
466 	 * other caching modes are only supported on x86. DGFX
467 	 * GPU system memory accesses are always coherent with the
468 	 * CPU.
469 	 */
470 	if (!IS_DGFX(xe)) {
471 		switch (bo->cpu_caching) {
472 		case DRM_XE_GEM_CPU_CACHING_WC:
473 			caching = ttm_write_combined;
474 			break;
475 		default:
476 			caching = ttm_cached;
477 			break;
478 		}
479 
480 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
481 
482 		/*
483 		 * Display scanout is always non-coherent with the CPU cache.
484 		 *
485 		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
486 		 * non-coherent and require a CPU:WC mapping.
487 		 */
488 		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
489 		    (xe->info.graphics_verx100 >= 1270 &&
490 		     bo->flags & XE_BO_FLAG_PAGETABLE))
491 			caching = ttm_write_combined;
492 	}
493 
494 	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
495 		/*
496 		 * Valid for internally-created buffers only, for
497 		 * which cpu_caching is never initialized.
498 		 */
499 		xe_assert(xe, bo->cpu_caching == 0);
500 		caching = ttm_uncached;
501 	}
502 
503 	if (ttm_bo->type != ttm_bo_type_sg)
504 		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
505 
506 	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
507 	if (err) {
508 		kfree(xe_tt);
509 		return NULL;
510 	}
511 
512 	if (ttm_bo->type != ttm_bo_type_sg) {
513 		err = ttm_tt_setup_backup(tt);
514 		if (err) {
515 			ttm_tt_fini(tt);
516 			kfree(xe_tt);
517 			return NULL;
518 		}
519 	}
520 
521 	return tt;
522 }
523 
524 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
525 			      struct ttm_operation_ctx *ctx)
526 {
527 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
528 	int err;
529 
530 	/*
531 	 * dma-bufs are not populated with pages, and the dma-
532 	 * addresses are set up when moved to XE_PL_TT.
533 	 */
534 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
535 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
536 		return 0;
537 
538 	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
539 		err = ttm_tt_restore(ttm_dev, tt, ctx);
540 	} else {
541 		ttm_tt_clear_backed_up(tt);
542 		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
543 	}
544 	if (err)
545 		return err;
546 
547 	xe_tt->purgeable = false;
548 	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
549 	update_global_total_pages(ttm_dev, tt->num_pages);
550 
551 	return 0;
552 }
553 
554 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
555 {
556 	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
557 
558 	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
559 	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
560 		return;
561 
562 	xe_tt_unmap_sg(xe, tt);
563 
564 	ttm_pool_free(&ttm_dev->pool, tt);
565 	xe_ttm_tt_account_subtract(xe, tt);
566 	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
567 }
568 
569 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
570 {
571 	ttm_tt_fini(tt);
572 	kfree(tt);
573 }
574 
575 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
576 {
577 	struct xe_ttm_vram_mgr_resource *vres =
578 		to_xe_ttm_vram_mgr_resource(mem);
579 
580 	return vres->used_visible_size == mem->size;
581 }
582 
583 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
584 				 struct ttm_resource *mem)
585 {
586 	struct xe_device *xe = ttm_to_xe_device(bdev);
587 
588 	switch (mem->mem_type) {
589 	case XE_PL_SYSTEM:
590 	case XE_PL_TT:
591 		return 0;
592 	case XE_PL_VRAM0:
593 	case XE_PL_VRAM1: {
594 		struct xe_vram_region *vram = res_to_mem_region(mem);
595 
596 		if (!xe_ttm_resource_visible(mem))
597 			return -EINVAL;
598 
599 		mem->bus.offset = mem->start << PAGE_SHIFT;
600 
601 		if (vram->mapping &&
602 		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
603 			mem->bus.addr = (u8 __force *)vram->mapping +
604 				mem->bus.offset;
605 
606 		mem->bus.offset += vram->io_start;
607 		mem->bus.is_iomem = true;
608 
609 #if  !IS_ENABLED(CONFIG_X86)
610 		mem->bus.caching = ttm_write_combined;
611 #endif
612 		return 0;
613 	} case XE_PL_STOLEN:
614 		return xe_ttm_stolen_io_mem_reserve(xe, mem);
615 	default:
616 		return -EINVAL;
617 	}
618 }
619 
620 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
621 				const struct ttm_operation_ctx *ctx)
622 {
623 	struct dma_resv_iter cursor;
624 	struct dma_fence *fence;
625 	struct drm_gem_object *obj = &bo->ttm.base;
626 	struct drm_gpuvm_bo *vm_bo;
627 	bool idle = false;
628 	int ret = 0;
629 
630 	dma_resv_assert_held(bo->ttm.base.resv);
631 
632 	if (!list_empty(&bo->ttm.base.gpuva.list)) {
633 		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
634 				    DMA_RESV_USAGE_BOOKKEEP);
635 		dma_resv_for_each_fence_unlocked(&cursor, fence)
636 			dma_fence_enable_sw_signaling(fence);
637 		dma_resv_iter_end(&cursor);
638 	}
639 
640 	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
641 		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
642 		struct drm_gpuva *gpuva;
643 
644 		if (!xe_vm_in_fault_mode(vm)) {
645 			drm_gpuvm_bo_evict(vm_bo, true);
646 			continue;
647 		}
648 
649 		if (!idle) {
650 			long timeout;
651 
652 			if (ctx->no_wait_gpu &&
653 			    !dma_resv_test_signaled(bo->ttm.base.resv,
654 						    DMA_RESV_USAGE_BOOKKEEP))
655 				return -EBUSY;
656 
657 			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
658 							DMA_RESV_USAGE_BOOKKEEP,
659 							ctx->interruptible,
660 							MAX_SCHEDULE_TIMEOUT);
661 			if (!timeout)
662 				return -ETIME;
663 			if (timeout < 0)
664 				return timeout;
665 
666 			idle = true;
667 		}
668 
669 		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
670 			struct xe_vma *vma = gpuva_to_vma(gpuva);
671 
672 			trace_xe_vma_evict(vma);
673 			ret = xe_vm_invalidate_vma(vma);
674 			if (XE_WARN_ON(ret))
675 				return ret;
676 		}
677 	}
678 
679 	return ret;
680 }
681 
682 /*
683  * The dma-buf map_attachment() / unmap_attachment() calls are hooked up here.
684  * Note that unmapping the attachment is deferred to the next
685  * map_attachment time, or to bo destroy (after idling) whichever comes first.
686  * This is to avoid syncing before unmap_attachment(), assuming that the
687  * caller relies on idling the reservation object before moving the
688  * backing store out. Should that assumption not hold, then we will be able
689  * to unconditionally call unmap_attachment() when moving out to system.
690  */
691 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
692 			     struct ttm_resource *new_res)
693 {
694 	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
695 	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
696 					       ttm);
697 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
698 	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
699 	struct sg_table *sg;
700 
701 	xe_assert(xe, attach);
702 	xe_assert(xe, ttm_bo->ttm);
703 
704 	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
705 	    ttm_bo->sg) {
706 		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
707 				      false, MAX_SCHEDULE_TIMEOUT);
708 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
709 		ttm_bo->sg = NULL;
710 	}
711 
712 	if (new_res->mem_type == XE_PL_SYSTEM)
713 		goto out;
714 
715 	if (ttm_bo->sg) {
716 		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
717 		ttm_bo->sg = NULL;
718 	}
719 
720 	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
721 	if (IS_ERR(sg))
722 		return PTR_ERR(sg);
723 
724 	ttm_bo->sg = sg;
725 	xe_tt->sg = sg;
726 
727 out:
728 	ttm_bo_move_null(ttm_bo, new_res);
729 
730 	return 0;
731 }
732 
733 /**
734  * xe_bo_move_notify - Notify subsystems of a pending move
735  * @bo: The buffer object
736  * @ctx: The struct ttm_operation_ctx controlling locking and waits.
737  *
738  * This function notifies subsystems of an upcoming buffer move.
739  * Upon receiving such a notification, subsystems should schedule
740  * halting access to the underlying pages and optionally add a fence
741  * to the buffer object's dma_resv object, that signals when access is
742  * stopped. The caller will wait on all dma_resv fences before
743  * starting the move.
744  *
745  * A subsystem may commence access to the object after obtaining
746  * bindings to the new backing memory under the object lock.
747  *
748  * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
749  * negative error code on error.
750  */
751 static int xe_bo_move_notify(struct xe_bo *bo,
752 			     const struct ttm_operation_ctx *ctx)
753 {
754 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
755 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
756 	struct ttm_resource *old_mem = ttm_bo->resource;
757 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
758 	int ret;
759 
760 	/*
761 	 * If this starts to call into many components, consider
762 	 * using a notification chain here.
763 	 */
764 
765 	if (xe_bo_is_pinned(bo))
766 		return -EINVAL;
767 
768 	xe_bo_vunmap(bo);
769 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
770 	if (ret)
771 		return ret;
772 
773 	/* Don't call move_notify() for imported dma-bufs. */
774 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
775 		dma_buf_move_notify(ttm_bo->base.dma_buf);
776 
777 	/*
778 	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
779 	 * so if we moved from VRAM make sure to unlink this from the userfault
780 	 * tracking.
781 	 */
782 	if (mem_type_is_vram(old_mem_type)) {
783 		mutex_lock(&xe->mem_access.vram_userfault.lock);
784 		if (!list_empty(&bo->vram_userfault_link))
785 			list_del_init(&bo->vram_userfault_link);
786 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
787 	}
788 
789 	return 0;
790 }
791 
792 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
793 		      struct ttm_operation_ctx *ctx,
794 		      struct ttm_resource *new_mem,
795 		      struct ttm_place *hop)
796 {
797 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
798 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
799 	struct ttm_resource *old_mem = ttm_bo->resource;
800 	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
801 	struct ttm_tt *ttm = ttm_bo->ttm;
802 	struct xe_migrate *migrate = NULL;
803 	struct dma_fence *fence;
804 	bool move_lacks_source;
805 	bool tt_has_data;
806 	bool needs_clear;
807 	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
808 				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
809 	int ret = 0;
810 
811 	/* Bo creation path, moving to system or TT. */
812 	if ((!old_mem && ttm) && !handle_system_ccs) {
813 		if (new_mem->mem_type == XE_PL_TT)
814 			ret = xe_tt_map_sg(xe, ttm);
815 		if (!ret)
816 			ttm_bo_move_null(ttm_bo, new_mem);
817 		goto out;
818 	}
819 
820 	if (ttm_bo->type == ttm_bo_type_sg) {
821 		if (new_mem->mem_type == XE_PL_SYSTEM)
822 			ret = xe_bo_move_notify(bo, ctx);
823 		if (!ret)
824 			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
825 		return ret;
826 	}
827 
828 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));
829 
830 	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
831 					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
832 
833 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
834 		(!ttm && ttm_bo->type == ttm_bo_type_device);
835 
836 	if (new_mem->mem_type == XE_PL_TT) {
837 		ret = xe_tt_map_sg(xe, ttm);
838 		if (ret)
839 			goto out;
840 	}
841 
842 	if ((move_lacks_source && !needs_clear)) {
843 		ttm_bo_move_null(ttm_bo, new_mem);
844 		goto out;
845 	}
846 
847 	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
848 	    new_mem->mem_type == XE_PL_SYSTEM) {
849 		ret = xe_svm_bo_evict(bo);
850 		if (!ret) {
851 			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
852 			ttm_bo_move_null(ttm_bo, new_mem);
853 		} else {
854 			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
855 				ERR_PTR(ret));
856 		}
857 
858 		goto out;
859 	}
860 
861 	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
862 		ttm_bo_move_null(ttm_bo, new_mem);
863 		goto out;
864 	}
865 
866 	/*
867 	 * A failed multi-hop, where the old_mem is still marked as
868 	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
869 	 */
870 	if (old_mem_type == XE_PL_TT &&
871 	    new_mem->mem_type == XE_PL_TT) {
872 		ttm_bo_move_null(ttm_bo, new_mem);
873 		goto out;
874 	}
875 
876 	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
877 		ret = xe_bo_move_notify(bo, ctx);
878 		if (ret)
879 			goto out;
880 	}
881 
882 	if (old_mem_type == XE_PL_TT &&
883 	    new_mem->mem_type == XE_PL_SYSTEM) {
884 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
885 						     DMA_RESV_USAGE_BOOKKEEP,
886 						     false,
887 						     MAX_SCHEDULE_TIMEOUT);
888 		if (timeout < 0) {
889 			ret = timeout;
890 			goto out;
891 		}
892 
893 		if (!handle_system_ccs) {
894 			ttm_bo_move_null(ttm_bo, new_mem);
895 			goto out;
896 		}
897 	}
898 
899 	if (!move_lacks_source &&
900 	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
901 	     (mem_type_is_vram(old_mem_type) &&
902 	      new_mem->mem_type == XE_PL_SYSTEM))) {
903 		hop->fpfn = 0;
904 		hop->lpfn = 0;
905 		hop->mem_type = XE_PL_TT;
906 		hop->flags = TTM_PL_FLAG_TEMPORARY;
907 		ret = -EMULTIHOP;
908 		goto out;
909 	}
910 
911 	if (bo->tile)
912 		migrate = bo->tile->migrate;
913 	else if (resource_is_vram(new_mem))
914 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
915 	else if (mem_type_is_vram(old_mem_type))
916 		migrate = mem_type_to_migrate(xe, old_mem_type);
917 	else
918 		migrate = xe->tiles[0].migrate;
919 
920 	xe_assert(xe, migrate);
921 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
922 	if (xe_rpm_reclaim_safe(xe)) {
923 		/*
924 		 * We might be called through swapout in the validation path of
925 		 * another TTM device, so acquire rpm here.
926 		 */
927 		xe_pm_runtime_get(xe);
928 	} else {
929 		drm_WARN_ON(&xe->drm, handle_system_ccs);
930 		xe_pm_runtime_get_noresume(xe);
931 	}
932 
933 	if (move_lacks_source) {
934 		u32 flags = 0;
935 
936 		if (mem_type_is_vram(new_mem->mem_type))
937 			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
938 		else if (handle_system_ccs)
939 			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
940 
941 		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
942 	} else {
943 		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
944 					handle_system_ccs);
945 	}
946 	if (IS_ERR(fence)) {
947 		ret = PTR_ERR(fence);
948 		xe_pm_runtime_put(xe);
949 		goto out;
950 	}
951 	if (!move_lacks_source) {
952 		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
953 						new_mem);
954 		if (ret) {
955 			dma_fence_wait(fence, false);
956 			ttm_bo_move_null(ttm_bo, new_mem);
957 			ret = 0;
958 		}
959 	} else {
960 		/*
961 		 * ttm_bo_move_accel_cleanup() may blow up if
962 		 * bo->resource == NULL, so just attach the
963 		 * fence and set the new resource.
964 		 */
965 		dma_resv_add_fence(ttm_bo->base.resv, fence,
966 				   DMA_RESV_USAGE_KERNEL);
967 		ttm_bo_move_null(ttm_bo, new_mem);
968 	}
969 
970 	dma_fence_put(fence);
971 	xe_pm_runtime_put(xe);
972 
973 	/*
974 	 * CCS metadata is migrated from TT -> SMEM, so detach the
975 	 * BBs from the BO as they are no longer needed.
976 	 */
977 	if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT &&
978 	    new_mem->mem_type == XE_PL_SYSTEM)
979 		xe_sriov_vf_ccs_detach_bo(bo);
980 
981 	if (IS_VF_CCS_READY(xe) &&
982 	    ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
983 	     (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
984 	    handle_system_ccs)
985 		ret = xe_sriov_vf_ccs_attach_bo(bo);
986 
987 out:
988 	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
989 	    ttm_bo->ttm) {
990 		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
991 						     DMA_RESV_USAGE_KERNEL,
992 						     false,
993 						     MAX_SCHEDULE_TIMEOUT);
994 		if (timeout < 0)
995 			ret = timeout;
996 
997 		if (IS_VF_CCS_READY(xe))
998 			xe_sriov_vf_ccs_detach_bo(bo);
999 
1000 		xe_tt_unmap_sg(xe, ttm_bo->ttm);
1001 	}
1002 
1003 	return ret;
1004 }
1005 
1006 static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
1007 			       struct ttm_buffer_object *bo,
1008 			       unsigned long *scanned)
1009 {
1010 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1011 	long lret;
1012 
1013 	/* Fake move to system, without copying data. */
1014 	if (bo->resource->mem_type != XE_PL_SYSTEM) {
1015 		struct ttm_resource *new_resource;
1016 
1017 		lret = ttm_bo_wait_ctx(bo, ctx);
1018 		if (lret)
1019 			return lret;
1020 
1021 		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
1022 		if (lret)
1023 			return lret;
1024 
1025 		xe_tt_unmap_sg(xe, bo->ttm);
1026 		ttm_bo_move_null(bo, new_resource);
1027 	}
1028 
1029 	*scanned += bo->ttm->num_pages;
1030 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1031 			     {.purge = true,
1032 			      .writeback = false,
1033 			      .allow_move = false});
1034 
1035 	if (lret > 0)
1036 		xe_ttm_tt_account_subtract(xe, bo->ttm);
1037 
1038 	return lret;
1039 }
1040 
1041 static bool
1042 xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
1043 {
1044 	struct drm_gpuvm_bo *vm_bo;
1045 
1046 	if (!ttm_bo_eviction_valuable(bo, place))
1047 		return false;
1048 
1049 	if (!xe_bo_is_xe_bo(bo))
1050 		return true;
1051 
1052 	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
1053 		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
1054 			return false;
1055 	}
1056 
1057 	return true;
1058 }
1059 
1060 /**
1061  * xe_bo_shrink() - Try to shrink an xe bo.
1062  * @ctx: The struct ttm_operation_ctx used for shrinking.
1063  * @bo: The TTM buffer object whose pages to shrink.
1064  * @flags: Flags governing the shrink behaviour.
1065  * @scanned: Pointer to a counter of the number of pages
1066  * attempted to shrink.
1067  *
1068  * Try to shrink or purge a bo, and if it succeeds, unmap dma.
1069  * Note that we also need to be able to handle non-xe bos
1070  * (ghost bos), but only if the struct ttm_tt is embedded in
1071  * a struct xe_ttm_tt. When the function attempts to shrink
1072  * the pages of a buffer object, the value pointed to by @scanned
1073  * is updated.
1074  *
1075  * Return: The number of pages shrunken or purged, or negative error
1076  * code on failure.
1077  */
1078 long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
1079 		  const struct xe_bo_shrink_flags flags,
1080 		  unsigned long *scanned)
1081 {
1082 	struct ttm_tt *tt = bo->ttm;
1083 	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
1084 	struct ttm_place place = {.mem_type = bo->resource->mem_type};
1085 	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
1086 	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
1087 	bool needs_rpm;
1088 	long lret = 0L;
1089 
1090 	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
1091 	    (flags.purge && !xe_tt->purgeable))
1092 		return -EBUSY;
1093 
1094 	if (!xe_bo_eviction_valuable(bo, &place))
1095 		return -EBUSY;
1096 
1097 	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
1098 		return xe_bo_shrink_purge(ctx, bo, scanned);
1099 
1100 	if (xe_tt->purgeable) {
1101 		if (bo->resource->mem_type != XE_PL_SYSTEM)
1102 			lret = xe_bo_move_notify(xe_bo, ctx);
1103 		if (!lret)
1104 			lret = xe_bo_shrink_purge(ctx, bo, scanned);
1105 		goto out_unref;
1106 	}
1107 
1108 	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
1109 	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
1110 		     xe_bo_needs_ccs_pages(xe_bo));
1111 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1112 		goto out_unref;
1113 
1114 	*scanned += tt->num_pages;
1115 	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
1116 			     {.purge = false,
1117 			      .writeback = flags.writeback,
1118 			      .allow_move = true});
1119 	if (needs_rpm)
1120 		xe_pm_runtime_put(xe);
1121 
1122 	if (lret > 0)
1123 		xe_ttm_tt_account_subtract(xe, tt);
1124 
1125 out_unref:
1126 	xe_bo_put(xe_bo);
1127 
1128 	return lret;
1129 }
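
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the xe
 * shrinker is the real user of xe_bo_shrink(); this only shows the expected
 * calling convention.
 *
 *	struct ttm_operation_ctx tctx = { .gfp_retry_mayfail = true };
 *	unsigned long scanned = 0;
 *	long freed;
 *
 *	freed = xe_bo_shrink(&tctx, ttm_bo,
 *			     (struct xe_bo_shrink_flags){ .writeback = true },
 *			     &scanned);
 *
 * A negative return (e.g. -EBUSY) means the bo could not be shrunk right now;
 * a positive return is the number of pages shrunken or purged.
 */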
1130 
1131 /**
1132  * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
1133  * up in system memory.
1134  * @bo: The buffer object to prepare.
1135  *
1136  * On successful completion, the object backup pages are allocated. Expectation
1137  * is that this is called from the PM notifier, prior to suspend/hibernation.
1138  *
1139  * Return: 0 on success. Negative error code on failure.
1140  */
1141 int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
1142 {
1143 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1144 	struct xe_validation_ctx ctx;
1145 	struct drm_exec exec;
1146 	struct xe_bo *backup;
1147 	int ret = 0;
1148 
1149 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
1150 		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
1151 		drm_exec_retry_on_contention(&exec);
1152 		xe_assert(xe, !ret);
1153 		xe_assert(xe, !bo->backup_obj);
1154 
1155 		/*
1156 		 * Since this is called from the PM notifier we might have raced with
1157 		 * someone unpinning this after we dropped the pinned list lock and
1158 		 * before grabbing the above bo lock.
1159 		 */
1160 		if (!xe_bo_is_pinned(bo))
1161 			break;
1162 
1163 		if (!xe_bo_is_vram(bo))
1164 			break;
1165 
1166 		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1167 			break;
1168 
1169 		backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
1170 					   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1171 					   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1172 					   XE_BO_FLAG_PINNED, &exec);
1173 		if (IS_ERR(backup)) {
1174 			drm_exec_retry_on_contention(&exec);
1175 			ret = PTR_ERR(backup);
1176 			xe_validation_retry_on_oom(&ctx, &ret);
1177 			break;
1178 		}
1179 
1180 		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1181 		ttm_bo_pin(&backup->ttm);
1182 		bo->backup_obj = backup;
1183 	}
1184 
1185 	return ret;
1186 }
1187 
1188 /**
1189  * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
1190  * @bo: The buffer object to undo the prepare for.
1191  *
1192  * Always returns 0. The backup object is removed, if still present. The
1193  * expectation is that this is called from the PM notifier when undoing the prepare step.
1194  *
1195  * Return: Always returns 0.
1196  */
1197 int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
1198 {
1199 	xe_bo_lock(bo, false);
1200 	if (bo->backup_obj) {
1201 		ttm_bo_unpin(&bo->backup_obj->ttm);
1202 		xe_bo_put(bo->backup_obj);
1203 		bo->backup_obj = NULL;
1204 	}
1205 	xe_bo_unlock(bo);
1206 
1207 	return 0;
1208 }
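
/*
 * Illustrative pairing (sketch only; the real call sites are assumed to live
 * in the xe PM notifier code): prepare backups before suspend or hibernation,
 * and undo the preparation if the transition is aborted.
 *
 *	ret = xe_bo_notifier_prepare_pinned(bo);   (allocates the backup bo)
 *	...
 *	xe_bo_notifier_unprepare_pinned(bo);       (drops an unused backup)
 */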
1209 
1210 static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup)
1211 {
1212 	struct xe_device *xe = xe_bo_device(bo);
1213 	bool unmap = false;
1214 	int ret = 0;
1215 
1216 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1217 		struct xe_migrate *migrate;
1218 		struct dma_fence *fence;
1219 
1220 		if (bo->tile)
1221 			migrate = bo->tile->migrate;
1222 		else
1223 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1224 
1225 		xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv);
1226 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1227 		if (ret)
1228 			goto out_backup;
1229 
1230 		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
1231 					backup->ttm.resource, false);
1232 		if (IS_ERR(fence)) {
1233 			ret = PTR_ERR(fence);
1234 			goto out_backup;
1235 		}
1236 
1237 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1238 				   DMA_RESV_USAGE_KERNEL);
1239 		dma_fence_put(fence);
1240 	} else {
1241 		ret = xe_bo_vmap(backup);
1242 		if (ret)
1243 			goto out_backup;
1244 
1245 		if (iosys_map_is_null(&bo->vmap)) {
1246 			ret = xe_bo_vmap(bo);
1247 			if (ret)
1248 				goto out_vunmap;
1249 			unmap = true;
1250 		}
1251 
1252 		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
1253 				   xe_bo_size(bo));
1254 	}
1255 
1256 	if (!bo->backup_obj)
1257 		bo->backup_obj = backup;
1258 out_vunmap:
1259 	xe_bo_vunmap(backup);
1260 out_backup:
1261 	if (unmap)
1262 		xe_bo_vunmap(bo);
1263 
1264 	return ret;
1265 }
1266 
1267 /**
1268  * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
1269  * @bo: The buffer object to move.
1270  *
1271  * On successful completion, the object memory will be moved to system memory.
1272  *
1273  * This is needed for special handling of pinned VRAM objects during
1274  * suspend-resume.
1275  *
1276  * Return: 0 on success. Negative error code on failure.
1277  */
1278 int xe_bo_evict_pinned(struct xe_bo *bo)
1279 {
1280 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1281 	struct xe_validation_ctx ctx;
1282 	struct drm_exec exec;
1283 	struct xe_bo *backup = bo->backup_obj;
1284 	bool backup_created = false;
1285 	int ret = 0;
1286 
1287 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
1288 		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
1289 		drm_exec_retry_on_contention(&exec);
1290 		xe_assert(xe, !ret);
1291 
1292 		if (WARN_ON(!bo->ttm.resource)) {
1293 			ret = -EINVAL;
1294 			break;
1295 		}
1296 
1297 		if (WARN_ON(!xe_bo_is_pinned(bo))) {
1298 			ret = -EINVAL;
1299 			break;
1300 		}
1301 
1302 		if (!xe_bo_is_vram(bo))
1303 			break;
1304 
1305 		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
1306 			break;
1307 
1308 		if (!backup) {
1309 			backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL,
1310 						   xe_bo_size(bo),
1311 						   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
1312 						   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
1313 						   XE_BO_FLAG_PINNED, &exec);
1314 			if (IS_ERR(backup)) {
1315 				drm_exec_retry_on_contention(&exec);
1316 				ret = PTR_ERR(backup);
1317 				xe_validation_retry_on_oom(&ctx, &ret);
1318 				break;
1319 			}
1320 			backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
1321 			backup_created = true;
1322 		}
1323 
1324 		ret = xe_bo_evict_pinned_copy(bo, backup);
1325 	}
1326 
1327 	if (ret && backup_created)
1328 		xe_bo_put(backup);
1329 
1330 	return ret;
1331 }
1332 
1333 /**
1334  * xe_bo_restore_pinned() - Restore a pinned VRAM object
1335  * @bo: The buffer object to move.
1336  *
1337  * On successful completion, the object memory will be moved back to VRAM.
1338  *
1339  * This is needed for special handling of pinned VRAM objects during
1340  * suspend-resume.
1341  *
1342  * Return: 0 on success. Negative error code on failure.
1343  */
1344 int xe_bo_restore_pinned(struct xe_bo *bo)
1345 {
1346 	struct ttm_operation_ctx ctx = {
1347 		.interruptible = false,
1348 		.gfp_retry_mayfail = false,
1349 	};
1350 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
1351 	struct xe_bo *backup = bo->backup_obj;
1352 	bool unmap = false;
1353 	int ret;
1354 
1355 	if (!backup)
1356 		return 0;
1357 
1358 	xe_bo_lock(bo, false);
1359 
1360 	if (!xe_bo_is_pinned(backup)) {
1361 		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
1362 		if (ret)
1363 			goto out_unlock_bo;
1364 	}
1365 
1366 	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
1367 		struct xe_migrate *migrate;
1368 		struct dma_fence *fence;
1369 
1370 		if (bo->tile)
1371 			migrate = bo->tile->migrate;
1372 		else
1373 			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
1374 
1375 		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
1376 		if (ret)
1377 			goto out_unlock_bo;
1378 
1379 		fence = xe_migrate_copy(migrate, backup, bo,
1380 					backup->ttm.resource, bo->ttm.resource,
1381 					false);
1382 		if (IS_ERR(fence)) {
1383 			ret = PTR_ERR(fence);
1384 			goto out_unlock_bo;
1385 		}
1386 
1387 		dma_resv_add_fence(bo->ttm.base.resv, fence,
1388 				   DMA_RESV_USAGE_KERNEL);
1389 		dma_fence_put(fence);
1390 	} else {
1391 		ret = xe_bo_vmap(backup);
1392 		if (ret)
1393 			goto out_unlock_bo;
1394 
1395 		if (iosys_map_is_null(&bo->vmap)) {
1396 			ret = xe_bo_vmap(bo);
1397 			if (ret)
1398 				goto out_backup;
1399 			unmap = true;
1400 		}
1401 
1402 		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
1403 				 xe_bo_size(bo));
1404 	}
1405 
1406 	bo->backup_obj = NULL;
1407 
1408 out_backup:
1409 	xe_bo_vunmap(backup);
1410 	if (!bo->backup_obj) {
1411 		if (xe_bo_is_pinned(backup))
1412 			ttm_bo_unpin(&backup->ttm);
1413 		xe_bo_put(backup);
1414 	}
1415 out_unlock_bo:
1416 	if (unmap)
1417 		xe_bo_vunmap(bo);
1418 	xe_bo_unlock(bo);
1419 	return ret;
1420 }
1421 
1422 int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
1423 {
1424 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
1425 	struct ttm_tt *tt = ttm_bo->ttm;
1426 
1427 	if (tt) {
1428 		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);
1429 
1430 		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1431 			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
1432 						 ttm_bo->sg,
1433 						 DMA_BIDIRECTIONAL);
1434 			ttm_bo->sg = NULL;
1435 			xe_tt->sg = NULL;
1436 		} else if (xe_tt->sg) {
1437 			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
1438 					  xe_tt->sg,
1439 					  DMA_BIDIRECTIONAL, 0);
1440 			sg_free_table(xe_tt->sg);
1441 			xe_tt->sg = NULL;
1442 		}
1443 	}
1444 
1445 	return 0;
1446 }
1447 
1448 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1449 				       unsigned long page_offset)
1450 {
1451 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1452 	struct xe_res_cursor cursor;
1453 	struct xe_vram_region *vram;
1454 
1455 	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1456 		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1457 
1458 	vram = res_to_mem_region(ttm_bo->resource);
1459 	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1460 	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1461 }
1462 
1463 static void __xe_bo_vunmap(struct xe_bo *bo);
1464 
1465 /*
1466  * TODO: Move this function to TTM so we don't rely on how TTM does its
1467  * locking, thereby abusing TTM internals.
1468  */
1469 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1470 {
1471 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1472 	bool locked;
1473 
1474 	xe_assert(xe, !kref_read(&ttm_bo->kref));
1475 
1476 	/*
1477 	 * We can typically only race with TTM trylocking under the
1478 	 * lru_lock, which will immediately be unlocked again since
1479 	 * the ttm_bo refcount is zero at this point. So trylocking *should*
1480 	 * always succeed here, as long as we hold the lru lock.
1481 	 */
1482 	spin_lock(&ttm_bo->bdev->lru_lock);
1483 	locked = dma_resv_trylock(ttm_bo->base.resv);
1484 	spin_unlock(&ttm_bo->bdev->lru_lock);
1485 	xe_assert(xe, locked);
1486 
1487 	return locked;
1488 }
1489 
1490 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1491 {
1492 	struct dma_resv_iter cursor;
1493 	struct dma_fence *fence;
1494 	struct dma_fence *replacement = NULL;
1495 	struct xe_bo *bo;
1496 
1497 	if (!xe_bo_is_xe_bo(ttm_bo))
1498 		return;
1499 
1500 	bo = ttm_to_xe_bo(ttm_bo);
1501 	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1502 
1503 	/*
1504 	 * Corner case where TTM fails to allocate memory and this BO's resv
1505 	 * still points to the VM's resv.
1506 	 */
1507 	if (ttm_bo->base.resv != &ttm_bo->base._resv)
1508 		return;
1509 
1510 	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1511 		return;
1512 
1513 	/*
1514 	 * Scrub the preempt fences if any. The unbind fence is already
1515 	 * attached to the resv.
1516 	 * TODO: Don't do this for external bos once we scrub them after
1517 	 * unbind.
1518 	 */
1519 	dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1520 				DMA_RESV_USAGE_BOOKKEEP, fence) {
1521 		if (xe_fence_is_xe_preempt(fence) &&
1522 		    !dma_fence_is_signaled(fence)) {
1523 			if (!replacement)
1524 				replacement = dma_fence_get_stub();
1525 
1526 			dma_resv_replace_fences(ttm_bo->base.resv,
1527 						fence->context,
1528 						replacement,
1529 						DMA_RESV_USAGE_BOOKKEEP);
1530 		}
1531 	}
1532 	dma_fence_put(replacement);
1533 
1534 	dma_resv_unlock(ttm_bo->base.resv);
1535 }
1536 
1537 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1538 {
1539 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1540 
1541 	if (!xe_bo_is_xe_bo(ttm_bo))
1542 		return;
1543 
1544 	if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev)))
1545 		xe_sriov_vf_ccs_detach_bo(bo);
1546 
1547 	/*
1548 	 * Object is idle and about to be destroyed. Release the
1549 	 * dma-buf attachment.
1550 	 */
1551 	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1552 		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1553 						       struct xe_ttm_tt, ttm);
1554 
1555 		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1556 					 DMA_BIDIRECTIONAL);
1557 		ttm_bo->sg = NULL;
1558 		xe_tt->sg = NULL;
1559 	}
1560 }
1561 
1562 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1563 {
1564 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1565 
1566 	if (ttm_bo->ttm) {
1567 		struct ttm_placement place = {};
1568 		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1569 
1570 		drm_WARN_ON(&xe->drm, ret);
1571 	}
1572 }
1573 
1574 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1575 {
1576 	struct ttm_operation_ctx ctx = {
1577 		.interruptible = false,
1578 		.gfp_retry_mayfail = false,
1579 	};
1580 
1581 	if (ttm_bo->ttm) {
1582 		struct xe_ttm_tt *xe_tt =
1583 			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1584 
1585 		if (xe_tt->purgeable)
1586 			xe_ttm_bo_purge(ttm_bo, &ctx);
1587 	}
1588 }
1589 
1590 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1591 				unsigned long offset, void *buf, int len,
1592 				int write)
1593 {
1594 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1595 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1596 	struct iosys_map vmap;
1597 	struct xe_res_cursor cursor;
1598 	struct xe_vram_region *vram;
1599 	int bytes_left = len;
1600 	int err = 0;
1601 
1602 	xe_bo_assert_held(bo);
1603 	xe_device_assert_mem_access(xe);
1604 
1605 	if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1606 		return -EIO;
1607 
1608 	if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
1609 		struct xe_migrate *migrate =
1610 			mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1611 
1612 		err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1613 					       write);
1614 		goto out;
1615 	}
1616 
1617 	vram = res_to_mem_region(ttm_bo->resource);
1618 	xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1619 		     xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);
1620 
1621 	do {
1622 		unsigned long page_offset = (offset & ~PAGE_MASK);
1623 		int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1624 
1625 		iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1626 					  cursor.start);
1627 		if (write)
1628 			xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1629 		else
1630 			xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1631 
1632 		buf += byte_count;
1633 		offset += byte_count;
1634 		bytes_left -= byte_count;
1635 		if (bytes_left)
1636 			xe_res_next(&cursor, PAGE_SIZE);
1637 	} while (bytes_left);
1638 
1639 out:
1640 	return err ?: len;
1641 }
1642 
1643 const struct ttm_device_funcs xe_ttm_funcs = {
1644 	.ttm_tt_create = xe_ttm_tt_create,
1645 	.ttm_tt_populate = xe_ttm_tt_populate,
1646 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1647 	.ttm_tt_destroy = xe_ttm_tt_destroy,
1648 	.evict_flags = xe_evict_flags,
1649 	.move = xe_bo_move,
1650 	.io_mem_reserve = xe_ttm_io_mem_reserve,
1651 	.io_mem_pfn = xe_ttm_io_mem_pfn,
1652 	.access_memory = xe_ttm_access_memory,
1653 	.release_notify = xe_ttm_bo_release_notify,
1654 	.eviction_valuable = xe_bo_eviction_valuable,
1655 	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1656 	.swap_notify = xe_ttm_bo_swap_notify,
1657 };
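
/*
 * These callbacks are handed to TTM at device init time. A minimal sketch of
 * the registration (the exact call site and arguments in xe_device.c are an
 * assumption here):
 *
 *	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
 *			      xe->drm.anon_inode->i_mapping,
 *			      xe->drm.vma_offset_manager, false, false);
 */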
1658 
1659 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1660 {
1661 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1662 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1663 	struct xe_tile *tile;
1664 	u8 id;
1665 
1666 	if (bo->ttm.base.import_attach)
1667 		drm_prime_gem_destroy(&bo->ttm.base, NULL);
1668 	drm_gem_object_release(&bo->ttm.base);
1669 
1670 	xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1671 
1672 	for_each_tile(tile, xe, id)
1673 		if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1674 			xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1675 
1676 #ifdef CONFIG_PROC_FS
1677 	if (bo->client)
1678 		xe_drm_client_remove_bo(bo);
1679 #endif
1680 
1681 	if (bo->vm && xe_bo_is_user(bo))
1682 		xe_vm_put(bo->vm);
1683 
1684 	if (bo->parent_obj)
1685 		xe_bo_put(bo->parent_obj);
1686 
1687 	mutex_lock(&xe->mem_access.vram_userfault.lock);
1688 	if (!list_empty(&bo->vram_userfault_link))
1689 		list_del(&bo->vram_userfault_link);
1690 	mutex_unlock(&xe->mem_access.vram_userfault.lock);
1691 
1692 	kfree(bo);
1693 }
1694 
1695 static void xe_gem_object_free(struct drm_gem_object *obj)
1696 {
1697 	/* Our BO reference counting scheme works as follows:
1698 	 *
1699 	 * The gem object kref is typically used throughout the driver,
1700 	 * and the gem object holds a ttm_buffer_object refcount, so
1701 	 * that when the last gem object reference is put, which is when
1702 	 * we end up in this function, we put also that ttm_buffer_object
1703 	 * refcount. Anything using gem interfaces is then no longer
1704 	 * allowed to access the object in a way that requires a gem
1705 	 * refcount, including locking the object.
1706 	 *
1707 	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1708 	 * refcount directly if needed.
1709 	 */
1710 	__xe_bo_vunmap(gem_to_xe_bo(obj));
1711 	ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1712 }
1713 
1714 static void xe_gem_object_close(struct drm_gem_object *obj,
1715 				struct drm_file *file_priv)
1716 {
1717 	struct xe_bo *bo = gem_to_xe_bo(obj);
1718 
1719 	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1720 		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1721 
1722 		xe_bo_lock(bo, false);
1723 		ttm_bo_set_bulk_move(&bo->ttm, NULL);
1724 		xe_bo_unlock(bo);
1725 	}
1726 }
1727 
1728 static bool should_migrate_to_smem(struct xe_bo *bo)
1729 {
1730 	/*
1731 	 * NOTE: The following atomic checks are platform-specific. For example,
1732 	 * if a device supports CXL atomics, these may not be necessary or
1733 	 * may behave differently.
1734 	 */
1735 
1736 	return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
1737 	       bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
1738 }
1739 
1740 static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
1741 {
1742 	long lerr;
1743 
1744 	if (ctx->no_wait_gpu)
1745 		return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
1746 			0 : -EBUSY;
1747 
1748 	lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
1749 				     ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
1750 	if (lerr < 0)
1751 		return lerr;
1752 	if (lerr == 0)
1753 		return -EBUSY;
1754 
1755 	return 0;
1756 }
1757 
1758 /* Populate the bo if swapped out, or migrate if the access mode requires that. */
1759 static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
1760 			       struct drm_exec *exec)
1761 {
1762 	struct ttm_buffer_object *tbo = &bo->ttm;
1763 	int err = 0;
1764 
1765 	if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
1766 		err = xe_bo_wait_usage_kernel(bo, ctx);
1767 		if (!err)
1768 			err = ttm_bo_populate(&bo->ttm, ctx);
1769 	} else if (should_migrate_to_smem(bo)) {
1770 		xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
1771 		err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
1772 	}
1773 
1774 	return err;
1775 }
1776 
1777 /* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
__xe_bo_cpu_fault(struct vm_fault * vmf,struct xe_device * xe,struct xe_bo * bo)1778 static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
1779 {
1780 	vm_fault_t ret;
1781 
1782 	trace_xe_bo_cpu_fault(bo);
1783 
1784 	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1785 				       TTM_BO_VM_NUM_PREFAULT);
1786 	/*
1787 	 * By the time TTM is actually called to insert PTEs, no blocking conditions
1788 	 * must remain; if any did, TTM might drop locks and return VM_FAULT_RETRY.
1789 	 */
1790 	xe_assert(xe, ret != VM_FAULT_RETRY);
1791 
1792 	if (ret == VM_FAULT_NOPAGE &&
1793 	    mem_type_is_vram(bo->ttm.resource->mem_type)) {
1794 		mutex_lock(&xe->mem_access.vram_userfault.lock);
1795 		if (list_empty(&bo->vram_userfault_link))
1796 			list_add(&bo->vram_userfault_link,
1797 				 &xe->mem_access.vram_userfault.list);
1798 		mutex_unlock(&xe->mem_access.vram_userfault.lock);
1799 	}
1800 
1801 	return ret;
1802 }
1803 
xe_err_to_fault_t(int err)1804 static vm_fault_t xe_err_to_fault_t(int err)
1805 {
1806 	switch (err) {
1807 	case 0:
1808 	case -EINTR:
1809 	case -ERESTARTSYS:
1810 	case -EAGAIN:
1811 		return VM_FAULT_NOPAGE;
1812 	case -ENOMEM:
1813 	case -ENOSPC:
1814 		return VM_FAULT_OOM;
1815 	default:
1816 		break;
1817 	}
1818 	return VM_FAULT_SIGBUS;
1819 }
1820 
xe_ttm_bo_is_imported(struct ttm_buffer_object * tbo)1821 static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
1822 {
1823 	dma_resv_assert_held(tbo->base.resv);
1824 
1825 	return tbo->ttm &&
1826 		(tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
1827 		TTM_TT_FLAG_EXTERNAL;
1828 }
1829 
xe_bo_cpu_fault_fastpath(struct vm_fault * vmf,struct xe_device * xe,struct xe_bo * bo,bool needs_rpm)1830 static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
1831 					   struct xe_bo *bo, bool needs_rpm)
1832 {
1833 	struct ttm_buffer_object *tbo = &bo->ttm;
1834 	vm_fault_t ret = VM_FAULT_RETRY;
1835 	struct xe_validation_ctx ctx;
1836 	struct ttm_operation_ctx tctx = {
1837 		.interruptible = true,
1838 		.no_wait_gpu = true,
1839 		.gfp_retry_mayfail = true,
1840 
1841 	};
1842 	int err;
1843 
1844 	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
1845 		return VM_FAULT_RETRY;
1846 
1847 	err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
1848 				     (struct xe_val_flags) {
1849 					     .interruptible = true,
1850 					     .no_block = true
1851 				     });
1852 	if (err)
1853 		goto out_pm;
1854 
1855 	if (!dma_resv_trylock(tbo->base.resv))
1856 		goto out_validation;
1857 
1858 	if (xe_ttm_bo_is_imported(tbo)) {
1859 		ret = VM_FAULT_SIGBUS;
1860 		drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
1861 		goto out_unlock;
1862 	}
1863 
1864 	err = xe_bo_fault_migrate(bo, &tctx, NULL);
1865 	if (err) {
1866 		/* Return VM_FAULT_RETRY on these errors. */
1867 		if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
1868 			ret = xe_err_to_fault_t(err);
1869 		goto out_unlock;
1870 	}
1871 
1872 	if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
1873 		ret = __xe_bo_cpu_fault(vmf, xe, bo);
1874 
1875 out_unlock:
1876 	dma_resv_unlock(tbo->base.resv);
1877 out_validation:
1878 	xe_validation_ctx_fini(&ctx);
1879 out_pm:
1880 	if (needs_rpm)
1881 		xe_pm_runtime_put(xe);
1882 
1883 	return ret;
1884 }
1885 
xe_bo_cpu_fault(struct vm_fault * vmf)1886 static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
1887 {
1888 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1889 	struct drm_device *ddev = tbo->base.dev;
1890 	struct xe_device *xe = to_xe_device(ddev);
1891 	struct xe_bo *bo = ttm_to_xe_bo(tbo);
1892 	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1893 	bool retry_after_wait = false;
1894 	struct xe_validation_ctx ctx;
1895 	struct drm_exec exec;
1896 	vm_fault_t ret;
1897 	int err = 0;
1898 	int idx;
1899 
1900 	if (!drm_dev_enter(&xe->drm, &idx))
1901 		return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1902 
1903 	ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
1904 	if (ret != VM_FAULT_RETRY)
1905 		goto out;
1906 
1907 	if (fault_flag_allow_retry_first(vmf->flags)) {
1908 		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
1909 			goto out;
1910 		retry_after_wait = true;
1911 		xe_bo_get(bo);
1912 		mmap_read_unlock(vmf->vma->vm_mm);
1913 	} else {
1914 		ret = VM_FAULT_NOPAGE;
1915 	}
1916 
1917 	/*
1918 	 * The fastpath failed and we were not required to return and retry immediately.
1919 	 * We're now running in one of two modes:
1920 	 *
1921 	 * 1) retry_after_wait == true: The mmap_read_lock() has been dropped, so we
1922 	 * can only resolve the blocking waits here, not the fault itself. The aim is
1923 	 * that the fastpath succeeds once the fault is retried, but it may still fail
1924 	 * since we drop the bo lock in between.
1925 	 *
1926 	 * 2) retry_after_wait == false: The fastpath failed, typically even after
1927 	 * a retry. Do whatever's necessary to resolve the fault.
1928 	 *
1929 	 * This construct is recommended to avoid excessive waits under the mmap_lock.
1930 	 */
1931 
1932 	if (needs_rpm)
1933 		xe_pm_runtime_get(xe);
1934 
1935 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
1936 			    err) {
1937 		struct ttm_operation_ctx tctx = {
1938 			.interruptible = true,
1939 			.no_wait_gpu = false,
1940 			.gfp_retry_mayfail = retry_after_wait,
1941 		};
1942 
1943 		err = drm_exec_lock_obj(&exec, &tbo->base);
1944 		drm_exec_retry_on_contention(&exec);
1945 		if (err)
1946 			break;
1947 
1948 		if (xe_ttm_bo_is_imported(tbo)) {
1949 			err = -EFAULT;
1950 			drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
1951 			break;
1952 		}
1953 
1954 		err = xe_bo_fault_migrate(bo, &tctx, &exec);
1955 		if (err) {
1956 			drm_exec_retry_on_contention(&exec);
1957 			xe_validation_retry_on_oom(&ctx, &err);
1958 			break;
1959 		}
1960 
1961 		err = xe_bo_wait_usage_kernel(bo, &tctx);
1962 		if (err)
1963 			break;
1964 
1965 		if (!retry_after_wait)
1966 			ret = __xe_bo_cpu_fault(vmf, xe, bo);
1967 	}
1968 	/* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
1969 	if (err && !retry_after_wait)
1970 		ret = xe_err_to_fault_t(err);
1971 
1972 	if (needs_rpm)
1973 		xe_pm_runtime_put(xe);
1974 
1975 	if (retry_after_wait)
1976 		xe_bo_put(bo);
1977 out:
1978 	drm_dev_exit(idx);
1979 
1980 	return ret;
1981 }
1982 
xe_bo_vm_access(struct vm_area_struct * vma,unsigned long addr,void * buf,int len,int write)1983 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1984 			   void *buf, int len, int write)
1985 {
1986 	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1987 	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1988 	struct xe_device *xe = xe_bo_device(bo);
1989 	int ret;
1990 
1991 	xe_pm_runtime_get(xe);
1992 	ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1993 	xe_pm_runtime_put(xe);
1994 
1995 	return ret;
1996 }
1997 
1998 /**
1999  * xe_bo_read() - Read from an xe_bo
2000  * @bo: The buffer object to read from.
2001  * @offset: The byte offset to start reading from.
2002  * @dst: Location to store the data read.
2003  * @size: Size in bytes for the read.
2004  *
2005  * Read @size bytes from the @bo, starting from @offset, storing into @dst.
2006  *
2007  * Return: Zero on success, or negative error.
2008  */
xe_bo_read(struct xe_bo * bo,u64 offset,void * dst,int size)2009 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
2010 {
2011 	int ret;
2012 
2013 	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
2014 	if (ret >= 0 && ret != size)
2015 		ret = -EIO;
2016 	else if (ret == size)
2017 		ret = 0;
2018 
2019 	return ret;
2020 }
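
/*
 * Example (editorial sketch, not part of the driver): reading back a 32-bit
 * value from the start of a BO with xe_bo_read(). The "magic" name and the
 * zero offset are illustrative only.
 *
 *	u32 magic;
 *	int err;
 *
 *	err = xe_bo_read(bo, 0, &magic, sizeof(magic));
 *	if (err)
 *		return err;
 */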
2021 
2022 static const struct vm_operations_struct xe_gem_vm_ops = {
2023 	.fault = xe_bo_cpu_fault,
2024 	.open = ttm_bo_vm_open,
2025 	.close = ttm_bo_vm_close,
2026 	.access = xe_bo_vm_access,
2027 };
2028 
2029 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
2030 	.free = xe_gem_object_free,
2031 	.close = xe_gem_object_close,
2032 	.mmap = drm_gem_ttm_mmap,
2033 	.export = xe_gem_prime_export,
2034 	.vm_ops = &xe_gem_vm_ops,
2035 };
2036 
2037 /**
2038  * xe_bo_alloc - Allocate storage for a struct xe_bo
2039  *
2040  * This function is intended to allocate storage to be passed to
2041  * __xe_bo_create_locked(), for the case where a pointer to the bo to be
2042  * created is needed before the call to __xe_bo_create_locked().
2043  * If __xe_bo_create_locked() ends up never being called, the
2044  * storage allocated with this function must be freed using
2045  * xe_bo_free().
2046  *
2047  * Return: A pointer to an uninitialized struct xe_bo on success,
2048  * ERR_PTR(-ENOMEM) on error.
2049  */
xe_bo_alloc(void)2050 struct xe_bo *xe_bo_alloc(void)
2051 {
2052 	struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
2053 
2054 	if (!bo)
2055 		return ERR_PTR(-ENOMEM);
2056 
2057 	return bo;
2058 }
2059 
2060 /**
2061  * xe_bo_free - Free storage allocated using xe_bo_alloc()
2062  * @bo: The buffer object storage.
2063  *
2064  * Refer to xe_bo_alloc() documentation for valid use-cases.
2065  */
xe_bo_free(struct xe_bo * bo)2066 void xe_bo_free(struct xe_bo *bo)
2067 {
2068 	kfree(bo);
2069 }
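
/*
 * Example (editorial sketch): the intended xe_bo_alloc() / xe_bo_free()
 * pairing. The storage is handed to xe_bo_init_locked() on success and is
 * freed manually only if initialization is never attempted. The early-exit
 * condition and all arguments other than @bo are caller-provided
 * placeholders.
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	if (some_early_error) {
 *		xe_bo_free(bo);
 *		return -EINVAL;
 *	}
 *
 *	bo = xe_bo_init_locked(xe, bo, tile, resv, bulk, size, cpu_caching,
 *			       type, flags, exec);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 */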
2070 
2071 /**
2072  * xe_bo_init_locked() - Initialize or create an xe_bo.
2073  * @xe: The xe device.
2074  * @bo: An already allocated buffer object or NULL
2075  * if the function should allocate a new one.
2076  * @tile: The tile to select for migration of this bo, and the tile used for
2077  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2078  * @resv: Pointer to a locked shared reservation object to use for this bo,
2079  * or NULL for the xe_bo to use its own.
2080  * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
2081  * @size: The storage size to use for the bo.
2082  * @cpu_caching: The cpu caching used for system memory backing store.
2083  * @type: The TTM buffer object type.
2084  * @flags: XE_BO_FLAG_ flags.
2085  * @exec: The drm_exec transaction to use for exhaustive eviction.
2086  *
2087  * Initialize or create an xe buffer object. On failure, any allocated buffer
2088  * object passed in @bo will have been unreferenced.
2089  *
2090  * Return: The buffer object on success. Negative error pointer on failure.
2091  */
xe_bo_init_locked(struct xe_device * xe,struct xe_bo * bo,struct xe_tile * tile,struct dma_resv * resv,struct ttm_lru_bulk_move * bulk,size_t size,u16 cpu_caching,enum ttm_bo_type type,u32 flags,struct drm_exec * exec)2092 struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
2093 				struct xe_tile *tile, struct dma_resv *resv,
2094 				struct ttm_lru_bulk_move *bulk, size_t size,
2095 				u16 cpu_caching, enum ttm_bo_type type,
2096 				u32 flags, struct drm_exec *exec)
2097 {
2098 	struct ttm_operation_ctx ctx = {
2099 		.interruptible = true,
2100 		.no_wait_gpu = false,
2101 		.gfp_retry_mayfail = true,
2102 	};
2103 	struct ttm_placement *placement;
2104 	uint32_t alignment;
2105 	size_t aligned_size;
2106 	int err;
2107 
2108 	/* Only kernel objects should set a tile */
2109 	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
2110 
2111 	if (XE_WARN_ON(!size)) {
2112 		xe_bo_free(bo);
2113 		return ERR_PTR(-EINVAL);
2114 	}
2115 
2116 	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT to also be set */
2117 	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
2118 		return ERR_PTR(-EINVAL);
2119 
2120 	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
2121 	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
2122 	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
2123 	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
2124 		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
2125 
2126 		aligned_size = ALIGN(size, align);
2127 		if (type != ttm_bo_type_device)
2128 			size = ALIGN(size, align);
2129 		flags |= XE_BO_FLAG_INTERNAL_64K;
2130 		alignment = align >> PAGE_SHIFT;
2131 	} else {
2132 		aligned_size = ALIGN(size, SZ_4K);
2133 		flags &= ~XE_BO_FLAG_INTERNAL_64K;
2134 		alignment = SZ_4K >> PAGE_SHIFT;
2135 	}
2136 
2137 	if (type == ttm_bo_type_device && aligned_size != size)
2138 		return ERR_PTR(-EINVAL);
2139 
2140 	if (!bo) {
2141 		bo = xe_bo_alloc();
2142 		if (IS_ERR(bo))
2143 			return bo;
2144 	}
2145 
2146 	bo->ccs_cleared = false;
2147 	bo->tile = tile;
2148 	bo->flags = flags;
2149 	bo->cpu_caching = cpu_caching;
2150 	bo->ttm.base.funcs = &xe_gem_object_funcs;
2151 	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
2152 	INIT_LIST_HEAD(&bo->pinned_link);
2153 #ifdef CONFIG_PROC_FS
2154 	INIT_LIST_HEAD(&bo->client_link);
2155 #endif
2156 	INIT_LIST_HEAD(&bo->vram_userfault_link);
2157 
2158 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
2159 
2160 	if (resv) {
2161 		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
2162 		ctx.resv = resv;
2163 	}
2164 
2165 	xe_validation_assert_exec(xe, exec, &bo->ttm.base);
2166 	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
2167 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
2168 		if (WARN_ON(err)) {
2169 			xe_ttm_bo_destroy(&bo->ttm);
2170 			return ERR_PTR(err);
2171 		}
2172 	}
2173 
2174 	/* Defer populating type_sg bos */
2175 	placement = (type == ttm_bo_type_sg ||
2176 		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
2177 		&bo->placement;
2178 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
2179 				   placement, alignment,
2180 				   &ctx, NULL, resv, xe_ttm_bo_destroy);
2181 	if (err)
2182 		return ERR_PTR(err);
2183 
2184 	/*
2185 	 * The VRAM pages underneath are potentially still being accessed by the
2186 	 * GPU, due to async GPU clearing and async evictions. However, TTM makes
2187 	 * sure to add any corresponding move/clear fences into the object's
2188 	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
2189 	 *
2190 	 * For KMD internal buffers we don't care about GPU clearing, however we
2191 	 * still need to handle async evictions, where the VRAM is still being
2192 	 * accessed by the GPU. Most internal callers are not expecting this,
2193 	 * since they are missing the required synchronisation before accessing
2194 	 * the memory. To keep things simple just sync wait any kernel fences
2195 	 * here, if the buffer is designated KMD internal.
2196 	 *
2197 	 * For normal userspace objects we should already have the required
2198 	 * pipelining or sync waiting elsewhere, since we already have to deal
2199 	 * with things like async GPU clearing.
2200 	 */
2201 	if (type == ttm_bo_type_kernel) {
2202 		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
2203 						     DMA_RESV_USAGE_KERNEL,
2204 						     ctx.interruptible,
2205 						     MAX_SCHEDULE_TIMEOUT);
2206 
2207 		if (timeout < 0) {
2208 			if (!resv)
2209 				dma_resv_unlock(bo->ttm.base.resv);
2210 			xe_bo_put(bo);
2211 			return ERR_PTR(timeout);
2212 		}
2213 	}
2214 
2215 	bo->created = true;
2216 	if (bulk)
2217 		ttm_bo_set_bulk_move(&bo->ttm, bulk);
2218 	else
2219 		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2220 
2221 	return bo;
2222 }
2223 
__xe_bo_fixed_placement(struct xe_device * xe,struct xe_bo * bo,u32 flags,u64 start,u64 end,u64 size)2224 static int __xe_bo_fixed_placement(struct xe_device *xe,
2225 				   struct xe_bo *bo,
2226 				   u32 flags,
2227 				   u64 start, u64 end, u64 size)
2228 {
2229 	struct ttm_place *place = bo->placements;
2230 
2231 	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
2232 		return -EINVAL;
2233 
2234 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
2235 	place->fpfn = start >> PAGE_SHIFT;
2236 	place->lpfn = end >> PAGE_SHIFT;
2237 
2238 	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
2239 	case XE_BO_FLAG_VRAM0:
2240 		place->mem_type = XE_PL_VRAM0;
2241 		break;
2242 	case XE_BO_FLAG_VRAM1:
2243 		place->mem_type = XE_PL_VRAM1;
2244 		break;
2245 	case XE_BO_FLAG_STOLEN:
2246 		place->mem_type = XE_PL_STOLEN;
2247 		break;
2248 
2249 	default:
2250 		/* 0 or multiple of the above set */
2251 		return -EINVAL;
2252 	}
2253 
2254 	bo->placement = (struct ttm_placement) {
2255 		.num_placement = 1,
2256 		.placement = place,
2257 	};
2258 
2259 	return 0;
2260 }
2261 
2262 static struct xe_bo *
__xe_bo_create_locked(struct xe_device * xe,struct xe_tile * tile,struct xe_vm * vm,size_t size,u64 start,u64 end,u16 cpu_caching,enum ttm_bo_type type,u32 flags,u64 alignment,struct drm_exec * exec)2263 __xe_bo_create_locked(struct xe_device *xe,
2264 		      struct xe_tile *tile, struct xe_vm *vm,
2265 		      size_t size, u64 start, u64 end,
2266 		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
2267 		      u64 alignment, struct drm_exec *exec)
2268 {
2269 	struct xe_bo *bo = NULL;
2270 	int err;
2271 
2272 	if (vm)
2273 		xe_vm_assert_held(vm);
2274 
2275 	if (start || end != ~0ULL) {
2276 		bo = xe_bo_alloc();
2277 		if (IS_ERR(bo))
2278 			return bo;
2279 
2280 		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
2281 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
2282 		if (err) {
2283 			xe_bo_free(bo);
2284 			return ERR_PTR(err);
2285 		}
2286 	}
2287 
2288 	bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
2289 			       vm && !xe_vm_in_fault_mode(vm) &&
2290 			       flags & XE_BO_FLAG_USER ?
2291 			       &vm->lru_bulk_move : NULL, size,
2292 			       cpu_caching, type, flags, exec);
2293 	if (IS_ERR(bo))
2294 		return bo;
2295 
2296 	bo->min_align = alignment;
2297 
2298 	/*
2299 	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
2300 	 * to ensure the shared resv doesn't disappear under the bo, the bo
2301 	 * will keep a reference to the vm, and circular references are avoided
2302 	 * by having all the vm's bo references released at vm close
2303 	 * time.
2304 	 */
2305 	if (vm && xe_bo_is_user(bo))
2306 		xe_vm_get(vm);
2307 	bo->vm = vm;
2308 
2309 	if (bo->flags & XE_BO_FLAG_GGTT) {
2310 		struct xe_tile *t;
2311 		u8 id;
2312 
2313 		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
2314 			if (!tile && flags & XE_BO_FLAG_STOLEN)
2315 				tile = xe_device_get_root_tile(xe);
2316 
2317 			xe_assert(xe, tile);
2318 		}
2319 
2320 		for_each_tile(t, xe, id) {
2321 			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
2322 				continue;
2323 
2324 			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
2325 				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
2326 							   start + xe_bo_size(bo), U64_MAX,
2327 							   exec);
2328 			} else {
2329 				err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec);
2330 			}
2331 			if (err)
2332 				goto err_unlock_put_bo;
2333 		}
2334 	}
2335 
2336 	trace_xe_bo_create(bo);
2337 	return bo;
2338 
2339 err_unlock_put_bo:
2340 	__xe_bo_unset_bulk_move(bo);
2341 	xe_bo_unlock_vm_held(bo);
2342 	xe_bo_put(bo);
2343 	return ERR_PTR(err);
2344 }
2345 
2346 /**
2347  * xe_bo_create_locked() - Create a BO
2348  * @xe: The xe device.
2349  * @tile: The tile to select for migration of this bo, and the tile used for
2350  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2351  * @vm: The local vm or NULL for external objects.
2352  * @size: The storage size to use for the bo.
2353  * @type: The TTM buffer object type.
2354  * @flags: XE_BO_FLAG_ flags.
2355  * @exec: The drm_exec transaction to use for exhaustive eviction.
2356  *
2357  * Create a locked xe BO with no range or alignment restrictions.
2358  *
2359  * Return: The buffer object on success. Negative error pointer on failure.
2360  */
xe_bo_create_locked(struct xe_device * xe,struct xe_tile * tile,struct xe_vm * vm,size_t size,enum ttm_bo_type type,u32 flags,struct drm_exec * exec)2361 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
2362 				  struct xe_vm *vm, size_t size,
2363 				  enum ttm_bo_type type, u32 flags,
2364 				  struct drm_exec *exec)
2365 {
2366 	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
2367 				     flags, 0, exec);
2368 }
2369 
xe_bo_create_novm(struct xe_device * xe,struct xe_tile * tile,size_t size,u16 cpu_caching,enum ttm_bo_type type,u32 flags,u64 alignment,bool intr)2370 static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
2371 				       size_t size, u16 cpu_caching,
2372 				       enum ttm_bo_type type, u32 flags,
2373 				       u64 alignment, bool intr)
2374 {
2375 	struct xe_validation_ctx ctx;
2376 	struct drm_exec exec;
2377 	struct xe_bo *bo;
2378 	int ret = 0;
2379 
2380 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
2381 			    ret) {
2382 		bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
2383 					   cpu_caching, type, flags, alignment, &exec);
2384 		drm_exec_retry_on_contention(&exec);
2385 		if (IS_ERR(bo)) {
2386 			ret = PTR_ERR(bo);
2387 			xe_validation_retry_on_oom(&ctx, &ret);
2388 		} else {
2389 			xe_bo_unlock(bo);
2390 		}
2391 	}
2392 
2393 	return ret ? ERR_PTR(ret) : bo;
2394 }
2395 
2396 /**
2397  * xe_bo_create_user() - Create a user BO
2398  * @xe: The xe device.
2399  * @vm: The local vm or NULL for external objects.
2400  * @size: The storage size to use for the bo.
2401  * @cpu_caching: The caching mode to be used for system backing store.
2402  * @flags: XE_BO_FLAG_ flags.
2403  * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
2404  * if such a transaction should be initiated by the call.
2405  *
2406  * Create a bo on behalf of user-space.
2407  *
2408  * Return: The buffer object on success. Negative error pointer on failure.
2409  */
xe_bo_create_user(struct xe_device * xe,struct xe_vm * vm,size_t size,u16 cpu_caching,u32 flags,struct drm_exec * exec)2410 struct xe_bo *xe_bo_create_user(struct xe_device *xe,
2411 				struct xe_vm *vm, size_t size,
2412 				u16 cpu_caching,
2413 				u32 flags, struct drm_exec *exec)
2414 {
2415 	struct xe_bo *bo;
2416 
2417 	flags |= XE_BO_FLAG_USER;
2418 
2419 	if (vm || exec) {
2420 		xe_assert(xe, exec);
2421 		bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
2422 					   cpu_caching, ttm_bo_type_device,
2423 					   flags, 0, exec);
2424 		if (!IS_ERR(bo))
2425 			xe_bo_unlock_vm_held(bo);
2426 	} else {
2427 		bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
2428 				       ttm_bo_type_device, flags, 0, true);
2429 	}
2430 
2431 	return bo;
2432 }
2433 
2434 /**
2435  * xe_bo_create_pin_range_novm() - Create and pin a BO with range options.
2436  * @xe: The xe device.
2437  * @tile: The tile to select for migration of this bo, and the tile used for
2438  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2439  * @size: The storage size to use for the bo.
2440  * @start: Start of fixed VRAM range or 0.
2441  * @end: End of fixed VRAM range or ~0ULL.
2442  * @type: The TTM buffer object type.
2443  * @flags: XE_BO_FLAG_ flags.
2444  *
2445  * Create a pinned Xe BO with optional range restrictions. If @start and @end
2446  * indicate a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM
2447  * placement only.
2448  *
2449  * Return: The buffer object on success. Negative error pointer on failure.
2450  */
xe_bo_create_pin_range_novm(struct xe_device * xe,struct xe_tile * tile,size_t size,u64 start,u64 end,enum ttm_bo_type type,u32 flags)2451 struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
2452 					  size_t size, u64 start, u64 end,
2453 					  enum ttm_bo_type type, u32 flags)
2454 {
2455 	struct xe_validation_ctx ctx;
2456 	struct drm_exec exec;
2457 	struct xe_bo *bo;
2458 	int err = 0;
2459 
2460 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
2461 		bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end,
2462 					   0, type, flags, 0, &exec);
2463 		if (IS_ERR(bo)) {
2464 			drm_exec_retry_on_contention(&exec);
2465 			err = PTR_ERR(bo);
2466 			xe_validation_retry_on_oom(&ctx, &err);
2467 			break;
2468 		}
2469 
2470 		err = xe_bo_pin(bo, &exec);
2471 		xe_bo_unlock(bo);
2472 		if (err) {
2473 			xe_bo_put(bo);
2474 			drm_exec_retry_on_contention(&exec);
2475 			xe_validation_retry_on_oom(&ctx, &err);
2476 			break;
2477 		}
2478 	}
2479 
2480 	return err ? ERR_PTR(err) : bo;
2481 }
2482 
xe_bo_create_pin_map_at_aligned(struct xe_device * xe,struct xe_tile * tile,struct xe_vm * vm,size_t size,u64 offset,enum ttm_bo_type type,u32 flags,u64 alignment,struct drm_exec * exec)2483 static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
2484 						     struct xe_tile *tile,
2485 						     struct xe_vm *vm,
2486 						     size_t size, u64 offset,
2487 						     enum ttm_bo_type type, u32 flags,
2488 						     u64 alignment, struct drm_exec *exec)
2489 {
2490 	struct xe_bo *bo;
2491 	int err;
2492 	u64 start = offset == ~0ull ? 0 : offset;
2493 	u64 end = offset == ~0ull ? ~0ull : start + size;
2494 
2495 	if (flags & XE_BO_FLAG_STOLEN &&
2496 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
2497 		flags |= XE_BO_FLAG_GGTT;
2498 
2499 	bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
2500 				   flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
2501 				   alignment, exec);
2502 	if (IS_ERR(bo))
2503 		return bo;
2504 
2505 	err = xe_bo_pin(bo, exec);
2506 	if (err)
2507 		goto err_put;
2508 
2509 	err = xe_bo_vmap(bo);
2510 	if (err)
2511 		goto err_unpin;
2512 
2513 	xe_bo_unlock_vm_held(bo);
2514 
2515 	return bo;
2516 
2517 err_unpin:
2518 	xe_bo_unpin(bo);
2519 err_put:
2520 	xe_bo_unlock_vm_held(bo);
2521 	xe_bo_put(bo);
2522 	return ERR_PTR(err);
2523 }
2524 
2525 /**
2526  * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset
2527  * @xe: The xe device.
2528  * @tile: The tile to select for migration of this bo, and the tile used for
2529  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2530  * @size: The storage size to use for the bo.
2531  * @offset: Optional VRAM offset or %~0ull for don't care.
2532  * @type: The TTM buffer object type.
2533  * @flags: XE_BO_FLAG_ flags.
2534  * @alignment: GGTT alignment.
2535  * @intr: Whether to execute any waits for backing store interruptibly.
2536  *
2537  * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment
2538  * options. The bo will be external and not associated with a VM.
2539  *
2540  * Return: The buffer object on success. Negative error pointer on failure.
2541  * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
2542  * to true on entry.
2543  */
2544 struct xe_bo *
xe_bo_create_pin_map_at_novm(struct xe_device * xe,struct xe_tile * tile,size_t size,u64 offset,enum ttm_bo_type type,u32 flags,u64 alignment,bool intr)2545 xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
2546 			     size_t size, u64 offset, enum ttm_bo_type type, u32 flags,
2547 			     u64 alignment, bool intr)
2548 {
2549 	struct xe_validation_ctx ctx;
2550 	struct drm_exec exec;
2551 	struct xe_bo *bo;
2552 	int ret = 0;
2553 
2554 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
2555 			    ret) {
2556 		bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset,
2557 						     type, flags, alignment, &exec);
2558 		if (IS_ERR(bo)) {
2559 			drm_exec_retry_on_contention(&exec);
2560 			ret = PTR_ERR(bo);
2561 			xe_validation_retry_on_oom(&ctx, &ret);
2562 		}
2563 	}
2564 
2565 	return ret ? ERR_PTR(ret) : bo;
2566 }
2567 
2568 /**
2569  * xe_bo_create_pin_map() - Create pinned and mapped bo
2570  * @xe: The xe device.
2571  * @tile: The tile to select for migration of this bo, and the tile used for
2572  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2573  * @vm: The vm to associate the buffer object with. The vm's resv must be
2574  * locked with the transaction represented by @exec.
2575  * @size: The storage size to use for the bo.
2576  * @type: The TTM buffer object type.
2577  * @flags: XE_BO_FLAG_ flags.
2578  * @exec: The drm_exec transaction to use for exhaustive eviction, and
2579  * previously used for locking @vm's resv.
2580  *
2581  * Create a pinned and mapped bo, associated with @vm if @vm is non-NULL,
2582  * otherwise external.
2583  *
2584  * Return: The buffer object on success. Negative error pointer on failure.
2585  * In particular, the function may return ERR_PTR(%-EINTR) if @exec was
2586  * configured for interruptible locking.
2587  */
xe_bo_create_pin_map(struct xe_device * xe,struct xe_tile * tile,struct xe_vm * vm,size_t size,enum ttm_bo_type type,u32 flags,struct drm_exec * exec)2588 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2589 				   struct xe_vm *vm, size_t size,
2590 				   enum ttm_bo_type type, u32 flags,
2591 				   struct drm_exec *exec)
2592 {
2593 	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags,
2594 					       0, exec);
2595 }
2596 
2597 /**
2598  * xe_bo_create_pin_map_novm() - Create pinned and mapped bo
2599  * @xe: The xe device.
2600  * @tile: The tile to select for migration of this bo, and the tile used for
2601  * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
2602  * @size: The storage size to use for the bo.
2603  * @type: The TTM buffer object type.
2604  * @flags: XE_BO_FLAG_ flags.
2605  * @intr: Whether to execute any waits for backing store interruptibly.
2606  *
2607  * Create a pinned and mapped bo. The bo will be external and not associated
2608  * with a VM.
2609  *
2610  * Return: The buffer object on success. Negative error pointer on failure.
2611  * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
2612  * to true on entry.
2613  */
xe_bo_create_pin_map_novm(struct xe_device * xe,struct xe_tile * tile,size_t size,enum ttm_bo_type type,u32 flags,bool intr)2614 struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
2615 					size_t size, enum ttm_bo_type type, u32 flags,
2616 					bool intr)
2617 {
2618 	return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr);
2619 }
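
/*
 * Example (editorial sketch): a typical kernel-internal use of
 * xe_bo_create_pin_map_novm() followed by a CPU write through the vmap and
 * teardown with xe_bo_unpin_map_no_vm(). The size and the flag combination
 * (XE_BO_FLAG_VRAM_IF_DGFX() | XE_BO_FLAG_GGTT) are illustrative only.
 *
 *	struct xe_bo *bo;
 *
 *	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
 *				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *				       XE_BO_FLAG_GGTT, true);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	xe_map_memcpy_to(xe, &bo->vmap, 0, data, data_size);
 *	...
 *	xe_bo_unpin_map_no_vm(bo);
 */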
2620 
__xe_bo_unpin_map_no_vm(void * arg)2621 static void __xe_bo_unpin_map_no_vm(void *arg)
2622 {
2623 	xe_bo_unpin_map_no_vm(arg);
2624 }
2625 
xe_managed_bo_create_pin_map(struct xe_device * xe,struct xe_tile * tile,size_t size,u32 flags)2626 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
2627 					   size_t size, u32 flags)
2628 {
2629 	struct xe_bo *bo;
2630 	int ret;
2631 
2632 	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
2633 	bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true);
2634 	if (IS_ERR(bo))
2635 		return bo;
2636 
2637 	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2638 	if (ret)
2639 		return ERR_PTR(ret);
2640 
2641 	return bo;
2642 }
2643 
xe_managed_bo_unpin_map_no_vm(struct xe_bo * bo)2644 void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo)
2645 {
2646 	devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo);
2647 }
2648 
xe_managed_bo_create_from_data(struct xe_device * xe,struct xe_tile * tile,const void * data,size_t size,u32 flags)2649 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
2650 					     const void *data, size_t size, u32 flags)
2651 {
2652 	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
2653 
2654 	if (IS_ERR(bo))
2655 		return bo;
2656 
2657 	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
2658 
2659 	return bo;
2660 }
2661 
2662 /**
2663  * xe_managed_bo_reinit_in_vram() - Re-create a managed BO in VRAM
2664  * @xe: xe device
2665  * @tile: Tile where the new buffer will be created
2666  * @src: Managed buffer object allocated in system memory
2667  *
2668  * Replace a managed src buffer object allocated in system memory with a new
2669  * one allocated in vram, copying the data between them.
2670  * The buffer object in VRAM is not going to have the same GGTT address; the
2671  * caller is responsible for making sure that any old references to it are updated.
2672  *
2673  * Returns 0 for success, negative error code otherwise.
2674  */
xe_managed_bo_reinit_in_vram(struct xe_device * xe,struct xe_tile * tile,struct xe_bo ** src)2675 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
2676 {
2677 	struct xe_bo *bo;
2678 	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
2679 
2680 	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
2681 				      XE_BO_FLAG_PINNED_NORESTORE);
2682 
2683 	xe_assert(xe, IS_DGFX(xe));
2684 	xe_assert(xe, !(*src)->vmap.is_iomem);
2685 
2686 	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
2687 					    xe_bo_size(*src), dst_flags);
2688 	if (IS_ERR(bo))
2689 		return PTR_ERR(bo);
2690 
2691 	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
2692 	*src = bo;
2693 
2694 	return 0;
2695 }
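
/*
 * Example (editorial sketch): uploading a blob as a managed BO and, on
 * discrete devices, re-creating it in VRAM. "blob", its size and the flag
 * combination are placeholders.
 *
 *	struct xe_bo *bo;
 *	int err;
 *
 *	bo = xe_managed_bo_create_from_data(xe, tile, blob, blob_size,
 *					    XE_BO_FLAG_SYSTEM |
 *					    XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *
 *	if (IS_DGFX(xe)) {
 *		err = xe_managed_bo_reinit_in_vram(xe, tile, &bo);
 *		if (err)
 *			return err;
 *	}
 */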
2696 
2697 /*
2698  * XXX: This is in the VM bind data path; we should likely calculate this once
2699  * and store it, recalculating if the BO is moved.
2700  */
vram_region_gpu_offset(struct ttm_resource * res)2701 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
2702 {
2703 	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
2704 
2705 	switch (res->mem_type) {
2706 	case XE_PL_STOLEN:
2707 		return xe_ttm_stolen_gpu_offset(xe);
2708 	case XE_PL_TT:
2709 	case XE_PL_SYSTEM:
2710 		return 0;
2711 	default:
2712 		return res_to_mem_region(res)->dpa_base;
2713 	}
2714 	return 0;
2715 }
2716 
2717 /**
2718  * xe_bo_pin_external - pin an external BO
2719  * @bo: buffer object to be pinned
2720  * @in_place: Pin in current placement, don't attempt to migrate.
2721  * @exec: The drm_exec transaction to use for exhaustive eviction.
2722  *
2723  * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2724  * BO. Unlike xe_bo_pin(), this function has its own set of
2725  * asserts and code to ensure evict / restore on suspend / resume.
2726  *
2727  * Returns 0 for success, negative error code otherwise.
2728  */
xe_bo_pin_external(struct xe_bo * bo,bool in_place,struct drm_exec * exec)2729 int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec)
2730 {
2731 	struct xe_device *xe = xe_bo_device(bo);
2732 	int err;
2733 
2734 	xe_assert(xe, !bo->vm);
2735 	xe_assert(xe, xe_bo_is_user(bo));
2736 
2737 	if (!xe_bo_is_pinned(bo)) {
2738 		if (!in_place) {
2739 			err = xe_bo_validate(bo, NULL, false, exec);
2740 			if (err)
2741 				return err;
2742 		}
2743 
2744 		spin_lock(&xe->pinned.lock);
2745 		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
2746 		spin_unlock(&xe->pinned.lock);
2747 	}
2748 
2749 	ttm_bo_pin(&bo->ttm);
2750 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2751 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2752 
2753 	/*
2754 	 * FIXME: If we always use the reserve / unreserve functions for locking
2755 	 * we do not need this.
2756 	 */
2757 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2758 
2759 	return 0;
2760 }
2761 
2762 /**
2763  * xe_bo_pin() - Pin a kernel bo after potentially migrating it
2764  * @bo: The kernel bo to pin.
2765  * @exec: The drm_exec transaction to use for exhaustive eviction.
2766  *
2767  * Attempts to migrate a bo to @bo->placement. If that succeeds,
2768  * pins the bo.
2769  *
2770  * Return: %0 on success, negative error code on migration failure.
2771  */
xe_bo_pin(struct xe_bo * bo,struct drm_exec * exec)2772 int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec)
2773 {
2774 	struct ttm_place *place = &bo->placements[0];
2775 	struct xe_device *xe = xe_bo_device(bo);
2776 	int err;
2777 
2778 	/* We currently don't expect user BOs to be pinned */
2779 	xe_assert(xe, !xe_bo_is_user(bo));
2780 
2781 	/* Pinned object must be in GGTT or have pinned flag */
2782 	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
2783 				   XE_BO_FLAG_GGTT));
2784 
2785 	/*
2786 	 * No reason we can't support pinning imported dma-bufs; we just don't
2787 	 * expect to pin an imported dma-buf.
2788 	 */
2789 	xe_assert(xe, !bo->ttm.base.import_attach);
2790 
2791 	/* We only expect at most 1 pin */
2792 	xe_assert(xe, !xe_bo_is_pinned(bo));
2793 
2794 	err = xe_bo_validate(bo, NULL, false, exec);
2795 	if (err)
2796 		return err;
2797 
2798 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2799 		spin_lock(&xe->pinned.lock);
2800 		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
2801 			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
2802 		else
2803 			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
2804 		spin_unlock(&xe->pinned.lock);
2805 	}
2806 
2807 	ttm_bo_pin(&bo->ttm);
2808 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2809 		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
2810 
2811 	/*
2812 	 * FIXME: If we always use the reserve / unreserve functions for locking
2813 	 * we do not need this.
2814 	 */
2815 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2816 
2817 	return 0;
2818 }
2819 
2820 /**
2821  * xe_bo_unpin_external - unpin an external BO
2822  * @bo: buffer object to be unpinned
2823  *
2824  * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
2825  * BO. Unlike xe_bo_unpin(), this function has its own set of
2826  * asserts and code to ensure evict / restore on suspend / resume.
2829  */
xe_bo_unpin_external(struct xe_bo * bo)2830 void xe_bo_unpin_external(struct xe_bo *bo)
2831 {
2832 	struct xe_device *xe = xe_bo_device(bo);
2833 
2834 	xe_assert(xe, !bo->vm);
2835 	xe_assert(xe, xe_bo_is_pinned(bo));
2836 	xe_assert(xe, xe_bo_is_user(bo));
2837 
2838 	spin_lock(&xe->pinned.lock);
2839 	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
2840 		list_del_init(&bo->pinned_link);
2841 	spin_unlock(&xe->pinned.lock);
2842 
2843 	ttm_bo_unpin(&bo->ttm);
2844 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2845 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2846 
2847 	/*
2848 	 * FIXME: If we always use the reserve / unreserve functions for locking
2849 	 * we do not need this.
2850 	 */
2851 	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
2852 }
2853 
xe_bo_unpin(struct xe_bo * bo)2854 void xe_bo_unpin(struct xe_bo *bo)
2855 {
2856 	struct ttm_place *place = &bo->placements[0];
2857 	struct xe_device *xe = xe_bo_device(bo);
2858 
2859 	xe_assert(xe, !bo->ttm.base.import_attach);
2860 	xe_assert(xe, xe_bo_is_pinned(bo));
2861 
2862 	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
2863 		spin_lock(&xe->pinned.lock);
2864 		xe_assert(xe, !list_empty(&bo->pinned_link));
2865 		list_del_init(&bo->pinned_link);
2866 		spin_unlock(&xe->pinned.lock);
2867 
2868 		if (bo->backup_obj) {
2869 			if (xe_bo_is_pinned(bo->backup_obj))
2870 				ttm_bo_unpin(&bo->backup_obj->ttm);
2871 			xe_bo_put(bo->backup_obj);
2872 			bo->backup_obj = NULL;
2873 		}
2874 	}
2875 	ttm_bo_unpin(&bo->ttm);
2876 	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
2877 		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
2878 }
2879 
2880 /**
2881  * xe_bo_validate() - Make sure the bo is in an allowed placement
2882  * @bo: The bo.
2883  * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
2884  *      NULL. Used together with @allow_res_evict.
2885  * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
2886  *                   reservation object.
2887  * @exec: The drm_exec transaction to use for exhaustive eviction.
2888  *
2889  * Make sure the bo is in an allowed placement, migrating it if necessary. If
2890  * needed, other bos will be evicted. If bos selected for eviction share
2891  * the @vm's reservation object, they can be evicted only if @allow_res_evict
2892  * is set to true; otherwise they will be bypassed.
2893  *
2894  * Return: 0 on success, negative error code on failure. May return
2895  * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
2896  */
xe_bo_validate(struct xe_bo * bo,struct xe_vm * vm,bool allow_res_evict,struct drm_exec * exec)2897 int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
2898 		   struct drm_exec *exec)
2899 {
2900 	struct ttm_operation_ctx ctx = {
2901 		.interruptible = true,
2902 		.no_wait_gpu = false,
2903 		.gfp_retry_mayfail = true,
2904 	};
2905 	int ret;
2906 
2907 	if (xe_bo_is_pinned(bo))
2908 		return 0;
2909 
2910 	if (vm) {
2911 		lockdep_assert_held(&vm->lock);
2912 		xe_vm_assert_held(vm);
2913 
2914 		ctx.allow_res_evict = allow_res_evict;
2915 		ctx.resv = xe_vm_resv(vm);
2916 	}
2917 
2918 	xe_vm_set_validating(vm, allow_res_evict);
2919 	trace_xe_bo_validate(bo);
2920 	xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
2921 	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
2922 	xe_vm_clear_validating(vm, allow_res_evict);
2923 
2924 	return ret;
2925 }
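
/*
 * Example (editorial sketch): validating an external BO inside the
 * xe_validation_guard() / drm_exec transaction pattern used elsewhere in
 * this file, so that contention, exhaustive eviction and OOM retries are
 * all handled.
 *
 *	struct xe_validation_ctx ctx;
 *	struct drm_exec exec;
 *	int err = 0;
 *
 *	xe_validation_guard(&ctx, &xe->val, &exec,
 *			    (struct xe_val_flags) {.interruptible = true}, err) {
 *		err = drm_exec_lock_obj(&exec, &bo->ttm.base);
 *		drm_exec_retry_on_contention(&exec);
 *		if (err)
 *			break;
 *
 *		err = xe_bo_validate(bo, NULL, false, &exec);
 *		if (err) {
 *			drm_exec_retry_on_contention(&exec);
 *			xe_validation_retry_on_oom(&ctx, &err);
 *			break;
 *		}
 *	}
 *	return err;
 */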
2926 
xe_bo_is_xe_bo(struct ttm_buffer_object * bo)2927 bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
2928 {
2929 	if (bo->destroy == &xe_ttm_bo_destroy)
2930 		return true;
2931 
2932 	return false;
2933 }
2934 
2935 /*
2936  * Resolve a BO address. There is no assert to check if the proper lock is held
2937  * so it should only be used in cases where it is not fatal to get the wrong
2938  * address, such as printing debug information, but not in cases where memory is
2939  * written based on this result.
2940  */
__xe_bo_addr(struct xe_bo * bo,u64 offset,size_t page_size)2941 dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2942 {
2943 	struct xe_device *xe = xe_bo_device(bo);
2944 	struct xe_res_cursor cur;
2945 	u64 page;
2946 
2947 	xe_assert(xe, page_size <= PAGE_SIZE);
2948 	page = offset >> PAGE_SHIFT;
2949 	offset &= (PAGE_SIZE - 1);
2950 
2951 	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
2952 		xe_assert(xe, bo->ttm.ttm);
2953 
2954 		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
2955 				page_size, &cur);
2956 		return xe_res_dma(&cur) + offset;
2957 	} else {
2958 		struct xe_res_cursor cur;
2959 
2960 		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
2961 			     page_size, &cur);
2962 		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
2963 	}
2964 }
2965 
xe_bo_addr(struct xe_bo * bo,u64 offset,size_t page_size)2966 dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
2967 {
2968 	if (!READ_ONCE(bo->ttm.pin_count))
2969 		xe_bo_assert_held(bo);
2970 	return __xe_bo_addr(bo, offset, page_size);
2971 }
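
/*
 * Example (editorial sketch): looking up the DMA/GPU address of the first
 * page of a pinned BO, e.g. for debug output. PAGE_SIZE is used as the page
 * granule purely for illustration.
 *
 *	dma_addr_t addr = xe_bo_addr(bo, 0, PAGE_SIZE);
 */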
2972 
xe_bo_vmap(struct xe_bo * bo)2973 int xe_bo_vmap(struct xe_bo *bo)
2974 {
2975 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
2976 	void *virtual;
2977 	bool is_iomem;
2978 	int ret;
2979 
2980 	xe_bo_assert_held(bo);
2981 
2982 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
2983 			!force_contiguous(bo->flags)))
2984 		return -EINVAL;
2985 
2986 	if (!iosys_map_is_null(&bo->vmap))
2987 		return 0;
2988 
2989 	/*
2990 	 * We use this more or less deprecated interface for now since
2991 	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
2992 	 * single page bos, which is done here.
2993 	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
2994 	 * to use struct iosys_map.
2995 	 */
2996 	ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
2997 	if (ret)
2998 		return ret;
2999 
3000 	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
3001 	if (is_iomem)
3002 		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
3003 	else
3004 		iosys_map_set_vaddr(&bo->vmap, virtual);
3005 
3006 	return 0;
3007 }
3008 
__xe_bo_vunmap(struct xe_bo * bo)3009 static void __xe_bo_vunmap(struct xe_bo *bo)
3010 {
3011 	if (!iosys_map_is_null(&bo->vmap)) {
3012 		iosys_map_clear(&bo->vmap);
3013 		ttm_bo_kunmap(&bo->kmap);
3014 	}
3015 }
3016 
xe_bo_vunmap(struct xe_bo * bo)3017 void xe_bo_vunmap(struct xe_bo *bo)
3018 {
3019 	xe_bo_assert_held(bo);
3020 	__xe_bo_vunmap(bo);
3021 }
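
/*
 * Example (editorial sketch): the vmap/vunmap pattern for a BO created with
 * XE_BO_FLAG_NEEDS_CPU_ACCESS, with the BO lock held as required by the
 * asserts above. "payload" and its size are placeholders.
 *
 *	int err;
 *
 *	xe_bo_lock(bo, false);
 *	err = xe_bo_vmap(bo);
 *	if (!err) {
 *		xe_map_memcpy_to(xe, &bo->vmap, 0, payload, payload_size);
 *		xe_bo_vunmap(bo);
 *	}
 *	xe_bo_unlock(bo);
 */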
3022 
gem_create_set_pxp_type(struct xe_device * xe,struct xe_bo * bo,u64 value)3023 static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
3024 {
3025 	if (value == DRM_XE_PXP_TYPE_NONE)
3026 		return 0;
3027 
3028 	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
3029 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
3030 		return -EINVAL;
3031 
3032 	return xe_pxp_key_assign(xe->pxp, bo);
3033 }
3034 
3035 typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
3036 					     struct xe_bo *bo,
3037 					     u64 value);
3038 
3039 static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
3040 	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
3041 };
3042 
gem_create_user_ext_set_property(struct xe_device * xe,struct xe_bo * bo,u64 extension)3043 static int gem_create_user_ext_set_property(struct xe_device *xe,
3044 					    struct xe_bo *bo,
3045 					    u64 extension)
3046 {
3047 	u64 __user *address = u64_to_user_ptr(extension);
3048 	struct drm_xe_ext_set_property ext;
3049 	int err;
3050 	u32 idx;
3051 
3052 	err = copy_from_user(&ext, address, sizeof(ext));
3053 	if (XE_IOCTL_DBG(xe, err))
3054 		return -EFAULT;
3055 
3056 	if (XE_IOCTL_DBG(xe, ext.property >=
3057 			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
3058 	    XE_IOCTL_DBG(xe, ext.pad) ||
3059 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
3060 		return -EINVAL;
3061 
3062 	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
3063 	if (!gem_create_set_property_funcs[idx])
3064 		return -EINVAL;
3065 
3066 	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
3067 }
3068 
3069 typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
3070 					       struct xe_bo *bo,
3071 					       u64 extension);
3072 
3073 static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
3074 	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
3075 };
3076 
3077 #define MAX_USER_EXTENSIONS	16
gem_create_user_extensions(struct xe_device * xe,struct xe_bo * bo,u64 extensions,int ext_number)3078 static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
3079 				      u64 extensions, int ext_number)
3080 {
3081 	u64 __user *address = u64_to_user_ptr(extensions);
3082 	struct drm_xe_user_extension ext;
3083 	int err;
3084 	u32 idx;
3085 
3086 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
3087 		return -E2BIG;
3088 
3089 	err = copy_from_user(&ext, address, sizeof(ext));
3090 	if (XE_IOCTL_DBG(xe, err))
3091 		return -EFAULT;
3092 
3093 	if (XE_IOCTL_DBG(xe, ext.pad) ||
3094 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
3095 		return -EINVAL;
3096 
3097 	idx = array_index_nospec(ext.name,
3098 				 ARRAY_SIZE(gem_create_user_extension_funcs));
3099 	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
3100 	if (XE_IOCTL_DBG(xe, err))
3101 		return err;
3102 
3103 	if (ext.next_extension)
3104 		return gem_create_user_extensions(xe, bo, ext.next_extension,
3105 						  ++ext_number);
3106 
3107 	return 0;
3108 }
3109 
xe_gem_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3110 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
3111 			struct drm_file *file)
3112 {
3113 	struct xe_device *xe = to_xe_device(dev);
3114 	struct xe_file *xef = to_xe_file(file);
3115 	struct drm_xe_gem_create *args = data;
3116 	struct xe_validation_ctx ctx;
3117 	struct drm_exec exec;
3118 	struct xe_vm *vm = NULL;
3119 	struct xe_bo *bo;
3120 	unsigned int bo_flags;
3121 	u32 handle;
3122 	int err;
3123 
3124 	if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
3125 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3126 		return -EINVAL;
3127 
3128 	/* at least one valid memory placement must be specified */
3129 	if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
3130 			 !args->placement))
3131 		return -EINVAL;
3132 
3133 	if (XE_IOCTL_DBG(xe, args->flags &
3134 			 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
3135 			   DRM_XE_GEM_CREATE_FLAG_SCANOUT |
3136 			   DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
3137 		return -EINVAL;
3138 
3139 	if (XE_IOCTL_DBG(xe, args->handle))
3140 		return -EINVAL;
3141 
3142 	if (XE_IOCTL_DBG(xe, !args->size))
3143 		return -EINVAL;
3144 
3145 	if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
3146 		return -EINVAL;
3147 
3148 	if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
3149 		return -EINVAL;
3150 
3151 	bo_flags = 0;
3152 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
3153 		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
3154 
3155 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
3156 		bo_flags |= XE_BO_FLAG_SCANOUT;
3157 
3158 	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
3159 
3160 	/* CCS formats need physical placement at a 64K alignment in VRAM. */
3161 	if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
3162 	    (bo_flags & XE_BO_FLAG_SCANOUT) &&
3163 	    !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
3164 	    IS_ALIGNED(args->size, SZ_64K))
3165 		bo_flags |= XE_BO_FLAG_NEEDS_64K;
3166 
3167 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
3168 		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
3169 			return -EINVAL;
3170 
3171 		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
3172 	}
3173 
3174 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
3175 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
3176 		return -EINVAL;
3177 
3178 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
3179 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
3180 		return -EINVAL;
3181 
3182 	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
3183 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
3184 		return -EINVAL;
3185 
3186 	if (args->vm_id) {
3187 		vm = xe_vm_lookup(xef, args->vm_id);
3188 		if (XE_IOCTL_DBG(xe, !vm))
3189 			return -ENOENT;
3190 	}
3191 
3192 	err = 0;
3193 	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
3194 			    err) {
3195 		if (vm) {
3196 			err = xe_vm_drm_exec_lock(vm, &exec);
3197 			drm_exec_retry_on_contention(&exec);
3198 			if (err)
3199 				break;
3200 		}
3201 		bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
3202 				       bo_flags, &exec);
3203 		drm_exec_retry_on_contention(&exec);
3204 		if (IS_ERR(bo)) {
3205 			err = PTR_ERR(bo);
3206 			xe_validation_retry_on_oom(&ctx, &err);
3207 			break;
3208 		}
3209 	}
3210 	if (err)
3211 		goto out_vm;
3212 
3213 	if (args->extensions) {
3214 		err = gem_create_user_extensions(xe, bo, args->extensions, 0);
3215 		if (err)
3216 			goto out_bulk;
3217 	}
3218 
3219 	err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
3220 	if (err)
3221 		goto out_bulk;
3222 
3223 	args->handle = handle;
3224 	goto out_put;
3225 
3226 out_bulk:
3227 	if (vm && !xe_vm_in_fault_mode(vm)) {
3228 		xe_vm_lock(vm, false);
3229 		__xe_bo_unset_bulk_move(bo);
3230 		xe_vm_unlock(vm);
3231 	}
3232 out_put:
3233 	xe_bo_put(bo);
3234 out_vm:
3235 	if (vm)
3236 		xe_vm_put(vm);
3237 
3238 	return err;
3239 }
3240 
xe_gem_mmap_offset_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3241 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
3242 			     struct drm_file *file)
3243 {
3244 	struct xe_device *xe = to_xe_device(dev);
3245 	struct drm_xe_gem_mmap_offset *args = data;
3246 	struct drm_gem_object *gem_obj;
3247 
3248 	if (XE_IOCTL_DBG(xe, args->extensions) ||
3249 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3250 		return -EINVAL;
3251 
3252 	if (XE_IOCTL_DBG(xe, args->flags &
3253 			 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
3254 		return -EINVAL;
3255 
3256 	if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
3257 		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
3258 			return -EINVAL;
3259 
3260 		if (XE_IOCTL_DBG(xe, args->handle))
3261 			return -EINVAL;
3262 
3263 		if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
3264 			return -EINVAL;
3265 
3266 		BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
3267 			      SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
3268 		args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
3269 		return 0;
3270 	}
3271 
3272 	gem_obj = drm_gem_object_lookup(file, args->handle);
3273 	if (XE_IOCTL_DBG(xe, !gem_obj))
3274 		return -ENOENT;
3275 
3276 	/* The mmap offset was set up at BO allocation time. */
3277 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
3278 
3279 	xe_bo_put(gem_to_xe_bo(gem_obj));
3280 	return 0;
3281 }
3282 
3283 /**
3284  * xe_bo_lock() - Lock the buffer object's dma_resv object
3285  * @bo: The struct xe_bo whose lock is to be taken
3286  * @intr: Whether to perform any wait interruptible
3287  *
3288  * Locks the buffer object's dma_resv object. If the buffer object is
3289  * pointing to a shared dma_resv object, that shared lock is locked.
3290  *
3291  * Return: 0 on success, -EINTR if @intr is true and the wait for a
3292  * contended lock was interrupted. If @intr is set to false, the
3293  * function always returns 0.
3294  */
xe_bo_lock(struct xe_bo * bo,bool intr)3295 int xe_bo_lock(struct xe_bo *bo, bool intr)
3296 {
3297 	if (intr)
3298 		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);
3299 
3300 	dma_resv_lock(bo->ttm.base.resv, NULL);
3301 
3302 	return 0;
3303 }
3304 
3305 /**
3306  * xe_bo_unlock() - Unlock the buffer object's dma_resv object
3307  * @bo: The struct xe_bo whose lock is to be released.
3308  *
3309  * Unlock a buffer object lock that was locked by xe_bo_lock().
3310  */
xe_bo_unlock(struct xe_bo * bo)3311 void xe_bo_unlock(struct xe_bo *bo)
3312 {
3313 	dma_resv_unlock(bo->ttm.base.resv);
3314 }
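
/*
 * Example (editorial sketch): interruptible locking with the usual -EINTR
 * handling.
 *
 *	int err;
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *
 *	... inspect or modify the BO under its dma_resv ...
 *
 *	xe_bo_unlock(bo);
 */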
3315 
3316 /**
3317  * xe_bo_can_migrate - Whether a buffer object likely can be migrated
3318  * @bo: The buffer object to migrate
3319  * @mem_type: The TTM memory type intended to migrate to
3320  *
3321  * Check whether the buffer object supports migration to the
3322  * given memory type. Note that pinning may affect the ability to migrate as
3323  * returned by this function.
3324  *
3325  * This function is primarily intended as a helper for checking the
3326  * possibility to migrate buffer objects and can be called without
3327  * the object lock held.
3328  *
3329  * Return: true if migration is possible, false otherwise.
3330  */
xe_bo_can_migrate(struct xe_bo * bo,u32 mem_type)3331 bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
3332 {
3333 	unsigned int cur_place;
3334 
3335 	if (bo->ttm.type == ttm_bo_type_kernel)
3336 		return true;
3337 
3338 	if (bo->ttm.type == ttm_bo_type_sg)
3339 		return false;
3340 
3341 	for (cur_place = 0; cur_place < bo->placement.num_placement;
3342 	     cur_place++) {
3343 		if (bo->placements[cur_place].mem_type == mem_type)
3344 			return true;
3345 	}
3346 
3347 	return false;
3348 }
3349 
xe_place_from_ttm_type(u32 mem_type,struct ttm_place * place)3350 static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
3351 {
3352 	memset(place, 0, sizeof(*place));
3353 	place->mem_type = mem_type;
3354 }
3355 
3356 /**
3357  * xe_bo_migrate - Migrate an object to the desired region id
3358  * @bo: The buffer object to migrate.
3359  * @mem_type: The TTM region type to migrate to.
3360  * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
3361  * a default interruptible ctx is to be used.
3362  * @exec: The drm_exec transaction to use for exhaustive eviction.
3363  *
3364  * Attempt to migrate the buffer object to the desired memory region. The
3365  * buffer object may not be pinned, and must be locked.
3366  * On successful completion, the object memory type will be updated,
3367  * but an async migration task may not have completed yet, and to
3368  * accomplish that, the object's kernel fences must be signaled with
3369  * the object lock held.
3370  *
3371  * Return: 0 on success. Negative error code on failure. In particular may
3372  * return -EINTR or -ERESTARTSYS if signal pending.
3373  */
xe_bo_migrate(struct xe_bo * bo,u32 mem_type,struct ttm_operation_ctx * tctx,struct drm_exec * exec)3374 int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
3375 		  struct drm_exec *exec)
3376 {
3377 	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
3378 	struct ttm_operation_ctx ctx = {
3379 		.interruptible = true,
3380 		.no_wait_gpu = false,
3381 		.gfp_retry_mayfail = true,
3382 	};
3383 	struct ttm_placement placement;
3384 	struct ttm_place requested;
3385 
3386 	xe_bo_assert_held(bo);
3387 	tctx = tctx ? tctx : &ctx;
3388 
3389 	if (bo->ttm.resource->mem_type == mem_type)
3390 		return 0;
3391 
3392 	if (xe_bo_is_pinned(bo))
3393 		return -EBUSY;
3394 
3395 	if (!xe_bo_can_migrate(bo, mem_type))
3396 		return -EINVAL;
3397 
3398 	xe_place_from_ttm_type(mem_type, &requested);
3399 	placement.num_placement = 1;
3400 	placement.placement = &requested;
3401 
3402 	/*
3403 	 * Stolen memory needs to be handled like the VRAM handling below if we
3404 	 * ever need to support it.
3405 	 */
3406 	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);
3407 
3408 	if (mem_type_is_vram(mem_type)) {
3409 		u32 c = 0;
3410 
3411 		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
3412 	}
3413 
3414 	if (!tctx->no_wait_gpu)
3415 		xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
3416 	return ttm_bo_validate(&bo->ttm, &placement, tctx);
3417 }
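
/*
 * Editor's illustrative sketch (not part of xe_bo.c): migrating a BO to
 * system-memory GTT while honouring the documented requirements (placement
 * supported, object locked, not pinned). NULL is passed as @tctx to use the
 * default interruptible context; the drm_exec handling around the lock is
 * simplified and the helper name is hypothetical.
 */
static int example_migrate_to_tt(struct xe_bo *bo, struct drm_exec *exec)
{
	int err;

	/* Lockless pre-check, see xe_bo_can_migrate() above. */
	if (!xe_bo_can_migrate(bo, XE_PL_TT))
		return -EINVAL;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	err = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);

	xe_bo_unlock(bo);
	return err;
}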
3418 
3419 /**
3420  * xe_bo_evict - Evict an object to evict placement
3421  * @bo: The buffer object to migrate.
3422  * @exec: The drm_exec transaction to use for exhaustive eviction.
3423  *
3424  * On successful completion, the object memory will be moved to evict
3425  * placement. This function blocks until the object has been fully moved.
3426  *
3427  * Return: 0 on success. Negative error code on failure.
3428  */
3429 int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec)
3430 {
3431 	struct ttm_operation_ctx ctx = {
3432 		.interruptible = false,
3433 		.no_wait_gpu = false,
3434 		.gfp_retry_mayfail = true,
3435 	};
3436 	struct ttm_placement placement;
3437 	int ret;
3438 
3439 	xe_evict_flags(&bo->ttm, &placement);
3440 	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
3441 	if (ret)
3442 		return ret;
3443 
3444 	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
3445 			      false, MAX_SCHEDULE_TIMEOUT);
3446 
3447 	return 0;
3448 }
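
/*
 * Editor's illustrative sketch (not part of xe_bo.c): evicting a BO under
 * its lock. xe_bo_evict() blocks until the move has fully completed, so no
 * extra fence wait is needed here. The locking shown is simplified and the
 * helper name is hypothetical.
 */
static int example_evict_bo(struct xe_bo *bo, struct drm_exec *exec)
{
	int err;

	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	err = xe_bo_evict(bo, exec);

	xe_bo_unlock(bo);
	return err;
}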
3449 
3450 /**
3451  * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
3452  * placed in system memory.
3453  * @bo: The xe_bo
3454  *
3455  * Return: true if extra pages need to be allocated, false otherwise.
3456  */
3457 bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
3458 {
3459 	struct xe_device *xe = xe_bo_device(bo);
3460 
3461 	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
3462 		return false;
3463 
3464 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
3465 		return false;
3466 
3467 	/* On discrete GPUs, if the GPU can access this buffer from
3468 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
3469 	 * can't be used since there's no CCS storage associated with
3470 	 * non-VRAM addresses.
3471 	 */
3472 	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
3473 		return false;
3474 
3475 	/*
3476 	 * Compression implies coh_none, therefore we know for sure that WB
3477 	 * memory can't currently use compression, which is likely one of the
3478 	 * common cases.
3479 	 */
3480 	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
3481 		return false;
3482 
3483 	return true;
3484 }
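
/*
 * Editor's illustrative sketch (not part of xe_bo.c): how a backing-store
 * sizing path might consume xe_bo_needs_ccs_pages(). EXAMPLE_CCS_RATIO and
 * the helper name are assumptions for illustration only; the real driver
 * derives the CCS backing size internally when creating the TTM tt.
 */
#define EXAMPLE_CCS_RATIO	256	/* assumed bytes of main memory per CCS byte */

static unsigned long example_extra_ccs_pages(struct xe_bo *bo)
{
	if (!xe_bo_needs_ccs_pages(bo))
		return 0;

	/* One CCS byte per EXAMPLE_CCS_RATIO bytes of object, page aligned. */
	return DIV_ROUND_UP(bo->ttm.base.size / EXAMPLE_CCS_RATIO, PAGE_SIZE);
}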
3485 
3486 /**
3487  * __xe_bo_release_dummy() - Dummy kref release function
3488  * @kref: The embedded struct kref.
3489  *
3490  * Dummy release function for xe_bo_put_deferred(). Keep off.
3491  */
3492 void __xe_bo_release_dummy(struct kref *kref)
3493 {
3494 }
3495 
3496 /**
3497  * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
3498  * @deferred: The lockless list used for the call to xe_bo_put_deferred().
3499  *
3500  * Puts all bos whose put was deferred by xe_bo_put_deferred().
3501  * The @deferred list can be either an onstack local list or a global
3502  * shared list used by a workqueue.
3503  */
3504 void xe_bo_put_commit(struct llist_head *deferred)
3505 {
3506 	struct llist_node *freed;
3507 	struct xe_bo *bo, *next;
3508 
3509 	if (!deferred)
3510 		return;
3511 
3512 	freed = llist_del_all(deferred);
3513 	if (!freed)
3514 		return;
3515 
3516 	llist_for_each_entry_safe(bo, next, freed, freed)
3517 		drm_gem_object_free(&bo->ttm.base.refcount);
3518 }
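
/*
 * Editor's illustrative sketch (not part of xe_bo.c): the deferred-put
 * pattern that xe_bo_put_commit() is designed for. It assumes the
 * xe_bo_put_deferred() helper declared in xe_bo.h; the batch-release helper
 * shown here is hypothetical.
 */
static void example_put_bos_deferred(struct xe_bo **bos, unsigned int count)
{
	LLIST_HEAD(deferred);
	unsigned int i;

	for (i = 0; i < count; i++)
		xe_bo_put_deferred(bos[i], &deferred);

	/* Drop all the deferred references in one go, e.g. outside hot locks. */
	xe_bo_put_commit(&deferred);
}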
3519 
3520 static void xe_bo_dev_work_func(struct work_struct *work)
3521 {
3522 	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);
3523 
3524 	xe_bo_put_commit(&bo_dev->async_list);
3525 }
3526 
3527 /**
3528  * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
3529  * @bo_dev: The BO dev structure
3530  */
3531 void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
3532 {
3533 	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
3534 }
3535 
3536 /**
3537  * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
3538  * @bo_dev: The BO dev structure
3539  */
3540 void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
3541 {
3542 	flush_work(&bo_dev->async_free);
3543 }
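
/*
 * Editor's illustrative sketch (not part of xe_bo.c): how the async-free
 * machinery initialized by xe_bo_dev_init() is meant to be driven, pieced
 * together from xe_bo_dev_work_func() above. It assumes the
 * xe_bo_put_deferred() helper from xe_bo.h; the real driver wraps this in
 * its own helper.
 */
static void example_put_bo_async(struct xe_bo_dev *bo_dev, struct xe_bo *bo)
{
	/* Defer the final put to the worker instead of dropping it here. */
	if (xe_bo_put_deferred(bo, &bo_dev->async_list))
		schedule_work(&bo_dev->async_free);
}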
3544 
3545 void xe_bo_put(struct xe_bo *bo)
3546 {
3547 	struct xe_tile *tile;
3548 	u8 id;
3549 
3550 	might_sleep();
3551 	if (bo) {
3552 #ifdef CONFIG_PROC_FS
3553 		if (bo->client)
3554 			might_lock(&bo->client->bos_lock);
3555 #endif
3556 		for_each_tile(tile, xe_bo_device(bo), id)
3557 			if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
3558 				xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt);
3559 		drm_gem_object_put(&bo->ttm.base);
3560 	}
3561 }
3562 
3563 /**
3564  * xe_bo_dumb_create - Create a dumb bo as backing for a fb
3565  * @file_priv: ...
3566  * @dev: ...
3567  * @args: ...
3568  *
3569  * See dumb_create() hook in include/drm/drm_drv.h
3570  *
3571  * Return: ...
3572  */
3573 int xe_bo_dumb_create(struct drm_file *file_priv,
3574 		      struct drm_device *dev,
3575 		      struct drm_mode_create_dumb *args)
3576 {
3577 	struct xe_device *xe = to_xe_device(dev);
3578 	struct xe_bo *bo;
3579 	uint32_t handle;
3580 	int cpp = DIV_ROUND_UP(args->bpp, 8);
3581 	int err;
3582 	u32 page_size = max_t(u32, PAGE_SIZE,
3583 		xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
3584 
3585 	args->pitch = ALIGN(args->width * cpp, 64);
3586 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
3587 			   page_size);
3588 
3589 	bo = xe_bo_create_user(xe, NULL, args->size,
3590 			       DRM_XE_GEM_CPU_CACHING_WC,
3591 			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
3592 			       XE_BO_FLAG_SCANOUT |
3593 			       XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
3594 	if (IS_ERR(bo))
3595 		return PTR_ERR(bo);
3596 
3597 	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
3598 	/* drop reference from allocate - handle holds it now */
3599 	drm_gem_object_put(&bo->ttm.base);
3600 	if (!err)
3601 		args->handle = handle;
3602 	return err;
3603 }
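
/*
 * Editor's worked example (not part of xe_bo.c), illustrating the pitch and
 * size math in xe_bo_dumb_create() above for a 1920x1080 XRGB8888 dumb
 * buffer (bpp = 32), assuming 4K pages:
 *
 *   cpp   = DIV_ROUND_UP(32, 8)       = 4
 *   pitch = ALIGN(1920 * 4, 64)       = 7680
 *   size  = ALIGN(7680 * 1080, SZ_4K) = 8294400 bytes (exactly 2025 pages)
 *
 * On VRAM that needs 64K pages (XE_VRAM_FLAGS_NEED64K), the same buffer
 * would instead round up to ALIGN(8294400, SZ_64K) = 8323072 bytes.
 */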
3604 
3605 void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
3606 {
3607 	struct ttm_buffer_object *tbo = &bo->ttm;
3608 	struct ttm_device *bdev = tbo->bdev;
3609 
3610 	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
3611 
3612 	list_del_init(&bo->vram_userfault_link);
3613 }
3614 
3615 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
3616 #include "tests/xe_bo.c"
3617 #endif
3618