// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_bo.h"

#include <linux/dma-buf.h>
#include <linux/nospec.h>

#include <drm/drm_drv.h>
#include <drm/drm_dumb_buffers.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_managed.h>
#include <drm/ttm/ttm_backup.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>

#include <kunit/static_stub.h>

#include <trace/events/gpu_mem.h>

#include "xe_device.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_ggtt.h"
#include "xe_map.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_preempt_fence.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_shrinker.h"
#include "xe_sriov_vf_ccs.h"
#include "xe_tile.h"
#include "xe_trace_bo.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
#include "xe_vram_types.h"

const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
	[XE_PL_SYSTEM] = "system",
	[XE_PL_TT] = "gtt",
	[XE_PL_VRAM0] = "vram0",
	[XE_PL_VRAM1] = "vram1",
	[XE_PL_STOLEN] = "stolen"
};

static const struct ttm_place sys_placement_flags = {
	.fpfn = 0,
	.lpfn = 0,
	.mem_type = XE_PL_SYSTEM,
	.flags = 0,
};

static struct ttm_placement sys_placement = {
	.num_placement = 1,
	.placement = &sys_placement_flags,
};

static struct ttm_placement purge_placement;

static const struct ttm_place tt_placement_flags[] = {
	{
		.fpfn = 0,
		.lpfn = 0,
		.mem_type = XE_PL_TT,
		.flags = TTM_PL_FLAG_DESIRED,
	},
	{
		.fpfn = 0,
		.lpfn = 0,
		.mem_type = XE_PL_SYSTEM,
		.flags = TTM_PL_FLAG_FALLBACK,
	}
};

static struct ttm_placement tt_placement = {
	.num_placement = 2,
	.placement = tt_placement_flags,
};

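/*
 * Iterate over only the VRAM placement bits set in @bo_flags__. The outer
 * loop walks every candidate bit up to XE_BO_FLAG_VRAM_MASK; for_each_if()
 * then filters the walk down to the bits actually set in @bo_flags__,
 * assigning each match to @bit__ in turn.
 */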
#define for_each_set_bo_vram_flag(bit__, bo_flags__) \
	for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \
		for_each_if(((bit__) = __bit_tmp) & (bo_flags__) & XE_BO_FLAG_VRAM_MASK)

bool mem_type_is_vram(u32 mem_type)
{
	return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
}

static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
{
	return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
}

static bool resource_is_vram(struct ttm_resource *res)
{
	return mem_type_is_vram(res->mem_type);
}

bool xe_bo_is_vram(struct xe_bo *bo)
{
	return resource_is_vram(bo->ttm.resource) ||
		resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
}

bool xe_bo_is_stolen(struct xe_bo *bo)
{
	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
}

/**
 * xe_bo_has_single_placement - check if BO is placed only in one memory location
 * @bo: The BO
 *
 * This function checks whether a given BO is placed in only one memory location.
 *
 * Returns: true if the BO is placed in a single memory location, false otherwise.
 */
bool xe_bo_has_single_placement(struct xe_bo *bo)
{
	return bo->placement.num_placement == 1;
}

/**
 * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
 * @bo: The BO
 *
 * The stolen memory is accessed through the PCI BAR for both DGFX and some
 * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
 *
 * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
 */
bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
{
	return xe_bo_is_stolen(bo) &&
		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
}

/**
 * xe_bo_is_vm_bound - check if BO has any mappings through VM_BIND
 * @bo: The BO
 *
 * Check if a given bo is bound through VM_BIND. This requires the
 * reservation lock for the BO to be held.
 *
 * Returns: true if the BO is bound through VM_BIND, false otherwise.
 */
bool xe_bo_is_vm_bound(struct xe_bo *bo)
{
	xe_bo_assert_held(bo);

	return !list_empty(&bo->ttm.base.gpuva.list);
}

static bool xe_bo_is_user(struct xe_bo *bo)
{
	return bo->flags & XE_BO_FLAG_USER;
}

static struct xe_migrate *
mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
{
	struct xe_tile *tile;

	xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
	tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
	return tile->migrate;
}

static struct xe_vram_region *res_to_mem_region(struct ttm_resource *res)
{
	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
	struct ttm_resource_manager *mgr;
	struct xe_ttm_vram_mgr *vram_mgr;

	xe_assert(xe, resource_is_vram(res));
	mgr = ttm_manager_type(&xe->ttm, res->mem_type);
	vram_mgr = to_xe_ttm_vram_mgr(mgr);

	return container_of(vram_mgr, struct xe_vram_region, ttm);
}

static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
			   u32 bo_flags, u32 *c)
{
	if (bo_flags & XE_BO_FLAG_SYSTEM) {
		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));

		bo->placements[*c] = (struct ttm_place) {
			.mem_type = XE_PL_TT,
			.flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
				TTM_PL_FLAG_FALLBACK : 0,
		};
		*c += 1;
	}
}

static bool force_contiguous(u32 bo_flags)
{
	if (bo_flags & XE_BO_FLAG_STOLEN)
		return true; /* users expect this */
	else if (bo_flags & XE_BO_FLAG_PINNED &&
		 !(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
		return true; /* needs vmap */
	else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
		return true;

	/*
	 * Objects pinned in VRAM for eviction / restore on suspend / resume
	 * must be contiguous; also, only contiguous BOs support xe_bo_vmap().
	 */
	return bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS &&
	       bo_flags & XE_BO_FLAG_PINNED;
}

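/*
 * Example of the bit arithmetic below, assuming XE_BO_FLAG_VRAM0 is the
 * lowest VRAM placement bit, BIT(n): shifting XE_BO_FLAG_VRAM0 right by
 * n - 1 leaves BIT(1), so __ffs() - 1 yields tile 0; XE_BO_FLAG_VRAM1
 * (BIT(n + 1)) likewise yields tile 1, and so on.
 */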
static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag)
{
	xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK);
	xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0);

	return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1;
}

static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 bo_flags, u32 vram_flag,
					   enum ttm_bo_type type)
{
	u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag);

	xe_assert(xe, tile_id < xe->info.tile_count);

	if (type == ttm_bo_type_kernel && !(bo_flags & XE_BO_FLAG_FORCE_USER_VRAM))
		return xe->tiles[tile_id].mem.kernel_vram->placement;
	else
		return xe->tiles[tile_id].mem.vram->placement;
}

static void add_vram(struct xe_device *xe, struct xe_bo *bo,
		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
{
	struct ttm_place place = { .mem_type = mem_type };
	struct ttm_resource_manager *mgr = ttm_manager_type(&xe->ttm, mem_type);
	struct xe_ttm_vram_mgr *vram_mgr = to_xe_ttm_vram_mgr(mgr);

	struct xe_vram_region *vram;
	u64 io_size;

	xe_assert(xe, *c < ARRAY_SIZE(bo->placements));

	vram = container_of(vram_mgr, struct xe_vram_region, ttm);
	xe_assert(xe, vram && vram->usable_size);
	io_size = vram->io_size;

	if (force_contiguous(bo_flags))
		place.flags |= TTM_PL_FLAG_CONTIGUOUS;

	if (io_size < vram->usable_size) {
		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
			place.fpfn = 0;
			place.lpfn = io_size >> PAGE_SHIFT;
		} else {
			place.flags |= TTM_PL_FLAG_TOPDOWN;
		}
	}
	places[*c] = place;
	*c += 1;
}

static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
			 u32 bo_flags, enum ttm_bo_type type, u32 *c)
{
	u32 vram_flag;

	for_each_set_bo_vram_flag(vram_flag, bo_flags) {
		u32 pl = bo_vram_flags_to_vram_placement(xe, bo_flags, vram_flag, type);

		add_vram(xe, bo, bo->placements, bo_flags, pl, c);
	}
}

static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
			   u32 bo_flags, u32 *c)
{
	if (bo_flags & XE_BO_FLAG_STOLEN) {
		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));

		bo->placements[*c] = (struct ttm_place) {
			.mem_type = XE_PL_STOLEN,
			.flags = force_contiguous(bo_flags) ?
				TTM_PL_FLAG_CONTIGUOUS : 0,
		};
		*c += 1;
	}
}

static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
				       u32 bo_flags, enum ttm_bo_type type)
{
	u32 c = 0;

	try_add_vram(xe, bo, bo_flags, type, &c);
	try_add_system(xe, bo, bo_flags, &c);
	try_add_stolen(xe, bo, bo_flags, &c);

	if (!c)
		return -EINVAL;

	bo->placement = (struct ttm_placement) {
		.num_placement = c,
		.placement = bo->placements,
	};

	return 0;
}

int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
			      u32 bo_flags, enum ttm_bo_type type)
{
	xe_bo_assert_held(bo);
	return __xe_bo_placement_for_flags(xe, bo, bo_flags, type);
}

static void xe_evict_flags(struct ttm_buffer_object *tbo,
			   struct ttm_placement *placement)
{
	struct xe_device *xe = container_of(tbo->bdev, typeof(*xe), ttm);
	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
	struct xe_bo *bo;

	if (!xe_bo_is_xe_bo(tbo)) {
		/* Don't handle scatter gather BOs */
		if (tbo->type == ttm_bo_type_sg) {
			placement->num_placement = 0;
			return;
		}

		*placement = device_unplugged ? purge_placement : sys_placement;
		return;
	}

	bo = ttm_to_xe_bo(tbo);
	if (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) {
		*placement = sys_placement;
		return;
	}

	if (device_unplugged && !tbo->base.dma_buf) {
		*placement = purge_placement;
		return;
	}

	/*
	 * For xe, sg bos that are evicted to system just trigger a
	 * rebind of the sg list upon subsequent validation to XE_PL_TT.
	 */
	switch (tbo->resource->mem_type) {
	case XE_PL_VRAM0:
	case XE_PL_VRAM1:
	case XE_PL_STOLEN:
		*placement = tt_placement;
		break;
	case XE_PL_TT:
	default:
		*placement = sys_placement;
		break;
	}
}

/* struct xe_ttm_tt - Subclassed ttm_tt for xe */
struct xe_ttm_tt {
	/** @ttm: Base ttm_tt. */
	struct ttm_tt ttm;
	/** @sgt: Storage for the scatter-gather table backing @ttm. */
	struct sg_table sgt;
	/** @sg: Points at @sgt once DMA-mapped, NULL otherwise. */
	struct sg_table *sg;
	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
	bool purgeable;
};

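/*
 * Lazily build a scatter-gather table over the tt's pages and DMA-map it
 * for the device. The mapping is torn down again in xe_tt_unmap_sg().
 */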
static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
	unsigned long num_pages = tt->num_pages;
	int ret;

	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));

	if (xe_tt->sg)
		return 0;

	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
						num_pages, 0,
						(u64)num_pages << PAGE_SHIFT,
						xe_sg_segment_size(xe->drm.dev),
						GFP_KERNEL);
	if (ret)
		return ret;

	xe_tt->sg = &xe_tt->sgt;
	ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
			      DMA_ATTR_SKIP_CPU_SYNC);
	if (ret) {
		sg_free_table(xe_tt->sg);
		xe_tt->sg = NULL;
		return ret;
	}

	return 0;
}

static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);

	if (xe_tt->sg) {
		dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
				  DMA_BIDIRECTIONAL, 0);
		sg_free_table(xe_tt->sg);
		xe_tt->sg = NULL;
	}
}

struct sg_table *xe_bo_sg(struct xe_bo *bo)
{
	struct ttm_tt *tt = bo->ttm.ttm;
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);

	return xe_tt->sg;
}

/*
 * Account ttm pages against the device shrinker's shrinkable and
 * purgeable counts.
 */
static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);

	if (xe_tt->purgeable)
		xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
	else
		xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
}

static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);

	if (xe_tt->purgeable)
		xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
	else
		xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
}

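/*
 * Mirror the device-wide total of allocated tt pages to the gpu_mem_total
 * tracepoint, so tooling consuming the CONFIG_TRACE_GPU_MEM events sees xe
 * allocations as well.
 */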
static void update_global_total_pages(struct ttm_device *ttm_dev,
				      long num_pages)
{
#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
	struct xe_device *xe = ttm_to_xe_device(ttm_dev);
	u64 global_total_pages =
		atomic64_add_return(num_pages, &xe->global_total_pages);

	trace_gpu_mem_total(xe->drm.primary->index, 0,
			    global_total_pages << PAGE_SHIFT);
#endif
}

static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
				       u32 page_flags)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct xe_device *xe = xe_bo_device(bo);
	struct xe_ttm_tt *xe_tt;
	struct ttm_tt *tt;
	unsigned long extra_pages;
	enum ttm_caching caching = ttm_cached;
	int err;

	xe_tt = kzalloc_obj(*xe_tt);
	if (!xe_tt)
		return NULL;

	tt = &xe_tt->ttm;

	extra_pages = 0;
	if (xe_bo_needs_ccs_pages(bo))
		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
					   PAGE_SIZE);

	/*
	 * DGFX system memory is always WB / ttm_cached, since
	 * other caching modes are only supported on x86. DGFX
	 * GPU system memory accesses are always coherent with the
	 * CPU.
	 */
	if (!IS_DGFX(xe)) {
		switch (bo->cpu_caching) {
		case DRM_XE_GEM_CPU_CACHING_WC:
			caching = ttm_write_combined;
			break;
		default:
			caching = ttm_cached;
			break;
		}

		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);

		/*
		 * For Xe_LPG and beyond up to NVL-P (excluding), PPGTT PTE
		 * lookups are also non-coherent and require a CPU:WC mapping.
		 */
		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_FORCE_WC) ||
		    (!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE))
			caching = ttm_write_combined;
	}

	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
		/*
		 * Valid for internally-created buffers only, for which
		 * cpu_caching is never initialized.
		 */
		xe_assert(xe, bo->cpu_caching == 0);
		caching = ttm_uncached;
	}

	if (ttm_bo->type != ttm_bo_type_sg)
		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;

	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
	if (err) {
		kfree(xe_tt);
		return NULL;
	}

	if (ttm_bo->type != ttm_bo_type_sg) {
		err = ttm_tt_setup_backup(tt);
		if (err) {
			ttm_tt_fini(tt);
			kfree(xe_tt);
			return NULL;
		}
	}

	return tt;
}

static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
			      struct ttm_operation_ctx *ctx)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
	int err;

	/*
	 * dma-bufs are not populated with pages, and the dma-
	 * addresses are set up when moved to XE_PL_TT.
	 */
	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
		return 0;

	if (ttm_tt_is_backed_up(tt) && !xe_tt->purgeable) {
		err = ttm_tt_restore(ttm_dev, tt, ctx);
	} else {
		ttm_tt_clear_backed_up(tt);
		err = ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
	}
	if (err)
		return err;

	xe_tt->purgeable = false;
	xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
	update_global_total_pages(ttm_dev, tt->num_pages);

	return 0;
}

static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
	struct xe_device *xe = ttm_to_xe_device(ttm_dev);

	if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
	    !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
		return;

	xe_tt_unmap_sg(xe, tt);

	ttm_pool_free(&ttm_dev->pool, tt);
	xe_ttm_tt_account_subtract(xe, tt);
	update_global_total_pages(ttm_dev, -(long)tt->num_pages);
}

static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
	ttm_tt_fini(tt);
	kfree(tt);
}

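/*
 * On small-BAR configurations only part of VRAM is CPU-accessible. A
 * resource counts as visible only when every one of its pages lies inside
 * the CPU-visible window, i.e. its visible usage equals its size.
 */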
static bool xe_ttm_resource_visible(struct ttm_resource *mem)
{
	struct xe_ttm_vram_mgr_resource *vres =
		to_xe_ttm_vram_mgr_resource(mem);

	return vres->used_visible_size == mem->size;
}

/**
 * xe_bo_is_visible_vram - check if BO is placed entirely in visible VRAM.
 * @bo: The BO
 *
 * This function checks whether a given BO resides entirely in memory
 * visible from the CPU.
 *
 * Returns: true if the BO is entirely visible, false otherwise.
 */
bool xe_bo_is_visible_vram(struct xe_bo *bo)
{
	if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo)))
		return false;

	return xe_ttm_resource_visible(bo->ttm.resource);
}

static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
				 struct ttm_resource *mem)
{
	struct xe_device *xe = ttm_to_xe_device(bdev);

	switch (mem->mem_type) {
	case XE_PL_SYSTEM:
	case XE_PL_TT:
		return 0;
	case XE_PL_VRAM0:
	case XE_PL_VRAM1: {
		struct xe_vram_region *vram = res_to_mem_region(mem);

		if (!xe_ttm_resource_visible(mem))
			return -EINVAL;

		mem->bus.offset = mem->start << PAGE_SHIFT;

		if (vram->mapping &&
		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
			mem->bus.addr = (u8 __force *)vram->mapping +
				mem->bus.offset;

		mem->bus.offset += vram->io_start;
		mem->bus.is_iomem = true;

#if !IS_ENABLED(CONFIG_X86)
		mem->bus.caching = ttm_write_combined;
#endif
		return 0;
	} case XE_PL_STOLEN:
		return xe_ttm_stolen_io_mem_reserve(xe, mem);
	default:
		return -EINVAL;
	}
}

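/*
 * Notify all VMs with GPU mappings of this BO that it is about to move:
 * VMs not in fault mode have the shared vm_bo marked evicted so a rebind
 * happens on next exec, while fault-mode VMs (and, depending on L2 flush
 * handling, others too) wait for idle and have their VMAs invalidated
 * immediately.
 */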
static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
				const struct ttm_operation_ctx *ctx)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	struct drm_gem_object *obj = &bo->ttm.base;
	struct drm_gpuvm_bo *vm_bo;
	bool idle = false;
	int ret = 0;

	dma_resv_assert_held(bo->ttm.base.resv);

	if (!list_empty(&bo->ttm.base.gpuva.list)) {
		dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
				    DMA_RESV_USAGE_BOOKKEEP);
		dma_resv_for_each_fence_unlocked(&cursor, fence)
			dma_fence_enable_sw_signaling(fence);
		dma_resv_iter_end(&cursor);
	}

	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
		struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
		struct drm_gpuva *gpuva;

		if (!xe_vm_in_fault_mode(vm)) {
			drm_gpuvm_bo_evict(vm_bo, true);
			/*
			 * L2 cache may not be flushed, so ensure that is done in
			 * xe_vm_invalidate_vma() below
			 */
			if (!xe_device_is_l2_flush_optimized(xe))
				continue;
		}

		if (!idle) {
			long timeout;

			if (ctx->no_wait_gpu &&
			    !dma_resv_test_signaled(bo->ttm.base.resv,
						    DMA_RESV_USAGE_BOOKKEEP))
				return -EBUSY;

			timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
							DMA_RESV_USAGE_BOOKKEEP,
							ctx->interruptible,
							MAX_SCHEDULE_TIMEOUT);
			if (!timeout)
				return -ETIME;
			if (timeout < 0)
				return timeout;

			idle = true;
		}

		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
			struct xe_vma *vma = gpuva_to_vma(gpuva);

			trace_xe_vma_evict(vma);
			ret = xe_vm_invalidate_vma(vma);
			if (XE_WARN_ON(ret))
				return ret;
		}
	}

	return ret;
}

/*
 * The dma-buf map_attachment() / unmap_attachment() is hooked up here.
 * Note that unmapping the attachment is deferred to the next
 * map_attachment time, or to bo destroy (after idling) whichever comes first.
 * This is to avoid syncing before unmap_attachment(), assuming that the
 * caller relies on idling the reservation object before moving the
 * backing store out. Should that assumption not hold, then we will be able
 * to unconditionally call unmap_attachment() when moving out to system.
 */
static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
			     struct ttm_resource *new_res)
{
	struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
	struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
					       ttm);
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	bool device_unplugged = drm_dev_is_unplugged(&xe->drm);
	struct sg_table *sg;

	xe_assert(xe, attach);
	xe_assert(xe, ttm_bo->ttm);

	if (device_unplugged && new_res->mem_type == XE_PL_SYSTEM &&
	    ttm_bo->sg) {
		dma_resv_wait_timeout(ttm_bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
				      false, MAX_SCHEDULE_TIMEOUT);
		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
		ttm_bo->sg = NULL;
	}

	if (new_res->mem_type == XE_PL_SYSTEM)
		goto out;

	if (ttm_bo->sg) {
		dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
		ttm_bo->sg = NULL;
	}

	sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sg))
		return PTR_ERR(sg);

	ttm_bo->sg = sg;
	xe_tt->sg = sg;

out:
	ttm_bo_move_null(ttm_bo, new_res);

	return 0;
}

/**
 * xe_bo_move_notify - Notify subsystems of a pending move
 * @bo: The buffer object
 * @ctx: The struct ttm_operation_ctx controlling locking and waits.
 *
 * This function notifies subsystems of an upcoming buffer move.
 * Upon receiving such a notification, subsystems should schedule
 * halting access to the underlying pages and optionally add a fence
 * to the buffer object's dma_resv object, that signals when access is
 * stopped. The caller will wait on all dma_resv fences before
 * starting the move.
 *
 * A subsystem may commence access to the object after obtaining
 * bindings to the new backing memory under the object lock.
 *
 * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
 * negative error code on error.
 */
static int xe_bo_move_notify(struct xe_bo *bo,
			     const struct ttm_operation_ctx *ctx)
{
	struct ttm_buffer_object *ttm_bo = &bo->ttm;
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	struct ttm_resource *old_mem = ttm_bo->resource;
	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
	int ret;

	/*
	 * If this starts to call into many components, consider
	 * using a notification chain here.
	 */

	if (xe_bo_is_pinned(bo))
		return -EINVAL;

	xe_bo_vunmap(bo);
	ret = xe_bo_trigger_rebind(xe, bo, ctx);
	if (ret)
		return ret;

	/* Don't call move_notify() for imported dma-bufs. */
	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
		dma_buf_invalidate_mappings(ttm_bo->base.dma_buf);

	/*
	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
	 * so if we moved from VRAM make sure to unlink this from the userfault
	 * tracking.
	 */
	if (mem_type_is_vram(old_mem_type)) {
		mutex_lock(&xe->mem_access.vram_userfault.lock);
		if (!list_empty(&bo->vram_userfault_link))
			list_del_init(&bo->vram_userfault_link);
		mutex_unlock(&xe->mem_access.vram_userfault.lock);
	}

	return 0;
}

/**
 * xe_bo_set_purgeable_shrinker() - Update shrinker accounting for purgeable state
 * @bo: Buffer object
 * @new_state: New purgeable state being set
 *
 * Transfers pages between shrinkable and purgeable buckets when the BO
 * purgeable state changes. Called automatically from xe_bo_set_purgeable_state().
 */
static void xe_bo_set_purgeable_shrinker(struct xe_bo *bo,
					 enum xe_madv_purgeable_state new_state)
{
	struct ttm_buffer_object *ttm_bo = &bo->ttm;
	struct ttm_tt *tt = ttm_bo->ttm;
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	struct xe_ttm_tt *xe_tt;
	long tt_pages;

	xe_bo_assert_held(bo);

	if (!tt || !ttm_tt_is_populated(tt))
		return;

	xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
	tt_pages = tt->num_pages;

	if (!xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_DONTNEED) {
		xe_tt->purgeable = true;
		/* Transfer pages from shrinkable to purgeable count */
		xe_shrinker_mod_pages(xe->mem.shrinker, -tt_pages, tt_pages);
	} else if (xe_tt->purgeable && new_state == XE_MADV_PURGEABLE_WILLNEED) {
		xe_tt->purgeable = false;
		/* Transfer pages from purgeable to shrinkable count */
		xe_shrinker_mod_pages(xe->mem.shrinker, tt_pages, -tt_pages);
	}
}

/**
 * xe_bo_set_purgeable_state() - Set BO purgeable state with validation
 * @bo: Buffer object
 * @new_state: New purgeable state
 *
 * Sets the purgeable state with lockdep assertions and validates state
 * transitions. Once a BO is PURGED, it cannot transition to any other state.
 * Invalid transitions are caught with xe_assert(). Shrinker page accounting
 * is updated automatically.
 */
void xe_bo_set_purgeable_state(struct xe_bo *bo,
			       enum xe_madv_purgeable_state new_state)
{
	struct xe_device *xe = xe_bo_device(bo);

	xe_bo_assert_held(bo);

	/* Validate state is one of the known values */
	xe_assert(xe, new_state == XE_MADV_PURGEABLE_WILLNEED ||
		  new_state == XE_MADV_PURGEABLE_DONTNEED ||
		  new_state == XE_MADV_PURGEABLE_PURGED);

	/* Once purged, always purged - cannot transition out */
	xe_assert(xe, !(bo->madv_purgeable == XE_MADV_PURGEABLE_PURGED &&
			new_state != XE_MADV_PURGEABLE_PURGED));

	bo->madv_purgeable = new_state;
	xe_bo_set_purgeable_shrinker(bo, new_state);
}

/**
 * xe_ttm_bo_purge() - Purge buffer object backing store
 * @ttm_bo: The TTM buffer object to purge
 * @ctx: TTM operation context
 *
 * This function purges the backing store of a BO marked as DONTNEED and
 * triggers rebind to invalidate stale GPU mappings. For fault-mode VMs,
 * this zaps the PTEs. The next GPU access will trigger a page fault and
 * perform NULL rebind (scratch pages or clear PTEs based on VM config).
 *
 * Return: 0 on success, negative error code on failure
 */
static int xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct ttm_placement place = {};
	int ret;

	xe_bo_assert_held(bo);

	if (!ttm_bo->ttm)
		return 0;

	if (!xe_bo_madv_is_dontneed(bo))
		return 0;

	/*
	 * Use the standard pre-move hook so we share the same cleanup/invalidate
	 * path as migrations: drop any CPU vmap and schedule the necessary GPU
	 * unbind/rebind work.
	 *
	 * This must be called before ttm_bo_validate() frees the pages.
	 * May fail in no-wait contexts (fault/shrinker) or if the BO is
	 * pinned. Keep state unchanged on failure so we don't end up "PURGED"
	 * with stale mappings.
	 */
	ret = xe_bo_move_notify(bo, ctx);
	if (ret)
		return ret;

	ret = ttm_bo_validate(ttm_bo, &place, ctx);
	if (ret)
		return ret;

	/* Commit the state transition only once invalidation was queued */
	xe_bo_set_purgeable_state(bo, XE_MADV_PURGEABLE_PURGED);

	return 0;
}

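/*
 * Central TTM move callback. Depending on source/destination this either:
 * purges DONTNEED BOs on eviction, performs a null move (initial placement,
 * TT <-> system with no data, failed multihop), hands dma-buf imports to
 * xe_bo_move_dmabuf(), requests a multihop through XE_PL_TT for direct
 * VRAM <-> system transitions, or issues a blit/clear via the migrate
 * engine for moves that carry data.
 */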
static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
		      struct ttm_operation_ctx *ctx,
		      struct ttm_resource *new_mem,
		      struct ttm_place *hop)
{
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct ttm_resource *old_mem = ttm_bo->resource;
	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
	struct ttm_tt *ttm = ttm_bo->ttm;
	struct xe_migrate *migrate = NULL;
	struct dma_fence *fence;
	bool move_lacks_source;
	bool tt_has_data;
	bool needs_clear;
	bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
				  ttm && ttm_tt_is_populated(ttm)) ? true : false;
	int ret = 0;

	/*
	 * Purge only non-shared BOs explicitly marked DONTNEED by userspace.
	 * The move_notify callback will handle invalidation asynchronously.
	 */
	if (evict && xe_bo_madv_is_dontneed(bo)) {
		ret = xe_ttm_bo_purge(ttm_bo, ctx);
		if (ret)
			return ret;

		/* Free the unused eviction destination resource */
		ttm_resource_free(ttm_bo, &new_mem);
		return 0;
	}

	/* Bo creation path, moving to system or TT. */
	if ((!old_mem && ttm) && !handle_system_ccs) {
		if (new_mem->mem_type == XE_PL_TT)
			ret = xe_tt_map_sg(xe, ttm);
		if (!ret)
			ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	if (ttm_bo->type == ttm_bo_type_sg) {
		if (new_mem->mem_type == XE_PL_SYSTEM)
			ret = xe_bo_move_notify(bo, ctx);
		if (!ret)
			ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
		return ret;
	}

	tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));

	move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
					 (!mem_type_is_vram(old_mem_type) && !tt_has_data));

	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
		(!ttm && ttm_bo->type == ttm_bo_type_device);

	if (new_mem->mem_type == XE_PL_TT) {
		ret = xe_tt_map_sg(xe, ttm);
		if (ret)
			goto out;
	}

	if (move_lacks_source && !needs_clear) {
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	if (!move_lacks_source && (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR) &&
	    new_mem->mem_type == XE_PL_SYSTEM) {
		ret = xe_svm_bo_evict(bo);
		if (!ret) {
			drm_dbg(&xe->drm, "Evict system allocator BO success\n");
			ttm_bo_move_null(ttm_bo, new_mem);
		} else {
			drm_dbg(&xe->drm, "Evict system allocator BO failed=%pe\n",
				ERR_PTR(ret));
		}

		goto out;
	}

	if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	/*
	 * Failed multi-hop where the old_mem is still marked as
	 * TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
	 */
	if (old_mem_type == XE_PL_TT &&
	    new_mem->mem_type == XE_PL_TT) {
		ttm_bo_move_null(ttm_bo, new_mem);
		goto out;
	}

	if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
		ret = xe_bo_move_notify(bo, ctx);
		if (ret)
			goto out;
	}

	if (old_mem_type == XE_PL_TT &&
	    new_mem->mem_type == XE_PL_SYSTEM) {
		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
						     DMA_RESV_USAGE_BOOKKEEP,
						     false,
						     MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			ret = timeout;
			goto out;
		}

		if (!handle_system_ccs) {
			ttm_bo_move_null(ttm_bo, new_mem);
			goto out;
		}
	}

	if (!move_lacks_source &&
	    ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
	     (mem_type_is_vram(old_mem_type) &&
	      new_mem->mem_type == XE_PL_SYSTEM))) {
		hop->fpfn = 0;
		hop->lpfn = 0;
		hop->mem_type = XE_PL_TT;
		hop->flags = TTM_PL_FLAG_TEMPORARY;
		ret = -EMULTIHOP;
		goto out;
	}

	if (bo->tile)
		migrate = bo->tile->migrate;
	else if (resource_is_vram(new_mem))
		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
	else if (mem_type_is_vram(old_mem_type))
		migrate = mem_type_to_migrate(xe, old_mem_type);
	else
		migrate = xe->tiles[0].migrate;

	xe_assert(xe, migrate);
	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
	if (xe_rpm_reclaim_safe(xe)) {
		/*
		 * We might be called through swapout in the validation path of
		 * another TTM device, so acquire rpm here.
		 */
		xe_pm_runtime_get(xe);
	} else {
		drm_WARN_ON(&xe->drm, handle_system_ccs);
		xe_pm_runtime_get_noresume(xe);
	}

	if (move_lacks_source) {
		u32 flags = 0;

		if (mem_type_is_vram(new_mem->mem_type))
			flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
		else if (handle_system_ccs)
			flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;

		fence = xe_migrate_clear(migrate, bo, new_mem, flags);
	} else {
		fence = xe_migrate_copy(migrate, bo, bo, old_mem, new_mem,
					handle_system_ccs);
	}
	if (IS_ERR(fence)) {
		ret = PTR_ERR(fence);
		xe_pm_runtime_put(xe);
		goto out;
	}
	if (!move_lacks_source) {
		ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict, true,
						new_mem);
		if (ret) {
			dma_fence_wait(fence, false);
			ttm_bo_move_null(ttm_bo, new_mem);
			ret = 0;
		}
	} else {
		/*
		 * ttm_bo_move_accel_cleanup() may blow up if
		 * bo->resource == NULL, so just attach the
		 * fence and set the new resource.
		 */
		dma_resv_add_fence(ttm_bo->base.resv, fence,
				   DMA_RESV_USAGE_KERNEL);
		ttm_bo_move_null(ttm_bo, new_mem);
	}

	dma_fence_put(fence);
	xe_pm_runtime_put(xe);

	/*
	 * CCS metadata is migrated from TT -> SMEM, so detach the BBs from
	 * the BO as they are no longer needed.
	 */
	if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT &&
	    new_mem->mem_type == XE_PL_SYSTEM)
		xe_sriov_vf_ccs_detach_bo(bo);

	if (IS_VF_CCS_READY(xe) &&
	    ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
	     (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
	    handle_system_ccs)
		ret = xe_sriov_vf_ccs_attach_bo(bo);

out:
	if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
	    ttm_bo->ttm) {
		long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
						     DMA_RESV_USAGE_KERNEL,
						     false,
						     MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0)
			ret = timeout;

		if (IS_VF_CCS_READY(xe))
			xe_sriov_vf_ccs_detach_bo(bo);

		xe_tt_unmap_sg(xe, ttm_bo->ttm);
	}

	return ret;
}

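/*
 * Shrinker purge path: fake-move the BO to system without copying any data,
 * then have TTM release the now-unreferenced pages. Also used for ghost
 * objects, whose struct ttm_tt is still embedded in a struct xe_ttm_tt.
 */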
static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
			       struct ttm_buffer_object *bo,
			       unsigned long *scanned)
{
	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
	struct ttm_tt *tt = bo->ttm;
	long lret;

	/* Fake move to system, without copying data. */
	if (bo->resource->mem_type != XE_PL_SYSTEM) {
		struct ttm_resource *new_resource;

		lret = ttm_bo_wait_ctx(bo, ctx);
		if (lret)
			return lret;

		lret = ttm_bo_mem_space(bo, &sys_placement, &new_resource, ctx);
		if (lret)
			return lret;

		xe_tt_unmap_sg(xe, bo->ttm);
		ttm_bo_move_null(bo, new_resource);
	}

	*scanned += bo->ttm->num_pages;
	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
			     {.purge = true,
			      .writeback = false,
			      .allow_move = false});

	if (lret > 0) {
		xe_ttm_tt_account_subtract(xe, bo->ttm);
		update_global_total_pages(bo->bdev, -(long)tt->num_pages);
	}

	return lret;
}

static bool
xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
{
	struct drm_gpuvm_bo *vm_bo;

	if (!ttm_bo_eviction_valuable(bo, place))
		return false;

	if (!xe_bo_is_xe_bo(bo))
		return true;

	drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
		if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
			return false;
	}

	return true;
}

/**
 * xe_bo_shrink() - Try to shrink an xe bo.
 * @ctx: The struct ttm_operation_ctx used for shrinking.
 * @bo: The TTM buffer object whose pages to shrink.
 * @flags: Flags governing the shrink behaviour.
 * @scanned: Pointer to a counter of the number of pages
 * attempted to shrink.
 *
 * Try to shrink or purge a bo, and if that succeeds, unmap dma.
 * Note that we need to be able to also handle non-xe bos
 * (ghost bos), but only if the struct ttm_tt is embedded in
 * a struct xe_ttm_tt. When the function attempts to shrink
 * the pages of a buffer object, the value pointed to by @scanned
 * is updated.
 *
 * Return: The number of pages shrunken or purged, or negative error
 * code on failure.
 */
long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
		  const struct xe_bo_shrink_flags flags,
		  unsigned long *scanned)
{
	struct ttm_tt *tt = bo->ttm;
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
	struct ttm_place place = {.mem_type = bo->resource->mem_type};
	struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
	struct xe_device *xe = ttm_to_xe_device(bo->bdev);
	bool needs_rpm;
	long lret = 0L;

	if (!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE) ||
	    (flags.purge && !xe_tt->purgeable))
		return -EBUSY;

	if (!xe_bo_eviction_valuable(bo, &place))
		return -EBUSY;

	if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
		return xe_bo_shrink_purge(ctx, bo, scanned);

	if (xe_tt->purgeable) {
		if (bo->resource->mem_type != XE_PL_SYSTEM)
			lret = xe_bo_move_notify(xe_bo, ctx);
		if (!lret)
			lret = xe_bo_shrink_purge(ctx, bo, scanned);
		if (lret > 0 && xe_bo_madv_is_dontneed(xe_bo))
			xe_bo_set_purgeable_state(xe_bo,
						  XE_MADV_PURGEABLE_PURGED);
		goto out_unref;
	}

	/* System CCS needs gpu copy when moving PL_TT -> PL_SYSTEM */
	needs_rpm = (!IS_DGFX(xe) && bo->resource->mem_type != XE_PL_SYSTEM &&
		     xe_bo_needs_ccs_pages(xe_bo));
	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
		goto out_unref;

	*scanned += tt->num_pages;
	lret = ttm_bo_shrink(ctx, bo, (struct ttm_bo_shrink_flags)
			     {.purge = false,
			      .writeback = flags.writeback,
			      .allow_move = true});
	if (needs_rpm)
		xe_pm_runtime_put(xe);

	if (lret > 0) {
		xe_ttm_tt_account_subtract(xe, tt);
		update_global_total_pages(bo->bdev, -(long)tt->num_pages);
	}

out_unref:
	xe_bo_put(xe_bo);

	return lret;
}

/**
 * xe_bo_notifier_prepare_pinned() - Prepare a pinned VRAM object to be backed
 * up in system memory.
 * @bo: The buffer object to prepare.
 *
 * On successful completion, the object backup pages are allocated. Expectation
 * is that this is called from the PM notifier, prior to suspend/hibernation.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
{
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *backup;
	int ret = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
		drm_exec_retry_on_contention(&exec);
		xe_assert(xe, !ret);
		xe_assert(xe, !bo->backup_obj);

		/*
		 * Since this is called from the PM notifier we might have raced with
		 * someone unpinning this after we dropped the pinned list lock and
		 * grabbing the above bo lock.
		 */
		if (!xe_bo_is_pinned(bo))
			break;

		if (!xe_bo_is_vram(bo))
			break;

		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
			break;

		backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
					   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
					   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
					   XE_BO_FLAG_PINNED, &exec);
		if (IS_ERR(backup)) {
			drm_exec_retry_on_contention(&exec);
			ret = PTR_ERR(backup);
			xe_validation_retry_on_oom(&ctx, &ret);
			break;
		}

		backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
		ttm_bo_pin(&backup->ttm);
		bo->backup_obj = backup;
	}

	return ret;
}

/**
 * xe_bo_notifier_unprepare_pinned() - Undo the previous prepare operation.
 * @bo: The buffer object to undo the prepare for.
 *
 * Always returns 0. The backup object is removed, if still present. Expectation
 * is that this is called from the PM notifier when undoing the prepare step.
 *
 * Return: Always returns 0.
 */
int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
{
	xe_bo_lock(bo, false);
	if (bo->backup_obj) {
		ttm_bo_unpin(&bo->backup_obj->ttm);
		xe_bo_put(bo->backup_obj);
		bo->backup_obj = NULL;
	}
	xe_bo_unlock(bo);

	return 0;
}

static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup)
{
	struct xe_device *xe = xe_bo_device(bo);
	bool unmap = false;
	int ret = 0;

	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
		struct xe_migrate *migrate;
		struct dma_fence *fence;

		if (bo->tile)
			migrate = bo->tile->migrate;
		else
			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);

		xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv);
		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
		if (ret)
			goto out_backup;

		fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
					backup->ttm.resource, false);
		if (IS_ERR(fence)) {
			ret = PTR_ERR(fence);
			goto out_backup;
		}

		dma_resv_add_fence(bo->ttm.base.resv, fence,
				   DMA_RESV_USAGE_KERNEL);
		dma_fence_put(fence);
	} else {
		ret = xe_bo_vmap(backup);
		if (ret)
			goto out_backup;

		if (iosys_map_is_null(&bo->vmap)) {
			ret = xe_bo_vmap(bo);
			if (ret)
				goto out_vunmap;
			unmap = true;
		}

		xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
				   xe_bo_size(bo));
	}

	if (!bo->backup_obj)
		bo->backup_obj = backup;
out_vunmap:
	xe_bo_vunmap(backup);
out_backup:
	if (unmap)
		xe_bo_vunmap(bo);

	return ret;
}

/**
 * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
 * @bo: The buffer object to move.
 *
 * On successful completion, the object memory will be moved to system memory.
 *
 * This is needed for special handling of pinned VRAM objects during
 * suspend-resume.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_bo_evict_pinned(struct xe_bo *bo)
{
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *backup = bo->backup_obj;
	bool backup_created = false;
	int ret = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
		ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
		drm_exec_retry_on_contention(&exec);
		xe_assert(xe, !ret);

		if (WARN_ON(!bo->ttm.resource)) {
			ret = -EINVAL;
			break;
		}

		if (WARN_ON(!xe_bo_is_pinned(bo))) {
			ret = -EINVAL;
			break;
		}

		if (!xe_bo_is_vram(bo))
			break;

		if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
			break;

		if (!backup) {
			backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL,
						   xe_bo_size(bo),
						   DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
						   XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
						   XE_BO_FLAG_PINNED, &exec);
			if (IS_ERR(backup)) {
				drm_exec_retry_on_contention(&exec);
				ret = PTR_ERR(backup);
				xe_validation_retry_on_oom(&ctx, &ret);
				break;
			}
			backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
			backup_created = true;
		}

		ret = xe_bo_evict_pinned_copy(bo, backup);
	}

	if (ret && backup_created)
		xe_bo_put(backup);

	return ret;
}

/**
 * xe_bo_restore_pinned() - Restore a pinned VRAM object
 * @bo: The buffer object to move.
 *
 * On successful completion, the object memory will be moved back to VRAM.
 *
 * This is needed for special handling of pinned VRAM objects during
 * suspend-resume.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_bo_restore_pinned(struct xe_bo *bo)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.gfp_retry_mayfail = false,
	};
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	struct xe_bo *backup = bo->backup_obj;
	bool unmap = false;
	int ret;

	if (!backup)
		return 0;

	xe_bo_lock(bo, false);

	if (!xe_bo_is_pinned(backup)) {
		ret = ttm_bo_validate(&backup->ttm, &backup->placement, &ctx);
		if (ret)
			goto out_unlock_bo;
	}

	if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
		struct xe_migrate *migrate;
		struct dma_fence *fence;

		if (bo->tile)
			migrate = bo->tile->migrate;
		else
			migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);

		ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
		if (ret)
			goto out_unlock_bo;

		fence = xe_migrate_copy(migrate, backup, bo,
					backup->ttm.resource, bo->ttm.resource,
					false);
		if (IS_ERR(fence)) {
			ret = PTR_ERR(fence);
			goto out_unlock_bo;
		}

		dma_resv_add_fence(bo->ttm.base.resv, fence,
				   DMA_RESV_USAGE_KERNEL);
		dma_fence_put(fence);
	} else {
		ret = xe_bo_vmap(backup);
		if (ret)
			goto out_unlock_bo;

		if (iosys_map_is_null(&bo->vmap)) {
			ret = xe_bo_vmap(bo);
			if (ret)
				goto out_backup;
			unmap = true;
		}

		xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
				 xe_bo_size(bo));
	}

	bo->backup_obj = NULL;

out_backup:
	xe_bo_vunmap(backup);
	if (!bo->backup_obj) {
		if (xe_bo_is_pinned(backup))
			ttm_bo_unpin(&backup->ttm);
		xe_bo_put(backup);
	}
out_unlock_bo:
	if (unmap)
		xe_bo_vunmap(bo);
	xe_bo_unlock(bo);
	return ret;
}

int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
{
	struct ttm_buffer_object *ttm_bo = &bo->ttm;
	struct ttm_tt *tt = ttm_bo->ttm;

	if (tt) {
		struct xe_ttm_tt *xe_tt = container_of(tt, typeof(*xe_tt), ttm);

		if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
			dma_buf_unmap_attachment(ttm_bo->base.import_attach,
						 ttm_bo->sg,
						 DMA_BIDIRECTIONAL);
			ttm_bo->sg = NULL;
			xe_tt->sg = NULL;
		} else if (xe_tt->sg) {
			dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
					  xe_tt->sg,
					  DMA_BIDIRECTIONAL, 0);
			sg_free_table(xe_tt->sg);
			xe_tt->sg = NULL;
		}
	}

	return 0;
}

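/*
 * Translate a page offset within the BO to a CPU-accessible pfn: either in
 * the stolen range, or at the matching position inside the VRAM BAR.
 */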
static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
				       unsigned long page_offset)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct xe_res_cursor cursor;
	struct xe_vram_region *vram;

	if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
		return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;

	vram = res_to_mem_region(ttm_bo->resource);
	xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
	return (vram->io_start + cursor.start) >> PAGE_SHIFT;
}

static void __xe_bo_vunmap(struct xe_bo *bo);

/*
 * TODO: Move this function to TTM so we don't rely on how TTM does its
 * locking, thereby abusing TTM internals.
 */
static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
{
	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
	bool locked;

	xe_assert(xe, !kref_read(&ttm_bo->kref));

	/*
	 * We can typically only race with TTM trylocking under the
	 * lru_lock, which will immediately be unlocked again since
	 * the ttm_bo refcount is zero at this point. So trylocking *should*
	 * always succeed here, as long as we hold the lru lock.
	 */
	spin_lock(&ttm_bo->bdev->lru_lock);
	locked = dma_resv_trylock(&ttm_bo->base._resv);
	spin_unlock(&ttm_bo->bdev->lru_lock);
	xe_assert(xe, locked);

	return locked;
}

static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	struct dma_fence *replacement = NULL;
	struct xe_bo *bo;

	if (!xe_bo_is_xe_bo(ttm_bo))
		return;

	bo = ttm_to_xe_bo(ttm_bo);
	xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));

	if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
		return;

	/*
	 * Scrub the preempt fences if any. The unbind fence is already
	 * attached to the resv.
	 * TODO: Don't do this for external bos once we scrub them after
	 * unbind.
	 */
	dma_resv_for_each_fence(&cursor, &ttm_bo->base._resv,
				DMA_RESV_USAGE_BOOKKEEP, fence) {
		if (xe_fence_is_xe_preempt(fence) &&
		    !dma_fence_is_signaled(fence)) {
			if (!replacement)
				replacement = dma_fence_get_stub();

			dma_resv_replace_fences(&ttm_bo->base._resv,
						fence->context,
						replacement,
						DMA_RESV_USAGE_BOOKKEEP);
		}
	}
	dma_fence_put(replacement);

	dma_resv_unlock(&ttm_bo->base._resv);
}

static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
{
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);

	if (!xe_bo_is_xe_bo(ttm_bo))
		return;

	if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev)))
		xe_sriov_vf_ccs_detach_bo(bo);

	/*
	 * Object is idle and about to be destroyed. Release the
	 * dma-buf attachment.
	 */
	if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
		struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
						       struct xe_ttm_tt, ttm);

		dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
					 DMA_BIDIRECTIONAL);
		ttm_bo->sg = NULL;
		xe_tt->sg = NULL;
	}
}

static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.gfp_retry_mayfail = false,
	};

	if (ttm_bo->ttm) {
		struct xe_ttm_tt *xe_tt =
			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);

		if (xe_tt->purgeable)
			xe_ttm_bo_purge(ttm_bo, &ctx);
	}
}

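/*
 * Back ptrace-style peeks/pokes at BO memory: small accesses to visible
 * VRAM go through an iomem memcpy of the BAR mapping, while large ones or
 * accesses to non-visible VRAM are bounced through the migrate engine.
 */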
xe_ttm_access_memory(struct ttm_buffer_object * ttm_bo,unsigned long offset,void * buf,int len,int write)1758 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1759 unsigned long offset, void *buf, int len,
1760 int write)
1761 {
1762 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1763 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1764 struct iosys_map vmap;
1765 struct xe_res_cursor cursor;
1766 struct xe_vram_region *vram;
1767 int bytes_left = len;
1768 int err = 0;
1769
1770 xe_bo_assert_held(bo);
1771 xe_device_assert_mem_access(xe);
1772
1773 if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1774 return -EIO;
1775
1776 if (!xe_bo_is_visible_vram(bo) || len >= SZ_16K) {
1777 struct xe_migrate *migrate =
1778 mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
1779
1780 err = xe_migrate_access_memory(migrate, bo, offset, buf, len,
1781 write);
1782 goto out;
1783 }
1784
1785 vram = res_to_mem_region(ttm_bo->resource);
1786 xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1787 xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);
1788
1789 do {
1790 unsigned long page_offset = (offset & ~PAGE_MASK);
1791 int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1792
1793 iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1794 cursor.start);
1795 if (write)
1796 xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1797 else
1798 xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1799
1800 buf += byte_count;
1801 offset += byte_count;
1802 bytes_left -= byte_count;
1803 if (bytes_left)
1804 xe_res_next(&cursor, PAGE_SIZE);
1805 } while (bytes_left);
1806
1807 out:
1808 return err ?: len;
1809 }
1810
1811 const struct ttm_device_funcs xe_ttm_funcs = {
1812 .ttm_tt_create = xe_ttm_tt_create,
1813 .ttm_tt_populate = xe_ttm_tt_populate,
1814 .ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1815 .ttm_tt_destroy = xe_ttm_tt_destroy,
1816 .evict_flags = xe_evict_flags,
1817 .move = xe_bo_move,
1818 .io_mem_reserve = xe_ttm_io_mem_reserve,
1819 .io_mem_pfn = xe_ttm_io_mem_pfn,
1820 .access_memory = xe_ttm_access_memory,
1821 .release_notify = xe_ttm_bo_release_notify,
1822 .eviction_valuable = xe_bo_eviction_valuable,
1823 .delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1824 .swap_notify = xe_ttm_bo_swap_notify,
1825 };
1826
xe_ttm_bo_destroy(struct ttm_buffer_object * ttm_bo)1827 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1828 {
1829 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1830 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1831 struct xe_tile *tile;
1832 u8 id;
1833
1834 if (bo->ttm.base.import_attach)
1835 drm_prime_gem_destroy(&bo->ttm.base, NULL);
1836 drm_gem_object_release(&bo->ttm.base);
1837
1838 xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1839
1840 for_each_tile(tile, xe, id)
1841 if (bo->ggtt_node[id])
1842 xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1843
1844 #ifdef CONFIG_PROC_FS
1845 if (bo->client)
1846 xe_drm_client_remove_bo(bo);
1847 #endif
1848
1849 if (bo->vm && xe_bo_is_user(bo))
1850 xe_vm_put(bo->vm);
1851
1852 if (bo->parent_obj)
1853 xe_bo_put(bo->parent_obj);
1854
1855 mutex_lock(&xe->mem_access.vram_userfault.lock);
1856 if (!list_empty(&bo->vram_userfault_link))
1857 list_del(&bo->vram_userfault_link);
1858 mutex_unlock(&xe->mem_access.vram_userfault.lock);
1859
1860 kfree(bo);
1861 }
1862
static void xe_gem_object_free(struct drm_gem_object *obj)
{
	/* Our BO reference counting scheme works as follows:
	 *
	 * The gem object kref is typically used throughout the driver,
	 * and the gem object holds a ttm_buffer_object refcount, so
	 * that when the last gem object reference is put, which is when
	 * we end up in this function, we also put that ttm_buffer_object
	 * refcount. Anything using gem interfaces is then no longer
	 * allowed to access the object in a way that requires a gem
	 * refcount, including locking the object.
	 *
	 * Driver TTM callbacks are allowed to use the ttm_buffer_object
	 * refcount directly if needed.
	 */
	__xe_bo_vunmap(gem_to_xe_bo(obj));
	ttm_bo_fini(container_of(obj, struct ttm_buffer_object, base));
}

static void xe_gem_object_close(struct drm_gem_object *obj,
				struct drm_file *file_priv)
{
	struct xe_bo *bo = gem_to_xe_bo(obj);

	if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
		xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));

		xe_bo_lock(bo, false);
		ttm_bo_set_bulk_move(&bo->ttm, NULL);
		xe_bo_unlock(bo);
	}
}

static bool should_migrate_to_smem(struct xe_bo *bo)
{
	/*
	 * NOTE: The following atomic checks are platform-specific. For example,
	 * if a device supports CXL atomics, these may not be necessary or
	 * may behave differently.
	 */

	return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
	       bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
}

static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
{
	long lerr;

	if (ctx->no_wait_gpu)
		return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
			0 : -EBUSY;

	lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
				     ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
	if (lerr < 0)
		return lerr;
	if (lerr == 0)
		return -EBUSY;

	return 0;
}

/* Populate the bo if swapped out, or migrate if the access mode requires that. */
static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
			       struct drm_exec *exec)
{
	struct ttm_buffer_object *tbo = &bo->ttm;
	int err = 0;

	if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
		err = xe_bo_wait_usage_kernel(bo, ctx);
		if (!err)
			err = ttm_bo_populate(&bo->ttm, ctx);
	} else if (should_migrate_to_smem(bo)) {
		xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
		err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
	}

	return err;
}

/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
{
	vm_fault_t ret;

	trace_xe_bo_cpu_fault(bo);

	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
				       TTM_BO_VM_NUM_PREFAULT);
	/*
	 * By the time TTM is actually called to insert PTEs, no blocking
	 * conditions may remain, since TTM could otherwise drop locks and
	 * return VM_FAULT_RETRY.
	 */
	xe_assert(xe, ret != VM_FAULT_RETRY);

	if (ret == VM_FAULT_NOPAGE &&
	    mem_type_is_vram(bo->ttm.resource->mem_type)) {
		mutex_lock(&xe->mem_access.vram_userfault.lock);
		if (list_empty(&bo->vram_userfault_link))
			list_add(&bo->vram_userfault_link,
				 &xe->mem_access.vram_userfault.list);
		mutex_unlock(&xe->mem_access.vram_userfault.lock);
	}

	return ret;
}

static vm_fault_t xe_err_to_fault_t(int err)
{
	switch (err) {
	case 0:
	case -EINTR:
	case -ERESTARTSYS:
	case -EAGAIN:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
	case -ENOSPC:
		return VM_FAULT_OOM;
	default:
		break;
	}
	return VM_FAULT_SIGBUS;
}

static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
{
	dma_resv_assert_held(tbo->base.resv);

	return tbo->ttm &&
		(tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
		TTM_TT_FLAG_EXTERNAL;
}

static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
					   struct xe_bo *bo, bool needs_rpm)
{
	struct ttm_buffer_object *tbo = &bo->ttm;
	vm_fault_t ret = VM_FAULT_RETRY;
	struct xe_validation_ctx ctx;
	struct ttm_operation_ctx tctx = {
		.interruptible = true,
		.no_wait_gpu = true,
		.gfp_retry_mayfail = true,
	};
	int err;

	if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
		return VM_FAULT_RETRY;

	err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
				     (struct xe_val_flags) {
					     .interruptible = true,
					     .no_block = true
				     });
	if (err)
		goto out_pm;

	if (!dma_resv_trylock(tbo->base.resv))
		goto out_validation;

	/*
	 * Reject CPU faults to purgeable BOs. DONTNEED BOs can be purged
	 * at any time, and purged BOs have no backing store. Either case
	 * is undefined behavior for CPU access.
	 */
	if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) {
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	if (xe_ttm_bo_is_imported(tbo)) {
		ret = VM_FAULT_SIGBUS;
		drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
		goto out_unlock;
	}

	err = xe_bo_fault_migrate(bo, &tctx, NULL);
	if (err) {
		/* For -ENOMEM, -ENOSPC and -EBUSY, keep the VM_FAULT_RETRY fallback. */
		if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
			ret = xe_err_to_fault_t(err);
		goto out_unlock;
	}

	if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
		ret = __xe_bo_cpu_fault(vmf, xe, bo);

out_unlock:
	dma_resv_unlock(tbo->base.resv);
out_validation:
	xe_validation_ctx_fini(&ctx);
out_pm:
	if (needs_rpm)
		xe_pm_runtime_put(xe);

	return ret;
}

static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
{
	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
	struct drm_device *ddev = tbo->base.dev;
	struct xe_device *xe = to_xe_device(ddev);
	struct xe_bo *bo = ttm_to_xe_bo(tbo);
	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
	bool retry_after_wait = false;
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	vm_fault_t ret;
	int err = 0;
	int idx;

	if (xe_device_wedged(xe) || !drm_dev_enter(&xe->drm, &idx))
		return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);

	ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
	if (ret != VM_FAULT_RETRY)
		goto out;

	if (fault_flag_allow_retry_first(vmf->flags)) {
		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
			goto out;
		retry_after_wait = true;
		xe_bo_get(bo);
		mmap_read_unlock(vmf->vma->vm_mm);
	} else {
		ret = VM_FAULT_NOPAGE;
	}

	/*
	 * The fastpath failed and we were not required to return and retry
	 * immediately. We're now running in one of two modes:
	 *
	 * 1) retry_after_wait == true: The mmap_read_lock() has been dropped,
	 * so we can only resolve blocking waits here, not the fault itself.
	 * The aim is that the fastpath succeeds once the fault is retried,
	 * although it may still fail since the bo lock is dropped in between.
	 *
	 * 2) retry_after_wait == false: The fastpath failed, typically even after
	 * a retry. Do whatever's necessary to resolve the fault.
	 *
	 * This construct is recommended to avoid excessive waits under the mmap_lock.
	 */

	if (needs_rpm)
		xe_pm_runtime_get(xe);

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
			    err) {
		struct ttm_operation_ctx tctx = {
			.interruptible = true,
			.no_wait_gpu = false,
			.gfp_retry_mayfail = retry_after_wait,
		};

		err = drm_exec_lock_obj(&exec, &tbo->base);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		/*
		 * Reject CPU faults to purgeable BOs. DONTNEED BOs can be
		 * purged at any time, and purged BOs have no backing store.
		 */
		if (xe_bo_madv_is_dontneed(bo) || xe_bo_is_purged(bo)) {
			err = -EFAULT;
			break;
		}

		if (xe_ttm_bo_is_imported(tbo)) {
			err = -EFAULT;
			drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
			break;
		}

		err = xe_bo_fault_migrate(bo, &tctx, &exec);
		if (err) {
			drm_exec_retry_on_contention(&exec);
			xe_validation_retry_on_oom(&ctx, &err);
			break;
		}

		err = xe_bo_wait_usage_kernel(bo, &tctx);
		if (err)
			break;

		if (!retry_after_wait)
			ret = __xe_bo_cpu_fault(vmf, xe, bo);
	}
	/* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
	if (err && !retry_after_wait)
		ret = xe_err_to_fault_t(err);

	if (needs_rpm)
		xe_pm_runtime_put(xe);

	if (retry_after_wait)
		xe_bo_put(bo);
out:
	drm_dev_exit(idx);

	return ret;
}

static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
			   void *buf, int len, int write)
{
	struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
	struct xe_device *xe = xe_bo_device(bo);

	guard(xe_pm_runtime)(xe);
	return ttm_bo_vm_access(vma, addr, buf, len, write);
}

/**
 * xe_bo_read() - Read from an xe_bo
 * @bo: The buffer object to read from.
 * @offset: The byte offset to start reading from.
 * @dst: The destination buffer to store the read data in.
 * @size: Size in bytes for the read.
 *
 * Read @size bytes from the @bo, starting from @offset, storing into @dst.
 *
 * Return: Zero on success, or negative error.
 */
int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
{
	int ret;

	ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
	if (ret >= 0 && ret != size)
		ret = -EIO;
	else if (ret == size)
		ret = 0;

	return ret;
}
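
/*
 * Example (illustrative sketch only, not taken from an in-tree caller):
 * read the first bytes of a bo into a stack buffer. The hypothetical @bo
 * is assumed to be valid and kept alive by the caller.
 *
 *	u32 header[4];
 *	int err = xe_bo_read(bo, 0, header, sizeof(header));
 *
 *	if (err)
 *		return err;
 */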

static const struct vm_operations_struct xe_gem_vm_ops = {
	.fault = xe_bo_cpu_fault,
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
	.access = xe_bo_vm_access,
};

static int xe_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
	struct xe_bo *bo = gem_to_xe_bo(obj);
	int err = 0;

	/*
	 * Reject mmap of purgeable BOs. DONTNEED BOs can be purged
	 * at any time, making CPU access undefined behavior. Purged BOs have
	 * no backing store and are permanently invalid.
	 */
	err = xe_bo_lock(bo, true);
	if (err)
		return err;

	if (xe_bo_madv_is_dontneed(bo))
		err = -EBUSY;
	else if (xe_bo_is_purged(bo))
		err = -EINVAL;
	xe_bo_unlock(bo);
	if (err)
		return err;

	return drm_gem_ttm_mmap(obj, vma);
}

static const struct drm_gem_object_funcs xe_gem_object_funcs = {
	.free = xe_gem_object_free,
	.close = xe_gem_object_close,
	.mmap = xe_gem_object_mmap,
	.export = xe_gem_prime_export,
	.vm_ops = &xe_gem_vm_ops,
};

/**
 * xe_bo_alloc - Allocate storage for a struct xe_bo
 *
 * This function is intended to allocate storage to be used for input
 * to __xe_bo_create_locked(), in cases where a pointer to the bo to be
 * created is needed before the call to __xe_bo_create_locked().
 * If __xe_bo_create_locked() never ends up being called, then the
 * storage allocated with this function needs to be freed using
 * xe_bo_free().
 *
 * Return: A pointer to an uninitialized struct xe_bo on success,
 * ERR_PTR(-ENOMEM) on error.
 */
struct xe_bo *xe_bo_alloc(void)
{
	struct xe_bo *bo = kzalloc_obj(*bo);

	if (!bo)
		return ERR_PTR(-ENOMEM);

	return bo;
}

/**
 * xe_bo_free - Free storage allocated using xe_bo_alloc()
 * @bo: The buffer object storage.
 *
 * Refer to xe_bo_alloc() documentation for valid use-cases.
 */
void xe_bo_free(struct xe_bo *bo)
{
	kfree(bo);
}
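
/*
 * Illustrative sketch of the alloc/init split (not taken from an in-tree
 * caller): pre-allocate storage so a bo pointer is available before
 * initialization, and free it again if initialization is never attempted.
 * The @needed condition is hypothetical.
 *
 *	struct xe_bo *bo = xe_bo_alloc();
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	if (!needed) {
 *		xe_bo_free(bo);	// never handed to xe_bo_init_locked()
 *		return 0;
 *	}
 *	// On failure, xe_bo_init_locked() releases @bo internally.
 *	bo = xe_bo_init_locked(xe, bo, tile, resv, bulk, size,
 *			       cpu_caching, type, flags, exec);
 */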

/**
 * xe_bo_init_locked() - Initialize or create an xe_bo.
 * @xe: The xe device.
 * @bo: An already allocated buffer object or NULL
 * if the function should allocate a new one.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @resv: Pointer to a locked shared reservation object to use for this bo,
 * or NULL for the xe_bo to use its own.
 * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
 * @size: The storage size to use for the bo.
 * @cpu_caching: The cpu caching used for system memory backing store.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Initialize or create an xe buffer object. On failure, any allocated buffer
 * object passed in @bo will have been unreferenced.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
				struct xe_tile *tile, struct dma_resv *resv,
				struct ttm_lru_bulk_move *bulk, size_t size,
				u16 cpu_caching, enum ttm_bo_type type,
				u32 flags, struct drm_exec *exec)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.gfp_retry_mayfail = true,
	};
	struct ttm_placement *placement;
	u32 alignment;
	size_t aligned_size;
	int err;

	/* Only kernel objects should set GT */
	xe_assert(xe, !tile || type == ttm_bo_type_kernel);

	if (XE_WARN_ON(!size)) {
		xe_bo_free(bo);
		return ERR_PTR(-EINVAL);
	}

	/* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
	if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) {
		xe_bo_free(bo);
		return ERR_PTR(-EINVAL);
	}

	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
	     (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
		size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;

		aligned_size = ALIGN(size, align);
		if (type != ttm_bo_type_device)
			size = ALIGN(size, align);
		flags |= XE_BO_FLAG_INTERNAL_64K;
		alignment = align >> PAGE_SHIFT;
	} else {
		aligned_size = ALIGN(size, SZ_4K);
		flags &= ~XE_BO_FLAG_INTERNAL_64K;
		alignment = SZ_4K >> PAGE_SHIFT;
	}

	if (type == ttm_bo_type_device && aligned_size != size) {
		xe_bo_free(bo);
		return ERR_PTR(-EINVAL);
	}

	if (!bo) {
		bo = xe_bo_alloc();
		if (IS_ERR(bo))
			return bo;
	}

	bo->ccs_cleared = false;
	bo->tile = tile;
	bo->flags = flags;
	bo->cpu_caching = cpu_caching;
	bo->ttm.base.funcs = &xe_gem_object_funcs;
	bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
	INIT_LIST_HEAD(&bo->pinned_link);
#ifdef CONFIG_PROC_FS
	INIT_LIST_HEAD(&bo->client_link);
#endif
	INIT_LIST_HEAD(&bo->vram_userfault_link);

	/* Initialize purge advisory state */
	bo->madv_purgeable = XE_MADV_PURGEABLE_WILLNEED;

	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);

	if (resv) {
		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
		ctx.resv = resv;
	}

	xe_validation_assert_exec(xe, exec, &bo->ttm.base);
	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
		err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type);
		if (WARN_ON(err)) {
			xe_ttm_bo_destroy(&bo->ttm);
			return ERR_PTR(err);
		}
	}

	/* Defer populating type_sg bos */
	placement = (type == ttm_bo_type_sg ||
		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
		&bo->placement;
	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
				   placement, alignment,
				   &ctx, NULL, resv, xe_ttm_bo_destroy);
	if (err)
		return ERR_PTR(err);

	/*
	 * The VRAM pages underneath are potentially still being accessed by the
	 * GPU, as per async GPU clearing and async evictions. However TTM makes
	 * sure to add any corresponding move/clear fences into the object's
	 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
	 *
	 * For KMD internal buffers we don't care about GPU clearing, however we
	 * still need to handle async evictions, where the VRAM is still being
	 * accessed by the GPU. Most internal callers are not expecting this,
	 * since they are missing the required synchronisation before accessing
	 * the memory. To keep things simple just sync wait any kernel fences
	 * here, if the buffer is designated KMD internal.
	 *
	 * For normal userspace objects we should already have the required
	 * pipelining or sync waiting elsewhere, since we already have to deal
	 * with things like async GPU clearing.
	 */
	if (type == ttm_bo_type_kernel) {
		long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
						     DMA_RESV_USAGE_KERNEL,
						     ctx.interruptible,
						     MAX_SCHEDULE_TIMEOUT);

		if (timeout < 0) {
			if (!resv)
				dma_resv_unlock(bo->ttm.base.resv);
			xe_bo_put(bo);
			return ERR_PTR(timeout);
		}
	}

	bo->created = true;
	if (bulk)
		ttm_bo_set_bulk_move(&bo->ttm, bulk);
	else
		ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

	return bo;
}

static int __xe_bo_fixed_placement(struct xe_device *xe,
				   struct xe_bo *bo, enum ttm_bo_type type,
				   u32 flags,
				   u64 start, u64 end, u64 size)
{
	struct ttm_place *place = bo->placements;
	u32 vram_flag, vram_stolen_flags;

	/*
	 * to allow fixed placement in GGTT of a VF, post-migration fixups would have to
	 * include selecting a new fixed offset and shifting the page ranges for it
	 */
	xe_assert(xe, !IS_SRIOV_VF(xe) || !(bo->flags & XE_BO_FLAG_GGTT));

	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
		return -EINVAL;

	vram_flag = flags & XE_BO_FLAG_VRAM_MASK;
	vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag;

	/* check if more than one VRAM/STOLEN flag is set */
	if (hweight32(vram_stolen_flags) > 1)
		return -EINVAL;

	place->flags = TTM_PL_FLAG_CONTIGUOUS;
	place->fpfn = start >> PAGE_SHIFT;
	place->lpfn = end >> PAGE_SHIFT;

	if (flags & XE_BO_FLAG_STOLEN)
		place->mem_type = XE_PL_STOLEN;
	else
		place->mem_type = bo_vram_flags_to_vram_placement(xe, flags, vram_flag, type);

	bo->placement = (struct ttm_placement) {
		.num_placement = 1,
		.placement = place,
	};

	return 0;
}

static struct xe_bo *
__xe_bo_create_locked(struct xe_device *xe,
		      struct xe_tile *tile, struct xe_vm *vm,
		      size_t size, u64 start, u64 end,
		      u16 cpu_caching, enum ttm_bo_type type, u32 flags,
		      u64 alignment, struct drm_exec *exec)
{
	struct xe_bo *bo = NULL;
	int err;

	if (vm)
		xe_vm_assert_held(vm);

	if (start || end != ~0ULL) {
		bo = xe_bo_alloc();
		if (IS_ERR(bo))
			return bo;

		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
		err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size);
		if (err) {
			xe_bo_free(bo);
			return ERR_PTR(err);
		}
	}

	bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
			       vm && !xe_vm_in_fault_mode(vm) &&
			       flags & XE_BO_FLAG_USER ?
			       &vm->lru_bulk_move : NULL, size,
			       cpu_caching, type, flags, exec);
	if (IS_ERR(bo))
		return bo;

	bo->min_align = alignment;

	/*
	 * Note that instead of taking a reference on the drm_gpuvm_resv_bo(),
	 * to ensure the shared resv doesn't disappear under the bo, the bo
	 * will keep a reference to the vm, and avoid circular references
	 * by having all the vm's bo references released at vm close
	 * time.
	 */
	if (vm && xe_bo_is_user(bo))
		xe_vm_get(vm);
	bo->vm = vm;

	if (bo->flags & XE_BO_FLAG_GGTT) {
		struct xe_tile *t;
		u8 id;

		if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
			if (!tile && flags & XE_BO_FLAG_STOLEN)
				tile = xe_device_get_root_tile(xe);

			xe_assert(xe, tile);
		}

		for_each_tile(t, xe, id) {
			if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
				continue;

			if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
				err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
							   start + xe_bo_size(bo), U64_MAX,
							   exec);
			} else {
				err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec);
			}
			if (err)
				goto err_unlock_put_bo;
		}
	}

	trace_xe_bo_create(bo);
	return bo;

err_unlock_put_bo:
	__xe_bo_unset_bulk_move(bo);
	xe_bo_unlock_vm_held(bo);
	xe_bo_put(bo);
	return ERR_PTR(err);
}

/**
 * xe_bo_create_locked() - Create a BO
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @vm: The local vm or NULL for external objects.
 * @size: The storage size to use for the bo.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Create a locked xe BO with no range nor alignment restrictions.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
				  struct xe_vm *vm, size_t size,
				  enum ttm_bo_type type, u32 flags,
				  struct drm_exec *exec)
{
	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
				     flags, 0, exec);
}
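
/*
 * Illustrative usage sketch (not taken from an in-tree caller): the
 * returned bo is locked, so unlock it once done setting it up. The
 * drm_exec transaction @exec is assumed to be set up by the caller.
 *
 *	struct xe_bo *bo;
 *
 *	bo = xe_bo_create_locked(xe, NULL, NULL, SZ_64K, ttm_bo_type_kernel,
 *				 XE_BO_FLAG_SYSTEM, exec);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	// ... set up the bo while holding its dma-resv lock ...
 *	xe_bo_unlock(bo);
 */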

static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
				       size_t size, u16 cpu_caching,
				       enum ttm_bo_type type, u32 flags,
				       u64 alignment, bool intr)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *bo;
	int ret = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
			    ret) {
		bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
					   cpu_caching, type, flags, alignment, &exec);
		drm_exec_retry_on_contention(&exec);
		if (IS_ERR(bo)) {
			ret = PTR_ERR(bo);
			xe_validation_retry_on_oom(&ctx, &ret);
		} else {
			xe_bo_unlock(bo);
		}
	}

	return ret ? ERR_PTR(ret) : bo;
}

/**
 * xe_bo_create_user() - Create a user BO
 * @xe: The xe device.
 * @vm: The local vm or NULL for external objects.
 * @size: The storage size to use for the bo.
 * @cpu_caching: The caching mode to be used for system backing store.
 * @flags: XE_BO_FLAG_ flags.
 * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
 * if such a transaction should be initiated by the call.
 *
 * Create a bo on behalf of user-space.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_bo_create_user(struct xe_device *xe,
				struct xe_vm *vm, size_t size,
				u16 cpu_caching,
				u32 flags, struct drm_exec *exec)
{
	struct xe_bo *bo;

	flags |= XE_BO_FLAG_USER;

	if (vm || exec) {
		xe_assert(xe, exec);
		bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
					   cpu_caching, ttm_bo_type_device,
					   flags, 0, exec);
		if (!IS_ERR(bo))
			xe_bo_unlock_vm_held(bo);
	} else {
		bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
				       ttm_bo_type_device, flags, 0, true);
	}

	return bo;
}
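
/*
 * Illustrative usage sketch (not taken from an in-tree caller): create an
 * external user bo with WB CPU caching, letting the function initiate its
 * own validation transaction by passing a NULL @exec.
 *
 *	struct xe_bo *bo = xe_bo_create_user(xe, NULL, SZ_2M,
 *					     DRM_XE_GEM_CPU_CACHING_WB,
 *					     XE_BO_FLAG_SYSTEM, NULL);
 *
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 */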

/**
 * xe_bo_create_pin_range_novm() - Create and pin a BO with range options.
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @size: The storage size to use for the bo.
 * @start: Start of fixed VRAM range or 0.
 * @end: End of fixed VRAM range or ~0ULL.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 *
 * Create and pin an xe BO with range options. If @start and @end indicate
 * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM placement
 * only.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 */
struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
					  size_t size, u64 start, u64 end,
					  enum ttm_bo_type type, u32 flags)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *bo;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end,
					   0, type, flags, 0, &exec);
		if (IS_ERR(bo)) {
			drm_exec_retry_on_contention(&exec);
			err = PTR_ERR(bo);
			xe_validation_retry_on_oom(&ctx, &err);
			break;
		}

		err = xe_bo_pin(bo, &exec);
		xe_bo_unlock(bo);
		if (err) {
			xe_bo_put(bo);
			drm_exec_retry_on_contention(&exec);
			xe_validation_retry_on_oom(&ctx, &err);
			break;
		}
	}

	return err ? ERR_PTR(err) : bo;
}

static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
						     struct xe_tile *tile,
						     struct xe_vm *vm,
						     size_t size, u64 offset,
						     enum ttm_bo_type type, u32 flags,
						     u64 alignment, struct drm_exec *exec)
{
	struct xe_bo *bo;
	int err;
	u64 start = offset == ~0ull ? 0 : offset;
	u64 end = offset == ~0ull ? ~0ull : start + size;

	if (flags & XE_BO_FLAG_STOLEN &&
	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		flags |= XE_BO_FLAG_GGTT;

	bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
				   flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
				   alignment, exec);
	if (IS_ERR(bo))
		return bo;

	err = xe_bo_pin(bo, exec);
	if (err)
		goto err_put;

	err = xe_bo_vmap(bo);
	if (err)
		goto err_unpin;

	xe_bo_unlock_vm_held(bo);

	return bo;

err_unpin:
	xe_bo_unpin(bo);
err_put:
	xe_bo_unlock_vm_held(bo);
	xe_bo_put(bo);
	return ERR_PTR(err);
}

/**
 * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @size: The storage size to use for the bo.
 * @offset: Optional VRAM offset or %~0ull for don't care.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @alignment: GGTT alignment.
 * @intr: Whether to execute any waits for backing store interruptible.
 *
 * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment
 * options. The bo will be external and not associated with a VM.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
 * to true on entry.
 */
struct xe_bo *
xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
			     size_t size, u64 offset, enum ttm_bo_type type, u32 flags,
			     u64 alignment, bool intr)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_bo *bo;
	int ret = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
			    ret) {
		bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset,
						     type, flags, alignment, &exec);
		if (IS_ERR(bo)) {
			drm_exec_retry_on_contention(&exec);
			ret = PTR_ERR(bo);
			xe_validation_retry_on_oom(&ctx, &ret);
		}
	}

	return ret ? ERR_PTR(ret) : bo;
}

/**
 * xe_bo_create_pin_map() - Create pinned and mapped bo
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @vm: The vm to associate the buffer object with. The vm's resv must be locked
 * with the transaction represented by @exec.
 * @size: The storage size to use for the bo.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @exec: The drm_exec transaction to use for exhaustive eviction, and
 * previously used for locking @vm's resv.
 *
 * Create a pinned and mapped bo.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 * In particular, the function may return ERR_PTR(%-EINTR) if @exec was
 * configured for interruptible locking.
 */
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
				   struct xe_vm *vm, size_t size,
				   enum ttm_bo_type type, u32 flags,
				   struct drm_exec *exec)
{
	return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags,
					       0, exec);
}

/**
 * xe_bo_create_pin_map_novm() - Create pinned and mapped bo
 * @xe: The xe device.
 * @tile: The tile to select for migration of this bo, and the tile used for
 * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
 * @size: The storage size to use for the bo.
 * @type: The TTM buffer object type.
 * @flags: XE_BO_FLAG_ flags.
 * @intr: Whether to execute any waits for backing store interruptible.
 *
 * Create a pinned and mapped bo. The bo will be external and not associated
 * with a VM.
 *
 * Return: The buffer object on success. Negative error pointer on failure.
 * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
 * to true on entry.
 */
struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
					size_t size, enum ttm_bo_type type, u32 flags,
					bool intr)
{
	return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr);
}
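
/*
 * Illustrative usage sketch (not taken from an in-tree caller): create a
 * small kernel bo that is pinned, GGTT-bound and CPU-mapped, then write to
 * it through its vmap. @tile is assumed valid.
 *
 *	struct xe_bo *bo;
 *
 *	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
 *				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *				       XE_BO_FLAG_GGTT, true);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 *	xe_map_wr(xe, &bo->vmap, 0, u32, 0xdeadbeef);
 *	// ... later ...
 *	xe_bo_unpin_map_no_vm(bo);
 */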

static void __xe_bo_unpin_map_no_vm(void *arg)
{
	xe_bo_unpin_map_no_vm(arg);
}

struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
					   size_t size, u32 flags)
{
	struct xe_bo *bo;
	int ret;

	KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
	bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true);
	if (IS_ERR(bo))
		return bo;

	ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
	if (ret)
		return ERR_PTR(ret);

	return bo;
}

void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo)
{
	devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo);
}

struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
					     const void *data, size_t size, u32 flags)
{
	struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);

	if (IS_ERR(bo))
		return bo;

	xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);

	return bo;
}
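
/*
 * Illustrative usage sketch (not taken from an in-tree caller): upload a
 * firmware-style blob into a device-managed bo; the devm action added by
 * xe_managed_bo_create_pin_map() unpins and unmaps it on driver teardown.
 * The @blob and @blob_size names are hypothetical.
 *
 *	struct xe_bo *bo;
 *
 *	bo = xe_managed_bo_create_from_data(xe, tile, blob, blob_size,
 *					    XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 *					    XE_BO_FLAG_GGTT);
 *	if (IS_ERR(bo))
 *		return PTR_ERR(bo);
 */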

/**
 * xe_managed_bo_reinit_in_vram() - Replace a managed BO's backing store with VRAM
 * @xe: xe device
 * @tile: Tile where the new buffer will be created
 * @src: Managed buffer object allocated in system memory
 *
 * Replace a managed src buffer object allocated in system memory with a new
 * one allocated in vram, copying the data between them.
 * Buffer object in VRAM is not going to have the same GGTT address, the caller
 * is responsible for making sure that any old references to it are updated.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
{
	struct xe_bo *bo;
	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;

	dst_flags |= (*src)->flags & (XE_BO_FLAG_GGTT_INVALIDATE |
				      XE_BO_FLAG_PINNED_NORESTORE);

	xe_assert(xe, IS_DGFX(xe));
	xe_assert(xe, !(*src)->vmap.is_iomem);

	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
					    xe_bo_size(*src), dst_flags);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
	*src = bo;

	return 0;
}

/*
 * XXX: This is in the VM bind data path, likely should calculate this once and
 * store, with a recalculation if the BO is moved.
 */
u64 vram_region_gpu_offset(struct ttm_resource *res)
{
	struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);

	switch (res->mem_type) {
	case XE_PL_STOLEN:
		return xe_ttm_stolen_gpu_offset(xe);
	case XE_PL_TT:
	case XE_PL_SYSTEM:
		return 0;
	default:
		return res_to_mem_region(res)->dpa_base;
	}
}

/**
 * xe_bo_pin_external - pin an external BO
 * @bo: buffer object to be pinned
 * @in_place: Pin in current placement, don't attempt to migrate.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 * BO. Unique call compared to xe_bo_pin as this function has its own set of
 * asserts and code to ensure evict / restore on suspend / resume.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec)
{
	struct xe_device *xe = xe_bo_device(bo);
	int err;

	xe_assert(xe, !bo->vm);
	xe_assert(xe, xe_bo_is_user(bo));

	if (!xe_bo_is_pinned(bo)) {
		if (!in_place) {
			err = xe_bo_validate(bo, NULL, false, exec);
			if (err)
				return err;
		}

		spin_lock(&xe->pinned.lock);
		list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
		spin_unlock(&xe->pinned.lock);
	}

	ttm_bo_pin(&bo->ttm);
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);

	/*
	 * FIXME: If we always use the reserve / unreserve functions for locking
	 * we do not need this.
	 */
	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

	return 0;
}

/**
 * xe_bo_pin() - Pin a kernel bo after potentially migrating it
 * @bo: The kernel bo to pin.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Attempts to migrate a bo to @bo->placement. If that succeeds,
 * pins the bo.
 *
 * Return: %0 on success, negative error code on migration failure.
 */
int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec)
{
	struct ttm_place *place = &bo->placements[0];
	struct xe_device *xe = xe_bo_device(bo);
	int err;

	/* We currently don't expect user BO to be pinned */
	xe_assert(xe, !xe_bo_is_user(bo));

	/* Pinned object must be in GGTT or have pinned flag */
	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
				   XE_BO_FLAG_GGTT));

	/*
	 * No reason we can't support pinning imported dma-bufs we just don't
	 * expect to pin an imported dma-buf.
	 */
	xe_assert(xe, !bo->ttm.base.import_attach);

	/* We only expect at most 1 pin */
	xe_assert(xe, !xe_bo_is_pinned(bo));

	err = xe_bo_validate(bo, NULL, false, exec);
	if (err)
		return err;

	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
		spin_lock(&xe->pinned.lock);
		if (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)
			list_add_tail(&bo->pinned_link, &xe->pinned.late.kernel_bo_present);
		else
			list_add_tail(&bo->pinned_link, &xe->pinned.early.kernel_bo_present);
		spin_unlock(&xe->pinned.lock);
	}

	ttm_bo_pin(&bo->ttm);
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
		xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);

	/*
	 * FIXME: If we always use the reserve / unreserve functions for locking
	 * we do not need this.
	 */
	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

	return 0;
}
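
/*
 * Illustrative sketch of pin/unpin pairing for a kernel bo (not taken from
 * an in-tree caller). @exec is assumed to be the validation transaction
 * that currently holds the bo's dma-resv lock.
 *
 *	err = xe_bo_pin(bo, exec);
 *	if (err)
 *		return err;
 *	// ... use the pinned bo; later, with the bo lock held again:
 *	xe_bo_unpin(bo);
 */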

/**
 * xe_bo_unpin_external - unpin an external BO
 * @bo: buffer object to be unpinned
 *
 * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 * BO. Unique call compared to xe_bo_unpin as this function has its own set of
 * asserts and code to ensure evict / restore on suspend / resume.
 */
void xe_bo_unpin_external(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);

	xe_assert(xe, !bo->vm);
	xe_assert(xe, xe_bo_is_pinned(bo));
	xe_assert(xe, xe_bo_is_user(bo));

	spin_lock(&xe->pinned.lock);
	if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
		list_del_init(&bo->pinned_link);
	spin_unlock(&xe->pinned.lock);

	ttm_bo_unpin(&bo->ttm);
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
		xe_ttm_tt_account_add(xe, bo->ttm.ttm);

	/*
	 * FIXME: If we always use the reserve / unreserve functions for locking
	 * we do not need this.
	 */
	ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
}

void xe_bo_unpin(struct xe_bo *bo)
{
	struct ttm_place *place = &bo->placements[0];
	struct xe_device *xe = xe_bo_device(bo);

	xe_assert(xe, !bo->ttm.base.import_attach);
	xe_assert(xe, xe_bo_is_pinned(bo));

	if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
		spin_lock(&xe->pinned.lock);
		xe_assert(xe, !list_empty(&bo->pinned_link));
		list_del_init(&bo->pinned_link);
		spin_unlock(&xe->pinned.lock);

		if (bo->backup_obj) {
			if (xe_bo_is_pinned(bo->backup_obj))
				ttm_bo_unpin(&bo->backup_obj->ttm);
			xe_bo_put(bo->backup_obj);
			bo->backup_obj = NULL;
		}
	}
	ttm_bo_unpin(&bo->ttm);
	if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
		xe_ttm_tt_account_add(xe, bo->ttm.ttm);
}

/**
 * xe_bo_validate() - Make sure the bo is in an allowed placement
 * @bo: The bo.
 * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
 * NULL. Used together with @allow_res_evict.
 * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
 * reservation object.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Make sure the bo is in allowed placement, migrating it if necessary. If
 * needed, other bos will be evicted. If bos selected for eviction share
 * the @vm's reservation object, they can be evicted iff @allow_res_evict is
 * set to true, otherwise they will be bypassed.
 *
 * Return: 0 on success, negative error code on failure. May return
 * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
 */
int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
		   struct drm_exec *exec)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.gfp_retry_mayfail = true,
	};
	int ret;

	if (xe_bo_is_pinned(bo))
		return 0;

	if (vm) {
		lockdep_assert_held(&vm->lock);
		xe_vm_assert_held(vm);

		ctx.allow_res_evict = allow_res_evict;
		ctx.resv = xe_vm_resv(vm);
	}

	xe_vm_set_validating(vm, allow_res_evict);
	trace_xe_bo_validate(bo);
	xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
	ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
	xe_vm_clear_validating(vm, allow_res_evict);

	return ret;
}

bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &xe_ttm_bo_destroy)
		return true;

	return false;
}

/*
 * Resolve a BO address. There is no assert to check if the proper lock is held
 * so it should only be used in cases where it is not fatal to get the wrong
 * address, such as printing debug information, but not in cases where memory is
 * written based on this result.
 */
dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct xe_res_cursor cur;
	u64 page;

	xe_assert(xe, page_size <= PAGE_SIZE);
	page = offset >> PAGE_SHIFT;
	offset &= (PAGE_SIZE - 1);

	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
		xe_assert(xe, bo->ttm.ttm);

		xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
				page_size, &cur);
		return xe_res_dma(&cur) + offset;
	} else {
		struct xe_res_cursor cur;

		xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
			     page_size, &cur);
		return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
	}
}

dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
{
	if (!READ_ONCE(bo->ttm.pin_count))
		xe_bo_assert_held(bo);
	return __xe_bo_addr(bo, offset, page_size);
}
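
/*
 * Illustrative sketch (not taken from an in-tree caller): resolve the
 * device address of the first page of a pinned bo, e.g. for programming
 * into a descriptor or page-table entry.
 *
 *	dma_addr_t addr = xe_bo_addr(bo, 0, PAGE_SIZE);
 */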

int xe_bo_vmap(struct xe_bo *bo)
{
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	void *virtual;
	bool is_iomem;
	int ret;

	xe_bo_assert_held(bo);

	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
			!force_contiguous(bo->flags)))
		return -EINVAL;

	if (!iosys_map_is_null(&bo->vmap))
		return 0;

	/*
	 * We use this more or less deprecated interface for now since
	 * ttm_bo_vmap() doesn't offer the optimization of kmapping
	 * single page bos, which is done here.
	 * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
	 * to use struct iosys_map.
	 */
	ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
	if (ret)
		return ret;

	virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (is_iomem)
		iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
	else
		iosys_map_set_vaddr(&bo->vmap, virtual);

	return 0;
}
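
/*
 * Illustrative usage sketch (not taken from an in-tree caller): map a bo
 * and write through the iosys_map, which transparently handles both iomem
 * and system-memory mappings. The bo lock is assumed held.
 *
 *	err = xe_bo_vmap(bo);
 *	if (err)
 *		return err;
 *	xe_map_wr(xe, &bo->vmap, 0, u32, 0x1);
 *	xe_bo_vunmap(bo);
 */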

static void __xe_bo_vunmap(struct xe_bo *bo)
{
	if (!iosys_map_is_null(&bo->vmap)) {
		iosys_map_clear(&bo->vmap);
		ttm_bo_kunmap(&bo->kmap);
	}
}

void xe_bo_vunmap(struct xe_bo *bo)
{
	xe_bo_assert_held(bo);
	__xe_bo_vunmap(bo);
}

static int gem_create_set_pxp_type(struct xe_device *xe, struct xe_bo *bo, u64 value)
{
	if (value == DRM_XE_PXP_TYPE_NONE)
		return 0;

	/* we only support DRM_XE_PXP_TYPE_HWDRM for now */
	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
		return -EINVAL;

	return xe_pxp_key_assign(xe->pxp, bo);
}

typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
					     struct xe_bo *bo,
					     u64 value);

static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
	[DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
};

static int gem_create_user_ext_set_property(struct xe_device *xe,
					    struct xe_bo *bo,
					    u64 extension)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(gem_create_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(gem_create_set_property_funcs));
	if (!gem_create_set_property_funcs[idx])
		return -EINVAL;

	return gem_create_set_property_funcs[idx](xe, bo, ext.value);
}

typedef int (*xe_gem_create_user_extension_fn)(struct xe_device *xe,
					       struct xe_bo *bo,
					       u64 extension);

static const xe_gem_create_user_extension_fn gem_create_user_extension_funcs[] = {
	[DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS 16
static int gem_create_user_extensions(struct xe_device *xe, struct xe_bo *bo,
				      u64 extensions, int ext_number)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(gem_create_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(gem_create_user_extension_funcs));
	err = gem_create_user_extension_funcs[idx](xe, bo, extensions);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return gem_create_user_extensions(xe, bo, ext.next_extension,
						  ++ext_number);

	return 0;
}
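
/*
 * Illustrative userspace-side sketch of the extension chain walked above
 * (an assumption-laden example; see uapi/drm/xe_drm.h for the
 * authoritative layout). Each extension embeds a struct
 * drm_xe_user_extension whose next_extension field points at the next one:
 *
 *	struct drm_xe_ext_set_property pxp = {
 *		.base.name = DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE,
 *		.value = DRM_XE_PXP_TYPE_HWDRM,
 *	};
 *	struct drm_xe_gem_create create = {
 *		.extensions = (uintptr_t)&pxp,
 *		// ... placement, size, cpu_caching, etc. ...
 *	};
 */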
3327
xe_gem_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3328 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
3329 struct drm_file *file)
3330 {
3331 struct xe_device *xe = to_xe_device(dev);
3332 struct xe_file *xef = to_xe_file(file);
3333 struct drm_xe_gem_create *args = data;
3334 struct xe_validation_ctx ctx;
3335 struct drm_exec exec;
3336 struct xe_vm *vm = NULL;
3337 struct xe_bo *bo;
3338 unsigned int bo_flags;
3339 u32 handle;
3340 int err;
3341
3342 if (XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
3343 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3344 return -EINVAL;
3345
3346 /* at least one valid memory placement must be specified */
3347 if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
3348 !args->placement))
3349 return -EINVAL;
3350
3351 if (XE_IOCTL_DBG(xe, args->flags &
3352 ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
3353 DRM_XE_GEM_CREATE_FLAG_SCANOUT |
3354 DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM |
3355 DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION)))
3356 return -EINVAL;
3357
3358 if (XE_IOCTL_DBG(xe, args->handle))
3359 return -EINVAL;
3360
3361 if (XE_IOCTL_DBG(xe, !args->size))
3362 return -EINVAL;
3363
3364 if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
3365 return -EINVAL;
3366
3367 if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
3368 return -EINVAL;
3369
3370 bo_flags = 0;
3371 if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
3372 bo_flags |= XE_BO_FLAG_DEFER_BACKING;
3373
3374 /*
3375 * Display scanout is always non-coherent with the CPU cache.
3376 */
3377 if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
3378 bo_flags |= XE_BO_FLAG_FORCE_WC;
3379
3380 if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) {
3381 if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20))
3382 return -EOPNOTSUPP;
3383 bo_flags |= XE_BO_FLAG_NO_COMPRESSION;
3384 }
3385
3386 bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
3387
3388 /* CCS formats need physical placement at a 64K alignment in VRAM. */
3389 if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
3390 (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) &&
3391 !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
3392 IS_ALIGNED(args->size, SZ_64K))
3393 bo_flags |= XE_BO_FLAG_NEEDS_64K;
3394
3395 if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
3396 if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
3397 return -EINVAL;
3398
3399 bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
3400 }
3401
3402 if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
3403 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
3404 return -EINVAL;
3405
3406 if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
3407 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
3408 return -EINVAL;
3409
3410 if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_FORCE_WC &&
3411 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
3412 return -EINVAL;
3413
3414 if (args->vm_id) {
3415 vm = xe_vm_lookup(xef, args->vm_id);
3416 if (XE_IOCTL_DBG(xe, !vm))
3417 return -ENOENT;
3418 }
3419
3420 err = 0;
3421 xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
3422 err) {
3423 if (vm) {
3424 err = xe_vm_drm_exec_lock(vm, &exec);
3425 drm_exec_retry_on_contention(&exec);
3426 if (err)
3427 break;
3428 }
3429 bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
3430 bo_flags, &exec);
3431 drm_exec_retry_on_contention(&exec);
3432 if (IS_ERR(bo)) {
3433 err = PTR_ERR(bo);
3434 xe_validation_retry_on_oom(&ctx, &err);
3435 break;
3436 }
3437 }
3438 if (err)
3439 goto out_vm;
3440
3441 if (args->extensions) {
3442 err = gem_create_user_extensions(xe, bo, args->extensions, 0);
3443 if (err)
3444 goto out_bulk;
3445 }
3446
3447 err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
3448 if (err)
3449 goto out_bulk;
3450
3451 args->handle = handle;
3452 goto out_put;
3453
3454 out_bulk:
3455 if (vm && !xe_vm_in_fault_mode(vm)) {
3456 xe_vm_lock(vm, false);
3457 __xe_bo_unset_bulk_move(bo);
3458 xe_vm_unlock(vm);
3459 }
3460 out_put:
3461 xe_bo_put(bo);
3462 out_vm:
3463 if (vm)
3464 xe_vm_put(vm);
3465
3466 return err;
3467 }
3468
xe_gem_mmap_offset_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3469 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
3470 struct drm_file *file)
3471 {
3472 struct xe_device *xe = to_xe_device(dev);
3473 struct drm_xe_gem_mmap_offset *args = data;
3474 struct drm_gem_object *gem_obj;
3475
3476 if (XE_IOCTL_DBG(xe, args->extensions) ||
3477 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
3478 return -EINVAL;
3479
3480 if (XE_IOCTL_DBG(xe, args->flags &
3481 ~DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER))
3482 return -EINVAL;
3483
3484 if (args->flags & DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER) {
3485 if (XE_IOCTL_DBG(xe, !IS_DGFX(xe)))
3486 return -EINVAL;
3487
3488 if (XE_IOCTL_DBG(xe, args->handle))
3489 return -EINVAL;
3490
3491 if (XE_IOCTL_DBG(xe, PAGE_SIZE > SZ_4K))
3492 return -EINVAL;
3493
3494 BUILD_BUG_ON(((XE_PCI_BARRIER_MMAP_OFFSET >> XE_PTE_SHIFT) +
3495 SZ_4K) >= DRM_FILE_PAGE_OFFSET_START);
3496 args->offset = XE_PCI_BARRIER_MMAP_OFFSET;
3497 return 0;
3498 }
3499
3500 gem_obj = drm_gem_object_lookup(file, args->handle);
3501 if (XE_IOCTL_DBG(xe, !gem_obj))
3502 return -ENOENT;
3503
3504 /* The mmap offset was set up at BO allocation time. */
3505 args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
3506
3507 xe_bo_put(gem_to_xe_bo(gem_obj));
3508 return 0;
3509 }
3510
/**
 * xe_bo_decompress - schedule in-place decompress and install fence
 * @bo: buffer object (caller should hold drm_exec reservations for VM+BO)
 *
 * Schedules an in-place resolve via the migrate layer and installs the
 * returned dma_fence into the BO kernel reservation slot (DMA_RESV_USAGE_KERNEL).
 * In preempt fence mode, this operation interrupts hardware execution,
 * which is expensive; page fault mode is recommended for better performance.
 *
 * The resolve path only runs for VRAM-backed buffers (currently dGPU-only);
 * iGPU/system-memory objects fail the resource check and bypass the resolve.
 *
 * Returns 0 on success, negative errno on error.
 */
int xe_bo_decompress(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	struct dma_fence *decomp_fence = NULL;
	struct ttm_operation_ctx op_ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.gfp_retry_mayfail = false,
	};
	int err = 0;

	/* Silently skip decompression for non-VRAM buffers */
	if (!bo->ttm.resource || !mem_type_is_vram(bo->ttm.resource->mem_type))
		return 0;

	/* Notify before scheduling the resolve */
	err = xe_bo_move_notify(bo, &op_ctx);
	if (err)
		return err;

	/* Reserve a fence slot before scheduling */
	err = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
	if (err)
		return err;

	/* Schedule the in-place decompression */
	decomp_fence = xe_migrate_resolve(tile->migrate, bo, bo->ttm.resource);
	if (IS_ERR(decomp_fence))
		return PTR_ERR(decomp_fence);

	/* Install the kernel-usage fence */
	dma_resv_add_fence(bo->ttm.base.resv, decomp_fence, DMA_RESV_USAGE_KERNEL);
	dma_fence_put(decomp_fence);

	return 0;
}
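
/*
 * Example: a minimal synchronous xe_bo_decompress() call pattern
 * (illustrative sketch only, not built with this file). Callers inside a
 * drm_exec transaction already hold the reservation; a standalone caller
 * could take it via xe_bo_lock() and then wait out the installed kernel
 * fence:
 *
 *	err = xe_bo_lock(bo, true);
 *	if (err)
 *		return err;
 *	err = xe_bo_decompress(bo);
 *	if (!err)
 *		dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 *				      false, MAX_SCHEDULE_TIMEOUT);
 *	xe_bo_unlock(bo);
 */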
3565
/**
 * xe_bo_lock() - Lock the buffer object's dma_resv object
 * @bo: The struct xe_bo whose lock is to be taken
 * @intr: Whether to perform any wait interruptible
 *
 * Locks the buffer object's dma_resv object. If the buffer object is
 * pointing to a shared dma_resv object, that shared lock is locked.
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is set to false, the
 * function always returns 0.
 */
int xe_bo_lock(struct xe_bo *bo, bool intr)
{
	if (intr)
		return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);

	dma_resv_lock(bo->ttm.base.resv, NULL);

	return 0;
}

/**
 * xe_bo_unlock() - Unlock the buffer object's dma_resv object
 * @bo: The struct xe_bo whose lock is to be released.
 *
 * Unlock a buffer object lock that was locked by xe_bo_lock().
 */
void xe_bo_unlock(struct xe_bo *bo)
{
	dma_resv_unlock(bo->ttm.base.resv);
}
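
/*
 * Example: the basic xe_bo_lock() / xe_bo_unlock() pattern (illustrative
 * sketch only, not built with this file):
 *
 *	int err = xe_bo_lock(bo, true);
 *
 *	if (err)
 *		return err;	// -EINTR: signal while waiting for the lock
 *
 *	// ... inspect or modify the BO under the reservation ...
 *
 *	xe_bo_unlock(bo);
 */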
3598
/**
 * xe_bo_can_migrate - Whether a buffer object likely can be migrated
 * @bo: The buffer object to migrate
 * @mem_type: The TTM memory type intended to migrate to
 *
 * Check whether the buffer object supports migration to the
 * given memory type. Note that pinning may affect the ability to migrate as
 * returned by this function.
 *
 * This function is primarily intended as a helper for checking the
 * possibility to migrate buffer objects and can be called without
 * the object lock held.
 *
 * Return: true if migration is possible, false otherwise.
 */
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
{
	unsigned int cur_place;

	if (bo->ttm.type == ttm_bo_type_kernel)
		return true;

	if (bo->ttm.type == ttm_bo_type_sg)
		return false;

	for (cur_place = 0; cur_place < bo->placement.num_placement;
	     cur_place++) {
		if (bo->placements[cur_place].mem_type == mem_type)
			return true;
	}

	return false;
}

static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
{
	memset(place, 0, sizeof(*place));
	place->mem_type = mem_type;
}
3638
/**
 * xe_bo_migrate - Migrate an object to the desired region id
 * @bo: The buffer object to migrate.
 * @mem_type: The TTM region type to migrate to.
 * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
 * a default interruptible ctx is to be used.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Attempt to migrate the buffer object to the desired memory region. The
 * buffer object may not be pinned, and must be locked.
 * On successful completion, the object memory type will be updated,
 * but an async migration task may not have completed yet, and to
 * accomplish that, the object's kernel fences must be signaled with
 * the object lock held.
 *
 * Return: 0 on success. Negative error code on failure. In particular may
 * return -EINTR or -ERESTARTSYS if signal pending.
 */
int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
		  struct drm_exec *exec)
{
	struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.gfp_retry_mayfail = true,
	};
	struct ttm_placement placement;
	struct ttm_place requested;

	xe_bo_assert_held(bo);
	tctx = tctx ? tctx : &ctx;

	if (bo->ttm.resource->mem_type == mem_type)
		return 0;

	if (xe_bo_is_pinned(bo))
		return -EBUSY;

	if (!xe_bo_can_migrate(bo, mem_type))
		return -EINVAL;

	xe_place_from_ttm_type(mem_type, &requested);
	placement.num_placement = 1;
	placement.placement = &requested;

	/*
	 * Stolen needs to be handled like below VRAM handling if we ever need
	 * to support it.
	 */
	drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);

	if (mem_type_is_vram(mem_type)) {
		u32 c = 0;

		add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
	}

	if (!tctx->no_wait_gpu)
		xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
	return ttm_bo_validate(&bo->ttm, &placement, tctx);
}
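
/*
 * Example: migrating a BO to VRAM and making the async copy observable
 * (illustrative sketch only, not built with this file). Per the kernel-doc
 * above, the BO must be locked -- here assumed to be held via the caller's
 * drm_exec transaction @exec -- and the kernel fences are then waited on
 * under the lock:
 *
 *	if (xe_bo_can_migrate(bo, XE_PL_VRAM0)) {
 *		err = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, exec);
 *		if (!err)
 *			dma_resv_wait_timeout(bo->ttm.base.resv,
 *					      DMA_RESV_USAGE_KERNEL, false,
 *					      MAX_SCHEDULE_TIMEOUT);
 *	}
 */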
3701
/**
 * xe_bo_evict - Evict an object to evict placement
 * @bo: The buffer object to migrate.
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * On successful completion, the object memory will be moved to evict
 * placement. This function blocks until the object has been fully moved.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.no_wait_gpu = false,
		.gfp_retry_mayfail = true,
	};
	struct ttm_placement placement;
	int ret;

	xe_evict_flags(&bo->ttm, &placement);
	ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
	if (ret)
		return ret;

	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
			      false, MAX_SCHEDULE_TIMEOUT);

	return 0;
}
3732
/**
 * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
 * placed in system memory.
 * @bo: The xe_bo
 *
 * Return: true if extra pages need to be allocated, false otherwise.
 */
bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
{
	struct xe_device *xe = xe_bo_device(bo);

	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
		return false;

	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
		return false;

	/* On discrete GPUs, if the GPU can access this buffer from
	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
	 * can't be used since there's no CCS storage associated with
	 * non-VRAM addresses.
	 */
	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
		return false;

	/* Check if userspace explicitly requested no compression */
	if (bo->flags & XE_BO_FLAG_NO_COMPRESSION)
		return false;

	/*
	 * For WB (Write-Back) CPU caching mode, check if the device
	 * supports WB compression with coherency.
	 */
	if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB &&
	    xe->pat.idx[XE_CACHE_WB_COMPRESSION] == XE_PAT_INVALID_IDX)
		return false;

	return true;
}
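
/*
 * Example: how the result is typically consumed when sizing the system
 * backing store (illustrative sketch only; assumes the xe_device_ccs_bytes()
 * and xe_bo_size() helpers, which convert the BO size to flat-CCS bytes):
 *
 *	unsigned long extra_pages = 0;
 *
 *	if (xe_bo_needs_ccs_pages(bo))
 *		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
 *					   PAGE_SIZE);
 */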
3772
/**
 * __xe_bo_release_dummy() - Dummy kref release function
 * @kref: The embedded struct kref.
 *
 * Dummy release function for xe_bo_put_deferred(). Intentionally a no-op:
 * the real release is performed later by xe_bo_put_commit(). Do not call
 * directly.
 */
void __xe_bo_release_dummy(struct kref *kref)
{
}
3782
/**
 * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
 * @deferred: The lockless list used for the call to xe_bo_put_deferred().
 *
 * Puts all bos whose put was deferred by xe_bo_put_deferred().
 * The @deferred list can be either an onstack local list or a global
 * shared list used by a workqueue.
 */
void xe_bo_put_commit(struct llist_head *deferred)
{
	struct llist_node *freed;
	struct xe_bo *bo, *next;

	if (!deferred)
		return;

	freed = llist_del_all(deferred);
	if (!freed)
		return;

	llist_for_each_entry_safe(bo, next, freed, freed)
		drm_gem_object_free(&bo->ttm.base.refcount);
}
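
/*
 * Example: deferring final frees out of a context that must not sleep or
 * recurse into locks (illustrative sketch only, not built with this file;
 * xe_bo_put_deferred() is the helper declared in xe_bo.h):
 *
 *	LLIST_HEAD(deferred);
 *
 *	// under a spinlock / atomic context:
 *	xe_bo_put_deferred(bo, &deferred);
 *
 *	// later, from sleepable context:
 *	xe_bo_put_commit(&deferred);
 */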
3806
static void xe_bo_dev_work_func(struct work_struct *work)
{
	struct xe_bo_dev *bo_dev = container_of(work, typeof(*bo_dev), async_free);

	xe_bo_put_commit(&bo_dev->async_list);
}

/**
 * xe_bo_dev_init() - Initialize BO dev to manage async BO freeing
 * @bo_dev: The BO dev structure
 */
void xe_bo_dev_init(struct xe_bo_dev *bo_dev)
{
	INIT_WORK(&bo_dev->async_free, xe_bo_dev_work_func);
}

/**
 * xe_bo_dev_fini() - Finalize BO dev managing async BO freeing
 * @bo_dev: The BO dev structure
 */
void xe_bo_dev_fini(struct xe_bo_dev *bo_dev)
{
	flush_work(&bo_dev->async_free);
}
3831
/**
 * xe_bo_put() - Drop a reference to a struct xe_bo
 * @bo: The bo to put, or NULL.
 *
 * Drops a buffer object reference, possibly freeing the object. May sleep,
 * and annotates the locks the final free may need to take.
 */
void xe_bo_put(struct xe_bo *bo)
{
	struct xe_tile *tile;
	u8 id;

	might_sleep();
	if (bo) {
#ifdef CONFIG_PROC_FS
		if (bo->client)
			might_lock(&bo->client->bos_lock);
#endif
		for_each_tile(tile, xe_bo_device(bo), id)
			if (bo->ggtt_node[id])
				xe_ggtt_might_lock(tile->mem.ggtt);
		drm_gem_object_put(&bo->ttm.base);
	}
}
3849
/**
 * xe_bo_dumb_create - Create a dumb bo as backing for a fb
 * @file_priv: The drm file the buffer handle is created for.
 * @dev: The drm device.
 * @args: The dumb buffer creation arguments; size fields are filled in on return.
 *
 * See the dumb_create() hook in include/drm/drm_drv.h
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_bo_dumb_create(struct drm_file *file_priv,
		      struct drm_device *dev,
		      struct drm_mode_create_dumb *args)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_bo *bo;
	uint32_t handle;
	int err;
	u32 page_size = max_t(u32, PAGE_SIZE,
			      xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);

	err = drm_mode_size_dumb(dev, args, SZ_64, page_size);
	if (err)
		return err;

	bo = xe_bo_create_user(xe, NULL, args->size,
			       DRM_XE_GEM_CPU_CACHING_WC,
			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
			       XE_BO_FLAG_FORCE_WC |
			       XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_put(&bo->ttm.base);
	if (!err)
		args->handle = handle;
	return err;
}
3890
void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
{
	struct ttm_buffer_object *tbo = &bo->ttm;
	struct ttm_device *bdev = tbo->bdev;

	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);

	list_del_init(&bo->vram_userfault_link);
}

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_bo.c"
#endif