1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_bo.h"
7
8 #include <linux/dma-buf.h>
9
10 #include <drm/drm_drv.h>
11 #include <drm/drm_gem_ttm_helper.h>
12 #include <drm/drm_managed.h>
13 #include <drm/ttm/ttm_device.h>
14 #include <drm/ttm/ttm_placement.h>
15 #include <drm/ttm/ttm_tt.h>
16 #include <uapi/drm/xe_drm.h>
17
18 #include "xe_device.h"
19 #include "xe_dma_buf.h"
20 #include "xe_drm_client.h"
21 #include "xe_ggtt.h"
22 #include "xe_gt.h"
23 #include "xe_map.h"
24 #include "xe_migrate.h"
25 #include "xe_pm.h"
26 #include "xe_preempt_fence.h"
27 #include "xe_res_cursor.h"
28 #include "xe_trace_bo.h"
29 #include "xe_ttm_stolen_mgr.h"
30 #include "xe_vm.h"
31
32 const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
33 [XE_PL_SYSTEM] = "system",
34 [XE_PL_TT] = "gtt",
35 [XE_PL_VRAM0] = "vram0",
36 [XE_PL_VRAM1] = "vram1",
37 [XE_PL_STOLEN] = "stolen"
38 };
39
40 static const struct ttm_place sys_placement_flags = {
41 .fpfn = 0,
42 .lpfn = 0,
43 .mem_type = XE_PL_SYSTEM,
44 .flags = 0,
45 };
46
47 static struct ttm_placement sys_placement = {
48 .num_placement = 1,
49 .placement = &sys_placement_flags,
50 };
51
52 static const struct ttm_place tt_placement_flags[] = {
53 {
54 .fpfn = 0,
55 .lpfn = 0,
56 .mem_type = XE_PL_TT,
57 .flags = TTM_PL_FLAG_DESIRED,
58 },
59 {
60 .fpfn = 0,
61 .lpfn = 0,
62 .mem_type = XE_PL_SYSTEM,
63 .flags = TTM_PL_FLAG_FALLBACK,
64 }
65 };
66
67 static struct ttm_placement tt_placement = {
68 .num_placement = 2,
69 .placement = tt_placement_flags,
70 };
71
72 bool mem_type_is_vram(u32 mem_type)
73 {
74 return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
75 }
76
77 static bool resource_is_stolen_vram(struct xe_device *xe, struct ttm_resource *res)
78 {
79 return res->mem_type == XE_PL_STOLEN && IS_DGFX(xe);
80 }
81
82 static bool resource_is_vram(struct ttm_resource *res)
83 {
84 return mem_type_is_vram(res->mem_type);
85 }
86
87 bool xe_bo_is_vram(struct xe_bo *bo)
88 {
89 return resource_is_vram(bo->ttm.resource) ||
90 resource_is_stolen_vram(xe_bo_device(bo), bo->ttm.resource);
91 }
92
93 bool xe_bo_is_stolen(struct xe_bo *bo)
94 {
95 return bo->ttm.resource->mem_type == XE_PL_STOLEN;
96 }
97
98 /**
99 * xe_bo_has_single_placement - check if BO is placed only in one memory location
100 * @bo: The BO
101 *
102 * This function checks whether a given BO is placed in only one memory location.
103 *
104 * Returns: true if the BO is placed in a single memory location, false otherwise.
105 *
106 */
107 bool xe_bo_has_single_placement(struct xe_bo *bo)
108 {
109 return bo->placement.num_placement == 1;
110 }
111
112 /**
113 * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
114 * @bo: The BO
115 *
116 * The stolen memory is accessed through the PCI BAR for both DGFX and some
117 * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
118 *
119 * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
120 */
121 bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
122 {
123 return xe_bo_is_stolen(bo) &&
124 GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
125 }
126
127 static bool xe_bo_is_user(struct xe_bo *bo)
128 {
129 return bo->flags & XE_BO_FLAG_USER;
130 }
131
132 static struct xe_migrate *
133 mem_type_to_migrate(struct xe_device *xe, u32 mem_type)
134 {
135 struct xe_tile *tile;
136
137 xe_assert(xe, mem_type == XE_PL_STOLEN || mem_type_is_vram(mem_type));
138 tile = &xe->tiles[mem_type == XE_PL_STOLEN ? 0 : (mem_type - XE_PL_VRAM0)];
139 return tile->migrate;
140 }
141
142 static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
143 {
144 struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
145 struct ttm_resource_manager *mgr;
146
147 xe_assert(xe, resource_is_vram(res));
148 mgr = ttm_manager_type(&xe->ttm, res->mem_type);
149 return to_xe_ttm_vram_mgr(mgr)->vram;
150 }
151
152 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
153 u32 bo_flags, u32 *c)
154 {
155 if (bo_flags & XE_BO_FLAG_SYSTEM) {
156 xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
157
158 bo->placements[*c] = (struct ttm_place) {
159 .mem_type = XE_PL_TT,
160 };
161 *c += 1;
162 }
163 }
164
165 static bool force_contiguous(u32 bo_flags)
166 {
167 /*
168 * For eviction / restore on suspend / resume, objects pinned in VRAM
169 * must be contiguous; also, only contiguous BOs support xe_bo_vmap.
170 */
171 return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
172 }
173
174 static void add_vram(struct xe_device *xe, struct xe_bo *bo,
175 struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
176 {
177 struct ttm_place place = { .mem_type = mem_type };
178 struct xe_mem_region *vram;
179 u64 io_size;
180
181 xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
182
183 vram = to_xe_ttm_vram_mgr(ttm_manager_type(&xe->ttm, mem_type))->vram;
184 xe_assert(xe, vram && vram->usable_size);
185 io_size = vram->io_size;
186
187 if (force_contiguous(bo_flags))
188 place.flags |= TTM_PL_FLAG_CONTIGUOUS;
189
190 if (io_size < vram->usable_size) {
191 if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
192 place.fpfn = 0;
193 place.lpfn = io_size >> PAGE_SHIFT;
194 } else {
195 place.flags |= TTM_PL_FLAG_TOPDOWN;
196 }
197 }
198 places[*c] = place;
199 *c += 1;
200 }
201
202 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
203 u32 bo_flags, u32 *c)
204 {
205 if (bo_flags & XE_BO_FLAG_VRAM0)
206 add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
207 if (bo_flags & XE_BO_FLAG_VRAM1)
208 add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
209 }
210
211 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
212 u32 bo_flags, u32 *c)
213 {
214 if (bo_flags & XE_BO_FLAG_STOLEN) {
215 xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
216
217 bo->placements[*c] = (struct ttm_place) {
218 .mem_type = XE_PL_STOLEN,
219 .flags = force_contiguous(bo_flags) ?
220 TTM_PL_FLAG_CONTIGUOUS : 0,
221 };
222 *c += 1;
223 }
224 }
225
226 static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
227 u32 bo_flags)
228 {
229 u32 c = 0;
230
231 try_add_vram(xe, bo, bo_flags, &c);
232 try_add_system(xe, bo, bo_flags, &c);
233 try_add_stolen(xe, bo, bo_flags, &c);
234
235 if (!c)
236 return -EINVAL;
237
238 bo->placement = (struct ttm_placement) {
239 .num_placement = c,
240 .placement = bo->placements,
241 };
242
243 return 0;
244 }
245
246 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
247 u32 bo_flags)
248 {
249 xe_bo_assert_held(bo);
250 return __xe_bo_placement_for_flags(xe, bo, bo_flags);
251 }
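
/*
 * Illustrative sketch (not used by the driver): for a BO created with
 * bo_flags = XE_BO_FLAG_VRAM0 | XE_BO_FLAG_SYSTEM, the helpers above fill
 * bo->placements roughly as
 *
 *	[0] = { .mem_type = XE_PL_VRAM0 },	// plus TTM_PL_FLAG_CONTIGUOUS
 *						// when force_contiguous()
 *	[1] = { .mem_type = XE_PL_TT },
 *
 * i.e. VRAM0 is tried first with GTT as the fallback. Stolen, when
 * requested, is appended last by try_add_stolen().
 */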
252
253 static void xe_evict_flags(struct ttm_buffer_object *tbo,
254 struct ttm_placement *placement)
255 {
256 if (!xe_bo_is_xe_bo(tbo)) {
257 /* Don't handle scatter gather BOs */
258 if (tbo->type == ttm_bo_type_sg) {
259 placement->num_placement = 0;
260 return;
261 }
262
263 *placement = sys_placement;
264 return;
265 }
266
267 /*
268 * For xe, sg bos that are evicted to system just trigger a
269 * rebind of the sg list upon subsequent validation to XE_PL_TT.
270 */
271 switch (tbo->resource->mem_type) {
272 case XE_PL_VRAM0:
273 case XE_PL_VRAM1:
274 case XE_PL_STOLEN:
275 *placement = tt_placement;
276 break;
277 case XE_PL_TT:
278 default:
279 *placement = sys_placement;
280 break;
281 }
282 }
283
284 struct xe_ttm_tt {
285 struct ttm_tt ttm;
286 struct device *dev;
287 struct sg_table sgt;
288 struct sg_table *sg;
289 /** @purgeable: Whether the content of the pages of @ttm is purgeable. */
290 bool purgeable;
291 };
292
293 static int xe_tt_map_sg(struct ttm_tt *tt)
294 {
295 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
296 unsigned long num_pages = tt->num_pages;
297 int ret;
298
299 XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
300
301 if (xe_tt->sg)
302 return 0;
303
304 ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
305 num_pages, 0,
306 (u64)num_pages << PAGE_SHIFT,
307 xe_sg_segment_size(xe_tt->dev),
308 GFP_KERNEL);
309 if (ret)
310 return ret;
311
312 xe_tt->sg = &xe_tt->sgt;
313 ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
314 DMA_ATTR_SKIP_CPU_SYNC);
315 if (ret) {
316 sg_free_table(xe_tt->sg);
317 xe_tt->sg = NULL;
318 return ret;
319 }
320
321 return 0;
322 }
323
324 static void xe_tt_unmap_sg(struct ttm_tt *tt)
325 {
326 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
327
328 if (xe_tt->sg) {
329 dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
330 DMA_BIDIRECTIONAL, 0);
331 sg_free_table(xe_tt->sg);
332 xe_tt->sg = NULL;
333 }
334 }
335
336 struct sg_table *xe_bo_sg(struct xe_bo *bo)
337 {
338 struct ttm_tt *tt = bo->ttm.ttm;
339 struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
340
341 return xe_tt->sg;
342 }
343
344 static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
345 u32 page_flags)
346 {
347 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
348 struct xe_device *xe = xe_bo_device(bo);
349 struct xe_ttm_tt *tt;
350 unsigned long extra_pages;
351 enum ttm_caching caching = ttm_cached;
352 int err;
353
354 tt = kzalloc(sizeof(*tt), GFP_KERNEL);
355 if (!tt)
356 return NULL;
357
358 tt->dev = xe->drm.dev;
359
360 extra_pages = 0;
361 if (xe_bo_needs_ccs_pages(bo))
362 extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
363 PAGE_SIZE);
364
365 /*
366 * DGFX system memory is always WB / ttm_cached, since
367 * other caching modes are only supported on x86. DGFX
368 * GPU system memory accesses are always coherent with the
369 * CPU.
370 */
371 if (!IS_DGFX(xe)) {
372 switch (bo->cpu_caching) {
373 case DRM_XE_GEM_CPU_CACHING_WC:
374 caching = ttm_write_combined;
375 break;
376 default:
377 caching = ttm_cached;
378 break;
379 }
380
381 WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
382
383 /*
384 * Display scanout is always non-coherent with the CPU cache.
385 *
386 * For Xe_LPG and beyond, PPGTT PTE lookups are also
387 * non-coherent and require a CPU:WC mapping.
388 */
389 if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
390 (xe->info.graphics_verx100 >= 1270 &&
391 bo->flags & XE_BO_FLAG_PAGETABLE))
392 caching = ttm_write_combined;
393 }
394
395 if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
396 /*
397 * Valid only for internally-created buffers, for
398 * which cpu_caching is never initialized.
399 */
400 xe_assert(xe, bo->cpu_caching == 0);
401 caching = ttm_uncached;
402 }
403
404 err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
405 if (err) {
406 kfree(tt);
407 return NULL;
408 }
409
410 return &tt->ttm;
411 }
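
/*
 * Reader aid, summarizing the caching selection above: DGFX system memory
 * is always ttm_cached; on integrated parts the tt is ttm_write_combined
 * when userspace asked for DRM_XE_GEM_CPU_CACHING_WC, when a kernel scanout
 * BO has no cpu_caching set, or when a pagetable BO is created on graphics
 * version 12.70+; XE_BO_FLAG_NEEDS_UC (internal BOs only) overrides the
 * above with ttm_uncached.
 */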
412
413 static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
414 struct ttm_operation_ctx *ctx)
415 {
418 /*
419 * dma-bufs are not populated with pages, and the dma-
420 * addresses are set up when moved to XE_PL_TT.
421 */
422 if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
423 return 0;
424
425 return ttm_pool_alloc(&ttm_dev->pool, tt, ctx);
430 }
431
432 static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
433 {
434 if (tt->page_flags & TTM_TT_FLAG_EXTERNAL)
435 return;
436
437 xe_tt_unmap_sg(tt);
438
439 ttm_pool_free(&ttm_dev->pool, tt);
440 }
441
442 static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
443 {
444 ttm_tt_fini(tt);
445 kfree(tt);
446 }
447
448 static bool xe_ttm_resource_visible(struct ttm_resource *mem)
449 {
450 struct xe_ttm_vram_mgr_resource *vres =
451 to_xe_ttm_vram_mgr_resource(mem);
452
453 return vres->used_visible_size == mem->size;
454 }
455
456 static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
457 struct ttm_resource *mem)
458 {
459 struct xe_device *xe = ttm_to_xe_device(bdev);
460
461 switch (mem->mem_type) {
462 case XE_PL_SYSTEM:
463 case XE_PL_TT:
464 return 0;
465 case XE_PL_VRAM0:
466 case XE_PL_VRAM1: {
467 struct xe_mem_region *vram = res_to_mem_region(mem);
468
469 if (!xe_ttm_resource_visible(mem))
470 return -EINVAL;
471
472 mem->bus.offset = mem->start << PAGE_SHIFT;
473
474 if (vram->mapping &&
475 mem->placement & TTM_PL_FLAG_CONTIGUOUS)
476 mem->bus.addr = (u8 __force *)vram->mapping +
477 mem->bus.offset;
478
479 mem->bus.offset += vram->io_start;
480 mem->bus.is_iomem = true;
481
482 #if !IS_ENABLED(CONFIG_X86)
483 mem->bus.caching = ttm_write_combined;
484 #endif
485 return 0;
486 } case XE_PL_STOLEN:
487 return xe_ttm_stolen_io_mem_reserve(xe, mem);
488 default:
489 return -EINVAL;
490 }
491 }
492
493 static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
494 const struct ttm_operation_ctx *ctx)
495 {
496 struct dma_resv_iter cursor;
497 struct dma_fence *fence;
498 struct drm_gem_object *obj = &bo->ttm.base;
499 struct drm_gpuvm_bo *vm_bo;
500 bool idle = false;
501 int ret = 0;
502
503 dma_resv_assert_held(bo->ttm.base.resv);
504
505 if (!list_empty(&bo->ttm.base.gpuva.list)) {
506 dma_resv_iter_begin(&cursor, bo->ttm.base.resv,
507 DMA_RESV_USAGE_BOOKKEEP);
508 dma_resv_for_each_fence_unlocked(&cursor, fence)
509 dma_fence_enable_sw_signaling(fence);
510 dma_resv_iter_end(&cursor);
511 }
512
513 drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
514 struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
515 struct drm_gpuva *gpuva;
516
517 if (!xe_vm_in_fault_mode(vm)) {
518 drm_gpuvm_bo_evict(vm_bo, true);
519 continue;
520 }
521
522 if (!idle) {
523 long timeout;
524
525 if (ctx->no_wait_gpu &&
526 !dma_resv_test_signaled(bo->ttm.base.resv,
527 DMA_RESV_USAGE_BOOKKEEP))
528 return -EBUSY;
529
530 timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
531 DMA_RESV_USAGE_BOOKKEEP,
532 ctx->interruptible,
533 MAX_SCHEDULE_TIMEOUT);
534 if (!timeout)
535 return -ETIME;
536 if (timeout < 0)
537 return timeout;
538
539 idle = true;
540 }
541
542 drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
543 struct xe_vma *vma = gpuva_to_vma(gpuva);
544
545 trace_xe_vma_evict(vma);
546 ret = xe_vm_invalidate_vma(vma);
547 if (XE_WARN_ON(ret))
548 return ret;
549 }
550 }
551
552 return ret;
553 }
554
555 /*
556 * The dma-buf map_attachment() / unmap_attachment() calls are hooked up here.
557 * Note that unmapping the attachment is deferred to the next
558 * map_attachment time, or to bo destroy (after idling), whichever comes first.
559 * This is to avoid syncing before unmap_attachment(), assuming that the
560 * caller relies on idling the reservation object before moving the
561 * backing store out. Should that assumption not hold, then we will be able
562 * to unconditionally call unmap_attachment() when moving out to system.
563 */
564 static int xe_bo_move_dmabuf(struct ttm_buffer_object *ttm_bo,
565 struct ttm_resource *new_res)
566 {
567 struct dma_buf_attachment *attach = ttm_bo->base.import_attach;
568 struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm, struct xe_ttm_tt,
569 ttm);
570 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
571 struct sg_table *sg;
572
573 xe_assert(xe, attach);
574 xe_assert(xe, ttm_bo->ttm);
575
576 if (new_res->mem_type == XE_PL_SYSTEM)
577 goto out;
578
579 if (ttm_bo->sg) {
580 dma_buf_unmap_attachment(attach, ttm_bo->sg, DMA_BIDIRECTIONAL);
581 ttm_bo->sg = NULL;
582 }
583
584 sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
585 if (IS_ERR(sg))
586 return PTR_ERR(sg);
587
588 ttm_bo->sg = sg;
589 xe_tt->sg = sg;
590
591 out:
592 ttm_bo_move_null(ttm_bo, new_res);
593
594 return 0;
595 }
596
597 /**
598 * xe_bo_move_notify - Notify subsystems of a pending move
599 * @bo: The buffer object
600 * @ctx: The struct ttm_operation_ctx controlling locking and waits.
601 *
602 * This function notifies subsystems of an upcoming buffer move.
603 * Upon receiving such a notification, subsystems should schedule
604 * halting access to the underlying pages and optionally add a fence
605 * to the buffer object's dma_resv object, that signals when access is
606 * stopped. The caller will wait on all dma_resv fences before
607 * starting the move.
608 *
609 * A subsystem may commence access to the object after obtaining
610 * bindings to the new backing memory under the object lock.
611 *
612 * Return: 0 on success, -EINTR or -ERESTARTSYS if interrupted in fault mode,
613 * negative error code on error.
614 */
615 static int xe_bo_move_notify(struct xe_bo *bo,
616 const struct ttm_operation_ctx *ctx)
617 {
618 struct ttm_buffer_object *ttm_bo = &bo->ttm;
619 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
620 struct ttm_resource *old_mem = ttm_bo->resource;
621 u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
622 int ret;
623
624 /*
625 * If this starts to call into many components, consider
626 * using a notification chain here.
627 */
628
629 if (xe_bo_is_pinned(bo))
630 return -EINVAL;
631
632 xe_bo_vunmap(bo);
633 ret = xe_bo_trigger_rebind(xe, bo, ctx);
634 if (ret)
635 return ret;
636
637 /* Don't call move_notify() for imported dma-bufs. */
638 if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
639 dma_buf_move_notify(ttm_bo->base.dma_buf);
640
641 /*
642 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
643 * so if we moved from VRAM make sure to unlink this from the userfault
644 * tracking.
645 */
646 if (mem_type_is_vram(old_mem_type)) {
647 mutex_lock(&xe->mem_access.vram_userfault.lock);
648 if (!list_empty(&bo->vram_userfault_link))
649 list_del_init(&bo->vram_userfault_link);
650 mutex_unlock(&xe->mem_access.vram_userfault.lock);
651 }
652
653 return 0;
654 }
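
/*
 * Hypothetical illustration of the contract documented above (not driver
 * code): a subsystem that must stop access would typically publish a fence
 * that signals once access has ceased, along the lines of
 *
 *	err = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
 *	if (!err)
 *		dma_resv_add_fence(bo->ttm.base.resv, stop_fence,
 *				   DMA_RESV_USAGE_BOOKKEEP);
 *
 * where stop_fence is assumed to come from the subsystem itself.
 */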
655
656 static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
657 struct ttm_operation_ctx *ctx,
658 struct ttm_resource *new_mem,
659 struct ttm_place *hop)
660 {
661 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
662 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
663 struct ttm_resource *old_mem = ttm_bo->resource;
664 u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
665 struct ttm_tt *ttm = ttm_bo->ttm;
666 struct xe_migrate *migrate = NULL;
667 struct dma_fence *fence;
668 bool move_lacks_source;
669 bool tt_has_data;
670 bool needs_clear;
671 bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
672 ttm && ttm_tt_is_populated(ttm)) ? true : false;
673 int ret = 0;
674
675 /* Bo creation path, moving to system or TT. */
676 if ((!old_mem && ttm) && !handle_system_ccs) {
677 if (new_mem->mem_type == XE_PL_TT)
678 ret = xe_tt_map_sg(ttm);
679 if (!ret)
680 ttm_bo_move_null(ttm_bo, new_mem);
681 goto out;
682 }
683
684 if (ttm_bo->type == ttm_bo_type_sg) {
685 ret = xe_bo_move_notify(bo, ctx);
686 if (!ret)
687 ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
688 return ret;
689 }
690
691 tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
692 (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
693
694 move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
695 (!mem_type_is_vram(old_mem_type) && !tt_has_data));
696
697 needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
698 (!ttm && ttm_bo->type == ttm_bo_type_device);
699
700 if (new_mem->mem_type == XE_PL_TT) {
701 ret = xe_tt_map_sg(ttm);
702 if (ret)
703 goto out;
704 }
705
706 if ((move_lacks_source && !needs_clear)) {
707 ttm_bo_move_null(ttm_bo, new_mem);
708 goto out;
709 }
710
711 if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
712 ttm_bo_move_null(ttm_bo, new_mem);
713 goto out;
714 }
715
716 /*
717 * A failed multi-hop where the old_mem is still marked as
718 * TTM_PL_FLAG_TEMPORARY should just be a dummy move.
719 */
720 if (old_mem_type == XE_PL_TT &&
721 new_mem->mem_type == XE_PL_TT) {
722 ttm_bo_move_null(ttm_bo, new_mem);
723 goto out;
724 }
725
726 if (!move_lacks_source && !xe_bo_is_pinned(bo)) {
727 ret = xe_bo_move_notify(bo, ctx);
728 if (ret)
729 goto out;
730 }
731
732 if (old_mem_type == XE_PL_TT &&
733 new_mem->mem_type == XE_PL_SYSTEM) {
734 long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
735 DMA_RESV_USAGE_BOOKKEEP,
736 false,
737 MAX_SCHEDULE_TIMEOUT);
738 if (timeout < 0) {
739 ret = timeout;
740 goto out;
741 }
742
743 if (!handle_system_ccs) {
744 ttm_bo_move_null(ttm_bo, new_mem);
745 goto out;
746 }
747 }
748
749 if (!move_lacks_source &&
750 ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) ||
751 (mem_type_is_vram(old_mem_type) &&
752 new_mem->mem_type == XE_PL_SYSTEM))) {
753 hop->fpfn = 0;
754 hop->lpfn = 0;
755 hop->mem_type = XE_PL_TT;
756 hop->flags = TTM_PL_FLAG_TEMPORARY;
757 ret = -EMULTIHOP;
758 goto out;
759 }
760
761 if (bo->tile)
762 migrate = bo->tile->migrate;
763 else if (resource_is_vram(new_mem))
764 migrate = mem_type_to_migrate(xe, new_mem->mem_type);
765 else if (mem_type_is_vram(old_mem_type))
766 migrate = mem_type_to_migrate(xe, old_mem_type);
767 else
768 migrate = xe->tiles[0].migrate;
769
770 xe_assert(xe, migrate);
771 trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
772 if (xe_rpm_reclaim_safe(xe)) {
773 /*
774 * We might be called through swapout in the validation path of
775 * another TTM device, so acquire rpm here.
776 */
777 xe_pm_runtime_get(xe);
778 } else {
779 drm_WARN_ON(&xe->drm, handle_system_ccs);
780 xe_pm_runtime_get_noresume(xe);
781 }
782
783 if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
784 /*
785 * Kernel memory that is pinned should only be moved on suspend
786 * / resume; some of the pinned memory is required for the
787 * device to resume / use the GPU to move other evicted memory
788 * (user memory) around. This could likely be optimized a bit
789 * further by finding the minimum set of pinned memory required
790 * for resume, but for simplicity we do a memcpy for all pinned
791 * memory.
792 */
793 ret = xe_bo_vmap(bo);
794 if (!ret) {
795 ret = ttm_bo_move_memcpy(ttm_bo, ctx, new_mem);
796
797 /* Create a new VMAP once kernel BO back in VRAM */
798 if (!ret && resource_is_vram(new_mem)) {
799 struct xe_mem_region *vram = res_to_mem_region(new_mem);
800 void __iomem *new_addr = vram->mapping +
801 (new_mem->start << PAGE_SHIFT);
802
803 if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
804 ret = -EINVAL;
805 xe_pm_runtime_put(xe);
806 goto out;
807 }
808
809 xe_assert(xe, new_mem->start ==
810 bo->placements->fpfn);
811
812 iosys_map_set_vaddr_iomem(&bo->vmap, new_addr);
813 }
814 }
815 } else {
816 if (move_lacks_source) {
817 u32 flags = 0;
818
819 if (mem_type_is_vram(new_mem->mem_type))
820 flags |= XE_MIGRATE_CLEAR_FLAG_FULL;
821 else if (handle_system_ccs)
822 flags |= XE_MIGRATE_CLEAR_FLAG_CCS_DATA;
823
824 fence = xe_migrate_clear(migrate, bo, new_mem, flags);
825 } else
827 fence = xe_migrate_copy(migrate, bo, bo, old_mem,
828 new_mem, handle_system_ccs);
829 if (IS_ERR(fence)) {
830 ret = PTR_ERR(fence);
831 xe_pm_runtime_put(xe);
832 goto out;
833 }
834 if (!move_lacks_source) {
835 ret = ttm_bo_move_accel_cleanup(ttm_bo, fence, evict,
836 true, new_mem);
837 if (ret) {
838 dma_fence_wait(fence, false);
839 ttm_bo_move_null(ttm_bo, new_mem);
840 ret = 0;
841 }
842 } else {
843 /*
844 * ttm_bo_move_accel_cleanup() may blow up if
845 * bo->resource == NULL, so just attach the
846 * fence and set the new resource.
847 */
848 dma_resv_add_fence(ttm_bo->base.resv, fence,
849 DMA_RESV_USAGE_KERNEL);
850 ttm_bo_move_null(ttm_bo, new_mem);
851 }
852
853 dma_fence_put(fence);
854 }
855
856 xe_pm_runtime_put(xe);
857
858 out:
859 if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
860 ttm_bo->ttm) {
861 long timeout = dma_resv_wait_timeout(ttm_bo->base.resv,
862 DMA_RESV_USAGE_KERNEL,
863 false,
864 MAX_SCHEDULE_TIMEOUT);
865 if (timeout < 0)
866 ret = timeout;
867
868 xe_tt_unmap_sg(ttm_bo->ttm);
869 }
870
871 return ret;
872 }
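
/*
 * Rough decision flow of xe_bo_move() above, kept as a reader aid: freshly
 * created BOs and most TT <-> SYSTEM transitions degenerate to
 * ttm_bo_move_null(); sg (dma-buf) BOs go through xe_bo_move_dmabuf();
 * direct VRAM <-> SYSTEM moves are bounced through XE_PL_TT by returning
 * -EMULTIHOP; pinned kernel BOs are copied with ttm_bo_move_memcpy(); and
 * everything else is cleared or copied on the GPU via xe_migrate_clear() /
 * xe_migrate_copy().
 */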
873
874 /**
875 * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
876 * @bo: The buffer object to move.
877 *
878 * On successful completion, the object memory will be moved to system memory.
879 *
880 * This is needed for special handling of pinned VRAM objects during
881 * suspend-resume.
882 *
883 * Return: 0 on success. Negative error code on failure.
884 */
885 int xe_bo_evict_pinned(struct xe_bo *bo)
886 {
887 struct ttm_place place = {
888 .mem_type = XE_PL_TT,
889 };
890 struct ttm_placement placement = {
891 .placement = &place,
892 .num_placement = 1,
893 };
894 struct ttm_operation_ctx ctx = {
895 .interruptible = false,
896 .gfp_retry_mayfail = true,
897 };
898 struct ttm_resource *new_mem;
899 int ret;
900
901 xe_bo_assert_held(bo);
902
903 if (WARN_ON(!bo->ttm.resource))
904 return -EINVAL;
905
906 if (WARN_ON(!xe_bo_is_pinned(bo)))
907 return -EINVAL;
908
909 if (!xe_bo_is_vram(bo))
910 return 0;
911
912 ret = ttm_bo_mem_space(&bo->ttm, &placement, &new_mem, &ctx);
913 if (ret)
914 return ret;
915
916 if (!bo->ttm.ttm) {
917 bo->ttm.ttm = xe_ttm_tt_create(&bo->ttm, 0);
918 if (!bo->ttm.ttm) {
919 ret = -ENOMEM;
920 goto err_res_free;
921 }
922 }
923
924 ret = ttm_bo_populate(&bo->ttm, &ctx);
925 if (ret)
926 goto err_res_free;
927
928 ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
929 if (ret)
930 goto err_res_free;
931
932 ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
933 if (ret)
934 goto err_res_free;
935
936 return 0;
937
938 err_res_free:
939 ttm_resource_free(&bo->ttm, &new_mem);
940 return ret;
941 }
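
/*
 * Minimal usage sketch for the suspend path (assumes the caller already
 * holds a reference to the BO; not a verbatim copy of the real suspend
 * code):
 *
 *	xe_bo_lock(bo, false);
 *	ret = xe_bo_evict_pinned(bo);
 *	xe_bo_unlock(bo);
 *	if (ret)
 *		return ret;
 */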
942
943 /**
944 * xe_bo_restore_pinned() - Restore a pinned VRAM object
945 * @bo: The buffer object to move.
946 *
947 * On successful completion, the object memory will be moved back to VRAM.
948 *
949 * This is needed for special handling of pinned VRAM objects during
950 * suspend-resume.
951 *
952 * Return: 0 on success. Negative error code on failure.
953 */
954 int xe_bo_restore_pinned(struct xe_bo *bo)
955 {
956 struct ttm_operation_ctx ctx = {
957 .interruptible = false,
958 .gfp_retry_mayfail = false,
959 };
960 struct ttm_resource *new_mem;
961 struct ttm_place *place = &bo->placements[0];
962 int ret;
963
964 xe_bo_assert_held(bo);
965
966 if (WARN_ON(!bo->ttm.resource))
967 return -EINVAL;
968
969 if (WARN_ON(!xe_bo_is_pinned(bo)))
970 return -EINVAL;
971
972 if (WARN_ON(xe_bo_is_vram(bo)))
973 return -EINVAL;
974
975 if (WARN_ON(!bo->ttm.ttm && !xe_bo_is_stolen(bo)))
976 return -EINVAL;
977
978 if (!mem_type_is_vram(place->mem_type))
979 return 0;
980
981 ret = ttm_bo_mem_space(&bo->ttm, &bo->placement, &new_mem, &ctx);
982 if (ret)
983 return ret;
984
985 ret = ttm_bo_populate(&bo->ttm, &ctx);
986 if (ret)
987 goto err_res_free;
988
989 ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
990 if (ret)
991 goto err_res_free;
992
993 ret = xe_bo_move(&bo->ttm, false, &ctx, new_mem, NULL);
994 if (ret)
995 goto err_res_free;
996
997 return 0;
998
999 err_res_free:
1000 ttm_resource_free(&bo->ttm, &new_mem);
1001 return ret;
1002 }
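
/*
 * The resume path mirrors the eviction sketch above: lock the BO, call
 * xe_bo_restore_pinned(), then unlock. On DGFX the pinned placement set up
 * by xe_bo_pin() forces the contents back to the original physical VRAM
 * offset.
 */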
1003
1004 static unsigned long xe_ttm_io_mem_pfn(struct ttm_buffer_object *ttm_bo,
1005 unsigned long page_offset)
1006 {
1007 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1008 struct xe_res_cursor cursor;
1009 struct xe_mem_region *vram;
1010
1011 if (ttm_bo->resource->mem_type == XE_PL_STOLEN)
1012 return xe_ttm_stolen_io_offset(bo, page_offset << PAGE_SHIFT) >> PAGE_SHIFT;
1013
1014 vram = res_to_mem_region(ttm_bo->resource);
1015 xe_res_first(ttm_bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor);
1016 return (vram->io_start + cursor.start) >> PAGE_SHIFT;
1017 }
1018
1019 static void __xe_bo_vunmap(struct xe_bo *bo);
1020
1021 /*
1022 * TODO: Move this function to TTM so we don't rely on how TTM does its
1023 * locking, thereby abusing TTM internals.
1024 */
1025 static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
1026 {
1027 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1028 bool locked;
1029
1030 xe_assert(xe, !kref_read(&ttm_bo->kref));
1031
1032 /*
1033 * We can typically only race with TTM trylocking under the
1034 * lru_lock, which will immediately be unlocked again since
1035 * the ttm_bo refcount is zero at this point. So trylocking *should*
1036 * always succeed here, as long as we hold the lru lock.
1037 */
1038 spin_lock(&ttm_bo->bdev->lru_lock);
1039 locked = dma_resv_trylock(ttm_bo->base.resv);
1040 spin_unlock(&ttm_bo->bdev->lru_lock);
1041 xe_assert(xe, locked);
1042
1043 return locked;
1044 }
1045
1046 static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
1047 {
1048 struct dma_resv_iter cursor;
1049 struct dma_fence *fence;
1050 struct dma_fence *replacement = NULL;
1051 struct xe_bo *bo;
1052
1053 if (!xe_bo_is_xe_bo(ttm_bo))
1054 return;
1055
1056 bo = ttm_to_xe_bo(ttm_bo);
1057 xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
1058
1059 /*
1060 * Corner case where TTM fails to allocate memory and this BO's resv
1061 * still points to the VM's resv.
1062 */
1063 if (ttm_bo->base.resv != &ttm_bo->base._resv)
1064 return;
1065
1066 if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
1067 return;
1068
1069 /*
1070 * Scrub the preempt fences if any. The unbind fence is already
1071 * attached to the resv.
1072 * TODO: Don't do this for external bos once we scrub them after
1073 * unbind.
1074 */
1075 dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
1076 DMA_RESV_USAGE_BOOKKEEP, fence) {
1077 if (xe_fence_is_xe_preempt(fence) &&
1078 !dma_fence_is_signaled(fence)) {
1079 if (!replacement)
1080 replacement = dma_fence_get_stub();
1081
1082 dma_resv_replace_fences(ttm_bo->base.resv,
1083 fence->context,
1084 replacement,
1085 DMA_RESV_USAGE_BOOKKEEP);
1086 }
1087 }
1088 dma_fence_put(replacement);
1089
1090 dma_resv_unlock(ttm_bo->base.resv);
1091 }
1092
1093 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
1094 {
1095 if (!xe_bo_is_xe_bo(ttm_bo))
1096 return;
1097
1098 /*
1099 * Object is idle and about to be destroyed. Release the
1100 * dma-buf attachment.
1101 */
1102 if (ttm_bo->type == ttm_bo_type_sg && ttm_bo->sg) {
1103 struct xe_ttm_tt *xe_tt = container_of(ttm_bo->ttm,
1104 struct xe_ttm_tt, ttm);
1105
1106 dma_buf_unmap_attachment(ttm_bo->base.import_attach, ttm_bo->sg,
1107 DMA_BIDIRECTIONAL);
1108 ttm_bo->sg = NULL;
1109 xe_tt->sg = NULL;
1110 }
1111 }
1112
1113 static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
1114 {
1115 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1116
1117 if (ttm_bo->ttm) {
1118 struct ttm_placement place = {};
1119 int ret = ttm_bo_validate(ttm_bo, &place, ctx);
1120
1121 drm_WARN_ON(&xe->drm, ret);
1122 }
1123 }
1124
1125 static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
1126 {
1127 struct ttm_operation_ctx ctx = {
1128 .interruptible = false,
1129 .gfp_retry_mayfail = false,
1130 };
1131
1132 if (ttm_bo->ttm) {
1133 struct xe_ttm_tt *xe_tt =
1134 container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
1135
1136 if (xe_tt->purgeable)
1137 xe_ttm_bo_purge(ttm_bo, &ctx);
1138 }
1139 }
1140
1141 static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
1142 unsigned long offset, void *buf, int len,
1143 int write)
1144 {
1145 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1146 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1147 struct iosys_map vmap;
1148 struct xe_res_cursor cursor;
1149 struct xe_mem_region *vram;
1150 int bytes_left = len;
1151
1152 xe_bo_assert_held(bo);
1153 xe_device_assert_mem_access(xe);
1154
1155 if (!mem_type_is_vram(ttm_bo->resource->mem_type))
1156 return -EIO;
1157
1158 /* FIXME: Use GPU for non-visible VRAM */
1159 if (!xe_ttm_resource_visible(ttm_bo->resource))
1160 return -EIO;
1161
1162 vram = res_to_mem_region(ttm_bo->resource);
1163 xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
1164 bo->size - (offset & PAGE_MASK), &cursor);
1165
1166 do {
1167 unsigned long page_offset = (offset & ~PAGE_MASK);
1168 int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left);
1169
1170 iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping +
1171 cursor.start);
1172 if (write)
1173 xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count);
1174 else
1175 xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count);
1176
1177 buf += byte_count;
1178 offset += byte_count;
1179 bytes_left -= byte_count;
1180 if (bytes_left)
1181 xe_res_next(&cursor, PAGE_SIZE);
1182 } while (bytes_left);
1183
1184 return len;
1185 }
1186
1187 const struct ttm_device_funcs xe_ttm_funcs = {
1188 .ttm_tt_create = xe_ttm_tt_create,
1189 .ttm_tt_populate = xe_ttm_tt_populate,
1190 .ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
1191 .ttm_tt_destroy = xe_ttm_tt_destroy,
1192 .evict_flags = xe_evict_flags,
1193 .move = xe_bo_move,
1194 .io_mem_reserve = xe_ttm_io_mem_reserve,
1195 .io_mem_pfn = xe_ttm_io_mem_pfn,
1196 .access_memory = xe_ttm_access_memory,
1197 .release_notify = xe_ttm_bo_release_notify,
1198 .eviction_valuable = ttm_bo_eviction_valuable,
1199 .delete_mem_notify = xe_ttm_bo_delete_mem_notify,
1200 .swap_notify = xe_ttm_bo_swap_notify,
1201 };
1202
1203 static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
1204 {
1205 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1206 struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
1207 struct xe_tile *tile;
1208 u8 id;
1209
1210 if (bo->ttm.base.import_attach)
1211 drm_prime_gem_destroy(&bo->ttm.base, NULL);
1212 drm_gem_object_release(&bo->ttm.base);
1213
1214 xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list));
1215
1216 for_each_tile(tile, xe, id)
1217 if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size)
1218 xe_ggtt_remove_bo(tile->mem.ggtt, bo);
1219
1220 #ifdef CONFIG_PROC_FS
1221 if (bo->client)
1222 xe_drm_client_remove_bo(bo);
1223 #endif
1224
1225 if (bo->vm && xe_bo_is_user(bo))
1226 xe_vm_put(bo->vm);
1227
1228 mutex_lock(&xe->mem_access.vram_userfault.lock);
1229 if (!list_empty(&bo->vram_userfault_link))
1230 list_del(&bo->vram_userfault_link);
1231 mutex_unlock(&xe->mem_access.vram_userfault.lock);
1232
1233 kfree(bo);
1234 }
1235
1236 static void xe_gem_object_free(struct drm_gem_object *obj)
1237 {
1238 /* Our BO reference counting scheme works as follows:
1239 *
1240 * The gem object kref is typically used throughout the driver,
1241 * and the gem object holds a ttm_buffer_object refcount, so
1242 * that when the last gem object reference is put, which is when
1243 * we end up in this function, we also put that ttm_buffer_object
1244 * refcount. Anything using gem interfaces is then no longer
1245 * allowed to access the object in a way that requires a gem
1246 * refcount, including locking the object.
1247 *
1248 * Driver TTM callbacks are allowed to use the ttm_buffer_object
1249 * refcount directly if needed.
1250 */
1251 __xe_bo_vunmap(gem_to_xe_bo(obj));
1252 ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
1253 }
1254
1255 static void xe_gem_object_close(struct drm_gem_object *obj,
1256 struct drm_file *file_priv)
1257 {
1258 struct xe_bo *bo = gem_to_xe_bo(obj);
1259
1260 if (bo->vm && !xe_vm_in_fault_mode(bo->vm)) {
1261 xe_assert(xe_bo_device(bo), xe_bo_is_user(bo));
1262
1263 xe_bo_lock(bo, false);
1264 ttm_bo_set_bulk_move(&bo->ttm, NULL);
1265 xe_bo_unlock(bo);
1266 }
1267 }
1268
1269 static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
1270 {
1271 struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
1272 struct drm_device *ddev = tbo->base.dev;
1273 struct xe_device *xe = to_xe_device(ddev);
1274 struct xe_bo *bo = ttm_to_xe_bo(tbo);
1275 bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
1276 vm_fault_t ret;
1277 int idx;
1278
1279 if (needs_rpm)
1280 xe_pm_runtime_get(xe);
1281
1282 ret = ttm_bo_vm_reserve(tbo, vmf);
1283 if (ret)
1284 goto out;
1285
1286 if (drm_dev_enter(ddev, &idx)) {
1287 trace_xe_bo_cpu_fault(bo);
1288
1289 ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
1290 TTM_BO_VM_NUM_PREFAULT);
1291 drm_dev_exit(idx);
1292 } else {
1293 ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
1294 }
1295
1296 if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
1297 goto out;
1298 /*
1299 * ttm_bo_vm_reserve() already has dma_resv_lock.
1300 */
1301 if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
1302 mutex_lock(&xe->mem_access.vram_userfault.lock);
1303 if (list_empty(&bo->vram_userfault_link))
1304 list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
1305 mutex_unlock(&xe->mem_access.vram_userfault.lock);
1306 }
1307
1308 dma_resv_unlock(tbo->base.resv);
1309 out:
1310 if (needs_rpm)
1311 xe_pm_runtime_put(xe);
1312
1313 return ret;
1314 }
1315
1316 static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
1317 void *buf, int len, int write)
1318 {
1319 struct ttm_buffer_object *ttm_bo = vma->vm_private_data;
1320 struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
1321 struct xe_device *xe = xe_bo_device(bo);
1322 int ret;
1323
1324 xe_pm_runtime_get(xe);
1325 ret = ttm_bo_vm_access(vma, addr, buf, len, write);
1326 xe_pm_runtime_put(xe);
1327
1328 return ret;
1329 }
1330
1331 /**
1332 * xe_bo_read() - Read from an xe_bo
1333 * @bo: The buffer object to read from.
1334 * @offset: The byte offset to start reading from.
1335 * @dst: Location to store the data read.
1336 * @size: Size in bytes for the read.
1337 *
1338 * Read @size bytes from the @bo, starting from @offset, storing into @dst.
1339 *
1340 * Return: Zero on success, or negative error.
1341 */
1342 int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
1343 {
1344 int ret;
1345
1346 ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0);
1347 if (ret >= 0 && ret != size)
1348 ret = -EIO;
1349 else if (ret == size)
1350 ret = 0;
1351
1352 return ret;
1353 }
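
/*
 * Example (sketch): reading back a small structure the GPU wrote into a BO;
 * "struct my_hdr" is made up for illustration:
 *
 *	struct my_hdr hdr;
 *	int err = xe_bo_read(bo, 0, &hdr, sizeof(hdr));
 *
 *	if (err)
 *		return err;
 */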
1354
1355 static const struct vm_operations_struct xe_gem_vm_ops = {
1356 .fault = xe_gem_fault,
1357 .open = ttm_bo_vm_open,
1358 .close = ttm_bo_vm_close,
1359 .access = xe_bo_vm_access,
1360 };
1361
1362 static const struct drm_gem_object_funcs xe_gem_object_funcs = {
1363 .free = xe_gem_object_free,
1364 .close = xe_gem_object_close,
1365 .mmap = drm_gem_ttm_mmap,
1366 .export = xe_gem_prime_export,
1367 .vm_ops = &xe_gem_vm_ops,
1368 };
1369
1370 /**
1371 * xe_bo_alloc - Allocate storage for a struct xe_bo
1372 *
1373 * This function is intended to allocate storage to be used for input
1374 * to __xe_bo_create_locked(), in the case a pointer to the bo to be
1375 * created is needed before the call to __xe_bo_create_locked().
1376 * If __xe_bo_create_locked() ends up never being called, then the
1377 * storage allocated with this function needs to be freed using
1378 * xe_bo_free().
1379 *
1380 * Return: A pointer to an uninitialized struct xe_bo on success,
1381 * ERR_PTR(-ENOMEM) on error.
1382 */
1383 struct xe_bo *xe_bo_alloc(void)
1384 {
1385 struct xe_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);
1386
1387 if (!bo)
1388 return ERR_PTR(-ENOMEM);
1389
1390 return bo;
1391 }
1392
1393 /**
1394 * xe_bo_free - Free storage allocated using xe_bo_alloc()
1395 * @bo: The buffer object storage.
1396 *
1397 * Refer to xe_bo_alloc() documentation for valid use-cases.
1398 */
1399 void xe_bo_free(struct xe_bo *bo)
1400 {
1401 kfree(bo);
1402 }
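
/*
 * Sketch of the intended xe_bo_alloc() / xe_bo_free() pairing (illustrative
 * only; setup_needing_bo_ptr() is a made-up placeholder):
 *
 *	bo = xe_bo_alloc();
 *	if (IS_ERR(bo))
 *		return bo;
 *
 *	err = setup_needing_bo_ptr(bo);
 *	if (err) {
 *		xe_bo_free(bo);
 *		return ERR_PTR(err);
 *	}
 *
 *	bo = ___xe_bo_create_locked(xe, bo, tile, resv, bulk, size,
 *				    cpu_caching, type, flags);
 */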
1403
1404 struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
1405 struct xe_tile *tile, struct dma_resv *resv,
1406 struct ttm_lru_bulk_move *bulk, size_t size,
1407 u16 cpu_caching, enum ttm_bo_type type,
1408 u32 flags)
1409 {
1410 struct ttm_operation_ctx ctx = {
1411 .interruptible = true,
1412 .no_wait_gpu = false,
1413 .gfp_retry_mayfail = true,
1414 };
1415 struct ttm_placement *placement;
1416 uint32_t alignment;
1417 size_t aligned_size;
1418 int err;
1419
1420 /* Only kernel objects should set a tile */
1421 xe_assert(xe, !tile || type == ttm_bo_type_kernel);
1422
1423 if (XE_WARN_ON(!size)) {
1424 xe_bo_free(bo);
1425 return ERR_PTR(-EINVAL);
1426 }
1427
1428 /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */
1429 if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT))
1430 return ERR_PTR(-EINVAL);
1431
1432 if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
1433 !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
1434 ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
1435 (flags & (XE_BO_FLAG_NEEDS_64K | XE_BO_FLAG_NEEDS_2M)))) {
1436 size_t align = flags & XE_BO_FLAG_NEEDS_2M ? SZ_2M : SZ_64K;
1437
1438 aligned_size = ALIGN(size, align);
1439 if (type != ttm_bo_type_device)
1440 size = ALIGN(size, align);
1441 flags |= XE_BO_FLAG_INTERNAL_64K;
1442 alignment = align >> PAGE_SHIFT;
1443 } else {
1444 aligned_size = ALIGN(size, SZ_4K);
1445 flags &= ~XE_BO_FLAG_INTERNAL_64K;
1446 alignment = SZ_4K >> PAGE_SHIFT;
1447 }
1448
1449 if (type == ttm_bo_type_device && aligned_size != size)
1450 return ERR_PTR(-EINVAL);
1451
1452 if (!bo) {
1453 bo = xe_bo_alloc();
1454 if (IS_ERR(bo))
1455 return bo;
1456 }
1457
1458 bo->ccs_cleared = false;
1459 bo->tile = tile;
1460 bo->size = size;
1461 bo->flags = flags;
1462 bo->cpu_caching = cpu_caching;
1463 bo->ttm.base.funcs = &xe_gem_object_funcs;
1464 bo->ttm.priority = XE_BO_PRIORITY_NORMAL;
1465 INIT_LIST_HEAD(&bo->pinned_link);
1466 #ifdef CONFIG_PROC_FS
1467 INIT_LIST_HEAD(&bo->client_link);
1468 #endif
1469 INIT_LIST_HEAD(&bo->vram_userfault_link);
1470
1471 drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
1472
1473 if (resv) {
1474 ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
1475 ctx.resv = resv;
1476 }
1477
1478 if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
1479 err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
1480 if (WARN_ON(err)) {
1481 xe_ttm_bo_destroy(&bo->ttm);
1482 return ERR_PTR(err);
1483 }
1484 }
1485
1486 /* Defer populating type_sg bos */
1487 placement = (type == ttm_bo_type_sg ||
1488 bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
1489 &bo->placement;
1490 err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
1491 placement, alignment,
1492 &ctx, NULL, resv, xe_ttm_bo_destroy);
1493 if (err)
1494 return ERR_PTR(err);
1495
1496 /*
1497 * The VRAM pages underneath are potentially still being accessed by the
1498 * GPU, as per async GPU clearing and async evictions. However, TTM makes
1499 * sure to add any corresponding move/clear fences into the object's
1500 * dma-resv using the DMA_RESV_USAGE_KERNEL slot.
1501 *
1502 * For KMD internal buffers we don't care about GPU clearing, however we
1503 * still need to handle async evictions, where the VRAM is still being
1504 * accessed by the GPU. Most internal callers are not expecting this,
1505 * since they are missing the required synchronisation before accessing
1506 * the memory. To keep things simple just sync wait any kernel fences
1507 * here, if the buffer is designated KMD internal.
1508 *
1509 * For normal userspace objects we should already have the required
1510 * pipelining or sync waiting elsewhere, since we already have to deal
1511 * with things like async GPU clearing.
1512 */
1513 if (type == ttm_bo_type_kernel) {
1514 long timeout = dma_resv_wait_timeout(bo->ttm.base.resv,
1515 DMA_RESV_USAGE_KERNEL,
1516 ctx.interruptible,
1517 MAX_SCHEDULE_TIMEOUT);
1518
1519 if (timeout < 0) {
1520 if (!resv)
1521 dma_resv_unlock(bo->ttm.base.resv);
1522 xe_bo_put(bo);
1523 return ERR_PTR(timeout);
1524 }
1525 }
1526
1527 bo->created = true;
1528 if (bulk)
1529 ttm_bo_set_bulk_move(&bo->ttm, bulk);
1530 else
1531 ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
1532
1533 return bo;
1534 }
1535
1536 static int __xe_bo_fixed_placement(struct xe_device *xe,
1537 struct xe_bo *bo,
1538 u32 flags,
1539 u64 start, u64 end, u64 size)
1540 {
1541 struct ttm_place *place = bo->placements;
1542
1543 if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
1544 return -EINVAL;
1545
1546 place->flags = TTM_PL_FLAG_CONTIGUOUS;
1547 place->fpfn = start >> PAGE_SHIFT;
1548 place->lpfn = end >> PAGE_SHIFT;
1549
1550 switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
1551 case XE_BO_FLAG_VRAM0:
1552 place->mem_type = XE_PL_VRAM0;
1553 break;
1554 case XE_BO_FLAG_VRAM1:
1555 place->mem_type = XE_PL_VRAM1;
1556 break;
1557 case XE_BO_FLAG_STOLEN:
1558 place->mem_type = XE_PL_STOLEN;
1559 break;
1560
1561 default:
1562 /* 0 or multiple of the above set */
1563 return -EINVAL;
1564 }
1565
1566 bo->placement = (struct ttm_placement) {
1567 .num_placement = 1,
1568 .placement = place,
1569 };
1570
1571 return 0;
1572 }
1573
1574 static struct xe_bo *
1575 __xe_bo_create_locked(struct xe_device *xe,
1576 struct xe_tile *tile, struct xe_vm *vm,
1577 size_t size, u64 start, u64 end,
1578 u16 cpu_caching, enum ttm_bo_type type, u32 flags,
1579 u64 alignment)
1580 {
1581 struct xe_bo *bo = NULL;
1582 int err;
1583
1584 if (vm)
1585 xe_vm_assert_held(vm);
1586
1587 if (start || end != ~0ULL) {
1588 bo = xe_bo_alloc();
1589 if (IS_ERR(bo))
1590 return bo;
1591
1592 flags |= XE_BO_FLAG_FIXED_PLACEMENT;
1593 err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
1594 if (err) {
1595 xe_bo_free(bo);
1596 return ERR_PTR(err);
1597 }
1598 }
1599
1600 bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
1601 vm && !xe_vm_in_fault_mode(vm) &&
1602 flags & XE_BO_FLAG_USER ?
1603 &vm->lru_bulk_move : NULL, size,
1604 cpu_caching, type, flags);
1605 if (IS_ERR(bo))
1606 return bo;
1607
1608 bo->min_align = alignment;
1609
1610 /*
1611 * Note that instead of taking a reference to the drm_gpuvm_resv_bo(),
1612 * to ensure the shared resv doesn't disappear under the bo, the bo
1613 * will keep a reference to the vm, and avoid circular references
1614 * by having all the vm's bo references released at vm close
1615 * time.
1616 */
1617 if (vm && xe_bo_is_user(bo))
1618 xe_vm_get(vm);
1619 bo->vm = vm;
1620
1621 if (bo->flags & XE_BO_FLAG_GGTT) {
1622 struct xe_tile *t;
1623 u8 id;
1624
1625 if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) {
1626 if (!tile && flags & XE_BO_FLAG_STOLEN)
1627 tile = xe_device_get_root_tile(xe);
1628
1629 xe_assert(xe, tile);
1630 }
1631
1632 for_each_tile(t, xe, id) {
1633 if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t)))
1634 continue;
1635
1636 if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
1637 err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
1638 start + bo->size, U64_MAX);
1639 } else {
1640 err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
1641 }
1642 if (err)
1643 goto err_unlock_put_bo;
1644 }
1645 }
1646
1647 return bo;
1648
1649 err_unlock_put_bo:
1650 __xe_bo_unset_bulk_move(bo);
1651 xe_bo_unlock_vm_held(bo);
1652 xe_bo_put(bo);
1653 return ERR_PTR(err);
1654 }
1655
1656 struct xe_bo *
1657 xe_bo_create_locked_range(struct xe_device *xe,
1658 struct xe_tile *tile, struct xe_vm *vm,
1659 size_t size, u64 start, u64 end,
1660 enum ttm_bo_type type, u32 flags, u64 alignment)
1661 {
1662 return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
1663 flags, alignment);
1664 }
1665
1666 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
1667 struct xe_vm *vm, size_t size,
1668 enum ttm_bo_type type, u32 flags)
1669 {
1670 return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
1671 flags, 0);
1672 }
1673
1674 struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
1675 struct xe_vm *vm, size_t size,
1676 u16 cpu_caching,
1677 u32 flags)
1678 {
1679 struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
1680 cpu_caching, ttm_bo_type_device,
1681 flags | XE_BO_FLAG_USER, 0);
1682 if (!IS_ERR(bo))
1683 xe_bo_unlock_vm_held(bo);
1684
1685 return bo;
1686 }
1687
1688 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
1689 struct xe_vm *vm, size_t size,
1690 enum ttm_bo_type type, u32 flags)
1691 {
1692 struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
1693
1694 if (!IS_ERR(bo))
1695 xe_bo_unlock_vm_held(bo);
1696
1697 return bo;
1698 }
1699
1700 struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
1701 struct xe_vm *vm,
1702 size_t size, u64 offset,
1703 enum ttm_bo_type type, u32 flags)
1704 {
1705 return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
1706 type, flags, 0);
1707 }
1708
1709 struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
1710 struct xe_tile *tile,
1711 struct xe_vm *vm,
1712 size_t size, u64 offset,
1713 enum ttm_bo_type type, u32 flags,
1714 u64 alignment)
1715 {
1716 struct xe_bo *bo;
1717 int err;
1718 u64 start = offset == ~0ull ? 0 : offset;
1719 u64 end = offset == ~0ull ? offset : start + size;
1720
1721 if (flags & XE_BO_FLAG_STOLEN &&
1722 xe_ttm_stolen_cpu_access_needs_ggtt(xe))
1723 flags |= XE_BO_FLAG_GGTT;
1724
1725 bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
1726 flags | XE_BO_FLAG_NEEDS_CPU_ACCESS,
1727 alignment);
1728 if (IS_ERR(bo))
1729 return bo;
1730
1731 err = xe_bo_pin(bo);
1732 if (err)
1733 goto err_put;
1734
1735 err = xe_bo_vmap(bo);
1736 if (err)
1737 goto err_unpin;
1738
1739 xe_bo_unlock_vm_held(bo);
1740
1741 return bo;
1742
1743 err_unpin:
1744 xe_bo_unpin(bo);
1745 err_put:
1746 xe_bo_unlock_vm_held(bo);
1747 xe_bo_put(bo);
1748 return ERR_PTR(err);
1749 }
1750
1751 struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1752 struct xe_vm *vm, size_t size,
1753 enum ttm_bo_type type, u32 flags)
1754 {
1755 return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
1756 }
1757
1758 struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1759 const void *data, size_t size,
1760 enum ttm_bo_type type, u32 flags)
1761 {
1762 struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
1763 ALIGN(size, PAGE_SIZE),
1764 type, flags);
1765 if (IS_ERR(bo))
1766 return bo;
1767
1768 xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1769
1770 return bo;
1771 }
1772
1773 static void __xe_bo_unpin_map_no_vm(void *arg)
1774 {
1775 xe_bo_unpin_map_no_vm(arg);
1776 }
1777
1778 struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
1779 size_t size, u32 flags)
1780 {
1781 struct xe_bo *bo;
1782 int ret;
1783
1784 bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
1785 if (IS_ERR(bo))
1786 return bo;
1787
1788 ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
1789 if (ret)
1790 return ERR_PTR(ret);
1791
1792 return bo;
1793 }
1794
1795 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
1796 const void *data, size_t size, u32 flags)
1797 {
1798 struct xe_bo *bo = xe_managed_bo_create_pin_map(xe, tile, ALIGN(size, PAGE_SIZE), flags);
1799
1800 if (IS_ERR(bo))
1801 return bo;
1802
1803 xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
1804
1805 return bo;
1806 }
1807
1808 /**
1809 * xe_managed_bo_reinit_in_vram - Replace a managed system memory BO with one in VRAM
1810 * @xe: xe device
1811 * @tile: Tile where the new buffer will be created
1812 * @src: Managed buffer object allocated in system memory
1813 *
1814 * Replace a managed src buffer object allocated in system memory with a new
1815 * one allocated in vram, copying the data between them.
1816 * The buffer object in VRAM is not going to have the same GGTT address; the caller
1817 * is responsible for making sure that any old references to it are updated.
1818 *
1819 * Returns 0 for success, negative error code otherwise.
1820 */
1821 int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
1822 {
1823 struct xe_bo *bo;
1824 u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
1825
1826 dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
1827
1828 xe_assert(xe, IS_DGFX(xe));
1829 xe_assert(xe, !(*src)->vmap.is_iomem);
1830
1831 bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
1832 (*src)->size, dst_flags);
1833 if (IS_ERR(bo))
1834 return PTR_ERR(bo);
1835
1836 devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
1837 *src = bo;
1838
1839 return 0;
1840 }
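
/*
 * Typical use (sketch): a managed BO created in system memory early in
 * probe is migrated once VRAM is usable, e.g.
 *
 *	err = xe_managed_bo_reinit_in_vram(xe, tile, &priv->bo);
 *	if (err)
 *		return err;
 *
 * where "priv->bo" stands in for whatever managed BO pointer the caller
 * owns; any cached GGTT address must be re-queried afterwards, as noted
 * above.
 */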
1841
1842 /*
1843 * XXX: This is in the VM bind data path, likely should calculate this once and
1844 * store, with a recalculation if the BO is moved.
1845 */
1846 uint64_t vram_region_gpu_offset(struct ttm_resource *res)
1847 {
1848 struct xe_device *xe = ttm_to_xe_device(res->bo->bdev);
1849
1850 if (res->mem_type == XE_PL_STOLEN)
1851 return xe_ttm_stolen_gpu_offset(xe);
1852
1853 return res_to_mem_region(res)->dpa_base;
1854 }
1855
/**
 * xe_bo_pin_external - pin an external BO
 * @bo: buffer object to be pinned
 *
 * Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 * BO. Unlike xe_bo_pin(), this function has its own set of asserts and code
 * to ensure evict / restore on suspend / resume.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int xe_bo_pin_external(struct xe_bo *bo)
{
        struct xe_device *xe = xe_bo_device(bo);
        int err;

        xe_assert(xe, !bo->vm);
        xe_assert(xe, xe_bo_is_user(bo));

        if (!xe_bo_is_pinned(bo)) {
                err = xe_bo_validate(bo, NULL, false);
                if (err)
                        return err;

                if (xe_bo_is_vram(bo)) {
                        spin_lock(&xe->pinned.lock);
                        list_add_tail(&bo->pinned_link,
                                      &xe->pinned.external_vram);
                        spin_unlock(&xe->pinned.lock);
                }
        }

        ttm_bo_pin(&bo->ttm);

        /*
         * FIXME: If we always use the reserve / unreserve functions for locking
         * we do not need this.
         */
        ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

        return 0;
}

int xe_bo_pin(struct xe_bo *bo)
{
        struct ttm_place *place = &bo->placements[0];
        struct xe_device *xe = xe_bo_device(bo);
        int err;

        /* We currently don't expect user BO to be pinned */
        xe_assert(xe, !xe_bo_is_user(bo));

        /* Pinned object must be in GGTT or have pinned flag */
        xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
                                   XE_BO_FLAG_GGTT));

        /*
         * No reason we can't support pinning imported dma-bufs; we just don't
         * expect to pin an imported dma-buf.
         */
        xe_assert(xe, !bo->ttm.base.import_attach);

        /* We only expect at most 1 pin */
        xe_assert(xe, !xe_bo_is_pinned(bo));

        err = xe_bo_validate(bo, NULL, false);
        if (err)
                return err;

        /*
         * For pinned objects on DGFX that are also in VRAM, we expect the
         * backing store to be contiguous VRAM memory. This is required for
         * eviction / restore during suspend / resume (we force a restore to
         * the same physical address).
         */
        if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
                             bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
                if (mem_type_is_vram(place->mem_type)) {
                        xe_assert(xe, place->flags & TTM_PL_FLAG_CONTIGUOUS);

                        place->fpfn = (xe_bo_addr(bo, 0, PAGE_SIZE) -
                                       vram_region_gpu_offset(bo->ttm.resource)) >> PAGE_SHIFT;
                        place->lpfn = place->fpfn + (bo->size >> PAGE_SHIFT);
                }
        }

        if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
                spin_lock(&xe->pinned.lock);
                list_add_tail(&bo->pinned_link, &xe->pinned.kernel_bo_present);
                spin_unlock(&xe->pinned.lock);
        }

        ttm_bo_pin(&bo->ttm);

        /*
         * FIXME: If we always use the reserve / unreserve functions for locking
         * we do not need this.
         */
        ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);

        return 0;
}

/**
 * xe_bo_unpin_external - unpin an external BO
 * @bo: buffer object to be unpinned
 *
 * Unpin an external (not tied to a VM, can be exported via dma-buf / prime FD)
 * BO. Unlike xe_bo_unpin(), this function has its own set of asserts and code
 * to ensure evict / restore on suspend / resume.
 */
void xe_bo_unpin_external(struct xe_bo *bo)
{
        struct xe_device *xe = xe_bo_device(bo);

        xe_assert(xe, !bo->vm);
        xe_assert(xe, xe_bo_is_pinned(bo));
        xe_assert(xe, xe_bo_is_user(bo));

        spin_lock(&xe->pinned.lock);
        if (bo->ttm.pin_count == 1 && !list_empty(&bo->pinned_link))
                list_del_init(&bo->pinned_link);
        spin_unlock(&xe->pinned.lock);

        ttm_bo_unpin(&bo->ttm);

        /*
         * FIXME: If we always use the reserve / unreserve functions for locking
         * we do not need this.
         */
        ttm_bo_move_to_lru_tail_unlocked(&bo->ttm);
}

void xe_bo_unpin(struct xe_bo *bo)
{
        struct ttm_place *place = &bo->placements[0];
        struct xe_device *xe = xe_bo_device(bo);

        xe_assert(xe, !bo->ttm.base.import_attach);
        xe_assert(xe, xe_bo_is_pinned(bo));

        if (mem_type_is_vram(place->mem_type) || bo->flags & XE_BO_FLAG_GGTT) {
                spin_lock(&xe->pinned.lock);
                xe_assert(xe, !list_empty(&bo->pinned_link));
                list_del_init(&bo->pinned_link);
                spin_unlock(&xe->pinned.lock);
        }
        ttm_bo_unpin(&bo->ttm);
}

/**
 * xe_bo_validate() - Make sure the bo is in an allowed placement
 * @bo: The bo
 * @vm: Pointer to the vm the bo shares a locked dma_resv object with, or
 * NULL. Used together with @allow_res_evict.
 * @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
 * reservation object.
 *
 * Make sure the bo is in an allowed placement, migrating it if necessary. If
 * needed, other bos will be evicted. If bos selected for eviction share
 * @vm's reservation object, they can be evicted only if @allow_res_evict is
 * set to true; otherwise they will be bypassed.
 *
 * Return: 0 on success, negative error code on failure. May return
 * -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
 */
int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
{
        struct ttm_operation_ctx ctx = {
                .interruptible = true,
                .no_wait_gpu = false,
                .gfp_retry_mayfail = true,
        };

        if (vm) {
                lockdep_assert_held(&vm->lock);
                xe_vm_assert_held(vm);

                ctx.allow_res_evict = allow_res_evict;
                ctx.resv = xe_vm_resv(vm);
        }

        trace_xe_bo_validate(bo);
        return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
}
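
/*
 * Illustrative sketch (not part of the driver): for a BO that doesn't share a
 * vm's dma_resv, validation only needs the object lock. With a vm passed in,
 * both vm->lock and the vm's reservation object must be held, as the asserts
 * above check.
 *
 *      err = xe_bo_lock(bo, true);
 *      if (err)
 *              return err;
 *      err = xe_bo_validate(bo, NULL, false);
 *      xe_bo_unlock(bo);
 */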

bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
{
        if (bo->destroy == &xe_ttm_bo_destroy)
                return true;

        return false;
}

/*
 * Resolve a BO address. There is no assert to check if the proper lock is held
 * so it should only be used in cases where it is not fatal to get the wrong
 * address, such as printing debug information, but not in cases where memory is
 * written based on this result.
 */
dma_addr_t __xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
{
        struct xe_device *xe = xe_bo_device(bo);
        struct xe_res_cursor cur;
        u64 page;

        xe_assert(xe, page_size <= PAGE_SIZE);
        page = offset >> PAGE_SHIFT;
        offset &= (PAGE_SIZE - 1);

        if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
                xe_assert(xe, bo->ttm.ttm);

                xe_res_first_sg(xe_bo_sg(bo), page << PAGE_SHIFT,
                                page_size, &cur);
                return xe_res_dma(&cur) + offset;
        } else {
                struct xe_res_cursor cur;

                xe_res_first(bo->ttm.resource, page << PAGE_SHIFT,
                             page_size, &cur);
                return cur.start + offset + vram_region_gpu_offset(bo->ttm.resource);
        }
}

dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size)
{
        if (!READ_ONCE(bo->ttm.pin_count))
                xe_bo_assert_held(bo);
        return __xe_bo_addr(bo, offset, page_size);
}

int xe_bo_vmap(struct xe_bo *bo)
{
        struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
        void *virtual;
        bool is_iomem;
        int ret;

        xe_bo_assert_held(bo);

        if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) ||
                        !force_contiguous(bo->flags)))
                return -EINVAL;

        if (!iosys_map_is_null(&bo->vmap))
                return 0;

        /*
         * We use this more or less deprecated interface for now since
         * ttm_bo_vmap() doesn't offer the optimization of kmapping
         * single page bos, which is done here.
         * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
         * to use struct iosys_map.
         */
        ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
        if (ret)
                return ret;

        virtual = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
        if (is_iomem)
                iosys_map_set_vaddr_iomem(&bo->vmap, (void __iomem *)virtual);
        else
                iosys_map_set_vaddr(&bo->vmap, virtual);

        return 0;
}
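
/*
 * Illustrative sketch (not part of the driver): CPU access to a CPU-accessible
 * BO usually brackets the copy with vmap/vunmap while the object is locked.
 * The "data" buffer and "len" are assumptions for the example only.
 *
 *      xe_bo_lock(bo, false);
 *      err = xe_bo_vmap(bo);
 *      if (!err) {
 *              xe_map_memcpy_to(xe, &bo->vmap, 0, data, len);
 *              xe_bo_vunmap(bo);
 *      }
 *      xe_bo_unlock(bo);
 */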

static void __xe_bo_vunmap(struct xe_bo *bo)
{
        if (!iosys_map_is_null(&bo->vmap)) {
                iosys_map_clear(&bo->vmap);
                ttm_bo_kunmap(&bo->kmap);
        }
}

void xe_bo_vunmap(struct xe_bo *bo)
{
        xe_bo_assert_held(bo);
        __xe_bo_vunmap(bo);
}

int xe_gem_create_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct xe_file *xef = to_xe_file(file);
        struct drm_xe_gem_create *args = data;
        struct xe_vm *vm = NULL;
        struct xe_bo *bo;
        unsigned int bo_flags;
        u32 handle;
        int err;

        if (XE_IOCTL_DBG(xe, args->extensions) ||
            XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
            XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
                return -EINVAL;

        /* at least one valid memory placement must be specified */
        if (XE_IOCTL_DBG(xe, (args->placement & ~xe->info.mem_region_mask) ||
                         !args->placement))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, args->flags &
                         ~(DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING |
                           DRM_XE_GEM_CREATE_FLAG_SCANOUT |
                           DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM)))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, args->handle))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, !args->size))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, args->size > SIZE_MAX))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
                return -EINVAL;

        bo_flags = 0;
        if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
                bo_flags |= XE_BO_FLAG_DEFER_BACKING;

        if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
                bo_flags |= XE_BO_FLAG_SCANOUT;

        bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);

        /* CCS formats need physical placement at a 64K alignment in VRAM. */
        if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
            (bo_flags & XE_BO_FLAG_SCANOUT) &&
            !(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
            IS_ALIGNED(args->size, SZ_64K))
                bo_flags |= XE_BO_FLAG_NEEDS_64K;

        if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
                if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
                        return -EINVAL;

                bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
        }

        if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
                         args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
                         args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
                         args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
                return -EINVAL;

        if (args->vm_id) {
                vm = xe_vm_lookup(xef, args->vm_id);
                if (XE_IOCTL_DBG(xe, !vm))
                        return -ENOENT;
                err = xe_vm_lock(vm, true);
                if (err)
                        goto out_vm;
        }

        bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
                               bo_flags);

        if (vm)
                xe_vm_unlock(vm);

        if (IS_ERR(bo)) {
                err = PTR_ERR(bo);
                goto out_vm;
        }

        err = drm_gem_handle_create(file, &bo->ttm.base, &handle);
        if (err)
                goto out_bulk;

        args->handle = handle;
        goto out_put;

out_bulk:
        if (vm && !xe_vm_in_fault_mode(vm)) {
                xe_vm_lock(vm, false);
                __xe_bo_unset_bulk_move(bo);
                xe_vm_unlock(vm);
        }
out_put:
        xe_bo_put(bo);
out_vm:
        if (vm)
                xe_vm_put(vm);

        return err;
}

int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
                             struct drm_file *file)
{
        struct xe_device *xe = to_xe_device(dev);
        struct drm_xe_gem_mmap_offset *args = data;
        struct drm_gem_object *gem_obj;

        if (XE_IOCTL_DBG(xe, args->extensions) ||
            XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
                return -EINVAL;

        if (XE_IOCTL_DBG(xe, args->flags))
                return -EINVAL;

        gem_obj = drm_gem_object_lookup(file, args->handle);
        if (XE_IOCTL_DBG(xe, !gem_obj))
                return -ENOENT;

        /* The mmap offset was set up at BO allocation time. */
        args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);

        xe_bo_put(gem_to_xe_bo(gem_obj));
        return 0;
}
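
/*
 * Illustrative sketch (not part of the driver): from userspace the two ioctls
 * above are typically used back to back to allocate a buffer and map it. The
 * placement bit, libdrm wrapper and sizes are assumptions for the example;
 * the region bitmask comes from the memory-regions query.
 *
 *      struct drm_xe_gem_create create = {
 *              .size = 0x10000,
 *              .placement = 1 << sysmem_region_instance,
 *              .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
 *      };
 *      struct drm_xe_gem_mmap_offset mmo = {};
 *
 *      drmIoctl(fd, DRM_IOCTL_XE_GEM_CREATE, &create);
 *      mmo.handle = create.handle;
 *      drmIoctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
 *      ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *                 fd, mmo.offset);
 */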

/**
 * xe_bo_lock() - Lock the buffer object's dma_resv object
 * @bo: The struct xe_bo whose lock is to be taken
 * @intr: Whether to perform any wait interruptible
 *
 * Locks the buffer object's dma_resv object. If the buffer object is
 * pointing to a shared dma_resv object, that shared lock is locked.
 *
 * Return: 0 on success, -EINTR if @intr is true and the wait for a
 * contended lock was interrupted. If @intr is set to false, the
 * function always returns 0.
 */
int xe_bo_lock(struct xe_bo *bo, bool intr)
{
        if (intr)
                return dma_resv_lock_interruptible(bo->ttm.base.resv, NULL);

        dma_resv_lock(bo->ttm.base.resv, NULL);

        return 0;
}

/**
 * xe_bo_unlock() - Unlock the buffer object's dma_resv object
 * @bo: The struct xe_bo whose lock is to be released.
 *
 * Unlock a buffer object lock that was locked by xe_bo_lock().
 */
void xe_bo_unlock(struct xe_bo *bo)
{
        dma_resv_unlock(bo->ttm.base.resv);
}
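
/*
 * Illustrative sketch (not part of the driver): with @intr == true the lock
 * can fail with -EINTR, so callers need to handle the error before touching
 * the object.
 *
 *      err = xe_bo_lock(bo, true);
 *      if (err)
 *              return err;
 *      ... operate on the locked bo ...
 *      xe_bo_unlock(bo);
 */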

/**
 * xe_bo_can_migrate - Whether a buffer object likely can be migrated
 * @bo: The buffer object to migrate
 * @mem_type: The TTM memory type intended to migrate to
 *
 * Check whether the buffer object supports migration to the
 * given memory type. Note that pinning may affect the ability to migrate as
 * returned by this function.
 *
 * This function is primarily intended as a helper for checking the
 * possibility to migrate buffer objects and can be called without
 * the object lock held.
 *
 * Return: true if migration is possible, false otherwise.
 */
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type)
{
        unsigned int cur_place;

        if (bo->ttm.type == ttm_bo_type_kernel)
                return true;

        if (bo->ttm.type == ttm_bo_type_sg)
                return false;

        for (cur_place = 0; cur_place < bo->placement.num_placement;
             cur_place++) {
                if (bo->placements[cur_place].mem_type == mem_type)
                        return true;
        }

        return false;
}

static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
{
        memset(place, 0, sizeof(*place));
        place->mem_type = mem_type;
}

/**
 * xe_bo_migrate - Migrate an object to the desired region id
 * @bo: The buffer object to migrate.
 * @mem_type: The TTM region type to migrate to.
 *
 * Attempt to migrate the buffer object to the desired memory region. The
 * buffer object may not be pinned, and must be locked.
 * On successful completion, the object memory type will be updated, but an
 * async migration task may not have completed yet. To wait for the migration
 * to finish, wait for the object's kernel fences to signal with the object
 * lock held.
 *
 * Return: 0 on success. Negative error code on failure. In particular may
 * return -EINTR or -ERESTARTSYS if a signal is pending.
 */
int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
{
        struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
        struct ttm_operation_ctx ctx = {
                .interruptible = true,
                .no_wait_gpu = false,
                .gfp_retry_mayfail = true,
        };
        struct ttm_placement placement;
        struct ttm_place requested;

        xe_bo_assert_held(bo);

        if (bo->ttm.resource->mem_type == mem_type)
                return 0;

        if (xe_bo_is_pinned(bo))
                return -EBUSY;

        if (!xe_bo_can_migrate(bo, mem_type))
                return -EINVAL;

        xe_place_from_ttm_type(mem_type, &requested);
        placement.num_placement = 1;
        placement.placement = &requested;

        /*
         * Stolen needs to be handled like the VRAM handling below if we ever
         * need to support it.
         */
        drm_WARN_ON(&xe->drm, mem_type == XE_PL_STOLEN);

        if (mem_type_is_vram(mem_type)) {
                u32 c = 0;

                add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
        }

        return ttm_bo_validate(&bo->ttm, &placement, &ctx);
}
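
/*
 * Illustrative sketch (not part of the driver): forcing an unpinned BO back to
 * system memory and waiting for the async copy to finish could look roughly
 * like this; the DMA_RESV_USAGE_KERNEL wait mirrors what xe_bo_evict() does.
 *
 *      err = xe_bo_lock(bo, true);
 *      if (err)
 *              return err;
 *      err = xe_bo_migrate(bo, XE_PL_TT);
 *      if (!err)
 *              dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
 *                                    false, MAX_SCHEDULE_TIMEOUT);
 *      xe_bo_unlock(bo);
 */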

/**
 * xe_bo_evict - Evict an object to evict placement
 * @bo: The buffer object to migrate.
 * @force_alloc: Set force_alloc in ttm_operation_ctx
 *
 * On successful completion, the object memory will be moved to evict
 * placement. This function blocks until the object has been fully moved.
 *
 * Return: 0 on success. Negative error code on failure.
 */
int xe_bo_evict(struct xe_bo *bo, bool force_alloc)
{
        struct ttm_operation_ctx ctx = {
                .interruptible = false,
                .no_wait_gpu = false,
                .force_alloc = force_alloc,
                .gfp_retry_mayfail = true,
        };
        struct ttm_placement placement;
        int ret;

        xe_evict_flags(&bo->ttm, &placement);
        ret = ttm_bo_validate(&bo->ttm, &placement, &ctx);
        if (ret)
                return ret;

        dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
                              false, MAX_SCHEDULE_TIMEOUT);

        return 0;
}

/**
 * xe_bo_needs_ccs_pages - Whether a bo needs to back up CCS pages when
 * placed in system memory.
 * @bo: The xe_bo
 *
 * Return: true if extra pages need to be allocated, false otherwise.
 */
bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
{
        struct xe_device *xe = xe_bo_device(bo);

        if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
                return false;

        if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
                return false;

        /* On discrete GPUs, if the GPU can access this buffer from
         * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
         * can't be used since there's no CCS storage associated with
         * non-VRAM addresses.
         */
        if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
                return false;

        return true;
}

/**
 * __xe_bo_release_dummy() - Dummy kref release function
 * @kref: The embedded struct kref.
 *
 * Dummy release function for xe_bo_put_deferred(). Keep off.
 */
void __xe_bo_release_dummy(struct kref *kref)
{
}

/**
 * xe_bo_put_commit() - Put bos whose put was deferred by xe_bo_put_deferred().
 * @deferred: The lockless list used for the call to xe_bo_put_deferred().
 *
 * Puts all bos whose put was deferred by xe_bo_put_deferred().
 * The @deferred list can be either an onstack local list or a global
 * shared list used by a workqueue.
 */
void xe_bo_put_commit(struct llist_head *deferred)
{
        struct llist_node *freed;
        struct xe_bo *bo, *next;

        if (!deferred)
                return;

        freed = llist_del_all(deferred);
        if (!freed)
                return;

        llist_for_each_entry_safe(bo, next, freed, freed)
                drm_gem_object_free(&bo->ttm.base.refcount);
}
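
/*
 * Illustrative sketch (not part of the driver): the deferred-put pattern lets
 * a caller drop what may be final references outside of a critical section.
 * It assumes xe_bo_put_deferred() from xe_bo.h; "entry", "link", "bo_list" and
 * "some_lock" are placeholder names for the example only.
 *
 *      LLIST_HEAD(deferred);
 *
 *      spin_lock(&some_lock);
 *      list_for_each_entry_safe(entry, next, &bo_list, link) {
 *              list_del(&entry->link);
 *              xe_bo_put_deferred(entry->bo, &deferred);
 *      }
 *      spin_unlock(&some_lock);
 *
 *      xe_bo_put_commit(&deferred);
 */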

void xe_bo_put(struct xe_bo *bo)
{
        struct xe_tile *tile;
        u8 id;

        might_sleep();
        if (bo) {
#ifdef CONFIG_PROC_FS
                if (bo->client)
                        might_lock(&bo->client->bos_lock);
#endif
                for_each_tile(tile, xe_bo_device(bo), id)
                        if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
                                might_lock(&bo->ggtt_node[id]->ggtt->lock);
                drm_gem_object_put(&bo->ttm.base);
        }
}

/**
 * xe_bo_dumb_create - Create a dumb bo as backing for a fb
 * @file_priv: ...
 * @dev: ...
 * @args: ...
 *
 * See dumb_create() hook in include/drm/drm_drv.h
 *
 * Return: ...
 */
int xe_bo_dumb_create(struct drm_file *file_priv,
                      struct drm_device *dev,
                      struct drm_mode_create_dumb *args)
{
        struct xe_device *xe = to_xe_device(dev);
        struct xe_bo *bo;
        uint32_t handle;
        int cpp = DIV_ROUND_UP(args->bpp, 8);
        int err;
        u32 page_size = max_t(u32, PAGE_SIZE,
                              xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);

        args->pitch = ALIGN(args->width * cpp, 64);
        args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
                           page_size);

        bo = xe_bo_create_user(xe, NULL, NULL, args->size,
                               DRM_XE_GEM_CPU_CACHING_WC,
                               XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
                               XE_BO_FLAG_SCANOUT |
                               XE_BO_FLAG_NEEDS_CPU_ACCESS);
        if (IS_ERR(bo))
                return PTR_ERR(bo);

        err = drm_gem_handle_create(file_priv, &bo->ttm.base, &handle);
        /* drop reference from allocate - handle holds it now */
        drm_gem_object_put(&bo->ttm.base);
        if (!err)
                args->handle = handle;
        return err;
}

void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
{
        struct ttm_buffer_object *tbo = &bo->ttm;
        struct ttm_device *bdev = tbo->bdev;

        drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);

        list_del_init(&bo->vram_userfault_link);
}

#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_bo.c"
#endif