// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_bars.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vram.h"
#include "xe_wa.h"

struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - Check if CPU access to stolen
 * memory needs to go through the GGTT
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable direct CPU access to
 * stolen memory; on such hardware we can always fall back to the mappable part
 * of the GGTT for CPU access. Check if that's the case for this device.
 *
 * Return: true if CPU access to stolen must be mapped through the GGTT.
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

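/*
 * Determine the size of the WOPCM carve-out from the STOLEN_RESERVED
 * register. The WOPCM sits at the top of the DSM, so callers subtract this
 * from the stolen size. Returns 0 (with a WARN) on an unknown encoding.
 */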
static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}

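/*
 * Discrete GPUs: the DSM sits at the top of actual VRAM. Derive the stolen
 * base from the DSMBASE register, carve out the WOPCM at the top, and allow
 * direct CPU access through the LMEM BAR when the BAR is large enough to
 * cover the region.
 */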
static u64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram;
	resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram);
	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, wopcm_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram);
	tile_size = xe_vram_region_actual_physical_size(tile_vram);

	/* Use the DSM base address for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	/* Carve out the top of the DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;

	xe_assert(xe, stolen_size > wopcm_size);
	stolen_size -= wopcm_size;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile_io_start + mgr->stolen_base;

	/*
	 * There may be a few KB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion is
	 * always less than the DSM granularity, so align stolen_size down to
	 * DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}

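/*
 * Integrated platforms with graphics version 1270+: the DSM starts right
 * after the 8MB GSM in BAR 2. The stolen size is decoded from the GMS field
 * of the GGC register, minus the WOPCM and any BIOS-reserved GSC range
 * (Wa_14019821291).
 */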
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);

	/*
	 * Check GGMS: it should be the fixed value 0x3 (8MB), which
	 * corresponds to the GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset from GSMBASE as the address in the
	 * PTEs, together with the DM flag being set. Previously there was no
	 * such flag, so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* Decode the GMS field into a stolen size; bail out on an invalid value */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x4:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of the DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_GT_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS.  We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse.  The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}

extern struct resource intel_graphics_stolen_res;

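/*
 * Older integrated platforms: the stolen region is discovered by the x86
 * early quirks code and exported as intel_graphics_stolen_res; CPU access
 * is only possible through the GGTT aperture.
 */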
static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* CPU access is mapped through the GGTT aperture (BAR 2) */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

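/**
 * xe_ttm_stolen_mgr_init() - Initialize the stolen memory manager
 * @xe: xe device
 *
 * Detect the stolen memory region for this device and register it with TTM as
 * the XE_PL_STOLEN placement. Lack of stolen memory support is not treated as
 * an error.
 *
 * Return: 0 on success, negative error code on failure.
 */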
int xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_ttm_stolen_mgr *mgr;
	u64 stolen_size, io_size;
	int err;

	mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	if (!mgr)
		return -ENOMEM;

	if (IS_SRIOV_VF(xe))
		stolen_size = 0;
	else if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return 0;
	}

	/*
	 * We don't attempt partial visible support for stolen vram, since
	 * stolen is always at the end of vram, and with small BAR the BAR
	 * size is pretty much always 256M.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, PAGE_SIZE);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return err;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);

	return 0;
}

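/**
 * xe_ttm_stolen_io_offset() - CPU-visible IO address for a stolen BO offset
 * @bo: the stolen buffer object
 * @offset: byte offset into the BO
 *
 * Return: the IO address at which the CPU can access @offset within @bo,
 * either directly within the stolen region or through the GGTT on platforms
 * where CPU access to stolen must go through the GGTT.
 */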
u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}

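/*
 * Direct BAR access: stolen resources are expected to be contiguous, so
 * point the TTM bus fields straight at the stolen portion of the PCI
 * aperture.
 */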
static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}

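/*
 * GGTT-mediated access: the BO must already have a GGTT mapping, which is
 * always contiguous, so the bus offset is its GGTT address plus the
 * aperture base.
 */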
static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* It shouldn't be possible to get here: stolen is only used on x86 */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

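/**
 * xe_ttm_stolen_io_mem_reserve() - Fill in the CPU bus placement for a stolen
 * resource
 * @xe: xe device
 * @mem: the stolen TTM resource
 *
 * Return: 0 on success, negative error code on failure.
 */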
int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

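/**
 * xe_ttm_stolen_gpu_offset() - GPU address of the start of stolen memory
 * @xe: xe device
 *
 * Return: the base of the stolen region in the GPU address space.
 */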
u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}