xref: /linux/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c (revision 221013afb459e5deb8bd08e29b37050af5586d1c)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021-2023 Intel Corporation
4  * Copyright (C) 2021-2002 Red Hat
5  */
6 
7 #include <drm/drm_managed.h>
8 #include <drm/drm_mm.h>
9 
10 #include <drm/ttm/ttm_device.h>
11 #include <drm/ttm/ttm_placement.h>
12 #include <drm/ttm/ttm_range_manager.h>
13 
14 #include <generated/xe_wa_oob.h>
15 
16 #include "regs/xe_bars.h"
17 #include "regs/xe_gt_regs.h"
18 #include "regs/xe_regs.h"
19 #include "xe_bo.h"
20 #include "xe_device.h"
21 #include "xe_gt.h"
22 #include "xe_gt_printk.h"
23 #include "xe_mmio.h"
24 #include "xe_res_cursor.h"
25 #include "xe_sriov.h"
26 #include "xe_ttm_stolen_mgr.h"
27 #include "xe_ttm_vram_mgr.h"
28 #include "xe_wa.h"
29 
30 struct xe_ttm_stolen_mgr {
31 	struct xe_ttm_vram_mgr base;
32 
33 	/* PCI base offset */
34 	resource_size_t io_base;
35 	/* GPU base offset */
36 	resource_size_t stolen_base;
37 
38 	void __iomem *mapping;
39 };
40 
41 static inline struct xe_ttm_stolen_mgr *
42 to_stolen_mgr(struct ttm_resource_manager *man)
43 {
44 	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
45 }
46 
47 /**
48  * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
49  * stolen, can we then fallback to mapping through the GGTT.
50  * @xe: xe device
51  *
52  * Some older integrated platforms don't support reliable CPU access for stolen,
53  * however on such hardware we can always use the mappable part of the GGTT for
54  * CPU access. Check if that's the case for this device.
55  */
56 bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
57 {
58 	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
59 }
60 
61 static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
62 {
63 	struct xe_tile *tile = xe_device_get_root_tile(xe);
64 	struct xe_gt *mmio = xe_root_mmio_gt(xe);
65 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
66 	u64 stolen_size;
67 	u64 tile_offset;
68 	u64 tile_size;
69 
70 	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
71 	tile_size = tile->mem.vram.actual_physical_size;
72 
73 	/* Use DSM base address instead for stolen memory */
74 	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
75 	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
76 		return 0;
77 
78 	stolen_size = tile_size - mgr->stolen_base;
79 
80 	/* Verify usage fits in the actual resource available */
81 	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
82 		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
83 
84 	/*
85 	 * There may be few KB of platform dependent reserved memory at the end
86 	 * of vram which is not part of the DSM. Such reserved memory portion is
87 	 * always less then DSM granularity so align down the stolen_size to DSM
88 	 * granularity to accommodate such reserve vram portion.
89 	 */
90 	return ALIGN_DOWN(stolen_size, SZ_1M);
91 }
92 
93 static u32 get_wopcm_size(struct xe_device *xe)
94 {
95 	u32 wopcm_size;
96 	u64 val;
97 
98 	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
99 	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
100 
101 	switch (val) {
102 	case 0x5 ... 0x6:
103 		val--;
104 		fallthrough;
105 	case 0x0 ... 0x3:
106 		wopcm_size = (1U << val) * SZ_1M;
107 		break;
108 	default:
109 		WARN(1, "Missing case wopcm_size=%llx\n", val);
110 		wopcm_size = 0;
111 	}
112 
113 	return wopcm_size;
114 }
115 
116 static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
117 {
118 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
119 	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
120 	u32 stolen_size, wopcm_size;
121 	u32 ggc, gms;
122 
123 	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
124 
125 	/*
126 	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
127 	 * GTT size
128 	 */
129 	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
130 		return 0;
131 
132 	/*
133 	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
134 	 * PTEs, together with the DM flag being set. Previously there was no
135 	 * such flag so the address was the io_base.
136 	 *
137 	 * DSMBASE = GSMBASE + 8MB
138 	 */
139 	mgr->stolen_base = SZ_8M;
140 	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
141 
142 	/* return valid GMS value, -EIO if invalid */
143 	gms = REG_FIELD_GET(GMS_MASK, ggc);
144 	switch (gms) {
145 	case 0x0 ... 0x04:
146 		stolen_size = gms * 32 * SZ_1M;
147 		break;
148 	case 0xf0 ... 0xfe:
149 		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
150 		break;
151 	default:
152 		return 0;
153 	}
154 
155 	/* Carve out the top of DSM as it contains the reserved WOPCM region */
156 	wopcm_size = get_wopcm_size(xe);
157 	if (drm_WARN_ON(&xe->drm, !wopcm_size))
158 		return 0;
159 
160 	stolen_size -= wopcm_size;
161 
162 	if (media_gt && XE_WA(media_gt, 14019821291)) {
163 		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
164 			& ~GENMASK_ULL(5, 0);
165 
166 		/*
167 		 * This workaround is primarily implemented by the BIOS.  We
168 		 * just need to figure out whether the BIOS has applied the
169 		 * workaround (meaning the programmed address falls within
170 		 * the DSM) and, if so, reserve that part of the DSM to
171 		 * prevent accidental reuse.  The DSM location should be just
172 		 * below the WOPCM.
173 		 */
174 		if (gscpsmi_base >= mgr->io_base &&
175 		    gscpsmi_base < mgr->io_base + stolen_size) {
176 			xe_gt_dbg(media_gt,
177 				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
178 				  mgr->io_base + stolen_size - gscpsmi_base);
179 			stolen_size = gscpsmi_base - mgr->io_base;
180 		}
181 	}
182 
183 	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
184 		return 0;
185 
186 	return stolen_size;
187 }
188 
189 extern struct resource intel_graphics_stolen_res;
190 
191 static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
192 {
193 #ifdef CONFIG_X86
194 	/* Map into GGTT */
195 	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);
196 
197 	/* Stolen memory is x86 only */
198 	mgr->stolen_base = intel_graphics_stolen_res.start;
199 	return resource_size(&intel_graphics_stolen_res);
200 #else
201 	return 0;
202 #endif
203 }
204 
205 void xe_ttm_stolen_mgr_init(struct xe_device *xe)
206 {
207 	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
208 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
209 	u64 stolen_size, io_size;
210 	int err;
211 
212 	if (!mgr) {
213 		drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
214 		return;
215 	}
216 
217 	if (IS_SRIOV_VF(xe))
218 		stolen_size = 0;
219 	else if (IS_DGFX(xe))
220 		stolen_size = detect_bar2_dgfx(xe, mgr);
221 	else if (GRAPHICS_VERx100(xe) >= 1270)
222 		stolen_size = detect_bar2_integrated(xe, mgr);
223 	else
224 		stolen_size = detect_stolen(xe, mgr);
225 
226 	if (!stolen_size) {
227 		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
228 		return;
229 	}
230 
231 	/*
232 	 * We don't try to attempt partial visible support for stolen vram,
233 	 * since stolen is always at the end of vram, and the BAR size is pretty
234 	 * much always 256M, with small-bar.
235 	 */
236 	io_size = 0;
237 	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
238 		io_size = stolen_size;
239 
240 	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
241 				     io_size, PAGE_SIZE);
242 	if (err) {
243 		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
244 		return;
245 	}
246 
247 	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
248 		    stolen_size);
249 
250 	if (io_size)
251 		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
252 }
253 
254 u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
255 {
256 	struct xe_device *xe = xe_bo_device(bo);
257 	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
258 	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
259 	struct xe_res_cursor cur;
260 
261 	XE_WARN_ON(!mgr->io_base);
262 
263 	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
264 		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
265 
266 	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
267 	return mgr->io_base + cur.start;
268 }
269 
270 static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
271 					       struct xe_ttm_stolen_mgr *mgr,
272 					       struct ttm_resource *mem)
273 {
274 	struct xe_res_cursor cur;
275 
276 	if (!mgr->io_base)
277 		return -EIO;
278 
279 	xe_res_first(mem, 0, 4096, &cur);
280 	mem->bus.offset = cur.start;
281 
282 	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
283 
284 	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
285 		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;
286 
287 	mem->bus.offset += mgr->io_base;
288 	mem->bus.is_iomem = true;
289 	mem->bus.caching = ttm_write_combined;
290 
291 	return 0;
292 }
293 
294 static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
295 						 struct xe_ttm_stolen_mgr *mgr,
296 						 struct ttm_resource *mem)
297 {
298 #ifdef CONFIG_X86
299 	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);
300 
301 	XE_WARN_ON(IS_DGFX(xe));
302 
303 	/* XXX: Require BO to be mapped to GGTT? */
304 	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
305 		return -EIO;
306 
307 	/* GGTT is always contiguously mapped */
308 	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;
309 
310 	mem->bus.is_iomem = true;
311 	mem->bus.caching = ttm_write_combined;
312 
313 	return 0;
314 #else
315 	/* How is it even possible to get here without gen12 stolen? */
316 	drm_WARN_ON(&xe->drm, 1);
317 	return -EIO;
318 #endif
319 }
320 
321 int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
322 {
323 	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
324 	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;
325 
326 	if (!mgr || !mgr->io_base)
327 		return -EIO;
328 
329 	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
330 		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
331 	else
332 		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
333 }
334 
335 u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
336 {
337 	struct xe_ttm_stolen_mgr *mgr =
338 		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));
339 
340 	return mgr->stolen_base;
341 }
342