1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021-2023 Intel Corporation
4 * Copyright (C) 2021-2002 Red Hat
5 */
6
7 #include <drm/drm_managed.h>
8
9 #include <drm/ttm/ttm_device.h>
10 #include <drm/ttm/ttm_placement.h>
11 #include <drm/ttm/ttm_range_manager.h>
12
13 #include <generated/xe_wa_oob.h>
14
15 #include "regs/xe_bars.h"
16 #include "regs/xe_gt_regs.h"
17 #include "regs/xe_regs.h"
18 #include "xe_bo.h"
19 #include "xe_device.h"
20 #include "xe_gt.h"
21 #include "xe_gt_printk.h"
22 #include "xe_mmio.h"
23 #include "xe_res_cursor.h"
24 #include "xe_sriov.h"
25 #include "xe_ttm_stolen_mgr.h"
26 #include "xe_ttm_vram_mgr.h"
27 #include "xe_wa.h"
28
29 struct xe_ttm_stolen_mgr {
30 struct xe_ttm_vram_mgr base;
31
32 /* PCI base offset */
33 resource_size_t io_base;
34 /* GPU base offset */
35 resource_size_t stolen_base;
36
37 void __iomem *mapping;
38 };
39
40 static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager * man)41 to_stolen_mgr(struct ttm_resource_manager *man)
42 {
43 return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
44 }
45
46 /**
47 * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
48 * stolen, can we then fallback to mapping through the GGTT.
49 * @xe: xe device
50 *
51 * Some older integrated platforms don't support reliable CPU access for stolen,
52 * however on such hardware we can always use the mappable part of the GGTT for
53 * CPU access. Check if that's the case for this device.
54 */
xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device * xe)55 bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
56 {
57 return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
58 }
59
get_wopcm_size(struct xe_device * xe)60 static u32 get_wopcm_size(struct xe_device *xe)
61 {
62 u32 wopcm_size;
63 u64 val;
64
65 val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
66 val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);
67
68 switch (val) {
69 case 0x5 ... 0x6:
70 val--;
71 fallthrough;
72 case 0x0 ... 0x3:
73 wopcm_size = (1U << val) * SZ_1M;
74 break;
75 default:
76 WARN(1, "Missing case wopcm_size=%llx\n", val);
77 wopcm_size = 0;
78 }
79
80 return wopcm_size;
81 }
82
detect_bar2_dgfx(struct xe_device * xe,struct xe_ttm_stolen_mgr * mgr)83 static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
84 {
85 struct xe_tile *tile = xe_device_get_root_tile(xe);
86 struct xe_mmio *mmio = xe_root_tile_mmio(xe);
87 struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
88 u64 stolen_size, wopcm_size;
89 u64 tile_offset;
90 u64 tile_size;
91
92 tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
93 tile_size = tile->mem.vram.actual_physical_size;
94
95 /* Use DSM base address instead for stolen memory */
96 mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
97 if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
98 return 0;
99
100 /* Carve out the top of DSM as it contains the reserved WOPCM region */
101 wopcm_size = get_wopcm_size(xe);
102 if (drm_WARN_ON(&xe->drm, !wopcm_size))
103 return 0;
104
105 stolen_size = tile_size - mgr->stolen_base;
106 stolen_size -= wopcm_size;
107
108 /* Verify usage fits in the actual resource available */
109 if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
110 mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
111
112 /*
113 * There may be few KB of platform dependent reserved memory at the end
114 * of vram which is not part of the DSM. Such reserved memory portion is
115 * always less then DSM granularity so align down the stolen_size to DSM
116 * granularity to accommodate such reserve vram portion.
117 */
118 return ALIGN_DOWN(stolen_size, SZ_1M);
119 }
120
detect_bar2_integrated(struct xe_device * xe,struct xe_ttm_stolen_mgr * mgr)121 static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
122 {
123 struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
124 struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
125 u32 stolen_size, wopcm_size;
126 u32 ggc, gms;
127
128 ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);
129
130 /*
131 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
132 * GTT size
133 */
134 if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
135 return 0;
136
137 /*
138 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
139 * PTEs, together with the DM flag being set. Previously there was no
140 * such flag so the address was the io_base.
141 *
142 * DSMBASE = GSMBASE + 8MB
143 */
144 mgr->stolen_base = SZ_8M;
145 mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
146
147 /* return valid GMS value, -EIO if invalid */
148 gms = REG_FIELD_GET(GMS_MASK, ggc);
149 switch (gms) {
150 case 0x0 ... 0x04:
151 stolen_size = gms * 32 * SZ_1M;
152 break;
153 case 0xf0 ... 0xfe:
154 stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
155 break;
156 default:
157 return 0;
158 }
159
160 /* Carve out the top of DSM as it contains the reserved WOPCM region */
161 wopcm_size = get_wopcm_size(xe);
162 if (drm_WARN_ON(&xe->drm, !wopcm_size))
163 return 0;
164
165 stolen_size -= wopcm_size;
166
167 if (media_gt && XE_WA(media_gt, 14019821291)) {
168 u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
169 & ~GENMASK_ULL(5, 0);
170
171 /*
172 * This workaround is primarily implemented by the BIOS. We
173 * just need to figure out whether the BIOS has applied the
174 * workaround (meaning the programmed address falls within
175 * the DSM) and, if so, reserve that part of the DSM to
176 * prevent accidental reuse. The DSM location should be just
177 * below the WOPCM.
178 */
179 if (gscpsmi_base >= mgr->io_base &&
180 gscpsmi_base < mgr->io_base + stolen_size) {
181 xe_gt_dbg(media_gt,
182 "Reserving %llu bytes of DSM for Wa_14019821291\n",
183 mgr->io_base + stolen_size - gscpsmi_base);
184 stolen_size = gscpsmi_base - mgr->io_base;
185 }
186 }
187
188 if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
189 return 0;
190
191 return stolen_size;
192 }
193
194 extern struct resource intel_graphics_stolen_res;
195
detect_stolen(struct xe_device * xe,struct xe_ttm_stolen_mgr * mgr)196 static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
197 {
198 #ifdef CONFIG_X86
199 /* Map into GGTT */
200 mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);
201
202 /* Stolen memory is x86 only */
203 mgr->stolen_base = intel_graphics_stolen_res.start;
204 return resource_size(&intel_graphics_stolen_res);
205 #else
206 return 0;
207 #endif
208 }
209
xe_ttm_stolen_mgr_init(struct xe_device * xe)210 void xe_ttm_stolen_mgr_init(struct xe_device *xe)
211 {
212 struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
213 struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
214 u64 stolen_size, io_size;
215 int err;
216
217 if (!mgr) {
218 drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
219 return;
220 }
221
222 if (IS_SRIOV_VF(xe))
223 stolen_size = 0;
224 else if (IS_DGFX(xe))
225 stolen_size = detect_bar2_dgfx(xe, mgr);
226 else if (GRAPHICS_VERx100(xe) >= 1270)
227 stolen_size = detect_bar2_integrated(xe, mgr);
228 else
229 stolen_size = detect_stolen(xe, mgr);
230
231 if (!stolen_size) {
232 drm_dbg_kms(&xe->drm, "No stolen memory support\n");
233 return;
234 }
235
236 /*
237 * We don't try to attempt partial visible support for stolen vram,
238 * since stolen is always at the end of vram, and the BAR size is pretty
239 * much always 256M, with small-bar.
240 */
241 io_size = 0;
242 if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
243 io_size = stolen_size;
244
245 err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
246 io_size, PAGE_SIZE);
247 if (err) {
248 drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
249 return;
250 }
251
252 drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
253 stolen_size);
254
255 if (io_size)
256 mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
257 }
258
xe_ttm_stolen_io_offset(struct xe_bo * bo,u32 offset)259 u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
260 {
261 struct xe_device *xe = xe_bo_device(bo);
262 struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
263 struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
264 struct xe_res_cursor cur;
265
266 XE_WARN_ON(!mgr->io_base);
267
268 if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
269 return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;
270
271 xe_res_first(bo->ttm.resource, offset, 4096, &cur);
272 return mgr->io_base + cur.start;
273 }
274
__xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device * xe,struct xe_ttm_stolen_mgr * mgr,struct ttm_resource * mem)275 static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
276 struct xe_ttm_stolen_mgr *mgr,
277 struct ttm_resource *mem)
278 {
279 struct xe_res_cursor cur;
280
281 if (!mgr->io_base)
282 return -EIO;
283
284 xe_res_first(mem, 0, 4096, &cur);
285 mem->bus.offset = cur.start;
286
287 drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
288
289 if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
290 mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;
291
292 mem->bus.offset += mgr->io_base;
293 mem->bus.is_iomem = true;
294 mem->bus.caching = ttm_write_combined;
295
296 return 0;
297 }
298
__xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device * xe,struct xe_ttm_stolen_mgr * mgr,struct ttm_resource * mem)299 static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
300 struct xe_ttm_stolen_mgr *mgr,
301 struct ttm_resource *mem)
302 {
303 #ifdef CONFIG_X86
304 struct xe_bo *bo = ttm_to_xe_bo(mem->bo);
305
306 XE_WARN_ON(IS_DGFX(xe));
307
308 /* XXX: Require BO to be mapped to GGTT? */
309 if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
310 return -EIO;
311
312 /* GGTT is always contiguously mapped */
313 mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;
314
315 mem->bus.is_iomem = true;
316 mem->bus.caching = ttm_write_combined;
317
318 return 0;
319 #else
320 /* How is it even possible to get here without gen12 stolen? */
321 drm_WARN_ON(&xe->drm, 1);
322 return -EIO;
323 #endif
324 }
325
xe_ttm_stolen_io_mem_reserve(struct xe_device * xe,struct ttm_resource * mem)326 int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
327 {
328 struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
329 struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;
330
331 if (!mgr || !mgr->io_base)
332 return -EIO;
333
334 if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
335 return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
336 else
337 return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
338 }
339
xe_ttm_stolen_gpu_offset(struct xe_device * xe)340 u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
341 {
342 struct xe_ttm_stolen_mgr *mgr =
343 to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));
344
345 return mgr->stolen_base;
346 }
347