// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>
#include <drm/drm_mm.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_bars.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"

struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - Check if CPU access to stolen
 * memory must go through the GGTT.
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable direct CPU access
 * to stolen memory; however, on such hardware we can always use the mappable
 * part of the GGTT for CPU access instead. Check if that's the case for this
 * device.
 *
 * Return: true if CPU access to stolen must go through the GGTT.
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	struct xe_gt *mmio = xe_root_mmio_gt(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
	tile_size = tile->mem.vram.actual_physical_size;

	/* Use DSM base address instead for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;

	/*
	 * There may be a few KB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion
	 * is always less than the DSM granularity, so align down the
	 * stolen_size to the DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}

static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}
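
/*
 * For reference, the decoding implemented by the switch above (derived from
 * the code itself, not quoted from hardware documentation):
 *
 *	encoding	WOPCM size
 *	0x0		1 MiB
 *	0x1		2 MiB
 *	0x2		4 MiB
 *	0x3		8 MiB
 *	0x5		16 MiB	(0x5 is first decremented to 0x4)
 *	0x6		32 MiB	(0x6 is first decremented to 0x5)
 *
 * An encoding of 0x4 or anything >= 0x7 is unexpected: it hits the default
 * case, triggers the WARN and decodes to 0.
 */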

static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);

	/*
	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
	 * GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
	 * PTEs, together with the DM flag being set. Previously there was no
	 * such flag, so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* Decode GMS; treat an invalid encoding as no stolen memory */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x4:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS. We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse. The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}
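
/*
 * Illustrative decoding of the GMS field above, derived from the switch
 * itself rather than a spec table: values 0x0..0x4 select multiples of
 * 32 MiB (0, 32, 64, 96, 128 MiB), while 0xf0..0xfe select 4..60 MiB in
 * 4 MiB steps. For example, gms == 0x2 yields 64 MiB of stolen memory,
 * from which the WOPCM carve-out is then subtracted.
 */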

extern struct resource intel_graphics_stolen_res;

static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

void xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, io_size;
	int err;

	if (!mgr) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
		return;
	}

	if (IS_SRIOV_VF(xe))
		stolen_size = 0;
	else if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return;
	}

	/*
	 * We don't attempt partial visible support for stolen vram, since
	 * stolen is always at the end of vram, and the BAR size is pretty
	 * much always 256M, with small-bar.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, PAGE_SIZE);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
}

u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}

static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}
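
/*
 * Worked example for the bar2 path above (numbers made up for
 * illustration): with mgr->io_base == 0x40000000 and a contiguous
 * resource whose first block starts at DSM offset 0x1000, we end up
 * with mem->bus.offset == 0x40001000 and, if the WC mapping exists,
 * mem->bus.addr == mgr->mapping + 0x1000.
 */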

static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}
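
/*
 * Usage sketch (illustrative only, not part of this file): a caller that
 * needs the CPU-visible address of a stolen BO would combine the helpers
 * above roughly like so, assuming the BO was created in XE_PL_STOLEN:
 *
 *	u64 io_offset = xe_ttm_stolen_io_offset(bo, 0);
 *	u64 gpu_base = xe_ttm_stolen_gpu_offset(xe);
 *
 * On igfx with GRAPHICS_VERx100 >= 1270, gpu_base is the SZ_8M offset from
 * GSMBASE that gets encoded into PTEs together with the DM flag; on older
 * igfx, CPU access goes through the GGTT instead, as
 * xe_ttm_stolen_cpu_access_needs_ggtt() reports.
 */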