// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_bars.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vram.h"
#include "xe_wa.h"

struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - Check if CPU access to stolen memory
 * needs to go through the GGTT.
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable CPU access to stolen
 * memory; however, on such hardware we can always fall back to the mappable
 * part of the GGTT for CPU access. Check if that's the case for this device.
 *
 * Return: true if CPU access to stolen needs to use the GGTT mapping.
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}
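
/*
 * Worked decode examples for the WOPCM_SIZE field (illustrative only,
 * derived from the switch in get_wopcm_size() above): field values
 * 0x0..0x3 decode directly as a power-of-two size in MB, while 0x5 and
 * 0x6 are first decremented and then decoded the same way:
 *
 *	val 0x0 -> 1M		val 0x3 -> 8M
 *	val 0x1 -> 2M		val 0x5 -> (1U << 4) * SZ_1M = 16M
 *	val 0x2 -> 4M		val 0x6 -> (1U << 5) * SZ_1M = 32M
 *
 * Any other encoding (e.g. 0x4) is unexpected and decodes to 0 with a
 * WARN, which the callers below treat as "no usable stolen memory".
 */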

static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram;
	resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram);
	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, wopcm_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram);
	tile_size = xe_vram_region_actual_physical_size(tile_vram);

	/* Use the DSM base address for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	/* Carve out the top of the DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;
	stolen_size -= wopcm_size;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile_io_start + mgr->stolen_base;

	/*
	 * There may be a few KiB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion is
	 * always less than the DSM granularity, so align the stolen_size down
	 * to the DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}

static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);

	/*
	 * Check GGMS: it should be fixed at 0x3 (8MB), which corresponds to
	 * the GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
	 * PTEs, together with the DM flag being set. Previously there was no
	 * such flag, so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* Decode GMS; reserved encodings report no stolen memory (return 0) */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x4:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of the DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_GT_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS. We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse. The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}
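
/*
 * Worked decode examples for the GMS field (illustrative only, derived
 * from the switch in detect_bar2_integrated() above): values 0x0..0x4
 * select multiples of 32M, while 0xf0..0xfe select multiples of 4M:
 *
 *	gms 0x1  -> 1 * 32M = 32M
 *	gms 0x4  -> 4 * 32M = 128M
 *	gms 0xf0 -> 1 * 4M  = 4M
 *	gms 0xfe -> 15 * 4M = 60M
 *
 * Reserved encodings hit the default case, reporting no stolen memory.
 */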

extern struct resource intel_graphics_stolen_res;

static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

int xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_ttm_stolen_mgr *mgr;
	u64 stolen_size, io_size;
	int err;

	mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	if (!mgr)
		return -ENOMEM;

	if (IS_SRIOV_VF(xe))
		stolen_size = 0;
	else if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return 0;
	}

	/*
	 * We don't attempt partial-visible support for stolen vram, since
	 * stolen is always at the end of vram, and the BAR size is pretty
	 * much always 256M with small BAR.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, PAGE_SIZE);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return err;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);

	return 0;
}

u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}

static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}
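
/*
 * Illustrative example of the BAR2 reservation above (hypothetical
 * numbers): with mgr->io_base = 0x4000000000 and a contiguous BO whose
 * first block starts at cur.start = 0x100000, the reservation produces
 * mem->bus.offset = 0x4000100000 and, if mgr->mapping is set, a CPU
 * pointer of mgr->mapping + 0x100000; note that bus.addr is computed
 * from the offset before io_base is added.
 */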

static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}
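
/*
 * Hypothetical usage sketch (not part of this file): a caller needing
 * the DSM-relative device address of a stolen BO, e.g. for PTE
 * programming, could combine the GPU base exposed here with a resource
 * cursor, assuming the BO lives in XE_PL_STOLEN:
 *
 *	struct xe_res_cursor cur;
 *	u64 addr;
 *
 *	xe_res_first(bo->ttm.resource, 0, 4096, &cur);
 *	addr = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)) + cur.start;
 *
 * Encoding details, such as setting the DM flag on graphics >= 1270,
 * remain the caller's responsibility.
 */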