// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_bars.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vram.h"
#include "xe_wa.h"

struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
 * stolen, can we then fall back to mapping through the GGTT.
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable CPU access for
 * stolen, however on such hardware we can always use the mappable part of
 * the GGTT for CPU access. Check if that's the case for this device.
 *
 * Return: true if CPU access to stolen needs to go through the GGTT.
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}

static u64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram;
	resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram);
	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, wopcm_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram);
	tile_size = xe_vram_region_actual_physical_size(tile_vram);

	/* Use DSM base address instead for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;

	xe_assert(xe, stolen_size > wopcm_size);
	stolen_size -= wopcm_size;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile_io_start + mgr->stolen_base;

	/*
	 * There may be a few KB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion
	 * is always less than the DSM granularity, so align down the
	 * stolen_size to DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}
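/*
 * Illustrative view of how detect_bar2_dgfx() above carves up the root
 * tile's vram (a sketch with example proportions, not authoritative):
 *
 *   0            stolen_base       tile_size - wopcm_size      tile_size
 *   | regular vram |        DSM (stolen)        |    WOPCM      |
 *
 * stolen_base is derived from the DSMBASE register (made tile-relative),
 * and the returned stolen size is the DSM minus the WOPCM carve-out,
 * aligned down to SZ_1M.
 */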
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);

	/*
	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
	 * GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
	 * PTEs, together with the DM flag being set. Previously there was no
	 * such flag so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* Decode the GMS field; anything outside the valid ranges means no stolen */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x04:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_GT_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS. We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse. The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}
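/*
 * Worked examples for the GMS decode in detect_bar2_integrated() above
 * (sizes are before the WOPCM carve-out):
 *
 *   gms == 0x02  ->  2 * 32M                 = 64M of stolen
 *   gms == 0xf2  ->  (0xf2 - 0xf0 + 1) * 4M  = 12M of stolen
 *
 * Any other encoding is treated as "no stolen memory".
 */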
extern struct resource intel_graphics_stolen_res;

static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

int xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_ttm_stolen_mgr *mgr;
	u64 stolen_size, io_size;
	int err;

	mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	if (!mgr)
		return -ENOMEM;

	if (IS_SRIOV_VF(xe))
		stolen_size = 0;
	else if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return 0;
	}

	/*
	 * We don't attempt partial visible support for stolen vram, since
	 * stolen is always at the end of vram, and the BAR size is pretty
	 * much always 256M, with small-bar.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, PAGE_SIZE);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return err;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);

	return 0;
}

u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}

static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}
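/*
 * Fallback path for platforms where xe_ttm_stolen_cpu_access_needs_ggtt()
 * is true: stolen cannot be reliably accessed through BAR2, so CPU access
 * is routed through the mappable part of the GGTT instead, which requires
 * the BO to already have a GGTT mapping.
 */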
static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}
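/*
 * Note on xe_ttm_stolen_gpu_offset(): the value returned depends on how the
 * stolen region was detected. On dGFX it is the DSM offset within the root
 * tile's vram (detect_bar2_dgfx()), on integrated graphics >= 1270 it is the
 * fixed SZ_8M offset from GSMBASE (detect_bar2_integrated()), and on older
 * integrated platforms it is the physical base of the stolen resource
 * (detect_stolen()).
 */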