// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2022 Red Hat
 */

#include <drm/drm_managed.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <generated/xe_wa_oob.h>

#include "regs/xe_bars.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"

struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
 * stolen, can we then fallback to mapping through the GGTT.
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable CPU access for
 * stolen; however, on such hardware we can always use the mappable part of
 * the GGTT for CPU access. Check if that's the case for this device.
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}

static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, wopcm_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
	tile_size = tile->mem.vram.actual_physical_size;

	/* Use DSM base address instead for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;
	stolen_size -= wopcm_size;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;

	/*
	 * There may be a few KiB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion is
	 * always less than the DSM granularity, so align down the stolen_size
	 * to the DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}
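/*
 * Worked example for the carve-out above (illustrative numbers only, not
 * taken from any real platform): with tile_size = 8 GiB, a DSMBASE that
 * yields stolen_base = tile_size - 72 MiB, and wopcm_size = 4 MiB, we get
 * stolen_size = 72 MiB - 4 MiB = 68 MiB. If a further 64 KiB at the very
 * top of vram were reserved outside the DSM, stolen_size would come out as
 * 67 MiB + 960 KiB, and ALIGN_DOWN(stolen_size, SZ_1M) trims it back to
 * 67 MiB.
 */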
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_tile_mmio(xe), GGC);

	/*
	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
	 * GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as address in the
	 * PTEs, together with the DM flag being set. Previously there was no
	 * such flag so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* Decode the GMS field into a stolen size; bail out (return 0) if invalid */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x04:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS. We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse. The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}
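/*
 * Illustrative GMS decodes for the switch above (raw sizes before the WOPCM
 * carve-out; the encodings are read straight from the GGC register as
 * decoded here): gms == 0x2 maps to 2 * 32 MiB = 64 MiB of stolen, while
 * gms == 0xf1 maps to (0xf1 - 0xf0 + 1) * 4 MiB = 8 MiB.
 */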
extern struct resource intel_graphics_stolen_res;

static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

void xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, io_size;
	int err;

	if (!mgr) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
		return;
	}

	if (IS_SRIOV_VF(xe))
		stolen_size = 0;
	else if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return;
	}

	/*
	 * We don't attempt partial visible support for stolen vram, since
	 * stolen is always at the end of vram and, with small-bar, the BAR
	 * size is pretty much always 256M.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, PAGE_SIZE);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
}

u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}
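/*
 * Sketch of a hypothetical caller mapping a stolen BO through the address
 * returned by xe_ttm_stolen_io_offset() (not code from this driver; 'size'
 * stands for whatever range the caller wants to map):
 *
 *	void __iomem *vaddr = ioremap_wc(xe_ttm_stolen_io_offset(bo, 0), size);
 *	if (!vaddr)
 *		return -ENOMEM;
 *
 * In practice the driver itself relies on mgr->mapping (set up in
 * xe_ttm_stolen_mgr_init() above) rather than ad hoc remaps.
 */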
static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}

static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}
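/*
 * Sketch of how a GPU-visible address for a stolen BO can be composed from
 * the base returned above (hypothetical usage; 'res_start' stands for the
 * BO's resource start within the stolen manager):
 *
 *	u64 gpu_addr = xe_ttm_stolen_gpu_offset(xe) + res_start;
 */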