// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 * Copyright (C) 2021-2002 Red Hat
 */

#include <drm/drm_managed.h>
#include <drm/drm_mm.h>

#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include "generated/xe_wa_oob.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"

struct xe_ttm_stolen_mgr {
	struct xe_ttm_vram_mgr base;

	/* PCI base offset */
	resource_size_t io_base;
	/* GPU base offset */
	resource_size_t stolen_base;

	void __iomem *mapping;
};

static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	return container_of(man, struct xe_ttm_stolen_mgr, base.manager);
}

/**
 * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
 * stolen, can we then fall back to mapping through the GGTT.
 * @xe: xe device
 *
 * Some older integrated platforms don't support reliable CPU access to stolen,
 * however on such hardware we can always use the mappable part of the GGTT for
 * CPU access. Check if that's the case for this device.
 *
 * Return: true if CPU access to stolen needs to go through the GGTT mapping.
 */
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}

static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct xe_tile *tile = xe_device_get_root_tile(xe);
	struct xe_gt *mmio = xe_root_mmio_gt(xe);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size;
	u64 tile_offset;
	u64 tile_size;

	tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
	tile_size = tile->mem.vram.actual_physical_size;

	/* Use the DSM base address instead for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
		return 0;

	stolen_size = tile_size - mgr->stolen_base;

	/* Verify usage fits in the actual resource available */
	if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;

	/*
	 * There may be a few KiB of platform-dependent reserved memory at the
	 * end of vram which is not part of the DSM. Such a reserved portion is
	 * always smaller than the DSM granularity, so align stolen_size down
	 * to the DSM granularity to accommodate it.
	 */
	return ALIGN_DOWN(stolen_size, SZ_1M);
}

/* Decode the size of the WOPCM region reserved at the top of DSM */
static u32 get_wopcm_size(struct xe_device *xe)
{
	u32 wopcm_size;
	u64 val;

	val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
	val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val);

	switch (val) {
	case 0x5 ... 0x6:
		val--;
		fallthrough;
	case 0x0 ... 0x3:
		wopcm_size = (1U << val) * SZ_1M;
		break;
	default:
		WARN(1, "Missing case wopcm_size=%llx\n", val);
		wopcm_size = 0;
	}

	return wopcm_size;
}

static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
	u32 stolen_size, wopcm_size;
	u32 ggc, gms;

	ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);

	/*
	 * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
	 * GTT size
	 */
	if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
		return 0;

	/*
	 * Graphics >= 1270 uses the offset to the GSMBASE as the address in
	 * the PTEs, together with the DM flag being set. Previously there was
	 * no such flag so the address was the io_base.
	 *
	 * DSMBASE = GSMBASE + 8MB
	 */
	mgr->stolen_base = SZ_8M;
	mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;

	/* Decode GMS into a stolen size; bail out on unknown encodings */
	gms = REG_FIELD_GET(GMS_MASK, ggc);
	switch (gms) {
	case 0x0 ... 0x04:
		stolen_size = gms * 32 * SZ_1M;
		break;
	case 0xf0 ... 0xfe:
		stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
		break;
	default:
		return 0;
	}

	/* Carve out the top of DSM as it contains the reserved WOPCM region */
	wopcm_size = get_wopcm_size(xe);
	if (drm_WARN_ON(&xe->drm, !wopcm_size))
		return 0;

	stolen_size -= wopcm_size;

	if (media_gt && XE_WA(media_gt, 14019821291)) {
		u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
			& ~GENMASK_ULL(5, 0);

		/*
		 * This workaround is primarily implemented by the BIOS. We
		 * just need to figure out whether the BIOS has applied the
		 * workaround (meaning the programmed address falls within
		 * the DSM) and, if so, reserve that part of the DSM to
		 * prevent accidental reuse. The DSM location should be just
		 * below the WOPCM.
		 */
		if (gscpsmi_base >= mgr->io_base &&
		    gscpsmi_base < mgr->io_base + stolen_size) {
			xe_gt_dbg(media_gt,
				  "Reserving %llu bytes of DSM for Wa_14019821291\n",
				  mgr->io_base + stolen_size - gscpsmi_base);
			stolen_size = gscpsmi_base - mgr->io_base;
		}
	}

	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
		return 0;

	return stolen_size;
}

extern struct resource intel_graphics_stolen_res;

static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifdef CONFIG_X86
	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;
	return resource_size(&intel_graphics_stolen_res);
#else
	return 0;
#endif
}

void xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, io_size, pgsize;
	int err;

	/* Bail out early if the managed allocation failed */
	if (!mgr)
		return;

	if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return;
	}

	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
	if (pgsize < PAGE_SIZE)
		pgsize = PAGE_SIZE;

	/*
	 * We don't attempt partial visible support for stolen vram, since
	 * stolen is always at the end of vram, and the BAR size is pretty much
	 * always 256M, with small-bar.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, pgsize);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
}

/* Return the CPU-accessible IO address backing @offset within a stolen BO */
u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr);
	struct xe_res_cursor cur;

	XE_WARN_ON(!mgr->io_base);

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return mgr->io_base + xe_bo_ggtt_addr(bo) + offset;

	xe_res_first(bo->ttm.resource, offset, 4096, &cur);
	return mgr->io_base + cur.start;
}

static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
					       struct xe_ttm_stolen_mgr *mgr,
					       struct ttm_resource *mem)
{
	struct xe_res_cursor cur;

	if (!mgr->io_base)
		return -EIO;

	xe_res_first(mem, 0, 4096, &cur);
	mem->bus.offset = cur.start;

	drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));

	if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
		mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;

	mem->bus.offset += mgr->io_base;
	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
}

static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base;

	mem->bus.is_iomem = true;
	mem->bus.caching = ttm_write_combined;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);
	return -EIO;
#endif
}

int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL;

	if (!mgr || !mgr->io_base)
		return -EIO;

	if (xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem);
	else
		return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem);
}

/* Return the GPU-side base offset of stolen memory */
u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr =
		to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN));

	return mgr->stolen_base;
}
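
/*
 * Usage sketch (illustrative only, not part of the driver): how a probe-time
 * caller might bring up the stolen manager and then query the GPU base. The
 * function name example_setup_stolen() and its call site are assumptions for
 * illustration; xe_ttm_stolen_mgr_init(), ttm_manager_type() and
 * xe_ttm_stolen_gpu_offset() are the real interfaces defined/used above.
 *
 *	static void example_setup_stolen(struct xe_device *xe)
 *	{
 *		xe_ttm_stolen_mgr_init(xe);
 *
 *		// No manager registered means no stolen support on this device.
 *		if (!ttm_manager_type(&xe->ttm, XE_PL_STOLEN))
 *			return;
 *
 *		drm_dbg(&xe->drm, "stolen GPU base: 0x%llx\n",
 *			xe_ttm_stolen_gpu_offset(xe));
 *	}
 */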