1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021-2023 Intel Corporation 4 */ 5 6 #include <linux/minmax.h> 7 8 #include "xe_mmio.h" 9 10 #include <drm/drm_managed.h> 11 #include <drm/xe_drm.h> 12 13 #include "regs/xe_engine_regs.h" 14 #include "regs/xe_gt_regs.h" 15 #include "regs/xe_regs.h" 16 #include "xe_bo.h" 17 #include "xe_device.h" 18 #include "xe_ggtt.h" 19 #include "xe_gt.h" 20 #include "xe_gt_mcr.h" 21 #include "xe_macros.h" 22 #include "xe_module.h" 23 #include "xe_tile.h" 24 25 #define XEHP_MTCFG_ADDR XE_REG(0x101800) 26 #define TILE_COUNT REG_GENMASK(15, 8) 27 28 #define BAR_SIZE_SHIFT 20 29 30 static void 31 _resize_bar(struct xe_device *xe, int resno, resource_size_t size) 32 { 33 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 34 int bar_size = pci_rebar_bytes_to_size(size); 35 int ret; 36 37 if (pci_resource_len(pdev, resno)) 38 pci_release_resource(pdev, resno); 39 40 ret = pci_resize_resource(pdev, resno, bar_size); 41 if (ret) { 42 drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n", 43 resno, 1 << bar_size, ERR_PTR(ret)); 44 return; 45 } 46 47 drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); 48 } 49 50 /* 51 * if force_vram_bar_size is set, attempt to set to the requested size 52 * else set to maximum possible size 53 */ 54 static void xe_resize_vram_bar(struct xe_device *xe) 55 { 56 u64 force_vram_bar_size = xe_modparam.force_vram_bar_size; 57 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 58 struct pci_bus *root = pdev->bus; 59 resource_size_t current_size; 60 resource_size_t rebar_size; 61 struct resource *root_res; 62 u32 bar_size_mask; 63 u32 pci_cmd; 64 int i; 65 66 /* gather some relevant info */ 67 current_size = pci_resource_len(pdev, LMEM_BAR); 68 bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR); 69 70 if (!bar_size_mask) 71 return; 72 73 /* set to a specific size? */ 74 if (force_vram_bar_size) { 75 u32 bar_size_bit; 76 77 rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M; 78 79 bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size)); 80 81 if (!bar_size_bit) { 82 drm_info(&xe->drm, 83 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n", 84 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20); 85 return; 86 } 87 88 rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT); 89 90 if (rebar_size == current_size) 91 return; 92 } else { 93 rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT); 94 95 /* only resize if larger than current */ 96 if (rebar_size <= current_size) 97 return; 98 } 99 100 drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n", 101 (u64)current_size >> 20, (u64)rebar_size >> 20); 102 103 while (root->parent) 104 root = root->parent; 105 106 pci_bus_for_each_resource(root, root_res, i) { 107 if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 108 root_res->start > 0x100000000ull) 109 break; 110 } 111 112 if (!root_res) { 113 drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n"); 114 return; 115 } 116 117 pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); 118 pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); 119 120 _resize_bar(xe, LMEM_BAR, rebar_size); 121 122 pci_assign_unassigned_bus_resources(pdev->bus); 123 pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); 124 } 125 126 static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) 127 { 128 if (!pci_resource_flags(pdev, bar)) 129 return false; 130 131 if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET) 132 return false; 133 134 if (!pci_resource_len(pdev, bar)) 135 return false; 136 137 return true; 138 } 139 140 static int xe_determine_lmem_bar_size(struct xe_device *xe) 141 { 142 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 143 144 if (!xe_pci_resource_valid(pdev, LMEM_BAR)) { 145 drm_err(&xe->drm, "pci resource is not valid\n"); 146 return -ENXIO; 147 } 148 149 xe_resize_vram_bar(xe); 150 151 xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); 152 xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); 153 if (!xe->mem.vram.io_size) 154 return -EIO; 155 156 /* XXX: Need to change when xe link code is ready */ 157 xe->mem.vram.dpa_base = 0; 158 159 /* set up a map to the total memory area. */ 160 xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); 161 162 return 0; 163 } 164 165 /** 166 * xe_mmio_tile_vram_size() - Collect vram size and offset information 167 * @tile: tile to get info for 168 * @vram_size: available vram (size - device reserved portions) 169 * @tile_size: actual vram size 170 * @tile_offset: physical start point in the vram address space 171 * 172 * There are 4 places for size information: 173 * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1) 174 * - TILEx size (actual vram size) 175 * - GSMBASE offset (TILEx - "stolen") 176 * - CSSBASE offset (TILEx - CSS space necessary) 177 * 178 * CSSBASE is always a lower/smaller offset then GSMBASE. 179 * 180 * The actual available size of memory is to the CCS or GSM base. 181 * NOTE: multi-tile bases will include the tile offset. 182 * 183 */ 184 static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, 185 u64 *tile_size, u64 *tile_offset) 186 { 187 struct xe_device *xe = tile_to_xe(tile); 188 struct xe_gt *gt = tile->primary_gt; 189 u64 offset; 190 int err; 191 u32 reg; 192 193 err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 194 if (err) 195 return err; 196 197 /* actual size */ 198 if (unlikely(xe->info.platform == XE_DG1)) { 199 *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR); 200 *tile_offset = 0; 201 } else { 202 reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); 203 *tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; 204 *tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G; 205 } 206 207 /* minus device usage */ 208 if (xe->info.has_flat_ccs) { 209 reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); 210 offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; 211 } else { 212 offset = xe_mmio_read64_2x32(gt, GSMBASE); 213 } 214 215 /* remove the tile offset so we have just the available size */ 216 *vram_size = offset - *tile_offset; 217 218 return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 219 } 220 221 int xe_mmio_probe_vram(struct xe_device *xe) 222 { 223 struct xe_tile *tile; 224 resource_size_t io_size; 225 u64 available_size = 0; 226 u64 total_size = 0; 227 u64 tile_offset; 228 u64 tile_size; 229 u64 vram_size; 230 int err; 231 u8 id; 232 233 if (!IS_DGFX(xe)) 234 return 0; 235 236 /* Get the size of the root tile's vram for later accessibility comparison */ 237 tile = xe_device_get_root_tile(xe); 238 err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); 239 if (err) 240 return err; 241 242 err = xe_determine_lmem_bar_size(xe); 243 if (err) 244 return err; 245 246 drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, 247 &xe->mem.vram.io_size); 248 249 io_size = xe->mem.vram.io_size; 250 251 /* tile specific ranges */ 252 for_each_tile(tile, xe, id) { 253 err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); 254 if (err) 255 return err; 256 257 tile->mem.vram.actual_physical_size = tile_size; 258 tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; 259 tile->mem.vram.io_size = min_t(u64, vram_size, io_size); 260 261 if (!tile->mem.vram.io_size) { 262 drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); 263 return -ENODEV; 264 } 265 266 tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; 267 tile->mem.vram.usable_size = vram_size; 268 tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; 269 270 if (tile->mem.vram.io_size < tile->mem.vram.usable_size) 271 drm_info(&xe->drm, "Small BAR device\n"); 272 drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, 273 tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); 274 drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, 275 &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size, 276 &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size); 277 278 /* calculate total size using tile size to get the correct HW sizing */ 279 total_size += tile_size; 280 available_size += vram_size; 281 282 if (total_size > xe->mem.vram.io_size) { 283 drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", 284 &total_size, &xe->mem.vram.io_size); 285 } 286 287 io_size -= min_t(u64, tile_size, io_size); 288 } 289 290 xe->mem.vram.actual_physical_size = total_size; 291 292 drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, 293 &xe->mem.vram.actual_physical_size); 294 drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, 295 &available_size); 296 297 return 0; 298 } 299 300 void xe_mmio_probe_tiles(struct xe_device *xe) 301 { 302 size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size; 303 u8 id, tile_count = xe->info.tile_count; 304 struct xe_gt *gt = xe_root_mmio_gt(xe); 305 struct xe_tile *tile; 306 void *regs; 307 u32 mtcfg; 308 309 if (tile_count == 1) 310 goto add_mmio_ext; 311 312 if (!xe->info.skip_mtcfg) { 313 mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); 314 tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; 315 if (tile_count < xe->info.tile_count) { 316 drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", 317 xe->info.tile_count, tile_count); 318 xe->info.tile_count = tile_count; 319 320 /* 321 * FIXME: Needs some work for standalone media, but should be impossible 322 * with multi-tile for now. 323 */ 324 xe->info.gt_count = xe->info.tile_count; 325 } 326 } 327 328 regs = xe->mmio.regs; 329 for_each_tile(tile, xe, id) { 330 tile->mmio.size = tile_mmio_size; 331 tile->mmio.regs = regs; 332 regs += tile_mmio_size; 333 } 334 335 add_mmio_ext: 336 /* 337 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile). 338 * When supported, there could be an additional contiguous multi-tile MMIO extension 339 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces. 340 */ 341 if (xe->info.has_mmio_ext) { 342 regs = xe->mmio.regs + tile_mmio_size * tile_count; 343 344 for_each_tile(tile, xe, id) { 345 tile->mmio_ext.size = tile_mmio_ext_size; 346 tile->mmio_ext.regs = regs; 347 348 regs += tile_mmio_ext_size; 349 } 350 } 351 } 352 353 static void mmio_fini(struct drm_device *drm, void *arg) 354 { 355 struct xe_device *xe = arg; 356 357 pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); 358 if (xe->mem.vram.mapping) 359 iounmap(xe->mem.vram.mapping); 360 } 361 362 static int xe_verify_lmem_ready(struct xe_device *xe) 363 { 364 struct xe_gt *gt = xe_root_mmio_gt(xe); 365 366 /* 367 * The boot firmware initializes local memory and assesses its health. 368 * If memory training fails, the punit will have been instructed to 369 * keep the GT powered down; we won't be able to communicate with it 370 * and we should not continue with driver initialization. 371 */ 372 if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { 373 drm_err(&xe->drm, "VRAM not initialized by firmware\n"); 374 return -ENODEV; 375 } 376 377 return 0; 378 } 379 380 int xe_mmio_init(struct xe_device *xe) 381 { 382 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 383 const int mmio_bar = 0; 384 385 /* 386 * Map the entire BAR. 387 * The first 16MB of the BAR, belong to the root tile, and include: 388 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). 389 */ 390 xe->mmio.size = pci_resource_len(pdev, mmio_bar); 391 xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0); 392 if (xe->mmio.regs == NULL) { 393 drm_err(&xe->drm, "failed to map registers\n"); 394 return -EIO; 395 } 396 397 return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); 398 } 399 400 int xe_mmio_root_tile_init(struct xe_device *xe) 401 { 402 struct xe_tile *root_tile = xe_device_get_root_tile(xe); 403 int err; 404 405 /* Setup first tile; other tiles (if present) will be setup later. */ 406 root_tile->mmio.size = SZ_16M; 407 root_tile->mmio.regs = xe->mmio.regs; 408 409 err = xe_verify_lmem_ready(xe); 410 if (err) 411 return err; 412 413 return 0; 414 } 415 416 /** 417 * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads 418 * @gt: MMIO target GT 419 * @reg: register to read value from 420 * 421 * Although Intel GPUs have some 64-bit registers, the hardware officially 422 * only supports GTTMMADR register reads of 32 bits or smaller. Even if 423 * a readq operation may return a reasonable value, that violation of the 424 * spec shouldn't be relied upon and all 64-bit register reads should be 425 * performed as two 32-bit reads of the upper and lower dwords. 426 * 427 * When reading registers that may be changing (such as 428 * counters), a rollover of the lower dword between the two 32-bit reads 429 * can be problematic. This function attempts to ensure the upper dword has 430 * stabilized before returning the 64-bit value. 431 * 432 * Note that because this function may re-read the register multiple times 433 * while waiting for the value to stabilize it should not be used to read 434 * any registers where read operations have side effects. 435 * 436 * Returns the value of the 64-bit register. 437 */ 438 u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) 439 { 440 struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; 441 u32 ldw, udw, oldudw, retries; 442 443 if (reg.addr < gt->mmio.adj_limit) { 444 reg.addr += gt->mmio.adj_offset; 445 reg_udw.addr += gt->mmio.adj_offset; 446 } 447 448 oldudw = xe_mmio_read32(gt, reg_udw); 449 for (retries = 5; retries; --retries) { 450 ldw = xe_mmio_read32(gt, reg); 451 udw = xe_mmio_read32(gt, reg_udw); 452 453 if (udw == oldudw) 454 break; 455 456 oldudw = udw; 457 } 458 459 xe_gt_WARN(gt, retries == 0, 460 "64-bit read of %#x did not stabilize\n", reg.addr); 461 462 return (u64)udw << 32 | ldw; 463 } 464 465 /** 466 * xe_mmio_wait32() - Wait for a register to match the desired masked value 467 * @gt: MMIO target GT 468 * @reg: register to read value from 469 * @mask: mask to be applied to the value read from the register 470 * @val: desired value after applying the mask 471 * @timeout_us: time out after this period of time. Wait logic tries to be 472 * smart, applying an exponential backoff until @timeout_us is reached. 473 * @out_val: if not NULL, points where to store the last unmasked value 474 * @atomic: needs to be true if calling from an atomic context 475 * 476 * This function polls for the desired masked value and returns zero on success 477 * or -ETIMEDOUT if timed out. 478 * 479 * Note that @timeout_us represents the minimum amount of time to wait before 480 * giving up. The actual time taken by this function can be a little more than 481 * @timeout_us for different reasons, specially in non-atomic contexts. Thus, 482 * it is possible that this function succeeds even after @timeout_us has passed. 483 */ 484 int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, 485 u32 *out_val, bool atomic) 486 { 487 ktime_t cur = ktime_get_raw(); 488 const ktime_t end = ktime_add_us(cur, timeout_us); 489 int ret = -ETIMEDOUT; 490 s64 wait = 10; 491 u32 read; 492 493 for (;;) { 494 read = xe_mmio_read32(gt, reg); 495 if ((read & mask) == val) { 496 ret = 0; 497 break; 498 } 499 500 cur = ktime_get_raw(); 501 if (!ktime_before(cur, end)) 502 break; 503 504 if (ktime_after(ktime_add_us(cur, wait), end)) 505 wait = ktime_us_delta(end, cur); 506 507 if (atomic) 508 udelay(wait); 509 else 510 usleep_range(wait, wait << 1); 511 wait <<= 1; 512 } 513 514 if (ret != 0) { 515 read = xe_mmio_read32(gt, reg); 516 if ((read & mask) == val) 517 ret = 0; 518 } 519 520 if (out_val) 521 *out_val = read; 522 523 return ret; 524 } 525