// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 */

#include <linux/minmax.h>

#include "xe_mmio.h"

#include <drm/drm_managed.h>
#include <drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_macros.h"
#include "xe_module.h"
#include "xe_sriov.h"
#include "xe_tile.h"

#define XEHP_MTCFG_ADDR		XE_REG(0x101800)
#define TILE_COUNT		REG_GENMASK(15, 8)

#define BAR_SIZE_SHIFT 20

static void
_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	int bar_size = pci_rebar_bytes_to_size(size);
	int ret;

	if (pci_resource_len(pdev, resno))
		pci_release_resource(pdev, resno);

	ret = pci_resize_resource(pdev, resno, bar_size);
	if (ret) {
		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
			 resno, 1 << bar_size, ERR_PTR(ret));
		return;
	}

	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
}

/*
 * If force_vram_bar_size is set, attempt to set the BAR to the requested
 * size; otherwise resize it to the maximum possible size.
 */
static void xe_resize_vram_bar(struct xe_device *xe)
{
	u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_bus *root = pdev->bus;
	resource_size_t current_size;
	resource_size_t rebar_size;
	struct resource *root_res;
	u32 bar_size_mask;
	u32 pci_cmd;
	int i;

	/* gather some relevant info */
	current_size = pci_resource_len(pdev, LMEM_BAR);
	bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR);

	if (!bar_size_mask)
		return;

	/* set to a specific size? */
	if (force_vram_bar_size) {
		u32 bar_size_bit;

		rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;

		bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size));

		if (!bar_size_bit) {
			drm_info(&xe->drm,
				 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
				 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
			return;
		}

		rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);

		if (rebar_size == current_size)
			return;
	} else {
		rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);

		/* only resize if larger than current */
		if (rebar_size <= current_size)
			return;
	}

	drm_info(&xe->drm, "Attempting to resize BAR from %lluMiB -> %lluMiB\n",
		 (u64)current_size >> 20, (u64)rebar_size >> 20);

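	/*
	 * Walk up to the root bridge and check that it exposes a 64-bit
	 * memory window above 4 GiB; without such a window there is no
	 * address space in which to place the enlarged BAR.
	 */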
	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, root_res, i) {
		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    (u64)root_res->start > 0x100000000ul)
			break;
	}

	if (!root_res) {
		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
		return;
	}

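	/*
	 * Memory decoding must be disabled while the BAR is released and
	 * resized; the saved command register value is restored once the
	 * bus resources have been reassigned.
	 */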
	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);

	_resize_bar(xe, LMEM_BAR, rebar_size);

	pci_assign_unassigned_bus_resources(pdev->bus);
	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
}

static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
{
	if (!pci_resource_flags(pdev, bar))
		return false;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET)
		return false;

	if (!pci_resource_len(pdev, bar))
		return false;

	return true;
}

static int xe_determine_lmem_bar_size(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

	if (!xe_pci_resource_valid(pdev, LMEM_BAR)) {
		drm_err(&xe->drm, "pci resource is not valid\n");
		return -ENXIO;
	}

	xe_resize_vram_bar(xe);

	xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR);
	xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR);
	if (!xe->mem.vram.io_size)
		return -EIO;

	/* XXX: Need to change when xe link code is ready */
	xe->mem.vram.dpa_base = 0;

	/* set up a map to the total memory area. */
	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);

	return 0;
}

/**
 * xe_mmio_tile_vram_size() - Collect vram size and offset information
 * @tile: tile to get info for
 * @vram_size: available vram (size - device reserved portions)
 * @tile_size: actual vram size
 * @tile_offset: physical start point in the vram address space
 *
 * There are 4 places for size information:
 * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1)
 * - TILEx size (actual vram size)
 * - GSMBASE offset (TILEx - "stolen")
 * - CSSBASE offset (TILEx - CSS space necessary)
 *
 * CSSBASE is always a lower/smaller offset than GSMBASE.
 *
 * The actual available size of memory is the region from the start of the
 * tile up to the CCS or GSM base.
 * NOTE: multi-tile bases will include the tile offset.
 *
 * Returns 0 on success, or a negative error code on failure.
 */
static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
				  u64 *tile_size, u64 *tile_offset)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_gt *gt = tile->primary_gt;
	u64 offset;
	int err;
	u32 reg;

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	/* actual size */
	if (unlikely(xe->info.platform == XE_DG1)) {
		*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR);
		*tile_offset = 0;
	} else {
		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
		*tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
		*tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G;
	}

	/* minus device usage */
	if (xe->info.has_flat_ccs) {
		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
	} else {
		offset = xe_mmio_read64_2x32(gt, GSMBASE);
	}

	/* remove the tile offset so we have just the available size */
	*vram_size = offset - *tile_offset;

	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}

int xe_mmio_probe_vram(struct xe_device *xe)
{
	struct xe_tile *tile;
	resource_size_t io_size;
	u64 available_size = 0;
	u64 total_size = 0;
	u64 tile_offset;
	u64 tile_size;
	u64 vram_size;
	int err;
	u8 id;

	if (!IS_DGFX(xe))
		return 0;

	/* Get the size of the root tile's vram for later accessibility comparison */
	tile = xe_device_get_root_tile(xe);
	err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
	if (err)
		return err;

	err = xe_determine_lmem_bar_size(xe);
	if (err)
		return err;

	drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
		 &xe->mem.vram.io_size);

	io_size = xe->mem.vram.io_size;

	/* tile specific ranges */
	for_each_tile(tile, xe, id) {
		err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
		if (err)
			return err;

		tile->mem.vram.actual_physical_size = tile_size;
		tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
		tile->mem.vram.io_size = min_t(u64, vram_size, io_size);

		if (!tile->mem.vram.io_size) {
			drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
			return -ENODEV;
		}

		tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset;
		tile->mem.vram.usable_size = vram_size;
		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;

		if (tile->mem.vram.io_size < tile->mem.vram.usable_size)
			drm_info(&xe->drm, "Small BAR device\n");
		drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size excluding stolen %pa, CPU accessible size %pa\n", id,
			 tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size);
		drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id,
			 &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size,
			 &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size);

		/* calculate total size using tile size to get the correct HW sizing */
		total_size += tile_size;
		available_size += vram_size;

		if (total_size > xe->mem.vram.io_size) {
			drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
				 &total_size, &xe->mem.vram.io_size);
		}

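		/*
		 * Shrink the remaining CPU-visible window by this tile's
		 * physical size; on small BAR configurations later tiles
		 * only get whatever portion of the BAR is left.
		 */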
		io_size -= min_t(u64, tile_size, io_size);
	}

	xe->mem.vram.actual_physical_size = total_size;

	drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
		 &xe->mem.vram.actual_physical_size);
	drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
		 &available_size);

	return 0;
}

void xe_mmio_probe_tiles(struct xe_device *xe)
{
	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
	u8 id, tile_count = xe->info.tile_count;
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	struct xe_tile *tile;
	void __iomem *regs;
	u32 mtcfg;

	if (tile_count == 1)
		goto add_mmio_ext;

	if (!xe->info.skip_mtcfg) {
		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
		if (tile_count < xe->info.tile_count) {
			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
				 xe->info.tile_count, tile_count);
			xe->info.tile_count = tile_count;

			/*
			 * FIXME: Needs some work for standalone media, but should be impossible
			 * with multi-tile for now.
			 */
			xe->info.gt_count = xe->info.tile_count;
		}
	}

	regs = xe->mmio.regs;
	for_each_tile(tile, xe, id) {
		tile->mmio.size = tile_mmio_size;
		tile->mmio.regs = regs;
		regs += tile_mmio_size;
	}

add_mmio_ext:
	/*
	 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile).
	 * When supported, there could be an additional contiguous multi-tile MMIO extension
	 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces.
	 */
	if (xe->info.has_mmio_ext) {
		regs = xe->mmio.regs + tile_mmio_size * tile_count;

		for_each_tile(tile, xe, id) {
			tile->mmio_ext.size = tile_mmio_ext_size;
			tile->mmio_ext.regs = regs;

			regs += tile_mmio_ext_size;
		}
	}
}

static void mmio_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
	if (xe->mem.vram.mapping)
		iounmap(xe->mem.vram.mapping);
}

static int xe_verify_lmem_ready(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	if (!IS_DGFX(xe))
		return 0;

	if (IS_SRIOV_VF(xe))
		return 0;

	/*
	 * The boot firmware initializes local memory and assesses its health.
	 * If memory training fails, the punit will have been instructed to
	 * keep the GT powered down; we won't be able to communicate with it
	 * and we should not continue with driver initialization.
	 */
	if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
		return -ENODEV;
	}

	return 0;
}

int xe_mmio_init(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	const int mmio_bar = 0;

	/*
	 * Map the entire BAR.
	 * The first 16MB of the BAR belong to the root tile and include:
	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
	 */
	xe->mmio.size = pci_resource_len(pdev, mmio_bar);
	xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0);
	if (xe->mmio.regs == NULL) {
		drm_err(&xe->drm, "failed to map registers\n");
		return -EIO;
	}

	return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
}

int xe_mmio_root_tile_init(struct xe_device *xe)
{
	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
	int err;

	/* Setup first tile; other tiles (if present) will be setup later. */
	root_tile->mmio.size = SZ_16M;
	root_tile->mmio.regs = xe->mmio.regs;

	err = xe_verify_lmem_ready(xe);
	if (err)
		return err;

	return 0;
}

/**
 * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
 * @gt: MMIO target GT
 * @reg: register to read value from
 *
 * Although Intel GPUs have some 64-bit registers, the hardware officially
 * only supports GTTMMADR register reads of 32 bits or smaller. Even if
 * a readq operation may return a reasonable value, that violation of the
 * spec shouldn't be relied upon and all 64-bit register reads should be
 * performed as two 32-bit reads of the upper and lower dwords.
 *
 * When reading registers that may be changing (such as counters), a
 * rollover of the lower dword between the two 32-bit reads can be
 * problematic. This function attempts to ensure the upper dword has
 * stabilized before returning the 64-bit value.
 *
 * Note that because this function may re-read the register multiple times
 * while waiting for the value to stabilize it should not be used to read
 * any registers where read operations have side effects.
 *
 * Returns the value of the 64-bit register.
 */
u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
{
	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
	u32 ldw, udw, oldudw, retries;

	if (reg.addr < gt->mmio.adj_limit) {
		reg.addr += gt->mmio.adj_offset;
		reg_udw.addr += gt->mmio.adj_offset;
	}

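	/*
	 * Re-read until the upper dword is identical across two consecutive
	 * reads (up to five attempts), so a lower-dword rollover between the
	 * two halves of the read is detected and retried.
	 */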
	oldudw = xe_mmio_read32(gt, reg_udw);
	for (retries = 5; retries; --retries) {
		ldw = xe_mmio_read32(gt, reg);
		udw = xe_mmio_read32(gt, reg_udw);

		if (udw == oldudw)
			break;

		oldudw = udw;
	}

	xe_gt_WARN(gt, retries == 0,
		   "64-bit read of %#x did not stabilize\n", reg.addr);

	return (u64)udw << 32 | ldw;
}

/**
 * xe_mmio_wait32() - Wait for a register to match the desired masked value
 * @gt: MMIO target GT
 * @reg: register to read value from
 * @mask: mask to be applied to the value read from the register
 * @val: desired value after applying the mask
 * @timeout_us: time out after this period of time. Wait logic tries to be
 * smart, applying an exponential backoff until @timeout_us is reached.
 * @out_val: if not NULL, points where to store the last unmasked value
 * @atomic: needs to be true if calling from an atomic context
 *
 * This function polls for the desired masked value and returns zero on success
 * or -ETIMEDOUT if timed out.
 *
 * Note that @timeout_us represents the minimum amount of time to wait before
 * giving up. The actual time taken by this function can be a little more than
 * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
 * it is possible that this function succeeds even after @timeout_us has passed.
 */
int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
		   u32 *out_val, bool atomic)
{
	ktime_t cur = ktime_get_raw();
	const ktime_t end = ktime_add_us(cur, timeout_us);
	int ret = -ETIMEDOUT;
	s64 wait = 10;
	u32 read;

	for (;;) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) == val) {
			ret = 0;
			break;
		}

		cur = ktime_get_raw();
		if (!ktime_before(cur, end))
			break;

		if (ktime_after(ktime_add_us(cur, wait), end))
			wait = ktime_us_delta(end, cur);

		if (atomic)
			udelay(wait);
		else
			usleep_range(wait, wait << 1);
		wait <<= 1;
	}

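	/*
	 * The register may have reached the expected value just after the
	 * final in-loop read; since the full timeout has already elapsed,
	 * check one more time before reporting -ETIMEDOUT.
	 */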
	if (ret != 0) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) == val)
			ret = 0;
	}

	if (out_val)
		*out_val = read;

	return ret;
}