1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021-2023 Intel Corporation 4 */ 5 6 #include <linux/minmax.h> 7 8 #include "xe_mmio.h" 9 10 #include <drm/drm_managed.h> 11 #include <drm/xe_drm.h> 12 13 #include "regs/xe_engine_regs.h" 14 #include "regs/xe_gt_regs.h" 15 #include "regs/xe_regs.h" 16 #include "xe_bo.h" 17 #include "xe_device.h" 18 #include "xe_ggtt.h" 19 #include "xe_gt.h" 20 #include "xe_gt_mcr.h" 21 #include "xe_macros.h" 22 #include "xe_module.h" 23 #include "xe_sriov.h" 24 #include "xe_tile.h" 25 26 #define XEHP_MTCFG_ADDR XE_REG(0x101800) 27 #define TILE_COUNT REG_GENMASK(15, 8) 28 29 #define BAR_SIZE_SHIFT 20 30 31 static void 32 _resize_bar(struct xe_device *xe, int resno, resource_size_t size) 33 { 34 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 35 int bar_size = pci_rebar_bytes_to_size(size); 36 int ret; 37 38 if (pci_resource_len(pdev, resno)) 39 pci_release_resource(pdev, resno); 40 41 ret = pci_resize_resource(pdev, resno, bar_size); 42 if (ret) { 43 drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n", 44 resno, 1 << bar_size, ERR_PTR(ret)); 45 return; 46 } 47 48 drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); 49 } 50 51 /* 52 * if force_vram_bar_size is set, attempt to set to the requested size 53 * else set to maximum possible size 54 */ 55 static void xe_resize_vram_bar(struct xe_device *xe) 56 { 57 u64 force_vram_bar_size = xe_modparam.force_vram_bar_size; 58 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 59 struct pci_bus *root = pdev->bus; 60 resource_size_t current_size; 61 resource_size_t rebar_size; 62 struct resource *root_res; 63 u32 bar_size_mask; 64 u32 pci_cmd; 65 int i; 66 67 /* gather some relevant info */ 68 current_size = pci_resource_len(pdev, LMEM_BAR); 69 bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR); 70 71 if (!bar_size_mask) 72 return; 73 74 /* set to a specific size? */ 75 if (force_vram_bar_size) { 76 u32 bar_size_bit; 77 78 rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M; 79 80 bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size)); 81 82 if (!bar_size_bit) { 83 drm_info(&xe->drm, 84 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n", 85 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20); 86 return; 87 } 88 89 rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT); 90 91 if (rebar_size == current_size) 92 return; 93 } else { 94 rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT); 95 96 /* only resize if larger than current */ 97 if (rebar_size <= current_size) 98 return; 99 } 100 101 drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n", 102 (u64)current_size >> 20, (u64)rebar_size >> 20); 103 104 while (root->parent) 105 root = root->parent; 106 107 pci_bus_for_each_resource(root, root_res, i) { 108 if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && 109 (u64)root_res->start > 0x100000000ul) 110 break; 111 } 112 113 if (!root_res) { 114 drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n"); 115 return; 116 } 117 118 pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); 119 pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY); 120 121 _resize_bar(xe, LMEM_BAR, rebar_size); 122 123 pci_assign_unassigned_bus_resources(pdev->bus); 124 pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); 125 } 126 127 static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar) 128 { 129 if (!pci_resource_flags(pdev, bar)) 130 return false; 131 132 if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET) 133 return false; 134 135 if (!pci_resource_len(pdev, bar)) 136 return false; 137 138 return true; 139 } 140 141 static int xe_determine_lmem_bar_size(struct xe_device *xe) 142 { 143 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 144 145 if (!xe_pci_resource_valid(pdev, LMEM_BAR)) { 146 drm_err(&xe->drm, "pci resource is not valid\n"); 147 return -ENXIO; 148 } 149 150 xe_resize_vram_bar(xe); 151 152 xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR); 153 xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR); 154 if (!xe->mem.vram.io_size) 155 return -EIO; 156 157 /* XXX: Need to change when xe link code is ready */ 158 xe->mem.vram.dpa_base = 0; 159 160 /* set up a map to the total memory area. */ 161 xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size); 162 163 return 0; 164 } 165 166 static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) 167 { 168 struct xe_device *xe = gt_to_xe(gt); 169 u64 offset; 170 u32 reg; 171 172 if (GRAPHICS_VER(xe) >= 20) { 173 u64 ccs_size = tile_size / 512; 174 u64 offset_hi, offset_lo; 175 u32 nodes, num_enabled; 176 177 reg = xe_mmio_read32(gt, MIRROR_FUSE3); 178 nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg); 179 num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */ 180 181 reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); 182 offset_lo = REG_FIELD_GET(XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK, reg); 183 184 reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_UPPER); 185 offset_hi = REG_FIELD_GET(XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK, reg); 186 187 offset = offset_hi << 32; /* HW view bits 39:32 */ 188 offset |= offset_lo << 6; /* HW view bits 31:6 */ 189 offset *= num_enabled; /* convert to SW view */ 190 191 /* We don't expect any holes */ 192 xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), 193 "Hole between CCS and GSM.\n"); 194 } else { 195 reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); 196 offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K; 197 } 198 199 return offset; 200 } 201 202 /** 203 * xe_mmio_tile_vram_size() - Collect vram size and offset information 204 * @tile: tile to get info for 205 * @vram_size: available vram (size - device reserved portions) 206 * @tile_size: actual vram size 207 * @tile_offset: physical start point in the vram address space 208 * 209 * There are 4 places for size information: 210 * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1) 211 * - TILEx size (actual vram size) 212 * - GSMBASE offset (TILEx - "stolen") 213 * - CSSBASE offset (TILEx - CSS space necessary) 214 * 215 * CSSBASE is always a lower/smaller offset then GSMBASE. 216 * 217 * The actual available size of memory is to the CCS or GSM base. 218 * NOTE: multi-tile bases will include the tile offset. 219 * 220 */ 221 static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, 222 u64 *tile_size, u64 *tile_offset) 223 { 224 struct xe_device *xe = tile_to_xe(tile); 225 struct xe_gt *gt = tile->primary_gt; 226 u64 offset; 227 int err; 228 u32 reg; 229 230 err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); 231 if (err) 232 return err; 233 234 /* actual size */ 235 if (unlikely(xe->info.platform == XE_DG1)) { 236 *tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR); 237 *tile_offset = 0; 238 } else { 239 reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id)); 240 *tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G; 241 *tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G; 242 } 243 244 /* minus device usage */ 245 if (xe->info.has_flat_ccs) { 246 offset = get_flat_ccs_offset(gt, *tile_size); 247 } else { 248 offset = xe_mmio_read64_2x32(gt, GSMBASE); 249 } 250 251 /* remove the tile offset so we have just the available size */ 252 *vram_size = offset - *tile_offset; 253 254 return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); 255 } 256 257 int xe_mmio_probe_vram(struct xe_device *xe) 258 { 259 struct xe_tile *tile; 260 resource_size_t io_size; 261 u64 available_size = 0; 262 u64 total_size = 0; 263 u64 tile_offset; 264 u64 tile_size; 265 u64 vram_size; 266 int err; 267 u8 id; 268 269 if (!IS_DGFX(xe)) 270 return 0; 271 272 /* Get the size of the root tile's vram for later accessibility comparison */ 273 tile = xe_device_get_root_tile(xe); 274 err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); 275 if (err) 276 return err; 277 278 err = xe_determine_lmem_bar_size(xe); 279 if (err) 280 return err; 281 282 drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start, 283 &xe->mem.vram.io_size); 284 285 io_size = xe->mem.vram.io_size; 286 287 /* tile specific ranges */ 288 for_each_tile(tile, xe, id) { 289 err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset); 290 if (err) 291 return err; 292 293 tile->mem.vram.actual_physical_size = tile_size; 294 tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset; 295 tile->mem.vram.io_size = min_t(u64, vram_size, io_size); 296 297 if (!tile->mem.vram.io_size) { 298 drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n"); 299 return -ENODEV; 300 } 301 302 tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset; 303 tile->mem.vram.usable_size = vram_size; 304 tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset; 305 306 if (tile->mem.vram.io_size < tile->mem.vram.usable_size) 307 drm_info(&xe->drm, "Small BAR device\n"); 308 drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id, 309 tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size); 310 drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id, 311 &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size, 312 &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size); 313 314 /* calculate total size using tile size to get the correct HW sizing */ 315 total_size += tile_size; 316 available_size += vram_size; 317 318 if (total_size > xe->mem.vram.io_size) { 319 drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n", 320 &total_size, &xe->mem.vram.io_size); 321 } 322 323 io_size -= min_t(u64, tile_size, io_size); 324 } 325 326 xe->mem.vram.actual_physical_size = total_size; 327 328 drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start, 329 &xe->mem.vram.actual_physical_size); 330 drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start, 331 &available_size); 332 333 return 0; 334 } 335 336 void xe_mmio_probe_tiles(struct xe_device *xe) 337 { 338 size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size; 339 u8 id, tile_count = xe->info.tile_count; 340 struct xe_gt *gt = xe_root_mmio_gt(xe); 341 struct xe_tile *tile; 342 void __iomem *regs; 343 u32 mtcfg; 344 345 if (tile_count == 1) 346 goto add_mmio_ext; 347 348 if (!xe->info.skip_mtcfg) { 349 mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR); 350 tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1; 351 if (tile_count < xe->info.tile_count) { 352 drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n", 353 xe->info.tile_count, tile_count); 354 xe->info.tile_count = tile_count; 355 356 /* 357 * FIXME: Needs some work for standalone media, but should be impossible 358 * with multi-tile for now. 359 */ 360 xe->info.gt_count = xe->info.tile_count; 361 } 362 } 363 364 regs = xe->mmio.regs; 365 for_each_tile(tile, xe, id) { 366 tile->mmio.size = tile_mmio_size; 367 tile->mmio.regs = regs; 368 regs += tile_mmio_size; 369 } 370 371 add_mmio_ext: 372 /* 373 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded per tile). 374 * When supported, there could be an additional contiguous multi-tile MMIO extension 375 * space ON TOP of it, and hence the necessity for distinguished MMIO spaces. 376 */ 377 if (xe->info.has_mmio_ext) { 378 regs = xe->mmio.regs + tile_mmio_size * tile_count; 379 380 for_each_tile(tile, xe, id) { 381 tile->mmio_ext.size = tile_mmio_ext_size; 382 tile->mmio_ext.regs = regs; 383 384 regs += tile_mmio_ext_size; 385 } 386 } 387 } 388 389 static void mmio_fini(struct drm_device *drm, void *arg) 390 { 391 struct xe_device *xe = arg; 392 393 pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); 394 if (xe->mem.vram.mapping) 395 iounmap(xe->mem.vram.mapping); 396 } 397 398 int xe_mmio_init(struct xe_device *xe) 399 { 400 struct xe_tile *root_tile = xe_device_get_root_tile(xe); 401 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 402 const int mmio_bar = 0; 403 404 /* 405 * Map the entire BAR. 406 * The first 16MB of the BAR, belong to the root tile, and include: 407 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB). 408 */ 409 xe->mmio.size = pci_resource_len(pdev, mmio_bar); 410 xe->mmio.regs = pci_iomap(pdev, mmio_bar, 0); 411 if (xe->mmio.regs == NULL) { 412 drm_err(&xe->drm, "failed to map registers\n"); 413 return -EIO; 414 } 415 416 /* Setup first tile; other tiles (if present) will be setup later. */ 417 root_tile->mmio.size = SZ_16M; 418 root_tile->mmio.regs = xe->mmio.regs; 419 420 return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); 421 } 422 423 u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) 424 { 425 struct xe_tile *tile = gt_to_tile(gt); 426 427 if (reg.addr < gt->mmio.adj_limit) 428 reg.addr += gt->mmio.adj_offset; 429 430 return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); 431 } 432 433 u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) 434 { 435 struct xe_tile *tile = gt_to_tile(gt); 436 437 if (reg.addr < gt->mmio.adj_limit) 438 reg.addr += gt->mmio.adj_offset; 439 440 return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); 441 } 442 443 void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) 444 { 445 struct xe_tile *tile = gt_to_tile(gt); 446 447 if (reg.addr < gt->mmio.adj_limit) 448 reg.addr += gt->mmio.adj_offset; 449 450 writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); 451 } 452 453 u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) 454 { 455 struct xe_tile *tile = gt_to_tile(gt); 456 457 if (reg.addr < gt->mmio.adj_limit) 458 reg.addr += gt->mmio.adj_offset; 459 460 return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); 461 } 462 463 u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) 464 { 465 u32 old, reg_val; 466 467 old = xe_mmio_read32(gt, reg); 468 reg_val = (old & ~clr) | set; 469 xe_mmio_write32(gt, reg, reg_val); 470 471 return old; 472 } 473 474 int xe_mmio_write32_and_verify(struct xe_gt *gt, 475 struct xe_reg reg, u32 val, u32 mask, u32 eval) 476 { 477 u32 reg_val; 478 479 xe_mmio_write32(gt, reg, val); 480 reg_val = xe_mmio_read32(gt, reg); 481 482 return (reg_val & mask) != eval ? -EINVAL : 0; 483 } 484 485 bool xe_mmio_in_range(const struct xe_gt *gt, 486 const struct xe_mmio_range *range, 487 struct xe_reg reg) 488 { 489 if (reg.addr < gt->mmio.adj_limit) 490 reg.addr += gt->mmio.adj_offset; 491 492 return range && reg.addr >= range->start && reg.addr <= range->end; 493 } 494 495 /** 496 * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads 497 * @gt: MMIO target GT 498 * @reg: register to read value from 499 * 500 * Although Intel GPUs have some 64-bit registers, the hardware officially 501 * only supports GTTMMADR register reads of 32 bits or smaller. Even if 502 * a readq operation may return a reasonable value, that violation of the 503 * spec shouldn't be relied upon and all 64-bit register reads should be 504 * performed as two 32-bit reads of the upper and lower dwords. 505 * 506 * When reading registers that may be changing (such as 507 * counters), a rollover of the lower dword between the two 32-bit reads 508 * can be problematic. This function attempts to ensure the upper dword has 509 * stabilized before returning the 64-bit value. 510 * 511 * Note that because this function may re-read the register multiple times 512 * while waiting for the value to stabilize it should not be used to read 513 * any registers where read operations have side effects. 514 * 515 * Returns the value of the 64-bit register. 516 */ 517 u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg) 518 { 519 struct xe_reg reg_udw = { .addr = reg.addr + 0x4 }; 520 u32 ldw, udw, oldudw, retries; 521 522 if (reg.addr < gt->mmio.adj_limit) { 523 reg.addr += gt->mmio.adj_offset; 524 reg_udw.addr += gt->mmio.adj_offset; 525 } 526 527 oldudw = xe_mmio_read32(gt, reg_udw); 528 for (retries = 5; retries; --retries) { 529 ldw = xe_mmio_read32(gt, reg); 530 udw = xe_mmio_read32(gt, reg_udw); 531 532 if (udw == oldudw) 533 break; 534 535 oldudw = udw; 536 } 537 538 xe_gt_WARN(gt, retries == 0, 539 "64-bit read of %#x did not stabilize\n", reg.addr); 540 541 return (u64)udw << 32 | ldw; 542 } 543 544 /** 545 * xe_mmio_wait32() - Wait for a register to match the desired masked value 546 * @gt: MMIO target GT 547 * @reg: register to read value from 548 * @mask: mask to be applied to the value read from the register 549 * @val: desired value after applying the mask 550 * @timeout_us: time out after this period of time. Wait logic tries to be 551 * smart, applying an exponential backoff until @timeout_us is reached. 552 * @out_val: if not NULL, points where to store the last unmasked value 553 * @atomic: needs to be true if calling from an atomic context 554 * 555 * This function polls for the desired masked value and returns zero on success 556 * or -ETIMEDOUT if timed out. 557 * 558 * Note that @timeout_us represents the minimum amount of time to wait before 559 * giving up. The actual time taken by this function can be a little more than 560 * @timeout_us for different reasons, specially in non-atomic contexts. Thus, 561 * it is possible that this function succeeds even after @timeout_us has passed. 562 */ 563 int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us, 564 u32 *out_val, bool atomic) 565 { 566 ktime_t cur = ktime_get_raw(); 567 const ktime_t end = ktime_add_us(cur, timeout_us); 568 int ret = -ETIMEDOUT; 569 s64 wait = 10; 570 u32 read; 571 572 for (;;) { 573 read = xe_mmio_read32(gt, reg); 574 if ((read & mask) == val) { 575 ret = 0; 576 break; 577 } 578 579 cur = ktime_get_raw(); 580 if (!ktime_before(cur, end)) 581 break; 582 583 if (ktime_after(ktime_add_us(cur, wait), end)) 584 wait = ktime_us_delta(end, cur); 585 586 if (atomic) 587 udelay(wait); 588 else 589 usleep_range(wait, wait << 1); 590 wait <<= 1; 591 } 592 593 if (ret != 0) { 594 read = xe_mmio_read32(gt, reg); 595 if ((read & mask) == val) 596 ret = 0; 597 } 598 599 if (out_val) 600 *out_val = read; 601 602 return ret; 603 } 604