// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021-2023 Intel Corporation
 */

#include <linux/minmax.h>

#include "xe_mmio.h"

#include <drm/drm_managed.h>
#include <drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_macros.h"
#include "xe_module.h"

#define XEHP_MTCFG_ADDR		XE_REG(0x101800)
#define TILE_COUNT		REG_GENMASK(15, 8)

#define BAR_SIZE_SHIFT 20

static int xe_set_dma_info(struct xe_device *xe)
{
	unsigned int mask_size = xe->info.dma_mask_size;
	int err;

	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));

	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	return 0;

mask_err:
	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
	return err;
}

static void
_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	int bar_size = pci_rebar_bytes_to_size(size);
	int ret;

	if (pci_resource_len(pdev, resno))
		pci_release_resource(pdev, resno);

	ret = pci_resize_resource(pdev, resno, bar_size);
	if (ret) {
		drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
			 resno, 1 << bar_size, ERR_PTR(ret));
		return;
	}

	drm_info(&xe->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size);
}

/*
 * If force_vram_bar_size is set, attempt to set the BAR to the requested size;
 * otherwise resize it to the maximum possible size.
 */
static void xe_resize_vram_bar(struct xe_device *xe)
{
	u64 force_vram_bar_size = xe_modparam.force_vram_bar_size;
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct pci_bus *root = pdev->bus;
	resource_size_t current_size;
	resource_size_t rebar_size;
	struct resource *root_res;
	u32 bar_size_mask;
	u32 pci_cmd;
	int i;

	/* gather some relevant info */
	current_size = pci_resource_len(pdev, LMEM_BAR);
	bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR);

	if (!bar_size_mask)
		return;

	/* set to a specific size? */
	if (force_vram_bar_size) {
		u32 bar_size_bit;

		rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;

		bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size));

		if (!bar_size_bit) {
			drm_info(&xe->drm,
				 "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
				 (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
			return;
		}

		rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);

		if (rebar_size == current_size)
			return;
	} else {
		rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);

		/* only resize if larger than current */
		if (rebar_size <= current_size)
			return;
	}

	drm_info(&xe->drm, "Attempting to resize bar from %lluMiB -> %lluMiB\n",
		 (u64)current_size >> 20, (u64)rebar_size >> 20);

	while (root->parent)
		root = root->parent;

	pci_bus_for_each_resource(root, root_res, i) {
		if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
		    root_res->start > 0x100000000ull)
			break;
	}

	if (!root_res) {
		drm_info(&xe->drm, "Can't resize VRAM BAR - platform support is missing. Consider enabling 'Resizable BAR' support in your BIOS\n");
		return;
	}

	pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);

	_resize_bar(xe, LMEM_BAR, rebar_size);

	pci_assign_unassigned_bus_resources(pdev->bus);
	pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
}
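
/*
 * Worked example (illustrative numbers, not from any spec): with
 * force_vram_bar_size = 8192, the requested size is 8192 MiB, which
 * pci_rebar_bytes_to_size() maps to resizable-BAR size index 13
 * (index n corresponds to 1 MiB << n, hence BAR_SIZE_SHIFT == 20).
 * The resize above is attempted only if bit 13 is set in the mask
 * returned by pci_rebar_get_possible_sizes(), in which case the BAR is
 * resized to 1ULL << (13 + BAR_SIZE_SHIFT) = 8 GiB.
 */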

static bool xe_pci_resource_valid(struct pci_dev *pdev, int bar)
{
	if (!pci_resource_flags(pdev, bar))
		return false;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_UNSET)
		return false;

	if (!pci_resource_len(pdev, bar))
		return false;

	return true;
}

static int xe_determine_lmem_bar_size(struct xe_device *xe)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

	if (!xe_pci_resource_valid(pdev, LMEM_BAR)) {
		drm_err(&xe->drm, "pci resource is not valid\n");
		return -ENXIO;
	}

	xe_resize_vram_bar(xe);

	xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR);
	xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR);
	if (!xe->mem.vram.io_size)
		return -EIO;

	/* XXX: Need to change when xe link code is ready */
	xe->mem.vram.dpa_base = 0;

	/* set up a map to the total memory area. */
	xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);

	return 0;
}

/**
 * xe_mmio_tile_vram_size() - Collect vram size and offset information
 * @tile: tile to get info for
 * @vram_size: available vram (size - device reserved portions)
 * @tile_size: actual vram size
 * @tile_offset: physical start point in the vram address space
 *
 * There are 4 places for size information:
 * - io size (from pci_resource_len of LMEM bar) (only used for small bar and DG1)
 * - TILEx size (actual vram size)
 * - GSMBASE offset (TILEx - "stolen")
 * - CSSBASE offset (TILEx - CSS space necessary)
 *
 * CSSBASE is always a lower/smaller offset than GSMBASE.
 *
 * The actual available size of memory extends up to the CCS or GSM base.
 * NOTE: multi-tile bases will include the tile offset.
 */
static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
				  u64 *tile_size, u64 *tile_offset)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_gt *gt = tile->primary_gt;
	u64 offset;
	int err;
	u32 reg;

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	/* actual size */
	if (unlikely(xe->info.platform == XE_DG1)) {
		*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR);
		*tile_offset = 0;
	} else {
		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
		*tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
		*tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G;
	}

	/* minus device usage */
	if (xe->info.has_flat_ccs) {
		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
	} else {
		offset = xe_mmio_read64_2x32(gt, GSMBASE);
	}

	/* remove the tile offset so we have just the available size */
	*vram_size = offset - *tile_offset;

	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
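
/*
 * Worked example (illustrative numbers only): on a non-DG1 platform without
 * flat CCS, if XEHP_TILE_ADDR_RANGE reports a 0 GiB tile offset and a 16 GiB
 * range, and GSMBASE reads back 0x3F8000000 (16 GiB minus 128 MiB of stolen
 * memory), then xe_mmio_tile_vram_size() returns *tile_size = 16 GiB,
 * *tile_offset = 0 and *vram_size = 16 GiB - 128 MiB.
 */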

int xe_mmio_probe_vram(struct xe_device *xe)
{
	struct xe_tile *tile;
	resource_size_t io_size;
	u64 available_size = 0;
	u64 total_size = 0;
	u64 tile_offset;
	u64 tile_size;
	u64 vram_size;
	int err;
	u8 id;

	if (!IS_DGFX(xe))
		return 0;

	/* Get the size of the root tile's vram for later accessibility comparison */
	tile = xe_device_get_root_tile(xe);
	err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
	if (err)
		return err;

	err = xe_determine_lmem_bar_size(xe);
	if (err)
		return err;

	drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
		 &xe->mem.vram.io_size);

	io_size = xe->mem.vram.io_size;

	/* tile specific ranges */
	for_each_tile(tile, xe, id) {
		err = xe_mmio_tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
		if (err)
			return err;

		tile->mem.vram.actual_physical_size = tile_size;
		tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
		tile->mem.vram.io_size = min_t(u64, vram_size, io_size);

		if (!tile->mem.vram.io_size) {
			drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
			return -ENODEV;
		}

		tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset;
		tile->mem.vram.usable_size = vram_size;
		tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;

		if (tile->mem.vram.io_size < tile->mem.vram.usable_size)
			drm_info(&xe->drm, "Small BAR device\n");
		drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size excluding stolen %pa, CPU accessible size %pa\n", id,
			 tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size);
		drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id,
			 &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + tile->mem.vram.actual_physical_size,
			 &tile->mem.vram.io_start, tile->mem.vram.io_start + tile->mem.vram.io_size);

		/* calculate total size using tile size to get the correct HW sizing */
		total_size += tile_size;
		available_size += vram_size;

		if (total_size > xe->mem.vram.io_size) {
			drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
				 &total_size, &xe->mem.vram.io_size);
		}

		io_size -= min_t(u64, tile_size, io_size);
	}

	xe->mem.vram.actual_physical_size = total_size;

	drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
		 &xe->mem.vram.actual_physical_size);
	drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
		 &available_size);

	return 0;
}

static void xe_mmio_probe_tiles(struct xe_device *xe)
{
	size_t tile_mmio_size = SZ_16M, tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
	u8 id, tile_count = xe->info.tile_count;
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	const int mmio_bar = 0;
	struct xe_tile *tile;
	void *regs;
	u32 mtcfg;

	if (tile_count == 1)
		goto add_mmio_ext;

	if (!xe->info.bypass_mtcfg) {
		mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
		tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
		if (tile_count < xe->info.tile_count) {
			drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
				 xe->info.tile_count, tile_count);
			pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
			xe->mmio.size = (tile_mmio_size + tile_mmio_ext_size) * tile_count;
			xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size);
			xe->info.tile_count = tile_count;

			/*
			 * FIXME: Needs some work for standalone media, but should be impossible
			 * with multi-tile for now.
			 */
			xe->info.gt_count = xe->info.tile_count;
		}
	}

	regs = xe->mmio.regs;
	for_each_tile(tile, xe, id) {
		tile->mmio.size = tile_mmio_size;
		tile->mmio.regs = regs;
		regs += tile_mmio_size;
	}

add_mmio_ext:
	/*
	 * By design, there's a contiguous multi-tile MMIO space (16MB hard coded
	 * per tile). When supported, there could be an additional contiguous
	 * multi-tile MMIO extension space ON TOP of it, hence the need for
	 * distinct MMIO spaces.
	 */
	if (xe->info.supports_mmio_ext) {
		regs = xe->mmio.regs + tile_mmio_size * tile_count;

		for_each_tile(tile, xe, id) {
			tile->mmio_ext.size = tile_mmio_ext_size;
			tile->mmio_ext.regs = regs;

			regs += tile_mmio_ext_size;
		}
	}
}
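
/*
 * Illustrative layout (assumed two-tile device with the MMIO extension space
 * supported): after xe_mmio_probe_tiles(), BAR0 is carved up as
 *
 *	[0, 16M)         tile 0 registers
 *	[16M, 32M)       tile 1 registers
 *	[32M, 32M + ext) tile 0 extension space
 *	[32M + ext, ...) tile 1 extension space
 *
 * where "ext" is xe->info.tile_mmio_ext_size.
 */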

static void mmio_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
	if (xe->mem.vram.mapping)
		iounmap(xe->mem.vram.mapping);
}

static int xe_verify_lmem_ready(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	/*
	 * The boot firmware initializes local memory and assesses its health.
	 * If memory training fails, the punit will have been instructed to
	 * keep the GT powered down; we won't be able to communicate with it
	 * and we should not continue with driver initialization.
	 */
	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
		return -ENODEV;
	}

	return 0;
}

int xe_mmio_init(struct xe_device *xe)
{
	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
	const int mmio_bar = 0;
	int err;

	/*
	 * Map the maximum expected BAR size, which will get remapped later
	 * if we determine that we're running on a reduced-tile system.
	 * The first 16MB of the BAR belong to the root tile and include:
	 * registers (0-4MB), reserved space (4MB-8MB) and GGTT (8MB-16MB).
	 */
	xe->mmio.size = (SZ_16M + xe->info.tile_mmio_ext_size) * xe->info.tile_count;
	xe->mmio.regs = pci_iomap(to_pci_dev(xe->drm.dev), mmio_bar, xe->mmio.size);
	if (xe->mmio.regs == NULL) {
		drm_err(&xe->drm, "failed to map registers\n");
		return -EIO;
	}

	err = drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
	if (err)
		return err;

	/* Set up the first tile; other tiles (if present) will be set up later. */
	root_tile->mmio.size = xe->mmio.size;
	root_tile->mmio.regs = xe->mmio.regs;

	err = xe_verify_lmem_ready(xe);
	if (err)
		return err;

	err = xe_set_dma_info(xe);
	if (err)
		return err;

	xe_mmio_probe_tiles(xe);

	return 0;
}

/**
 * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
 * @gt: MMIO target GT
 * @reg: register to read value from
 *
 * Although Intel GPUs have some 64-bit registers, the hardware officially
 * only supports GTTMMADR register reads of 32 bits or smaller. Even though
 * a readq operation may return a reasonable value, that violation of the
 * spec shouldn't be relied upon and all 64-bit register reads should be
 * performed as two 32-bit reads of the upper and lower dwords.
 *
 * When reading registers that may be changing (such as counters), a rollover
 * of the lower dword between the two 32-bit reads can be problematic. This
 * function attempts to ensure the upper dword has stabilized before returning
 * the 64-bit value.
 *
 * Note that because this function may re-read the register multiple times
 * while waiting for the value to stabilize it should not be used to read
 * any registers where read operations have side effects.
 *
 * Returns the value of the 64-bit register.
 */
u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
{
	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
	u32 ldw, udw, oldudw, retries;

	if (reg.addr < gt->mmio.adj_limit) {
		reg.addr += gt->mmio.adj_offset;
		reg_udw.addr += gt->mmio.adj_offset;
	}

	oldudw = xe_mmio_read32(gt, reg_udw);
	for (retries = 5; retries; --retries) {
		ldw = xe_mmio_read32(gt, reg);
		udw = xe_mmio_read32(gt, reg_udw);

		if (udw == oldudw)
			break;

		oldudw = udw;
	}

	xe_gt_WARN(gt, retries == 0,
		   "64-bit read of %#x did not stabilize\n", reg.addr);

	return (u64)udw << 32 | ldw;
}

/**
 * xe_mmio_wait32() - Wait for a register to match the desired masked value
 * @gt: MMIO target GT
 * @reg: register to read value from
 * @mask: mask to be applied to the value read from the register
 * @val: desired value after applying the mask
 * @timeout_us: time out after this period of time. The wait logic tries to be
 * smart, applying an exponential backoff until @timeout_us is reached.
 * @out_val: if not NULL, where to store the last unmasked value read
 * @atomic: needs to be true if calling from an atomic context
 *
 * This function polls for the desired masked value and returns zero on success
 * or -ETIMEDOUT if timed out.
 *
 * Note that @timeout_us represents the minimum amount of time to wait before
 * giving up. The actual time taken by this function can be a little more than
 * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
 * it is possible that this function succeeds even after @timeout_us has passed.
 */
int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
		   u32 *out_val, bool atomic)
{
	ktime_t cur = ktime_get_raw();
	const ktime_t end = ktime_add_us(cur, timeout_us);
	int ret = -ETIMEDOUT;
	s64 wait = 10;
	u32 read;

	for (;;) {
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) == val) {
			ret = 0;
			break;
		}

		cur = ktime_get_raw();
		if (!ktime_before(cur, end))
			break;

		if (ktime_after(ktime_add_us(cur, wait), end))
			wait = ktime_us_delta(end, cur);

		if (atomic)
			udelay(wait);
		else
			usleep_range(wait, wait << 1);
		wait <<= 1;
	}

	if (ret != 0) {
		/* Re-read once in case the register changed right at the deadline */
		read = xe_mmio_read32(gt, reg);
		if ((read & mask) == val)
			ret = 0;
	}

	if (out_val)
		*out_val = read;

	return ret;
}
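
/*
 * Usage sketch (illustrative only, not part of the driver): a caller with a
 * GT in hand could, for example, poll for LMEM_INIT to become set in GU_CNTL,
 * using an example timeout of 20 ms:
 *
 *	u32 val;
 *	int ret;
 *
 *	ret = xe_mmio_wait32(gt, GU_CNTL, LMEM_INIT, LMEM_INIT, 20000,
 *			     &val, false);
 *	if (ret == -ETIMEDOUT)
 *		drm_err(&gt_to_xe(gt)->drm, "LMEM not ready, GU_CNTL=%#x\n", val);
 *
 * The last value read is stored through @out_val even on timeout, which is
 * useful for error reporting.
 */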