1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_ggtt.h" 7 8 #include <kunit/visibility.h> 9 #include <linux/fault-inject.h> 10 #include <linux/io-64-nonatomic-lo-hi.h> 11 #include <linux/sizes.h> 12 13 #include <drm/drm_drv.h> 14 #include <drm/drm_managed.h> 15 #include <drm/intel/i915_drm.h> 16 #include <generated/xe_wa_oob.h> 17 18 #include "regs/xe_gt_regs.h" 19 #include "regs/xe_gtt_defs.h" 20 #include "regs/xe_regs.h" 21 #include "xe_assert.h" 22 #include "xe_bo.h" 23 #include "xe_gt_printk.h" 24 #include "xe_gt_types.h" 25 #include "xe_map.h" 26 #include "xe_mmio.h" 27 #include "xe_pm.h" 28 #include "xe_res_cursor.h" 29 #include "xe_sriov.h" 30 #include "xe_tile_printk.h" 31 #include "xe_tile_sriov_vf.h" 32 #include "xe_tlb_inval.h" 33 #include "xe_wa.h" 34 #include "xe_wopcm.h" 35 36 /** 37 * DOC: Global Graphics Translation Table (GGTT) 38 * 39 * Xe GGTT implements the support for a Global Virtual Address space that is used 40 * for resources that are accessible to privileged (i.e. kernel-mode) processes, 41 * and not tied to a specific user-level process. For example, the Graphics 42 * micro-Controller (GuC) and Display Engine (if present) utilize this Global 43 * address space. 44 * 45 * The Global GTT (GGTT) translates from the Global virtual address to a physical 46 * address that can be accessed by HW. The GGTT is a flat, single-level table. 47 * 48 * Xe implements a simplified version of the GGTT specifically managing only a 49 * certain range of it that goes from the Write Once Protected Content Memory (WOPCM) 50 * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to 51 * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space 52 * is limited on both ends of the GGTT, because the GuC shim HW redirects 53 * accesses to those addresses to other HW areas instead of going through the 54 * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size, 55 * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things 56 * simple, instead of checking each object to see if they are accessed by GuC or 57 * not, we just exclude those areas from the allocator. Additionally, to simplify 58 * the driver load, we use the maximum WOPCM size in this logic instead of the 59 * programmed one, so we don't need to wait until the actual size to be 60 * programmed is determined (which requires FW fetch) before initializing the 61 * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs 62 * depending on the platform) but we can live with this. Another benefit of this 63 * is the GuC bootrom can't access anything below the WOPCM max size so anything 64 * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT 65 * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max 66 * give us the correct placement for free. 67 */ 68 69 /** 70 * struct xe_ggtt_node - A node in GGTT. 71 * 72 * This struct is allocated with xe_ggtt_insert_node(,_transform) or xe_ggtt_insert_bo(,_at). 73 * It will be deallocated using xe_ggtt_node_remove(). 74 */ 75 struct xe_ggtt_node { 76 /** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */ 77 struct xe_ggtt *ggtt; 78 /** @base: A drm_mm_node */ 79 struct drm_mm_node base; 80 /** @delayed_removal_work: The work struct for the delayed removal */ 81 struct work_struct delayed_removal_work; 82 /** @invalidate_on_remove: If it needs invalidation upon removal */ 83 bool invalidate_on_remove; 84 }; 85 86 /** 87 * struct xe_ggtt_pt_ops - GGTT Page table operations 88 * Which can vary from platform to platform. 89 */ 90 struct xe_ggtt_pt_ops { 91 /** @pte_encode_flags: Encode PTE flags for a given BO */ 92 u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); 93 94 /** @ggtt_set_pte: Directly write into GGTT's PTE */ 95 xe_ggtt_set_pte_fn ggtt_set_pte; 96 97 /** @ggtt_get_pte: Directly read from GGTT's PTE */ 98 u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); 99 }; 100 101 /** 102 * struct xe_ggtt - Main GGTT struct 103 * 104 * In general, each tile can contains its own Global Graphics Translation Table 105 * (GGTT) instance. 106 */ 107 struct xe_ggtt { 108 /** @tile: Back pointer to tile where this GGTT belongs */ 109 struct xe_tile *tile; 110 /** @start: Start offset of GGTT */ 111 u64 start; 112 /** @size: Total usable size of this GGTT */ 113 u64 size; 114 115 #define XE_GGTT_FLAGS_64K BIT(0) 116 /** 117 * @flags: Flags for this GGTT 118 * Acceptable flags: 119 * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. 120 */ 121 unsigned int flags; 122 /** @scratch: Internal object allocation used as a scratch page */ 123 struct xe_bo *scratch; 124 /** @lock: Mutex lock to protect GGTT data */ 125 struct mutex lock; 126 /** 127 * @gsm: The iomem pointer to the actual location of the translation 128 * table located in the GSM for easy PTE manipulation 129 */ 130 u64 __iomem *gsm; 131 /** @pt_ops: Page Table operations per platform */ 132 const struct xe_ggtt_pt_ops *pt_ops; 133 /** @mm: The memory manager used to manage individual GGTT allocations */ 134 struct drm_mm mm; 135 /** @access_count: counts GGTT writes */ 136 unsigned int access_count; 137 /** @wq: Dedicated unordered work queue to process node removals */ 138 struct workqueue_struct *wq; 139 }; 140 141 static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) 142 { 143 u64 pte = XE_PAGE_PRESENT; 144 145 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 146 pte |= XE_GGTT_PTE_DM; 147 148 return pte; 149 } 150 151 static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) 152 { 153 struct xe_device *xe = xe_bo_device(bo); 154 u64 pte; 155 156 pte = xelp_ggtt_pte_flags(bo, pat_index); 157 158 xe_assert(xe, pat_index <= 3); 159 160 if (pat_index & BIT(0)) 161 pte |= XELPG_GGTT_PTE_PAT0; 162 163 if (pat_index & BIT(1)) 164 pte |= XELPG_GGTT_PTE_PAT1; 165 166 return pte; 167 } 168 169 static unsigned int probe_gsm_size(struct pci_dev *pdev) 170 { 171 u16 gmch_ctl, ggms; 172 173 pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl); 174 ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK; 175 return ggms ? SZ_1M << ggms : 0; 176 } 177 178 static void ggtt_update_access_counter(struct xe_ggtt *ggtt) 179 { 180 struct xe_tile *tile = ggtt->tile; 181 struct xe_gt *affected_gt; 182 u32 max_gtt_writes; 183 184 if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 22019338487)) { 185 affected_gt = tile->primary_gt; 186 max_gtt_writes = 1100; 187 188 /* Only expected to apply to primary GT on dgpu platforms */ 189 xe_tile_assert(tile, IS_DGFX(tile_to_xe(tile))); 190 } else { 191 affected_gt = tile->media_gt; 192 max_gtt_writes = 63; 193 194 /* Only expected to apply to media GT on igpu platforms */ 195 xe_tile_assert(tile, !IS_DGFX(tile_to_xe(tile))); 196 } 197 198 /* 199 * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit 200 * to wait for completion of prior GTT writes before letting this through. 201 * This needs to be done for all GGTT writes originating from the CPU. 202 */ 203 lockdep_assert_held(&ggtt->lock); 204 205 if ((++ggtt->access_count % max_gtt_writes) == 0) { 206 xe_mmio_write32(&affected_gt->mmio, GMD_ID, 0x0); 207 ggtt->access_count = 0; 208 } 209 } 210 211 /** 212 * xe_ggtt_start - Get starting offset of GGTT. 213 * @ggtt: &xe_ggtt 214 * 215 * Returns: Starting offset for this &xe_ggtt. 216 */ 217 u64 xe_ggtt_start(struct xe_ggtt *ggtt) 218 { 219 return ggtt->start; 220 } 221 222 /** 223 * xe_ggtt_size - Get size of GGTT. 224 * @ggtt: &xe_ggtt 225 * 226 * Returns: Total usable size of this &xe_ggtt. 227 */ 228 u64 xe_ggtt_size(struct xe_ggtt *ggtt) 229 { 230 return ggtt->size; 231 } 232 233 static void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte) 234 { 235 xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); 236 xe_tile_assert(ggtt->tile, addr < ggtt->start + ggtt->size); 237 238 writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]); 239 } 240 241 static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte) 242 { 243 xe_ggtt_set_pte(ggtt, addr, pte); 244 ggtt_update_access_counter(ggtt); 245 } 246 247 static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr) 248 { 249 xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK)); 250 xe_tile_assert(ggtt->tile, addr < ggtt->start + ggtt->size); 251 252 return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]); 253 } 254 255 static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size) 256 { 257 u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; 258 u64 end = start + size - 1; 259 u64 scratch_pte; 260 261 xe_tile_assert(ggtt->tile, start < end); 262 263 if (ggtt->scratch) 264 scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) | 265 ggtt->pt_ops->pte_encode_flags(ggtt->scratch, 266 pat_index); 267 else 268 scratch_pte = 0; 269 270 while (start < end) { 271 ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte); 272 start += XE_PAGE_SIZE; 273 } 274 } 275 276 static void primelockdep(struct xe_ggtt *ggtt) 277 { 278 if (!IS_ENABLED(CONFIG_LOCKDEP)) 279 return; 280 281 fs_reclaim_acquire(GFP_KERNEL); 282 might_lock(&ggtt->lock); 283 fs_reclaim_release(GFP_KERNEL); 284 } 285 286 /** 287 * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile 288 * @tile: &xe_tile 289 * 290 * Allocates a &xe_ggtt for a given tile. 291 * 292 * Return: &xe_ggtt on success, or NULL when out of memory. 293 */ 294 struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile) 295 { 296 struct xe_device *xe = tile_to_xe(tile); 297 struct xe_ggtt *ggtt; 298 299 ggtt = drmm_kzalloc(&xe->drm, sizeof(*ggtt), GFP_KERNEL); 300 if (!ggtt) 301 return NULL; 302 303 if (drmm_mutex_init(&xe->drm, &ggtt->lock)) 304 return NULL; 305 306 primelockdep(ggtt); 307 ggtt->tile = tile; 308 309 return ggtt; 310 } 311 312 static void ggtt_fini_early(struct drm_device *drm, void *arg) 313 { 314 struct xe_ggtt *ggtt = arg; 315 316 destroy_workqueue(ggtt->wq); 317 drm_mm_takedown(&ggtt->mm); 318 } 319 320 static void ggtt_fini(void *arg) 321 { 322 struct xe_ggtt *ggtt = arg; 323 324 ggtt->scratch = NULL; 325 } 326 327 #ifdef CONFIG_LOCKDEP 328 void xe_ggtt_might_lock(struct xe_ggtt *ggtt) 329 { 330 might_lock(&ggtt->lock); 331 } 332 #endif 333 334 static const struct xe_ggtt_pt_ops xelp_pt_ops = { 335 .pte_encode_flags = xelp_ggtt_pte_flags, 336 .ggtt_set_pte = xe_ggtt_set_pte, 337 .ggtt_get_pte = xe_ggtt_get_pte, 338 }; 339 340 static const struct xe_ggtt_pt_ops xelpg_pt_ops = { 341 .pte_encode_flags = xelpg_ggtt_pte_flags, 342 .ggtt_set_pte = xe_ggtt_set_pte, 343 .ggtt_get_pte = xe_ggtt_get_pte, 344 }; 345 346 static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { 347 .pte_encode_flags = xelpg_ggtt_pte_flags, 348 .ggtt_set_pte = xe_ggtt_set_pte_and_flush, 349 .ggtt_get_pte = xe_ggtt_get_pte, 350 }; 351 352 static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u64 start, u64 size) 353 { 354 ggtt->start = start; 355 ggtt->size = size; 356 drm_mm_init(&ggtt->mm, 0, size); 357 } 358 359 int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 start, u32 size) 360 { 361 __xe_ggtt_init_early(ggtt, start, size); 362 return 0; 363 } 364 EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); 365 366 static void dev_fini_ggtt(void *arg) 367 { 368 struct xe_ggtt *ggtt = arg; 369 370 drain_workqueue(ggtt->wq); 371 } 372 373 /** 374 * xe_ggtt_init_early - Early GGTT initialization 375 * @ggtt: the &xe_ggtt to be initialized 376 * 377 * It allows to create new mappings usable by the GuC. 378 * Mappings are not usable by the HW engines, as it doesn't have scratch nor 379 * initial clear done to it yet. That will happen in the regular, non-early 380 * GGTT initialization. 381 * 382 * Return: 0 on success or a negative error code on failure. 383 */ 384 int xe_ggtt_init_early(struct xe_ggtt *ggtt) 385 { 386 struct xe_device *xe = tile_to_xe(ggtt->tile); 387 struct pci_dev *pdev = to_pci_dev(xe->drm.dev); 388 unsigned int gsm_size; 389 u64 ggtt_start, wopcm = xe_wopcm_size(xe), ggtt_size; 390 int err; 391 392 if (!IS_SRIOV_VF(xe)) { 393 if (GRAPHICS_VERx100(xe) >= 1250) 394 gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */ 395 else 396 gsm_size = probe_gsm_size(pdev); 397 if (gsm_size == 0) { 398 xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n"); 399 return -ENOMEM; 400 } 401 ggtt_start = wopcm; 402 ggtt_size = (gsm_size / 8) * (u64)XE_PAGE_SIZE - ggtt_start; 403 } else { 404 ggtt_start = xe_tile_sriov_vf_ggtt_base(ggtt->tile); 405 ggtt_size = xe_tile_sriov_vf_ggtt(ggtt->tile); 406 407 if (ggtt_start < wopcm || 408 ggtt_start + ggtt_size > GUC_GGTT_TOP) { 409 xe_tile_err(ggtt->tile, "Invalid GGTT configuration: %#llx-%#llx\n", 410 ggtt_start, ggtt_start + ggtt_size - 1); 411 return -ERANGE; 412 } 413 } 414 415 ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M; 416 if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) 417 ggtt->flags |= XE_GGTT_FLAGS_64K; 418 419 if (ggtt_size + ggtt_start > GUC_GGTT_TOP) 420 ggtt_size = GUC_GGTT_TOP - ggtt_start; 421 422 if (GRAPHICS_VERx100(xe) >= 1270) 423 ggtt->pt_ops = 424 (ggtt->tile->media_gt && XE_GT_WA(ggtt->tile->media_gt, 22019338487)) || 425 (ggtt->tile->primary_gt && XE_GT_WA(ggtt->tile->primary_gt, 22019338487)) ? 426 &xelpg_pt_wa_ops : &xelpg_pt_ops; 427 else 428 ggtt->pt_ops = &xelp_pt_ops; 429 430 ggtt->wq = alloc_workqueue("xe-ggtt-wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0); 431 if (!ggtt->wq) 432 return -ENOMEM; 433 434 __xe_ggtt_init_early(ggtt, ggtt_start, ggtt_size); 435 436 err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); 437 if (err) 438 return err; 439 440 return devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); 441 } 442 ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */ 443 444 static void xe_ggtt_invalidate(struct xe_ggtt *ggtt); 445 446 static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) 447 { 448 struct drm_mm_node *hole; 449 u64 start, end; 450 451 /* Display may have allocated inside ggtt, so be careful with clearing here */ 452 mutex_lock(&ggtt->lock); 453 drm_mm_for_each_hole(hole, &ggtt->mm, start, end) 454 xe_ggtt_clear(ggtt, ggtt->start + start, end - start); 455 456 xe_ggtt_invalidate(ggtt); 457 mutex_unlock(&ggtt->lock); 458 } 459 460 static void ggtt_node_fini(struct xe_ggtt_node *node) 461 { 462 kfree(node); 463 } 464 465 static void ggtt_node_remove(struct xe_ggtt_node *node) 466 { 467 struct xe_ggtt *ggtt = node->ggtt; 468 struct xe_device *xe = tile_to_xe(ggtt->tile); 469 bool bound; 470 int idx; 471 472 bound = drm_dev_enter(&xe->drm, &idx); 473 474 mutex_lock(&ggtt->lock); 475 if (bound) 476 xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node)); 477 drm_mm_remove_node(&node->base); 478 node->base.size = 0; 479 mutex_unlock(&ggtt->lock); 480 481 if (!bound) 482 goto free_node; 483 484 if (node->invalidate_on_remove) 485 xe_ggtt_invalidate(ggtt); 486 487 drm_dev_exit(idx); 488 489 free_node: 490 ggtt_node_fini(node); 491 } 492 493 static void ggtt_node_remove_work_func(struct work_struct *work) 494 { 495 struct xe_ggtt_node *node = container_of(work, typeof(*node), 496 delayed_removal_work); 497 struct xe_device *xe = tile_to_xe(node->ggtt->tile); 498 499 guard(xe_pm_runtime)(xe); 500 ggtt_node_remove(node); 501 } 502 503 /** 504 * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT 505 * @node: the &xe_ggtt_node to be removed 506 * @invalidate: if node needs invalidation upon removal 507 */ 508 void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate) 509 { 510 struct xe_ggtt *ggtt; 511 struct xe_device *xe; 512 513 if (!node || !node->ggtt) 514 return; 515 516 ggtt = node->ggtt; 517 xe = tile_to_xe(ggtt->tile); 518 519 node->invalidate_on_remove = invalidate; 520 521 if (xe_pm_runtime_get_if_active(xe)) { 522 ggtt_node_remove(node); 523 xe_pm_runtime_put(xe); 524 } else { 525 queue_work(ggtt->wq, &node->delayed_removal_work); 526 } 527 } 528 529 /** 530 * xe_ggtt_init - Regular non-early GGTT initialization 531 * @ggtt: the &xe_ggtt to be initialized 532 * 533 * Return: 0 on success or a negative error code on failure. 534 */ 535 int xe_ggtt_init(struct xe_ggtt *ggtt) 536 { 537 struct xe_device *xe = tile_to_xe(ggtt->tile); 538 unsigned int flags; 539 int err; 540 541 /* 542 * So we don't need to worry about 64K GGTT layout when dealing with 543 * scratch entries, rather keep the scratch page in system memory on 544 * platforms where 64K pages are needed for VRAM. 545 */ 546 flags = 0; 547 if (ggtt->flags & XE_GGTT_FLAGS_64K) 548 flags |= XE_BO_FLAG_SYSTEM; 549 else 550 flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile); 551 552 ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags); 553 if (IS_ERR(ggtt->scratch)) { 554 err = PTR_ERR(ggtt->scratch); 555 goto err; 556 } 557 558 xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); 559 560 xe_ggtt_initial_clear(ggtt); 561 562 return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt); 563 err: 564 ggtt->scratch = NULL; 565 return err; 566 } 567 568 static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) 569 { 570 int err; 571 572 if (!gt) 573 return; 574 575 err = xe_tlb_inval_ggtt(>->tlb_inval); 576 xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err)); 577 } 578 579 static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) 580 { 581 struct xe_device *xe = tile_to_xe(ggtt->tile); 582 583 /* 584 * XXX: Barrier for GGTT pages. Unsure exactly why this required but 585 * without this LNL is having issues with the GuC reading scratch page 586 * vs. correct GGTT page. Not particularly a hot code path so blindly 587 * do a mmio read here which results in GuC reading correct GGTT page. 588 */ 589 xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG); 590 591 /* Each GT in a tile has its own TLB to cache GGTT lookups */ 592 ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); 593 ggtt_invalidate_gt_tlb(ggtt->tile->media_gt); 594 } 595 596 /** 597 * xe_ggtt_shift_nodes() - Shift GGTT nodes to adjust for a change in usable address range. 598 * @ggtt: the &xe_ggtt struct instance 599 * @new_start: new location of area provisioned for current VF 600 * 601 * Ensure that all struct &xe_ggtt_node are moved to the @new_start base address 602 * by changing the base offset of the GGTT. 603 * 604 * This function may be called multiple times during recovery, but if 605 * @new_start is unchanged from the current base, it's a noop. 606 * 607 * @new_start should be a value between xe_wopcm_size() and #GUC_GGTT_TOP. 608 */ 609 void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_start) 610 { 611 guard(mutex)(&ggtt->lock); 612 613 xe_tile_assert(ggtt->tile, new_start >= xe_wopcm_size(tile_to_xe(ggtt->tile))); 614 xe_tile_assert(ggtt->tile, new_start + ggtt->size <= GUC_GGTT_TOP); 615 616 /* pairs with READ_ONCE in xe_ggtt_node_addr() */ 617 WRITE_ONCE(ggtt->start, new_start); 618 } 619 620 static int xe_ggtt_insert_node_locked(struct xe_ggtt_node *node, 621 u32 size, u32 align, u32 mm_flags) 622 { 623 return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0, 624 mm_flags); 625 } 626 627 static struct xe_ggtt_node *ggtt_node_init(struct xe_ggtt *ggtt) 628 { 629 struct xe_ggtt_node *node = kzalloc_obj(*node, GFP_NOFS); 630 631 if (!node) 632 return ERR_PTR(-ENOMEM); 633 634 INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func); 635 node->ggtt = ggtt; 636 637 return node; 638 } 639 640 /** 641 * xe_ggtt_insert_node - Insert a &xe_ggtt_node into the GGTT 642 * @ggtt: the &xe_ggtt into which the node should be inserted. 643 * @size: size of the node 644 * @align: alignment constrain of the node 645 * 646 * Return: &xe_ggtt_node on success or a ERR_PTR on failure. 647 */ 648 struct xe_ggtt_node *xe_ggtt_insert_node(struct xe_ggtt *ggtt, u32 size, u32 align) 649 { 650 struct xe_ggtt_node *node; 651 int ret; 652 653 node = ggtt_node_init(ggtt); 654 if (IS_ERR(node)) 655 return node; 656 657 guard(mutex)(&ggtt->lock); 658 ret = xe_ggtt_insert_node_locked(node, size, align, 659 DRM_MM_INSERT_HIGH); 660 if (ret) { 661 ggtt_node_fini(node); 662 return ERR_PTR(ret); 663 } 664 665 return node; 666 } 667 668 /** 669 * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node. 670 * @node: the &xe_ggtt_node 671 * 672 * Return: GGTT node page table entries size in bytes. 673 */ 674 size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node) 675 { 676 if (!node) 677 return 0; 678 679 return node->base.size / XE_PAGE_SIZE * sizeof(u64); 680 } 681 682 /** 683 * xe_ggtt_map_bo - Map the BO into GGTT 684 * @ggtt: the &xe_ggtt where node will be mapped 685 * @node: the &xe_ggtt_node where this BO is mapped 686 * @bo: the &xe_bo to be mapped 687 * @pte: The pte flags to append. 688 */ 689 static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, 690 struct xe_bo *bo, u64 pte) 691 { 692 u64 start, end; 693 struct xe_res_cursor cur; 694 695 if (XE_WARN_ON(!node)) 696 return; 697 698 start = xe_ggtt_node_addr(node); 699 end = start + xe_bo_size(bo); 700 701 if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { 702 xe_assert(xe_bo_device(bo), bo->ttm.ttm); 703 704 for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); 705 cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) 706 ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, 707 pte | xe_res_dma(&cur)); 708 } else { 709 /* Prepend GPU offset */ 710 pte |= vram_region_gpu_offset(bo->ttm.resource); 711 712 for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); 713 cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) 714 ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, 715 pte + cur.start); 716 } 717 } 718 719 /** 720 * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT 721 * @ggtt: the &xe_ggtt where node will be mapped 722 * @bo: the &xe_bo to be mapped 723 * 724 * This is used to restore a GGTT mapping after suspend. 725 */ 726 void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo) 727 { 728 u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; 729 u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; 730 u64 pte; 731 732 mutex_lock(&ggtt->lock); 733 pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); 734 xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pte); 735 mutex_unlock(&ggtt->lock); 736 } 737 738 /** 739 * xe_ggtt_insert_node_transform - Insert a newly allocated &xe_ggtt_node into the GGTT 740 * @ggtt: the &xe_ggtt where the node will inserted/reserved. 741 * @bo: The bo to be transformed 742 * @pte_flags: The extra GGTT flags to add to mapping. 743 * @size: size of the node 744 * @align: required alignment for node 745 * @transform: transformation function that will populate the GGTT node, or NULL for linear mapping. 746 * @arg: Extra argument to pass to the transformation function. 747 * 748 * This function allows inserting a GGTT node with a custom transformation function. 749 * This is useful for display to allow inserting rotated framebuffers to GGTT. 750 * 751 * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise. 752 */ 753 struct xe_ggtt_node *xe_ggtt_insert_node_transform(struct xe_ggtt *ggtt, 754 struct xe_bo *bo, u64 pte_flags, 755 u64 size, u32 align, 756 xe_ggtt_transform_cb transform, void *arg) 757 { 758 struct xe_ggtt_node *node; 759 int ret; 760 761 node = ggtt_node_init(ggtt); 762 if (IS_ERR(node)) 763 return ERR_CAST(node); 764 765 if (mutex_lock_interruptible(&ggtt->lock) < 0) { 766 ret = -ERESTARTSYS; 767 goto err; 768 } 769 770 ret = xe_ggtt_insert_node_locked(node, size, align, 0); 771 if (ret) 772 goto err_unlock; 773 774 if (transform) 775 transform(ggtt, node, pte_flags, ggtt->pt_ops->ggtt_set_pte, arg); 776 else 777 xe_ggtt_map_bo(ggtt, node, bo, pte_flags); 778 779 mutex_unlock(&ggtt->lock); 780 return node; 781 782 err_unlock: 783 mutex_unlock(&ggtt->lock); 784 err: 785 ggtt_node_fini(node); 786 return ERR_PTR(ret); 787 } 788 789 static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 790 u64 start, u64 end, struct drm_exec *exec) 791 { 792 u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE; 793 u8 tile_id = ggtt->tile->id; 794 int err; 795 796 if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) 797 alignment = SZ_64K; 798 799 if (XE_WARN_ON(bo->ggtt_node[tile_id])) { 800 /* Someone's already inserted this BO in the GGTT */ 801 xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); 802 return 0; 803 } 804 805 err = xe_bo_validate(bo, NULL, false, exec); 806 if (err) 807 return err; 808 809 xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); 810 811 bo->ggtt_node[tile_id] = ggtt_node_init(ggtt); 812 if (IS_ERR(bo->ggtt_node[tile_id])) { 813 err = PTR_ERR(bo->ggtt_node[tile_id]); 814 bo->ggtt_node[tile_id] = NULL; 815 goto out; 816 } 817 818 mutex_lock(&ggtt->lock); 819 /* 820 * When inheriting the initial framebuffer, the framebuffer is 821 * physically located at VRAM address 0, and usually at GGTT address 0 too. 822 * 823 * The display code will ask for a GGTT allocation between end of BO and 824 * remainder of GGTT, unaware that the start is reserved by WOPCM. 825 */ 826 if (start >= ggtt->start) 827 start -= ggtt->start; 828 else 829 start = 0; 830 831 /* Should never happen, but since we handle start, fail graciously for end */ 832 if (end >= ggtt->start) 833 end -= ggtt->start; 834 else 835 end = 0; 836 837 xe_tile_assert(ggtt->tile, end >= start + xe_bo_size(bo)); 838 839 err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, 840 xe_bo_size(bo), alignment, 0, start, end, 0); 841 if (err) { 842 ggtt_node_fini(bo->ggtt_node[tile_id]); 843 bo->ggtt_node[tile_id] = NULL; 844 } else { 845 u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; 846 u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; 847 u64 pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); 848 849 xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pte); 850 } 851 mutex_unlock(&ggtt->lock); 852 853 if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) 854 xe_ggtt_invalidate(ggtt); 855 856 out: 857 xe_pm_runtime_put(tile_to_xe(ggtt->tile)); 858 859 return err; 860 } 861 862 /** 863 * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space 864 * @ggtt: the &xe_ggtt where bo will be inserted 865 * @bo: the &xe_bo to be inserted 866 * @start: address where it will be inserted 867 * @end: end of the range where it will be inserted 868 * @exec: The drm_exec transaction to use for exhaustive eviction. 869 * 870 * Return: 0 on success or a negative error code on failure. 871 */ 872 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, 873 u64 start, u64 end, struct drm_exec *exec) 874 { 875 return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec); 876 } 877 878 /** 879 * xe_ggtt_insert_bo - Insert BO into GGTT 880 * @ggtt: the &xe_ggtt where bo will be inserted 881 * @bo: the &xe_bo to be inserted 882 * @exec: The drm_exec transaction to use for exhaustive eviction. 883 * 884 * Return: 0 on success or a negative error code on failure. 885 */ 886 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, 887 struct drm_exec *exec) 888 { 889 return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec); 890 } 891 892 /** 893 * xe_ggtt_remove_bo - Remove a BO from the GGTT 894 * @ggtt: the &xe_ggtt where node will be removed 895 * @bo: the &xe_bo to be removed 896 */ 897 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) 898 { 899 u8 tile_id = ggtt->tile->id; 900 901 if (XE_WARN_ON(!bo->ggtt_node[tile_id])) 902 return; 903 904 /* This BO is not currently in the GGTT */ 905 xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); 906 907 xe_ggtt_node_remove(bo->ggtt_node[tile_id], 908 bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); 909 } 910 911 /** 912 * xe_ggtt_largest_hole - Largest GGTT hole 913 * @ggtt: the &xe_ggtt that will be inspected 914 * @alignment: minimum alignment 915 * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare 916 * 917 * Return: size of the largest continuous GGTT region 918 */ 919 u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare) 920 { 921 const struct drm_mm *mm = &ggtt->mm; 922 const struct drm_mm_node *entry; 923 u64 hole_start, hole_end, hole_size; 924 u64 max_hole = 0; 925 926 mutex_lock(&ggtt->lock); 927 drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { 928 hole_start = max(hole_start, ggtt->start); 929 hole_start = ALIGN(hole_start, alignment); 930 hole_end = ALIGN_DOWN(hole_end, alignment); 931 if (hole_start >= hole_end) 932 continue; 933 hole_size = hole_end - hole_start; 934 if (spare) 935 *spare -= min3(*spare, hole_size, max_hole); 936 max_hole = max(max_hole, hole_size); 937 } 938 939 mutex_unlock(&ggtt->lock); 940 941 return max_hole; 942 } 943 944 #ifdef CONFIG_PCI_IOV 945 static u64 xe_encode_vfid_pte(u16 vfid) 946 { 947 return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT; 948 } 949 950 static void xe_ggtt_assign_locked(const struct xe_ggtt_node *node, u16 vfid) 951 { 952 struct xe_ggtt *ggtt = node->ggtt; 953 u64 start = xe_ggtt_node_addr(node); 954 u64 size = xe_ggtt_node_size(node); 955 u64 end = start + size - 1; 956 u64 pte = xe_encode_vfid_pte(vfid); 957 958 lockdep_assert_held(&ggtt->lock); 959 960 while (start < end) { 961 ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte); 962 start += XE_PAGE_SIZE; 963 } 964 965 xe_ggtt_invalidate(ggtt); 966 } 967 968 /** 969 * xe_ggtt_assign - assign a GGTT region to the VF 970 * @node: the &xe_ggtt_node to update 971 * @vfid: the VF identifier 972 * 973 * This function is used by the PF driver to assign a GGTT region to the VF. 974 * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some 975 * platforms VFs can't modify that either. 976 */ 977 void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid) 978 { 979 guard(mutex)(&node->ggtt->lock); 980 xe_ggtt_assign_locked(node, vfid); 981 } 982 983 /** 984 * xe_ggtt_node_save() - Save a &xe_ggtt_node to a buffer. 985 * @node: the &xe_ggtt_node to be saved 986 * @dst: destination buffer 987 * @size: destination buffer size in bytes 988 * @vfid: VF identifier 989 * 990 * Return: 0 on success or a negative error code on failure. 991 */ 992 int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid) 993 { 994 struct xe_ggtt *ggtt; 995 u64 start, end; 996 u64 *buf = dst; 997 u64 pte; 998 999 if (!node) 1000 return -ENOENT; 1001 1002 ggtt = node->ggtt; 1003 guard(mutex)(&ggtt->lock); 1004 1005 if (xe_ggtt_node_pt_size(node) != size) 1006 return -EINVAL; 1007 1008 start = xe_ggtt_node_addr(node); 1009 end = start + xe_ggtt_node_size(node) - 1; 1010 1011 while (start < end) { 1012 pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start); 1013 if (vfid != u64_get_bits(pte, GGTT_PTE_VFID)) 1014 return -EPERM; 1015 1016 *buf++ = u64_replace_bits(pte, 0, GGTT_PTE_VFID); 1017 start += XE_PAGE_SIZE; 1018 } 1019 1020 return 0; 1021 } 1022 1023 /** 1024 * xe_ggtt_node_load() - Load a &xe_ggtt_node from a buffer. 1025 * @node: the &xe_ggtt_node to be loaded 1026 * @src: source buffer 1027 * @size: source buffer size in bytes 1028 * @vfid: VF identifier 1029 * 1030 * Return: 0 on success or a negative error code on failure. 1031 */ 1032 int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid) 1033 { 1034 u64 vfid_pte = xe_encode_vfid_pte(vfid); 1035 const u64 *buf = src; 1036 struct xe_ggtt *ggtt; 1037 u64 start, end; 1038 1039 if (!node) 1040 return -ENOENT; 1041 1042 ggtt = node->ggtt; 1043 guard(mutex)(&ggtt->lock); 1044 1045 if (xe_ggtt_node_pt_size(node) != size) 1046 return -EINVAL; 1047 1048 start = xe_ggtt_node_addr(node); 1049 end = start + xe_ggtt_node_size(node) - 1; 1050 1051 while (start < end) { 1052 vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID); 1053 ggtt->pt_ops->ggtt_set_pte(ggtt, start, vfid_pte); 1054 start += XE_PAGE_SIZE; 1055 } 1056 xe_ggtt_invalidate(ggtt); 1057 1058 return 0; 1059 } 1060 1061 #endif 1062 1063 /** 1064 * xe_ggtt_dump - Dump GGTT for debug 1065 * @ggtt: the &xe_ggtt to be dumped 1066 * @p: the &drm_mm_printer helper handle to be used to dump the information 1067 * 1068 * Return: 0 on success or a negative error code on failure. 1069 */ 1070 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) 1071 { 1072 int err; 1073 1074 err = mutex_lock_interruptible(&ggtt->lock); 1075 if (err) 1076 return err; 1077 1078 drm_mm_print(&ggtt->mm, p); 1079 mutex_unlock(&ggtt->lock); 1080 return err; 1081 } 1082 1083 /** 1084 * xe_ggtt_print_holes - Print holes 1085 * @ggtt: the &xe_ggtt to be inspected 1086 * @alignment: min alignment 1087 * @p: the &drm_printer 1088 * 1089 * Print GGTT ranges that are available and return total size available. 1090 * 1091 * Return: Total available size. 1092 */ 1093 u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p) 1094 { 1095 const struct drm_mm *mm = &ggtt->mm; 1096 const struct drm_mm_node *entry; 1097 u64 hole_start, hole_end, hole_size; 1098 u64 total = 0; 1099 char buf[10]; 1100 1101 mutex_lock(&ggtt->lock); 1102 drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { 1103 hole_start = max(hole_start, ggtt->start); 1104 hole_start = ALIGN(hole_start, alignment); 1105 hole_end = ALIGN_DOWN(hole_end, alignment); 1106 if (hole_start >= hole_end) 1107 continue; 1108 hole_size = hole_end - hole_start; 1109 total += hole_size; 1110 1111 string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); 1112 drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", 1113 hole_start, hole_end - 1, buf); 1114 } 1115 1116 mutex_unlock(&ggtt->lock); 1117 1118 return total; 1119 } 1120 1121 /** 1122 * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO 1123 * @ggtt: &xe_ggtt 1124 * @bo: &xe_bo 1125 * @pat_index: The pat_index for the PTE. 1126 * 1127 * This function returns the pte_flags for a given BO, without address. 1128 * It's used for DPT to fill a GGTT mapped BO with a linear lookup table. 1129 */ 1130 u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, 1131 struct xe_bo *bo, u16 pat_index) 1132 { 1133 return ggtt->pt_ops->pte_encode_flags(bo, pat_index); 1134 } 1135 1136 /** 1137 * xe_ggtt_read_pte - Read a PTE from the GGTT 1138 * @ggtt: &xe_ggtt 1139 * @offset: the offset for which the mapping should be read. 1140 * 1141 * Used by testcases, and by display reading out an inherited bios FB. 1142 */ 1143 u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset) 1144 { 1145 return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE)); 1146 } 1147 1148 /** 1149 * xe_ggtt_node_addr - Get @node offset in GGTT. 1150 * @node: &xe_ggtt_node 1151 * 1152 * Get the GGTT offset for allocated node. 1153 */ 1154 u64 xe_ggtt_node_addr(const struct xe_ggtt_node *node) 1155 { 1156 /* pairs with WRITE_ONCE in xe_ggtt_shift_nodes() */ 1157 return node->base.start + READ_ONCE(node->ggtt->start); 1158 } 1159 1160 /** 1161 * xe_ggtt_node_size - Get @node allocation size. 1162 * @node: &xe_ggtt_node 1163 * 1164 * Get the allocated node's size. 1165 */ 1166 u64 xe_ggtt_node_size(const struct xe_ggtt_node *node) 1167 { 1168 return node->base.size; 1169 } 1170