// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_ggtt.h"

#include <kunit/visibility.h>
#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/intel/i915_drm.h>
#include <generated/xe_wa_oob.h>

#include "regs/xe_gt_regs.h"
#include "regs/xe_gtt_defs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_gt_printk.h"
#include "xe_gt_types.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_tile_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_tlb_inval.h"
#include "xe_wa.h"
#include "xe_wopcm.h"

/**
 * DOC: Global Graphics Translation Table (GGTT)
 *
 * Xe GGTT implements the support for a Global Virtual Address space that is used
 * for resources that are accessible to privileged (i.e. kernel-mode) processes,
 * and not tied to a specific user-level process. For example, the Graphics
 * micro-Controller (GuC) and Display Engine (if present) utilize this Global
 * address space.
 *
 * The Global GTT (GGTT) translates from the Global virtual address to a physical
 * address that can be accessed by HW. The GGTT is a flat, single-level table.
 *
 * Xe implements a simplified version of the GGTT specifically managing only a
 * certain range of it that goes from the Write Once Protected Content Memory (WOPCM)
 * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to
 * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space
 * is limited on both ends of the GGTT, because the GuC shim HW redirects
 * accesses to those addresses to other HW areas instead of going through the
 * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size,
 * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things
 * simple, instead of checking each object to see if they are accessed by GuC or
 * not, we just exclude those areas from the allocator. Additionally, to simplify
 * the driver load, we use the maximum WOPCM size in this logic instead of the
 * programmed one, so we don't need to wait until the actual size to be
 * programmed is determined (which requires FW fetch) before initializing the
 * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs
 * depending on the platform) but we can live with this. Another benefit of this
 * is the GuC bootrom can't access anything below the WOPCM max size so anything
 * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT
 * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max
 * give us the correct placement for free.
 */

#define XE_GGTT_FLAGS_64K BIT(0)
#define XE_GGTT_FLAGS_ONLINE BIT(1)

/**
 * struct xe_ggtt_node - A node in GGTT.
 *
 * This struct is allocated with xe_ggtt_insert_node(,_transform) or xe_ggtt_insert_bo(,_at).
 * It will be deallocated using xe_ggtt_node_remove().
 */
struct xe_ggtt_node {
	/** @ggtt: Back pointer to xe_ggtt where this region will be inserted at */
	struct xe_ggtt *ggtt;
	/** @base: A drm_mm_node */
	struct drm_mm_node base;
	/** @delayed_removal_work: The work struct for the delayed removal */
	struct work_struct delayed_removal_work;
	/** @invalidate_on_remove: If it needs invalidation upon removal */
	bool invalidate_on_remove;
};

/**
 * struct xe_ggtt_pt_ops - GGTT Page table operations
 * Which can vary from platform to platform.
92 */ 93 struct xe_ggtt_pt_ops { 94 /** @pte_encode_flags: Encode PTE flags for a given BO */ 95 u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index); 96 97 /** @ggtt_set_pte: Directly write into GGTT's PTE */ 98 xe_ggtt_set_pte_fn ggtt_set_pte; 99 100 /** @ggtt_get_pte: Directly read from GGTT's PTE */ 101 u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr); 102 }; 103 104 /** 105 * struct xe_ggtt - Main GGTT struct 106 * 107 * In general, each tile can contains its own Global Graphics Translation Table 108 * (GGTT) instance. 109 */ 110 struct xe_ggtt { 111 /** @tile: Back pointer to tile where this GGTT belongs */ 112 struct xe_tile *tile; 113 /** @start: Start offset of GGTT */ 114 u64 start; 115 /** @size: Total usable size of this GGTT */ 116 u64 size; 117 118 #define XE_GGTT_FLAGS_64K BIT(0) 119 /** 120 * @flags: Flags for this GGTT 121 * Acceptable flags: 122 * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. 123 * - %XE_GGTT_FLAGS_ONLINE - is GGTT online, protected by ggtt->lock 124 * after init 125 */ 126 unsigned int flags; 127 /** @scratch: Internal object allocation used as a scratch page */ 128 struct xe_bo *scratch; 129 /** @lock: Mutex lock to protect GGTT data */ 130 struct mutex lock; 131 /** 132 * @gsm: The iomem pointer to the actual location of the translation 133 * table located in the GSM for easy PTE manipulation 134 */ 135 u64 __iomem *gsm; 136 /** @pt_ops: Page Table operations per platform */ 137 const struct xe_ggtt_pt_ops *pt_ops; 138 /** @mm: The memory manager used to manage individual GGTT allocations */ 139 struct drm_mm mm; 140 /** @access_count: counts GGTT writes */ 141 unsigned int access_count; 142 /** @wq: Dedicated unordered work queue to process node removals */ 143 struct workqueue_struct *wq; 144 }; 145 146 static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) 147 { 148 u64 pte = XE_PAGE_PRESENT; 149 150 if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) 151 pte |= XE_GGTT_PTE_DM; 152 153 
return pte; 154 } 155 156 static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index) 157 { 158 struct xe_device *xe = xe_bo_device(bo); 159 u64 pte; 160 161 pte = xelp_ggtt_pte_flags(bo, pat_index); 162 163 xe_assert(xe, pat_index <= 3); 164 165 if (pat_index & BIT(0)) 166 pte |= XELPG_GGTT_PTE_PAT0; 167 168 if (pat_index & BIT(1)) 169 pte |= XELPG_GGTT_PTE_PAT1; 170 171 return pte; 172 } 173 174 static unsigned int probe_gsm_size(struct pci_dev *pdev) 175 { 176 u16 gmch_ctl, ggms; 177 178 pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl); 179 ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK; 180 return ggms ? SZ_1M << ggms : 0; 181 } 182 183 static void ggtt_update_access_counter(struct xe_ggtt *ggtt) 184 { 185 struct xe_tile *tile = ggtt->tile; 186 struct xe_gt *affected_gt; 187 u32 max_gtt_writes; 188 189 if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 22019338487)) { 190 affected_gt = tile->primary_gt; 191 max_gtt_writes = 1100; 192 193 /* Only expected to apply to primary GT on dgpu platforms */ 194 xe_tile_assert(tile, IS_DGFX(tile_to_xe(tile))); 195 } else { 196 affected_gt = tile->media_gt; 197 max_gtt_writes = 63; 198 199 /* Only expected to apply to media GT on igpu platforms */ 200 xe_tile_assert(tile, !IS_DGFX(tile_to_xe(tile))); 201 } 202 203 /* 204 * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit 205 * to wait for completion of prior GTT writes before letting this through. 206 * This needs to be done for all GGTT writes originating from the CPU. 207 */ 208 lockdep_assert_held(&ggtt->lock); 209 210 if ((++ggtt->access_count % max_gtt_writes) == 0) { 211 xe_mmio_write32(&affected_gt->mmio, GMD_ID, 0x0); 212 ggtt->access_count = 0; 213 } 214 } 215 216 /** 217 * xe_ggtt_start - Get starting offset of GGTT. 218 * @ggtt: &xe_ggtt 219 * 220 * Returns: Starting offset for this &xe_ggtt. 
 */
u64 xe_ggtt_start(struct xe_ggtt *ggtt)
{
	return ggtt->start;
}

/**
 * xe_ggtt_size - Get size of GGTT.
 * @ggtt: &xe_ggtt
 *
 * Returns: Total usable size of this &xe_ggtt.
 */
u64 xe_ggtt_size(struct xe_ggtt *ggtt)
{
	return ggtt->size;
}

/* Write one PTE directly into the GSM; @addr is a GGTT offset, PTE-aligned */
static void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
	xe_tile_assert(ggtt->tile, addr < ggtt->start + ggtt->size);

	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
}

/* PTE write variant that also applies the Wa_22019338487 flush heuristic */
static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_ggtt_set_pte(ggtt, addr, pte);
	ggtt_update_access_counter(ggtt);
}

/* Read one PTE directly from the GSM; @addr is a GGTT offset, PTE-aligned */
static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr)
{
	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
	xe_tile_assert(ggtt->tile, addr < ggtt->start + ggtt->size);

	return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]);
}

/*
 * Point every PTE in [start, start + size) at the scratch page (or clear to 0
 * if no scratch page exists yet, e.g. during early init).
 */
static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 end = start + size - 1;
	u64 scratch_pte;

	xe_tile_assert(ggtt->tile, start < end);

	if (ggtt->scratch)
		scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) |
			ggtt->pt_ops->pte_encode_flags(ggtt->scratch,
						       pat_index);
	else
		scratch_pte = 0;

	while (start < end) {
		ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte);
		start += XE_PAGE_SIZE;
	}
}

/* Teach lockdep up-front that ggtt->lock may be taken in reclaim context */
static void primelockdep(struct xe_ggtt *ggtt)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&ggtt->lock);
	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile
 * @tile: &xe_tile
 *
 * Allocates a &xe_ggtt for a given tile.
 *
 * Return: &xe_ggtt on success, or NULL when out of memory.
 */
struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_ggtt *ggtt;

	ggtt = drmm_kzalloc(&xe->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return NULL;

	if (drmm_mutex_init(&xe->drm, &ggtt->lock))
		return NULL;

	primelockdep(ggtt);
	ggtt->tile = tile;

	return ggtt;
}

/* drmm action: tear down the allocator and removal workqueue */
static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
	struct xe_ggtt *ggtt = arg;

	destroy_workqueue(ggtt->wq);
	drm_mm_takedown(&ggtt->mm);
}

/* devm action: drop the scratch BO reference (freed by managed BO teardown) */
static void ggtt_fini(void *arg)
{
	struct xe_ggtt *ggtt = arg;

	ggtt->scratch = NULL;
}

#ifdef CONFIG_LOCKDEP
void xe_ggtt_might_lock(struct xe_ggtt *ggtt)
{
	might_lock(&ggtt->lock);
}
#endif

static const struct xe_ggtt_pt_ops xelp_pt_ops = {
	.pte_encode_flags = xelp_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte,
	.ggtt_get_pte = xe_ggtt_get_pte,
};

static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
	.pte_encode_flags = xelpg_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte,
	.ggtt_get_pte = xe_ggtt_get_pte,
};

/* Variant used when Wa_22019338487 applies: flushing PTE writer */
static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
	.pte_encode_flags = xelpg_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
	.ggtt_get_pte = xe_ggtt_get_pte,
};

static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	ggtt->start = start;
	ggtt->size = size;
	drm_mm_init(&ggtt->mm, 0, size);
}

int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 start, u32 size)
{
	__xe_ggtt_init_early(ggtt, start, size);
	return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit);

/* devm action: mark GGTT offline and flush any pending delayed removals */
static void dev_fini_ggtt(void *arg)
{
	struct xe_ggtt *ggtt = arg;

	scoped_guard(mutex, &ggtt->lock)
		ggtt->flags &= ~XE_GGTT_FLAGS_ONLINE;
	drain_workqueue(ggtt->wq);
}

/**
 * xe_ggtt_init_early - Early GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * It allows to create new mappings usable by the GuC.
 * Mappings are not usable by the HW engines, as it doesn't have scratch nor
 * initial clear done to it yet. That will happen in the regular, non-early
 * GGTT initialization.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_init_early(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	unsigned int gsm_size;
	u64 ggtt_start, wopcm = xe_wopcm_size(xe), ggtt_size;
	int err;

	if (!IS_SRIOV_VF(xe)) {
		if (GRAPHICS_VERx100(xe) >= 1250)
			gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */
		else
			gsm_size = probe_gsm_size(pdev);
		if (gsm_size == 0) {
			xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n");
			return -ENOMEM;
		}
		ggtt_start = wopcm;
		/* 8 bytes per PTE, each PTE maps one XE_PAGE_SIZE page */
		ggtt_size = (gsm_size / 8) * (u64)XE_PAGE_SIZE - ggtt_start;
	} else {
		/* VFs get their provisioned GGTT range from the PF */
		ggtt_start = xe_tile_sriov_vf_ggtt_base(ggtt->tile);
		ggtt_size = xe_tile_sriov_vf_ggtt(ggtt->tile);

		if (ggtt_start < wopcm ||
		    ggtt_start + ggtt_size > GUC_GGTT_TOP) {
			xe_tile_err(ggtt->tile, "Invalid GGTT configuration: %#llx-%#llx\n",
				    ggtt_start, ggtt_start + ggtt_size - 1);
			return -ERANGE;
		}
	}

	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		ggtt->flags |= XE_GGTT_FLAGS_64K;

	/* Keep all allocations below GUC_GGTT_TOP (see DOC comment above) */
	if (ggtt_size + ggtt_start > GUC_GGTT_TOP)
		ggtt_size = GUC_GGTT_TOP - ggtt_start;

	if (GRAPHICS_VERx100(xe) >= 1270)
		ggtt->pt_ops =
			(ggtt->tile->media_gt && XE_GT_WA(ggtt->tile->media_gt, 22019338487)) ||
			(ggtt->tile->primary_gt && XE_GT_WA(ggtt->tile->primary_gt, 22019338487)) ?
			&xelpg_pt_wa_ops : &xelpg_pt_ops;
	else
		ggtt->pt_ops = &xelp_pt_ops;

	ggtt->wq = alloc_workqueue("xe-ggtt-wq", WQ_MEM_RECLAIM | WQ_PERCPU, 0);
	if (!ggtt->wq)
		return -ENOMEM;

	__xe_ggtt_init_early(ggtt, ggtt_start, ggtt_size);

	err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
	if (err)
		return err;

	ggtt->flags |= XE_GGTT_FLAGS_ONLINE;
	return devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
}
ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);

static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
{
	struct drm_mm_node *hole;
	u64 start, end;

	/* Display may have allocated inside ggtt, so be careful with clearing here */
	mutex_lock(&ggtt->lock);
	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
		xe_ggtt_clear(ggtt, ggtt->start + start, end - start);

	xe_ggtt_invalidate(ggtt);
	mutex_unlock(&ggtt->lock);
}

static void ggtt_node_fini(struct xe_ggtt_node *node)
{
	kfree(node);
}

/* Remove a node under ggtt->lock, clearing PTEs only while the GGTT is online */
static void ggtt_node_remove(struct xe_ggtt_node *node)
{
	struct xe_ggtt *ggtt = node->ggtt;
	bool bound;

	mutex_lock(&ggtt->lock);
	bound = ggtt->flags & XE_GGTT_FLAGS_ONLINE;
	if (bound)
		xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node));
	drm_mm_remove_node(&node->base);
	node->base.size = 0;
	if (bound && node->invalidate_on_remove)
		xe_ggtt_invalidate(ggtt);
	mutex_unlock(&ggtt->lock);

	ggtt_node_fini(node);
}

/* Deferred removal path, used when the device was runtime suspended */
static void ggtt_node_remove_work_func(struct work_struct *work)
{
	struct xe_ggtt_node *node = container_of(work, typeof(*node),
						 delayed_removal_work);
	struct xe_device *xe = tile_to_xe(node->ggtt->tile);

	guard(xe_pm_runtime)(xe);
	ggtt_node_remove(node);
}

/**
 * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT
 * @node: the &xe_ggtt_node to be removed
 * @invalidate: if node needs invalidation upon removal
 */
void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate)
{
	struct xe_ggtt *ggtt;
	struct xe_device *xe;

	if (!node || !node->ggtt)
		return;

	ggtt = node->ggtt;
	xe = tile_to_xe(ggtt->tile);

	node->invalidate_on_remove = invalidate;

	if (xe_pm_runtime_get_if_active(xe)) {
		ggtt_node_remove(node);
		xe_pm_runtime_put(xe);
	} else {
		/* Device is runtime suspended: defer to the GGTT workqueue */
		queue_work(ggtt->wq, &node->delayed_removal_work);
	}
}

/**
 * xe_ggtt_init - Regular non-early GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_init(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	unsigned int flags;
	int err;

	/*
	 * So we don't need to worry about 64K GGTT layout when dealing with
	 * scratch entries, rather keep the scratch page in system memory on
	 * platforms where 64K pages are needed for VRAM.
	 */
	flags = 0;
	if (ggtt->flags & XE_GGTT_FLAGS_64K)
		flags |= XE_BO_FLAG_SYSTEM;
	else
		flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile);

	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
	if (IS_ERR(ggtt->scratch)) {
		err = PTR_ERR(ggtt->scratch);
		goto err;
	}

	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch));

	xe_ggtt_initial_clear(ggtt);

	return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt);
err:
	ggtt->scratch = NULL;
	return err;
}

static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
{
	int err;

	if (!gt)
		return;

	err = xe_tlb_inval_ggtt(&gt->tlb_inval);
	xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err));
}

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);

	/*
	 * XXX: Barrier for GGTT pages. Unsure exactly why this required but
	 * without this LNL is having issues with the GuC reading scratch page
	 * vs. correct GGTT page. Not particularly a hot code path so blindly
	 * do a mmio read here which results in GuC reading correct GGTT page.
	 */
	xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);

	/* Each GT in a tile has its own TLB to cache GGTT lookups */
	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
}

/**
 * xe_ggtt_shift_nodes() - Shift GGTT nodes to adjust for a change in usable address range.
 * @ggtt: the &xe_ggtt struct instance
 * @new_start: new location of area provisioned for current VF
 *
 * Ensure that all struct &xe_ggtt_node are moved to the @new_start base address
 * by changing the base offset of the GGTT.
 *
 * This function may be called multiple times during recovery, but if
 * @new_start is unchanged from the current base, it's a noop.
 *
 * @new_start should be a value between xe_wopcm_size() and #GUC_GGTT_TOP.
 */
void xe_ggtt_shift_nodes(struct xe_ggtt *ggtt, u64 new_start)
{
	guard(mutex)(&ggtt->lock);

	xe_tile_assert(ggtt->tile, new_start >= xe_wopcm_size(tile_to_xe(ggtt->tile)));
	xe_tile_assert(ggtt->tile, new_start + ggtt->size <= GUC_GGTT_TOP);

	/* pairs with READ_ONCE in xe_ggtt_node_addr() */
	WRITE_ONCE(ggtt->start, new_start);
}

static int xe_ggtt_insert_node_locked(struct xe_ggtt_node *node,
				      u32 size, u32 align, u32 mm_flags)
{
	return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0,
					  mm_flags);
}

/* Allocate and initialize a node; insertion into the mm happens separately */
static struct xe_ggtt_node *ggtt_node_init(struct xe_ggtt *ggtt)
{
	struct xe_ggtt_node *node = kzalloc_obj(*node, GFP_NOFS);

	if (!node)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func);
	node->ggtt = ggtt;

	return node;
}

/**
 * xe_ggtt_insert_node - Insert a &xe_ggtt_node into the GGTT
 * @ggtt: the &xe_ggtt into which the node should be inserted.
 * @size: size of the node
 * @align: alignment constrain of the node
 *
 * Return: &xe_ggtt_node on success or a ERR_PTR on failure.
 */
struct xe_ggtt_node *xe_ggtt_insert_node(struct xe_ggtt *ggtt, u32 size, u32 align)
{
	struct xe_ggtt_node *node;
	int ret;

	node = ggtt_node_init(ggtt);
	if (IS_ERR(node))
		return node;

	guard(mutex)(&ggtt->lock);
	ret = xe_ggtt_insert_node_locked(node, size, align,
					 DRM_MM_INSERT_HIGH);
	if (ret) {
		ggtt_node_fini(node);
		return ERR_PTR(ret);
	}

	return node;
}

/**
 * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node.
 * @node: the &xe_ggtt_node
 *
 * Return: GGTT node page table entries size in bytes.
 */
size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node)
{
	if (!node)
		return 0;

	/* One u64 PTE per XE_PAGE_SIZE page covered by the node */
	return node->base.size / XE_PAGE_SIZE * sizeof(u64);
}

/**
 * xe_ggtt_map_bo - Map the BO into GGTT
 * @ggtt: the &xe_ggtt where node will be mapped
 * @node: the &xe_ggtt_node where this BO is mapped
 * @bo: the &xe_bo to be mapped
 * @pte: The pte flags to append.
 */
static void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
			   struct xe_bo *bo, u64 pte)
{
	u64 start, end;
	struct xe_res_cursor cur;

	if (XE_WARN_ON(!node))
		return;

	start = xe_ggtt_node_addr(node);
	end = start + xe_bo_size(bo);

	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
		xe_assert(xe_bo_device(bo), bo->ttm.ttm);

		/* end - cur.remaining walks the GGTT range front to back */
		for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur);
		     cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
						   pte | xe_res_dma(&cur));
	} else {
		/* Prepend GPU offset */
		pte |= vram_region_gpu_offset(bo->ttm.resource);

		for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
		     cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
						   pte + cur.start);
	}
}

/**
 * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT
 * @ggtt: the &xe_ggtt where node will be mapped
 * @bo: the &xe_bo to be mapped
 *
 * This is used to restore a GGTT mapping after suspend.
 */
void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
	u64 pte;

	mutex_lock(&ggtt->lock);
	pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
	xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pte);
	mutex_unlock(&ggtt->lock);
}

/**
 * xe_ggtt_insert_node_transform - Insert a newly allocated &xe_ggtt_node into the GGTT
 * @ggtt: the &xe_ggtt where the node will inserted/reserved.
 * @bo: The bo to be transformed
 * @pte_flags: The extra GGTT flags to add to mapping.
 * @size: size of the node
 * @align: required alignment for node
 * @transform: transformation function that will populate the GGTT node, or NULL for linear mapping.
 * @arg: Extra argument to pass to the transformation function.
 *
 * This function allows inserting a GGTT node with a custom transformation function.
 * This is useful for display to allow inserting rotated framebuffers to GGTT.
 *
 * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise.
 */
struct xe_ggtt_node *xe_ggtt_insert_node_transform(struct xe_ggtt *ggtt,
						   struct xe_bo *bo, u64 pte_flags,
						   u64 size, u32 align,
						   xe_ggtt_transform_cb transform, void *arg)
{
	struct xe_ggtt_node *node;
	int ret;

	node = ggtt_node_init(ggtt);
	if (IS_ERR(node))
		return ERR_CAST(node);

	if (mutex_lock_interruptible(&ggtt->lock) < 0) {
		ret = -ERESTARTSYS;
		goto err;
	}

	ret = xe_ggtt_insert_node_locked(node, size, align, 0);
	if (ret)
		goto err_unlock;

	if (transform)
		transform(ggtt, node, pte_flags, ggtt->pt_ops->ggtt_set_pte, arg);
	else
		xe_ggtt_map_bo(ggtt, node, bo, pte_flags);

	mutex_unlock(&ggtt->lock);
	return node;

err_unlock:
	mutex_unlock(&ggtt->lock);
err:
	ggtt_node_fini(node);
	return ERR_PTR(ret);
}

/* Common implementation for xe_ggtt_insert_bo() and xe_ggtt_insert_bo_at() */
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
				  u64 start, u64 end, struct drm_exec *exec)
{
	u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
	u8 tile_id = ggtt->tile->id;
	int err;

	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
		alignment = SZ_64K;

	if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
		/* Someone's already inserted this BO in the GGTT */
		xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));
		return 0;
	}

	err = xe_bo_validate(bo, NULL, false, exec);
	if (err)
		return err;

	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));

	bo->ggtt_node[tile_id] = ggtt_node_init(ggtt);
	if (IS_ERR(bo->ggtt_node[tile_id])) {
		err = PTR_ERR(bo->ggtt_node[tile_id]);
		bo->ggtt_node[tile_id] = NULL;
		goto out;
	}

	mutex_lock(&ggtt->lock);
	/*
	 * When inheriting the initial framebuffer, the framebuffer is
	 * physically located at VRAM address 0, and usually at GGTT address 0 too.
	 *
	 * The display code will ask for a GGTT allocation between end of BO and
	 * remainder of GGTT, unaware that the start is reserved by WOPCM.
	 */
	if (start >= ggtt->start)
		start -= ggtt->start;
	else
		start = 0;

	/* Should never happen, but since we handle start, fail graciously for end */
	if (end >= ggtt->start)
		end -= ggtt->start;
	else
		end = 0;

	xe_tile_assert(ggtt->tile, end >= start + xe_bo_size(bo));

	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
					  xe_bo_size(bo), alignment, 0, start, end, 0);
	if (err) {
		ggtt_node_fini(bo->ggtt_node[tile_id]);
		bo->ggtt_node[tile_id] = NULL;
	} else {
		u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
		u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
		u64 pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index);

		xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pte);
	}
	mutex_unlock(&ggtt->lock);

	if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE)
		xe_ggtt_invalidate(ggtt);

out:
	/* Balances the noresume get above, on both success and error paths */
	xe_pm_runtime_put(tile_to_xe(ggtt->tile));

	return err;
}

/**
 * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space
 * @ggtt: the &xe_ggtt where bo will be inserted
 * @bo: the &xe_bo to be inserted
 * @start: address where it will be inserted
 * @end: end of the range where it will be inserted
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
			 u64 start, u64 end, struct drm_exec *exec)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec);
}

/**
 * xe_ggtt_insert_bo - Insert BO into GGTT
 * @ggtt: the &xe_ggtt where bo will be inserted
 * @bo: the &xe_bo to be inserted
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo,
		      struct drm_exec *exec)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec);
}

/**
 * xe_ggtt_remove_bo - Remove a BO from the GGTT
 * @ggtt: the &xe_ggtt where node will be removed
 * @bo: the &xe_bo to be removed
 */
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u8 tile_id = ggtt->tile->id;

	if (XE_WARN_ON(!bo->ggtt_node[tile_id]))
		return;

	/* This BO is not currently in the GGTT */
	xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));

	xe_ggtt_node_remove(bo->ggtt_node[tile_id],
			    bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
}

/**
 * xe_ggtt_largest_hole - Largest GGTT hole
 * @ggtt: the &xe_ggtt that will be inspected
 * @alignment: minimum alignment
 * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare
 *
 * Return: size of the largest continuous GGTT region
 */
u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare)
{
	const struct drm_mm *mm = &ggtt->mm;
	const struct drm_mm_node *entry;
	u64 hole_start, hole_end, hole_size;
	u64 max_hole = 0;

	mutex_lock(&ggtt->lock);
	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
		hole_start = max(hole_start, ggtt->start);
		hole_start = ALIGN(hole_start, alignment);
		hole_end = ALIGN_DOWN(hole_end, alignment);
		if (hole_start >= hole_end)
			continue;
		hole_size = hole_end - hole_start;
		if (spare)
			*spare -= min3(*spare, hole_size, max_hole);
		max_hole = max(max_hole, hole_size);
	}

	mutex_unlock(&ggtt->lock);

	return max_hole;
}

#ifdef CONFIG_PCI_IOV
/* Build a PTE carrying only the VFID and the PRESENT bit */
static u64 xe_encode_vfid_pte(u16 vfid)
{
	return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT;
}

static void xe_ggtt_assign_locked(const struct xe_ggtt_node *node, u16 vfid)
{
	struct xe_ggtt *ggtt = node->ggtt;
	u64 start = xe_ggtt_node_addr(node);
	u64 size = xe_ggtt_node_size(node);
	u64 end = start + size - 1;
	u64 pte = xe_encode_vfid_pte(vfid);

	lockdep_assert_held(&ggtt->lock);

	while (start < end) {
		ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte);
		start += XE_PAGE_SIZE;
	}

	xe_ggtt_invalidate(ggtt);
}

/**
 * xe_ggtt_assign - assign a GGTT region to the VF
 * @node: the &xe_ggtt_node to update
 * @vfid: the VF identifier
 *
 * This function is used by the PF driver to assign a GGTT region to the VF.
 * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some
 * platforms VFs can't modify that either.
 */
void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid)
{
	guard(mutex)(&node->ggtt->lock);
	xe_ggtt_assign_locked(node, vfid);
}

/**
 * xe_ggtt_node_save() - Save a &xe_ggtt_node to a buffer.
 * @node: the &xe_ggtt_node to be saved
 * @dst: destination buffer
 * @size: destination buffer size in bytes
 * @vfid: VF identifier
 *
 * Return: 0 on success or a negative error code on failure.
989 */ 990 int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid) 991 { 992 struct xe_ggtt *ggtt; 993 u64 start, end; 994 u64 *buf = dst; 995 u64 pte; 996 997 if (!node) 998 return -ENOENT; 999 1000 ggtt = node->ggtt; 1001 guard(mutex)(&ggtt->lock); 1002 1003 if (xe_ggtt_node_pt_size(node) != size) 1004 return -EINVAL; 1005 1006 start = xe_ggtt_node_addr(node); 1007 end = start + xe_ggtt_node_size(node) - 1; 1008 1009 while (start < end) { 1010 pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start); 1011 if (vfid != u64_get_bits(pte, GGTT_PTE_VFID)) 1012 return -EPERM; 1013 1014 *buf++ = u64_replace_bits(pte, 0, GGTT_PTE_VFID); 1015 start += XE_PAGE_SIZE; 1016 } 1017 1018 return 0; 1019 } 1020 1021 /** 1022 * xe_ggtt_node_load() - Load a &xe_ggtt_node from a buffer. 1023 * @node: the &xe_ggtt_node to be loaded 1024 * @src: source buffer 1025 * @size: source buffer size in bytes 1026 * @vfid: VF identifier 1027 * 1028 * Return: 0 on success or a negative error code on failure. 
1029 */ 1030 int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid) 1031 { 1032 u64 vfid_pte = xe_encode_vfid_pte(vfid); 1033 const u64 *buf = src; 1034 struct xe_ggtt *ggtt; 1035 u64 start, end; 1036 1037 if (!node) 1038 return -ENOENT; 1039 1040 ggtt = node->ggtt; 1041 guard(mutex)(&ggtt->lock); 1042 1043 if (xe_ggtt_node_pt_size(node) != size) 1044 return -EINVAL; 1045 1046 start = xe_ggtt_node_addr(node); 1047 end = start + xe_ggtt_node_size(node) - 1; 1048 1049 while (start < end) { 1050 vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID); 1051 ggtt->pt_ops->ggtt_set_pte(ggtt, start, vfid_pte); 1052 start += XE_PAGE_SIZE; 1053 } 1054 xe_ggtt_invalidate(ggtt); 1055 1056 return 0; 1057 } 1058 1059 #endif 1060 1061 /** 1062 * xe_ggtt_dump - Dump GGTT for debug 1063 * @ggtt: the &xe_ggtt to be dumped 1064 * @p: the &drm_mm_printer helper handle to be used to dump the information 1065 * 1066 * Return: 0 on success or a negative error code on failure. 1067 */ 1068 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) 1069 { 1070 int err; 1071 1072 err = mutex_lock_interruptible(&ggtt->lock); 1073 if (err) 1074 return err; 1075 1076 drm_mm_print(&ggtt->mm, p); 1077 mutex_unlock(&ggtt->lock); 1078 return err; 1079 } 1080 1081 /** 1082 * xe_ggtt_print_holes - Print holes 1083 * @ggtt: the &xe_ggtt to be inspected 1084 * @alignment: min alignment 1085 * @p: the &drm_printer 1086 * 1087 * Print GGTT ranges that are available and return total size available. 1088 * 1089 * Return: Total available size. 
 */
u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p)
{
	const struct drm_mm *mm = &ggtt->mm;
	const struct drm_mm_node *entry;
	u64 hole_start, hole_end, hole_size;
	u64 total = 0;
	char buf[10];

	mutex_lock(&ggtt->lock);
	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
		hole_start = max(hole_start, ggtt->start);
		hole_start = ALIGN(hole_start, alignment);
		hole_end = ALIGN_DOWN(hole_end, alignment);
		if (hole_start >= hole_end)
			continue;
		hole_size = hole_end - hole_start;
		total += hole_size;

		string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf));
		drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n",
			   hole_start, hole_end - 1, buf);
	}

	mutex_unlock(&ggtt->lock);

	return total;
}

/**
 * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO
 * @ggtt: &xe_ggtt
 * @bo: &xe_bo
 * @pat_index: The pat_index for the PTE.
 *
 * This function returns the pte_flags for a given BO, without address.
 * It's used for DPT to fill a GGTT mapped BO with a linear lookup table.
 */
u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt,
			     struct xe_bo *bo, u16 pat_index)
{
	return ggtt->pt_ops->pte_encode_flags(bo, pat_index);
}

/**
 * xe_ggtt_read_pte - Read a PTE from the GGTT
 * @ggtt: &xe_ggtt
 * @offset: the offset for which the mapping should be read.
 *
 * Used by testcases, and by display reading out an inherited bios FB.
 */
u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset)
{
	return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE));
}

/**
 * xe_ggtt_node_addr - Get @node offset in GGTT.
 * @node: &xe_ggtt_node
 *
 * Get the GGTT offset for allocated node.
 */
u64 xe_ggtt_node_addr(const struct xe_ggtt_node *node)
{
	/* pairs with WRITE_ONCE in xe_ggtt_shift_nodes() */
	return node->base.start + READ_ONCE(node->ggtt->start);
}

/**
 * xe_ggtt_node_size - Get @node allocation size.
 * @node: &xe_ggtt_node
 *
 * Get the allocated node's size.
 */
u64 xe_ggtt_node_size(const struct xe_ggtt_node *node)
{
	return node->base.size;
}