// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_ggtt.h"

#include <kunit/visibility.h>
#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/intel/i915_drm.h>
#include <generated/xe_wa_oob.h>

#include "regs/xe_gt_regs.h"
#include "regs/xe_gtt_defs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_tile_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_tlb_inval.h"
#include "xe_wa.h"
#include "xe_wopcm.h"

/**
 * DOC: Global Graphics Translation Table (GGTT)
 *
 * Xe GGTT implements support for a Global Virtual Address space that is used
 * for resources that are accessible to privileged (i.e. kernel-mode) processes,
 * and not tied to a specific user-level process. For example, the Graphics
 * micro-Controller (GuC) and Display Engine (if present) utilize this Global
 * address space.
 *
 * The Global GTT (GGTT) translates from the Global virtual address to a physical
 * address that can be accessed by HW. The GGTT is a flat, single-level table.
 *
 * Xe implements a simplified version of the GGTT, specifically managing only the
 * range that goes from the Write Once Protected Content Memory (WOPCM) layout to
 * a predefined GUC_GGTT_TOP. This approach avoids complications related to the
 * GuC (Graphics Microcontroller) hardware limitations. The GuC address space
 * is limited on both ends of the GGTT, because the GuC shim HW redirects
 * accesses to those addresses to other HW areas instead of going through the
 * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size,
 * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things
 * simple, instead of checking each object to see whether it is accessed by the
 * GuC or not, we just exclude those areas from the allocator. Additionally, to
 * simplify the driver load, we use the maximum WOPCM size in this logic instead
 * of the programmed one, so we don't need to wait until the actual programmed
 * size is determined (which requires FW fetch) before initializing the GGTT.
 * These simplifications might waste space in the GGTT (about 20-25 MBs
 * depending on the platform) but we can live with this. Another benefit is that
 * the GuC bootrom can't access anything below the WOPCM max size, so anything
 * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT
 * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max
 * size gives us the correct placement for free.
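 *
 * In code terms this layout boils down to a simple allocator-range choice, as
 * done in xe_ggtt_init_early() and __xe_ggtt_init_early() below. A minimal
 * sketch of that logic (not an additional contract, just the two relevant
 * statements):
 *
 *	if (ggtt->size > GUC_GGTT_TOP)
 *		ggtt->size = GUC_GGTT_TOP;
 *	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe), ggtt->size - xe_wopcm_size(xe));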
 */

static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
	u64 pte = XE_PAGE_PRESENT;

	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_GGTT_PTE_DM;

	return pte;
}

static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
	struct xe_device *xe = xe_bo_device(bo);
	u64 pte;

	pte = xelp_ggtt_pte_flags(bo, pat_index);

	xe_assert(xe, pat_index <= 3);

	if (pat_index & BIT(0))
		pte |= XELPG_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XELPG_GGTT_PTE_PAT1;

	return pte;
}

static unsigned int probe_gsm_size(struct pci_dev *pdev)
{
	u16 gmch_ctl, ggms;

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
	ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
	return ggms ? SZ_1M << ggms : 0;
}

static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
{
	struct xe_tile *tile = ggtt->tile;
	struct xe_gt *affected_gt;
	u32 max_gtt_writes;

	if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 22019338487)) {
		affected_gt = tile->primary_gt;
		max_gtt_writes = 1100;

		/* Only expected to apply to primary GT on dgpu platforms */
		xe_tile_assert(tile, IS_DGFX(tile_to_xe(tile)));
	} else {
		affected_gt = tile->media_gt;
		max_gtt_writes = 63;

		/* Only expected to apply to media GT on igpu platforms */
		xe_tile_assert(tile, !IS_DGFX(tile_to_xe(tile)));
	}

	/*
	 * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
	 * to wait for completion of prior GTT writes before letting this through.
	 * This needs to be done for all GGTT writes originating from the CPU.
	 */
	lockdep_assert_held(&ggtt->lock);

	if ((++ggtt->access_count % max_gtt_writes) == 0) {
		xe_mmio_write32(&affected_gt->mmio, GMD_ID, 0x0);
		ggtt->access_count = 0;
	}
}

static void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
	xe_tile_assert(ggtt->tile, addr < ggtt->size);

	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
}

static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_ggtt_set_pte(ggtt, addr, pte);
	ggtt_update_access_counter(ggtt);
}

static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr)
{
	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
	xe_tile_assert(ggtt->tile, addr < ggtt->size);

	return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]);
}

static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 end = start + size - 1;
	u64 scratch_pte;

	xe_tile_assert(ggtt->tile, start < end);

	if (ggtt->scratch)
		scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) |
			ggtt->pt_ops->pte_encode_flags(ggtt->scratch,
						       pat_index);
	else
		scratch_pte = 0;

	while (start < end) {
		ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte);
		start += XE_PAGE_SIZE;
	}
}

static void primelockdep(struct xe_ggtt *ggtt)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&ggtt->lock);
	fs_reclaim_release(GFP_KERNEL);
}
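
/*
 * Illustrative summary of the Xe_LPG GGTT PTE flag encoding above (a sketch
 * that mirrors xelp_ggtt_pte_flags()/xelpg_ggtt_pte_flags(), not an extra HW
 * contract):
 *
 *	pat_index 0:	XE_PAGE_PRESENT
 *	pat_index 1:	XE_PAGE_PRESENT | XELPG_GGTT_PTE_PAT0
 *	pat_index 2:	XE_PAGE_PRESENT | XELPG_GGTT_PTE_PAT1
 *	pat_index 3:	XE_PAGE_PRESENT | XELPG_GGTT_PTE_PAT0 | XELPG_GGTT_PTE_PAT1
 *
 * BOs placed in VRAM or stolen device memory additionally get XE_GGTT_PTE_DM.
 * The caller then ORs in the page address, as done in xe_ggtt_clear() above
 * and xe_ggtt_map_bo() below.
 */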

/**
 * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile
 * @tile: &xe_tile
 *
 * Allocates a &xe_ggtt for a given tile.
 *
 * Return: &xe_ggtt on success, or NULL when out of memory.
 */
struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct xe_ggtt *ggtt;

	ggtt = drmm_kzalloc(&xe->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return NULL;

	if (drmm_mutex_init(&xe->drm, &ggtt->lock))
		return NULL;

	primelockdep(ggtt);
	ggtt->tile = tile;

	return ggtt;
}

static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
	struct xe_ggtt *ggtt = arg;

	destroy_workqueue(ggtt->wq);
	drm_mm_takedown(&ggtt->mm);
}

static void ggtt_fini(void *arg)
{
	struct xe_ggtt *ggtt = arg;

	ggtt->scratch = NULL;
}

#ifdef CONFIG_LOCKDEP
void xe_ggtt_might_lock(struct xe_ggtt *ggtt)
{
	might_lock(&ggtt->lock);
}
#endif

static const struct xe_ggtt_pt_ops xelp_pt_ops = {
	.pte_encode_flags = xelp_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte,
	.ggtt_get_pte = xe_ggtt_get_pte,
};

static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
	.pte_encode_flags = xelpg_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte,
	.ggtt_get_pte = xe_ggtt_get_pte,
};

static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
	.pte_encode_flags = xelpg_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
	.ggtt_get_pte = xe_ggtt_get_pte,
};

static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved)
{
	drm_mm_init(&ggtt->mm, reserved,
		    ggtt->size - reserved);
}

int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size)
{
	ggtt->size = size;
	__xe_ggtt_init_early(ggtt, reserved);
	return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit);

static void dev_fini_ggtt(void *arg)
{
	struct xe_ggtt *ggtt = arg;

	drain_workqueue(ggtt->wq);
}
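
/*
 * The pt_ops tables above make every PTE access go through one indirection,
 * selected per platform in xe_ggtt_init_early(). A minimal sketch of a caller
 * (assuming ggtt->lock is held and the page address was already looked up;
 * names are illustrative only):
 *
 *	u64 pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index) | page_addr;
 *
 *	ggtt->pt_ops->ggtt_set_pte(ggtt, ggtt_addr, pte);
 *
 * With xelpg_pt_wa_ops the same call also bumps the Wa_22019338487 access
 * counter, so a dummy GMD_ID write periodically flushes prior GTT writes.
 */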

/**
 * xe_ggtt_init_early - Early GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * It allows the creation of new mappings usable by the GuC.
 * Mappings are not usable by the HW engines yet, as the GGTT doesn't have
 * scratch set up nor the initial clear done to it. That will happen in the
 * regular, non-early GGTT initialization.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_init_early(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	unsigned int gsm_size;
	int err;

	if (IS_SRIOV_VF(xe) || GRAPHICS_VERx100(xe) >= 1250)
		gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */
	else
		gsm_size = probe_gsm_size(pdev);

	if (gsm_size == 0) {
		xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n");
		return -ENOMEM;
	}

	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;

	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		ggtt->flags |= XE_GGTT_FLAGS_64K;

	if (ggtt->size > GUC_GGTT_TOP)
		ggtt->size = GUC_GGTT_TOP;

	if (GRAPHICS_VERx100(xe) >= 1270)
		ggtt->pt_ops =
			(ggtt->tile->media_gt && XE_GT_WA(ggtt->tile->media_gt, 22019338487)) ||
			(ggtt->tile->primary_gt && XE_GT_WA(ggtt->tile->primary_gt, 22019338487)) ?
			&xelpg_pt_wa_ops : &xelpg_pt_ops;
	else
		ggtt->pt_ops = &xelp_pt_ops;

	ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
	if (!ggtt->wq)
		return -ENOMEM;

	__xe_ggtt_init_early(ggtt, xe_wopcm_size(xe));

	err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
	if (err)
		return err;

	err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
	if (err)
		return err;

	if (IS_SRIOV_VF(xe)) {
		err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile);
		if (err)
			return err;
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */
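
/*
 * Worked example of the sizing above (illustrative numbers for the SZ_8M GSM
 * case): an 8 MiB GSM holds 8 MiB / 8 B = 1M PTEs, each mapping a 4 KiB page,
 * so ggtt->size comes out as 1M * 4 KiB = 4 GiB of GGTT address space. It is
 * then clamped to GUC_GGTT_TOP, and the drm_mm allocator only manages the
 * range above the maximum WOPCM size, as described in the DOC section.
 */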

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);

static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
{
	struct drm_mm_node *hole;
	u64 start, end;

	/* Display may have allocated inside ggtt, so be careful with clearing here */
	mutex_lock(&ggtt->lock);
	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
		xe_ggtt_clear(ggtt, start, end - start);

	xe_ggtt_invalidate(ggtt);
	mutex_unlock(&ggtt->lock);
}

static void ggtt_node_remove(struct xe_ggtt_node *node)
{
	struct xe_ggtt *ggtt = node->ggtt;
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	mutex_lock(&ggtt->lock);
	if (bound)
		xe_ggtt_clear(ggtt, node->base.start, node->base.size);
	drm_mm_remove_node(&node->base);
	node->base.size = 0;
	mutex_unlock(&ggtt->lock);

	if (!bound)
		goto free_node;

	if (node->invalidate_on_remove)
		xe_ggtt_invalidate(ggtt);

	drm_dev_exit(idx);

free_node:
	xe_ggtt_node_fini(node);
}

static void ggtt_node_remove_work_func(struct work_struct *work)
{
	struct xe_ggtt_node *node = container_of(work, typeof(*node),
						 delayed_removal_work);
	struct xe_device *xe = tile_to_xe(node->ggtt->tile);

	xe_pm_runtime_get(xe);
	ggtt_node_remove(node);
	xe_pm_runtime_put(xe);
}

/**
 * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT
 * @node: the &xe_ggtt_node to be removed
 * @invalidate: if node needs invalidation upon removal
 */
void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate)
{
	struct xe_ggtt *ggtt;
	struct xe_device *xe;

	if (!node || !node->ggtt)
		return;

	ggtt = node->ggtt;
	xe = tile_to_xe(ggtt->tile);

	node->invalidate_on_remove = invalidate;

	if (xe_pm_runtime_get_if_active(xe)) {
		ggtt_node_remove(node);
		xe_pm_runtime_put(xe);
	} else {
		queue_work(ggtt->wq, &node->delayed_removal_work);
	}
}
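
/*
 * Removal does not require the caller to hold a runtime PM reference: if the
 * device is not runtime-active, the PTE clear and invalidation are deferred to
 * ggtt->wq and run with a runtime PM reference held. A typical caller looks
 * like xe_ggtt_remove_bo() below (sketch only; "node" and "invalidate" are
 * whatever the caller tracked at insertion time):
 *
 *	xe_ggtt_node_remove(node, invalidate);
 */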

/**
 * xe_ggtt_init - Regular non-early GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_init(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	unsigned int flags;
	int err;

	/*
	 * So we don't need to worry about 64K GGTT layout when dealing with
	 * scratch entries, rather keep the scratch page in system memory on
	 * platforms where 64K pages are needed for VRAM.
	 */
	flags = 0;
	if (ggtt->flags & XE_GGTT_FLAGS_64K)
		flags |= XE_BO_FLAG_SYSTEM;
	else
		flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile);

	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
	if (IS_ERR(ggtt->scratch)) {
		err = PTR_ERR(ggtt->scratch);
		goto err;
	}

	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch));

	xe_ggtt_initial_clear(ggtt);

	return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt);
err:
	ggtt->scratch = NULL;
	return err;
}

static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
{
	int err;

	if (!gt)
		return;

	err = xe_tlb_inval_ggtt(&gt->tlb_inval);
	xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err));
}

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);

	/*
	 * XXX: Barrier for GGTT pages. Unsure exactly why this is required but
	 * without it LNL is having issues with the GuC reading the scratch page
	 * vs. the correct GGTT page. Not particularly a hot code path so blindly
	 * do a mmio read here which results in the GuC reading the correct GGTT
	 * page.
	 */
	xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);

	/* Each GT in a tile has its own TLB to cache GGTT lookups */
	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
}

static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
			      const struct drm_mm_node *node, const char *description)
{
	char buf[10];

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
		xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n",
			    node->start, node->start + node->size, buf, description);
	}
}

/**
 * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses
 * @node: the &xe_ggtt_node to hold reserved GGTT node
 * @start: the starting GGTT address of the reserved region
 * @end: the end GGTT address of the reserved region
 *
 * To be used in cases where ggtt->lock is already taken.
 * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end)
{
	struct xe_ggtt *ggtt = node->ggtt;
	int err;

	xe_tile_assert(ggtt->tile, start < end);
	xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE));
	xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE));
	xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base));
	lockdep_assert_held(&ggtt->lock);

	node->base.color = 0;
	node->base.start = start;
	node->base.size = end - start;

	err = drm_mm_reserve_node(&ggtt->mm, &node->base);

	if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
			 node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
		return err;

	xe_ggtt_dump_node(ggtt, &node->base, "balloon");
	return 0;
}

/**
 * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region
 * @node: the &xe_ggtt_node with reserved GGTT region
 *
 * To be used in cases where ggtt->lock is already taken.
 * See xe_ggtt_node_insert_balloon_locked() for details.
 */
void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node)
{
	if (!xe_ggtt_node_allocated(node))
		return;

	lockdep_assert_held(&node->ggtt->lock);

	xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon");

	drm_mm_remove_node(&node->base);
}
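
/*
 * Ballooning usage sketch (e.g. for marking a GGTT range unusable by the
 * current VF; ggtt->lock must already be held, error handling is omitted and
 * names are illustrative):
 *
 *	struct xe_ggtt_node *node = xe_ggtt_node_init(ggtt);
 *
 *	xe_ggtt_node_insert_balloon_locked(node, unusable_start, unusable_end);
 *	...
 *	xe_ggtt_node_remove_balloon_locked(node);
 */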

static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	struct xe_tile *tile = ggtt->tile;
	struct xe_device *xe = tile_to_xe(tile);
	u64 __maybe_unused wopcm = xe_wopcm_size(xe);

	xe_tile_assert(tile, start >= wopcm);
	xe_tile_assert(tile, start + size < ggtt->size - wopcm);
}

/**
 * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range.
 * @ggtt: the &xe_ggtt struct instance
 * @shift: change to the location of area provisioned for current VF
 *
 * This function moves all nodes from the GGTT VM to a temporary list. These nodes are
 * expected to represent allocations in the range formerly assigned to the current VF,
 * before the range changed. Once the GGTT VM is completely clear of any nodes, they are
 * re-added with shifted offsets.
 *
 * This function cannot fail, because it only shifts existing nodes without any
 * additional processing. If the nodes could exist at the old addresses, they will
 * also fit at the new ones. A failure inside this function would indicate that the
 * list of nodes was either already damaged, or that the shift moves the address range
 * outside of valid bounds. Both cases justify an assert rather than an error code.
 */
void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift)
{
	struct xe_tile *tile __maybe_unused = ggtt->tile;
	struct drm_mm_node *node, *tmpn;
	LIST_HEAD(temp_list_head);

	lockdep_assert_held(&ggtt->lock);

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG))
		drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm)
			xe_ggtt_assert_fit(ggtt, node->start + shift, node->size);

	drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) {
		drm_mm_remove_node(node);
		list_add(&node->node_list, &temp_list_head);
	}

	list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) {
		list_del(&node->node_list);
		node->start += shift;
		drm_mm_reserve_node(&ggtt->mm, node);
		xe_tile_assert(tile, drm_mm_node_allocated(node));
	}
}

/**
 * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT
 * @node: the &xe_ggtt_node to be inserted
 * @size: size of the node
 * @align: alignment constraint of the node
 * @mm_flags: flags to control the node behavior
 *
 * It cannot be called without first having called xe_ggtt_init() once.
 * To be used in cases where ggtt->lock is already taken.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node,
			       u32 size, u32 align, u32 mm_flags)
{
	return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0,
					  mm_flags);
}

/**
 * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT
 * @node: the &xe_ggtt_node to be inserted
 * @size: size of the node
 * @align: alignment constraint of the node
 *
 * It cannot be called without first having called xe_ggtt_init() once.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align)
{
	int ret;

	if (!node || !node->ggtt)
		return -ENOENT;

	mutex_lock(&node->ggtt->lock);
	ret = xe_ggtt_node_insert_locked(node, size, align,
					 DRM_MM_INSERT_HIGH);
	mutex_unlock(&node->ggtt->lock);

	return ret;
}

/**
 * xe_ggtt_node_init - Initialize %xe_ggtt_node struct
 * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved.
 *
 * This function will allocate the struct %xe_ggtt_node and return its pointer.
 * This struct will then be freed after the node removal upon xe_ggtt_node_remove()
 * or xe_ggtt_node_remove_balloon_locked().
 * Having the %xe_ggtt_node struct allocated doesn't mean that the node is already
 * allocated in GGTT. Only xe_ggtt_node_insert(), xe_ggtt_node_insert_locked() or
 * xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved
 * in GGTT.
 *
 * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise.
 **/
struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt)
{
	struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS);

	if (!node)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func);
	node->ggtt = ggtt;

	return node;
}

/**
 * xe_ggtt_node_fini - Forcibly finalize %xe_ggtt_node struct
 * @node: the &xe_ggtt_node to be freed
 *
 * If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(),
 * or xe_ggtt_node_insert_balloon_locked(), and this @node is not going to be reused, then
 * this function needs to be called to free the %xe_ggtt_node struct.
 **/
void xe_ggtt_node_fini(struct xe_ggtt_node *node)
{
	kfree(node);
}

/**
 * xe_ggtt_node_allocated - Check if node is allocated in GGTT
 * @node: the &xe_ggtt_node to be inspected
 *
 * Return: True if allocated, False otherwise.
 */
bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
{
	if (!node || !node->ggtt)
		return false;

	return drm_mm_node_allocated(&node->base);
}

/**
 * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node.
 * @node: the &xe_ggtt_node
 *
 * Return: GGTT node page table entries size in bytes.
 */
size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node)
{
	if (!node)
		return 0;

	return node->base.size / XE_PAGE_SIZE * sizeof(u64);
}
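
/*
 * Node lifecycle usage sketch (size and align are whatever the caller needs;
 * names are illustrative only):
 *
 *	struct xe_ggtt_node *node = xe_ggtt_node_init(ggtt);
 *	int err;
 *
 *	if (IS_ERR(node))
 *		return PTR_ERR(node);
 *
 *	err = xe_ggtt_node_insert(node, size, align);
 *	if (err) {
 *		xe_ggtt_node_fini(node);
 *		return err;
 *	}
 *
 *	...					node->base.start is the GGTT address
 *	xe_ggtt_node_remove(node, true);	removal also frees the node struct
 */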

/**
 * xe_ggtt_map_bo - Map the BO into GGTT
 * @ggtt: the &xe_ggtt where node will be mapped
 * @node: the &xe_ggtt_node where this BO is mapped
 * @bo: the &xe_bo to be mapped
 * @pat_index: Which pat_index to use.
 */
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
		    struct xe_bo *bo, u16 pat_index)
{
	u64 start, pte, end;
	struct xe_res_cursor cur;

	if (XE_WARN_ON(!node))
		return;

	start = node->base.start;
	end = start + xe_bo_size(bo);

	pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
		xe_assert(xe_bo_device(bo), bo->ttm.ttm);

		for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur);
		     cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
						   pte | xe_res_dma(&cur));
	} else {
		/* Prepend GPU offset */
		pte |= vram_region_gpu_offset(bo->ttm.resource);

		for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
		     cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
						   pte + cur.start);
	}
}

/**
 * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT
 * @ggtt: the &xe_ggtt where node will be mapped
 * @bo: the &xe_bo to be mapped
 *
 * This is used to restore a GGTT mapping after suspend.
 */
void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];

	mutex_lock(&ggtt->lock);
	xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index);
	mutex_unlock(&ggtt->lock);
}
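
/*
 * Worked example for xe_ggtt_map_bo() above (illustrative numbers): a 16 KiB
 * system-memory BO bound at node->base.start 0x100000 gets four PTEs written
 * at GGTT offsets 0x100000, 0x101000, 0x102000 and 0x103000, each set to
 * pte_encode_flags(bo, pat_index) OR'ed with the DMA address of the
 * corresponding 4 KiB page. VRAM/stolen BOs use the VRAM region GPU offset
 * plus the cursor position instead of a DMA address.
 */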

static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
				  u64 start, u64 end, struct drm_exec *exec)
{
	u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
	u8 tile_id = ggtt->tile->id;
	int err;

	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
		alignment = SZ_64K;

	if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
		/* Someone's already inserted this BO in the GGTT */
		xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));
		return 0;
	}

	err = xe_bo_validate(bo, NULL, false, exec);
	if (err)
		return err;

	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));

	bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(bo->ggtt_node[tile_id])) {
		err = PTR_ERR(bo->ggtt_node[tile_id]);
		bo->ggtt_node[tile_id] = NULL;
		goto out;
	}

	mutex_lock(&ggtt->lock);
	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
					  xe_bo_size(bo), alignment, 0, start, end, 0);
	if (err) {
		xe_ggtt_node_fini(bo->ggtt_node[tile_id]);
		bo->ggtt_node[tile_id] = NULL;
	} else {
		u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
		u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];

		xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index);
	}
	mutex_unlock(&ggtt->lock);

	if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE)
		xe_ggtt_invalidate(ggtt);

out:
	xe_pm_runtime_put(tile_to_xe(ggtt->tile));

	return err;
}

/**
 * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space
 * @ggtt: the &xe_ggtt where bo will be inserted
 * @bo: the &xe_bo to be inserted
 * @start: address where it will be inserted
 * @end: end of the range where it will be inserted
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
			 u64 start, u64 end, struct drm_exec *exec)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec);
}

/**
 * xe_ggtt_insert_bo - Insert BO into GGTT
 * @ggtt: the &xe_ggtt where bo will be inserted
 * @bo: the &xe_bo to be inserted
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo,
		      struct drm_exec *exec)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec);
}

/**
 * xe_ggtt_remove_bo - Remove a BO from the GGTT
 * @ggtt: the &xe_ggtt where node will be removed
 * @bo: the &xe_bo to be removed
 */
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u8 tile_id = ggtt->tile->id;

	/* This BO is not currently in the GGTT */
	if (XE_WARN_ON(!bo->ggtt_node[tile_id]))
		return;

	xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));

	xe_ggtt_node_remove(bo->ggtt_node[tile_id],
			    bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
}
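
/*
 * Pinning sketch for a kernel BO that must be visible through the GGTT
 * (error handling omitted; exec is the caller's drm_exec transaction):
 *
 *	err = xe_ggtt_insert_bo(ggtt, bo, exec);
 *	...
 *	xe_ggtt_remove_bo(ggtt, bo);
 *
 * The resulting GGTT address is bo->ggtt_node[tile_id]->base.start, and the
 * mapping is restored on resume via xe_ggtt_map_bo_unlocked().
 */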

/**
 * xe_ggtt_largest_hole - Largest GGTT hole
 * @ggtt: the &xe_ggtt that will be inspected
 * @alignment: minimum alignment
 * @spare: If not NULL: in: desired memory size to be spared / out: Adjusted possible spare
 *
 * Return: size of the largest contiguous GGTT region
 */
u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare)
{
	const struct drm_mm *mm = &ggtt->mm;
	const struct drm_mm_node *entry;
	u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile));
	u64 hole_start, hole_end, hole_size;
	u64 max_hole = 0;

	mutex_lock(&ggtt->lock);

	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
		hole_start = max(hole_start, hole_min_start);
		hole_start = ALIGN(hole_start, alignment);
		hole_end = ALIGN_DOWN(hole_end, alignment);
		if (hole_start >= hole_end)
			continue;
		hole_size = hole_end - hole_start;
		if (spare)
			*spare -= min3(*spare, hole_size, max_hole);
		max_hole = max(max_hole, hole_size);
	}

	mutex_unlock(&ggtt->lock);

	return max_hole;
}

#ifdef CONFIG_PCI_IOV
static u64 xe_encode_vfid_pte(u16 vfid)
{
	return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT;
}

static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid)
{
	u64 start = node->start;
	u64 size = node->size;
	u64 end = start + size - 1;
	u64 pte = xe_encode_vfid_pte(vfid);

	lockdep_assert_held(&ggtt->lock);

	if (!drm_mm_node_allocated(node))
		return;

	while (start < end) {
		ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte);
		start += XE_PAGE_SIZE;
	}

	xe_ggtt_invalidate(ggtt);
}

/**
 * xe_ggtt_assign - assign a GGTT region to the VF
 * @node: the &xe_ggtt_node to update
 * @vfid: the VF identifier
 *
 * This function is used by the PF driver to assign a GGTT region to the VF.
 * In addition to the PTE's VFID bits 11:2, the PRESENT bit 0 is also set, as
 * on some platforms VFs can't modify that either.
 */
void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid)
{
	mutex_lock(&node->ggtt->lock);
	xe_ggtt_assign_locked(node->ggtt, &node->base, vfid);
	mutex_unlock(&node->ggtt->lock);
}

/**
 * xe_ggtt_node_save() - Save a &xe_ggtt_node to a buffer.
 * @node: the &xe_ggtt_node to be saved
 * @dst: destination buffer
 * @size: destination buffer size in bytes
 * @vfid: VF identifier
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid)
{
	struct xe_ggtt *ggtt;
	u64 start, end;
	u64 *buf = dst;
	u64 pte;

	if (!node)
		return -ENOENT;

	guard(mutex)(&node->ggtt->lock);

	if (xe_ggtt_node_pt_size(node) != size)
		return -EINVAL;

	ggtt = node->ggtt;
	start = node->base.start;
	end = start + node->base.size - 1;

	while (start < end) {
		pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start);
		if (vfid != u64_get_bits(pte, GGTT_PTE_VFID))
			return -EPERM;

		*buf++ = u64_replace_bits(pte, 0, GGTT_PTE_VFID);
		start += XE_PAGE_SIZE;
	}

	return 0;
}

/**
 * xe_ggtt_node_load() - Load a &xe_ggtt_node from a buffer.
 * @node: the &xe_ggtt_node to be loaded
 * @src: source buffer
 * @size: source buffer size in bytes
 * @vfid: VF identifier
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid)
{
	u64 vfid_pte = xe_encode_vfid_pte(vfid);
	const u64 *buf = src;
	struct xe_ggtt *ggtt;
	u64 start, end;

	if (!node)
		return -ENOENT;

	guard(mutex)(&node->ggtt->lock);

	if (xe_ggtt_node_pt_size(node) != size)
		return -EINVAL;

	ggtt = node->ggtt;
	start = node->base.start;
	end = start + node->base.size - 1;

	while (start < end) {
		vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID);
		ggtt->pt_ops->ggtt_set_pte(ggtt, start, vfid_pte);
		start += XE_PAGE_SIZE;
	}
	xe_ggtt_invalidate(ggtt);

	return 0;
}

#endif

/**
 * xe_ggtt_dump - Dump GGTT for debug
 * @ggtt: the &xe_ggtt to be dumped
 * @p: the &drm_printer helper handle to be used to dump the information
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->lock);
	if (err)
		return err;

	drm_mm_print(&ggtt->mm, p);
	mutex_unlock(&ggtt->lock);
	return err;
}
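
/*
 * The save/load helpers above appear intended for VF state save/restore: the
 * saved PTEs have the GGTT_PTE_VFID field cleared, and the VFID is stamped
 * back on load. A sketch of the expected flow (names illustrative, buffer
 * management and error handling omitted):
 *
 *	size_t sz = xe_ggtt_node_pt_size(node);
 *	u64 *buf = kvmalloc(sz, GFP_KERNEL);
 *
 *	xe_ggtt_node_save(node, buf, sz, vfid);
 *	...
 *	xe_ggtt_node_load(node, buf, sz, vfid);
 */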

/**
 * xe_ggtt_print_holes - Print holes
 * @ggtt: the &xe_ggtt to be inspected
 * @alignment: min alignment
 * @p: the &drm_printer
 *
 * Print GGTT ranges that are available and return total size available.
 *
 * Return: Total available size.
 */
u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p)
{
	const struct drm_mm *mm = &ggtt->mm;
	const struct drm_mm_node *entry;
	u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile));
	u64 hole_start, hole_end, hole_size;
	u64 total = 0;
	char buf[10];

	mutex_lock(&ggtt->lock);

	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
		hole_start = max(hole_start, hole_min_start);
		hole_start = ALIGN(hole_start, alignment);
		hole_end = ALIGN_DOWN(hole_end, alignment);
		if (hole_start >= hole_end)
			continue;
		hole_size = hole_end - hole_start;
		total += hole_size;

		string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf));
		drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n",
			   hole_start, hole_end - 1, buf);
	}

	mutex_unlock(&ggtt->lock);

	return total;
}

/**
 * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO
 * @ggtt: &xe_ggtt
 * @bo: &xe_bo
 * @pat_index: The pat_index for the PTE.
 *
 * This function returns the pte_flags for a given BO, without address.
 * It's used for DPT to fill a GGTT mapped BO with a linear lookup table.
 *
 * Return: The PTE flags for the given BO and pat_index.
 */
u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt,
			     struct xe_bo *bo, u16 pat_index)
{
	return ggtt->pt_ops->pte_encode_flags(bo, pat_index);
}

/**
 * xe_ggtt_read_pte - Read a PTE from the GGTT
 * @ggtt: &xe_ggtt
 * @offset: the offset for which the mapping should be read.
 *
 * Used by testcases, and by display reading out an inherited bios FB.
 *
 * Return: The raw PTE value at @offset.
 */
u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset)
{
	return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE));
}
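
/*
 * Reading back a PTE is symmetrical to writing one: the GGTT offset is a byte
 * address, so @offset / XE_PAGE_SIZE indexes the 64-bit PTE array in the GSM.
 * For example (illustrative only), xe_ggtt_read_pte(ggtt, 0x100000) returns
 * the raw PTE that maps GGTT address 0x100000, i.e. entry 256 of the table.
 */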