// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_ggtt.h"

#include <kunit/visibility.h>
#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/intel/i915_drm.h>
#include <generated/xe_wa_oob.h>

#include "regs/xe_gt_regs.h"
#include "regs/xe_gtt_defs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
#include "xe_tile_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_tlb_inval.h"
#include "xe_wa.h"
#include "xe_wopcm.h"

/**
 * DOC: Global Graphics Translation Table (GGTT)
 *
 * Xe GGTT implements the support for a Global Virtual Address space that is used
 * for resources that are accessible to privileged (i.e. kernel-mode) processes,
 * and not tied to a specific user-level process. For example, the Graphics
 * micro-Controller (GuC) and Display Engine (if present) utilize this Global
 * address space.
 *
 * The Global GTT (GGTT) translates from the Global virtual address to a physical
 * address that can be accessed by HW. The GGTT is a flat, single-level table.
 *
 * Xe implements a simplified version of the GGTT specifically managing only a
 * certain range of it that goes from the Write Once Protected Content Memory (WOPCM)
 * layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to
 * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space
 * is limited on both ends of the GGTT, because the GuC shim HW redirects
 * accesses to those addresses to other HW areas instead of going through the
 * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size,
 * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things
 * simple, instead of checking each object to see if it is accessed by the GuC or
 * not, we just exclude those areas from the allocator. Additionally, to simplify
 * the driver load, we use the maximum WOPCM size in this logic instead of the
 * programmed one, so we don't need to wait until the actual size to be
 * programmed is determined (which requires FW fetch) before initializing the
 * GGTT. These simplifications might waste space in the GGTT (about 20-25 MB
 * depending on the platform) but we can live with this. Another benefit of this
 * is that the GuC bootrom can't access anything below the WOPCM max size, so anything
 * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT
 * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max
 * gives us the correct placement for free.
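 *
 * As a concrete illustration (numbers here are only examples, not tied to a
 * specific platform): an 8 MiB GSM holds 8 MiB / 8 bytes = 1M PTEs, each
 * mapping a 4 KiB page, i.e. a 4 GiB GGTT. With a hypothetical 16 MiB maximum
 * WOPCM size, the allocator then only manages [16 MiB, GUC_GGTT_TOP), so every
 * allocation is reachable by both the GuC and its bootrom by construction.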
 */

static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
	u64 pte = XE_PAGE_PRESENT;

	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_GGTT_PTE_DM;

	return pte;
}

static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
	struct xe_device *xe = xe_bo_device(bo);
	u64 pte;

	pte = xelp_ggtt_pte_flags(bo, pat_index);

	xe_assert(xe, pat_index <= 3);

	if (pat_index & BIT(0))
		pte |= XELPG_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XELPG_GGTT_PTE_PAT1;

	return pte;
}

static unsigned int probe_gsm_size(struct pci_dev *pdev)
{
	u16 gmch_ctl, ggms;

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
	ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
	return ggms ? SZ_1M << ggms : 0;
}

static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
{
	struct xe_tile *tile = ggtt->tile;
	struct xe_gt *affected_gt = XE_GT_WA(tile->primary_gt, 22019338487) ?
		tile->primary_gt : tile->media_gt;
	struct xe_mmio *mmio = &affected_gt->mmio;
	u32 max_gtt_writes = XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63;
	/*
	 * Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
	 * to wait for completion of prior GTT writes before letting this through.
	 * This needs to be done for all GGTT writes originating from the CPU.
	 */
	lockdep_assert_held(&ggtt->lock);

	if ((++ggtt->access_count % max_gtt_writes) == 0) {
		xe_mmio_write32(mmio, GMD_ID, 0x0);
		ggtt->access_count = 0;
	}
}

static void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
	xe_tile_assert(ggtt->tile, addr < ggtt->size);

	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
}

static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_ggtt_set_pte(ggtt, addr, pte);
	ggtt_update_access_counter(ggtt);
}

static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 end = start + size - 1;
	u64 scratch_pte;

	xe_tile_assert(ggtt->tile, start < end);

	if (ggtt->scratch)
		scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) |
			ggtt->pt_ops->pte_encode_flags(ggtt->scratch,
						       pat_index);
	else
		scratch_pte = 0;

	while (start < end) {
		ggtt->pt_ops->ggtt_set_pte(ggtt, start, scratch_pte);
		start += XE_PAGE_SIZE;
	}
}

/**
 * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile
 * @tile: &xe_tile
 *
 * Allocates a &xe_ggtt for a given tile.
 *
 * Return: &xe_ggtt on success, or NULL when out of memory.
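 *
 * The returned &xe_ggtt is drmm-managed (allocated with drmm_kzalloc()), so it
 * is released automatically with the underlying &drm_device and the caller
 * doesn't need to free it explicitly.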
 */
struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*ggtt), GFP_KERNEL);

	if (ggtt)
		ggtt->tile = tile;

	return ggtt;
}

static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
	struct xe_ggtt *ggtt = arg;

	destroy_workqueue(ggtt->wq);
	mutex_destroy(&ggtt->lock);
	drm_mm_takedown(&ggtt->mm);
}

static void ggtt_fini(void *arg)
{
	struct xe_ggtt *ggtt = arg;

	ggtt->scratch = NULL;
}

#ifdef CONFIG_LOCKDEP
void xe_ggtt_might_lock(struct xe_ggtt *ggtt)
{
	might_lock(&ggtt->lock);
}
#endif

static void primelockdep(struct xe_ggtt *ggtt)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&ggtt->lock);
	fs_reclaim_release(GFP_KERNEL);
}

static const struct xe_ggtt_pt_ops xelp_pt_ops = {
	.pte_encode_flags = xelp_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte,
};

static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
	.pte_encode_flags = xelpg_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte,
};

static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
	.pte_encode_flags = xelpg_ggtt_pte_flags,
	.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
};

static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved)
{
	drm_mm_init(&ggtt->mm, reserved,
		    ggtt->size - reserved);
	mutex_init(&ggtt->lock);
	primelockdep(ggtt);
}

int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size)
{
	ggtt->size = size;
	__xe_ggtt_init_early(ggtt, reserved);
	return 0;
}
EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit);

static void dev_fini_ggtt(void *arg)
{
	struct xe_ggtt *ggtt = arg;

	drain_workqueue(ggtt->wq);
}

/**
 * xe_ggtt_init_early - Early GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * It allows creating new mappings usable by the GuC.
 * Mappings are not usable by the HW engines, as the GGTT doesn't have its
 * scratch page nor its initial clear done yet. That will happen in the
 * regular, non-early GGTT initialization.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_init_early(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	unsigned int gsm_size;
	int err;

	if (IS_SRIOV_VF(xe) || GRAPHICS_VERx100(xe) >= 1250)
		gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */
	else
		gsm_size = probe_gsm_size(pdev);

	if (gsm_size == 0) {
		xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n");
		return -ENOMEM;
	}

	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;

	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		ggtt->flags |= XE_GGTT_FLAGS_64K;

	if (ggtt->size > GUC_GGTT_TOP)
		ggtt->size = GUC_GGTT_TOP;

	if (GRAPHICS_VERx100(xe) >= 1270)
		ggtt->pt_ops = (ggtt->tile->media_gt &&
				XE_GT_WA(ggtt->tile->media_gt, 22019338487)) ||
				XE_GT_WA(ggtt->tile->primary_gt, 22019338487) ?
				&xelpg_pt_wa_ops : &xelpg_pt_ops;
	else
		ggtt->pt_ops = &xelp_pt_ops;

	ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
	if (!ggtt->wq)
		return -ENOMEM;

	__xe_ggtt_init_early(ggtt, xe_wopcm_size(xe));

	err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
	if (err)
		return err;

	err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
	if (err)
		return err;

	if (IS_SRIOV_VF(xe)) {
		err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile);
		if (err)
			return err;
	}

	return 0;
}
ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);

static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
{
	struct drm_mm_node *hole;
	u64 start, end;

	/* Display may have allocated inside ggtt, so be careful with clearing here */
	mutex_lock(&ggtt->lock);
	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
		xe_ggtt_clear(ggtt, start, end - start);

	xe_ggtt_invalidate(ggtt);
	mutex_unlock(&ggtt->lock);
}

static void ggtt_node_remove(struct xe_ggtt_node *node)
{
	struct xe_ggtt *ggtt = node->ggtt;
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	bool bound;
	int idx;

	bound = drm_dev_enter(&xe->drm, &idx);

	mutex_lock(&ggtt->lock);
	if (bound)
		xe_ggtt_clear(ggtt, node->base.start, node->base.size);
	drm_mm_remove_node(&node->base);
	node->base.size = 0;
	mutex_unlock(&ggtt->lock);

	if (!bound)
		goto free_node;

	if (node->invalidate_on_remove)
		xe_ggtt_invalidate(ggtt);

	drm_dev_exit(idx);

free_node:
	xe_ggtt_node_fini(node);
}

static void ggtt_node_remove_work_func(struct work_struct *work)
{
	struct xe_ggtt_node *node = container_of(work, typeof(*node),
						 delayed_removal_work);
	struct xe_device *xe = tile_to_xe(node->ggtt->tile);

	xe_pm_runtime_get(xe);
	ggtt_node_remove(node);
	xe_pm_runtime_put(xe);
}

/**
 * xe_ggtt_node_remove - Remove a &xe_ggtt_node from the GGTT
 * @node: the &xe_ggtt_node to be removed
 * @invalidate: if node needs invalidation upon removal
 */
void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate)
{
	struct xe_ggtt *ggtt;
	struct xe_device *xe;

	if (!node || !node->ggtt)
		return;

	ggtt = node->ggtt;
	xe = tile_to_xe(ggtt->tile);

	node->invalidate_on_remove = invalidate;

	if (xe_pm_runtime_get_if_active(xe)) {
		ggtt_node_remove(node);
		xe_pm_runtime_put(xe);
	} else {
		queue_work(ggtt->wq, &node->delayed_removal_work);
	}
}

/**
 * xe_ggtt_init - Regular non-early GGTT initialization
 * @ggtt: the &xe_ggtt to be initialized
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_init(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	unsigned int flags;
	int err;

	/*
	 * Keep the scratch page in system memory on platforms where 64K pages
	 * are needed for VRAM, so we don't need to worry about the 64K GGTT
	 * layout when dealing with scratch entries.
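	 * The scratch page only backs otherwise-unused GGTT entries (see
	 * xe_ggtt_clear()), so placing it in system memory is sufficient.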
	 */
	flags = 0;
	if (ggtt->flags & XE_GGTT_FLAGS_64K)
		flags |= XE_BO_FLAG_SYSTEM;
	else
		flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile);

	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
	if (IS_ERR(ggtt->scratch)) {
		err = PTR_ERR(ggtt->scratch);
		goto err;
	}

	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch));

	xe_ggtt_initial_clear(ggtt);

	return devm_add_action_or_reset(xe->drm.dev, ggtt_fini, ggtt);
err:
	ggtt->scratch = NULL;
	return err;
}

static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
{
	int err;

	if (!gt)
		return;

	err = xe_tlb_inval_ggtt(&gt->tlb_inval);
	xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err));
}

static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);

	/*
	 * XXX: Barrier for GGTT pages. Unsure exactly why this is required but
	 * without this LNL is having issues with the GuC reading scratch page
	 * vs. correct GGTT page. Not particularly a hot code path so blindly
	 * do a mmio read here which results in GuC reading correct GGTT page.
	 */
	xe_mmio_read32(xe_root_tile_mmio(xe), VF_CAP_REG);

	/* Each GT in a tile has its own TLB to cache GGTT lookups */
	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
}

static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
			      const struct drm_mm_node *node, const char *description)
{
	char buf[10];

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
		xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n",
			    node->start, node->start + node->size, buf, description);
	}
}

/**
 * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses
 * @node: the &xe_ggtt_node to hold reserved GGTT node
 * @start: the starting GGTT address of the reserved region
 * @end: the end GGTT address of the reserved region
 *
 * To be used in cases where ggtt->lock is already taken.
 * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end)
{
	struct xe_ggtt *ggtt = node->ggtt;
	int err;

	xe_tile_assert(ggtt->tile, start < end);
	xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE));
	xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE));
	xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base));
	lockdep_assert_held(&ggtt->lock);

	node->base.color = 0;
	node->base.start = start;
	node->base.size = end - start;

	err = drm_mm_reserve_node(&ggtt->mm, &node->base);

	if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
			 node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
		return err;

	xe_ggtt_dump_node(ggtt, &node->base, "balloon");
	return 0;
}

/**
 * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region
 * @node: the &xe_ggtt_node with reserved GGTT region
 *
 * To be used in cases where ggtt->lock is already taken.
 * See xe_ggtt_node_insert_balloon_locked() for details.
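 *
 * A minimal sketch of the pairing, with hypothetical start/end values, error
 * handling elided and ggtt->lock already held by the caller:
 *
 *	err = xe_ggtt_node_insert_balloon_locked(node, start, end);
 *	if (!err) {
 *		...	(the [start, end) range is now excluded from the allocator)
 *		xe_ggtt_node_remove_balloon_locked(node);
 *	}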
 */
void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node)
{
	if (!xe_ggtt_node_allocated(node))
		return;

	lockdep_assert_held(&node->ggtt->lock);

	xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon");

	drm_mm_remove_node(&node->base);
}

static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	struct xe_tile *tile = ggtt->tile;
	struct xe_device *xe = tile_to_xe(tile);
	u64 __maybe_unused wopcm = xe_wopcm_size(xe);

	xe_tile_assert(tile, start >= wopcm);
	xe_tile_assert(tile, start + size < ggtt->size - wopcm);
}

/**
 * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range.
 * @ggtt: the &xe_ggtt struct instance
 * @shift: change to the location of area provisioned for current VF
 *
 * This function moves all nodes from the GGTT VM to a temporary list. These nodes
 * are expected to represent allocations in the range formerly assigned to the
 * current VF, before the range changed. Once the GGTT VM is completely clear of
 * any nodes, they are re-added with shifted offsets.
 *
 * This function cannot fail, because it only shifts existing nodes, without any
 * additional processing. If the nodes fit at the old addresses, they will fit at
 * the new ones as well. A failure inside this function would indicate that the
 * list of nodes was either already damaged, or that the shift moves the address
 * range outside of valid bounds. Both cases justify an assert rather than an
 * error code.
 */
void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift)
{
	struct xe_tile *tile __maybe_unused = ggtt->tile;
	struct drm_mm_node *node, *tmpn;
	LIST_HEAD(temp_list_head);

	lockdep_assert_held(&ggtt->lock);

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG))
		drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm)
			xe_ggtt_assert_fit(ggtt, node->start + shift, node->size);

	drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) {
		drm_mm_remove_node(node);
		list_add(&node->node_list, &temp_list_head);
	}

	list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) {
		list_del(&node->node_list);
		node->start += shift;
		drm_mm_reserve_node(&ggtt->mm, node);
		xe_tile_assert(tile, drm_mm_node_allocated(node));
	}
}

/**
 * xe_ggtt_node_insert_locked - Locked version to insert a &xe_ggtt_node into the GGTT
 * @node: the &xe_ggtt_node to be inserted
 * @size: size of the node
 * @align: alignment constraint of the node
 * @mm_flags: flags to control the node behavior
 *
 * It cannot be called without first having called xe_ggtt_init() once.
 * To be used in cases where ggtt->lock is already taken.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node,
			       u32 size, u32 align, u32 mm_flags)
{
	return drm_mm_insert_node_generic(&node->ggtt->mm, &node->base, size, align, 0,
					  mm_flags);
}

/**
 * xe_ggtt_node_insert - Insert a &xe_ggtt_node into the GGTT
 * @node: the &xe_ggtt_node to be inserted
 * @size: size of the node
 * @align: alignment constraint of the node
 *
 * It cannot be called without first having called xe_ggtt_init() once.
 *
 * Return: 0 on success or a negative error code on failure.
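 *
 * A minimal usage sketch (size and alignment values are only examples, and
 * error handling is abbreviated):
 *
 *	struct xe_ggtt_node *node = xe_ggtt_node_init(ggtt);
 *
 *	if (IS_ERR(node))
 *		return PTR_ERR(node);
 *
 *	err = xe_ggtt_node_insert(node, SZ_64K, XE_PAGE_SIZE);
 *	if (err) {
 *		xe_ggtt_node_fini(node);
 *		return err;
 *	}
 *
 *	...	(use node->base.start as the GGTT address)
 *
 *	xe_ggtt_node_remove(node, true);	(this also frees the node)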
 */
int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align)
{
	int ret;

	if (!node || !node->ggtt)
		return -ENOENT;

	mutex_lock(&node->ggtt->lock);
	ret = xe_ggtt_node_insert_locked(node, size, align,
					 DRM_MM_INSERT_HIGH);
	mutex_unlock(&node->ggtt->lock);

	return ret;
}

/**
 * xe_ggtt_node_init - Initialize %xe_ggtt_node struct
 * @ggtt: the &xe_ggtt where the new node will later be inserted/reserved.
 *
 * This function will allocate the struct %xe_ggtt_node and return its pointer.
 * This struct will then be freed after the node removal upon xe_ggtt_node_remove()
 * or xe_ggtt_node_remove_balloon_locked().
 * Having the %xe_ggtt_node struct allocated doesn't mean that the node is already
 * allocated in the GGTT. Only xe_ggtt_node_insert(), xe_ggtt_node_insert_locked()
 * or xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or
 * reserved in the GGTT.
 *
 * Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise.
 **/
struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt)
{
	struct xe_ggtt_node *node = kzalloc(sizeof(*node), GFP_NOFS);

	if (!node)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&node->delayed_removal_work, ggtt_node_remove_work_func);
	node->ggtt = ggtt;

	return node;
}

/**
 * xe_ggtt_node_fini - Forcibly finalize %xe_ggtt_node struct
 * @node: the &xe_ggtt_node to be freed
 *
 * If anything went wrong with xe_ggtt_node_insert(), xe_ggtt_node_insert_locked()
 * or xe_ggtt_node_insert_balloon_locked(), and this @node is not going to be
 * reused, then this function needs to be called to free the %xe_ggtt_node struct.
 **/
void xe_ggtt_node_fini(struct xe_ggtt_node *node)
{
	kfree(node);
}

/**
 * xe_ggtt_node_allocated - Check if node is allocated in GGTT
 * @node: the &xe_ggtt_node to be inspected
 *
 * Return: True if allocated, False otherwise.
 */
bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
{
	if (!node || !node->ggtt)
		return false;

	return drm_mm_node_allocated(&node->base);
}

/**
 * xe_ggtt_map_bo - Map the BO into GGTT
 * @ggtt: the &xe_ggtt where node will be mapped
 * @node: the &xe_ggtt_node where this BO is mapped
 * @bo: the &xe_bo to be mapped
 * @pat_index: Which pat_index to use.
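 *
 * Each written PTE combines the encoding flags for @bo with the address of the
 * backing page, conceptually:
 *
 *	pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index) | page_addr;
 *
 * where page_addr stands for the DMA address of a system memory page, or the
 * GPU offset within VRAM/stolen memory.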
 */
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
		    struct xe_bo *bo, u16 pat_index)
{
	u64 start, pte, end;
	struct xe_res_cursor cur;

	if (XE_WARN_ON(!node))
		return;

	start = node->base.start;
	end = start + xe_bo_size(bo);

	pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
	if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
		xe_assert(xe_bo_device(bo), bo->ttm.ttm);

		for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur);
		     cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
						   pte | xe_res_dma(&cur));
	} else {
		/* Prepend GPU offset */
		pte |= vram_region_gpu_offset(bo->ttm.resource);

		for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
		     cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
			ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
						   pte + cur.start);
	}
}

/**
 * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT
 * @ggtt: the &xe_ggtt where node will be mapped
 * @bo: the &xe_bo to be mapped
 *
 * This is used to restore a GGTT mapping after suspend.
 */
void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];

	mutex_lock(&ggtt->lock);
	xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index);
	mutex_unlock(&ggtt->lock);
}

static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
				  u64 start, u64 end, struct drm_exec *exec)
{
	u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
	u8 tile_id = ggtt->tile->id;
	int err;

	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
		alignment = SZ_64K;

	if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
		/* Someone's already inserted this BO in the GGTT */
		xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));
		return 0;
	}

	err = xe_bo_validate(bo, NULL, false, exec);
	if (err)
		return err;

	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));

	bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(bo->ggtt_node[tile_id])) {
		err = PTR_ERR(bo->ggtt_node[tile_id]);
		bo->ggtt_node[tile_id] = NULL;
		goto out;
	}

	mutex_lock(&ggtt->lock);
	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
					  xe_bo_size(bo), alignment, 0, start, end, 0);
	if (err) {
		xe_ggtt_node_fini(bo->ggtt_node[tile_id]);
		bo->ggtt_node[tile_id] = NULL;
	} else {
		u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
		u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];

		xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index);
	}
	mutex_unlock(&ggtt->lock);

	if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE)
		xe_ggtt_invalidate(ggtt);

out:
	xe_pm_runtime_put(tile_to_xe(ggtt->tile));

	return err;
}

/**
 * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space
 * @ggtt: the &xe_ggtt where bo will be inserted
 * @bo: the &xe_bo to be inserted
 * @start: address where it will be inserted
 * @end: end of the range where it will be inserted
 * @exec: The drm_exec transaction to use for exhaustive eviction.
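 *
 * The BO is placed within the caller-provided [@start, @end) range; as a
 * hypothetical example, a caller needing a mapping below 4 GiB could use:
 *
 *	err = xe_ggtt_insert_bo_at(ggtt, bo, 0, SZ_4G, exec);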
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
			 u64 start, u64 end, struct drm_exec *exec)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec);
}

/**
 * xe_ggtt_insert_bo - Insert BO into GGTT
 * @ggtt: the &xe_ggtt where bo will be inserted
 * @bo: the &xe_bo to be inserted
 * @exec: The drm_exec transaction to use for exhaustive eviction.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo,
		      struct drm_exec *exec)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec);
}

/**
 * xe_ggtt_remove_bo - Remove a BO from the GGTT
 * @ggtt: the &xe_ggtt where node will be removed
 * @bo: the &xe_bo to be removed
 */
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u8 tile_id = ggtt->tile->id;

	/* This BO is not currently in the GGTT */
	if (XE_WARN_ON(!bo->ggtt_node[tile_id]))
		return;

	xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));

	xe_ggtt_node_remove(bo->ggtt_node[tile_id],
			    bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
}

/**
 * xe_ggtt_largest_hole - Largest GGTT hole
 * @ggtt: the &xe_ggtt that will be inspected
 * @alignment: minimum alignment
 * @spare: if not NULL, in: desired memory size to be spared / out: adjusted possible spare
 *
 * Return: size of the largest contiguous GGTT region
 */
u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare)
{
	const struct drm_mm *mm = &ggtt->mm;
	const struct drm_mm_node *entry;
	u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile));
	u64 hole_start, hole_end, hole_size;
	u64 max_hole = 0;

	mutex_lock(&ggtt->lock);

	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
		hole_start = max(hole_start, hole_min_start);
		hole_start = ALIGN(hole_start, alignment);
		hole_end = ALIGN_DOWN(hole_end, alignment);
		if (hole_start >= hole_end)
			continue;
		hole_size = hole_end - hole_start;
		if (spare)
			*spare -= min3(*spare, hole_size, max_hole);
		max_hole = max(max_hole, hole_size);
	}

	mutex_unlock(&ggtt->lock);

	return max_hole;
}

#ifdef CONFIG_PCI_IOV
static u64 xe_encode_vfid_pte(u16 vfid)
{
	return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT;
}

static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid)
{
	u64 start = node->start;
	u64 size = node->size;
	u64 end = start + size - 1;
	u64 pte = xe_encode_vfid_pte(vfid);

	lockdep_assert_held(&ggtt->lock);

	if (!drm_mm_node_allocated(node))
		return;

	while (start < end) {
		ggtt->pt_ops->ggtt_set_pte(ggtt, start, pte);
		start += XE_PAGE_SIZE;
	}

	xe_ggtt_invalidate(ggtt);
}

/**
 * xe_ggtt_assign - assign a GGTT region to the VF
 * @node: the &xe_ggtt_node to update
 * @vfid: the VF identifier
 *
 * This function is used by the PF driver to assign a GGTT region to the VF.
 * In addition to the PTE's VFID bits 11:2, the PRESENT bit 0 is also set, as on
 * some platforms VFs can't modify that either.
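 *
 * For example, assigning a region to VF2 fills it with PTEs of the form
 * (matching xe_encode_vfid_pte() above):
 *
 *	pte = FIELD_PREP(GGTT_PTE_VFID, 2) | XE_PAGE_PRESENT;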
 */
void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid)
{
	mutex_lock(&node->ggtt->lock);
	xe_ggtt_assign_locked(node->ggtt, &node->base, vfid);
	mutex_unlock(&node->ggtt->lock);
}
#endif

/**
 * xe_ggtt_dump - Dump GGTT for debug
 * @ggtt: the &xe_ggtt to be dumped
 * @p: the &drm_printer helper handle to be used to dump the information
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->lock);
	if (err)
		return err;

	drm_mm_print(&ggtt->mm, p);
	mutex_unlock(&ggtt->lock);
	return err;
}

/**
 * xe_ggtt_print_holes - Print holes
 * @ggtt: the &xe_ggtt to be inspected
 * @alignment: min alignment
 * @p: the &drm_printer
 *
 * Print GGTT ranges that are available and return total size available.
 *
 * Return: Total available size.
 */
u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer *p)
{
	const struct drm_mm *mm = &ggtt->mm;
	const struct drm_mm_node *entry;
	u64 hole_min_start = xe_wopcm_size(tile_to_xe(ggtt->tile));
	u64 hole_start, hole_end, hole_size;
	u64 total = 0;
	char buf[10];

	mutex_lock(&ggtt->lock);

	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
		hole_start = max(hole_start, hole_min_start);
		hole_start = ALIGN(hole_start, alignment);
		hole_end = ALIGN_DOWN(hole_end, alignment);
		if (hole_start >= hole_end)
			continue;
		hole_size = hole_end - hole_start;
		total += hole_size;

		string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf));
		drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n",
			   hole_start, hole_end - 1, buf);
	}

	mutex_unlock(&ggtt->lock);

	return total;
}

/**
 * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO
 * @ggtt: &xe_ggtt
 * @bo: &xe_bo
 * @pat_index: The pat_index for the PTE.
 *
 * This function returns the pte_flags for a given BO, without the address.
 * It's used for DPT to fill a GGTT mapped BO with a linear lookup table.
 *
 * Return: The PTE encoding flags.
 */
u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt,
			     struct xe_bo *bo, u16 pat_index)
{
	return ggtt->pt_ops->pte_encode_flags(bo, pat_index);
}

/**
 * xe_ggtt_read_pte - Read a PTE from the GGTT
 * @ggtt: &xe_ggtt
 * @offset: the offset for which the mapping should be read.
 *
 * Used by testcases, and by display reading out an inherited bios FB.
 *
 * Return: The PTE value at @offset.
 */
u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset)
{
	return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE));
}