// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_ggtt.h"

#include <linux/sizes.h>

#include <drm/drm_managed.h>
#include <drm/i915_drm.h>

#include "regs/xe_gt_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_wopcm.h"

#define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
#define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)

/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
#define GUC_GGTT_TOP	0xFEE00000

static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
				   u16 pat_index)
{
	u64 pte;

	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
	pte |= XE_PAGE_PRESENT;

	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
		pte |= XE_GGTT_PTE_DM;

	return pte;
}

static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
				    u16 pat_index)
{
	struct xe_device *xe = xe_bo_device(bo);
	u64 pte;

	pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index);

	xe_assert(xe, pat_index <= 3);

	if (pat_index & BIT(0))
		pte |= XELPG_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= XELPG_GGTT_PTE_PAT1;

	return pte;
}

static unsigned int probe_gsm_size(struct pci_dev *pdev)
{
	u16 gmch_ctl, ggms;

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &gmch_ctl);
	ggms = (gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
	return ggms ? SZ_1M << ggms : 0;
}
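
/*
 * Worked example (illustrative only): a GGMS field of 3 yields an 8 MiB GSM.
 * At 8 bytes per PTE that is 1M entries, each mapping a 4 KiB page, so
 * xe_ggtt_init_early() starts out with a 4 GiB GGTT, which is then clamped
 * to GUC_GGTT_TOP.
 */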

void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte)
{
	xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
	xe_tile_assert(ggtt->tile, addr < ggtt->size);

	writeq(pte, &ggtt->gsm[addr >> XE_PTE_SHIFT]);
}

static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 end = start + size - 1;
	u64 scratch_pte;

	xe_tile_assert(ggtt->tile, start < end);

	if (ggtt->scratch)
		scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0,
							  pat_index);
	else
		scratch_pte = 0;

	while (start < end) {
		xe_ggtt_set_pte(ggtt, start, scratch_pte);
		start += XE_PAGE_SIZE;
	}
}

static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
	struct xe_ggtt *ggtt = arg;

	mutex_destroy(&ggtt->lock);
	drm_mm_takedown(&ggtt->mm);
}

static void ggtt_fini(struct drm_device *drm, void *arg)
{
	struct xe_ggtt *ggtt = arg;

	ggtt->scratch = NULL;
}

static void primelockdep(struct xe_ggtt *ggtt)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	might_lock(&ggtt->lock);
	fs_reclaim_release(GFP_KERNEL);
}

static const struct xe_ggtt_pt_ops xelp_pt_ops = {
	.pte_encode_bo = xelp_ggtt_pte_encode_bo,
};

static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
	.pte_encode_bo = xelpg_ggtt_pte_encode_bo,
};

/*
 * Early GGTT initialization, which allows creating new mappings usable by the
 * GuC.
 * Mappings are not usable by the HW engines, as the GGTT doesn't have the
 * scratch / initial clear done to it yet. That will happen in the regular,
 * non-early GGTT init.
 */
int xe_ggtt_init_early(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	unsigned int gsm_size;

	gsm_size = probe_gsm_size(pdev);
	if (gsm_size == 0) {
		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
		return -ENOMEM;
	}

	ggtt->gsm = ggtt->tile->mmio.regs + SZ_8M;
	ggtt->size = (gsm_size / 8) * (u64) XE_PAGE_SIZE;

	if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
		ggtt->flags |= XE_GGTT_FLAGS_64K;

	/*
	 * 8B per entry, each points to a 4KB page.
	 *
	 * The GuC address space is limited on both ends of the GGTT, because
	 * the GuC shim HW redirects accesses to those addresses to other HW
	 * areas instead of going through the GGTT. On the bottom end, the GuC
	 * can't access offsets below the WOPCM size, while on the top side the
	 * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of
	 * checking each object to see if it is accessed by the GuC or not, we
	 * just exclude those areas from the allocator. Additionally, to
	 * simplify the driver load, we use the maximum WOPCM size in this
	 * logic instead of the programmed one, so we don't need to wait until
	 * the actual size to be programmed is determined (which requires FW
	 * fetch) before initializing the GGTT. These simplifications might
	 * waste space in the GGTT (about 20-25 MB depending on the platform),
	 * but we can live with this.
	 *
	 * Another benefit of this is that the GuC bootrom can't access
	 * anything below the WOPCM max size, so anything the bootrom needs to
	 * access (e.g. an RSA key) needs to be placed in the GGTT above the
	 * WOPCM max size. Starting the GGTT allocations above the WOPCM max
	 * size gives us the correct placement for free.
	 */
	if (ggtt->size > GUC_GGTT_TOP)
		ggtt->size = GUC_GGTT_TOP;

	if (GRAPHICS_VERx100(xe) >= 1270)
		ggtt->pt_ops = &xelpg_pt_ops;
	else
		ggtt->pt_ops = &xelp_pt_ops;

	drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
		    ggtt->size - xe_wopcm_size(xe));
	mutex_init(&ggtt->lock);
	primelockdep(ggtt);

	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
}

static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
{
	struct drm_mm_node *hole;
	u64 start, end;

	/* Display may have allocated inside ggtt, so be careful with clearing here */
	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);
	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
		xe_ggtt_clear(ggtt, start, end - start);

	xe_ggtt_invalidate(ggtt);
	mutex_unlock(&ggtt->lock);
	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
}
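
/*
 * Typical probe ordering (sketch, not a formal contract): xe_ggtt_init_early()
 * runs early, before the GuC firmware is fetched, so that GuC-visible mappings
 * can already be created; xe_ggtt_init() below then allocates the scratch page
 * and clears the remaining holes, making the GGTT safe for the HW engines too.
 */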

int xe_ggtt_init(struct xe_ggtt *ggtt)
{
	struct xe_device *xe = tile_to_xe(ggtt->tile);
	unsigned int flags;
	int err;

	/*
	 * So that we don't need to worry about the 64K GGTT layout when
	 * dealing with scratch entries, keep the scratch page in system
	 * memory on platforms where 64K pages are needed for VRAM.
	 */
	flags = XE_BO_CREATE_PINNED_BIT;
	if (ggtt->flags & XE_GGTT_FLAGS_64K)
		flags |= XE_BO_CREATE_SYSTEM_BIT;
	else
		flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);

	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
	if (IS_ERR(ggtt->scratch)) {
		err = PTR_ERR(ggtt->scratch);
		goto err;
	}

	xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size);

	xe_ggtt_initial_clear(ggtt);

	return drmm_add_action_or_reset(&xe->drm, ggtt_fini, ggtt);
err:
	ggtt->scratch = NULL;
	return err;
}

#define GUC_TLB_INV_CR				XE_REG(0xcee8)
#define GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
#define PVC_GUC_TLB_INV_DESC0			XE_REG(0xcf7c)
#define PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
#define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
#define PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)

static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
{
	if (!gt)
		return;

	/*
	 * Invalidation can happen when there's no in-flight work keeping the
	 * GT awake. We need to explicitly grab forcewake to ensure the GT
	 * and GuC are accessible.
	 */
	xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);

	/* TODO: vfunc for GuC vs. non-GuC */

	if (gt->uc.guc.submission_state.enabled) {
		int seqno;

		seqno = xe_gt_tlb_invalidation_guc(gt);
		xe_gt_assert(gt, seqno > 0);
		if (seqno > 0)
			xe_gt_tlb_invalidation_wait(gt, seqno);
	} else if (xe_device_uc_enabled(gt_to_xe(gt))) {
		struct xe_device *xe = gt_to_xe(gt);

		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
					PVC_GUC_TLB_INV_DESC0_VALID);
		} else {
			xe_mmio_write32(gt, GUC_TLB_INV_CR,
					GUC_TLB_INV_CR_INVALIDATE);
		}
	}

	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}

void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
{
	/* Each GT in a tile has its own TLB to cache GGTT lookups */
	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
	ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
}

void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 addr, scratch_pte;

	scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0, pat_index);

	printk("%sGlobal GTT:", prefix);
	for (addr = 0; addr < ggtt->size; addr += XE_PAGE_SIZE) {
		unsigned int i = addr / XE_PAGE_SIZE;

		xe_tile_assert(ggtt->tile, addr <= U32_MAX);
		if (ggtt->gsm[i] == scratch_pte)
			continue;

		printk("%s ggtt[0x%08x] = 0x%016llx",
		       prefix, (u32)addr, ggtt->gsm[i]);
	}
}

int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
				       u32 size, u32 align, u32 mm_flags)
{
	return drm_mm_insert_node_generic(&ggtt->mm, node, size, align, 0,
					  mm_flags);
}

int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
				u32 size, u32 align)
{
	int ret;

	mutex_lock(&ggtt->lock);
	ret = xe_ggtt_insert_special_node_locked(ggtt, node, size,
						 align, DRM_MM_INSERT_HIGH);
	mutex_unlock(&ggtt->lock);

	return ret;
}

void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
	u64 start = bo->ggtt_node.start;
	u64 offset, pte;

	for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
		xe_ggtt_set_pte(ggtt, start + offset, pte);
	}

	xe_ggtt_invalidate(ggtt);
}
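
/*
 * Usage sketch (illustrative only, error handling elided): a caller that needs
 * a BO to be visible through the GGTT typically does
 *
 *	err = xe_ggtt_insert_bo(ggtt, bo);
 *	...
 *	xe_ggtt_remove_bo(ggtt, bo);
 *
 * Insertion validates the BO, reserves a node in ggtt->mm, writes the PTEs via
 * xe_ggtt_map_bo() and invalidates the GT TLBs; removal rewrites the range
 * with the scratch PTE and invalidates again.
 */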

static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
				  u64 start, u64 end)
{
	int err;
	u64 alignment = XE_PAGE_SIZE;

	if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
		alignment = SZ_64K;

	if (XE_WARN_ON(bo->ggtt_node.size)) {
		/* Someone's already inserted this BO in the GGTT */
		xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
		return 0;
	}

	err = xe_bo_validate(bo, NULL, false);
	if (err)
		return err;

	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);
	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
					  alignment, 0, start, end, 0);
	if (!err)
		xe_ggtt_map_bo(ggtt, bo);
	mutex_unlock(&ggtt->lock);
	xe_device_mem_access_put(tile_to_xe(ggtt->tile));

	return err;
}

int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
			 u64 start, u64 end)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
}

int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
}

void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
{
	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
	mutex_lock(&ggtt->lock);

	xe_ggtt_clear(ggtt, node->start, node->size);
	drm_mm_remove_node(node);
	node->size = 0;

	xe_ggtt_invalidate(ggtt);

	mutex_unlock(&ggtt->lock);
	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
}

void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	/* This BO is not currently in the GGTT */
	if (XE_WARN_ON(!bo->ggtt_node.size))
		return;

	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);

	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
}

int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
{
	int err;

	err = mutex_lock_interruptible(&ggtt->lock);
	if (err)
		return err;

	drm_mm_print(&ggtt->mm, p);
	mutex_unlock(&ggtt->lock);
	return err;
}