// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/sched/mm.h>

#include <drm/drm_cache.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "i915_reg.h"
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"

bool i915_ggtt_require_binder(struct drm_i915_private *i915)
{
	/* Wa_13010847436 & Wa_14019519902 */
	return MEDIA_VER_FULL(i915) == IP_VER(13, 0);
}

static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
	return IS_BROXTON(i915) && i915_vtd_active(i915);
}

bool intel_vm_no_concurrent_access_wa(struct drm_i915_private *i915)
{
	return IS_CHERRYVIEW(i915) || intel_ggtt_update_needs_vtd_wa(i915);
}

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;

		if (vm->fpriv)
			i915_drm_client_add_object(vm->fpriv->client, obj);
	}

	return obj;
}

int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = intel_gt_coherent_map_type(vm->gt, obj, true);
	/*
	 * FIXME: It is suspected that some Address Translation Service (ATS)
	 * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
	 * Applying a write barrier to the ppgtt set entry functions appeared
	 * to have no effect, so we must temporarily use I915_MAP_WC here on
	 * MTL until a proper ATS solution is found.
	 */
	if (IS_METEORLAKE(vm->i915))
		type = I915_MAP_WC;

	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

static void clear_vm_list(struct list_head *list)
{
	struct i915_vma *vma, *vn;

	list_for_each_entry_safe(vma, vn, list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		if (!i915_gem_object_get_rcu(obj)) {
			/*
			 * Object is dying, but has not yet cleared its
			 * vma list. Unbind the dying vma to ensure our
			 * list is completely drained. We leave the
			 * destruction to the object destructor to avoid
			 * the vma disappearing under it.
			 */
			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
			WARN_ON(__i915_vma_unbind(vma));

			/* Remove from the unbound list */
			list_del_init(&vma->vm_link);

			/*
			 * Delay the vm and vm mutex freeing until the
			 * object is done with destruction.
			 */
			i915_vm_resv_get(vma->vm);
			vma->vm_ddestroy = true;
		} else {
			i915_vma_destroy_locked(vma);
			i915_gem_object_put(obj);
		}
	}
}

static void __i915_vm_close(struct i915_address_space *vm)
{
	mutex_lock(&vm->mutex);

	clear_vm_list(&vm->bound_list);
	clear_vm_list(&vm->unbound_list);

	/* Check for must-fix unanticipated side-effects */
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	GEM_BUG_ON(!list_empty(&vm->unbound_list));

	mutex_unlock(&vm->mutex);
}

/* Lock the vm into the current ww context: if we lock one, we lock all. */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock, and also if we raced with the vma
 * destruction when destroying a vma.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	mutex_destroy(&vm->mutex);

	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	__i915_vm_close(vm);

	/* Synchronize async unbinds. */
	i915_vma_resource_bind_dep_sync_all(vm);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	vm->pending_unbind = RB_ROOT_CACHED;
	INIT_WORK(&vm->release_work, __i915_vm_release);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV + BXT VTD workarounds use stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory
		 * inside it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);

	memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
		 ARRAY_SIZE(vm->min_alignment));

	if (HAS_64K_PAGES(vm->i915)) {
		vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
		vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
	}

	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
	INIT_LIST_HEAD(&vm->unbound_list);
}

void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
	drm_clflush_virt_range(vaddr, scratch->base.size);
}

int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
	    !HAS_64K_PAGES(vm->i915))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_scratch_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!vm->scratch[0])
		return;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on
	 * driver load and after a GPU reset, so you can place workarounds
	 * here even if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages and not disabling
		 * the GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ?
				   GTT_CACHE_EN_ALL : 0);
		gt_WARN_ON_ONCE(gt, can_use_gtt_cache &&
				intel_uncore_read(uncore,
						  HSW_GTT_CACHE_EN) == 0);
	}
}

static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
			   MTL_PPAT_L4_0_WB);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
			   MTL_PPAT_L4_1_WT);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
			   MTL_PPAT_L4_3_UC);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
			   MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
			   MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void xelpg_setup_private_ppat(struct intel_gt *gt)
{
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
				     MTL_PPAT_L4_0_WB);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
				     MTL_PPAT_L4_1_WT);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
				     MTL_PPAT_L4_3_UC);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
				     MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
	intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
				     MTL_PPAT_L4_0_WB | MTL_3_COH_2W);

	/*
	 * Remaining PAT entries are left at the hardware-default
	 * fully-cached setting
	 */
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void xehp_setup_private_ppat(struct intel_gt *gt)
{
	enum forcewake_domains fw;
	unsigned long flags;

	fw = intel_uncore_forcewake_for_reg(gt->uncore, _MMIO(XEHP_PAT_INDEX(0).reg),
					    FW_REG_WRITE);
	intel_uncore_forcewake_get(gt->uncore, fw);

	intel_gt_mcr_lock(gt, &flags);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
	intel_gt_mcr_unlock(gt, flags);

	intel_uncore_forcewake_put(gt->uncore, fw);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC |
			   GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

void setup_private_pat(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct drm_i915_private *i915 = gt->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (gt->type == GT_MEDIA) {
		xelpmp_setup_private_ppat(gt->uncore);
		return;
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		xelpg_setup_private_ppat(gt);
	else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_setup_private_ppat(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif