// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <asm/set_memory.h>
#include <asm/smp.h>
#include <linux/types.h>
#include <linux/stop_machine.h>

#include <drm/drm_managed.h>
#include <drm/intel/i915_drm.h>
#include <drm/intel/intel-gtt.h>

#include "gem/i915_gem_lmem.h"

#include "intel_context.h"
#include "intel_ggtt_gmch.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_pci_config.h"
#include "intel_ring.h"
#include "i915_drv.h"
#include "i915_pci.h"
#include "i915_reg.h"
#include "i915_request.h"
#include "i915_scatterlist.h"
#include "i915_utils.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"
#include "gen8_ppgtt.h"
#include "intel_engine_pm.h"

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	intel_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	return 0;
}

/**
 * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
 * @vm: The VM to suspend the mappings for
 * @evict_all: Evict all VMAs
 *
 * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 */
void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all)
{
	struct i915_vma *vma, *vn;
	int save_skip_rewrite;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

retry:
	i915_gem_drain_freed_objects(vm->i915);

	mutex_lock(&vm->mutex);

	/*
	 * Skip rewriting PTE on VMA unbind.
	 * FIXME: Use an argument to i915_vma_unbind() instead?
	 */
	save_skip_rewrite = vm->skip_pte_rewrite;
	vm->skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));

		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
			continue;

		/* unlikely to race when GPU is idle, so no worry about slowpath.. */
		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
			/*
			 * No dead objects should appear here, GPU should be
			 * completely idle, and userspace suspended
			 */
			i915_gem_object_get(obj);

			mutex_unlock(&vm->mutex);

			i915_gem_object_lock(obj, NULL);
			GEM_WARN_ON(i915_vma_unbind(vma));
			i915_gem_object_unlock(obj);
			i915_gem_object_put(obj);

			vm->skip_pte_rewrite = save_skip_rewrite;
			goto retry;
		}

		if (evict_all || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
			i915_vma_wait_for_bind(vma);

			__i915_vma_evict(vma, false);
			drm_mm_remove_node(&vma->node);
		}

		i915_gem_object_unlock(obj);
	}

	vm->clear_range(vm, 0, vm->total);

	vm->skip_pte_rewrite = save_skip_rewrite;

	mutex_unlock(&vm->mutex);

	drm_WARN_ON(&vm->i915->drm, evict_all && !list_empty(&vm->bound_list));
}
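
/*
 * i915_ggtt_suspend - suspend the GGTT mappings of a device
 * @ggtt: the GGTT to suspend
 *
 * Suspends the GGTT address space itself via i915_ggtt_suspend_vm(), then
 * invalidates the GGTT and clears any pending faults on every GT sharing
 * this GGTT.
 */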
void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;

	i915_ggtt_suspend_vm(&ggtt->vm, false);
	ggtt->invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
{
	/*
	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11)
		return true;

	return false;
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 *
	 * Only perform this when GGTT is mapped as WC, see ggtt_probe_common().
	 */
	if (needs_wc_ggtt_mapping(ggtt->vm.i915))
		intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6,
				      GFX_FLSH_CNTL_EN);
}
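
/*
 * When GuC submission is in use, a GGTT update must also be made visible to
 * the GuC. Depending on what the platform and firmware support, this is done
 * either through a GuC CT TLB-invalidation request or by writing the
 * GEN12_GUC_TLB_INV_CR / GEN8_GTCR registers directly.
 */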
static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	intel_wakeref_t wakeref;

	with_intel_runtime_pm_if_active(uncore->rpm, wakeref)
		intel_guc_invalidate_tlb_guc(gt_to_guc(gt));
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct intel_gt *gt;

	gen8_ggtt_invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
		if (intel_guc_tlb_invalidation_is_available(gt_to_guc(gt)))
			guc_ggtt_ct_invalidate(gt);
		else if (GRAPHICS_VER(i915) >= 12)
			intel_uncore_write_fw(gt->uncore,
					      GEN12_GUC_TLB_INV_CR,
					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
		else
			intel_uncore_write_fw(gt->uncore,
					      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
	}
}

static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
			       unsigned int pat_index,
			       u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= MTL_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= MTL_GGTT_PTE_PAT1;

	return pte;
}

u64 gen8_ggtt_pte_encode(dma_addr_t addr,
			 unsigned int pat_index,
			 u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	return pte;
}

static dma_addr_t gen8_ggtt_pte_decode(u64 pte, bool *is_present, bool *is_local)
{
	*is_present = pte & GEN8_PAGE_PRESENT;
	*is_local = pte & GEN12_GGTT_PTE_LM;

	return pte & GEN12_GGTT_PTE_ADDR_MASK;
}

static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt = ggtt->vm.gt;

	return intel_gt_is_bind_context_ready(gt);
}

static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
{
	struct intel_context *ce;
	struct intel_gt *gt = ggtt->vm.gt;

	if (intel_gt_is_wedged(gt))
		return NULL;

	ce = gt->engine[BCS0]->bind_context;
	GEM_BUG_ON(!ce);

	/*
	 * If the GT is not already awake at this stage, fall back to the
	 * PCI-based GGTT update; otherwise __intel_wakeref_get_first() would
	 * conflict with fs_reclaim trying to allocate memory while doing
	 * rpm_resume().
	 */
	*wakeref = intel_gt_pm_get_if_awake(gt);
	if (!*wakeref)
		return NULL;

	intel_engine_pm_get(ce->engine);

	return ce;
}

static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
{
	intel_engine_pm_put(ce->engine);
	intel_gt_pm_put(ce->engine->gt, wakeref);
}
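
/*
 * Update a run of GGTT PTEs by submitting MI_UPDATE_GTT on the blitter's
 * bind context instead of writing through the GSM BAR. @pages may be NULL,
 * in which case @num_entries PTEs are all filled with @pte (typically the
 * scratch encoding). Returns false if the request could not be submitted or
 * failed, so the caller can fall back to the MMIO path.
 */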
static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
				struct sg_table *pages, u32 num_entries,
				const gen8_pte_t pte)
{
	struct i915_sched_attr attr = {};
	struct intel_gt *gt = ggtt->vm.gt;
	const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
	struct sgt_iter iter;
	struct i915_request *rq;
	struct intel_context *ce;
	intel_wakeref_t wakeref;
	u32 *cs;

	if (!num_entries)
		return true;

	ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
	if (!ce)
		return false;

	if (pages)
		iter = __sgt_iter(pages->sgl, true);

	while (num_entries) {
		int count = 0;
		dma_addr_t addr;
		/*
		 * MI_UPDATE_GTT can update 512 entries in a single command,
		 * but that ends up with an engine reset; 511 works.
		 */
		u32 n_ptes = min_t(u32, 511, num_entries);

		if (mutex_lock_interruptible(&ce->timeline->mutex))
			goto put_ce;

		intel_context_enter(ce);
		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
		intel_context_exit(ce);
		if (IS_ERR(rq)) {
			GT_TRACE(gt, "Failed to get bind request\n");
			mutex_unlock(&ce->timeline->mutex);
			goto put_ce;
		}

		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
		if (IS_ERR(cs)) {
			GT_TRACE(gt, "Failed to ring space for GGTT bind\n");
			i915_request_set_error_once(rq, PTR_ERR(cs));
			/* once a request is created, it must be queued */
			goto queue_err_rq;
		}

		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
		*cs++ = offset << 12;

		if (pages) {
			for_each_sgt_daddr_next(addr, iter) {
				if (count == n_ptes)
					break;
				*cs++ = lower_32_bits(pte | addr);
				*cs++ = upper_32_bits(pte | addr);
				count++;
			}
			/* fill remaining with scratch pte, if any */
			if (count < n_ptes) {
				memset64((u64 *)cs, scratch_pte,
					 n_ptes - count);
				cs += (n_ptes - count) * 2;
			}
		} else {
			memset64((u64 *)cs, pte, n_ptes);
			cs += n_ptes * 2;
		}

		intel_ring_advance(rq, cs);
queue_err_rq:
		i915_request_get(rq);
		__i915_request_commit(rq);
		__i915_request_queue(rq, &attr);

		mutex_unlock(&ce->timeline->mutex);
		/* This will break if the request is complete or after engine reset */
		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		if (rq->fence.error)
			goto err_rq;

		i915_request_put(rq);

		num_entries -= n_ptes;
		offset += n_ptes;
	}

	gen8_ggtt_bind_put_ce(ce, wakeref);
	return true;

err_rq:
	i915_request_put(rq);
put_ce:
	gen8_ggtt_bind_put_ce(ce, wakeref);
	return false;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static gen8_pte_t gen8_get_pte(void __iomem *addr)
{
	return readq(addr);
}
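
/*
 * MMIO path: write a single PTE directly through the GSM mapping and then
 * invalidate the GGTT so the hardware picks up the new entry.
 */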
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));

	ggtt->invalidate(ggtt);
}

static dma_addr_t gen8_ggtt_read_entry(struct i915_address_space *vm,
				       u64 offset, bool *is_present, bool *is_local)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	return ggtt->vm.pte_decode(gen8_get_pte(pte), is_present, is_local);
}

static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
				       dma_addr_t addr, u64 offset,
				       unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t pte;

	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t __iomem *gte;
	gen8_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}
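
/*
 * Bind-context equivalent of gen8_ggtt_insert_entries(): scratch PTEs for
 * the leading guard pages, then the object's PTEs, then scratch again for
 * the trailing guard/padding, all emitted via gen8_ggtt_bind_ptes().
 */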
static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					    struct i915_vma_resource *vma_res,
					    unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t pte_encode;
	u64 start, end;

	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	start = end;
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
				 vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
		goto err;

	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	return true;

err:
	return false;
}

static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					  struct i915_vma_resource *vma_res,
					  unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
					 u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
								      NULL, num_entries, scratch_pte))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_clear_range(vm, start, length);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, pat_index, flags), pte);

	ggtt->invalidate(ggtt);
}

static dma_addr_t gen6_ggtt_read_entry(struct i915_address_space *vm,
				       u64 offset,
				       bool *is_present, bool *is_local)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	return vm->pte_decode(ioread32(pte), is_present, is_local);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *gte;
	gen6_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	gte = (gen6_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;

	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}
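
/*
 * On platforms where GGTT updates must be serialized against aperture
 * access (see bxt_vtd_ggtt_wa() and the VT-d comment in gen8_gmch_probe()),
 * the insert_page/insert_entries hooks are wrapped in stop_machine(). The
 * structs below only carry the arguments into the stop_machine() callbacks.
 */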
struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	unsigned int pat_index;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
			      arg->pat_index, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  unsigned int pat_index,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, pat_index };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma_resource *vma_res;
	unsigned int pat_index;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
				 arg->pat_index, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma_resource *vma_res,
					     unsigned int pat_index,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma_res, pat_index, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}
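
/*
 * intel_ggtt_bind_vma - write the GGTT PTEs for a vma resource
 * @vm: the GGTT address space
 * @stash: unused by the GGTT bind path
 * @vma_res: the vma resource describing the pages to bind
 * @pat_index: cache/PAT index to encode into the PTEs
 * @flags: I915_VMA_*_BIND flags to record on the resource
 *
 * PTE_READ_ONLY and PTE_LM are applied according to the backing object
 * before vm->insert_entries() writes the entries.
 */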
void intel_ggtt_bind_vma(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash,
			 struct i915_vma_resource *vma_res,
			 unsigned int pat_index,
			 u32 flags)
{
	u32 pte_flags;

	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
		return;

	vma_res->bound_flags |= flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;
	if (vma_res->bi.lmem)
		pte_flags |= PTE_LM;

	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}

void intel_ggtt_unbind_vma(struct i915_address_space *vm,
			   struct i915_vma_resource *vma_res)
{
	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}

dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm,
				 u64 offset, bool *is_present, bool *is_local)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	return ggtt->vm.read_entry(vm, offset, is_present, is_local);
}

/*
 * Reserve the top of the GuC address space for firmware images. Addresses
 * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
 * which makes for a suitable range to hold GuC/HuC firmware images if the
 * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
 * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
 * of the same size anyway, which is far more than needed, to keep the logic
 * in uc_fw_ggtt_offset() simple.
 */
#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 offset;
	int ret;

	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;

	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
				   GUC_TOP_RESERVE_SIZE, offset,
				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.gt->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/*
		 * Reserve a mappable slot for our lockless error capture.
		 *
		 * We strongly prefer taking address 0x0 in order to protect
		 * other critical buffers against accidental overwrites,
		 * as writing to address 0 is a very common mistake.
		 *
		 * Since 0 may already be in use by the system (e.g. the BIOS
		 * framebuffer), we let the reservation fail quietly and hope
		 * 0 remains reserved always.
		 *
		 * If we fail to reserve 0, and then fail to find any space
		 * for an error-capture, remain silent. We can afford not
		 * to reserve an error_capture node as we have fallback
		 * paths, and we trust that 0 will remain reserved. However,
		 * the only likely reason for failure to insert is a driver
		 * bug, which we expect to cause other failures...
		 *
		 * Since CPU can perform speculative reads on error capture
		 * (write-combining allows it) add scratch page after error
		 * capture to avoid DMAR errors.
		 */
		ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture)) {
		u64 start = ggtt->error_capture.start;
		u64 size = ggtt->error_capture.size;

		ggtt->vm.scratch_range(&ggtt->vm, start, size);
		drm_dbg(&ggtt->vm.i915->drm,
			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
			start, start + size);
	}

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		drm_dbg(&ggtt->vm.i915->drm,
			"clearing unused GTT space: [%lx, %lx]\n",
			hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}
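
/*
 * With an aliasing PPGTT the GGTT vma ops are swapped for the variants
 * below: a LOCAL_BIND populates the alias PPGTT, a GLOBAL_BIND writes the
 * GGTT PTEs, and unbind tears down whichever of the two were bound.
 */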
static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
				  struct i915_vm_pt_stash *stash,
				  struct i915_vma_resource *vma_res,
				  unsigned int pat_index,
				  u32 flags)
{
	u32 pte_flags;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND)
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma_res, pat_index, flags);

	if (flags & I915_VMA_GLOBAL_BIND)
		vm->insert_entries(vm, vma_res, pat_index, pte_flags);

	vma_res->bound_flags |= flags;
}

static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
				    struct i915_vma_resource *vma_res)
{
	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
		vm->clear_range(vm, vma_res->start, vma_res->vma_size);

	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
}

static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
	if (err)
		goto err_stash;

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	return 0;

err_stash:
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
}
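
/*
 * i915_init_ggtt - set up the GGTT address space for use by GEM
 * @i915: i915 device
 *
 * Reserves the fixed nodes (vgpu balloon, error capture, GuC top) via
 * init_ggtt() and, on aliasing-PPGTT platforms, creates the alias PPGTT
 * covering the whole GGTT.
 */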
int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
		if (ret)
			cleanup_init_ggtt(to_gt(i915)->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	flush_workqueue(ggtt->vm.i915->wq);
	i915_gem_drain_freed_objects(ggtt->vm.i915);

	mutex_lock(&ggtt->vm.mutex);

	ggtt->vm.skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		bool trylock;

		trylock = i915_gem_object_trylock(obj, NULL);
		WARN_ON(!trylock);

		WARN_ON(__i915_vma_unbind(vma));
		if (trylock)
			i915_gem_object_unlock(obj);
	}

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	fini_aliasing_ppgtt(ggtt);

	intel_ggtt_fini_fences(ggtt);
	ggtt_cleanup_hw(ggtt);
}

/**
 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
 * all free objects have been drained.
 * @i915: i915 device
 */
void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
	dma_resv_fini(&ggtt->vm._resv);
}
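
/*
 * The helpers below decode the GGMS field of the GMCH control word into the
 * size of the GTT in bytes; the field holds the size in MB directly on gen6
 * and a power-of-two selector on gen8+/CHV.
 */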
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
{
	/*
	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
	 */
	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
}

static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
{
	return gen6_gttmmadr_size(i915) / 2;
}

static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	phys_addr_t phys_addr;
	u32 pte_flags;
	int ret;

	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));

	if (i915_direct_stolen_access(i915)) {
		drm_dbg(&i915->drm, "Using direct GSM access\n");
		phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK;
	} else {
		phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
	}

	if (needs_wc_ggtt_mapping(i915))
		ggtt->gsm = ioremap_wc(phys_addr, size);
	else
		ggtt->gsm = ioremap(phys_addr, size);

	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	kref_init(&ggtt->vm.resv_ref);
	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	pte_flags = 0;
	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
		pte_flags |= PTE_LM;

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    i915_gem_get_pat_index(i915,
							   I915_CACHE_NONE),
				    pte_flags);

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	free_scratch(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return DEFINE_RES_MEM(pci_resource_start(pdev, bar),
			      pci_resource_len(pdev, bar));
}
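
/*
 * Probe the gen8+ GGTT: size it from the GMCH control word, pick the PTE
 * encode/decode and insertion hooks for the platform (including the VT-d
 * stop_machine() and GGTT-binder variants), and map the GSM via
 * ggtt_probe_common().
 */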
static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
		if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
			return -ENXIO;

		ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	ggtt->vm.scratch_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
	ggtt->vm.read_entry = gen8_ggtt_read_entry;

	/*
	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
	 * and always on CHV.
	 */
	if (intel_vm_no_concurrent_access_wa(i915)) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;

		/*
		 * Calling stop_machine() version of GGTT update function
		 * at error capture/reset path will raise lockdep warning.
		 * Allow calling gen8_ggtt_insert_* directly at reset path
		 * which is safe from parallel GGTT updates.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;

		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	if (i915_ggtt_require_binder(i915)) {
		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
		/*
		 * While the GPU is hung, we might still bind VMAs for error
		 * capture. Fall back to CPU GGTT updates in that case.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
	}

	if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
		ggtt->invalidate = guc_ggtt_invalidate;
	else
		ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
	else
		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	ggtt->vm.pte_decode = gen8_ggtt_pte_decode;

	return ggtt_probe_common(ggtt, size);
}

/*
 * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
 * so the switch-case statements in these PTE encode functions are still valid.
 * See translation table LEGACY_CACHELEVEL.
 */
static u64 snb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (pat_index != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (pat_index != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static dma_addr_t gen6_pte_decode(u64 pte, bool *is_present, bool *is_local)
{
	*is_present = pte & GEN6_PTE_VALID;
	*is_local = false;

	return ((pte & 0xff0) << 28) | (pte & ~0xfff);
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
		return -ENXIO;

	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64 << 20) ||
	    ggtt->mappable_end > (512 << 20)) {
		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
			&ggtt->mappable_end);
		return -ENXIO;
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.scratch_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.read_entry = gen6_ggtt_read_entry;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (GRAPHICS_VER(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.pte_decode = gen6_pte_decode;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	return ggtt_probe_common(ggtt, size);
}
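
/*
 * Dispatch to the generation-specific probe and then sanity check the
 * reported sizes: the GGTT must fit in 32 bits and the mappable aperture
 * cannot extend past the end of the GGTT.
 */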
static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = i915->drm.dev;
	dma_resv_init(&ggtt->vm._resv);

	if (GRAPHICS_VER(i915) >= 8)
		ret = gen8_gmch_probe(ggtt);
	else if (GRAPHICS_VER(i915) >= 6)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = intel_ggtt_gmch_probe(ggtt);

	if (ret) {
		dma_resv_fini(&ggtt->vm._resv);
		return ret;
	}

	if ((ggtt->vm.total - 1) >> 32) {
		drm_err(&i915->drm,
			"We never expected a Global GTT with more than 32bits"
			" of address space! Found %lldM!\n",
			ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		drm_err(&i915->drm,
			"mappable aperture extends past end of GGTT,"
			" aperture=%pa, total=%llx\n",
			&ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	int ret, i;

	for_each_gt(gt, i915, i) {
		ret = intel_gt_assign_ggtt(gt);
		if (ret)
			return ret;
	}

	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
	if (ret)
		return ret;

	if (i915_vtd_active(i915))
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt;

	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ggtt->gt_list);

	return ggtt;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (GRAPHICS_VER(i915) < 6)
		return intel_ggtt_gmch_enable_hw(i915);

	return 0;
}

/**
 * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
 * @vm: The VM to restore the mappings for
 * @all_evicted: Were all VMAs expected to be evicted on suspend?
 *
 * Restore the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 *
 * Returns %true if restoring the mapping for any object that was in a write
 * domain before suspend.
 */
bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted)
{
	struct i915_vma *vma;
	bool write_domain_objs = false;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

	if (all_evicted) {
		drm_WARN_ON(&vm->i915->drm, !list_empty(&vm->bound_list));
		return false;
	}

	/* First fill our portion of the GTT with scratch pages */
	vm->clear_range(vm, 0, vm->total);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);

		/*
		 * Clear the bound flags of the vma resource to allow
		 * ptes to be repopulated.
		 */
		vma->resource->bound_flags = 0;
		vma->ops->bind_vma(vm, NULL, vma->resource,
				   obj ? obj->pat_index :
					 i915_gem_get_pat_index(vm->i915,
								I915_CACHE_NONE),
				   was_bound);

		if (obj) { /* only used during resume => exclusive access */
			write_domain_objs |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	return write_domain_objs;
}

void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;
	bool flush;

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);

	flush = i915_ggtt_resume_vm(&ggtt->vm, false);

	if (drm_mm_node_allocated(&ggtt->error_capture))
		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
				       ggtt->error_capture.size);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_uc_resume_mappings(&gt->uc);

	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	intel_ggtt_restore_fences(ggtt);
}