// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <asm/set_memory.h>
#include <asm/smp.h>
#include <linux/types.h>
#include <linux/stop_machine.h>

#include <drm/drm_managed.h>
#include <drm/i915_drm.h>
#include <drm/intel-gtt.h>

#include "display/intel_display.h"
#include "gem/i915_gem_lmem.h"

#include "intel_context.h"
#include "intel_ggtt_gmch.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_pci_config.h"
#include "intel_ring.h"
#include "i915_drv.h"
#include "i915_pci.h"
#include "i915_request.h"
#include "i915_scatterlist.h"
#include "i915_utils.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"
#include "gen8_ppgtt.h"
#include "intel_engine_pm.h"

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	intel_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	return 0;
}

/**
 * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
 * @vm: The VM to suspend the mappings for
 *
 * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 */
void i915_ggtt_suspend_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;
	int save_skip_rewrite;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

retry:
	i915_gem_drain_freed_objects(vm->i915);

	mutex_lock(&vm->mutex);

	/*
	 * Skip rewriting PTE on VMA unbind.
	 * FIXME: Use an argument to i915_vma_unbind() instead?
	 */
	save_skip_rewrite = vm->skip_pte_rewrite;
	vm->skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));

		if (i915_vma_is_pinned(vma))
			continue;

		/* Unlikely to race when the GPU is idle, so no need to worry about the slowpath. */
		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
			/*
			 * No dead objects should appear here; the GPU should
			 * be completely idle, and userspace suspended.
			 */
			i915_gem_object_get(obj);

			mutex_unlock(&vm->mutex);

			i915_gem_object_lock(obj, NULL);
			GEM_WARN_ON(i915_vma_unbind(vma));
			i915_gem_object_unlock(obj);
			i915_gem_object_put(obj);

			vm->skip_pte_rewrite = save_skip_rewrite;
			goto retry;
		}

		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
			i915_vma_wait_for_bind(vma);

			__i915_vma_evict(vma, false);
			drm_mm_remove_node(&vma->node);
		}

		i915_gem_object_unlock(obj);
	}

	vm->clear_range(vm, 0, vm->total);

	vm->skip_pte_rewrite = save_skip_rewrite;

	mutex_unlock(&vm->mutex);
}

void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;

	i915_ggtt_suspend_vm(&ggtt->vm);
	ggtt->invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the GGTT writes before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}
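
/*
 * Invalidate the GuC TLBs via a GuC CT request. This is only issued while
 * the device is runtime-active; otherwise there is nothing cached to flush.
 */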
static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	intel_wakeref_t wakeref;

	with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
		struct intel_guc *guc = &gt->uc.guc;

		intel_guc_invalidate_tlb_guc(guc);
	}
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct intel_gt *gt;

	gen8_ggtt_invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) {
			guc_ggtt_ct_invalidate(gt);
		} else if (GRAPHICS_VER(i915) >= 12) {
			intel_uncore_write_fw(gt->uncore,
					      GEN12_GUC_TLB_INV_CR,
					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
		} else {
			intel_uncore_write_fw(gt->uncore,
					      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
		}
	}
}

static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
			       unsigned int pat_index,
			       u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= MTL_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= MTL_GGTT_PTE_PAT1;

	return pte;
}

u64 gen8_ggtt_pte_encode(dma_addr_t addr,
			 unsigned int pat_index,
			 u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	return pte;
}

static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt = ggtt->vm.gt;

	return intel_gt_is_bind_context_ready(gt);
}

static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
{
	struct intel_context *ce;
	struct intel_gt *gt = ggtt->vm.gt;

	if (intel_gt_is_wedged(gt))
		return NULL;

	ce = gt->engine[BCS0]->bind_context;
	GEM_BUG_ON(!ce);

	/*
	 * If the GT is not awake already at this stage then fall back to the
	 * PCI-based GGTT update; otherwise __intel_wakeref_get_first() would
	 * conflict with fs_reclaim trying to allocate memory while doing
	 * rpm_resume().
	 */
	if (!intel_gt_pm_get_if_awake(gt))
		return NULL;

	intel_engine_pm_get(ce->engine);

	return ce;
}

static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
{
	intel_engine_pm_put(ce->engine);
	intel_gt_pm_put(ce->engine->gt);
}

static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
				struct sg_table *pages, u32 num_entries,
				const gen8_pte_t pte)
{
	struct i915_sched_attr attr = {};
	struct intel_gt *gt = ggtt->vm.gt;
	const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
	struct sgt_iter iter;
	struct i915_request *rq;
	struct intel_context *ce;
	u32 *cs;

	if (!num_entries)
		return true;

	ce = gen8_ggtt_bind_get_ce(ggtt);
	if (!ce)
		return false;

	if (pages)
		iter = __sgt_iter(pages->sgl, true);

	while (num_entries) {
		int count = 0;
		dma_addr_t addr;
		/*
		 * MI_UPDATE_GTT can update 512 entries in a single command but
		 * that ends up triggering an engine reset; 511 works.
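		 * So clamp each command to at most 511 PTEs and emit as many
		 * requests as needed to cover the whole range.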
		 */
		u32 n_ptes = min_t(u32, 511, num_entries);

		if (mutex_lock_interruptible(&ce->timeline->mutex))
			goto put_ce;

		intel_context_enter(ce);
		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
		intel_context_exit(ce);
		if (IS_ERR(rq)) {
			GT_TRACE(gt, "Failed to get bind request\n");
			mutex_unlock(&ce->timeline->mutex);
			goto put_ce;
		}

		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
		if (IS_ERR(cs)) {
			GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
			i915_request_set_error_once(rq, PTR_ERR(cs));
			/* Once a request is created, it must be queued. */
			goto queue_err_rq;
		}

		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
		*cs++ = offset << 12;

		if (pages) {
			for_each_sgt_daddr_next(addr, iter) {
				if (count == n_ptes)
					break;
				*cs++ = lower_32_bits(pte | addr);
				*cs++ = upper_32_bits(pte | addr);
				count++;
			}
			/* Fill the remaining entries with the scratch PTE, if any. */
			if (count < n_ptes) {
				memset64((u64 *)cs, scratch_pte,
					 n_ptes - count);
				cs += (n_ptes - count) * 2;
			}
		} else {
			memset64((u64 *)cs, pte, n_ptes);
			cs += n_ptes * 2;
		}

		intel_ring_advance(rq, cs);
queue_err_rq:
		i915_request_get(rq);
		__i915_request_commit(rq);
		__i915_request_queue(rq, &attr);

		mutex_unlock(&ce->timeline->mutex);
		/* This wait ends once the request completes or after an engine reset. */
		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		if (rq->fence.error)
			goto err_rq;

		i915_request_put(rq);

		num_entries -= n_ptes;
		offset += n_ptes;
	}

	gen8_ggtt_bind_put_ce(ce);
	return true;

err_rq:
	i915_request_put(rq);
put_ce:
	gen8_ggtt_bind_put_ce(ce);
	return false;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));

	ggtt->invalidate(ggtt);
}

static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
				       dma_addr_t addr, u64 offset,
				       unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t pte;

	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t __iomem *gte;
	gen8_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
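	 * (gen8+ GGTT PTEs have no read-only bit, so there is nothing to
	 * encode for it here.)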
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					    struct i915_vma_resource *vma_res,
					    unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t pte_encode;
	u64 start, end;

	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	start = end;
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
				 vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
		goto err;

	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	return true;

err:
	return false;
}

static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					  struct i915_vma_resource *vma_res,
					  unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}
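
/*
 * Overwrite a GGTT range with scratch PTEs, preferring the blitter bind
 * context when it is ready and falling back to CPU PTE writes otherwise.
 */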
static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
					 u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
								      NULL, num_entries, scratch_pte))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_clear_range(vm, start, length);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, pat_index, flags), pte);

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global GTT with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *gte;
	gen6_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	gte = (gen6_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;

	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
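	 * The posting read of GFX_FLSH_CNTL_GEN6 below serves that purpose.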
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	unsigned int pat_index;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
			      arg->pat_index, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  unsigned int pat_index,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, pat_index };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma_resource *vma_res;
	unsigned int pat_index;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
				 arg->pat_index, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma_resource *vma_res,
					     unsigned int pat_index,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma_res, pat_index, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}

void intel_ggtt_bind_vma(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash,
			 struct i915_vma_resource *vma_res,
			 unsigned int pat_index,
			 u32 flags)
{
	u32 pte_flags;

	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
		return;

	vma_res->bound_flags |= flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;
	if (vma_res->bi.lmem)
		pte_flags |= PTE_LM;

	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}

void intel_ggtt_unbind_vma(struct i915_address_space *vm,
			   struct i915_vma_resource *vma_res)
{
	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}

/*
 * Reserve the top of the GuC address space for firmware images. Addresses
 * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
 * which makes for a suitable range to hold GuC/HuC firmware images if the
 * size of the GGTT is 4G.
 * However, on a 32-bit platform the size of the GGTT is limited to 2G, which
 * is less than GUC_GGTT_TOP, but we reserve a chunk of the same size anyway,
 * which is far more than needed, to keep the logic in uc_fw_ggtt_offset()
 * simple.
 */
#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 offset;
	int ret;

	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;

	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
				   GUC_TOP_RESERVE_SIZE, offset,
				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.gt->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/*
		 * Reserve a mappable slot for our lockless error capture.
		 *
		 * We strongly prefer taking address 0x0 in order to protect
		 * other critical buffers against accidental overwrites,
		 * as writing to address 0 is a very common mistake.
		 *
		 * Since 0 may already be in use by the system (e.g. the BIOS
		 * framebuffer), we let the reservation fail quietly and hope
		 * 0 remains reserved always.
		 *
		 * If we fail to reserve 0, and then fail to find any space
		 * for an error-capture, remain silent. We can afford not
		 * to reserve an error_capture node as we have fallback
		 * paths, and we trust that 0 will remain reserved. However,
		 * the only likely reason for failure to insert is a driver
		 * bug, which we expect to cause other failures...
		 *
		 * Since the CPU can perform speculative reads on the error
		 * capture range (write-combining allows it), add a scratch
		 * page after the capture node to avoid DMAR errors.
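		 * That is why the reservation below is sized at two pages
		 * rather than one.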
		 */
		ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture)) {
		u64 start = ggtt->error_capture.start;
		u64 size = ggtt->error_capture.size;

		ggtt->vm.scratch_range(&ggtt->vm, start, size);
		drm_dbg(&ggtt->vm.i915->drm,
			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
			start, start + size);
	}

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		drm_dbg(&ggtt->vm.i915->drm,
			"clearing unused GTT space: [%lx, %lx]\n",
			hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
				  struct i915_vm_pt_stash *stash,
				  struct i915_vma_resource *vma_res,
				  unsigned int pat_index,
				  u32 flags)
{
	u32 pte_flags;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND)
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma_res, pat_index, flags);

	if (flags & I915_VMA_GLOBAL_BIND)
		vm->insert_entries(vm, vma_res, pat_index, pte_flags);

	vma_res->bound_flags |= flags;
}

static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
				    struct i915_vma_resource *vma_res)
{
	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
		vm->clear_range(vm, vma_res->start, vma_res->vma_size);

	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
}

static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
	if (err)
		goto err_stash;

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	return 0;

err_stash:
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}
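
/* Tear down the aliasing PPGTT and restore the default GGTT vma ops. */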
static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
		if (ret)
			cleanup_init_ggtt(to_gt(i915)->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	flush_workqueue(ggtt->vm.i915->wq);
	i915_gem_drain_freed_objects(ggtt->vm.i915);

	mutex_lock(&ggtt->vm.mutex);

	ggtt->vm.skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		bool trylock;

		trylock = i915_gem_object_trylock(obj, NULL);
		WARN_ON(!trylock);

		WARN_ON(__i915_vma_unbind(vma));
		if (trylock)
			i915_gem_object_unlock(obj);
	}

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	fini_aliasing_ppgtt(ggtt);

	intel_ggtt_fini_fences(ggtt);
	ggtt_cleanup_hw(ggtt);
}

/**
 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
 * all free objects have been drained.
 * @i915: i915 device
 */
void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
	dma_resv_fini(&ggtt->vm._resv);
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
{
	/*
	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
	 */
	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
}

static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
{
	return gen6_gttmmadr_size(i915) / 2;
}

static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	phys_addr_t phys_addr;
	u32 pte_flags;
	int ret;

	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);

	/*
	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
		ggtt->gsm = ioremap(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	kref_init(&ggtt->vm.resv_ref);
	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	pte_flags = 0;
	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
		pte_flags |= PTE_LM;

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    i915_gem_get_pat_index(i915,
							   I915_CACHE_NONE),
				    pte_flags);

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	free_scratch(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return DEFINE_RES_MEM(pci_resource_start(pdev, bar),
			      pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
		if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
			return -ENXIO;

		ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	ggtt->vm.scratch_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/*
	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
	 * and always on CHV.
	 */
	if (intel_vm_no_concurrent_access_wa(i915)) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;

		/*
		 * Calling the stop_machine() version of the GGTT update
		 * functions on the error capture/reset path would raise a
		 * lockdep warning. Allow calling gen8_ggtt_insert_* directly
		 * on the reset path, which is safe from parallel GGTT updates.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;

		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	if (i915_ggtt_require_binder(i915)) {
		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
		/*
		 * When the GPU is hung, we might bind VMAs for error capture.
		 * Fall back to CPU GGTT updates in that case.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
	}

	if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
		ggtt->invalidate = guc_ggtt_invalidate;
	else
		ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
	else
		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	return ggtt_probe_common(ggtt, size);
}

/*
 * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
 * so the switch-case statements in these PTE encode functions are still valid.
 * See translation table LEGACY_CACHELEVEL.
 */
static u64 snb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (pat_index != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (pat_index != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
		return -ENXIO;

	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64 << 20) ||
	    ggtt->mappable_end > (512 << 20)) {
		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
			&ggtt->mappable_end);
		return -ENXIO;
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.scratch_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (GRAPHICS_VER(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	return ggtt_probe_common(ggtt, size);
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = i915->drm.dev;
	dma_resv_init(&ggtt->vm._resv);

	if (GRAPHICS_VER(i915) >= 8)
		ret = gen8_gmch_probe(ggtt);
	else if (GRAPHICS_VER(i915) >= 6)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = intel_ggtt_gmch_probe(ggtt);

	if (ret) {
		dma_resv_fini(&ggtt->vm._resv);
		return ret;
	}

	if ((ggtt->vm.total - 1) >> 32) {
		drm_err(&i915->drm,
			"We never expected a Global GTT with more than 32bits"
			" of address space! Found %lldM!\n",
			ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		drm_err(&i915->drm,
			"mappable aperture extends past end of GGTT,"
			" aperture=%pa, total=%llx\n",
			&ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	int ret, i;

	for_each_gt(gt, i915, i) {
		ret = intel_gt_assign_ggtt(gt);
		if (ret)
			return ret;
	}

	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
	if (ret)
		return ret;

	if (i915_vtd_active(i915))
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt;

	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ggtt->gt_list);

	return ggtt;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (GRAPHICS_VER(i915) < 6)
		return intel_ggtt_gmch_enable_hw(i915);

	return 0;
}

/**
 * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
 * @vm: The VM to restore the mappings for
 *
 * Restore the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 *
 * Returns %true if restoring the mapping for any object that was in a write
 * domain before suspend.
 */
bool i915_ggtt_resume_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma;
	bool write_domain_objs = false;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

	/* First fill our portion of the GTT with scratch pages */
	vm->clear_range(vm, 0, vm->total);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);

		/*
		 * Clear the bound flags of the vma resource to allow
		 * ptes to be repopulated.
		 */
		vma->resource->bound_flags = 0;
		vma->ops->bind_vma(vm, NULL, vma->resource,
				   obj ? obj->pat_index :
					 i915_gem_get_pat_index(vm->i915,
								I915_CACHE_NONE),
				   was_bound);

		if (obj) { /* only used during resume => exclusive access */
			write_domain_objs |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	return write_domain_objs;
}

void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;
	bool flush;

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);

	flush = i915_ggtt_resume_vm(&ggtt->vm);

	if (drm_mm_node_allocated(&ggtt->error_capture))
		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
				       ggtt->error_capture.size);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_uc_resume_mappings(&gt->uc);

	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	intel_ggtt_restore_fences(ggtt);
}