// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <asm/set_memory.h>
#include <asm/smp.h>
#include <linux/types.h>
#include <linux/stop_machine.h>

#include <drm/drm_managed.h>
#include <drm/i915_drm.h>
#include <drm/intel-gtt.h>

#include "display/intel_display.h"
#include "gem/i915_gem_lmem.h"

#include "intel_context.h"
#include "intel_ggtt_gmch.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_pci_config.h"
#include "intel_ring.h"
#include "i915_drv.h"
#include "i915_pci.h"
#include "i915_reg.h"
#include "i915_request.h"
#include "i915_scatterlist.h"
#include "i915_utils.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"
#include "gen8_ppgtt.h"
#include "intel_engine_pm.h"

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}
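
/*
 * Illustration only: when the hole under consideration sits between two
 * differently coloured neighbours, both of its ends are pulled in by one
 * I915_GTT_PAGE_SIZE, so whatever gets allocated there is separated from
 * its neighbours by an unused guard page.
 */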

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	intel_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	return 0;
}

/**
 * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
 * @vm: The VM to suspend the mappings for
 *
 * Suspend the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 */
void i915_ggtt_suspend_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;
	int save_skip_rewrite;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

retry:
	i915_gem_drain_freed_objects(vm->i915);

	mutex_lock(&vm->mutex);

	/*
	 * Skip rewriting PTE on VMA unbind.
	 * FIXME: Use an argument to i915_vma_unbind() instead?
	 */
	save_skip_rewrite = vm->skip_pte_rewrite;
	vm->skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));

		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
			continue;

		/* unlikely to race when GPU is idle, so no worry about slowpath.. */
		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
			/*
			 * No dead objects should appear here, GPU should be
			 * completely idle, and userspace suspended
			 */
			i915_gem_object_get(obj);

			mutex_unlock(&vm->mutex);

			i915_gem_object_lock(obj, NULL);
			GEM_WARN_ON(i915_vma_unbind(vma));
			i915_gem_object_unlock(obj);
			i915_gem_object_put(obj);

			vm->skip_pte_rewrite = save_skip_rewrite;
			goto retry;
		}

		if (!i915_vma_is_pinned(vma)) {
			i915_vma_wait_for_bind(vma);

			__i915_vma_evict(vma, false);
			drm_mm_remove_node(&vma->node);
		}

		i915_gem_object_unlock(obj);
	}

	vm->clear_range(vm, 0, vm->total);

	vm->skip_pte_rewrite = save_skip_rewrite;

	mutex_unlock(&vm->mutex);
}

void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;

	i915_ggtt_suspend_vm(&ggtt->vm);
	ggtt->invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
{
	/*
	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11)
		return true;

	return false;
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 *
	 * Only perform this when GGTT is mapped as WC, see ggtt_probe_common().
	 */
	if (needs_wc_ggtt_mapping(ggtt->vm.i915))
		intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6,
				      GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	intel_wakeref_t wakeref;

	with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
		struct intel_guc *guc = &gt->uc.guc;

		intel_guc_invalidate_tlb_guc(guc);
	}
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct intel_gt *gt;

	gen8_ggtt_invalidate(ggtt);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
			guc_ggtt_ct_invalidate(gt);
		else if (GRAPHICS_VER(i915) >= 12)
			intel_uncore_write_fw(gt->uncore,
					      GEN12_GUC_TLB_INV_CR,
					      GEN12_GUC_TLB_INV_CR_INVALIDATE);
		else
			intel_uncore_write_fw(gt->uncore,
					      GEN8_GTCR, GEN8_GTCR_INVALIDATE);
	}
}

static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
			       unsigned int pat_index,
			       u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= MTL_GGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= MTL_GGTT_PTE_PAT1;

	return pte;
}

u64 gen8_ggtt_pte_encode(dma_addr_t addr,
			 unsigned int pat_index,
			 u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	return pte;
}
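
/*
 * Worked example (illustrative, not a full bit layout): on MTL a
 * local-memory page with pat_index 3 is encoded as
 *
 *	addr | GEN8_PAGE_PRESENT | GEN12_GGTT_PTE_LM |
 *	MTL_GGTT_PTE_PAT0 | MTL_GGTT_PTE_PAT1
 *
 * i.e. only the two low bits of pat_index can be expressed in a GGTT PTE,
 * while gen8_ggtt_pte_encode() ignores pat_index altogether.
 */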

static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt = ggtt->vm.gt;

	return intel_gt_is_bind_context_ready(gt);
}

static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
{
	struct intel_context *ce;
	struct intel_gt *gt = ggtt->vm.gt;

	if (intel_gt_is_wedged(gt))
		return NULL;

	ce = gt->engine[BCS0]->bind_context;
	GEM_BUG_ON(!ce);

	/*
	 * If the GT is not awake already at this stage then fall back to the
	 * pci based GGTT update, otherwise __intel_wakeref_get_first() would
	 * conflict with fs_reclaim trying to allocate memory while doing
	 * rpm_resume().
	 */
	*wakeref = intel_gt_pm_get_if_awake(gt);
	if (!*wakeref)
		return NULL;

	intel_engine_pm_get(ce->engine);

	return ce;
}

static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
{
	intel_engine_pm_put(ce->engine);
	intel_gt_pm_put(ce->engine->gt, wakeref);
}

static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
				struct sg_table *pages, u32 num_entries,
				const gen8_pte_t pte)
{
	struct i915_sched_attr attr = {};
	struct intel_gt *gt = ggtt->vm.gt;
	const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
	struct sgt_iter iter;
	struct i915_request *rq;
	struct intel_context *ce;
	intel_wakeref_t wakeref;
	u32 *cs;

	if (!num_entries)
		return true;

	ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
	if (!ce)
		return false;

	if (pages)
		iter = __sgt_iter(pages->sgl, true);

	while (num_entries) {
		int count = 0;
		dma_addr_t addr;
		/*
		 * MI_UPDATE_GTT can update 512 entries in a single command, but
		 * that ends up triggering an engine reset; 511 works.
		 */
		u32 n_ptes = min_t(u32, 511, num_entries);

		if (mutex_lock_interruptible(&ce->timeline->mutex))
			goto put_ce;

		intel_context_enter(ce);
		rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
		intel_context_exit(ce);
		if (IS_ERR(rq)) {
			GT_TRACE(gt, "Failed to get bind request\n");
			mutex_unlock(&ce->timeline->mutex);
			goto put_ce;
		}

		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
		if (IS_ERR(cs)) {
			GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
			i915_request_set_error_once(rq, PTR_ERR(cs));
			/* once a request is created, it must be queued */
			goto queue_err_rq;
		}

		*cs++ = MI_UPDATE_GTT | (2 * n_ptes);
		*cs++ = offset << 12;

		if (pages) {
			for_each_sgt_daddr_next(addr, iter) {
				if (count == n_ptes)
					break;
				*cs++ = lower_32_bits(pte | addr);
				*cs++ = upper_32_bits(pte | addr);
				count++;
			}
			/* fill remaining with scratch pte, if any */
			if (count < n_ptes) {
				memset64((u64 *)cs, scratch_pte,
					 n_ptes - count);
				cs += (n_ptes - count) * 2;
			}
		} else {
			memset64((u64 *)cs, pte, n_ptes);
			cs += n_ptes * 2;
		}

		intel_ring_advance(rq, cs);
queue_err_rq:
		i915_request_get(rq);
		__i915_request_commit(rq);
		__i915_request_queue(rq, &attr);

		mutex_unlock(&ce->timeline->mutex);
		/* This will break if the request is complete or after engine reset */
		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		if (rq->fence.error)
			goto err_rq;

		i915_request_put(rq);

		num_entries -= n_ptes;
		offset += n_ptes;
	}

	gen8_ggtt_bind_put_ce(ce, wakeref);
	return true;

err_rq:
	i915_request_put(rq);
put_ce:
	gen8_ggtt_bind_put_ce(ce, wakeref);
	return false;
}
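
/*
 * Ring-space arithmetic for the chunking above (a sketch, derived from the
 * emission itself): each chunk writes one MI_UPDATE_GTT header dword, one
 * offset dword and two dwords per PTE, hence intel_ring_begin(rq,
 * 2 * n_ptes + 2). With n_ptes capped at 511 that is 1024 dwords, i.e. 4K
 * of ring space per request.
 */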

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));

	ggtt->invalidate(ggtt);
}

static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
				       dma_addr_t addr, u64 offset,
				       unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t pte;

	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t __iomem *gte;
	gen8_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}
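
/*
 * Sketch of the PTE range written above (derived from the pointer
 * arithmetic, not to scale):
 *
 *	start - guard         start               start + node_size + guard
 *	|<- scratch guard ->|<- object pages ->|<-------- scratch -------->|
 *
 * Everything not backed by a page of the object points at the scratch
 * page, so prefetches past either end of the buffer stay harmless.
 */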

static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					    struct i915_vma_resource *vma_res,
					    unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t pte_encode;
	u64 start, end;

	pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	start = end;
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
				 vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
		goto err;

	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	return true;

err:
	return false;
}

static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
					  struct i915_vma_resource *vma_res,
					  unsigned int pat_index, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
					 u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
								      NULL, num_entries, scratch_pte))
		return ggtt->invalidate(ggtt);

	gen8_ggtt_clear_range(vm, start, length);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  unsigned int pat_index,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, pat_index, flags), pte);

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma_resource *vma_res,
				     unsigned int pat_index,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *gte;
	gen6_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	gte = (gen6_pte_t __iomem *)ggtt->gsm;
	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;

	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	unsigned int pat_index;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
			      arg->pat_index, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  unsigned int pat_index,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, pat_index };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma_resource *vma_res;
	unsigned int pat_index;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
				 arg->pat_index, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma_resource *vma_res,
					     unsigned int pat_index,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma_res, pat_index, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}

void intel_ggtt_bind_vma(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash,
			 struct i915_vma_resource *vma_res,
			 unsigned int pat_index,
			 u32 flags)
{
	u32 pte_flags;

	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
		return;

	vma_res->bound_flags |= flags;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;
	if (vma_res->bi.lmem)
		pte_flags |= PTE_LM;

	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}

void intel_ggtt_unbind_vma(struct i915_address_space *vm,
			   struct i915_vma_resource *vma_res)
{
	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
}

/*
 * Reserve the top of the GuC address space for firmware images. Addresses
 * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
 * which makes for a suitable range to hold GuC/HuC firmware images if the
 * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
 * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
 * of the same size anyway, which is far more than needed, to keep the logic
 * in uc_fw_ggtt_offset() simple.
 */
#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
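
/*
 * For reference only (GUC_GGTT_TOP lives in the GuC headers; the value
 * quoted here is from the time of writing and may change): with
 * GUC_GGTT_TOP at 0xFEE00000 the reservation above works out to 18 MiB,
 * and it is always SZ_4G - GUC_GGTT_TOP regardless of the actual GGTT size.
 */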

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 offset;
	int ret;

	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;

	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
				   GUC_TOP_RESERVE_SIZE, offset,
				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.gt->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/*
		 * Reserve a mappable slot for our lockless error capture.
		 *
		 * We strongly prefer taking address 0x0 in order to protect
		 * other critical buffers against accidental overwrites,
		 * as writing to address 0 is a very common mistake.
		 *
		 * Since 0 may already be in use by the system (e.g. the BIOS
		 * framebuffer), we let the reservation fail quietly and hope
		 * 0 remains reserved always.
		 *
		 * If we fail to reserve 0, and then fail to find any space
		 * for an error-capture, remain silent. We can afford not
		 * to reserve an error_capture node as we have fallback
		 * paths, and we trust that 0 will remain reserved. However,
		 * the only likely reason for failure to insert is a driver
		 * bug, which we expect to cause other failures...
		 *
		 * Since CPU can perform speculative reads on error capture
		 * (write-combining allows it) add scratch page after error
		 * capture to avoid DMAR errors.
		 */
		ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE;
		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture)) {
		u64 start = ggtt->error_capture.start;
		u64 size = ggtt->error_capture.size;

		ggtt->vm.scratch_range(&ggtt->vm, start, size);
		drm_dbg(&ggtt->vm.i915->drm,
			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
			start, start + size);
	}

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		drm_dbg(&ggtt->vm.i915->drm,
			"clearing unused GTT space: [%lx, %lx]\n",
			hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
				  struct i915_vm_pt_stash *stash,
				  struct i915_vma_resource *vma_res,
				  unsigned int pat_index,
				  u32 flags)
{
	u32 pte_flags;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND)
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma_res, pat_index, flags);

	if (flags & I915_VMA_GLOBAL_BIND)
		vm->insert_entries(vm, vma_res, pat_index, pte_flags);

	vma_res->bound_flags |= flags;
}

static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
				    struct i915_vma_resource *vma_res)
{
	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
		vm->clear_range(vm, vma_res->start, vma_res->vma_size);

	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
}

static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
	if (err)
		goto err_stash;

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	return 0;

err_stash:
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(to_gt(i915)->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
		if (ret)
			cleanup_init_ggtt(to_gt(i915)->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	flush_workqueue(ggtt->vm.i915->wq);
	i915_gem_drain_freed_objects(ggtt->vm.i915);

	mutex_lock(&ggtt->vm.mutex);

	ggtt->vm.skip_pte_rewrite = true;

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		bool trylock;

		trylock = i915_gem_object_trylock(obj, NULL);
		WARN_ON(!trylock);

		WARN_ON(__i915_vma_unbind(vma));
		if (trylock)
			i915_gem_object_unlock(obj);
	}

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	fini_aliasing_ppgtt(ggtt);

	intel_ggtt_fini_fences(ggtt);
	ggtt_cleanup_hw(ggtt);
}

/**
 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
 * all free objects have been drained.
 * @i915: i915 device
 */
void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;

	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
	dma_resv_fini(&ggtt->vm._resv);
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}
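
/*
 * Worked example (assuming the 8 byte gen8 PTE used throughout this file):
 * a GGMS field of 3 decodes to 1 << 3 = 8 MiB of GSM, i.e. 1M PTEs, which
 * gen8_gmch_probe() turns into a 4 GiB GGTT via
 * (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE. The CONFIG_X86_32
 * clamp to 4 MiB above is what yields the 2 GiB GGTT mentioned in the
 * comment.
 */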

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
{
	/*
	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
	 */
	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
}

static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
{
	return gen6_gttmmadr_size(i915) / 2;
}
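
/*
 * Put differently: unless direct GSM access is available,
 * ggtt_probe_common() below maps the GSM starting half way into the
 * GTTMMADR BAR, i.e. at a 2 MiB offset of the 4 MiB BAR on gen6/7 and at
 * an 8 MiB offset of the 16 MiB BAR on gen8+.
 */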

static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	phys_addr_t phys_addr;
	u32 pte_flags;
	int ret;

	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));

	if (i915_direct_stolen_access(i915)) {
		drm_dbg(&i915->drm, "Using direct GSM access\n");
		phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK;
	} else {
		phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
	}

	if (needs_wc_ggtt_mapping(i915))
		ggtt->gsm = ioremap_wc(phys_addr, size);
	else
		ggtt->gsm = ioremap(phys_addr, size);

	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	kref_init(&ggtt->vm.resv_ref);
	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	pte_flags = 0;
	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
		pte_flags |= PTE_LM;

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    i915_gem_get_pat_index(i915,
							   I915_CACHE_NONE),
				    pte_flags);

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	free_scratch(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return DEFINE_RES_MEM(pci_resource_start(pdev, bar),
			      pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
		if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
			return -ENXIO;

		ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	ggtt->vm.scratch_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/*
	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
	 * and always on CHV.
	 */
	if (intel_vm_no_concurrent_access_wa(i915)) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;

		/*
		 * Calling the stop_machine() version of the GGTT update
		 * functions from the error capture/reset path would raise a
		 * lockdep warning. Allow calling gen8_ggtt_insert_* directly
		 * at reset path, which is safe from parallel GGTT updates.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
		ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;

		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	if (i915_ggtt_require_binder(i915)) {
		ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
		ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
		ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
		/*
		 * While the GPU is hung, we might bind VMAs for error capture.
		 * Fall back to CPU GGTT updates in that case.
		 */
		ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
	}

	if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
		ggtt->invalidate = guc_ggtt_invalidate;
	else
		ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
		ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
	else
		ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	return ggtt_probe_common(ggtt, size);
}

/*
 * For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
 * so the switch-case statements in these PTE encode functions are still valid.
 * See translation table LEGACY_CACHELEVEL.
 */
static u64 snb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(pat_index);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (pat_index != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  unsigned int pat_index,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (pat_index != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (pat_index) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
		return -ENXIO;

	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64 << 20) ||
	    ggtt->mappable_end > (512 << 20)) {
		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
			&ggtt->mappable_end);
		return -ENXIO;
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.scratch_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (GRAPHICS_VER(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;

	return ggtt_probe_common(ggtt, size);
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = i915->drm.dev;
	dma_resv_init(&ggtt->vm._resv);

	if (GRAPHICS_VER(i915) >= 8)
		ret = gen8_gmch_probe(ggtt);
	else if (GRAPHICS_VER(i915) >= 6)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = intel_ggtt_gmch_probe(ggtt);

	if (ret) {
		dma_resv_fini(&ggtt->vm._resv);
		return ret;
	}

	if ((ggtt->vm.total - 1) >> 32) {
		drm_err(&i915->drm,
			"We never expected a Global GTT with more than 32bits"
			" of address space! Found %lldM!\n",
			ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		drm_err(&i915->drm,
			"mappable aperture extends past end of GGTT,"
			" aperture=%pa, total=%llx\n",
			&ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	int ret, i;

	for_each_gt(gt, i915, i) {
		ret = intel_gt_assign_ggtt(gt);
		if (ret)
			return ret;
	}

	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
	if (ret)
		return ret;

	if (i915_vtd_active(i915))
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt;

	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ggtt->gt_list);

	return ggtt;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (GRAPHICS_VER(i915) < 6)
		return intel_ggtt_gmch_enable_hw(i915);

	return 0;
}

/**
 * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
 * @vm: The VM to restore the mappings for
 *
 * Restore the memory mappings for all objects mapped to HW via the GGTT or a
 * DPT page table.
 *
 * Returns %true if restoring the mapping for any object that was in a write
 * domain before suspend.
 */
bool i915_ggtt_resume_vm(struct i915_address_space *vm)
{
	struct i915_vma *vma;
	bool write_domain_objs = false;

	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);

	/* First fill our portion of the GTT with scratch pages */
	vm->clear_range(vm, 0, vm->total);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);

		/*
		 * Clear the bound flags of the vma resource to allow
		 * ptes to be repopulated.
		 */
		vma->resource->bound_flags = 0;
		vma->ops->bind_vma(vm, NULL, vma->resource,
				   obj ? obj->pat_index :
					 i915_gem_get_pat_index(vm->i915,
								I915_CACHE_NONE),
				   was_bound);

		if (obj) { /* only used during resume => exclusive access */
			write_domain_objs |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	return write_domain_objs;
}

void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct intel_gt *gt;
	bool flush;

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_gt_check_and_clear_faults(gt);

	flush = i915_ggtt_resume_vm(&ggtt->vm);

	if (drm_mm_node_allocated(&ggtt->error_capture))
		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
				       ggtt->error_capture.size);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_uc_resume_mappings(&gt->uc);

	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	intel_ggtt_restore_fences(ggtt);
}