/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/log2.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/stop_machine.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"

#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing
 * pages in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * The above would represent a normal GGTT view as normally mapped for GPU or
 * CPU rendering. In contrast, fed to the display engine would be an
 * alternative view which could look something like this:
 *
 *    1212
 *    3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, the
 * globally const i915_ggtt_view_normal singleton instance exists. All old
 * core GEM API functions, the ones not taking the view parameter, operate
 * on, or with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1.
Add a new enum with a suitable name. 90 * 2. Extend the metadata in the i915_ggtt_view structure if required. 91 * 3. Add support to i915_get_vma_pages(). 92 * 93 * New views are required to build a scatter-gather table from within the 94 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and 95 * exists for the lifetime of an VMA. 96 * 97 * Core API is designed to have copy semantics which means that passed in 98 * struct i915_ggtt_view does not need to be persistent (left around after 99 * calling the core API functions). 100 * 101 */ 102 103 static int 104 i915_get_ggtt_vma_pages(struct i915_vma *vma); 105 106 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) 107 { 108 /* Note that as an uncached mmio write, this should flush the 109 * WCB of the writes into the GGTT before it triggers the invalidate. 110 */ 111 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 112 } 113 114 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv) 115 { 116 gen6_ggtt_invalidate(dev_priv); 117 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); 118 } 119 120 static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv) 121 { 122 intel_gtt_chipset_flush(); 123 } 124 125 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) 126 { 127 i915->ggtt.invalidate(i915); 128 } 129 130 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 131 int enable_ppgtt) 132 { 133 bool has_aliasing_ppgtt; 134 bool has_full_ppgtt; 135 bool has_full_48bit_ppgtt; 136 137 has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt; 138 has_full_ppgtt = dev_priv->info.has_full_ppgtt; 139 has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt; 140 141 if (intel_vgpu_active(dev_priv)) { 142 /* emulation is too hard */ 143 has_full_ppgtt = false; 144 has_full_48bit_ppgtt = false; 145 } 146 147 if (!has_aliasing_ppgtt) 148 return 0; 149 150 /* 151 * We don't allow disabling PPGTT for gen9+ as it's a requirement for 152 * execlists, the sole mechanism available to submit work. 153 */ 154 if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) 155 return 0; 156 157 if (enable_ppgtt == 1) 158 return 1; 159 160 if (enable_ppgtt == 2 && has_full_ppgtt) 161 return 2; 162 163 if (enable_ppgtt == 3 && has_full_48bit_ppgtt) 164 return 3; 165 166 #ifdef CONFIG_INTEL_IOMMU 167 /* Disable ppgtt on SNB if VT-d is on. */ 168 if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { 169 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 170 return 0; 171 } 172 #endif 173 174 /* Early VLV doesn't have this */ 175 if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { 176 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n"); 177 return 0; 178 } 179 180 if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) 181 return has_full_48bit_ppgtt ? 3 : 2; 182 else 183 return has_aliasing_ppgtt ? 
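	/*
	 * Added summary (not in the original source): the value returned here
	 * is the sanitized enable_ppgtt mode checked for above:
	 *	0 - PPGTT disabled, GGTT only
	 *	1 - aliasing PPGTT
	 *	2 - full PPGTT
	 *	3 - full 48bit PPGTT
	 */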
1 : 0; 184 } 185 186 static int ppgtt_bind_vma(struct i915_vma *vma, 187 enum i915_cache_level cache_level, 188 u32 unused) 189 { 190 u32 pte_flags = 0; 191 192 vma->pages = vma->obj->mm.pages; 193 194 /* Currently applicable only to VLV */ 195 if (vma->obj->gt_ro) 196 pte_flags |= PTE_READ_ONLY; 197 198 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 199 cache_level, pte_flags); 200 201 return 0; 202 } 203 204 static void ppgtt_unbind_vma(struct i915_vma *vma) 205 { 206 vma->vm->clear_range(vma->vm, 207 vma->node.start, 208 vma->size); 209 } 210 211 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 212 enum i915_cache_level level) 213 { 214 gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; 215 pte |= addr; 216 217 switch (level) { 218 case I915_CACHE_NONE: 219 pte |= PPAT_UNCACHED_INDEX; 220 break; 221 case I915_CACHE_WT: 222 pte |= PPAT_DISPLAY_ELLC_INDEX; 223 break; 224 default: 225 pte |= PPAT_CACHED_INDEX; 226 break; 227 } 228 229 return pte; 230 } 231 232 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 233 const enum i915_cache_level level) 234 { 235 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 236 pde |= addr; 237 if (level != I915_CACHE_NONE) 238 pde |= PPAT_CACHED_PDE_INDEX; 239 else 240 pde |= PPAT_UNCACHED_INDEX; 241 return pde; 242 } 243 244 #define gen8_pdpe_encode gen8_pde_encode 245 #define gen8_pml4e_encode gen8_pde_encode 246 247 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 248 enum i915_cache_level level, 249 u32 unused) 250 { 251 gen6_pte_t pte = GEN6_PTE_VALID; 252 pte |= GEN6_PTE_ADDR_ENCODE(addr); 253 254 switch (level) { 255 case I915_CACHE_L3_LLC: 256 case I915_CACHE_LLC: 257 pte |= GEN6_PTE_CACHE_LLC; 258 break; 259 case I915_CACHE_NONE: 260 pte |= GEN6_PTE_UNCACHED; 261 break; 262 default: 263 MISSING_CASE(level); 264 } 265 266 return pte; 267 } 268 269 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 270 enum i915_cache_level level, 271 u32 unused) 272 { 273 gen6_pte_t pte = GEN6_PTE_VALID; 274 pte |= GEN6_PTE_ADDR_ENCODE(addr); 275 276 switch (level) { 277 case I915_CACHE_L3_LLC: 278 pte |= GEN7_PTE_CACHE_L3_LLC; 279 break; 280 case I915_CACHE_LLC: 281 pte |= GEN6_PTE_CACHE_LLC; 282 break; 283 case I915_CACHE_NONE: 284 pte |= GEN6_PTE_UNCACHED; 285 break; 286 default: 287 MISSING_CASE(level); 288 } 289 290 return pte; 291 } 292 293 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 294 enum i915_cache_level level, 295 u32 flags) 296 { 297 gen6_pte_t pte = GEN6_PTE_VALID; 298 pte |= GEN6_PTE_ADDR_ENCODE(addr); 299 300 if (!(flags & PTE_READ_ONLY)) 301 pte |= BYT_PTE_WRITEABLE; 302 303 if (level != I915_CACHE_NONE) 304 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 305 306 return pte; 307 } 308 309 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 310 enum i915_cache_level level, 311 u32 unused) 312 { 313 gen6_pte_t pte = GEN6_PTE_VALID; 314 pte |= HSW_PTE_ADDR_ENCODE(addr); 315 316 if (level != I915_CACHE_NONE) 317 pte |= HSW_WB_LLC_AGE3; 318 319 return pte; 320 } 321 322 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 323 enum i915_cache_level level, 324 u32 unused) 325 { 326 gen6_pte_t pte = GEN6_PTE_VALID; 327 pte |= HSW_PTE_ADDR_ENCODE(addr); 328 329 switch (level) { 330 case I915_CACHE_NONE: 331 break; 332 case I915_CACHE_WT: 333 pte |= HSW_WT_ELLC_LLC_AGE3; 334 break; 335 default: 336 pte |= HSW_WB_ELLC_LLC_AGE3; 337 break; 338 } 339 340 return pte; 341 } 342 343 static int __setup_page_dma(struct drm_i915_private *dev_priv, 344 struct i915_page_dma *p, gfp_t flags) 345 { 346 struct device *kdev = &dev_priv->drm.pdev->dev; 347 348 p->page = 
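	/*
	 * Added note: __setup_page_dma() pairs the page allocation with a
	 * streaming DMA mapping and frees the page again if the mapping
	 * fails; cleanup_page_dma() below is the matching teardown. A
	 * hypothetical usage sketch (error handling trimmed):
	 *
	 *	struct i915_page_dma tmp = {};
	 *
	 *	if (!setup_page_dma(dev_priv, &tmp)) {
	 *		... program tmp.daddr into a PDE/PTE ...
	 *		cleanup_page_dma(dev_priv, &tmp);
	 *	}
	 */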
alloc_page(flags); 349 if (!p->page) 350 return -ENOMEM; 351 352 p->daddr = dma_map_page(kdev, 353 p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 354 355 if (dma_mapping_error(kdev, p->daddr)) { 356 __free_page(p->page); 357 return -EINVAL; 358 } 359 360 return 0; 361 } 362 363 static int setup_page_dma(struct drm_i915_private *dev_priv, 364 struct i915_page_dma *p) 365 { 366 return __setup_page_dma(dev_priv, p, I915_GFP_DMA); 367 } 368 369 static void cleanup_page_dma(struct drm_i915_private *dev_priv, 370 struct i915_page_dma *p) 371 { 372 struct pci_dev *pdev = dev_priv->drm.pdev; 373 374 if (WARN_ON(!p->page)) 375 return; 376 377 dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 378 __free_page(p->page); 379 memset(p, 0, sizeof(*p)); 380 } 381 382 static void *kmap_page_dma(struct i915_page_dma *p) 383 { 384 return kmap_atomic(p->page); 385 } 386 387 /* We use the flushing unmap only with ppgtt structures: 388 * page directories, page tables and scratch pages. 389 */ 390 static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) 391 { 392 /* There are only few exceptions for gen >=6. chv and bxt. 393 * And we are not sure about the latter so play safe for now. 394 */ 395 if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) 396 drm_clflush_virt_range(vaddr, PAGE_SIZE); 397 398 kunmap_atomic(vaddr); 399 } 400 401 #define kmap_px(px) kmap_page_dma(px_base(px)) 402 #define kunmap_px(ppgtt, vaddr) \ 403 kunmap_page_dma((ppgtt)->base.i915, (vaddr)) 404 405 #define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) 406 #define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) 407 #define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) 408 #define fill32_px(dev_priv, px, v) \ 409 fill_page_dma_32((dev_priv), px_base(px), (v)) 410 411 static void fill_page_dma(struct drm_i915_private *dev_priv, 412 struct i915_page_dma *p, const uint64_t val) 413 { 414 int i; 415 uint64_t * const vaddr = kmap_page_dma(p); 416 417 for (i = 0; i < 512; i++) 418 vaddr[i] = val; 419 420 kunmap_page_dma(dev_priv, vaddr); 421 } 422 423 static void fill_page_dma_32(struct drm_i915_private *dev_priv, 424 struct i915_page_dma *p, const uint32_t val32) 425 { 426 uint64_t v = val32; 427 428 v = v << 32 | val32; 429 430 fill_page_dma(dev_priv, p, v); 431 } 432 433 static int 434 setup_scratch_page(struct drm_i915_private *dev_priv, 435 struct i915_page_dma *scratch, 436 gfp_t gfp) 437 { 438 return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); 439 } 440 441 static void cleanup_scratch_page(struct drm_i915_private *dev_priv, 442 struct i915_page_dma *scratch) 443 { 444 cleanup_page_dma(dev_priv, scratch); 445 } 446 447 static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) 448 { 449 struct i915_page_table *pt; 450 const size_t count = INTEL_GEN(dev_priv) >= 8 ? 
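	/*
	 * Added note: a single 4KiB page backs each page table, so it holds
	 * GEN8_PTES (4096 / sizeof(gen8_pte_t) = 512) 64-bit entries on gen8+
	 * and GEN6_PTES (4096 / sizeof(gen6_pte_t) = 1024) 32-bit entries on
	 * gen6/7; the used_ptes bitmap below is sized from this count.
	 */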
GEN8_PTES : GEN6_PTES; 451 int ret = -ENOMEM; 452 453 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 454 if (!pt) 455 return ERR_PTR(-ENOMEM); 456 457 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 458 GFP_KERNEL); 459 460 if (!pt->used_ptes) 461 goto fail_bitmap; 462 463 ret = setup_px(dev_priv, pt); 464 if (ret) 465 goto fail_page_m; 466 467 return pt; 468 469 fail_page_m: 470 kfree(pt->used_ptes); 471 fail_bitmap: 472 kfree(pt); 473 474 return ERR_PTR(ret); 475 } 476 477 static void free_pt(struct drm_i915_private *dev_priv, 478 struct i915_page_table *pt) 479 { 480 cleanup_px(dev_priv, pt); 481 kfree(pt->used_ptes); 482 kfree(pt); 483 } 484 485 static void gen8_initialize_pt(struct i915_address_space *vm, 486 struct i915_page_table *pt) 487 { 488 gen8_pte_t scratch_pte; 489 490 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 491 I915_CACHE_LLC); 492 493 fill_px(vm->i915, pt, scratch_pte); 494 } 495 496 static void gen6_initialize_pt(struct i915_address_space *vm, 497 struct i915_page_table *pt) 498 { 499 gen6_pte_t scratch_pte; 500 501 WARN_ON(vm->scratch_page.daddr == 0); 502 503 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 504 I915_CACHE_LLC, 0); 505 506 fill32_px(vm->i915, pt, scratch_pte); 507 } 508 509 static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) 510 { 511 struct i915_page_directory *pd; 512 int ret = -ENOMEM; 513 514 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 515 if (!pd) 516 return ERR_PTR(-ENOMEM); 517 518 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 519 sizeof(*pd->used_pdes), GFP_KERNEL); 520 if (!pd->used_pdes) 521 goto fail_bitmap; 522 523 ret = setup_px(dev_priv, pd); 524 if (ret) 525 goto fail_page_m; 526 527 return pd; 528 529 fail_page_m: 530 kfree(pd->used_pdes); 531 fail_bitmap: 532 kfree(pd); 533 534 return ERR_PTR(ret); 535 } 536 537 static void free_pd(struct drm_i915_private *dev_priv, 538 struct i915_page_directory *pd) 539 { 540 if (px_page(pd)) { 541 cleanup_px(dev_priv, pd); 542 kfree(pd->used_pdes); 543 kfree(pd); 544 } 545 } 546 547 static void gen8_initialize_pd(struct i915_address_space *vm, 548 struct i915_page_directory *pd) 549 { 550 gen8_pde_t scratch_pde; 551 552 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 553 554 fill_px(vm->i915, pd, scratch_pde); 555 } 556 557 static int __pdp_init(struct drm_i915_private *dev_priv, 558 struct i915_page_directory_pointer *pdp) 559 { 560 size_t pdpes = I915_PDPES_PER_PDP(dev_priv); 561 562 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 563 sizeof(unsigned long), 564 GFP_KERNEL); 565 if (!pdp->used_pdpes) 566 return -ENOMEM; 567 568 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 569 GFP_KERNEL); 570 if (!pdp->page_directory) { 571 kfree(pdp->used_pdpes); 572 /* the PDP might be the statically allocated top level. 
Keep it 573 * as clean as possible */ 574 pdp->used_pdpes = NULL; 575 return -ENOMEM; 576 } 577 578 return 0; 579 } 580 581 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 582 { 583 kfree(pdp->used_pdpes); 584 kfree(pdp->page_directory); 585 pdp->page_directory = NULL; 586 } 587 588 static struct 589 i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) 590 { 591 struct i915_page_directory_pointer *pdp; 592 int ret = -ENOMEM; 593 594 WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); 595 596 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 597 if (!pdp) 598 return ERR_PTR(-ENOMEM); 599 600 ret = __pdp_init(dev_priv, pdp); 601 if (ret) 602 goto fail_bitmap; 603 604 ret = setup_px(dev_priv, pdp); 605 if (ret) 606 goto fail_page_m; 607 608 return pdp; 609 610 fail_page_m: 611 __pdp_fini(pdp); 612 fail_bitmap: 613 kfree(pdp); 614 615 return ERR_PTR(ret); 616 } 617 618 static void free_pdp(struct drm_i915_private *dev_priv, 619 struct i915_page_directory_pointer *pdp) 620 { 621 __pdp_fini(pdp); 622 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 623 cleanup_px(dev_priv, pdp); 624 kfree(pdp); 625 } 626 } 627 628 static void gen8_initialize_pdp(struct i915_address_space *vm, 629 struct i915_page_directory_pointer *pdp) 630 { 631 gen8_ppgtt_pdpe_t scratch_pdpe; 632 633 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 634 635 fill_px(vm->i915, pdp, scratch_pdpe); 636 } 637 638 static void gen8_initialize_pml4(struct i915_address_space *vm, 639 struct i915_pml4 *pml4) 640 { 641 gen8_ppgtt_pml4e_t scratch_pml4e; 642 643 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 644 I915_CACHE_LLC); 645 646 fill_px(vm->i915, pml4, scratch_pml4e); 647 } 648 649 static void 650 gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, 651 struct i915_page_directory_pointer *pdp, 652 struct i915_page_directory *pd, 653 int index) 654 { 655 gen8_ppgtt_pdpe_t *page_directorypo; 656 657 if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) 658 return; 659 660 page_directorypo = kmap_px(pdp); 661 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 662 kunmap_px(ppgtt, page_directorypo); 663 } 664 665 static void 666 gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt, 667 struct i915_pml4 *pml4, 668 struct i915_page_directory_pointer *pdp, 669 int index) 670 { 671 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 672 673 WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); 674 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 675 kunmap_px(ppgtt, pagemap); 676 } 677 678 /* Broadwell Page Directory Pointer Descriptors */ 679 static int gen8_write_pdp(struct drm_i915_gem_request *req, 680 unsigned entry, 681 dma_addr_t addr) 682 { 683 struct intel_ring *ring = req->ring; 684 struct intel_engine_cs *engine = req->engine; 685 int ret; 686 687 BUG_ON(entry >= 4); 688 689 ret = intel_ring_begin(req, 6); 690 if (ret) 691 return ret; 692 693 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 694 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); 695 intel_ring_emit(ring, upper_32_bits(addr)); 696 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 697 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); 698 intel_ring_emit(ring, lower_32_bits(addr)); 699 intel_ring_advance(ring); 700 701 return 0; 702 } 703 704 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 705 struct drm_i915_gem_request *req) 706 { 707 int i, ret; 708 709 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 710 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 711 
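		/*
		 * Added note: each iteration emits, via gen8_write_pdp()
		 * above, an LRI pair that loads the PDP registers for entry
		 * i on this engine:
		 *
		 *	MI_LOAD_REGISTER_IMM(1), GEN8_RING_PDP_UDW(engine, i), upper_32_bits(pd_daddr)
		 *	MI_LOAD_REGISTER_IMM(1), GEN8_RING_PDP_LDW(engine, i), lower_32_bits(pd_daddr)
		 */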
712 ret = gen8_write_pdp(req, i, pd_daddr); 713 if (ret) 714 return ret; 715 } 716 717 return 0; 718 } 719 720 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 721 struct drm_i915_gem_request *req) 722 { 723 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 724 } 725 726 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 727 * the page table structures, we mark them dirty so that 728 * context switching/execlist queuing code takes extra steps 729 * to ensure that tlbs are flushed. 730 */ 731 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 732 { 733 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask; 734 } 735 736 /* Removes entries from a single page table, releasing it if it's empty. 737 * Caller can use the return value to update higher-level entries. 738 */ 739 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, 740 struct i915_page_table *pt, 741 uint64_t start, 742 uint64_t length) 743 { 744 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 745 unsigned int num_entries = gen8_pte_count(start, length); 746 unsigned int pte = gen8_pte_index(start); 747 unsigned int pte_end = pte + num_entries; 748 gen8_pte_t *pt_vaddr; 749 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 750 I915_CACHE_LLC); 751 752 if (WARN_ON(!px_page(pt))) 753 return false; 754 755 GEM_BUG_ON(pte_end > GEN8_PTES); 756 757 bitmap_clear(pt->used_ptes, pte, num_entries); 758 if (USES_FULL_PPGTT(vm->i915)) { 759 if (bitmap_empty(pt->used_ptes, GEN8_PTES)) 760 return true; 761 } 762 763 pt_vaddr = kmap_px(pt); 764 765 while (pte < pte_end) 766 pt_vaddr[pte++] = scratch_pte; 767 768 kunmap_px(ppgtt, pt_vaddr); 769 770 return false; 771 } 772 773 /* Removes entries from a single page dir, releasing it if it's empty. 774 * Caller can use the return value to update higher-level entries 775 */ 776 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, 777 struct i915_page_directory *pd, 778 uint64_t start, 779 uint64_t length) 780 { 781 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 782 struct i915_page_table *pt; 783 uint64_t pde; 784 gen8_pde_t *pde_vaddr; 785 gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), 786 I915_CACHE_LLC); 787 788 gen8_for_each_pde(pt, pd, start, length, pde) { 789 if (WARN_ON(!pd->page_table[pde])) 790 break; 791 792 if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { 793 __clear_bit(pde, pd->used_pdes); 794 pde_vaddr = kmap_px(pd); 795 pde_vaddr[pde] = scratch_pde; 796 kunmap_px(ppgtt, pde_vaddr); 797 free_pt(vm->i915, pt); 798 } 799 } 800 801 if (bitmap_empty(pd->used_pdes, I915_PDES)) 802 return true; 803 804 return false; 805 } 806 807 /* Removes entries from a single page dir pointer, releasing it if it's empty. 
808 * Caller can use the return value to update higher-level entries 809 */ 810 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, 811 struct i915_page_directory_pointer *pdp, 812 uint64_t start, 813 uint64_t length) 814 { 815 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 816 struct i915_page_directory *pd; 817 uint64_t pdpe; 818 819 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 820 if (WARN_ON(!pdp->page_directory[pdpe])) 821 break; 822 823 if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { 824 __clear_bit(pdpe, pdp->used_pdpes); 825 gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); 826 free_pd(vm->i915, pd); 827 } 828 } 829 830 mark_tlbs_dirty(ppgtt); 831 832 if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) 833 return true; 834 835 return false; 836 } 837 838 /* Removes entries from a single pml4. 839 * This is the top-level structure in 4-level page tables used on gen8+. 840 * Empty entries are always scratch pml4e. 841 */ 842 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, 843 struct i915_pml4 *pml4, 844 uint64_t start, 845 uint64_t length) 846 { 847 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 848 struct i915_page_directory_pointer *pdp; 849 uint64_t pml4e; 850 851 GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); 852 853 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 854 if (WARN_ON(!pml4->pdps[pml4e])) 855 break; 856 857 if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { 858 __clear_bit(pml4e, pml4->used_pml4es); 859 gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e); 860 free_pdp(vm->i915, pdp); 861 } 862 } 863 } 864 865 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 866 uint64_t start, uint64_t length) 867 { 868 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 869 870 if (USES_FULL_48BIT_PPGTT(vm->i915)) 871 gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); 872 else 873 gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); 874 } 875 876 static void 877 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 878 struct i915_page_directory_pointer *pdp, 879 struct sg_page_iter *sg_iter, 880 uint64_t start, 881 enum i915_cache_level cache_level) 882 { 883 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 884 gen8_pte_t *pt_vaddr; 885 unsigned pdpe = gen8_pdpe_index(start); 886 unsigned pde = gen8_pde_index(start); 887 unsigned pte = gen8_pte_index(start); 888 889 pt_vaddr = NULL; 890 891 while (__sg_page_iter_next(sg_iter)) { 892 if (pt_vaddr == NULL) { 893 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 894 struct i915_page_table *pt = pd->page_table[pde]; 895 pt_vaddr = kmap_px(pt); 896 } 897 898 pt_vaddr[pte] = 899 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 900 cache_level); 901 if (++pte == GEN8_PTES) { 902 kunmap_px(ppgtt, pt_vaddr); 903 pt_vaddr = NULL; 904 if (++pde == I915_PDES) { 905 if (++pdpe == I915_PDPES_PER_PDP(vm->i915)) 906 break; 907 pde = 0; 908 } 909 pte = 0; 910 } 911 } 912 913 if (pt_vaddr) 914 kunmap_px(ppgtt, pt_vaddr); 915 } 916 917 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 918 struct sg_table *pages, 919 uint64_t start, 920 enum i915_cache_level cache_level, 921 u32 unused) 922 { 923 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 924 struct sg_page_iter sg_iter; 925 926 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 927 928 if (!USES_FULL_48BIT_PPGTT(vm->i915)) { 929 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 930 cache_level); 931 } else { 932 struct 
i915_page_directory_pointer *pdp; 933 uint64_t pml4e; 934 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 935 936 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 937 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 938 start, cache_level); 939 } 940 } 941 } 942 943 static void gen8_free_page_tables(struct drm_i915_private *dev_priv, 944 struct i915_page_directory *pd) 945 { 946 int i; 947 948 if (!px_page(pd)) 949 return; 950 951 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 952 if (WARN_ON(!pd->page_table[i])) 953 continue; 954 955 free_pt(dev_priv, pd->page_table[i]); 956 pd->page_table[i] = NULL; 957 } 958 } 959 960 static int gen8_init_scratch(struct i915_address_space *vm) 961 { 962 struct drm_i915_private *dev_priv = vm->i915; 963 int ret; 964 965 ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); 966 if (ret) 967 return ret; 968 969 vm->scratch_pt = alloc_pt(dev_priv); 970 if (IS_ERR(vm->scratch_pt)) { 971 ret = PTR_ERR(vm->scratch_pt); 972 goto free_scratch_page; 973 } 974 975 vm->scratch_pd = alloc_pd(dev_priv); 976 if (IS_ERR(vm->scratch_pd)) { 977 ret = PTR_ERR(vm->scratch_pd); 978 goto free_pt; 979 } 980 981 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 982 vm->scratch_pdp = alloc_pdp(dev_priv); 983 if (IS_ERR(vm->scratch_pdp)) { 984 ret = PTR_ERR(vm->scratch_pdp); 985 goto free_pd; 986 } 987 } 988 989 gen8_initialize_pt(vm, vm->scratch_pt); 990 gen8_initialize_pd(vm, vm->scratch_pd); 991 if (USES_FULL_48BIT_PPGTT(dev_priv)) 992 gen8_initialize_pdp(vm, vm->scratch_pdp); 993 994 return 0; 995 996 free_pd: 997 free_pd(dev_priv, vm->scratch_pd); 998 free_pt: 999 free_pt(dev_priv, vm->scratch_pt); 1000 free_scratch_page: 1001 cleanup_scratch_page(dev_priv, &vm->scratch_page); 1002 1003 return ret; 1004 } 1005 1006 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 1007 { 1008 enum vgt_g2v_type msg; 1009 struct drm_i915_private *dev_priv = ppgtt->base.i915; 1010 int i; 1011 1012 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 1013 u64 daddr = px_dma(&ppgtt->pml4); 1014 1015 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 1016 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 1017 1018 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 1019 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 1020 } else { 1021 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 1022 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 1023 1024 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 1025 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 1026 } 1027 1028 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 1029 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 1030 } 1031 1032 I915_WRITE(vgtif_reg(g2v_notify), msg); 1033 1034 return 0; 1035 } 1036 1037 static void gen8_free_scratch(struct i915_address_space *vm) 1038 { 1039 struct drm_i915_private *dev_priv = vm->i915; 1040 1041 if (USES_FULL_48BIT_PPGTT(dev_priv)) 1042 free_pdp(dev_priv, vm->scratch_pdp); 1043 free_pd(dev_priv, vm->scratch_pd); 1044 free_pt(dev_priv, vm->scratch_pt); 1045 cleanup_scratch_page(dev_priv, &vm->scratch_page); 1046 } 1047 1048 static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, 1049 struct i915_page_directory_pointer *pdp) 1050 { 1051 int i; 1052 1053 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { 1054 if (WARN_ON(!pdp->page_directory[i])) 1055 continue; 1056 1057 gen8_free_page_tables(dev_priv, pdp->page_directory[i]); 1058 free_pd(dev_priv, pdp->page_directory[i]); 1059 } 1060 1061 free_pdp(dev_priv, pdp); 1062 } 1063 1064 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 1065 { 1066 struct drm_i915_private *dev_priv = ppgtt->base.i915; 1067 int i; 1068 1069 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 1070 if (WARN_ON(!ppgtt->pml4.pdps[i])) 1071 continue; 1072 1073 gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); 1074 } 1075 1076 cleanup_px(dev_priv, &ppgtt->pml4); 1077 } 1078 1079 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 1080 { 1081 struct drm_i915_private *dev_priv = vm->i915; 1082 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1083 1084 if (intel_vgpu_active(dev_priv)) 1085 gen8_ppgtt_notify_vgt(ppgtt, false); 1086 1087 if (!USES_FULL_48BIT_PPGTT(dev_priv)) 1088 gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); 1089 else 1090 gen8_ppgtt_cleanup_4lvl(ppgtt); 1091 1092 gen8_free_scratch(vm); 1093 } 1094 1095 /** 1096 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1097 * @vm: Master vm structure. 1098 * @pd: Page directory for this address range. 1099 * @start: Starting virtual address to begin allocations. 1100 * @length: Size of the allocations. 1101 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1102 * caller to free on error. 1103 * 1104 * Allocate the required number of page tables. Extremely similar to 1105 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1106 * the page directory boundary (instead of the page directory pointer). That 1107 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1108 * possible, and likely that the caller will need to use multiple calls of this 1109 * function to achieve the appropriate allocation. 1110 * 1111 * Return: 0 if success; negative error code otherwise. 
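 *
 * (Added example: one page table maps 512 * 4KiB = 2MiB of virtual address
 * space and one page directory holds 512 page tables, which is where the 1GB
 * boundary mentioned above comes from.)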
1112 */ 1113 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, 1114 struct i915_page_directory *pd, 1115 uint64_t start, 1116 uint64_t length, 1117 unsigned long *new_pts) 1118 { 1119 struct drm_i915_private *dev_priv = vm->i915; 1120 struct i915_page_table *pt; 1121 uint32_t pde; 1122 1123 gen8_for_each_pde(pt, pd, start, length, pde) { 1124 /* Don't reallocate page tables */ 1125 if (test_bit(pde, pd->used_pdes)) { 1126 /* Scratch is never allocated this way */ 1127 WARN_ON(pt == vm->scratch_pt); 1128 continue; 1129 } 1130 1131 pt = alloc_pt(dev_priv); 1132 if (IS_ERR(pt)) 1133 goto unwind_out; 1134 1135 gen8_initialize_pt(vm, pt); 1136 pd->page_table[pde] = pt; 1137 __set_bit(pde, new_pts); 1138 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); 1139 } 1140 1141 return 0; 1142 1143 unwind_out: 1144 for_each_set_bit(pde, new_pts, I915_PDES) 1145 free_pt(dev_priv, pd->page_table[pde]); 1146 1147 return -ENOMEM; 1148 } 1149 1150 /** 1151 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 1152 * @vm: Master vm structure. 1153 * @pdp: Page directory pointer for this address range. 1154 * @start: Starting virtual address to begin allocations. 1155 * @length: Size of the allocations. 1156 * @new_pds: Bitmap set by function with new allocations. Likely used by the 1157 * caller to free on error. 1158 * 1159 * Allocate the required number of page directories starting at the pde index of 1160 * @start, and ending at the pde index @start + @length. This function will skip 1161 * over already allocated page directories within the range, and only allocate 1162 * new ones, setting the appropriate pointer within the pdp as well as the 1163 * correct position in the bitmap @new_pds. 1164 * 1165 * The function will only allocate the pages within the range for a give page 1166 * directory pointer. In other words, if @start + @length straddles a virtually 1167 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 1168 * required by the caller, This is not currently possible, and the BUG in the 1169 * code will prevent it. 1170 * 1171 * Return: 0 if success; negative error code otherwise. 1172 */ 1173 static int 1174 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, 1175 struct i915_page_directory_pointer *pdp, 1176 uint64_t start, 1177 uint64_t length, 1178 unsigned long *new_pds) 1179 { 1180 struct drm_i915_private *dev_priv = vm->i915; 1181 struct i915_page_directory *pd; 1182 uint32_t pdpe; 1183 uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); 1184 1185 WARN_ON(!bitmap_empty(new_pds, pdpes)); 1186 1187 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1188 if (test_bit(pdpe, pdp->used_pdpes)) 1189 continue; 1190 1191 pd = alloc_pd(dev_priv); 1192 if (IS_ERR(pd)) 1193 goto unwind_out; 1194 1195 gen8_initialize_pd(vm, pd); 1196 pdp->page_directory[pdpe] = pd; 1197 __set_bit(pdpe, new_pds); 1198 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); 1199 } 1200 1201 return 0; 1202 1203 unwind_out: 1204 for_each_set_bit(pdpe, new_pds, pdpes) 1205 free_pd(dev_priv, pdp->page_directory[pdpe]); 1206 1207 return -ENOMEM; 1208 } 1209 1210 /** 1211 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. 1212 * @vm: Master vm structure. 1213 * @pml4: Page map level 4 for this address range. 1214 * @start: Starting virtual address to begin allocations. 1215 * @length: Size of the allocations. 1216 * @new_pdps: Bitmap set by function with new allocations. 
Likely used by the 1217 * caller to free on error. 1218 * 1219 * Allocate the required number of page directory pointers. Extremely similar to 1220 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1221 * The main difference is here we are limited by the pml4 boundary (instead of 1222 * the page directory pointer). 1223 * 1224 * Return: 0 if success; negative error code otherwise. 1225 */ 1226 static int 1227 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1228 struct i915_pml4 *pml4, 1229 uint64_t start, 1230 uint64_t length, 1231 unsigned long *new_pdps) 1232 { 1233 struct drm_i915_private *dev_priv = vm->i915; 1234 struct i915_page_directory_pointer *pdp; 1235 uint32_t pml4e; 1236 1237 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1238 1239 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1240 if (!test_bit(pml4e, pml4->used_pml4es)) { 1241 pdp = alloc_pdp(dev_priv); 1242 if (IS_ERR(pdp)) 1243 goto unwind_out; 1244 1245 gen8_initialize_pdp(vm, pdp); 1246 pml4->pdps[pml4e] = pdp; 1247 __set_bit(pml4e, new_pdps); 1248 trace_i915_page_directory_pointer_entry_alloc(vm, 1249 pml4e, 1250 start, 1251 GEN8_PML4E_SHIFT); 1252 } 1253 } 1254 1255 return 0; 1256 1257 unwind_out: 1258 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1259 free_pdp(dev_priv, pml4->pdps[pml4e]); 1260 1261 return -ENOMEM; 1262 } 1263 1264 static void 1265 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1266 { 1267 kfree(new_pts); 1268 kfree(new_pds); 1269 } 1270 1271 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1272 * of these are based on the number of PDPEs in the system. 1273 */ 1274 static 1275 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1276 unsigned long **new_pts, 1277 uint32_t pdpes) 1278 { 1279 unsigned long *pds; 1280 unsigned long *pts; 1281 1282 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1283 if (!pds) 1284 return -ENOMEM; 1285 1286 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1287 GFP_TEMPORARY); 1288 if (!pts) 1289 goto err_out; 1290 1291 *new_pds = pds; 1292 *new_pts = pts; 1293 1294 return 0; 1295 1296 err_out: 1297 free_gen8_temp_bitmaps(pds, pts); 1298 return -ENOMEM; 1299 } 1300 1301 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1302 struct i915_page_directory_pointer *pdp, 1303 uint64_t start, 1304 uint64_t length) 1305 { 1306 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1307 unsigned long *new_page_dirs, *new_page_tables; 1308 struct drm_i915_private *dev_priv = vm->i915; 1309 struct i915_page_directory *pd; 1310 const uint64_t orig_start = start; 1311 const uint64_t orig_length = length; 1312 uint32_t pdpe; 1313 uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); 1314 int ret; 1315 1316 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1317 if (ret) 1318 return ret; 1319 1320 /* Do the allocations first so we can easily bail out */ 1321 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1322 new_page_dirs); 1323 if (ret) { 1324 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1325 return ret; 1326 } 1327 1328 /* For every page directory referenced, allocate page tables */ 1329 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1330 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1331 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1332 if (ret) 1333 goto err_out; 1334 } 1335 1336 start = orig_start; 1337 length = orig_length; 
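	/*
	 * Added note: new_page_dirs/new_page_tables track only the structures
	 * allocated by this call, so the err_out path below frees exactly
	 * those and leaves any pre-existing page directories and tables
	 * intact.
	 */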
1338 1339 /* Allocations have completed successfully, so set the bitmaps, and do 1340 * the mappings. */ 1341 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1342 gen8_pde_t *const page_directory = kmap_px(pd); 1343 struct i915_page_table *pt; 1344 uint64_t pd_len = length; 1345 uint64_t pd_start = start; 1346 uint32_t pde; 1347 1348 /* Every pd should be allocated, we just did that above. */ 1349 WARN_ON(!pd); 1350 1351 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1352 /* Same reasoning as pd */ 1353 WARN_ON(!pt); 1354 WARN_ON(!pd_len); 1355 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1356 1357 /* Set our used ptes within the page table */ 1358 bitmap_set(pt->used_ptes, 1359 gen8_pte_index(pd_start), 1360 gen8_pte_count(pd_start, pd_len)); 1361 1362 /* Our pde is now pointing to the pagetable, pt */ 1363 __set_bit(pde, pd->used_pdes); 1364 1365 /* Map the PDE to the page table */ 1366 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1367 I915_CACHE_LLC); 1368 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1369 gen8_pte_index(start), 1370 gen8_pte_count(start, length), 1371 GEN8_PTES); 1372 1373 /* NB: We haven't yet mapped ptes to pages. At this 1374 * point we're still relying on insert_entries() */ 1375 } 1376 1377 kunmap_px(ppgtt, page_directory); 1378 __set_bit(pdpe, pdp->used_pdpes); 1379 gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); 1380 } 1381 1382 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1383 mark_tlbs_dirty(ppgtt); 1384 return 0; 1385 1386 err_out: 1387 while (pdpe--) { 1388 unsigned long temp; 1389 1390 for_each_set_bit(temp, new_page_tables + pdpe * 1391 BITS_TO_LONGS(I915_PDES), I915_PDES) 1392 free_pt(dev_priv, 1393 pdp->page_directory[pdpe]->page_table[temp]); 1394 } 1395 1396 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1397 free_pd(dev_priv, pdp->page_directory[pdpe]); 1398 1399 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1400 mark_tlbs_dirty(ppgtt); 1401 return ret; 1402 } 1403 1404 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1405 struct i915_pml4 *pml4, 1406 uint64_t start, 1407 uint64_t length) 1408 { 1409 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1410 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1411 struct i915_page_directory_pointer *pdp; 1412 uint64_t pml4e; 1413 int ret = 0; 1414 1415 /* Do the pml4 allocations first, so we don't need to track the newly 1416 * allocated tables below the pdp */ 1417 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1418 1419 /* The pagedirectory and pagetable allocations are done in the shared 3 1420 * and 4 level code. Just allocate the pdps. 1421 */ 1422 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1423 new_pdps); 1424 if (ret) 1425 return ret; 1426 1427 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1428 "The allocation has spanned more than 512GB. 
" 1429 "It is highly likely this is incorrect."); 1430 1431 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1432 WARN_ON(!pdp); 1433 1434 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1435 if (ret) 1436 goto err_out; 1437 1438 gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e); 1439 } 1440 1441 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1442 GEN8_PML4ES_PER_PML4); 1443 1444 return 0; 1445 1446 err_out: 1447 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1448 gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]); 1449 1450 return ret; 1451 } 1452 1453 static int gen8_alloc_va_range(struct i915_address_space *vm, 1454 uint64_t start, uint64_t length) 1455 { 1456 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1457 1458 if (USES_FULL_48BIT_PPGTT(vm->i915)) 1459 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1460 else 1461 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1462 } 1463 1464 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1465 uint64_t start, uint64_t length, 1466 gen8_pte_t scratch_pte, 1467 struct seq_file *m) 1468 { 1469 struct i915_page_directory *pd; 1470 uint32_t pdpe; 1471 1472 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1473 struct i915_page_table *pt; 1474 uint64_t pd_len = length; 1475 uint64_t pd_start = start; 1476 uint32_t pde; 1477 1478 if (!test_bit(pdpe, pdp->used_pdpes)) 1479 continue; 1480 1481 seq_printf(m, "\tPDPE #%d\n", pdpe); 1482 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1483 uint32_t pte; 1484 gen8_pte_t *pt_vaddr; 1485 1486 if (!test_bit(pde, pd->used_pdes)) 1487 continue; 1488 1489 pt_vaddr = kmap_px(pt); 1490 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1491 uint64_t va = 1492 (pdpe << GEN8_PDPE_SHIFT) | 1493 (pde << GEN8_PDE_SHIFT) | 1494 (pte << GEN8_PTE_SHIFT); 1495 int i; 1496 bool found = false; 1497 1498 for (i = 0; i < 4; i++) 1499 if (pt_vaddr[pte + i] != scratch_pte) 1500 found = true; 1501 if (!found) 1502 continue; 1503 1504 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1505 for (i = 0; i < 4; i++) { 1506 if (pt_vaddr[pte + i] != scratch_pte) 1507 seq_printf(m, " %llx", pt_vaddr[pte + i]); 1508 else 1509 seq_puts(m, " SCRATCH "); 1510 } 1511 seq_puts(m, "\n"); 1512 } 1513 /* don't use kunmap_px, it could trigger 1514 * an unnecessary flush. 
1515 */ 1516 kunmap_atomic(pt_vaddr); 1517 } 1518 } 1519 } 1520 1521 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1522 { 1523 struct i915_address_space *vm = &ppgtt->base; 1524 uint64_t start = ppgtt->base.start; 1525 uint64_t length = ppgtt->base.total; 1526 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 1527 I915_CACHE_LLC); 1528 1529 if (!USES_FULL_48BIT_PPGTT(vm->i915)) { 1530 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1531 } else { 1532 uint64_t pml4e; 1533 struct i915_pml4 *pml4 = &ppgtt->pml4; 1534 struct i915_page_directory_pointer *pdp; 1535 1536 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1537 if (!test_bit(pml4e, pml4->used_pml4es)) 1538 continue; 1539 1540 seq_printf(m, " PML4E #%llu\n", pml4e); 1541 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1542 } 1543 } 1544 } 1545 1546 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1547 { 1548 unsigned long *new_page_dirs, *new_page_tables; 1549 uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); 1550 int ret; 1551 1552 /* We allocate temp bitmap for page tables for no gain 1553 * but as this is for init only, lets keep the things simple 1554 */ 1555 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1556 if (ret) 1557 return ret; 1558 1559 /* Allocate for all pdps regardless of how the ppgtt 1560 * was defined. 1561 */ 1562 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1563 0, 1ULL << 32, 1564 new_page_dirs); 1565 if (!ret) 1566 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1567 1568 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1569 1570 return ret; 1571 } 1572 1573 /* 1574 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1575 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1576 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1577 * space. 
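 *
 * (Added illustration: with full 48bit ppgtt a fourth level, the PML4, is
 * used instead, and a virtual address decomposes as
 *
 *	va[47:39] -> pml4e, va[38:30] -> pdpe, va[29:21] -> pde,
 *	va[20:12] -> pte, va[11:0] -> offset within the 4KiB page,
 *
 * matching the GEN8_PML4E_SHIFT/GEN8_PDPE_SHIFT/GEN8_PDE_SHIFT/GEN8_PTE_SHIFT
 * arithmetic used in gen8_dump_pdp() above.)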
1578 * 1579 */ 1580 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1581 { 1582 struct drm_i915_private *dev_priv = ppgtt->base.i915; 1583 int ret; 1584 1585 ret = gen8_init_scratch(&ppgtt->base); 1586 if (ret) 1587 return ret; 1588 1589 ppgtt->base.start = 0; 1590 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1591 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1592 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1593 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1594 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1595 ppgtt->base.bind_vma = ppgtt_bind_vma; 1596 ppgtt->debug_dump = gen8_dump_ppgtt; 1597 1598 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 1599 ret = setup_px(dev_priv, &ppgtt->pml4); 1600 if (ret) 1601 goto free_scratch; 1602 1603 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1604 1605 ppgtt->base.total = 1ULL << 48; 1606 ppgtt->switch_mm = gen8_48b_mm_switch; 1607 } else { 1608 ret = __pdp_init(dev_priv, &ppgtt->pdp); 1609 if (ret) 1610 goto free_scratch; 1611 1612 ppgtt->base.total = 1ULL << 32; 1613 ppgtt->switch_mm = gen8_legacy_mm_switch; 1614 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1615 0, 0, 1616 GEN8_PML4E_SHIFT); 1617 1618 if (intel_vgpu_active(dev_priv)) { 1619 ret = gen8_preallocate_top_level_pdps(ppgtt); 1620 if (ret) 1621 goto free_scratch; 1622 } 1623 } 1624 1625 if (intel_vgpu_active(dev_priv)) 1626 gen8_ppgtt_notify_vgt(ppgtt, true); 1627 1628 return 0; 1629 1630 free_scratch: 1631 gen8_free_scratch(&ppgtt->base); 1632 return ret; 1633 } 1634 1635 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1636 { 1637 struct i915_address_space *vm = &ppgtt->base; 1638 struct i915_page_table *unused; 1639 gen6_pte_t scratch_pte; 1640 uint32_t pd_entry; 1641 uint32_t pte, pde; 1642 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1643 1644 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1645 I915_CACHE_LLC, 0); 1646 1647 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1648 u32 expected; 1649 gen6_pte_t *pt_vaddr; 1650 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1651 pd_entry = readl(ppgtt->pd_addr + pde); 1652 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1653 1654 if (pd_entry != expected) 1655 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1656 pde, 1657 pd_entry, 1658 expected); 1659 seq_printf(m, "\tPDE: %x\n", pd_entry); 1660 1661 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1662 1663 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1664 unsigned long va = 1665 (pde * PAGE_SIZE * GEN6_PTES) + 1666 (pte * PAGE_SIZE); 1667 int i; 1668 bool found = false; 1669 for (i = 0; i < 4; i++) 1670 if (pt_vaddr[pte + i] != scratch_pte) 1671 found = true; 1672 if (!found) 1673 continue; 1674 1675 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1676 for (i = 0; i < 4; i++) { 1677 if (pt_vaddr[pte + i] != scratch_pte) 1678 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1679 else 1680 seq_puts(m, " SCRATCH "); 1681 } 1682 seq_puts(m, "\n"); 1683 } 1684 kunmap_px(ppgtt, pt_vaddr); 1685 } 1686 } 1687 1688 /* Write pde (index) from the page directory @pd to the page table @pt */ 1689 static void gen6_write_pde(struct i915_page_directory *pd, 1690 const int pde, struct i915_page_table *pt) 1691 { 1692 /* Caller needs to make sure the write completes if necessary */ 1693 struct i915_hw_ppgtt *ppgtt = 1694 container_of(pd, struct i915_hw_ppgtt, pd); 1695 u32 pd_entry; 1696 1697 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1698 pd_entry |= 
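	/*
	 * Added note: a gen6 PDE is just the page-aligned dma address of the
	 * page table ORed with a valid bit; it is written with writel()
	 * because the PDEs live in the GGTT, at ppgtt->pd_addr derived from
	 * the gsm mapping in gen6_ppgtt_init().
	 */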
GEN6_PDE_VALID; 1699 1700 writel(pd_entry, ppgtt->pd_addr + pde); 1701 } 1702 1703 /* Write all the page tables found in the ppgtt structure to incrementing page 1704 * directories. */ 1705 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1706 struct i915_page_directory *pd, 1707 uint32_t start, uint32_t length) 1708 { 1709 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1710 struct i915_page_table *pt; 1711 uint32_t pde; 1712 1713 gen6_for_each_pde(pt, pd, start, length, pde) 1714 gen6_write_pde(pd, pde, pt); 1715 1716 /* Make sure write is complete before other code can use this page 1717 * table. Also require for WC mapped PTEs */ 1718 readl(ggtt->gsm); 1719 } 1720 1721 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1722 { 1723 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1724 1725 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1726 } 1727 1728 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1729 struct drm_i915_gem_request *req) 1730 { 1731 struct intel_ring *ring = req->ring; 1732 struct intel_engine_cs *engine = req->engine; 1733 int ret; 1734 1735 /* NB: TLBs must be flushed and invalidated before a switch */ 1736 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1737 if (ret) 1738 return ret; 1739 1740 ret = intel_ring_begin(req, 6); 1741 if (ret) 1742 return ret; 1743 1744 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1745 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1746 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1747 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1748 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1749 intel_ring_emit(ring, MI_NOOP); 1750 intel_ring_advance(ring); 1751 1752 return 0; 1753 } 1754 1755 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1756 struct drm_i915_gem_request *req) 1757 { 1758 struct intel_ring *ring = req->ring; 1759 struct intel_engine_cs *engine = req->engine; 1760 int ret; 1761 1762 /* NB: TLBs must be flushed and invalidated before a switch */ 1763 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1764 if (ret) 1765 return ret; 1766 1767 ret = intel_ring_begin(req, 6); 1768 if (ret) 1769 return ret; 1770 1771 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1772 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1773 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1774 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1775 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1776 intel_ring_emit(ring, MI_NOOP); 1777 intel_ring_advance(ring); 1778 1779 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1780 if (engine->id != RCS) { 1781 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1782 if (ret) 1783 return ret; 1784 } 1785 1786 return 0; 1787 } 1788 1789 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1790 struct drm_i915_gem_request *req) 1791 { 1792 struct intel_engine_cs *engine = req->engine; 1793 struct drm_i915_private *dev_priv = req->i915; 1794 1795 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1796 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1797 return 0; 1798 } 1799 1800 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) 1801 { 1802 struct intel_engine_cs *engine; 1803 enum intel_engine_id id; 1804 1805 for_each_engine(engine, dev_priv, id) { 1806 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ? 
1807 GEN8_GFX_PPGTT_48B : 0; 1808 I915_WRITE(RING_MODE_GEN7(engine), 1809 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1810 } 1811 } 1812 1813 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) 1814 { 1815 struct intel_engine_cs *engine; 1816 uint32_t ecochk, ecobits; 1817 enum intel_engine_id id; 1818 1819 ecobits = I915_READ(GAC_ECO_BITS); 1820 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1821 1822 ecochk = I915_READ(GAM_ECOCHK); 1823 if (IS_HASWELL(dev_priv)) { 1824 ecochk |= ECOCHK_PPGTT_WB_HSW; 1825 } else { 1826 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1827 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1828 } 1829 I915_WRITE(GAM_ECOCHK, ecochk); 1830 1831 for_each_engine(engine, dev_priv, id) { 1832 /* GFX_MODE is per-ring on gen7+ */ 1833 I915_WRITE(RING_MODE_GEN7(engine), 1834 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1835 } 1836 } 1837 1838 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) 1839 { 1840 uint32_t ecochk, gab_ctl, ecobits; 1841 1842 ecobits = I915_READ(GAC_ECO_BITS); 1843 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1844 ECOBITS_PPGTT_CACHE64B); 1845 1846 gab_ctl = I915_READ(GAB_CTL); 1847 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1848 1849 ecochk = I915_READ(GAM_ECOCHK); 1850 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1851 1852 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1853 } 1854 1855 /* PPGTT support for Sandybdrige/Gen6 and later */ 1856 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1857 uint64_t start, 1858 uint64_t length) 1859 { 1860 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1861 gen6_pte_t *pt_vaddr, scratch_pte; 1862 unsigned first_entry = start >> PAGE_SHIFT; 1863 unsigned num_entries = length >> PAGE_SHIFT; 1864 unsigned act_pt = first_entry / GEN6_PTES; 1865 unsigned first_pte = first_entry % GEN6_PTES; 1866 unsigned last_pte, i; 1867 1868 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1869 I915_CACHE_LLC, 0); 1870 1871 while (num_entries) { 1872 last_pte = first_pte + num_entries; 1873 if (last_pte > GEN6_PTES) 1874 last_pte = GEN6_PTES; 1875 1876 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1877 1878 for (i = first_pte; i < last_pte; i++) 1879 pt_vaddr[i] = scratch_pte; 1880 1881 kunmap_px(ppgtt, pt_vaddr); 1882 1883 num_entries -= last_pte - first_pte; 1884 first_pte = 0; 1885 act_pt++; 1886 } 1887 } 1888 1889 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1890 struct sg_table *pages, 1891 uint64_t start, 1892 enum i915_cache_level cache_level, u32 flags) 1893 { 1894 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1895 unsigned first_entry = start >> PAGE_SHIFT; 1896 unsigned act_pt = first_entry / GEN6_PTES; 1897 unsigned act_pte = first_entry % GEN6_PTES; 1898 gen6_pte_t *pt_vaddr = NULL; 1899 struct sgt_iter sgt_iter; 1900 dma_addr_t addr; 1901 1902 for_each_sgt_dma(addr, sgt_iter, pages) { 1903 if (pt_vaddr == NULL) 1904 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1905 1906 pt_vaddr[act_pte] = 1907 vm->pte_encode(addr, cache_level, flags); 1908 1909 if (++act_pte == GEN6_PTES) { 1910 kunmap_px(ppgtt, pt_vaddr); 1911 pt_vaddr = NULL; 1912 act_pt++; 1913 act_pte = 0; 1914 } 1915 } 1916 1917 if (pt_vaddr) 1918 kunmap_px(ppgtt, pt_vaddr); 1919 } 1920 1921 static int gen6_alloc_va_range(struct i915_address_space *vm, 1922 uint64_t start_in, uint64_t length_in) 1923 { 1924 DECLARE_BITMAP(new_page_tables, I915_PDES); 1925 struct drm_i915_private *dev_priv = vm->i915; 1926 struct 
i915_ggtt *ggtt = &dev_priv->ggtt; 1927 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1928 struct i915_page_table *pt; 1929 uint32_t start, length, start_save, length_save; 1930 uint32_t pde; 1931 int ret; 1932 1933 start = start_save = start_in; 1934 length = length_save = length_in; 1935 1936 bitmap_zero(new_page_tables, I915_PDES); 1937 1938 /* The allocation is done in two stages so that we can bail out with 1939 * minimal amount of pain. The first stage finds new page tables that 1940 * need allocation. The second stage marks use ptes within the page 1941 * tables. 1942 */ 1943 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1944 if (pt != vm->scratch_pt) { 1945 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1946 continue; 1947 } 1948 1949 /* We've already allocated a page table */ 1950 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1951 1952 pt = alloc_pt(dev_priv); 1953 if (IS_ERR(pt)) { 1954 ret = PTR_ERR(pt); 1955 goto unwind_out; 1956 } 1957 1958 gen6_initialize_pt(vm, pt); 1959 1960 ppgtt->pd.page_table[pde] = pt; 1961 __set_bit(pde, new_page_tables); 1962 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1963 } 1964 1965 start = start_save; 1966 length = length_save; 1967 1968 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1969 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1970 1971 bitmap_zero(tmp_bitmap, GEN6_PTES); 1972 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1973 gen6_pte_count(start, length)); 1974 1975 if (__test_and_clear_bit(pde, new_page_tables)) 1976 gen6_write_pde(&ppgtt->pd, pde, pt); 1977 1978 trace_i915_page_table_entry_map(vm, pde, pt, 1979 gen6_pte_index(start), 1980 gen6_pte_count(start, length), 1981 GEN6_PTES); 1982 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1983 GEN6_PTES); 1984 } 1985 1986 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1987 1988 /* Make sure write is complete before other code can use this page 1989 * table. 
Also require for WC mapped PTEs */ 1990 readl(ggtt->gsm); 1991 1992 mark_tlbs_dirty(ppgtt); 1993 return 0; 1994 1995 unwind_out: 1996 for_each_set_bit(pde, new_page_tables, I915_PDES) { 1997 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 1998 1999 ppgtt->pd.page_table[pde] = vm->scratch_pt; 2000 free_pt(dev_priv, pt); 2001 } 2002 2003 mark_tlbs_dirty(ppgtt); 2004 return ret; 2005 } 2006 2007 static int gen6_init_scratch(struct i915_address_space *vm) 2008 { 2009 struct drm_i915_private *dev_priv = vm->i915; 2010 int ret; 2011 2012 ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); 2013 if (ret) 2014 return ret; 2015 2016 vm->scratch_pt = alloc_pt(dev_priv); 2017 if (IS_ERR(vm->scratch_pt)) { 2018 cleanup_scratch_page(dev_priv, &vm->scratch_page); 2019 return PTR_ERR(vm->scratch_pt); 2020 } 2021 2022 gen6_initialize_pt(vm, vm->scratch_pt); 2023 2024 return 0; 2025 } 2026 2027 static void gen6_free_scratch(struct i915_address_space *vm) 2028 { 2029 struct drm_i915_private *dev_priv = vm->i915; 2030 2031 free_pt(dev_priv, vm->scratch_pt); 2032 cleanup_scratch_page(dev_priv, &vm->scratch_page); 2033 } 2034 2035 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 2036 { 2037 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 2038 struct i915_page_directory *pd = &ppgtt->pd; 2039 struct drm_i915_private *dev_priv = vm->i915; 2040 struct i915_page_table *pt; 2041 uint32_t pde; 2042 2043 drm_mm_remove_node(&ppgtt->node); 2044 2045 gen6_for_all_pdes(pt, pd, pde) 2046 if (pt != vm->scratch_pt) 2047 free_pt(dev_priv, pt); 2048 2049 gen6_free_scratch(vm); 2050 } 2051 2052 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 2053 { 2054 struct i915_address_space *vm = &ppgtt->base; 2055 struct drm_i915_private *dev_priv = ppgtt->base.i915; 2056 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2057 int ret; 2058 2059 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 2060 * allocator works in address space sizes, so it's multiplied by page 2061 * size. We allocate at the top of the GTT to avoid fragmentation. 
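 *
 * Purely as an illustration of the sizes involved (assuming 4 KiB pages
 * and the 4-byte gen6 PTE written with iowrite32() elsewhere in this
 * file): reserving I915_PDES = 512 directory entries at one page of
 * address space each amounts to 512 * 4096 bytes = 2 MiB of GGTT space
 * for the PDEs (the GEN6_PD_SIZE passed to i915_gem_gtt_insert() below),
 * and with each PDE pointing at a page table of GEN6_PTES = 4096 / 4 =
 * 1024 entries, the whole gen6/7 PPGTT set up in gen6_ppgtt_init()
 * spans 512 * 1024 * 4096 bytes = 2 GiB (ppgtt->base.total).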
2062 */ 2063 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2064 2065 ret = gen6_init_scratch(vm); 2066 if (ret) 2067 return ret; 2068 2069 ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node, 2070 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2071 I915_COLOR_UNEVICTABLE, 2072 0, ggtt->base.total, 2073 PIN_HIGH); 2074 if (ret) 2075 goto err_out; 2076 2077 if (ppgtt->node.start < ggtt->mappable_end) 2078 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2079 2080 return 0; 2081 2082 err_out: 2083 gen6_free_scratch(vm); 2084 return ret; 2085 } 2086 2087 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2088 { 2089 return gen6_ppgtt_allocate_page_directories(ppgtt); 2090 } 2091 2092 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2093 uint64_t start, uint64_t length) 2094 { 2095 struct i915_page_table *unused; 2096 uint32_t pde; 2097 2098 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2099 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2100 } 2101 2102 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2103 { 2104 struct drm_i915_private *dev_priv = ppgtt->base.i915; 2105 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2106 int ret; 2107 2108 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2109 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv)) 2110 ppgtt->switch_mm = gen6_mm_switch; 2111 else if (IS_HASWELL(dev_priv)) 2112 ppgtt->switch_mm = hsw_mm_switch; 2113 else if (IS_GEN7(dev_priv)) 2114 ppgtt->switch_mm = gen7_mm_switch; 2115 else 2116 BUG(); 2117 2118 ret = gen6_ppgtt_alloc(ppgtt); 2119 if (ret) 2120 return ret; 2121 2122 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2123 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2124 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2125 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2126 ppgtt->base.bind_vma = ppgtt_bind_vma; 2127 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2128 ppgtt->base.start = 0; 2129 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2130 ppgtt->debug_dump = gen6_dump_ppgtt; 2131 2132 ppgtt->pd.base.ggtt_offset = 2133 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2134 2135 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2136 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2137 2138 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2139 2140 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2141 2142 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2143 ppgtt->node.size >> 20, 2144 ppgtt->node.start / PAGE_SIZE); 2145 2146 DRM_DEBUG("Adding PPGTT at offset %x\n", 2147 ppgtt->pd.base.ggtt_offset << 10); 2148 2149 return 0; 2150 } 2151 2152 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2153 struct drm_i915_private *dev_priv) 2154 { 2155 ppgtt->base.i915 = dev_priv; 2156 2157 if (INTEL_INFO(dev_priv)->gen < 8) 2158 return gen6_ppgtt_init(ppgtt); 2159 else 2160 return gen8_ppgtt_init(ppgtt); 2161 } 2162 2163 static void i915_address_space_init(struct i915_address_space *vm, 2164 struct drm_i915_private *dev_priv, 2165 const char *name) 2166 { 2167 i915_gem_timeline_init(dev_priv, &vm->timeline, name); 2168 drm_mm_init(&vm->mm, vm->start, vm->total); 2169 INIT_LIST_HEAD(&vm->active_list); 2170 INIT_LIST_HEAD(&vm->inactive_list); 2171 INIT_LIST_HEAD(&vm->unbound_list); 2172 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2173 } 2174 2175 static void i915_address_space_fini(struct i915_address_space *vm) 2176 { 2177 i915_gem_timeline_fini(&vm->timeline); 2178 drm_mm_takedown(&vm->mm); 2179 list_del(&vm->global_link); 2180 } 2181 2182 
static void gtt_write_workarounds(struct drm_i915_private *dev_priv) 2183 { 2184 /* This function is for gtt related workarounds. This function is 2185 * called on driver load and after a GPU reset, so you can place 2186 * workarounds here even if they get overwritten by GPU reset. 2187 */ 2188 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2189 if (IS_BROADWELL(dev_priv)) 2190 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2191 else if (IS_CHERRYVIEW(dev_priv)) 2192 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2193 else if (IS_SKYLAKE(dev_priv)) 2194 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2195 else if (IS_BROXTON(dev_priv)) 2196 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2197 } 2198 2199 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2200 struct drm_i915_private *dev_priv, 2201 struct drm_i915_file_private *file_priv, 2202 const char *name) 2203 { 2204 int ret; 2205 2206 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2207 if (ret == 0) { 2208 kref_init(&ppgtt->ref); 2209 i915_address_space_init(&ppgtt->base, dev_priv, name); 2210 ppgtt->base.file = file_priv; 2211 } 2212 2213 return ret; 2214 } 2215 2216 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) 2217 { 2218 gtt_write_workarounds(dev_priv); 2219 2220 /* In the case of execlists, PPGTT is enabled by the context descriptor 2221 * and the PDPs are contained within the context itself. We don't 2222 * need to do anything here. */ 2223 if (i915.enable_execlists) 2224 return 0; 2225 2226 if (!USES_PPGTT(dev_priv)) 2227 return 0; 2228 2229 if (IS_GEN6(dev_priv)) 2230 gen6_ppgtt_enable(dev_priv); 2231 else if (IS_GEN7(dev_priv)) 2232 gen7_ppgtt_enable(dev_priv); 2233 else if (INTEL_GEN(dev_priv) >= 8) 2234 gen8_ppgtt_enable(dev_priv); 2235 else 2236 MISSING_CASE(INTEL_GEN(dev_priv)); 2237 2238 return 0; 2239 } 2240 2241 struct i915_hw_ppgtt * 2242 i915_ppgtt_create(struct drm_i915_private *dev_priv, 2243 struct drm_i915_file_private *fpriv, 2244 const char *name) 2245 { 2246 struct i915_hw_ppgtt *ppgtt; 2247 int ret; 2248 2249 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2250 if (!ppgtt) 2251 return ERR_PTR(-ENOMEM); 2252 2253 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); 2254 if (ret) { 2255 kfree(ppgtt); 2256 return ERR_PTR(ret); 2257 } 2258 2259 trace_i915_ppgtt_create(&ppgtt->base); 2260 2261 return ppgtt; 2262 } 2263 2264 void i915_ppgtt_close(struct i915_address_space *vm) 2265 { 2266 struct list_head *phases[] = { 2267 &vm->active_list, 2268 &vm->inactive_list, 2269 &vm->unbound_list, 2270 NULL, 2271 }, **phase; 2272 2273 GEM_BUG_ON(vm->closed); 2274 vm->closed = true; 2275 2276 for (phase = phases; *phase; phase++) { 2277 struct i915_vma *vma, *vn; 2278 2279 list_for_each_entry_safe(vma, vn, *phase, vm_link) 2280 if (!i915_vma_is_closed(vma)) 2281 i915_vma_close(vma); 2282 } 2283 } 2284 2285 void i915_ppgtt_release(struct kref *kref) 2286 { 2287 struct i915_hw_ppgtt *ppgtt = 2288 container_of(kref, struct i915_hw_ppgtt, ref); 2289 2290 trace_i915_ppgtt_release(&ppgtt->base); 2291 2292 /* vmas should already be unbound and destroyed */ 2293 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2294 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2295 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 2296 2297 i915_address_space_fini(&ppgtt->base); 2298 2299 ppgtt->base.cleanup(&ppgtt->base); 2300 kfree(ppgtt); 2301 } 2302 2303 /* Certain Gen5 chipsets require require idling the GPU before 2304 * unmapping 
anything from the GTT when VT-d is enabled. 2305 */ 2306 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 2307 { 2308 #ifdef CONFIG_INTEL_IOMMU 2309 /* Query intel_iommu to see if we need the workaround. Presumably that 2310 * was loaded first. 2311 */ 2312 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 2313 return true; 2314 #endif 2315 return false; 2316 } 2317 2318 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2319 { 2320 struct intel_engine_cs *engine; 2321 enum intel_engine_id id; 2322 2323 if (INTEL_INFO(dev_priv)->gen < 6) 2324 return; 2325 2326 for_each_engine(engine, dev_priv, id) { 2327 u32 fault_reg; 2328 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2329 if (fault_reg & RING_FAULT_VALID) { 2330 DRM_DEBUG_DRIVER("Unexpected fault\n" 2331 "\tAddr: 0x%08lx\n" 2332 "\tAddress space: %s\n" 2333 "\tSource ID: %d\n" 2334 "\tType: %d\n", 2335 fault_reg & PAGE_MASK, 2336 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2337 RING_FAULT_SRCID(fault_reg), 2338 RING_FAULT_FAULT_TYPE(fault_reg)); 2339 I915_WRITE(RING_FAULT_REG(engine), 2340 fault_reg & ~RING_FAULT_VALID); 2341 } 2342 } 2343 2344 /* Engine specific init may not have been done till this point. */ 2345 if (dev_priv->engine[RCS]) 2346 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); 2347 } 2348 2349 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) 2350 { 2351 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2352 2353 /* Don't bother messing with faults pre GEN6 as we have little 2354 * documentation supporting that it's a good idea. 2355 */ 2356 if (INTEL_GEN(dev_priv) < 6) 2357 return; 2358 2359 i915_check_and_clear_faults(dev_priv); 2360 2361 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 2362 2363 i915_ggtt_invalidate(dev_priv); 2364 } 2365 2366 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, 2367 struct sg_table *pages) 2368 { 2369 do { 2370 if (dma_map_sg(&obj->base.dev->pdev->dev, 2371 pages->sgl, pages->nents, 2372 PCI_DMA_BIDIRECTIONAL)) 2373 return 0; 2374 2375 /* If the DMA remap fails, one cause can be that we have 2376 * too many objects pinned in a small remapping table, 2377 * such as swiotlb. Incrementally purge all other objects and 2378 * try again - if there are no more pages to remove from 2379 * the DMA remapper, i915_gem_shrink will return 0. 
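 *
 * In other words, the do/while loop below retries dma_map_sg() after
 * every shrinker pass that managed to release something, and only gives
 * up with -ENOSPC once i915_gem_shrink() reports that nothing further
 * (bound, unbound or active) could be purged.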
2380 */
2381 GEM_BUG_ON(obj->mm.pages == pages);
2382 } while (i915_gem_shrink(to_i915(obj->base.dev),
2383 obj->base.size >> PAGE_SHIFT,
2384 I915_SHRINK_BOUND |
2385 I915_SHRINK_UNBOUND |
2386 I915_SHRINK_ACTIVE));
2387
2388 return -ENOSPC;
2389 }
2390
2391 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
2392 {
2393 writeq(pte, addr);
2394 }
2395
2396 static void gen8_ggtt_insert_page(struct i915_address_space *vm,
2397 dma_addr_t addr,
2398 uint64_t offset,
2399 enum i915_cache_level level,
2400 u32 unused)
2401 {
2402 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2403 gen8_pte_t __iomem *pte =
2404 (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2405
2406 gen8_set_pte(pte, gen8_pte_encode(addr, level));
2407
2408 ggtt->invalidate(vm->i915);
2409 }
2410
2411 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2412 struct sg_table *st,
2413 uint64_t start,
2414 enum i915_cache_level level, u32 unused)
2415 {
2416 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2417 struct sgt_iter sgt_iter;
2418 gen8_pte_t __iomem *gtt_entries;
2419 gen8_pte_t gtt_entry;
2420 dma_addr_t addr;
2421 int i = 0;
2422
2423 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2424
2425 for_each_sgt_dma(addr, sgt_iter, st) {
2426 gtt_entry = gen8_pte_encode(addr, level);
2427 gen8_set_pte(&gtt_entries[i++], gtt_entry);
2428 }
2429
2430 /*
2431 * XXX: This serves as a posting read to make sure that the PTE has
2432 * actually been updated. There is some concern that even though
2433 * registers and PTEs are within the same BAR that they are potentially
2434 * of NUMA access patterns. Therefore, even with the way we assume
2435 * hardware should work, we must keep this posting read for paranoia.
2436 */
2437 if (i != 0)
2438 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
2439
2440 /* This next bit makes the above posting read even more important. We
2441 * want to flush the TLBs only after we're certain all the PTE updates
2442 * have finished.
2443 */
2444 ggtt->invalidate(vm->i915);
2445 }
2446
2447 struct insert_entries {
2448 struct i915_address_space *vm;
2449 struct sg_table *st;
2450 uint64_t start;
2451 enum i915_cache_level level;
2452 u32 flags;
2453 };
2454
2455 static int gen8_ggtt_insert_entries__cb(void *_arg)
2456 {
2457 struct insert_entries *arg = _arg;
2458 gen8_ggtt_insert_entries(arg->vm, arg->st,
2459 arg->start, arg->level, arg->flags);
2460 return 0;
2461 }
2462
2463 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2464 struct sg_table *st,
2465 uint64_t start,
2466 enum i915_cache_level level,
2467 u32 flags)
2468 {
2469 struct insert_entries arg = { vm, st, start, level, flags };
2470 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
2471 }
2472
2473 static void gen6_ggtt_insert_page(struct i915_address_space *vm,
2474 dma_addr_t addr,
2475 uint64_t offset,
2476 enum i915_cache_level level,
2477 u32 flags)
2478 {
2479 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2480 gen6_pte_t __iomem *pte =
2481 (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);
2482
2483 iowrite32(vm->pte_encode(addr, level, flags), pte);
2484
2485 ggtt->invalidate(vm->i915);
2486 }
2487
2488 /*
2489 * Binds an object into the global gtt with the specified cache level. The object
2490 * will be accessible to the GPU via commands whose operands reference offsets
2491 * within the global GTT as well as accessible by the GPU through the GMADR
2492 * mapped BAR (dev_priv->mm.gtt->gtt).
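 *
 * Concretely, the gen6 path below encodes each DMA address from the
 * object's scatterlist with vm->pte_encode() and writes it into the GTT
 * through the GSM mapping (ggtt->gsm) with iowrite32(), then reads back
 * the last PTE as a posting read before triggering the TLB invalidate.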
2493 */
2494 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2495 struct sg_table *st,
2496 uint64_t start,
2497 enum i915_cache_level level, u32 flags)
2498 {
2499 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2500 struct sgt_iter sgt_iter;
2501 gen6_pte_t __iomem *gtt_entries;
2502 gen6_pte_t gtt_entry;
2503 dma_addr_t addr;
2504 int i = 0;
2505
2506 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
2507
2508 for_each_sgt_dma(addr, sgt_iter, st) {
2509 gtt_entry = vm->pte_encode(addr, level, flags);
2510 iowrite32(gtt_entry, &gtt_entries[i++]);
2511 }
2512
2513 /* XXX: This serves as a posting read to make sure that the PTE has
2514 * actually been updated. There is some concern that even though
2515 * registers and PTEs are within the same BAR that they are potentially
2516 * of NUMA access patterns. Therefore, even with the way we assume
2517 * hardware should work, we must keep this posting read for paranoia.
2518 */
2519 if (i != 0)
2520 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
2521
2522 /* This next bit makes the above posting read even more important. We
2523 * want to flush the TLBs only after we're certain all the PTE updates
2524 * have finished.
2525 */
2526 ggtt->invalidate(vm->i915);
2527 }
2528
2529 static void nop_clear_range(struct i915_address_space *vm,
2530 uint64_t start, uint64_t length)
2531 {
2532 }
2533
2534 static void gen8_ggtt_clear_range(struct i915_address_space *vm,
2535 uint64_t start, uint64_t length)
2536 {
2537 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2538 unsigned first_entry = start >> PAGE_SHIFT;
2539 unsigned num_entries = length >> PAGE_SHIFT;
2540 gen8_pte_t scratch_pte, __iomem *gtt_base =
2541 (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
2542 const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2543 int i;
2544
2545 if (WARN(num_entries > max_entries,
2546 "First entry = %d; Num entries = %d (max=%d)\n",
2547 first_entry, num_entries, max_entries))
2548 num_entries = max_entries;
2549
2550 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
2551 I915_CACHE_LLC);
2552 for (i = 0; i < num_entries; i++)
2553 gen8_set_pte(&gtt_base[i], scratch_pte);
2554 readl(gtt_base);
2555 }
2556
2557 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
2558 uint64_t start,
2559 uint64_t length)
2560 {
2561 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2562 unsigned first_entry = start >> PAGE_SHIFT;
2563 unsigned num_entries = length >> PAGE_SHIFT;
2564 gen6_pte_t scratch_pte, __iomem *gtt_base =
2565 (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
2566 const int max_entries = ggtt_total_entries(ggtt) - first_entry;
2567 int i;
2568
2569 if (WARN(num_entries > max_entries,
2570 "First entry = %d; Num entries = %d (max=%d)\n",
2571 first_entry, num_entries, max_entries))
2572 num_entries = max_entries;
2573
2574 scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
2575 I915_CACHE_LLC, 0);
2576
2577 for (i = 0; i < num_entries; i++)
2578 iowrite32(scratch_pte, &gtt_base[i]);
2579 readl(gtt_base);
2580 }
2581
2582 static void i915_ggtt_insert_page(struct i915_address_space *vm,
2583 dma_addr_t addr,
2584 uint64_t offset,
2585 enum i915_cache_level cache_level,
2586 u32 unused)
2587 {
2588 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2589 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2590 2591 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2592 } 2593 2594 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2595 struct sg_table *pages, 2596 uint64_t start, 2597 enum i915_cache_level cache_level, u32 unused) 2598 { 2599 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2600 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2601 2602 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2603 2604 } 2605 2606 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2607 uint64_t start, 2608 uint64_t length) 2609 { 2610 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); 2611 } 2612 2613 static int ggtt_bind_vma(struct i915_vma *vma, 2614 enum i915_cache_level cache_level, 2615 u32 flags) 2616 { 2617 struct drm_i915_private *i915 = vma->vm->i915; 2618 struct drm_i915_gem_object *obj = vma->obj; 2619 u32 pte_flags = 0; 2620 int ret; 2621 2622 ret = i915_get_ggtt_vma_pages(vma); 2623 if (ret) 2624 return ret; 2625 2626 /* Currently applicable only to VLV */ 2627 if (obj->gt_ro) 2628 pte_flags |= PTE_READ_ONLY; 2629 2630 intel_runtime_pm_get(i915); 2631 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 2632 cache_level, pte_flags); 2633 intel_runtime_pm_put(i915); 2634 2635 /* 2636 * Without aliasing PPGTT there's no difference between 2637 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2638 * upgrade to both bound if we bind either to avoid double-binding. 2639 */ 2640 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2641 2642 return 0; 2643 } 2644 2645 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2646 enum i915_cache_level cache_level, 2647 u32 flags) 2648 { 2649 struct drm_i915_private *i915 = vma->vm->i915; 2650 u32 pte_flags; 2651 int ret; 2652 2653 ret = i915_get_ggtt_vma_pages(vma); 2654 if (ret) 2655 return ret; 2656 2657 /* Currently applicable only to VLV */ 2658 pte_flags = 0; 2659 if (vma->obj->gt_ro) 2660 pte_flags |= PTE_READ_ONLY; 2661 2662 2663 if (flags & I915_VMA_GLOBAL_BIND) { 2664 intel_runtime_pm_get(i915); 2665 vma->vm->insert_entries(vma->vm, 2666 vma->pages, vma->node.start, 2667 cache_level, pte_flags); 2668 intel_runtime_pm_put(i915); 2669 } 2670 2671 if (flags & I915_VMA_LOCAL_BIND) { 2672 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2673 appgtt->base.insert_entries(&appgtt->base, 2674 vma->pages, vma->node.start, 2675 cache_level, pte_flags); 2676 } 2677 2678 return 0; 2679 } 2680 2681 static void ggtt_unbind_vma(struct i915_vma *vma) 2682 { 2683 struct drm_i915_private *i915 = vma->vm->i915; 2684 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2685 const u64 size = min(vma->size, vma->node.size); 2686 2687 if (vma->flags & I915_VMA_GLOBAL_BIND) { 2688 intel_runtime_pm_get(i915); 2689 vma->vm->clear_range(vma->vm, 2690 vma->node.start, size); 2691 intel_runtime_pm_put(i915); 2692 } 2693 2694 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) 2695 appgtt->base.clear_range(&appgtt->base, 2696 vma->node.start, size); 2697 } 2698 2699 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, 2700 struct sg_table *pages) 2701 { 2702 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2703 struct device *kdev = &dev_priv->drm.pdev->dev; 2704 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2705 2706 if (unlikely(ggtt->do_idle_maps)) { 2707 if (i915_gem_wait_for_idle(dev_priv, 0)) { 2708 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2709 /* Wait a bit, in hopes it avoids 
the hang */ 2710 udelay(10); 2711 } 2712 } 2713 2714 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); 2715 } 2716 2717 static void i915_gtt_color_adjust(const struct drm_mm_node *node, 2718 unsigned long color, 2719 u64 *start, 2720 u64 *end) 2721 { 2722 if (node->color != color) 2723 *start += I915_GTT_PAGE_SIZE; 2724 2725 node = list_next_entry(node, node_list); 2726 if (node->allocated && node->color != color) 2727 *end -= I915_GTT_PAGE_SIZE; 2728 } 2729 2730 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2731 { 2732 /* Let GEM Manage all of the aperture. 2733 * 2734 * However, leave one page at the end still bound to the scratch page. 2735 * There are a number of places where the hardware apparently prefetches 2736 * past the end of the object, and we've seen multiple hangs with the 2737 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2738 * aperture. One page should be enough to keep any prefetching inside 2739 * of the aperture. 2740 */ 2741 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2742 unsigned long hole_start, hole_end; 2743 struct i915_hw_ppgtt *ppgtt; 2744 struct drm_mm_node *entry; 2745 int ret; 2746 2747 ret = intel_vgt_balloon(dev_priv); 2748 if (ret) 2749 return ret; 2750 2751 /* Reserve a mappable slot for our lockless error capture */ 2752 ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture, 2753 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 2754 0, ggtt->mappable_end, 2755 DRM_MM_INSERT_LOW); 2756 if (ret) 2757 return ret; 2758 2759 /* Clear any non-preallocated blocks */ 2760 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2761 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2762 hole_start, hole_end); 2763 ggtt->base.clear_range(&ggtt->base, hole_start, 2764 hole_end - hole_start); 2765 } 2766 2767 /* And finally clear the reserved guard page */ 2768 ggtt->base.clear_range(&ggtt->base, 2769 ggtt->base.total - PAGE_SIZE, PAGE_SIZE); 2770 2771 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2772 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2773 if (!ppgtt) { 2774 ret = -ENOMEM; 2775 goto err; 2776 } 2777 2778 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2779 if (ret) 2780 goto err_ppgtt; 2781 2782 if (ppgtt->base.allocate_va_range) { 2783 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2784 ppgtt->base.total); 2785 if (ret) 2786 goto err_ppgtt_cleanup; 2787 } 2788 2789 ppgtt->base.clear_range(&ppgtt->base, 2790 ppgtt->base.start, 2791 ppgtt->base.total); 2792 2793 dev_priv->mm.aliasing_ppgtt = ppgtt; 2794 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2795 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2796 } 2797 2798 return 0; 2799 2800 err_ppgtt_cleanup: 2801 ppgtt->base.cleanup(&ppgtt->base); 2802 err_ppgtt: 2803 kfree(ppgtt); 2804 err: 2805 drm_mm_remove_node(&ggtt->error_capture); 2806 return ret; 2807 } 2808 2809 /** 2810 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2811 * @dev_priv: i915 device 2812 */ 2813 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2814 { 2815 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2816 2817 if (dev_priv->mm.aliasing_ppgtt) { 2818 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2819 ppgtt->base.cleanup(&ppgtt->base); 2820 kfree(ppgtt); 2821 } 2822 2823 i915_gem_cleanup_stolen(&dev_priv->drm); 2824 2825 if (drm_mm_node_allocated(&ggtt->error_capture)) 2826 drm_mm_remove_node(&ggtt->error_capture); 2827 2828 if (drm_mm_initialized(&ggtt->base.mm)) { 2829 intel_vgt_deballoon(dev_priv); 2830 2831 
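/* i915_address_space_fini() drops the address space from the global
 * vm_list and tears down its GEM timeline; both are set up under
 * struct_mutex (see i915_ggtt_init_hw() below), hence the explicit
 * locking around the teardown here.
 */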
mutex_lock(&dev_priv->drm.struct_mutex); 2832 i915_address_space_fini(&ggtt->base); 2833 mutex_unlock(&dev_priv->drm.struct_mutex); 2834 } 2835 2836 ggtt->base.cleanup(&ggtt->base); 2837 2838 arch_phys_wc_del(ggtt->mtrr); 2839 io_mapping_fini(&ggtt->mappable); 2840 } 2841 2842 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2843 { 2844 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2845 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2846 return snb_gmch_ctl << 20; 2847 } 2848 2849 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2850 { 2851 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2852 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2853 if (bdw_gmch_ctl) 2854 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2855 2856 #ifdef CONFIG_X86_32 2857 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2858 if (bdw_gmch_ctl > 4) 2859 bdw_gmch_ctl = 4; 2860 #endif 2861 2862 return bdw_gmch_ctl << 20; 2863 } 2864 2865 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2866 { 2867 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2868 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2869 2870 if (gmch_ctrl) 2871 return 1 << (20 + gmch_ctrl); 2872 2873 return 0; 2874 } 2875 2876 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2877 { 2878 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2879 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2880 return snb_gmch_ctl << 25; /* 32 MB units */ 2881 } 2882 2883 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2884 { 2885 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2886 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2887 return bdw_gmch_ctl << 25; /* 32 MB units */ 2888 } 2889 2890 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2891 { 2892 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2893 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2894 2895 /* 2896 * 0x0 to 0x10: 32MB increments starting at 0MB 2897 * 0x11 to 0x16: 4MB increments starting at 8MB 2898 * 0x17 to 0x1d: 4MB increments start at 36MB 2899 */ 2900 if (gmch_ctrl < 0x11) 2901 return gmch_ctrl << 25; 2902 else if (gmch_ctrl < 0x17) 2903 return (gmch_ctrl - 0x11 + 2) << 22; 2904 else 2905 return (gmch_ctrl - 0x17 + 9) << 22; 2906 } 2907 2908 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2909 { 2910 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2911 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2912 2913 if (gen9_gmch_ctl < 0xf0) 2914 return gen9_gmch_ctl << 25; /* 32 MB units */ 2915 else 2916 /* 4MB increments starting at 0xf0 for 4MB */ 2917 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2918 } 2919 2920 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2921 { 2922 struct drm_i915_private *dev_priv = ggtt->base.i915; 2923 struct pci_dev *pdev = dev_priv->drm.pdev; 2924 phys_addr_t phys_addr; 2925 int ret; 2926 2927 /* For Modern GENs the PTEs and register space are split in the BAR */ 2928 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2929 2930 /* 2931 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2932 * dropped. For WC mappings in general we have 64 byte burst writes 2933 * when the WC buffer is flushed, so we can't use it, but have to 2934 * resort to an uncached mapping. The WC issue is easily caught by the 2935 * readback check when writing GTT PTE entries. 
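 *
 * That is why the mapping below falls back to ioremap_nocache() for the
 * GSM on IS_GEN9_LP() parts, while everything else keeps the faster
 * write-combining ioremap_wc() mapping.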
2936 */ 2937 if (IS_GEN9_LP(dev_priv)) 2938 ggtt->gsm = ioremap_nocache(phys_addr, size); 2939 else 2940 ggtt->gsm = ioremap_wc(phys_addr, size); 2941 if (!ggtt->gsm) { 2942 DRM_ERROR("Failed to map the ggtt page table\n"); 2943 return -ENOMEM; 2944 } 2945 2946 ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); 2947 if (ret) { 2948 DRM_ERROR("Scratch setup failed\n"); 2949 /* iounmap will also get called at remove, but meh */ 2950 iounmap(ggtt->gsm); 2951 return ret; 2952 } 2953 2954 return 0; 2955 } 2956 2957 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2958 * bits. When using advanced contexts each context stores its own PAT, but 2959 * writing this data shouldn't be harmful even in those cases. */ 2960 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2961 { 2962 uint64_t pat; 2963 2964 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2965 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2966 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2967 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2968 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2969 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2970 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2971 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2972 2973 if (!USES_PPGTT(dev_priv)) 2974 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2975 * so RTL will always use the value corresponding to 2976 * pat_sel = 000". 2977 * So let's disable cache for GGTT to avoid screen corruptions. 2978 * MOCS still can be used though. 2979 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2980 * before this patch, i.e. the same uncached + snooping access 2981 * like on gen6/7 seems to be in effect. 2982 * - So this just fixes blitter/render access. Again it looks 2983 * like it's not just uncached access, but uncached + snooping. 2984 * So we can still hold onto all our assumptions wrt cpu 2985 * clflushing on LLC machines. 2986 */ 2987 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 2988 2989 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 2990 * write would work. */ 2991 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 2992 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 2993 } 2994 2995 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 2996 { 2997 uint64_t pat; 2998 2999 /* 3000 * Map WB on BDW to snooped on CHV. 3001 * 3002 * Only the snoop bit has meaning for CHV, the rest is 3003 * ignored. 3004 * 3005 * The hardware will never snoop for certain types of accesses: 3006 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3007 * - PPGTT page tables 3008 * - some other special cycles 3009 * 3010 * As with BDW, we also need to consider the following for GT accesses: 3011 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3012 * so RTL will always use the value corresponding to 3013 * pat_sel = 000". 3014 * Which means we must set the snoop bit in PAT entry 0 3015 * in order to keep the global status page working. 
3016 */ 3017 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3018 GEN8_PPAT(1, 0) | 3019 GEN8_PPAT(2, 0) | 3020 GEN8_PPAT(3, 0) | 3021 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3022 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3023 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3024 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3025 3026 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3027 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3028 } 3029 3030 static void gen6_gmch_remove(struct i915_address_space *vm) 3031 { 3032 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 3033 3034 iounmap(ggtt->gsm); 3035 cleanup_scratch_page(vm->i915, &vm->scratch_page); 3036 } 3037 3038 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3039 { 3040 struct drm_i915_private *dev_priv = ggtt->base.i915; 3041 struct pci_dev *pdev = dev_priv->drm.pdev; 3042 unsigned int size; 3043 u16 snb_gmch_ctl; 3044 3045 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3046 ggtt->mappable_base = pci_resource_start(pdev, 2); 3047 ggtt->mappable_end = pci_resource_len(pdev, 2); 3048 3049 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 3050 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3051 3052 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3053 3054 if (INTEL_GEN(dev_priv) >= 9) { 3055 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3056 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3057 } else if (IS_CHERRYVIEW(dev_priv)) { 3058 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3059 size = chv_get_total_gtt_size(snb_gmch_ctl); 3060 } else { 3061 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3062 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3063 } 3064 3065 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3066 3067 if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) 3068 chv_setup_private_ppat(dev_priv); 3069 else 3070 bdw_setup_private_ppat(dev_priv); 3071 3072 ggtt->base.cleanup = gen6_gmch_remove; 3073 ggtt->base.bind_vma = ggtt_bind_vma; 3074 ggtt->base.unbind_vma = ggtt_unbind_vma; 3075 ggtt->base.insert_page = gen8_ggtt_insert_page; 3076 ggtt->base.clear_range = nop_clear_range; 3077 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3078 ggtt->base.clear_range = gen8_ggtt_clear_range; 3079 3080 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3081 if (IS_CHERRYVIEW(dev_priv)) 3082 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3083 3084 ggtt->invalidate = gen6_ggtt_invalidate; 3085 3086 return ggtt_probe_common(ggtt, size); 3087 } 3088 3089 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3090 { 3091 struct drm_i915_private *dev_priv = ggtt->base.i915; 3092 struct pci_dev *pdev = dev_priv->drm.pdev; 3093 unsigned int size; 3094 u16 snb_gmch_ctl; 3095 3096 ggtt->mappable_base = pci_resource_start(pdev, 2); 3097 ggtt->mappable_end = pci_resource_len(pdev, 2); 3098 3099 /* 64/512MB is the current min/max we actually know of, but this is just 3100 * a coarse sanity check. 
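 * For example, a 256 MiB GMADR BAR passes, while anything outside the
 * [64 MiB, 512 MiB] window fails the probe below with -ENXIO.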
3101 */ 3102 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3103 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3104 return -ENXIO; 3105 } 3106 3107 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 3108 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3109 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3110 3111 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3112 3113 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3114 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3115 3116 ggtt->base.clear_range = gen6_ggtt_clear_range; 3117 ggtt->base.insert_page = gen6_ggtt_insert_page; 3118 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3119 ggtt->base.bind_vma = ggtt_bind_vma; 3120 ggtt->base.unbind_vma = ggtt_unbind_vma; 3121 ggtt->base.cleanup = gen6_gmch_remove; 3122 3123 ggtt->invalidate = gen6_ggtt_invalidate; 3124 3125 if (HAS_EDRAM(dev_priv)) 3126 ggtt->base.pte_encode = iris_pte_encode; 3127 else if (IS_HASWELL(dev_priv)) 3128 ggtt->base.pte_encode = hsw_pte_encode; 3129 else if (IS_VALLEYVIEW(dev_priv)) 3130 ggtt->base.pte_encode = byt_pte_encode; 3131 else if (INTEL_GEN(dev_priv) >= 7) 3132 ggtt->base.pte_encode = ivb_pte_encode; 3133 else 3134 ggtt->base.pte_encode = snb_pte_encode; 3135 3136 return ggtt_probe_common(ggtt, size); 3137 } 3138 3139 static void i915_gmch_remove(struct i915_address_space *vm) 3140 { 3141 intel_gmch_remove(); 3142 } 3143 3144 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3145 { 3146 struct drm_i915_private *dev_priv = ggtt->base.i915; 3147 int ret; 3148 3149 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3150 if (!ret) { 3151 DRM_ERROR("failed to set up gmch\n"); 3152 return -EIO; 3153 } 3154 3155 intel_gtt_get(&ggtt->base.total, 3156 &ggtt->stolen_size, 3157 &ggtt->mappable_base, 3158 &ggtt->mappable_end); 3159 3160 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3161 ggtt->base.insert_page = i915_ggtt_insert_page; 3162 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3163 ggtt->base.clear_range = i915_ggtt_clear_range; 3164 ggtt->base.bind_vma = ggtt_bind_vma; 3165 ggtt->base.unbind_vma = ggtt_unbind_vma; 3166 ggtt->base.cleanup = i915_gmch_remove; 3167 3168 ggtt->invalidate = gmch_ggtt_invalidate; 3169 3170 if (unlikely(ggtt->do_idle_maps)) 3171 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3172 3173 return 0; 3174 } 3175 3176 /** 3177 * i915_ggtt_probe_hw - Probe GGTT hardware location 3178 * @dev_priv: i915 device 3179 */ 3180 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3181 { 3182 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3183 int ret; 3184 3185 ggtt->base.i915 = dev_priv; 3186 3187 if (INTEL_GEN(dev_priv) <= 5) 3188 ret = i915_gmch_probe(ggtt); 3189 else if (INTEL_GEN(dev_priv) < 8) 3190 ret = gen6_gmch_probe(ggtt); 3191 else 3192 ret = gen8_gmch_probe(ggtt); 3193 if (ret) 3194 return ret; 3195 3196 /* Trim the GGTT to fit the GuC mappable upper range (when enabled). 3197 * This is easier than doing range restriction on the fly, as we 3198 * currently don't have any bits spare to pass in this upper 3199 * restriction! 3200 */ 3201 if (HAS_GUC(dev_priv) && i915.enable_guc_loading) { 3202 ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); 3203 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3204 } 3205 3206 if ((ggtt->base.total - 1) >> 32) { 3207 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3208 " of address space! 
Found %lldM!\n", 3209 ggtt->base.total >> 20); 3210 ggtt->base.total = 1ULL << 32; 3211 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3212 } 3213 3214 if (ggtt->mappable_end > ggtt->base.total) { 3215 DRM_ERROR("mappable aperture extends past end of GGTT," 3216 " aperture=%llx, total=%llx\n", 3217 ggtt->mappable_end, ggtt->base.total); 3218 ggtt->mappable_end = ggtt->base.total; 3219 } 3220 3221 /* GMADR is the PCI mmio aperture into the global GTT. */ 3222 DRM_INFO("Memory usable by graphics device = %lluM\n", 3223 ggtt->base.total >> 20); 3224 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3225 DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); 3226 #ifdef CONFIG_INTEL_IOMMU 3227 if (intel_iommu_gfx_mapped) 3228 DRM_INFO("VT-d active for gfx access\n"); 3229 #endif 3230 3231 return 0; 3232 } 3233 3234 /** 3235 * i915_ggtt_init_hw - Initialize GGTT hardware 3236 * @dev_priv: i915 device 3237 */ 3238 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3239 { 3240 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3241 int ret; 3242 3243 INIT_LIST_HEAD(&dev_priv->vm_list); 3244 3245 /* Subtract the guard page before address space initialization to 3246 * shrink the range used by drm_mm. 3247 */ 3248 mutex_lock(&dev_priv->drm.struct_mutex); 3249 ggtt->base.total -= PAGE_SIZE; 3250 i915_address_space_init(&ggtt->base, dev_priv, "[global]"); 3251 ggtt->base.total += PAGE_SIZE; 3252 if (!HAS_LLC(dev_priv)) 3253 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3254 mutex_unlock(&dev_priv->drm.struct_mutex); 3255 3256 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 3257 dev_priv->ggtt.mappable_base, 3258 dev_priv->ggtt.mappable_end)) { 3259 ret = -EIO; 3260 goto out_gtt_cleanup; 3261 } 3262 3263 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3264 3265 /* 3266 * Initialise stolen early so that we may reserve preallocated 3267 * objects for the BIOS to KMS transition. 3268 */ 3269 ret = i915_gem_init_stolen(dev_priv); 3270 if (ret) 3271 goto out_gtt_cleanup; 3272 3273 return 0; 3274 3275 out_gtt_cleanup: 3276 ggtt->base.cleanup(&ggtt->base); 3277 return ret; 3278 } 3279 3280 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3281 { 3282 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3283 return -EIO; 3284 3285 return 0; 3286 } 3287 3288 void i915_ggtt_enable_guc(struct drm_i915_private *i915) 3289 { 3290 i915->ggtt.invalidate = guc_ggtt_invalidate; 3291 } 3292 3293 void i915_ggtt_disable_guc(struct drm_i915_private *i915) 3294 { 3295 i915->ggtt.invalidate = gen6_ggtt_invalidate; 3296 } 3297 3298 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) 3299 { 3300 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3301 struct drm_i915_gem_object *obj, *on; 3302 3303 i915_check_and_clear_faults(dev_priv); 3304 3305 /* First fill our portion of the GTT with scratch pages */ 3306 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 3307 3308 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3309 3310 /* clflush objects bound into the GGTT and rebind them. 
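 * A vma that unbinds cleanly here is simply left unbound and will get
 * fresh PTEs when it is next bound; one that cannot be unbound (e.g.
 * because it is still pinned) is instead pushed through
 * i915_vma_bind(PIN_UPDATE) so that its PTEs are rewritten in place.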
*/ 3311 list_for_each_entry_safe(obj, on, 3312 &dev_priv->mm.bound_list, global_link) { 3313 bool ggtt_bound = false; 3314 struct i915_vma *vma; 3315 3316 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3317 if (vma->vm != &ggtt->base) 3318 continue; 3319 3320 if (!i915_vma_unbind(vma)) 3321 continue; 3322 3323 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3324 PIN_UPDATE)); 3325 ggtt_bound = true; 3326 } 3327 3328 if (ggtt_bound) 3329 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3330 } 3331 3332 ggtt->base.closed = false; 3333 3334 if (INTEL_GEN(dev_priv) >= 8) { 3335 if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) 3336 chv_setup_private_ppat(dev_priv); 3337 else 3338 bdw_setup_private_ppat(dev_priv); 3339 3340 return; 3341 } 3342 3343 if (USES_PPGTT(dev_priv)) { 3344 struct i915_address_space *vm; 3345 3346 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3347 /* TODO: Perhaps it shouldn't be gen6 specific */ 3348 3349 struct i915_hw_ppgtt *ppgtt; 3350 3351 if (i915_is_ggtt(vm)) 3352 ppgtt = dev_priv->mm.aliasing_ppgtt; 3353 else 3354 ppgtt = i915_vm_to_ppgtt(vm); 3355 3356 gen6_write_page_range(dev_priv, &ppgtt->pd, 3357 0, ppgtt->base.total); 3358 } 3359 } 3360 3361 i915_ggtt_invalidate(dev_priv); 3362 } 3363 3364 static struct scatterlist * 3365 rotate_pages(const dma_addr_t *in, unsigned int offset, 3366 unsigned int width, unsigned int height, 3367 unsigned int stride, 3368 struct sg_table *st, struct scatterlist *sg) 3369 { 3370 unsigned int column, row; 3371 unsigned int src_idx; 3372 3373 for (column = 0; column < width; column++) { 3374 src_idx = stride * (height - 1) + column; 3375 for (row = 0; row < height; row++) { 3376 st->nents++; 3377 /* We don't need the pages, but need to initialize 3378 * the entries so the sg list can be happily traversed. 3379 * The only thing we need are DMA addresses. 3380 */ 3381 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3382 sg_dma_address(sg) = in[offset + src_idx]; 3383 sg_dma_len(sg) = PAGE_SIZE; 3384 sg = sg_next(sg); 3385 src_idx -= stride; 3386 } 3387 } 3388 3389 return sg; 3390 } 3391 3392 static struct sg_table * 3393 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, 3394 struct drm_i915_gem_object *obj) 3395 { 3396 const size_t n_pages = obj->base.size / PAGE_SIZE; 3397 unsigned int size = intel_rotation_info_size(rot_info); 3398 struct sgt_iter sgt_iter; 3399 dma_addr_t dma_addr; 3400 unsigned long i; 3401 dma_addr_t *page_addr_list; 3402 struct sg_table *st; 3403 struct scatterlist *sg; 3404 int ret = -ENOMEM; 3405 3406 /* Allocate a temporary list of source pages for random access. */ 3407 page_addr_list = drm_malloc_gfp(n_pages, 3408 sizeof(dma_addr_t), 3409 GFP_TEMPORARY); 3410 if (!page_addr_list) 3411 return ERR_PTR(ret); 3412 3413 /* Allocate target SG list. */ 3414 st = kmalloc(sizeof(*st), GFP_KERNEL); 3415 if (!st) 3416 goto err_st_alloc; 3417 3418 ret = sg_alloc_table(st, size, GFP_KERNEL); 3419 if (ret) 3420 goto err_sg_alloc; 3421 3422 /* Populate source page list from the object. 
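 *
 * rotate_pages() below then re-emits those addresses column by column,
 * starting from the bottom row. As a purely illustrative example, a
 * single 2x2-tile plane with stride 2 turns the row-major source order
 * {0, 1, 2, 3} into the output order {2, 0, 3, 1}, i.e. the page layout
 * of the image rotated by 90 degrees.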
*/ 3423 i = 0; 3424 for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages) 3425 page_addr_list[i++] = dma_addr; 3426 3427 GEM_BUG_ON(i != n_pages); 3428 st->nents = 0; 3429 sg = st->sgl; 3430 3431 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3432 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3433 rot_info->plane[i].width, rot_info->plane[i].height, 3434 rot_info->plane[i].stride, st, sg); 3435 } 3436 3437 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3438 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3439 3440 drm_free_large(page_addr_list); 3441 3442 return st; 3443 3444 err_sg_alloc: 3445 kfree(st); 3446 err_st_alloc: 3447 drm_free_large(page_addr_list); 3448 3449 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", 3450 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3451 3452 return ERR_PTR(ret); 3453 } 3454 3455 static struct sg_table * 3456 intel_partial_pages(const struct i915_ggtt_view *view, 3457 struct drm_i915_gem_object *obj) 3458 { 3459 struct sg_table *st; 3460 struct scatterlist *sg, *iter; 3461 unsigned int count = view->partial.size; 3462 unsigned int offset; 3463 int ret = -ENOMEM; 3464 3465 st = kmalloc(sizeof(*st), GFP_KERNEL); 3466 if (!st) 3467 goto err_st_alloc; 3468 3469 ret = sg_alloc_table(st, count, GFP_KERNEL); 3470 if (ret) 3471 goto err_sg_alloc; 3472 3473 iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); 3474 GEM_BUG_ON(!iter); 3475 3476 sg = st->sgl; 3477 st->nents = 0; 3478 do { 3479 unsigned int len; 3480 3481 len = min(iter->length - (offset << PAGE_SHIFT), 3482 count << PAGE_SHIFT); 3483 sg_set_page(sg, NULL, len, 0); 3484 sg_dma_address(sg) = 3485 sg_dma_address(iter) + (offset << PAGE_SHIFT); 3486 sg_dma_len(sg) = len; 3487 3488 st->nents++; 3489 count -= len >> PAGE_SHIFT; 3490 if (count == 0) { 3491 sg_mark_end(sg); 3492 return st; 3493 } 3494 3495 sg = __sg_next(sg); 3496 iter = __sg_next(iter); 3497 offset = 0; 3498 } while (1); 3499 3500 err_sg_alloc: 3501 kfree(st); 3502 err_st_alloc: 3503 return ERR_PTR(ret); 3504 } 3505 3506 static int 3507 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3508 { 3509 int ret = 0; 3510 3511 /* The vma->pages are only valid within the lifespan of the borrowed 3512 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so 3513 * must be the vma->pages. A simple rule is that vma->pages must only 3514 * be accessed when the obj->mm.pages are pinned. 
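 * The GEM_BUG_ON() just below enforces that rule at the point where
 * vma->pages is (re)built.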
3515 */
3516 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3517
3518 if (vma->pages)
3519 return 0;
3520
3521 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3522 vma->pages = vma->obj->mm.pages;
3523 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3524 vma->pages =
3525 intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated,
3526 vma->obj);
3527 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3528 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3529 else
3530 WARN_ONCE(1, "GGTT view %u not implemented!\n",
3531 vma->ggtt_view.type);
3532
3533 if (!vma->pages) {
3534 DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3535 vma->ggtt_view.type);
3536 ret = -EINVAL;
3537 } else if (IS_ERR(vma->pages)) {
3538 ret = PTR_ERR(vma->pages);
3539 vma->pages = NULL;
3540 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3541 vma->ggtt_view.type, ret);
3542 }
3543
3544 return ret;
3545 }
3546
3547 /**
3548 * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3549 * @vm: the &struct i915_address_space
3550 * @node: the &struct drm_mm_node (typically i915_vma.node)
3551 * @size: how much space to allocate inside the GTT,
3552 * must be #I915_GTT_PAGE_SIZE aligned
3553 * @offset: where to insert inside the GTT,
3554 * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3555 * (@offset + @size) must fit within the address space
3556 * @color: color to apply to the node; if this node is not from a VMA,
3557 * its color must be #I915_COLOR_UNEVICTABLE
3558 * @flags: control search and eviction behaviour
3559 *
3560 * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3561 * the address space (using @size and @color). If the @node does not fit, it
3562 * tries to evict any overlapping nodes from the GTT, including any
3563 * neighbouring nodes if the colors do not match (to ensure guard pages between
3564 * differing domains). See i915_gem_evict_for_node() for the gory details
3565 * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
3566 * evicting active overlapping objects, and any overlapping node that is pinned
3567 * or marked as unevictable will also result in failure.
3568 *
3569 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3570 * asked to wait for eviction and interrupted.
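 *
 * Purely as a hypothetical illustration (not an existing caller in this
 * file), reserving a single page at a fixed GGTT offset could look like:
 *
 *	err = i915_gem_gtt_reserve(&i915->ggtt.base, &node,
 *				   I915_GTT_PAGE_SIZE, 0,
 *				   I915_COLOR_UNEVICTABLE, PIN_NONBLOCK);
 *
 * where @node is a caller-owned struct drm_mm_node.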
3571 */
3572 int i915_gem_gtt_reserve(struct i915_address_space *vm,
3573 struct drm_mm_node *node,
3574 u64 size, u64 offset, unsigned long color,
3575 unsigned int flags)
3576 {
3577 int err;
3578
3579 GEM_BUG_ON(!size);
3580 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3581 GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3582 GEM_BUG_ON(range_overflows(offset, size, vm->total));
3583 GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
3584 GEM_BUG_ON(drm_mm_node_allocated(node));
3585
3586 node->size = size;
3587 node->start = offset;
3588 node->color = color;
3589
3590 err = drm_mm_reserve_node(&vm->mm, node);
3591 if (err != -ENOSPC)
3592 return err;
3593
3594 err = i915_gem_evict_for_node(vm, node, flags);
3595 if (err == 0)
3596 err = drm_mm_reserve_node(&vm->mm, node);
3597
3598 return err;
3599 }
3600
3601 static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
3602 {
3603 u64 range, addr;
3604
3605 GEM_BUG_ON(range_overflows(start, len, end));
3606 GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));
3607
3608 range = round_down(end - len, align) - round_up(start, align);
3609 if (range) {
3610 if (sizeof(unsigned long) == sizeof(u64)) {
3611 addr = get_random_long();
3612 } else {
3613 addr = get_random_int();
3614 if (range > U32_MAX) {
3615 addr <<= 32;
3616 addr |= get_random_int();
3617 }
3618 }
3619 div64_u64_rem(addr, range, &addr);
3620 start += addr;
3621 }
3622
3623 return round_up(start, align);
3624 }
3625
3626 /**
3627 * i915_gem_gtt_insert - insert a node into an address_space (GTT)
3628 * @vm: the &struct i915_address_space
3629 * @node: the &struct drm_mm_node (typically i915_vma.node)
3630 * @size: how much space to allocate inside the GTT,
3631 * must be #I915_GTT_PAGE_SIZE aligned
3632 * @alignment: required alignment of starting offset, may be 0 but
3633 * if specified, this must be a power-of-two and at least
3634 * #I915_GTT_MIN_ALIGNMENT
3635 * @color: color to apply to node
3636 * @start: start of any range restriction inside GTT (0 for all),
3637 * must be #I915_GTT_PAGE_SIZE aligned
3638 * @end: end of any range restriction inside GTT (U64_MAX for all),
3639 * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
3640 * @flags: control search and eviction behaviour
3641 *
3642 * i915_gem_gtt_insert() first searches for an available hole into which
3643 * it can insert the node. The hole address is aligned to @alignment and
3644 * its @size must then fit entirely within the [@start, @end] bounds. The
3645 * nodes on either side of the hole must match @color, or else a guard page
3646 * will be inserted between the two nodes (or the node evicted). If no
3647 * suitable hole is found, first a victim is randomly selected and tested
3648 * for eviction; failing that, the LRU list of objects within the GTT
3649 * is scanned to find the first set of replacement nodes to create the hole.
3650 * Those old overlapping nodes are evicted from the GTT (and so must be
3651 * rebound before any future use). Any node that is currently pinned cannot
3652 * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
3653 * active and #PIN_NONBLOCK is specified, that node is also skipped when
3654 * searching for an eviction candidate. See i915_gem_evict_something() for
3655 * the gory details on the eviction algorithm.
3656 *
3657 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3658 * asked to wait for eviction and interrupted.
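 *
 * For a concrete user in this file, see gen6_ppgtt_allocate_page_directories(),
 * which places the gen6 page directory high in the GGTT:
 *
 *	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
 *				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
 *				  I915_COLOR_UNEVICTABLE,
 *				  0, ggtt->base.total,
 *				  PIN_HIGH);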
3659 */ 3660 int i915_gem_gtt_insert(struct i915_address_space *vm, 3661 struct drm_mm_node *node, 3662 u64 size, u64 alignment, unsigned long color, 3663 u64 start, u64 end, unsigned int flags) 3664 { 3665 enum drm_mm_insert_mode mode; 3666 u64 offset; 3667 int err; 3668 3669 lockdep_assert_held(&vm->i915->drm.struct_mutex); 3670 GEM_BUG_ON(!size); 3671 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); 3672 GEM_BUG_ON(alignment && !is_power_of_2(alignment)); 3673 GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); 3674 GEM_BUG_ON(start >= end); 3675 GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); 3676 GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); 3677 GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); 3678 GEM_BUG_ON(drm_mm_node_allocated(node)); 3679 3680 if (unlikely(range_overflows(start, size, end))) 3681 return -ENOSPC; 3682 3683 if (unlikely(round_up(start, alignment) > round_down(end - size, alignment))) 3684 return -ENOSPC; 3685 3686 mode = DRM_MM_INSERT_BEST; 3687 if (flags & PIN_HIGH) 3688 mode = DRM_MM_INSERT_HIGH; 3689 if (flags & PIN_MAPPABLE) 3690 mode = DRM_MM_INSERT_LOW; 3691 3692 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, 3693 * so we know that we always have a minimum alignment of 4096. 3694 * The drm_mm range manager is optimised to return results 3695 * with zero alignment, so where possible use the optimal 3696 * path. 3697 */ 3698 BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE); 3699 if (alignment <= I915_GTT_MIN_ALIGNMENT) 3700 alignment = 0; 3701 3702 err = drm_mm_insert_node_in_range(&vm->mm, node, 3703 size, alignment, color, 3704 start, end, mode); 3705 if (err != -ENOSPC) 3706 return err; 3707 3708 /* No free space, pick a slot at random. 3709 * 3710 * There is a pathological case here using a GTT shared between 3711 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt): 3712 * 3713 * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->| 3714 * (64k objects) (448k objects) 3715 * 3716 * Now imagine that the eviction LRU is ordered top-down (just because 3717 * pathology meets real life), and that we need to evict an object to 3718 * make room inside the aperture. The eviction scan then has to walk 3719 * the 448k list before it finds one within range. And now imagine that 3720 * it has to search for a new hole between every byte inside the memcpy, 3721 * for several simultaneous clients. 3722 * 3723 * On a full-ppgtt system, if we have run out of available space, there 3724 * will be lots and lots of objects in the eviction list! Again, 3725 * searching that LRU list may be slow if we are also applying any 3726 * range restrictions (e.g. restriction to low 4GiB) and so, for 3727 * simplicity and similarilty between different GTT, try the single 3728 * random replacement first. 3729 */ 3730 offset = random_offset(start, end, 3731 size, alignment ?: I915_GTT_MIN_ALIGNMENT); 3732 err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags); 3733 if (err != -ENOSPC) 3734 return err; 3735 3736 /* Randomly selected placement is pinned, do a search */ 3737 err = i915_gem_evict_something(vm, size, alignment, color, 3738 start, end, flags); 3739 if (err) 3740 return err; 3741 3742 return drm_mm_insert_node_in_range(&vm->mm, node, 3743 size, alignment, color, 3744 start, end, DRM_MM_INSERT_EVICT); 3745 } 3746