/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);

static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;

	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
	if (IS_GEN8(dev))
		has_full_ppgtt = false; /* XXX why? */

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (INTEL_INFO(dev)->gen < 9 &&
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
	    dev->pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	return has_aliasing_ppgtt ? 1 : 0;
}


static void ppgtt_bind_vma(struct i915_vma *vma,
			   enum i915_cache_level cache_level,
			   u32 flags);
static void ppgtt_unbind_vma(struct i915_vma *vma);

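/*
 * PTE/PDE encoding helpers. Which one a platform uses is decided in
 * i915_gem_gtt_init() at the bottom of this file: snb_pte_encode for gen6,
 * ivb_pte_encode for gen7, byt_pte_encode for Valleyview, hsw_pte_encode and
 * iris_pte_encode for Haswell without/with eLLC, and gen8_pte_encode (plus
 * gen8_pde_encode) for gen8+, which selects a PPAT index instead of raw
 * cacheability bits.
 */
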
static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 flags)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level,
				      bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

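/*
 * Note: each of the four PDP registers written below points at one page
 * directory of GEN8_PDES_PER_PAGE entries; gen8_mm_switch() reloads all the
 * used ones through the ring with MI_LOAD_REGISTER_IMM, upper dword first.
 */
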
/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
			  uint64_t val)
{
	int ret;

	BUG_ON(entry >= 4);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(val >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(val));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring)
{
	int i, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		ret = gen8_write_pdp(ring, i, addr);
		if (ret)
			return ret;
	}

	return 0;
}

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];

		last_pte = pte + num_entries;
		if (last_pte > GEN8_PTES_PER_PAGE)
			last_pte = GEN8_PTES_PER_PAGE;

		pt_vaddr = kmap_atomic(page_table);

		for (i = pte; i < last_pte; i++) {
			pt_vaddr[i] = scratch_pte;
			num_entries--;
		}

		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);

		pte = 0;
		if (++pde == GEN8_PDES_PER_PAGE) {
			pdpe++;
			pde = 0;
		}
	}
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 unused)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;

	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
			break;

		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);

		pt_vaddr[pte] =
			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
					cache_level, true);
		if (++pte == GEN8_PTES_PER_PAGE) {
			if (!HAS_LLC(ppgtt->base.dev))
				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			if (++pde == GEN8_PDES_PER_PAGE) {
				pdpe++;
				pde = 0;
			}
			pte = 0;
		}
	}
	if (pt_vaddr) {
		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);
	}
}

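/*
 * Worked example for the pdpe/pde/pte decomposition used by the two helpers
 * above, assuming the usual 4KiB pages and 512-entry tables (i.e.
 * GEN8_PTE_SHIFT = 12, GEN8_PDE_SHIFT = 21, GEN8_PDPE_SHIFT = 30 in the gtt
 * header): GPU address 0x40123000 decodes to pdpe 1 (bits 31:30), pde 0
 * (bits 29:21) and pte 0x123 (bits 20:12), i.e. entry 291 of the first page
 * table under the second page directory.
 */
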
static void gen8_free_page_tables(struct page **pt_pages)
{
	int i;

	if (pt_pages == NULL)
		return;

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
		if (pt_pages[i])
			__free_pages(pt_pages[i], 0);
}

static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_dma_addr[i]);
	}

	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
}

static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
	int i, j;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		/* TODO: In the future we'll support sparse mappings, so this
		 * will have to change. */
		if (!ppgtt->pd_dma_addr[i])
			continue;

		pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
			       PCI_DMA_BIDIRECTIONAL);

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			if (addr)
				pci_unmap_page(hwdev, addr, PAGE_SIZE,
					       PCI_DMA_BIDIRECTIONAL);
		}
	}
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
}

static struct page **__gen8_alloc_page_tables(void)
{
	struct page **pt_pages;
	int i;

	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
	if (!pt_pages)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
		pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!pt_pages[i])
			goto bail;
	}

	return pt_pages;

bail:
	gen8_free_page_tables(pt_pages);
	kfree(pt_pages);
	return ERR_PTR(-ENOMEM);
}

static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
					   const int max_pdp)
{
	struct page **pt_pages[GEN8_LEGACY_PDPS];
	int i, ret;

	for (i = 0; i < max_pdp; i++) {
		pt_pages[i] = __gen8_alloc_page_tables();
		if (IS_ERR(pt_pages[i])) {
			ret = PTR_ERR(pt_pages[i]);
			goto unwind_out;
		}
	}

	/* NB: Avoid touching gen8_pt_pages until last to keep the allocation,
	 * "atomic" - for cleanup purposes.
	 */
	for (i = 0; i < max_pdp; i++)
		ppgtt->gen8_pt_pages[i] = pt_pages[i];

	return 0;

unwind_out:
	while (i--) {
		gen8_free_page_tables(pt_pages[i]);
		kfree(pt_pages[i]);
	}

	return ret;
}

static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
						     sizeof(dma_addr_t),
						     GFP_KERNEL);
		if (!ppgtt->gen8_pt_dma_addr[i])
			return -ENOMEM;
	}

	return 0;
}

static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
						const int max_pdp)
{
	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
	if (!ppgtt->pd_pages)
		return -ENOMEM;

	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);

	return 0;
}

static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
			    const int max_pdp)
{
	int ret;

	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
	if (ret)
		return ret;

	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
	if (ret) {
		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
		return ret;
	}

	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;

	ret = gen8_ppgtt_allocate_dma(ppgtt);
	if (ret)
		gen8_ppgtt_free(ppgtt);

	return ret;
}

static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
					     const int pd)
{
	dma_addr_t pd_addr;
	int ret;

	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
			       &ppgtt->pd_pages[pd], 0,
			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
	if (ret)
		return ret;

	ppgtt->pd_dma_addr[pd] = pd_addr;

	return 0;
}

static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
					const int pd,
					const int pt)
{
	dma_addr_t pt_addr;
	struct page *p;
	int ret;

	p = ppgtt->gen8_pt_pages[pd][pt];
	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
	if (ret)
		return ret;

	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;

	return 0;
}

/**
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 *
 * FIXME: split allocation into smaller pieces. For now we only ever do this
 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
 * TODO: Do something with the size parameter
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
	int i, j, ret;

	if (size % (1<<30))
		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);

	/* 1. Do all our allocations for page directories and page tables. */
	ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
	if (ret)
		return ret;

	/*
	 * 2. Create DMA mappings for the page directories and page tables.
	 */
	for (i = 0; i < max_pdp; i++) {
		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
		if (ret)
			goto bail;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
			if (ret)
				goto bail;
		}
	}

	/*
	 * 3. Map all the page directory entries to point to the page tables
	 * we've allocated.
	 *
	 * For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again.
	 */
	for (i = 0; i < max_pdp; i++) {
		gen8_ppgtt_pde_t *pd_vaddr;
		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
						      I915_CACHE_LLC);
		}
		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
		kunmap_atomic(pd_vaddr);
	}

	ppgtt->switch_mm = gen8_mm_switch;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
			 ppgtt->num_pd_entries,
			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
	return 0;

bail:
	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
	return ret;
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	struct i915_address_space *vm = &ppgtt->base;
	gen6_gtt_pte_t __iomem *pd_addr;
	gen6_gtt_pte_t scratch_pte;
	uint32_t pd_entry;
	int pte, pde;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);

	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);

	seq_printf(m, "  VM %p (pd_offset %x-%x):\n", vm,
		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
		u32 expected;
		gen6_gtt_pte_t *pt_vaddr;
		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
		pd_entry = readl(pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
			unsigned long va =
				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, "  SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_atomic(pt_vaddr);
	}
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	BUG_ON(ppgtt->pd_offset & 0x3f);

	return (ppgtt->pd_offset / 64) << 16;
}

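/*
 * The switch_mm() implementations below reload RING_PP_DIR_BASE with the
 * value from get_pd_offset() above, which packs the GGTT byte offset of the
 * page directory as (offset / 64) << 16; e.g. a page directory living at
 * GGTT offset 0x10000 is programmed as 0x4000000. PP_DIR_DCLV_2G presumably
 * marks the directory cachelines for the full 2GB address space as valid.
 */
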
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct intel_engine_cs *ring)
{
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring)
{
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* XXX: RCS is the only one to auto invalidate the TLBs? */
	if (ring->id != RCS) {
		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;


	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

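/*
 * The *_ppgtt_enable() functions below flip the global PPGTT enable bits
 * (GFX_PPGTT_ENABLE in GFX_MODE / RING_MODE_GEN7) plus the gen6/7 ECOCHK,
 * GAB and GAC workaround bits. They are invoked from i915_ppgtt_init_hw()
 * before any switch_mm() call, and are skipped entirely when execlists are
 * in use, since the context descriptor enables PPGTT there.
 */
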
static void gen8_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int j;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen7_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen6_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t ecochk, gab_ctl, ecobits;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 flags)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
				       cache_level, true, flags);

		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
}

static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	drm_mm_remove_node(&ppgtt->node);

	gen6_ppgtt_unmap_pages(ppgtt);
	gen6_ppgtt_free(ppgtt);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool retried = false;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
alloc:
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
						  &ppgtt->node, GEN6_PD_SIZE,
						  GEN6_PD_ALIGN, 0,
						  0, dev_priv->gtt.base.total,
						  DRM_MM_TOPDOWN);
	if (ret == -ENOSPC && !retried) {
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
					       I915_CACHE_NONE,
					       0, dev_priv->gtt.base.total,
					       0);
		if (ret)
			return ret;

		retried = true;
		goto alloc;
	}

	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	return ret;
}

static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
				  GFP_KERNEL);

	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i]) {
			gen6_ppgtt_free(ppgtt);
			return -ENOMEM;
		}
	}

	return 0;
}

static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	int ret;

	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
	if (ret) {
		drm_mm_remove_node(&ppgtt->node);
		return ret;
	}

	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr) {
		drm_mm_remove_node(&ppgtt->node);
		gen6_ppgtt_free(ppgtt);
		return -ENOMEM;
	}

	return 0;
}

static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	int i;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			gen6_ppgtt_unmap_pages(ppgtt);
			return -EIO;
		}

		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	return 0;
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	if (IS_GEN6(dev)) {
		ppgtt->switch_mm = gen6_mm_switch;
	} else if (IS_HASWELL(dev)) {
		ppgtt->switch_mm = hsw_mm_switch;
	} else if (IS_GEN7(dev)) {
		ppgtt->switch_mm = gen7_mm_switch;
	} else
		BUG();

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_setup_page_tables(ppgtt);
	if (ret) {
		gen6_ppgtt_free(ppgtt);
		return ret;
	}

	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
	ppgtt->debug_dump = gen6_dump_ppgtt;

	ppgtt->pd_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);

	gen6_write_pdes(ppgtt);
	DRM_DEBUG("Adding PPGTT at offset %x\n",
		  ppgtt->pd_offset << 10);

	return 0;
}

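/*
 * Rough arithmetic for the gen6/7 layout set up above, assuming
 * I915_PPGTT_PT_ENTRIES is 1024 (one 4KiB page of 4-byte PTEs): 512 page
 * directory entries * 1024 PTEs * 4KiB = 2GiB of PPGTT address space, which
 * is what gen6_ppgtt_init() stores in ppgtt->base.total; the 512 PDEs
 * themselves live in the GGTT at ppgtt->pd_offset and are written by
 * gen6_write_pdes().
 */
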
static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	ppgtt->base.dev = dev;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;

	if (INTEL_INFO(dev)->gen < 8)
		return gen6_ppgtt_init(ppgtt);
	else if (IS_GEN8(dev) || IS_GEN9(dev))
		return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
	else
		BUG();
}
int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ret = __hw_ppgtt_init(dev, ppgtt);
	if (ret == 0) {
		kref_init(&ppgtt->ref);
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
		i915_init_vm(dev_priv, &ppgtt->base);
	}

	return ret;
}

int i915_ppgtt_init_hw(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i, ret = 0;

	/* In the case of execlists, PPGTT is enabled by the context descriptor
	 * and the PDPs are contained within the context itself. We don't
	 * need to do anything here. */
	if (i915.enable_execlists)
		return 0;

	if (!USES_PPGTT(dev))
		return 0;

	if (IS_GEN6(dev))
		gen6_ppgtt_enable(dev);
	else if (IS_GEN7(dev))
		gen7_ppgtt_enable(dev);
	else if (INTEL_INFO(dev)->gen >= 8)
		gen8_ppgtt_enable(dev);
	else
		WARN_ON(1);

	if (ppgtt) {
		for_each_ring(ring, dev_priv, i) {
			ret = ppgtt->switch_mm(ppgtt, ring);
			if (ret != 0)
				return ret;
		}
	}

	return ret;
}
struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
{
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ret = i915_ppgtt_init(dev, ppgtt);
	if (ret) {
		kfree(ppgtt);
		return ERR_PTR(ret);
	}

	ppgtt->file_priv = fpriv;

	trace_i915_ppgtt_create(&ppgtt->base);

	return ppgtt;
}

void i915_ppgtt_release(struct kref *kref)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(kref, struct i915_hw_ppgtt, ref);

	trace_i915_ppgtt_release(&ppgtt->base);

	/* vmas should already be unbound */
	WARN_ON(!list_empty(&ppgtt->base.active_list));
	WARN_ON(!list_empty(&ppgtt->base.inactive_list));

	list_del(&ppgtt->base.global_link);
	drm_mm_takedown(&ppgtt->base.mm);

	ppgtt->base.cleanup(&ppgtt->base);
	kfree(ppgtt);
}

static void
ppgtt_bind_vma(struct i915_vma *vma,
	       enum i915_cache_level cache_level,
	       u32 flags)
{
	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				cache_level, flags);
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
{
	if (INTEL_INFO(dev_priv->dev)->gen < 6) {
		intel_gtt_chipset_flush();
	} else {
		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
		POSTING_READ(GFX_FLSH_CNTL_GEN6);
	}
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	i915_ggtt_flush(dev_priv);
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
							   &dev_priv->gtt.base);
		if (!vma)
			continue;

		i915_gem_clflush_object(obj, obj->pin_display);
		/* The bind_vma code tries to be smart about tracking mappings.
		 * Unfortunately above, we've just wiped out the mappings
		 * without telling our object about it. So we need to fake it.
		 */
		vma->bound &= ~GLOBAL_BIND;
		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
	}


	if (INTEL_INFO(dev)->gen >= 8) {
		if (IS_CHERRYVIEW(dev))
			chv_setup_private_ppat(dev_priv);
		else
			bdw_setup_private_ppat(dev_priv);

		return;
	}

	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
		/* TODO: Perhaps it shouldn't be gen6 specific */
		if (i915_is_ggtt(vm)) {
			if (dev_priv->mm.aliasing_ppgtt)
				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
			continue;
		}

		gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
	}

	i915_ggtt_flush(dev_priv);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0; /* shut up gcc */

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 flags)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0) {
		unsigned long gtt = readl(&gtt_entries[i-1]);
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
	}

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}


static void i915_ggtt_bind_vma(struct i915_vma *vma,
			       enum i915_cache_level cache_level,
			       u32 unused)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	BUG_ON(!i915_is_ggtt(vma->vm));
	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
	vma->bound = GLOBAL_BIND;
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	intel_gtt_clear_range(first_entry, num_entries);
}

static void i915_ggtt_unbind_vma(struct i915_vma *vma)
{
	const unsigned int first = vma->node.start >> PAGE_SHIFT;
	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;

	BUG_ON(!i915_is_ggtt(vma->vm));
	vma->bound = 0;
	intel_gtt_clear_range(first, size);
}

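/*
 * Three families of bind/unbind paths exist side by side in this file:
 * i915_ggtt_bind_vma()/i915_ggtt_unbind_vma() above go through the intel-gtt
 * GMCH helpers for gen5 and earlier, ppgtt_bind_vma()/ppgtt_unbind_vma()
 * handle vmas living in a full PPGTT, and ggtt_bind_vma()/ggtt_unbind_vma()
 * below handle the global GTT, optionally mirroring the mapping into the
 * aliasing PPGTT via LOCAL_BIND.
 */
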
static void ggtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	/* Currently applicable only to VLV */
	if (obj->gt_ro)
		flags |= PTE_READ_ONLY;

	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
	 * or we have a global mapping already but the cacheability flags have
	 * changed, set the global PTEs.
	 *
	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
	 * instead if none of the above hold true.
	 *
	 * NB: A global mapping should only be needed for special regions like
	 * "gtt mappable", SNB errata, or if specified via special execbuf
	 * flags. At all other times, the GPU will use the aliasing PPGTT.
	 */
	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
		if (!(vma->bound & GLOBAL_BIND) ||
		    (cache_level != obj->cache_level)) {
			vma->vm->insert_entries(vma->vm, obj->pages,
						vma->node.start,
						cache_level, flags);
			vma->bound |= GLOBAL_BIND;
		}
	}

	if (dev_priv->mm.aliasing_ppgtt &&
	    (!(vma->bound & LOCAL_BIND) ||
	     (cache_level != obj->cache_level))) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.insert_entries(&appgtt->base,
					    vma->obj->pages,
					    vma->node.start,
					    cache_level, flags);
		vma->bound |= LOCAL_BIND;
	}
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	if (vma->bound & GLOBAL_BIND) {
		vma->vm->clear_range(vma->vm,
				     vma->node.start,
				     obj->base.size,
				     true);
		vma->bound &= ~GLOBAL_BIND;
	}

	if (vma->bound & LOCAL_BIND) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.clear_range(&appgtt->base,
					 vma->node.start,
					 obj->base.size,
					 true);
		vma->bound &= ~LOCAL_BIND;
	}
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

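/*
 * i915_gtt_color_adjust() above keeps one 4096-byte scratch-backed gap
 * between neighbouring drm_mm nodes of different cache level; it is only
 * installed as the GGTT color_adjust callback in i915_gem_setup_global_gtt()
 * below when !HAS_LLC(dev), where mixing cacheability attributes in adjacent
 * pages is the concern.
 */
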
static int i915_gem_setup_global_gtt(struct drm_device *dev,
				     unsigned long start,
				     unsigned long mappable_end,
				     unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;
	int ret;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);

		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret) {
			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
			return ret;
		}
		vma->bound |= GLOBAL_BIND;
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start,
				     hole_end - hole_start, true);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);

	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
		struct i915_hw_ppgtt *ppgtt;

		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
		if (!ppgtt)
			return -ENOMEM;

		ret = __hw_ppgtt_init(dev, ppgtt);
		if (ret != 0)
			return ret;

		dev_priv->mm.aliasing_ppgtt = ppgtt;
	}

	return 0;
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

void i915_global_gtt_cleanup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *vm = &dev_priv->gtt.base;

	if (dev_priv->mm.aliasing_ppgtt) {
		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

		ppgtt->base.cleanup(&ppgtt->base);
	}

	if (drm_mm_initialized(&vm->mm)) {
		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}

	vm->cleanup(vm);
}

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	__free_page(page);
}

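/*
 * Worked example for the GMCH control decoding below, assuming a gen8 GGMS
 * field value of 3: gen8_get_total_gtt_size() returns (1 << 3) << 20 = 8MB
 * of PTE space, and gen8_gmch_probe() turns that into
 * (8MB / 8 bytes per gen8 PTE) << PAGE_SHIFT = 4GB of GGTT address space.
 * The gen6 variant returns the field value directly in MB, with 4-byte PTEs
 * each mapping a 4KiB page.
 */
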
drm_i915_private *dev_priv = dev->dev_private; 1890 phys_addr_t gtt_phys_addr; 1891 int ret; 1892 1893 /* For Modern GENs the PTEs and register space are split in the BAR */ 1894 gtt_phys_addr = pci_resource_start(dev->pdev, 0) + 1895 (pci_resource_len(dev->pdev, 0) / 2); 1896 1897 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size); 1898 if (!dev_priv->gtt.gsm) { 1899 DRM_ERROR("Failed to map the gtt page table\n"); 1900 return -ENOMEM; 1901 } 1902 1903 ret = setup_scratch_page(dev); 1904 if (ret) { 1905 DRM_ERROR("Scratch setup failed\n"); 1906 /* iounmap will also get called at remove, but meh */ 1907 iounmap(dev_priv->gtt.gsm); 1908 } 1909 1910 return ret; 1911 } 1912 1913 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 1914 * bits. When using advanced contexts each context stores its own PAT, but 1915 * writing this data shouldn't be harmful even in those cases. */ 1916 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 1917 { 1918 uint64_t pat; 1919 1920 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 1921 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 1922 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 1923 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 1924 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 1925 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 1926 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 1927 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 1928 1929 if (!USES_PPGTT(dev_priv->dev)) 1930 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 1931 * so RTL will always use the value corresponding to 1932 * pat_sel = 000". 1933 * So let's disable cache for GGTT to avoid screen corruptions. 1934 * MOCS still can be used though. 1935 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 1936 * before this patch, i.e. the same uncached + snooping access 1937 * like on gen6/7 seems to be in effect. 1938 * - So this just fixes blitter/render access. Again it looks 1939 * like it's not just uncached access, but uncached + snooping. 1940 * So we can still hold onto all our assumptions wrt cpu 1941 * clflushing on LLC machines. 1942 */ 1943 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 1944 1945 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 1946 * write would work. */ 1947 I915_WRITE(GEN8_PRIVATE_PAT, pat); 1948 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32); 1949 } 1950 1951 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 1952 { 1953 uint64_t pat; 1954 1955 /* 1956 * Map WB on BDW to snooped on CHV. 1957 * 1958 * Only the snoop bit has meaning for CHV, the rest is 1959 * ignored. 1960 * 1961 * The hardware will never snoop for certain types of accesses: 1962 * - CPU GTT (GMADR->GGTT->no snoop->memory) 1963 * - PPGTT page tables 1964 * - some other special cycles 1965 * 1966 * As with BDW, we also need to consider the following for GT accesses: 1967 * "For GGTT, there is NO pat_sel[2:0] from the entry, 1968 * so RTL will always use the value corresponding to 1969 * pat_sel = 000". 1970 * Which means we must set the snoop bit in PAT entry 0 1971 * in order to keep the global status page working. 
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	if (INTEL_INFO(dev)->gen >= 9) {
		*stolen = gen9_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	} else if (IS_CHERRYVIEW(dev)) {
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
	} else {
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	}

	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	if (IS_CHERRYVIEW(dev))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}
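/*
 * Worked example for the gtt_total computation above (illustrative sizes,
 * not tied to a particular SKU): gen8 PTEs are 8 bytes, so a 4MB GTT holds
 * (4 << 20) / 8 = 512K entries, each mapping a 4KB page:
 *
 *	*gtt_total = (4 << 20) / sizeof(gen8_gtt_pte_t) << PAGE_SHIFT = 2GB
 *
 * which is also why gen8_get_total_gtt_size() clamps 32-bit builds to a
 * 4MB GTT.  The gen6 path below does the same arithmetic with 4-byte PTEs.
 */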
static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}
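/*
 * Summary of the vtable selection done by i915_gem_gtt_init() below (the
 * switch ladder in the function itself is authoritative): gen2-5 probe via
 * the intel-gtt GMCH helpers (i915_gmch_probe/i915_gmch_remove), gen6-7 use
 * gen6_gmch_probe with a per-platform pte_encode, and gen8+ probe via
 * gen8_gmch_probe while reusing gen6_gmch_remove for teardown.
 */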
int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT.
	 */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif
	/*
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
	 * user's requested state against the hardware/driver capabilities. We
	 * do this now so that we can print out any log messages once rather
	 * than every time we check intel_enable_ppgtt().
	 */
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);

	return 0;
}

static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
					      struct i915_address_space *vm)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	switch (INTEL_INFO(vm->dev)->gen) {
	case 9:
	case 8:
	case 7:
	case 6:
		if (i915_is_ggtt(vm)) {
			vma->unbind_vma = ggtt_unbind_vma;
			vma->bind_vma = ggtt_bind_vma;
		} else {
			vma->unbind_vma = ppgtt_unbind_vma;
			vma->bind_vma = ppgtt_bind_vma;
		}
		break;
	case 5:
	case 4:
	case 3:
	case 2:
		BUG_ON(!i915_is_ggtt(vm));
		vma->unbind_vma = i915_ggtt_unbind_vma;
		vma->bind_vma = i915_ggtt_bind_vma;
		break;
	default:
		BUG();
	}

	/* Keep GGTT vmas first to make debug easier */
	if (i915_is_ggtt(vm))
		list_add(&vma->vma_link, &obj->vma_list);
	else {
		list_add_tail(&vma->vma_link, &obj->vma_list);
		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
	}

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm);

	return vma;
}
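/*
 * Minimal usage sketch for the lookup-or-create helper above (the real
 * callers live in the binding/execbuffer paths; GLOBAL_BIND is assumed to
 * be the bind flag from this era's i915_gem_gtt.h):
 *
 *	struct i915_vma *vma;
 *
 *	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
 */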