// SPDX-License-Identifier: GPL-2.0-only
/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32bit addresses on
 * systems with more than 4GB.
 *
 * See Documentation/core-api/dma-api-howto.rst for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 */

#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitmap.h>
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#include <linux/syscore_ops.h>
#include <linux/io.h>
#include <linux/gfp.h>
#include <linux/atomic.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <asm/mtrr.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/set_memory.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/amd_nb.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>

static unsigned long iommu_bus_base;	/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area in bytes */
static unsigned long iommu_pages;	/* .. and in pages */

static u32 *iommu_gatt_base;		/* Remapping table */

/*
 * If this is disabled the IOMMU will use an optimized flushing strategy
 * of only flushing when a mapping is reused. With it true the GART is
 * flushed for every mapping. Problem is that doing the lazy flush seems
 * to trigger bugs with some popular PCI cards, in particular 3ware (but
 * has also been seen with Qlogic at least).
59 */ 60 static int iommu_fullflush = 1; 61 62 /* Allocation bitmap for the remapping area: */ 63 static DEFINE_SPINLOCK(iommu_bitmap_lock); 64 /* Guarded by iommu_bitmap_lock: */ 65 static unsigned long *iommu_gart_bitmap; 66 67 static u32 gart_unmapped_entry; 68 69 #define GPTE_VALID 1 70 #define GPTE_COHERENT 2 71 #define GPTE_ENCODE(x) \ 72 (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) 73 #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) 74 75 #ifdef CONFIG_AGP 76 #define AGPEXTERN extern 77 #else 78 #define AGPEXTERN 79 #endif 80 81 /* GART can only remap to physical addresses < 1TB */ 82 #define GART_MAX_PHYS_ADDR (1ULL << 40) 83 84 /* backdoor interface to AGP driver */ 85 AGPEXTERN int agp_memory_reserved; 86 AGPEXTERN __u32 *agp_gatt_table; 87 88 static unsigned long next_bit; /* protected by iommu_bitmap_lock */ 89 static bool need_flush; /* global flush state. set for each gart wrap */ 90 91 static unsigned long alloc_iommu(struct device *dev, int size, 92 unsigned long align_mask) 93 { 94 unsigned long offset, flags; 95 unsigned long boundary_size; 96 unsigned long base_index; 97 98 base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), 99 PAGE_SIZE) >> PAGE_SHIFT; 100 boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); 101 102 spin_lock_irqsave(&iommu_bitmap_lock, flags); 103 offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, 104 size, base_index, boundary_size, align_mask); 105 if (offset == -1) { 106 need_flush = true; 107 offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, 108 size, base_index, boundary_size, 109 align_mask); 110 } 111 if (offset != -1) { 112 next_bit = offset+size; 113 if (next_bit >= iommu_pages) { 114 next_bit = 0; 115 need_flush = true; 116 } 117 } 118 if (iommu_fullflush) 119 need_flush = true; 120 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 121 122 return offset; 123 } 124 125 static void free_iommu(unsigned long offset, int 
size) 126 { 127 unsigned long flags; 128 129 spin_lock_irqsave(&iommu_bitmap_lock, flags); 130 bitmap_clear(iommu_gart_bitmap, offset, size); 131 if (offset >= next_bit) 132 next_bit = offset + size; 133 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 134 } 135 136 /* 137 * Use global flush state to avoid races with multiple flushers. 138 */ 139 static void flush_gart(void) 140 { 141 unsigned long flags; 142 143 spin_lock_irqsave(&iommu_bitmap_lock, flags); 144 if (need_flush) { 145 amd_flush_garts(); 146 need_flush = false; 147 } 148 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 149 } 150 151 #ifdef CONFIG_IOMMU_LEAK 152 /* Debugging aid for drivers that don't free their IOMMU tables */ 153 static void dump_leak(void) 154 { 155 static int dump; 156 157 if (dump) 158 return; 159 dump = 1; 160 161 show_stack(NULL, NULL, KERN_ERR); 162 debug_dma_dump_mappings(NULL); 163 } 164 #endif 165 166 static void iommu_full(struct device *dev, size_t size, int dir) 167 { 168 /* 169 * Ran out of IOMMU space for this operation. This is very bad. 170 * Unfortunately the drivers cannot handle this operation properly. 171 * Return some non mapped prereserved space in the aperture and 172 * let the Northbridge deal with it. This will result in garbage 173 * in the IO operation. When the size exceeds the prereserved space 174 * memory corruption will occur or random memory will be DMAed 175 * out. Hopefully no network devices use single mappings that big. 
176 */ 177 178 dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); 179 #ifdef CONFIG_IOMMU_LEAK 180 dump_leak(); 181 #endif 182 } 183 184 static inline int 185 need_iommu(struct device *dev, unsigned long addr, size_t size) 186 { 187 return force_iommu || !dma_capable(dev, addr, size, true); 188 } 189 190 static inline int 191 nonforced_iommu(struct device *dev, unsigned long addr, size_t size) 192 { 193 return !dma_capable(dev, addr, size, true); 194 } 195 196 /* Map a single continuous physical area into the IOMMU. 197 * Caller needs to check if the iommu is needed and flush. 198 */ 199 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, 200 size_t size, int dir, unsigned long align_mask) 201 { 202 unsigned long npages = iommu_num_pages(phys_mem, size, PAGE_SIZE); 203 unsigned long iommu_page; 204 int i; 205 206 if (unlikely(phys_mem + size > GART_MAX_PHYS_ADDR)) 207 return DMA_MAPPING_ERROR; 208 209 iommu_page = alloc_iommu(dev, npages, align_mask); 210 if (iommu_page == -1) { 211 if (!nonforced_iommu(dev, phys_mem, size)) 212 return phys_mem; 213 if (panic_on_overflow) 214 panic("dma_map_area overflow %lu bytes\n", size); 215 iommu_full(dev, size, dir); 216 return DMA_MAPPING_ERROR; 217 } 218 219 for (i = 0; i < npages; i++) { 220 iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); 221 phys_mem += PAGE_SIZE; 222 } 223 return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); 224 } 225 226 /* Map a single area into the IOMMU */ 227 static dma_addr_t gart_map_page(struct device *dev, struct page *page, 228 unsigned long offset, size_t size, 229 enum dma_data_direction dir, 230 unsigned long attrs) 231 { 232 unsigned long bus; 233 phys_addr_t paddr = page_to_phys(page) + offset; 234 235 if (!need_iommu(dev, paddr, size)) 236 return paddr; 237 238 bus = dma_map_area(dev, paddr, size, dir, 0); 239 flush_gart(); 240 241 return bus; 242 } 243 244 /* 245 * Free a DMA mapping. 
246 */ 247 static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, 248 size_t size, enum dma_data_direction dir, 249 unsigned long attrs) 250 { 251 unsigned long iommu_page; 252 int npages; 253 int i; 254 255 if (WARN_ON_ONCE(dma_addr == DMA_MAPPING_ERROR)) 256 return; 257 258 /* 259 * This driver will not always use a GART mapping, but might have 260 * created a direct mapping instead. If that is the case there is 261 * nothing to unmap here. 262 */ 263 if (dma_addr < iommu_bus_base || 264 dma_addr >= iommu_bus_base + iommu_size) 265 return; 266 267 iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; 268 npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); 269 for (i = 0; i < npages; i++) { 270 iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 271 } 272 free_iommu(iommu_page, npages); 273 } 274 275 /* 276 * Wrapper for pci_unmap_single working with scatterlists. 277 */ 278 static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 279 enum dma_data_direction dir, unsigned long attrs) 280 { 281 struct scatterlist *s; 282 int i; 283 284 for_each_sg(sg, s, nents, i) { 285 if (!s->dma_length || !s->length) 286 break; 287 gart_unmap_page(dev, s->dma_address, s->dma_length, dir, 0); 288 } 289 } 290 291 /* Fallback for dma_map_sg in case of overflow */ 292 static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, 293 int nents, int dir) 294 { 295 struct scatterlist *s; 296 int i; 297 298 #ifdef CONFIG_IOMMU_DEBUG 299 pr_debug("dma_map_sg overflow\n"); 300 #endif 301 302 for_each_sg(sg, s, nents, i) { 303 unsigned long addr = sg_phys(s); 304 305 if (nonforced_iommu(dev, addr, s->length)) { 306 addr = dma_map_area(dev, addr, s->length, dir, 0); 307 if (addr == DMA_MAPPING_ERROR) { 308 if (i > 0) 309 gart_unmap_sg(dev, sg, i, dir, 0); 310 nents = 0; 311 sg[0].dma_length = 0; 312 break; 313 } 314 } 315 s->dma_address = addr; 316 s->dma_length = s->length; 317 } 318 flush_gart(); 319 320 return nents; 321 } 322 323 /* 
Map multiple scatterlist entries continuous into the first. */ 324 static int __dma_map_cont(struct device *dev, struct scatterlist *start, 325 int nelems, struct scatterlist *sout, 326 unsigned long pages) 327 { 328 unsigned long iommu_start = alloc_iommu(dev, pages, 0); 329 unsigned long iommu_page = iommu_start; 330 struct scatterlist *s; 331 int i; 332 333 if (iommu_start == -1) 334 return -1; 335 336 for_each_sg(start, s, nelems, i) { 337 unsigned long pages, addr; 338 unsigned long phys_addr = s->dma_address; 339 340 BUG_ON(s != start && s->offset); 341 if (s == start) { 342 sout->dma_address = iommu_bus_base; 343 sout->dma_address += iommu_page*PAGE_SIZE + s->offset; 344 sout->dma_length = s->length; 345 } else { 346 sout->dma_length += s->length; 347 } 348 349 addr = phys_addr; 350 pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); 351 while (pages--) { 352 iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); 353 addr += PAGE_SIZE; 354 iommu_page++; 355 } 356 } 357 BUG_ON(iommu_page - iommu_start != pages); 358 359 return 0; 360 } 361 362 static inline int 363 dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, 364 struct scatterlist *sout, unsigned long pages, int need) 365 { 366 if (!need) { 367 BUG_ON(nelems != 1); 368 sout->dma_address = start->dma_address; 369 sout->dma_length = start->length; 370 return 0; 371 } 372 return __dma_map_cont(dev, start, nelems, sout, pages); 373 } 374 375 /* 376 * DMA map all entries in a scatterlist. 377 * Merge chunks that have page aligned sizes into a continuous mapping. 
378 */ 379 static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, 380 enum dma_data_direction dir, unsigned long attrs) 381 { 382 struct scatterlist *s, *ps, *start_sg, *sgmap; 383 int need = 0, nextneed, i, out, start; 384 unsigned long pages = 0; 385 unsigned int seg_size; 386 unsigned int max_seg_size; 387 388 if (nents == 0) 389 return 0; 390 391 out = 0; 392 start = 0; 393 start_sg = sg; 394 sgmap = sg; 395 seg_size = 0; 396 max_seg_size = dma_get_max_seg_size(dev); 397 ps = NULL; /* shut up gcc */ 398 399 for_each_sg(sg, s, nents, i) { 400 dma_addr_t addr = sg_phys(s); 401 402 s->dma_address = addr; 403 BUG_ON(s->length == 0); 404 405 nextneed = need_iommu(dev, addr, s->length); 406 407 /* Handle the previous not yet processed entries */ 408 if (i > start) { 409 /* 410 * Can only merge when the last chunk ends on a 411 * page boundary and the new one doesn't have an 412 * offset. 413 */ 414 if (!iommu_merge || !nextneed || !need || s->offset || 415 (s->length + seg_size > max_seg_size) || 416 (ps->offset + ps->length) % PAGE_SIZE) { 417 if (dma_map_cont(dev, start_sg, i - start, 418 sgmap, pages, need) < 0) 419 goto error; 420 out++; 421 422 seg_size = 0; 423 sgmap = sg_next(sgmap); 424 pages = 0; 425 start = i; 426 start_sg = s; 427 } 428 } 429 430 seg_size += s->length; 431 need = nextneed; 432 pages += iommu_num_pages(s->offset, s->length, PAGE_SIZE); 433 ps = s; 434 } 435 if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) 436 goto error; 437 out++; 438 flush_gart(); 439 if (out < nents) { 440 sgmap = sg_next(sgmap); 441 sgmap->dma_length = 0; 442 } 443 return out; 444 445 error: 446 flush_gart(); 447 gart_unmap_sg(dev, sg, out, dir, 0); 448 449 /* When it was forced or merged try again in a dumb way */ 450 if (force_iommu || iommu_merge) { 451 out = dma_map_sg_nonforce(dev, sg, nents, dir); 452 if (out > 0) 453 return out; 454 } 455 if (panic_on_overflow) 456 panic("dma_map_sg: overflow on %lu pages\n", pages); 457 
458 iommu_full(dev, pages << PAGE_SHIFT, dir); 459 for_each_sg(sg, s, nents, i) 460 s->dma_address = DMA_MAPPING_ERROR; 461 return 0; 462 } 463 464 /* allocate and map a coherent mapping */ 465 static void * 466 gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, 467 gfp_t flag, unsigned long attrs) 468 { 469 void *vaddr; 470 471 vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); 472 if (!vaddr || 473 !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) 474 return vaddr; 475 476 *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size, 477 DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1); 478 flush_gart(); 479 if (unlikely(*dma_addr == DMA_MAPPING_ERROR)) 480 goto out_free; 481 return vaddr; 482 out_free: 483 dma_direct_free(dev, size, vaddr, *dma_addr, attrs); 484 return NULL; 485 } 486 487 /* free a coherent mapping */ 488 static void 489 gart_free_coherent(struct device *dev, size_t size, void *vaddr, 490 dma_addr_t dma_addr, unsigned long attrs) 491 { 492 gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); 493 dma_direct_free(dev, size, vaddr, dma_addr, attrs); 494 } 495 496 static int no_agp; 497 498 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) 499 { 500 unsigned long a; 501 502 if (!iommu_size) { 503 iommu_size = aper_size; 504 if (!no_agp) 505 iommu_size /= 2; 506 } 507 508 a = aper + iommu_size; 509 iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; 510 511 if (iommu_size < 64*1024*1024) { 512 pr_warn("PCI-DMA: Warning: Small IOMMU %luMB." 
513 " Consider increasing the AGP aperture in BIOS\n", 514 iommu_size >> 20); 515 } 516 517 return iommu_size; 518 } 519 520 static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) 521 { 522 unsigned aper_size = 0, aper_base_32, aper_order; 523 u64 aper_base; 524 525 pci_read_config_dword(dev, AMD64_GARTAPERTUREBASE, &aper_base_32); 526 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &aper_order); 527 aper_order = (aper_order >> 1) & 7; 528 529 aper_base = aper_base_32 & 0x7fff; 530 aper_base <<= 25; 531 532 aper_size = (32 * 1024 * 1024) << aper_order; 533 if (aper_base + aper_size > 0x100000000UL || !aper_size) 534 aper_base = 0; 535 536 *size = aper_size; 537 return aper_base; 538 } 539 540 static void enable_gart_translations(void) 541 { 542 int i; 543 544 if (!amd_nb_has_feature(AMD_NB_GART)) 545 return; 546 547 for (i = 0; i < amd_nb_num(); i++) { 548 struct pci_dev *dev = node_to_amd_nb(i)->misc; 549 550 enable_gart_translation(dev, __pa(agp_gatt_table)); 551 } 552 553 /* Flush the GART-TLB to remove stale entries */ 554 amd_flush_garts(); 555 } 556 557 /* 558 * If fix_up_north_bridges is set, the north bridges have to be fixed up on 559 * resume in the same way as they are handled in gart_iommu_hole_init(). 560 */ 561 static bool fix_up_north_bridges; 562 static u32 aperture_order; 563 static u32 aperture_alloc; 564 565 void set_up_gart_resume(u32 aper_order, u32 aper_alloc) 566 { 567 fix_up_north_bridges = true; 568 aperture_order = aper_order; 569 aperture_alloc = aper_alloc; 570 } 571 572 static void gart_fixup_northbridges(void) 573 { 574 int i; 575 576 if (!fix_up_north_bridges) 577 return; 578 579 if (!amd_nb_has_feature(AMD_NB_GART)) 580 return; 581 582 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 583 584 for (i = 0; i < amd_nb_num(); i++) { 585 struct pci_dev *dev = node_to_amd_nb(i)->misc; 586 587 /* 588 * Don't enable translations just yet. That is the next 589 * step. Restore the pre-suspend aperture settings. 
590 */ 591 gart_set_size_and_enable(dev, aperture_order); 592 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); 593 } 594 } 595 596 static void gart_resume(void) 597 { 598 pr_info("PCI-DMA: Resuming GART IOMMU\n"); 599 600 gart_fixup_northbridges(); 601 602 enable_gart_translations(); 603 } 604 605 static struct syscore_ops gart_syscore_ops = { 606 .resume = gart_resume, 607 608 }; 609 610 /* 611 * Private Northbridge GATT initialization in case we cannot use the 612 * AGP driver for some reason. 613 */ 614 static __init int init_amd_gatt(struct agp_kern_info *info) 615 { 616 unsigned aper_size, gatt_size, new_aper_size; 617 unsigned aper_base, new_aper_base; 618 struct pci_dev *dev; 619 void *gatt; 620 int i; 621 622 pr_info("PCI-DMA: Disabling AGP.\n"); 623 624 aper_size = aper_base = info->aper_size = 0; 625 dev = NULL; 626 for (i = 0; i < amd_nb_num(); i++) { 627 dev = node_to_amd_nb(i)->misc; 628 new_aper_base = read_aperture(dev, &new_aper_size); 629 if (!new_aper_base) 630 goto nommu; 631 632 if (!aper_base) { 633 aper_size = new_aper_size; 634 aper_base = new_aper_base; 635 } 636 if (aper_size != new_aper_size || aper_base != new_aper_base) 637 goto nommu; 638 } 639 if (!aper_base) 640 goto nommu; 641 642 info->aper_base = aper_base; 643 info->aper_size = aper_size >> 20; 644 645 gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32); 646 gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 647 get_order(gatt_size)); 648 if (!gatt) 649 panic("Cannot allocate GATT table"); 650 if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT)) 651 panic("Could not set GART PTEs to uncacheable pages"); 652 653 agp_gatt_table = gatt; 654 655 register_syscore_ops(&gart_syscore_ops); 656 657 flush_gart(); 658 659 pr_info("PCI-DMA: aperture base @ %x size %u KB\n", 660 aper_base, aper_size>>10); 661 662 return 0; 663 664 nommu: 665 /* Should not happen anymore */ 666 pr_warn("PCI-DMA: More than 4GB of RAM and no IOMMU - falling back to 
iommu=soft.\n"); 667 return -1; 668 } 669 670 static const struct dma_map_ops gart_dma_ops = { 671 .map_sg = gart_map_sg, 672 .unmap_sg = gart_unmap_sg, 673 .map_page = gart_map_page, 674 .unmap_page = gart_unmap_page, 675 .alloc = gart_alloc_coherent, 676 .free = gart_free_coherent, 677 .mmap = dma_common_mmap, 678 .get_sgtable = dma_common_get_sgtable, 679 .dma_supported = dma_direct_supported, 680 .get_required_mask = dma_direct_get_required_mask, 681 .alloc_pages = dma_direct_alloc_pages, 682 .free_pages = dma_direct_free_pages, 683 }; 684 685 static void gart_iommu_shutdown(void) 686 { 687 struct pci_dev *dev; 688 int i; 689 690 /* don't shutdown it if there is AGP installed */ 691 if (!no_agp) 692 return; 693 694 if (!amd_nb_has_feature(AMD_NB_GART)) 695 return; 696 697 for (i = 0; i < amd_nb_num(); i++) { 698 u32 ctl; 699 700 dev = node_to_amd_nb(i)->misc; 701 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 702 703 ctl &= ~GARTEN; 704 705 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl); 706 } 707 } 708 709 int __init gart_iommu_init(void) 710 { 711 struct agp_kern_info info; 712 unsigned long iommu_start; 713 unsigned long aper_base, aper_size; 714 unsigned long start_pfn, end_pfn; 715 unsigned long scratch; 716 717 if (!amd_nb_has_feature(AMD_NB_GART)) 718 return 0; 719 720 #ifndef CONFIG_AGP_AMD64 721 no_agp = 1; 722 #else 723 /* Makefile puts PCI initialization via subsys_initcall first. 
*/ 724 /* Add other AMD AGP bridge drivers here */ 725 no_agp = no_agp || 726 (agp_amd64_init() < 0) || 727 (agp_copy_info(agp_bridge, &info) < 0); 728 #endif 729 730 if (no_iommu || 731 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 732 !gart_iommu_aperture || 733 (no_agp && init_amd_gatt(&info) < 0)) { 734 if (max_pfn > MAX_DMA32_PFN) { 735 pr_warn("More than 4GB of memory but GART IOMMU not available.\n"); 736 pr_warn("falling back to iommu=soft.\n"); 737 } 738 return 0; 739 } 740 741 /* need to map that range */ 742 aper_size = info.aper_size << 20; 743 aper_base = info.aper_base; 744 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); 745 746 start_pfn = PFN_DOWN(aper_base); 747 if (!pfn_range_is_mapped(start_pfn, end_pfn)) 748 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT, 749 PAGE_KERNEL); 750 751 pr_info("PCI-DMA: using GART IOMMU.\n"); 752 iommu_size = check_iommu_size(info.aper_base, aper_size); 753 iommu_pages = iommu_size >> PAGE_SHIFT; 754 755 iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, 756 get_order(iommu_pages/8)); 757 if (!iommu_gart_bitmap) 758 panic("Cannot allocate iommu bitmap\n"); 759 760 pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", 761 iommu_size >> 20); 762 763 agp_memory_reserved = iommu_size; 764 iommu_start = aper_size - iommu_size; 765 iommu_bus_base = info.aper_base + iommu_start; 766 iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); 767 768 /* 769 * Unmap the IOMMU part of the GART. The alias of the page is 770 * always mapped with cache enabled and there is no full cache 771 * coherency across the GART remapping. The unmapping avoids 772 * automatic prefetches from the CPU allocating cache lines in 773 * there. All CPU accesses are done via the direct mapping to 774 * the backing memory. The GART address is only used by PCI 775 * devices. 
776 */ 777 set_memory_np((unsigned long)__va(iommu_bus_base), 778 iommu_size >> PAGE_SHIFT); 779 /* 780 * Tricky. The GART table remaps the physical memory range, 781 * so the CPU wont notice potential aliases and if the memory 782 * is remapped to UC later on, we might surprise the PCI devices 783 * with a stray writeout of a cacheline. So play it sure and 784 * do an explicit, full-scale wbinvd() _after_ having marked all 785 * the pages as Not-Present: 786 */ 787 wbinvd(); 788 789 /* 790 * Now all caches are flushed and we can safely enable 791 * GART hardware. Doing it early leaves the possibility 792 * of stale cache entries that can lead to GART PTE 793 * errors. 794 */ 795 enable_gart_translations(); 796 797 /* 798 * Try to workaround a bug (thanks to BenH): 799 * Set unmapped entries to a scratch page instead of 0. 800 * Any prefetches that hit unmapped entries won't get an bus abort 801 * then. (P2P bridge may be prefetching on DMA reads). 802 */ 803 scratch = get_zeroed_page(GFP_KERNEL); 804 if (!scratch) 805 panic("Cannot allocate iommu scratch page"); 806 gart_unmapped_entry = GPTE_ENCODE(__pa(scratch)); 807 808 flush_gart(); 809 dma_ops = &gart_dma_ops; 810 x86_platform.iommu_shutdown = gart_iommu_shutdown; 811 swiotlb = 0; 812 813 return 0; 814 } 815 816 void __init gart_parse_options(char *p) 817 { 818 int arg; 819 820 if (isdigit(*p) && get_option(&p, &arg)) 821 iommu_size = arg; 822 if (!strncmp(p, "fullflush", 9)) 823 iommu_fullflush = 1; 824 if (!strncmp(p, "nofullflush", 11)) 825 iommu_fullflush = 0; 826 if (!strncmp(p, "noagp", 5)) 827 no_agp = 1; 828 if (!strncmp(p, "noaperture", 10)) 829 fix_aperture = 0; 830 /* duplicated from pci-dma.c */ 831 if (!strncmp(p, "force", 5)) 832 gart_iommu_aperture_allowed = 1; 833 if (!strncmp(p, "allowed", 7)) 834 gart_iommu_aperture_allowed = 1; 835 if (!strncmp(p, "memaper", 7)) { 836 fallback_aper_force = 1; 837 p += 7; 838 if (*p == '=') { 839 ++p; 840 if (get_option(&p, &arg)) 841 fallback_aper_order = 
arg; 842 } 843 } 844 } 845 IOMMU_INIT_POST(gart_iommu_hole_init); 846