/*
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Rewrite, cleanup, new allocation schemes, virtual merging:
 * Copyright (C) 2004 Olof Johansson, IBM Corporation
 *               and  Ben. Herrenschmidt, IBM Corporation
 *
 * Dynamic DMA mapping support, bus-independent parts.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */


#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
#include <linux/hash.h>
#include <linux/fault-inject.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/kdump.h>
#include <asm/fadump.h>
#include <asm/vio.h>

#define DBG(...)

static int novmerge;

static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);

static int __init setup_iommu(char *str)
{
	if (!strcmp(str, "novmerge"))
		novmerge = 1;
	else if (!strcmp(str, "vmerge"))
		novmerge = 0;
	return 1;
}

__setup("iommu=", setup_iommu);

static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);

/*
 * We precalculate the hash to avoid doing it on every allocation.
 *
 * The hash is important to spread CPUs across all the pools. For example,
 * on a POWER7 with 4-way SMT, interrupts are taken on the primary threads;
 * if we simply used the CPU number, with 4 pools all primary threads would
 * map to the same pool.
 */
static int __init setup_iommu_pool_hash(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);

	return 0;
}
subsys_initcall(setup_iommu_pool_hash);

#ifdef CONFIG_FAIL_IOMMU

static DECLARE_FAULT_ATTR(fail_iommu);

static int __init setup_fail_iommu(char *str)
{
	return setup_fault_attr(&fail_iommu, str);
}
__setup("fail_iommu=", setup_fail_iommu);

static bool should_fail_iommu(struct device *dev)
{
	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
}

static int __init fail_iommu_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
						       NULL, &fail_iommu);

	return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_iommu_debugfs);
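
/*
 * Usage sketch for the fault injection hooks above (illustrative values):
 * the generic fault-attr parameters can be passed on the command line, e.g.
 *
 *	fail_iommu=<interval>,<probability>,<space>,<times>
 *
 * and injection is then opted into per device through the "fail_iommu"
 * sysfs attribute registered below, e.g.
 *
 *	echo 1 > /sys/bus/pci/devices/0000:00:01.0/fail_iommu
 *
 * (the PCI address is hypothetical). The debugfs knobs created by
 * fault_create_debugfs_attr() can be used instead of the boot parameter.
 */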

static ssize_t fail_iommu_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
}

static ssize_t fail_iommu_store(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		dev->archdata.fail_iommu = (i == 0) ? 0 : 1;

	return count;
}

static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
		   fail_iommu_store);

static int fail_iommu_bus_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		if (device_create_file(dev, &dev_attr_fail_iommu))
			pr_warn("Unable to create IOMMU fault injection sysfs "
				"entries\n");
	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
		device_remove_file(dev, &dev_attr_fail_iommu);
	}

	return 0;
}

static struct notifier_block fail_iommu_bus_notifier = {
	.notifier_call = fail_iommu_bus_notify
};

static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
#endif

	return 0;
}
/*
 * Must execute after the PCI and VIO subsystems have initialised but
 * before devices are probed.
 */
arch_initcall(fail_iommu_setup);
#else
static inline bool should_fail_iommu(struct device *dev)
{
	return false;
}
#endif

static unsigned long iommu_range_alloc(struct device *dev,
				       struct iommu_table *tbl,
				       unsigned long npages,
				       unsigned long *handle,
				       unsigned long mask,
				       unsigned int align_order)
{
	unsigned long n, end, start;
	unsigned long limit;
	int largealloc = npages > 15;
	int pass = 0;
	unsigned long align_mask;
	unsigned long boundary_size;
	unsigned long flags;
	unsigned int pool_nr;
	struct iommu_pool *pool;

	align_mask = 0xffffffffffffffffl >> (64 - align_order);

	/* This allocator was derived from x86_64's bit string search */

	/* Sanity check */
	if (unlikely(npages == 0)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return DMA_ERROR_CODE;
	}

	if (should_fail_iommu(dev))
		return DMA_ERROR_CODE;

	/*
	 * We don't need to disable preemption here because any CPU can
	 * safely use any IOMMU pool.
	 */
	pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);

	if (largealloc)
		pool = &(tbl->large_pool);
	else
		pool = &(tbl->pools[pool_nr]);

	spin_lock_irqsave(&(pool->lock), flags);

again:
	if ((pass == 0) && handle && *handle &&
	    (*handle >= pool->start) && (*handle < pool->end))
		start = *handle;
	else
		start = pool->hint;

	limit = pool->end;

	/* The case below can happen if we have a small segment appended
	 * to a large one, or when the previous alloc was at the very end
	 * of the available space. If so, go back to the initial start.
	 */
	if (start >= limit)
		start = pool->start;
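
	/* Illustrative example (hypothetical numbers, assuming 4K IOMMU
	 * pages): a device with a 31-bit DMA mask passes
	 * mask = 0x7fffffff >> IOMMU_PAGE_SHIFT = 0x7ffff here.  If this
	 * pool ends above that once it_offset is added, the clamp below
	 * shrinks the search limit so that every entry handed out still
	 * yields a bus address the device can reach.
	 */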

	if (limit + tbl->it_offset > mask) {
		limit = mask - tbl->it_offset + 1;
		/* If we're constrained on address range, first try
		 * at the masked hint to avoid O(n) search complexity,
		 * but on second pass, start at 0 in pool 0.
		 */
		if ((start & mask) >= limit || pass > 0) {
			spin_unlock(&(pool->lock));
			pool = &(tbl->pools[0]);
			spin_lock(&(pool->lock));
			start = pool->start;
		} else {
			start &= mask;
		}
	}

	if (dev)
		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				      1 << IOMMU_PAGE_SHIFT);
	else
		boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
	/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */

	n = iommu_area_alloc(tbl->it_map, limit, start, npages,
			     tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
			     align_mask);
	if (n == -1) {
		if (likely(pass == 0)) {
			/* First try the pool from the start */
			pool->hint = pool->start;
			pass++;
			goto again;

		} else if (pass <= tbl->nr_pools) {
			/* Now try scanning all the other pools */
			spin_unlock(&(pool->lock));
			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
			pool = &tbl->pools[pool_nr];
			spin_lock(&(pool->lock));
			pool->hint = pool->start;
			pass++;
			goto again;

		} else {
			/* Give up */
			spin_unlock_irqrestore(&(pool->lock), flags);
			return DMA_ERROR_CODE;
		}
	}

	end = n + npages;

	/* Bump the hint to a new block for small allocs. */
	if (largealloc) {
		/* Don't bump to new block to avoid fragmentation */
		pool->hint = end;
	} else {
		/* Overflow will be taken care of at the next allocation */
		pool->hint = (end + tbl->it_blocksize - 1) &
				~(tbl->it_blocksize - 1);
	}

	/* Update handle for SG allocations */
	if (handle)
		*handle = end;

	spin_unlock_irqrestore(&(pool->lock), flags);

	return n;
}
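
/*
 * A worked example of align_order above (illustrative numbers, assuming
 * 64K kernel pages and 4K IOMMU pages): for a page-aligned buffer of at
 * least PAGE_SIZE, callers such as iommu_map_sg() and iommu_map_page()
 * pass align_order = PAGE_SHIFT - IOMMU_PAGE_SHIFT = 4, so align_mask is
 * 0xf and the returned entry is a multiple of 16, i.e. the resulting bus
 * address is 64K aligned just like the kernel virtual address.
 */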

static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
			      void *page, unsigned int npages,
			      enum dma_data_direction direction,
			      unsigned long mask, unsigned int align_order,
			      struct dma_attrs *attrs)
{
	unsigned long entry;
	dma_addr_t ret = DMA_ERROR_CODE;
	int build_fail;

	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);

	if (unlikely(entry == DMA_ERROR_CODE))
		return DMA_ERROR_CODE;

	entry += tbl->it_offset;	/* Offset into real TCE table */
	ret = entry << IOMMU_PAGE_SHIFT;	/* Set the return dma address */

	/* Put the TCEs in the HW table */
	build_fail = ppc_md.tce_build(tbl, entry, npages,
				      (unsigned long)page & IOMMU_PAGE_MASK,
				      direction, attrs);

	/* ppc_md.tce_build() only returns non-zero for transient errors.
	 * Clean up the table bitmap in this case and return
	 * DMA_ERROR_CODE. For all other errors the functionality is
	 * not altered.
	 */
	if (unlikely(build_fail)) {
		__iommu_free(tbl, ret, npages);
		return DMA_ERROR_CODE;
	}

	/* Flush/invalidate TLB caches if necessary */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();

	return ret;
}

static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
			     unsigned int npages)
{
	unsigned long entry, free_entry;

	entry = dma_addr >> IOMMU_PAGE_SHIFT;
	free_entry = entry - tbl->it_offset;

	if (((free_entry + npages) > tbl->it_size) ||
	    (entry < tbl->it_offset)) {
		if (printk_ratelimit()) {
			printk(KERN_INFO "iommu_free: invalid entry\n");
			printk(KERN_INFO "\tentry = 0x%lx\n", entry);
			printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr);
			printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl);
			printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno);
			printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size);
			printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset);
			printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
			WARN_ON(1);
		}

		return false;
	}

	return true;
}

static struct iommu_pool *get_pool(struct iommu_table *tbl,
				   unsigned long entry)
{
	struct iommu_pool *p;
	unsigned long largepool_start = tbl->large_pool.start;

	/* The large pool is the last pool at the top of the table */
	if (entry >= largepool_start) {
		p = &tbl->large_pool;
	} else {
		unsigned int pool_nr = entry / tbl->poolsize;

		BUG_ON(pool_nr > tbl->nr_pools);
		p = &tbl->pools[pool_nr];
	}

	return p;
}

static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
			 unsigned int npages)
{
	unsigned long entry, free_entry;
	unsigned long flags;
	struct iommu_pool *pool;

	entry = dma_addr >> IOMMU_PAGE_SHIFT;
	free_entry = entry - tbl->it_offset;

	pool = get_pool(tbl, free_entry);

	if (!iommu_free_check(tbl, dma_addr, npages))
		return;

	ppc_md.tce_free(tbl, entry, npages);

	spin_lock_irqsave(&(pool->lock), flags);
	bitmap_clear(tbl->it_map, free_entry, npages);
	spin_unlock_irqrestore(&(pool->lock), flags);
}

static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
		       unsigned int npages)
{
	__iommu_free(tbl, dma_addr, npages);

	/* Make sure TLB cache is flushed if the HW needs it. We do
	 * not do an mb() here on purpose, it is not needed on any of
	 * the current platforms.
	 */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);
}
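
/*
 * A quick worked example of the dma_addr decomposition above (hypothetical
 * numbers, assuming 4K IOMMU pages): for dma_addr 0x80042000 with
 * it_offset 0x80000, entry = 0x80042 and free_entry = 0x42, so get_pool()
 * picks the pool whose [start, end) range contains 0x42 and bitmap_clear()
 * releases npages bits starting at bit 0x42 of it_map.
 */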

int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
		 struct scatterlist *sglist, int nelems,
		 unsigned long mask, enum dma_data_direction direction,
		 struct dma_attrs *attrs)
{
	dma_addr_t dma_next = 0, dma_addr;
	struct scatterlist *s, *outs, *segstart;
	int outcount, incount, i, build_fail = 0;
	unsigned int align;
	unsigned long handle;
	unsigned int max_seg_size;

	BUG_ON(direction == DMA_NONE);

	if ((nelems == 0) || !tbl)
		return 0;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	DBG("sg mapping %d elements:\n", nelems);

	max_seg_size = dma_get_max_seg_size(dev);
	for_each_sg(sglist, s, nelems, i) {
		unsigned long vaddr, npages, entry, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		vaddr = (unsigned long) sg_virt(s);
		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE);
		align = 0;
		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&
		    (vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
		entry = iommu_range_alloc(dev, tbl, npages, &handle,
					  mask >> IOMMU_PAGE_SHIFT, align);

		DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);

		/* Handle failure */
		if (unlikely(entry == DMA_ERROR_CODE)) {
			if (printk_ratelimit())
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %lx npages %lu\n", tbl, vaddr,
					 npages);
			goto failure;
		}

		/* Convert entry to a dma_addr_t */
		entry += tbl->it_offset;
		dma_addr = entry << IOMMU_PAGE_SHIFT;
		dma_addr |= (s->offset & ~IOMMU_PAGE_MASK);

		DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
			    npages, entry, dma_addr);

		/* Insert into HW table */
		build_fail = ppc_md.tce_build(tbl, entry, npages,
					      vaddr & IOMMU_PAGE_MASK,
					      direction, attrs);
		if (unlikely(build_fail))
			goto failure;

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			DBG(" - trying merge...\n");
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if (novmerge || (dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size)) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
				DBG(" can't merge, new segment.\n");
			} else {
				outs->dma_length += s->length;
				DBG(" merged, new len: %u\n", outs->dma_length);
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			DBG(" - filling new segment.\n");
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;

		DBG(" - dma next is: %lx\n", dma_next);
	}

	/* Flush/invalidate TLB caches if necessary */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);

	DBG("mapped %d elements:\n", outcount);

	/* For the sake of iommu_unmap_sg, we clear out the length in the
	 * next entry of the sglist if we didn't fill the list completely
	 */
	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_ERROR_CODE;
		outs->dma_length = 0;
	}

	/* Make sure updates are seen by hardware */
	mb();

	return outcount;

 failure:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IOMMU_PAGE_MASK;
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IOMMU_PAGE_SIZE);
			__iommu_free(tbl, vaddr, npages);
			s->dma_address = DMA_ERROR_CODE;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	return 0;
}
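
/*
 * Minimal usage sketch (illustrative only; 'pdev', 'buf0', 'buf1' and the
 * lengths are hypothetical): drivers do not call iommu_map_sg() directly,
 * they go through the generic DMA API, which routes here for IOMMU-backed
 * buses:
 *
 *	struct scatterlist sg[2];
 *	int nmapped;
 *
 *	sg_init_table(sg, 2);
 *	sg_set_buf(&sg[0], buf0, len0);
 *	sg_set_buf(&sg[1], buf1, len1);
 *	nmapped = dma_map_sg(&pdev->dev, sg, 2, DMA_TO_DEVICE);
 *	...
 *	dma_unmap_sg(&pdev->dev, sg, 2, DMA_TO_DEVICE);
 *
 * nmapped may be smaller than the element count when virtual merging
 * coalesces adjacent entries (and is 0 on failure).
 */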

void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
		    int nelems, enum dma_data_direction direction,
		    struct dma_attrs *attrs)
{
	struct scatterlist *sg;

	BUG_ON(direction == DMA_NONE);

	if (!tbl)
		return;

	sg = sglist;
	while (nelems--) {
		unsigned int npages;
		dma_addr_t dma_handle = sg->dma_address;

		if (sg->dma_length == 0)
			break;
		npages = iommu_num_pages(dma_handle, sg->dma_length,
					 IOMMU_PAGE_SIZE);
		__iommu_free(tbl, dma_handle, npages);
		sg = sg_next(sg);
	}

	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
	 * do not do an mb() here, the affected platforms do not need it
	 * when freeing.
	 */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);
}

static void iommu_table_clear(struct iommu_table *tbl)
{
	/*
	 * In the case of firmware-assisted dump, the system goes through a
	 * clean reboot at the time of the crash, so it is safe to clear the
	 * TCE entries when firmware-assisted dump is active.
	 */
	if (!is_kdump_kernel() || is_fadump_active()) {
		/* Clear the table in case firmware left allocations in it */
		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
		return;
	}

#ifdef CONFIG_CRASH_DUMP
	if (ppc_md.tce_get) {
		unsigned long index, tceval, tcecount = 0;

		/* Reserve the existing mappings left by the first kernel. */
		for (index = 0; index < tbl->it_size; index++) {
			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
			/*
			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
			 */
			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
				__set_bit(index, tbl->it_map);
				tcecount++;
			}
		}

		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
			printk(KERN_WARNING "TCE table is full; freeing ");
			printk(KERN_WARNING "%d entries for the kdump boot\n",
				KDUMP_MIN_TCE_ENTRIES);
			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
				index < tbl->it_size; index++)
				__clear_bit(index, tbl->it_map);
		}
	}
#endif
}
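
/*
 * A worked example of the pool layout set up by iommu_init_table() below
 * (illustrative numbers, assuming 4K IOMMU pages and IOMMU_NR_POOLS = 4):
 * a 2GB DMA window gives it_size = 0x80000 entries, which is at least 1GB
 * of space, so nr_pools = 4 and poolsize = (0x80000 * 3 / 4) / 4 = 0x18000.
 * The four small pools cover entries [0, 0x60000) and the large pool takes
 * the top quarter, [0x60000, 0x80000).
 */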

/*
 * Build an iommu_table structure. This contains a bit map which
 * is used to manage allocation of the tce space.
 */
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
{
	unsigned long sz;
	static int welcomed = 0;
	struct page *page;
	unsigned int i;
	struct iommu_pool *p;

	/* number of bytes needed for the bitmap */
	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);

	page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
	if (!page)
		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
	tbl->it_map = page_address(page);
	memset(tbl->it_map, 0, sz);

	/*
	 * Reserve page 0 so it will not be used for any mappings.
	 * This prevents buggy drivers that consider page 0 to be invalid
	 * from crashing the machine or even losing data.
	 */
	if (tbl->it_offset == 0)
		set_bit(0, tbl->it_map);

	/* We only split the IOMMU table if we have 1GB or more of space */
	if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
		tbl->nr_pools = IOMMU_NR_POOLS;
	else
		tbl->nr_pools = 1;

	/* We reserve the top 1/4 of the table for large allocations */
	tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;

	for (i = 0; i < tbl->nr_pools; i++) {
		p = &tbl->pools[i];
		spin_lock_init(&(p->lock));
		p->start = tbl->poolsize * i;
		p->hint = p->start;
		p->end = p->start + tbl->poolsize;
	}

	p = &tbl->large_pool;
	spin_lock_init(&(p->lock));
	p->start = tbl->poolsize * i;
	p->hint = p->start;
	p->end = tbl->it_size;

	iommu_table_clear(tbl);

	if (!welcomed) {
		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
		       novmerge ? "disabled" : "enabled");
		welcomed = 1;
	}

	return tbl;
}

void iommu_free_table(struct iommu_table *tbl, const char *node_name)
{
	unsigned long bitmap_sz;
	unsigned int order;

	if (!tbl || !tbl->it_map) {
		printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
				node_name);
		return;
	}

	/* verify that table contains no entries */
	if (!bitmap_empty(tbl->it_map, tbl->it_size))
		pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);

	/* calculate bitmap size in bytes */
	bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);

	/* free bitmap */
	order = get_order(bitmap_sz);
	free_pages((unsigned long) tbl->it_map, order);

	/* free table */
	kfree(tbl);
}

/* Creates TCEs for a user provided buffer. The user buffer must be
 * contiguous real kernel storage (not vmalloc). The address passed here
 * comprises a page address and offset into that page. The dma_addr_t
 * returned will point to the same byte within the page as was passed in.
 */
dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
			  struct page *page, unsigned long offset, size_t size,
			  unsigned long mask, enum dma_data_direction direction,
			  struct dma_attrs *attrs)
{
	dma_addr_t dma_handle = DMA_ERROR_CODE;
	void *vaddr;
	unsigned long uaddr;
	unsigned int npages, align;

	BUG_ON(direction == DMA_NONE);

	vaddr = page_address(page) + offset;
	uaddr = (unsigned long)vaddr;
	npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE);

	if (tbl) {
		align = 0;
		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE &&
		    ((unsigned long)vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;

		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
					 mask >> IOMMU_PAGE_SHIFT, align,
					 attrs);
		if (dma_handle == DMA_ERROR_CODE) {
			if (printk_ratelimit()) {
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %p npages %d\n", tbl, vaddr,
					 npages);
			}
		} else
			dma_handle |= (uaddr & ~IOMMU_PAGE_MASK);
	}

	return dma_handle;
}

void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
		      size_t size, enum dma_data_direction direction,
		      struct dma_attrs *attrs)
{
	unsigned int npages;

	BUG_ON(direction == DMA_NONE);

	if (tbl) {
		npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE);
		iommu_free(tbl, dma_handle, npages);
	}
}
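
/*
 * Minimal usage sketch for the streaming mappings above (illustrative
 * only; 'pdev', 'buf' and 'len' are hypothetical): drivers use the
 * generic DMA API, which lands in iommu_map_page()/iommu_unmap_page()
 * for IOMMU-backed buses:
 *
 *	dma_addr_t busaddr;
 *
 *	busaddr = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, busaddr))
 *		return -ENOMEM;
 *	...
 *	dma_unmap_single(&pdev->dev, busaddr, len, DMA_TO_DEVICE);
 *
 * The low bits of 'busaddr' keep the buffer's offset within the IOMMU
 * page, as set up at the end of iommu_map_page().
 */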

/* Allocates a contiguous real buffer and creates mappings over it.
 * Returns the virtual address of the buffer and sets dma_handle
 * to the dma address (mapping) of the first page.
 */
void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
			   size_t size, dma_addr_t *dma_handle,
			   unsigned long mask, gfp_t flag, int node)
{
	void *ret = NULL;
	dma_addr_t mapping;
	unsigned int order;
	unsigned int nio_pages, io_order;
	struct page *page;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	/*
	 * Client asked for way too much space. This is checked later
	 * anyway. It is easier to debug here for the drivers than in
	 * the tce tables.
	 */
	if (order >= IOMAP_MAX_ORDER) {
		dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
			 size);
		return NULL;
	}

	if (!tbl)
		return NULL;

	/* Alloc enough pages (and possibly more) */
	page = alloc_pages_node(node, flag, order);
	if (!page)
		return NULL;
	ret = page_address(page);
	memset(ret, 0, size);

	/* Set up tces to cover the allocated range */
	nio_pages = size >> IOMMU_PAGE_SHIFT;
	io_order = get_iommu_order(size);
	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
			      mask >> IOMMU_PAGE_SHIFT, io_order, NULL);
	if (mapping == DMA_ERROR_CODE) {
		free_pages((unsigned long)ret, order);
		return NULL;
	}
	*dma_handle = mapping;
	return ret;
}

void iommu_free_coherent(struct iommu_table *tbl, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	if (tbl) {
		unsigned int nio_pages;

		size = PAGE_ALIGN(size);
		nio_pages = size >> IOMMU_PAGE_SHIFT;
		iommu_free(tbl, dma_handle, nio_pages);
		size = PAGE_ALIGN(size);
		free_pages((unsigned long)vaddr, get_order(size));
	}
}
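
/*
 * Minimal usage sketch for the coherent allocator above (illustrative
 * only; 'pdev' and the size are hypothetical): the generic DMA API is the
 * entry point, ending up in iommu_alloc_coherent()/iommu_free_coherent()
 * for IOMMU-backed buses:
 *
 *	dma_addr_t busaddr;
 *	void *ring;
 *
 *	ring = dma_alloc_coherent(&pdev->dev, 16 * 1024, &busaddr, GFP_KERNEL);
 *	if (!ring)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(&pdev->dev, 16 * 1024, ring, busaddr);
 *
 * The mapping is created DMA_BIDIRECTIONAL, and the allocation in the DMA
 * window is naturally aligned via get_iommu_order(size).
 */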