1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * linux/arch/arm/mm/dma-mapping.c 4 * 5 * Copyright (C) 2000-2004 Russell King 6 * 7 * DMA uncached mapping support. 8 */ 9 #include <linux/module.h> 10 #include <linux/mm.h> 11 #include <linux/genalloc.h> 12 #include <linux/gfp.h> 13 #include <linux/errno.h> 14 #include <linux/list.h> 15 #include <linux/init.h> 16 #include <linux/device.h> 17 #include <linux/dma-direct.h> 18 #include <linux/dma-map-ops.h> 19 #include <linux/highmem.h> 20 #include <linux/memblock.h> 21 #include <linux/slab.h> 22 #include <linux/iommu.h> 23 #include <linux/io.h> 24 #include <linux/vmalloc.h> 25 #include <linux/sizes.h> 26 #include <linux/cma.h> 27 28 #include <asm/page.h> 29 #include <asm/highmem.h> 30 #include <asm/cacheflush.h> 31 #include <asm/tlbflush.h> 32 #include <asm/mach/arch.h> 33 #include <asm/dma-iommu.h> 34 #include <asm/mach/map.h> 35 #include <asm/system_info.h> 36 #include <asm/xen/xen-ops.h> 37 38 #include "dma.h" 39 #include "mm.h" 40 41 struct arm_dma_alloc_args { 42 struct device *dev; 43 size_t size; 44 gfp_t gfp; 45 pgprot_t prot; 46 const void *caller; 47 bool want_vaddr; 48 int coherent_flag; 49 }; 50 51 struct arm_dma_free_args { 52 struct device *dev; 53 size_t size; 54 void *cpu_addr; 55 struct page *page; 56 bool want_vaddr; 57 }; 58 59 #define NORMAL 0 60 #define COHERENT 1 61 62 struct arm_dma_allocator { 63 void *(*alloc)(struct arm_dma_alloc_args *args, 64 struct page **ret_page); 65 void (*free)(struct arm_dma_free_args *args); 66 }; 67 68 struct arm_dma_buffer { 69 struct list_head list; 70 void *virt; 71 struct arm_dma_allocator *allocator; 72 }; 73 74 static LIST_HEAD(arm_dma_bufs); 75 static DEFINE_SPINLOCK(arm_dma_bufs_lock); 76 77 static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) 78 { 79 struct arm_dma_buffer *buf, *found = NULL; 80 unsigned long flags; 81 82 spin_lock_irqsave(&arm_dma_bufs_lock, flags); 83 list_for_each_entry(buf, &arm_dma_bufs, list) { 84 if (buf->virt == virt) { 85 list_del(&buf->list); 86 found = buf; 87 break; 88 } 89 } 90 spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); 91 return found; 92 } 93 94 /* 95 * The DMA API is built upon the notion of "buffer ownership". A buffer 96 * is either exclusively owned by the CPU (and therefore may be accessed 97 * by it) or exclusively owned by the DMA device. These helper functions 98 * represent the transitions between these two ownership states. 99 * 100 * Note, however, that on later ARMs, this notion does not work due to 101 * speculative prefetches. We model our approach on the assumption that 102 * the CPU does do speculative prefetches, which means we clean caches 103 * before transfers and delay cache invalidation until transfer completion. 104 * 105 */ 106 107 static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag) 108 { 109 /* 110 * Ensure that the allocated pages are zeroed, and that any data 111 * lurking in the kernel direct-mapped region is invalidated. 112 */ 113 if (PageHighMem(page)) { 114 phys_addr_t base = __pfn_to_phys(page_to_pfn(page)); 115 phys_addr_t end = base + size; 116 while (size > 0) { 117 void *ptr = kmap_atomic(page); 118 memset(ptr, 0, PAGE_SIZE); 119 if (coherent_flag != COHERENT) 120 dmac_flush_range(ptr, ptr + PAGE_SIZE); 121 kunmap_atomic(ptr); 122 page++; 123 size -= PAGE_SIZE; 124 } 125 if (coherent_flag != COHERENT) 126 outer_flush_range(base, end); 127 } else { 128 void *ptr = page_address(page); 129 memset(ptr, 0, size); 130 if (coherent_flag != COHERENT) { 131 dmac_flush_range(ptr, ptr + size); 132 outer_flush_range(__pa(ptr), __pa(ptr) + size); 133 } 134 } 135 } 136 137 /* 138 * Allocate a DMA buffer for 'dev' of size 'size' using the 139 * specified gfp mask. Note that 'size' must be page aligned. 140 */ 141 static struct page *__dma_alloc_buffer(struct device *dev, size_t size, 142 gfp_t gfp, int coherent_flag) 143 { 144 unsigned long order = get_order(size); 145 struct page *page, *p, *e; 146 147 page = alloc_pages(gfp, order); 148 if (!page) 149 return NULL; 150 151 /* 152 * Now split the huge page and free the excess pages 153 */ 154 split_page(page, order); 155 for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) 156 __free_page(p); 157 158 __dma_clear_buffer(page, size, coherent_flag); 159 160 return page; 161 } 162 163 /* 164 * Free a DMA buffer. 'size' must be page aligned. 165 */ 166 static void __dma_free_buffer(struct page *page, size_t size) 167 { 168 struct page *e = page + (size >> PAGE_SHIFT); 169 170 while (page < e) { 171 __free_page(page); 172 page++; 173 } 174 } 175 176 static void *__alloc_from_contiguous(struct device *dev, size_t size, 177 pgprot_t prot, struct page **ret_page, 178 const void *caller, bool want_vaddr, 179 int coherent_flag, gfp_t gfp); 180 181 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, 182 pgprot_t prot, struct page **ret_page, 183 const void *caller, bool want_vaddr); 184 185 #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K 186 static struct gen_pool *atomic_pool __ro_after_init; 187 188 static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; 189 190 static int __init early_coherent_pool(char *p) 191 { 192 atomic_pool_size = memparse(p, &p); 193 return 0; 194 } 195 early_param("coherent_pool", early_coherent_pool); 196 197 /* 198 * Initialise the coherent pool for atomic allocations. 199 */ 200 static int __init atomic_pool_init(void) 201 { 202 pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL); 203 gfp_t gfp = GFP_KERNEL | GFP_DMA; 204 struct page *page; 205 void *ptr; 206 207 atomic_pool = gen_pool_create(PAGE_SHIFT, -1); 208 if (!atomic_pool) 209 goto out; 210 /* 211 * The atomic pool is only used for non-coherent allocations 212 * so we must pass NORMAL for coherent_flag. 213 */ 214 if (dev_get_cma_area(NULL)) 215 ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot, 216 &page, atomic_pool_init, true, NORMAL, 217 GFP_KERNEL); 218 else 219 ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot, 220 &page, atomic_pool_init, true); 221 if (ptr) { 222 int ret; 223 224 ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr, 225 page_to_phys(page), 226 atomic_pool_size, -1); 227 if (ret) 228 goto destroy_genpool; 229 230 gen_pool_set_algo(atomic_pool, 231 gen_pool_first_fit_order_align, 232 NULL); 233 pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n", 234 atomic_pool_size / 1024); 235 return 0; 236 } 237 238 destroy_genpool: 239 gen_pool_destroy(atomic_pool); 240 atomic_pool = NULL; 241 out: 242 pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", 243 atomic_pool_size / 1024); 244 return -ENOMEM; 245 } 246 /* 247 * CMA is activated by core_initcall, so we must be called after it. 248 */ 249 postcore_initcall(atomic_pool_init); 250 251 #ifdef CONFIG_CMA_AREAS 252 struct dma_contig_early_reserve { 253 phys_addr_t base; 254 unsigned long size; 255 }; 256 257 static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; 258 259 static int dma_mmu_remap_num __initdata; 260 261 #ifdef CONFIG_DMA_CMA 262 void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) 263 { 264 dma_mmu_remap[dma_mmu_remap_num].base = base; 265 dma_mmu_remap[dma_mmu_remap_num].size = size; 266 dma_mmu_remap_num++; 267 } 268 #endif 269 270 void __init dma_contiguous_remap(void) 271 { 272 int i; 273 for (i = 0; i < dma_mmu_remap_num; i++) { 274 phys_addr_t start = dma_mmu_remap[i].base; 275 phys_addr_t end = start + dma_mmu_remap[i].size; 276 struct map_desc map; 277 unsigned long addr; 278 279 if (end > arm_lowmem_limit) 280 end = arm_lowmem_limit; 281 if (start >= end) 282 continue; 283 284 map.pfn = __phys_to_pfn(start); 285 map.virtual = __phys_to_virt(start); 286 map.length = end - start; 287 map.type = MT_MEMORY_DMA_READY; 288 289 /* 290 * Clear previous low-memory mapping to ensure that the 291 * TLB does not see any conflicting entries, then flush 292 * the TLB of the old entries before creating new mappings. 293 * 294 * This ensures that any speculatively loaded TLB entries 295 * (even though they may be rare) can not cause any problems, 296 * and ensures that this code is architecturally compliant. 297 */ 298 for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); 299 addr += PMD_SIZE) 300 pmd_clear(pmd_off_k(addr)); 301 302 flush_tlb_kernel_range(__phys_to_virt(start), 303 __phys_to_virt(end)); 304 305 iotable_init(&map, 1); 306 } 307 } 308 #endif 309 310 static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data) 311 { 312 struct page *page = virt_to_page((void *)addr); 313 pgprot_t prot = *(pgprot_t *)data; 314 315 set_pte_ext(pte, mk_pte(page, prot), 0); 316 return 0; 317 } 318 319 static void __dma_remap(struct page *page, size_t size, pgprot_t prot) 320 { 321 unsigned long start = (unsigned long) page_address(page); 322 unsigned end = start + size; 323 324 apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); 325 flush_tlb_kernel_range(start, end); 326 } 327 328 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, 329 pgprot_t prot, struct page **ret_page, 330 const void *caller, bool want_vaddr) 331 { 332 struct page *page; 333 void *ptr = NULL; 334 /* 335 * __alloc_remap_buffer is only called when the device is 336 * non-coherent 337 */ 338 page = __dma_alloc_buffer(dev, size, gfp, NORMAL); 339 if (!page) 340 return NULL; 341 if (!want_vaddr) 342 goto out; 343 344 ptr = dma_common_contiguous_remap(page, size, prot, caller); 345 if (!ptr) { 346 __dma_free_buffer(page, size); 347 return NULL; 348 } 349 350 out: 351 *ret_page = page; 352 return ptr; 353 } 354 355 static void *__alloc_from_pool(size_t size, struct page **ret_page) 356 { 357 unsigned long val; 358 void *ptr = NULL; 359 360 if (!atomic_pool) { 361 WARN(1, "coherent pool not initialised!\n"); 362 return NULL; 363 } 364 365 val = gen_pool_alloc(atomic_pool, size); 366 if (val) { 367 phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); 368 369 *ret_page = phys_to_page(phys); 370 ptr = (void *)val; 371 } 372 373 return ptr; 374 } 375 376 static bool __in_atomic_pool(void *start, size_t size) 377 { 378 return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); 379 } 380 381 static int __free_from_pool(void *start, size_t size) 382 { 383 if (!__in_atomic_pool(start, size)) 384 return 0; 385 386 gen_pool_free(atomic_pool, (unsigned long)start, size); 387 388 return 1; 389 } 390 391 static void *__alloc_from_contiguous(struct device *dev, size_t size, 392 pgprot_t prot, struct page **ret_page, 393 const void *caller, bool want_vaddr, 394 int coherent_flag, gfp_t gfp) 395 { 396 unsigned long order = get_order(size); 397 size_t count = size >> PAGE_SHIFT; 398 struct page *page; 399 void *ptr = NULL; 400 401 page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN); 402 if (!page) 403 return NULL; 404 405 __dma_clear_buffer(page, size, coherent_flag); 406 407 if (!want_vaddr) 408 goto out; 409 410 if (PageHighMem(page)) { 411 ptr = dma_common_contiguous_remap(page, size, prot, caller); 412 if (!ptr) { 413 dma_release_from_contiguous(dev, page, count); 414 return NULL; 415 } 416 } else { 417 __dma_remap(page, size, prot); 418 ptr = page_address(page); 419 } 420 421 out: 422 *ret_page = page; 423 return ptr; 424 } 425 426 static void __free_from_contiguous(struct device *dev, struct page *page, 427 void *cpu_addr, size_t size, bool want_vaddr) 428 { 429 if (want_vaddr) { 430 if (PageHighMem(page)) 431 dma_common_free_remap(cpu_addr, size); 432 else 433 __dma_remap(page, size, PAGE_KERNEL); 434 } 435 dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); 436 } 437 438 static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot) 439 { 440 prot = (attrs & DMA_ATTR_WRITE_COMBINE) ? 441 pgprot_writecombine(prot) : 442 pgprot_dmacoherent(prot); 443 return prot; 444 } 445 446 static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, 447 struct page **ret_page) 448 { 449 struct page *page; 450 /* __alloc_simple_buffer is only called when the device is coherent */ 451 page = __dma_alloc_buffer(dev, size, gfp, COHERENT); 452 if (!page) 453 return NULL; 454 455 *ret_page = page; 456 return page_address(page); 457 } 458 459 static void *simple_allocator_alloc(struct arm_dma_alloc_args *args, 460 struct page **ret_page) 461 { 462 return __alloc_simple_buffer(args->dev, args->size, args->gfp, 463 ret_page); 464 } 465 466 static void simple_allocator_free(struct arm_dma_free_args *args) 467 { 468 __dma_free_buffer(args->page, args->size); 469 } 470 471 static struct arm_dma_allocator simple_allocator = { 472 .alloc = simple_allocator_alloc, 473 .free = simple_allocator_free, 474 }; 475 476 static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, 477 struct page **ret_page) 478 { 479 return __alloc_from_contiguous(args->dev, args->size, args->prot, 480 ret_page, args->caller, 481 args->want_vaddr, args->coherent_flag, 482 args->gfp); 483 } 484 485 static void cma_allocator_free(struct arm_dma_free_args *args) 486 { 487 __free_from_contiguous(args->dev, args->page, args->cpu_addr, 488 args->size, args->want_vaddr); 489 } 490 491 static struct arm_dma_allocator cma_allocator = { 492 .alloc = cma_allocator_alloc, 493 .free = cma_allocator_free, 494 }; 495 496 static void *pool_allocator_alloc(struct arm_dma_alloc_args *args, 497 struct page **ret_page) 498 { 499 return __alloc_from_pool(args->size, ret_page); 500 } 501 502 static void pool_allocator_free(struct arm_dma_free_args *args) 503 { 504 __free_from_pool(args->cpu_addr, args->size); 505 } 506 507 static struct arm_dma_allocator pool_allocator = { 508 .alloc = pool_allocator_alloc, 509 .free = pool_allocator_free, 510 }; 511 512 static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, 513 struct page **ret_page) 514 { 515 return __alloc_remap_buffer(args->dev, args->size, args->gfp, 516 args->prot, ret_page, args->caller, 517 args->want_vaddr); 518 } 519 520 static void remap_allocator_free(struct arm_dma_free_args *args) 521 { 522 if (args->want_vaddr) 523 dma_common_free_remap(args->cpu_addr, args->size); 524 525 __dma_free_buffer(args->page, args->size); 526 } 527 528 static struct arm_dma_allocator remap_allocator = { 529 .alloc = remap_allocator_alloc, 530 .free = remap_allocator_free, 531 }; 532 533 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, 534 gfp_t gfp, pgprot_t prot, bool is_coherent, 535 unsigned long attrs, const void *caller) 536 { 537 u64 mask = min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); 538 struct page *page = NULL; 539 void *addr; 540 bool allowblock, cma; 541 struct arm_dma_buffer *buf; 542 struct arm_dma_alloc_args args = { 543 .dev = dev, 544 .size = PAGE_ALIGN(size), 545 .gfp = gfp, 546 .prot = prot, 547 .caller = caller, 548 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), 549 .coherent_flag = is_coherent ? COHERENT : NORMAL, 550 }; 551 552 #ifdef CONFIG_DMA_API_DEBUG 553 u64 limit = (mask + 1) & ~mask; 554 if (limit && size >= limit) { 555 dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", 556 size, mask); 557 return NULL; 558 } 559 #endif 560 561 buf = kzalloc(sizeof(*buf), 562 gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)); 563 if (!buf) 564 return NULL; 565 566 if (mask < 0xffffffffULL) 567 gfp |= GFP_DMA; 568 569 args.gfp = gfp; 570 571 *handle = DMA_MAPPING_ERROR; 572 allowblock = gfpflags_allow_blocking(gfp); 573 cma = allowblock ? dev_get_cma_area(dev) : NULL; 574 575 if (cma) 576 buf->allocator = &cma_allocator; 577 else if (is_coherent) 578 buf->allocator = &simple_allocator; 579 else if (allowblock) 580 buf->allocator = &remap_allocator; 581 else 582 buf->allocator = &pool_allocator; 583 584 addr = buf->allocator->alloc(&args, &page); 585 586 if (page) { 587 unsigned long flags; 588 589 *handle = phys_to_dma(dev, page_to_phys(page)); 590 buf->virt = args.want_vaddr ? addr : page; 591 592 spin_lock_irqsave(&arm_dma_bufs_lock, flags); 593 list_add(&buf->list, &arm_dma_bufs); 594 spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); 595 } else { 596 kfree(buf); 597 } 598 599 return args.want_vaddr ? addr : page; 600 } 601 602 /* 603 * Free a buffer as defined by the above mapping. 604 */ 605 static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, 606 dma_addr_t handle, unsigned long attrs, 607 bool is_coherent) 608 { 609 struct page *page = phys_to_page(dma_to_phys(dev, handle)); 610 struct arm_dma_buffer *buf; 611 struct arm_dma_free_args args = { 612 .dev = dev, 613 .size = PAGE_ALIGN(size), 614 .cpu_addr = cpu_addr, 615 .page = page, 616 .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), 617 }; 618 619 buf = arm_dma_buffer_find(cpu_addr); 620 if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) 621 return; 622 623 buf->allocator->free(&args); 624 kfree(buf); 625 } 626 627 static void dma_cache_maint_page(phys_addr_t phys, size_t size, 628 enum dma_data_direction dir, 629 void (*op)(const void *, size_t, int)) 630 { 631 unsigned long offset = offset_in_page(phys); 632 unsigned long pfn = __phys_to_pfn(phys); 633 size_t left = size; 634 635 /* 636 * A single sg entry may refer to multiple physically contiguous 637 * pages. But we still need to process highmem pages individually. 638 * If highmem is not configured then the bulk of this loop gets 639 * optimized out. 640 */ 641 do { 642 size_t len = left; 643 void *vaddr; 644 645 phys = __pfn_to_phys(pfn); 646 if (PhysHighMem(phys)) { 647 if (len + offset > PAGE_SIZE) 648 len = PAGE_SIZE - offset; 649 650 if (cache_is_vipt_nonaliasing()) { 651 vaddr = kmap_atomic_pfn(pfn); 652 op(vaddr + offset, len, dir); 653 kunmap_atomic(vaddr); 654 } else { 655 struct page *page = phys_to_page(phys); 656 657 vaddr = kmap_high_get(page); 658 if (vaddr) { 659 op(vaddr + offset, len, dir); 660 kunmap_high(page); 661 } 662 } 663 } else { 664 phys += offset; 665 vaddr = phys_to_virt(phys); 666 op(vaddr, len, dir); 667 } 668 offset = 0; 669 pfn++; 670 left -= len; 671 } while (left); 672 } 673 674 /* 675 * Make an area consistent for devices. 676 * Note: Drivers should NOT use this function directly. 677 * Use the driver DMA support - see dma-mapping.h (dma_sync_*) 678 */ 679 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, 680 enum dma_data_direction dir) 681 { 682 dma_cache_maint_page(paddr, size, dir, dmac_map_area); 683 684 if (dir == DMA_FROM_DEVICE) { 685 outer_inv_range(paddr, paddr + size); 686 } else { 687 outer_clean_range(paddr, paddr + size); 688 } 689 /* FIXME: non-speculating: flush on bidirectional mappings? */ 690 } 691 692 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, 693 enum dma_data_direction dir) 694 { 695 /* FIXME: non-speculating: not required */ 696 /* in any case, don't bother invalidating if DMA to device */ 697 if (dir != DMA_TO_DEVICE) { 698 outer_inv_range(paddr, paddr + size); 699 700 dma_cache_maint_page(paddr, size, dir, dmac_unmap_area); 701 } 702 703 /* 704 * Mark the D-cache clean for these pages to avoid extra flushing. 705 */ 706 if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { 707 struct folio *folio = pfn_folio(paddr / PAGE_SIZE); 708 size_t offset = offset_in_folio(folio, paddr); 709 710 for (;;) { 711 size_t sz = folio_size(folio) - offset; 712 713 if (size < sz) 714 break; 715 if (!offset) 716 set_bit(PG_dcache_clean, &folio->flags.f); 717 offset = 0; 718 size -= sz; 719 if (!size) 720 break; 721 folio = folio_next(folio); 722 } 723 } 724 } 725 726 #ifdef CONFIG_ARM_DMA_USE_IOMMU 727 728 static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs) 729 { 730 int prot = 0; 731 732 if (attrs & DMA_ATTR_PRIVILEGED) 733 prot |= IOMMU_PRIV; 734 735 if (attrs & DMA_ATTR_MMIO) 736 prot |= IOMMU_MMIO; 737 738 switch (dir) { 739 case DMA_BIDIRECTIONAL: 740 return prot | IOMMU_READ | IOMMU_WRITE; 741 case DMA_TO_DEVICE: 742 return prot | IOMMU_READ; 743 case DMA_FROM_DEVICE: 744 return prot | IOMMU_WRITE; 745 default: 746 return prot; 747 } 748 } 749 750 /* IOMMU */ 751 752 static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); 753 754 static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, 755 size_t size) 756 { 757 unsigned int order = get_order(size); 758 unsigned int align = 0; 759 unsigned int count, start; 760 size_t mapping_size = mapping->bits << PAGE_SHIFT; 761 unsigned long flags; 762 dma_addr_t iova; 763 int i; 764 765 if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) 766 order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; 767 768 count = PAGE_ALIGN(size) >> PAGE_SHIFT; 769 align = (1 << order) - 1; 770 771 spin_lock_irqsave(&mapping->lock, flags); 772 for (i = 0; i < mapping->nr_bitmaps; i++) { 773 start = bitmap_find_next_zero_area(mapping->bitmaps[i], 774 mapping->bits, 0, count, align); 775 776 if (start > mapping->bits) 777 continue; 778 779 bitmap_set(mapping->bitmaps[i], start, count); 780 break; 781 } 782 783 /* 784 * No unused range found. Try to extend the existing mapping 785 * and perform a second attempt to reserve an IO virtual 786 * address range of size bytes. 787 */ 788 if (i == mapping->nr_bitmaps) { 789 if (extend_iommu_mapping(mapping)) { 790 spin_unlock_irqrestore(&mapping->lock, flags); 791 return DMA_MAPPING_ERROR; 792 } 793 794 start = bitmap_find_next_zero_area(mapping->bitmaps[i], 795 mapping->bits, 0, count, align); 796 797 if (start > mapping->bits) { 798 spin_unlock_irqrestore(&mapping->lock, flags); 799 return DMA_MAPPING_ERROR; 800 } 801 802 bitmap_set(mapping->bitmaps[i], start, count); 803 } 804 spin_unlock_irqrestore(&mapping->lock, flags); 805 806 iova = mapping->base + (mapping_size * i); 807 iova += start << PAGE_SHIFT; 808 809 return iova; 810 } 811 812 static inline void __free_iova(struct dma_iommu_mapping *mapping, 813 dma_addr_t addr, size_t size) 814 { 815 unsigned int start, count; 816 size_t mapping_size = mapping->bits << PAGE_SHIFT; 817 unsigned long flags; 818 dma_addr_t bitmap_base; 819 u32 bitmap_index; 820 821 if (!size) 822 return; 823 824 bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; 825 BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); 826 827 bitmap_base = mapping->base + mapping_size * bitmap_index; 828 829 start = (addr - bitmap_base) >> PAGE_SHIFT; 830 831 if (addr + size > bitmap_base + mapping_size) { 832 /* 833 * The address range to be freed reaches into the iova 834 * range of the next bitmap. This should not happen as 835 * we don't allow this in __alloc_iova (at the 836 * moment). 837 */ 838 BUG(); 839 } else 840 count = size >> PAGE_SHIFT; 841 842 spin_lock_irqsave(&mapping->lock, flags); 843 bitmap_clear(mapping->bitmaps[bitmap_index], start, count); 844 spin_unlock_irqrestore(&mapping->lock, flags); 845 } 846 847 /* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */ 848 static const int iommu_order_array[] = { 9, 8, 4, 0 }; 849 850 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, 851 gfp_t gfp, unsigned long attrs, 852 int coherent_flag) 853 { 854 struct page **pages; 855 int count = size >> PAGE_SHIFT; 856 int array_size = count * sizeof(struct page *); 857 int i = 0; 858 int order_idx = 0; 859 860 pages = kvzalloc(array_size, GFP_KERNEL); 861 if (!pages) 862 return NULL; 863 864 if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) 865 { 866 unsigned long order = get_order(size); 867 struct page *page; 868 869 page = dma_alloc_from_contiguous(dev, count, order, 870 gfp & __GFP_NOWARN); 871 if (!page) 872 goto error; 873 874 __dma_clear_buffer(page, size, coherent_flag); 875 876 for (i = 0; i < count; i++) 877 pages[i] = page + i; 878 879 return pages; 880 } 881 882 /* Go straight to 4K chunks if caller says it's OK. */ 883 if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) 884 order_idx = ARRAY_SIZE(iommu_order_array) - 1; 885 886 /* 887 * IOMMU can map any pages, so himem can also be used here 888 */ 889 gfp |= __GFP_NOWARN | __GFP_HIGHMEM; 890 891 while (count) { 892 int j, order; 893 894 order = iommu_order_array[order_idx]; 895 896 /* Drop down when we get small */ 897 if (__fls(count) < order) { 898 order_idx++; 899 continue; 900 } 901 902 if (order) { 903 /* See if it's easy to allocate a high-order chunk */ 904 pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); 905 906 /* Go down a notch at first sign of pressure */ 907 if (!pages[i]) { 908 order_idx++; 909 continue; 910 } 911 } else { 912 pages[i] = alloc_pages(gfp, 0); 913 if (!pages[i]) 914 goto error; 915 } 916 917 if (order) { 918 split_page(pages[i], order); 919 j = 1 << order; 920 while (--j) 921 pages[i + j] = pages[i] + j; 922 } 923 924 __dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag); 925 i += 1 << order; 926 count -= 1 << order; 927 } 928 929 return pages; 930 error: 931 while (i--) 932 if (pages[i]) 933 __free_pages(pages[i], 0); 934 kvfree(pages); 935 return NULL; 936 } 937 938 static int __iommu_free_buffer(struct device *dev, struct page **pages, 939 size_t size, unsigned long attrs) 940 { 941 int count = size >> PAGE_SHIFT; 942 int i; 943 944 if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { 945 dma_release_from_contiguous(dev, pages[0], count); 946 } else { 947 for (i = 0; i < count; i++) 948 if (pages[i]) 949 __free_pages(pages[i], 0); 950 } 951 952 kvfree(pages); 953 return 0; 954 } 955 956 /* 957 * Create a mapping in device IO address space for specified pages 958 */ 959 static dma_addr_t 960 __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, 961 unsigned long attrs) 962 { 963 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 964 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; 965 dma_addr_t dma_addr, iova; 966 int i; 967 968 dma_addr = __alloc_iova(mapping, size); 969 if (dma_addr == DMA_MAPPING_ERROR) 970 return dma_addr; 971 972 iova = dma_addr; 973 for (i = 0; i < count; ) { 974 int ret; 975 976 unsigned int next_pfn = page_to_pfn(pages[i]) + 1; 977 phys_addr_t phys = page_to_phys(pages[i]); 978 unsigned int len, j; 979 980 for (j = i + 1; j < count; j++, next_pfn++) 981 if (page_to_pfn(pages[j]) != next_pfn) 982 break; 983 984 len = (j - i) << PAGE_SHIFT; 985 ret = iommu_map(mapping->domain, iova, phys, len, 986 __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs), 987 GFP_KERNEL); 988 if (ret < 0) 989 goto fail; 990 iova += len; 991 i = j; 992 } 993 return dma_addr; 994 fail: 995 iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); 996 __free_iova(mapping, dma_addr, size); 997 return DMA_MAPPING_ERROR; 998 } 999 1000 static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) 1001 { 1002 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1003 1004 /* 1005 * add optional in-page offset from iova to size and align 1006 * result to page size 1007 */ 1008 size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); 1009 iova &= PAGE_MASK; 1010 1011 iommu_unmap(mapping->domain, iova, size); 1012 __free_iova(mapping, iova, size); 1013 return 0; 1014 } 1015 1016 static struct page **__atomic_get_pages(void *addr) 1017 { 1018 struct page *page; 1019 phys_addr_t phys; 1020 1021 phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr); 1022 page = phys_to_page(phys); 1023 1024 return (struct page **)page; 1025 } 1026 1027 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs) 1028 { 1029 if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) 1030 return __atomic_get_pages(cpu_addr); 1031 1032 if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) 1033 return cpu_addr; 1034 1035 return dma_common_find_pages(cpu_addr); 1036 } 1037 1038 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, 1039 dma_addr_t *handle, int coherent_flag, 1040 unsigned long attrs) 1041 { 1042 struct page *page; 1043 void *addr; 1044 1045 if (coherent_flag == COHERENT) 1046 addr = __alloc_simple_buffer(dev, size, gfp, &page); 1047 else 1048 addr = __alloc_from_pool(size, &page); 1049 if (!addr) 1050 return NULL; 1051 1052 *handle = __iommu_create_mapping(dev, &page, size, attrs); 1053 if (*handle == DMA_MAPPING_ERROR) 1054 goto err_mapping; 1055 1056 return addr; 1057 1058 err_mapping: 1059 __free_from_pool(addr, size); 1060 return NULL; 1061 } 1062 1063 static void __iommu_free_atomic(struct device *dev, void *cpu_addr, 1064 dma_addr_t handle, size_t size, int coherent_flag) 1065 { 1066 __iommu_remove_mapping(dev, handle, size); 1067 if (coherent_flag == COHERENT) 1068 __dma_free_buffer(virt_to_page(cpu_addr), size); 1069 else 1070 __free_from_pool(cpu_addr, size); 1071 } 1072 1073 static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, 1074 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1075 { 1076 pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); 1077 struct page **pages; 1078 void *addr = NULL; 1079 int coherent_flag = dev->dma_coherent ? COHERENT : NORMAL; 1080 1081 *handle = DMA_MAPPING_ERROR; 1082 size = PAGE_ALIGN(size); 1083 1084 if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) 1085 return __iommu_alloc_simple(dev, size, gfp, handle, 1086 coherent_flag, attrs); 1087 1088 pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag); 1089 if (!pages) 1090 return NULL; 1091 1092 *handle = __iommu_create_mapping(dev, pages, size, attrs); 1093 if (*handle == DMA_MAPPING_ERROR) 1094 goto err_buffer; 1095 1096 if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) 1097 return pages; 1098 1099 addr = dma_common_pages_remap(pages, size, prot, 1100 __builtin_return_address(0)); 1101 if (!addr) 1102 goto err_mapping; 1103 1104 return addr; 1105 1106 err_mapping: 1107 __iommu_remove_mapping(dev, *handle, size); 1108 err_buffer: 1109 __iommu_free_buffer(dev, pages, size, attrs); 1110 return NULL; 1111 } 1112 1113 static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, 1114 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1115 unsigned long attrs) 1116 { 1117 struct page **pages = __iommu_get_pages(cpu_addr, attrs); 1118 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 1119 int err; 1120 1121 if (!pages) 1122 return -ENXIO; 1123 1124 if (vma->vm_pgoff >= nr_pages) 1125 return -ENXIO; 1126 1127 if (!dev->dma_coherent) 1128 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); 1129 1130 err = vm_map_pages(vma, pages, nr_pages); 1131 if (err) 1132 pr_err("Remapping memory failed: %d\n", err); 1133 1134 return err; 1135 } 1136 1137 /* 1138 * free a page as defined by the above mapping. 1139 * Must not be called with IRQs disabled. 1140 */ 1141 static void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, 1142 dma_addr_t handle, unsigned long attrs) 1143 { 1144 int coherent_flag = dev->dma_coherent ? COHERENT : NORMAL; 1145 struct page **pages; 1146 size = PAGE_ALIGN(size); 1147 1148 if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) { 1149 __iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag); 1150 return; 1151 } 1152 1153 pages = __iommu_get_pages(cpu_addr, attrs); 1154 if (!pages) { 1155 WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); 1156 return; 1157 } 1158 1159 if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) 1160 dma_common_free_remap(cpu_addr, size); 1161 1162 __iommu_remove_mapping(dev, handle, size); 1163 __iommu_free_buffer(dev, pages, size, attrs); 1164 } 1165 1166 static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, 1167 void *cpu_addr, dma_addr_t dma_addr, 1168 size_t size, unsigned long attrs) 1169 { 1170 unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; 1171 struct page **pages = __iommu_get_pages(cpu_addr, attrs); 1172 1173 if (!pages) 1174 return -ENXIO; 1175 1176 return sg_alloc_table_from_pages(sgt, pages, count, 0, size, 1177 GFP_KERNEL); 1178 } 1179 1180 /* 1181 * Map a part of the scatter-gather list into contiguous io address space 1182 */ 1183 static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, 1184 size_t size, dma_addr_t *handle, 1185 enum dma_data_direction dir, unsigned long attrs) 1186 { 1187 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1188 dma_addr_t iova, iova_base; 1189 int ret = 0; 1190 unsigned int count; 1191 struct scatterlist *s; 1192 int prot; 1193 1194 size = PAGE_ALIGN(size); 1195 *handle = DMA_MAPPING_ERROR; 1196 1197 iova_base = iova = __alloc_iova(mapping, size); 1198 if (iova == DMA_MAPPING_ERROR) 1199 return -ENOMEM; 1200 1201 for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { 1202 phys_addr_t phys = page_to_phys(sg_page(s)); 1203 unsigned int len = PAGE_ALIGN(s->offset + s->length); 1204 1205 if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1206 arch_sync_dma_for_device(sg_phys(s), s->length, dir); 1207 1208 prot = __dma_info_to_prot(dir, attrs); 1209 1210 ret = iommu_map(mapping->domain, iova, phys, len, prot, 1211 GFP_KERNEL); 1212 if (ret < 0) 1213 goto fail; 1214 count += len >> PAGE_SHIFT; 1215 iova += len; 1216 } 1217 *handle = iova_base; 1218 1219 return 0; 1220 fail: 1221 iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); 1222 __free_iova(mapping, iova_base, size); 1223 return ret; 1224 } 1225 1226 /** 1227 * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA 1228 * @dev: valid struct device pointer 1229 * @sg: list of buffers 1230 * @nents: number of buffers to map 1231 * @dir: DMA transfer direction 1232 * 1233 * Map a set of buffers described by scatterlist in streaming mode for DMA. 1234 * The scatter gather list elements are merged together (if possible) and 1235 * tagged with the appropriate dma address and length. They are obtained via 1236 * sg_dma_{address,length}. 1237 */ 1238 static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, 1239 int nents, enum dma_data_direction dir, unsigned long attrs) 1240 { 1241 struct scatterlist *s = sg, *dma = sg, *start = sg; 1242 int i, count = 0, ret; 1243 unsigned int offset = s->offset; 1244 unsigned int size = s->offset + s->length; 1245 unsigned int max = dma_get_max_seg_size(dev); 1246 1247 for (i = 1; i < nents; i++) { 1248 s = sg_next(s); 1249 1250 s->dma_length = 0; 1251 1252 if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { 1253 ret = __map_sg_chunk(dev, start, size, 1254 &dma->dma_address, dir, attrs); 1255 if (ret < 0) 1256 goto bad_mapping; 1257 1258 dma->dma_address += offset; 1259 dma->dma_length = size - offset; 1260 1261 size = offset = s->offset; 1262 start = s; 1263 dma = sg_next(dma); 1264 count += 1; 1265 } 1266 size += s->length; 1267 } 1268 ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs); 1269 if (ret < 0) 1270 goto bad_mapping; 1271 1272 dma->dma_address += offset; 1273 dma->dma_length = size - offset; 1274 1275 return count+1; 1276 1277 bad_mapping: 1278 for_each_sg(sg, s, count, i) 1279 __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); 1280 if (ret == -ENOMEM) 1281 return ret; 1282 return -EINVAL; 1283 } 1284 1285 /** 1286 * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg 1287 * @dev: valid struct device pointer 1288 * @sg: list of buffers 1289 * @nents: number of buffers to unmap (same as was passed to dma_map_sg) 1290 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1291 * 1292 * Unmap a set of streaming mode DMA translations. Again, CPU access 1293 * rules concerning calls here are the same as for dma_unmap_single(). 1294 */ 1295 static void arm_iommu_unmap_sg(struct device *dev, 1296 struct scatterlist *sg, int nents, 1297 enum dma_data_direction dir, 1298 unsigned long attrs) 1299 { 1300 struct scatterlist *s; 1301 int i; 1302 1303 for_each_sg(sg, s, nents, i) { 1304 if (sg_dma_len(s)) 1305 __iommu_remove_mapping(dev, sg_dma_address(s), 1306 sg_dma_len(s)); 1307 if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1308 arch_sync_dma_for_cpu(sg_phys(s), s->length, dir); 1309 } 1310 } 1311 1312 /** 1313 * arm_iommu_sync_sg_for_cpu 1314 * @dev: valid struct device pointer 1315 * @sg: list of buffers 1316 * @nents: number of buffers to map (returned from dma_map_sg) 1317 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1318 */ 1319 static void arm_iommu_sync_sg_for_cpu(struct device *dev, 1320 struct scatterlist *sg, 1321 int nents, enum dma_data_direction dir) 1322 { 1323 struct scatterlist *s; 1324 int i; 1325 1326 if (dev->dma_coherent) 1327 return; 1328 1329 for_each_sg(sg, s, nents, i) 1330 arch_sync_dma_for_cpu(sg_phys(s), s->length, dir); 1331 1332 } 1333 1334 /** 1335 * arm_iommu_sync_sg_for_device 1336 * @dev: valid struct device pointer 1337 * @sg: list of buffers 1338 * @nents: number of buffers to map (returned from dma_map_sg) 1339 * @dir: DMA transfer direction (same as was passed to dma_map_sg) 1340 */ 1341 static void arm_iommu_sync_sg_for_device(struct device *dev, 1342 struct scatterlist *sg, 1343 int nents, enum dma_data_direction dir) 1344 { 1345 struct scatterlist *s; 1346 int i; 1347 1348 if (dev->dma_coherent) 1349 return; 1350 1351 for_each_sg(sg, s, nents, i) 1352 arch_sync_dma_for_device(sg_phys(s), s->length, dir); 1353 } 1354 1355 /** 1356 * arm_iommu_map_phys 1357 * @dev: valid struct device pointer 1358 * @phys: physical address that buffer resides in 1359 * @size: size of buffer to map 1360 * @dir: DMA transfer direction 1361 * @attrs: DMA mapping attributes 1362 * 1363 * IOMMU aware version of arm_dma_map_page() 1364 */ 1365 static dma_addr_t arm_iommu_map_phys(struct device *dev, phys_addr_t phys, 1366 size_t size, enum dma_data_direction dir, unsigned long attrs) 1367 { 1368 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1369 int len = PAGE_ALIGN(size + offset_in_page(phys)); 1370 phys_addr_t addr = phys & PAGE_MASK; 1371 dma_addr_t dma_addr; 1372 int ret, prot; 1373 1374 if (!dev->dma_coherent && 1375 !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) 1376 arch_sync_dma_for_device(phys, size, dir); 1377 1378 dma_addr = __alloc_iova(mapping, len); 1379 if (dma_addr == DMA_MAPPING_ERROR) 1380 return dma_addr; 1381 1382 prot = __dma_info_to_prot(dir, attrs); 1383 1384 ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL); 1385 if (ret < 0) 1386 goto fail; 1387 1388 return dma_addr + offset_in_page(phys); 1389 fail: 1390 __free_iova(mapping, dma_addr, len); 1391 return DMA_MAPPING_ERROR; 1392 } 1393 1394 /** 1395 * arm_iommu_unmap_page 1396 * @dev: valid struct device pointer 1397 * @handle: DMA address of buffer 1398 * @size: size of buffer (same as passed to dma_map_page) 1399 * @dir: DMA transfer direction (same as passed to dma_map_page) 1400 * @attrs: DMA mapping attributes 1401 * 1402 * IOMMU aware version of arm_dma_unmap_phys() 1403 */ 1404 static void arm_iommu_unmap_phys(struct device *dev, dma_addr_t handle, 1405 size_t size, enum dma_data_direction dir, unsigned long attrs) 1406 { 1407 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1408 dma_addr_t iova = handle & PAGE_MASK; 1409 int offset = handle & ~PAGE_MASK; 1410 int len = PAGE_ALIGN(size + offset); 1411 1412 if (!iova) 1413 return; 1414 1415 if (!dev->dma_coherent && 1416 !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) { 1417 phys_addr_t phys = iommu_iova_to_phys(mapping->domain, iova); 1418 1419 arch_sync_dma_for_cpu(phys + offset, size, dir); 1420 } 1421 1422 iommu_unmap(mapping->domain, iova, len); 1423 __free_iova(mapping, iova, len); 1424 } 1425 1426 static void arm_iommu_sync_single_for_cpu(struct device *dev, 1427 dma_addr_t handle, size_t size, enum dma_data_direction dir) 1428 { 1429 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1430 dma_addr_t iova = handle & PAGE_MASK; 1431 unsigned int offset = handle & ~PAGE_MASK; 1432 phys_addr_t phys; 1433 1434 if (dev->dma_coherent || !iova) 1435 return; 1436 1437 phys = iommu_iova_to_phys(mapping->domain, iova); 1438 arch_sync_dma_for_cpu(phys + offset, size, dir); 1439 } 1440 1441 static void arm_iommu_sync_single_for_device(struct device *dev, 1442 dma_addr_t handle, size_t size, enum dma_data_direction dir) 1443 { 1444 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1445 dma_addr_t iova = handle & PAGE_MASK; 1446 unsigned int offset = handle & ~PAGE_MASK; 1447 phys_addr_t phys; 1448 1449 if (dev->dma_coherent || !iova) 1450 return; 1451 1452 phys = iommu_iova_to_phys(mapping->domain, iova); 1453 arch_sync_dma_for_device(phys + offset, size, dir); 1454 } 1455 1456 static const struct dma_map_ops iommu_ops = { 1457 .alloc = arm_iommu_alloc_attrs, 1458 .free = arm_iommu_free_attrs, 1459 .mmap = arm_iommu_mmap_attrs, 1460 .get_sgtable = arm_iommu_get_sgtable, 1461 1462 .map_phys = arm_iommu_map_phys, 1463 .unmap_phys = arm_iommu_unmap_phys, 1464 .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, 1465 .sync_single_for_device = arm_iommu_sync_single_for_device, 1466 1467 .map_sg = arm_iommu_map_sg, 1468 .unmap_sg = arm_iommu_unmap_sg, 1469 .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, 1470 .sync_sg_for_device = arm_iommu_sync_sg_for_device, 1471 }; 1472 1473 /** 1474 * arm_iommu_create_mapping 1475 * @dev: pointer to the client device (for IOMMU calls) 1476 * @base: start address of the valid IO address space 1477 * @size: maximum size of the valid IO address space 1478 * 1479 * Creates a mapping structure which holds information about used/unused 1480 * IO address ranges, which is required to perform memory allocation and 1481 * mapping with IOMMU aware functions. 1482 * 1483 * The client device need to be attached to the mapping with 1484 * arm_iommu_attach_device function. 1485 */ 1486 struct dma_iommu_mapping * 1487 arm_iommu_create_mapping(struct device *dev, dma_addr_t base, u64 size) 1488 { 1489 unsigned int bits = size >> PAGE_SHIFT; 1490 unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); 1491 struct dma_iommu_mapping *mapping; 1492 int extensions = 1; 1493 int err = -ENOMEM; 1494 1495 /* currently only 32-bit DMA address space is supported */ 1496 if (size > DMA_BIT_MASK(32) + 1) 1497 return ERR_PTR(-ERANGE); 1498 1499 if (!bitmap_size) 1500 return ERR_PTR(-EINVAL); 1501 1502 if (bitmap_size > PAGE_SIZE) { 1503 extensions = bitmap_size / PAGE_SIZE; 1504 bitmap_size = PAGE_SIZE; 1505 } 1506 1507 mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); 1508 if (!mapping) 1509 goto err; 1510 1511 mapping->bitmap_size = bitmap_size; 1512 mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *), 1513 GFP_KERNEL); 1514 if (!mapping->bitmaps) 1515 goto err2; 1516 1517 mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); 1518 if (!mapping->bitmaps[0]) 1519 goto err3; 1520 1521 mapping->nr_bitmaps = 1; 1522 mapping->extensions = extensions; 1523 mapping->base = base; 1524 mapping->bits = BITS_PER_BYTE * bitmap_size; 1525 1526 spin_lock_init(&mapping->lock); 1527 1528 mapping->domain = iommu_paging_domain_alloc(dev); 1529 if (IS_ERR(mapping->domain)) { 1530 err = PTR_ERR(mapping->domain); 1531 goto err4; 1532 } 1533 1534 kref_init(&mapping->kref); 1535 return mapping; 1536 err4: 1537 kfree(mapping->bitmaps[0]); 1538 err3: 1539 kfree(mapping->bitmaps); 1540 err2: 1541 kfree(mapping); 1542 err: 1543 return ERR_PTR(err); 1544 } 1545 EXPORT_SYMBOL_GPL(arm_iommu_create_mapping); 1546 1547 static void release_iommu_mapping(struct kref *kref) 1548 { 1549 int i; 1550 struct dma_iommu_mapping *mapping = 1551 container_of(kref, struct dma_iommu_mapping, kref); 1552 1553 iommu_domain_free(mapping->domain); 1554 for (i = 0; i < mapping->nr_bitmaps; i++) 1555 kfree(mapping->bitmaps[i]); 1556 kfree(mapping->bitmaps); 1557 kfree(mapping); 1558 } 1559 1560 static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) 1561 { 1562 int next_bitmap; 1563 1564 if (mapping->nr_bitmaps >= mapping->extensions) 1565 return -EINVAL; 1566 1567 next_bitmap = mapping->nr_bitmaps; 1568 mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, 1569 GFP_ATOMIC); 1570 if (!mapping->bitmaps[next_bitmap]) 1571 return -ENOMEM; 1572 1573 mapping->nr_bitmaps++; 1574 1575 return 0; 1576 } 1577 1578 void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) 1579 { 1580 if (mapping) 1581 kref_put(&mapping->kref, release_iommu_mapping); 1582 } 1583 EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); 1584 1585 static int __arm_iommu_attach_device(struct device *dev, 1586 struct dma_iommu_mapping *mapping) 1587 { 1588 int err; 1589 1590 err = iommu_attach_device(mapping->domain, dev); 1591 if (err) 1592 return err; 1593 1594 kref_get(&mapping->kref); 1595 to_dma_iommu_mapping(dev) = mapping; 1596 1597 pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); 1598 return 0; 1599 } 1600 1601 /** 1602 * arm_iommu_attach_device 1603 * @dev: valid struct device pointer 1604 * @mapping: io address space mapping structure (returned from 1605 * arm_iommu_create_mapping) 1606 * 1607 * Attaches specified io address space mapping to the provided device. 1608 * This replaces the dma operations (dma_map_ops pointer) with the 1609 * IOMMU aware version. 1610 * 1611 * More than one client might be attached to the same io address space 1612 * mapping. 1613 */ 1614 int arm_iommu_attach_device(struct device *dev, 1615 struct dma_iommu_mapping *mapping) 1616 { 1617 int err; 1618 1619 err = __arm_iommu_attach_device(dev, mapping); 1620 if (err) 1621 return err; 1622 1623 set_dma_ops(dev, &iommu_ops); 1624 return 0; 1625 } 1626 EXPORT_SYMBOL_GPL(arm_iommu_attach_device); 1627 1628 /** 1629 * arm_iommu_detach_device 1630 * @dev: valid struct device pointer 1631 * 1632 * Detaches the provided device from a previously attached map. 1633 * This overwrites the dma_ops pointer with appropriate non-IOMMU ops. 1634 */ 1635 void arm_iommu_detach_device(struct device *dev) 1636 { 1637 struct dma_iommu_mapping *mapping; 1638 1639 mapping = to_dma_iommu_mapping(dev); 1640 if (!mapping) { 1641 dev_warn(dev, "Not attached\n"); 1642 return; 1643 } 1644 1645 iommu_detach_device(mapping->domain, dev); 1646 kref_put(&mapping->kref, release_iommu_mapping); 1647 to_dma_iommu_mapping(dev) = NULL; 1648 set_dma_ops(dev, NULL); 1649 1650 pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); 1651 } 1652 EXPORT_SYMBOL_GPL(arm_iommu_detach_device); 1653 1654 static void arm_setup_iommu_dma_ops(struct device *dev) 1655 { 1656 struct dma_iommu_mapping *mapping; 1657 u64 dma_base = 0, size = 1ULL << 32; 1658 1659 if (dev->dma_range_map) { 1660 dma_base = dma_range_map_min(dev->dma_range_map); 1661 size = dma_range_map_max(dev->dma_range_map) - dma_base; 1662 } 1663 mapping = arm_iommu_create_mapping(dev, dma_base, size); 1664 if (IS_ERR(mapping)) { 1665 pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", 1666 size, dev_name(dev)); 1667 return; 1668 } 1669 1670 if (__arm_iommu_attach_device(dev, mapping)) { 1671 pr_warn("Failed to attached device %s to IOMMU_mapping\n", 1672 dev_name(dev)); 1673 arm_iommu_release_mapping(mapping); 1674 return; 1675 } 1676 1677 set_dma_ops(dev, &iommu_ops); 1678 } 1679 1680 static void arm_teardown_iommu_dma_ops(struct device *dev) 1681 { 1682 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); 1683 1684 if (!mapping) 1685 return; 1686 1687 arm_iommu_detach_device(dev); 1688 arm_iommu_release_mapping(mapping); 1689 } 1690 1691 #else 1692 1693 static void arm_setup_iommu_dma_ops(struct device *dev) 1694 { 1695 } 1696 1697 static void arm_teardown_iommu_dma_ops(struct device *dev) { } 1698 1699 #endif /* CONFIG_ARM_DMA_USE_IOMMU */ 1700 1701 void arch_setup_dma_ops(struct device *dev, bool coherent) 1702 { 1703 /* 1704 * Due to legacy code that sets the ->dma_coherent flag from a bus 1705 * notifier we can't just assign coherent to the ->dma_coherent flag 1706 * here, but instead have to make sure we only set but never clear it 1707 * for now. 1708 */ 1709 if (coherent) 1710 dev->dma_coherent = true; 1711 1712 /* 1713 * Don't override the dma_ops if they have already been set. Ideally 1714 * this should be the only location where dma_ops are set, remove this 1715 * check when all other callers of set_dma_ops will have disappeared. 1716 */ 1717 if (dev->dma_ops) 1718 return; 1719 1720 if (device_iommu_mapped(dev)) 1721 arm_setup_iommu_dma_ops(dev); 1722 1723 xen_setup_dma_ops(dev); 1724 dev->archdata.dma_ops_setup = true; 1725 } 1726 1727 void arch_teardown_dma_ops(struct device *dev) 1728 { 1729 if (!dev->archdata.dma_ops_setup) 1730 return; 1731 1732 arm_teardown_iommu_dma_ops(dev); 1733 /* Let arch_setup_dma_ops() start again from scratch upon re-probe */ 1734 set_dma_ops(dev, NULL); 1735 } 1736 1737 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, 1738 gfp_t gfp, unsigned long attrs) 1739 { 1740 return __dma_alloc(dev, size, dma_handle, gfp, 1741 __get_dma_pgprot(attrs, PAGE_KERNEL), false, 1742 attrs, __builtin_return_address(0)); 1743 } 1744 1745 void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, 1746 dma_addr_t dma_handle, unsigned long attrs) 1747 { 1748 __arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false); 1749 } 1750