/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

static int swiotlb __ro_after_init;

static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);

static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
		memset(ptr, 0, size);
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  unsigned long attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
		struct page *page;
		void *addr;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);
		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}

static void __dma_free_coherent(struct device *dev, size_t size,
				void *vaddr, dma_addr_t dma_handle,
				unsigned long attrs)
{
	bool freed;
	phys_addr_t paddr = dma_to_phys(dev, dma_handle);

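	/* As in __dma_alloc_coherent(), a NULL device is a caller bug. */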
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return;
	}

	freed = dma_release_from_contiguous(dev,
					phys_to_page(paddr),
					size >> PAGE_SHIFT);
	if (!freed)
		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t flags,
			 unsigned long attrs)
{
	struct page *page;
	void *ptr, *coherent_ptr;
	bool coherent = is_device_dma_coherent(dev);
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

	size = PAGE_ALIGN(size);

	if (!coherent && !gfpflags_allow_blocking(flags)) {
		struct page *page = NULL;
		void *addr = __alloc_from_pool(size, &page, flags);

		if (addr)
			*dma_handle = phys_to_dma(dev, page_to_phys(page));

		return addr;
	}

	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
	if (!ptr)
		goto no_mem;

	/* no need for non-cacheable mapping if coherent */
	if (coherent)
		return ptr;

	/* remove any dirty cache lines on the kernel alias */
	__dma_flush_area(ptr, size);

	/* create a coherent mapping */
	page = virt_to_page(ptr);
	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot, NULL);
	if (!coherent_ptr)
		goto no_map;

	return coherent_ptr;

no_map:
	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
	*dma_handle = DMA_ERROR_CODE;
	return NULL;
}

static void __dma_free(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle,
		       unsigned long attrs)
{
	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

	size = PAGE_ALIGN(size);

	if (!is_device_dma_coherent(dev)) {
		if (__free_from_pool(vaddr, size))
			return;
		vunmap(vaddr);
	}
	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}

static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	dma_addr_t dev_addr;

	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

	return dev_addr;
}


static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				  int nelems, enum dma_data_direction dir,
				  unsigned long attrs)
{
	struct scatterlist *sg;
	int i, ret;

	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, ret, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);

	return ret;
}

static void __swiotlb_unmap_sg_attrs(struct device *dev,
				     struct scatterlist *sgl, int nelems,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dev_addr, size_t size,
					  enum dma_data_direction dir)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
					     dma_addr_t dev_addr, size_t size,
					     enum dma_data_direction dir)
{
	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sgl, int nelems,
				      enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sgl, int nelems,
					 enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);
}

static int __swiotlb_mmap(struct device *dev,
			  struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  unsigned long attrs)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
					PAGE_SHIFT;
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t handle, size_t size,
				 unsigned long attrs)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
			    PAGE_ALIGN(size), 0);

	return ret;
}

static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	if (swiotlb)
		return swiotlb_dma_supported(hwdev, mask);
	return 1;
}

static struct dma_map_ops swiotlb_dma_ops = {
	.alloc = __dma_alloc,
	.free = __dma_free,
	.mmap = __swiotlb_mmap,
	.get_sgtable = __swiotlb_get_sgtable,
	.map_page = __swiotlb_map_page,
	.unmap_page = __swiotlb_unmap_page,
	.map_sg = __swiotlb_map_sg_attrs,
	.unmap_sg = __swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
	.sync_single_for_device = __swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
	.dma_supported = __swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};

static int __init atomic_pool_init(void)
{
	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
	struct page *page;
	void *addr;
	unsigned int pool_size_order = get_order(atomic_pool_size);

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order);
	else
		page = alloc_pages(GFP_DMA, pool_size_order);

	if (page) {
		int ret;
		void *page_addr = page_address(page);

		memset(page_addr, 0, atomic_pool_size);
		__dma_flush_area(page_addr, atomic_pool_size);

		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
		if (!atomic_pool)
			goto free_page;

		addr = dma_common_contiguous_remap(page, atomic_pool_size,
					VM_USERMAP, prot, atomic_pool_init);

		if (!addr)
			goto destroy_genpool;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto remove_mapping;

		gen_pool_set_algo(atomic_pool,
				  gen_pool_first_fit_order_align,
				  (void *)PAGE_SHIFT);

		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}
	goto out;

remove_mapping:
	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
free_page:
	if (!dma_release_from_contiguous(NULL, page, nr_pages))
		__free_pages(page, pool_size_order);
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}

/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   unsigned long attrs)
{
	return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 unsigned long attrs)
{
}

static int __dummy_mmap(struct device *dev,
			struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			unsigned long attrs)
{
	return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir,
			  unsigned long attrs)
{
	return 0;
}

static void __dummy_unmap_sg(struct device *dev,
			     struct scatterlist *sgl, int nelems,
			     enum dma_data_direction dir,
			     unsigned long attrs)
{
}

static void __dummy_sync_single(struct device *dev,
				dma_addr_t dev_addr, size_t size,
				enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
			    struct scatterlist *sgl, int nelems,
			    enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
	return 0;
}

struct dma_map_ops dummy_dma_ops = {
	.alloc = __dummy_alloc,
	.free = __dummy_free,
	.mmap = __dummy_mmap,
	.map_page = __dummy_map_page,
	.unmap_page = __dummy_unmap_page,
	.map_sg = __dummy_map_sg,
	.unmap_sg = __dummy_unmap_sg,
	.sync_single_for_cpu = __dummy_sync_single,
	.sync_single_for_device = __dummy_sync_single,
	.sync_sg_for_cpu = __dummy_sync_sg,
	.sync_sg_for_device = __dummy_sync_sg,
	.mapping_error = __dummy_mapping_error,
	.dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);

static int __init arm64_dma_init(void)
{
	if (swiotlb_force == SWIOTLB_FORCE ||
	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
		swiotlb = 1;

	return atomic_pool_init();
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);


#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_area(virt, PAGE_SIZE);
}

static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp)) {
		struct page **pages;
		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
					handle, flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	} else {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = __alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (iommu_dma_mapping_error(dev, *handle)) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				__free_from_pool(addr, size);
			addr = NULL;
		}
	}
	return addr;
}

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, unsigned long attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 3 things depending on how it was allocated:
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (__in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_from_pool(cpu_addr, size);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      unsigned long attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_map_area(phys_to_virt(phys), size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int prot = dma_direction_to_prot(dir, coherent);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!iommu_dma_mapping_error(dev, dev_addr) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_device(dev, dev_addr, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;
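
	/* Cache maintenance is only needed for non-coherent devices. */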
	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_map_area(sg_virt(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_direction_to_prot(dir, coherent));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.map_resource = iommu_dma_map_resource,
	.unmap_resource = iommu_dma_unmap_resource,
	.dma_supported = iommu_dma_supported,
	.mapping_error = iommu_dma_mapping_error,
};

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */
struct iommu_dma_notifier_data {
	struct list_head list;
	struct device *dev;
	const struct iommu_ops *ops;
	u64 dma_base;
	u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);

static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			    u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/*
	 * If the IOMMU driver has the DMA domain support that we require,
	 * then the IOMMU core will have already configured a group for this
	 * device, and allocated the default domain for that group.
	 */
	if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
		pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
			dev_name(dev));
		return false;
	}

	dev->archdata.dma_ops = &iommu_dma_ops;
	return true;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			       u64 dma_base, u64 size)
{
	struct iommu_dma_notifier_data *iommudata;

	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
	if (!iommudata)
		return;

	iommudata->dev = dev;
	iommudata->ops = ops;
	iommudata->dma_base = dma_base;
	iommudata->size = size;

	mutex_lock(&iommu_dma_notifier_lock);
	list_add(&iommudata->list, &iommu_dma_masters);
	mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct iommu_dma_notifier_data *master, *tmp;

	if (action != BUS_NOTIFY_BIND_DRIVER)
		return 0;

	mutex_lock(&iommu_dma_notifier_lock);
	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
		if (data == master->dev && do_iommu_attach(master->dev,
				master->ops, master->dma_base, master->size)) {
			list_del(&master->list);
			kfree(master);
			break;
		}
	}
	mutex_unlock(&iommu_dma_notifier_lock);
	return 0;
}

static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
{
	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
	int ret;

	if (!nb)
		return -ENOMEM;

	nb->notifier_call = __iommu_attach_notifier;

	ret = bus_register_notifier(bus, nb);
	if (ret) {
		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
			bus->name);
		kfree(nb);
	}
	return ret;
}

static int __init __iommu_dma_init(void)
{
	int ret;

	ret = iommu_dma_init();
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&amba_bustype);
#ifdef CONFIG_PCI
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
#endif
	return ret;
}
arch_initcall(__iommu_dma_init);

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_group *group;

	if (!ops)
		return;
	/*
	 * TODO: As a concession to the future, we're ready to handle being
	 * called both early and late (i.e. after bus_add_device). Once all
	 * the platform bus code is reworked to call us late and the notifier
	 * junk above goes away, move the body of do_iommu_attach here.
	 */
	group = iommu_group_get(dev);
	if (group) {
		do_iommu_attach(dev, ops, dma_base, size);
		iommu_group_put(group);
	} else {
		queue_iommu_attach(dev, ops, dma_base, size);
	}
}

void arch_teardown_dma_ops(struct device *dev)
{
	dev->archdata.dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *iommu)
{ }

#endif  /* CONFIG_IOMMU_DMA */

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	if (!dev->archdata.dma_ops)
		dev->archdata.dma_ops = &swiotlb_dma_ops;

	dev->archdata.dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}