/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

static int swiotlb __ro_after_init;

static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);

static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
		memset(ptr, 0, size);
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  unsigned long attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
		struct page *page;
		void *addr;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);
		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}

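/*
 * Counterpart to __dma_alloc_coherent(): return the buffer to CMA when it
 * came from there, otherwise hand it back to swiotlb.
 */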
static void __dma_free_coherent(struct device *dev, size_t size,
				void *vaddr, dma_addr_t dma_handle,
				unsigned long attrs)
{
	bool freed;
	phys_addr_t paddr = dma_to_phys(dev, dma_handle);

	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return;
	}

	freed = dma_release_from_contiguous(dev,
					phys_to_page(paddr),
					size >> PAGE_SHIFT);
	if (!freed)
		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t flags,
			 unsigned long attrs)
{
	struct page *page;
	void *ptr, *coherent_ptr;
	bool coherent = is_device_dma_coherent(dev);
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

	size = PAGE_ALIGN(size);

	if (!coherent && !gfpflags_allow_blocking(flags)) {
		struct page *page = NULL;
		void *addr = __alloc_from_pool(size, &page, flags);

		if (addr)
			*dma_handle = phys_to_dma(dev, page_to_phys(page));

		return addr;
	}

	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
	if (!ptr)
		goto no_mem;

	/* no need for non-cacheable mapping if coherent */
	if (coherent)
		return ptr;

	/* remove any dirty cache lines on the kernel alias */
	__dma_flush_area(ptr, size);

	/* create a coherent mapping */
	page = virt_to_page(ptr);
	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot, NULL);
	if (!coherent_ptr)
		goto no_map;

	return coherent_ptr;

no_map:
	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
	*dma_handle = DMA_ERROR_CODE;
	return NULL;
}

static void __dma_free(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle,
		       unsigned long attrs)
{
	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

	size = PAGE_ALIGN(size);

	if (!is_device_dma_coherent(dev)) {
		if (__free_from_pool(vaddr, size))
			return;
		vunmap(vaddr);
	}
	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}

static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	dma_addr_t dev_addr;

	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

	return dev_addr;
}

static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				  int nelems, enum dma_data_direction dir,
				  unsigned long attrs)
{
	struct scatterlist *sg;
	int i, ret;

	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, ret, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);

	return ret;
}

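/*
 * As in __swiotlb_unmap_page(), do the CPU cache maintenance for
 * non-coherent devices before handing the scatterlist back to swiotlb.
 */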
static void __swiotlb_unmap_sg_attrs(struct device *dev,
				     struct scatterlist *sgl, int nelems,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dev_addr, size_t size,
					  enum dma_data_direction dir)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
					     dma_addr_t dev_addr, size_t size,
					     enum dma_data_direction dir)
{
	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sgl, int nelems,
				      enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sgl, int nelems,
					 enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);
}

static int __swiotlb_mmap(struct device *dev,
			  struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  unsigned long attrs)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
					PAGE_SHIFT;
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t handle, size_t size,
				 unsigned long attrs)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
			    PAGE_ALIGN(size), 0);

	return ret;
}

static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	if (swiotlb)
		return swiotlb_dma_supported(hwdev, mask);
	return 1;
}

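/*
 * Default arm64 DMA ops (installed by arch_setup_dma_ops() below when no
 * IOMMU is in use): swiotlb provides the bounce-buffering backend, and the
 * wrappers above add the cache maintenance that non-coherent devices need.
 */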
static struct dma_map_ops swiotlb_dma_ops = {
	.alloc = __dma_alloc,
	.free = __dma_free,
	.mmap = __swiotlb_mmap,
	.get_sgtable = __swiotlb_get_sgtable,
	.map_page = __swiotlb_map_page,
	.unmap_page = __swiotlb_unmap_page,
	.map_sg = __swiotlb_map_sg_attrs,
	.unmap_sg = __swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
	.sync_single_for_device = __swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
	.dma_supported = __swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};

static int __init atomic_pool_init(void)
{
	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
	struct page *page;
	void *addr;
	unsigned int pool_size_order = get_order(atomic_pool_size);

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order);
	else
		page = alloc_pages(GFP_DMA, pool_size_order);

	if (page) {
		int ret;
		void *page_addr = page_address(page);

		memset(page_addr, 0, atomic_pool_size);
		__dma_flush_area(page_addr, atomic_pool_size);

		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
		if (!atomic_pool)
			goto free_page;

		addr = dma_common_contiguous_remap(page, atomic_pool_size,
					VM_USERMAP, prot, atomic_pool_init);

		if (!addr)
			goto destroy_genpool;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto remove_mapping;

		gen_pool_set_algo(atomic_pool,
				  gen_pool_first_fit_order_align,
				  (void *)PAGE_SHIFT);

		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}
	goto out;

remove_mapping:
	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
free_page:
	if (!dma_release_from_contiguous(NULL, page, nr_pages))
		__free_pages(page, pool_size_order);
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}

/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   unsigned long attrs)
{
	return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 unsigned long attrs)
{
}

static int __dummy_mmap(struct device *dev,
			struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			unsigned long attrs)
{
	return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir,
			  unsigned long attrs)
{
	return 0;
}

static void __dummy_unmap_sg(struct device *dev,
			     struct scatterlist *sgl, int nelems,
			     enum dma_data_direction dir,
			     unsigned long attrs)
{
}

static void __dummy_sync_single(struct device *dev,
				dma_addr_t dev_addr, size_t size,
				enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
			    struct scatterlist *sgl, int nelems,
			    enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
	return 0;
}

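/*
 * Every dummy op fails or does nothing, so a device left with these ops
 * can never actually perform DMA.
 */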
struct dma_map_ops dummy_dma_ops = {
	.alloc = __dummy_alloc,
	.free = __dummy_free,
	.mmap = __dummy_mmap,
	.map_page = __dummy_map_page,
	.unmap_page = __dummy_unmap_page,
	.map_sg = __dummy_map_sg,
	.unmap_sg = __dummy_unmap_sg,
	.sync_single_for_cpu = __dummy_sync_single,
	.sync_single_for_device = __dummy_sync_single,
	.sync_sg_for_cpu = __dummy_sync_sg,
	.sync_sg_for_device = __dummy_sync_sg,
	.mapping_error = __dummy_mapping_error,
	.dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);

static int __init arm64_dma_init(void)
{
	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
		swiotlb = 1;

	return atomic_pool_init();
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_area(virt, PAGE_SIZE);
}

static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp)) {
		struct page **pages;
		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
					handle, flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	} else {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = __alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (iommu_dma_mapping_error(dev, *handle)) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				__free_from_pool(addr, size);
			addr = NULL;
		}
	}
	return addr;
}

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, unsigned long attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 3 things depending on how it was allocated:
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (__in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_from_pool(cpu_addr, size);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      unsigned long attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_map_area(phys_to_virt(phys), size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int prot = dma_direction_to_prot(dir, coherent);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!iommu_dma_mapping_error(dev, dev_addr) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_device(dev, dev_addr, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

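/*
 * Scatterlist variants of the sync helpers above: cache maintenance here
 * uses sg_virt() on the original pages rather than translating the IOVA.
 */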
static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_map_area(sg_virt(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_direction_to_prot(dir, coherent));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.dma_supported = iommu_dma_supported,
	.mapping_error = iommu_dma_mapping_error,
};

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */
struct iommu_dma_notifier_data {
	struct list_head list;
	struct device *dev;
	const struct iommu_ops *ops;
	u64 dma_base;
	u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);

static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			    u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/*
	 * If the IOMMU driver has the DMA domain support that we require,
	 * then the IOMMU core will have already configured a group for this
	 * device, and allocated the default domain for that group.
	 */
	if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
		pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
			dev_name(dev));
		return false;
	}

	dev->archdata.dma_ops = &iommu_dma_ops;
	return true;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			       u64 dma_base, u64 size)
{
	struct iommu_dma_notifier_data *iommudata;

	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
	if (!iommudata)
		return;

	iommudata->dev = dev;
	iommudata->ops = ops;
	iommudata->dma_base = dma_base;
	iommudata->size = size;

	mutex_lock(&iommu_dma_notifier_lock);
	list_add(&iommudata->list, &iommu_dma_masters);
	mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct iommu_dma_notifier_data *master, *tmp;

	if (action != BUS_NOTIFY_BIND_DRIVER)
		return 0;

	mutex_lock(&iommu_dma_notifier_lock);
	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
		if (data == master->dev && do_iommu_attach(master->dev,
				master->ops, master->dma_base, master->size)) {
			list_del(&master->list);
			kfree(master);
			break;
		}
	}
	mutex_unlock(&iommu_dma_notifier_lock);
	return 0;
}

static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
{
	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
	int ret;

	if (!nb)
		return -ENOMEM;

	nb->notifier_call = __iommu_attach_notifier;

	ret = bus_register_notifier(bus, nb);
	if (ret) {
		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
			bus->name);
		kfree(nb);
	}
	return ret;
}

static int __init __iommu_dma_init(void)
{
	int ret;

	ret = iommu_dma_init();
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&amba_bustype);
#ifdef CONFIG_PCI
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
#endif
	return ret;
}
arch_initcall(__iommu_dma_init);

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_group *group;

	if (!ops)
		return;
	/*
	 * TODO: As a concession to the future, we're ready to handle being
	 * called both early and late (i.e. after bus_add_device). Once all
	 * the platform bus code is reworked to call us late and the notifier
	 * junk above goes away, move the body of do_iommu_attach here.
	 */
	group = iommu_group_get(dev);
	if (group) {
		do_iommu_attach(dev, ops, dma_base, size);
		iommu_group_put(group);
	} else {
		queue_iommu_attach(dev, ops, dma_base, size);
	}
}

void arch_teardown_dma_ops(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (WARN_ON(domain))
		iommu_detach_device(domain, dev);

	dev->archdata.dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *iommu)
{ }

#endif	/* CONFIG_IOMMU_DMA */

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	if (!dev->archdata.dma_ops)
		dev->archdata.dma_ops = &swiotlb_dma_ops;

	dev->archdata.dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}