/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

/* Non-zero if swiotlb bounce buffering may be needed (see arm64_dma_init()) */
static int swiotlb __ro_after_init;

static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);

static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
		memset(ptr, 0, size);
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  unsigned long attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
		struct page *page;
		void *addr;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size), flags);
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);
		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}

static void __dma_free_coherent(struct device *dev, size_t size,
				void *vaddr, dma_addr_t dma_handle,
				unsigned long attrs)
{
	bool freed;
	phys_addr_t paddr = dma_to_phys(dev, dma_handle);

	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return;
	}

	freed = dma_release_from_contiguous(dev,
					phys_to_page(paddr),
					size >> PAGE_SHIFT);
	if (!freed)
		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t flags,
			 unsigned long attrs)
{
	struct page *page;
	void *ptr, *coherent_ptr;
	bool coherent = is_device_dma_coherent(dev);
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

	size = PAGE_ALIGN(size);

	if (!coherent && !gfpflags_allow_blocking(flags)) {
		struct page *page = NULL;
		void *addr = __alloc_from_pool(size, &page, flags);

		if (addr)
			*dma_handle = phys_to_dma(dev, page_to_phys(page));

		return addr;
	}

	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
	if (!ptr)
		goto no_mem;

	/* no need for non-cacheable mapping if coherent */
	if (coherent)
		return ptr;

	/* remove any dirty cache lines on the kernel alias */
	__dma_flush_area(ptr, size);

	/* create a coherent mapping */
	page = virt_to_page(ptr);
	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot, NULL);
	if (!coherent_ptr)
		goto no_map;

	return coherent_ptr;

no_map:
	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
	*dma_handle = DMA_ERROR_CODE;
	return NULL;
}

static void __dma_free(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle,
		       unsigned long attrs)
{
	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

	size = PAGE_ALIGN(size);

	if (!is_device_dma_coherent(dev)) {
		if (__free_from_pool(vaddr, size))
			return;
		vunmap(vaddr);
	}
	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}

static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	dma_addr_t dev_addr;

	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
	if (!is_device_dma_coherent(dev) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

	return dev_addr;
}


static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	if (!is_device_dma_coherent(dev) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				  int nelems, enum dma_data_direction dir,
				  unsigned long attrs)
{
	struct scatterlist *sg;
	int i, ret;

	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
	if (!is_device_dma_coherent(dev) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		for_each_sg(sgl, sg, ret, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);

	return ret;
}

static void __swiotlb_unmap_sg_attrs(struct device *dev,
				     struct scatterlist *sgl, int nelems,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dev_addr, size_t size,
					  enum dma_data_direction dir)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
					     dma_addr_t dev_addr, size_t size,
					     enum dma_data_direction dir)
{
	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sgl, int nelems,
				      enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sgl, int nelems,
					 enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);
}

static int __swiotlb_mmap(struct device *dev,
			  struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  unsigned long attrs)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
					PAGE_SHIFT;
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t handle, size_t size,
				 unsigned long attrs)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
			    PAGE_ALIGN(size), 0);

	return ret;
}

static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	if (swiotlb)
		return swiotlb_dma_supported(hwdev, mask);
	return 1;
}

static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
{
	if (swiotlb)
		return swiotlb_dma_mapping_error(hwdev, addr);
	return 0;
}

static const struct dma_map_ops swiotlb_dma_ops = {
	.alloc = __dma_alloc,
	.free = __dma_free,
	.mmap = __swiotlb_mmap,
	.get_sgtable = __swiotlb_get_sgtable,
	.map_page = __swiotlb_map_page,
	.unmap_page = __swiotlb_unmap_page,
	.map_sg = __swiotlb_map_sg_attrs,
	.unmap_sg = __swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
	.sync_single_for_device = __swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
	.dma_supported = __swiotlb_dma_supported,
	.mapping_error = __swiotlb_dma_mapping_error,
};

static int __init atomic_pool_init(void)
{
	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
	struct page *page;
	void *addr;
	unsigned int pool_size_order = get_order(atomic_pool_size);

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order, GFP_KERNEL);
	else
		page = alloc_pages(GFP_DMA, pool_size_order);

	if (page) {
		int ret;
		void *page_addr = page_address(page);

		memset(page_addr, 0, atomic_pool_size);
		__dma_flush_area(page_addr, atomic_pool_size);

		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
		if (!atomic_pool)
			goto free_page;

		addr = dma_common_contiguous_remap(page, atomic_pool_size,
					VM_USERMAP, prot, atomic_pool_init);

		if (!addr)
			goto destroy_genpool;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto remove_mapping;

		gen_pool_set_algo(atomic_pool,
				  gen_pool_first_fit_order_align,
				  (void *)PAGE_SHIFT);

		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}
	goto out;

remove_mapping:
	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
free_page:
	if (!dma_release_from_contiguous(NULL, page, nr_pages))
		__free_pages(page, pool_size_order);
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}

/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   unsigned long attrs)
{
	return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 unsigned long attrs)
{
}

static int __dummy_mmap(struct device *dev,
			struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			unsigned long attrs)
{
	return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir,
			  unsigned long attrs)
{
	return 0;
}

static void __dummy_unmap_sg(struct device *dev,
			     struct scatterlist *sgl, int nelems,
			     enum dma_data_direction dir,
			     unsigned long attrs)
{
}

static void __dummy_sync_single(struct device *dev,
				dma_addr_t dev_addr, size_t size,
				enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
			    struct scatterlist *sgl, int nelems,
			    enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
	return 0;
}

const struct dma_map_ops dummy_dma_ops = {
	.alloc = __dummy_alloc,
	.free = __dummy_free,
	.mmap = __dummy_mmap,
	.map_page = __dummy_map_page,
	.unmap_page = __dummy_unmap_page,
	.map_sg = __dummy_map_sg,
	.unmap_sg = __dummy_unmap_sg,
	.sync_single_for_cpu = __dummy_sync_single,
	.sync_single_for_device = __dummy_sync_single,
	.sync_sg_for_cpu = __dummy_sync_sg,
	.sync_sg_for_device = __dummy_sync_sg,
	.mapping_error = __dummy_mapping_error,
	.dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);

static int __init arm64_dma_init(void)
{
	if (swiotlb_force == SWIOTLB_FORCE ||
	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
		swiotlb = 1;

	return atomic_pool_init();
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);


#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_area(virt, PAGE_SIZE);
}

static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp)) {
		struct page **pages;
		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
					handle, flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	} else {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = __alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (iommu_dma_mapping_error(dev, *handle)) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				__free_from_pool(addr, size);
			addr = NULL;
		}
	}
	return addr;
}

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, unsigned long attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 3 things depending on how it was allocated:
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (__in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_from_pool(cpu_addr, size);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      unsigned long attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_map_area(phys_to_virt(phys), size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int prot = dma_info_to_prot(dir, coherent, attrs);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!iommu_dma_mapping_error(dev, dev_addr) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_device(dev, dev_addr, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_map_area(sg_virt(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_info_to_prot(dir, coherent, attrs));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static const struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.map_resource = iommu_dma_map_resource,
	.unmap_resource = iommu_dma_unmap_resource,
	.mapping_error = iommu_dma_mapping_error,
};

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */

/* Bookkeeping for masters whose IOMMU attach must be deferred until driver bind */
struct iommu_dma_notifier_data {
	struct list_head list;
	struct device *dev;
	const struct iommu_ops *ops;
	u64 dma_base;
	u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);

static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			    u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/*
	 * If the IOMMU driver has the DMA domain support that we require,
	 * then the IOMMU core will have already configured a group for this
	 * device, and allocated the default domain for that group.
	 */
	if (!domain)
		goto out_err;

	if (domain->type == IOMMU_DOMAIN_DMA) {
		if (iommu_dma_init_domain(domain, dma_base, size, dev))
			goto out_err;

		dev->dma_ops = &iommu_dma_ops;
	}

	return true;
out_err:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
	return false;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			       u64 dma_base, u64 size)
{
	struct iommu_dma_notifier_data *iommudata;

	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
	if (!iommudata)
		return;

	iommudata->dev = dev;
	iommudata->ops = ops;
	iommudata->dma_base = dma_base;
	iommudata->size = size;

	mutex_lock(&iommu_dma_notifier_lock);
	list_add(&iommudata->list, &iommu_dma_masters);
	mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct iommu_dma_notifier_data *master, *tmp;

	if (action != BUS_NOTIFY_BIND_DRIVER)
		return 0;

	mutex_lock(&iommu_dma_notifier_lock);
	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
		if (data == master->dev && do_iommu_attach(master->dev,
				master->ops, master->dma_base, master->size)) {
			list_del(&master->list);
			kfree(master);
			break;
		}
	}
	mutex_unlock(&iommu_dma_notifier_lock);
	return 0;
}

static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
{
	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
	int ret;

	if (!nb)
		return -ENOMEM;

	nb->notifier_call = __iommu_attach_notifier;

	ret = bus_register_notifier(bus, nb);
	if (ret) {
		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
			bus->name);
		kfree(nb);
	}
	return ret;
}

static int __init __iommu_dma_init(void)
{
	int ret;

	ret = iommu_dma_init();
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&amba_bustype);
#ifdef CONFIG_PCI
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
#endif
	return ret;
}
arch_initcall(__iommu_dma_init);

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_group *group;

	if (!ops)
		return;
	/*
	 * TODO: As a concession to the future, we're ready to handle being
	 * called both early and late (i.e. after bus_add_device). Once all
	 * the platform bus code is reworked to call us late and the notifier
	 * junk above goes away, move the body of do_iommu_attach here.
	 */
	group = iommu_group_get(dev);
	if (group) {
		do_iommu_attach(dev, ops, dma_base, size);
		iommu_group_put(group);
	} else {
		queue_iommu_attach(dev, ops, dma_base, size);
	}
}

void arch_teardown_dma_ops(struct device *dev)
{
	dev->dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *iommu)
{ }

#endif	/* CONFIG_IOMMU_DMA */

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	if (!dev->dma_ops)
		dev->dma_ops = &swiotlb_dma_ops;

	dev->archdata.dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}