// SPDX-License-Identifier: GPL-2.0-only
/*
 * A fairly generic DMA-API to IOMMU-API glue layer.
 *
 * Copyright (C) 2014-2015 ARM Ltd.
 *
 * based in part on arch/arm/mm/dma-mapping.c:
 * Copyright (C) 2000-2004 Russell King
 */

#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/crash_dump.h>
#include <linux/device.h>
#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>
#include <linux/gfp.h>
#include <linux/huge_mm.h>
#include <linux/iommu.h>
#include <linux/iova.h>
#include <linux/irq.h>
#include <linux/list_sort.h>
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>

#include "dma-iommu.h"

struct iommu_dma_msi_page {
	struct list_head list;
	dma_addr_t iova;
	phys_addr_t phys;
};

enum iommu_dma_cookie_type {
	IOMMU_DMA_IOVA_COOKIE,
	IOMMU_DMA_MSI_COOKIE,
};

struct iommu_dma_cookie {
	enum iommu_dma_cookie_type type;
	union {
		/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
		struct {
			struct iova_domain iovad;

			struct iova_fq __percpu *fq;	/* Flush queue */
			/* Number of TLB flushes that have been started */
			atomic64_t fq_flush_start_cnt;
			/* Number of TLB flushes that have been finished */
			atomic64_t fq_flush_finish_cnt;
			/* Timer to regularly empty the flush queues */
			struct timer_list fq_timer;
			/* 1 when timer is active, 0 when not */
			atomic_t fq_timer_on;
		};
		/* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
		dma_addr_t msi_iova;
	};
	struct list_head msi_page_list;

	/* Domain for flush queue callback; NULL if flush queue not in use */
	struct iommu_domain *fq_domain;
	struct mutex mutex;
};

static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
bool iommu_dma_forcedac __read_mostly;

static int __init iommu_dma_forcedac_setup(char *str)
{
	int ret = kstrtobool(str, &iommu_dma_forcedac);

	if (!ret && iommu_dma_forcedac)
		pr_info("Forcing DAC for PCI devices\n");
	return ret;
}
early_param("iommu.forcedac", iommu_dma_forcedac_setup);

/* Number of entries per flush queue */
#define IOVA_FQ_SIZE	256

/* Timeout (in ms) after which entries are flushed from the queue */
#define IOVA_FQ_TIMEOUT	10

/* Flush queue entry for deferred flushing */
struct iova_fq_entry {
	unsigned long iova_pfn;
	unsigned long pages;
	struct list_head freelist;
	u64 counter; /* Flush counter when this entry was added */
};

/* Per-CPU flush queue structure */
struct iova_fq {
	struct iova_fq_entry entries[IOVA_FQ_SIZE];
	unsigned int head, tail;
	spinlock_t lock;
};

#define fq_ring_for_each(i, fq) \
	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)

static inline bool fq_full(struct iova_fq *fq)
{
	assert_spin_locked(&fq->lock);
	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
}

static inline unsigned int fq_ring_add(struct iova_fq *fq)
{
	unsigned int idx = fq->tail;

	assert_spin_locked(&fq->lock);

	fq->tail = (idx + 1) % IOVA_FQ_SIZE;

	return idx;
}
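/*
 * In rough terms: queue_iova() publishes entries at ->tail, tagging each
 * with the fq_flush_start_cnt current at that point, while fq_ring_free()
 * below retires entries from ->head whose counter is already below
 * fq_flush_finish_cnt, i.e. entries whose IOVA is guaranteed to have been
 * covered by a completed IOTLB flush. One slot is kept unused so that a
 * full ring can be told apart from an empty one.
 */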
static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
{
	u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
	unsigned int idx;

	assert_spin_locked(&fq->lock);

	fq_ring_for_each(idx, fq) {

		if (fq->entries[idx].counter >= counter)
			break;

		put_pages_list(&fq->entries[idx].freelist);
		free_iova_fast(&cookie->iovad,
			       fq->entries[idx].iova_pfn,
			       fq->entries[idx].pages);

		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
	}
}

static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
{
	atomic64_inc(&cookie->fq_flush_start_cnt);
	cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain);
	atomic64_inc(&cookie->fq_flush_finish_cnt);
}

static void fq_flush_timeout(struct timer_list *t)
{
	struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer);
	int cpu;

	atomic_set(&cookie->fq_timer_on, 0);
	fq_flush_iotlb(cookie);

	for_each_possible_cpu(cpu) {
		unsigned long flags;
		struct iova_fq *fq;

		fq = per_cpu_ptr(cookie->fq, cpu);
		spin_lock_irqsave(&fq->lock, flags);
		fq_ring_free(cookie, fq);
		spin_unlock_irqrestore(&fq->lock, flags);
	}
}

static void queue_iova(struct iommu_dma_cookie *cookie,
		unsigned long pfn, unsigned long pages,
		struct list_head *freelist)
{
	struct iova_fq *fq;
	unsigned long flags;
	unsigned int idx;

	/*
	 * Order against the IOMMU driver's pagetable update from unmapping
	 * @pte, to guarantee that fq_flush_iotlb() observes that if called
	 * from a different CPU before we release the lock below. Full barrier
	 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially
	 * written fq state here.
	 */
	smp_mb();

	fq = raw_cpu_ptr(cookie->fq);
	spin_lock_irqsave(&fq->lock, flags);

	/*
	 * First remove all entries from the flush queue that have already been
	 * flushed out on another CPU. This makes the fq_full() check below less
	 * likely to be true.
	 */
	fq_ring_free(cookie, fq);

	if (fq_full(fq)) {
		fq_flush_iotlb(cookie);
		fq_ring_free(cookie, fq);
	}

	idx = fq_ring_add(fq);

	fq->entries[idx].iova_pfn = pfn;
	fq->entries[idx].pages = pages;
	fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt);
	list_splice(freelist, &fq->entries[idx].freelist);

	spin_unlock_irqrestore(&fq->lock, flags);

	/* Avoid false sharing as much as possible. */
	if (!atomic_read(&cookie->fq_timer_on) &&
	    !atomic_xchg(&cookie->fq_timer_on, 1))
		mod_timer(&cookie->fq_timer,
			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
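/*
 * In outline, the deferred-invalidation path that feeds these queues is:
 *
 *   __iommu_dma_unmap()
 *     iommu_unmap_fast()                - pagetables updated, no IOTLB sync
 *     iommu_dma_free_iova()
 *       queue_iova()                    - IOVA and freed pagetable pages parked
 *   ...later, from fq_flush_timeout() or when a ring fills:
 *     fq_flush_iotlb() + fq_ring_free() - IOTLB invalidated, IOVAs reusable
 *
 * The cost is a window in which stale IOTLB entries may still reference the
 * old mapping; the gain is far fewer global flushes.
 */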
static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
{
	int cpu, idx;

	if (!cookie->fq)
		return;

	del_timer_sync(&cookie->fq_timer);
	/* The IOVAs will be torn down separately, so just free our queued pages */
	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);

		fq_ring_for_each(idx, fq)
			put_pages_list(&fq->entries[idx].freelist);
	}

	free_percpu(cookie->fq);
}

/* sysfs updates are serialised by the mutex of the group owning @domain */
int iommu_dma_init_fq(struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_fq __percpu *queue;
	int i, cpu;

	if (cookie->fq_domain)
		return 0;

	atomic64_set(&cookie->fq_flush_start_cnt, 0);
	atomic64_set(&cookie->fq_flush_finish_cnt, 0);

	queue = alloc_percpu(struct iova_fq);
	if (!queue) {
		pr_warn("iova flush queue initialization failed\n");
		return -ENOMEM;
	}

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(queue, cpu);

		fq->head = 0;
		fq->tail = 0;

		spin_lock_init(&fq->lock);

		for (i = 0; i < IOVA_FQ_SIZE; i++)
			INIT_LIST_HEAD(&fq->entries[i].freelist);
	}

	cookie->fq = queue;

	timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
	atomic_set(&cookie->fq_timer_on, 0);
	/*
	 * Prevent incomplete fq state being observable. Pairs with path from
	 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova()
	 */
	smp_wmb();
	WRITE_ONCE(cookie->fq_domain, domain);
	return 0;
}

static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
{
	if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
		return cookie->iovad.granule;
	return PAGE_SIZE;
}

static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
{
	struct iommu_dma_cookie *cookie;

	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
	if (cookie) {
		INIT_LIST_HEAD(&cookie->msi_page_list);
		cookie->type = type;
	}
	return cookie;
}

/**
 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
 * @domain: IOMMU domain to prepare for DMA-API usage
 */
int iommu_get_dma_cookie(struct iommu_domain *domain)
{
	if (domain->iova_cookie)
		return -EEXIST;

	domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
	if (!domain->iova_cookie)
		return -ENOMEM;

	mutex_init(&domain->iova_cookie->mutex);
	return 0;
}
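/*
 * A cookie typically lives exactly as long as its domain: it is allocated
 * via iommu_get_dma_cookie() above (usually by the IOMMU core when a DMA
 * domain is set up) or iommu_get_msi_cookie() below (by users such as VFIO
 * that manage their own unmanaged domains), and torn down again in
 * iommu_put_dma_cookie() when the domain is freed.
 */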
/**
 * iommu_get_msi_cookie - Acquire just MSI remapping resources
 * @domain: IOMMU domain to prepare
 * @base: Start address of IOVA region for MSI mappings
 *
 * Users who manage their own IOVA allocation and do not want DMA API support,
 * but would still like to take advantage of automatic MSI remapping, can use
 * this to initialise their own domain appropriately. Users should reserve a
 * contiguous IOVA region, starting at @base, large enough to accommodate the
 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address
 * used by the devices attached to @domain.
 */
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
{
	struct iommu_dma_cookie *cookie;

	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;

	if (domain->iova_cookie)
		return -EEXIST;

	cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
	if (!cookie)
		return -ENOMEM;

	cookie->msi_iova = base;
	domain->iova_cookie = cookie;
	return 0;
}
EXPORT_SYMBOL(iommu_get_msi_cookie);

/**
 * iommu_put_dma_cookie - Release a domain's DMA mapping resources
 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
 *          iommu_get_msi_cookie()
 */
void iommu_put_dma_cookie(struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iommu_dma_msi_page *msi, *tmp;

	if (!cookie)
		return;

	if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
		iommu_dma_free_fq(cookie);
		put_iova_domain(&cookie->iovad);
	}

	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
		list_del(&msi->list);
		kfree(msi);
	}
	kfree(cookie);
	domain->iova_cookie = NULL;
}

/**
 * iommu_dma_get_resv_regions - Reserved region driver helper
 * @dev: Device from iommu_get_resv_regions()
 * @list: Reserved region list from iommu_get_resv_regions()
 *
 * IOMMU drivers can use this to implement their .get_resv_regions callback
 * for general non-IOMMU-specific reservations. Currently, this covers GICv3
 * ITS region reservation on ACPI based ARM platforms that may require HW MSI
 * reservation.
 */
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{

	if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode))
		iort_iommu_get_resv_regions(dev, list);

	if (dev->of_node)
		of_iommu_get_resv_regions(dev, list);
}
EXPORT_SYMBOL(iommu_dma_get_resv_regions);

static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie,
		phys_addr_t start, phys_addr_t end)
{
	struct iova_domain *iovad = &cookie->iovad;
	struct iommu_dma_msi_page *msi_page;
	int i, num_pages;

	start -= iova_offset(iovad, start);
	num_pages = iova_align(iovad, end - start) >> iova_shift(iovad);

	for (i = 0; i < num_pages; i++) {
		msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL);
		if (!msi_page)
			return -ENOMEM;

		msi_page->phys = start;
		msi_page->iova = start;
		INIT_LIST_HEAD(&msi_page->list);
		list_add(&msi_page->list, &cookie->msi_page_list);
		start += iovad->granule;
	}

	return 0;
}

static int iommu_dma_ranges_sort(void *priv, const struct list_head *a,
		const struct list_head *b)
{
	struct resource_entry *res_a = list_entry(a, typeof(*res_a), node);
	struct resource_entry *res_b = list_entry(b, typeof(*res_b), node);

	return res_a->res->start > res_b->res->start;
}
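/*
 * Reserving the host bridge's outbound MMIO windows, and the address space
 * not covered by its inbound dma-ranges/_DMA windows, keeps the IOVA
 * allocator from handing out addresses that the bridge would either decode
 * towards another device (peer-to-peer) or be unable to forward upstream to
 * the IOMMU at all.
 */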
static int iova_reserve_pci_windows(struct pci_dev *dev,
		struct iova_domain *iovad)
{
	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
	struct resource_entry *window;
	unsigned long lo, hi;
	phys_addr_t start = 0, end;

	resource_list_for_each_entry(window, &bridge->windows) {
		if (resource_type(window->res) != IORESOURCE_MEM)
			continue;

		lo = iova_pfn(iovad, window->res->start - window->offset);
		hi = iova_pfn(iovad, window->res->end - window->offset);
		reserve_iova(iovad, lo, hi);
	}

	/* Get reserved DMA windows from host bridge */
	list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort);
	resource_list_for_each_entry(window, &bridge->dma_ranges) {
		end = window->res->start - window->offset;
resv_iova:
		if (end > start) {
			lo = iova_pfn(iovad, start);
			hi = iova_pfn(iovad, end);
			reserve_iova(iovad, lo, hi);
		} else if (end < start) {
			/* DMA ranges should be non-overlapping */
			dev_err(&dev->dev,
				"Failed to reserve IOVA [%pa-%pa]\n",
				&start, &end);
			return -EINVAL;
		}

		start = window->res->end - window->offset + 1;
		/* If window is last entry */
		if (window->node.next == &bridge->dma_ranges &&
		    end != ~(phys_addr_t)0) {
			end = ~(phys_addr_t)0;
			goto resv_iova;
		}
	}

	return 0;
}

static int iova_reserve_iommu_regions(struct device *dev,
		struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct iommu_resv_region *region;
	LIST_HEAD(resv_regions);
	int ret = 0;

	if (dev_is_pci(dev)) {
		ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad);
		if (ret)
			return ret;
	}

	iommu_get_resv_regions(dev, &resv_regions);
	list_for_each_entry(region, &resv_regions, list) {
		unsigned long lo, hi;

		/* We ARE the software that manages these! */
		if (region->type == IOMMU_RESV_SW_MSI)
			continue;

		lo = iova_pfn(iovad, region->start);
		hi = iova_pfn(iovad, region->start + region->length - 1);
		reserve_iova(iovad, lo, hi);

		if (region->type == IOMMU_RESV_MSI)
			ret = cookie_init_hw_msi_region(cookie, region->start,
					region->start + region->length);
		if (ret)
			break;
	}
	iommu_put_resv_regions(dev, &resv_regions);

	return ret;
}

static bool dev_is_untrusted(struct device *dev)
{
	return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
}

static bool dev_use_swiotlb(struct device *dev, size_t size,
			    enum dma_data_direction dir)
{
	return IS_ENABLED(CONFIG_SWIOTLB) &&
		(dev_is_untrusted(dev) ||
		 dma_kmalloc_needs_bounce(dev, size, dir));
}

static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
			       int nents, enum dma_data_direction dir)
{
	struct scatterlist *s;
	int i;

	if (!IS_ENABLED(CONFIG_SWIOTLB))
		return false;

	if (dev_is_untrusted(dev))
		return true;

	/*
	 * If kmalloc() buffers are not DMA-safe for this device and
	 * direction, check the individual lengths in the sg list. If any
	 * element is deemed unsafe, use the swiotlb for bouncing.
	 */
	if (!dma_kmalloc_safe(dev, dir)) {
		for_each_sg(sg, s, nents, i)
			if (!dma_kmalloc_size_aligned(s->length))
				return true;
	}

	return false;
}
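/*
 * Roughly speaking, the helpers above opt a mapping into swiotlb bouncing in
 * two cases: the device is untrusted (e.g. an external PCI device), so data
 * adjacent to the buffer within the same IOVA granule must not become
 * visible to it, or the buffer came from a kmalloc cache too small to be
 * DMA-safe for a non-coherent device. iommu_dma_map_page() then only
 * actually bounces when the buffer is not already granule-aligned in both
 * address and size.
 */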
/**
 * iommu_dma_init_domain - Initialise a DMA mapping domain
 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
 * @base: IOVA at which the mappable address space starts
 * @limit: Last address of the IOVA space
 * @dev: Device the domain is being initialised for
 *
 * @base and @limit + 1 should be exact multiples of IOMMU page granularity to
 * avoid rounding surprises. If necessary, we reserve the page at address 0
 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
 * any change which could make prior IOVAs invalid will fail.
 */
static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
				 dma_addr_t limit, struct device *dev)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	unsigned long order, base_pfn;
	struct iova_domain *iovad;
	int ret;

	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
		return -EINVAL;

	iovad = &cookie->iovad;

	/* Use the smallest supported page size for IOVA granularity */
	order = __ffs(domain->pgsize_bitmap);
	base_pfn = max_t(unsigned long, 1, base >> order);

	/* Check the domain allows at least some access to the device... */
	if (domain->geometry.force_aperture) {
		if (base > domain->geometry.aperture_end ||
		    limit < domain->geometry.aperture_start) {
			pr_warn("specified DMA range outside IOMMU capability\n");
			return -EFAULT;
		}
		/* ...then finally give it a kicking to make sure it fits */
		base_pfn = max_t(unsigned long, base_pfn,
				 domain->geometry.aperture_start >> order);
	}

	/* start_pfn is always nonzero for an already-initialised domain */
	mutex_lock(&cookie->mutex);
	if (iovad->start_pfn) {
		if (1UL << order != iovad->granule ||
		    base_pfn != iovad->start_pfn) {
			pr_warn("Incompatible range for DMA domain\n");
			ret = -EFAULT;
			goto done_unlock;
		}

		ret = 0;
		goto done_unlock;
	}

	init_iova_domain(iovad, 1UL << order, base_pfn);
	ret = iova_domain_init_rcaches(iovad);
	if (ret)
		goto done_unlock;

	/* If the FQ fails we can simply fall back to strict mode */
	if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
	    (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
		domain->type = IOMMU_DOMAIN_DMA;

	ret = iova_reserve_iommu_regions(dev, domain);

done_unlock:
	mutex_unlock(&cookie->mutex);
	return ret;
}

/**
 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
 *                    page flags.
 * @dir: Direction of DMA transfer
 * @coherent: Is the DMA master cache-coherent?
 * @attrs: DMA attributes for the mapping
 *
 * Return: corresponding IOMMU API page protection flags
 */
static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
		unsigned long attrs)
{
	int prot = coherent ? IOMMU_CACHE : 0;

	if (attrs & DMA_ATTR_PRIVILEGED)
		prot |= IOMMU_PRIV;

	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return prot | IOMMU_READ | IOMMU_WRITE;
	case DMA_TO_DEVICE:
		return prot | IOMMU_READ;
	case DMA_FROM_DEVICE:
		return prot | IOMMU_WRITE;
	default:
		return 0;
	}
}

static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
		size_t size, u64 dma_limit, struct device *dev)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	unsigned long shift, iova_len, iova;

	if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
		cookie->msi_iova += size;
		return cookie->msi_iova - size;
	}

	shift = iova_shift(iovad);
	iova_len = size >> shift;

	dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);

	if (domain->geometry.force_aperture)
		dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);

	/*
	 * Try to use all the 32-bit PCI addresses first. The original SAC vs.
	 * DAC reasoning loses relevance with PCIe, but enough hardware and
	 * firmware bugs are still lurking out there that it's safest not to
	 * venture into the 64-bit space until necessary.
	 *
	 * If your device goes wrong after seeing the notice then likely either
	 * its driver is not setting DMA masks accurately, the hardware has
	 * some inherent bug in handling >32-bit addresses, or not all the
	 * expected address bits are wired up between the device and the IOMMU.
	 */
	if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) {
		iova = alloc_iova_fast(iovad, iova_len,
				       DMA_BIT_MASK(32) >> shift, false);
		if (iova)
			goto done;

		dev->iommu->pci_32bit_workaround = false;
		dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit));
	}

	iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true);
done:
	return (dma_addr_t)iova << shift;
}

static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
		dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
{
	struct iova_domain *iovad = &cookie->iovad;

	/* The MSI case is only ever cleaning up its most recent allocation */
	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
		cookie->msi_iova -= size;
	else if (gather && gather->queued)
		queue_iova(cookie, iova_pfn(iovad, iova),
				size >> iova_shift(iovad),
				&gather->freelist);
	else
		free_iova_fast(iovad, iova_pfn(iovad, iova),
				size >> iova_shift(iovad));
}

static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t iova_off = iova_offset(iovad, dma_addr);
	struct iommu_iotlb_gather iotlb_gather;
	size_t unmapped;

	dma_addr -= iova_off;
	size = iova_align(iovad, size + iova_off);
	iommu_iotlb_gather_init(&iotlb_gather);
	iotlb_gather.queued = READ_ONCE(cookie->fq_domain);

	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
	WARN_ON(unmapped != size);

	if (!iotlb_gather.queued)
		iommu_iotlb_sync(domain, &iotlb_gather);
	iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
}

static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
		size_t size, int prot, u64 dma_mask)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	size_t iova_off = iova_offset(iovad, phys);
	dma_addr_t iova;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
	    iommu_deferred_attach(dev, domain))
		return DMA_MAPPING_ERROR;

	size = iova_align(iovad, size + iova_off);

	iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev);
	if (!iova)
		return DMA_MAPPING_ERROR;

	if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
		iommu_dma_free_iova(cookie, iova, size, NULL);
		return DMA_MAPPING_ERROR;
	}
	return iova + iova_off;
}

static void __iommu_dma_free_pages(struct page **pages, int count)
{
	while (count--)
		__free_page(pages[count]);
	kvfree(pages);
}
static struct page **__iommu_dma_alloc_pages(struct device *dev,
		unsigned int count, unsigned long order_mask, gfp_t gfp)
{
	struct page **pages;
	unsigned int i = 0, nid = dev_to_node(dev);

	order_mask &= GENMASK(MAX_ORDER, 0);
	if (!order_mask)
		return NULL;

	pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return NULL;

	/* IOMMU can map any pages, so highmem can also be used here */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	while (count) {
		struct page *page = NULL;
		unsigned int order_size;

		/*
		 * Higher-order allocations are a convenience rather
		 * than a necessity, hence using __GFP_NORETRY until
		 * falling back to minimum-order allocations.
		 */
		for (order_mask &= GENMASK(__fls(count), 0);
		     order_mask; order_mask &= ~order_size) {
			unsigned int order = __fls(order_mask);
			gfp_t alloc_flags = gfp;

			order_size = 1U << order;
			if (order_mask > order_size)
				alloc_flags |= __GFP_NORETRY;
			page = alloc_pages_node(nid, alloc_flags, order);
			if (!page)
				continue;
			if (order)
				split_page(page, order);
			break;
		}
		if (!page) {
			__iommu_dma_free_pages(pages, i);
			return NULL;
		}
		count -= order_size;
		while (order_size--)
			pages[i++] = page++;
	}
	return pages;
}
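/*
 * A worked example for __iommu_dma_alloc_pages() above, assuming 4K
 * PAGE_SIZE and a domain whose pgsize_bitmap allows 4K and 2M mappings:
 * order_mask then has bits 0 and 9 set. For a 6MB request (count == 1536)
 * each pass first attempts an order-9 (2MB) block, with __GFP_NORETRY
 * because order 0 remains as a fallback, and only degrades to single pages
 * when the larger allocation fails.
 */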
/*
 * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
 * but an IOMMU which supports smaller pages might not map the whole thing.
 */
static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
		unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
	struct page **pages;
	dma_addr_t iova;
	ssize_t ret;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
	    iommu_deferred_attach(dev, domain))
		return NULL;

	min_size = alloc_sizes & -alloc_sizes;
	if (min_size < PAGE_SIZE) {
		min_size = PAGE_SIZE;
		alloc_sizes |= PAGE_SIZE;
	} else {
		size = ALIGN(size, min_size);
	}
	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
		alloc_sizes = min_size;

	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT,
					gfp);
	if (!pages)
		return NULL;

	size = iova_align(iovad, size);
	iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev);
	if (!iova)
		goto out_free_pages;

	/*
	 * Remove the zone/policy flags from the GFP - these are applied to the
	 * __iommu_dma_alloc_pages() but are not used for the supporting
	 * internal allocations that follow.
	 */
	gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP);

	if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp))
		goto out_free_iova;

	if (!(ioprot & IOMMU_CACHE)) {
		struct scatterlist *sg;
		int i;

		for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
			arch_dma_prep_coherent(sg_page(sg), sg->length);
	}

	ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot,
			   gfp);
	if (ret < 0 || ret < size)
		goto out_free_sg;

	sgt->sgl->dma_address = iova;
	sgt->sgl->dma_length = size;
	return pages;

out_free_sg:
	sg_free_table(sgt);
out_free_iova:
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_pages:
	__iommu_dma_free_pages(pages, count);
	return NULL;
}

static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
		unsigned long attrs)
{
	struct page **pages;
	struct sg_table sgt;
	void *vaddr;

	pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
						attrs);
	if (!pages)
		return NULL;
	*dma_handle = sgt.sgl->dma_address;
	sg_free_table(&sgt);
	vaddr = dma_common_pages_remap(pages, size, prot,
			__builtin_return_address(0));
	if (!vaddr)
		goto out_unmap;
	return vaddr;

out_unmap:
	__iommu_dma_unmap(dev, *dma_handle, size);
	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
	return NULL;
}

static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
		size_t size, enum dma_data_direction dir, gfp_t gfp,
		unsigned long attrs)
{
	struct dma_sgt_handle *sh;

	sh = kmalloc(sizeof(*sh), gfp);
	if (!sh)
		return NULL;

	sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
						    PAGE_KERNEL, attrs);
	if (!sh->pages) {
		kfree(sh);
		return NULL;
	}
	return &sh->sgt;
}

static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
		struct sg_table *sgt, enum dma_data_direction dir)
{
	struct dma_sgt_handle *sh = sgt_handle(sgt);

	__iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
	__iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
	sg_free_table(&sh->sgt);
	kfree(sh);
}

static void iommu_dma_sync_single_for_cpu(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	if (is_swiotlb_buffer(dev, phys))
		swiotlb_sync_single_for_cpu(dev, phys, size, dir);
}

static void iommu_dma_sync_single_for_device(struct device *dev,
		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir))
		return;

	phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
	if (is_swiotlb_buffer(dev, phys))
		swiotlb_sync_single_for_device(dev, phys, size, dir);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(phys, size, dir);
}
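/*
 * Note the mirrored ordering in the single-buffer helpers above: the
 * for_cpu variant makes CPU caches consistent before any bounce-buffer
 * contents are copied back, while the for_device variant copies into the
 * bounce buffer first and only then pushes it out of the CPU caches. The
 * scatterlist variants below simply dispatch per segment, keyed off the
 * sg_dma_is_swiotlb() marker set at map time.
 */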
static void iommu_dma_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (sg_dma_is_swiotlb(sgl))
		for_each_sg(sgl, sg, nelems, i)
			iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
						      sg->length, dir);
	else if (!dev_is_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
}

static void iommu_dma_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nelems,
		enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (sg_dma_is_swiotlb(sgl))
		for_each_sg(sgl, sg, nelems, i)
			iommu_dma_sync_single_for_device(dev,
							 sg_dma_address(sg),
							 sg->length, dir);
	else if (!dev_is_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
}

static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	bool coherent = dev_is_dma_coherent(dev);
	int prot = dma_info_to_prot(dir, coherent, attrs);
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	dma_addr_t iova, dma_mask = dma_get_mask(dev);

	/*
	 * If both the physical buffer start address and size are
	 * page aligned, we don't need to use a bounce page.
	 */
	if (dev_use_swiotlb(dev, size, dir) &&
	    iova_offset(iovad, phys | size)) {
		void *padding_start;
		size_t padding_size, aligned_size;

		if (!is_swiotlb_active(dev)) {
			dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n");
			return DMA_MAPPING_ERROR;
		}

		aligned_size = iova_align(iovad, size);
		phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
					      iova_mask(iovad), dir, attrs);

		if (phys == DMA_MAPPING_ERROR)
			return DMA_MAPPING_ERROR;

		/* Cleanup the padding area. */
		padding_start = phys_to_virt(phys);
		padding_size = aligned_size;

		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
		    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) {
			padding_start += size;
			padding_size -= size;
		}

		memset(padding_start, 0, padding_size);
	}

	if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(phys, size, dir);

	iova = __iommu_dma_map(dev, phys, size, prot, dma_mask);
	if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
	return iova;
}

static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	phys_addr_t phys;

	phys = iommu_iova_to_phys(domain, dma_handle);
	if (WARN_ON(!phys))
		return;

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(phys, size, dir);

	__iommu_dma_unmap(dev, dma_handle, size);

	if (unlikely(is_swiotlb_buffer(dev, phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
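/*
 * The teardown order in iommu_dma_unmap_page() above is deliberate: CPU
 * caches are made consistent and the IOVA mapping is removed while the
 * bounce slot (if one was used) still holds the device-written data, and
 * only then is the swiotlb slot released, which is also what copies any
 * DMA_FROM_DEVICE data back to the original buffer.
 */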
/*
 * Prepare a successfully-mapped scatterlist to give back to the caller.
 *
 * At this point the segments are already laid out by iommu_dma_map_sg() to
 * avoid individually crossing any boundaries, so we merely need to check a
 * segment's start address to avoid concatenating across one.
 */
static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
		dma_addr_t dma_addr)
{
	struct scatterlist *s, *cur = sg;
	unsigned long seg_mask = dma_get_seg_boundary(dev);
	unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
	int i, count = 0;

	for_each_sg(sg, s, nents, i) {
		/* Restore this segment's original unaligned fields first */
		dma_addr_t s_dma_addr = sg_dma_address(s);
		unsigned int s_iova_off = sg_dma_address(s);
		unsigned int s_length = sg_dma_len(s);
		unsigned int s_iova_len = s->length;

		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;

		if (sg_dma_is_bus_address(s)) {
			if (i > 0)
				cur = sg_next(cur);

			sg_dma_unmark_bus_address(s);
			sg_dma_address(cur) = s_dma_addr;
			sg_dma_len(cur) = s_length;
			sg_dma_mark_bus_address(cur);
			count++;
			cur_len = 0;
			continue;
		}

		s->offset += s_iova_off;
		s->length = s_length;

		/*
		 * Now fill in the real DMA data. If...
		 * - there is a valid output segment to append to
		 * - and this segment starts on an IOVA page boundary
		 * - but doesn't fall at a segment boundary
		 * - and wouldn't make the resulting output segment too long
		 */
		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
		    (max_len - cur_len >= s_length)) {
			/* ...then concatenate it with the previous one */
			cur_len += s_length;
		} else {
			/* Otherwise start the next output segment */
			if (i > 0)
				cur = sg_next(cur);
			cur_len = s_length;
			count++;

			sg_dma_address(cur) = dma_addr + s_iova_off;
		}

		sg_dma_len(cur) = cur_len;
		dma_addr += s_iova_len;

		if (s_length + s_iova_off < s_iova_len)
			cur_len = 0;
	}
	return count;
}
/*
 * If mapping failed, then just restore the original list,
 * but making sure the DMA fields are invalidated.
 */
static void __invalidate_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i) {
		if (sg_dma_is_bus_address(s)) {
			sg_dma_unmark_bus_address(s);
		} else {
			if (sg_dma_address(s) != DMA_MAPPING_ERROR)
				s->offset += sg_dma_address(s);
			if (sg_dma_len(s))
				s->length = sg_dma_len(s);
		}
		sg_dma_address(s) = DMA_MAPPING_ERROR;
		sg_dma_len(s) = 0;
	}
}

static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nents, i)
		iommu_dma_unmap_page(dev, sg_dma_address(s),
				sg_dma_len(s), dir, attrs);
}

static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *s;
	int i;

	sg_dma_mark_swiotlb(sg);

	for_each_sg(sg, s, nents, i) {
		sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s),
				s->offset, s->length, dir, attrs);
		if (sg_dma_address(s) == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(s) = s->length;
	}

	return nents;

out_unmap:
	iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return -EIO;
}
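/*
 * To illustrate the offset-stashing trick used by iommu_dma_map_sg() below,
 * assume a 4K IOVA granule and a segment at page offset 0x804 with length
 * 0x1000: the sub-granule offset (0x804) is stashed in sg_dma_address() and
 * the original length in sg_dma_len(), while s->offset/s->length are
 * rewritten to the granule-aligned 0x0/0x2000 that the IOMMU driver actually
 * maps. __finalise_sg() later restores the original fields and adds the
 * stashed offset onto the allocated IOVA.
 */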
/*
 * The DMA API client is passing in a scatterlist which could describe
 * any old buffer layout, but the IOMMU API requires everything to be
 * aligned to IOMMU pages. Hence the need for this complicated bit of
 * impedance-matching, to be able to hand off a suitably-aligned list,
 * but still preserve the original offsets and sizes for the caller.
 */
static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iova_domain *iovad = &cookie->iovad;
	struct scatterlist *s, *prev = NULL;
	int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs);
	struct pci_p2pdma_map_state p2pdma_state = {};
	enum pci_p2pdma_map_type map;
	dma_addr_t iova;
	size_t iova_len = 0;
	unsigned long mask = dma_get_seg_boundary(dev);
	ssize_t ret;
	int i;

	if (static_branch_unlikely(&iommu_deferred_attach_enabled)) {
		ret = iommu_deferred_attach(dev, domain);
		if (ret)
			goto out;
	}

	if (dev_use_sg_swiotlb(dev, sg, nents, dir))
		return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		iommu_dma_sync_sg_for_device(dev, sg, nents, dir);

	/*
	 * Work out how much IOVA space we need, and align the segments to
	 * IOVA granules for the IOMMU driver to handle. With some clever
	 * trickery we can modify the list in-place, but reversibly, by
	 * stashing the unaligned parts in the as-yet-unused DMA fields.
	 */
	for_each_sg(sg, s, nents, i) {
		size_t s_iova_off = iova_offset(iovad, s->offset);
		size_t s_length = s->length;
		size_t pad_len = (mask - iova_len + 1) & mask;

		if (is_pci_p2pdma_page(sg_page(s))) {
			map = pci_p2pdma_map_segment(&p2pdma_state, dev, s);
			switch (map) {
			case PCI_P2PDMA_MAP_BUS_ADDR:
				/*
				 * iommu_map_sg() will skip this segment as
				 * it is marked as a bus address,
				 * __finalise_sg() will copy the dma address
				 * into the output segment.
				 */
				continue;
			case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
				/*
				 * Mapping through host bridge should be
				 * mapped with regular IOVAs, thus we
				 * do nothing here and continue below.
				 */
				break;
			default:
				ret = -EREMOTEIO;
				goto out_restore_sg;
			}
		}

		sg_dma_address(s) = s_iova_off;
		sg_dma_len(s) = s_length;
		s->offset -= s_iova_off;
		s_length = iova_align(iovad, s_length + s_iova_off);
		s->length = s_length;

		/*
		 * Due to the alignment of our single IOVA allocation, we can
		 * depend on these assumptions about the segment boundary mask:
		 * - If mask size >= IOVA size, then the IOVA range cannot
		 *   possibly fall across a boundary, so we don't care.
		 * - If mask size < IOVA size, then the IOVA range must start
		 *   exactly on a boundary, therefore we can lay things out
		 *   based purely on segment lengths without needing to know
		 *   the actual addresses beforehand.
		 * - The mask must be a power of 2, so pad_len == 0 if
		 *   iova_len == 0, thus we cannot dereference prev the first
		 *   time through here (i.e. before it has a meaningful value).
		 */
		if (pad_len && pad_len < s_length - 1) {
			prev->length += pad_len;
			iova_len += pad_len;
		}

		iova_len += s_length;
		prev = s;
	}

	if (!iova_len)
		return __finalise_sg(dev, sg, nents, 0);

	iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev);
	if (!iova) {
		ret = -ENOMEM;
		goto out_restore_sg;
	}

	/*
	 * We'll leave any physical concatenation to the IOMMU driver's
	 * implementation - it knows better than we do.
	 */
	ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC);
	if (ret < 0 || ret < iova_len)
		goto out_free_iova;

	return __finalise_sg(dev, sg, nents, iova);

out_free_iova:
	iommu_dma_free_iova(cookie, iova, iova_len, NULL);
out_restore_sg:
	__invalidate_sg(sg, nents);
out:
	if (ret != -ENOMEM && ret != -EREMOTEIO)
		return -EINVAL;
	return ret;
}
static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t end = 0, start;
	struct scatterlist *tmp;
	int i;

	if (sg_dma_is_swiotlb(sg)) {
		iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs);
		return;
	}

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir);

	/*
	 * The scatterlist segments are mapped into a single
	 * contiguous IOVA allocation, the start and end points
	 * just have to be determined.
	 */
	for_each_sg(sg, tmp, nents, i) {
		if (sg_dma_is_bus_address(tmp)) {
			sg_dma_unmark_bus_address(tmp);
			continue;
		}

		if (sg_dma_len(tmp) == 0)
			break;

		start = sg_dma_address(tmp);
		break;
	}

	nents -= i;
	for_each_sg(tmp, tmp, nents, i) {
		if (sg_dma_is_bus_address(tmp)) {
			sg_dma_unmark_bus_address(tmp);
			continue;
		}

		if (sg_dma_len(tmp) == 0)
			break;

		end = sg_dma_address(tmp) + sg_dma_len(tmp);
	}

	if (end)
		__iommu_dma_unmap(dev, start, end - start);
}

static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	return __iommu_dma_map(dev, phys, size,
			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
			dma_get_mask(dev));
}

static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
}

static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
{
	size_t alloc_size = PAGE_ALIGN(size);
	int count = alloc_size >> PAGE_SHIFT;
	struct page *page = NULL, **pages = NULL;

	/* Non-coherent atomic allocation? Easy */
	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    dma_free_from_pool(dev, cpu_addr, alloc_size))
		return;

	if (is_vmalloc_addr(cpu_addr)) {
		/*
		 * If the address is remapped, then it's either non-coherent
		 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
		 */
		pages = dma_common_find_pages(cpu_addr);
		if (!pages)
			page = vmalloc_to_page(cpu_addr);
		dma_common_free_remap(cpu_addr, alloc_size);
	} else {
		/* Lowmem means a coherent atomic or CMA allocation */
		page = virt_to_page(cpu_addr);
	}

	if (pages)
		__iommu_dma_free_pages(pages, count);
	if (page)
		dma_free_contiguous(dev, page, alloc_size);
}

static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t handle, unsigned long attrs)
{
	__iommu_dma_unmap(dev, handle, size);
	__iommu_dma_free(dev, size, cpu_addr);
}

static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
		struct page **pagep, gfp_t gfp, unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	size_t alloc_size = PAGE_ALIGN(size);
	int node = dev_to_node(dev);
	struct page *page = NULL;
	void *cpu_addr;

	page = dma_alloc_contiguous(dev, alloc_size, gfp);
	if (!page)
		page = alloc_pages_node(node, gfp, get_order(alloc_size));
	if (!page)
		return NULL;

	if (!coherent || PageHighMem(page)) {
		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);

		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
				prot, __builtin_return_address(0));
		if (!cpu_addr)
			goto out_free_pages;

		if (!coherent)
			arch_dma_prep_coherent(page, size);
	} else {
		cpu_addr = page_address(page);
	}

	*pagep = page;
	memset(cpu_addr, 0, alloc_size);
	return cpu_addr;
out_free_pages:
	dma_free_contiguous(dev, page, alloc_size);
	return NULL;
}
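/*
 * iommu_dma_alloc() below chooses between three backends, roughly:
 *  - blocking and not DMA_ATTR_FORCE_CONTIGUOUS: iommu_dma_alloc_remap(),
 *    i.e. a vmap of potentially discontiguous pages;
 *  - non-blocking and non-coherent (with CONFIG_DMA_DIRECT_REMAP): the
 *    atomic pool, since remapping cannot be done in atomic context;
 *  - otherwise: a physically contiguous buffer from iommu_dma_alloc_pages()
 *    above.
 * Whichever backend is used, the result is then mapped via __iommu_dma_map().
 */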
static void *iommu_dma_alloc(struct device *dev, size_t size,
		dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
	bool coherent = dev_is_dma_coherent(dev);
	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
	struct page *page = NULL;
	void *cpu_addr;

	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp) &&
	    !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
		return iommu_dma_alloc_remap(dev, size, handle, gfp,
				dma_pgprot(dev, PAGE_KERNEL, attrs), attrs);
	}

	if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
	    !gfpflags_allow_blocking(gfp) && !coherent)
		page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
					   gfp, NULL);
	else
		cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
	if (!cpu_addr)
		return NULL;

	*handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot,
			dev->coherent_dma_mask);
	if (*handle == DMA_MAPPING_ERROR) {
		__iommu_dma_free(dev, size, cpu_addr);
		return NULL;
	}

	return cpu_addr;
}

static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn, off = vma->vm_pgoff;
	int ret;

	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);

	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off >= nr_pages || vma_pages(vma) > nr_pages - off)
		return -ENXIO;

	if (is_vmalloc_addr(cpu_addr)) {
		struct page **pages = dma_common_find_pages(cpu_addr);

		if (pages)
			return vm_map_pages(vma, pages, nr_pages);
		pfn = vmalloc_to_pfn(cpu_addr);
	} else {
		pfn = page_to_pfn(virt_to_page(cpu_addr));
	}

	return remap_pfn_range(vma, vma->vm_start, pfn + off,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}

static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
		void *cpu_addr, dma_addr_t dma_addr, size_t size,
		unsigned long attrs)
{
	struct page *page;
	int ret;

	if (is_vmalloc_addr(cpu_addr)) {
		struct page **pages = dma_common_find_pages(cpu_addr);

		if (pages) {
			return sg_alloc_table_from_pages(sgt, pages,
					PAGE_ALIGN(size) >> PAGE_SHIFT,
					0, size, GFP_KERNEL);
		}

		page = vmalloc_to_page(cpu_addr);
	} else {
		page = virt_to_page(cpu_addr);
	}

	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
	if (!ret)
		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
	return ret;
}

static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_dma_domain(dev);

	return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
}

static size_t iommu_dma_opt_mapping_size(void)
{
	return iova_rcache_range();
}
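/*
 * Once iommu_setup_dma_ops() further down has installed these ops for a
 * device, an ordinary streaming-DMA call reaches this file roughly as:
 *
 *   dma_map_single()
 *     dma_map_page_attrs()
 *       iommu_dma_ops.map_page == iommu_dma_map_page()
 */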
static const struct dma_map_ops iommu_dma_ops = {
	.flags = DMA_F_PCI_P2PDMA_SUPPORTED,
	.alloc = iommu_dma_alloc,
	.free = iommu_dma_free,
	.alloc_pages = dma_common_alloc_pages,
	.free_pages = dma_common_free_pages,
	.alloc_noncontiguous = iommu_dma_alloc_noncontiguous,
	.free_noncontiguous = iommu_dma_free_noncontiguous,
	.mmap = iommu_dma_mmap,
	.get_sgtable = iommu_dma_get_sgtable,
	.map_page = iommu_dma_map_page,
	.unmap_page = iommu_dma_unmap_page,
	.map_sg = iommu_dma_map_sg,
	.unmap_sg = iommu_dma_unmap_sg,
	.sync_single_for_cpu = iommu_dma_sync_single_for_cpu,
	.sync_single_for_device = iommu_dma_sync_single_for_device,
	.sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu,
	.sync_sg_for_device = iommu_dma_sync_sg_for_device,
	.map_resource = iommu_dma_map_resource,
	.unmap_resource = iommu_dma_unmap_resource,
	.get_merge_boundary = iommu_dma_get_merge_boundary,
	.opt_mapping_size = iommu_dma_opt_mapping_size,
};

/*
 * The IOMMU core code allocates the default DMA domain, which the underlying
 * IOMMU driver needs to support via the dma-iommu layer.
 */
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (!domain)
		goto out_err;

	/*
	 * The IOMMU core code allocates the default DMA domain, which the
	 * underlying IOMMU driver needs to support via the dma-iommu layer.
	 */
	if (iommu_is_dma_domain(domain)) {
		if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
			goto out_err;
		dev->dma_ops = &iommu_dma_ops;
	}

	return;
out_err:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
}
EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);

static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
		phys_addr_t msi_addr, struct iommu_domain *domain)
{
	struct iommu_dma_cookie *cookie = domain->iova_cookie;
	struct iommu_dma_msi_page *msi_page;
	dma_addr_t iova;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
	size_t size = cookie_msi_granule(cookie);

	msi_addr &= ~(phys_addr_t)(size - 1);
	list_for_each_entry(msi_page, &cookie->msi_page_list, list)
		if (msi_page->phys == msi_addr)
			return msi_page;

	msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
	if (!msi_page)
		return NULL;

	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
	if (!iova)
		goto out_free_page;

	if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL))
		goto out_free_iova;

	INIT_LIST_HEAD(&msi_page->list);
	msi_page->phys = msi_addr;
	msi_page->iova = iova;
	list_add(&msi_page->list, &cookie->msi_page_list);
	return msi_page;

out_free_iova:
	iommu_dma_free_iova(cookie, iova, size, NULL);
out_free_page:
	kfree(msi_page);
	return NULL;
}
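/*
 * The MSI pieces below work in two steps: iommu_dma_prepare_msi() maps the
 * doorbell's physical address to an IOVA (one granule per doorbell, cached
 * on the cookie's msi_page_list via iommu_dma_get_msi_page() above), and
 * iommu_dma_compose_msi_msg() later rewrites the address in the MSI message
 * so that the device's write targets that IOVA rather than the raw physical
 * address.
 */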
/**
 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
 * @desc: MSI descriptor, will store the MSI page
 * @msi_addr: MSI target address to be mapped
 *
 * Return: 0 on success or negative error code if the mapping failed.
 */
int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
{
	struct device *dev = msi_desc_to_dev(desc);
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	struct iommu_dma_msi_page *msi_page;
	static DEFINE_MUTEX(msi_prepare_lock); /* see below */

	if (!domain || !domain->iova_cookie) {
		desc->iommu_cookie = NULL;
		return 0;
	}

	/*
	 * In fact the whole prepare operation should already be serialised by
	 * irq_domain_mutex further up the callchain, but that's pretty subtle
	 * on its own, so consider this locking as failsafe documentation...
	 */
	mutex_lock(&msi_prepare_lock);
	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
	mutex_unlock(&msi_prepare_lock);

	msi_desc_set_iommu_cookie(desc, msi_page);

	if (!msi_page)
		return -ENOMEM;
	return 0;
}

/**
 * iommu_dma_compose_msi_msg() - Apply translation to an MSI message
 * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
 * @msg: MSI message containing target physical address
 */
void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	struct device *dev = msi_desc_to_dev(desc);
	const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
	const struct iommu_dma_msi_page *msi_page;

	msi_page = msi_desc_get_iommu_cookie(desc);

	if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
		return;

	msg->address_hi = upper_32_bits(msi_page->iova);
	msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
	msg->address_lo += lower_32_bits(msi_page->iova);
}

static int iommu_dma_init(void)
{
	if (is_kdump_kernel())
		static_branch_enable(&iommu_deferred_attach_enabled);

	return iova_cache_get();
}
arch_initcall(iommu_dma_init);