1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * A fairly generic DMA-API to IOMMU-API glue layer. 4 * 5 * Copyright (C) 2014-2015 ARM Ltd. 6 * 7 * based in part on arch/arm/mm/dma-mapping.c: 8 * Copyright (C) 2000-2004 Russell King 9 */ 10 11 #include <linux/acpi_iort.h> 12 #include <linux/atomic.h> 13 #include <linux/crash_dump.h> 14 #include <linux/device.h> 15 #include <linux/dma-direct.h> 16 #include <linux/dma-map-ops.h> 17 #include <linux/gfp.h> 18 #include <linux/huge_mm.h> 19 #include <linux/iommu.h> 20 #include <linux/iova.h> 21 #include <linux/irq.h> 22 #include <linux/list_sort.h> 23 #include <linux/memremap.h> 24 #include <linux/mm.h> 25 #include <linux/mutex.h> 26 #include <linux/of_iommu.h> 27 #include <linux/pci.h> 28 #include <linux/scatterlist.h> 29 #include <linux/spinlock.h> 30 #include <linux/swiotlb.h> 31 #include <linux/vmalloc.h> 32 33 #include "dma-iommu.h" 34 35 struct iommu_dma_msi_page { 36 struct list_head list; 37 dma_addr_t iova; 38 phys_addr_t phys; 39 }; 40 41 enum iommu_dma_cookie_type { 42 IOMMU_DMA_IOVA_COOKIE, 43 IOMMU_DMA_MSI_COOKIE, 44 }; 45 46 enum iommu_dma_queue_type { 47 IOMMU_DMA_OPTS_PER_CPU_QUEUE, 48 IOMMU_DMA_OPTS_SINGLE_QUEUE, 49 }; 50 51 struct iommu_dma_options { 52 enum iommu_dma_queue_type qt; 53 size_t fq_size; 54 unsigned int fq_timeout; 55 }; 56 57 struct iommu_dma_cookie { 58 enum iommu_dma_cookie_type type; 59 union { 60 /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ 61 struct { 62 struct iova_domain iovad; 63 /* Flush queue */ 64 union { 65 struct iova_fq *single_fq; 66 struct iova_fq __percpu *percpu_fq; 67 }; 68 /* Number of TLB flushes that have been started */ 69 atomic64_t fq_flush_start_cnt; 70 /* Number of TLB flushes that have been finished */ 71 atomic64_t fq_flush_finish_cnt; 72 /* Timer to regularily empty the flush queues */ 73 struct timer_list fq_timer; 74 /* 1 when timer is active, 0 when not */ 75 atomic_t fq_timer_on; 76 }; 77 /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ 78 dma_addr_t msi_iova; 79 }; 80 struct list_head msi_page_list; 81 82 /* Domain for flush queue callback; NULL if flush queue not in use */ 83 struct iommu_domain *fq_domain; 84 /* Options for dma-iommu use */ 85 struct iommu_dma_options options; 86 struct mutex mutex; 87 }; 88 89 static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); 90 bool iommu_dma_forcedac __read_mostly; 91 92 static int __init iommu_dma_forcedac_setup(char *str) 93 { 94 int ret = kstrtobool(str, &iommu_dma_forcedac); 95 96 if (!ret && iommu_dma_forcedac) 97 pr_info("Forcing DAC for PCI devices\n"); 98 return ret; 99 } 100 early_param("iommu.forcedac", iommu_dma_forcedac_setup); 101 102 /* Number of entries per flush queue */ 103 #define IOVA_DEFAULT_FQ_SIZE 256 104 #define IOVA_SINGLE_FQ_SIZE 32768 105 106 /* Timeout (in ms) after which entries are flushed from the queue */ 107 #define IOVA_DEFAULT_FQ_TIMEOUT 10 108 #define IOVA_SINGLE_FQ_TIMEOUT 1000 109 110 /* Flush queue entry for deferred flushing */ 111 struct iova_fq_entry { 112 unsigned long iova_pfn; 113 unsigned long pages; 114 struct list_head freelist; 115 u64 counter; /* Flush counter when this entry was added */ 116 }; 117 118 /* Per-CPU flush queue structure */ 119 struct iova_fq { 120 spinlock_t lock; 121 unsigned int head, tail; 122 unsigned int mod_mask; 123 struct iova_fq_entry entries[]; 124 }; 125 126 #define fq_ring_for_each(i, fq) \ 127 for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask) 128 129 static inline bool fq_full(struct iova_fq *fq) 130 { 131 assert_spin_locked(&fq->lock); 132 return (((fq->tail + 1) & fq->mod_mask) == fq->head); 133 } 134 135 static inline unsigned int fq_ring_add(struct iova_fq *fq) 136 { 137 unsigned int idx = fq->tail; 138 139 assert_spin_locked(&fq->lock); 140 141 fq->tail = (idx + 1) & fq->mod_mask; 142 143 return idx; 144 } 145 146 static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq) 147 { 148 u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); 149 unsigned int idx; 150 151 assert_spin_locked(&fq->lock); 152 153 fq_ring_for_each(idx, fq) { 154 155 if (fq->entries[idx].counter >= counter) 156 break; 157 158 put_pages_list(&fq->entries[idx].freelist); 159 free_iova_fast(&cookie->iovad, 160 fq->entries[idx].iova_pfn, 161 fq->entries[idx].pages); 162 163 fq->head = (fq->head + 1) & fq->mod_mask; 164 } 165 } 166 167 static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) 168 { 169 unsigned long flags; 170 171 spin_lock_irqsave(&fq->lock, flags); 172 fq_ring_free_locked(cookie, fq); 173 spin_unlock_irqrestore(&fq->lock, flags); 174 } 175 176 static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) 177 { 178 atomic64_inc(&cookie->fq_flush_start_cnt); 179 cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain); 180 atomic64_inc(&cookie->fq_flush_finish_cnt); 181 } 182 183 static void fq_flush_timeout(struct timer_list *t) 184 { 185 struct iommu_dma_cookie *cookie = from_timer(cookie, t, fq_timer); 186 int cpu; 187 188 atomic_set(&cookie->fq_timer_on, 0); 189 fq_flush_iotlb(cookie); 190 191 if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) { 192 fq_ring_free(cookie, cookie->single_fq); 193 } else { 194 for_each_possible_cpu(cpu) 195 fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu)); 196 } 197 } 198 199 static void queue_iova(struct iommu_dma_cookie *cookie, 200 unsigned long pfn, unsigned long pages, 201 struct list_head *freelist) 202 { 203 struct iova_fq *fq; 204 unsigned long flags; 205 unsigned int idx; 206 207 /* 208 * Order against the IOMMU driver's pagetable update from unmapping 209 * @pte, to guarantee that fq_flush_iotlb() observes that if called 210 * from a different CPU before we release the lock below. Full barrier 211 * so it also pairs with iommu_dma_init_fq() to avoid seeing partially 212 * written fq state here. 213 */ 214 smp_mb(); 215 216 if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) 217 fq = cookie->single_fq; 218 else 219 fq = raw_cpu_ptr(cookie->percpu_fq); 220 221 spin_lock_irqsave(&fq->lock, flags); 222 223 /* 224 * First remove all entries from the flush queue that have already been 225 * flushed out on another CPU. This makes the fq_full() check below less 226 * likely to be true. 227 */ 228 fq_ring_free_locked(cookie, fq); 229 230 if (fq_full(fq)) { 231 fq_flush_iotlb(cookie); 232 fq_ring_free_locked(cookie, fq); 233 } 234 235 idx = fq_ring_add(fq); 236 237 fq->entries[idx].iova_pfn = pfn; 238 fq->entries[idx].pages = pages; 239 fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); 240 list_splice(freelist, &fq->entries[idx].freelist); 241 242 spin_unlock_irqrestore(&fq->lock, flags); 243 244 /* Avoid false sharing as much as possible. */ 245 if (!atomic_read(&cookie->fq_timer_on) && 246 !atomic_xchg(&cookie->fq_timer_on, 1)) 247 mod_timer(&cookie->fq_timer, 248 jiffies + msecs_to_jiffies(cookie->options.fq_timeout)); 249 } 250 251 static void iommu_dma_free_fq_single(struct iova_fq *fq) 252 { 253 int idx; 254 255 fq_ring_for_each(idx, fq) 256 put_pages_list(&fq->entries[idx].freelist); 257 vfree(fq); 258 } 259 260 static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq) 261 { 262 int cpu, idx; 263 264 /* The IOVAs will be torn down separately, so just free our queued pages */ 265 for_each_possible_cpu(cpu) { 266 struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu); 267 268 fq_ring_for_each(idx, fq) 269 put_pages_list(&fq->entries[idx].freelist); 270 } 271 272 free_percpu(percpu_fq); 273 } 274 275 static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) 276 { 277 if (!cookie->fq_domain) 278 return; 279 280 del_timer_sync(&cookie->fq_timer); 281 if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) 282 iommu_dma_free_fq_single(cookie->single_fq); 283 else 284 iommu_dma_free_fq_percpu(cookie->percpu_fq); 285 } 286 287 static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size) 288 { 289 int i; 290 291 fq->head = 0; 292 fq->tail = 0; 293 fq->mod_mask = fq_size - 1; 294 295 spin_lock_init(&fq->lock); 296 297 for (i = 0; i < fq_size; i++) 298 INIT_LIST_HEAD(&fq->entries[i].freelist); 299 } 300 301 static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie) 302 { 303 size_t fq_size = cookie->options.fq_size; 304 struct iova_fq *queue; 305 306 queue = vmalloc(struct_size(queue, entries, fq_size)); 307 if (!queue) 308 return -ENOMEM; 309 iommu_dma_init_one_fq(queue, fq_size); 310 cookie->single_fq = queue; 311 312 return 0; 313 } 314 315 static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie) 316 { 317 size_t fq_size = cookie->options.fq_size; 318 struct iova_fq __percpu *queue; 319 int cpu; 320 321 queue = __alloc_percpu(struct_size(queue, entries, fq_size), 322 __alignof__(*queue)); 323 if (!queue) 324 return -ENOMEM; 325 326 for_each_possible_cpu(cpu) 327 iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size); 328 cookie->percpu_fq = queue; 329 return 0; 330 } 331 332 /* sysfs updates are serialised by the mutex of the group owning @domain */ 333 int iommu_dma_init_fq(struct iommu_domain *domain) 334 { 335 struct iommu_dma_cookie *cookie = domain->iova_cookie; 336 int rc; 337 338 if (cookie->fq_domain) 339 return 0; 340 341 atomic64_set(&cookie->fq_flush_start_cnt, 0); 342 atomic64_set(&cookie->fq_flush_finish_cnt, 0); 343 344 if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) 345 rc = iommu_dma_init_fq_single(cookie); 346 else 347 rc = iommu_dma_init_fq_percpu(cookie); 348 349 if (rc) { 350 pr_warn("iova flush queue initialization failed\n"); 351 return -ENOMEM; 352 } 353 354 timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); 355 atomic_set(&cookie->fq_timer_on, 0); 356 /* 357 * Prevent incomplete fq state being observable. Pairs with path from 358 * __iommu_dma_unmap() through iommu_dma_free_iova() to queue_iova() 359 */ 360 smp_wmb(); 361 WRITE_ONCE(cookie->fq_domain, domain); 362 return 0; 363 } 364 365 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) 366 { 367 if (cookie->type == IOMMU_DMA_IOVA_COOKIE) 368 return cookie->iovad.granule; 369 return PAGE_SIZE; 370 } 371 372 static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) 373 { 374 struct iommu_dma_cookie *cookie; 375 376 cookie = kzalloc(sizeof(*cookie), GFP_KERNEL); 377 if (cookie) { 378 INIT_LIST_HEAD(&cookie->msi_page_list); 379 cookie->type = type; 380 } 381 return cookie; 382 } 383 384 /** 385 * iommu_get_dma_cookie - Acquire DMA-API resources for a domain 386 * @domain: IOMMU domain to prepare for DMA-API usage 387 */ 388 int iommu_get_dma_cookie(struct iommu_domain *domain) 389 { 390 if (domain->iova_cookie) 391 return -EEXIST; 392 393 domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE); 394 if (!domain->iova_cookie) 395 return -ENOMEM; 396 397 mutex_init(&domain->iova_cookie->mutex); 398 return 0; 399 } 400 401 /** 402 * iommu_get_msi_cookie - Acquire just MSI remapping resources 403 * @domain: IOMMU domain to prepare 404 * @base: Start address of IOVA region for MSI mappings 405 * 406 * Users who manage their own IOVA allocation and do not want DMA API support, 407 * but would still like to take advantage of automatic MSI remapping, can use 408 * this to initialise their own domain appropriately. Users should reserve a 409 * contiguous IOVA region, starting at @base, large enough to accommodate the 410 * number of PAGE_SIZE mappings necessary to cover every MSI doorbell address 411 * used by the devices attached to @domain. 412 */ 413 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) 414 { 415 struct iommu_dma_cookie *cookie; 416 417 if (domain->type != IOMMU_DOMAIN_UNMANAGED) 418 return -EINVAL; 419 420 if (domain->iova_cookie) 421 return -EEXIST; 422 423 cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE); 424 if (!cookie) 425 return -ENOMEM; 426 427 cookie->msi_iova = base; 428 domain->iova_cookie = cookie; 429 return 0; 430 } 431 EXPORT_SYMBOL(iommu_get_msi_cookie); 432 433 /** 434 * iommu_put_dma_cookie - Release a domain's DMA mapping resources 435 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or 436 * iommu_get_msi_cookie() 437 */ 438 void iommu_put_dma_cookie(struct iommu_domain *domain) 439 { 440 struct iommu_dma_cookie *cookie = domain->iova_cookie; 441 struct iommu_dma_msi_page *msi, *tmp; 442 443 if (!cookie) 444 return; 445 446 if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) { 447 iommu_dma_free_fq(cookie); 448 put_iova_domain(&cookie->iovad); 449 } 450 451 list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) { 452 list_del(&msi->list); 453 kfree(msi); 454 } 455 kfree(cookie); 456 domain->iova_cookie = NULL; 457 } 458 459 /** 460 * iommu_dma_get_resv_regions - Reserved region driver helper 461 * @dev: Device from iommu_get_resv_regions() 462 * @list: Reserved region list from iommu_get_resv_regions() 463 * 464 * IOMMU drivers can use this to implement their .get_resv_regions callback 465 * for general non-IOMMU-specific reservations. Currently, this covers GICv3 466 * ITS region reservation on ACPI based ARM platforms that may require HW MSI 467 * reservation. 468 */ 469 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) 470 { 471 472 if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode)) 473 iort_iommu_get_resv_regions(dev, list); 474 475 if (dev->of_node) 476 of_iommu_get_resv_regions(dev, list); 477 } 478 EXPORT_SYMBOL(iommu_dma_get_resv_regions); 479 480 static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, 481 phys_addr_t start, phys_addr_t end) 482 { 483 struct iova_domain *iovad = &cookie->iovad; 484 struct iommu_dma_msi_page *msi_page; 485 int i, num_pages; 486 487 start -= iova_offset(iovad, start); 488 num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); 489 490 for (i = 0; i < num_pages; i++) { 491 msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); 492 if (!msi_page) 493 return -ENOMEM; 494 495 msi_page->phys = start; 496 msi_page->iova = start; 497 INIT_LIST_HEAD(&msi_page->list); 498 list_add(&msi_page->list, &cookie->msi_page_list); 499 start += iovad->granule; 500 } 501 502 return 0; 503 } 504 505 static int iommu_dma_ranges_sort(void *priv, const struct list_head *a, 506 const struct list_head *b) 507 { 508 struct resource_entry *res_a = list_entry(a, typeof(*res_a), node); 509 struct resource_entry *res_b = list_entry(b, typeof(*res_b), node); 510 511 return res_a->res->start > res_b->res->start; 512 } 513 514 static int iova_reserve_pci_windows(struct pci_dev *dev, 515 struct iova_domain *iovad) 516 { 517 struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus); 518 struct resource_entry *window; 519 unsigned long lo, hi; 520 phys_addr_t start = 0, end; 521 522 resource_list_for_each_entry(window, &bridge->windows) { 523 if (resource_type(window->res) != IORESOURCE_MEM) 524 continue; 525 526 lo = iova_pfn(iovad, window->res->start - window->offset); 527 hi = iova_pfn(iovad, window->res->end - window->offset); 528 reserve_iova(iovad, lo, hi); 529 } 530 531 /* Get reserved DMA windows from host bridge */ 532 list_sort(NULL, &bridge->dma_ranges, iommu_dma_ranges_sort); 533 resource_list_for_each_entry(window, &bridge->dma_ranges) { 534 end = window->res->start - window->offset; 535 resv_iova: 536 if (end > start) { 537 lo = iova_pfn(iovad, start); 538 hi = iova_pfn(iovad, end); 539 reserve_iova(iovad, lo, hi); 540 } else if (end < start) { 541 /* DMA ranges should be non-overlapping */ 542 dev_err(&dev->dev, 543 "Failed to reserve IOVA [%pa-%pa]\n", 544 &start, &end); 545 return -EINVAL; 546 } 547 548 start = window->res->end - window->offset + 1; 549 /* If window is last entry */ 550 if (window->node.next == &bridge->dma_ranges && 551 end != ~(phys_addr_t)0) { 552 end = ~(phys_addr_t)0; 553 goto resv_iova; 554 } 555 } 556 557 return 0; 558 } 559 560 static int iova_reserve_iommu_regions(struct device *dev, 561 struct iommu_domain *domain) 562 { 563 struct iommu_dma_cookie *cookie = domain->iova_cookie; 564 struct iova_domain *iovad = &cookie->iovad; 565 struct iommu_resv_region *region; 566 LIST_HEAD(resv_regions); 567 int ret = 0; 568 569 if (dev_is_pci(dev)) { 570 ret = iova_reserve_pci_windows(to_pci_dev(dev), iovad); 571 if (ret) 572 return ret; 573 } 574 575 iommu_get_resv_regions(dev, &resv_regions); 576 list_for_each_entry(region, &resv_regions, list) { 577 unsigned long lo, hi; 578 579 /* We ARE the software that manages these! */ 580 if (region->type == IOMMU_RESV_SW_MSI) 581 continue; 582 583 lo = iova_pfn(iovad, region->start); 584 hi = iova_pfn(iovad, region->start + region->length - 1); 585 reserve_iova(iovad, lo, hi); 586 587 if (region->type == IOMMU_RESV_MSI) 588 ret = cookie_init_hw_msi_region(cookie, region->start, 589 region->start + region->length); 590 if (ret) 591 break; 592 } 593 iommu_put_resv_regions(dev, &resv_regions); 594 595 return ret; 596 } 597 598 static bool dev_is_untrusted(struct device *dev) 599 { 600 return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; 601 } 602 603 static bool dev_use_swiotlb(struct device *dev, size_t size, 604 enum dma_data_direction dir) 605 { 606 return IS_ENABLED(CONFIG_SWIOTLB) && 607 (dev_is_untrusted(dev) || 608 dma_kmalloc_needs_bounce(dev, size, dir)); 609 } 610 611 static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg, 612 int nents, enum dma_data_direction dir) 613 { 614 struct scatterlist *s; 615 int i; 616 617 if (!IS_ENABLED(CONFIG_SWIOTLB)) 618 return false; 619 620 if (dev_is_untrusted(dev)) 621 return true; 622 623 /* 624 * If kmalloc() buffers are not DMA-safe for this device and 625 * direction, check the individual lengths in the sg list. If any 626 * element is deemed unsafe, use the swiotlb for bouncing. 627 */ 628 if (!dma_kmalloc_safe(dev, dir)) { 629 for_each_sg(sg, s, nents, i) 630 if (!dma_kmalloc_size_aligned(s->length)) 631 return true; 632 } 633 634 return false; 635 } 636 637 /** 638 * iommu_dma_init_options - Initialize dma-iommu options 639 * @options: The options to be initialized 640 * @dev: Device the options are set for 641 * 642 * This allows tuning dma-iommu specific to device properties 643 */ 644 static void iommu_dma_init_options(struct iommu_dma_options *options, 645 struct device *dev) 646 { 647 /* Shadowing IOTLB flushes do better with a single large queue */ 648 if (dev->iommu->shadow_on_flush) { 649 options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE; 650 options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT; 651 options->fq_size = IOVA_SINGLE_FQ_SIZE; 652 } else { 653 options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE; 654 options->fq_size = IOVA_DEFAULT_FQ_SIZE; 655 options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT; 656 } 657 } 658 659 /** 660 * iommu_dma_init_domain - Initialise a DMA mapping domain 661 * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() 662 * @base: IOVA at which the mappable address space starts 663 * @limit: Last address of the IOVA space 664 * @dev: Device the domain is being initialised for 665 * 666 * @base and @limit + 1 should be exact multiples of IOMMU page granularity to 667 * avoid rounding surprises. If necessary, we reserve the page at address 0 668 * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but 669 * any change which could make prior IOVAs invalid will fail. 670 */ 671 static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, 672 dma_addr_t limit, struct device *dev) 673 { 674 struct iommu_dma_cookie *cookie = domain->iova_cookie; 675 unsigned long order, base_pfn; 676 struct iova_domain *iovad; 677 int ret; 678 679 if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) 680 return -EINVAL; 681 682 iovad = &cookie->iovad; 683 684 /* Use the smallest supported page size for IOVA granularity */ 685 order = __ffs(domain->pgsize_bitmap); 686 base_pfn = max_t(unsigned long, 1, base >> order); 687 688 /* Check the domain allows at least some access to the device... */ 689 if (domain->geometry.force_aperture) { 690 if (base > domain->geometry.aperture_end || 691 limit < domain->geometry.aperture_start) { 692 pr_warn("specified DMA range outside IOMMU capability\n"); 693 return -EFAULT; 694 } 695 /* ...then finally give it a kicking to make sure it fits */ 696 base_pfn = max_t(unsigned long, base_pfn, 697 domain->geometry.aperture_start >> order); 698 } 699 700 /* start_pfn is always nonzero for an already-initialised domain */ 701 mutex_lock(&cookie->mutex); 702 if (iovad->start_pfn) { 703 if (1UL << order != iovad->granule || 704 base_pfn != iovad->start_pfn) { 705 pr_warn("Incompatible range for DMA domain\n"); 706 ret = -EFAULT; 707 goto done_unlock; 708 } 709 710 ret = 0; 711 goto done_unlock; 712 } 713 714 init_iova_domain(iovad, 1UL << order, base_pfn); 715 ret = iova_domain_init_rcaches(iovad); 716 if (ret) 717 goto done_unlock; 718 719 iommu_dma_init_options(&cookie->options, dev); 720 721 /* If the FQ fails we can simply fall back to strict mode */ 722 if (domain->type == IOMMU_DOMAIN_DMA_FQ && 723 (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) 724 domain->type = IOMMU_DOMAIN_DMA; 725 726 ret = iova_reserve_iommu_regions(dev, domain); 727 728 done_unlock: 729 mutex_unlock(&cookie->mutex); 730 return ret; 731 } 732 733 /** 734 * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API 735 * page flags. 736 * @dir: Direction of DMA transfer 737 * @coherent: Is the DMA master cache-coherent? 738 * @attrs: DMA attributes for the mapping 739 * 740 * Return: corresponding IOMMU API page protection flags 741 */ 742 static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, 743 unsigned long attrs) 744 { 745 int prot = coherent ? IOMMU_CACHE : 0; 746 747 if (attrs & DMA_ATTR_PRIVILEGED) 748 prot |= IOMMU_PRIV; 749 750 switch (dir) { 751 case DMA_BIDIRECTIONAL: 752 return prot | IOMMU_READ | IOMMU_WRITE; 753 case DMA_TO_DEVICE: 754 return prot | IOMMU_READ; 755 case DMA_FROM_DEVICE: 756 return prot | IOMMU_WRITE; 757 default: 758 return 0; 759 } 760 } 761 762 static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, 763 size_t size, u64 dma_limit, struct device *dev) 764 { 765 struct iommu_dma_cookie *cookie = domain->iova_cookie; 766 struct iova_domain *iovad = &cookie->iovad; 767 unsigned long shift, iova_len, iova; 768 769 if (cookie->type == IOMMU_DMA_MSI_COOKIE) { 770 cookie->msi_iova += size; 771 return cookie->msi_iova - size; 772 } 773 774 shift = iova_shift(iovad); 775 iova_len = size >> shift; 776 777 dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); 778 779 if (domain->geometry.force_aperture) 780 dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end); 781 782 /* 783 * Try to use all the 32-bit PCI addresses first. The original SAC vs. 784 * DAC reasoning loses relevance with PCIe, but enough hardware and 785 * firmware bugs are still lurking out there that it's safest not to 786 * venture into the 64-bit space until necessary. 787 * 788 * If your device goes wrong after seeing the notice then likely either 789 * its driver is not setting DMA masks accurately, the hardware has 790 * some inherent bug in handling >32-bit addresses, or not all the 791 * expected address bits are wired up between the device and the IOMMU. 792 */ 793 if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) { 794 iova = alloc_iova_fast(iovad, iova_len, 795 DMA_BIT_MASK(32) >> shift, false); 796 if (iova) 797 goto done; 798 799 dev->iommu->pci_32bit_workaround = false; 800 dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit)); 801 } 802 803 iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); 804 done: 805 return (dma_addr_t)iova << shift; 806 } 807 808 static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, 809 dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) 810 { 811 struct iova_domain *iovad = &cookie->iovad; 812 813 /* The MSI case is only ever cleaning up its most recent allocation */ 814 if (cookie->type == IOMMU_DMA_MSI_COOKIE) 815 cookie->msi_iova -= size; 816 else if (gather && gather->queued) 817 queue_iova(cookie, iova_pfn(iovad, iova), 818 size >> iova_shift(iovad), 819 &gather->freelist); 820 else 821 free_iova_fast(iovad, iova_pfn(iovad, iova), 822 size >> iova_shift(iovad)); 823 } 824 825 static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, 826 size_t size) 827 { 828 struct iommu_domain *domain = iommu_get_dma_domain(dev); 829 struct iommu_dma_cookie *cookie = domain->iova_cookie; 830 struct iova_domain *iovad = &cookie->iovad; 831 size_t iova_off = iova_offset(iovad, dma_addr); 832 struct iommu_iotlb_gather iotlb_gather; 833 size_t unmapped; 834 835 dma_addr -= iova_off; 836 size = iova_align(iovad, size + iova_off); 837 iommu_iotlb_gather_init(&iotlb_gather); 838 iotlb_gather.queued = READ_ONCE(cookie->fq_domain); 839 840 unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); 841 WARN_ON(unmapped != size); 842 843 if (!iotlb_gather.queued) 844 iommu_iotlb_sync(domain, &iotlb_gather); 845 iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); 846 } 847 848 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, 849 size_t size, int prot, u64 dma_mask) 850 { 851 struct iommu_domain *domain = iommu_get_dma_domain(dev); 852 struct iommu_dma_cookie *cookie = domain->iova_cookie; 853 struct iova_domain *iovad = &cookie->iovad; 854 size_t iova_off = iova_offset(iovad, phys); 855 dma_addr_t iova; 856 857 if (static_branch_unlikely(&iommu_deferred_attach_enabled) && 858 iommu_deferred_attach(dev, domain)) 859 return DMA_MAPPING_ERROR; 860 861 size = iova_align(iovad, size + iova_off); 862 863 iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev); 864 if (!iova) 865 return DMA_MAPPING_ERROR; 866 867 if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { 868 iommu_dma_free_iova(cookie, iova, size, NULL); 869 return DMA_MAPPING_ERROR; 870 } 871 return iova + iova_off; 872 } 873 874 static void __iommu_dma_free_pages(struct page **pages, int count) 875 { 876 while (count--) 877 __free_page(pages[count]); 878 kvfree(pages); 879 } 880 881 static struct page **__iommu_dma_alloc_pages(struct device *dev, 882 unsigned int count, unsigned long order_mask, gfp_t gfp) 883 { 884 struct page **pages; 885 unsigned int i = 0, nid = dev_to_node(dev); 886 887 order_mask &= GENMASK(MAX_ORDER, 0); 888 if (!order_mask) 889 return NULL; 890 891 pages = kvcalloc(count, sizeof(*pages), GFP_KERNEL); 892 if (!pages) 893 return NULL; 894 895 /* IOMMU can map any pages, so himem can also be used here */ 896 gfp |= __GFP_NOWARN | __GFP_HIGHMEM; 897 898 while (count) { 899 struct page *page = NULL; 900 unsigned int order_size; 901 902 /* 903 * Higher-order allocations are a convenience rather 904 * than a necessity, hence using __GFP_NORETRY until 905 * falling back to minimum-order allocations. 906 */ 907 for (order_mask &= GENMASK(__fls(count), 0); 908 order_mask; order_mask &= ~order_size) { 909 unsigned int order = __fls(order_mask); 910 gfp_t alloc_flags = gfp; 911 912 order_size = 1U << order; 913 if (order_mask > order_size) 914 alloc_flags |= __GFP_NORETRY; 915 page = alloc_pages_node(nid, alloc_flags, order); 916 if (!page) 917 continue; 918 if (order) 919 split_page(page, order); 920 break; 921 } 922 if (!page) { 923 __iommu_dma_free_pages(pages, i); 924 return NULL; 925 } 926 count -= order_size; 927 while (order_size--) 928 pages[i++] = page++; 929 } 930 return pages; 931 } 932 933 /* 934 * If size is less than PAGE_SIZE, then a full CPU page will be allocated, 935 * but an IOMMU which supports smaller pages might not map the whole thing. 936 */ 937 static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, 938 size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot, 939 unsigned long attrs) 940 { 941 struct iommu_domain *domain = iommu_get_dma_domain(dev); 942 struct iommu_dma_cookie *cookie = domain->iova_cookie; 943 struct iova_domain *iovad = &cookie->iovad; 944 bool coherent = dev_is_dma_coherent(dev); 945 int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); 946 unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; 947 struct page **pages; 948 dma_addr_t iova; 949 ssize_t ret; 950 951 if (static_branch_unlikely(&iommu_deferred_attach_enabled) && 952 iommu_deferred_attach(dev, domain)) 953 return NULL; 954 955 min_size = alloc_sizes & -alloc_sizes; 956 if (min_size < PAGE_SIZE) { 957 min_size = PAGE_SIZE; 958 alloc_sizes |= PAGE_SIZE; 959 } else { 960 size = ALIGN(size, min_size); 961 } 962 if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) 963 alloc_sizes = min_size; 964 965 count = PAGE_ALIGN(size) >> PAGE_SHIFT; 966 pages = __iommu_dma_alloc_pages(dev, count, alloc_sizes >> PAGE_SHIFT, 967 gfp); 968 if (!pages) 969 return NULL; 970 971 size = iova_align(iovad, size); 972 iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev); 973 if (!iova) 974 goto out_free_pages; 975 976 /* 977 * Remove the zone/policy flags from the GFP - these are applied to the 978 * __iommu_dma_alloc_pages() but are not used for the supporting 979 * internal allocations that follow. 980 */ 981 gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP); 982 983 if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp)) 984 goto out_free_iova; 985 986 if (!(ioprot & IOMMU_CACHE)) { 987 struct scatterlist *sg; 988 int i; 989 990 for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) 991 arch_dma_prep_coherent(sg_page(sg), sg->length); 992 } 993 994 ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot, 995 gfp); 996 if (ret < 0 || ret < size) 997 goto out_free_sg; 998 999 sgt->sgl->dma_address = iova; 1000 sgt->sgl->dma_length = size; 1001 return pages; 1002 1003 out_free_sg: 1004 sg_free_table(sgt); 1005 out_free_iova: 1006 iommu_dma_free_iova(cookie, iova, size, NULL); 1007 out_free_pages: 1008 __iommu_dma_free_pages(pages, count); 1009 return NULL; 1010 } 1011 1012 static void *iommu_dma_alloc_remap(struct device *dev, size_t size, 1013 dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot, 1014 unsigned long attrs) 1015 { 1016 struct page **pages; 1017 struct sg_table sgt; 1018 void *vaddr; 1019 1020 pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot, 1021 attrs); 1022 if (!pages) 1023 return NULL; 1024 *dma_handle = sgt.sgl->dma_address; 1025 sg_free_table(&sgt); 1026 vaddr = dma_common_pages_remap(pages, size, prot, 1027 __builtin_return_address(0)); 1028 if (!vaddr) 1029 goto out_unmap; 1030 return vaddr; 1031 1032 out_unmap: 1033 __iommu_dma_unmap(dev, *dma_handle, size); 1034 __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); 1035 return NULL; 1036 } 1037 1038 static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, 1039 size_t size, enum dma_data_direction dir, gfp_t gfp, 1040 unsigned long attrs) 1041 { 1042 struct dma_sgt_handle *sh; 1043 1044 sh = kmalloc(sizeof(*sh), gfp); 1045 if (!sh) 1046 return NULL; 1047 1048 sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, 1049 PAGE_KERNEL, attrs); 1050 if (!sh->pages) { 1051 kfree(sh); 1052 return NULL; 1053 } 1054 return &sh->sgt; 1055 } 1056 1057 static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, 1058 struct sg_table *sgt, enum dma_data_direction dir) 1059 { 1060 struct dma_sgt_handle *sh = sgt_handle(sgt); 1061 1062 __iommu_dma_unmap(dev, sgt->sgl->dma_address, size); 1063 __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT); 1064 sg_free_table(&sh->sgt); 1065 kfree(sh); 1066 } 1067 1068 static void iommu_dma_sync_single_for_cpu(struct device *dev, 1069 dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) 1070 { 1071 phys_addr_t phys; 1072 1073 if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) 1074 return; 1075 1076 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); 1077 if (!dev_is_dma_coherent(dev)) 1078 arch_sync_dma_for_cpu(phys, size, dir); 1079 1080 if (is_swiotlb_buffer(dev, phys)) 1081 swiotlb_sync_single_for_cpu(dev, phys, size, dir); 1082 } 1083 1084 static void iommu_dma_sync_single_for_device(struct device *dev, 1085 dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) 1086 { 1087 phys_addr_t phys; 1088 1089 if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) 1090 return; 1091 1092 phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); 1093 if (is_swiotlb_buffer(dev, phys)) 1094 swiotlb_sync_single_for_device(dev, phys, size, dir); 1095 1096 if (!dev_is_dma_coherent(dev)) 1097 arch_sync_dma_for_device(phys, size, dir); 1098 } 1099 1100 static void iommu_dma_sync_sg_for_cpu(struct device *dev, 1101 struct scatterlist *sgl, int nelems, 1102 enum dma_data_direction dir) 1103 { 1104 struct scatterlist *sg; 1105 int i; 1106 1107 if (sg_dma_is_swiotlb(sgl)) 1108 for_each_sg(sgl, sg, nelems, i) 1109 iommu_dma_sync_single_for_cpu(dev, sg_dma_address(sg), 1110 sg->length, dir); 1111 else if (!dev_is_dma_coherent(dev)) 1112 for_each_sg(sgl, sg, nelems, i) 1113 arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); 1114 } 1115 1116 static void iommu_dma_sync_sg_for_device(struct device *dev, 1117 struct scatterlist *sgl, int nelems, 1118 enum dma_data_direction dir) 1119 { 1120 struct scatterlist *sg; 1121 int i; 1122 1123 if (sg_dma_is_swiotlb(sgl)) 1124 for_each_sg(sgl, sg, nelems, i) 1125 iommu_dma_sync_single_for_device(dev, 1126 sg_dma_address(sg), 1127 sg->length, dir); 1128 else if (!dev_is_dma_coherent(dev)) 1129 for_each_sg(sgl, sg, nelems, i) 1130 arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); 1131 } 1132 1133 static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, 1134 unsigned long offset, size_t size, enum dma_data_direction dir, 1135 unsigned long attrs) 1136 { 1137 phys_addr_t phys = page_to_phys(page) + offset; 1138 bool coherent = dev_is_dma_coherent(dev); 1139 int prot = dma_info_to_prot(dir, coherent, attrs); 1140 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1141 struct iommu_dma_cookie *cookie = domain->iova_cookie; 1142 struct iova_domain *iovad = &cookie->iovad; 1143 dma_addr_t iova, dma_mask = dma_get_mask(dev); 1144 1145 /* 1146 * If both the physical buffer start address and size are 1147 * page aligned, we don't need to use a bounce page. 1148 */ 1149 if (dev_use_swiotlb(dev, size, dir) && 1150 iova_offset(iovad, phys | size)) { 1151 void *padding_start; 1152 size_t padding_size, aligned_size; 1153 1154 if (!is_swiotlb_active(dev)) { 1155 dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); 1156 return DMA_MAPPING_ERROR; 1157 } 1158 1159 aligned_size = iova_align(iovad, size); 1160 phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size, 1161 iova_mask(iovad), dir, attrs); 1162 1163 if (phys == DMA_MAPPING_ERROR) 1164 return DMA_MAPPING_ERROR; 1165 1166 /* Cleanup the padding area. */ 1167 padding_start = phys_to_virt(phys); 1168 padding_size = aligned_size; 1169 1170 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && 1171 (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) { 1172 padding_start += size; 1173 padding_size -= size; 1174 } 1175 1176 memset(padding_start, 0, padding_size); 1177 } 1178 1179 if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1180 arch_sync_dma_for_device(phys, size, dir); 1181 1182 iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); 1183 if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) 1184 swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); 1185 return iova; 1186 } 1187 1188 static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, 1189 size_t size, enum dma_data_direction dir, unsigned long attrs) 1190 { 1191 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1192 phys_addr_t phys; 1193 1194 phys = iommu_iova_to_phys(domain, dma_handle); 1195 if (WARN_ON(!phys)) 1196 return; 1197 1198 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) 1199 arch_sync_dma_for_cpu(phys, size, dir); 1200 1201 __iommu_dma_unmap(dev, dma_handle, size); 1202 1203 if (unlikely(is_swiotlb_buffer(dev, phys))) 1204 swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); 1205 } 1206 1207 /* 1208 * Prepare a successfully-mapped scatterlist to give back to the caller. 1209 * 1210 * At this point the segments are already laid out by iommu_dma_map_sg() to 1211 * avoid individually crossing any boundaries, so we merely need to check a 1212 * segment's start address to avoid concatenating across one. 1213 */ 1214 static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, 1215 dma_addr_t dma_addr) 1216 { 1217 struct scatterlist *s, *cur = sg; 1218 unsigned long seg_mask = dma_get_seg_boundary(dev); 1219 unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); 1220 int i, count = 0; 1221 1222 for_each_sg(sg, s, nents, i) { 1223 /* Restore this segment's original unaligned fields first */ 1224 dma_addr_t s_dma_addr = sg_dma_address(s); 1225 unsigned int s_iova_off = sg_dma_address(s); 1226 unsigned int s_length = sg_dma_len(s); 1227 unsigned int s_iova_len = s->length; 1228 1229 sg_dma_address(s) = DMA_MAPPING_ERROR; 1230 sg_dma_len(s) = 0; 1231 1232 if (sg_dma_is_bus_address(s)) { 1233 if (i > 0) 1234 cur = sg_next(cur); 1235 1236 sg_dma_unmark_bus_address(s); 1237 sg_dma_address(cur) = s_dma_addr; 1238 sg_dma_len(cur) = s_length; 1239 sg_dma_mark_bus_address(cur); 1240 count++; 1241 cur_len = 0; 1242 continue; 1243 } 1244 1245 s->offset += s_iova_off; 1246 s->length = s_length; 1247 1248 /* 1249 * Now fill in the real DMA data. If... 1250 * - there is a valid output segment to append to 1251 * - and this segment starts on an IOVA page boundary 1252 * - but doesn't fall at a segment boundary 1253 * - and wouldn't make the resulting output segment too long 1254 */ 1255 if (cur_len && !s_iova_off && (dma_addr & seg_mask) && 1256 (max_len - cur_len >= s_length)) { 1257 /* ...then concatenate it with the previous one */ 1258 cur_len += s_length; 1259 } else { 1260 /* Otherwise start the next output segment */ 1261 if (i > 0) 1262 cur = sg_next(cur); 1263 cur_len = s_length; 1264 count++; 1265 1266 sg_dma_address(cur) = dma_addr + s_iova_off; 1267 } 1268 1269 sg_dma_len(cur) = cur_len; 1270 dma_addr += s_iova_len; 1271 1272 if (s_length + s_iova_off < s_iova_len) 1273 cur_len = 0; 1274 } 1275 return count; 1276 } 1277 1278 /* 1279 * If mapping failed, then just restore the original list, 1280 * but making sure the DMA fields are invalidated. 1281 */ 1282 static void __invalidate_sg(struct scatterlist *sg, int nents) 1283 { 1284 struct scatterlist *s; 1285 int i; 1286 1287 for_each_sg(sg, s, nents, i) { 1288 if (sg_dma_is_bus_address(s)) { 1289 sg_dma_unmark_bus_address(s); 1290 } else { 1291 if (sg_dma_address(s) != DMA_MAPPING_ERROR) 1292 s->offset += sg_dma_address(s); 1293 if (sg_dma_len(s)) 1294 s->length = sg_dma_len(s); 1295 } 1296 sg_dma_address(s) = DMA_MAPPING_ERROR; 1297 sg_dma_len(s) = 0; 1298 } 1299 } 1300 1301 static void iommu_dma_unmap_sg_swiotlb(struct device *dev, struct scatterlist *sg, 1302 int nents, enum dma_data_direction dir, unsigned long attrs) 1303 { 1304 struct scatterlist *s; 1305 int i; 1306 1307 for_each_sg(sg, s, nents, i) 1308 iommu_dma_unmap_page(dev, sg_dma_address(s), 1309 sg_dma_len(s), dir, attrs); 1310 } 1311 1312 static int iommu_dma_map_sg_swiotlb(struct device *dev, struct scatterlist *sg, 1313 int nents, enum dma_data_direction dir, unsigned long attrs) 1314 { 1315 struct scatterlist *s; 1316 int i; 1317 1318 sg_dma_mark_swiotlb(sg); 1319 1320 for_each_sg(sg, s, nents, i) { 1321 sg_dma_address(s) = iommu_dma_map_page(dev, sg_page(s), 1322 s->offset, s->length, dir, attrs); 1323 if (sg_dma_address(s) == DMA_MAPPING_ERROR) 1324 goto out_unmap; 1325 sg_dma_len(s) = s->length; 1326 } 1327 1328 return nents; 1329 1330 out_unmap: 1331 iommu_dma_unmap_sg_swiotlb(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); 1332 return -EIO; 1333 } 1334 1335 /* 1336 * The DMA API client is passing in a scatterlist which could describe 1337 * any old buffer layout, but the IOMMU API requires everything to be 1338 * aligned to IOMMU pages. Hence the need for this complicated bit of 1339 * impedance-matching, to be able to hand off a suitably-aligned list, 1340 * but still preserve the original offsets and sizes for the caller. 1341 */ 1342 static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, 1343 int nents, enum dma_data_direction dir, unsigned long attrs) 1344 { 1345 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1346 struct iommu_dma_cookie *cookie = domain->iova_cookie; 1347 struct iova_domain *iovad = &cookie->iovad; 1348 struct scatterlist *s, *prev = NULL; 1349 int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); 1350 struct pci_p2pdma_map_state p2pdma_state = {}; 1351 enum pci_p2pdma_map_type map; 1352 dma_addr_t iova; 1353 size_t iova_len = 0; 1354 unsigned long mask = dma_get_seg_boundary(dev); 1355 ssize_t ret; 1356 int i; 1357 1358 if (static_branch_unlikely(&iommu_deferred_attach_enabled)) { 1359 ret = iommu_deferred_attach(dev, domain); 1360 if (ret) 1361 goto out; 1362 } 1363 1364 if (dev_use_sg_swiotlb(dev, sg, nents, dir)) 1365 return iommu_dma_map_sg_swiotlb(dev, sg, nents, dir, attrs); 1366 1367 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1368 iommu_dma_sync_sg_for_device(dev, sg, nents, dir); 1369 1370 /* 1371 * Work out how much IOVA space we need, and align the segments to 1372 * IOVA granules for the IOMMU driver to handle. With some clever 1373 * trickery we can modify the list in-place, but reversibly, by 1374 * stashing the unaligned parts in the as-yet-unused DMA fields. 1375 */ 1376 for_each_sg(sg, s, nents, i) { 1377 size_t s_iova_off = iova_offset(iovad, s->offset); 1378 size_t s_length = s->length; 1379 size_t pad_len = (mask - iova_len + 1) & mask; 1380 1381 if (is_pci_p2pdma_page(sg_page(s))) { 1382 map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); 1383 switch (map) { 1384 case PCI_P2PDMA_MAP_BUS_ADDR: 1385 /* 1386 * iommu_map_sg() will skip this segment as 1387 * it is marked as a bus address, 1388 * __finalise_sg() will copy the dma address 1389 * into the output segment. 1390 */ 1391 continue; 1392 case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: 1393 /* 1394 * Mapping through host bridge should be 1395 * mapped with regular IOVAs, thus we 1396 * do nothing here and continue below. 1397 */ 1398 break; 1399 default: 1400 ret = -EREMOTEIO; 1401 goto out_restore_sg; 1402 } 1403 } 1404 1405 sg_dma_address(s) = s_iova_off; 1406 sg_dma_len(s) = s_length; 1407 s->offset -= s_iova_off; 1408 s_length = iova_align(iovad, s_length + s_iova_off); 1409 s->length = s_length; 1410 1411 /* 1412 * Due to the alignment of our single IOVA allocation, we can 1413 * depend on these assumptions about the segment boundary mask: 1414 * - If mask size >= IOVA size, then the IOVA range cannot 1415 * possibly fall across a boundary, so we don't care. 1416 * - If mask size < IOVA size, then the IOVA range must start 1417 * exactly on a boundary, therefore we can lay things out 1418 * based purely on segment lengths without needing to know 1419 * the actual addresses beforehand. 1420 * - The mask must be a power of 2, so pad_len == 0 if 1421 * iova_len == 0, thus we cannot dereference prev the first 1422 * time through here (i.e. before it has a meaningful value). 1423 */ 1424 if (pad_len && pad_len < s_length - 1) { 1425 prev->length += pad_len; 1426 iova_len += pad_len; 1427 } 1428 1429 iova_len += s_length; 1430 prev = s; 1431 } 1432 1433 if (!iova_len) 1434 return __finalise_sg(dev, sg, nents, 0); 1435 1436 iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); 1437 if (!iova) { 1438 ret = -ENOMEM; 1439 goto out_restore_sg; 1440 } 1441 1442 /* 1443 * We'll leave any physical concatenation to the IOMMU driver's 1444 * implementation - it knows better than we do. 1445 */ 1446 ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); 1447 if (ret < 0 || ret < iova_len) 1448 goto out_free_iova; 1449 1450 return __finalise_sg(dev, sg, nents, iova); 1451 1452 out_free_iova: 1453 iommu_dma_free_iova(cookie, iova, iova_len, NULL); 1454 out_restore_sg: 1455 __invalidate_sg(sg, nents); 1456 out: 1457 if (ret != -ENOMEM && ret != -EREMOTEIO) 1458 return -EINVAL; 1459 return ret; 1460 } 1461 1462 static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, 1463 int nents, enum dma_data_direction dir, unsigned long attrs) 1464 { 1465 dma_addr_t end = 0, start; 1466 struct scatterlist *tmp; 1467 int i; 1468 1469 if (sg_dma_is_swiotlb(sg)) { 1470 iommu_dma_unmap_sg_swiotlb(dev, sg, nents, dir, attrs); 1471 return; 1472 } 1473 1474 if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) 1475 iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir); 1476 1477 /* 1478 * The scatterlist segments are mapped into a single 1479 * contiguous IOVA allocation, the start and end points 1480 * just have to be determined. 1481 */ 1482 for_each_sg(sg, tmp, nents, i) { 1483 if (sg_dma_is_bus_address(tmp)) { 1484 sg_dma_unmark_bus_address(tmp); 1485 continue; 1486 } 1487 1488 if (sg_dma_len(tmp) == 0) 1489 break; 1490 1491 start = sg_dma_address(tmp); 1492 break; 1493 } 1494 1495 nents -= i; 1496 for_each_sg(tmp, tmp, nents, i) { 1497 if (sg_dma_is_bus_address(tmp)) { 1498 sg_dma_unmark_bus_address(tmp); 1499 continue; 1500 } 1501 1502 if (sg_dma_len(tmp) == 0) 1503 break; 1504 1505 end = sg_dma_address(tmp) + sg_dma_len(tmp); 1506 } 1507 1508 if (end) 1509 __iommu_dma_unmap(dev, start, end - start); 1510 } 1511 1512 static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, 1513 size_t size, enum dma_data_direction dir, unsigned long attrs) 1514 { 1515 return __iommu_dma_map(dev, phys, size, 1516 dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO, 1517 dma_get_mask(dev)); 1518 } 1519 1520 static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, 1521 size_t size, enum dma_data_direction dir, unsigned long attrs) 1522 { 1523 __iommu_dma_unmap(dev, handle, size); 1524 } 1525 1526 static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) 1527 { 1528 size_t alloc_size = PAGE_ALIGN(size); 1529 int count = alloc_size >> PAGE_SHIFT; 1530 struct page *page = NULL, **pages = NULL; 1531 1532 /* Non-coherent atomic allocation? Easy */ 1533 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 1534 dma_free_from_pool(dev, cpu_addr, alloc_size)) 1535 return; 1536 1537 if (is_vmalloc_addr(cpu_addr)) { 1538 /* 1539 * If it the address is remapped, then it's either non-coherent 1540 * or highmem CMA, or an iommu_dma_alloc_remap() construction. 1541 */ 1542 pages = dma_common_find_pages(cpu_addr); 1543 if (!pages) 1544 page = vmalloc_to_page(cpu_addr); 1545 dma_common_free_remap(cpu_addr, alloc_size); 1546 } else { 1547 /* Lowmem means a coherent atomic or CMA allocation */ 1548 page = virt_to_page(cpu_addr); 1549 } 1550 1551 if (pages) 1552 __iommu_dma_free_pages(pages, count); 1553 if (page) 1554 dma_free_contiguous(dev, page, alloc_size); 1555 } 1556 1557 static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, 1558 dma_addr_t handle, unsigned long attrs) 1559 { 1560 __iommu_dma_unmap(dev, handle, size); 1561 __iommu_dma_free(dev, size, cpu_addr); 1562 } 1563 1564 static void *iommu_dma_alloc_pages(struct device *dev, size_t size, 1565 struct page **pagep, gfp_t gfp, unsigned long attrs) 1566 { 1567 bool coherent = dev_is_dma_coherent(dev); 1568 size_t alloc_size = PAGE_ALIGN(size); 1569 int node = dev_to_node(dev); 1570 struct page *page = NULL; 1571 void *cpu_addr; 1572 1573 page = dma_alloc_contiguous(dev, alloc_size, gfp); 1574 if (!page) 1575 page = alloc_pages_node(node, gfp, get_order(alloc_size)); 1576 if (!page) 1577 return NULL; 1578 1579 if (!coherent || PageHighMem(page)) { 1580 pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); 1581 1582 cpu_addr = dma_common_contiguous_remap(page, alloc_size, 1583 prot, __builtin_return_address(0)); 1584 if (!cpu_addr) 1585 goto out_free_pages; 1586 1587 if (!coherent) 1588 arch_dma_prep_coherent(page, size); 1589 } else { 1590 cpu_addr = page_address(page); 1591 } 1592 1593 *pagep = page; 1594 memset(cpu_addr, 0, alloc_size); 1595 return cpu_addr; 1596 out_free_pages: 1597 dma_free_contiguous(dev, page, alloc_size); 1598 return NULL; 1599 } 1600 1601 static void *iommu_dma_alloc(struct device *dev, size_t size, 1602 dma_addr_t *handle, gfp_t gfp, unsigned long attrs) 1603 { 1604 bool coherent = dev_is_dma_coherent(dev); 1605 int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); 1606 struct page *page = NULL; 1607 void *cpu_addr; 1608 1609 gfp |= __GFP_ZERO; 1610 1611 if (gfpflags_allow_blocking(gfp) && 1612 !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { 1613 return iommu_dma_alloc_remap(dev, size, handle, gfp, 1614 dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); 1615 } 1616 1617 if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && 1618 !gfpflags_allow_blocking(gfp) && !coherent) 1619 page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr, 1620 gfp, NULL); 1621 else 1622 cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs); 1623 if (!cpu_addr) 1624 return NULL; 1625 1626 *handle = __iommu_dma_map(dev, page_to_phys(page), size, ioprot, 1627 dev->coherent_dma_mask); 1628 if (*handle == DMA_MAPPING_ERROR) { 1629 __iommu_dma_free(dev, size, cpu_addr); 1630 return NULL; 1631 } 1632 1633 return cpu_addr; 1634 } 1635 1636 static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, 1637 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1638 unsigned long attrs) 1639 { 1640 unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; 1641 unsigned long pfn, off = vma->vm_pgoff; 1642 int ret; 1643 1644 vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); 1645 1646 if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 1647 return ret; 1648 1649 if (off >= nr_pages || vma_pages(vma) > nr_pages - off) 1650 return -ENXIO; 1651 1652 if (is_vmalloc_addr(cpu_addr)) { 1653 struct page **pages = dma_common_find_pages(cpu_addr); 1654 1655 if (pages) 1656 return vm_map_pages(vma, pages, nr_pages); 1657 pfn = vmalloc_to_pfn(cpu_addr); 1658 } else { 1659 pfn = page_to_pfn(virt_to_page(cpu_addr)); 1660 } 1661 1662 return remap_pfn_range(vma, vma->vm_start, pfn + off, 1663 vma->vm_end - vma->vm_start, 1664 vma->vm_page_prot); 1665 } 1666 1667 static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, 1668 void *cpu_addr, dma_addr_t dma_addr, size_t size, 1669 unsigned long attrs) 1670 { 1671 struct page *page; 1672 int ret; 1673 1674 if (is_vmalloc_addr(cpu_addr)) { 1675 struct page **pages = dma_common_find_pages(cpu_addr); 1676 1677 if (pages) { 1678 return sg_alloc_table_from_pages(sgt, pages, 1679 PAGE_ALIGN(size) >> PAGE_SHIFT, 1680 0, size, GFP_KERNEL); 1681 } 1682 1683 page = vmalloc_to_page(cpu_addr); 1684 } else { 1685 page = virt_to_page(cpu_addr); 1686 } 1687 1688 ret = sg_alloc_table(sgt, 1, GFP_KERNEL); 1689 if (!ret) 1690 sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); 1691 return ret; 1692 } 1693 1694 static unsigned long iommu_dma_get_merge_boundary(struct device *dev) 1695 { 1696 struct iommu_domain *domain = iommu_get_dma_domain(dev); 1697 1698 return (1UL << __ffs(domain->pgsize_bitmap)) - 1; 1699 } 1700 1701 static size_t iommu_dma_opt_mapping_size(void) 1702 { 1703 return iova_rcache_range(); 1704 } 1705 1706 static const struct dma_map_ops iommu_dma_ops = { 1707 .flags = DMA_F_PCI_P2PDMA_SUPPORTED, 1708 .alloc = iommu_dma_alloc, 1709 .free = iommu_dma_free, 1710 .alloc_pages = dma_common_alloc_pages, 1711 .free_pages = dma_common_free_pages, 1712 .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, 1713 .free_noncontiguous = iommu_dma_free_noncontiguous, 1714 .mmap = iommu_dma_mmap, 1715 .get_sgtable = iommu_dma_get_sgtable, 1716 .map_page = iommu_dma_map_page, 1717 .unmap_page = iommu_dma_unmap_page, 1718 .map_sg = iommu_dma_map_sg, 1719 .unmap_sg = iommu_dma_unmap_sg, 1720 .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, 1721 .sync_single_for_device = iommu_dma_sync_single_for_device, 1722 .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, 1723 .sync_sg_for_device = iommu_dma_sync_sg_for_device, 1724 .map_resource = iommu_dma_map_resource, 1725 .unmap_resource = iommu_dma_unmap_resource, 1726 .get_merge_boundary = iommu_dma_get_merge_boundary, 1727 .opt_mapping_size = iommu_dma_opt_mapping_size, 1728 }; 1729 1730 /* 1731 * The IOMMU core code allocates the default DMA domain, which the underlying 1732 * IOMMU driver needs to support via the dma-iommu layer. 1733 */ 1734 void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) 1735 { 1736 struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1737 1738 if (!domain) 1739 goto out_err; 1740 1741 /* 1742 * The IOMMU core code allocates the default DMA domain, which the 1743 * underlying IOMMU driver needs to support via the dma-iommu layer. 1744 */ 1745 if (iommu_is_dma_domain(domain)) { 1746 if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev)) 1747 goto out_err; 1748 dev->dma_ops = &iommu_dma_ops; 1749 } 1750 1751 return; 1752 out_err: 1753 pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", 1754 dev_name(dev)); 1755 } 1756 EXPORT_SYMBOL_GPL(iommu_setup_dma_ops); 1757 1758 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, 1759 phys_addr_t msi_addr, struct iommu_domain *domain) 1760 { 1761 struct iommu_dma_cookie *cookie = domain->iova_cookie; 1762 struct iommu_dma_msi_page *msi_page; 1763 dma_addr_t iova; 1764 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; 1765 size_t size = cookie_msi_granule(cookie); 1766 1767 msi_addr &= ~(phys_addr_t)(size - 1); 1768 list_for_each_entry(msi_page, &cookie->msi_page_list, list) 1769 if (msi_page->phys == msi_addr) 1770 return msi_page; 1771 1772 msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL); 1773 if (!msi_page) 1774 return NULL; 1775 1776 iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); 1777 if (!iova) 1778 goto out_free_page; 1779 1780 if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL)) 1781 goto out_free_iova; 1782 1783 INIT_LIST_HEAD(&msi_page->list); 1784 msi_page->phys = msi_addr; 1785 msi_page->iova = iova; 1786 list_add(&msi_page->list, &cookie->msi_page_list); 1787 return msi_page; 1788 1789 out_free_iova: 1790 iommu_dma_free_iova(cookie, iova, size, NULL); 1791 out_free_page: 1792 kfree(msi_page); 1793 return NULL; 1794 } 1795 1796 /** 1797 * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain 1798 * @desc: MSI descriptor, will store the MSI page 1799 * @msi_addr: MSI target address to be mapped 1800 * 1801 * Return: 0 on success or negative error code if the mapping failed. 1802 */ 1803 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) 1804 { 1805 struct device *dev = msi_desc_to_dev(desc); 1806 struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1807 struct iommu_dma_msi_page *msi_page; 1808 static DEFINE_MUTEX(msi_prepare_lock); /* see below */ 1809 1810 if (!domain || !domain->iova_cookie) { 1811 desc->iommu_cookie = NULL; 1812 return 0; 1813 } 1814 1815 /* 1816 * In fact the whole prepare operation should already be serialised by 1817 * irq_domain_mutex further up the callchain, but that's pretty subtle 1818 * on its own, so consider this locking as failsafe documentation... 1819 */ 1820 mutex_lock(&msi_prepare_lock); 1821 msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); 1822 mutex_unlock(&msi_prepare_lock); 1823 1824 msi_desc_set_iommu_cookie(desc, msi_page); 1825 1826 if (!msi_page) 1827 return -ENOMEM; 1828 return 0; 1829 } 1830 1831 /** 1832 * iommu_dma_compose_msi_msg() - Apply translation to an MSI message 1833 * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() 1834 * @msg: MSI message containing target physical address 1835 */ 1836 void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) 1837 { 1838 struct device *dev = msi_desc_to_dev(desc); 1839 const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); 1840 const struct iommu_dma_msi_page *msi_page; 1841 1842 msi_page = msi_desc_get_iommu_cookie(desc); 1843 1844 if (!domain || !domain->iova_cookie || WARN_ON(!msi_page)) 1845 return; 1846 1847 msg->address_hi = upper_32_bits(msi_page->iova); 1848 msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; 1849 msg->address_lo += lower_32_bits(msi_page->iova); 1850 } 1851 1852 static int iommu_dma_init(void) 1853 { 1854 if (is_kdump_kernel()) 1855 static_branch_enable(&iommu_deferred_attach_enabled); 1856 1857 return iova_cache_get(); 1858 } 1859 arch_initcall(iommu_dma_init); 1860