/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */

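/*
 * Check whether the user requested that busdma for the given PCI
 * device bypass the IOMMU and use the normal bounce implementation
 * instead.  The global default comes from the hw.busdma.default
 * tunable ("bounce", "iommu", or the compatibility spelling "dmar"),
 * and can be overridden per device with a tunable named after the
 * device address, hw.busdma.pci<domain>.<bus>.<slot>.<func>, e.g. in
 * loader.conf(5):
 *
 *	hw.busdma.pci0.0.31.0="bounce"
 *
 * The device address above is only an illustration.
 */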
static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar"; /* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given the original device, find the requester ID that will be seen
 * by the IOMMU unit and used for the page table lookup.  PCI bridges
 * may take ownership of transactions from downstream devices, so it
 * may not be the same as the BSF of the target device.  In those
 * cases, all devices downstream of the bridge must share a single
 * mapping domain, and must collectively be assigned to use either
 * IOMMU or bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be
			 * seen as a requester by the IOMMU unit.
			 * Check whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the whole IOMMU unit, since other
	 * devices on the same unit may still require translation.
	 * Instead, provide the identity mapping for the device
	 * context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on the context, release
		 * the later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
	struct iommu_unit *unit;

	unit = iommu_find(dev, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

#if defined(__amd64__) || defined(__i386__)
	dmar_quirks_pre_use(unit);
	dmar_instantiate_rmrr_ctxs(unit);
#endif

	return (iommu_instantiate_ctx(unit, dev, false));
}

bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	ctx = iommu_get_dev_ctx(child);
	if (ctx == NULL)
		return (NULL);

	res = (bus_dma_tag_t)ctx->tag;
	return (res);
}

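/*
 * The "buswide context" machinery below lets a driver request that
 * every requester ID on its PCI bus be handled by a single IOMMU
 * context, which helps when functions on the bus issue DMA with
 * requester IDs other than their own.  The request is only honored
 * when made by the device at slot 0, function 0; the bus is then
 * marked in the unit's buswide_ctxs bitmap, one bit per bus number.
 */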
bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

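/*
 * There is no IOMMU-specific work to do when the tag's NUMA domain is
 * changed; the common busdma code records the new domain in the tag.
 */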
static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat, *parent;
	struct bus_dma_tag_iommu *dmat_copy __unused;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == dmat->ctx->tag)
					iommu_free_ctx(dmat->ctx);
				free(dmat->segments, M_IOMMU_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_domain *domain;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		domain = tag->ctx->domain;
		IOMMU_DOMAIN_LOCK(domain);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DOMAIN_UNLOCK(domain);
			return (EBUSY);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

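/*
 * Memory for bus_dmamem_alloc() is taken from malloc(9) when the
 * requested size is smaller than a page, the alignment can be
 * satisfied by malloc, and the default memory attribute is wanted;
 * otherwise it comes from kmem_alloc_attr().  The choice is recorded
 * in the map flags so that iommu_bus_dmamem_free() releases the
 * memory through the matching backend.
 */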
static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

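/*
 * Map the pages backing one buffer into the domain's I/O address
 * space, filling the busdma segment array as entries are created.
 * Each new entry is also appended to unroll_list so that the caller
 * can undo the whole load if a later step of the same request fails.
 */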
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *unroll_list)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	iommu_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_map(domain, &tag->common, size, offset,
		    IOMMU_MAP_ENTRY_READ |
		    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0),
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & IOMMU_MF_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(iommu_test_boundary(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= IOMMU_MAP_ENTRY_MAP;
		IOMMU_DOMAIN_UNLOCK(domain);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

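/*
 * Wrapper around the actual load that handles failures.  On error,
 * all entries created so far are queued for unload, since busdma does
 * not allow reporting a partially loaded buffer.  If the failure was
 * a transient shortage in a context that may not sleep, the load is
 * rescheduled onto the IOMMU taskqueue and EINPROGRESS is returned,
 * so that the driver callback runs later from a sleepable context.
 */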
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry, *entry1;
	struct iommu_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than those created during
			 * the failed run could have been inserted in
			 * between, since we own the ctx pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
			    dmamap_link);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized, we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized, we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the IOMMU to perform the
 * actual unload, that is, the unmapping of the map entries from the
 * page tables, from the delayed context on i386, since mapping the
 * page table pages might require a sleep to be successful.  The
 * unfortunate consequence is that the DMA requests can be served some
 * time after the bus_dmamap_unload() call returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
#ifndef IOMMU_DOMAIN_UNLOAD_SLEEP
	struct iommu_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else
	TAILQ_INIT(&entries);
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
	struct bus_dmamap_iommu *map __unused;

	map = (struct bus_dmamap_iommu *)map1;
	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

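/*
 * Process the maps whose load was deferred with EINPROGRESS.  The
 * task runs in a sleepable thread context, so the load is retried
 * with map->cansleep set, and the saved driver callback is invoked
 * through bus_dmamap_load_mem() with the recorded memory descriptor.
 */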
static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 1;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0)		/* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

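/*
 * Enter an identity (1:1) mapping of the physical range
 * [start, start + length) into the IOMMU domain backing the given
 * tag, and record the resulting map entry on the map so that a later
 * bus_dmamap_unload() tears it down.  The request is a no-op for tags
 * that are not served by the IOMMU implementation.
 */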
int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;

	/* waitok is true when the caller allows us to sleep. */
	waitok = (flags & BUS_DMA_NOWAIT) == 0;

	entry = iommu_map_alloc_entry(domain, waitok ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_map_free_entry(domain, entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE),
	    waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= IOMMU_MAP_ENTRY_MAP;
		IOMMU_DOMAIN_UNLOCK(domain);
	} else {
		iommu_domain_unload_entry(entry, true);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}

static void
iommu_domain_unload_task(void *arg, int pending)
{
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries,
		    iommu_map_entry, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		iommu_domain_unload(domain, &entries, true);
	}
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

	domain->ops = ops;
	domain->iommu = unit;

	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

	mtx_destroy(&domain->lock);
}