/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */
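
/*
 * Per-device override of the busdma implementation. The loader tunable
 * "hw.busdma.default" selects the default implementation ("bounce" or
 * "iommu"; "dmar" is accepted for compatibility), while
 * "hw.busdma.pci<domain>.<bus>.<slot>.<func>" overrides the choice for
 * a single PCI function. For example (hypothetical device address), a
 * loader.conf(5) fragment such as
 *
 *	hw.busdma.default="iommu"
 *	hw.busdma.pci0.2.0.0="bounce"
 *
 * keeps translation enabled by default but gives the function at
 * pci0:2:0:0 bounce busdma backed by an identity-mapped context.
 */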

static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar";	/* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given the original device, find the requester ID that will be seen by
 * the IOMMU unit and used for page table lookup. PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device. In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either IOMMU or
 * bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * Device is not PCIe, it cannot be seen as a
			 * requester by the IOMMU unit. Check whether
			 * the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability. If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is. This is a
				 * PCIe->PCI bridge. Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases. We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe. This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the IOMMU unit, due to the possibility of
	 * other devices on the same IOMMU unit still requiring
	 * translation. Instead provide the identity mapping for the
	 * device context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
	struct iommu_unit *unit;

	unit = iommu_find(dev, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

#if defined(__amd64__) || defined(__i386__)
	dmar_quirks_pre_use(unit);
	dmar_instantiate_rmrr_ctxs(unit);
#endif

	return (iommu_instantiate_ctx(unit, dev, false));
}

bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	ctx = iommu_get_dev_ctx(child);
	if (ctx == NULL)
		return (NULL);

	res = (bus_dma_tag_t)ctx->tag;
	return (res);
}

bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat, *parent;
	struct bus_dma_tag_iommu *dmat_copy __unused;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == dmat->ctx->tag)
					iommu_free_ctx(dmat->ctx);
				free(dmat->segments, M_IOMMU_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	IOMMU_DMAMAP_INIT(map);
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		IOMMU_DMAMAP_LOCK(map);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DMAMAP_UNLOCK(map);
			return (EBUSY);
		}
		IOMMU_DMAMAP_DESTROY(map);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free(vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}
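
/*
 * Map the buffer described by the vm_page array 'ma' (starting at
 * 'offset' within the first page, 'buflen' bytes long) into the
 * context's IOMMU domain. Each loop iteration allocates address space
 * for at most one busdma segment; the resulting map entries are
 * collected on 'entries' and the segments are recorded in 'segs'.
 * EFBIG is returned when the buffer does not fit into the remaining
 * segments; undoing partially done work is left to the caller.
 */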

static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *entries)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t buflen1;
	int error, e_flags, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	e_flags = IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_gas_map(domain, &tag->common, buflen1,
		    offset, e_flags, gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		/* Update buflen1 in case the buffer was split. */
		if (buflen1 > entry->end - entry->start - offset)
			buflen1 = entry->end - entry->start - offset;

		KASSERT(vm_addr_align_ok(entry->start + offset,
		    tag->common.alignment),
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
		TAILQ_INSERT_TAIL(entries, entry, dmamap_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(offset + buflen1);
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&entries);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &entries);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else if (!TAILQ_EMPTY(&entries)) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized, we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized, we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context. Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the iommu driver to perform
 * the actual unload, consisting of the unmapping of the map entries
 * from the page tables, from the delayed context on i386, since
 * mapping a page table page might require a sleep to be successful.
 * The unfortunate consequence is that the DMA requests can still be
 * served some time after the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

	TAILQ_INIT(&entries);
	IOMMU_DMAMAP_LOCK(map);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
	struct bus_dmamap_iommu *map __unused;

	map = (struct bus_dmamap_iommu *)map1;
	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 1;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}
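
/*
 * Establish an identity ("1:1") mapping of the physical range
 * [start, start + length) in the IOMMU domain backing the given tag
 * and record it on the map, so that a later bus_dmamap_unload()
 * removes it. The range must be page-aligned. Tags that do not belong
 * to the IOMMU implementation are accepted and the call becomes a
 * no-op. A hypothetical caller mapping a page-aligned resource could
 * look like:
 *
 *	error = bus_dma_iommu_load_ident(dmat, map, rman_get_start(res),
 *	    rman_get_size(res), BUS_DMA_NOWAIT | BUS_DMA_NOWRITE);
 */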

int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) == 0;

	entry = iommu_gas_alloc_entry(domain, waitok ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_gas_free_entry(entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
	    IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else {
		iommu_gas_free_entry(entry);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}

static void
iommu_domain_unload_task(void *arg, int pending)
{
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries,
		    iommu_map_entry, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		iommu_domain_unload(domain, &entries, true);
	}
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

	domain->ops = ops;
	domain->iommu = unit;

	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

	mtx_destroy(&domain->lock);
}
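
/*
 * Illustrative usage sketch (hypothetical driver names, error handling
 * omitted): drivers do not call the functions in this file directly.
 * They are reached through the busdma(9) interface once the bus hands
 * out an IOMMU-backed tag, typically via bus_get_dma_tag():
 *
 *	bus_dma_tag_t dt;
 *	bus_dmamap_t dm;
 *	void *va;
 *
 *	bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR,
 *	    BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE, 1, PAGE_SIZE, 0,
 *	    NULL, NULL, &dt);
 *	bus_dmamem_alloc(dt, &va, BUS_DMA_WAITOK | BUS_DMA_ZERO, &dm);
 *	bus_dmamap_load(dt, dm, va, PAGE_SIZE, dma_callback, sc, 0);
 */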