/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */

static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar"; /* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given the original device, find the requester ID that will be seen
 * by the IOMMU unit and used for page table lookup.  PCI bridges may
 * take ownership of transactions from downstream devices, so it may
 * not be the same as the BSF of the target device.  In those cases,
 * all devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either IOMMU or
 * bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * Device is not PCIe, it cannot be seen as a
			 * requester by the IOMMU unit.  Check whether
			 * the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}
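
/*
 * Illustrative example (not used by the code above): for a
 * conventional PCI device behind a PCIe->PCI bridge whose hypothetical
 * secondary bus is 3, the walk above stops at that bridge, so a caller
 * sees the bridge as the requester and a RID built from the secondary
 * bus with slot and function zero:
 *
 *	uint16_t rid;
 *	device_t req;
 *
 *	req = iommu_get_requester(dev, &rid);
 *	// req == the PCIe->PCI bridge, rid == PCI_RID(3, 0, 0)
 *
 * For a PCIe endpoint with no translating bridge in between, req is
 * the device itself and rid == pci_get_rid(dev).
 */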

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the IOMMU unit, due to the possibility of
	 * other devices on the same IOMMU unit still requiring
	 * translation.  Instead provide the identity mapping for the
	 * device context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on the context, release
		 * the later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
	struct iommu_unit *unit;

	unit = iommu_find(dev, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

#if defined(__amd64__) || defined(__i386__)
	dmar_quirks_pre_use(unit);
	dmar_instantiate_rmrr_ctxs(unit);
#endif

	return (iommu_instantiate_ctx(unit, dev, false));
}
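
/*
 * Illustrative example (hypothetical loader.conf fragment, the device
 * address 0:0:31:0 is made up): the tunables consulted by
 * iommu_bus_dma_is_dev_disabled() and iommu_init_busdma() control
 * whether a device gets a translated context or the identity-mapped,
 * IOMMU_CTX_DISABLED context created above:
 *
 *	hw.iommu.dma=1			# keep busdma translation enabled
 *	hw.busdma.default="iommu"	# translate devices by default
 *	hw.busdma.pci0.0.31.0="bounce"	# but identity-map this device
 */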

bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	ctx = iommu_get_dev_ctx(child);
	if (ctx == NULL)
		return (NULL);

	res = (bus_dma_tag_t)ctx->tag;
	return (res);
}

bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}
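
/*
 * Illustrative sketch (hypothetical driver fragment, not part of this
 * file): drivers never call iommu_bus_dma_tag_create() directly.  They
 * use the generic busdma(9) KPI; because the parent tag obtained
 * through iommu_get_dma_tag() above has its impl set to
 * bus_dma_iommu_impl, tag creation is dispatched here.  The sizes and
 * limits below are arbitrary example values.
 *
 *	bus_dma_tag_t tag;
 *	int error;
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev),
 *	    1, 0,				// alignment, boundary
 *	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
 *	    NULL, NULL,				// filter, filterarg
 *	    65536, 1, 65536,			// maxsize, nsegs, maxsegsz
 *	    0, NULL, NULL, &tag);
 */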

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat, *parent;
	struct bus_dma_tag_iommu *dmat_copy __unused;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == dmat->ctx->tag)
					iommu_free_ctx(dmat->ctx);
				free(dmat->segments, M_IOMMU_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	IOMMU_DMAMAP_INIT(map);
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		IOMMU_DMAMAP_LOCK(map);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DMAMAP_UNLOCK(map);
			return (EBUSY);
		}
		IOMMU_DMAMAP_DESTROY(map);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}
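
/*
 * Illustrative usage (hypothetical driver fragment): a static DMA
 * buffer allocated through the busdma(9) KPI lands in
 * iommu_bus_dmamem_alloc() above; sub-page requests with compatible
 * alignment and default memory attributes come from malloc(9), larger
 * or uncacheable ones from kmem_alloc_attr_domainset().
 *
 *	void *ring;
 *	bus_dmamap_t map;
 *
 *	error = bus_dmamem_alloc(tag, &ring,
 *	    BUS_DMA_WAITOK | BUS_DMA_ZERO, &map);
 *	...
 *	bus_dmamem_free(tag, ring, map);
 */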

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free(vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *entries)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t buflen1;
	int error, e_flags, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	e_flags = IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_gas_map(domain, &tag->common, buflen1,
		    offset, e_flags, gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		/* Update buflen1 in case the buffer was split. */
		if (buflen1 > entry->end - entry->start - offset)
			buflen1 = entry->end - entry->start - offset;

		KASSERT(vm_addr_align_ok(entry->start + offset,
		    tag->common.alignment),
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
		TAILQ_INSERT_TAIL(entries, entry, dmamap_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(offset + buflen1);
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&entries);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &entries);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else if (!TAILQ_EMPTY(&entries)) {
		/*
		 * The busdma interface does not allow us to report a
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}
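
/*
 * Illustrative sketch (hypothetical driver fragment): when a load that
 * is allowed to wait cannot be satisfied from a non-sleepable context,
 * the function above converts ENOMEM into EINPROGRESS and schedules
 * the map on the unit's taskqueue, which later re-runs the load and
 * invokes the driver callback.
 *
 *	static void
 *	foo_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 *	{
 *		// record segs[0].ds_addr and start the transfer
 *	}
 *
 *	error = bus_dmamap_load(tag, map, buf, len, foo_dma_cb, sc,
 *	    BUS_DMA_WAITOK);
 *	if (error == EINPROGRESS) {
 *		// foo_dma_cb() will run later from the iommu taskqueue
 *	}
 */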

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}
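
/*
 * Illustrative note (hypothetical driver fragment): the relock in
 * iommu_bus_dmamap_complete() only does something for drivers that
 * supplied a lockfunc at tag creation time, e.g. the stock
 * busdma_lock_mutex() with a driver mutex, so that a deferred callback
 * runs with the same lock held as an immediate one:
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
 *	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    65536, 1, 65536, 0,
 *	    busdma_lock_mutex, &sc->sc_mtx, &tag);
 */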

/*
 * The limitations of the busdma KPI force the iommu to perform the
 * actual unload, consisting of unmapping the map entries' page tables,
 * from the delayed context on i386, since page table page mapping
 * might require a sleep to be successful.  The unfortunate
 * consequence is that the DMA requests can be served some time after
 * the bus_dmamap_unload() call returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

	TAILQ_INIT(&entries);
	IOMMU_DMAMAP_LOCK(map);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
	struct bus_dmamap_iommu *map __unused;

	map = (struct bus_dmamap_iommu *)map1;
	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 1;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) == 0;

	entry = iommu_gas_alloc_entry(domain, waitok ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_gas_free_entry(entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
	    IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else {
		iommu_gas_free_entry(entry);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}
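
/*
 * Illustrative sketch (hypothetical caller): a driver that needs a
 * physical range to stay visible to the device at its original bus
 * address can pre-load it 1:1 through bus_dma_iommu_load_ident();
 * start and length must be page-aligned, and the mapping goes away
 * with a normal bus_dmamap_unload() of the map.
 *
 *	error = bus_dma_iommu_load_ident(tag, map, phys_base, size,
 *	    BUS_DMA_NOWAIT | BUS_DMA_NOWRITE);
 */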

static void
iommu_domain_unload_task(void *arg, int pending)
{
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries,
		    iommu_map_entry, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		iommu_domain_unload(domain, &entries, true);
	}
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

	domain->ops = ops;
	domain->iommu = unit;

	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

	mtx_destroy(&domain->lock);
}