/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */

static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar"; /* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}
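
/*
 * Example only (the values are hypothetical, not shipped defaults): the
 * strings parsed above come from the kernel environment, so they can be
 * set from loader.conf.  A configuration that bounce-buffers every
 * device except pci0:6:0:0 could look like:
 *
 *	hw.busdma.default="bounce"
 *	hw.busdma.pci0.6.0.0="iommu"
 *
 * The value "dmar" is accepted as a compatibility alias for "iommu".
 */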

/*
 * Given the original device, find the requester ID that will be seen by
 * the IOMMU unit and used for page table lookup. PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device. In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either IOMMU or
 * bounce mapping.
 */
int
iommu_get_requester(device_t dev, device_t *requesterp, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	pci = device_get_parent(dev);
	if (pci == NULL || device_get_devclass(pci) != pci_class) {
		*rid = 0; /* XXXKIB: Could be ACPI HID */
		*requesterp = NULL;
		return (ENOTTY);
	}

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		if (pci == NULL) {
			if (bootverbose) {
				printf(
				    "iommu_get_requester(%s): NULL parent for %s\n",
				    device_get_name(dev), device_get_name(l));
			}
			*rid = 0;
			*requesterp = NULL;
			return (ENXIO);
		}
		if (device_get_devclass(pci) != pci_class) {
			if (bootverbose) {
				printf(
				    "iommu_get_requester(%s): non-pci parent %s for %s\n",
				    device_get_name(dev), device_get_name(pci),
				    device_get_name(l));
			}
			*rid = 0;
			*requesterp = NULL;
			return (ENXIO);
		}

		pcib = device_get_parent(pci);
		if (pcib == NULL) {
			if (bootverbose) {
				printf(
				    "iommu_get_requester(%s): NULL bridge for %s\n",
				    device_get_name(dev), device_get_name(pci));
			}
			*rid = 0;
			*requesterp = NULL;
			return (ENXIO);
		}

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * Device is not PCIe, it cannot be seen as a
			 * requester by the IOMMU unit. Check whether the
			 * bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability. If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is. This is a
				 * PCIe->PCI bridge. Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases. We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe. This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	*requesterp = requester;
	return (0);
}

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	int error;
	bool disabled;
	uint16_t rid;

	error = iommu_get_requester(dev, &requester, &rid);
	if (error != 0)
		return (NULL);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the IOMMU unit, due to the possibility of other
	 * devices on the same IOMMU unit still requiring translation.
	 * Instead provide the identity mapping for the device
	 * context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on the context, release the
		 * later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
	}
	return (ctx);
}

struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
	struct iommu_ctx *ctx;
	struct iommu_unit *unit;

	unit = iommu_find(dev, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

	iommu_unit_pre_instantiate_ctx(unit);
	ctx = iommu_instantiate_ctx(unit, dev, false);
	if (ctx != NULL && (ctx->flags & IOMMU_CTX_DISABLED) != 0)
		ctx = NULL;
	return (ctx);
}

bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	ctx = iommu_get_dev_ctx(child);
	if (ctx == NULL)
		return (NULL);

	res = (bus_dma_tag_t)ctx->tag;
	return (res);
}

bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}
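
/*
 * buswide_ctxs[] below is treated as a bitmap of 32-bit words: since
 * NBBY * sizeof(uint32_t) == 32, bus number B lands in word B / 32 at
 * bit B % 32.  For example, bus 0x25 (37) is tracked by bit 5 of word 1.
 */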

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
    bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, maxsize, nsegments, maxsegsz, flags,
	    lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_iommu),
	    (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat;
	struct iommu_unit *iommu;
	struct iommu_ctx *ctx;
	int error;

	error = 0;
	dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		ctx = dmat->ctx;
		if (dmat == ctx->tag) {
			iommu = ctx->domain->iommu;
			IOMMU_LOCK(iommu);
			iommu_free_ctx_locked(iommu, dmat->ctx);
		}
		free(dmat->segments, M_IOMMU_DMAMAP);
		free(dmat, M_DEVBUF);
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	IOMMU_DMAMAP_INIT(map);
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		IOMMU_DMAMAP_LOCK(map);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DMAMAP_UNLOCK(map);
			return (EBUSY);
		}
		IOMMU_DMAMAP_DESTROY(map);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free(vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *entries)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t buflen1;
	int error, e_flags, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	e_flags = IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_gas_map(domain, &tag->common, buflen1,
		    offset, e_flags, gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		/* Update buflen1 in case the buffer was split. */
		if (buflen1 > entry->end - entry->start - offset)
			buflen1 = entry->end - entry->start - offset;

		KASSERT(vm_addr_align_ok(entry->start + offset,
		    tag->common.alignment),
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
		TAILQ_INSERT_TAIL(entries, entry, dmamap_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(offset + buflen1);
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&entries);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &entries);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else if (!TAILQ_EMPTY(&entries)) {
		/*
		 * The busdma interface does not allow us to report a
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized, we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized, we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context. Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the iommu to perform the actual
 * unload, consisting of the unmapping of the map entries' page tables,
 * from the delayed context on i386, since page table page mapping
 * might require a sleep to be successful. The unfortunate
 * consequence is that the DMA requests can be served some time after
 * the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

	TAILQ_INIT(&entries);
	IOMMU_DMAMAP_LOCK(map);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
	struct bus_dmamap_iommu *map __unused;

	map = (struct bus_dmamap_iommu *)map1;
	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}
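
/*
 * Note on the deferred-load machinery above: when a load cannot be
 * completed without sleeping, iommu_bus_schedule_dmamap() parks the map
 * on the unit's delayed_maps list and the load returns EINPROGRESS;
 * iommu_bus_task_dmamap() later replays the request from the unit's
 * taskqueue, where sleeping is permitted, and balances the tag's
 * lockfunc around the driver callback.
 */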

int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 0;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	SYSCTL_ADD_INT(&unit->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(unit->dev)),
	    OID_AUTO, "dma", CTLFLAG_RD, &unit->dma_enabled, 0,
	    "DMA ops enabled");
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}
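
/*
 * Example only: iommu_init_busdma() above leaves dma_enabled at zero
 * unless a tunable overrides it, so a loader.conf entry such as
 *
 *	hw.iommu.dma=1
 *
 * (or the legacy spelling hw.dmar.dma=1) is what enables IOMMU-backed
 * busdma for the unit; the read-only "dma" sysctl reports the result.
 */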

int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) != 0;

	entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_gas_free_entry(entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
	    IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else {
		iommu_gas_free_entry(entry);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}

static void
iommu_domain_unload_task(void *arg, int pending)
{
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries,
		    iommu_map_entry, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		iommu_domain_unload(domain, &entries, true);
	}
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

	domain->ops = ops;
	domain->iommu = unit;

	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

	mtx_destroy(&domain->lock);
}