/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */

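/*
 * Decide whether the given PCI device should fall back to bounce-buffer
 * busdma instead of IOMMU translation.  The global default comes from
 * the hw.busdma.default tunable and may be overridden per device with
 * hw.busdma.pci<domain>.<bus>.<slot>.<func>; both accept the values
 * "bounce", "iommu", or the legacy "dmar".
 */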
static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar"; /* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given original device, find the requester ID that will be seen by
 * the IOMMU unit and used for page table lookup.  PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device.  In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either IOMMU or
 * bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	pci = device_get_parent(dev);
	if (pci == NULL || device_get_devclass(pci) != pci_class) {
		*rid = 0;	/* XXXKIB: Could be ACPI HID */
		return (requester);
	}

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe; it cannot be seen
			 * as a requester by the IOMMU unit.  Check
			 * whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

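/*
 * Create or look up the IOMMU context for the requester of the given
 * device.  If the administrator disabled translation for the device via
 * the tunables above, the context is still created, but with an
 * identity mapping, and is marked IOMMU_CTX_DISABLED; NULL is returned
 * in that case so that the IOMMU-backed busdma tag is not used.
 */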
struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the IOMMU unit, due to the possibility of
	 * other devices on the same IOMMU unit still requiring
	 * translation.  Instead provide the identity mapping for the
	 * device context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
	struct iommu_unit *unit;

	unit = iommu_find(dev, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

	iommu_unit_pre_instantiate_ctx(unit);
	return (iommu_instantiate_ctx(unit, dev, false));
}

bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	ctx = iommu_get_dev_ctx(child);
	if (ctx == NULL)
		return (NULL);

	res = (bus_dma_tag_t)ctx->tag;
	return (res);
}

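/*
 * Request that all devices on the PCI bus of 'dev' share a single IOMMU
 * context.  Only slot 0, function 0 may ask for this; the bus is then
 * recorded in the unit's buswide_ctxs bitmap, which
 * iommu_is_buswide_ctx() consults later.  A hypothetical caller in a
 * driver attach routine might look like:
 *
 *	if (!bus_dma_iommu_set_buswide(dev))
 *		device_printf(dev, "per-function IOMMU contexts in use\n");
 */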
bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
    bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, maxsize, nsegments, maxsegsz, flags,
	    lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_iommu),
	    (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat;
	struct iommu_unit *iommu;
	struct iommu_ctx *ctx;
	int error;

	error = 0;
	dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		ctx = dmat->ctx;
		if (dmat == ctx->tag) {
			iommu = ctx->domain->iommu;
			IOMMU_LOCK(iommu);
			iommu_free_ctx_locked(iommu, dmat->ctx);
		}
		free(dmat->segments, M_IOMMU_DMAMAP);
		free(dmat, M_DEVBUF);
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	IOMMU_DMAMAP_INIT(map);
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		IOMMU_DMAMAP_LOCK(map);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DMAMAP_UNLOCK(map);
			return (EBUSY);
		}
		IOMMU_DMAMAP_DESTROY(map);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

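/*
 * bus_dmamem_alloc(9) backend.  Small, cacheable allocations whose
 * alignment fits within the tag's maxsize are served from malloc(9);
 * everything else goes through kmem_alloc_attr_domainset() so that the
 * requested memory attribute and address range can be honored.  The
 * chosen path is remembered in the map flags so that
 * iommu_bus_dmamem_free() releases the memory the same way.
 */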
static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free(vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

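/*
 * Map the array of physical pages 'ma' into the domain's I/O address
 * space, building the scatter/gather segment list as we go.  Each
 * iteration asks iommu_gas_map() for a chunk of at most maxsegsz bytes;
 * splitting is allowed while spare segments remain, and the resulting
 * map entries are collected on 'entries' so the caller can either
 * commit them to the map or hand them to the unload task on failure.
 */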
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *entries)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t buflen1;
	int error, e_flags, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	e_flags = IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_gas_map(domain, &tag->common, buflen1,
		    offset, e_flags, gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		/* Update buflen1 in case the buffer was split. */
		if (buflen1 > entry->end - entry->start - offset)
			buflen1 = entry->end - entry->start - offset;

		KASSERT(vm_addr_align_ok(entry->start + offset,
		    tag->common.alignment),
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
		TAILQ_INSERT_TAIL(entries, entry, dmamap_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(offset + buflen1);
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

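/*
 * Common load path for the phys/buffer/ma loaders.  On success the new
 * entries are appended to the map; on failure everything already mapped
 * is queued to the domain unload task, since busdma cannot report a
 * partial load.  An ENOMEM for a sleepable request that is not already
 * running from the delayed context is converted to EINPROGRESS and the
 * map is scheduled for a retry from the IOMMU taskqueue.
 */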
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&entries);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &entries);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else if (!TAILQ_EMPTY(&entries)) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

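/*
 * Record the memory descriptor and the driver callback so that a load
 * which returned EINPROGRESS can be re-driven later by
 * iommu_bus_task_dmamap() with sleeping allowed.
 */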
static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the iommu to perform the
 * actual unload, consisting of the unmapping of the map entries page
 * tables, from the delayed context on i386, since page table page
 * mapping might require a sleep to be successful.  The unfortunate
 * consequence is that the DMA requests can be served some time after
 * the bus_dmamap_unload() call returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

	TAILQ_INIT(&entries);
	IOMMU_DMAMAP_LOCK(map);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
	struct bus_dmamap_iommu *map __unused;

	map = (struct bus_dmamap_iommu *)map1;
	kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
	struct bus_dmamap_iommu *map;

	map = (struct bus_dmamap_iommu *)map1;
	if (map == NULL)
		return;
	memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
	.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

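/*
 * Taskqueue handler that re-drives loads which previously returned
 * EINPROGRESS.  The load is retried with map->cansleep set so that
 * page table allocation may sleep; if the completion callback took the
 * driver lock through iommu_bus_dmamap_complete(), it is released here
 * via the tag's lockfunc.
 */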
static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

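/*
 * Per-unit busdma initialization: honor the hw.iommu.dma tunable (with
 * hw.dmar.dma as a compatibility fallback), export the result as the
 * read-only "dma" sysctl under the unit's device tree, and create the
 * taskqueue used for delayed map loads and deferred unloads.
 */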
int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 0;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	SYSCTL_ADD_INT(&unit->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(unit->dev)),
	    OID_AUTO, "dma", CTLFLAG_RD, &unit->dma_enabled, 0,
	    "DMA ops enabled");
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

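/*
 * Pre-load an identity (DVA == PA) mapping for the page-aligned
 * physical range [start, start + length) into the given map.  For tags
 * that are not backed by the IOMMU implementation this is a no-op that
 * returns 0.  An illustrative call, with hypothetical range variables:
 *
 *	error = bus_dma_iommu_load_ident(dmat, map, rmrr_start,
 *	    rmrr_end - rmrr_start, BUS_DMA_NOWAIT);
 */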
int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) != 0;

	entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_gas_free_entry(entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
	    IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DMAMAP_LOCK(map);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		IOMMU_DMAMAP_UNLOCK(map);
	} else {
		iommu_gas_free_entry(entry);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}

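/*
 * Deferred unload worker: repeatedly steal the domain's unload_entries
 * list and unmap the entries in a context where sleeping is permitted.
 */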
static void
iommu_domain_unload_task(void *arg, int pending)
{
	struct iommu_domain *domain;
	struct iommu_map_entries_tailq entries;

	domain = arg;
	TAILQ_INIT(&entries);

	for (;;) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_SWAP(&domain->unload_entries, &entries,
		    iommu_map_entry, dmamap_link);
		IOMMU_DOMAIN_UNLOCK(domain);
		if (TAILQ_EMPTY(&entries))
			break;
		iommu_domain_unload(domain, &entries, true);
	}
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

	domain->ops = ops;
	domain->iommu = unit;

	TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
	RB_INIT(&domain->rb_root);
	TAILQ_INIT(&domain->unload_entries);
	mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

	mtx_destroy(&domain->lock);
}