/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <dev/iommu/iommu.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/iommu.h>
#include <dev/iommu/busdma_iommu.h>

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
 */
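
/*
 * Illustrative only: the per-device policy checked by
 * iommu_bus_dma_is_dev_disabled() below is driven by loader tunables.
 * A loader.conf(5) fragment might look like this (the PCI address is a
 * made-up example):
 *
 *	hw.busdma.default="iommu"	# or "bounce" to bypass translation
 *	hw.busdma.pci0.6.0.0="bounce"	# domain 0, bus 6, slot 0, func 0
 */
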
static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
        char str[128], *env;
        int default_bounce;
        bool ret;
        static const char bounce_str[] = "bounce";
        static const char iommu_str[] = "iommu";
        static const char dmar_str[] = "dmar"; /* compatibility */

        default_bounce = 0;
        env = kern_getenv("hw.busdma.default");
        if (env != NULL) {
                if (strcmp(env, bounce_str) == 0)
                        default_bounce = 1;
                else if (strcmp(env, iommu_str) == 0 ||
                    strcmp(env, dmar_str) == 0)
                        default_bounce = 0;
                freeenv(env);
        }

        snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
            domain, bus, slot, func);
        env = kern_getenv(str);
        if (env == NULL)
                return (default_bounce != 0);
        if (strcmp(env, bounce_str) == 0)
                ret = true;
        else if (strcmp(env, iommu_str) == 0 ||
            strcmp(env, dmar_str) == 0)
                ret = false;
        else
                ret = default_bounce != 0;
        freeenv(env);
        return (ret);
}

/*
 * Given the original device, find the requester ID that will be seen
 * by the IOMMU unit and used for the page table lookup.  PCI bridges
 * may take ownership of transactions from downstream devices, so it
 * may not be the same as the BSF of the target device.  In those
 * cases, all devices downstream of the bridge must share a single
 * mapping domain, and must collectively be assigned to use either
 * IOMMU or bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
        devclass_t pci_class;
        device_t l, pci, pcib, pcip, pcibp, requester;
        int cap_offset;
        uint16_t pcie_flags;
        bool bridge_is_pcie;

        pci_class = devclass_find("pci");
        l = requester = dev;

        *rid = pci_get_rid(dev);

        /*
         * Walk the bridge hierarchy from the target device to the
         * host port to find the translating bridge nearest the IOMMU
         * unit.
         */
        for (;;) {
                pci = device_get_parent(l);
                KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
                    "for %s", device_get_name(dev), device_get_name(l)));
                KASSERT(device_get_devclass(pci) == pci_class,
                    ("iommu_get_requester(%s): non-pci parent %s for %s",
                    device_get_name(dev), device_get_name(pci),
                    device_get_name(l)));

                pcib = device_get_parent(pci);
                KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
                    "for %s", device_get_name(dev), device_get_name(pci)));

                /*
                 * The parent of our "bridge" isn't another PCI bus,
                 * so pcib isn't a PCI->PCI bridge but rather a host
                 * port, and the requester ID won't be translated
                 * further.
                 */
                pcip = device_get_parent(pcib);
                if (device_get_devclass(pcip) != pci_class)
                        break;
                pcibp = device_get_parent(pcip);

                if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
                        /*
                         * Do not stop the loop even if the target
                         * device is PCIe, because it is possible (but
                         * unlikely) to have a PCI->PCIe bridge
                         * somewhere in the hierarchy.
                         */
                        l = pcib;
                } else {
                        /*
                         * The device is not PCIe, so it cannot be
                         * seen as a requester by the IOMMU unit.
                         * Check whether the bridge is PCIe.
                         */
                        bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
                            &cap_offset) == 0;
                        requester = pcib;

                        /*
                         * Check for a buggy PCIe/PCI bridge that
                         * doesn't report the express capability.  If
                         * the bridge above it is express but isn't a
                         * PCI bridge, then we know pcib is actually a
                         * PCIe/PCI bridge.
                         */
                        if (!bridge_is_pcie && pci_find_cap(pcibp,
                            PCIY_EXPRESS, &cap_offset) == 0) {
                                pcie_flags = pci_read_config(pcibp,
                                    cap_offset + PCIER_FLAGS, 2);
                                if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
                                    PCIEM_TYPE_PCI_BRIDGE)
                                        bridge_is_pcie = true;
                        }

                        if (bridge_is_pcie) {
                                /*
                                 * The current device is not PCIe, but
                                 * the bridge above it is.  This is a
                                 * PCIe->PCI bridge.  Assume that the
                                 * requester ID will be the secondary
                                 * bus number with slot and function
                                 * set to zero.
                                 *
                                 * XXX: Doesn't handle the case where
                                 * the bridge is PCIe->PCI-X, and the
                                 * bridge will only take ownership of
                                 * requests in some cases.  We should
                                 * provide context entries with the
                                 * same page tables for taken and
                                 * non-taken transactions.
                                 */
                                *rid = PCI_RID(pci_get_bus(l), 0, 0);
                                l = pcibp;
                        } else {
                                /*
                                 * Neither the device nor the bridge
                                 * above it are PCIe.  This is a
                                 * conventional PCI->PCI bridge, which
                                 * will use the bridge's BSF as the
                                 * requester ID.
                                 */
                                *rid = pci_get_rid(pcib);
                                l = pcib;
                        }
                }
        }
        return (requester);
}
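
/*
 * Worked example (hypothetical topology): a conventional PCI device
 * behind a PCIe->PCI bridge whose secondary bus is 5 has its
 * transactions tagged with the bridge's secondary bus and zero
 * slot/function, so the loop above reports
 *
 *	*rid = PCI_RID(5, 0, 0);	-> 0x0500
 *
 * and returns the bridge device as the requester.
 */
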
struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
        device_t requester;
        struct iommu_ctx *ctx;
        bool disabled;
        uint16_t rid;

        requester = iommu_get_requester(dev, &rid);

        /*
         * If the user requested that the IOMMU be disabled for the
         * device, we cannot disable the whole IOMMU unit, due to the
         * possibility of other devices on the same unit still
         * requiring translation.  Instead, provide the identity
         * mapping for the device context.
         */
        disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
            pci_get_bus(requester), pci_get_slot(requester),
            pci_get_function(requester));
        ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
        if (ctx == NULL)
                return (NULL);
        if (disabled) {
                /*
                 * Keep the first reference on the context, release
                 * the later refs.
                 */
                IOMMU_LOCK(unit);
                if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
                        ctx->flags |= IOMMU_CTX_DISABLED;
                        IOMMU_UNLOCK(unit);
                } else {
                        iommu_free_ctx_locked(unit, ctx);
                }
                ctx = NULL;
        }
        return (ctx);
}

struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
        struct iommu_unit *unit;

        unit = iommu_find(dev, bootverbose);
        /* Not in scope of any IOMMU? */
        if (unit == NULL)
                return (NULL);
        if (!unit->dma_enabled)
                return (NULL);

        iommu_unit_pre_instantiate_ctx(unit);
        return (iommu_instantiate_ctx(unit, dev, false));
}

bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
        struct iommu_ctx *ctx;
        bus_dma_tag_t res;

        ctx = iommu_get_dev_ctx(child);
        if (ctx == NULL)
                return (NULL);

        res = (bus_dma_tag_t)ctx->tag;
        return (res);
}

bool
bus_dma_iommu_set_buswide(device_t dev)
{
        struct iommu_unit *unit;
        device_t parent;
        u_int busno, slot, func;

        parent = device_get_parent(dev);
        if (device_get_devclass(parent) != devclass_find("pci"))
                return (false);
        unit = iommu_find(dev, bootverbose);
        if (unit == NULL)
                return (false);
        busno = pci_get_bus(dev);
        slot = pci_get_slot(dev);
        func = pci_get_function(dev);
        if (slot != 0 || func != 0) {
                if (bootverbose) {
                        device_printf(dev,
                            "iommu%d pci%d:%d:%d requested buswide busdma\n",
                            unit->unit, busno, slot, func);
                }
                return (false);
        }
        iommu_set_buswide_ctx(unit, busno);
        return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

        MPASS(busno <= PCI_BUSMAX);
        IOMMU_LOCK(unit);
        unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
            1 << (busno % (NBBY * sizeof(uint32_t)));
        IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

        MPASS(busno <= PCI_BUSMAX);
        return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
            (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}
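
/*
 * Illustrative arithmetic for the buswide_ctxs bitmap above: with
 * 32-bit words, bus number B lands in word B / 32 at bit B % 32.
 * For example, busno 37 (0x25) sets bit 5 of buswide_ctxs[1]:
 *
 *	37 / (NBBY * sizeof(uint32_t)) == 1
 *	37 % (NBBY * sizeof(uint32_t)) == 5
 */
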
static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
    bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat)
{
        struct bus_dma_tag_iommu *newtag, *oldtag;
        int error;

        *dmat = NULL;
        error = common_bus_dma_tag_create(parent != NULL ?
            &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
            boundary, lowaddr, highaddr, maxsize, nsegments, maxsegsz, flags,
            lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_iommu),
            (void **)&newtag);
        if (error != 0)
                goto out;

        oldtag = (struct bus_dma_tag_iommu *)parent;
        newtag->common.impl = &bus_dma_iommu_impl;
        newtag->ctx = oldtag->ctx;
        newtag->owner = oldtag->owner;

        *dmat = (bus_dma_tag_t)newtag;
out:
        CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
            __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
            error);
        return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

        return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
        struct bus_dma_tag_iommu *dmat;
        int error;

        error = 0;
        dmat = (struct bus_dma_tag_iommu *)dmat1;

        if (dmat != NULL) {
                if (dmat->map_count != 0) {
                        error = EBUSY;
                        goto out;
                }
                if (dmat == dmat->ctx->tag)
                        iommu_free_ctx(dmat->ctx);
                free(dmat->segments, M_IOMMU_DMAMAP);
                free(dmat, M_DEVBUF);
        }
out:
        CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
        return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

        return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
            DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
        if (map == NULL) {
                *mapp = NULL;
                return (ENOMEM);
        }
        if (tag->segments == NULL) {
                tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
                    tag->common.nsegments, M_IOMMU_DMAMAP,
                    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
                if (tag->segments == NULL) {
                        free(map, M_IOMMU_DMAMAP);
                        *mapp = NULL;
                        return (ENOMEM);
                }
        }
        IOMMU_DMAMAP_INIT(map);
        TAILQ_INIT(&map->map_entries);
        map->tag = tag;
        map->locked = true;
        map->cansleep = false;
        tag->map_count++;
        *mapp = (bus_dmamap_t)map;

        return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)map1;
        if (map != NULL) {
                IOMMU_DMAMAP_LOCK(map);
                if (!TAILQ_EMPTY(&map->map_entries)) {
                        IOMMU_DMAMAP_UNLOCK(map);
                        return (EBUSY);
                }
                IOMMU_DMAMAP_DESTROY(map);
                free(map, M_IOMMU_DMAMAP);
        }
        tag->map_count--;
        return (0);
}

static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;
        int error, mflags;
        vm_memattr_t attr;

        error = iommu_bus_dmamap_create(dmat, flags, mapp);
        if (error != 0)
                return (error);

        mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
        mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
        attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
            VM_MEMATTR_DEFAULT;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)*mapp;

        if (tag->common.maxsize < PAGE_SIZE &&
            tag->common.alignment <= tag->common.maxsize &&
            attr == VM_MEMATTR_DEFAULT) {
                *vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
                    DOMAINSET_PREF(tag->common.domain), mflags);
                map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
        } else {
                *vaddr = kmem_alloc_attr_domainset(
                    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
                    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
                map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
        }
        if (*vaddr == NULL) {
                iommu_bus_dmamap_destroy(dmat, *mapp);
                *mapp = NULL;
                return (ENOMEM);
        }
        return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)map1;

        if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
                free(vaddr, M_DEVBUF);
                map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
        } else {
                KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
                    ("iommu_bus_dmamem_free for non alloced map %p", map));
                kmem_free(vaddr, tag->common.maxsize);
                map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
        }

        iommu_bus_dmamap_destroy(dmat, map1);
}
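
/*
 * Illustrative sketch of the consumer side (not part of this file):
 * drivers keep using the plain busdma(9) KPI; when the parent bus
 * hands out a tag whose impl is bus_dma_iommu_impl, the calls below
 * are dispatched to the iommu_bus_dma*() methods in this file.  The
 * sizes and the my_callback/sc names are made up for the example.
 *
 *	bus_dma_tag_t tag;
 *	bus_dmamap_t map;
 *	void *vaddr;
 *
 *	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
 *	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    4096, 1, 4096, 0, NULL, NULL, &tag);
 *	error = bus_dmamem_alloc(tag, &vaddr,
 *	    BUS_DMA_WAITOK | BUS_DMA_ZERO, &map);
 *					(reaches iommu_bus_dmamem_alloc())
 *	error = bus_dmamap_load(tag, map, vaddr, 4096, my_callback, sc,
 *	    BUS_DMA_NOWAIT);
 *					(reaches iommu_bus_dmamap_load_buffer())
 */
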
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *entries)
{
        struct iommu_ctx *ctx;
        struct iommu_domain *domain;
        struct iommu_map_entry *entry;
        bus_size_t buflen1;
        int error, e_flags, idx, gas_flags, seg;

        KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
        if (segs == NULL)
                segs = tag->segments;
        ctx = tag->ctx;
        domain = ctx->domain;
        e_flags = IOMMU_MAP_ENTRY_READ |
            ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
        seg = *segp;
        error = 0;
        idx = 0;
        while (buflen > 0) {
                seg++;
                if (seg >= tag->common.nsegments) {
                        error = EFBIG;
                        break;
                }
                buflen1 = buflen > tag->common.maxsegsz ?
                    tag->common.maxsegsz : buflen;

                /*
                 * (Too) optimistically allow split if there is more
                 * than one segment left.
                 */
                gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
                if (seg + 1 < tag->common.nsegments)
                        gas_flags |= IOMMU_MF_CANSPLIT;

                error = iommu_gas_map(domain, &tag->common, buflen1,
                    offset, e_flags, gas_flags, ma + idx, &entry);
                if (error != 0)
                        break;
                /* Update buflen1 in case the buffer was split. */
                if (buflen1 > entry->end - entry->start - offset)
                        buflen1 = entry->end - entry->start - offset;

                KASSERT(vm_addr_align_ok(entry->start + offset,
                    tag->common.alignment),
                    ("alignment failed: ctx %p start 0x%jx offset %x "
                    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
                    (uintmax_t)tag->common.alignment));
                KASSERT(entry->end <= tag->common.lowaddr ||
                    entry->start >= tag->common.highaddr,
                    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
                    "lowaddr 0x%jx highaddr 0x%jx", ctx,
                    (uintmax_t)entry->start, (uintmax_t)entry->end,
                    (uintmax_t)tag->common.lowaddr,
                    (uintmax_t)tag->common.highaddr));
                KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
                    tag->common.boundary),
                    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
                    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
                    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
                KASSERT(buflen1 <= tag->common.maxsegsz,
                    ("segment too large: ctx %p start 0x%jx end 0x%jx "
                    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
                    (uintmax_t)entry->start, (uintmax_t)entry->end,
                    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

                KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
                    ("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
                TAILQ_INSERT_TAIL(entries, entry, dmamap_link);

                segs[seg].ds_addr = entry->start + offset;
                segs[seg].ds_len = buflen1;

                idx += OFF_TO_IDX(offset + buflen1);
                offset += buflen1;
                offset &= IOMMU_PAGE_MASK;
                buflen -= buflen1;
        }
        if (error == 0)
                *segp = seg;
        return (error);
}

static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
        struct iommu_ctx *ctx;
        struct iommu_domain *domain;
        struct iommu_map_entries_tailq entries;
        int error;

        ctx = tag->ctx;
        domain = ctx->domain;
        atomic_add_long(&ctx->loads, 1);

        TAILQ_INIT(&entries);
        error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
            buflen, flags, segs, segp, &entries);
        if (error == 0) {
                IOMMU_DMAMAP_LOCK(map);
                TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
                IOMMU_DMAMAP_UNLOCK(map);
        } else if (!TAILQ_EMPTY(&entries)) {
                /*
                 * The busdma interface does not allow us to report a
                 * partial buffer load, so unfortunately we have to
                 * revert all work done.
                 */
                IOMMU_DOMAIN_LOCK(domain);
                TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
                IOMMU_DOMAIN_UNLOCK(domain);
                taskqueue_enqueue(domain->iommu->delayed_taskqueue,
                    &domain->unload_task);
        }

        if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
            !map->cansleep)
                error = EINPROGRESS;
        if (error == EINPROGRESS)
                iommu_bus_schedule_dmamap(domain->iommu, map);
        return (error);
}
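
/*
 * Illustrative sketch, not part of this file: when the function above
 * returns EINPROGRESS, the load is retried from the IOMMU taskqueue
 * (see iommu_bus_task_dmamap() below) and the driver's busdma callback
 * fires later, as busdma(9) documents for deferred loads.  A
 * hypothetical driver callback therefore only needs the usual shape:
 *
 *	static void
 *	my_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 *	{
 *		struct my_softc *sc = arg;
 *
 *		if (error != 0)
 *			return;			(load failed, nothing mapped)
 *		sc->busaddr = segs[0].ds_addr;
 *	}
 */
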
static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)map1;
        return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
            flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;
        vm_page_t *ma, fma;
        vm_paddr_t pstart, pend, paddr;
        int error, i, ma_cnt, mflags, offset;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)map1;
        pstart = trunc_page(buf);
        pend = round_page(buf + buflen);
        offset = buf & PAGE_MASK;
        ma_cnt = OFF_TO_IDX(pend - pstart);
        mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
        ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
        if (ma == NULL)
                return (ENOMEM);
        fma = NULL;
        for (i = 0; i < ma_cnt; i++) {
                paddr = pstart + ptoa(i);
                ma[i] = PHYS_TO_VM_PAGE(paddr);
                if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
                        /*
                         * If PHYS_TO_VM_PAGE() returned NULL or the
                         * vm_page was not initialized we'll use a
                         * fake page.
                         */
                        if (fma == NULL) {
                                fma = malloc(sizeof(struct vm_page) * ma_cnt,
                                    M_DEVBUF, M_ZERO | mflags);
                                if (fma == NULL) {
                                        free(ma, M_DEVBUF);
                                        return (ENOMEM);
                                }
                        }
                        vm_page_initfake(&fma[i], pstart + ptoa(i),
                            VM_MEMATTR_DEFAULT);
                        ma[i] = &fma[i];
                }
        }
        error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
            flags, segs, segp);
        free(fma, M_DEVBUF);
        free(ma, M_DEVBUF);
        return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;
        vm_page_t *ma, fma;
        vm_paddr_t pstart, pend, paddr;
        int error, i, ma_cnt, mflags, offset;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)map1;
        pstart = trunc_page((vm_offset_t)buf);
        pend = round_page((vm_offset_t)buf + buflen);
        offset = (vm_offset_t)buf & PAGE_MASK;
        ma_cnt = OFF_TO_IDX(pend - pstart);
        mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
        ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
        if (ma == NULL)
                return (ENOMEM);
        fma = NULL;
        for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
                if (pmap == kernel_pmap)
                        paddr = pmap_kextract(pstart);
                else
                        paddr = pmap_extract(pmap, pstart);
                ma[i] = PHYS_TO_VM_PAGE(paddr);
                if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
                        /*
                         * If PHYS_TO_VM_PAGE() returned NULL or the
                         * vm_page was not initialized we'll use a
                         * fake page.
                         */
                        if (fma == NULL) {
                                fma = malloc(sizeof(struct vm_page) * ma_cnt,
                                    M_DEVBUF, M_ZERO | mflags);
                                if (fma == NULL) {
                                        free(ma, M_DEVBUF);
                                        return (ENOMEM);
                                }
                        }
                        vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
                        ma[i] = &fma[i];
                }
        }
        error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
            flags, segs, segp);
        free(ma, M_DEVBUF);
        free(fma, M_DEVBUF);
        return (error);
}

static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
        struct bus_dmamap_iommu *map;

        if (map1 == NULL)
                return;
        map = (struct bus_dmamap_iommu *)map1;
        map->mem = *mem;
        map->tag = (struct bus_dma_tag_iommu *)dmat;
        map->callback = callback;
        map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)map1;

        if (!map->locked) {
                KASSERT(map->cansleep,
                    ("map not locked and not sleepable context %p", map));

                /*
                 * We are called from the delayed context.  Relock the
                 * driver.
                 */
                (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
                map->locked = true;
        }

        if (segs == NULL)
                segs = tag->segments;
        return (segs);
}

/*
 * The limitations of the busdma KPI force the IOMMU to perform the
 * actual unload, which consists of unmapping the map entries from the
 * page tables, from the delayed context on i386, since mapping a page
 * table page might require a sleep to be successful.  The unfortunate
 * consequence is that DMA requests may still be serviced for some time
 * after the bus_dmamap_unload() call returns.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;
        struct iommu_ctx *ctx;
        struct iommu_domain *domain;
        struct iommu_map_entries_tailq entries;

        tag = (struct bus_dma_tag_iommu *)dmat;
        map = (struct bus_dmamap_iommu *)map1;
        ctx = tag->ctx;
        domain = ctx->domain;
        atomic_add_long(&ctx->unloads, 1);

        TAILQ_INIT(&entries);
        IOMMU_DMAMAP_LOCK(map);
        TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
        IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
        IOMMU_DOMAIN_LOCK(domain);
        TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
        IOMMU_DOMAIN_UNLOCK(domain);
        taskqueue_enqueue(domain->iommu->delayed_taskqueue,
            &domain->unload_task);
#else
        THREAD_NO_SLEEPING();
        iommu_domain_unload(domain, &entries, false);
        THREAD_SLEEPING_OK();
        KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dmasync_op_t op)
{
        struct bus_dmamap_iommu *map __unused;

        map = (struct bus_dmamap_iommu *)map1;
        kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}

#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
        struct bus_dmamap_iommu *map;

        map = (struct bus_dmamap_iommu *)map1;
        if (map == NULL)
                return;
        memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif

struct bus_dma_impl bus_dma_iommu_impl = {
        .tag_create = iommu_bus_dma_tag_create,
        .tag_destroy = iommu_bus_dma_tag_destroy,
        .tag_set_domain = iommu_bus_dma_tag_set_domain,
        .id_mapped = iommu_bus_dma_id_mapped,
        .map_create = iommu_bus_dmamap_create,
        .map_destroy = iommu_bus_dmamap_destroy,
        .mem_alloc = iommu_bus_dmamem_alloc,
        .mem_free = iommu_bus_dmamem_free,
        .load_phys = iommu_bus_dmamap_load_phys,
        .load_buffer = iommu_bus_dmamap_load_buffer,
        .load_ma = iommu_bus_dmamap_load_ma,
        .map_waitok = iommu_bus_dmamap_waitok,
        .map_complete = iommu_bus_dmamap_complete,
        .map_unload = iommu_bus_dmamap_unload,
        .map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
        .load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};

static void
iommu_bus_task_dmamap(void *arg, int pending)
{
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;
        struct iommu_unit *unit;

        unit = arg;
        IOMMU_LOCK(unit);
        while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
                TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
                IOMMU_UNLOCK(unit);
                tag = map->tag;
                map->cansleep = true;
                map->locked = false;
                bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
                    &map->mem, map->callback, map->callback_arg,
                    BUS_DMA_WAITOK);
                map->cansleep = false;
                if (map->locked) {
                        (tag->common.lockfunc)(tag->common.lockfuncarg,
                            BUS_DMA_UNLOCK);
                } else
                        map->locked = true;
                map->cansleep = false;
                IOMMU_LOCK(unit);
        }
        IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

        map->locked = false;
        IOMMU_LOCK(unit);
        TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
        IOMMU_UNLOCK(unit);
        taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}
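
/*
 * Whether an IOMMU unit performs busdma translation at all is also
 * tunable; iommu_init_busdma() below fetches the value at attach time.
 * An illustrative loader.conf(5) fragment:
 *
 *	hw.iommu.dma=0		# disable IOMMU-backed busdma
 *
 * The legacy hw.dmar.dma spelling is consulted for compatibility when
 * hw.iommu.dma is not set.
 */
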
int
iommu_init_busdma(struct iommu_unit *unit)
{
        int error;

        unit->dma_enabled = 0;
        error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
        if (error == 0) /* compatibility */
                TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
        SYSCTL_ADD_INT(&unit->sysctl_ctx,
            SYSCTL_CHILDREN(device_get_sysctl_tree(unit->dev)),
            OID_AUTO, "dma", CTLFLAG_RD, &unit->dma_enabled, 0,
            "DMA ops enabled");
        TAILQ_INIT(&unit->delayed_maps);
        TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
        unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
            taskqueue_thread_enqueue, &unit->delayed_taskqueue);
        taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
            "iommu%d busdma taskq", unit->unit);
        return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

        if (unit->delayed_taskqueue == NULL)
                return;

        taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
        taskqueue_free(unit->delayed_taskqueue);
        unit->delayed_taskqueue = NULL;
}
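
/*
 * Illustrative sketch, not part of this file: a driver that needs a
 * fixed bus address equal to the physical address (for example a
 * firmware-dictated region) can pre-load an identity mapping through
 * the function below.  The paddr and size values are made up; the
 * page-aligned arguments and the flag set mirror the MPASS checks in
 * the function.
 *
 *	error = bus_dma_iommu_load_ident(dmat, map,
 *	    trunc_page(paddr), round_page(size), BUS_DMA_NOWRITE);
 */
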
int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
        struct bus_dma_tag_common *tc;
        struct bus_dma_tag_iommu *tag;
        struct bus_dmamap_iommu *map;
        struct iommu_ctx *ctx;
        struct iommu_domain *domain;
        struct iommu_map_entry *entry;
        vm_page_t *ma;
        vm_size_t i;
        int error;
        bool waitok;

        MPASS((start & PAGE_MASK) == 0);
        MPASS((length & PAGE_MASK) == 0);
        MPASS(length > 0);
        MPASS(start + length >= start);
        MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

        tc = (struct bus_dma_tag_common *)dmat;
        if (tc->impl != &bus_dma_iommu_impl)
                return (0);

        tag = (struct bus_dma_tag_iommu *)dmat;
        ctx = tag->ctx;
        domain = ctx->domain;
        map = (struct bus_dmamap_iommu *)map1;
        waitok = (flags & BUS_DMA_NOWAIT) != 0;

        entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
        if (entry == NULL)
                return (ENOMEM);
        entry->start = start;
        entry->end = start + length;
        ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
            M_WAITOK : M_NOWAIT);
        if (ma == NULL) {
                iommu_gas_free_entry(entry);
                return (ENOMEM);
        }
        for (i = 0; i < atop(length); i++) {
                ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
                    VM_MEMATTR_DEFAULT);
        }
        error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
            ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
            IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
        if (error == 0) {
                IOMMU_DMAMAP_LOCK(map);
                TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
                IOMMU_DMAMAP_UNLOCK(map);
        } else {
                iommu_gas_free_entry(entry);
        }
        for (i = 0; i < atop(length); i++)
                vm_page_putfake(ma[i]);
        free(ma, M_TEMP);
        return (error);
}

static void
iommu_domain_unload_task(void *arg, int pending)
{
        struct iommu_domain *domain;
        struct iommu_map_entries_tailq entries;

        domain = arg;
        TAILQ_INIT(&entries);

        for (;;) {
                IOMMU_DOMAIN_LOCK(domain);
                TAILQ_SWAP(&domain->unload_entries, &entries,
                    iommu_map_entry, dmamap_link);
                IOMMU_DOMAIN_UNLOCK(domain);
                if (TAILQ_EMPTY(&entries))
                        break;
                iommu_domain_unload(domain, &entries, true);
        }
}

void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
    const struct iommu_domain_map_ops *ops)
{

        domain->ops = ops;
        domain->iommu = unit;

        TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
        RB_INIT(&domain->rb_root);
        TAILQ_INIT(&domain->unload_entries);
        mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}

void
iommu_domain_fini(struct iommu_domain *domain)
{

        mtx_destroy(&domain->lock);
}
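
/*
 * Illustrative sketch, not part of this file: an IOMMU hardware
 * backend that embeds a struct iommu_domain in its own domain
 * structure would typically pair the two helpers above (the mydom and
 * my_domain_map_ops names are hypothetical):
 *
 *	iommu_domain_init(unit, &mydom->iodom, &my_domain_map_ops);
 *	...
 *	iommu_domain_fini(&mydom->iodom);
 */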