/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <dev/iommu/busdma_iommu.h>
#include <dev/iommu/iommu.h>
#include <x86/iommu/intel_dmar.h>
#endif

/*
 * busdma_iommu.c, the implementation of the busdma(9) interface using
 * IOMMU units from Intel VT-d.
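 *
 * Configuration note (an illustrative summary of the tunables parsed
 * below, not a complete reference): iommu_bus_dma_is_dev_disabled()
 * understands hw.busdma.default and
 * hw.busdma.pci<domain>.<bus>.<slot>.<func> with the values "bounce",
 * "iommu", or the legacy "dmar", while iommu_init_busdma() honors
 * hw.iommu.dma (or the compatibility hw.dmar.dma) to disable
 * translation for a whole unit.  A hypothetical loader.conf(5)
 * fragment
 *	hw.busdma.default="iommu"
 *	hw.busdma.pci0.6.0.0="bounce"
 * keeps translation enabled by default but forces bounce buffers for
 * the device at pci0:6:0:0.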
 */

static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char iommu_str[] = "iommu";
	static const char dmar_str[] = "dmar"; /* compatibility */

	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, iommu_str) == 0 ||
		    strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, iommu_str) == 0 ||
	    strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given the original device, find the requester ID that will be seen
 * by the IOMMU unit and used for the page table lookup.  PCI bridges
 * may take ownership of transactions from downstream devices, so it
 * may not be the same as the BSF of the target device.  In those
 * cases, all devices downstream of the bridge must share a single
 * mapping domain, and must collectively be assigned to use either
 * IOMMU or bounce mapping.
 */
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the IOMMU
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("iommu_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be seen
			 * as a requester by the IOMMU unit.  Check
			 * whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
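			 * (If pcibp were itself a PCIe->PCI bridge,
			 * the bus that pcib sits on would be
			 * conventional PCI and pcib would be an
			 * ordinary PCI->PCI bridge; any other port
			 * type means the link above pcib is PCIe, so
			 * the translation must happen in pcib.)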
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
	device_t requester;
	struct iommu_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = iommu_get_requester(dev, &rid);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the IOMMU unit, due to the possibility of
	 * other devices on the same IOMMU unit still requiring
	 * translation.  Instead provide the identity mapping for the
	 * device context.
	 */
	disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		IOMMU_LOCK(unit);
		if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
			ctx->flags |= IOMMU_CTX_DISABLED;
			IOMMU_UNLOCK(unit);
		} else {
			iommu_free_ctx_locked(unit, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

bus_dma_tag_t
acpi_iommu_get_dma_tag(device_t dev, device_t child)
{
	struct iommu_unit *unit;
	struct iommu_ctx *ctx;
	bus_dma_tag_t res;

	unit = iommu_find(child, bootverbose);
	/* Not in scope of any IOMMU? */
	if (unit == NULL)
		return (NULL);
	if (!unit->dma_enabled)
		return (NULL);

#if defined(__amd64__) || defined(__i386__)
	dmar_quirks_pre_use(unit);
	dmar_instantiate_rmrr_ctxs(unit);
#endif

	ctx = iommu_instantiate_ctx(unit, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)ctx->tag;
	return (res);
}

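/*
 * "Buswide" contexts: a device at slot 0, function 0 may request that
 * its whole PCI bus be marked in the unit's buswide_ctxs bitmap, which
 * iommu_is_buswide_ctx() then reports for every requester ID on that
 * bus.  The bitmap packs one bit per bus number, NBBY *
 * sizeof(uint32_t) == 32 buses per uint32_t word.
 */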
bool
bus_dma_iommu_set_buswide(device_t dev)
{
	struct iommu_unit *unit;
	device_t parent;
	u_int busno, slot, func;

	parent = device_get_parent(dev);
	if (device_get_devclass(parent) != devclass_find("pci"))
		return (false);
	unit = iommu_find(dev, bootverbose);
	if (unit == NULL)
		return (false);
	busno = pci_get_bus(dev);
	slot = pci_get_slot(dev);
	func = pci_get_function(dev);
	if (slot != 0 || func != 0) {
		if (bootverbose) {
			device_printf(dev,
			    "iommu%d pci%d:%d:%d requested buswide busdma\n",
			    unit->unit, busno, slot, func);
		}
		return (false);
	}
	iommu_set_buswide_ctx(unit, busno);
	return (true);
}

void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	IOMMU_LOCK(unit);
	unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
	    1 << (busno % (NBBY * sizeof(uint32_t)));
	IOMMU_UNLOCK(unit);
}

bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{

	MPASS(busno <= PCI_BUSMAX);
	return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
	    (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}

static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");

static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
    struct bus_dmamap_iommu *map);

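/*
 * Child tags inherit the IOMMU context and owner of their parent.
 * The parent tag is expected to be non-NULL here, since this
 * implementation is only reached through an already IOMMU-backed tag
 * (ultimately the context's root tag); this is why oldtag is
 * dereferenced without a NULL check below.
 */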
static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == dmat->ctx->tag)
					iommu_free_ctx(dmat->ctx);
				free(dmat->segments, M_IOMMU_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_domain *domain;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		domain = tag->ctx->domain;
		IOMMU_DOMAIN_LOCK(domain);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DOMAIN_UNLOCK(domain);
			return (EBUSY);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

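/*
 * Allocate DMA memory either from malloc(9), for sub-page requests
 * where malloc()'s natural sizing is assumed to satisfy the tag's
 * alignment (alignment <= maxsize), or from
 * kmem_alloc_attr_domainset(), which can also honor a non-default
 * memory attribute such as the one requested by BUS_DMA_NOCACHE.
 */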
static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

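/*
 * Build the scatter/gather list for a run of pages: each iteration
 * carves off at most maxsegsz bytes, allocates IOMMU address space for
 * it (allowing the allocator to split the request while more segments
 * remain), and records the resulting bus address and length in
 * segs[].  Entries created here are also put on unroll_list so that
 * the caller can revert a partially completed load.
 */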
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *unroll_list)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	iommu_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

		error = iommu_map(domain, &tag->common, size, offset,
		    IOMMU_MAP_ENTRY_READ |
		    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0),
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & IOMMU_MF_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(iommu_test_boundary(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= IOMMU_MAP_ENTRY_MAP;
		IOMMU_DOMAIN_UNLOCK(domain);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

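/*
 * Load the page run, unrolling all created entries on failure.  If a
 * non-sleeping attempt runs out of resources and the caller did not
 * pass BUS_DMA_NOWAIT, ENOMEM is turned into EINPROGRESS and the map
 * is queued to the unit's taskqueue for a deferred retry.
 */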
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry, *entry1;
	struct iommu_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than what we have created
			 * during the failed run might have been
			 * inserted there in between, since we own ctx
			 * pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
			    dmamap_link);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

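/*
 * The loaders below convert the supplied buffer into an array of
 * vm_page pointers for iommu_bus_dmamap_load_something().  Physical
 * addresses that are not backed by real vm_page structures (e.g.
 * device memory) are represented by fake pages set up with
 * vm_page_initfake().
 */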
static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], pstart + ptoa(i),
			    VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(fma, M_DEVBUF);
	free(ma, M_DEVBUF);
	return (error);
}

static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
		if (pmap == kernel_pmap)
			paddr = pmap_kextract(pstart);
		else
			paddr = pmap_extract(pmap, pstart);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
			 */
			if (fma == NULL) {
				fma = malloc(sizeof(struct vm_page) * ma_cnt,
				    M_DEVBUF, M_ZERO | mflags);
				if (fma == NULL) {
					free(ma, M_DEVBUF);
					return (ENOMEM);
				}
			}
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	}
	error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

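/*
 * Record the memory descriptor and the driver callback so that a load
 * deferred with EINPROGRESS can be replayed later by
 * iommu_bus_task_dmamap() via bus_dmamap_load_mem().
 */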
static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_iommu *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_iommu *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_iommu *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the iommu to perform the
 * actual unload, consisting of the unmapping of the map entries' page
 * tables, from the delayed context on i386, since page table page
 * mapping might require a sleep to be successful.  The unfortunate
 * consequence is that the DMA requests can be served some time after
 * the bus_dmamap_unload() call returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
#if defined(__amd64__)
	struct iommu_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	taskqueue_enqueue(domain->iommu->delayed_taskqueue,
	    &domain->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	IOMMU_DOMAIN_LOCK(domain);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	IOMMU_DOMAIN_UNLOCK(domain);
	THREAD_NO_SLEEPING();
	iommu_domain_unload(domain, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}

static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_iommu_impl = {
	.tag_create = iommu_bus_dma_tag_create,
	.tag_destroy = iommu_bus_dma_tag_destroy,
	.tag_set_domain = iommu_bus_dma_tag_set_domain,
	.id_mapped = iommu_bus_dma_id_mapped,
	.map_create = iommu_bus_dmamap_create,
	.map_destroy = iommu_bus_dmamap_destroy,
	.mem_alloc = iommu_bus_dmamem_alloc,
	.mem_free = iommu_bus_dmamem_free,
	.load_phys = iommu_bus_dmamap_load_phys,
	.load_buffer = iommu_bus_dmamap_load_buffer,
	.load_ma = iommu_bus_dmamap_load_ma,
	.map_waitok = iommu_bus_dmamap_waitok,
	.map_complete = iommu_bus_dmamap_complete,
	.map_unload = iommu_bus_dmamap_unload,
	.map_sync = iommu_bus_dmamap_sync,
};

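/*
 * Run the loads that were deferred by iommu_bus_schedule_dmamap().
 * Each load is retried with sleeping allowed;
 * iommu_bus_dmamap_complete() re-takes the driver lock before the
 * driver callback runs, and the lock is dropped again here after the
 * callback has returned.
 */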
static void
iommu_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_unit *unit;

	unit = arg;
	IOMMU_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		IOMMU_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		IOMMU_LOCK(unit);
	}
	IOMMU_UNLOCK(unit);
}

static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{

	map->locked = false;
	IOMMU_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	IOMMU_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

int
iommu_init_busdma(struct iommu_unit *unit)
{
	int error;

	unit->dma_enabled = 1;
	error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
	if (error == 0) /* compatibility */
		TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "iommu%d busdma taskq", unit->unit);
	return (0);
}

void
iommu_fini_busdma(struct iommu_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}

int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;
	waitok = (flags & BUS_DMA_NOWAIT) == 0;

	entry = iommu_map_alloc_entry(domain, waitok ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;
	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_map_free_entry(domain, entry);
		return (ENOMEM);
	}
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}
	error = iommu_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE),
	    waitok ? IOMMU_MF_CANWAIT : 0, ma);
	if (error == 0) {
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= IOMMU_MAP_ENTRY_MAP;
		IOMMU_DOMAIN_UNLOCK(domain);
	} else {
		iommu_domain_unload_entry(entry, true);
	}
	for (i = 0; i < atop(length); i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
	return (error);
}