1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 The FreeBSD Foundation 5 * 6 * This software was developed by Konstantin Belousov <kib@FreeBSD.org> 7 * under sponsorship from the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/bus.h> 38 #include <sys/interrupt.h> 39 #include <sys/kernel.h> 40 #include <sys/ktr.h> 41 #include <sys/limits.h> 42 #include <sys/lock.h> 43 #include <sys/memdesc.h> 44 #include <sys/mutex.h> 45 #include <sys/proc.h> 46 #include <sys/rwlock.h> 47 #include <sys/rman.h> 48 #include <sys/sysctl.h> 49 #include <sys/taskqueue.h> 50 #include <sys/tree.h> 51 #include <sys/uio.h> 52 #include <sys/vmem.h> 53 #include <vm/vm.h> 54 #include <vm/vm_extern.h> 55 #include <vm/vm_kern.h> 56 #include <vm/vm_object.h> 57 #include <vm/vm_page.h> 58 #include <vm/vm_pager.h> 59 #include <vm/vm_map.h> 60 #include <contrib/dev/acpica/include/acpi.h> 61 #include <contrib/dev/acpica/include/accommon.h> 62 #include <dev/pci/pcireg.h> 63 #include <dev/pci/pcivar.h> 64 #include <machine/atomic.h> 65 #include <machine/bus.h> 66 #include <machine/md_var.h> 67 #include <machine/specialreg.h> 68 #include <x86/include/busdma_impl.h> 69 #include <dev/iommu/busdma_iommu.h> 70 #include <x86/iommu/intel_reg.h> 71 #include <x86/iommu/intel_dmar.h> 72 73 static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context"); 74 static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain"); 75 76 static void dmar_unref_domain_locked(struct dmar_unit *dmar, 77 struct dmar_domain *domain); 78 static void dmar_domain_destroy(struct dmar_domain *domain); 79 80 static void 81 dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus) 82 { 83 struct sf_buf *sf; 84 dmar_root_entry_t *re; 85 vm_page_t ctxm; 86 87 /* 88 * Allocated context page must be linked. 89 */ 90 ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC); 91 if (ctxm != NULL) 92 return; 93 94 /* 95 * Page not present, allocate and link. Note that other 96 * thread might execute this sequence in parallel. This 97 * should be safe, because the context entries written by both 98 * threads are equal. 99 */ 100 TD_PREP_PINNED_ASSERT; 101 ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO | 102 IOMMU_PGF_WAITOK); 103 re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf); 104 re += bus; 105 dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK & 106 VM_PAGE_TO_PHYS(ctxm))); 107 dmar_flush_root_to_ram(dmar, re); 108 dmar_unmap_pgtbl(sf); 109 TD_PINNED_ASSERT; 110 } 111 112 static dmar_ctx_entry_t * 113 dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp) 114 { 115 struct dmar_unit *dmar; 116 dmar_ctx_entry_t *ctxp; 117 118 dmar = CTX2DMAR(ctx); 119 120 ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid), 121 IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp); 122 ctxp += ctx->context.rid & 0xff; 123 return (ctxp); 124 } 125 126 static void 127 device_tag_init(struct dmar_ctx *ctx, device_t dev) 128 { 129 struct dmar_domain *domain; 130 bus_addr_t maxaddr; 131 132 domain = CTX2DOM(ctx); 133 maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR); 134 ctx->context.tag->common.ref_count = 1; /* Prevent free */ 135 ctx->context.tag->common.impl = &bus_dma_iommu_impl; 136 ctx->context.tag->common.boundary = 0; 137 ctx->context.tag->common.lowaddr = maxaddr; 138 ctx->context.tag->common.highaddr = maxaddr; 139 ctx->context.tag->common.maxsize = maxaddr; 140 ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED; 141 ctx->context.tag->common.maxsegsz = maxaddr; 142 ctx->context.tag->ctx = CTX2IOCTX(ctx); 143 ctx->context.tag->owner = dev; 144 } 145 146 static void 147 ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain, 148 vm_page_t ctx_root) 149 { 150 /* 151 * For update due to move, the store is not atomic. It is 152 * possible that DMAR read upper doubleword, while low 153 * doubleword is not yet updated. The domain id is stored in 154 * the upper doubleword, while the table pointer in the lower. 155 * 156 * There is no good solution, for the same reason it is wrong 157 * to clear P bit in the ctx entry for update. 158 */ 159 dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) | 160 domain->awlvl); 161 if (ctx_root == NULL) { 162 dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P); 163 } else { 164 dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR | 165 (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) | 166 DMAR_CTX1_P); 167 } 168 } 169 170 static void 171 ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move, 172 int busno) 173 { 174 struct dmar_unit *unit; 175 struct dmar_domain *domain; 176 vm_page_t ctx_root; 177 int i; 178 179 domain = CTX2DOM(ctx); 180 unit = DOM2DMAR(domain); 181 KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0), 182 ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx", 183 unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner), 184 pci_get_function(ctx->context.tag->owner), 185 ctxp->ctx1, ctxp->ctx2)); 186 187 if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 && 188 (unit->hw_ecap & DMAR_ECAP_PT) != 0) { 189 KASSERT(domain->pgtbl_obj == NULL, 190 ("ctx %p non-null pgtbl_obj", ctx)); 191 ctx_root = NULL; 192 } else { 193 ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, 194 IOMMU_PGF_NOALLOC); 195 } 196 197 if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) { 198 MPASS(!move); 199 for (i = 0; i <= PCI_BUSMAX; i++) { 200 ctx_id_entry_init_one(&ctxp[i], domain, ctx_root); 201 } 202 } else { 203 ctx_id_entry_init_one(ctxp, domain, ctx_root); 204 } 205 dmar_flush_ctx_to_ram(unit, ctxp); 206 } 207 208 static int 209 dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force) 210 { 211 int error; 212 213 /* 214 * If dmar declares Caching Mode as Set, follow 11.5 "Caching 215 * Mode Consideration" and do the (global) invalidation of the 216 * negative TLB entries. 217 */ 218 if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force) 219 return (0); 220 if (dmar->qi_enabled) { 221 dmar_qi_invalidate_ctx_glob_locked(dmar); 222 if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force) 223 dmar_qi_invalidate_iotlb_glob_locked(dmar); 224 return (0); 225 } 226 error = dmar_inv_ctx_glob(dmar); 227 if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)) 228 error = dmar_inv_iotlb_glob(dmar); 229 return (error); 230 } 231 232 static int 233 domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus, 234 int slot, int func, int dev_domain, int dev_busno, 235 const void *dev_path, int dev_path_len) 236 { 237 struct iommu_map_entries_tailq rmrr_entries; 238 struct iommu_map_entry *entry, *entry1; 239 vm_page_t *ma; 240 iommu_gaddr_t start, end; 241 vm_pindex_t size, i; 242 int error, error1; 243 244 error = 0; 245 TAILQ_INIT(&rmrr_entries); 246 dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path, 247 dev_path_len, &rmrr_entries); 248 TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) { 249 /* 250 * VT-d specification requires that the start of an 251 * RMRR entry is 4k-aligned. Buggy BIOSes put 252 * anything into the start and end fields. Truncate 253 * and round as neccesary. 254 * 255 * We also allow the overlapping RMRR entries, see 256 * iommu_gas_alloc_region(). 257 */ 258 start = entry->start; 259 end = entry->end; 260 if (bootverbose) 261 printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n", 262 domain->iodom.iommu->unit, bus, slot, func, 263 (uintmax_t)start, (uintmax_t)end); 264 entry->start = trunc_page(start); 265 entry->end = round_page(end); 266 if (entry->start == entry->end) { 267 /* Workaround for some AMI (?) BIOSes */ 268 if (bootverbose) { 269 if (dev != NULL) 270 device_printf(dev, ""); 271 printf("pci%d:%d:%d ", bus, slot, func); 272 printf("BIOS bug: dmar%d RMRR " 273 "region (%jx, %jx) corrected\n", 274 domain->iodom.iommu->unit, start, end); 275 } 276 entry->end += DMAR_PAGE_SIZE * 0x20; 277 } 278 size = OFF_TO_IDX(entry->end - entry->start); 279 ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK); 280 for (i = 0; i < size; i++) { 281 ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i, 282 VM_MEMATTR_DEFAULT); 283 } 284 error1 = iommu_gas_map_region(DOM2IODOM(domain), entry, 285 IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE, 286 IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma); 287 /* 288 * Non-failed RMRR entries are owned by context rb 289 * tree. Get rid of the failed entry, but do not stop 290 * the loop. Rest of the parsed RMRR entries are 291 * loaded and removed on the context destruction. 292 */ 293 if (error1 == 0 && entry->end != entry->start) { 294 IOMMU_LOCK(domain->iodom.iommu); 295 domain->refs++; /* XXXKIB prevent free */ 296 domain->iodom.flags |= IOMMU_DOMAIN_RMRR; 297 IOMMU_UNLOCK(domain->iodom.iommu); 298 } else { 299 if (error1 != 0) { 300 if (dev != NULL) 301 device_printf(dev, ""); 302 printf("pci%d:%d:%d ", bus, slot, func); 303 printf( 304 "dmar%d failed to map RMRR region (%jx, %jx) %d\n", 305 domain->iodom.iommu->unit, start, end, 306 error1); 307 error = error1; 308 } 309 TAILQ_REMOVE(&rmrr_entries, entry, unroll_link); 310 iommu_gas_free_entry(DOM2IODOM(domain), entry); 311 } 312 for (i = 0; i < size; i++) 313 vm_page_putfake(ma[i]); 314 free(ma, M_TEMP); 315 } 316 return (error); 317 } 318 319 /* 320 * PCI memory address space is shared between memory-mapped devices (MMIO) and 321 * host memory (which may be remapped by an IOMMU). Device accesses to an 322 * address within a memory aperture in a PCIe root port will be treated as 323 * peer-to-peer and not forwarded to an IOMMU. To avoid this, reserve the 324 * address space of the root port's memory apertures in the address space used 325 * by the IOMMU for remapping. 326 */ 327 static int 328 dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev) 329 { 330 struct iommu_domain *iodom; 331 device_t root; 332 uint32_t val; 333 uint64_t base, limit; 334 int error; 335 336 iodom = DOM2IODOM(domain); 337 338 root = pci_find_pcie_root_port(dev); 339 if (root == NULL) 340 return (0); 341 342 /* Disable downstream memory */ 343 base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2)); 344 limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2)); 345 error = iommu_gas_reserve_region_extend(iodom, base, limit + 1); 346 if (bootverbose || error != 0) 347 device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n", 348 base, limit + 1, error); 349 if (error != 0) 350 return (error); 351 352 /* Disable downstream prefetchable memory */ 353 val = pci_read_config(root, PCIR_PMBASEL_1, 2); 354 if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) { 355 if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { 356 base = PCI_PPBMEMBASE( 357 pci_read_config(root, PCIR_PMBASEH_1, 4), 358 val); 359 limit = PCI_PPBMEMLIMIT( 360 pci_read_config(root, PCIR_PMLIMITH_1, 4), 361 pci_read_config(root, PCIR_PMLIMITL_1, 2)); 362 } else { 363 base = PCI_PPBMEMBASE(0, val); 364 limit = PCI_PPBMEMLIMIT(0, 365 pci_read_config(root, PCIR_PMLIMITL_1, 2)); 366 } 367 error = iommu_gas_reserve_region_extend(iodom, base, 368 limit + 1); 369 if (bootverbose || error != 0) 370 device_printf(dev, "DMAR reserve [%#jx-%#jx] " 371 "(error %d)\n", base, limit + 1, error); 372 if (error != 0) 373 return (error); 374 } 375 376 return (error); 377 } 378 379 static struct dmar_domain * 380 dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped) 381 { 382 struct iommu_domain *iodom; 383 struct iommu_unit *unit; 384 struct dmar_domain *domain; 385 int error, id, mgaw; 386 387 id = alloc_unr(dmar->domids); 388 if (id == -1) 389 return (NULL); 390 domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO); 391 iodom = DOM2IODOM(domain); 392 unit = DMAR2IOMMU(dmar); 393 domain->domain = id; 394 LIST_INIT(&domain->contexts); 395 iommu_domain_init(unit, iodom, &dmar_domain_map_ops); 396 397 domain->dmar = dmar; 398 399 /* 400 * For now, use the maximal usable physical address of the 401 * installed memory to calculate the mgaw on id_mapped domain. 402 * It is useful for the identity mapping, and less so for the 403 * virtualized bus address space. 404 */ 405 domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR; 406 mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped); 407 error = domain_set_agaw(domain, mgaw); 408 if (error != 0) 409 goto fail; 410 if (!id_mapped) 411 /* Use all supported address space for remapping. */ 412 domain->iodom.end = 1ULL << (domain->agaw - 1); 413 414 iommu_gas_init_domain(DOM2IODOM(domain)); 415 416 if (id_mapped) { 417 if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) { 418 domain->pgtbl_obj = domain_get_idmap_pgtbl(domain, 419 domain->iodom.end); 420 } 421 domain->iodom.flags |= IOMMU_DOMAIN_IDMAP; 422 } else { 423 error = domain_alloc_pgtbl(domain); 424 if (error != 0) 425 goto fail; 426 /* Disable local apic region access */ 427 error = iommu_gas_reserve_region(iodom, 0xfee00000, 428 0xfeefffff + 1, &iodom->msi_entry); 429 if (error != 0) 430 goto fail; 431 } 432 return (domain); 433 434 fail: 435 dmar_domain_destroy(domain); 436 return (NULL); 437 } 438 439 static struct dmar_ctx * 440 dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid) 441 { 442 struct dmar_ctx *ctx; 443 444 ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO); 445 ctx->context.domain = DOM2IODOM(domain); 446 ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu), 447 M_DMAR_CTX, M_WAITOK | M_ZERO); 448 ctx->context.rid = rid; 449 ctx->refs = 1; 450 return (ctx); 451 } 452 453 static void 454 dmar_ctx_link(struct dmar_ctx *ctx) 455 { 456 struct dmar_domain *domain; 457 458 domain = CTX2DOM(ctx); 459 IOMMU_ASSERT_LOCKED(domain->iodom.iommu); 460 KASSERT(domain->refs >= domain->ctx_cnt, 461 ("dom %p ref underflow %d %d", domain, domain->refs, 462 domain->ctx_cnt)); 463 domain->refs++; 464 domain->ctx_cnt++; 465 LIST_INSERT_HEAD(&domain->contexts, ctx, link); 466 } 467 468 static void 469 dmar_ctx_unlink(struct dmar_ctx *ctx) 470 { 471 struct dmar_domain *domain; 472 473 domain = CTX2DOM(ctx); 474 IOMMU_ASSERT_LOCKED(domain->iodom.iommu); 475 KASSERT(domain->refs > 0, 476 ("domain %p ctx dtr refs %d", domain, domain->refs)); 477 KASSERT(domain->ctx_cnt >= domain->refs, 478 ("domain %p ctx dtr refs %d ctx_cnt %d", domain, 479 domain->refs, domain->ctx_cnt)); 480 domain->refs--; 481 domain->ctx_cnt--; 482 LIST_REMOVE(ctx, link); 483 } 484 485 static void 486 dmar_domain_destroy(struct dmar_domain *domain) 487 { 488 struct iommu_domain *iodom; 489 struct dmar_unit *dmar; 490 491 iodom = DOM2IODOM(domain); 492 493 KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries), 494 ("unfinished unloads %p", domain)); 495 KASSERT(LIST_EMPTY(&domain->contexts), 496 ("destroying dom %p with contexts", domain)); 497 KASSERT(domain->ctx_cnt == 0, 498 ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt)); 499 KASSERT(domain->refs == 0, 500 ("destroying dom %p with refs %d", domain, domain->refs)); 501 if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) { 502 DMAR_DOMAIN_LOCK(domain); 503 iommu_gas_fini_domain(iodom); 504 DMAR_DOMAIN_UNLOCK(domain); 505 } 506 if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) { 507 if (domain->pgtbl_obj != NULL) 508 DMAR_DOMAIN_PGLOCK(domain); 509 domain_free_pgtbl(domain); 510 } 511 iommu_domain_fini(iodom); 512 dmar = DOM2DMAR(domain); 513 free_unr(dmar->domids, domain->domain); 514 free(domain, M_DMAR_DOMAIN); 515 } 516 517 static struct dmar_ctx * 518 dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid, 519 int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, 520 bool id_mapped, bool rmrr_init) 521 { 522 struct dmar_domain *domain, *domain1; 523 struct dmar_ctx *ctx, *ctx1; 524 struct iommu_unit *unit; 525 dmar_ctx_entry_t *ctxp; 526 struct sf_buf *sf; 527 int bus, slot, func, error; 528 bool enable; 529 530 if (dev != NULL) { 531 bus = pci_get_bus(dev); 532 slot = pci_get_slot(dev); 533 func = pci_get_function(dev); 534 } else { 535 bus = PCI_RID2BUS(rid); 536 slot = PCI_RID2SLOT(rid); 537 func = PCI_RID2FUNC(rid); 538 } 539 enable = false; 540 TD_PREP_PINNED_ASSERT; 541 unit = DMAR2IOMMU(dmar); 542 DMAR_LOCK(dmar); 543 KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0), 544 ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus, 545 slot, func)); 546 ctx = dmar_find_ctx_locked(dmar, rid); 547 error = 0; 548 if (ctx == NULL) { 549 /* 550 * Perform the allocations which require sleep or have 551 * higher chance to succeed if the sleep is allowed. 552 */ 553 DMAR_UNLOCK(dmar); 554 dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid)); 555 domain1 = dmar_domain_alloc(dmar, id_mapped); 556 if (domain1 == NULL) { 557 TD_PINNED_ASSERT; 558 return (NULL); 559 } 560 if (!id_mapped) { 561 error = domain_init_rmrr(domain1, dev, bus, 562 slot, func, dev_domain, dev_busno, dev_path, 563 dev_path_len); 564 if (error == 0) 565 error = dmar_reserve_pci_regions(domain1, dev); 566 if (error != 0) { 567 dmar_domain_destroy(domain1); 568 TD_PINNED_ASSERT; 569 return (NULL); 570 } 571 } 572 ctx1 = dmar_ctx_alloc(domain1, rid); 573 ctxp = dmar_map_ctx_entry(ctx1, &sf); 574 DMAR_LOCK(dmar); 575 576 /* 577 * Recheck the contexts, other thread might have 578 * already allocated needed one. 579 */ 580 ctx = dmar_find_ctx_locked(dmar, rid); 581 if (ctx == NULL) { 582 domain = domain1; 583 ctx = ctx1; 584 dmar_ctx_link(ctx); 585 ctx->context.tag->owner = dev; 586 device_tag_init(ctx, dev); 587 588 /* 589 * This is the first activated context for the 590 * DMAR unit. Enable the translation after 591 * everything is set up. 592 */ 593 if (LIST_EMPTY(&dmar->domains)) 594 enable = true; 595 LIST_INSERT_HEAD(&dmar->domains, domain, link); 596 ctx_id_entry_init(ctx, ctxp, false, bus); 597 if (dev != NULL) { 598 device_printf(dev, 599 "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d " 600 "agaw %d %s-mapped\n", 601 dmar->iommu.unit, dmar->segment, bus, slot, 602 func, rid, domain->domain, domain->mgaw, 603 domain->agaw, id_mapped ? "id" : "re"); 604 } 605 dmar_unmap_pgtbl(sf); 606 } else { 607 dmar_unmap_pgtbl(sf); 608 dmar_domain_destroy(domain1); 609 /* Nothing needs to be done to destroy ctx1. */ 610 free(ctx1, M_DMAR_CTX); 611 domain = CTX2DOM(ctx); 612 ctx->refs++; /* tag referenced us */ 613 } 614 } else { 615 domain = CTX2DOM(ctx); 616 if (ctx->context.tag->owner == NULL) 617 ctx->context.tag->owner = dev; 618 ctx->refs++; /* tag referenced us */ 619 } 620 621 error = dmar_flush_for_ctx_entry(dmar, enable); 622 if (error != 0) { 623 dmar_free_ctx_locked(dmar, ctx); 624 TD_PINNED_ASSERT; 625 return (NULL); 626 } 627 628 /* 629 * The dmar lock was potentially dropped between check for the 630 * empty context list and now. Recheck the state of GCMD_TE 631 * to avoid unneeded command. 632 */ 633 if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) { 634 error = dmar_enable_translation(dmar); 635 if (error == 0) { 636 if (bootverbose) { 637 printf("dmar%d: enabled translation\n", 638 dmar->iommu.unit); 639 } 640 } else { 641 printf("dmar%d: enabling translation failed, " 642 "error %d\n", dmar->iommu.unit, error); 643 dmar_free_ctx_locked(dmar, ctx); 644 TD_PINNED_ASSERT; 645 return (NULL); 646 } 647 } 648 DMAR_UNLOCK(dmar); 649 TD_PINNED_ASSERT; 650 return (ctx); 651 } 652 653 struct dmar_ctx * 654 dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid, 655 bool id_mapped, bool rmrr_init) 656 { 657 int dev_domain, dev_path_len, dev_busno; 658 659 dev_domain = pci_get_domain(dev); 660 dev_path_len = dmar_dev_depth(dev); 661 ACPI_DMAR_PCI_PATH dev_path[dev_path_len]; 662 dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len); 663 return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno, 664 dev_path, dev_path_len, id_mapped, rmrr_init)); 665 } 666 667 struct dmar_ctx * 668 dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid, 669 int dev_domain, int dev_busno, 670 const void *dev_path, int dev_path_len, 671 bool id_mapped, bool rmrr_init) 672 { 673 674 return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno, 675 dev_path, dev_path_len, id_mapped, rmrr_init)); 676 } 677 678 int 679 dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx) 680 { 681 struct dmar_unit *dmar; 682 struct dmar_domain *old_domain; 683 dmar_ctx_entry_t *ctxp; 684 struct sf_buf *sf; 685 int error; 686 687 dmar = domain->dmar; 688 old_domain = CTX2DOM(ctx); 689 if (domain == old_domain) 690 return (0); 691 KASSERT(old_domain->iodom.iommu == domain->iodom.iommu, 692 ("domain %p %u moving between dmars %u %u", domain, 693 domain->domain, old_domain->iodom.iommu->unit, 694 domain->iodom.iommu->unit)); 695 TD_PREP_PINNED_ASSERT; 696 697 ctxp = dmar_map_ctx_entry(ctx, &sf); 698 DMAR_LOCK(dmar); 699 dmar_ctx_unlink(ctx); 700 ctx->context.domain = &domain->iodom; 701 dmar_ctx_link(ctx); 702 ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100); 703 dmar_unmap_pgtbl(sf); 704 error = dmar_flush_for_ctx_entry(dmar, true); 705 /* If flush failed, rolling back would not work as well. */ 706 printf("dmar%d rid %x domain %d->%d %s-mapped\n", 707 dmar->iommu.unit, ctx->context.rid, old_domain->domain, 708 domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ? 709 "id" : "re"); 710 dmar_unref_domain_locked(dmar, old_domain); 711 TD_PINNED_ASSERT; 712 return (error); 713 } 714 715 static void 716 dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain) 717 { 718 719 DMAR_ASSERT_LOCKED(dmar); 720 KASSERT(domain->refs >= 1, 721 ("dmar %d domain %p refs %u", dmar->iommu.unit, domain, 722 domain->refs)); 723 KASSERT(domain->refs > domain->ctx_cnt, 724 ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain, 725 domain->refs, domain->ctx_cnt)); 726 727 if (domain->refs > 1) { 728 domain->refs--; 729 DMAR_UNLOCK(dmar); 730 return; 731 } 732 733 KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0, 734 ("lost ref on RMRR domain %p", domain)); 735 736 LIST_REMOVE(domain, link); 737 DMAR_UNLOCK(dmar); 738 739 taskqueue_drain(dmar->iommu.delayed_taskqueue, 740 &domain->iodom.unload_task); 741 dmar_domain_destroy(domain); 742 } 743 744 void 745 dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) 746 { 747 struct sf_buf *sf; 748 dmar_ctx_entry_t *ctxp; 749 struct dmar_domain *domain; 750 751 DMAR_ASSERT_LOCKED(dmar); 752 KASSERT(ctx->refs >= 1, 753 ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); 754 755 /* 756 * If our reference is not last, only the dereference should 757 * be performed. 758 */ 759 if (ctx->refs > 1) { 760 ctx->refs--; 761 DMAR_UNLOCK(dmar); 762 return; 763 } 764 765 KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0, 766 ("lost ref on disabled ctx %p", ctx)); 767 768 /* 769 * Otherwise, the context entry must be cleared before the 770 * page table is destroyed. The mapping of the context 771 * entries page could require sleep, unlock the dmar. 772 */ 773 DMAR_UNLOCK(dmar); 774 TD_PREP_PINNED_ASSERT; 775 ctxp = dmar_map_ctx_entry(ctx, &sf); 776 DMAR_LOCK(dmar); 777 KASSERT(ctx->refs >= 1, 778 ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); 779 780 /* 781 * Other thread might have referenced the context, in which 782 * case again only the dereference should be performed. 783 */ 784 if (ctx->refs > 1) { 785 ctx->refs--; 786 DMAR_UNLOCK(dmar); 787 dmar_unmap_pgtbl(sf); 788 TD_PINNED_ASSERT; 789 return; 790 } 791 792 KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0, 793 ("lost ref on disabled ctx %p", ctx)); 794 795 /* 796 * Clear the context pointer and flush the caches. 797 * XXXKIB: cannot do this if any RMRR entries are still present. 798 */ 799 dmar_pte_clear(&ctxp->ctx1); 800 ctxp->ctx2 = 0; 801 dmar_flush_ctx_to_ram(dmar, ctxp); 802 dmar_inv_ctx_glob(dmar); 803 if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) { 804 if (dmar->qi_enabled) 805 dmar_qi_invalidate_iotlb_glob_locked(dmar); 806 else 807 dmar_inv_iotlb_glob(dmar); 808 } 809 dmar_unmap_pgtbl(sf); 810 domain = CTX2DOM(ctx); 811 dmar_ctx_unlink(ctx); 812 free(ctx->context.tag, M_DMAR_CTX); 813 free(ctx, M_DMAR_CTX); 814 dmar_unref_domain_locked(dmar, domain); 815 TD_PINNED_ASSERT; 816 } 817 818 void 819 dmar_free_ctx(struct dmar_ctx *ctx) 820 { 821 struct dmar_unit *dmar; 822 823 dmar = CTX2DMAR(ctx); 824 DMAR_LOCK(dmar); 825 dmar_free_ctx_locked(dmar, ctx); 826 } 827 828 /* 829 * Returns with the domain locked. 830 */ 831 struct dmar_ctx * 832 dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid) 833 { 834 struct dmar_domain *domain; 835 struct dmar_ctx *ctx; 836 837 DMAR_ASSERT_LOCKED(dmar); 838 839 LIST_FOREACH(domain, &dmar->domains, link) { 840 LIST_FOREACH(ctx, &domain->contexts, link) { 841 if (ctx->context.rid == rid) 842 return (ctx); 843 } 844 } 845 return (NULL); 846 } 847 848 void 849 dmar_domain_free_entry(struct iommu_map_entry *entry, bool free) 850 { 851 struct iommu_domain *domain; 852 853 domain = entry->domain; 854 IOMMU_DOMAIN_LOCK(domain); 855 if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0) 856 iommu_gas_free_region(domain, entry); 857 else 858 iommu_gas_free_space(domain, entry); 859 IOMMU_DOMAIN_UNLOCK(domain); 860 if (free) 861 iommu_gas_free_entry(domain, entry); 862 else 863 entry->flags = 0; 864 } 865 866 void 867 dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free) 868 { 869 struct dmar_domain *domain; 870 struct dmar_unit *unit; 871 872 domain = IODOM2DOM(entry->domain); 873 unit = DOM2DMAR(domain); 874 if (unit->qi_enabled) { 875 DMAR_LOCK(unit); 876 dmar_qi_invalidate_locked(IODOM2DOM(entry->domain), 877 entry->start, entry->end - entry->start, &entry->gseq, 878 true); 879 if (!free) 880 entry->flags |= IOMMU_MAP_ENTRY_QI_NF; 881 TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link); 882 DMAR_UNLOCK(unit); 883 } else { 884 domain_flush_iotlb_sync(IODOM2DOM(entry->domain), 885 entry->start, entry->end - entry->start); 886 dmar_domain_free_entry(entry, free); 887 } 888 } 889 890 static bool 891 dmar_domain_unload_emit_wait(struct dmar_domain *domain, 892 struct iommu_map_entry *entry) 893 { 894 895 if (TAILQ_NEXT(entry, dmamap_link) == NULL) 896 return (true); 897 return (domain->batch_no++ % dmar_batch_coalesce == 0); 898 } 899 900 void 901 dmar_domain_unload(struct dmar_domain *domain, 902 struct iommu_map_entries_tailq *entries, bool cansleep) 903 { 904 struct dmar_unit *unit; 905 struct iommu_domain *iodom; 906 struct iommu_map_entry *entry, *entry1; 907 int error; 908 909 iodom = DOM2IODOM(domain); 910 unit = DOM2DMAR(domain); 911 912 TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { 913 KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0, 914 ("not mapped entry %p %p", domain, entry)); 915 error = iodom->ops->unmap(iodom, entry->start, entry->end - 916 entry->start, cansleep ? IOMMU_PGF_WAITOK : 0); 917 KASSERT(error == 0, ("unmap %p error %d", domain, error)); 918 if (!unit->qi_enabled) { 919 domain_flush_iotlb_sync(domain, entry->start, 920 entry->end - entry->start); 921 TAILQ_REMOVE(entries, entry, dmamap_link); 922 dmar_domain_free_entry(entry, true); 923 } 924 } 925 if (TAILQ_EMPTY(entries)) 926 return; 927 928 KASSERT(unit->qi_enabled, ("loaded entry left")); 929 DMAR_LOCK(unit); 930 TAILQ_FOREACH(entry, entries, dmamap_link) { 931 dmar_qi_invalidate_locked(domain, entry->start, entry->end - 932 entry->start, &entry->gseq, 933 dmar_domain_unload_emit_wait(domain, entry)); 934 } 935 TAILQ_CONCAT(&unit->tlb_flush_entries, entries, dmamap_link); 936 DMAR_UNLOCK(unit); 937 } 938 939 struct iommu_ctx * 940 iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid, 941 bool id_mapped, bool rmrr_init) 942 { 943 struct dmar_unit *dmar; 944 struct dmar_ctx *ret; 945 946 dmar = IOMMU2DMAR(iommu); 947 948 ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init); 949 950 return (CTX2IOCTX(ret)); 951 } 952 953 void 954 iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context) 955 { 956 struct dmar_unit *dmar; 957 struct dmar_ctx *ctx; 958 959 dmar = IOMMU2DMAR(iommu); 960 ctx = IOCTX2CTX(context); 961 962 dmar_free_ctx_locked(dmar, ctx); 963 } 964 965 void 966 iommu_free_ctx(struct iommu_ctx *context) 967 { 968 struct dmar_ctx *ctx; 969 970 ctx = IOCTX2CTX(context); 971 972 dmar_free_ctx(ctx); 973 } 974 975 void 976 iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free) 977 { 978 979 dmar_domain_unload_entry(entry, free); 980 } 981 982 void 983 iommu_domain_unload(struct iommu_domain *iodom, 984 struct iommu_map_entries_tailq *entries, bool cansleep) 985 { 986 struct dmar_domain *domain; 987 988 domain = IODOM2DOM(iodom); 989 990 dmar_domain_unload(domain, entries, cansleep); 991 } 992