/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/intel_dmar.h>

static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);

static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
static void dmar_free_ctx(struct dmar_ctx *ctx);
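
/*
 * Ensure that the root table entry for the given bus points to an
 * allocated context table page, allocating and linking one if needed.
 */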
static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
	struct sf_buf *sf;
	dmar_root_entry_t *re;
	vm_page_t ctxm;

	/*
	 * Allocated context page must be linked.
	 */
	ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
	if (ctxm != NULL)
		return;

	/*
	 * Page not present, allocate and link.  Note that another
	 * thread might execute this sequence in parallel.  This
	 * should be safe, because the context entries written by both
	 * threads are equal.
	 */
	TD_PREP_PINNED_ASSERT;
	ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
	    IOMMU_PGF_WAITOK);
	re = iommu_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
	re += bus;
	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
	    VM_PAGE_TO_PHYS(ctxm)));
	dmar_flush_root_to_ram(dmar, re);
	iommu_unmap_pgtbl(sf);
	TD_PINNED_ASSERT;
}

static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
{
	struct dmar_unit *dmar;
	dmar_ctx_entry_t *ctxp;

	dmar = CTX2DMAR(ctx);

	ctxp = iommu_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
	ctxp += ctx->context.rid & 0xff;
	return (ctxp);
}

static void
device_tag_init(struct dmar_ctx *ctx, device_t dev)
{
	struct dmar_domain *domain;
	bus_addr_t maxaddr;

	domain = CTX2DOM(ctx);
	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
	ctx->context.tag->common.boundary = 0;
	ctx->context.tag->common.lowaddr = maxaddr;
	ctx->context.tag->common.highaddr = maxaddr;
	ctx->context.tag->common.maxsize = maxaddr;
	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->context.tag->common.maxsegsz = maxaddr;
	ctx->context.tag->ctx = CTX2IOCTX(ctx);
	ctx->context.tag->owner = dev;
}
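
/*
 * Initialize a single context table entry for the given domain,
 * pointing it either at the domain page table root or, when ctx_root
 * is NULL, marking the entry as pass-through.
 */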
static void
ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
    vm_page_t ctx_root)
{
	/*
	 * For update due to move, the store is not atomic.  It is
	 * possible that the DMAR reads the upper doubleword while the
	 * lower doubleword is not yet updated.  The domain id is
	 * stored in the upper doubleword, while the table pointer is
	 * in the lower.
	 *
	 * There is no good solution, for the same reason it is wrong
	 * to clear the P bit in the ctx entry for update.
	 */
	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
	    domain->awlvl);
	if (ctx_root == NULL) {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
	} else {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
		    DMAR_CTX1_P);
	}
}

static void
ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
    int busno)
{
	struct dmar_unit *unit;
	struct dmar_domain *domain;
	vm_page_t ctx_root;
	int i;

	domain = CTX2DOM(ctx);
	unit = DOM2DMAR(domain);
	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
	    pci_get_function(ctx->context.tag->owner),
	    ctxp->ctx1, ctxp->ctx2));

	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
		KASSERT(domain->pgtbl_obj == NULL,
		    ("ctx %p non-null pgtbl_obj", ctx));
		ctx_root = NULL;
	} else {
		ctx_root = iommu_pgalloc(domain->pgtbl_obj, 0,
		    IOMMU_PGF_NOALLOC);
	}

	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
		MPASS(!move);
		for (i = 0; i <= PCI_BUSMAX; i++) {
			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
		}
	} else {
		ctx_id_entry_init_one(ctxp, domain, ctx_root);
	}
	dmar_flush_ctx_to_ram(unit, ctxp);
}

static int
dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
{
	int error;

	/*
	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
	 * Mode Consideration" and do the (global) invalidation of the
	 * negative TLB entries.
	 */
	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
		return (0);
	if (dmar->qi_enabled) {
		dmar_qi_invalidate_ctx_glob_locked(dmar);
		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		return (0);
	}
	error = dmar_inv_ctx_glob(dmar);
	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
		error = dmar_inv_iotlb_glob(dmar);
	return (error);
}
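
/*
 * Map the RMRR regions reported by the platform for the given device
 * into the domain, so that the device keeps access to them once
 * translation is enabled.
 */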
static int
domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
    int slot, int func, int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len)
{
	struct iommu_map_entries_tailq rmrr_entries;
	struct iommu_map_entry *entry, *entry1;
	vm_page_t *ma;
	iommu_gaddr_t start, end;
	vm_pindex_t size, i;
	int error, error1;

	if (!dmar_rmrr_enable)
		return (0);

	error = 0;
	TAILQ_INIT(&rmrr_entries);
	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
	    dev_path_len, &rmrr_entries);
	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
		/*
		 * The VT-d specification requires that the start of an
		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
		 * anything into the start and end fields.  Truncate
		 * and round as necessary.
		 *
		 * We also allow overlapping RMRR entries, see
		 * iommu_gas_alloc_region().
		 */
		start = entry->start;
		end = entry->end;
		if (bootverbose)
			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
			    domain->iodom.iommu->unit, bus, slot, func,
			    (uintmax_t)start, (uintmax_t)end);
		entry->start = trunc_page(start);
		entry->end = round_page(end);
		if (entry->start == entry->end) {
			/* Workaround for some AMI (?) BIOSes */
			if (bootverbose) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf("BIOS bug: dmar%d RMRR "
				    "region (%jx, %jx) corrected\n",
				    domain->iodom.iommu->unit, start, end);
			}
			entry->end += IOMMU_PAGE_SIZE * 0x20;
		}
		size = OFF_TO_IDX(entry->end - entry->start);
		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
		for (i = 0; i < size; i++) {
			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
			    VM_MEMATTR_DEFAULT);
		}
		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
		/*
		 * Non-failed RMRR entries are owned by the context rb
		 * tree.  Get rid of the failed entry, but do not stop
		 * the loop.  The rest of the parsed RMRR entries are
		 * loaded and removed on the context destruction.
		 */
		if (error1 == 0 && entry->end != entry->start) {
			IOMMU_LOCK(domain->iodom.iommu);
			domain->refs++; /* XXXKIB prevent free */
			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
			IOMMU_UNLOCK(domain->iodom.iommu);
		} else {
			if (error1 != 0) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf(
			"dmar%d failed to map RMRR region (%jx, %jx) %d\n",
				    domain->iodom.iommu->unit, start, end,
				    error1);
				error = error1;
			}
			TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
			iommu_gas_free_entry(entry);
		}
		for (i = 0; i < size; i++)
			vm_page_putfake(ma[i]);
		free(ma, M_TEMP);
	}
	return (error);
}

/*
 * PCI memory address space is shared between memory-mapped devices (MMIO) and
 * host memory (which may be remapped by an IOMMU).  Device accesses to an
 * address within a memory aperture in a PCIe root port will be treated as
 * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
 * address space of the root port's memory apertures in the address space used
 * by the IOMMU for remapping.
 */
static int
dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
{
	struct iommu_domain *iodom;
	device_t root;
	uint32_t val;
	uint64_t base, limit;
	int error;

	iodom = DOM2IODOM(domain);

	root = pci_find_pcie_root_port(dev);
	if (root == NULL)
		return (0);

	/* Disable downstream memory */
	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
	if (bootverbose || error != 0)
		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
		    base, limit + 1, error);
	if (error != 0)
		return (error);

	/* Disable downstream prefetchable memory */
	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
			base = PCI_PPBMEMBASE(
			    pci_read_config(root, PCIR_PMBASEH_1, 4),
			    val);
			limit = PCI_PPBMEMLIMIT(
			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		} else {
			base = PCI_PPBMEMBASE(0, val);
			limit = PCI_PPBMEMLIMIT(0,
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		}
		error = iommu_gas_reserve_region_extend(iodom, base,
		    limit + 1);
		if (bootverbose || error != 0)
			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
			    "(error %d)\n", base, limit + 1, error);
		if (error != 0)
			return (error);
	}

	return (error);
}
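
/*
 * Allocate a new domain on the given DMAR unit: reserve a domain id,
 * size the address width, initialize the GAS, and set up either the
 * identity mapping or an empty page table for remapping.
 */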
static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
	struct iommu_domain *iodom;
	struct iommu_unit *unit;
	struct dmar_domain *domain;
	int error, id, mgaw;

	id = alloc_unr(dmar->domids);
	if (id == -1)
		return (NULL);
	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
	iodom = DOM2IODOM(domain);
	unit = DMAR2IOMMU(dmar);
	domain->domain = id;
	LIST_INIT(&domain->contexts);
	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);

	domain->dmar = dmar;

	/*
	 * For now, use the maximal usable physical address of the
	 * installed memory to calculate the mgaw of an id_mapped
	 * domain.  It is useful for the identity mapping, and less so
	 * for the virtualized bus address space.
	 */
	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
	error = domain_set_agaw(domain, mgaw);
	if (error != 0)
		goto fail;
	if (!id_mapped)
		/* Use all supported address space for remapping. */
		domain->iodom.end = 1ULL << (domain->agaw - 1);

	iommu_gas_init_domain(DOM2IODOM(domain));

	if (id_mapped) {
		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
			    domain->iodom.end);
		}
		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
	} else {
		error = dmar_domain_alloc_pgtbl(domain);
		if (error != 0)
			goto fail;
		/* Disable local APIC region access. */
		error = iommu_gas_reserve_region(iodom, 0xfee00000,
		    0xfeefffff + 1, &iodom->msi_entry);
		if (error != 0)
			goto fail;
	}
	return (domain);

fail:
	dmar_domain_destroy(domain);
	return (NULL);
}
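
/*
 * Allocate a context and its busdma tag for the given request ID.  The
 * new context starts with a single reference, owned by the caller.
 */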
static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
{
	struct dmar_ctx *ctx;

	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.domain = DOM2IODOM(domain);
	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
	    M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.rid = rid;
	ctx->refs = 1;
	return (ctx);
}

static void
dmar_ctx_link(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs >= domain->ctx_cnt,
	    ("dom %p ref underflow %d %d", domain, domain->refs,
	    domain->ctx_cnt));
	domain->refs++;
	domain->ctx_cnt++;
	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
}

static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs > 0,
	    ("domain %p ctx dtr refs %d", domain, domain->refs));
	KASSERT(domain->ctx_cnt >= domain->refs,
	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
	    domain->refs, domain->ctx_cnt));
	domain->refs--;
	domain->ctx_cnt--;
	LIST_REMOVE(ctx, link);
}

static void
dmar_domain_destroy(struct dmar_domain *domain)
{
	struct iommu_domain *iodom;
	struct dmar_unit *dmar;

	iodom = DOM2IODOM(domain);

	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
	    ("unfinished unloads %p", domain));
	KASSERT(LIST_EMPTY(&domain->contexts),
	    ("destroying dom %p with contexts", domain));
	KASSERT(domain->ctx_cnt == 0,
	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
	KASSERT(domain->refs == 0,
	    ("destroying dom %p with refs %d", domain, domain->refs));
	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
		DMAR_DOMAIN_LOCK(domain);
		iommu_gas_fini_domain(iodom);
		DMAR_DOMAIN_UNLOCK(domain);
	}
	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
		if (domain->pgtbl_obj != NULL)
			DMAR_DOMAIN_PGLOCK(domain);
		dmar_domain_free_pgtbl(domain);
	}
	iommu_domain_fini(iodom);
	dmar = DOM2DMAR(domain);
	free_unr(dmar->domids, domain->domain);
	free(domain, M_DMAR_DOMAIN);
}
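
/*
 * Find or create the context for the device identified by rid on the
 * given DMAR unit.  When a new context is needed, a domain is
 * allocated (identity-mapped or with its own page table), RMRR and
 * root port regions are set up, and the context table entry is
 * written.  Translation is enabled on the unit once its first context
 * is activated, unless deferred for RMRR setup.
 */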
static struct dmar_ctx *
dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_domain *domain, *domain1;
	struct dmar_ctx *ctx, *ctx1;
	struct iommu_unit *unit __diagused;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int bus, slot, func, error;
	bool enable;

	if (dev != NULL) {
		bus = pci_get_bus(dev);
		slot = pci_get_slot(dev);
		func = pci_get_function(dev);
	} else {
		bus = PCI_RID2BUS(rid);
		slot = PCI_RID2SLOT(rid);
		func = PCI_RID2FUNC(rid);
	}
	enable = false;
	TD_PREP_PINNED_ASSERT;
	unit = DMAR2IOMMU(dmar);
	DMAR_LOCK(dmar);
	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
	    slot, func));
	ctx = dmar_find_ctx_locked(dmar, rid);
	error = 0;
	if (ctx == NULL) {
		/*
		 * Perform the allocations which require sleep or have
		 * higher chance to succeed if the sleep is allowed.
		 */
		DMAR_UNLOCK(dmar);
		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
		domain1 = dmar_domain_alloc(dmar, id_mapped);
		if (domain1 == NULL) {
			TD_PINNED_ASSERT;
			return (NULL);
		}
		if (!id_mapped) {
			error = domain_init_rmrr(domain1, dev, bus,
			    slot, func, dev_domain, dev_busno, dev_path,
			    dev_path_len);
			if (error == 0 && dev != NULL)
				error = dmar_reserve_pci_regions(domain1, dev);
			if (error != 0) {
				dmar_domain_destroy(domain1);
				TD_PINNED_ASSERT;
				return (NULL);
			}
		}
		ctx1 = dmar_ctx_alloc(domain1, rid);
		ctxp = dmar_map_ctx_entry(ctx1, &sf);
		DMAR_LOCK(dmar);

		/*
		 * Recheck the contexts, another thread might have
		 * already allocated the needed one.
		 */
		ctx = dmar_find_ctx_locked(dmar, rid);
		if (ctx == NULL) {
			domain = domain1;
			ctx = ctx1;
			dmar_ctx_link(ctx);
			ctx->context.tag->owner = dev;
			device_tag_init(ctx, dev);

			/*
			 * This is the first activated context for the
			 * DMAR unit.  Enable the translation after
			 * everything is set up.
			 */
			if (LIST_EMPTY(&dmar->domains))
				enable = true;
			LIST_INSERT_HEAD(&dmar->domains, domain, link);
			ctx_id_entry_init(ctx, ctxp, false, bus);
			if (dev != NULL) {
				device_printf(dev,
			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
				    "agaw %d %s-mapped\n",
				    dmar->iommu.unit, dmar->segment, bus, slot,
				    func, rid, domain->domain, domain->mgaw,
				    domain->agaw, id_mapped ? "id" : "re");
			}
			iommu_unmap_pgtbl(sf);
		} else {
			iommu_unmap_pgtbl(sf);
			dmar_domain_destroy(domain1);
			/* Nothing needs to be done to destroy ctx1. */
			free(ctx1, M_DMAR_CTX);
			domain = CTX2DOM(ctx);
			ctx->refs++; /* tag referenced us */
		}
	} else {
		domain = CTX2DOM(ctx);
		if (ctx->context.tag->owner == NULL)
			ctx->context.tag->owner = dev;
		ctx->refs++; /* tag referenced us */
	}

	error = dmar_flush_for_ctx_entry(dmar, enable);
	if (error != 0) {
		dmar_free_ctx_locked(dmar, ctx);
		TD_PINNED_ASSERT;
		return (NULL);
	}

	/*
	 * The dmar lock was potentially dropped between the check for
	 * the empty context list and now.  Recheck the state of
	 * GCMD_TE to avoid an unneeded command.
	 */
	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
		error = dmar_disable_protected_regions(dmar);
		if (error != 0)
			printf("dmar%d: Failed to disable protected regions\n",
			    dmar->iommu.unit);
		error = dmar_enable_translation(dmar);
		if (error == 0) {
			if (bootverbose) {
				printf("dmar%d: enabled translation\n",
				    dmar->iommu.unit);
			}
		} else {
			printf("dmar%d: enabling translation failed, "
			    "error %d\n", dmar->iommu.unit, error);
			dmar_free_ctx_locked(dmar, ctx);
			TD_PINNED_ASSERT;
			return (NULL);
		}
	}
	DMAR_UNLOCK(dmar);
	TD_PINNED_ASSERT;
	return (ctx);
}
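
/*
 * Public entry points: build the device scope path for the device (or
 * accept a pre-built one) and hand off to dmar_get_ctx_for_dev1().
 */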
struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	int dev_domain, dev_path_len, dev_busno;

	dev_domain = pci_get_domain(dev);
	dev_path_len = dmar_dev_depth(dev);
	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
    int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{

	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;
	struct dmar_domain *old_domain;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int error;

	dmar = domain->dmar;
	old_domain = CTX2DOM(ctx);
	if (domain == old_domain)
		return (0);
	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
	    ("domain %p %u moving between dmars %u %u", domain,
	    domain->domain, old_domain->iodom.iommu->unit,
	    domain->iodom.iommu->unit));
	TD_PREP_PINNED_ASSERT;

	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	dmar_ctx_unlink(ctx);
	ctx->context.domain = &domain->iodom;
	dmar_ctx_link(ctx);
	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
	iommu_unmap_pgtbl(sf);
	error = dmar_flush_for_ctx_entry(dmar, true);
	/* If the flush failed, rolling back would not work either. */
	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
	    "id" : "re");
	dmar_unref_domain_locked(dmar, old_domain);
	TD_PINNED_ASSERT;
	return (error);
}
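
/*
 * Drop a reference on the domain.  The last reference unlinks the
 * domain from the unit, drains its pending unload work, and destroys
 * it.  Called with the DMAR lock held; the lock is released before
 * returning.
 */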
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(domain->refs >= 1,
	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
	    domain->refs));
	KASSERT(domain->refs > domain->ctx_cnt,
	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
	    domain->refs, domain->ctx_cnt));

	if (domain->refs > 1) {
		domain->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
	    ("lost ref on RMRR domain %p", domain));

	LIST_REMOVE(domain, link);
	DMAR_UNLOCK(dmar);

	taskqueue_drain(dmar->iommu.delayed_taskqueue,
	    &domain->iodom.unload_task);
	dmar_domain_destroy(domain);
}

static void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
	struct sf_buf *sf;
	dmar_ctx_entry_t *ctxp;
	struct dmar_domain *domain;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * If our reference is not last, only the dereference should
	 * be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Otherwise, the context entry must be cleared before the
	 * page table is destroyed.  The mapping of the context
	 * entries page could require sleep, unlock the dmar.
	 */
	DMAR_UNLOCK(dmar);
	TD_PREP_PINNED_ASSERT;
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * Another thread might have referenced the context, in which
	 * case again only the dereference should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		iommu_unmap_pgtbl(sf);
		TD_PINNED_ASSERT;
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Clear the context pointer and flush the caches.
	 * XXXKIB: cannot do this if any RMRR entries are still present.
	 */
	dmar_pte_clear(&ctxp->ctx1);
	ctxp->ctx2 = 0;
	dmar_flush_ctx_to_ram(dmar, ctxp);
	dmar_inv_ctx_glob(dmar);
	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
		if (dmar->qi_enabled)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		else
			dmar_inv_iotlb_glob(dmar);
	}
	iommu_unmap_pgtbl(sf);
	domain = CTX2DOM(ctx);
	dmar_ctx_unlink(ctx);
	free(ctx->context.tag, M_DMAR_CTX);
	free(ctx, M_DMAR_CTX);
	dmar_unref_domain_locked(dmar, domain);
	TD_PINNED_ASSERT;
}
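
/*
 * Release a context reference, taking the DMAR lock on behalf of
 * dmar_free_ctx_locked(), which drops it before returning.
 */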
static void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;

	dmar = CTX2DMAR(ctx);
	DMAR_LOCK(dmar);
	dmar_free_ctx_locked(dmar, ctx);
}

/*
 * Returns with the domain locked.
 */
struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
{
	struct dmar_domain *domain;
	struct dmar_ctx *ctx;

	DMAR_ASSERT_LOCKED(dmar);

	LIST_FOREACH(domain, &dmar->domains, link) {
		LIST_FOREACH(ctx, &domain->contexts, link) {
			if (ctx->context.rid == rid)
				return (ctx);
		}
	}
	return (NULL);
}

void
dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * If the given value for "free" is true, then the caller must not be using
 * the entry's dmamap_link field.
 */
void
dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;

	domain = IODOM2DOM(entry->domain);
	unit = DOM2DMAR(domain);

	/*
	 * If "free" is false, then the IOTLB invalidation must be performed
	 * synchronously.  Otherwise, the caller might free the entry before
	 * dmar_qi_task() is finished processing it.
	 */
	if (unit->qi_enabled) {
		if (free) {
			DMAR_LOCK(unit);
			iommu_qi_invalidate_locked(&domain->iodom, entry,
			    true);
			DMAR_UNLOCK(unit);
		} else {
			iommu_qi_invalidate_sync(&domain->iodom, entry->start,
			    entry->end - entry->start, cansleep);
			dmar_domain_free_entry(entry, false);
		}
	} else {
		domain_flush_iotlb_sync(domain, entry->start, entry->end -
		    entry->start);
		dmar_domain_free_entry(entry, free);
	}
}

static bool
dmar_domain_unload_emit_wait(struct dmar_domain *domain,
    struct iommu_map_entry *entry)
{

	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
		return (true);
	return (domain->batch_no++ % dmar_batch_coalesce == 0);
}
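
/*
 * Unload a batch of map entries: unmap each range from the domain page
 * table, then either flush the IOTLB synchronously or queue the
 * entries for queued-invalidation completion.
 */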
void
dmar_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;
	struct iommu_map_entry *entry, *entry1;
	int error __diagused;

	domain = IODOM2DOM(iodom);
	unit = DOM2DMAR(domain);

	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("not mapped entry %p %p", domain, entry));
		error = iodom->ops->unmap(iodom, entry->start, entry->end -
		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
		KASSERT(error == 0, ("unmap %p error %d", domain, error));
		if (!unit->qi_enabled) {
			domain_flush_iotlb_sync(domain, entry->start,
			    entry->end - entry->start);
			TAILQ_REMOVE(entries, entry, dmamap_link);
			dmar_domain_free_entry(entry, true);
		}
	}
	if (TAILQ_EMPTY(entries))
		return;

	KASSERT(unit->qi_enabled, ("loaded entry left"));
	DMAR_LOCK(unit);
	while ((entry = TAILQ_FIRST(entries)) != NULL) {
		TAILQ_REMOVE(entries, entry, dmamap_link);
		iommu_qi_invalidate_locked(&domain->iodom, entry,
		    dmar_domain_unload_emit_wait(domain, entry));
	}
	DMAR_UNLOCK(unit);
}

struct iommu_ctx *
dmar_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ret;

	dmar = IOMMU2DMAR(iommu);
	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
	return (CTX2IOCTX(ret));
}

void
dmar_free_ctx_locked_method(struct iommu_unit *iommu,
    struct iommu_ctx *context)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;

	dmar = IOMMU2DMAR(iommu);
	ctx = IOCTX2CTX(context);
	dmar_free_ctx_locked(dmar, ctx);
}

void
dmar_free_ctx_method(struct iommu_ctx *context)
{
	struct dmar_ctx *ctx;

	ctx = IOCTX2CTX(context);
	dmar_free_ctx(ctx);
}