/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/intel_dmar.h>

static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");

static void dmar_unref_domain_locked(struct dmar_unit *dmar,
    struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);

static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
	struct sf_buf *sf;
	dmar_root_entry_t *re;
	vm_page_t ctxm;

	/*
	 * Allocated context page must be linked.
	 */
	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
	if (ctxm != NULL)
		return;

	/*
	 * Page not present, allocate and link.  Note that another
	 * thread might execute this sequence in parallel.  This
	 * should be safe, because the context entries written by both
	 * threads are equal.
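	 * The root entry for the bus is then made present and pointed
	 * at the freshly allocated context page, and flushed to RAM so
	 * that the hardware observes the update.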
	 */
	TD_PREP_PINNED_ASSERT;
	ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
	    IOMMU_PGF_WAITOK);
	re = dmar_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
	re += bus;
	dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
	    VM_PAGE_TO_PHYS(ctxm)));
	dmar_flush_root_to_ram(dmar, re);
	dmar_unmap_pgtbl(sf);
	TD_PINNED_ASSERT;
}

static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
{
	struct dmar_unit *dmar;
	dmar_ctx_entry_t *ctxp;

	dmar = CTX2DMAR(ctx);

	ctxp = dmar_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
	    IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
	ctxp += ctx->context.rid & 0xff;
	return (ctxp);
}

static void
device_tag_init(struct dmar_ctx *ctx, device_t dev)
{
	struct dmar_domain *domain;
	bus_addr_t maxaddr;

	domain = CTX2DOM(ctx);
	maxaddr = MIN(domain->iodom.end, BUS_SPACE_MAXADDR);
	ctx->context.tag->common.impl = &bus_dma_iommu_impl;
	ctx->context.tag->common.boundary = 0;
	ctx->context.tag->common.lowaddr = maxaddr;
	ctx->context.tag->common.highaddr = maxaddr;
	ctx->context.tag->common.maxsize = maxaddr;
	ctx->context.tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
	ctx->context.tag->common.maxsegsz = maxaddr;
	ctx->context.tag->ctx = CTX2IOCTX(ctx);
	ctx->context.tag->owner = dev;
}

static void
ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
    vm_page_t ctx_root)
{
	/*
	 * For an update due to a move, the store is not atomic.  It is
	 * possible that the DMAR reads the upper doubleword while the
	 * lower doubleword is not yet updated.  The domain id is stored
	 * in the upper doubleword, while the table pointer is in the
	 * lower.
	 *
	 * There is no good solution; for the same reason, it is wrong
	 * to clear the P bit in the ctx entry for the update.
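	 * For the initial (non-move) case the ordering below is safe:
	 * ctx2, holding the domain id and address width, is stored
	 * before ctx1, so the P bit in ctx1 becomes visible only after
	 * the rest of the entry is in place.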
	 */
	dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
	    domain->awlvl);
	if (ctx_root == NULL) {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
	} else {
		dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
		    (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
		    DMAR_CTX1_P);
	}
}

static void
ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
    int busno)
{
	struct dmar_unit *unit;
	struct dmar_domain *domain;
	vm_page_t ctx_root;
	int i;

	domain = CTX2DOM(ctx);
	unit = DOM2DMAR(domain);
	KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
	    ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
	    unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
	    pci_get_function(ctx->context.tag->owner),
	    ctxp->ctx1, ctxp->ctx2));

	if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
	    (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
		KASSERT(domain->pgtbl_obj == NULL,
		    ("ctx %p non-null pgtbl_obj", ctx));
		ctx_root = NULL;
	} else {
		ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0,
		    IOMMU_PGF_NOALLOC);
	}

	if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
		MPASS(!move);
		for (i = 0; i <= PCI_BUSMAX; i++) {
			ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
		}
	} else {
		ctx_id_entry_init_one(ctxp, domain, ctx_root);
	}
	dmar_flush_ctx_to_ram(unit, ctxp);
}

static int
dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
{
	int error;

	/*
	 * If dmar declares Caching Mode as Set, follow 11.5 "Caching
	 * Mode Consideration" and do the (global) invalidation of the
	 * negative TLB entries.
	 */
	if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
		return (0);
	if (dmar->qi_enabled) {
		dmar_qi_invalidate_ctx_glob_locked(dmar);
		if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		return (0);
	}
	error = dmar_inv_ctx_glob(dmar);
	if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
		error = dmar_inv_iotlb_glob(dmar);
	return (error);
}

static int
domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
    int slot, int func, int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len)
{
	struct iommu_map_entries_tailq rmrr_entries;
	struct iommu_map_entry *entry, *entry1;
	vm_page_t *ma;
	iommu_gaddr_t start, end;
	vm_pindex_t size, i;
	int error, error1;

	if (!dmar_rmrr_enable)
		return (0);

	error = 0;
	TAILQ_INIT(&rmrr_entries);
	dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
	    dev_path_len, &rmrr_entries);
	TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
		/*
		 * The VT-d specification requires that the start of an
		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
		 * anything into the start and end fields.  Truncate
		 * and round as necessary.
		 *
		 * We also allow overlapping RMRR entries, see
		 * iommu_gas_alloc_region().
		 */
		start = entry->start;
		end = entry->end;
		if (bootverbose)
			printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
			    domain->iodom.iommu->unit, bus, slot, func,
			    (uintmax_t)start, (uintmax_t)end);
		entry->start = trunc_page(start);
		entry->end = round_page(end);
		if (entry->start == entry->end) {
			/* Workaround for some AMI (?) BIOSes */
			if (bootverbose) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf("BIOS bug: dmar%d RMRR "
				    "region (%jx, %jx) corrected\n",
				    domain->iodom.iommu->unit, start, end);
			}
			entry->end += DMAR_PAGE_SIZE * 0x20;
		}
		size = OFF_TO_IDX(entry->end - entry->start);
		ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
		for (i = 0; i < size; i++) {
			ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
			    VM_MEMATTR_DEFAULT);
		}
		error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
		    IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
		    IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
		/*
		 * Non-failed RMRR entries are owned by the context's
		 * RB tree.  Get rid of the failed entry, but do not
		 * stop the loop.  The rest of the parsed RMRR entries
		 * are loaded and removed on the context destruction.
		 */
		if (error1 == 0 && entry->end != entry->start) {
			IOMMU_LOCK(domain->iodom.iommu);
			domain->refs++; /* XXXKIB prevent free */
			domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
			IOMMU_UNLOCK(domain->iodom.iommu);
		} else {
			if (error1 != 0) {
				if (dev != NULL)
					device_printf(dev, "");
				printf("pci%d:%d:%d ", bus, slot, func);
				printf(
			"dmar%d failed to map RMRR region (%jx, %jx) %d\n",
				    domain->iodom.iommu->unit, start, end,
				    error1);
				error = error1;
			}
			TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
			iommu_gas_free_entry(entry);
		}
		for (i = 0; i < size; i++)
			vm_page_putfake(ma[i]);
		free(ma, M_TEMP);
	}
	return (error);
}

/*
 * PCI memory address space is shared between memory-mapped devices (MMIO) and
 * host memory (which may be remapped by an IOMMU).  Device accesses to an
 * address within a memory aperture in a PCIe root port will be treated as
 * peer-to-peer and not forwarded to an IOMMU.  To avoid this, reserve the
 * address space of the root port's memory apertures in the address space used
 * by the IOMMU for remapping.
 */
static int
dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
{
	struct iommu_domain *iodom;
	device_t root;
	uint32_t val;
	uint64_t base, limit;
	int error;

	iodom = DOM2IODOM(domain);

	root = pci_find_pcie_root_port(dev);
	if (root == NULL)
		return (0);

	/* Disable downstream memory */
	base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
	limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
	error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
	if (bootverbose || error != 0)
		device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
		    base, limit + 1, error);
	if (error != 0)
		return (error);

	/* Disable downstream prefetchable memory */
	val = pci_read_config(root, PCIR_PMBASEL_1, 2);
	if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
		if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
			base = PCI_PPBMEMBASE(
			    pci_read_config(root, PCIR_PMBASEH_1, 4),
			    val);
			limit = PCI_PPBMEMLIMIT(
			    pci_read_config(root, PCIR_PMLIMITH_1, 4),
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		} else {
			base = PCI_PPBMEMBASE(0, val);
			limit = PCI_PPBMEMLIMIT(0,
			    pci_read_config(root, PCIR_PMLIMITL_1, 2));
		}
		error = iommu_gas_reserve_region_extend(iodom, base,
		    limit + 1);
		if (bootverbose || error != 0)
			device_printf(dev, "DMAR reserve [%#jx-%#jx] "
			    "(error %d)\n", base, limit + 1, error);
		if (error != 0)
			return (error);
	}

	return (error);
}

static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
	struct iommu_domain *iodom;
	struct iommu_unit *unit;
	struct dmar_domain *domain;
	int error, id, mgaw;

	id = alloc_unr(dmar->domids);
	if (id == -1)
		return (NULL);
	domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
	iodom = DOM2IODOM(domain);
	unit = DMAR2IOMMU(dmar);
	domain->domain = id;
	LIST_INIT(&domain->contexts);
	iommu_domain_init(unit, iodom, &dmar_domain_map_ops);

	domain->dmar = dmar;

	/*
	 * For now, use the maximal usable physical address of the
	 * installed memory to calculate the mgaw on an id_mapped
	 * domain.  It is useful for the identity mapping, and less so
	 * for the virtualized bus address space.
	 */
	domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
	mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
	error = domain_set_agaw(domain, mgaw);
	if (error != 0)
		goto fail;
	if (!id_mapped)
		/* Use all supported address space for remapping. */
		domain->iodom.end = 1ULL << (domain->agaw - 1);

	iommu_gas_init_domain(DOM2IODOM(domain));

	if (id_mapped) {
		if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
			domain->pgtbl_obj = domain_get_idmap_pgtbl(domain,
			    domain->iodom.end);
		}
		domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
	} else {
		error = domain_alloc_pgtbl(domain);
		if (error != 0)
			goto fail;
		/* Disable local apic region access */
		error = iommu_gas_reserve_region(iodom, 0xfee00000,
		    0xfeefffff + 1, &iodom->msi_entry);
		if (error != 0)
			goto fail;
	}
	return (domain);

fail:
	dmar_domain_destroy(domain);
	return (NULL);
}

static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
{
	struct dmar_ctx *ctx;

	ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.domain = DOM2IODOM(domain);
	ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
	    M_DMAR_CTX, M_WAITOK | M_ZERO);
	ctx->context.rid = rid;
	ctx->refs = 1;
	return (ctx);
}

static void
dmar_ctx_link(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs >= domain->ctx_cnt,
	    ("dom %p ref underflow %d %d", domain, domain->refs,
	    domain->ctx_cnt));
	domain->refs++;
	domain->ctx_cnt++;
	LIST_INSERT_HEAD(&domain->contexts, ctx, link);
}

static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
	struct dmar_domain *domain;

	domain = CTX2DOM(ctx);
	IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
	KASSERT(domain->refs > 0,
	    ("domain %p ctx dtr refs %d", domain, domain->refs));
	KASSERT(domain->ctx_cnt >= domain->refs,
	    ("domain %p ctx dtr refs %d ctx_cnt %d", domain,
	    domain->refs, domain->ctx_cnt));
	domain->refs--;
	domain->ctx_cnt--;
	LIST_REMOVE(ctx, link);
}

static void
dmar_domain_destroy(struct dmar_domain *domain)
{
	struct iommu_domain *iodom;
	struct dmar_unit *dmar;

	iodom = DOM2IODOM(domain);

	KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
	    ("unfinished unloads %p", domain));
	KASSERT(LIST_EMPTY(&domain->contexts),
	    ("destroying dom %p with contexts", domain));
	KASSERT(domain->ctx_cnt == 0,
	    ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
	KASSERT(domain->refs == 0,
	    ("destroying dom %p with refs %d", domain, domain->refs));
	if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
		DMAR_DOMAIN_LOCK(domain);
		iommu_gas_fini_domain(iodom);
		DMAR_DOMAIN_UNLOCK(domain);
	}
	if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
		if (domain->pgtbl_obj != NULL)
			DMAR_DOMAIN_PGLOCK(domain);
		domain_free_pgtbl(domain);
	}
	iommu_domain_fini(iodom);
	dmar = DOM2DMAR(domain);
	free_unr(dmar->domids, domain->domain);
	free(domain, M_DMAR_DOMAIN);
}

static struct dmar_ctx *
dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_domain *domain, *domain1;
	struct dmar_ctx *ctx, *ctx1;
	struct iommu_unit *unit __diagused;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int bus, slot, func, error;
	bool enable;

	if (dev != NULL) {
		bus = pci_get_bus(dev);
		slot = pci_get_slot(dev);
		func = pci_get_function(dev);
	} else {
		bus = PCI_RID2BUS(rid);
		slot = PCI_RID2SLOT(rid);
		func = PCI_RID2FUNC(rid);
	}
	enable = false;
	TD_PREP_PINNED_ASSERT;
	unit = DMAR2IOMMU(dmar);
	DMAR_LOCK(dmar);
	KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
	    ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
	    slot, func));
	ctx = dmar_find_ctx_locked(dmar, rid);
	error = 0;
	if (ctx == NULL) {
		/*
		 * Perform the allocations which require sleep or have
		 * a higher chance to succeed if sleeping is allowed.
		 */
		DMAR_UNLOCK(dmar);
		dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
		domain1 = dmar_domain_alloc(dmar, id_mapped);
		if (domain1 == NULL) {
			TD_PINNED_ASSERT;
			return (NULL);
		}
		if (!id_mapped) {
			error = domain_init_rmrr(domain1, dev, bus,
			    slot, func, dev_domain, dev_busno, dev_path,
			    dev_path_len);
			if (error == 0 && dev != NULL)
				error = dmar_reserve_pci_regions(domain1, dev);
			if (error != 0) {
				dmar_domain_destroy(domain1);
				TD_PINNED_ASSERT;
				return (NULL);
			}
		}
		ctx1 = dmar_ctx_alloc(domain1, rid);
		ctxp = dmar_map_ctx_entry(ctx1, &sf);
		DMAR_LOCK(dmar);

		/*
		 * Recheck the contexts; another thread might have
		 * already allocated the needed one.
		 */
		ctx = dmar_find_ctx_locked(dmar, rid);
		if (ctx == NULL) {
			domain = domain1;
			ctx = ctx1;
			dmar_ctx_link(ctx);
			ctx->context.tag->owner = dev;
			device_tag_init(ctx, dev);

			/*
			 * This is the first activated context for the
			 * DMAR unit.  Enable the translation after
			 * everything is set up.
			 */
			if (LIST_EMPTY(&dmar->domains))
				enable = true;
			LIST_INSERT_HEAD(&dmar->domains, domain, link);
			ctx_id_entry_init(ctx, ctxp, false, bus);
			if (dev != NULL) {
				device_printf(dev,
			    "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
				    "agaw %d %s-mapped\n",
				    dmar->iommu.unit, dmar->segment, bus, slot,
				    func, rid, domain->domain, domain->mgaw,
				    domain->agaw, id_mapped ? "id" : "re");
			}
			dmar_unmap_pgtbl(sf);
		} else {
			dmar_unmap_pgtbl(sf);
			dmar_domain_destroy(domain1);
			/* Nothing needs to be done to destroy ctx1. */
			free(ctx1, M_DMAR_CTX);
			domain = CTX2DOM(ctx);
			ctx->refs++; /* tag referenced us */
		}
	} else {
		domain = CTX2DOM(ctx);
		if (ctx->context.tag->owner == NULL)
			ctx->context.tag->owner = dev;
		ctx->refs++; /* tag referenced us */
	}

	error = dmar_flush_for_ctx_entry(dmar, enable);
	if (error != 0) {
		dmar_free_ctx_locked(dmar, ctx);
		TD_PINNED_ASSERT;
		return (NULL);
	}

	/*
	 * The dmar lock was potentially dropped between the check for
	 * the empty context list and now.  Recheck the state of GCMD_TE
	 * to avoid an unneeded command.
	 */
	if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
		error = dmar_disable_protected_regions(dmar);
		if (error != 0)
			printf("dmar%d: Failed to disable protected regions\n",
			    dmar->iommu.unit);
		error = dmar_enable_translation(dmar);
		if (error == 0) {
			if (bootverbose) {
				printf("dmar%d: enabled translation\n",
				    dmar->iommu.unit);
			}
		} else {
			printf("dmar%d: enabling translation failed, "
			    "error %d\n", dmar->iommu.unit, error);
			dmar_free_ctx_locked(dmar, ctx);
			TD_PINNED_ASSERT;
			return (NULL);
		}
	}
	DMAR_UNLOCK(dmar);
	TD_PINNED_ASSERT;
	return (ctx);
}

struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	int dev_domain, dev_path_len, dev_busno;

	dev_domain = pci_get_domain(dev);
	dev_path_len = dmar_dev_depth(dev);
	ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
	dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
	return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
    int dev_domain, int dev_busno,
    const void *dev_path, int dev_path_len,
    bool id_mapped, bool rmrr_init)
{

	return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
	    dev_path, dev_path_len, id_mapped, rmrr_init));
}

int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;
	struct dmar_domain *old_domain;
	dmar_ctx_entry_t *ctxp;
	struct sf_buf *sf;
	int error;

	dmar = domain->dmar;
	old_domain = CTX2DOM(ctx);
	if (domain == old_domain)
		return (0);
	KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
	    ("domain %p %u moving between dmars %u %u", domain,
	    domain->domain, old_domain->iodom.iommu->unit,
	    domain->iodom.iommu->unit));
	TD_PREP_PINNED_ASSERT;

	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	dmar_ctx_unlink(ctx);
	ctx->context.domain = &domain->iodom;
	dmar_ctx_link(ctx);
	ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
	dmar_unmap_pgtbl(sf);
	error = dmar_flush_for_ctx_entry(dmar, true);
	/* If the flush failed, rolling back would not work either. */
	printf("dmar%d rid %x domain %d->%d %s-mapped\n",
	    dmar->iommu.unit, ctx->context.rid, old_domain->domain,
	    domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
	    "id" : "re");
	dmar_unref_domain_locked(dmar, old_domain);
	TD_PINNED_ASSERT;
	return (error);
}

static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(domain->refs >= 1,
	    ("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
	    domain->refs));
	KASSERT(domain->refs > domain->ctx_cnt,
	    ("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
	    domain->refs, domain->ctx_cnt));

	if (domain->refs > 1) {
		domain->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
	    ("lost ref on RMRR domain %p", domain));

	LIST_REMOVE(domain, link);
	DMAR_UNLOCK(dmar);

	taskqueue_drain(dmar->iommu.delayed_taskqueue,
	    &domain->iodom.unload_task);
	dmar_domain_destroy(domain);
}

void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
	struct sf_buf *sf;
	dmar_ctx_entry_t *ctxp;
	struct dmar_domain *domain;

	DMAR_ASSERT_LOCKED(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * If our reference is not the last one, only the dereference
	 * should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Otherwise, the context entry must be cleared before the
	 * page table is destroyed.  The mapping of the context
	 * entries page could require sleep, so unlock the dmar.
	 */
	DMAR_UNLOCK(dmar);
	TD_PREP_PINNED_ASSERT;
	ctxp = dmar_map_ctx_entry(ctx, &sf);
	DMAR_LOCK(dmar);
	KASSERT(ctx->refs >= 1,
	    ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));

	/*
	 * Another thread might have referenced the context, in which
	 * case again only the dereference should be performed.
	 */
	if (ctx->refs > 1) {
		ctx->refs--;
		DMAR_UNLOCK(dmar);
		dmar_unmap_pgtbl(sf);
		TD_PINNED_ASSERT;
		return;
	}

	KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
	    ("lost ref on disabled ctx %p", ctx));

	/*
	 * Clear the context pointer and flush the caches.
	 * XXXKIB: cannot do this if any RMRR entries are still present.
	 */
	dmar_pte_clear(&ctxp->ctx1);
	ctxp->ctx2 = 0;
	dmar_flush_ctx_to_ram(dmar, ctxp);
	dmar_inv_ctx_glob(dmar);
	if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
		if (dmar->qi_enabled)
			dmar_qi_invalidate_iotlb_glob_locked(dmar);
		else
			dmar_inv_iotlb_glob(dmar);
	}
	dmar_unmap_pgtbl(sf);
	domain = CTX2DOM(ctx);
	dmar_ctx_unlink(ctx);
	free(ctx->context.tag, M_DMAR_CTX);
	free(ctx, M_DMAR_CTX);
	dmar_unref_domain_locked(dmar, domain);
	TD_PINNED_ASSERT;
}

void
dmar_free_ctx(struct dmar_ctx *ctx)
{
	struct dmar_unit *dmar;

	dmar = CTX2DMAR(ctx);
	DMAR_LOCK(dmar);
	dmar_free_ctx_locked(dmar, ctx);
}

/*
 * Returns with the domain locked.
 */
struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
{
	struct dmar_domain *domain;
	struct dmar_ctx *ctx;

	DMAR_ASSERT_LOCKED(dmar);

	LIST_FOREACH(domain, &dmar->domains, link) {
		LIST_FOREACH(ctx, &domain->contexts, link) {
			if (ctx->context.rid == rid)
				return (ctx);
		}
	}
	return (NULL);
}

void
dmar_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
	if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
		iommu_gas_free_region(entry);
	else
		iommu_gas_free_space(entry);
	if (free)
		iommu_gas_free_entry(entry);
	else
		entry->flags = 0;
}

/*
 * If the given value for "free" is true, then the caller must not be using
 * the entry's dmamap_link field.
 */
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
    bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;

	domain = IODOM2DOM(entry->domain);
	unit = DOM2DMAR(domain);

	/*
	 * If "free" is false, then the IOTLB invalidation must be performed
	 * synchronously.  Otherwise, the caller might free the entry before
	 * dmar_qi_task() is finished processing it.
	 */
	if (unit->qi_enabled) {
		if (free) {
			DMAR_LOCK(unit);
			dmar_qi_invalidate_locked(domain, entry, true);
			DMAR_UNLOCK(unit);
		} else {
			dmar_qi_invalidate_sync(domain, entry->start,
			    entry->end - entry->start, cansleep);
			dmar_domain_free_entry(entry, false);
		}
	} else {
		domain_flush_iotlb_sync(domain, entry->start, entry->end -
		    entry->start);
		dmar_domain_free_entry(entry, free);
	}
}

static bool
dmar_domain_unload_emit_wait(struct dmar_domain *domain,
    struct iommu_map_entry *entry)
{

	if (TAILQ_NEXT(entry, dmamap_link) == NULL)
		return (true);
	return (domain->batch_no++ % dmar_batch_coalesce == 0);
}

void
iommu_domain_unload(struct iommu_domain *iodom,
    struct iommu_map_entries_tailq *entries, bool cansleep)
{
	struct dmar_domain *domain;
	struct dmar_unit *unit;
	struct iommu_map_entry *entry, *entry1;
	int error __diagused;

	domain = IODOM2DOM(iodom);
	unit = DOM2DMAR(domain);

	TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
		KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
		    ("not mapped entry %p %p", domain, entry));
		error = iodom->ops->unmap(iodom, entry->start, entry->end -
		    entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
		KASSERT(error == 0, ("unmap %p error %d", domain, error));
		if (!unit->qi_enabled) {
			domain_flush_iotlb_sync(domain, entry->start,
			    entry->end - entry->start);
			TAILQ_REMOVE(entries, entry, dmamap_link);
			dmar_domain_free_entry(entry, true);
		}
	}
	if (TAILQ_EMPTY(entries))
		return;

	KASSERT(unit->qi_enabled, ("loaded entry left"));
	DMAR_LOCK(unit);
	while ((entry = TAILQ_FIRST(entries)) != NULL) {
		TAILQ_REMOVE(entries, entry, dmamap_link);
		dmar_qi_invalidate_locked(domain, entry,
		    dmar_domain_unload_emit_wait(domain, entry));
	}
	DMAR_UNLOCK(unit);
}

struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
    bool id_mapped, bool rmrr_init)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ret;

	dmar = IOMMU2DMAR(iommu);

	ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);

	return (CTX2IOCTX(ret));
}

void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;

	dmar = IOMMU2DMAR(iommu);
	ctx = IOCTX2CTX(context);

	dmar_free_ctx_locked(dmar, ctx);
}

void
iommu_free_ctx(struct iommu_ctx *context)
{
	struct dmar_ctx *ctx;

	ctx = IOCTX2CTX(context);

	dmar_free_ctx(ctx);
}