/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/pci/pcireg.h>

#include <machine/vmparam.h>
#include <contrib/dev/acpica/include/acpi.h>

#include "io/iommu.h"

/*
 * Documented in the "Intel Virtualization Technology for Directed I/O",
 * Architecture Spec, September 2008.
 */
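
/*
 * Layout notes: 'struct vtdmap' below overlays the start of a remapping
 * unit's memory-mapped register window and declares only the registers this
 * driver touches. Commands are issued by writing a bit to the global command
 * register ('gcr') and spinning on the matching status bit in 'gsr'. The
 * root table is indexed by PCI bus number; each per-bus context table is
 * indexed by device/function. Root and context entries are 128 bits wide,
 * i.e. a pair of uint64_t words, which is why VTD_RID2IDX scales by two and
 * vtd_init() writes root_table[i * 2].
 */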
#define	VTD_DRHD_INCLUDE_PCI_ALL(Flags)	(((Flags) >> 0) & 0x1)

/* Section 10.4 "Register Descriptions" */
struct vtdmap {
	volatile uint32_t	version;
	volatile uint32_t	res0;
	volatile uint64_t	cap;
	volatile uint64_t	ext_cap;
	volatile uint32_t	gcr;
	volatile uint32_t	gsr;
	volatile uint64_t	rta;
	volatile uint64_t	ccr;
};

#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

#define	VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)

struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	u_int		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;
};

static SLIST_HEAD(, domain) domhead;

#define	DRHD_MAX_UNITS	16
static ACPI_DMAR_HARDWARE_UNIT	*drhds[DRHD_MAX_UNITS];
static int			drhd_num;
static struct vtdmap		*vtdmaps[DRHD_MAX_UNITS];
static int			max_domains;
typedef int			(*drhd_ident_func_t)(void);

static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);

static MALLOC_DEFINE(M_VTD, "vtd", "vtd");

static int
vtd_max_domains(struct vtdmap *vtdmap)
{
	int nd;

	nd = VTD_CAP_ND(vtdmap->cap);

	switch (nd) {
	case 0:
		return (16);
	case 1:
		return (64);
	case 2:
		return (256);
	case 3:
		return (1024);
	case 4:
		return (4 * 1024);
	case 5:
		return (16 * 1024);
	case 6:
		return (64 * 1024);
	default:
		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
	}
}

static u_int
domain_id(void)
{
	u_int id;
	struct domain *dom;

	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
	for (id = 1; id < max_domains; id++) {
		SLIST_FOREACH(dom, &domhead, next) {
			if (dom->id == id)
				break;
		}
		if (dom == NULL)
			break;		/* found it */
	}

	if (id >= max_domains)
		panic("domain ids exhausted");

	return (id);
}
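
/*
 * Return the remapping unit responsible for the given PCI requester id by
 * walking each DRHD's device scope entries and matching the bus, device and
 * function. A DRHD with the INCLUDE_PCI_ALL flag set acts as the catch-all
 * for devices not covered by a more specific unit.
 */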
static struct vtdmap *
vtd_device_scope(uint16_t rid)
{
	int i, remaining, pathremaining;
	char *end, *pathend;
	struct vtdmap *vtdmap;
	ACPI_DMAR_HARDWARE_UNIT *drhd;
	ACPI_DMAR_DEVICE_SCOPE *device_scope;
	ACPI_DMAR_PCI_PATH *path;

	for (i = 0; i < drhd_num; i++) {
		drhd = drhds[i];

		if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
			/*
			 * From Intel VT-d arch spec, version 3.0:
			 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is
			 * reported for a Segment, it must be enumerated by
			 * BIOS after all other DRHD structures for the same
			 * Segment.
			 */
			vtdmap = vtdmaps[i];
			return (vtdmap);
		}

		end = (char *)drhd + drhd->Header.Length;
		remaining = drhd->Header.Length -
		    sizeof(ACPI_DMAR_HARDWARE_UNIT);
		while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
			device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
			remaining -= device_scope->Length;

			switch (device_scope->EntryType) {
			/* 0x01 and 0x02 are PCI device entries */
			case 0x01:
			case 0x02:
				break;
			default:
				continue;
			}

			if (PCI_RID2BUS(rid) != device_scope->Bus)
				continue;

			pathend = (char *)device_scope + device_scope->Length;
			pathremaining = device_scope->Length -
			    sizeof(ACPI_DMAR_DEVICE_SCOPE);
			while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
				path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
				pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);

				if (PCI_RID2SLOT(rid) != path->Device)
					continue;
				if (PCI_RID2FUNC(rid) != path->Function)
					continue;

				vtdmap = vtdmaps[i];
				return (vtdmap);
			}
		}
	}

	/* No matching scope */
	return (NULL);
}

static void
vtd_wbflush(struct vtdmap *vtdmap)
{

	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
		pmap_invalidate_cache();

	if (VTD_CAP_RWBF(vtdmap->cap)) {
		vtdmap->gcr = VTD_GCR_WBF;
		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
			;
	}
}

static void
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
{

	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
		;
}

static void
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
{
	int offset;
	volatile uint64_t *iotlb_reg, val;

	vtd_wbflush(vtdmap);

	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);

	*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
	    VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;

	while (1) {
		val = *iotlb_reg;
		if ((val & VTD_IIR_IVT) == 0)
			break;
	}
}

static void
vtd_translation_enable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = VTD_GCR_TE;
	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
		;
}

static void
vtd_translation_disable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = 0;
	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
		;
}
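
/*
 * Discover the remapping units, either from the 'vtd.regmap.N.addr' loader
 * tunables or from the ACPI DMAR table, clamp 'max_domains' to the smallest
 * value supported by any unit, and point every root-table entry at its
 * per-bus context table.
 */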
static int
vtd_init(void)
{
	int i, units, remaining, tmp;
	struct vtdmap *vtdmap;
	vm_paddr_t ctx_paddr;
	char *end, envname[32];
	unsigned long mapaddr;
	ACPI_STATUS status;
	ACPI_TABLE_DMAR *dmar;
	ACPI_DMAR_HEADER *hdr;
	ACPI_DMAR_HARDWARE_UNIT *drhd;

	/*
	 * Allow the user to override the ACPI DMAR table by specifying the
	 * physical address of each remapping unit.
	 *
	 * The following example specifies two remapping units at
	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
	 * set vtd.regmap.0.addr=0xfed90000
	 * set vtd.regmap.1.addr=0xfeda0000
	 */
	for (units = 0; units < DRHD_MAX_UNITS; units++) {
		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
		if (getenv_ulong(envname, &mapaddr) == 0)
			break;
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
	}

	if (units > 0)
		goto skip_dmar;

	/* Search for DMAR table. */
	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
	if (ACPI_FAILURE(status))
		return (ENXIO);

	end = (char *)dmar + dmar->Header.Length;
	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
		if (hdr->Length > remaining)
			break;
		/*
		 * From Intel VT-d arch spec, version 1.3:
		 * BIOS implementations must report mapping structures
		 * in numerical order, i.e. All remapping structures of
		 * type 0 (DRHD) enumerated before remapping structures of
		 * type 1 (RMRR) and so forth.
		 */
		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
			break;

		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
		drhds[units] = drhd;
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
		if (++units >= DRHD_MAX_UNITS)
			break;
		remaining -= hdr->Length;
	}

	if (units <= 0)
		return (ENXIO);

skip_dmar:
	drhd_num = units;

	max_domains = 64 * 1024;	/* maximum valid value */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];

		if (VTD_CAP_CM(vtdmap->cap) != 0)
			panic("vtd_init: invalid caching mode");

		/* take most compatible (minimum) value */
		if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
			max_domains = tmp;
	}

	/*
	 * Set up the root-table to point to the context-entry tables
	 */
	for (i = 0; i < 256; i++) {
		ctx_paddr = vtophys(ctx_tables[i]);
		if (ctx_paddr & PAGE_MASK)
			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);

		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
	}

	return (0);
}

static void
vtd_cleanup(void)
{
}

static void
vtd_enable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_wbflush(vtdmap);

		/* Update the root table address */
		vtdmap->rta = vtophys(root_table);
		vtdmap->gcr = VTD_GCR_SRTP;
		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
			;

		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);

		vtd_translation_enable(vtdmap);
	}
}

static void
vtd_disable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_translation_disable(vtdmap);
	}
}
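
/*
 * Install the context entry for 'rid' in its bus's context table. A context
 * entry is a pair of uint64_t words: the upper word carries the address
 * width and domain id, the lower word the page-table root, the translation
 * type and the 'present' bit, which is written last.
 */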
static void
vtd_add_device(void *arg, uint16_t rid)
{
	int idx;
	uint64_t *ctxp;
	struct domain *dom = arg;
	vm_paddr_t pt_paddr;
	struct vtdmap *vtdmap;
	uint8_t bus;

	KASSERT(dom != NULL, ("domain is NULL"));

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	pt_paddr = vtophys(dom->ptp);
	idx = VTD_RID2IDX(rid);

	if (ctxp[idx] & VTD_CTX_PRESENT) {
		panic("vtd_add_device: device %x is already owned by "
		    "domain %d", rid,
		    (uint16_t)(ctxp[idx + 1] >> 8));
	}

	if ((vtdmap = vtd_device_scope(rid)) == NULL)
		panic("vtd_add_device: device %x is not in scope for "
		    "any DMA remapping unit", rid);

	/*
	 * Order is important. The 'present' bit is set only after all other
	 * fields of the context entry are initialized.
	 */
	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);

	if (VTD_ECAP_DI(vtdmap->ext_cap))
		ctxp[idx] = VTD_CTX_TT_ALL;
	else
		ctxp[idx] = 0;

	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;

	/*
	 * 'Not Present' entries are not cached in either the Context Cache
	 * or the IOTLB, so there is no need to invalidate either of them.
	 */
}

static void
vtd_remove_device(void *arg, uint16_t rid)
{
	int i, idx;
	uint64_t *ctxp;
	struct vtdmap *vtdmap;
	uint8_t bus;

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	idx = VTD_RID2IDX(rid);

	/*
	 * Order is important. The 'present' bit must be cleared first.
	 */
	ctxp[idx] = 0;
	ctxp[idx + 1] = 0;

	/*
	 * Invalidate the Context Cache and the IOTLB.
	 *
	 * XXX use device-selective invalidation for Context Cache
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

#define	CREATE_MAPPING	0
#define	REMOVE_MAPPING	1

static uint64_t
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
    int remove)
{
	struct domain *dom;
	int i, spshift, ptpshift, ptpindex, nlevels;
	uint64_t spsize, *ptp;

	dom = arg;
	ptpindex = 0;
	ptpshift = 0;

	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
	    gpa, len));
	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
	    "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));

	if (gpa & PAGE_MASK)
		panic("vtd_update_mapping: unaligned gpa 0x%0lx", gpa);

	if (hpa & PAGE_MASK)
		panic("vtd_update_mapping: unaligned hpa 0x%0lx", hpa);

	if (len & PAGE_MASK)
		panic("vtd_update_mapping: unaligned len 0x%0lx", len);

	/*
	 * Compute the size of the mapping that we can accommodate.
	 *
	 * This is based on three factors:
	 * - supported super page size
	 * - alignment of the region starting at 'gpa' and 'hpa'
	 * - length of the region 'len'
	 */
	spshift = 48;
	for (i = 3; i >= 0; i--) {
		spsize = 1UL << spshift;
		if ((dom->spsmask & (1 << i)) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    (len >= spsize)) {
			break;
		}
		spshift -= 9;
	}

	ptp = dom->ptp;
	nlevels = dom->pt_levels;
	while (--nlevels >= 0) {
		ptpshift = 12 + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;

		/* We have reached the leaf mapping */
		if (spshift >= ptpshift) {
			break;
		}

		/*
		 * We are working on a non-leaf page table page.
		 *
		 * Create a downstream page table page if necessary and point
		 * to it from the current page table.
		 */
		if (ptp[ptpindex] == 0) {
			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
			ptp[ptpindex] = vtophys(nlp) | VTD_PTE_RD | VTD_PTE_WR;
		}

		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
	}

	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);

	/*
	 * Update the 'gpa' -> 'hpa' mapping
	 */
	if (remove) {
		ptp[ptpindex] = 0;
	} else {
		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;

		if (nlevels > 0)
			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
	}

	return (1UL << ptpshift);
}
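
/*
 * vtd_update_mapping() installs or clears at most one (super)page table
 * entry per call and returns the number of bytes it covered, so the caller
 * is expected to iterate until the requested range has been fully
 * processed. The two wrappers below select between creating and removing
 * the mapping.
 */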
static uint64_t
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
}

static uint64_t
vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
}

static void
vtd_invalidate_tlb(void *dom)
{
	int i;
	struct vtdmap *vtdmap;

	/*
	 * Invalidate the IOTLB.
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
	struct domain *dom;
	vm_paddr_t addr;
	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
	struct vtdmap *vtdmap;

	if (drhd_num <= 0)
		panic("vtd_create_domain: no dma remapping hardware available");

	/*
	 * Calculate AGAW.
	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
	 */
	addr = 0;
	for (gaw = 0; addr < maxaddr; gaw++)
		addr = 1ULL << gaw;

	res = (gaw - 12) % 9;
	if (res == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - res;

	if (agaw > 64)
		agaw = 64;

	/*
	 * Select the smallest Supported AGAW and the corresponding number
	 * of page table levels.
	 */
	pt_levels = 2;
	sagaw = 30;
	addrwidth = 0;

	tmp = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		/* take most compatible value */
		tmp &= VTD_CAP_SAGAW(vtdmap->cap);
	}

	for (i = 0; i < 5; i++) {
		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
			break;
		pt_levels++;
		addrwidth++;
		sagaw += 9;
		if (sagaw > 64)
			sagaw = 64;
	}

	if (i >= 5) {
		panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
		    tmp, agaw);
	}

	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
	dom->pt_levels = pt_levels;
	dom->addrwidth = addrwidth;
	dom->id = domain_id();
	dom->maxaddr = maxaddr;
	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
	if ((uintptr_t)dom->ptp & PAGE_MASK)
		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);

#ifdef notyet
	/*
	 * XXX superpage mappings for the iommu do not work correctly.
	 *
	 * By default all physical memory is mapped into the host_domain.
	 * When a VM is allocated wired memory the pages belonging to it
	 * are removed from the host_domain and added to the vm's domain.
	 *
	 * If the page being removed was mapped using a superpage mapping
	 * in the host_domain then we need to demote the mapping before
	 * removing the page.
	 *
	 * There is no code to deal with the demotion at the moment so we
	 * disable superpage mappings altogether.
	 */
	dom->spsmask = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		/* take most compatible value */
		dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
	}
#endif

	SLIST_INSERT_HEAD(&domhead, dom, next);

	return (dom);
}
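
/*
 * Recursively free a domain's page-table pages. Entries without read/write
 * permission are unused, and superpage leaves do not reference a
 * lower-level page-table page, so both are skipped during the walk.
 */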
static void
vtd_free_ptp(uint64_t *ptp, int level)
{
	int i;
	uint64_t *nlp;

	if (level > 1) {
		for (i = 0; i < 512; i++) {
			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
				continue;
			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
				continue;
			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
			vtd_free_ptp(nlp, level - 1);
		}
	}

	bzero(ptp, PAGE_SIZE);
	free(ptp, M_VTD);
}

static void
vtd_destroy_domain(void *arg)
{
	struct domain *dom;

	dom = arg;

	SLIST_REMOVE(&domhead, dom, domain, next);
	vtd_free_ptp(dom->ptp, dom->pt_levels);
	free(dom, M_VTD);
}

const struct iommu_ops iommu_ops_intel = {
	.init		= vtd_init,
	.cleanup	= vtd_cleanup,
	.enable		= vtd_enable,
	.disable	= vtd_disable,
	.create_domain	= vtd_create_domain,
	.destroy_domain	= vtd_destroy_domain,
	.create_mapping	= vtd_create_mapping,
	.remove_mapping	= vtd_remove_mapping,
	.add_device	= vtd_add_device,
	.remove_device	= vtd_remove_device,
	.invalidate_tlb	= vtd_invalidate_tlb,
};