1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 /* 31 * This file and its contents are supplied under the terms of the 32 * Common Development and Distribution License ("CDDL"), version 1.0. 33 * You may only use this file in accordance with the terms of version 34 * 1.0 of the CDDL. 35 * 36 * A full copy of the text of the CDDL should have accompanied this 37 * source. A copy of the CDDL is also available via the Internet at 38 * http://www.illumos.org/license/CDDL. 39 * 40 * Copyright 2018 Joyent, Inc. 41 * Copyright 2022 Oxide Computer Company 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/kernel.h> 49 #include <sys/systm.h> 50 #include <sys/kmem.h> 51 52 #include <dev/pci/pcireg.h> 53 54 #include <machine/vmparam.h> 55 #include <sys/vmm_vm.h> 56 57 #include <contrib/dev/acpica/include/acpi.h> 58 59 #include <sys/sunndi.h> 60 61 #include "io/iommu.h" 62 63 /* 64 * Documented in the "Intel Virtualization Technology for Directed I/O", 65 * Architecture Spec, September 2008. 66 */ 67 68 #define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1) 69 70 /* Section 10.4 "Register Descriptions" */ 71 struct vtdmap { 72 volatile uint32_t version; 73 volatile uint32_t res0; 74 volatile uint64_t cap; 75 volatile uint64_t ext_cap; 76 volatile uint32_t gcr; 77 volatile uint32_t gsr; 78 volatile uint64_t rta; 79 volatile uint64_t ccr; 80 }; 81 82 #define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F) 83 #define VTD_CAP_ND(cap) ((cap) & 0x7) 84 #define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1) 85 #define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF) 86 #define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1) 87 88 #define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1) 89 #define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1) 90 #define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF) 91 92 #define VTD_GCR_WBF (1 << 27) 93 #define VTD_GCR_SRTP (1 << 30) 94 #define VTD_GCR_TE (1U << 31) 95 96 #define VTD_GSR_WBFS (1 << 27) 97 #define VTD_GSR_RTPS (1 << 30) 98 #define VTD_GSR_TES (1U << 31) 99 100 #define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */ 101 #define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */ 102 103 #define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */ 104 #define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */ 105 #define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */ 106 #define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */ 107 #define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */ 108 #define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */ 109 #define VTD_IIR_DOMAIN_P 32 110 111 #define VTD_ROOT_PRESENT 0x1 112 #define VTD_CTX_PRESENT 0x1 113 #define VTD_CTX_TT_ALL (1UL << 2) 114 115 #define VTD_PTE_RD (1UL << 0) 116 #define VTD_PTE_WR (1UL << 1) 117 #define VTD_PTE_SUPERPAGE (1UL << 7) 118 #define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL) 119 120 #define VTD_RID2IDX(rid) (((rid) & 0xff) * 2) 121 122 struct domain { 123 uint64_t *ptp; /* first level page table page */ 124 int pt_levels; /* number of page table levels */ 125 int addrwidth; /* 'AW' field in context entry */ 126 int spsmask; /* supported super page sizes */ 127 uint_t id; /* domain id */ 128 vm_paddr_t maxaddr; /* highest address to be mapped */ 129 SLIST_ENTRY(domain) next; 130 }; 131 132 static SLIST_HEAD(, domain) domhead; 133 134 #define DRHD_MAX_UNITS 8 135 static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS]; 136 static int drhd_num; 137 static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; 138 static int max_domains; 139 typedef int (*drhd_ident_func_t)(void); 140 static dev_info_t *vtddips[DRHD_MAX_UNITS]; 141 142 static uint64_t root_table[PAGE_SIZE / sizeof (uint64_t)] __aligned(4096); 143 static uint64_t ctx_tables[256][PAGE_SIZE / sizeof (uint64_t)] __aligned(4096); 144 145 static int 146 vtd_max_domains(struct vtdmap *vtdmap) 147 { 148 int nd; 149 150 nd = VTD_CAP_ND(vtdmap->cap); 151 152 switch (nd) { 153 case 0: 154 return (16); 155 case 1: 156 return (64); 157 case 2: 158 return (256); 159 case 3: 160 return (1024); 161 case 4: 162 return (4 * 1024); 163 case 5: 164 return (16 * 1024); 165 case 6: 166 return (64 * 1024); 167 default: 168 panic("vtd_max_domains: invalid value of nd (0x%0x)", nd); 169 } 170 } 171 172 static uint_t 173 domain_id(void) 174 { 175 uint_t id; 176 struct domain *dom; 177 178 /* Skip domain id 0 - it is reserved when Caching Mode field is set */ 179 for (id = 1; id < max_domains; id++) { 180 SLIST_FOREACH(dom, &domhead, next) { 181 if (dom->id == id) 182 break; 183 } 184 if (dom == NULL) 185 break; /* found it */ 186 } 187 188 if (id >= max_domains) 189 panic("domain ids exhausted"); 190 191 return (id); 192 } 193 194 static struct vtdmap * 195 vtd_device_scope(uint16_t rid) 196 { 197 int i, remaining, pathrem; 198 char *end, *pathend; 199 struct vtdmap *vtdmap; 200 ACPI_DMAR_HARDWARE_UNIT *drhd; 201 ACPI_DMAR_DEVICE_SCOPE *device_scope; 202 ACPI_DMAR_PCI_PATH *path; 203 204 for (i = 0; i < drhd_num; i++) { 205 drhd = drhds[i]; 206 207 if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) { 208 /* 209 * From Intel VT-d arch spec, version 3.0: 210 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is 211 * reported for a Segment, it must be enumerated by BIOS 212 * after all other DRHD structures for the same Segment. 213 */ 214 vtdmap = vtdmaps[i]; 215 return (vtdmap); 216 } 217 218 end = (char *)drhd + drhd->Header.Length; 219 remaining = drhd->Header.Length - 220 sizeof (ACPI_DMAR_HARDWARE_UNIT); 221 while (remaining > sizeof (ACPI_DMAR_DEVICE_SCOPE)) { 222 device_scope = 223 (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining); 224 remaining -= device_scope->Length; 225 226 switch (device_scope->EntryType) { 227 /* 0x01 and 0x02 are PCI device entries */ 228 case 0x01: 229 case 0x02: 230 break; 231 default: 232 continue; 233 } 234 235 if (PCI_RID2BUS(rid) != device_scope->Bus) 236 continue; 237 238 pathend = (char *)device_scope + device_scope->Length; 239 pathrem = device_scope->Length - 240 sizeof (ACPI_DMAR_DEVICE_SCOPE); 241 while (pathrem >= sizeof (ACPI_DMAR_PCI_PATH)) { 242 path = (ACPI_DMAR_PCI_PATH *) 243 (pathend - pathrem); 244 pathrem -= sizeof (ACPI_DMAR_PCI_PATH); 245 246 if (PCI_RID2SLOT(rid) != path->Device) 247 continue; 248 if (PCI_RID2FUNC(rid) != path->Function) 249 continue; 250 251 vtdmap = vtdmaps[i]; 252 return (vtdmap); 253 } 254 } 255 } 256 257 /* No matching scope */ 258 return (NULL); 259 } 260 261 static void 262 vtd_wbflush(struct vtdmap *vtdmap) 263 { 264 265 if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0) 266 invalidate_cache_all(); 267 268 if (VTD_CAP_RWBF(vtdmap->cap)) { 269 vtdmap->gcr = VTD_GCR_WBF; 270 while ((vtdmap->gsr & VTD_GSR_WBFS) != 0) 271 ; 272 } 273 } 274 275 static void 276 vtd_ctx_global_invalidate(struct vtdmap *vtdmap) 277 { 278 279 vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL; 280 while ((vtdmap->ccr & VTD_CCR_ICC) != 0) 281 ; 282 } 283 284 static void 285 vtd_iotlb_global_invalidate(struct vtdmap *vtdmap) 286 { 287 int offset; 288 volatile uint64_t *iotlb_reg, val; 289 290 vtd_wbflush(vtdmap); 291 292 offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16; 293 iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8); 294 295 *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL | 296 VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES; 297 298 while (1) { 299 val = *iotlb_reg; 300 if ((val & VTD_IIR_IVT) == 0) 301 break; 302 } 303 } 304 305 static void 306 vtd_translation_enable(struct vtdmap *vtdmap) 307 { 308 309 vtdmap->gcr = VTD_GCR_TE; 310 while ((vtdmap->gsr & VTD_GSR_TES) == 0) 311 ; 312 } 313 314 static void 315 vtd_translation_disable(struct vtdmap *vtdmap) 316 { 317 318 vtdmap->gcr = 0; 319 while ((vtdmap->gsr & VTD_GSR_TES) != 0) 320 ; 321 } 322 323 static void * 324 vtd_map(dev_info_t *dip) 325 { 326 caddr_t regs; 327 ddi_acc_handle_t hdl; 328 int error; 329 330 static ddi_device_acc_attr_t regs_attr = { 331 DDI_DEVICE_ATTR_V0, 332 DDI_NEVERSWAP_ACC, 333 DDI_STRICTORDER_ACC, 334 }; 335 336 error = ddi_regs_map_setup(dip, 0, ®s, 0, PAGE_SIZE, ®s_attr, 337 &hdl); 338 339 if (error != DDI_SUCCESS) 340 return (NULL); 341 342 ddi_set_driver_private(dip, hdl); 343 344 return (regs); 345 } 346 347 static void 348 vtd_unmap(dev_info_t *dip) 349 { 350 ddi_acc_handle_t hdl = ddi_get_driver_private(dip); 351 352 if (hdl != NULL) 353 ddi_regs_map_free(&hdl); 354 } 355 356 static dev_info_t * 357 vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *drhd, int unit) 358 { 359 dev_info_t *dip; 360 struct ddi_parent_private_data *pdptr; 361 struct regspec reg; 362 int circ; 363 364 /* 365 * Try to find an existing devinfo node for this vtd unit. 366 */ 367 ndi_devi_enter(ddi_root_node(), &circ); 368 dip = ddi_find_devinfo("vtd", unit, 0); 369 ndi_devi_exit(ddi_root_node(), circ); 370 371 if (dip != NULL) 372 return (dip); 373 374 /* 375 * None found, construct a devinfo node for this vtd unit. 376 */ 377 dip = ddi_add_child(ddi_root_node(), "vtd", 378 DEVI_SID_NODEID, unit); 379 380 reg.regspec_bustype = 0; 381 reg.regspec_addr = drhd->Address; 382 reg.regspec_size = PAGE_SIZE; 383 384 /* 385 * update the reg properties 386 * 387 * reg property will be used for register 388 * set access 389 * 390 * refer to the bus_map of root nexus driver 391 * I/O or memory mapping: 392 * 393 * <bustype=0, addr=x, len=x>: memory 394 * <bustype=1, addr=x, len=x>: i/o 395 * <bustype>1, addr=0, len=x>: x86-compatibility i/o 396 */ 397 (void) ndi_prop_update_int_array(DDI_DEV_T_NONE, 398 dip, "reg", (int *)®, 399 sizeof (struct regspec) / sizeof (int)); 400 401 /* 402 * This is an artificially constructed dev_info, and we 403 * need to set a few more things to be able to use it 404 * for ddi_dma_alloc_handle/free_handle. 405 */ 406 ddi_set_driver(dip, ddi_get_driver(ddi_root_node())); 407 DEVI(dip)->devi_bus_dma_allochdl = 408 DEVI(ddi_get_driver((ddi_root_node()))); 409 410 pdptr = kmem_zalloc(sizeof (struct ddi_parent_private_data) 411 + sizeof (struct regspec), KM_SLEEP); 412 pdptr->par_nreg = 1; 413 pdptr->par_reg = (struct regspec *)(pdptr + 1); 414 pdptr->par_reg->regspec_bustype = 0; 415 pdptr->par_reg->regspec_addr = drhd->Address; 416 pdptr->par_reg->regspec_size = PAGE_SIZE; 417 ddi_set_parent_data(dip, pdptr); 418 419 return (dip); 420 } 421 422 static int 423 vtd_init(void) 424 { 425 int i, units, remaining, tmp; 426 struct vtdmap *vtdmap; 427 vm_paddr_t ctx_paddr; 428 char *end; 429 #ifdef __FreeBSD__ 430 char envname[32]; 431 unsigned long mapaddr; 432 #endif 433 ACPI_STATUS status; 434 ACPI_TABLE_DMAR *dmar; 435 ACPI_DMAR_HEADER *hdr; 436 ACPI_DMAR_HARDWARE_UNIT *drhd; 437 438 #ifdef __FreeBSD__ 439 /* 440 * Allow the user to override the ACPI DMAR table by specifying the 441 * physical address of each remapping unit. 442 * 443 * The following example specifies two remapping units at 444 * physical addresses 0xfed90000 and 0xfeda0000 respectively. 445 * set vtd.regmap.0.addr=0xfed90000 446 * set vtd.regmap.1.addr=0xfeda0000 447 */ 448 for (units = 0; units < DRHD_MAX_UNITS; units++) { 449 snprintf(envname, sizeof (envname), "vtd.regmap.%d.addr", 450 units); 451 if (getenv_ulong(envname, &mapaddr) == 0) 452 break; 453 vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr); 454 } 455 456 if (units > 0) 457 goto skip_dmar; 458 #else 459 units = 0; 460 #endif 461 /* Search for DMAR table. */ 462 status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar); 463 if (ACPI_FAILURE(status)) 464 return (ENXIO); 465 466 end = (char *)dmar + dmar->Header.Length; 467 remaining = dmar->Header.Length - sizeof (ACPI_TABLE_DMAR); 468 while (remaining > sizeof (ACPI_DMAR_HEADER)) { 469 hdr = (ACPI_DMAR_HEADER *)(end - remaining); 470 if (hdr->Length > remaining) 471 break; 472 /* 473 * From Intel VT-d arch spec, version 1.3: 474 * BIOS implementations must report mapping structures 475 * in numerical order, i.e. All remapping structures of 476 * type 0 (DRHD) enumerated before remapping structures of 477 * type 1 (RMRR) and so forth. 478 */ 479 if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) 480 break; 481 482 drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr; 483 drhds[units] = drhd; 484 #ifdef __FreeBSD__ 485 vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address); 486 #else 487 vtddips[units] = vtd_get_dip(drhd, units); 488 vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]); 489 if (vtdmaps[units] == NULL) 490 goto fail; 491 #endif 492 if (++units >= DRHD_MAX_UNITS) 493 break; 494 remaining -= hdr->Length; 495 } 496 497 if (units <= 0) 498 return (ENXIO); 499 500 #ifdef __FreeBSD__ 501 skip_dmar: 502 #endif 503 drhd_num = units; 504 505 max_domains = 64 * 1024; /* maximum valid value */ 506 for (i = 0; i < drhd_num; i++) { 507 vtdmap = vtdmaps[i]; 508 509 if (VTD_CAP_CM(vtdmap->cap) != 0) 510 panic("vtd_init: invalid caching mode"); 511 512 /* take most compatible (minimum) value */ 513 if ((tmp = vtd_max_domains(vtdmap)) < max_domains) 514 max_domains = tmp; 515 } 516 517 /* 518 * Set up the root-table to point to the context-entry tables 519 */ 520 for (i = 0; i < 256; i++) { 521 ctx_paddr = vtophys(ctx_tables[i]); 522 if (ctx_paddr & PAGE_MASK) 523 panic("ctx table (0x%0lx) not page aligned", ctx_paddr); 524 525 root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT; 526 } 527 528 return (0); 529 530 #ifndef __FreeBSD__ 531 fail: 532 for (i = 0; i <= units; i++) 533 vtd_unmap(vtddips[i]); 534 return (ENXIO); 535 #endif 536 } 537 538 static void 539 vtd_cleanup(void) 540 { 541 #ifndef __FreeBSD__ 542 int i; 543 544 KASSERT(SLIST_EMPTY(&domhead), ("domain list not empty")); 545 546 bzero(root_table, sizeof (root_table)); 547 548 for (i = 0; i <= drhd_num; i++) { 549 vtdmaps[i] = NULL; 550 /* 551 * Unmap the vtd registers. Note that the devinfo nodes 552 * themselves aren't removed, they are considered system state 553 * and can be reused when the module is reloaded. 554 */ 555 if (vtddips[i] != NULL) 556 vtd_unmap(vtddips[i]); 557 } 558 #endif 559 } 560 561 static void 562 vtd_enable(void) 563 { 564 int i; 565 struct vtdmap *vtdmap; 566 567 for (i = 0; i < drhd_num; i++) { 568 vtdmap = vtdmaps[i]; 569 vtd_wbflush(vtdmap); 570 571 /* Update the root table address */ 572 vtdmap->rta = vtophys(root_table); 573 vtdmap->gcr = VTD_GCR_SRTP; 574 while ((vtdmap->gsr & VTD_GSR_RTPS) == 0) 575 ; 576 577 vtd_ctx_global_invalidate(vtdmap); 578 vtd_iotlb_global_invalidate(vtdmap); 579 580 vtd_translation_enable(vtdmap); 581 } 582 } 583 584 static void 585 vtd_disable(void) 586 { 587 int i; 588 struct vtdmap *vtdmap; 589 590 for (i = 0; i < drhd_num; i++) { 591 vtdmap = vtdmaps[i]; 592 vtd_translation_disable(vtdmap); 593 } 594 } 595 596 static void 597 vtd_add_device(void *arg, uint16_t rid) 598 { 599 int idx; 600 uint64_t *ctxp; 601 struct domain *dom = arg; 602 vm_paddr_t pt_paddr; 603 struct vtdmap *vtdmap; 604 uint8_t bus; 605 606 bus = PCI_RID2BUS(rid); 607 ctxp = ctx_tables[bus]; 608 pt_paddr = vtophys(dom->ptp); 609 idx = VTD_RID2IDX(rid); 610 611 if (ctxp[idx] & VTD_CTX_PRESENT) { 612 panic("vtd_add_device: device %x is already owned by " 613 "domain %d", rid, (uint16_t)(ctxp[idx + 1] >> 8)); 614 } 615 616 if ((vtdmap = vtd_device_scope(rid)) == NULL) 617 panic("vtd_add_device: device %x is not in scope for " 618 "any DMA remapping unit", rid); 619 620 /* 621 * Order is important. The 'present' bit is set only after all fields 622 * of the context pointer are initialized. 623 */ 624 ctxp[idx + 1] = dom->addrwidth | (dom->id << 8); 625 626 if (VTD_ECAP_DI(vtdmap->ext_cap)) 627 ctxp[idx] = VTD_CTX_TT_ALL; 628 else 629 ctxp[idx] = 0; 630 631 ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT; 632 633 /* 634 * 'Not Present' entries are not cached in either the Context Cache 635 * or in the IOTLB, so there is no need to invalidate either of them. 636 */ 637 } 638 639 static void 640 vtd_remove_device(void *arg, uint16_t rid) 641 { 642 int i, idx; 643 uint64_t *ctxp; 644 struct vtdmap *vtdmap; 645 uint8_t bus; 646 647 bus = PCI_RID2BUS(rid); 648 ctxp = ctx_tables[bus]; 649 idx = VTD_RID2IDX(rid); 650 651 /* 652 * Order is important. The 'present' bit is must be cleared first. 653 */ 654 ctxp[idx] = 0; 655 ctxp[idx + 1] = 0; 656 657 /* 658 * Invalidate the Context Cache and the IOTLB. 659 * 660 * XXX use device-selective invalidation for Context Cache 661 * XXX use domain-selective invalidation for IOTLB 662 */ 663 for (i = 0; i < drhd_num; i++) { 664 vtdmap = vtdmaps[i]; 665 vtd_ctx_global_invalidate(vtdmap); 666 vtd_iotlb_global_invalidate(vtdmap); 667 } 668 } 669 670 #define CREATE_MAPPING 0 671 #define REMOVE_MAPPING 1 672 673 static uint64_t 674 vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, 675 int remove) 676 { 677 struct domain *dom; 678 int i, spshift, ptpshift, ptpindex, nlevels; 679 uint64_t spsize, *ptp; 680 681 dom = arg; 682 ptpindex = 0; 683 ptpshift = 0; 684 685 KASSERT(gpa + len > gpa, ("%s: invalid gpa range %lx/%lx", __func__, 686 gpa, len)); 687 KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %lx/%lx beyond " 688 "domain maxaddr %lx", __func__, gpa, len, dom->maxaddr)); 689 690 if (gpa & PAGE_MASK) 691 panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa); 692 693 if (hpa & PAGE_MASK) 694 panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa); 695 696 if (len & PAGE_MASK) 697 panic("vtd_create_mapping: unaligned len 0x%0lx", len); 698 699 /* 700 * Compute the size of the mapping that we can accommodate. 701 * 702 * This is based on three factors: 703 * - supported super page size 704 * - alignment of the region starting at 'gpa' and 'hpa' 705 * - length of the region 'len' 706 */ 707 spshift = 48; 708 for (i = 3; i >= 0; i--) { 709 spsize = 1UL << spshift; 710 if ((dom->spsmask & (1 << i)) != 0 && 711 (gpa & (spsize - 1)) == 0 && 712 (hpa & (spsize - 1)) == 0 && 713 (len >= spsize)) { 714 break; 715 } 716 spshift -= 9; 717 } 718 719 ptp = dom->ptp; 720 nlevels = dom->pt_levels; 721 while (--nlevels >= 0) { 722 ptpshift = 12 + nlevels * 9; 723 ptpindex = (gpa >> ptpshift) & 0x1FF; 724 725 /* We have reached the leaf mapping */ 726 if (spshift >= ptpshift) { 727 break; 728 } 729 730 /* 731 * We are working on a non-leaf page table page. 732 * 733 * Create a downstream page table page if necessary and point 734 * to it from the current page table. 735 */ 736 if (ptp[ptpindex] == 0) { 737 void *nlp = vmm_ptp_alloc(); 738 ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR; 739 } 740 741 ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M); 742 } 743 744 if ((gpa & ((1UL << ptpshift) - 1)) != 0) 745 panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift); 746 747 /* 748 * Update the 'gpa' -> 'hpa' mapping 749 */ 750 if (remove) { 751 ptp[ptpindex] = 0; 752 } else { 753 ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR; 754 755 if (nlevels > 0) 756 ptp[ptpindex] |= VTD_PTE_SUPERPAGE; 757 } 758 759 return (1UL << ptpshift); 760 } 761 762 static uint64_t 763 vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) 764 { 765 766 return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING)); 767 } 768 769 static uint64_t 770 vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) 771 { 772 773 return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING)); 774 } 775 776 static void 777 vtd_invalidate_tlb(void *dom) 778 { 779 int i; 780 struct vtdmap *vtdmap; 781 782 /* 783 * Invalidate the IOTLB. 784 * XXX use domain-selective invalidation for IOTLB 785 */ 786 for (i = 0; i < drhd_num; i++) { 787 vtdmap = vtdmaps[i]; 788 vtd_iotlb_global_invalidate(vtdmap); 789 } 790 } 791 792 static void * 793 vtd_create_domain(vm_paddr_t maxaddr) 794 { 795 struct domain *dom; 796 vm_paddr_t addr; 797 int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth; 798 struct vtdmap *vtdmap; 799 800 if (drhd_num <= 0) 801 panic("vtd_create_domain: no dma remapping hardware available"); 802 803 /* 804 * Calculate AGAW. 805 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec. 806 */ 807 addr = 0; 808 for (gaw = 0; addr < maxaddr; gaw++) 809 addr = 1ULL << gaw; 810 811 res = (gaw - 12) % 9; 812 if (res == 0) 813 agaw = gaw; 814 else 815 agaw = gaw + 9 - res; 816 817 if (agaw > 64) 818 agaw = 64; 819 820 /* 821 * Select the smallest Supported AGAW and the corresponding number 822 * of page table levels. 823 */ 824 pt_levels = 2; 825 sagaw = 30; 826 addrwidth = 0; 827 828 tmp = ~0; 829 for (i = 0; i < drhd_num; i++) { 830 vtdmap = vtdmaps[i]; 831 /* take most compatible value */ 832 tmp &= VTD_CAP_SAGAW(vtdmap->cap); 833 } 834 835 for (i = 0; i < 5; i++) { 836 if ((tmp & (1 << i)) != 0 && sagaw >= agaw) 837 break; 838 pt_levels++; 839 addrwidth++; 840 sagaw += 9; 841 if (sagaw > 64) 842 sagaw = 64; 843 } 844 845 if (i >= 5) { 846 panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d", 847 tmp, agaw); 848 } 849 850 dom = kmem_zalloc(sizeof (struct domain), KM_SLEEP); 851 dom->pt_levels = pt_levels; 852 dom->addrwidth = addrwidth; 853 dom->id = domain_id(); 854 dom->maxaddr = maxaddr; 855 dom->ptp = vmm_ptp_alloc(); 856 if ((uintptr_t)dom->ptp & PAGE_MASK) 857 panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp); 858 859 #ifdef __FreeBSD__ 860 #ifdef notyet 861 /* 862 * XXX superpage mappings for the iommu do not work correctly. 863 * 864 * By default all physical memory is mapped into the host_domain. 865 * When a VM is allocated wired memory the pages belonging to it 866 * are removed from the host_domain and added to the vm's domain. 867 * 868 * If the page being removed was mapped using a superpage mapping 869 * in the host_domain then we need to demote the mapping before 870 * removing the page. 871 * 872 * There is not any code to deal with the demotion at the moment 873 * so we disable superpage mappings altogether. 874 */ 875 dom->spsmask = ~0; 876 for (i = 0; i < drhd_num; i++) { 877 vtdmap = vtdmaps[i]; 878 /* take most compatible value */ 879 dom->spsmask &= VTD_CAP_SPS(vtdmap->cap); 880 } 881 #endif 882 #else 883 /* 884 * On illumos we decidedly do not remove memory mapped to a VM's domain 885 * from the host_domain, so we don't have to deal with page demotion and 886 * can just use large pages. 887 * 888 * Since VM memory is currently allocated as 4k pages and mapped into 889 * the VM domain page by page, the use of large pages is essentially 890 * limited to the host_domain. 891 */ 892 dom->spsmask = VTD_CAP_SPS(vtdmap->cap); 893 #endif 894 895 SLIST_INSERT_HEAD(&domhead, dom, next); 896 897 return (dom); 898 } 899 900 static void 901 vtd_free_ptp(uint64_t *ptp, int level) 902 { 903 int i; 904 uint64_t *nlp; 905 906 if (level > 1) { 907 for (i = 0; i < 512; i++) { 908 if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0) 909 continue; 910 if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0) 911 continue; 912 nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M); 913 vtd_free_ptp(nlp, level - 1); 914 } 915 } 916 917 vmm_ptp_free(ptp); 918 } 919 920 static void 921 vtd_destroy_domain(void *arg) 922 { 923 struct domain *dom; 924 925 dom = arg; 926 927 SLIST_REMOVE(&domhead, dom, domain, next); 928 vtd_free_ptp(dom->ptp, dom->pt_levels); 929 kmem_free(dom, sizeof (*dom)); 930 } 931 932 const struct iommu_ops vmm_iommu_ops = { 933 .init = vtd_init, 934 .cleanup = vtd_cleanup, 935 .enable = vtd_enable, 936 .disable = vtd_disable, 937 .create_domain = vtd_create_domain, 938 .destroy_domain = vtd_destroy_domain, 939 .create_mapping = vtd_create_mapping, 940 .remove_mapping = vtd_remove_mapping, 941 .add_device = vtd_add_device, 942 .remove_device = vtd_remove_device, 943 .invalidate_tlb = vtd_invalidate_tlb, 944 }; 945 946 947 static struct modlmisc modlmisc = { 948 &mod_miscops, 949 "bhyve vmm vtd", 950 }; 951 952 static struct modlinkage modlinkage = { 953 MODREV_1, 954 &modlmisc, 955 NULL 956 }; 957 958 int 959 _init(void) 960 { 961 return (mod_install(&modlinkage)); 962 } 963 964 int 965 _fini(void) 966 { 967 return (mod_remove(&modlinkage)); 968 } 969 970 int 971 _info(struct modinfo *modinfop) 972 { 973 return (mod_info(&modlinkage, modinfop)); 974 } 975