1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 37 #include <vm/vm.h> 38 #include <vm/pmap.h> 39 40 #include <dev/pci/pcireg.h> 41 42 #include <machine/vmparam.h> 43 #include <contrib/dev/acpica/include/acpi.h> 44 45 #include "io/iommu.h" 46 47 /* 48 * Documented in the "Intel Virtualization Technology for Directed I/O", 49 * Architecture Spec, September 2008. 50 */ 51 52 /* Section 10.4 "Register Descriptions" */ 53 struct vtdmap { 54 volatile uint32_t version; 55 volatile uint32_t res0; 56 volatile uint64_t cap; 57 volatile uint64_t ext_cap; 58 volatile uint32_t gcr; 59 volatile uint32_t gsr; 60 volatile uint64_t rta; 61 volatile uint64_t ccr; 62 }; 63 64 #define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F) 65 #define VTD_CAP_ND(cap) ((cap) & 0x7) 66 #define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1) 67 #define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF) 68 #define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1) 69 70 #define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1) 71 #define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1) 72 #define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF) 73 74 #define VTD_GCR_WBF (1 << 27) 75 #define VTD_GCR_SRTP (1 << 30) 76 #define VTD_GCR_TE (1U << 31) 77 78 #define VTD_GSR_WBFS (1 << 27) 79 #define VTD_GSR_RTPS (1 << 30) 80 #define VTD_GSR_TES (1U << 31) 81 82 #define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */ 83 #define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */ 84 85 #define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */ 86 #define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */ 87 #define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */ 88 #define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */ 89 #define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */ 90 #define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */ 91 #define VTD_IIR_DOMAIN_P 32 92 93 #define VTD_ROOT_PRESENT 0x1 94 #define VTD_CTX_PRESENT 0x1 95 #define VTD_CTX_TT_ALL (1UL << 2) 96 97 #define VTD_PTE_RD (1UL << 0) 98 #define VTD_PTE_WR (1UL << 1) 99 #define VTD_PTE_SUPERPAGE (1UL << 7) 100 #define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL) 101 102 #define VTD_RID2IDX(rid) (((rid) & 0xff) * 2) 103 104 struct domain { 105 uint64_t *ptp; /* first level page table page */ 106 int pt_levels; /* number of page table levels */ 107 int addrwidth; /* 'AW' field in context entry */ 108 int spsmask; /* supported super page sizes */ 109 u_int id; /* domain id */ 110 vm_paddr_t maxaddr; /* highest address to be mapped */ 111 SLIST_ENTRY(domain) next; 112 }; 113 114 static SLIST_HEAD(, domain) domhead; 115 116 #define DRHD_MAX_UNITS 8 117 static int drhd_num; 118 static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; 119 static int max_domains; 120 typedef int (*drhd_ident_func_t)(void); 121 122 static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096); 123 static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096); 124 125 static MALLOC_DEFINE(M_VTD, "vtd", "vtd"); 126 127 static int 128 vtd_max_domains(struct vtdmap *vtdmap) 129 { 130 int nd; 131 132 nd = VTD_CAP_ND(vtdmap->cap); 133 134 switch (nd) { 135 case 0: 136 return (16); 137 case 1: 138 return (64); 139 case 2: 140 return (256); 141 case 3: 142 return (1024); 143 case 4: 144 return (4 * 1024); 145 case 5: 146 return (16 * 1024); 147 case 6: 148 return (64 * 1024); 149 default: 150 panic("vtd_max_domains: invalid value of nd (0x%0x)", nd); 151 } 152 } 153 154 static u_int 155 domain_id(void) 156 { 157 u_int id; 158 struct domain *dom; 159 160 /* Skip domain id 0 - it is reserved when Caching Mode field is set */ 161 for (id = 1; id < max_domains; id++) { 162 SLIST_FOREACH(dom, &domhead, next) { 163 if (dom->id == id) 164 break; 165 } 166 if (dom == NULL) 167 break; /* found it */ 168 } 169 170 if (id >= max_domains) 171 panic("domain ids exhausted"); 172 173 return (id); 174 } 175 176 static void 177 vtd_wbflush(struct vtdmap *vtdmap) 178 { 179 180 if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0) 181 pmap_invalidate_cache(); 182 183 if (VTD_CAP_RWBF(vtdmap->cap)) { 184 vtdmap->gcr = VTD_GCR_WBF; 185 while ((vtdmap->gsr & VTD_GSR_WBFS) != 0) 186 ; 187 } 188 } 189 190 static void 191 vtd_ctx_global_invalidate(struct vtdmap *vtdmap) 192 { 193 194 vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL; 195 while ((vtdmap->ccr & VTD_CCR_ICC) != 0) 196 ; 197 } 198 199 static void 200 vtd_iotlb_global_invalidate(struct vtdmap *vtdmap) 201 { 202 int offset; 203 volatile uint64_t *iotlb_reg, val; 204 205 vtd_wbflush(vtdmap); 206 207 offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16; 208 iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8); 209 210 *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL | 211 VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES; 212 213 while (1) { 214 val = *iotlb_reg; 215 if ((val & VTD_IIR_IVT) == 0) 216 break; 217 } 218 } 219 220 static void 221 vtd_translation_enable(struct vtdmap *vtdmap) 222 { 223 224 vtdmap->gcr = VTD_GCR_TE; 225 while ((vtdmap->gsr & VTD_GSR_TES) == 0) 226 ; 227 } 228 229 static void 230 vtd_translation_disable(struct vtdmap *vtdmap) 231 { 232 233 vtdmap->gcr = 0; 234 while ((vtdmap->gsr & VTD_GSR_TES) != 0) 235 ; 236 } 237 238 static int 239 vtd_init(void) 240 { 241 int i, units, remaining; 242 struct vtdmap *vtdmap; 243 vm_paddr_t ctx_paddr; 244 char *end, envname[32]; 245 unsigned long mapaddr; 246 ACPI_STATUS status; 247 ACPI_TABLE_DMAR *dmar; 248 ACPI_DMAR_HEADER *hdr; 249 ACPI_DMAR_HARDWARE_UNIT *drhd; 250 251 /* 252 * Allow the user to override the ACPI DMAR table by specifying the 253 * physical address of each remapping unit. 254 * 255 * The following example specifies two remapping units at 256 * physical addresses 0xfed90000 and 0xfeda0000 respectively. 257 * set vtd.regmap.0.addr=0xfed90000 258 * set vtd.regmap.1.addr=0xfeda0000 259 */ 260 for (units = 0; units < DRHD_MAX_UNITS; units++) { 261 snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units); 262 if (getenv_ulong(envname, &mapaddr) == 0) 263 break; 264 vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr); 265 } 266 267 if (units > 0) 268 goto skip_dmar; 269 270 /* Search for DMAR table. */ 271 status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar); 272 if (ACPI_FAILURE(status)) 273 return (ENXIO); 274 275 end = (char *)dmar + dmar->Header.Length; 276 remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR); 277 while (remaining > sizeof(ACPI_DMAR_HEADER)) { 278 hdr = (ACPI_DMAR_HEADER *)(end - remaining); 279 if (hdr->Length > remaining) 280 break; 281 /* 282 * From Intel VT-d arch spec, version 1.3: 283 * BIOS implementations must report mapping structures 284 * in numerical order, i.e. All remapping structures of 285 * type 0 (DRHD) enumerated before remapping structures of 286 * type 1 (RMRR) and so forth. 287 */ 288 if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) 289 break; 290 291 drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr; 292 vtdmaps[units++] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address); 293 if (units >= DRHD_MAX_UNITS) 294 break; 295 remaining -= hdr->Length; 296 } 297 298 if (units <= 0) 299 return (ENXIO); 300 301 skip_dmar: 302 drhd_num = units; 303 vtdmap = vtdmaps[0]; 304 305 if (VTD_CAP_CM(vtdmap->cap) != 0) 306 panic("vtd_init: invalid caching mode"); 307 308 max_domains = vtd_max_domains(vtdmap); 309 310 /* 311 * Set up the root-table to point to the context-entry tables 312 */ 313 for (i = 0; i < 256; i++) { 314 ctx_paddr = vtophys(ctx_tables[i]); 315 if (ctx_paddr & PAGE_MASK) 316 panic("ctx table (0x%0lx) not page aligned", ctx_paddr); 317 318 root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT; 319 } 320 321 return (0); 322 } 323 324 static void 325 vtd_cleanup(void) 326 { 327 } 328 329 static void 330 vtd_enable(void) 331 { 332 int i; 333 struct vtdmap *vtdmap; 334 335 for (i = 0; i < drhd_num; i++) { 336 vtdmap = vtdmaps[i]; 337 vtd_wbflush(vtdmap); 338 339 /* Update the root table address */ 340 vtdmap->rta = vtophys(root_table); 341 vtdmap->gcr = VTD_GCR_SRTP; 342 while ((vtdmap->gsr & VTD_GSR_RTPS) == 0) 343 ; 344 345 vtd_ctx_global_invalidate(vtdmap); 346 vtd_iotlb_global_invalidate(vtdmap); 347 348 vtd_translation_enable(vtdmap); 349 } 350 } 351 352 static void 353 vtd_disable(void) 354 { 355 int i; 356 struct vtdmap *vtdmap; 357 358 for (i = 0; i < drhd_num; i++) { 359 vtdmap = vtdmaps[i]; 360 vtd_translation_disable(vtdmap); 361 } 362 } 363 364 static void 365 vtd_add_device(void *arg, uint16_t rid) 366 { 367 int idx; 368 uint64_t *ctxp; 369 struct domain *dom = arg; 370 vm_paddr_t pt_paddr; 371 struct vtdmap *vtdmap; 372 uint8_t bus; 373 374 vtdmap = vtdmaps[0]; 375 bus = PCI_RID2BUS(rid); 376 ctxp = ctx_tables[bus]; 377 pt_paddr = vtophys(dom->ptp); 378 idx = VTD_RID2IDX(rid); 379 380 if (ctxp[idx] & VTD_CTX_PRESENT) { 381 panic("vtd_add_device: device %x is already owned by " 382 "domain %d", rid, 383 (uint16_t)(ctxp[idx + 1] >> 8)); 384 } 385 386 /* 387 * Order is important. The 'present' bit is set only after all fields 388 * of the context pointer are initialized. 389 */ 390 ctxp[idx + 1] = dom->addrwidth | (dom->id << 8); 391 392 if (VTD_ECAP_DI(vtdmap->ext_cap)) 393 ctxp[idx] = VTD_CTX_TT_ALL; 394 else 395 ctxp[idx] = 0; 396 397 ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT; 398 399 /* 400 * 'Not Present' entries are not cached in either the Context Cache 401 * or in the IOTLB, so there is no need to invalidate either of them. 402 */ 403 } 404 405 static void 406 vtd_remove_device(void *arg, uint16_t rid) 407 { 408 int i, idx; 409 uint64_t *ctxp; 410 struct vtdmap *vtdmap; 411 uint8_t bus; 412 413 bus = PCI_RID2BUS(rid); 414 ctxp = ctx_tables[bus]; 415 idx = VTD_RID2IDX(rid); 416 417 /* 418 * Order is important. The 'present' bit is must be cleared first. 419 */ 420 ctxp[idx] = 0; 421 ctxp[idx + 1] = 0; 422 423 /* 424 * Invalidate the Context Cache and the IOTLB. 425 * 426 * XXX use device-selective invalidation for Context Cache 427 * XXX use domain-selective invalidation for IOTLB 428 */ 429 for (i = 0; i < drhd_num; i++) { 430 vtdmap = vtdmaps[i]; 431 vtd_ctx_global_invalidate(vtdmap); 432 vtd_iotlb_global_invalidate(vtdmap); 433 } 434 } 435 436 #define CREATE_MAPPING 0 437 #define REMOVE_MAPPING 1 438 439 static uint64_t 440 vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, 441 int remove) 442 { 443 struct domain *dom; 444 int i, spshift, ptpshift, ptpindex, nlevels; 445 uint64_t spsize, *ptp; 446 447 dom = arg; 448 ptpindex = 0; 449 ptpshift = 0; 450 451 if (gpa & PAGE_MASK) 452 panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa); 453 454 if (hpa & PAGE_MASK) 455 panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa); 456 457 if (len & PAGE_MASK) 458 panic("vtd_create_mapping: unaligned len 0x%0lx", len); 459 460 /* 461 * Compute the size of the mapping that we can accomodate. 462 * 463 * This is based on three factors: 464 * - supported super page size 465 * - alignment of the region starting at 'gpa' and 'hpa' 466 * - length of the region 'len' 467 */ 468 spshift = 48; 469 for (i = 3; i >= 0; i--) { 470 spsize = 1UL << spshift; 471 if ((dom->spsmask & (1 << i)) != 0 && 472 (gpa & (spsize - 1)) == 0 && 473 (hpa & (spsize - 1)) == 0 && 474 (len >= spsize)) { 475 break; 476 } 477 spshift -= 9; 478 } 479 480 ptp = dom->ptp; 481 nlevels = dom->pt_levels; 482 while (--nlevels >= 0) { 483 ptpshift = 12 + nlevels * 9; 484 ptpindex = (gpa >> ptpshift) & 0x1FF; 485 486 /* We have reached the leaf mapping */ 487 if (spshift >= ptpshift) { 488 break; 489 } 490 491 /* 492 * We are working on a non-leaf page table page. 493 * 494 * Create a downstream page table page if necessary and point 495 * to it from the current page table. 496 */ 497 if (ptp[ptpindex] == 0) { 498 void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO); 499 ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR; 500 } 501 502 ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M); 503 } 504 505 if ((gpa & ((1UL << ptpshift) - 1)) != 0) 506 panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift); 507 508 /* 509 * Update the 'gpa' -> 'hpa' mapping 510 */ 511 if (remove) { 512 ptp[ptpindex] = 0; 513 } else { 514 ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR; 515 516 if (nlevels > 0) 517 ptp[ptpindex] |= VTD_PTE_SUPERPAGE; 518 } 519 520 return (1UL << ptpshift); 521 } 522 523 static uint64_t 524 vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) 525 { 526 527 return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING)); 528 } 529 530 static uint64_t 531 vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) 532 { 533 534 return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING)); 535 } 536 537 static void 538 vtd_invalidate_tlb(void *dom) 539 { 540 int i; 541 struct vtdmap *vtdmap; 542 543 /* 544 * Invalidate the IOTLB. 545 * XXX use domain-selective invalidation for IOTLB 546 */ 547 for (i = 0; i < drhd_num; i++) { 548 vtdmap = vtdmaps[i]; 549 vtd_iotlb_global_invalidate(vtdmap); 550 } 551 } 552 553 static void * 554 vtd_create_domain(vm_paddr_t maxaddr) 555 { 556 struct domain *dom; 557 vm_paddr_t addr; 558 int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth; 559 struct vtdmap *vtdmap; 560 561 if (drhd_num <= 0) 562 panic("vtd_create_domain: no dma remapping hardware available"); 563 564 vtdmap = vtdmaps[0]; 565 566 /* 567 * Calculate AGAW. 568 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec. 569 */ 570 addr = 0; 571 for (gaw = 0; addr < maxaddr; gaw++) 572 addr = 1ULL << gaw; 573 574 res = (gaw - 12) % 9; 575 if (res == 0) 576 agaw = gaw; 577 else 578 agaw = gaw + 9 - res; 579 580 if (agaw > 64) 581 agaw = 64; 582 583 /* 584 * Select the smallest Supported AGAW and the corresponding number 585 * of page table levels. 586 */ 587 pt_levels = 2; 588 sagaw = 30; 589 addrwidth = 0; 590 tmp = VTD_CAP_SAGAW(vtdmap->cap); 591 for (i = 0; i < 5; i++) { 592 if ((tmp & (1 << i)) != 0 && sagaw >= agaw) 593 break; 594 pt_levels++; 595 addrwidth++; 596 sagaw += 9; 597 if (sagaw > 64) 598 sagaw = 64; 599 } 600 601 if (i >= 5) { 602 panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d", 603 VTD_CAP_SAGAW(vtdmap->cap), agaw); 604 } 605 606 dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK); 607 dom->pt_levels = pt_levels; 608 dom->addrwidth = addrwidth; 609 dom->id = domain_id(); 610 dom->maxaddr = maxaddr; 611 dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK); 612 if ((uintptr_t)dom->ptp & PAGE_MASK) 613 panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp); 614 615 #ifdef notyet 616 /* 617 * XXX superpage mappings for the iommu do not work correctly. 618 * 619 * By default all physical memory is mapped into the host_domain. 620 * When a VM is allocated wired memory the pages belonging to it 621 * are removed from the host_domain and added to the vm's domain. 622 * 623 * If the page being removed was mapped using a superpage mapping 624 * in the host_domain then we need to demote the mapping before 625 * removing the page. 626 * 627 * There is not any code to deal with the demotion at the moment 628 * so we disable superpage mappings altogether. 629 */ 630 dom->spsmask = VTD_CAP_SPS(vtdmap->cap); 631 #endif 632 633 SLIST_INSERT_HEAD(&domhead, dom, next); 634 635 return (dom); 636 } 637 638 static void 639 vtd_free_ptp(uint64_t *ptp, int level) 640 { 641 int i; 642 uint64_t *nlp; 643 644 if (level > 1) { 645 for (i = 0; i < 512; i++) { 646 if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0) 647 continue; 648 if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0) 649 continue; 650 nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M); 651 vtd_free_ptp(nlp, level - 1); 652 } 653 } 654 655 bzero(ptp, PAGE_SIZE); 656 free(ptp, M_VTD); 657 } 658 659 static void 660 vtd_destroy_domain(void *arg) 661 { 662 struct domain *dom; 663 664 dom = arg; 665 666 SLIST_REMOVE(&domhead, dom, domain, next); 667 vtd_free_ptp(dom->ptp, dom->pt_levels); 668 free(dom, M_VTD); 669 } 670 671 struct iommu_ops iommu_ops_intel = { 672 vtd_init, 673 vtd_cleanup, 674 vtd_enable, 675 vtd_disable, 676 vtd_create_domain, 677 vtd_destroy_domain, 678 vtd_create_mapping, 679 vtd_remove_mapping, 680 vtd_add_device, 681 vtd_remove_device, 682 vtd_invalidate_tlb, 683 }; 684