/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/pci/pcireg.h>

#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/pci_cfgreg.h>

#include "io/iommu.h"

/*
 * Documented in the "Intel Virtualization Technology for Directed I/O",
 * Architecture Spec, September 2008.
 */

/* Section 10.4 "Register Descriptions" */
struct vtdmap {
        volatile uint32_t       version;
        volatile uint32_t       res0;
        volatile uint64_t       cap;
        volatile uint64_t       ext_cap;
        volatile uint32_t       gcr;
        volatile uint32_t       gsr;
        volatile uint64_t       rta;
        volatile uint64_t       ccr;
};

#define VTD_CAP_SAGAW(cap)      (((cap) >> 8) & 0x1F)
#define VTD_CAP_ND(cap)         ((cap) & 0x7)
#define VTD_CAP_CM(cap)         (((cap) >> 7) & 0x1)
#define VTD_CAP_SPS(cap)        (((cap) >> 34) & 0xF)
#define VTD_CAP_RWBF(cap)       (((cap) >> 4) & 0x1)

#define VTD_ECAP_DI(ecap)       (((ecap) >> 2) & 0x1)
#define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define VTD_ECAP_IRO(ecap)      (((ecap) >> 8) & 0x3FF)

#define VTD_GCR_WBF             (1 << 27)
#define VTD_GCR_SRTP            (1 << 30)
#define VTD_GCR_TE              (1 << 31)

#define VTD_GSR_WBFS            (1 << 27)
#define VTD_GSR_RTPS            (1 << 30)
#define VTD_GSR_TES             (1 << 31)

#define VTD_CCR_ICC             (1UL << 63)     /* invalidate context cache */
#define VTD_CCR_CIRG_GLOBAL     (1UL << 61)     /* global invalidation */

#define VTD_IIR_IVT             (1UL << 63)     /* invalidation IOTLB */
#define VTD_IIR_IIRG_GLOBAL     (1ULL << 60)    /* global IOTLB invalidation */
#define VTD_IIR_IIRG_DOMAIN     (2ULL << 60)    /* domain IOTLB invalidation */
#define VTD_IIR_IIRG_PAGE       (3ULL << 60)    /* page IOTLB invalidation */
#define VTD_IIR_DRAIN_READS     (1ULL << 49)    /* drain pending DMA reads */
#define VTD_IIR_DRAIN_WRITES    (1ULL << 48)    /* drain pending DMA writes */
#define VTD_IIR_DOMAIN_P        32

#define VTD_ROOT_PRESENT        0x1
#define VTD_CTX_PRESENT         0x1
#define VTD_CTX_TT_ALL          (1UL << 2)

#define VTD_PTE_RD              (1UL << 0)
#define VTD_PTE_WR              (1UL << 1)
#define VTD_PTE_SUPERPAGE       (1UL << 7)
#define VTD_PTE_ADDR_M          (0x000FFFFFFFFFF000UL)
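/*
 * Illustrative decode of the capability register, using a made-up value
 * rather than one from any particular chipset: cap = 0x0000000C00000462
 * gives VTD_CAP_ND(cap) = 2 (256 domain ids, see vtd_max_domains()),
 * VTD_CAP_RWBF(cap) = 0 (no write-buffer flushing required),
 * VTD_CAP_CM(cap) = 0 (caching mode off), VTD_CAP_SAGAW(cap) = 0x4
 * (48-bit AGAW with 4-level page tables supported) and
 * VTD_CAP_SPS(cap) = 0x3 (2MB and 1GB super pages supported).
 */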
struct domain {
        uint64_t        *ptp;           /* first level page table page */
        int             pt_levels;      /* number of page table levels */
        int             addrwidth;      /* 'AW' field in context entry */
        int             spsmask;        /* supported super page sizes */
        u_int           id;             /* domain id */
        vm_paddr_t      maxaddr;        /* highest address to be mapped */
        SLIST_ENTRY(domain) next;
};

static SLIST_HEAD(, domain) domhead;

#define DRHD_MAX_UNITS  8
static int              drhd_num;
static struct vtdmap    *vtdmaps[DRHD_MAX_UNITS];
static int              max_domains;
typedef int             (*drhd_ident_func_t)(void);

static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);

static MALLOC_DEFINE(M_VTD, "vtd", "vtd");

/*
 * Config space register definitions from the "Intel 5520 and 5500" datasheet.
 */
static int
tylersburg_vtd_ident(void)
{
        int units, nlbus;
        uint16_t did, vid;
        uint32_t miscsts, vtbar;

        const int bus = 0;
        const int slot = 20;
        const int func = 0;

        units = 0;

        vid = pci_cfgregread(bus, slot, func, PCIR_VENDOR, 2);
        did = pci_cfgregread(bus, slot, func, PCIR_DEVICE, 2);
        if (vid != 0x8086 || did != 0x342E)
                goto done;

        /*
         * Check if this is a dual IOH configuration.
         */
        miscsts = pci_cfgregread(bus, slot, func, 0x9C, 4);
        if (miscsts & (1 << 25))
                nlbus = pci_cfgregread(bus, slot, func, 0x160, 1);
        else
                nlbus = -1;

        vtbar = pci_cfgregread(bus, slot, func, 0x180, 4);
        if (vtbar & 0x1) {
                vtdmaps[units++] = (struct vtdmap *)
                    PHYS_TO_DMAP(vtbar & 0xffffe000);
        } else if (bootverbose)
                printf("VT-d unit in legacy IOH is disabled!\n");

        if (nlbus != -1) {
                vtbar = pci_cfgregread(nlbus, slot, func, 0x180, 4);
                if (vtbar & 0x1) {
                        vtdmaps[units++] = (struct vtdmap *)
                            PHYS_TO_DMAP(vtbar & 0xffffe000);
                } else if (bootverbose)
                        printf("VT-d unit in non-legacy IOH is disabled!\n");
        }
done:
        return (units);
}

static drhd_ident_func_t drhd_ident_funcs[] = {
        tylersburg_vtd_ident,
        NULL
};
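/*
 * The 'number of domains' (ND) field in the capability register is an
 * encoded value: the unit supports 2^(4 + 2 * nd) domain ids, which is
 * what the switch below expands case by case (nd = 2 yields 256, etc).
 */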
static int
vtd_max_domains(struct vtdmap *vtdmap)
{
        int nd;

        nd = VTD_CAP_ND(vtdmap->cap);

        switch (nd) {
        case 0:
                return (16);
        case 1:
                return (64);
        case 2:
                return (256);
        case 3:
                return (1024);
        case 4:
                return (4 * 1024);
        case 5:
                return (16 * 1024);
        case 6:
                return (64 * 1024);
        default:
                panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
        }
}

static u_int
domain_id(void)
{
        u_int id;
        struct domain *dom;

        /* Skip domain id 0 - it is reserved when Caching Mode field is set */
        for (id = 1; id < max_domains; id++) {
                SLIST_FOREACH(dom, &domhead, next) {
                        if (dom->id == id)
                                break;
                }
                if (dom == NULL)
                        break;          /* found an unused domain id */
        }

        if (id >= max_domains)
                panic("domain ids exhausted");

        return (id);
}

static void
vtd_wbflush(struct vtdmap *vtdmap)
{

        if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
                pmap_invalidate_cache();

        if (VTD_CAP_RWBF(vtdmap->cap)) {
                vtdmap->gcr = VTD_GCR_WBF;
                while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
                        ;
        }
}

static void
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
{

        vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
        while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
                ;
}

static void
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
{
        int offset;
        volatile uint64_t *iotlb_reg, val;

        vtd_wbflush(vtdmap);

        offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
        iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);

        *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
            VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;

        while (1) {
                val = *iotlb_reg;
                if ((val & VTD_IIR_IVT) == 0)
                        break;
        }
}

static void
vtd_translation_enable(struct vtdmap *vtdmap)
{

        vtdmap->gcr = VTD_GCR_TE;
        while ((vtdmap->gsr & VTD_GSR_TES) == 0)
                ;
}

static void
vtd_translation_disable(struct vtdmap *vtdmap)
{

        vtdmap->gcr = 0;
        while ((vtdmap->gsr & VTD_GSR_TES) != 0)
                ;
}

static int
vtd_init(void)
{
        int i, units;
        struct vtdmap *vtdmap;
        vm_paddr_t ctx_paddr;

        for (i = 0; drhd_ident_funcs[i] != NULL; i++) {
                units = (*drhd_ident_funcs[i])();
                if (units > 0)
                        break;
        }

        if (units <= 0)
                return (ENXIO);

        drhd_num = units;
        vtdmap = vtdmaps[0];

        if (VTD_CAP_CM(vtdmap->cap) != 0)
                panic("vtd_init: invalid caching mode");

        max_domains = vtd_max_domains(vtdmap);

        /*
         * Set up the root table to point to the context-entry tables.
         */
        for (i = 0; i < 256; i++) {
                ctx_paddr = vtophys(ctx_tables[i]);
                if (ctx_paddr & PAGE_MASK)
                        panic("ctx table (0x%0lx) not page aligned", ctx_paddr);

                root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
        }

        return (0);
}
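/*
 * Note on the table layout set up above: the root table has one entry
 * per PCI bus (256 entries) and each entry is 128 bits wide, which is
 * why the low half of entry 'i' lives at root_table[i * 2] in an array
 * of uint64_t. The per-bus context tables are indexed the same way by
 * the 8-bit device/function number in vtd_add_device() below.
 */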
static void
vtd_cleanup(void)
{
}

static void
vtd_enable(void)
{
        int i;
        struct vtdmap *vtdmap;

        for (i = 0; i < drhd_num; i++) {
                vtdmap = vtdmaps[i];
                vtd_wbflush(vtdmap);

                /* Update the root table address */
                vtdmap->rta = vtophys(root_table);
                vtdmap->gcr = VTD_GCR_SRTP;
                while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
                        ;

                vtd_ctx_global_invalidate(vtdmap);
                vtd_iotlb_global_invalidate(vtdmap);

                vtd_translation_enable(vtdmap);
        }
}

static void
vtd_disable(void)
{
        int i;
        struct vtdmap *vtdmap;

        for (i = 0; i < drhd_num; i++) {
                vtdmap = vtdmaps[i];
                vtd_translation_disable(vtdmap);
        }
}

static void
vtd_add_device(void *arg, int bus, int slot, int func)
{
        int idx;
        uint64_t *ctxp;
        struct domain *dom = arg;
        vm_paddr_t pt_paddr;
        struct vtdmap *vtdmap;

        if (bus < 0 || bus > PCI_BUSMAX ||
            slot < 0 || slot > PCI_SLOTMAX ||
            func < 0 || func > PCI_FUNCMAX)
                panic("vtd_add_device: invalid bsf %d/%d/%d", bus, slot, func);

        vtdmap = vtdmaps[0];
        ctxp = ctx_tables[bus];
        pt_paddr = vtophys(dom->ptp);
        idx = (slot << 3 | func) * 2;

        if (ctxp[idx] & VTD_CTX_PRESENT) {
                panic("vtd_add_device: device %d/%d/%d is already owned by "
                    "domain %d", bus, slot, func,
                    (uint16_t)(ctxp[idx + 1] >> 8));
        }

        /*
         * Order is important. The 'present' bit is set only after all fields
         * of the context entry are initialized.
         */
        ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);

        if (VTD_ECAP_DI(vtdmap->ext_cap))
                ctxp[idx] = VTD_CTX_TT_ALL;
        else
                ctxp[idx] = 0;

        ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;

        /*
         * 'Not Present' entries are not cached in either the Context Cache
         * or in the IOTLB, so there is no need to invalidate either of them.
         */
}
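/*
 * Worked example for the context-entry layout above (values are
 * illustrative): attaching device 0/3/1 to a domain with id 5, 4-level
 * page tables (addrwidth 2) and page table root 'pt_paddr' gives
 * idx = (3 << 3 | 1) * 2 = 50, so the high quadword ctx_tables[0][51]
 * becomes 2 | (5 << 8) = 0x502 and the low quadword ctx_tables[0][50]
 * becomes pt_paddr | VTD_CTX_PRESENT (plus VTD_CTX_TT_ALL when the
 * extended capability advertises it).
 */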
static void
vtd_remove_device(void *arg, int bus, int slot, int func)
{
        int i, idx;
        uint64_t *ctxp;
        struct vtdmap *vtdmap;

        if (bus < 0 || bus > PCI_BUSMAX ||
            slot < 0 || slot > PCI_SLOTMAX ||
            func < 0 || func > PCI_FUNCMAX)
                panic("vtd_remove_device: invalid bsf %d/%d/%d",
                    bus, slot, func);

        ctxp = ctx_tables[bus];
        idx = (slot << 3 | func) * 2;

        /*
         * Order is important. The 'present' bit must be cleared first.
         */
        ctxp[idx] = 0;
        ctxp[idx + 1] = 0;

        /*
         * Invalidate the Context Cache and the IOTLB.
         *
         * XXX use device-selective invalidation for Context Cache
         * XXX use domain-selective invalidation for IOTLB
         */
        for (i = 0; i < drhd_num; i++) {
                vtdmap = vtdmaps[i];
                vtd_ctx_global_invalidate(vtdmap);
                vtd_iotlb_global_invalidate(vtdmap);
        }
}

#define CREATE_MAPPING  0
#define REMOVE_MAPPING  1

static uint64_t
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
    int remove)
{
        struct domain *dom;
        int i, spshift, ptpshift, ptpindex, nlevels;
        uint64_t spsize, *ptp;

        dom = arg;
        ptpindex = 0;
        ptpshift = 0;

        if (gpa & PAGE_MASK)
                panic("vtd_update_mapping: unaligned gpa 0x%0lx", gpa);

        if (hpa & PAGE_MASK)
                panic("vtd_update_mapping: unaligned hpa 0x%0lx", hpa);

        if (len & PAGE_MASK)
                panic("vtd_update_mapping: unaligned len 0x%0lx", len);

        /*
         * Compute the size of the mapping that we can accommodate.
         *
         * This is based on three factors:
         * - supported super page size
         * - alignment of the region starting at 'gpa' and 'hpa'
         * - length of the region 'len'
         */
        spshift = 48;
        for (i = 3; i >= 0; i--) {
                spsize = 1UL << spshift;
                if ((dom->spsmask & (1 << i)) != 0 &&
                    (gpa & (spsize - 1)) == 0 &&
                    (hpa & (spsize - 1)) == 0 &&
                    (len >= spsize)) {
                        break;
                }
                spshift -= 9;
        }

        ptp = dom->ptp;
        nlevels = dom->pt_levels;
        while (--nlevels >= 0) {
                ptpshift = 12 + nlevels * 9;
                ptpindex = (gpa >> ptpshift) & 0x1FF;

                /* We have reached the leaf mapping */
                if (spshift >= ptpshift) {
                        break;
                }

                /*
                 * We are working on a non-leaf page table page.
                 *
                 * Create a downstream page table page if necessary and point
                 * to it from the current page table.
                 */
                if (ptp[ptpindex] == 0) {
                        void *nlp = malloc(PAGE_SIZE, M_VTD,
                            M_WAITOK | M_ZERO);
                        ptp[ptpindex] = vtophys(nlp) | VTD_PTE_RD | VTD_PTE_WR;
                }

                ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
        }

        if ((gpa & ((1UL << ptpshift) - 1)) != 0)
                panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);

        /*
         * Update the 'gpa' -> 'hpa' mapping.
         */
        if (remove) {
                ptp[ptpindex] = 0;
        } else {
                ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;

                if (nlevels > 0)
                        ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
        }

        return (1UL << ptpshift);
}
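/*
 * Worked example for the sizing logic above (illustrative values,
 * assuming a domain with 4-level page tables and the 1GB super page bit
 * set in 'spsmask'): a call with gpa = hpa = 0x40000000 and len = 1GB
 * settles on spshift = 30, walks down until ptpshift = 30 and installs
 * a single super-page PTE covering the whole range, returning 1GB. The
 * caller is expected to advance 'gpa' and 'hpa' by the return value and
 * call again until 'len' is consumed.
 */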
static uint64_t
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{

        return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
}

static uint64_t
vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
{

        return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
}

static void
vtd_invalidate_tlb(void *dom)
{
        int i;
        struct vtdmap *vtdmap;

        /*
         * Invalidate the IOTLB.
         * XXX use domain-selective invalidation for IOTLB
         */
        for (i = 0; i < drhd_num; i++) {
                vtdmap = vtdmaps[i];
                vtd_iotlb_global_invalidate(vtdmap);
        }
}

static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
        struct domain *dom;
        vm_paddr_t addr;
        int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
        struct vtdmap *vtdmap;

        if (drhd_num <= 0)
                panic("vtd_create_domain: no dma remapping hardware available");

        vtdmap = vtdmaps[0];

        /*
         * Calculate AGAW.
         * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
         */
        addr = 0;
        for (gaw = 0; addr < maxaddr; gaw++)
                addr = 1ULL << gaw;

        res = (gaw - 12) % 9;
        if (res == 0)
                agaw = gaw;
        else
                agaw = gaw + 9 - res;

        if (agaw > 64)
                agaw = 64;

        /*
         * Select the smallest Supported AGAW and the corresponding number
         * of page table levels.
         */
        pt_levels = 2;
        sagaw = 30;
        addrwidth = 0;
        tmp = VTD_CAP_SAGAW(vtdmap->cap);
        for (i = 0; i < 5; i++) {
                if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
                        break;
                pt_levels++;
                addrwidth++;
                sagaw += 9;
                if (sagaw > 64)
                        sagaw = 64;
        }

        if (i >= 5) {
                panic("vtd_create_domain: SAGAW 0x%lx does not support AGAW %d",
                    VTD_CAP_SAGAW(vtdmap->cap), agaw);
        }

        dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
        dom->pt_levels = pt_levels;
        dom->addrwidth = addrwidth;
        dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
        dom->id = domain_id();
        dom->maxaddr = maxaddr;
        dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
        if ((uintptr_t)dom->ptp & PAGE_MASK)
                panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);

        SLIST_INSERT_HEAD(&domhead, dom, next);

        return (dom);
}

static void
vtd_free_ptp(uint64_t *ptp, int level)
{
        int i;
        uint64_t *nlp;

        if (level > 1) {
                for (i = 0; i < 512; i++) {
                        if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
                                continue;
                        if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
                                continue;
                        nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
                        vtd_free_ptp(nlp, level - 1);
                }
        }

        bzero(ptp, PAGE_SIZE);
        free(ptp, M_VTD);
}

static void
vtd_destroy_domain(void *arg)
{
        struct domain *dom;

        dom = arg;

        SLIST_REMOVE(&domhead, dom, domain, next);
        vtd_free_ptp(dom->ptp, dom->pt_levels);
        free(dom, M_VTD);
}

struct iommu_ops iommu_ops_intel = {
        vtd_init,
        vtd_cleanup,
        vtd_enable,
        vtd_disable,
        vtd_create_domain,
        vtd_destroy_domain,
        vtd_create_mapping,
        vtd_remove_mapping,
        vtd_add_device,
        vtd_remove_device,
        vtd_invalidate_tlb,
};
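/*
 * This ops vector is consumed through the vendor-independent iommu
 * wrappers declared in "io/iommu.h". A typical life cycle, sketched
 * here for reference only: vtd_init() probes the DRHD units,
 * vtd_create_domain() allocates a domain and its page table root,
 * vtd_create_mapping() is called repeatedly to populate the
 * gpa -> hpa translations, vtd_add_device() attaches a PCI function
 * to the domain and vtd_enable() turns on DMA remapping; teardown
 * runs the inverse operations in reverse order.
 */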