/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Portions Copyright (c) 2010, Oracle and/or its affiliates.
 * All rights reserved.
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

/*
 * Copyright 2023 Oxide Computer Company
 */

/*
 * Intel IOMMU implementation
 * This file contains Intel IOMMU code exported
 * to the rest of the system and code that deals
 * with the Intel IOMMU as a whole.
39 */ 40 41 #include <sys/conf.h> 42 #include <sys/modctl.h> 43 #include <sys/pci.h> 44 #include <sys/pci_impl.h> 45 #include <sys/sysmacros.h> 46 #include <sys/ddi.h> 47 #include <sys/ddidmareq.h> 48 #include <sys/ddi_impldefs.h> 49 #include <sys/ddifm.h> 50 #include <sys/sunndi.h> 51 #include <sys/debug.h> 52 #include <sys/fm/protocol.h> 53 #include <sys/note.h> 54 #include <sys/apic.h> 55 #include <vm/hat_i86.h> 56 #include <sys/smp_impldefs.h> 57 #include <sys/spl.h> 58 #include <sys/archsystm.h> 59 #include <sys/x86_archext.h> 60 #include <sys/avl.h> 61 #include <sys/bootconf.h> 62 #include <sys/bootinfo.h> 63 #include <sys/atomic.h> 64 #include <sys/immu.h> 65 /* ########################### Globals and tunables ######################## */ 66 /* 67 * Global switches (boolean) that can be toggled either via boot options 68 * or via /etc/system or kmdb 69 */ 70 71 /* Various features */ 72 boolean_t immu_enable = B_TRUE; 73 boolean_t immu_dvma_enable = B_TRUE; 74 75 /* accessed in other files so not static */ 76 boolean_t immu_gfxdvma_enable = B_TRUE; 77 boolean_t immu_intrmap_enable = B_FALSE; 78 boolean_t immu_qinv_enable = B_TRUE; 79 80 /* various quirks that need working around */ 81 82 /* XXX We always map page 0 read/write for now */ 83 boolean_t immu_quirk_usbpage0 = B_TRUE; 84 boolean_t immu_quirk_usbrmrr = B_TRUE; 85 boolean_t immu_quirk_usbfullpa; 86 boolean_t immu_quirk_mobile4; 87 88 /* debug messages */ 89 boolean_t immu_dmar_print; 90 91 /* Tunables */ 92 int64_t immu_flush_gran = 5; 93 94 immu_flags_t immu_global_dvma_flags; 95 96 /* ############ END OPTIONS section ################ */ 97 98 /* 99 * Global used internally by Intel IOMMU code 100 */ 101 dev_info_t *root_devinfo; 102 kmutex_t immu_lock; 103 list_t immu_list; 104 boolean_t immu_setup; 105 boolean_t immu_running; 106 boolean_t immu_quiesced; 107 108 /* ######################## END Globals and tunables ###################### */ 109 /* Globals used only in this file */ 110 static char 
**black_array; 111 static uint_t nblacks; 112 113 static char **unity_driver_array; 114 static uint_t nunity; 115 static char **xlate_driver_array; 116 static uint_t nxlate; 117 118 static char **premap_driver_array; 119 static uint_t npremap; 120 static char **nopremap_driver_array; 121 static uint_t nnopremap; 122 /* ###################### Utility routines ############################# */ 123 124 /* 125 * Check if the device has mobile 4 chipset 126 */ 127 static int 128 check_mobile4(dev_info_t *dip, void *arg) 129 { 130 _NOTE(ARGUNUSED(arg)); 131 int vendor, device; 132 int *ip = (int *)arg; 133 134 vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 135 "vendor-id", -1); 136 device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 137 "device-id", -1); 138 139 if (vendor == 0x8086 && device == 0x2a40) { 140 *ip = B_TRUE; 141 ddi_err(DER_NOTE, dip, "iommu: Mobile 4 chipset detected. " 142 "Force setting IOMMU write buffer"); 143 return (DDI_WALK_TERMINATE); 144 } else { 145 return (DDI_WALK_CONTINUE); 146 } 147 } 148 149 static void 150 map_bios_rsvd_mem(dev_info_t *dip) 151 { 152 struct memlist *mp; 153 154 /* 155 * Make sure the domain for the device is set up before 156 * mapping anything. 157 */ 158 (void) immu_dvma_device_setup(dip, 0); 159 160 memlist_read_lock(); 161 162 mp = bios_rsvd; 163 while (mp != NULL) { 164 memrng_t mrng = {0}; 165 166 ddi_err(DER_LOG, dip, "iommu: Mapping BIOS rsvd range " 167 "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address, 168 mp->ml_address + mp->ml_size); 169 170 mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address); 171 mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE; 172 173 (void) immu_map_memrange(dip, &mrng); 174 175 mp = mp->ml_next; 176 } 177 178 memlist_read_unlock(); 179 } 180 181 182 /* 183 * Check if the driver requests a specific type of mapping. 
184 */ 185 /*ARGSUSED*/ 186 static void 187 check_conf(dev_info_t *dip, void *arg) 188 { 189 immu_devi_t *immu_devi; 190 const char *dname; 191 uint_t i; 192 int hasmapprop = 0, haspreprop = 0; 193 boolean_t old_premap; 194 195 /* 196 * Only PCI devices can use an IOMMU. Legacy ISA devices 197 * are handled in check_lpc. 198 */ 199 if (!DEVI_IS_PCI(dip)) 200 return; 201 202 dname = ddi_driver_name(dip); 203 if (dname == NULL) 204 return; 205 immu_devi = immu_devi_get(dip); 206 207 for (i = 0; i < nunity; i++) { 208 if (strcmp(unity_driver_array[i], dname) == 0) { 209 hasmapprop = 1; 210 immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY; 211 } 212 } 213 214 for (i = 0; i < nxlate; i++) { 215 if (strcmp(xlate_driver_array[i], dname) == 0) { 216 hasmapprop = 1; 217 immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY; 218 } 219 } 220 221 old_premap = immu_devi->imd_use_premap; 222 223 for (i = 0; i < nnopremap; i++) { 224 if (strcmp(nopremap_driver_array[i], dname) == 0) { 225 haspreprop = 1; 226 immu_devi->imd_use_premap = B_FALSE; 227 } 228 } 229 230 for (i = 0; i < npremap; i++) { 231 if (strcmp(premap_driver_array[i], dname) == 0) { 232 haspreprop = 1; 233 immu_devi->imd_use_premap = B_TRUE; 234 } 235 } 236 237 /* 238 * Report if we changed the value from the default. 239 */ 240 if (hasmapprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags)) 241 ddi_err(DER_LOG, dip, "using %s DVMA mapping", 242 immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ? 243 DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE); 244 245 if (haspreprop && (immu_devi->imd_use_premap != old_premap)) 246 ddi_err(DER_LOG, dip, "%susing premapped DVMA space", 247 immu_devi->imd_use_premap ? 
"" : "not "); 248 } 249 250 /* 251 * Check if the device is USB controller 252 */ 253 /*ARGSUSED*/ 254 static void 255 check_usb(dev_info_t *dip, void *arg) 256 { 257 const char *drv = ddi_driver_name(dip); 258 immu_devi_t *immu_devi; 259 260 261 /* 262 * It's not clear if xHCI really needs these quirks; however, to be on 263 * the safe side until we know for certain we add it to the list below. 264 */ 265 if (drv == NULL || 266 (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 && 267 strcmp(drv, "ehci") != 0 && strcmp(drv, "xhci") != 0)) { 268 return; 269 } 270 271 immu_devi = immu_devi_get(dip); 272 273 /* 274 * If unit mappings are already specified, globally or 275 * locally, we're done here, since that covers both 276 * quirks below. 277 */ 278 if (immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY) 279 return; 280 281 /* This must come first since it does unity mapping */ 282 if (immu_quirk_usbfullpa == B_TRUE) { 283 immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY; 284 } else if (immu_quirk_usbrmrr == B_TRUE) { 285 ddi_err(DER_LOG, dip, "Applying USB RMRR quirk"); 286 map_bios_rsvd_mem(dip); 287 } 288 } 289 290 /* 291 * Check if the device is a LPC device 292 */ 293 /*ARGSUSED*/ 294 static void 295 check_lpc(dev_info_t *dip, void *arg) 296 { 297 immu_devi_t *immu_devi; 298 299 immu_devi = immu_devi_get(dip); 300 if (immu_devi->imd_lpc == B_TRUE) { 301 ddi_err(DER_LOG, dip, "iommu: Found LPC device"); 302 /* This will put the immu_devi on the LPC "specials" list */ 303 (void) immu_dvma_device_setup(dip, IMMU_FLAGS_SLEEP); 304 } 305 } 306 307 /* 308 * Check if the device is a GFX device 309 */ 310 /*ARGSUSED*/ 311 static void 312 check_gfx(dev_info_t *dip, void *arg) 313 { 314 immu_devi_t *immu_devi; 315 316 immu_devi = immu_devi_get(dip); 317 if (immu_devi->imd_display == B_TRUE) { 318 immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY; 319 ddi_err(DER_LOG, dip, "iommu: Found GFX device"); 320 /* This will put the immu_devi on the GFX "specials" list */ 321 (void) 
immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP); 322 } 323 } 324 325 static void 326 walk_tree(int (*f)(dev_info_t *, void *), void *arg) 327 { 328 ndi_devi_enter(root_devinfo); 329 ddi_walk_devs(ddi_get_child(root_devinfo), f, arg); 330 ndi_devi_exit(root_devinfo); 331 } 332 333 static int 334 check_pre_setup_quirks(dev_info_t *dip, void *arg) 335 { 336 /* just 1 check right now */ 337 return (check_mobile4(dip, arg)); 338 } 339 340 static int 341 check_pre_startup_quirks(dev_info_t *dip, void *arg) 342 { 343 if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) { 344 ddi_err(DER_PANIC, dip, "Failed to get immu_devi"); 345 } 346 347 check_gfx(dip, arg); 348 349 check_lpc(dip, arg); 350 351 check_conf(dip, arg); 352 353 check_usb(dip, arg); 354 355 return (DDI_WALK_CONTINUE); 356 } 357 358 static void 359 pre_setup_quirks(void) 360 { 361 walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4); 362 } 363 364 static void 365 pre_startup_quirks(void) 366 { 367 walk_tree(check_pre_startup_quirks, NULL); 368 369 immu_dmar_rmrr_map(); 370 } 371 372 static int 373 get_conf_str(char *bopt, char **val) 374 { 375 int ret; 376 377 /* 378 * Check the rootnex.conf property 379 * Fake up a dev_t since searching the global 380 * property list needs it 381 */ 382 ret = ddi_prop_lookup_string( 383 makedevice(ddi_name_to_major("rootnex"), 0), 384 root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, 385 bopt, val); 386 387 return (ret); 388 } 389 390 /* 391 * get_conf_opt() 392 * get a rootnex.conf setting (always a boolean) 393 */ 394 static void 395 get_conf_opt(char *bopt, boolean_t *kvar) 396 { 397 char *val = NULL; 398 399 /* 400 * Check the rootnex.conf property 401 * Fake up a dev_t since searching the global 402 * property list needs it 403 */ 404 405 if (get_conf_str(bopt, &val) != DDI_PROP_SUCCESS) 406 return; 407 408 if (strcmp(val, "true") == 0) { 409 *kvar = B_TRUE; 410 } else if (strcmp(val, "false") == 0) { 411 *kvar = B_FALSE; 412 } else { 413 ddi_err(DER_WARN, 
NULL, "rootnex.conf switch %s=\"%s\" ", 414 "is not set to true or false. Ignoring option.", 415 bopt, val); 416 } 417 ddi_prop_free(val); 418 } 419 420 /* 421 * get_bootopt() 422 * check a boot option (always a boolean) 423 */ 424 static int 425 get_boot_str(char *bopt, char **val) 426 { 427 int ret; 428 429 ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo, 430 DDI_PROP_DONTPASS, bopt, val); 431 432 return (ret); 433 } 434 435 static void 436 get_bootopt(char *bopt, boolean_t *kvar) 437 { 438 char *val = NULL; 439 440 /* 441 * All boot options set at the GRUB menu become 442 * properties on the rootnex. 443 */ 444 if (get_boot_str(bopt, &val) != DDI_PROP_SUCCESS) 445 return; 446 447 if (strcmp(val, "true") == 0) { 448 *kvar = B_TRUE; 449 } else if (strcmp(val, "false") == 0) { 450 *kvar = B_FALSE; 451 } else { 452 ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ", 453 "is not set to true or false. Ignoring option.", 454 bopt, val); 455 } 456 ddi_prop_free(val); 457 } 458 459 static void 460 get_boot_dvma_mode(void) 461 { 462 char *val = NULL; 463 464 if (get_boot_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val) 465 != DDI_PROP_SUCCESS) 466 return; 467 468 if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) { 469 immu_global_dvma_flags |= IMMU_FLAGS_UNITY; 470 } else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) { 471 immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY; 472 } else { 473 ddi_err(DER_WARN, NULL, "bad value \"%s\" for boot option %s", 474 val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP); 475 } 476 ddi_prop_free(val); 477 } 478 479 static void 480 get_conf_dvma_mode(void) 481 { 482 char *val = NULL; 483 484 if (get_conf_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val) 485 != DDI_PROP_SUCCESS) 486 return; 487 488 if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) { 489 immu_global_dvma_flags |= IMMU_FLAGS_UNITY; 490 } else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) { 491 immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY; 492 } else { 493 ddi_err(DER_WARN, NULL, "bad value \"%s\" for rootnex " 494 
"option %s", val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP); 495 } 496 ddi_prop_free(val); 497 } 498 499 500 static void 501 get_conf_tunables(char *bopt, int64_t *ivar) 502 { 503 int64_t *iarray; 504 uint_t n; 505 506 /* 507 * Check the rootnex.conf property 508 * Fake up a dev_t since searching the global 509 * property list needs it 510 */ 511 if (ddi_prop_lookup_int64_array( 512 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo, 513 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt, 514 &iarray, &n) != DDI_PROP_SUCCESS) { 515 return; 516 } 517 518 if (n != 1) { 519 ddi_err(DER_WARN, NULL, "More than one value specified for " 520 "%s property. Ignoring and using default", 521 "immu-flush-gran"); 522 ddi_prop_free(iarray); 523 return; 524 } 525 526 if (iarray[0] < 0) { 527 ddi_err(DER_WARN, NULL, "Negative value specified for " 528 "%s property. Inoring and Using default value", 529 "immu-flush-gran"); 530 ddi_prop_free(iarray); 531 return; 532 } 533 534 *ivar = iarray[0]; 535 536 ddi_prop_free(iarray); 537 } 538 539 static void 540 read_conf_options(void) 541 { 542 /* enable/disable options */ 543 get_conf_opt("immu-enable", &immu_enable); 544 get_conf_opt("immu-dvma-enable", &immu_dvma_enable); 545 get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable); 546 get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable); 547 get_conf_opt("immu-qinv-enable", &immu_qinv_enable); 548 549 /* workaround switches */ 550 get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0); 551 get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa); 552 get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr); 553 554 /* debug printing */ 555 get_conf_opt("immu-dmar-print", &immu_dmar_print); 556 557 /* get tunables */ 558 get_conf_tunables("immu-flush-gran", &immu_flush_gran); 559 560 get_conf_dvma_mode(); 561 } 562 563 static void 564 read_boot_options(void) 565 { 566 /* enable/disable options */ 567 get_bootopt("immu-enable", &immu_enable); 568 get_bootopt("immu-dvma-enable", 
&immu_dvma_enable); 569 get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable); 570 get_bootopt("immu-intrmap-enable", &immu_intrmap_enable); 571 get_bootopt("immu-qinv-enable", &immu_qinv_enable); 572 573 /* workaround switches */ 574 get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0); 575 get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa); 576 get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr); 577 578 /* debug printing */ 579 get_bootopt("immu-dmar-print", &immu_dmar_print); 580 581 get_boot_dvma_mode(); 582 } 583 584 static void 585 mapping_list_setup(void) 586 { 587 char **string_array; 588 uint_t nstrings; 589 590 if (ddi_prop_lookup_string_array( 591 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo, 592 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, 593 "immu-dvma-unity-drivers", 594 &string_array, &nstrings) == DDI_PROP_SUCCESS) { 595 unity_driver_array = string_array; 596 nunity = nstrings; 597 } 598 599 if (ddi_prop_lookup_string_array( 600 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo, 601 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, 602 "immu-dvma-xlate-drivers", 603 &string_array, &nstrings) == DDI_PROP_SUCCESS) { 604 xlate_driver_array = string_array; 605 nxlate = nstrings; 606 } 607 608 if (ddi_prop_lookup_string_array( 609 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo, 610 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, 611 "immu-dvma-premap-drivers", 612 &string_array, &nstrings) == DDI_PROP_SUCCESS) { 613 premap_driver_array = string_array; 614 npremap = nstrings; 615 } 616 617 if (ddi_prop_lookup_string_array( 618 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo, 619 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, 620 "immu-dvma-nopremap-drivers", 621 &string_array, &nstrings) == DDI_PROP_SUCCESS) { 622 nopremap_driver_array = string_array; 623 nnopremap = nstrings; 624 } 625 } 626 627 /* 628 * Note, this will not catch hardware not enumerated 629 * in early boot 630 */ 631 static boolean_t 
632 blacklisted_driver(void) 633 { 634 char **strptr; 635 int i; 636 major_t maj; 637 638 /* need at least 2 strings */ 639 if (nblacks < 2) { 640 return (B_FALSE); 641 } 642 643 for (i = 0; nblacks - i > 1; i++) { 644 strptr = &black_array[i]; 645 if (strcmp(*strptr++, "DRIVER") == 0) { 646 if ((maj = ddi_name_to_major(*strptr++)) 647 != DDI_MAJOR_T_NONE) { 648 /* is there hardware bound to this drvr */ 649 if (devnamesp[maj].dn_head != NULL) { 650 return (B_TRUE); 651 } 652 } 653 i += 1; /* for loop adds 1, so add only 1 here */ 654 } 655 } 656 657 return (B_FALSE); 658 } 659 660 static boolean_t 661 blacklisted_smbios(void) 662 { 663 id_t smid; 664 smbios_hdl_t *smhdl; 665 smbios_info_t sminf; 666 smbios_system_t smsys; 667 char *mfg, *product, *version; 668 char **strptr; 669 int i; 670 671 /* need at least 4 strings for this setting */ 672 if (nblacks < 4) { 673 return (B_FALSE); 674 } 675 676 smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL); 677 if (smhdl == NULL || 678 (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR || 679 smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) { 680 return (B_FALSE); 681 } 682 683 mfg = (char *)sminf.smbi_manufacturer; 684 product = (char *)sminf.smbi_product; 685 version = (char *)sminf.smbi_version; 686 687 ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n"); 688 ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg); 689 ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product); 690 ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version); 691 692 for (i = 0; nblacks - i > 3; i++) { 693 strptr = &black_array[i]; 694 if (strcmp(*strptr++, "SMBIOS") == 0) { 695 if (strcmp(*strptr++, mfg) == 0 && 696 (*strptr[0] == '\0' || 697 strcmp(*strptr++, product) == 0) && 698 (*strptr[0] == '\0' || 699 strcmp(*strptr++, version) == 0)) { 700 return (B_TRUE); 701 } 702 i += 3; 703 } 704 } 705 706 return (B_FALSE); 707 } 708 709 static boolean_t 710 blacklisted_acpi(void) 711 { 712 if (nblacks == 0) { 713 return (B_FALSE); 
714 } 715 716 return (immu_dmar_blacklisted(black_array, nblacks)); 717 } 718 719 /* 720 * Check if system is blacklisted by Intel IOMMU driver 721 * i.e. should Intel IOMMU be disabled on this system 722 * Currently a system can be blacklistd based on the 723 * following bases: 724 * 725 * 1. DMAR ACPI table information. 726 * This information includes things like 727 * manufacturer and revision number. If rootnex.conf 728 * has matching info set in its blacklist property 729 * then Intel IOMMu will be disabled 730 * 731 * 2. SMBIOS information 732 * 733 * 3. Driver installed - useful if a particular 734 * driver or hardware is toxic if Intel IOMMU 735 * is turned on. 736 */ 737 738 static void 739 blacklist_setup(void) 740 { 741 char **string_array; 742 uint_t nstrings; 743 744 /* 745 * Check the rootnex.conf blacklist property. 746 * Fake up a dev_t since searching the global 747 * property list needs it 748 */ 749 if (ddi_prop_lookup_string_array( 750 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo, 751 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist", 752 &string_array, &nstrings) != DDI_PROP_SUCCESS) { 753 return; 754 } 755 756 /* smallest blacklist criteria works with multiples of 2 */ 757 if (nstrings % 2 != 0) { 758 ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist " 759 "rootnex.conf: number of strings must be a " 760 "multiple of 2"); 761 ddi_prop_free(string_array); 762 return; 763 } 764 765 black_array = string_array; 766 nblacks = nstrings; 767 } 768 769 static void 770 blacklist_destroy(void) 771 { 772 if (black_array) { 773 ddi_prop_free(black_array); 774 black_array = NULL; 775 nblacks = 0; 776 } 777 } 778 779 static char * 780 immu_alloc_name(const char *str, int instance) 781 { 782 size_t slen; 783 char *s; 784 785 slen = strlen(str) + IMMU_ISTRLEN + 1; 786 s = kmem_zalloc(slen, VM_SLEEP); 787 if (s != NULL) 788 (void) snprintf(s, slen, "%s%d", str, instance); 789 790 return (s); 791 } 792 793 794 /* 795 * Now set all the 
fields in the order they are defined 796 * We do this only as a defensive-coding practice, it is 797 * not a correctness issue. 798 */ 799 static void * 800 immu_state_alloc(int seg, void *dmar_unit) 801 { 802 immu_t *immu; 803 char *nodename, *hcachename, *pcachename; 804 int instance; 805 806 dmar_unit = immu_dmar_walk_units(seg, dmar_unit); 807 if (dmar_unit == NULL) { 808 /* No more IOMMUs in this segment */ 809 return (NULL); 810 } 811 812 immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP); 813 814 mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL); 815 816 mutex_enter(&(immu->immu_lock)); 817 818 immu->immu_dmar_unit = dmar_unit; 819 immu->immu_dip = immu_dmar_unit_dip(dmar_unit); 820 821 nodename = ddi_node_name(immu->immu_dip); 822 instance = ddi_get_instance(immu->immu_dip); 823 824 immu->immu_name = immu_alloc_name(nodename, instance); 825 if (immu->immu_name == NULL) 826 return (NULL); 827 828 /* 829 * the immu_intr_lock mutex is grabbed by the IOMMU 830 * unit's interrupt handler so we need to use an 831 * interrupt cookie for the mutex 832 */ 833 mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER, 834 (void *)ipltospl(IMMU_INTR_IPL)); 835 836 /* IOMMU regs related */ 837 mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL); 838 cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL); 839 immu->immu_regs_busy = B_FALSE; 840 841 /* DVMA related */ 842 immu->immu_dvma_coherent = B_FALSE; 843 844 /* DVMA context related */ 845 rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL); 846 847 /* DVMA domain related */ 848 list_create(&(immu->immu_domain_list), sizeof (domain_t), 849 offsetof(domain_t, dom_immu_node)); 850 851 /* DVMA special device lists */ 852 immu->immu_dvma_gfx_only = B_FALSE; 853 list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t), 854 offsetof(immu_devi_t, imd_spc_node)); 855 list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t), 856 offsetof(immu_devi_t, imd_spc_node)); 857 858 /* interrupt remapping 
related */ 859 mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL); 860 861 /* qinv related */ 862 mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL); 863 864 /* 865 * insert this immu unit into the system-wide list 866 */ 867 list_insert_tail(&immu_list, immu); 868 869 pcachename = immu_alloc_name("immu_pgtable_cache", instance); 870 if (pcachename == NULL) 871 return (NULL); 872 873 hcachename = immu_alloc_name("immu_hdl_cache", instance); 874 if (hcachename == NULL) 875 return (NULL); 876 877 immu->immu_pgtable_cache = kmem_cache_create(pcachename, 878 sizeof (pgtable_t), 0, pgtable_ctor, pgtable_dtor, NULL, immu, 879 NULL, 0); 880 immu->immu_hdl_cache = kmem_cache_create(hcachename, 881 sizeof (immu_hdl_priv_t), 64, immu_hdl_priv_ctor, 882 NULL, NULL, immu, NULL, 0); 883 884 mutex_exit(&(immu->immu_lock)); 885 886 ddi_err(DER_LOG, immu->immu_dip, "unit setup"); 887 888 immu_dmar_set_immu(dmar_unit, immu); 889 890 return (dmar_unit); 891 } 892 893 static void 894 immu_subsystems_setup(void) 895 { 896 int seg; 897 void *unit_hdl; 898 899 ddi_err(DER_VERB, NULL, 900 "Creating state structures for Intel IOMMU units"); 901 902 mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL); 903 list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node)); 904 905 mutex_enter(&immu_lock); 906 907 unit_hdl = NULL; 908 for (seg = 0; seg < IMMU_MAXSEG; seg++) { 909 while (unit_hdl = immu_state_alloc(seg, unit_hdl)) { 910 ; 911 } 912 } 913 914 immu_regs_setup(&immu_list); /* subsequent code needs this first */ 915 immu_dvma_setup(&immu_list); 916 if (immu_qinv_setup(&immu_list) == DDI_SUCCESS) 917 immu_intrmap_setup(&immu_list); 918 else 919 immu_intrmap_enable = B_FALSE; 920 921 mutex_exit(&immu_lock); 922 } 923 924 /* 925 * immu_subsystems_startup() 926 * startup all units that were setup 927 */ 928 static void 929 immu_subsystems_startup(void) 930 { 931 immu_t *immu; 932 iommulib_ops_t *iommulib_ops; 933 934 mutex_enter(&immu_lock); 935 936 
immu_dmar_startup(); 937 938 immu = list_head(&immu_list); 939 for (; immu; immu = list_next(&immu_list, immu)) { 940 941 mutex_enter(&(immu->immu_lock)); 942 943 immu_intr_register(immu); 944 immu_dvma_startup(immu); 945 immu_intrmap_startup(immu); 946 immu_qinv_startup(immu); 947 948 /* 949 * Set IOMMU unit's regs to do 950 * the actual startup. This will 951 * set immu->immu_running field 952 * if the unit is successfully 953 * started 954 */ 955 immu_regs_startup(immu); 956 957 mutex_exit(&(immu->immu_lock)); 958 959 iommulib_ops = kmem_alloc(sizeof (iommulib_ops_t), KM_SLEEP); 960 *iommulib_ops = immulib_ops; 961 iommulib_ops->ilops_data = (void *)immu; 962 (void) iommulib_iommu_register(immu->immu_dip, iommulib_ops, 963 &immu->immu_iommulib_handle); 964 } 965 966 mutex_exit(&immu_lock); 967 } 968 969 /* ################## Intel IOMMU internal interfaces ###################### */ 970 971 /* 972 * Internal interfaces for IOMMU code (i.e. not exported to rootnex 973 * or rest of system) 974 */ 975 976 /* 977 * ddip can be NULL, in which case we walk up until we find the root dip 978 * NOTE: We never visit the root dip since its not a hardware node 979 */ 980 int 981 immu_walk_ancestor( 982 dev_info_t *rdip, 983 dev_info_t *ddip, 984 int (*func)(dev_info_t *, void *arg), 985 void *arg, 986 int *lvlp, 987 immu_flags_t immu_flags) 988 { 989 dev_info_t *pdip; 990 int level; 991 int error = DDI_SUCCESS; 992 993 /* ddip and immu can be NULL */ 994 995 /* Hold rdip so that branch is not detached */ 996 ndi_hold_devi(rdip); 997 for (pdip = rdip, level = 1; pdip && pdip != root_devinfo; 998 pdip = ddi_get_parent(pdip), level++) { 999 1000 if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) { 1001 error = DDI_FAILURE; 1002 break; 1003 } 1004 if (func(pdip, arg) == DDI_WALK_TERMINATE) { 1005 break; 1006 } 1007 if (immu_flags & IMMU_FLAGS_DONTPASS) { 1008 break; 1009 } 1010 if (pdip == ddip) { 1011 break; 1012 } 1013 } 1014 1015 ndi_rele_devi(rdip); 1016 1017 if (lvlp) 1018 
*lvlp = level; 1019 1020 return (error); 1021 } 1022 1023 /* ######################## Intel IOMMU entry points ####################### */ 1024 /* 1025 * immu_init() 1026 * called from rootnex_attach(). setup but don't startup the Intel IOMMU 1027 * This is the first function called in Intel IOMMU code 1028 */ 1029 void 1030 immu_init(void) 1031 { 1032 char *phony_reg = "A thing of beauty is a joy forever"; 1033 1034 /* Set some global shorthands that are needed by all of IOMMU code */ 1035 root_devinfo = ddi_root_node(); 1036 1037 /* 1038 * Intel IOMMU only supported only if MMU(CPU) page size is == 1039 * IOMMU pages size. 1040 */ 1041 /*LINTED*/ 1042 if (MMU_PAGESIZE != IMMU_PAGESIZE) { 1043 ddi_err(DER_WARN, NULL, 1044 "MMU page size (%d) is not equal to\n" 1045 "IOMMU page size (%d). " 1046 "Disabling Intel IOMMU. ", 1047 MMU_PAGESIZE, IMMU_PAGESIZE); 1048 immu_enable = B_FALSE; 1049 return; 1050 } 1051 1052 /* 1053 * Read rootnex.conf options. Do this before 1054 * boot options so boot options can override .conf options. 1055 */ 1056 read_conf_options(); 1057 1058 /* 1059 * retrieve the Intel IOMMU boot options. 1060 * Do this before parsing immu ACPI table 1061 * as a boot option could potentially affect 1062 * ACPI parsing. 1063 */ 1064 ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n"); 1065 read_boot_options(); 1066 1067 /* 1068 * Check the IOMMU enable boot-option first. 1069 * This is so that we can skip parsing the ACPI table 1070 * if necessary because that may cause problems in 1071 * systems with buggy BIOS or ACPI tables 1072 */ 1073 if (immu_enable == B_FALSE) { 1074 return; 1075 } 1076 1077 if (immu_intrmap_enable == B_TRUE) 1078 immu_qinv_enable = B_TRUE; 1079 1080 /* 1081 * Next, check if the system even has an Intel IOMMU 1082 * We use the presence or absence of the IOMMU ACPI 1083 * table to detect Intel IOMMU. 
1084 */ 1085 if (immu_dmar_setup() != DDI_SUCCESS) { 1086 immu_enable = B_FALSE; 1087 return; 1088 } 1089 1090 mapping_list_setup(); 1091 1092 /* 1093 * Check blacklists 1094 */ 1095 blacklist_setup(); 1096 1097 if (blacklisted_smbios() == B_TRUE) { 1098 blacklist_destroy(); 1099 immu_enable = B_FALSE; 1100 return; 1101 } 1102 1103 if (blacklisted_driver() == B_TRUE) { 1104 blacklist_destroy(); 1105 immu_enable = B_FALSE; 1106 return; 1107 } 1108 1109 /* 1110 * Read the "raw" DMAR ACPI table to get information 1111 * and convert into a form we can use. 1112 */ 1113 if (immu_dmar_parse() != DDI_SUCCESS) { 1114 blacklist_destroy(); 1115 immu_enable = B_FALSE; 1116 return; 1117 } 1118 1119 /* 1120 * now that we have processed the ACPI table 1121 * check if we need to blacklist this system 1122 * based on ACPI info 1123 */ 1124 if (blacklisted_acpi() == B_TRUE) { 1125 immu_dmar_destroy(); 1126 blacklist_destroy(); 1127 immu_enable = B_FALSE; 1128 return; 1129 } 1130 1131 blacklist_destroy(); 1132 1133 /* 1134 * Check if system has HW quirks. 
1135 */ 1136 pre_setup_quirks(); 1137 1138 /* Now do the rest of the setup */ 1139 immu_subsystems_setup(); 1140 1141 /* 1142 * Now that the IMMU is setup, create a phony 1143 * reg prop so that suspend/resume works 1144 */ 1145 if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg", 1146 (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) { 1147 ddi_err(DER_PANIC, NULL, "Failed to create reg prop for " 1148 "rootnex node"); 1149 /*NOTREACHED*/ 1150 } 1151 1152 immu_setup = B_TRUE; 1153 } 1154 1155 /* 1156 * immu_startup() 1157 * called directly by boot code to startup 1158 * all units of the IOMMU 1159 */ 1160 void 1161 immu_startup(void) 1162 { 1163 /* 1164 * If IOMMU is disabled, do nothing 1165 */ 1166 if (immu_enable == B_FALSE) { 1167 return; 1168 } 1169 1170 if (immu_setup == B_FALSE) { 1171 ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, " 1172 "skipping IOMMU startup"); 1173 return; 1174 } 1175 1176 pre_startup_quirks(); 1177 1178 ddi_err(DER_CONT, NULL, 1179 "?Starting Intel IOMMU (dmar) units...\n"); 1180 1181 immu_subsystems_startup(); 1182 1183 immu_running = B_TRUE; 1184 } 1185 1186 /* 1187 * Hook to notify IOMMU code of device tree changes 1188 */ 1189 void 1190 immu_device_tree_changed(void) 1191 { 1192 if (immu_setup == B_FALSE) { 1193 return; 1194 } 1195 1196 ddi_err(DER_WARN, NULL, "Intel IOMMU currently " 1197 "does not use device tree updates"); 1198 } 1199 1200 /* 1201 * Hook to notify IOMMU code of memory changes 1202 */ 1203 void 1204 immu_physmem_update(uint64_t addr, uint64_t size) 1205 { 1206 if (immu_setup == B_FALSE) { 1207 return; 1208 } 1209 immu_dvma_physmem_update(addr, size); 1210 } 1211 1212 /* 1213 * immu_quiesce() 1214 * quiesce all units that are running 1215 */ 1216 int 1217 immu_quiesce(void) 1218 { 1219 immu_t *immu; 1220 int ret = DDI_SUCCESS; 1221 1222 mutex_enter(&immu_lock); 1223 1224 if (immu_running == B_FALSE) { 1225 mutex_exit(&immu_lock); 1226 return (DDI_SUCCESS); 1227 } 1228 1229 immu = 
list_head(&immu_list); 1230 for (; immu; immu = list_next(&immu_list, immu)) { 1231 1232 /* if immu is not running, we dont quiesce */ 1233 if (immu->immu_regs_running == B_FALSE) 1234 continue; 1235 1236 /* flush caches */ 1237 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); 1238 immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait); 1239 immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait); 1240 rw_exit(&(immu->immu_ctx_rwlock)); 1241 immu_regs_wbf_flush(immu); 1242 1243 mutex_enter(&(immu->immu_lock)); 1244 1245 /* 1246 * Set IOMMU unit's regs to do 1247 * the actual shutdown. 1248 */ 1249 immu_regs_shutdown(immu); 1250 immu_regs_suspend(immu); 1251 1252 /* if immu is still running, we failed */ 1253 if (immu->immu_regs_running == B_TRUE) 1254 ret = DDI_FAILURE; 1255 else 1256 immu->immu_regs_quiesced = B_TRUE; 1257 1258 mutex_exit(&(immu->immu_lock)); 1259 } 1260 1261 if (ret == DDI_SUCCESS) { 1262 immu_running = B_FALSE; 1263 immu_quiesced = B_TRUE; 1264 } 1265 mutex_exit(&immu_lock); 1266 1267 return (ret); 1268 } 1269 1270 /* 1271 * immu_unquiesce() 1272 * unquiesce all units 1273 */ 1274 int 1275 immu_unquiesce(void) 1276 { 1277 immu_t *immu; 1278 int ret = DDI_SUCCESS; 1279 1280 mutex_enter(&immu_lock); 1281 1282 if (immu_quiesced == B_FALSE) { 1283 mutex_exit(&immu_lock); 1284 return (DDI_SUCCESS); 1285 } 1286 1287 immu = list_head(&immu_list); 1288 for (; immu; immu = list_next(&immu_list, immu)) { 1289 1290 mutex_enter(&(immu->immu_lock)); 1291 1292 /* if immu was not quiesced, i.e was not running before */ 1293 if (immu->immu_regs_quiesced == B_FALSE) { 1294 mutex_exit(&(immu->immu_lock)); 1295 continue; 1296 } 1297 1298 if (immu_regs_resume(immu) != DDI_SUCCESS) { 1299 ret = DDI_FAILURE; 1300 mutex_exit(&(immu->immu_lock)); 1301 continue; 1302 } 1303 1304 /* flush caches before unquiesce */ 1305 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER); 1306 immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait); 1307 immu_flush_iotlb_gbl(immu, 
&immu->immu_ctx_inv_wait); 1308 rw_exit(&(immu->immu_ctx_rwlock)); 1309 1310 /* 1311 * Set IOMMU unit's regs to do 1312 * the actual startup. This will 1313 * set immu->immu_regs_running field 1314 * if the unit is successfully 1315 * started 1316 */ 1317 immu_regs_startup(immu); 1318 1319 if (immu->immu_regs_running == B_FALSE) { 1320 ret = DDI_FAILURE; 1321 } else { 1322 immu_quiesced = B_TRUE; 1323 immu_running = B_TRUE; 1324 immu->immu_regs_quiesced = B_FALSE; 1325 } 1326 1327 mutex_exit(&(immu->immu_lock)); 1328 } 1329 1330 mutex_exit(&immu_lock); 1331 1332 return (ret); 1333 } 1334 1335 void 1336 immu_init_inv_wait(immu_inv_wait_t *iwp, const char *name, boolean_t sync) 1337 { 1338 caddr_t vaddr; 1339 uint64_t paddr; 1340 1341 iwp->iwp_sync = sync; 1342 1343 vaddr = (caddr_t)&iwp->iwp_vstatus; 1344 paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr)); 1345 paddr += ((uintptr_t)vaddr) & MMU_PAGEOFFSET; 1346 1347 iwp->iwp_pstatus = paddr; 1348 iwp->iwp_name = name; 1349 } 1350 1351 /* ############## END Intel IOMMU entry points ################## */ 1352