1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23 * All rights reserved.
24 */
25 /*
26 * Copyright (c) 2009, Intel Corporation.
27 * All rights reserved.
28 */
29
30 /*
31 * Copyright 2023 Oxide Computer Company
32 */
33
34 /*
35 * Intel IOMMU implementation
36 * This file contains Intel IOMMU code exported
37 * to the rest of the system and code that deals
38 * with the Intel IOMMU as a whole.
39 */
40
41 #include <sys/conf.h>
42 #include <sys/modctl.h>
43 #include <sys/pci.h>
44 #include <sys/pci_impl.h>
45 #include <sys/sysmacros.h>
46 #include <sys/ddi.h>
47 #include <sys/ddidmareq.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/ddifm.h>
50 #include <sys/sunndi.h>
51 #include <sys/debug.h>
52 #include <sys/fm/protocol.h>
53 #include <sys/note.h>
54 #include <sys/apic.h>
55 #include <vm/hat_i86.h>
56 #include <sys/smp_impldefs.h>
57 #include <sys/spl.h>
58 #include <sys/archsystm.h>
59 #include <sys/x86_archext.h>
60 #include <sys/avl.h>
61 #include <sys/bootconf.h>
62 #include <sys/bootinfo.h>
63 #include <sys/atomic.h>
64 #include <sys/immu.h>
65 /* ########################### Globals and tunables ######################## */
66 /*
67 * Global switches (boolean) that can be toggled either via boot options
68 * or via /etc/system or kmdb
69 */
70
71 /* Various features */
72 boolean_t immu_enable = B_TRUE;
73 boolean_t immu_dvma_enable = B_TRUE;
74
75 /* accessed in other files so not static */
76 boolean_t immu_gfxdvma_enable = B_TRUE;
77 boolean_t immu_intrmap_enable = B_FALSE;
78 boolean_t immu_qinv_enable = B_TRUE;
79
80 /* various quirks that need working around */
81
82 /* XXX We always map page 0 read/write for now */
83 boolean_t immu_quirk_usbpage0 = B_TRUE;
84 boolean_t immu_quirk_usbrmrr = B_TRUE;
85 boolean_t immu_quirk_usbfullpa;
86 boolean_t immu_quirk_mobile4;
87
88 /* debug messages */
89 boolean_t immu_dmar_print;
90
91 /* Tunables */
92 int64_t immu_flush_gran = 5;
93
94 immu_flags_t immu_global_dvma_flags;
95
96 /* ############ END OPTIONS section ################ */
97
98 /*
99 * Global used internally by Intel IOMMU code
100 */
101 dev_info_t *root_devinfo;
102 kmutex_t immu_lock;
103 list_t immu_list;
104 boolean_t immu_setup;
105 boolean_t immu_running;
106 boolean_t immu_quiesced;
107
108 /* ######################## END Globals and tunables ###################### */
109 /* Globals used only in this file */
110 static char **black_array;
111 static uint_t nblacks;
112
113 static char **unity_driver_array;
114 static uint_t nunity;
115 static char **xlate_driver_array;
116 static uint_t nxlate;
117
118 static char **premap_driver_array;
119 static uint_t npremap;
120 static char **nopremap_driver_array;
121 static uint_t nnopremap;
122 /* ###################### Utility routines ############################# */
123
124 /*
125 * Check if the device has mobile 4 chipset
126 */
127 static int
check_mobile4(dev_info_t * dip,void * arg)128 check_mobile4(dev_info_t *dip, void *arg)
129 {
130 _NOTE(ARGUNUSED(arg));
131 int vendor, device;
132 int *ip = (int *)arg;
133
134 vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
135 "vendor-id", -1);
136 device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
137 "device-id", -1);
138
139 if (vendor == 0x8086 && device == 0x2a40) {
140 *ip = B_TRUE;
141 ddi_err(DER_NOTE, dip, "iommu: Mobile 4 chipset detected. "
142 "Force setting IOMMU write buffer");
143 return (DDI_WALK_TERMINATE);
144 } else {
145 return (DDI_WALK_CONTINUE);
146 }
147 }
148
149 static void
map_bios_rsvd_mem(dev_info_t * dip)150 map_bios_rsvd_mem(dev_info_t *dip)
151 {
152 struct memlist *mp;
153
154 /*
155 * Make sure the domain for the device is set up before
156 * mapping anything.
157 */
158 (void) immu_dvma_device_setup(dip, 0);
159
160 memlist_read_lock();
161
162 mp = bios_rsvd;
163 while (mp != NULL) {
164 memrng_t mrng = {0};
165
166 ddi_err(DER_LOG, dip, "iommu: Mapping BIOS rsvd range "
167 "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
168 mp->ml_address + mp->ml_size);
169
170 mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
171 mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
172
173 (void) immu_map_memrange(dip, &mrng);
174
175 mp = mp->ml_next;
176 }
177
178 memlist_read_unlock();
179 }
180
181
182 /*
183 * Check if the driver requests a specific type of mapping.
184 */
185 /*ARGSUSED*/
186 static void
check_conf(dev_info_t * dip,void * arg)187 check_conf(dev_info_t *dip, void *arg)
188 {
189 immu_devi_t *immu_devi;
190 const char *dname;
191 uint_t i;
192 int hasmapprop = 0, haspreprop = 0;
193 boolean_t old_premap;
194
195 /*
196 * Only PCI devices can use an IOMMU. Legacy ISA devices
197 * are handled in check_lpc.
198 */
199 if (!DEVI_IS_PCI(dip))
200 return;
201
202 dname = ddi_driver_name(dip);
203 if (dname == NULL)
204 return;
205 immu_devi = immu_devi_get(dip);
206
207 for (i = 0; i < nunity; i++) {
208 if (strcmp(unity_driver_array[i], dname) == 0) {
209 hasmapprop = 1;
210 immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
211 }
212 }
213
214 for (i = 0; i < nxlate; i++) {
215 if (strcmp(xlate_driver_array[i], dname) == 0) {
216 hasmapprop = 1;
217 immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY;
218 }
219 }
220
221 old_premap = immu_devi->imd_use_premap;
222
223 for (i = 0; i < nnopremap; i++) {
224 if (strcmp(nopremap_driver_array[i], dname) == 0) {
225 haspreprop = 1;
226 immu_devi->imd_use_premap = B_FALSE;
227 }
228 }
229
230 for (i = 0; i < npremap; i++) {
231 if (strcmp(premap_driver_array[i], dname) == 0) {
232 haspreprop = 1;
233 immu_devi->imd_use_premap = B_TRUE;
234 }
235 }
236
237 /*
238 * Report if we changed the value from the default.
239 */
240 if (hasmapprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
241 ddi_err(DER_LOG, dip, "using %s DVMA mapping",
242 immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ?
243 DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE);
244
245 if (haspreprop && (immu_devi->imd_use_premap != old_premap))
246 ddi_err(DER_LOG, dip, "%susing premapped DVMA space",
247 immu_devi->imd_use_premap ? "" : "not ");
248 }
249
250 /*
251 * Check if the device is USB controller
252 */
253 /*ARGSUSED*/
254 static void
check_usb(dev_info_t * dip,void * arg)255 check_usb(dev_info_t *dip, void *arg)
256 {
257 const char *drv = ddi_driver_name(dip);
258 immu_devi_t *immu_devi;
259
260
261 /*
262 * It's not clear if xHCI really needs these quirks; however, to be on
263 * the safe side until we know for certain we add it to the list below.
264 */
265 if (drv == NULL ||
266 (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 &&
267 strcmp(drv, "ehci") != 0 && strcmp(drv, "xhci") != 0)) {
268 return;
269 }
270
271 immu_devi = immu_devi_get(dip);
272
273 /*
274 * If unit mappings are already specified, globally or
275 * locally, we're done here, since that covers both
276 * quirks below.
277 */
278 if (immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY)
279 return;
280
281 /* This must come first since it does unity mapping */
282 if (immu_quirk_usbfullpa == B_TRUE) {
283 immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
284 } else if (immu_quirk_usbrmrr == B_TRUE) {
285 ddi_err(DER_LOG, dip, "Applying USB RMRR quirk");
286 map_bios_rsvd_mem(dip);
287 }
288 }
289
290 /*
291 * Check if the device is a LPC device
292 */
293 /*ARGSUSED*/
294 static void
check_lpc(dev_info_t * dip,void * arg)295 check_lpc(dev_info_t *dip, void *arg)
296 {
297 immu_devi_t *immu_devi;
298
299 immu_devi = immu_devi_get(dip);
300 if (immu_devi->imd_lpc == B_TRUE) {
301 ddi_err(DER_LOG, dip, "iommu: Found LPC device");
302 /* This will put the immu_devi on the LPC "specials" list */
303 (void) immu_dvma_device_setup(dip, IMMU_FLAGS_SLEEP);
304 }
305 }
306
307 /*
308 * Check if the device is a GFX device
309 */
310 /*ARGSUSED*/
311 static void
check_gfx(dev_info_t * dip,void * arg)312 check_gfx(dev_info_t *dip, void *arg)
313 {
314 immu_devi_t *immu_devi;
315
316 immu_devi = immu_devi_get(dip);
317 if (immu_devi->imd_display == B_TRUE) {
318 immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
319 ddi_err(DER_LOG, dip, "iommu: Found GFX device");
320 /* This will put the immu_devi on the GFX "specials" list */
321 (void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
322 }
323 }
324
/*
 * Apply 'f' to every node below the root nexus.  The root nexus is
 * single-threaded (ndi_devi_enter) for the duration of the walk so
 * the tree cannot change underneath us.
 */
static void
walk_tree(int (*f)(dev_info_t *, void *), void *arg)
{
	ndi_devi_enter(root_devinfo);
	ddi_walk_devs(ddi_get_child(root_devinfo), f, arg);
	ndi_devi_exit(root_devinfo);
}
332
/*
 * Walk callback run before IOMMU setup; currently the only pre-setup
 * quirk is the Mobile 4 chipset check (see check_mobile4()).
 */
static int
check_pre_setup_quirks(dev_info_t *dip, void *arg)
{
	/* just 1 check right now */
	return (check_mobile4(dip, arg));
}
339
/*
 * Walk callback run just before the IOMMU units are started: make
 * sure each node has its immu_devi state, then apply the per-device
 * checks.  immu_devi_set() must come first; the check_* helpers all
 * rely on immu_devi_get() succeeding.
 */
static int
check_pre_startup_quirks(dev_info_t *dip, void *arg)
{
	if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) {
		ddi_err(DER_PANIC, dip, "Failed to get immu_devi");
	}

	/* GFX devices: force unity mapping, add to GFX specials list */
	check_gfx(dip, arg);

	/* LPC devices: add to LPC specials list */
	check_lpc(dip, arg);

	/* per-driver rootnex.conf mapping configuration */
	check_conf(dip, arg);

	/* USB controller quirks (may depend on flags set above) */
	check_usb(dip, arg);

	return (DDI_WALK_CONTINUE);
}
357
/*
 * Scan the device tree for quirks that must be known before IOMMU
 * setup; the Mobile 4 detection result lands in immu_quirk_mobile4.
 */
static void
pre_setup_quirks(void)
{
	walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4);
}
363
/*
 * Apply per-device quirks and configuration, then map the RMRR
 * (BIOS reserved memory) regions, immediately before unit startup.
 */
static void
pre_startup_quirks(void)
{
	walk_tree(check_pre_startup_quirks, NULL);

	immu_dmar_rmrr_map();
}
371
/*
 * Look up the rootnex.conf global string property 'bopt' into *val.
 * Returns the ddi_prop_lookup_string() status; on DDI_PROP_SUCCESS
 * the caller must free *val with ddi_prop_free().
 */
static int
get_conf_str(char *bopt, char **val)
{
	int ret;

	/*
	 * Check the rootnex.conf property
	 * Fake up a dev_t since searching the global
	 * property list needs it
	 */
	ret = ddi_prop_lookup_string(
	    makedevice(ddi_name_to_major("rootnex"), 0),
	    root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
	    bopt, val);

	return (ret);
}
389
390 /*
391 * get_conf_opt()
392 * get a rootnex.conf setting (always a boolean)
393 */
394 static void
get_conf_opt(char * bopt,boolean_t * kvar)395 get_conf_opt(char *bopt, boolean_t *kvar)
396 {
397 char *val = NULL;
398
399 /*
400 * Check the rootnex.conf property
401 * Fake up a dev_t since searching the global
402 * property list needs it
403 */
404
405 if (get_conf_str(bopt, &val) != DDI_PROP_SUCCESS)
406 return;
407
408 if (strcmp(val, "true") == 0) {
409 *kvar = B_TRUE;
410 } else if (strcmp(val, "false") == 0) {
411 *kvar = B_FALSE;
412 } else {
413 ddi_err(DER_WARN, NULL, "rootnex.conf switch %s=\"%s\" ",
414 "is not set to true or false. Ignoring option.",
415 bopt, val);
416 }
417 ddi_prop_free(val);
418 }
419
420 /*
421 * get_bootopt()
422 * check a boot option (always a boolean)
423 */
424 static int
get_boot_str(char * bopt,char ** val)425 get_boot_str(char *bopt, char **val)
426 {
427 int ret;
428
429 ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo,
430 DDI_PROP_DONTPASS, bopt, val);
431
432 return (ret);
433 }
434
435 static void
get_bootopt(char * bopt,boolean_t * kvar)436 get_bootopt(char *bopt, boolean_t *kvar)
437 {
438 char *val = NULL;
439
440 /*
441 * All boot options set at the GRUB menu become
442 * properties on the rootnex.
443 */
444 if (get_boot_str(bopt, &val) != DDI_PROP_SUCCESS)
445 return;
446
447 if (strcmp(val, "true") == 0) {
448 *kvar = B_TRUE;
449 } else if (strcmp(val, "false") == 0) {
450 *kvar = B_FALSE;
451 } else {
452 ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ",
453 "is not set to true or false. Ignoring option.",
454 bopt, val);
455 }
456 ddi_prop_free(val);
457 }
458
459 static void
get_boot_dvma_mode(void)460 get_boot_dvma_mode(void)
461 {
462 char *val = NULL;
463
464 if (get_boot_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
465 != DDI_PROP_SUCCESS)
466 return;
467
468 if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
469 immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
470 } else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
471 immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
472 } else {
473 ddi_err(DER_WARN, NULL, "bad value \"%s\" for boot option %s",
474 val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
475 }
476 ddi_prop_free(val);
477 }
478
479 static void
get_conf_dvma_mode(void)480 get_conf_dvma_mode(void)
481 {
482 char *val = NULL;
483
484 if (get_conf_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
485 != DDI_PROP_SUCCESS)
486 return;
487
488 if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
489 immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
490 } else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
491 immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
492 } else {
493 ddi_err(DER_WARN, NULL, "bad value \"%s\" for rootnex "
494 "option %s", val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
495 }
496 ddi_prop_free(val);
497 }
498
499
500 static void
get_conf_tunables(char * bopt,int64_t * ivar)501 get_conf_tunables(char *bopt, int64_t *ivar)
502 {
503 int64_t *iarray;
504 uint_t n;
505
506 /*
507 * Check the rootnex.conf property
508 * Fake up a dev_t since searching the global
509 * property list needs it
510 */
511 if (ddi_prop_lookup_int64_array(
512 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
513 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
514 &iarray, &n) != DDI_PROP_SUCCESS) {
515 return;
516 }
517
518 if (n != 1) {
519 ddi_err(DER_WARN, NULL, "More than one value specified for "
520 "%s property. Ignoring and using default",
521 "immu-flush-gran");
522 ddi_prop_free(iarray);
523 return;
524 }
525
526 if (iarray[0] < 0) {
527 ddi_err(DER_WARN, NULL, "Negative value specified for "
528 "%s property. Inoring and Using default value",
529 "immu-flush-gran");
530 ddi_prop_free(iarray);
531 return;
532 }
533
534 *ivar = iarray[0];
535
536 ddi_prop_free(iarray);
537 }
538
/*
 * Read all Intel IOMMU related settings from rootnex.conf.  Called
 * before read_boot_options() so boot options can override anything
 * set here.
 */
static void
read_conf_options(void)
{
	/* enable/disable options */
	get_conf_opt("immu-enable", &immu_enable);
	get_conf_opt("immu-dvma-enable", &immu_dvma_enable);
	get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
	get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable);
	get_conf_opt("immu-qinv-enable", &immu_qinv_enable);

	/* workaround switches */
	get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
	get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
	get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);

	/* debug printing */
	get_conf_opt("immu-dmar-print", &immu_dmar_print);

	/* get tunables */
	get_conf_tunables("immu-flush-gran", &immu_flush_gran);

	get_conf_dvma_mode();
}
562
/*
 * Read all Intel IOMMU related boot options (rootnex properties set
 * by the boot loader).  These override the rootnex.conf settings read
 * by read_conf_options().
 */
static void
read_boot_options(void)
{
	/* enable/disable options */
	get_bootopt("immu-enable", &immu_enable);
	get_bootopt("immu-dvma-enable", &immu_dvma_enable);
	get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
	get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
	get_bootopt("immu-qinv-enable", &immu_qinv_enable);

	/* workaround switches */
	get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
	get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
	get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);

	/* debug printing */
	get_bootopt("immu-dmar-print", &immu_dmar_print);

	get_boot_dvma_mode();
}
583
584 static void
mapping_list_setup(void)585 mapping_list_setup(void)
586 {
587 char **string_array;
588 uint_t nstrings;
589
590 if (ddi_prop_lookup_string_array(
591 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
592 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
593 "immu-dvma-unity-drivers",
594 &string_array, &nstrings) == DDI_PROP_SUCCESS) {
595 unity_driver_array = string_array;
596 nunity = nstrings;
597 }
598
599 if (ddi_prop_lookup_string_array(
600 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
601 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
602 "immu-dvma-xlate-drivers",
603 &string_array, &nstrings) == DDI_PROP_SUCCESS) {
604 xlate_driver_array = string_array;
605 nxlate = nstrings;
606 }
607
608 if (ddi_prop_lookup_string_array(
609 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
610 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
611 "immu-dvma-premap-drivers",
612 &string_array, &nstrings) == DDI_PROP_SUCCESS) {
613 premap_driver_array = string_array;
614 npremap = nstrings;
615 }
616
617 if (ddi_prop_lookup_string_array(
618 makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
619 DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
620 "immu-dvma-nopremap-drivers",
621 &string_array, &nstrings) == DDI_PROP_SUCCESS) {
622 nopremap_driver_array = string_array;
623 nnopremap = nstrings;
624 }
625 }
626
627 /*
628 * Note, this will not catch hardware not enumerated
629 * in early boot
630 */
631 static boolean_t
blacklisted_driver(void)632 blacklisted_driver(void)
633 {
634 char **strptr;
635 int i;
636 major_t maj;
637
638 /* need at least 2 strings */
639 if (nblacks < 2) {
640 return (B_FALSE);
641 }
642
643 for (i = 0; nblacks - i > 1; i++) {
644 strptr = &black_array[i];
645 if (strcmp(*strptr++, "DRIVER") == 0) {
646 if ((maj = ddi_name_to_major(*strptr++))
647 != DDI_MAJOR_T_NONE) {
648 /* is there hardware bound to this drvr */
649 if (devnamesp[maj].dn_head != NULL) {
650 return (B_TRUE);
651 }
652 }
653 i += 1; /* for loop adds 1, so add only 1 here */
654 }
655 }
656
657 return (B_FALSE);
658 }
659
/*
 * Check whether this system's SMBIOS identity matches a blacklist
 * entry.  Entries are 4-tuples ("SMBIOS", <manufacturer>, <product>,
 * <version>), where an empty product or version string acts as a
 * wildcard.  Returns B_TRUE on a match.
 */
static boolean_t
blacklisted_smbios(void)
{
	id_t smid;
	smbios_hdl_t *smhdl;
	smbios_info_t sminf;
	smbios_system_t smsys;
	char *mfg, *product, *version;
	char **strptr;
	int i;

	/* need at least 4 strings for this setting */
	if (nblacks < 4) {
		return (B_FALSE);
	}

	/*
	 * NOTE(review): the smbios handle opened here is never closed
	 * on any path -- confirm whether smbios_close() is required in
	 * this (boot-time, once-only) context.
	 */
	smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL);
	if (smhdl == NULL ||
	    (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR ||
	    smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) {
		return (B_FALSE);
	}

	mfg = (char *)sminf.smbi_manufacturer;
	product = (char *)sminf.smbi_product;
	version = (char *)sminf.smbi_version;

	ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n");
	ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg);
	ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
	ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);

	/* stop while at least a full 4-tuple remains */
	for (i = 0; nblacks - i > 3; i++) {
		strptr = &black_array[i];
		if (strcmp(*strptr++, "SMBIOS") == 0) {
			/*
			 * NOTE(review): when the product (or version)
			 * pattern is the empty-string wildcard, the
			 * short-circuit skips the strcmp(*strptr++, ...)
			 * and strptr is not advanced past that slot, so
			 * the next comparison examines the wrong array
			 * element.  Confirm whether mixing wildcard and
			 * non-wildcard fields is intended to work.
			 */
			if (strcmp(*strptr++, mfg) == 0 &&
			    (*strptr[0] == '\0' ||
			    strcmp(*strptr++, product) == 0) &&
			    (*strptr[0] == '\0' ||
			    strcmp(*strptr++, version) == 0)) {
				return (B_TRUE);
			}
			i += 3;
		}
	}

	return (B_FALSE);
}
708
709 static boolean_t
blacklisted_acpi(void)710 blacklisted_acpi(void)
711 {
712 if (nblacks == 0) {
713 return (B_FALSE);
714 }
715
716 return (immu_dmar_blacklisted(black_array, nblacks));
717 }
718
719 /*
720 * Check if system is blacklisted by Intel IOMMU driver
721 * i.e. should Intel IOMMU be disabled on this system
722 * Currently a system can be blacklistd based on the
723 * following bases:
724 *
725 * 1. DMAR ACPI table information.
726 * This information includes things like
727 * manufacturer and revision number. If rootnex.conf
728 * has matching info set in its blacklist property
729 * then Intel IOMMu will be disabled
730 *
731 * 2. SMBIOS information
732 *
733 * 3. Driver installed - useful if a particular
734 * driver or hardware is toxic if Intel IOMMU
735 * is turned on.
736 */
737
/*
 * blacklist_setup()
 *	Load the "immu-blacklist" rootnex.conf property into
 *	black_array/nblacks.  The list is a flat string array of
 *	(criterion, value, ...) tuples, so its length must be even;
 *	an odd-length list is rejected with a warning.
 */
static void
blacklist_setup(void)
{
	char **string_array;
	uint_t nstrings;

	/*
	 * Check the rootnex.conf blacklist property.
	 * Fake up a dev_t since searching the global
	 * property list needs it
	 */
	if (ddi_prop_lookup_string_array(
	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist",
	    &string_array, &nstrings) != DDI_PROP_SUCCESS) {
		return;
	}

	/* smallest blacklist criteria works with multiples of 2 */
	if (nstrings % 2 != 0) {
		ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist "
		    "rootnex.conf: number of strings must be a "
		    "multiple of 2");
		ddi_prop_free(string_array);
		return;
	}

	black_array = string_array;
	nblacks = nstrings;
}
768
769 static void
blacklist_destroy(void)770 blacklist_destroy(void)
771 {
772 if (black_array) {
773 ddi_prop_free(black_array);
774 black_array = NULL;
775 nblacks = 0;
776 }
777 }
778
779 static char *
immu_alloc_name(const char * str,int instance)780 immu_alloc_name(const char *str, int instance)
781 {
782 size_t slen;
783 char *s;
784
785 slen = strlen(str) + IMMU_ISTRLEN + 1;
786 s = kmem_zalloc(slen, VM_SLEEP);
787 if (s != NULL)
788 (void) snprintf(s, slen, "%s%d", str, instance);
789
790 return (s);
791 }
792
793
794 /*
795 * Now set all the fields in the order they are defined
796 * We do this only as a defensive-coding practice, it is
797 * not a correctness issue.
798 */
799 static void *
immu_state_alloc(int seg,void * dmar_unit)800 immu_state_alloc(int seg, void *dmar_unit)
801 {
802 immu_t *immu;
803 char *nodename, *hcachename, *pcachename;
804 int instance;
805
806 dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
807 if (dmar_unit == NULL) {
808 /* No more IOMMUs in this segment */
809 return (NULL);
810 }
811
812 immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP);
813
814 mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL);
815
816 mutex_enter(&(immu->immu_lock));
817
818 immu->immu_dmar_unit = dmar_unit;
819 immu->immu_dip = immu_dmar_unit_dip(dmar_unit);
820
821 nodename = ddi_node_name(immu->immu_dip);
822 instance = ddi_get_instance(immu->immu_dip);
823
824 immu->immu_name = immu_alloc_name(nodename, instance);
825 if (immu->immu_name == NULL)
826 return (NULL);
827
828 /*
829 * the immu_intr_lock mutex is grabbed by the IOMMU
830 * unit's interrupt handler so we need to use an
831 * interrupt cookie for the mutex
832 */
833 mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER,
834 (void *)ipltospl(IMMU_INTR_IPL));
835
836 /* IOMMU regs related */
837 mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
838 cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
839 immu->immu_regs_busy = B_FALSE;
840
841 /* DVMA related */
842 immu->immu_dvma_coherent = B_FALSE;
843
844 /* DVMA context related */
845 rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
846
847 /* DVMA domain related */
848 list_create(&(immu->immu_domain_list), sizeof (domain_t),
849 offsetof(domain_t, dom_immu_node));
850
851 /* DVMA special device lists */
852 immu->immu_dvma_gfx_only = B_FALSE;
853 list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t),
854 offsetof(immu_devi_t, imd_spc_node));
855 list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t),
856 offsetof(immu_devi_t, imd_spc_node));
857
858 /* interrupt remapping related */
859 mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL);
860
861 /* qinv related */
862 mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL);
863
864 /*
865 * insert this immu unit into the system-wide list
866 */
867 list_insert_tail(&immu_list, immu);
868
869 pcachename = immu_alloc_name("immu_pgtable_cache", instance);
870 if (pcachename == NULL)
871 return (NULL);
872
873 hcachename = immu_alloc_name("immu_hdl_cache", instance);
874 if (hcachename == NULL)
875 return (NULL);
876
877 immu->immu_pgtable_cache = kmem_cache_create(pcachename,
878 sizeof (pgtable_t), 0, pgtable_ctor, pgtable_dtor, NULL, immu,
879 NULL, 0);
880 immu->immu_hdl_cache = kmem_cache_create(hcachename,
881 sizeof (immu_hdl_priv_t), 64, immu_hdl_priv_ctor,
882 NULL, NULL, immu, NULL, 0);
883
884 mutex_exit(&(immu->immu_lock));
885
886 ddi_err(DER_LOG, immu->immu_dip, "unit setup");
887
888 immu_dmar_set_immu(dmar_unit, immu);
889
890 return (dmar_unit);
891 }
892
893 static void
immu_subsystems_setup(void)894 immu_subsystems_setup(void)
895 {
896 int seg;
897 void *unit_hdl;
898
899 ddi_err(DER_VERB, NULL,
900 "Creating state structures for Intel IOMMU units");
901
902 mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
903 list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
904
905 mutex_enter(&immu_lock);
906
907 unit_hdl = NULL;
908 for (seg = 0; seg < IMMU_MAXSEG; seg++) {
909 while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
910 ;
911 }
912 }
913
914 immu_regs_setup(&immu_list); /* subsequent code needs this first */
915 immu_dvma_setup(&immu_list);
916 if (immu_qinv_setup(&immu_list) == DDI_SUCCESS)
917 immu_intrmap_setup(&immu_list);
918 else
919 immu_intrmap_enable = B_FALSE;
920
921 mutex_exit(&immu_lock);
922 }
923
924 /*
925 * immu_subsystems_startup()
926 * startup all units that were setup
927 */
928 static void
immu_subsystems_startup(void)929 immu_subsystems_startup(void)
930 {
931 immu_t *immu;
932 iommulib_ops_t *iommulib_ops;
933
934 mutex_enter(&immu_lock);
935
936 immu_dmar_startup();
937
938 immu = list_head(&immu_list);
939 for (; immu; immu = list_next(&immu_list, immu)) {
940
941 mutex_enter(&(immu->immu_lock));
942
943 immu_intr_register(immu);
944 immu_dvma_startup(immu);
945 immu_intrmap_startup(immu);
946 immu_qinv_startup(immu);
947
948 /*
949 * Set IOMMU unit's regs to do
950 * the actual startup. This will
951 * set immu->immu_running field
952 * if the unit is successfully
953 * started
954 */
955 immu_regs_startup(immu);
956
957 mutex_exit(&(immu->immu_lock));
958
959 iommulib_ops = kmem_alloc(sizeof (iommulib_ops_t), KM_SLEEP);
960 *iommulib_ops = immulib_ops;
961 iommulib_ops->ilops_data = (void *)immu;
962 (void) iommulib_iommu_register(immu->immu_dip, iommulib_ops,
963 &immu->immu_iommulib_handle);
964 }
965
966 mutex_exit(&immu_lock);
967 }
968
969 /* ################## Intel IOMMU internal interfaces ###################### */
970
971 /*
972 * Internal interfaces for IOMMU code (i.e. not exported to rootnex
973 * or rest of system)
974 */
975
976 /*
977 * ddip can be NULL, in which case we walk up until we find the root dip
978 * NOTE: We never visit the root dip since its not a hardware node
979 */
980 int
immu_walk_ancestor(dev_info_t * rdip,dev_info_t * ddip,int (* func)(dev_info_t *,void * arg),void * arg,int * lvlp,immu_flags_t immu_flags)981 immu_walk_ancestor(
982 dev_info_t *rdip,
983 dev_info_t *ddip,
984 int (*func)(dev_info_t *, void *arg),
985 void *arg,
986 int *lvlp,
987 immu_flags_t immu_flags)
988 {
989 dev_info_t *pdip;
990 int level;
991 int error = DDI_SUCCESS;
992
993 /* ddip and immu can be NULL */
994
995 /* Hold rdip so that branch is not detached */
996 ndi_hold_devi(rdip);
997 for (pdip = rdip, level = 1; pdip && pdip != root_devinfo;
998 pdip = ddi_get_parent(pdip), level++) {
999
1000 if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) {
1001 error = DDI_FAILURE;
1002 break;
1003 }
1004 if (func(pdip, arg) == DDI_WALK_TERMINATE) {
1005 break;
1006 }
1007 if (immu_flags & IMMU_FLAGS_DONTPASS) {
1008 break;
1009 }
1010 if (pdip == ddip) {
1011 break;
1012 }
1013 }
1014
1015 ndi_rele_devi(rdip);
1016
1017 if (lvlp)
1018 *lvlp = level;
1019
1020 return (error);
1021 }
1022
1023 /* ######################## Intel IOMMU entry points ####################### */
1024 /*
1025 * immu_init()
1026 * called from rootnex_attach(). setup but don't startup the Intel IOMMU
1027 * This is the first function called in Intel IOMMU code
1028 */
1029 void
immu_init(void)1030 immu_init(void)
1031 {
1032 char *phony_reg = "A thing of beauty is a joy forever";
1033
1034 /* Set some global shorthands that are needed by all of IOMMU code */
1035 root_devinfo = ddi_root_node();
1036
1037 /*
1038 * Intel IOMMU only supported only if MMU(CPU) page size is ==
1039 * IOMMU pages size.
1040 */
1041 /*LINTED*/
1042 if (MMU_PAGESIZE != IMMU_PAGESIZE) {
1043 ddi_err(DER_WARN, NULL,
1044 "MMU page size (%d) is not equal to\n"
1045 "IOMMU page size (%d). "
1046 "Disabling Intel IOMMU. ",
1047 MMU_PAGESIZE, IMMU_PAGESIZE);
1048 immu_enable = B_FALSE;
1049 return;
1050 }
1051
1052 /*
1053 * Read rootnex.conf options. Do this before
1054 * boot options so boot options can override .conf options.
1055 */
1056 read_conf_options();
1057
1058 /*
1059 * retrieve the Intel IOMMU boot options.
1060 * Do this before parsing immu ACPI table
1061 * as a boot option could potentially affect
1062 * ACPI parsing.
1063 */
1064 ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n");
1065 read_boot_options();
1066
1067 /*
1068 * Check the IOMMU enable boot-option first.
1069 * This is so that we can skip parsing the ACPI table
1070 * if necessary because that may cause problems in
1071 * systems with buggy BIOS or ACPI tables
1072 */
1073 if (immu_enable == B_FALSE) {
1074 return;
1075 }
1076
1077 if (immu_intrmap_enable == B_TRUE)
1078 immu_qinv_enable = B_TRUE;
1079
1080 /*
1081 * Next, check if the system even has an Intel IOMMU
1082 * We use the presence or absence of the IOMMU ACPI
1083 * table to detect Intel IOMMU.
1084 */
1085 if (immu_dmar_setup() != DDI_SUCCESS) {
1086 immu_enable = B_FALSE;
1087 return;
1088 }
1089
1090 mapping_list_setup();
1091
1092 /*
1093 * Check blacklists
1094 */
1095 blacklist_setup();
1096
1097 if (blacklisted_smbios() == B_TRUE) {
1098 blacklist_destroy();
1099 immu_enable = B_FALSE;
1100 return;
1101 }
1102
1103 if (blacklisted_driver() == B_TRUE) {
1104 blacklist_destroy();
1105 immu_enable = B_FALSE;
1106 return;
1107 }
1108
1109 /*
1110 * Read the "raw" DMAR ACPI table to get information
1111 * and convert into a form we can use.
1112 */
1113 if (immu_dmar_parse() != DDI_SUCCESS) {
1114 blacklist_destroy();
1115 immu_enable = B_FALSE;
1116 return;
1117 }
1118
1119 /*
1120 * now that we have processed the ACPI table
1121 * check if we need to blacklist this system
1122 * based on ACPI info
1123 */
1124 if (blacklisted_acpi() == B_TRUE) {
1125 immu_dmar_destroy();
1126 blacklist_destroy();
1127 immu_enable = B_FALSE;
1128 return;
1129 }
1130
1131 blacklist_destroy();
1132
1133 /*
1134 * Check if system has HW quirks.
1135 */
1136 pre_setup_quirks();
1137
1138 /* Now do the rest of the setup */
1139 immu_subsystems_setup();
1140
1141 /*
1142 * Now that the IMMU is setup, create a phony
1143 * reg prop so that suspend/resume works
1144 */
1145 if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg",
1146 (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) {
1147 ddi_err(DER_PANIC, NULL, "Failed to create reg prop for "
1148 "rootnex node");
1149 /*NOTREACHED*/
1150 }
1151
1152 immu_setup = B_TRUE;
1153 }
1154
1155 /*
1156 * immu_startup()
1157 * called directly by boot code to startup
1158 * all units of the IOMMU
1159 */
1160 void
immu_startup(void)1161 immu_startup(void)
1162 {
1163 /*
1164 * If IOMMU is disabled, do nothing
1165 */
1166 if (immu_enable == B_FALSE) {
1167 return;
1168 }
1169
1170 if (immu_setup == B_FALSE) {
1171 ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
1172 "skipping IOMMU startup");
1173 return;
1174 }
1175
1176 pre_startup_quirks();
1177
1178 ddi_err(DER_CONT, NULL,
1179 "?Starting Intel IOMMU (dmar) units...\n");
1180
1181 immu_subsystems_startup();
1182
1183 immu_running = B_TRUE;
1184 }
1185
1186 /*
1187 * Hook to notify IOMMU code of device tree changes
1188 */
1189 void
immu_device_tree_changed(void)1190 immu_device_tree_changed(void)
1191 {
1192 if (immu_setup == B_FALSE) {
1193 return;
1194 }
1195
1196 ddi_err(DER_WARN, NULL, "Intel IOMMU currently "
1197 "does not use device tree updates");
1198 }
1199
1200 /*
1201 * Hook to notify IOMMU code of memory changes
1202 */
1203 void
immu_physmem_update(uint64_t addr,uint64_t size)1204 immu_physmem_update(uint64_t addr, uint64_t size)
1205 {
1206 if (immu_setup == B_FALSE) {
1207 return;
1208 }
1209 immu_dvma_physmem_update(addr, size);
1210 }
1211
1212 /*
1213 * immu_quiesce()
1214 * quiesce all units that are running
1215 */
1216 int
immu_quiesce(void)1217 immu_quiesce(void)
1218 {
1219 immu_t *immu;
1220 int ret = DDI_SUCCESS;
1221
1222 mutex_enter(&immu_lock);
1223
1224 if (immu_running == B_FALSE) {
1225 mutex_exit(&immu_lock);
1226 return (DDI_SUCCESS);
1227 }
1228
1229 immu = list_head(&immu_list);
1230 for (; immu; immu = list_next(&immu_list, immu)) {
1231
1232 /* if immu is not running, we dont quiesce */
1233 if (immu->immu_regs_running == B_FALSE)
1234 continue;
1235
1236 /* flush caches */
1237 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1238 immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1239 immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1240 rw_exit(&(immu->immu_ctx_rwlock));
1241 immu_regs_wbf_flush(immu);
1242
1243 mutex_enter(&(immu->immu_lock));
1244
1245 /*
1246 * Set IOMMU unit's regs to do
1247 * the actual shutdown.
1248 */
1249 immu_regs_shutdown(immu);
1250 immu_regs_suspend(immu);
1251
1252 /* if immu is still running, we failed */
1253 if (immu->immu_regs_running == B_TRUE)
1254 ret = DDI_FAILURE;
1255 else
1256 immu->immu_regs_quiesced = B_TRUE;
1257
1258 mutex_exit(&(immu->immu_lock));
1259 }
1260
1261 if (ret == DDI_SUCCESS) {
1262 immu_running = B_FALSE;
1263 immu_quiesced = B_TRUE;
1264 }
1265 mutex_exit(&immu_lock);
1266
1267 return (ret);
1268 }
1269
1270 /*
1271 * immu_unquiesce()
1272 * unquiesce all units
1273 */
1274 int
immu_unquiesce(void)1275 immu_unquiesce(void)
1276 {
1277 immu_t *immu;
1278 int ret = DDI_SUCCESS;
1279
1280 mutex_enter(&immu_lock);
1281
1282 if (immu_quiesced == B_FALSE) {
1283 mutex_exit(&immu_lock);
1284 return (DDI_SUCCESS);
1285 }
1286
1287 immu = list_head(&immu_list);
1288 for (; immu; immu = list_next(&immu_list, immu)) {
1289
1290 mutex_enter(&(immu->immu_lock));
1291
1292 /* if immu was not quiesced, i.e was not running before */
1293 if (immu->immu_regs_quiesced == B_FALSE) {
1294 mutex_exit(&(immu->immu_lock));
1295 continue;
1296 }
1297
1298 if (immu_regs_resume(immu) != DDI_SUCCESS) {
1299 ret = DDI_FAILURE;
1300 mutex_exit(&(immu->immu_lock));
1301 continue;
1302 }
1303
1304 /* flush caches before unquiesce */
1305 rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1306 immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1307 immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1308 rw_exit(&(immu->immu_ctx_rwlock));
1309
1310 /*
1311 * Set IOMMU unit's regs to do
1312 * the actual startup. This will
1313 * set immu->immu_regs_running field
1314 * if the unit is successfully
1315 * started
1316 */
1317 immu_regs_startup(immu);
1318
1319 if (immu->immu_regs_running == B_FALSE) {
1320 ret = DDI_FAILURE;
1321 } else {
1322 immu_quiesced = B_TRUE;
1323 immu_running = B_TRUE;
1324 immu->immu_regs_quiesced = B_FALSE;
1325 }
1326
1327 mutex_exit(&(immu->immu_lock));
1328 }
1329
1330 mutex_exit(&immu_lock);
1331
1332 return (ret);
1333 }
1334
1335 void
immu_init_inv_wait(immu_inv_wait_t * iwp,const char * name,boolean_t sync)1336 immu_init_inv_wait(immu_inv_wait_t *iwp, const char *name, boolean_t sync)
1337 {
1338 caddr_t vaddr;
1339 uint64_t paddr;
1340
1341 iwp->iwp_sync = sync;
1342
1343 vaddr = (caddr_t)&iwp->iwp_vstatus;
1344 paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
1345 paddr += ((uintptr_t)vaddr) & MMU_PAGEOFFSET;
1346
1347 iwp->iwp_pstatus = paddr;
1348 iwp->iwp_name = name;
1349 }
1350
1351 /* ############## END Intel IOMMU entry points ################## */
1352