xref: /illumos-gate/usr/src/uts/i86pc/io/immu.c (revision aaceae985c2e78cadef76bf0b7b50ed887ccb3a6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23  * All rights reserved.
24  */
25 /*
26  * Copyright (c) 2009, Intel Corporation.
27  * All rights reserved.
28  */
29 
30 /*
31  * Copyright 2023 Oxide Computer Company
32  */
33 
34 /*
35  * Intel IOMMU implementation
36  * This file contains Intel IOMMU code exported
37  * to the rest of the system and code that deals
38  * with the Intel IOMMU as a whole.
39  */
40 
41 #include <sys/conf.h>
42 #include <sys/modctl.h>
43 #include <sys/pci.h>
44 #include <sys/pci_impl.h>
45 #include <sys/sysmacros.h>
46 #include <sys/ddi.h>
47 #include <sys/ddidmareq.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/ddifm.h>
50 #include <sys/sunndi.h>
51 #include <sys/debug.h>
52 #include <sys/fm/protocol.h>
53 #include <sys/note.h>
54 #include <sys/apic.h>
55 #include <vm/hat_i86.h>
56 #include <sys/smp_impldefs.h>
57 #include <sys/spl.h>
58 #include <sys/archsystm.h>
59 #include <sys/x86_archext.h>
60 #include <sys/avl.h>
61 #include <sys/bootconf.h>
62 #include <sys/bootinfo.h>
63 #include <sys/atomic.h>
64 #include <sys/immu.h>
65 /* ########################### Globals and tunables ######################## */
66 /*
67  * Global switches (boolean) that can be toggled either via boot options
68  * or via /etc/system or kmdb
69  */
70 
71 /* Various features */
72 boolean_t immu_enable = B_TRUE;
73 boolean_t immu_dvma_enable = B_TRUE;
74 
75 /* accessed in other files so not static */
76 boolean_t immu_gfxdvma_enable = B_TRUE;
77 boolean_t immu_intrmap_enable = B_FALSE;
78 boolean_t immu_qinv_enable = B_TRUE;
79 
80 /* various quirks that need working around */
81 
82 /* XXX We always map page 0 read/write for now */
83 boolean_t immu_quirk_usbpage0 = B_TRUE;
84 boolean_t immu_quirk_usbrmrr = B_TRUE;
85 boolean_t immu_quirk_usbfullpa;
86 boolean_t immu_quirk_mobile4;
87 
88 /* debug messages */
89 boolean_t immu_dmar_print;
90 
91 /* Tunables */
92 int64_t immu_flush_gran = 5;
93 
94 immu_flags_t immu_global_dvma_flags;
95 
96 /* ############  END OPTIONS section ################ */
97 
98 /*
99  * Global used internally by Intel IOMMU code
100  */
101 dev_info_t *root_devinfo;
102 kmutex_t immu_lock;
103 list_t immu_list;
104 boolean_t immu_setup;
105 boolean_t immu_running;
106 boolean_t immu_quiesced;
107 
108 /* ######################## END Globals and tunables ###################### */
109 /* Globals used only in this file */
110 static char **black_array;
111 static uint_t nblacks;
112 
113 static char **unity_driver_array;
114 static uint_t nunity;
115 static char **xlate_driver_array;
116 static uint_t nxlate;
117 
118 static char **premap_driver_array;
119 static uint_t npremap;
120 static char **nopremap_driver_array;
121 static uint_t nnopremap;
122 /* ###################### Utility routines ############################# */
123 
124 /*
125  * Check if the device has mobile 4 chipset
126  */
127 static int
128 check_mobile4(dev_info_t *dip, void *arg)
129 {
130 	_NOTE(ARGUNUSED(arg));
131 	int vendor, device;
132 	int *ip = (int *)arg;
133 
134 	vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
135 	    "vendor-id", -1);
136 	device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
137 	    "device-id", -1);
138 
139 	if (vendor == 0x8086 && device == 0x2a40) {
140 		*ip = B_TRUE;
141 		ddi_err(DER_NOTE, dip, "iommu: Mobile 4 chipset detected. "
142 		    "Force setting IOMMU write buffer");
143 		return (DDI_WALK_TERMINATE);
144 	} else {
145 		return (DDI_WALK_CONTINUE);
146 	}
147 }
148 
149 static void
150 map_bios_rsvd_mem(dev_info_t *dip)
151 {
152 	struct memlist *mp;
153 
154 	/*
155 	 * Make sure the domain for the device is set up before
156 	 * mapping anything.
157 	 */
158 	(void) immu_dvma_device_setup(dip, 0);
159 
160 	memlist_read_lock();
161 
162 	mp = bios_rsvd;
163 	while (mp != NULL) {
164 		memrng_t mrng = {0};
165 
166 		ddi_err(DER_LOG, dip, "iommu: Mapping BIOS rsvd range "
167 		    "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
168 		    mp->ml_address + mp->ml_size);
169 
170 		mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
171 		mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
172 
173 		(void) immu_map_memrange(dip, &mrng);
174 
175 		mp = mp->ml_next;
176 	}
177 
178 	memlist_read_unlock();
179 }
180 
181 
182 /*
183  * Check if the driver requests a specific type of mapping.
184  */
185 /*ARGSUSED*/
186 static void
187 check_conf(dev_info_t *dip, void *arg)
188 {
189 	immu_devi_t *immu_devi;
190 	const char *dname;
191 	uint_t i;
192 	int hasmapprop = 0, haspreprop = 0;
193 	boolean_t old_premap;
194 
195 	/*
196 	 * Only PCI devices can use an IOMMU. Legacy ISA devices
197 	 * are handled in check_lpc.
198 	 */
199 	if (!DEVI_IS_PCI(dip))
200 		return;
201 
202 	dname = ddi_driver_name(dip);
203 	if (dname == NULL)
204 		return;
205 	immu_devi = immu_devi_get(dip);
206 
207 	for (i = 0; i < nunity; i++) {
208 		if (strcmp(unity_driver_array[i], dname) == 0) {
209 			hasmapprop = 1;
210 			immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
211 		}
212 	}
213 
214 	for (i = 0; i < nxlate; i++) {
215 		if (strcmp(xlate_driver_array[i], dname) == 0) {
216 			hasmapprop = 1;
217 			immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY;
218 		}
219 	}
220 
221 	old_premap = immu_devi->imd_use_premap;
222 
223 	for (i = 0; i < nnopremap; i++) {
224 		if (strcmp(nopremap_driver_array[i], dname) == 0) {
225 			haspreprop = 1;
226 			immu_devi->imd_use_premap = B_FALSE;
227 		}
228 	}
229 
230 	for (i = 0; i < npremap; i++) {
231 		if (strcmp(premap_driver_array[i], dname) == 0) {
232 			haspreprop = 1;
233 			immu_devi->imd_use_premap = B_TRUE;
234 		}
235 	}
236 
237 	/*
238 	 * Report if we changed the value from the default.
239 	 */
240 	if (hasmapprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
241 		ddi_err(DER_LOG, dip, "using %s DVMA mapping",
242 		    immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ?
243 		    DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE);
244 
245 	if (haspreprop && (immu_devi->imd_use_premap != old_premap))
246 		ddi_err(DER_LOG, dip, "%susing premapped DVMA space",
247 		    immu_devi->imd_use_premap ? "" : "not ");
248 }
249 
250 /*
251  * Check if the device is USB controller
252  */
253 /*ARGSUSED*/
254 static void
255 check_usb(dev_info_t *dip, void *arg)
256 {
257 	const char *drv = ddi_driver_name(dip);
258 	immu_devi_t *immu_devi;
259 
260 
261 	/*
262 	 * It's not clear if xHCI really needs these quirks; however, to be on
263 	 * the safe side until we know for certain we add it to the list below.
264 	 */
265 	if (drv == NULL ||
266 	    (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 &&
267 	    strcmp(drv, "ehci") != 0 && strcmp(drv, "xhci") != 0)) {
268 		return;
269 	}
270 
271 	immu_devi = immu_devi_get(dip);
272 
273 	/*
274 	 * If unit mappings are already specified, globally or
275 	 * locally, we're done here, since that covers both
276 	 * quirks below.
277 	 */
278 	if (immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY)
279 		return;
280 
281 	/* This must come first since it does unity mapping */
282 	if (immu_quirk_usbfullpa == B_TRUE) {
283 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
284 	} else if (immu_quirk_usbrmrr == B_TRUE) {
285 		ddi_err(DER_LOG, dip, "Applying USB RMRR quirk");
286 		map_bios_rsvd_mem(dip);
287 	}
288 }
289 
290 /*
291  * Check if the device is a LPC device
292  */
293 /*ARGSUSED*/
294 static void
295 check_lpc(dev_info_t *dip, void *arg)
296 {
297 	immu_devi_t *immu_devi;
298 
299 	immu_devi = immu_devi_get(dip);
300 	if (immu_devi->imd_lpc == B_TRUE) {
301 		ddi_err(DER_LOG, dip, "iommu: Found LPC device");
302 		/* This will put the immu_devi on the LPC "specials" list */
303 		(void) immu_dvma_device_setup(dip, IMMU_FLAGS_SLEEP);
304 	}
305 }
306 
307 /*
308  * Check if the device is a GFX device
309  */
310 /*ARGSUSED*/
311 static void
312 check_gfx(dev_info_t *dip, void *arg)
313 {
314 	immu_devi_t *immu_devi;
315 
316 	immu_devi = immu_devi_get(dip);
317 	if (immu_devi->imd_display == B_TRUE) {
318 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
319 		ddi_err(DER_LOG, dip, "iommu: Found GFX device");
320 		/* This will put the immu_devi on the GFX "specials" list */
321 		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
322 	}
323 }
324 
/*
 * Apply 'f' to every devinfo node below the root node, with the tree
 * held across the walk.  The root node itself is never visited (it is
 * not a hardware node).
 */
static void
walk_tree(int (*f)(dev_info_t *, void *), void *arg)
{
	ndi_devi_enter(root_devinfo);
	ddi_walk_devs(ddi_get_child(root_devinfo), f, arg);
	ndi_devi_exit(root_devinfo);
}
332 
/*
 * Device tree walk callback run before IOMMU setup.  Currently the
 * only pre-setup quirk is the Mobile 4 chipset detection.
 */
static int
check_pre_setup_quirks(dev_info_t *dip, void *arg)
{
	/* just 1 check right now */
	return (check_mobile4(dip, arg));
}
339 
/*
 * Device tree walk callback run before IOMMU startup.  Attaches an
 * immu_devi to each node (panics on failure), then applies the GFX,
 * LPC, per-driver configuration and USB checks.  check_conf runs
 * before check_usb so that any unity mapping it sets is honored by
 * the USB quirk logic.
 */
static int
check_pre_startup_quirks(dev_info_t *dip, void *arg)
{
	if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) {
		ddi_err(DER_PANIC, dip, "Failed to get immu_devi");
	}

	check_gfx(dip, arg);

	check_lpc(dip, arg);

	check_conf(dip, arg);

	check_usb(dip, arg);

	return (DDI_WALK_CONTINUE);
}
357 
/*
 * Walk the device tree for quirks that must be known before setup;
 * sets immu_quirk_mobile4 if the Mobile 4 chipset is present.
 */
static void
pre_setup_quirks(void)
{
	walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4);
}
363 
/*
 * Walk the device tree applying per-device quirks/configuration, then
 * map the RMRR regions from the DMAR table.  Runs just before the
 * IOMMU units are started.
 */
static void
pre_startup_quirks(void)
{
	walk_tree(check_pre_startup_quirks, NULL);

	immu_dmar_rmrr_map();
}
371 
372 static int
373 get_conf_str(char *bopt, char **val)
374 {
375 	int ret;
376 
377 	/*
378 	 * Check the rootnex.conf property
379 	 * Fake up a dev_t since searching the global
380 	 * property list needs it
381 	 */
382 	ret = ddi_prop_lookup_string(
383 	    makedevice(ddi_name_to_major("rootnex"), 0),
384 	    root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
385 	    bopt, val);
386 
387 	return (ret);
388 }
389 
390 /*
391  * get_conf_opt()
392  *	get a rootnex.conf setting  (always a boolean)
393  */
394 static void
395 get_conf_opt(char *bopt, boolean_t *kvar)
396 {
397 	char *val = NULL;
398 
399 	/*
400 	 * Check the rootnex.conf property
401 	 * Fake up a dev_t since searching the global
402 	 * property list needs it
403 	 */
404 
405 	if (get_conf_str(bopt, &val) != DDI_PROP_SUCCESS)
406 		return;
407 
408 	if (strcmp(val, "true") == 0) {
409 		*kvar = B_TRUE;
410 	} else if (strcmp(val, "false") == 0) {
411 		*kvar = B_FALSE;
412 	} else {
413 		ddi_err(DER_WARN, NULL, "rootnex.conf switch %s=\"%s\" ",
414 		    "is not set to true or false. Ignoring option.",
415 		    bopt, val);
416 	}
417 	ddi_prop_free(val);
418 }
419 
420 /*
421  * get_bootopt()
422  *	check a boot option  (always a boolean)
423  */
424 static int
425 get_boot_str(char *bopt, char **val)
426 {
427 	int ret;
428 
429 	ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo,
430 	    DDI_PROP_DONTPASS, bopt, val);
431 
432 	return (ret);
433 }
434 
435 static void
436 get_bootopt(char *bopt, boolean_t *kvar)
437 {
438 	char *val = NULL;
439 
440 	/*
441 	 * All boot options set at the GRUB menu become
442 	 * properties on the rootnex.
443 	 */
444 	if (get_boot_str(bopt, &val) != DDI_PROP_SUCCESS)
445 		return;
446 
447 	if (strcmp(val, "true") == 0) {
448 		*kvar = B_TRUE;
449 	} else if (strcmp(val, "false") == 0) {
450 		*kvar = B_FALSE;
451 	} else {
452 		ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ",
453 		    "is not set to true or false. Ignoring option.",
454 		    bopt, val);
455 	}
456 	ddi_prop_free(val);
457 }
458 
459 static void
460 get_boot_dvma_mode(void)
461 {
462 	char *val = NULL;
463 
464 	if (get_boot_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
465 	    != DDI_PROP_SUCCESS)
466 		return;
467 
468 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
469 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
470 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
471 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
472 	} else {
473 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for boot option %s",
474 		    val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
475 	}
476 	ddi_prop_free(val);
477 }
478 
479 static void
480 get_conf_dvma_mode(void)
481 {
482 	char *val = NULL;
483 
484 	if (get_conf_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
485 	    != DDI_PROP_SUCCESS)
486 		return;
487 
488 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
489 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
490 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
491 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
492 	} else {
493 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for rootnex "
494 		    "option %s", val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
495 	}
496 	ddi_prop_free(val);
497 }
498 
499 
500 static void
501 get_conf_tunables(char *bopt, int64_t *ivar)
502 {
503 	int64_t	*iarray;
504 	uint_t n;
505 
506 	/*
507 	 * Check the rootnex.conf property
508 	 * Fake up a dev_t since searching the global
509 	 * property list needs it
510 	 */
511 	if (ddi_prop_lookup_int64_array(
512 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
513 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
514 	    &iarray, &n) != DDI_PROP_SUCCESS) {
515 		return;
516 	}
517 
518 	if (n != 1) {
519 		ddi_err(DER_WARN, NULL, "More than one value specified for "
520 		    "%s property. Ignoring and using default",
521 		    "immu-flush-gran");
522 		ddi_prop_free(iarray);
523 		return;
524 	}
525 
526 	if (iarray[0] < 0) {
527 		ddi_err(DER_WARN, NULL, "Negative value specified for "
528 		    "%s property. Inoring and Using default value",
529 		    "immu-flush-gran");
530 		ddi_prop_free(iarray);
531 		return;
532 	}
533 
534 	*ivar = iarray[0];
535 
536 	ddi_prop_free(iarray);
537 }
538 
/*
 * Read all IOMMU settings from rootnex.conf.  These establish the
 * defaults; boot options (read_boot_options) are read afterwards by
 * immu_init() and may override them.
 */
static void
read_conf_options(void)
{
	/* enable/disable options */
	get_conf_opt("immu-enable", &immu_enable);
	get_conf_opt("immu-dvma-enable", &immu_dvma_enable);
	get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
	get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable);
	get_conf_opt("immu-qinv-enable", &immu_qinv_enable);

	/* workaround switches */
	get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
	get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
	get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);

	/* debug printing */
	get_conf_opt("immu-dmar-print", &immu_dmar_print);

	/* get tunables */
	get_conf_tunables("immu-flush-gran", &immu_flush_gran);

	/* global unity vs. xlate DVMA mapping mode */
	get_conf_dvma_mode();
}
562 
/*
 * Read all IOMMU settings from the boot options (rootnex properties).
 * These override any values previously read from rootnex.conf.
 */
static void
read_boot_options(void)
{
	/* enable/disable options */
	get_bootopt("immu-enable", &immu_enable);
	get_bootopt("immu-dvma-enable", &immu_dvma_enable);
	get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
	get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
	get_bootopt("immu-qinv-enable", &immu_qinv_enable);

	/* workaround switches */
	get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
	get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
	get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);

	/* debug printing */
	get_bootopt("immu-dmar-print", &immu_dmar_print);

	/* global unity vs. xlate DVMA mapping mode */
	get_boot_dvma_mode();
}
583 
584 static void
585 mapping_list_setup(void)
586 {
587 	char **string_array;
588 	uint_t nstrings;
589 
590 	if (ddi_prop_lookup_string_array(
591 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
592 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
593 	    "immu-dvma-unity-drivers",
594 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
595 		unity_driver_array = string_array;
596 		nunity = nstrings;
597 	}
598 
599 	if (ddi_prop_lookup_string_array(
600 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
601 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
602 	    "immu-dvma-xlate-drivers",
603 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
604 		xlate_driver_array = string_array;
605 		nxlate = nstrings;
606 	}
607 
608 	if (ddi_prop_lookup_string_array(
609 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
610 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
611 	    "immu-dvma-premap-drivers",
612 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
613 		premap_driver_array = string_array;
614 		npremap = nstrings;
615 	}
616 
617 	if (ddi_prop_lookup_string_array(
618 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
619 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
620 	    "immu-dvma-nopremap-drivers",
621 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
622 		nopremap_driver_array = string_array;
623 		nnopremap = nstrings;
624 	}
625 }
626 
/*
 * Note, this will not catch hardware not enumerated
 * in early boot
 */

/*
 * Return B_TRUE if any blacklist entry of the form
 * { "DRIVER", <driver-name> } names an installed driver that has
 * hardware bound to it.  black_array holds flattened 2-string
 * entries, so after a match the index advances by 2 in total (the
 * for-loop adds 1, the body adds the other).
 */
static boolean_t
blacklisted_driver(void)
{
	char **strptr;
	int i;
	major_t maj;

	/* need at least 2 strings */
	if (nblacks < 2) {
		return (B_FALSE);
	}

	for (i = 0; nblacks - i > 1; i++) {
		strptr = &black_array[i];
		if (strcmp(*strptr++, "DRIVER") == 0) {
			/* second string of the pair is the driver name */
			if ((maj = ddi_name_to_major(*strptr++))
			    != DDI_MAJOR_T_NONE) {
				/* is there hardware bound to this drvr */
				if (devnamesp[maj].dn_head != NULL) {
					return (B_TRUE);
				}
			}
			i += 1;   /* for loop adds 1, so add only 1 here */
		}
	}

	return (B_FALSE);
}
659 
/*
 * Return B_TRUE if the system's SMBIOS identity matches a blacklist
 * entry of the form { "SMBIOS", <mfg>, <product>, <version> }.
 */
static boolean_t
blacklisted_smbios(void)
{
	id_t smid;
	smbios_hdl_t *smhdl;
	smbios_info_t sminf;
	smbios_system_t smsys;
	char *mfg, *product, *version;
	char **strptr;
	int i;

	/* need at least 4 strings for this setting */
	if (nblacks < 4) {
		return (B_FALSE);
	}

	/* If SMBIOS is unavailable or unreadable, don't blacklist */
	smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL);
	if (smhdl == NULL ||
	    (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR ||
	    smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) {
		return (B_FALSE);
	}

	mfg = (char *)sminf.smbi_manufacturer;
	product = (char *)sminf.smbi_product;
	version = (char *)sminf.smbi_version;

	ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n");
	ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg);
	ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
	ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);

	for (i = 0; nblacks - i > 3; i++) {
		strptr = &black_array[i];
		if (strcmp(*strptr++, "SMBIOS") == 0) {
			/*
			 * Entries are flattened 4-string tuples; an empty
			 * product/version string apparently acts as a
			 * wildcard.  NOTE(review): '*strptr[0]' parses as
			 * *(strptr[0]) and, combined with the short-circuit
			 * increments, tests the first character of the next
			 * unconsumed string -- confirm this matches the
			 * intended wildcard semantics.
			 */
			if (strcmp(*strptr++, mfg) == 0 &&
			    (*strptr[0] == '\0' ||
			    strcmp(*strptr++, product) == 0) &&
			    (*strptr[0] == '\0' ||
			    strcmp(*strptr++, version) == 0)) {
				return (B_TRUE);
			}
			i += 3;
		}
	}

	return (B_FALSE);
}
708 
709 static boolean_t
710 blacklisted_acpi(void)
711 {
712 	if (nblacks == 0) {
713 		return (B_FALSE);
714 	}
715 
716 	return (immu_dmar_blacklisted(black_array, nblacks));
717 }
718 
/*
 * Check if system is blacklisted by Intel IOMMU driver
 * i.e. should Intel IOMMU be disabled on this system
 * Currently a system can be blacklisted based on the
 * following bases:
 *
 * 1. DMAR ACPI table information.
 *    This information includes things like
 *    manufacturer and revision number. If rootnex.conf
 *    has matching info set in its blacklist property
 *    then Intel IOMMU will be disabled
 *
 * 2. SMBIOS information
 *
 * 3. Driver installed - useful if a particular
 *    driver or hardware is toxic if Intel IOMMU
 *    is turned on.
 */
737 
738 static void
739 blacklist_setup(void)
740 {
741 	char **string_array;
742 	uint_t nstrings;
743 
744 	/*
745 	 * Check the rootnex.conf blacklist property.
746 	 * Fake up a dev_t since searching the global
747 	 * property list needs it
748 	 */
749 	if (ddi_prop_lookup_string_array(
750 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
751 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist",
752 	    &string_array, &nstrings) != DDI_PROP_SUCCESS) {
753 		return;
754 	}
755 
756 	/* smallest blacklist criteria works with multiples of 2 */
757 	if (nstrings % 2 != 0) {
758 		ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist "
759 		    "rootnex.conf: number of strings must be a "
760 		    "multiple of 2");
761 		ddi_prop_free(string_array);
762 		return;
763 	}
764 
765 	black_array = string_array;
766 	nblacks = nstrings;
767 }
768 
769 static void
770 blacklist_destroy(void)
771 {
772 	if (black_array) {
773 		ddi_prop_free(black_array);
774 		black_array = NULL;
775 		nblacks = 0;
776 	}
777 }
778 
779 static char *
780 immu_alloc_name(const char *str, int instance)
781 {
782 	size_t slen;
783 	char *s;
784 
785 	slen = strlen(str) + IMMU_ISTRLEN + 1;
786 	s = kmem_zalloc(slen, VM_SLEEP);
787 	if (s != NULL)
788 		(void) snprintf(s, slen, "%s%d", str, instance);
789 
790 	return (s);
791 }
792 
793 
/*
 * Now set all the fields in the order they are defined
 * We do this only as a defensive-coding practice, it is
 * not a correctness issue.
 *
 * Walk to the next DMAR unit in segment 'seg' (dmar_unit is the walk
 * cursor; NULL starts the walk), allocate and initialize an immu_t
 * for it, insert it on the system-wide immu_list, and return the
 * dmar_unit cursor for the next call.  Returns NULL when the segment
 * has no more units.  Caller holds immu_lock.
 */
static void *
immu_state_alloc(int seg, void *dmar_unit)
{
	immu_t *immu;
	char *nodename, *hcachename, *pcachename;
	int instance;

	dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
	if (dmar_unit == NULL) {
		/* No more IOMMUs in this segment */
		return (NULL);
	}

	immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP);

	mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL);

	mutex_enter(&(immu->immu_lock));

	immu->immu_dmar_unit = dmar_unit;
	immu->immu_dip = immu_dmar_unit_dip(dmar_unit);

	nodename = ddi_node_name(immu->immu_dip);
	instance = ddi_get_instance(immu->immu_dip);

	immu->immu_name = immu_alloc_name(nodename, instance);
	/*
	 * NOTE(review): immu_alloc_name() sleeps and should never return
	 * NULL; if it somehow did, this path returns with immu_lock still
	 * held and leaks 'immu'.  Confirm whether this dead error path
	 * should be removed or given proper unwinding.
	 */
	if (immu->immu_name == NULL)
		return (NULL);

	/*
	 * the immu_intr_lock mutex is grabbed by the IOMMU
	 * unit's interrupt handler so we need to use an
	 * interrupt cookie for the mutex
	 */
	mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER,
	    (void *)ipltospl(IMMU_INTR_IPL));

	/* IOMMU regs related */
	mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
	cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
	immu->immu_regs_busy = B_FALSE;

	/* DVMA related */
	immu->immu_dvma_coherent = B_FALSE;

	/* DVMA context related */
	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);

	/* DVMA domain related */
	list_create(&(immu->immu_domain_list), sizeof (domain_t),
	    offsetof(domain_t, dom_immu_node));

	/* DVMA special device lists */
	immu->immu_dvma_gfx_only = B_FALSE;
	list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t),
	    offsetof(immu_devi_t, imd_spc_node));
	list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t),
	    offsetof(immu_devi_t, imd_spc_node));

	/* interrupt remapping related */
	mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL);

	/* qinv related */
	mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL);

	/*
	 * insert this immu unit into the system-wide list
	 */
	list_insert_tail(&immu_list, immu);

	pcachename = immu_alloc_name("immu_pgtable_cache", instance);
	/* NOTE(review): same unreachable-leak caveat as immu_name above */
	if (pcachename == NULL)
		return (NULL);

	hcachename = immu_alloc_name("immu_hdl_cache", instance);
	if (hcachename == NULL)
		return (NULL);

	/* per-unit kmem caches for page tables and DMA handle private data */
	immu->immu_pgtable_cache = kmem_cache_create(pcachename,
	    sizeof (pgtable_t), 0, pgtable_ctor, pgtable_dtor, NULL, immu,
	    NULL, 0);
	immu->immu_hdl_cache = kmem_cache_create(hcachename,
	    sizeof (immu_hdl_priv_t), 64, immu_hdl_priv_ctor,
	    NULL, NULL, immu, NULL, 0);

	mutex_exit(&(immu->immu_lock));

	ddi_err(DER_LOG, immu->immu_dip, "unit setup");

	immu_dmar_set_immu(dmar_unit, immu);

	return (dmar_unit);
}
892 
893 static void
894 immu_subsystems_setup(void)
895 {
896 	int seg;
897 	void *unit_hdl;
898 
899 	ddi_err(DER_VERB, NULL,
900 	    "Creating state structures for Intel IOMMU units");
901 
902 	mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
903 	list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
904 
905 	mutex_enter(&immu_lock);
906 
907 	unit_hdl = NULL;
908 	for (seg = 0; seg < IMMU_MAXSEG; seg++) {
909 		while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
910 			;
911 		}
912 	}
913 
914 	immu_regs_setup(&immu_list);	/* subsequent code needs this first */
915 	immu_dvma_setup(&immu_list);
916 	if (immu_qinv_setup(&immu_list) == DDI_SUCCESS)
917 		immu_intrmap_setup(&immu_list);
918 	else
919 		immu_intrmap_enable = B_FALSE;
920 
921 	mutex_exit(&immu_lock);
922 }
923 
/*
 * immu_subsystems_startup()
 *	startup all units that were setup
 *	For each unit: register its interrupt, start the DVMA,
 *	interrupt-remapping and QINV subsystems, program the unit's
 *	registers, and finally register the unit with iommulib.
 */
static void
immu_subsystems_startup(void)
{
	immu_t *immu;
	iommulib_ops_t *iommulib_ops;

	mutex_enter(&immu_lock);

	immu_dmar_startup();

	immu = list_head(&immu_list);
	for (; immu; immu = list_next(&immu_list, immu)) {

		mutex_enter(&(immu->immu_lock));

		immu_intr_register(immu);
		immu_dvma_startup(immu);
		immu_intrmap_startup(immu);
		immu_qinv_startup(immu);

		/*
		 * Set IOMMU unit's regs to do
		 * the actual startup. This will
		 * set immu->immu_running  field
		 * if the unit is successfully
		 * started
		 */
		immu_regs_startup(immu);

		mutex_exit(&(immu->immu_lock));

		/* register the unit with the iommulib framework */
		iommulib_ops = kmem_alloc(sizeof (iommulib_ops_t), KM_SLEEP);
		*iommulib_ops = immulib_ops;
		iommulib_ops->ilops_data = (void *)immu;
		(void) iommulib_iommu_register(immu->immu_dip, iommulib_ops,
		    &immu->immu_iommulib_handle);
	}

	mutex_exit(&immu_lock);
}
968 
969 /* ##################  Intel IOMMU internal interfaces ###################### */
970 
971 /*
972  * Internal interfaces for IOMMU code (i.e. not exported to rootnex
973  * or rest of system)
974  */
975 
/*
 * Walk from rdip up the ancestor chain, applying 'func' to each node.
 * ddip can be NULL, in which case we walk up until we find the root dip.
 * NOTE: We never visit the root dip since its not a hardware node.
 * On return, *lvlp (if non-NULL) holds the number of levels visited.
 * Returns DDI_FAILURE if immu_devi state could not be attached to a
 * node on the path, DDI_SUCCESS otherwise.
 */
int
immu_walk_ancestor(
	dev_info_t *rdip,
	dev_info_t *ddip,
	int (*func)(dev_info_t *, void *arg),
	void *arg,
	int *lvlp,
	immu_flags_t immu_flags)
{
	dev_info_t *pdip;
	int level;
	int error = DDI_SUCCESS;

	/* ddip and immu can be NULL */

	/* Hold rdip so that branch is not detached */
	ndi_hold_devi(rdip);
	for (pdip = rdip, level = 1; pdip && pdip != root_devinfo;
	    pdip = ddi_get_parent(pdip), level++) {

		/* attach IOMMU state to this ancestor before visiting it */
		if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) {
			error = DDI_FAILURE;
			break;
		}
		/* the callback may terminate the walk early */
		if (func(pdip, arg) == DDI_WALK_TERMINATE) {
			break;
		}
		/* DONTPASS restricts the walk to rdip itself */
		if (immu_flags & IMMU_FLAGS_DONTPASS) {
			break;
		}
		/* stop once the designated ancestor has been visited */
		if (pdip == ddip) {
			break;
		}
	}

	ndi_rele_devi(rdip);

	if (lvlp)
		*lvlp = level;

	return (error);
}
1022 
1023 /* ########################  Intel IOMMU entry points ####################### */
/*
 * immu_init()
 *	called from rootnex_attach(). setup but don't startup the Intel IOMMU
 *      This is the first function called in Intel IOMMU code
 *
 *	Ordering matters throughout: conf options are read before boot
 *	options (so boot options override), enablement is checked before
 *	ACPI parsing (to avoid touching buggy tables when disabled), and
 *	blacklists are consulted both before and after DMAR parsing.
 *	On any disqualifying condition immu_enable is cleared and we
 *	return without setting immu_setup.
 */
void
immu_init(void)
{
	char *phony_reg = "A thing of beauty is a joy forever";

	/* Set some global shorthands that are needed by all of IOMMU code */
	root_devinfo = ddi_root_node();

	/*
	 * Intel IOMMU only supported only if MMU(CPU) page size is ==
	 * IOMMU pages size.
	 */
	/*LINTED*/
	if (MMU_PAGESIZE != IMMU_PAGESIZE) {
		ddi_err(DER_WARN, NULL,
		    "MMU page size (%d) is not equal to\n"
		    "IOMMU page size (%d). "
		    "Disabling Intel IOMMU. ",
		    MMU_PAGESIZE, IMMU_PAGESIZE);
		immu_enable = B_FALSE;
		return;
	}

	/*
	 * Read rootnex.conf options. Do this before
	 * boot options so boot options can override .conf options.
	 */
	read_conf_options();

	/*
	 * retrieve the Intel IOMMU boot options.
	 * Do this before parsing immu ACPI table
	 * as a boot option could potentially affect
	 * ACPI parsing.
	 */
	ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n");
	read_boot_options();

	/*
	 * Check the IOMMU enable boot-option first.
	 * This is so that we can skip parsing the ACPI table
	 * if necessary because that may cause problems in
	 * systems with buggy BIOS or ACPI tables
	 */
	if (immu_enable == B_FALSE) {
		return;
	}

	/* interrupt remapping requires queued invalidation */
	if (immu_intrmap_enable == B_TRUE)
		immu_qinv_enable = B_TRUE;

	/*
	 * Next, check if the system even has an Intel IOMMU
	 * We use the presence or absence of the IOMMU ACPI
	 * table to detect Intel IOMMU.
	 */
	if (immu_dmar_setup() != DDI_SUCCESS) {
		immu_enable = B_FALSE;
		return;
	}

	/* read the per-driver mapping override lists from rootnex.conf */
	mapping_list_setup();

	/*
	 * Check blacklists
	 */
	blacklist_setup();

	if (blacklisted_smbios() == B_TRUE) {
		blacklist_destroy();
		immu_enable = B_FALSE;
		return;
	}

	if (blacklisted_driver() == B_TRUE) {
		blacklist_destroy();
		immu_enable = B_FALSE;
		return;
	}

	/*
	 * Read the "raw" DMAR ACPI table to get information
	 * and convert into a form we can use.
	 */
	if (immu_dmar_parse() != DDI_SUCCESS) {
		blacklist_destroy();
		immu_enable = B_FALSE;
		return;
	}

	/*
	 * now that we have processed the ACPI table
	 * check if we need to blacklist this system
	 * based on ACPI info
	 */
	if (blacklisted_acpi() == B_TRUE) {
		immu_dmar_destroy();
		blacklist_destroy();
		immu_enable = B_FALSE;
		return;
	}

	/* blacklist checks are complete; the list is no longer needed */
	blacklist_destroy();

	/*
	 * Check if system has HW quirks.
	 */
	pre_setup_quirks();

	/* Now do the rest of the setup */
	immu_subsystems_setup();

	/*
	 * Now that the IMMU is setup, create a phony
	 * reg prop so that suspend/resume works
	 */
	if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg",
	    (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) {
		ddi_err(DER_PANIC, NULL, "Failed to create reg prop for "
		    "rootnex node");
		/*NOTREACHED*/
	}

	immu_setup = B_TRUE;
}
1154 
1155 /*
1156  * immu_startup()
1157  *	called directly by boot code to startup
1158  *	all units of the IOMMU
1159  */
1160 void
1161 immu_startup(void)
1162 {
1163 	/*
1164 	 * If IOMMU is disabled, do nothing
1165 	 */
1166 	if (immu_enable == B_FALSE) {
1167 		return;
1168 	}
1169 
1170 	if (immu_setup == B_FALSE) {
1171 		ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
1172 		    "skipping IOMMU startup");
1173 		return;
1174 	}
1175 
1176 	pre_startup_quirks();
1177 
1178 	ddi_err(DER_CONT, NULL,
1179 	    "?Starting Intel IOMMU (dmar) units...\n");
1180 
1181 	immu_subsystems_startup();
1182 
1183 	immu_running = B_TRUE;
1184 }
1185 
1186 /*
1187  * Hook to notify IOMMU code of device tree changes
1188  */
1189 void
1190 immu_device_tree_changed(void)
1191 {
1192 	if (immu_setup == B_FALSE) {
1193 		return;
1194 	}
1195 
1196 	ddi_err(DER_WARN, NULL, "Intel IOMMU currently "
1197 	    "does not use device tree updates");
1198 }
1199 
1200 /*
1201  * Hook to notify IOMMU code of memory changes
1202  */
1203 void
1204 immu_physmem_update(uint64_t addr, uint64_t size)
1205 {
1206 	if (immu_setup == B_FALSE) {
1207 		return;
1208 	}
1209 	immu_dvma_physmem_update(addr, size);
1210 }
1211 
1212 /*
1213  * immu_quiesce()
1214  *	quiesce all units that are running
1215  */
1216 int
1217 immu_quiesce(void)
1218 {
1219 	immu_t *immu;
1220 	int ret = DDI_SUCCESS;
1221 
1222 	mutex_enter(&immu_lock);
1223 
1224 	if (immu_running == B_FALSE) {
1225 		mutex_exit(&immu_lock);
1226 		return (DDI_SUCCESS);
1227 	}
1228 
1229 	immu = list_head(&immu_list);
1230 	for (; immu; immu = list_next(&immu_list, immu)) {
1231 
1232 		/* if immu is not running, we dont quiesce */
1233 		if (immu->immu_regs_running == B_FALSE)
1234 			continue;
1235 
1236 		/* flush caches */
1237 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1238 		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1239 		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1240 		rw_exit(&(immu->immu_ctx_rwlock));
1241 		immu_regs_wbf_flush(immu);
1242 
1243 		mutex_enter(&(immu->immu_lock));
1244 
1245 		/*
1246 		 * Set IOMMU unit's regs to do
1247 		 * the actual shutdown.
1248 		 */
1249 		immu_regs_shutdown(immu);
1250 		immu_regs_suspend(immu);
1251 
1252 		/* if immu is still running, we failed */
1253 		if (immu->immu_regs_running == B_TRUE)
1254 			ret = DDI_FAILURE;
1255 		else
1256 			immu->immu_regs_quiesced = B_TRUE;
1257 
1258 		mutex_exit(&(immu->immu_lock));
1259 	}
1260 
1261 	if (ret == DDI_SUCCESS) {
1262 		immu_running = B_FALSE;
1263 		immu_quiesced = B_TRUE;
1264 	}
1265 	mutex_exit(&immu_lock);
1266 
1267 	return (ret);
1268 }
1269 
1270 /*
1271  * immu_unquiesce()
1272  *	unquiesce all units
1273  */
1274 int
1275 immu_unquiesce(void)
1276 {
1277 	immu_t *immu;
1278 	int ret = DDI_SUCCESS;
1279 
1280 	mutex_enter(&immu_lock);
1281 
1282 	if (immu_quiesced == B_FALSE) {
1283 		mutex_exit(&immu_lock);
1284 		return (DDI_SUCCESS);
1285 	}
1286 
1287 	immu = list_head(&immu_list);
1288 	for (; immu; immu = list_next(&immu_list, immu)) {
1289 
1290 		mutex_enter(&(immu->immu_lock));
1291 
1292 		/* if immu was not quiesced, i.e was not running before */
1293 		if (immu->immu_regs_quiesced == B_FALSE) {
1294 			mutex_exit(&(immu->immu_lock));
1295 			continue;
1296 		}
1297 
1298 		if (immu_regs_resume(immu) != DDI_SUCCESS) {
1299 			ret = DDI_FAILURE;
1300 			mutex_exit(&(immu->immu_lock));
1301 			continue;
1302 		}
1303 
1304 		/* flush caches before unquiesce */
1305 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1306 		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1307 		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1308 		rw_exit(&(immu->immu_ctx_rwlock));
1309 
1310 		/*
1311 		 * Set IOMMU unit's regs to do
1312 		 * the actual startup. This will
1313 		 * set immu->immu_regs_running  field
1314 		 * if the unit is successfully
1315 		 * started
1316 		 */
1317 		immu_regs_startup(immu);
1318 
1319 		if (immu->immu_regs_running == B_FALSE) {
1320 			ret = DDI_FAILURE;
1321 		} else {
1322 			immu_quiesced = B_TRUE;
1323 			immu_running = B_TRUE;
1324 			immu->immu_regs_quiesced = B_FALSE;
1325 		}
1326 
1327 		mutex_exit(&(immu->immu_lock));
1328 	}
1329 
1330 	mutex_exit(&immu_lock);
1331 
1332 	return (ret);
1333 }
1334 
1335 void
1336 immu_init_inv_wait(immu_inv_wait_t *iwp, const char *name, boolean_t sync)
1337 {
1338 	caddr_t vaddr;
1339 	uint64_t paddr;
1340 
1341 	iwp->iwp_sync = sync;
1342 
1343 	vaddr = (caddr_t)&iwp->iwp_vstatus;
1344 	paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
1345 	paddr += ((uintptr_t)vaddr) & MMU_PAGEOFFSET;
1346 
1347 	iwp->iwp_pstatus = paddr;
1348 	iwp->iwp_name = name;
1349 }
1350 
1351 /* ##############  END Intel IOMMU entry points ################## */
1352