xref: /illumos-gate/usr/src/uts/i86pc/io/immu.c (revision 8887b57dc579de11464fc6c74163d2595ce073af)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23  * All rights reserved.
24  */
25 /*
26  * Copyright (c) 2009, Intel Corporation.
27  * All rights reserved.
28  */
29 
30 /*
31  * Intel IOMMU implementation
32  * This file contains Intel IOMMU code exported
33  * to the rest of the system and code that deals
34  * with the Intel IOMMU as a whole.
35  */
36 
37 #include <sys/conf.h>
38 #include <sys/modctl.h>
39 #include <sys/pci.h>
40 #include <sys/pci_impl.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ddi.h>
43 #include <sys/ddidmareq.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/ddifm.h>
46 #include <sys/sunndi.h>
47 #include <sys/debug.h>
48 #include <sys/fm/protocol.h>
49 #include <sys/note.h>
50 #include <sys/apic.h>
51 #include <vm/hat_i86.h>
52 #include <sys/smp_impldefs.h>
53 #include <sys/spl.h>
54 #include <sys/archsystm.h>
55 #include <sys/x86_archext.h>
56 #include <sys/avl.h>
57 #include <sys/bootconf.h>
58 #include <sys/bootinfo.h>
59 #include <sys/atomic.h>
60 #include <sys/immu.h>
/* ########################### Globals and tunables ######################## */
/*
 * Global switches (boolean) that can be toggled either via boot options
 * or via /etc/system or kmdb
 */

/* Various features */
/* Master switch ("immu-enable"); immu_init() clears it when setup fails. */
boolean_t immu_enable = B_TRUE;
/* Set from the "immu-dvma-enable" option. */
boolean_t immu_dvma_enable = B_TRUE;

/* accessed in other files so not static */
/* Set from the "immu-gfxdvma-enable" option. */
boolean_t immu_gfxdvma_enable = B_TRUE;
/*
 * Set from "immu-intrmap-enable". When B_TRUE, immu_init() forces
 * immu_qinv_enable on; it is cleared again if immu_qinv_setup() fails.
 */
boolean_t immu_intrmap_enable = B_FALSE;
/* Set from the "immu-qinv-enable" option. */
boolean_t immu_qinv_enable = B_TRUE;

/* various quirks that need working around */

/* XXX We always map page 0 read/write for now */
boolean_t immu_quirk_usbpage0 = B_TRUE;
/* When set, check_usb() maps the BIOS reserved ranges for USB controllers. */
boolean_t immu_quirk_usbrmrr = B_TRUE;
/* When set, check_usb() gives USB controllers a unity mapping. */
boolean_t immu_quirk_usbfullpa;
/* Raised by check_mobile4() when device 8086:2a40 is found. */
boolean_t immu_quirk_mobile4;

/* debug messages */
/* Set from the "immu-dmar-print" option. */
boolean_t immu_dmar_print;

/* Tunables */
/* Set from "immu-flush-gran"; negative values are rejected. */
int64_t immu_flush_gran = 5;

/*
 * Global DVMA flags; IMMU_FLAGS_UNITY is set or cleared according to the
 * DDI_DVMA_MAPTYPE_ROOTNEX_PROP option.
 */
immu_flags_t immu_global_dvma_flags;

/* ############  END OPTIONS section ################ */

/*
 * Global used internally by Intel IOMMU code
 */
/* Root of the device tree; set first thing in immu_init(). */
dev_info_t *root_devinfo;
/* Held while immu_list is built and while the units are started. */
kmutex_t immu_lock;
/* System-wide list of immu_t units, linked through immu_node. */
list_t immu_list;
/* Set to B_TRUE at the end of a successful immu_init(). */
boolean_t immu_setup;
/* Set to B_TRUE at the end of immu_startup(). */
boolean_t immu_running;
boolean_t immu_quiesced;

/* ######################## END Globals and tunables ###################### */
/* Globals used only in this file */
/* Strings of the "immu-blacklist" property and their count. */
static char **black_array;
static uint_t nblacks;

/* Driver names from "immu-dvma-unity-drivers" / "immu-dvma-xlate-drivers". */
static char **unity_driver_array;
static uint_t nunity;
static char **xlate_driver_array;
static uint_t nxlate;

/* Driver names from "immu-dvma-premap-drivers" / "immu-dvma-nopremap-drivers". */
static char **premap_driver_array;
static uint_t npremap;
static char **nopremap_driver_array;
static uint_t nnopremap;
118 /* ###################### Utility routines ############################# */
119 
120 /*
121  * Check if the device has mobile 4 chipset
122  */
123 static int
124 check_mobile4(dev_info_t *dip, void *arg)
125 {
126 	_NOTE(ARGUNUSED(arg));
127 	int vendor, device;
128 	int *ip = (int *)arg;
129 
130 	vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
131 	    "vendor-id", -1);
132 	device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
133 	    "device-id", -1);
134 
135 	if (vendor == 0x8086 && device == 0x2a40) {
136 		*ip = B_TRUE;
137 		ddi_err(DER_NOTE, dip, "iommu: Mobile 4 chipset detected. "
138 		    "Force setting IOMMU write buffer");
139 		return (DDI_WALK_TERMINATE);
140 	} else {
141 		return (DDI_WALK_CONTINUE);
142 	}
143 }
144 
145 static void
146 map_bios_rsvd_mem(dev_info_t *dip)
147 {
148 	struct memlist *mp;
149 
150 	/*
151 	 * Make sure the domain for the device is set up before
152 	 * mapping anything.
153 	 */
154 	(void) immu_dvma_device_setup(dip, 0);
155 
156 	memlist_read_lock();
157 
158 	mp = bios_rsvd;
159 	while (mp != NULL) {
160 		memrng_t mrng = {0};
161 
162 		ddi_err(DER_LOG, dip, "iommu: Mapping BIOS rsvd range "
163 		    "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
164 		    mp->ml_address + mp->ml_size);
165 
166 		mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
167 		mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
168 
169 		(void) immu_map_memrange(dip, &mrng);
170 
171 		mp = mp->ml_next;
172 	}
173 
174 	memlist_read_unlock();
175 }
176 
177 
178 /*
179  * Check if the driver requests a specific type of mapping.
180  */
181 /*ARGSUSED*/
182 static void
183 check_conf(dev_info_t *dip, void *arg)
184 {
185 	immu_devi_t *immu_devi;
186 	const char *dname;
187 	uint_t i;
188 	int hasmapprop = 0, haspreprop = 0;
189 	boolean_t old_premap;
190 
191 	/*
192 	 * Only PCI devices can use an IOMMU. Legacy ISA devices
193 	 * are handled in check_lpc.
194 	 */
195 	if (!DEVI_IS_PCI(dip))
196 		return;
197 
198 	dname = ddi_driver_name(dip);
199 	if (dname == NULL)
200 		return;
201 	immu_devi = immu_devi_get(dip);
202 
203 	for (i = 0; i < nunity; i++) {
204 		if (strcmp(unity_driver_array[i], dname) == 0) {
205 			hasmapprop = 1;
206 			immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
207 		}
208 	}
209 
210 	for (i = 0; i < nxlate; i++) {
211 		if (strcmp(xlate_driver_array[i], dname) == 0) {
212 			hasmapprop = 1;
213 			immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY;
214 		}
215 	}
216 
217 	old_premap = immu_devi->imd_use_premap;
218 
219 	for (i = 0; i < nnopremap; i++) {
220 		if (strcmp(nopremap_driver_array[i], dname) == 0) {
221 			haspreprop = 1;
222 			immu_devi->imd_use_premap = B_FALSE;
223 		}
224 	}
225 
226 	for (i = 0; i < npremap; i++) {
227 		if (strcmp(premap_driver_array[i], dname) == 0) {
228 			haspreprop = 1;
229 			immu_devi->imd_use_premap = B_TRUE;
230 		}
231 	}
232 
233 	/*
234 	 * Report if we changed the value from the default.
235 	 */
236 	if (hasmapprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
237 		ddi_err(DER_LOG, dip, "using %s DVMA mapping",
238 		    immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ?
239 		    DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE);
240 
241 	if (haspreprop && (immu_devi->imd_use_premap != old_premap))
242 		ddi_err(DER_LOG, dip, "%susing premapped DVMA space",
243 		    immu_devi->imd_use_premap ? "" : "not ");
244 }
245 
246 /*
247  * Check if the device is USB controller
248  */
249 /*ARGSUSED*/
250 static void
251 check_usb(dev_info_t *dip, void *arg)
252 {
253 	const char *drv = ddi_driver_name(dip);
254 	immu_devi_t *immu_devi;
255 
256 
257 	if (drv == NULL ||
258 	    (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 &&
259 	    strcmp(drv, "ehci") != 0)) {
260 		return;
261 	}
262 
263 	immu_devi = immu_devi_get(dip);
264 
265 	/*
266 	 * If unit mappings are already specified, globally or
267 	 * locally, we're done here, since that covers both
268 	 * quirks below.
269 	 */
270 	if (immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY)
271 		return;
272 
273 	/* This must come first since it does unity mapping */
274 	if (immu_quirk_usbfullpa == B_TRUE) {
275 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
276 	} else if (immu_quirk_usbrmrr == B_TRUE) {
277 		ddi_err(DER_LOG, dip, "Applying USB RMRR quirk");
278 		map_bios_rsvd_mem(dip);
279 	}
280 }
281 
282 /*
283  * Check if the device is a LPC device
284  */
285 /*ARGSUSED*/
286 static void
287 check_lpc(dev_info_t *dip, void *arg)
288 {
289 	immu_devi_t *immu_devi;
290 
291 	immu_devi = immu_devi_get(dip);
292 	if (immu_devi->imd_lpc == B_TRUE) {
293 		ddi_err(DER_LOG, dip, "iommu: Found LPC device");
294 		/* This will put the immu_devi on the LPC "specials" list */
295 		(void) immu_dvma_device_setup(dip, IMMU_FLAGS_SLEEP);
296 	}
297 }
298 
299 /*
300  * Check if the device is a GFX device
301  */
302 /*ARGSUSED*/
303 static void
304 check_gfx(dev_info_t *dip, void *arg)
305 {
306 	immu_devi_t *immu_devi;
307 
308 	immu_devi = immu_devi_get(dip);
309 	if (immu_devi->imd_display == B_TRUE) {
310 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
311 		ddi_err(DER_LOG, dip, "iommu: Found GFX device");
312 		/* This will put the immu_devi on the GFX "specials" list */
313 		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
314 	}
315 }
316 
317 static void
318 walk_tree(int (*f)(dev_info_t *, void *), void *arg)
319 {
320 	int count;
321 
322 	ndi_devi_enter(root_devinfo, &count);
323 	ddi_walk_devs(ddi_get_child(root_devinfo), f, arg);
324 	ndi_devi_exit(root_devinfo, count);
325 }
326 
327 static int
328 check_pre_setup_quirks(dev_info_t *dip, void *arg)
329 {
330 	/* just 1 check right now */
331 	return (check_mobile4(dip, arg));
332 }
333 
334 static int
335 check_pre_startup_quirks(dev_info_t *dip, void *arg)
336 {
337 	if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) {
338 		ddi_err(DER_PANIC, dip, "Failed to get immu_devi");
339 	}
340 
341 	check_gfx(dip, arg);
342 
343 	check_lpc(dip, arg);
344 
345 	check_conf(dip, arg);
346 
347 	check_usb(dip, arg);
348 
349 	return (DDI_WALK_CONTINUE);
350 }
351 
352 static void
353 pre_setup_quirks(void)
354 {
355 	walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4);
356 }
357 
358 static void
359 pre_startup_quirks(void)
360 {
361 	walk_tree(check_pre_startup_quirks, NULL);
362 
363 	immu_dmar_rmrr_map();
364 }
365 
366 static int
367 get_conf_str(char *bopt, char **val)
368 {
369 	int ret;
370 
371 	/*
372 	 * Check the rootnex.conf property
373 	 * Fake up a dev_t since searching the global
374 	 * property list needs it
375 	 */
376 	ret = ddi_prop_lookup_string(
377 	    makedevice(ddi_name_to_major("rootnex"), 0),
378 	    root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
379 	    bopt, val);
380 
381 	return (ret);
382 }
383 
384 /*
385  * get_conf_opt()
386  * 	get a rootnex.conf setting  (always a boolean)
387  */
388 static void
389 get_conf_opt(char *bopt, boolean_t *kvar)
390 {
391 	char *val = NULL;
392 
393 	/*
394 	 * Check the rootnex.conf property
395 	 * Fake up a dev_t since searching the global
396 	 * property list needs it
397 	 */
398 
399 	if (get_conf_str(bopt, &val) != DDI_PROP_SUCCESS)
400 		return;
401 
402 	if (strcmp(val, "true") == 0) {
403 		*kvar = B_TRUE;
404 	} else if (strcmp(val, "false") == 0) {
405 		*kvar = B_FALSE;
406 	} else {
407 		ddi_err(DER_WARN, NULL, "rootnex.conf switch %s=\"%s\" ",
408 		    "is not set to true or false. Ignoring option.",
409 		    bopt, val);
410 	}
411 	ddi_prop_free(val);
412 }
413 
414 /*
415  * get_bootopt()
416  * 	check a boot option  (always a boolean)
417  */
418 static int
419 get_boot_str(char *bopt, char **val)
420 {
421 	int ret;
422 
423 	ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo,
424 	    DDI_PROP_DONTPASS, bopt, val);
425 
426 	return (ret);
427 }
428 
429 static void
430 get_bootopt(char *bopt, boolean_t *kvar)
431 {
432 	char *val = NULL;
433 
434 	/*
435 	 * All boot options set at the GRUB menu become
436 	 * properties on the rootnex.
437 	 */
438 	if (get_boot_str(bopt, &val) != DDI_PROP_SUCCESS)
439 		return;
440 
441 	if (strcmp(val, "true") == 0) {
442 		*kvar = B_TRUE;
443 	} else if (strcmp(val, "false") == 0) {
444 		*kvar = B_FALSE;
445 	} else {
446 		ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ",
447 		    "is not set to true or false. Ignoring option.",
448 		    bopt, val);
449 	}
450 	ddi_prop_free(val);
451 }
452 
453 static void
454 get_boot_dvma_mode(void)
455 {
456 	char *val = NULL;
457 
458 	if (get_boot_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
459 	    != DDI_PROP_SUCCESS)
460 		return;
461 
462 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
463 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
464 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
465 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
466 	} else {
467 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for boot option %s",
468 		    val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
469 	}
470 	ddi_prop_free(val);
471 }
472 
473 static void
474 get_conf_dvma_mode(void)
475 {
476 	char *val = NULL;
477 
478 	if (get_conf_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
479 	    != DDI_PROP_SUCCESS)
480 		return;
481 
482 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
483 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
484 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
485 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
486 	} else {
487 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for rootnex "
488 		    "option %s", val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
489 	}
490 	ddi_prop_free(val);
491 }
492 
493 
494 static void
495 get_conf_tunables(char *bopt, int64_t *ivar)
496 {
497 	int64_t	*iarray;
498 	uint_t n;
499 
500 	/*
501 	 * Check the rootnex.conf property
502 	 * Fake up a dev_t since searching the global
503 	 * property list needs it
504 	 */
505 	if (ddi_prop_lookup_int64_array(
506 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
507 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
508 	    &iarray, &n) != DDI_PROP_SUCCESS) {
509 		return;
510 	}
511 
512 	if (n != 1) {
513 		ddi_err(DER_WARN, NULL, "More than one value specified for "
514 		    "%s property. Ignoring and using default",
515 		    "immu-flush-gran");
516 		ddi_prop_free(iarray);
517 		return;
518 	}
519 
520 	if (iarray[0] < 0) {
521 		ddi_err(DER_WARN, NULL, "Negative value specified for "
522 		    "%s property. Inoring and Using default value",
523 		    "immu-flush-gran");
524 		ddi_prop_free(iarray);
525 		return;
526 	}
527 
528 	*ivar = iarray[0];
529 
530 	ddi_prop_free(iarray);
531 }
532 
533 static void
534 read_conf_options(void)
535 {
536 	/* enable/disable options */
537 	get_conf_opt("immu-enable", &immu_enable);
538 	get_conf_opt("immu-dvma-enable", &immu_dvma_enable);
539 	get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
540 	get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable);
541 	get_conf_opt("immu-qinv-enable", &immu_qinv_enable);
542 
543 	/* workaround switches */
544 	get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
545 	get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
546 	get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
547 
548 	/* debug printing */
549 	get_conf_opt("immu-dmar-print", &immu_dmar_print);
550 
551 	/* get tunables */
552 	get_conf_tunables("immu-flush-gran", &immu_flush_gran);
553 
554 	get_conf_dvma_mode();
555 }
556 
557 static void
558 read_boot_options(void)
559 {
560 	/* enable/disable options */
561 	get_bootopt("immu-enable", &immu_enable);
562 	get_bootopt("immu-dvma-enable", &immu_dvma_enable);
563 	get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
564 	get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
565 	get_bootopt("immu-qinv-enable", &immu_qinv_enable);
566 
567 	/* workaround switches */
568 	get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
569 	get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
570 	get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
571 
572 	/* debug printing */
573 	get_bootopt("immu-dmar-print", &immu_dmar_print);
574 
575 	get_boot_dvma_mode();
576 }
577 
578 static void
579 mapping_list_setup(void)
580 {
581 	char **string_array;
582 	uint_t nstrings;
583 
584 	if (ddi_prop_lookup_string_array(
585 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
586 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
587 	    "immu-dvma-unity-drivers",
588 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
589 		unity_driver_array = string_array;
590 		nunity = nstrings;
591 	}
592 
593 	if (ddi_prop_lookup_string_array(
594 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
595 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
596 	    "immu-dvma-xlate-drivers",
597 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
598 		xlate_driver_array = string_array;
599 		nxlate = nstrings;
600 	}
601 
602 	if (ddi_prop_lookup_string_array(
603 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
604 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
605 	    "immu-dvma-premap-drivers",
606 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
607 		premap_driver_array = string_array;
608 		npremap = nstrings;
609 	}
610 
611 	if (ddi_prop_lookup_string_array(
612 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
613 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
614 	    "immu-dvma-nopremap-drivers",
615 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
616 		nopremap_driver_array = string_array;
617 		nnopremap = nstrings;
618 	}
619 }
620 
621 /*
622  * Note, this will not catch hardware not enumerated
623  * in early boot
624  */
625 static boolean_t
626 blacklisted_driver(void)
627 {
628 	char **strptr;
629 	int i;
630 	major_t maj;
631 
632 	/* need at least 2 strings */
633 	if (nblacks < 2) {
634 		return (B_FALSE);
635 	}
636 
637 	for (i = 0; nblacks - i > 1; i++) {
638 		strptr = &black_array[i];
639 		if (strcmp(*strptr++, "DRIVER") == 0) {
640 			if ((maj = ddi_name_to_major(*strptr++))
641 			    != DDI_MAJOR_T_NONE) {
642 				/* is there hardware bound to this drvr */
643 				if (devnamesp[maj].dn_head != NULL) {
644 					return (B_TRUE);
645 				}
646 			}
647 			i += 1;   /* for loop adds 1, so add only 1 here */
648 		}
649 	}
650 
651 	return (B_FALSE);
652 }
653 
654 static boolean_t
655 blacklisted_smbios(void)
656 {
657 	id_t smid;
658 	smbios_hdl_t *smhdl;
659 	smbios_info_t sminf;
660 	smbios_system_t smsys;
661 	char *mfg, *product, *version;
662 	char **strptr;
663 	int i;
664 
665 	/* need at least 4 strings for this setting */
666 	if (nblacks < 4) {
667 		return (B_FALSE);
668 	}
669 
670 	smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL);
671 	if (smhdl == NULL ||
672 	    (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR ||
673 	    smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) {
674 		return (B_FALSE);
675 	}
676 
677 	mfg = (char *)sminf.smbi_manufacturer;
678 	product = (char *)sminf.smbi_product;
679 	version = (char *)sminf.smbi_version;
680 
681 	ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n");
682 	ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg);
683 	ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
684 	ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);
685 
686 	for (i = 0; nblacks - i > 3; i++) {
687 		strptr = &black_array[i];
688 		if (strcmp(*strptr++, "SMBIOS") == 0) {
689 			if (strcmp(*strptr++, mfg) == 0 &&
690 			    ((char *)strptr == '\0' ||
691 			    strcmp(*strptr++, product) == 0) &&
692 			    ((char *)strptr == '\0' ||
693 			    strcmp(*strptr++, version) == 0)) {
694 				return (B_TRUE);
695 			}
696 			i += 3;
697 		}
698 	}
699 
700 	return (B_FALSE);
701 }
702 
703 static boolean_t
704 blacklisted_acpi(void)
705 {
706 	if (nblacks == 0) {
707 		return (B_FALSE);
708 	}
709 
710 	return (immu_dmar_blacklisted(black_array, nblacks));
711 }
712 
713 /*
714  * Check if system is blacklisted by Intel IOMMU driver
715  * i.e. should Intel IOMMU be disabled on this system
716  * Currently a system can be blacklistd based on the
717  * following bases:
718  *
719  * 1. DMAR ACPI table information.
720  *    This information includes things like
721  *    manufacturer and revision number. If rootnex.conf
722  *    has matching info set in its blacklist property
723  *    then Intel IOMMu will be disabled
724  *
725  * 2. SMBIOS information
726  *
727  * 3. Driver installed - useful if a particular
728  *    driver or hardware is toxic if Intel IOMMU
729  *    is turned on.
730  */
731 
732 static void
733 blacklist_setup(void)
734 {
735 	char **string_array;
736 	uint_t nstrings;
737 
738 	/*
739 	 * Check the rootnex.conf blacklist property.
740 	 * Fake up a dev_t since searching the global
741 	 * property list needs it
742 	 */
743 	if (ddi_prop_lookup_string_array(
744 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
745 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist",
746 	    &string_array, &nstrings) != DDI_PROP_SUCCESS) {
747 		return;
748 	}
749 
750 	/* smallest blacklist criteria works with multiples of 2 */
751 	if (nstrings % 2 != 0) {
752 		ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist "
753 		    "rootnex.conf: number of strings must be a "
754 		    "multiple of 2");
755 		ddi_prop_free(string_array);
756 		return;
757 	}
758 
759 	black_array = string_array;
760 	nblacks = nstrings;
761 }
762 
763 static void
764 blacklist_destroy(void)
765 {
766 	if (black_array) {
767 		ddi_prop_free(black_array);
768 		black_array = NULL;
769 		nblacks = 0;
770 	}
771 }
772 
773 static char *
774 immu_alloc_name(const char *str, int instance)
775 {
776 	size_t slen;
777 	char *s;
778 
779 	slen = strlen(str) + IMMU_ISTRLEN + 1;
780 	s = kmem_zalloc(slen, VM_SLEEP);
781 	if (s != NULL)
782 		(void) snprintf(s, slen, "%s%d", str, instance);
783 
784 	return (s);
785 }
786 
787 
788 /*
789  * Now set all the fields in the order they are defined
790  * We do this only as a defensive-coding practice, it is
791  * not a correctness issue.
792  */
793 static void *
794 immu_state_alloc(int seg, void *dmar_unit)
795 {
796 	immu_t *immu;
797 	char *nodename, *hcachename, *pcachename;
798 	int instance;
799 
800 	dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
801 	if (dmar_unit == NULL) {
802 		/* No more IOMMUs in this segment */
803 		return (NULL);
804 	}
805 
806 	immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP);
807 
808 	mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL);
809 
810 	mutex_enter(&(immu->immu_lock));
811 
812 	immu->immu_dmar_unit = dmar_unit;
813 	immu->immu_dip = immu_dmar_unit_dip(dmar_unit);
814 
815 	nodename = ddi_node_name(immu->immu_dip);
816 	instance = ddi_get_instance(immu->immu_dip);
817 
818 	immu->immu_name = immu_alloc_name(nodename, instance);
819 	if (immu->immu_name == NULL)
820 		return (NULL);
821 
822 	/*
823 	 * the immu_intr_lock mutex is grabbed by the IOMMU
824 	 * unit's interrupt handler so we need to use an
825 	 * interrupt cookie for the mutex
826 	 */
827 	mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER,
828 	    (void *)ipltospl(IMMU_INTR_IPL));
829 
830 	/* IOMMU regs related */
831 	mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
832 	cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
833 	immu->immu_regs_busy = B_FALSE;
834 
835 	/* DVMA related */
836 	immu->immu_dvma_coherent = B_FALSE;
837 
838 	/* DVMA context related */
839 	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
840 
841 	/* DVMA domain related */
842 	list_create(&(immu->immu_domain_list), sizeof (domain_t),
843 	    offsetof(domain_t, dom_immu_node));
844 
845 	/* DVMA special device lists */
846 	immu->immu_dvma_gfx_only = B_FALSE;
847 	list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t),
848 	    offsetof(immu_devi_t, imd_spc_node));
849 	list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t),
850 	    offsetof(immu_devi_t, imd_spc_node));
851 
852 	/* interrupt remapping related */
853 	mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL);
854 
855 	/* qinv related */
856 	mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL);
857 
858 	/*
859 	 * insert this immu unit into the system-wide list
860 	 */
861 	list_insert_tail(&immu_list, immu);
862 
863 	pcachename = immu_alloc_name("immu_pgtable_cache", instance);
864 	if (pcachename == NULL)
865 		return (NULL);
866 
867 	hcachename = immu_alloc_name("immu_hdl_cache", instance);
868 	if (hcachename == NULL)
869 		return (NULL);
870 
871 	immu->immu_pgtable_cache = kmem_cache_create(pcachename,
872 	    sizeof (pgtable_t), 0, pgtable_ctor, pgtable_dtor, NULL, immu,
873 	    NULL, 0);
874 	immu->immu_hdl_cache = kmem_cache_create(hcachename,
875 	    sizeof (immu_hdl_priv_t), 64, immu_hdl_priv_ctor,
876 	    NULL, NULL, immu, NULL, 0);
877 
878 	mutex_exit(&(immu->immu_lock));
879 
880 	ddi_err(DER_LOG, immu->immu_dip, "unit setup");
881 
882 	immu_dmar_set_immu(dmar_unit, immu);
883 
884 	return (dmar_unit);
885 }
886 
887 static void
888 immu_subsystems_setup(void)
889 {
890 	int seg;
891 	void *unit_hdl;
892 
893 	ddi_err(DER_VERB, NULL,
894 	    "Creating state structures for Intel IOMMU units");
895 
896 	mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
897 	list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
898 
899 	mutex_enter(&immu_lock);
900 
901 	unit_hdl = NULL;
902 	for (seg = 0; seg < IMMU_MAXSEG; seg++) {
903 		while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
904 			;
905 		}
906 	}
907 
908 	immu_regs_setup(&immu_list);	/* subsequent code needs this first */
909 	immu_dvma_setup(&immu_list);
910 	if (immu_qinv_setup(&immu_list) == DDI_SUCCESS)
911 		immu_intrmap_setup(&immu_list);
912 	else
913 		immu_intrmap_enable = B_FALSE;
914 
915 	mutex_exit(&immu_lock);
916 }
917 
918 /*
919  * immu_subsystems_startup()
920  * 	startup all units that were setup
921  */
922 static void
923 immu_subsystems_startup(void)
924 {
925 	immu_t *immu;
926 	iommulib_ops_t *iommulib_ops;
927 
928 	mutex_enter(&immu_lock);
929 
930 	immu_dmar_startup();
931 
932 	immu = list_head(&immu_list);
933 	for (; immu; immu = list_next(&immu_list, immu)) {
934 
935 		mutex_enter(&(immu->immu_lock));
936 
937 		immu_intr_register(immu);
938 		immu_dvma_startup(immu);
939 		immu_intrmap_startup(immu);
940 		immu_qinv_startup(immu);
941 
942 		/*
943 		 * Set IOMMU unit's regs to do
944 		 * the actual startup. This will
945 		 * set immu->immu_running  field
946 		 * if the unit is successfully
947 		 * started
948 		 */
949 		immu_regs_startup(immu);
950 
951 		mutex_exit(&(immu->immu_lock));
952 
953 		iommulib_ops = kmem_alloc(sizeof (iommulib_ops_t), KM_SLEEP);
954 		*iommulib_ops = immulib_ops;
955 		iommulib_ops->ilops_data = (void *)immu;
956 		(void) iommulib_iommu_register(immu->immu_dip, iommulib_ops,
957 		    &immu->immu_iommulib_handle);
958 	}
959 
960 	mutex_exit(&immu_lock);
961 }
962 
963 /* ##################  Intel IOMMU internal interfaces ###################### */
964 
965 /*
966  * Internal interfaces for IOMMU code (i.e. not exported to rootnex
967  * or rest of system)
968  */
969 
970 /*
971  * ddip can be NULL, in which case we walk up until we find the root dip
972  * NOTE: We never visit the root dip since its not a hardware node
973  */
974 int
975 immu_walk_ancestor(
976 	dev_info_t *rdip,
977 	dev_info_t *ddip,
978 	int (*func)(dev_info_t *, void *arg),
979 	void *arg,
980 	int *lvlp,
981 	immu_flags_t immu_flags)
982 {
983 	dev_info_t *pdip;
984 	int level;
985 	int error = DDI_SUCCESS;
986 
987 	/* ddip and immu can be NULL */
988 
989 	/* Hold rdip so that branch is not detached */
990 	ndi_hold_devi(rdip);
991 	for (pdip = rdip, level = 1; pdip && pdip != root_devinfo;
992 	    pdip = ddi_get_parent(pdip), level++) {
993 
994 		if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) {
995 			error = DDI_FAILURE;
996 			break;
997 		}
998 		if (func(pdip, arg) == DDI_WALK_TERMINATE) {
999 			break;
1000 		}
1001 		if (immu_flags & IMMU_FLAGS_DONTPASS) {
1002 			break;
1003 		}
1004 		if (pdip == ddip) {
1005 			break;
1006 		}
1007 	}
1008 
1009 	ndi_rele_devi(rdip);
1010 
1011 	if (lvlp)
1012 		*lvlp = level;
1013 
1014 	return (error);
1015 }
1016 
1017 /* ########################  Intel IOMMU entry points ####################### */
1018 /*
1019  * immu_init()
1020  *	called from rootnex_attach(). setup but don't startup the Intel IOMMU
1021  *      This is the first function called in Intel IOMMU code
1022  */
1023 void
1024 immu_init(void)
1025 {
1026 	char *phony_reg = "A thing of beauty is a joy forever";
1027 
1028 	/* Set some global shorthands that are needed by all of IOMMU code */
1029 	root_devinfo = ddi_root_node();
1030 
1031 	/*
1032 	 * Intel IOMMU only supported only if MMU(CPU) page size is ==
1033 	 * IOMMU pages size.
1034 	 */
1035 	/*LINTED*/
1036 	if (MMU_PAGESIZE != IMMU_PAGESIZE) {
1037 		ddi_err(DER_WARN, NULL,
1038 		    "MMU page size (%d) is not equal to\n"
1039 		    "IOMMU page size (%d). "
1040 		    "Disabling Intel IOMMU. ",
1041 		    MMU_PAGESIZE, IMMU_PAGESIZE);
1042 		immu_enable = B_FALSE;
1043 		return;
1044 	}
1045 
1046 	/*
1047 	 * Read rootnex.conf options. Do this before
1048 	 * boot options so boot options can override .conf options.
1049 	 */
1050 	read_conf_options();
1051 
1052 	/*
1053 	 * retrieve the Intel IOMMU boot options.
1054 	 * Do this before parsing immu ACPI table
1055 	 * as a boot option could potentially affect
1056 	 * ACPI parsing.
1057 	 */
1058 	ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n");
1059 	read_boot_options();
1060 
1061 	/*
1062 	 * Check the IOMMU enable boot-option first.
1063 	 * This is so that we can skip parsing the ACPI table
1064 	 * if necessary because that may cause problems in
1065 	 * systems with buggy BIOS or ACPI tables
1066 	 */
1067 	if (immu_enable == B_FALSE) {
1068 		return;
1069 	}
1070 
1071 	if (immu_intrmap_enable == B_TRUE)
1072 		immu_qinv_enable = B_TRUE;
1073 
1074 	/*
1075 	 * Next, check if the system even has an Intel IOMMU
1076 	 * We use the presence or absence of the IOMMU ACPI
1077 	 * table to detect Intel IOMMU.
1078 	 */
1079 	if (immu_dmar_setup() != DDI_SUCCESS) {
1080 		immu_enable = B_FALSE;
1081 		return;
1082 	}
1083 
1084 	mapping_list_setup();
1085 
1086 	/*
1087 	 * Check blacklists
1088 	 */
1089 	blacklist_setup();
1090 
1091 	if (blacklisted_smbios() == B_TRUE) {
1092 		blacklist_destroy();
1093 		immu_enable = B_FALSE;
1094 		return;
1095 	}
1096 
1097 	if (blacklisted_driver() == B_TRUE) {
1098 		blacklist_destroy();
1099 		immu_enable = B_FALSE;
1100 		return;
1101 	}
1102 
1103 	/*
1104 	 * Read the "raw" DMAR ACPI table to get information
1105 	 * and convert into a form we can use.
1106 	 */
1107 	if (immu_dmar_parse() != DDI_SUCCESS) {
1108 		blacklist_destroy();
1109 		immu_enable = B_FALSE;
1110 		return;
1111 	}
1112 
1113 	/*
1114 	 * now that we have processed the ACPI table
1115 	 * check if we need to blacklist this system
1116 	 * based on ACPI info
1117 	 */
1118 	if (blacklisted_acpi() == B_TRUE) {
1119 		immu_dmar_destroy();
1120 		blacklist_destroy();
1121 		immu_enable = B_FALSE;
1122 		return;
1123 	}
1124 
1125 	blacklist_destroy();
1126 
1127 	/*
1128 	 * Check if system has HW quirks.
1129 	 */
1130 	pre_setup_quirks();
1131 
1132 	/* Now do the rest of the setup */
1133 	immu_subsystems_setup();
1134 
1135 	/*
1136 	 * Now that the IMMU is setup, create a phony
1137 	 * reg prop so that suspend/resume works
1138 	 */
1139 	if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg",
1140 	    (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) {
1141 		ddi_err(DER_PANIC, NULL, "Failed to create reg prop for "
1142 		    "rootnex node");
1143 		/*NOTREACHED*/
1144 	}
1145 
1146 	immu_setup = B_TRUE;
1147 }
1148 
1149 /*
1150  * immu_startup()
1151  * 	called directly by boot code to startup
1152  *      all units of the IOMMU
1153  */
1154 void
1155 immu_startup(void)
1156 {
1157 	/*
1158 	 * If IOMMU is disabled, do nothing
1159 	 */
1160 	if (immu_enable == B_FALSE) {
1161 		return;
1162 	}
1163 
1164 	if (immu_setup == B_FALSE) {
1165 		ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
1166 		    "skipping IOMMU startup");
1167 		return;
1168 	}
1169 
1170 	pre_startup_quirks();
1171 
1172 	ddi_err(DER_CONT, NULL,
1173 	    "?Starting Intel IOMMU (dmar) units...\n");
1174 
1175 	immu_subsystems_startup();
1176 
1177 	immu_running = B_TRUE;
1178 }
1179 
1180 /*
1181  * Hook to notify IOMMU code of device tree changes
1182  */
1183 void
1184 immu_device_tree_changed(void)
1185 {
1186 	if (immu_setup == B_FALSE) {
1187 		return;
1188 	}
1189 
1190 	ddi_err(DER_WARN, NULL, "Intel IOMMU currently "
1191 	    "does not use device tree updates");
1192 }
1193 
1194 /*
1195  * Hook to notify IOMMU code of memory changes
1196  */
1197 void
1198 immu_physmem_update(uint64_t addr, uint64_t size)
1199 {
1200 	if (immu_setup == B_FALSE) {
1201 		return;
1202 	}
1203 	immu_dvma_physmem_update(addr, size);
1204 }
1205 
1206 /*
1207  * immu_quiesce()
1208  * 	quiesce all units that are running
1209  */
1210 int
1211 immu_quiesce(void)
1212 {
1213 	immu_t *immu;
1214 	int ret = DDI_SUCCESS;
1215 
1216 	mutex_enter(&immu_lock);
1217 
1218 	if (immu_running == B_FALSE) {
1219 		mutex_exit(&immu_lock);
1220 		return (DDI_SUCCESS);
1221 	}
1222 
1223 	immu = list_head(&immu_list);
1224 	for (; immu; immu = list_next(&immu_list, immu)) {
1225 
1226 		/* if immu is not running, we dont quiesce */
1227 		if (immu->immu_regs_running == B_FALSE)
1228 			continue;
1229 
1230 		/* flush caches */
1231 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1232 		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1233 		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1234 		rw_exit(&(immu->immu_ctx_rwlock));
1235 		immu_regs_wbf_flush(immu);
1236 
1237 		mutex_enter(&(immu->immu_lock));
1238 
1239 		/*
1240 		 * Set IOMMU unit's regs to do
1241 		 * the actual shutdown.
1242 		 */
1243 		immu_regs_shutdown(immu);
1244 		immu_regs_suspend(immu);
1245 
1246 		/* if immu is still running, we failed */
1247 		if (immu->immu_regs_running == B_TRUE)
1248 			ret = DDI_FAILURE;
1249 		else
1250 			immu->immu_regs_quiesced = B_TRUE;
1251 
1252 		mutex_exit(&(immu->immu_lock));
1253 	}
1254 
1255 	if (ret == DDI_SUCCESS) {
1256 		immu_running = B_FALSE;
1257 		immu_quiesced = B_TRUE;
1258 	}
1259 	mutex_exit(&immu_lock);
1260 
1261 	return (ret);
1262 }
1263 
1264 /*
1265  * immu_unquiesce()
1266  * 	unquiesce all units
1267  */
1268 int
1269 immu_unquiesce(void)
1270 {
1271 	immu_t *immu;
1272 	int ret = DDI_SUCCESS;
1273 
1274 	mutex_enter(&immu_lock);
1275 
1276 	if (immu_quiesced == B_FALSE) {
1277 		mutex_exit(&immu_lock);
1278 		return (DDI_SUCCESS);
1279 	}
1280 
1281 	immu = list_head(&immu_list);
1282 	for (; immu; immu = list_next(&immu_list, immu)) {
1283 
1284 		mutex_enter(&(immu->immu_lock));
1285 
1286 		/* if immu was not quiesced, i.e was not running before */
1287 		if (immu->immu_regs_quiesced == B_FALSE) {
1288 			mutex_exit(&(immu->immu_lock));
1289 			continue;
1290 		}
1291 
1292 		if (immu_regs_resume(immu) != DDI_SUCCESS) {
1293 			ret = DDI_FAILURE;
1294 			mutex_exit(&(immu->immu_lock));
1295 			continue;
1296 		}
1297 
1298 		/* flush caches before unquiesce */
1299 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1300 		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1301 		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1302 		rw_exit(&(immu->immu_ctx_rwlock));
1303 
1304 		/*
1305 		 * Set IOMMU unit's regs to do
1306 		 * the actual startup. This will
1307 		 * set immu->immu_regs_running  field
1308 		 * if the unit is successfully
1309 		 * started
1310 		 */
1311 		immu_regs_startup(immu);
1312 
1313 		if (immu->immu_regs_running == B_FALSE) {
1314 			ret = DDI_FAILURE;
1315 		} else {
1316 			immu_quiesced = B_TRUE;
1317 			immu_running = B_TRUE;
1318 			immu->immu_regs_quiesced = B_FALSE;
1319 		}
1320 
1321 		mutex_exit(&(immu->immu_lock));
1322 	}
1323 
1324 	mutex_exit(&immu_lock);
1325 
1326 	return (ret);
1327 }
1328 
1329 void
1330 immu_init_inv_wait(immu_inv_wait_t *iwp, const char *name, boolean_t sync)
1331 {
1332 	caddr_t vaddr;
1333 	uint64_t paddr;
1334 
1335 	iwp->iwp_sync = sync;
1336 
1337 	vaddr = (caddr_t)&iwp->iwp_vstatus;
1338 	paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
1339 	paddr += ((uintptr_t)vaddr) & MMU_PAGEOFFSET;
1340 
1341 	iwp->iwp_pstatus = paddr;
1342 	iwp->iwp_name = name;
1343 }
1344 
1345 /* ##############  END Intel IOMMU entry points ################## */
1346