xref: /titanic_44/usr/src/uts/i86pc/io/immu.c (revision 82629e3015252bf18319ba3815c773df23e21436)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Portions Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2009, Intel Corporation.
27  * All rights reserved.
28  */
29 
30 /*
31  * Intel IOMMU implementation
32  * This file contains Intel IOMMU code exported
33  * to the rest of the system and code that deals
34  * with the Intel IOMMU as a whole.
35  */
36 
37 #include <sys/conf.h>
38 #include <sys/modctl.h>
39 #include <sys/pci.h>
40 #include <sys/pci_impl.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ddi.h>
43 #include <sys/ddidmareq.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/ddifm.h>
46 #include <sys/sunndi.h>
47 #include <sys/debug.h>
48 #include <sys/fm/protocol.h>
49 #include <sys/note.h>
50 #include <sys/apic.h>
51 #include <vm/hat_i86.h>
52 #include <sys/smp_impldefs.h>
53 #include <sys/spl.h>
54 #include <sys/archsystm.h>
55 #include <sys/x86_archext.h>
56 #include <sys/rootnex.h>
57 #include <sys/avl.h>
58 #include <sys/bootconf.h>
59 #include <sys/bootinfo.h>
60 #include <sys/atomic.h>
61 #include <sys/immu.h>
62 /* ########################### Globals and tunables ######################## */
63 /*
64  * Global switches (boolean) that can be toggled either via boot options
65  * or via /etc/system or kmdb
66  */
67 
68 /* Various features */
69 boolean_t immu_enable = B_TRUE;
70 boolean_t immu_dvma_enable = B_TRUE;
71 
72 /* accessed in other files so not static */
73 boolean_t immu_gfxdvma_enable = B_TRUE;
74 boolean_t immu_intrmap_enable = B_FALSE;
75 boolean_t immu_qinv_enable = B_FALSE;
76 
77 /* various quirks that need working around */
78 
79 /* XXX We always map page 0 read/write for now */
80 boolean_t immu_quirk_usbpage0 = B_TRUE;
81 boolean_t immu_quirk_usbrmrr = B_TRUE;
82 boolean_t immu_quirk_usbfullpa;
83 boolean_t immu_quirk_mobile4;
84 
85 /* debug messages */
86 boolean_t immu_dmar_print;
87 
88 /* Tunables */
89 int64_t immu_flush_gran = 5;
90 
91 /* ############  END OPTIONS section ################ */
92 
93 /*
94  * Global used internally by Intel IOMMU code
95  */
96 dev_info_t *root_devinfo;
97 kmutex_t immu_lock;
98 list_t immu_list;
99 void *immu_pgtable_cache;
100 boolean_t immu_setup;
101 boolean_t immu_running;
102 boolean_t immu_quiesced;
103 
104 /* ######################## END Globals and tunables ###################### */
105 /* Globals used only in this file */
106 static char **black_array;
107 static uint_t nblacks;
108 /* ###################### Utility routines ############################# */
109 
110 /*
111  * Check if the device has mobile 4 chipset
112  */
113 static int
114 check_mobile4(dev_info_t *dip, void *arg)
115 {
116 	_NOTE(ARGUNUSED(arg));
117 	int vendor, device;
118 	int *ip = (int *)arg;
119 
120 	ASSERT(arg);
121 
122 	vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
123 	    "vendor-id", -1);
124 	device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
125 	    "device-id", -1);
126 
127 	if (vendor == 0x8086 && device == 0x2a40) {
128 		*ip = B_TRUE;
129 		ddi_err(DER_NOTE, dip, "IMMU: Mobile 4 chipset detected. "
130 		    "Force setting IOMMU write buffer");
131 		return (DDI_WALK_TERMINATE);
132 	} else {
133 		return (DDI_WALK_CONTINUE);
134 	}
135 }
136 
137 static void
138 map_bios_rsvd_mem(dev_info_t *dip)
139 {
140 	struct memlist *mp;
141 	int e;
142 
143 	memlist_read_lock();
144 
145 	mp = bios_rsvd;
146 	while (mp != NULL) {
147 		memrng_t mrng = {0};
148 
149 		ddi_err(DER_LOG, dip, "IMMU: Mapping BIOS rsvd range "
150 		    "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
151 		    mp->ml_address + mp->ml_size);
152 
153 		mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
154 		mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
155 
156 		e = immu_dvma_map(NULL, NULL, &mrng, 0, dip, IMMU_FLAGS_MEMRNG);
157 		ASSERT(e == DDI_DMA_MAPPED || e == DDI_DMA_USE_PHYSICAL);
158 
159 		mp = mp->ml_next;
160 	}
161 
162 	memlist_read_unlock();
163 }
164 
165 
166 /*
167  * Check if the driver requests physical mapping
168  */
169 /*ARGSUSED*/
170 static void
171 check_physical(dev_info_t *dip, void *arg)
172 {
173 	char *val;
174 
175 	/*
176 	 * Check for the DVMA unity mapping property on the device
177 	 */
178 	val = NULL;
179 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
180 	    DDI_PROP_DONTPASS, DDI_DVMA_MAPTYPE_PROP, &val) == DDI_SUCCESS) {
181 		ASSERT(val);
182 		if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) != 0) {
183 			ddi_err(DER_WARN, dip, "%s value \"%s\" is not valid",
184 			    DDI_DVMA_MAPTYPE_PROP, val);
185 		} else {
186 			int e;
187 
188 			ddi_err(DER_NOTE, dip,
189 			    "Using unity DVMA mapping for device");
190 			e = immu_dvma_map(NULL, NULL, NULL, 0, dip,
191 			    IMMU_FLAGS_UNITY);
192 			/* for unity mode, map will return USE_PHYSICAL */
193 			ASSERT(e == DDI_DMA_USE_PHYSICAL);
194 		}
195 		ddi_prop_free(val);
196 	}
197 }
198 
199 /*
200  * Check if the device is USB controller
201  */
202 /*ARGSUSED*/
203 static void
204 check_usb(dev_info_t *dip, void *arg)
205 {
206 	const char *drv = ddi_driver_name(dip);
207 
208 	if (drv == NULL ||
209 	    (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 &&
210 	    strcmp(drv, "ehci") != 0)) {
211 		return;
212 	}
213 
214 	/* This must come first since it does unity mapping */
215 	if (immu_quirk_usbfullpa == B_TRUE) {
216 		int e;
217 		ddi_err(DER_NOTE, dip, "Applying USB FULL PA quirk");
218 		e = immu_dvma_map(NULL, NULL, NULL, 0, dip, IMMU_FLAGS_UNITY);
219 		/* for unity mode, map will return USE_PHYSICAL */
220 		ASSERT(e == DDI_DMA_USE_PHYSICAL);
221 	}
222 
223 	if (immu_quirk_usbrmrr == B_TRUE) {
224 		ddi_err(DER_LOG, dip, "Applying USB RMRR quirk");
225 		map_bios_rsvd_mem(dip);
226 	}
227 }
228 
229 /*
230  * Check if the device is a LPC device
231  */
232 /*ARGSUSED*/
233 static void
234 check_lpc(dev_info_t *dip, void *arg)
235 {
236 	immu_devi_t *immu_devi;
237 
238 	immu_devi = immu_devi_get(dip);
239 	ASSERT(immu_devi);
240 	if (immu_devi->imd_lpc == B_TRUE) {
241 		ddi_err(DER_LOG, dip, "IMMU: Found LPC device");
242 		/* This will put the immu_devi on the LPC "specials" list */
243 		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
244 	}
245 }
246 
247 /*
248  * Check if the device is a GFX device
249  */
250 /*ARGSUSED*/
251 static void
252 check_gfx(dev_info_t *dip, void *arg)
253 {
254 	immu_devi_t *immu_devi;
255 	int e;
256 
257 	immu_devi = immu_devi_get(dip);
258 	ASSERT(immu_devi);
259 	if (immu_devi->imd_display == B_TRUE) {
260 		ddi_err(DER_LOG, dip, "IMMU: Found GFX device");
261 		/* This will put the immu_devi on the GFX "specials" list */
262 		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
263 		e = immu_dvma_map(NULL, NULL, NULL, 0, dip, IMMU_FLAGS_UNITY);
264 		/* for unity mode, map will return USE_PHYSICAL */
265 		ASSERT(e == DDI_DMA_USE_PHYSICAL);
266 	}
267 }
268 
269 static void
270 walk_tree(int (*f)(dev_info_t *, void *), void *arg)
271 {
272 	int count;
273 
274 	ndi_devi_enter(root_devinfo, &count);
275 	ddi_walk_devs(ddi_get_child(root_devinfo), f, arg);
276 	ndi_devi_exit(root_devinfo, count);
277 }
278 
279 static int
280 check_pre_setup_quirks(dev_info_t *dip, void *arg)
281 {
282 	/* just 1 check right now */
283 	return (check_mobile4(dip, arg));
284 }
285 
286 static int
287 check_pre_startup_quirks(dev_info_t *dip, void *arg)
288 {
289 	if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) {
290 		ddi_err(DER_PANIC, dip, "Failed to get immu_devi");
291 	}
292 
293 	check_gfx(dip, arg);
294 
295 	check_lpc(dip, arg);
296 
297 	check_usb(dip, arg);
298 
299 	check_physical(dip, arg);
300 
301 	return (DDI_WALK_CONTINUE);
302 }
303 
304 static void
305 pre_setup_quirks(void)
306 {
307 	walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4);
308 }
309 
310 static void
311 pre_startup_quirks(void)
312 {
313 	walk_tree(check_pre_startup_quirks, NULL);
314 
315 	immu_dmar_rmrr_map();
316 }
317 
318 /*
319  * get_conf_opt()
320  * 	get a rootnex.conf setting  (always a boolean)
321  */
322 static void
323 get_conf_opt(char *bopt, boolean_t *kvar)
324 {
325 	char *val = NULL;
326 
327 	ASSERT(bopt);
328 	ASSERT(kvar);
329 
330 	/*
331 	 * Check the rootnex.conf property
332 	 * Fake up a dev_t since searching the global
333 	 * property list needs it
334 	 */
335 	if (ddi_prop_lookup_string(makedevice(ddi_name_to_major("rootnex"), 0),
336 	    root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
337 	    bopt, &val) != DDI_PROP_SUCCESS) {
338 		return;
339 	}
340 
341 	ASSERT(val);
342 	if (strcmp(val, "true") == 0) {
343 		*kvar = B_TRUE;
344 	} else if (strcmp(val, "false") == 0) {
345 		*kvar = B_FALSE;
346 	} else {
347 		ddi_err(DER_WARN, NULL, "rootnex.conf switch %s=\"%s\" ",
348 		    "is not set to true or false. Ignoring option.",
349 		    bopt, val);
350 	}
351 	ddi_prop_free(val);
352 }
353 
354 /*
355  * get_bootopt()
356  * 	check a boot option  (always a boolean)
357  */
358 static void
359 get_bootopt(char *bopt, boolean_t *kvar)
360 {
361 	char *val = NULL;
362 
363 	ASSERT(bopt);
364 	ASSERT(kvar);
365 
366 	/*
367 	 * All boot options set at the GRUB menu become
368 	 * properties on the rootnex.
369 	 */
370 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo,
371 	    DDI_PROP_DONTPASS, bopt, &val) == DDI_SUCCESS) {
372 		ASSERT(val);
373 		if (strcmp(val, "true") == 0) {
374 			*kvar = B_TRUE;
375 		} else if (strcmp(val, "false") == 0) {
376 			*kvar = B_FALSE;
377 		} else {
378 			ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ",
379 			    "is not set to true or false. Ignoring option.",
380 			    bopt, val);
381 		}
382 		ddi_prop_free(val);
383 	}
384 }
385 
386 static void
387 get_conf_tunables(char *bopt, int64_t *ivar)
388 {
389 	int64_t	*iarray;
390 	uint_t n;
391 
392 	/*
393 	 * Check the rootnex.conf property
394 	 * Fake up a dev_t since searching the global
395 	 * property list needs it
396 	 */
397 	if (ddi_prop_lookup_int64_array(
398 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
399 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
400 	    &iarray, &n) != DDI_PROP_SUCCESS) {
401 		return;
402 	}
403 
404 	if (n != 1) {
405 		ddi_err(DER_WARN, NULL, "More than one value specified for "
406 		    "%s property. Ignoring and using default",
407 		    "immu-flush-gran");
408 		ddi_prop_free(iarray);
409 		return;
410 	}
411 
412 	if (iarray[0] < 0) {
413 		ddi_err(DER_WARN, NULL, "Negative value specified for "
414 		    "%s property. Inoring and Using default value",
415 		    "immu-flush-gran");
416 		ddi_prop_free(iarray);
417 		return;
418 	}
419 
420 	*ivar = iarray[0];
421 
422 	ddi_prop_free(iarray);
423 }
424 
425 static void
426 read_conf_options(void)
427 {
428 	/* enable/disable options */
429 	get_conf_opt("immu-enable", &immu_enable);
430 	get_conf_opt("immu-dvma-enable", &immu_dvma_enable);
431 	get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
432 	get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable);
433 	get_conf_opt("immu-qinv-enable", &immu_qinv_enable);
434 
435 	/* workaround switches */
436 	get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
437 	get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
438 	get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
439 
440 	/* debug printing */
441 	get_conf_opt("immu-dmar-print", &immu_dmar_print);
442 
443 	/* get tunables */
444 	get_conf_tunables("immu-flush-gran", &immu_flush_gran);
445 }
446 
447 static void
448 read_boot_options(void)
449 {
450 	/* enable/disable options */
451 	get_bootopt("immu-enable", &immu_enable);
452 	get_bootopt("immu-dvma-enable", &immu_dvma_enable);
453 	get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
454 	get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
455 	get_bootopt("immu-qinv-enable", &immu_qinv_enable);
456 
457 	/* workaround switches */
458 	get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
459 	get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
460 	get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
461 
462 	/* debug printing */
463 	get_bootopt("immu-dmar-print", &immu_dmar_print);
464 }
465 
466 /*
467  * Note, this will not catch hardware not enumerated
468  * in early boot
469  */
470 static boolean_t
471 blacklisted_driver(void)
472 {
473 	char **strptr;
474 	int i;
475 	major_t maj;
476 
477 	ASSERT((black_array == NULL) ^ (nblacks != 0));
478 
479 	/* need at least 2 strings */
480 	if (nblacks < 2) {
481 		return (B_FALSE);
482 	}
483 
484 	for (i = 0; nblacks - i > 1; i++) {
485 		strptr = &black_array[i];
486 		if (strcmp(*strptr++, "DRIVER") == 0) {
487 			if ((maj = ddi_name_to_major(*strptr++))
488 			    != DDI_MAJOR_T_NONE) {
489 				/* is there hardware bound to this drvr */
490 				if (devnamesp[maj].dn_head != NULL) {
491 					return (B_TRUE);
492 				}
493 			}
494 			i += 1;   /* for loop adds 1, so add only 1 here */
495 		}
496 	}
497 
498 	return (B_FALSE);
499 }
500 
501 static boolean_t
502 blacklisted_smbios(void)
503 {
504 	id_t smid;
505 	smbios_hdl_t *smhdl;
506 	smbios_info_t sminf;
507 	smbios_system_t smsys;
508 	char *mfg, *product, *version;
509 	char **strptr;
510 	int i;
511 
512 	ASSERT((black_array == NULL) ^ (nblacks != 0));
513 
514 	/* need at least 4 strings for this setting */
515 	if (nblacks < 4) {
516 		return (B_FALSE);
517 	}
518 
519 	smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL);
520 	if (smhdl == NULL ||
521 	    (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR ||
522 	    smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) {
523 		return (B_FALSE);
524 	}
525 
526 	mfg = (char *)sminf.smbi_manufacturer;
527 	product = (char *)sminf.smbi_product;
528 	version = (char *)sminf.smbi_version;
529 
530 	ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n");
531 	ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg);
532 	ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
533 	ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);
534 
535 	for (i = 0; nblacks - i > 3; i++) {
536 		strptr = &black_array[i];
537 		if (strcmp(*strptr++, "SMBIOS") == 0) {
538 			if (strcmp(*strptr++, mfg) == 0 &&
539 			    ((char *)strptr == '\0' ||
540 			    strcmp(*strptr++, product) == 0) &&
541 			    ((char *)strptr == '\0' ||
542 			    strcmp(*strptr++, version) == 0)) {
543 				return (B_TRUE);
544 			}
545 			i += 3;
546 		}
547 	}
548 
549 	return (B_FALSE);
550 }
551 
552 static boolean_t
553 blacklisted_acpi(void)
554 {
555 	ASSERT((black_array == NULL) ^ (nblacks != 0));
556 	if (nblacks == 0) {
557 		return (B_FALSE);
558 	}
559 
560 	return (immu_dmar_blacklisted(black_array, nblacks));
561 }
562 
563 /*
564  * Check if system is blacklisted by Intel IOMMU driver
565  * i.e. should Intel IOMMU be disabled on this system
566  * Currently a system can be blacklistd based on the
567  * following bases:
568  *
569  * 1. DMAR ACPI table information.
570  *    This information includes things like
571  *    manufacturer and revision number. If rootnex.conf
572  *    has matching info set in its blacklist property
573  *    then Intel IOMMu will be disabled
574  *
575  * 2. SMBIOS information
576  *
577  * 3. Driver installed - useful if a particular
578  *    driver or hardware is toxic if Intel IOMMU
579  *    is turned on.
580  */
581 
582 static void
583 blacklist_setup(void)
584 {
585 	char **string_array;
586 	uint_t nstrings;
587 
588 	/*
589 	 * Check the rootnex.conf blacklist property.
590 	 * Fake up a dev_t since searching the global
591 	 * property list needs it
592 	 */
593 	if (ddi_prop_lookup_string_array(
594 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
595 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist",
596 	    &string_array, &nstrings) != DDI_PROP_SUCCESS) {
597 		return;
598 	}
599 
600 	/* smallest blacklist criteria works with multiples of 2 */
601 	if (nstrings % 2 != 0) {
602 		ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist "
603 		    "rootnex.conf: number of strings must be a "
604 		    "multiple of 2");
605 		ddi_prop_free(string_array);
606 		return;
607 	}
608 
609 	black_array = string_array;
610 	nblacks = nstrings;
611 }
612 
613 static void
614 blacklist_destroy(void)
615 {
616 	if (black_array) {
617 		ddi_prop_free(black_array);
618 		black_array = NULL;
619 		nblacks = 0;
620 	}
621 
622 	ASSERT(black_array == NULL);
623 	ASSERT(nblacks == 0);
624 }
625 
626 
627 /*
628  * Now set all the fields in the order they are defined
629  * We do this only as a defensive-coding practice, it is
630  * not a correctness issue.
631  */
632 static void *
633 immu_state_alloc(int seg, void *dmar_unit)
634 {
635 	immu_t *immu;
636 
637 	dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
638 	if (dmar_unit == NULL) {
639 		/* No more IOMMUs in this segment */
640 		return (NULL);
641 	}
642 
643 	immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP);
644 
645 	mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL);
646 
647 	mutex_enter(&(immu->immu_lock));
648 
649 	immu->immu_dmar_unit = dmar_unit;
650 	immu->immu_name = ddi_strdup(immu_dmar_unit_name(dmar_unit),
651 	    KM_SLEEP);
652 	immu->immu_dip = immu_dmar_unit_dip(dmar_unit);
653 
654 	/*
655 	 * the immu_intr_lock mutex is grabbed by the IOMMU
656 	 * unit's interrupt handler so we need to use an
657 	 * interrupt cookie for the mutex
658 	 */
659 	mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER,
660 	    (void *)ipltospl(IMMU_INTR_IPL));
661 
662 	/* IOMMU regs related */
663 	mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
664 	cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
665 	immu->immu_regs_busy = B_FALSE;
666 
667 	/* DVMA related */
668 	immu->immu_dvma_coherent = B_FALSE;
669 
670 	/* DVMA context related */
671 	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
672 
673 	/* DVMA domain related */
674 	list_create(&(immu->immu_domain_list), sizeof (domain_t),
675 	    offsetof(domain_t, dom_immu_node));
676 
677 	/* DVMA special device lists */
678 	immu->immu_dvma_gfx_only = B_FALSE;
679 	list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t),
680 	    offsetof(immu_devi_t, imd_spc_node));
681 	list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t),
682 	    offsetof(immu_devi_t, imd_spc_node));
683 
684 	/* interrupt remapping related */
685 	mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL);
686 
687 	/* qinv related */
688 	mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL);
689 
690 	/*
691 	 * insert this immu unit into the system-wide list
692 	 */
693 	list_insert_tail(&immu_list, immu);
694 
695 	mutex_exit(&(immu->immu_lock));
696 
697 	ddi_err(DER_LOG, immu->immu_dip, "IMMU: unit setup");
698 
699 	immu_dmar_set_immu(dmar_unit, immu);
700 
701 	return (dmar_unit);
702 }
703 
704 static void
705 immu_subsystems_setup(void)
706 {
707 	int seg;
708 	void *unit_hdl;
709 
710 	ddi_err(DER_VERB, NULL,
711 	    "Creating state structures for Intel IOMMU units\n");
712 
713 	ASSERT(immu_setup == B_FALSE);
714 	ASSERT(immu_running == B_FALSE);
715 
716 	mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
717 	list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
718 
719 	mutex_enter(&immu_lock);
720 
721 	ASSERT(immu_pgtable_cache == NULL);
722 
723 	immu_pgtable_cache = kmem_cache_create("immu_pgtable_cache",
724 	    sizeof (pgtable_t), 0,
725 	    pgtable_ctor, pgtable_dtor, NULL, NULL, NULL, 0);
726 
727 	unit_hdl = NULL;
728 	for (seg = 0; seg < IMMU_MAXSEG; seg++) {
729 		while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
730 			;
731 		}
732 	}
733 
734 	immu_regs_setup(&immu_list);	/* subsequent code needs this first */
735 	immu_dvma_setup(&immu_list);
736 	immu_intrmap_setup(&immu_list);
737 	immu_qinv_setup(&immu_list);
738 
739 	mutex_exit(&immu_lock);
740 }
741 
742 /*
743  * immu_subsystems_startup()
744  * 	startup all units that were setup
745  */
746 static void
747 immu_subsystems_startup(void)
748 {
749 	immu_t *immu;
750 
751 	mutex_enter(&immu_lock);
752 
753 	ASSERT(immu_setup == B_TRUE);
754 	ASSERT(immu_running == B_FALSE);
755 
756 	immu_dmar_startup();
757 
758 	immu = list_head(&immu_list);
759 	for (; immu; immu = list_next(&immu_list, immu)) {
760 
761 		mutex_enter(&(immu->immu_lock));
762 
763 		immu_intr_register(immu);
764 		immu_dvma_startup(immu);
765 		immu_intrmap_startup(immu);
766 		immu_qinv_startup(immu);
767 
768 		/*
769 		 * Set IOMMU unit's regs to do
770 		 * the actual startup. This will
771 		 * set immu->immu_running  field
772 		 * if the unit is successfully
773 		 * started
774 		 */
775 		immu_regs_startup(immu);
776 
777 		mutex_exit(&(immu->immu_lock));
778 	}
779 
780 	mutex_exit(&immu_lock);
781 }
782 
783 /* ##################  Intel IOMMU internal interfaces ###################### */
784 
785 /*
786  * Internal interfaces for IOMMU code (i.e. not exported to rootnex
787  * or rest of system)
788  */
789 
790 /*
791  * ddip can be NULL, in which case we walk up until we find the root dip
792  * NOTE: We never visit the root dip since its not a hardware node
793  */
794 int
795 immu_walk_ancestor(
796 	dev_info_t *rdip,
797 	dev_info_t *ddip,
798 	int (*func)(dev_info_t *, void *arg),
799 	void *arg,
800 	int *lvlp,
801 	immu_flags_t immu_flags)
802 {
803 	dev_info_t *pdip;
804 	int level;
805 	int error = DDI_SUCCESS;
806 
807 	ASSERT(root_devinfo);
808 	ASSERT(rdip);
809 	ASSERT(rdip != root_devinfo);
810 	ASSERT(func);
811 
812 	/* ddip and immu can be NULL */
813 
814 	/* Hold rdip so that branch is not detached */
815 	ndi_hold_devi(rdip);
816 	for (pdip = rdip, level = 1; pdip && pdip != root_devinfo;
817 	    pdip = ddi_get_parent(pdip), level++) {
818 
819 		if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) {
820 			error = DDI_FAILURE;
821 			break;
822 		}
823 		if (func(pdip, arg) == DDI_WALK_TERMINATE) {
824 			break;
825 		}
826 		if (immu_flags & IMMU_FLAGS_DONTPASS) {
827 			break;
828 		}
829 		if (pdip == ddip) {
830 			break;
831 		}
832 	}
833 
834 	ndi_rele_devi(rdip);
835 
836 	if (lvlp)
837 		*lvlp = level;
838 
839 	return (error);
840 }
841 
842 /* ########################  Intel IOMMU entry points ####################### */
843 /*
844  * immu_init()
845  *	called from rootnex_attach(). setup but don't startup the Intel IOMMU
846  *      This is the first function called in Intel IOMMU code
847  */
848 void
849 immu_init(void)
850 {
851 	char *phony_reg = "A thing of beauty is a joy forever";
852 
853 	/* Set some global shorthands that are needed by all of IOMMU code */
854 	ASSERT(root_devinfo == NULL);
855 	root_devinfo = ddi_root_node();
856 
857 	/*
858 	 * Intel IOMMU only supported only if MMU(CPU) page size is ==
859 	 * IOMMU pages size.
860 	 */
861 	/*LINTED*/
862 	if (MMU_PAGESIZE != IMMU_PAGESIZE) {
863 		ddi_err(DER_WARN, NULL,
864 		    "MMU page size (%d) is not equal to\n"
865 		    "IOMMU page size (%d). "
866 		    "Disabling Intel IOMMU. ",
867 		    MMU_PAGESIZE, IMMU_PAGESIZE);
868 		immu_enable = B_FALSE;
869 		return;
870 	}
871 
872 	/*
873 	 * Read rootnex.conf options. Do this before
874 	 * boot options so boot options can override .conf options.
875 	 */
876 	read_conf_options();
877 
878 	/*
879 	 * retrieve the Intel IOMMU boot options.
880 	 * Do this before parsing immu ACPI table
881 	 * as a boot option could potentially affect
882 	 * ACPI parsing.
883 	 */
884 	ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n");
885 	read_boot_options();
886 
887 	/*
888 	 * Check the IOMMU enable boot-option first.
889 	 * This is so that we can skip parsing the ACPI table
890 	 * if necessary because that may cause problems in
891 	 * systems with buggy BIOS or ACPI tables
892 	 */
893 	if (immu_enable == B_FALSE) {
894 		return;
895 	}
896 
897 	/*
898 	 * Next, check if the system even has an Intel IOMMU
899 	 * We use the presence or absence of the IOMMU ACPI
900 	 * table to detect Intel IOMMU.
901 	 */
902 	if (immu_dmar_setup() != DDI_SUCCESS) {
903 		immu_enable = B_FALSE;
904 		return;
905 	}
906 
907 	/*
908 	 * Check blacklists
909 	 */
910 	blacklist_setup();
911 
912 	if (blacklisted_smbios() == B_TRUE) {
913 		blacklist_destroy();
914 		immu_enable = B_FALSE;
915 		return;
916 	}
917 
918 	if (blacklisted_driver() == B_TRUE) {
919 		blacklist_destroy();
920 		immu_enable = B_FALSE;
921 		return;
922 	}
923 
924 	/*
925 	 * Read the "raw" DMAR ACPI table to get information
926 	 * and convert into a form we can use.
927 	 */
928 	if (immu_dmar_parse() != DDI_SUCCESS) {
929 		blacklist_destroy();
930 		immu_enable = B_FALSE;
931 		return;
932 	}
933 
934 	/*
935 	 * now that we have processed the ACPI table
936 	 * check if we need to blacklist this system
937 	 * based on ACPI info
938 	 */
939 	if (blacklisted_acpi() == B_TRUE) {
940 		immu_dmar_destroy();
941 		blacklist_destroy();
942 		immu_enable = B_FALSE;
943 		return;
944 	}
945 
946 	blacklist_destroy();
947 
948 	/*
949 	 * Check if system has HW quirks.
950 	 */
951 	pre_setup_quirks();
952 
953 	/* Now do the rest of the setup */
954 	immu_subsystems_setup();
955 
956 	/*
957 	 * Now that the IMMU is setup, create a phony
958 	 * reg prop so that suspend/resume works
959 	 */
960 	if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg",
961 	    (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) {
962 		ddi_err(DER_PANIC, NULL, "Failed to create reg prop for "
963 		    "rootnex node");
964 		/*NOTREACHED*/
965 	}
966 
967 	immu_setup = B_TRUE;
968 }
969 
970 /*
971  * immu_startup()
972  * 	called directly by boot code to startup
973  *      all units of the IOMMU
974  */
975 void
976 immu_startup(void)
977 {
978 	/*
979 	 * If IOMMU is disabled, do nothing
980 	 */
981 	if (immu_enable == B_FALSE) {
982 		return;
983 	}
984 
985 	if (immu_setup == B_FALSE) {
986 		ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
987 		    "skipping IOMU startup");
988 		return;
989 	}
990 
991 	pre_startup_quirks();
992 
993 	ddi_err(DER_CONT, NULL,
994 	    "?Starting Intel IOMMU (dmar) units...\n");
995 
996 	immu_subsystems_startup();
997 
998 	immu_running = B_TRUE;
999 }
1000 
1001 /*
1002  * immu_map_sgl()
1003  * 	called from rootnex_coredma_bindhdl() when Intel
1004  *	IOMMU is enabled to build DVMA cookies and map them.
1005  */
1006 int
1007 immu_map_sgl(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
1008     int prealloc_count, dev_info_t *rdip)
1009 {
1010 	if (immu_running == B_FALSE) {
1011 		return (DDI_DMA_USE_PHYSICAL);
1012 	}
1013 
1014 	return (immu_dvma_map(hp, dmareq, NULL, prealloc_count, rdip,
1015 	    IMMU_FLAGS_DMAHDL));
1016 }
1017 
1018 /*
1019  * immu_unmap_sgl()
1020  * 	called from rootnex_coredma_unbindhdl(), to unmap DVMA
1021  * 	cookies and free them
1022  */
1023 int
1024 immu_unmap_sgl(ddi_dma_impl_t *hp, dev_info_t *rdip)
1025 {
1026 	if (immu_running == B_FALSE) {
1027 		return (DDI_DMA_USE_PHYSICAL);
1028 	}
1029 
1030 	return (immu_dvma_unmap(hp, rdip));
1031 }
1032 
1033 /*
1034  * Hook to notify IOMMU code of device tree changes
1035  */
1036 void
1037 immu_device_tree_changed(void)
1038 {
1039 	if (immu_setup == B_FALSE) {
1040 		return;
1041 	}
1042 
1043 	ddi_err(DER_WARN, NULL, "Intel IOMMU currently "
1044 	    "does not use device tree updates");
1045 }
1046 
1047 /*
1048  * Hook to notify IOMMU code of memory changes
1049  */
1050 void
1051 immu_physmem_update(uint64_t addr, uint64_t size)
1052 {
1053 	if (immu_setup == B_FALSE) {
1054 		return;
1055 	}
1056 	immu_dvma_physmem_update(addr, size);
1057 }
1058 
1059 /*
1060  * immu_quiesce()
1061  * 	quiesce all units that are running
1062  */
1063 int
1064 immu_quiesce(void)
1065 {
1066 	immu_t *immu;
1067 	int ret = DDI_SUCCESS;
1068 
1069 	mutex_enter(&immu_lock);
1070 
1071 	if (immu_running == B_FALSE)
1072 		return (DDI_SUCCESS);
1073 
1074 	ASSERT(immu_setup == B_TRUE);
1075 
1076 	immu = list_head(&immu_list);
1077 	for (; immu; immu = list_next(&immu_list, immu)) {
1078 
1079 		/* if immu is not running, we dont quiesce */
1080 		if (immu->immu_regs_running == B_FALSE)
1081 			continue;
1082 
1083 		/* flush caches */
1084 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1085 		immu_regs_context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL);
1086 		rw_exit(&(immu->immu_ctx_rwlock));
1087 		immu_regs_iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL);
1088 		immu_regs_wbf_flush(immu);
1089 
1090 		mutex_enter(&(immu->immu_lock));
1091 
1092 		/*
1093 		 * Set IOMMU unit's regs to do
1094 		 * the actual shutdown.
1095 		 */
1096 		immu_regs_shutdown(immu);
1097 		immu_regs_suspend(immu);
1098 
1099 		/* if immu is still running, we failed */
1100 		if (immu->immu_regs_running == B_TRUE)
1101 			ret = DDI_FAILURE;
1102 		else
1103 			immu->immu_regs_quiesced = B_TRUE;
1104 
1105 		mutex_exit(&(immu->immu_lock));
1106 	}
1107 	mutex_exit(&immu_lock);
1108 
1109 	if (ret == DDI_SUCCESS) {
1110 		immu_running = B_FALSE;
1111 		immu_quiesced = B_TRUE;
1112 	}
1113 
1114 	return (ret);
1115 }
1116 
1117 /*
1118  * immu_unquiesce()
1119  * 	unquiesce all units
1120  */
1121 int
1122 immu_unquiesce(void)
1123 {
1124 	immu_t *immu;
1125 	int ret = DDI_SUCCESS;
1126 
1127 	mutex_enter(&immu_lock);
1128 
1129 	if (immu_quiesced == B_FALSE)
1130 		return (DDI_SUCCESS);
1131 
1132 	ASSERT(immu_setup == B_TRUE);
1133 	ASSERT(immu_running == B_FALSE);
1134 
1135 	immu = list_head(&immu_list);
1136 	for (; immu; immu = list_next(&immu_list, immu)) {
1137 
1138 		mutex_enter(&(immu->immu_lock));
1139 
1140 		/* if immu was not quiesced, i.e was not running before */
1141 		if (immu->immu_regs_quiesced == B_FALSE) {
1142 			mutex_exit(&(immu->immu_lock));
1143 			continue;
1144 		}
1145 
1146 		if (immu_regs_resume(immu) != DDI_SUCCESS) {
1147 			ret = DDI_FAILURE;
1148 			mutex_exit(&(immu->immu_lock));
1149 			continue;
1150 		}
1151 
1152 		/* flush caches before unquiesce */
1153 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1154 		immu_regs_context_flush(immu, 0, 0, 0, CONTEXT_GLOBAL);
1155 		rw_exit(&(immu->immu_ctx_rwlock));
1156 		immu_regs_iotlb_flush(immu, 0, 0, 0, 0, IOTLB_GLOBAL);
1157 
1158 		/*
1159 		 * Set IOMMU unit's regs to do
1160 		 * the actual startup. This will
1161 		 * set immu->immu_regs_running  field
1162 		 * if the unit is successfully
1163 		 * started
1164 		 */
1165 		immu_regs_startup(immu);
1166 
1167 		if (immu->immu_regs_running == B_FALSE) {
1168 			ret = DDI_FAILURE;
1169 		} else {
1170 			immu_quiesced = B_TRUE;
1171 			immu_running = B_TRUE;
1172 			immu->immu_regs_quiesced = B_FALSE;
1173 		}
1174 
1175 		mutex_exit(&(immu->immu_lock));
1176 	}
1177 
1178 	mutex_exit(&immu_lock);
1179 
1180 	return (ret);
1181 }
1182 
1183 /* ##############  END Intel IOMMU entry points ################## */
1184