xref: /titanic_50/usr/src/uts/i86pc/io/immu.c (revision a307732568c3d861c38b0342ae32434226d10e94)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23  * All rights reserved.
24  */
25 /*
26  * Copyright (c) 2009, Intel Corporation.
27  * All rights reserved.
28  */
29 
30 /*
31  * Intel IOMMU implementation
32  * This file contains Intel IOMMU code exported
33  * to the rest of the system and code that deals
34  * with the Intel IOMMU as a whole.
35  */
36 
37 #include <sys/conf.h>
38 #include <sys/modctl.h>
39 #include <sys/pci.h>
40 #include <sys/pci_impl.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ddi.h>
43 #include <sys/ddidmareq.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/ddifm.h>
46 #include <sys/sunndi.h>
47 #include <sys/debug.h>
48 #include <sys/fm/protocol.h>
49 #include <sys/note.h>
50 #include <sys/apic.h>
51 #include <vm/hat_i86.h>
52 #include <sys/smp_impldefs.h>
53 #include <sys/spl.h>
54 #include <sys/archsystm.h>
55 #include <sys/x86_archext.h>
56 #include <sys/rootnex.h>
57 #include <sys/avl.h>
58 #include <sys/bootconf.h>
59 #include <sys/bootinfo.h>
60 #include <sys/atomic.h>
61 #include <sys/immu.h>
62 /* ########################### Globals and tunables ######################## */
63 /*
64  * Global switches (boolean) that can be toggled either via boot options
65  * or via /etc/system or kmdb
66  */
67 
68 /* Various features */
69 boolean_t immu_enable = B_TRUE;
70 boolean_t immu_dvma_enable = B_TRUE;
71 
72 /* accessed in other files so not static */
73 boolean_t immu_gfxdvma_enable = B_TRUE;
74 boolean_t immu_intrmap_enable = B_FALSE;
75 boolean_t immu_qinv_enable = B_FALSE;
76 
77 /* various quirks that need working around */
78 
79 /* XXX We always map page 0 read/write for now */
80 boolean_t immu_quirk_usbpage0 = B_TRUE;
81 boolean_t immu_quirk_usbrmrr = B_TRUE;
82 boolean_t immu_quirk_usbfullpa;
83 boolean_t immu_quirk_mobile4;
84 
85 /* debug messages */
86 boolean_t immu_dmar_print;
87 
88 /* Tunables */
89 int64_t immu_flush_gran = 5;
90 
91 immu_flags_t immu_global_dvma_flags;
92 
93 /* ############  END OPTIONS section ################ */
94 
95 /*
96  * Global used internally by Intel IOMMU code
97  */
98 dev_info_t *root_devinfo;
99 kmutex_t immu_lock;
100 list_t immu_list;
101 void *immu_pgtable_cache;
102 boolean_t immu_setup;
103 boolean_t immu_running;
104 boolean_t immu_quiesced;
105 
106 /* ######################## END Globals and tunables ###################### */
107 /* Globals used only in this file */
108 static char **black_array;
109 static uint_t nblacks;
110 
111 static char **unity_driver_array;
112 static uint_t nunity;
113 static char **xlate_driver_array;
114 static uint_t nxlate;
115 /* ###################### Utility routines ############################# */
116 
117 /*
118  * Check if the device has mobile 4 chipset
119  */
120 static int
121 check_mobile4(dev_info_t *dip, void *arg)
122 {
123 	_NOTE(ARGUNUSED(arg));
124 	int vendor, device;
125 	int *ip = (int *)arg;
126 
127 	ASSERT(arg);
128 
129 	vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
130 	    "vendor-id", -1);
131 	device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
132 	    "device-id", -1);
133 
134 	if (vendor == 0x8086 && device == 0x2a40) {
135 		*ip = B_TRUE;
136 		ddi_err(DER_NOTE, dip, "IMMU: Mobile 4 chipset detected. "
137 		    "Force setting IOMMU write buffer");
138 		return (DDI_WALK_TERMINATE);
139 	} else {
140 		return (DDI_WALK_CONTINUE);
141 	}
142 }
143 
144 static void
145 map_bios_rsvd_mem(dev_info_t *dip)
146 {
147 	struct memlist *mp;
148 	int e;
149 
150 	memlist_read_lock();
151 
152 	mp = bios_rsvd;
153 	while (mp != NULL) {
154 		memrng_t mrng = {0};
155 
156 		ddi_err(DER_LOG, dip, "IMMU: Mapping BIOS rsvd range "
157 		    "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
158 		    mp->ml_address + mp->ml_size);
159 
160 		mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
161 		mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
162 
163 		e = immu_dvma_map(NULL, NULL, &mrng, 0, dip, IMMU_FLAGS_MEMRNG);
164 		ASSERT(e == DDI_DMA_MAPPED || e == DDI_DMA_USE_PHYSICAL);
165 
166 		mp = mp->ml_next;
167 	}
168 
169 	memlist_read_unlock();
170 }
171 
172 
173 /*
174  * Check if the driver requests a specific type of mapping.
175  */
176 /*ARGSUSED*/
177 static void
178 check_conf(dev_info_t *dip, void *arg)
179 {
180 	immu_devi_t *immu_devi;
181 	const char *dname;
182 	uint_t i;
183 	int hasprop = 0;
184 
185 	/*
186 	 * Only PCI devices can use an IOMMU. Legacy ISA devices
187 	 * are handled in check_lpc.
188 	 */
189 	if (!DEVI_IS_PCI(dip))
190 		return;
191 
192 	dname = ddi_driver_name(dip);
193 	if (dname == NULL)
194 		return;
195 	immu_devi = immu_devi_get(dip);
196 
197 	for (i = 0; i < nunity; i++) {
198 		if (strcmp(unity_driver_array[i], dname) == 0) {
199 			hasprop = 1;
200 			immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
201 		}
202 	}
203 
204 	for (i = 0; i < nxlate; i++) {
205 		if (strcmp(xlate_driver_array[i], dname) == 0) {
206 			hasprop = 1;
207 			immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY;
208 		}
209 	}
210 
211 	/*
212 	 * Report if we changed the value from the default.
213 	 */
214 	if (hasprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
215 		ddi_err(DER_LOG, dip, "using %s DVMA mapping",
216 		    immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ?
217 		    DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE);
218 }
219 
220 /*
221  * Check if the device is USB controller
222  */
223 /*ARGSUSED*/
224 static void
225 check_usb(dev_info_t *dip, void *arg)
226 {
227 	const char *drv = ddi_driver_name(dip);
228 	immu_devi_t *immu_devi;
229 
230 
231 	if (drv == NULL ||
232 	    (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 &&
233 	    strcmp(drv, "ehci") != 0)) {
234 		return;
235 	}
236 
237 	immu_devi = immu_devi_get(dip);
238 
239 	/*
240 	 * If unit mappings are already specified, globally or
241 	 * locally, we're done here, since that covers both
242 	 * quirks below.
243 	 */
244 	if (immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY)
245 		return;
246 
247 	/* This must come first since it does unity mapping */
248 	if (immu_quirk_usbfullpa == B_TRUE) {
249 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
250 	} else if (immu_quirk_usbrmrr == B_TRUE) {
251 		ddi_err(DER_LOG, dip, "Applying USB RMRR quirk");
252 		map_bios_rsvd_mem(dip);
253 	}
254 }
255 
256 /*
257  * Check if the device is a LPC device
258  */
259 /*ARGSUSED*/
260 static void
261 check_lpc(dev_info_t *dip, void *arg)
262 {
263 	immu_devi_t *immu_devi;
264 
265 	immu_devi = immu_devi_get(dip);
266 	ASSERT(immu_devi);
267 	if (immu_devi->imd_lpc == B_TRUE) {
268 		ddi_err(DER_LOG, dip, "IMMU: Found LPC device");
269 		/* This will put the immu_devi on the LPC "specials" list */
270 		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
271 	}
272 }
273 
274 /*
275  * Check if the device is a GFX device
276  */
277 /*ARGSUSED*/
278 static void
279 check_gfx(dev_info_t *dip, void *arg)
280 {
281 	immu_devi_t *immu_devi;
282 
283 	immu_devi = immu_devi_get(dip);
284 	ASSERT(immu_devi);
285 	if (immu_devi->imd_display == B_TRUE) {
286 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
287 		ddi_err(DER_LOG, dip, "IMMU: Found GFX device");
288 		/* This will put the immu_devi on the GFX "specials" list */
289 		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
290 	}
291 }
292 
293 static void
294 walk_tree(int (*f)(dev_info_t *, void *), void *arg)
295 {
296 	int count;
297 
298 	ndi_devi_enter(root_devinfo, &count);
299 	ddi_walk_devs(ddi_get_child(root_devinfo), f, arg);
300 	ndi_devi_exit(root_devinfo, count);
301 }
302 
303 static int
304 check_pre_setup_quirks(dev_info_t *dip, void *arg)
305 {
306 	/* just 1 check right now */
307 	return (check_mobile4(dip, arg));
308 }
309 
310 static int
311 check_pre_startup_quirks(dev_info_t *dip, void *arg)
312 {
313 	if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) {
314 		ddi_err(DER_PANIC, dip, "Failed to get immu_devi");
315 	}
316 
317 	check_gfx(dip, arg);
318 
319 	check_lpc(dip, arg);
320 
321 	check_conf(dip, arg);
322 
323 	check_usb(dip, arg);
324 
325 	return (DDI_WALK_CONTINUE);
326 }
327 
328 static void
329 pre_setup_quirks(void)
330 {
331 	walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4);
332 }
333 
334 static void
335 pre_startup_quirks(void)
336 {
337 	walk_tree(check_pre_startup_quirks, NULL);
338 
339 	immu_dmar_rmrr_map();
340 }
341 
342 static int
343 get_conf_str(char *bopt, char **val)
344 {
345 	int ret;
346 
347 	/*
348 	 * Check the rootnex.conf property
349 	 * Fake up a dev_t since searching the global
350 	 * property list needs it
351 	 */
352 	ret = ddi_prop_lookup_string(
353 	    makedevice(ddi_name_to_major("rootnex"), 0),
354 	    root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
355 	    bopt, val);
356 
357 	return (ret);
358 }
359 
360 /*
361  * get_conf_opt()
362  * 	get a rootnex.conf setting  (always a boolean)
363  */
364 static void
365 get_conf_opt(char *bopt, boolean_t *kvar)
366 {
367 	char *val = NULL;
368 
369 	ASSERT(bopt);
370 	ASSERT(kvar);
371 
372 	/*
373 	 * Check the rootnex.conf property
374 	 * Fake up a dev_t since searching the global
375 	 * property list needs it
376 	 */
377 
378 	if (get_conf_str(bopt, &val) != DDI_PROP_SUCCESS)
379 		return;
380 
381 	if (strcmp(val, "true") == 0) {
382 		*kvar = B_TRUE;
383 	} else if (strcmp(val, "false") == 0) {
384 		*kvar = B_FALSE;
385 	} else {
386 		ddi_err(DER_WARN, NULL, "rootnex.conf switch %s=\"%s\" ",
387 		    "is not set to true or false. Ignoring option.",
388 		    bopt, val);
389 	}
390 	ddi_prop_free(val);
391 }
392 
393 /*
394  * get_bootopt()
395  * 	check a boot option  (always a boolean)
396  */
397 static int
398 get_boot_str(char *bopt, char **val)
399 {
400 	int ret;
401 
402 	ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo,
403 	    DDI_PROP_DONTPASS, bopt, val);
404 
405 	return (ret);
406 }
407 
408 static void
409 get_bootopt(char *bopt, boolean_t *kvar)
410 {
411 	char *val = NULL;
412 
413 	/*
414 	 * All boot options set at the GRUB menu become
415 	 * properties on the rootnex.
416 	 */
417 	if (get_boot_str(bopt, &val) != DDI_PROP_SUCCESS)
418 		return;
419 
420 	if (strcmp(val, "true") == 0) {
421 		*kvar = B_TRUE;
422 	} else if (strcmp(val, "false") == 0) {
423 		*kvar = B_FALSE;
424 	} else {
425 		ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ",
426 		    "is not set to true or false. Ignoring option.",
427 		    bopt, val);
428 	}
429 	ddi_prop_free(val);
430 }
431 
432 static void
433 get_boot_dvma_mode(void)
434 {
435 	char *val = NULL;
436 
437 	if (get_boot_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
438 	    != DDI_PROP_SUCCESS)
439 		return;
440 
441 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
442 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
443 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
444 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
445 	} else {
446 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for boot option %s",
447 		    val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
448 	}
449 	ddi_prop_free(val);
450 }
451 
452 static void
453 get_conf_dvma_mode(void)
454 {
455 	char *val = NULL;
456 
457 	if (get_conf_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
458 	    != DDI_PROP_SUCCESS)
459 		return;
460 
461 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
462 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
463 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
464 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
465 	} else {
466 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for rootnex "
467 		    "option %s", val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
468 	}
469 	ddi_prop_free(val);
470 }
471 
472 
473 static void
474 get_conf_tunables(char *bopt, int64_t *ivar)
475 {
476 	int64_t	*iarray;
477 	uint_t n;
478 
479 	/*
480 	 * Check the rootnex.conf property
481 	 * Fake up a dev_t since searching the global
482 	 * property list needs it
483 	 */
484 	if (ddi_prop_lookup_int64_array(
485 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
486 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
487 	    &iarray, &n) != DDI_PROP_SUCCESS) {
488 		return;
489 	}
490 
491 	if (n != 1) {
492 		ddi_err(DER_WARN, NULL, "More than one value specified for "
493 		    "%s property. Ignoring and using default",
494 		    "immu-flush-gran");
495 		ddi_prop_free(iarray);
496 		return;
497 	}
498 
499 	if (iarray[0] < 0) {
500 		ddi_err(DER_WARN, NULL, "Negative value specified for "
501 		    "%s property. Inoring and Using default value",
502 		    "immu-flush-gran");
503 		ddi_prop_free(iarray);
504 		return;
505 	}
506 
507 	*ivar = iarray[0];
508 
509 	ddi_prop_free(iarray);
510 }
511 
512 static void
513 read_conf_options(void)
514 {
515 	/* enable/disable options */
516 	get_conf_opt("immu-enable", &immu_enable);
517 	get_conf_opt("immu-dvma-enable", &immu_dvma_enable);
518 	get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
519 	get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable);
520 	get_conf_opt("immu-qinv-enable", &immu_qinv_enable);
521 
522 	/* workaround switches */
523 	get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
524 	get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
525 	get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
526 
527 	/* debug printing */
528 	get_conf_opt("immu-dmar-print", &immu_dmar_print);
529 
530 	/* get tunables */
531 	get_conf_tunables("immu-flush-gran", &immu_flush_gran);
532 
533 	get_conf_dvma_mode();
534 }
535 
536 static void
537 read_boot_options(void)
538 {
539 	/* enable/disable options */
540 	get_bootopt("immu-enable", &immu_enable);
541 	get_bootopt("immu-dvma-enable", &immu_dvma_enable);
542 	get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
543 	get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
544 	get_bootopt("immu-qinv-enable", &immu_qinv_enable);
545 
546 	/* workaround switches */
547 	get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
548 	get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
549 	get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
550 
551 	/* debug printing */
552 	get_bootopt("immu-dmar-print", &immu_dmar_print);
553 
554 	get_boot_dvma_mode();
555 }
556 
557 static void
558 mapping_list_setup(void)
559 {
560 	char **string_array;
561 	uint_t nstrings;
562 
563 	if (ddi_prop_lookup_string_array(
564 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
565 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
566 	    "immu-dvma-unity-drivers",
567 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
568 		unity_driver_array = string_array;
569 		nunity = nstrings;
570 	}
571 
572 	if (ddi_prop_lookup_string_array(
573 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
574 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
575 	    "immu-dvma-xlate-drivers",
576 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
577 		xlate_driver_array = string_array;
578 		nxlate = nstrings;
579 	}
580 }
581 
582 /*
583  * Note, this will not catch hardware not enumerated
584  * in early boot
585  */
586 static boolean_t
587 blacklisted_driver(void)
588 {
589 	char **strptr;
590 	int i;
591 	major_t maj;
592 
593 	ASSERT((black_array == NULL) ^ (nblacks != 0));
594 
595 	/* need at least 2 strings */
596 	if (nblacks < 2) {
597 		return (B_FALSE);
598 	}
599 
600 	for (i = 0; nblacks - i > 1; i++) {
601 		strptr = &black_array[i];
602 		if (strcmp(*strptr++, "DRIVER") == 0) {
603 			if ((maj = ddi_name_to_major(*strptr++))
604 			    != DDI_MAJOR_T_NONE) {
605 				/* is there hardware bound to this drvr */
606 				if (devnamesp[maj].dn_head != NULL) {
607 					return (B_TRUE);
608 				}
609 			}
610 			i += 1;   /* for loop adds 1, so add only 1 here */
611 		}
612 	}
613 
614 	return (B_FALSE);
615 }
616 
617 static boolean_t
618 blacklisted_smbios(void)
619 {
620 	id_t smid;
621 	smbios_hdl_t *smhdl;
622 	smbios_info_t sminf;
623 	smbios_system_t smsys;
624 	char *mfg, *product, *version;
625 	char **strptr;
626 	int i;
627 
628 	ASSERT((black_array == NULL) ^ (nblacks != 0));
629 
630 	/* need at least 4 strings for this setting */
631 	if (nblacks < 4) {
632 		return (B_FALSE);
633 	}
634 
635 	smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL);
636 	if (smhdl == NULL ||
637 	    (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR ||
638 	    smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) {
639 		return (B_FALSE);
640 	}
641 
642 	mfg = (char *)sminf.smbi_manufacturer;
643 	product = (char *)sminf.smbi_product;
644 	version = (char *)sminf.smbi_version;
645 
646 	ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n");
647 	ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg);
648 	ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
649 	ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);
650 
651 	for (i = 0; nblacks - i > 3; i++) {
652 		strptr = &black_array[i];
653 		if (strcmp(*strptr++, "SMBIOS") == 0) {
654 			if (strcmp(*strptr++, mfg) == 0 &&
655 			    ((char *)strptr == '\0' ||
656 			    strcmp(*strptr++, product) == 0) &&
657 			    ((char *)strptr == '\0' ||
658 			    strcmp(*strptr++, version) == 0)) {
659 				return (B_TRUE);
660 			}
661 			i += 3;
662 		}
663 	}
664 
665 	return (B_FALSE);
666 }
667 
668 static boolean_t
669 blacklisted_acpi(void)
670 {
671 	ASSERT((black_array == NULL) ^ (nblacks != 0));
672 	if (nblacks == 0) {
673 		return (B_FALSE);
674 	}
675 
676 	return (immu_dmar_blacklisted(black_array, nblacks));
677 }
678 
679 /*
680  * Check if system is blacklisted by Intel IOMMU driver
681  * i.e. should Intel IOMMU be disabled on this system
682  * Currently a system can be blacklistd based on the
683  * following bases:
684  *
685  * 1. DMAR ACPI table information.
686  *    This information includes things like
687  *    manufacturer and revision number. If rootnex.conf
688  *    has matching info set in its blacklist property
689  *    then Intel IOMMu will be disabled
690  *
691  * 2. SMBIOS information
692  *
693  * 3. Driver installed - useful if a particular
694  *    driver or hardware is toxic if Intel IOMMU
695  *    is turned on.
696  */
697 
698 static void
699 blacklist_setup(void)
700 {
701 	char **string_array;
702 	uint_t nstrings;
703 
704 	/*
705 	 * Check the rootnex.conf blacklist property.
706 	 * Fake up a dev_t since searching the global
707 	 * property list needs it
708 	 */
709 	if (ddi_prop_lookup_string_array(
710 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
711 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist",
712 	    &string_array, &nstrings) != DDI_PROP_SUCCESS) {
713 		return;
714 	}
715 
716 	/* smallest blacklist criteria works with multiples of 2 */
717 	if (nstrings % 2 != 0) {
718 		ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist "
719 		    "rootnex.conf: number of strings must be a "
720 		    "multiple of 2");
721 		ddi_prop_free(string_array);
722 		return;
723 	}
724 
725 	black_array = string_array;
726 	nblacks = nstrings;
727 }
728 
729 static void
730 blacklist_destroy(void)
731 {
732 	if (black_array) {
733 		ddi_prop_free(black_array);
734 		black_array = NULL;
735 		nblacks = 0;
736 	}
737 
738 	ASSERT(black_array == NULL);
739 	ASSERT(nblacks == 0);
740 }
741 
742 
743 /*
744  * Now set all the fields in the order they are defined
745  * We do this only as a defensive-coding practice, it is
746  * not a correctness issue.
747  */
748 static void *
749 immu_state_alloc(int seg, void *dmar_unit)
750 {
751 	immu_t *immu;
752 
753 	dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
754 	if (dmar_unit == NULL) {
755 		/* No more IOMMUs in this segment */
756 		return (NULL);
757 	}
758 
759 	immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP);
760 
761 	mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL);
762 
763 	mutex_enter(&(immu->immu_lock));
764 
765 	immu->immu_dmar_unit = dmar_unit;
766 	immu->immu_name = ddi_strdup(immu_dmar_unit_name(dmar_unit),
767 	    KM_SLEEP);
768 	immu->immu_dip = immu_dmar_unit_dip(dmar_unit);
769 
770 	/*
771 	 * the immu_intr_lock mutex is grabbed by the IOMMU
772 	 * unit's interrupt handler so we need to use an
773 	 * interrupt cookie for the mutex
774 	 */
775 	mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER,
776 	    (void *)ipltospl(IMMU_INTR_IPL));
777 
778 	/* IOMMU regs related */
779 	mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
780 	cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
781 	immu->immu_regs_busy = B_FALSE;
782 
783 	/* DVMA related */
784 	immu->immu_dvma_coherent = B_FALSE;
785 
786 	/* DVMA context related */
787 	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
788 
789 	/* DVMA domain related */
790 	list_create(&(immu->immu_domain_list), sizeof (domain_t),
791 	    offsetof(domain_t, dom_immu_node));
792 
793 	/* DVMA special device lists */
794 	immu->immu_dvma_gfx_only = B_FALSE;
795 	list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t),
796 	    offsetof(immu_devi_t, imd_spc_node));
797 	list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t),
798 	    offsetof(immu_devi_t, imd_spc_node));
799 
800 	/* interrupt remapping related */
801 	mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL);
802 
803 	/* qinv related */
804 	mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL);
805 
806 	/*
807 	 * insert this immu unit into the system-wide list
808 	 */
809 	list_insert_tail(&immu_list, immu);
810 
811 	mutex_exit(&(immu->immu_lock));
812 
813 	ddi_err(DER_LOG, immu->immu_dip, "IMMU: unit setup");
814 
815 	immu_dmar_set_immu(dmar_unit, immu);
816 
817 	return (dmar_unit);
818 }
819 
820 static void
821 immu_subsystems_setup(void)
822 {
823 	int seg;
824 	void *unit_hdl;
825 
826 	ddi_err(DER_VERB, NULL,
827 	    "Creating state structures for Intel IOMMU units\n");
828 
829 	ASSERT(immu_setup == B_FALSE);
830 	ASSERT(immu_running == B_FALSE);
831 
832 	mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
833 	list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
834 
835 	mutex_enter(&immu_lock);
836 
837 	ASSERT(immu_pgtable_cache == NULL);
838 
839 	immu_pgtable_cache = kmem_cache_create("immu_pgtable_cache",
840 	    sizeof (pgtable_t), 0,
841 	    pgtable_ctor, pgtable_dtor, NULL, NULL, NULL, 0);
842 
843 	unit_hdl = NULL;
844 	for (seg = 0; seg < IMMU_MAXSEG; seg++) {
845 		while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
846 			;
847 		}
848 	}
849 
850 	immu_regs_setup(&immu_list);	/* subsequent code needs this first */
851 	immu_dvma_setup(&immu_list);
852 	if (immu_qinv_setup(&immu_list) == DDI_SUCCESS)
853 		immu_intrmap_setup(&immu_list);
854 	else
855 		immu_intrmap_enable = B_FALSE;
856 
857 	mutex_exit(&immu_lock);
858 }
859 
860 /*
861  * immu_subsystems_startup()
862  * 	startup all units that were setup
863  */
864 static void
865 immu_subsystems_startup(void)
866 {
867 	immu_t *immu;
868 
869 	mutex_enter(&immu_lock);
870 
871 	ASSERT(immu_setup == B_TRUE);
872 	ASSERT(immu_running == B_FALSE);
873 
874 	immu_dmar_startup();
875 
876 	immu = list_head(&immu_list);
877 	for (; immu; immu = list_next(&immu_list, immu)) {
878 
879 		mutex_enter(&(immu->immu_lock));
880 
881 		immu_intr_register(immu);
882 		immu_dvma_startup(immu);
883 		immu_intrmap_startup(immu);
884 		immu_qinv_startup(immu);
885 
886 		/*
887 		 * Set IOMMU unit's regs to do
888 		 * the actual startup. This will
889 		 * set immu->immu_running  field
890 		 * if the unit is successfully
891 		 * started
892 		 */
893 		immu_regs_startup(immu);
894 
895 		mutex_exit(&(immu->immu_lock));
896 	}
897 
898 	mutex_exit(&immu_lock);
899 }
900 
901 /* ##################  Intel IOMMU internal interfaces ###################### */
902 
903 /*
904  * Internal interfaces for IOMMU code (i.e. not exported to rootnex
905  * or rest of system)
906  */
907 
908 /*
909  * ddip can be NULL, in which case we walk up until we find the root dip
910  * NOTE: We never visit the root dip since its not a hardware node
911  */
912 int
913 immu_walk_ancestor(
914 	dev_info_t *rdip,
915 	dev_info_t *ddip,
916 	int (*func)(dev_info_t *, void *arg),
917 	void *arg,
918 	int *lvlp,
919 	immu_flags_t immu_flags)
920 {
921 	dev_info_t *pdip;
922 	int level;
923 	int error = DDI_SUCCESS;
924 
925 	ASSERT(root_devinfo);
926 	ASSERT(rdip);
927 	ASSERT(rdip != root_devinfo);
928 	ASSERT(func);
929 
930 	/* ddip and immu can be NULL */
931 
932 	/* Hold rdip so that branch is not detached */
933 	ndi_hold_devi(rdip);
934 	for (pdip = rdip, level = 1; pdip && pdip != root_devinfo;
935 	    pdip = ddi_get_parent(pdip), level++) {
936 
937 		if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) {
938 			error = DDI_FAILURE;
939 			break;
940 		}
941 		if (func(pdip, arg) == DDI_WALK_TERMINATE) {
942 			break;
943 		}
944 		if (immu_flags & IMMU_FLAGS_DONTPASS) {
945 			break;
946 		}
947 		if (pdip == ddip) {
948 			break;
949 		}
950 	}
951 
952 	ndi_rele_devi(rdip);
953 
954 	if (lvlp)
955 		*lvlp = level;
956 
957 	return (error);
958 }
959 
960 /* ########################  Intel IOMMU entry points ####################### */
961 /*
962  * immu_init()
963  *	called from rootnex_attach(). setup but don't startup the Intel IOMMU
964  *      This is the first function called in Intel IOMMU code
965  */
966 void
967 immu_init(void)
968 {
969 	char *phony_reg = "A thing of beauty is a joy forever";
970 
971 	/* Set some global shorthands that are needed by all of IOMMU code */
972 	ASSERT(root_devinfo == NULL);
973 	root_devinfo = ddi_root_node();
974 
975 	/*
976 	 * Intel IOMMU only supported only if MMU(CPU) page size is ==
977 	 * IOMMU pages size.
978 	 */
979 	/*LINTED*/
980 	if (MMU_PAGESIZE != IMMU_PAGESIZE) {
981 		ddi_err(DER_WARN, NULL,
982 		    "MMU page size (%d) is not equal to\n"
983 		    "IOMMU page size (%d). "
984 		    "Disabling Intel IOMMU. ",
985 		    MMU_PAGESIZE, IMMU_PAGESIZE);
986 		immu_enable = B_FALSE;
987 		return;
988 	}
989 
990 	/*
991 	 * Read rootnex.conf options. Do this before
992 	 * boot options so boot options can override .conf options.
993 	 */
994 	read_conf_options();
995 
996 	/*
997 	 * retrieve the Intel IOMMU boot options.
998 	 * Do this before parsing immu ACPI table
999 	 * as a boot option could potentially affect
1000 	 * ACPI parsing.
1001 	 */
1002 	ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n");
1003 	read_boot_options();
1004 
1005 	/*
1006 	 * Check the IOMMU enable boot-option first.
1007 	 * This is so that we can skip parsing the ACPI table
1008 	 * if necessary because that may cause problems in
1009 	 * systems with buggy BIOS or ACPI tables
1010 	 */
1011 	if (immu_enable == B_FALSE) {
1012 		return;
1013 	}
1014 
1015 	if (immu_intrmap_enable == B_TRUE)
1016 		immu_qinv_enable = B_TRUE;
1017 
1018 	/*
1019 	 * Next, check if the system even has an Intel IOMMU
1020 	 * We use the presence or absence of the IOMMU ACPI
1021 	 * table to detect Intel IOMMU.
1022 	 */
1023 	if (immu_dmar_setup() != DDI_SUCCESS) {
1024 		immu_enable = B_FALSE;
1025 		return;
1026 	}
1027 
1028 	mapping_list_setup();
1029 
1030 	/*
1031 	 * Check blacklists
1032 	 */
1033 	blacklist_setup();
1034 
1035 	if (blacklisted_smbios() == B_TRUE) {
1036 		blacklist_destroy();
1037 		immu_enable = B_FALSE;
1038 		return;
1039 	}
1040 
1041 	if (blacklisted_driver() == B_TRUE) {
1042 		blacklist_destroy();
1043 		immu_enable = B_FALSE;
1044 		return;
1045 	}
1046 
1047 	/*
1048 	 * Read the "raw" DMAR ACPI table to get information
1049 	 * and convert into a form we can use.
1050 	 */
1051 	if (immu_dmar_parse() != DDI_SUCCESS) {
1052 		blacklist_destroy();
1053 		immu_enable = B_FALSE;
1054 		return;
1055 	}
1056 
1057 	/*
1058 	 * now that we have processed the ACPI table
1059 	 * check if we need to blacklist this system
1060 	 * based on ACPI info
1061 	 */
1062 	if (blacklisted_acpi() == B_TRUE) {
1063 		immu_dmar_destroy();
1064 		blacklist_destroy();
1065 		immu_enable = B_FALSE;
1066 		return;
1067 	}
1068 
1069 	blacklist_destroy();
1070 
1071 	/*
1072 	 * Check if system has HW quirks.
1073 	 */
1074 	pre_setup_quirks();
1075 
1076 	/* Now do the rest of the setup */
1077 	immu_subsystems_setup();
1078 
1079 	/*
1080 	 * Now that the IMMU is setup, create a phony
1081 	 * reg prop so that suspend/resume works
1082 	 */
1083 	if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg",
1084 	    (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) {
1085 		ddi_err(DER_PANIC, NULL, "Failed to create reg prop for "
1086 		    "rootnex node");
1087 		/*NOTREACHED*/
1088 	}
1089 
1090 	immu_setup = B_TRUE;
1091 }
1092 
1093 /*
1094  * immu_startup()
1095  * 	called directly by boot code to startup
1096  *      all units of the IOMMU
1097  */
1098 void
1099 immu_startup(void)
1100 {
1101 	/*
1102 	 * If IOMMU is disabled, do nothing
1103 	 */
1104 	if (immu_enable == B_FALSE) {
1105 		return;
1106 	}
1107 
1108 	if (immu_setup == B_FALSE) {
1109 		ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
1110 		    "skipping IOMU startup");
1111 		return;
1112 	}
1113 
1114 	pre_startup_quirks();
1115 
1116 	ddi_err(DER_CONT, NULL,
1117 	    "?Starting Intel IOMMU (dmar) units...\n");
1118 
1119 	immu_subsystems_startup();
1120 
1121 	immu_running = B_TRUE;
1122 }
1123 
1124 /*
1125  * immu_map_sgl()
1126  * 	called from rootnex_coredma_bindhdl() when Intel
1127  *	IOMMU is enabled to build DVMA cookies and map them.
1128  */
1129 int
1130 immu_map_sgl(ddi_dma_impl_t *hp, struct ddi_dma_req *dmareq,
1131     int prealloc_count, dev_info_t *rdip)
1132 {
1133 	if (immu_running == B_FALSE) {
1134 		return (DDI_DMA_USE_PHYSICAL);
1135 	}
1136 
1137 	return (immu_dvma_map(hp, dmareq, NULL, prealloc_count, rdip,
1138 	    IMMU_FLAGS_DMAHDL));
1139 }
1140 
1141 /*
1142  * immu_unmap_sgl()
1143  * 	called from rootnex_coredma_unbindhdl(), to unmap DVMA
1144  * 	cookies and free them
1145  */
1146 int
1147 immu_unmap_sgl(ddi_dma_impl_t *hp, dev_info_t *rdip)
1148 {
1149 	if (immu_running == B_FALSE) {
1150 		return (DDI_DMA_USE_PHYSICAL);
1151 	}
1152 
1153 	return (immu_dvma_unmap(hp, rdip));
1154 }
1155 
1156 /*
1157  * Hook to notify IOMMU code of device tree changes
1158  */
1159 void
1160 immu_device_tree_changed(void)
1161 {
1162 	if (immu_setup == B_FALSE) {
1163 		return;
1164 	}
1165 
1166 	ddi_err(DER_WARN, NULL, "Intel IOMMU currently "
1167 	    "does not use device tree updates");
1168 }
1169 
1170 /*
1171  * Hook to notify IOMMU code of memory changes
1172  */
1173 void
1174 immu_physmem_update(uint64_t addr, uint64_t size)
1175 {
1176 	if (immu_setup == B_FALSE) {
1177 		return;
1178 	}
1179 	immu_dvma_physmem_update(addr, size);
1180 }
1181 
1182 /*
1183  * immu_quiesce()
1184  * 	quiesce all units that are running
1185  */
1186 int
1187 immu_quiesce(void)
1188 {
1189 	immu_t *immu;
1190 	int ret = DDI_SUCCESS;
1191 
1192 	mutex_enter(&immu_lock);
1193 
1194 	if (immu_running == B_FALSE)
1195 		return (DDI_SUCCESS);
1196 
1197 	ASSERT(immu_setup == B_TRUE);
1198 
1199 	immu = list_head(&immu_list);
1200 	for (; immu; immu = list_next(&immu_list, immu)) {
1201 
1202 		/* if immu is not running, we dont quiesce */
1203 		if (immu->immu_regs_running == B_FALSE)
1204 			continue;
1205 
1206 		/* flush caches */
1207 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1208 		immu_flush_context_gbl(immu);
1209 		rw_exit(&(immu->immu_ctx_rwlock));
1210 		immu_flush_iotlb_gbl(immu);
1211 		immu_regs_wbf_flush(immu);
1212 
1213 		mutex_enter(&(immu->immu_lock));
1214 
1215 		/*
1216 		 * Set IOMMU unit's regs to do
1217 		 * the actual shutdown.
1218 		 */
1219 		immu_regs_shutdown(immu);
1220 		immu_regs_suspend(immu);
1221 
1222 		/* if immu is still running, we failed */
1223 		if (immu->immu_regs_running == B_TRUE)
1224 			ret = DDI_FAILURE;
1225 		else
1226 			immu->immu_regs_quiesced = B_TRUE;
1227 
1228 		mutex_exit(&(immu->immu_lock));
1229 	}
1230 	mutex_exit(&immu_lock);
1231 
1232 	if (ret == DDI_SUCCESS) {
1233 		immu_running = B_FALSE;
1234 		immu_quiesced = B_TRUE;
1235 	}
1236 
1237 	return (ret);
1238 }
1239 
1240 /*
1241  * immu_unquiesce()
1242  * 	unquiesce all units
1243  */
1244 int
1245 immu_unquiesce(void)
1246 {
1247 	immu_t *immu;
1248 	int ret = DDI_SUCCESS;
1249 
1250 	mutex_enter(&immu_lock);
1251 
1252 	if (immu_quiesced == B_FALSE)
1253 		return (DDI_SUCCESS);
1254 
1255 	ASSERT(immu_setup == B_TRUE);
1256 	ASSERT(immu_running == B_FALSE);
1257 
1258 	immu = list_head(&immu_list);
1259 	for (; immu; immu = list_next(&immu_list, immu)) {
1260 
1261 		mutex_enter(&(immu->immu_lock));
1262 
1263 		/* if immu was not quiesced, i.e was not running before */
1264 		if (immu->immu_regs_quiesced == B_FALSE) {
1265 			mutex_exit(&(immu->immu_lock));
1266 			continue;
1267 		}
1268 
1269 		if (immu_regs_resume(immu) != DDI_SUCCESS) {
1270 			ret = DDI_FAILURE;
1271 			mutex_exit(&(immu->immu_lock));
1272 			continue;
1273 		}
1274 
1275 		/* flush caches before unquiesce */
1276 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1277 		immu_flush_context_gbl(immu);
1278 		rw_exit(&(immu->immu_ctx_rwlock));
1279 		immu_flush_iotlb_gbl(immu);
1280 
1281 		/*
1282 		 * Set IOMMU unit's regs to do
1283 		 * the actual startup. This will
1284 		 * set immu->immu_regs_running  field
1285 		 * if the unit is successfully
1286 		 * started
1287 		 */
1288 		immu_regs_startup(immu);
1289 
1290 		if (immu->immu_regs_running == B_FALSE) {
1291 			ret = DDI_FAILURE;
1292 		} else {
1293 			immu_quiesced = B_TRUE;
1294 			immu_running = B_TRUE;
1295 			immu->immu_regs_quiesced = B_FALSE;
1296 		}
1297 
1298 		mutex_exit(&(immu->immu_lock));
1299 	}
1300 
1301 	mutex_exit(&immu_lock);
1302 
1303 	return (ret);
1304 }
1305 
1306 /* ##############  END Intel IOMMU entry points ################## */
1307