xref: /illumos-gate/usr/src/uts/i86pc/io/immu_intrmap.c (revision b3783300013fa93b98278c901b855062f538f7e2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright (c) 2009, Intel Corporation.
28  * All rights reserved.
29  */
30 
31 
32 #include <sys/apic.h>
33 #include <vm/hat_i86.h>
34 #include <sys/sysmacros.h>
35 #include <sys/smp_impldefs.h>
36 #include <sys/immu.h>
37 
38 
39 typedef struct intrmap_private {
40 	immu_t		*ir_immu;
41 	immu_inv_wait_t	ir_inv_wait;
42 	uint16_t	ir_idx;
43 	uint32_t	ir_sid_svt_sq;
44 } intrmap_private_t;
45 
46 #define	INTRMAP_PRIVATE(intrmap) ((intrmap_private_t *)intrmap)
47 
/*
 * Interrupt remapping table entry, manipulated as two 64-bit words.
 */
typedef struct intrmap_rte {
	uint64_t	lo;	/* low word, built with IRTE_LOW() */
	uint64_t	hi;	/* high word, built with IRTE_HIGH() */
} intrmap_rte_t;

/* High word of an entry: the packed sid/svt/sq value, passed through. */
#define	IRTE_HIGH(sid_svt_sq) (sid_svt_sq)

/*
 * Low word of an entry.  Fields, from the least significant bit upward:
 *	p at bit 0, fpd at bit 1, dm at bit 2, rh at bit 3, tm at bit 4,
 *	dlm at bit 5, vector at bit 16 and dst at bit 32.
 * NOTE(review): p and fpd are presumably the "present" and "fault
 * processing disable" bits of the VT-d IRTE -- confirm against the spec.
 */
#define	IRTE_LOW(dst, vector, dlm, tm, rh, dm, fpd, p)	\
	    ((p) | \
	    ((uint64_t)(fpd) << 1) | \
	    ((uint64_t)(dm) << 2) | \
	    ((uint64_t)(rh) << 3) | \
	    ((uint64_t)(tm) << 4) | \
	    ((uint64_t)(dlm) << 5) | \
	    ((uint64_t)(vector) << 16) | \
	    ((uint64_t)(dst) << 32))
64 
/* Source validation type stored in an entry (see get_sid()). */
typedef enum {
	SVT_NO_VERIFY	= 0,	/* no verification */
	SVT_ALL_VERIFY	= 1,	/* using sid and sq to verify */
	SVT_BUS_VERIFY	= 2,	/* verify #startbus and #endbus */
	SVT_RSVD	= 3
} intrmap_svt_t;

/* Source-id qualifier: which source-id bits take part in verification. */
typedef enum {
	SQ_VERIFY_ALL	= 0,	/* verify all 16 bits */
	SQ_VERIFY_IGR_1	= 1,	/* ignore bit 3 */
	SQ_VERIFY_IGR_2	= 2,	/* ignore bit 2-3 */
	SQ_VERIFY_IGR_3	= 3	/* ignore bit 1-3 */
} intrmap_sq_t;
78 
79 /*
80  * S field of the Interrupt Remapping Table Address Register
81  * the size of the interrupt remapping table is 1 << (immu_intrmap_irta_s + 1)
82  */
83 static uint_t intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
84 
85 /*
86  * If true, arrange to suppress broadcast EOI by setting edge-triggered mode
87  * even for level-triggered interrupts in the interrupt-remapping engine.
88  * If false, broadcast EOI can still be suppressed if the CPU supports the
89  * APIC_SVR_SUPPRESS_BROADCAST_EOI bit.  In both cases, the IOAPIC is still
90  * programmed with the correct trigger mode, and pcplusmp must send an EOI
91  * to the IOAPIC by writing to the IOAPIC's EOI register to make up for the
92  * missing broadcast EOI.
93  */
94 static int intrmap_suppress_brdcst_eoi = 0;
95 
96 /*
97  * whether verify the source id of interrupt request
98  */
99 static int intrmap_enable_sid_verify = 0;
100 
/*
 * Fault reason strings for DVMA remapping, indexed by fault reason code.
 * The last entry is the catch-all used for any out-of-range reason.
 */
static const char *const immu_dvma_faults[] = {
	"Reserved",
	"The present field in root-entry is Clear",
	"The present field in context-entry is Clear",
	"Hardware detected invalid programming of a context-entry",
	"The DMA request attempted to access an address beyond max support",
	"The Write field in a page-table entry is Clear when DMA write",
	"The Read field in a page-table entry is Clear when DMA read",
	"Access the next level page table resulted in error",
	"Access the root-entry table resulted in error",
	"Access the context-entry table resulted in error",
	"Reserved field not initialized to zero in a present root-entry",
	"Reserved field not initialized to zero in a present context-entry",
	"Reserved field not initialized to zero in a present page-table entry",
	"DMA blocked due to the Translation Type field in context-entry",
	"Incorrect fault event reason number",
};
/* index of the last (catch-all) entry; fully parenthesized */
#define	DVMA_MAX_FAULTS	\
	((sizeof (immu_dvma_faults) / sizeof (immu_dvma_faults[0])) - 1)

/*
 * Fault reason strings for interrupt remapping, indexed by
 * (fault reason - 0x20).
 */
static const char *const immu_intrmap_faults[] = {
	"reserved field set in IRTE",
	"interrupt_index exceed the intr-remap table size",
	"present field in IRTE is clear",
	"hardware access intr-remap table address resulted in error",
	"reserved field set in IRTE, include various conditional",
	"hardware blocked an interrupt request in Compatibility format",
	"remappable interrupt request blocked due to verification failure"
};
/* index of the last entry; fully parenthesized */
#define	INTRMAP_MAX_FAULTS \
	((sizeof (immu_intrmap_faults) / sizeof (immu_intrmap_faults[0])) - 1)
133 
134 /* Function prototypes */
135 static int immu_intrmap_init(int apic_mode);
136 static void immu_intrmap_switchon(int suppress_brdcst_eoi);
137 static void immu_intrmap_alloc(void **intrmap_private_tbl, dev_info_t *dip,
138     uint16_t type, int count, uchar_t ioapic_index);
139 static void immu_intrmap_map(void *intrmap_private, void *intrmap_data,
140     uint16_t type, int count);
141 static void immu_intrmap_free(void **intrmap_privatep);
142 static void immu_intrmap_rdt(void *intrmap_private, ioapic_rdt_t *irdt);
143 static void immu_intrmap_msi(void *intrmap_private, msi_regs_t *mregs);
144 
145 static struct apic_intrmap_ops intrmap_ops = {
146 	immu_intrmap_init,
147 	immu_intrmap_switchon,
148 	immu_intrmap_alloc,
149 	immu_intrmap_map,
150 	immu_intrmap_free,
151 	immu_intrmap_rdt,
152 	immu_intrmap_msi,
153 };
154 
155 /* apic mode, APIC/X2APIC */
156 static int intrmap_apic_mode = LOCAL_APIC;
157 
158 
159 /*
160  * helper functions
161  */
162 static uint_t
163 bitset_find_free(bitset_t *b, uint_t post)
164 {
165 	uint_t	i;
166 	uint_t	cap = bitset_capacity(b);
167 
168 	if (post == cap)
169 		post = 0;
170 
171 	ASSERT(post < cap);
172 
173 	for (i = post; i < cap; i++) {
174 		if (!bitset_in_set(b, i))
175 			return (i);
176 	}
177 
178 	for (i = 0; i < post; i++) {
179 		if (!bitset_in_set(b, i))
180 			return (i);
181 	}
182 
183 	return (INTRMAP_IDX_FULL);	/* no free index */
184 }
185 
186 /*
187  * helper function to find 'count' contigous free
188  * interrupt remapping table entries
189  */
190 static uint_t
191 bitset_find_multi_free(bitset_t *b, uint_t post, uint_t count)
192 {
193 	uint_t  i, j;
194 	uint_t	cap = bitset_capacity(b);
195 
196 	if (post == INTRMAP_IDX_FULL) {
197 		return (INTRMAP_IDX_FULL);
198 	}
199 
200 	if (count > cap)
201 		return (INTRMAP_IDX_FULL);
202 
203 	ASSERT(post < cap);
204 
205 	for (i = post; (i + count) <= cap; i++) {
206 		for (j = 0; j < count; j++) {
207 			if (bitset_in_set(b, (i + j))) {
208 				i = i + j;
209 				break;
210 			}
211 			if (j == count - 1)
212 				return (i);
213 		}
214 	}
215 
216 	for (i = 0; (i < post) && ((i + count) <= cap); i++) {
217 		for (j = 0; j < count; j++) {
218 			if (bitset_in_set(b, (i + j))) {
219 				i = i + j;
220 				break;
221 			}
222 			if (j == count - 1)
223 				return (i);
224 		}
225 	}
226 
227 	return (INTRMAP_IDX_FULL);		/* no free index */
228 }
229 
230 /* alloc one interrupt remapping table entry */
231 static int
232 alloc_tbl_entry(intrmap_t *intrmap)
233 {
234 	uint32_t idx;
235 
236 	for (;;) {
237 		mutex_enter(&intrmap->intrmap_lock);
238 		idx = intrmap->intrmap_free;
239 		if (idx != INTRMAP_IDX_FULL) {
240 			bitset_add(&intrmap->intrmap_map, idx);
241 			intrmap->intrmap_free =
242 			    bitset_find_free(&intrmap->intrmap_map, idx + 1);
243 			mutex_exit(&intrmap->intrmap_lock);
244 			break;
245 		}
246 
247 		/* no free intr entry, use compatible format intr */
248 		mutex_exit(&intrmap->intrmap_lock);
249 
250 		if (intrmap_apic_mode != LOCAL_X2APIC) {
251 			break;
252 		}
253 
254 		/*
255 		 * x2apic mode not allowed compatible
256 		 * interrupt
257 		 */
258 		delay(IMMU_ALLOC_RESOURCE_DELAY);
259 	}
260 
261 	return (idx);
262 }
263 
264 /* alloc 'cnt' contigous interrupt remapping table entries */
265 static int
266 alloc_tbl_multi_entries(intrmap_t *intrmap, uint_t cnt)
267 {
268 	uint_t idx, pos, i;
269 
270 	for (; ; ) {
271 		mutex_enter(&intrmap->intrmap_lock);
272 		pos = intrmap->intrmap_free;
273 		idx = bitset_find_multi_free(&intrmap->intrmap_map, pos, cnt);
274 
275 		if (idx != INTRMAP_IDX_FULL) {
276 			if (idx <= pos && pos < (idx + cnt)) {
277 				intrmap->intrmap_free = bitset_find_free(
278 				    &intrmap->intrmap_map, idx + cnt);
279 			}
280 			for (i = 0; i < cnt; i++) {
281 				bitset_add(&intrmap->intrmap_map, idx + i);
282 			}
283 			mutex_exit(&intrmap->intrmap_lock);
284 			break;
285 		}
286 
287 		mutex_exit(&intrmap->intrmap_lock);
288 
289 		if (intrmap_apic_mode != LOCAL_X2APIC) {
290 			break;
291 		}
292 
293 		/* x2apic mode not allowed comapitible interrupt */
294 		delay(IMMU_ALLOC_RESOURCE_DELAY);
295 	}
296 
297 	return (idx);
298 }
299 
300 /* init interrupt remapping table */
301 static int
302 init_unit(immu_t *immu)
303 {
304 	intrmap_t *intrmap;
305 	size_t size;
306 
307 	ddi_dma_attr_t intrmap_dma_attr = {
308 		DMA_ATTR_V0,
309 		0U,
310 		0xffffffffffffffffULL,
311 		0xffffffffU,
312 		MMU_PAGESIZE,	/* page aligned */
313 		0x1,
314 		0x1,
315 		0xffffffffU,
316 		0xffffffffffffffffULL,
317 		1,
318 		4,
319 		0
320 	};
321 
322 	ddi_device_acc_attr_t intrmap_acc_attr = {
323 		DDI_DEVICE_ATTR_V0,
324 		DDI_NEVERSWAP_ACC,
325 		DDI_STRICTORDER_ACC
326 	};
327 
328 	/*
329 	 * Using interrupt remapping implies using the queue
330 	 * invalidation interface. According to Intel,
331 	 * hardware that supports interrupt remapping should
332 	 * also support QI.
333 	 */
334 	ASSERT(IMMU_ECAP_GET_QI(immu->immu_regs_excap));
335 
336 	if (intrmap_apic_mode == LOCAL_X2APIC) {
337 		if (!IMMU_ECAP_GET_EIM(immu->immu_regs_excap)) {
338 			return (DDI_FAILURE);
339 		}
340 	}
341 
342 	if (intrmap_irta_s > INTRMAP_MAX_IRTA_SIZE) {
343 		intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
344 	}
345 
346 	intrmap =  kmem_zalloc(sizeof (intrmap_t), KM_SLEEP);
347 
348 	if (ddi_dma_alloc_handle(immu->immu_dip,
349 	    &intrmap_dma_attr,
350 	    DDI_DMA_SLEEP,
351 	    NULL,
352 	    &(intrmap->intrmap_dma_hdl)) != DDI_SUCCESS) {
353 		kmem_free(intrmap, sizeof (intrmap_t));
354 		return (DDI_FAILURE);
355 	}
356 
357 	intrmap->intrmap_size = 1 << (intrmap_irta_s + 1);
358 	size = intrmap->intrmap_size * INTRMAP_RTE_SIZE;
359 	if (ddi_dma_mem_alloc(intrmap->intrmap_dma_hdl,
360 	    size,
361 	    &intrmap_acc_attr,
362 	    DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED,
363 	    DDI_DMA_SLEEP,
364 	    NULL,
365 	    &(intrmap->intrmap_vaddr),
366 	    &size,
367 	    &(intrmap->intrmap_acc_hdl)) != DDI_SUCCESS) {
368 		ddi_dma_free_handle(&(intrmap->intrmap_dma_hdl));
369 		kmem_free(intrmap, sizeof (intrmap_t));
370 		return (DDI_FAILURE);
371 	}
372 
373 	ASSERT(!((uintptr_t)intrmap->intrmap_vaddr & MMU_PAGEOFFSET));
374 	bzero(intrmap->intrmap_vaddr, size);
375 	intrmap->intrmap_paddr = pfn_to_pa(
376 	    hat_getpfnum(kas.a_hat, intrmap->intrmap_vaddr));
377 
378 	mutex_init(&(intrmap->intrmap_lock), NULL, MUTEX_DRIVER, NULL);
379 	bitset_init(&intrmap->intrmap_map);
380 	bitset_resize(&intrmap->intrmap_map, intrmap->intrmap_size);
381 	intrmap->intrmap_free = 0;
382 
383 	immu->immu_intrmap = intrmap;
384 
385 	return (DDI_SUCCESS);
386 }
387 
388 static immu_t *
389 get_immu(dev_info_t *dip, uint16_t type, uchar_t ioapic_index)
390 {
391 	immu_t	*immu = NULL;
392 
393 	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
394 		immu = immu_dmar_ioapic_immu(ioapic_index);
395 	} else {
396 		if (dip != NULL)
397 			immu = immu_dmar_get_immu(dip);
398 	}
399 
400 	return (immu);
401 }
402 
403 static int
404 get_top_pcibridge(dev_info_t *dip, void *arg)
405 {
406 	dev_info_t **topdipp = arg;
407 	immu_devi_t *immu_devi;
408 
409 	mutex_enter(&(DEVI(dip)->devi_lock));
410 	immu_devi = DEVI(dip)->devi_iommu;
411 	mutex_exit(&(DEVI(dip)->devi_lock));
412 
413 	if (immu_devi == NULL || immu_devi->imd_pcib_type == IMMU_PCIB_BAD ||
414 	    immu_devi->imd_pcib_type == IMMU_PCIB_ENDPOINT) {
415 		return (DDI_WALK_CONTINUE);
416 	}
417 
418 	*topdipp = dip;
419 
420 	return (DDI_WALK_CONTINUE);
421 }
422 
423 static dev_info_t *
424 intrmap_top_pcibridge(dev_info_t *rdip)
425 {
426 	dev_info_t *top_pcibridge = NULL;
427 
428 	if (immu_walk_ancestor(rdip, NULL, get_top_pcibridge,
429 	    &top_pcibridge, NULL, 0) != DDI_SUCCESS) {
430 		return (NULL);
431 	}
432 
433 	return (top_pcibridge);
434 }
435 
436 /* function to get interrupt request source id */
437 static uint32_t
438 get_sid(dev_info_t *dip, uint16_t type, uchar_t ioapic_index)
439 {
440 	dev_info_t	*pdip;
441 	immu_devi_t	*immu_devi;
442 	uint16_t	sid;
443 	uchar_t		svt, sq;
444 
445 	if (!intrmap_enable_sid_verify) {
446 		return (0);
447 	}
448 
449 	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
450 		/* for interrupt through I/O APIC */
451 		sid = immu_dmar_ioapic_sid(ioapic_index);
452 		svt = SVT_ALL_VERIFY;
453 		sq = SQ_VERIFY_ALL;
454 	} else {
455 		/* MSI/MSI-X interrupt */
456 		ASSERT(dip);
457 		pdip = intrmap_top_pcibridge(dip);
458 		ASSERT(pdip);
459 		immu_devi = DEVI(pdip)->devi_iommu;
460 		ASSERT(immu_devi);
461 		if (immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCI) {
462 			/* device behind pcie to pci bridge */
463 			sid = (immu_devi->imd_bus << 8) | immu_devi->imd_sec;
464 			svt = SVT_BUS_VERIFY;
465 			sq = SQ_VERIFY_ALL;
466 		} else {
467 			/* pcie device or device behind pci to pci bridge */
468 			sid = (immu_devi->imd_bus << 8) |
469 			    immu_devi->imd_devfunc;
470 			svt = SVT_ALL_VERIFY;
471 			sq = SQ_VERIFY_ALL;
472 		}
473 	}
474 
475 	return (sid | (svt << 18) | (sq << 16));
476 }
477 
478 static void
479 intrmap_enable(immu_t *immu)
480 {
481 	intrmap_t *intrmap;
482 	uint64_t irta_reg;
483 
484 	intrmap = immu->immu_intrmap;
485 
486 	irta_reg = intrmap->intrmap_paddr | intrmap_irta_s;
487 	if (intrmap_apic_mode == LOCAL_X2APIC) {
488 		irta_reg |= (0x1 << 11);
489 	}
490 
491 	immu_regs_intrmap_enable(immu, irta_reg);
492 }
493 
494 /* ####################################################################### */
495 
496 /*
497  * immu_intr_handler()
498  *	the fault event handler for a single immu unit
499  */
500 uint_t
501 immu_intr_handler(caddr_t arg, caddr_t arg1 __unused)
502 {
503 	immu_t *immu = (immu_t *)arg;
504 	uint32_t status;
505 	int index, fault_reg_offset;
506 	int max_fault_index;
507 	boolean_t found_fault;
508 	dev_info_t *idip;
509 
510 	mutex_enter(&(immu->immu_intr_lock));
511 	mutex_enter(&(immu->immu_regs_lock));
512 
513 	/* read the fault status */
514 	status = immu_regs_get32(immu, IMMU_REG_FAULT_STS);
515 
516 	idip = immu->immu_dip;
517 	ASSERT(idip);
518 
519 	/* check if we have a pending fault for this immu unit */
520 	if ((status & IMMU_FAULT_STS_PPF) == 0) {
521 		mutex_exit(&(immu->immu_regs_lock));
522 		mutex_exit(&(immu->immu_intr_lock));
523 		return (DDI_INTR_UNCLAIMED);
524 	}
525 
526 	/*
527 	 * handle all primary pending faults
528 	 */
529 	index = IMMU_FAULT_GET_INDEX(status);
530 	max_fault_index =  IMMU_CAP_GET_NFR(immu->immu_regs_cap) - 1;
531 	fault_reg_offset = IMMU_CAP_GET_FRO(immu->immu_regs_cap);
532 
533 	found_fault = B_FALSE;
534 	_NOTE(CONSTCOND)
535 	while (1) {
536 		uint64_t val;
537 		uint8_t fault_reason;
538 		uint8_t fault_type;
539 		uint16_t sid;
540 		uint64_t pg_addr;
541 		uint64_t idx;
542 
543 		/* read the higher 64bits */
544 		val = immu_regs_get64(immu, fault_reg_offset + index * 16 + 8);
545 
546 		/* check if this fault register has pending fault */
547 		if (!IMMU_FRR_GET_F(val)) {
548 			break;
549 		}
550 
551 		found_fault = B_TRUE;
552 
553 		/* get the fault reason, fault type and sid */
554 		fault_reason = IMMU_FRR_GET_FR(val);
555 		fault_type = IMMU_FRR_GET_FT(val);
556 		sid = IMMU_FRR_GET_SID(val);
557 
558 		/* read the first 64bits */
559 		val = immu_regs_get64(immu, fault_reg_offset + index * 16);
560 		pg_addr = val & IMMU_PAGEMASK;
561 		idx = val >> 48;
562 
563 		/* clear the fault */
564 		immu_regs_put32(immu, fault_reg_offset + index * 16 + 12,
565 		    (((uint32_t)1) << 31));
566 
567 		/* report the fault info */
568 		if (fault_reason < 0x20) {
569 			/* immu-remapping fault */
570 			ddi_err(DER_WARN, idip,
571 			    "generated a fault event when translating DMA %s\n"
572 			    "\t on address 0x%" PRIx64 " for PCI(%d, %d, %d), "
573 			    "the reason is:\n\t %s",
574 			    fault_type ? "read" : "write", pg_addr,
575 			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
576 			    immu_dvma_faults[MIN(fault_reason,
577 			    DVMA_MAX_FAULTS)]);
578 			immu_print_fault_info(sid, pg_addr);
579 		} else if (fault_reason < 0x27) {
580 			/* intr-remapping fault */
581 			ddi_err(DER_WARN, idip,
582 			    "generated a fault event when translating "
583 			    "interrupt request\n"
584 			    "\t on index 0x%" PRIx64 " for PCI(%d, %d, %d), "
585 			    "the reason is:\n\t %s",
586 			    idx,
587 			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
588 			    immu_intrmap_faults[MIN((fault_reason - 0x20),
589 			    INTRMAP_MAX_FAULTS)]);
590 		} else {
591 			ddi_err(DER_WARN, idip, "Unknown fault reason: 0x%x",
592 			    fault_reason);
593 		}
594 
595 		index++;
596 		if (index > max_fault_index)
597 			index = 0;
598 	}
599 
600 	/* Clear the fault */
601 	if (!found_fault) {
602 		ddi_err(DER_MODE, idip,
603 		    "Fault register set but no fault present");
604 	}
605 	immu_regs_put32(immu, IMMU_REG_FAULT_STS, 1);
606 	mutex_exit(&(immu->immu_regs_lock));
607 	mutex_exit(&(immu->immu_intr_lock));
608 	return (DDI_INTR_CLAIMED);
609 }
610 /* ######################################################################### */
611 
612 /*
613  * Interrupt remap entry points
614  */
615 
616 /* initialize interrupt remapping */
617 static int
618 immu_intrmap_init(int apic_mode)
619 {
620 	immu_t *immu;
621 	int error = DDI_FAILURE;
622 
623 	if (immu_intrmap_enable == B_FALSE) {
624 		return (DDI_SUCCESS);
625 	}
626 
627 	intrmap_apic_mode = apic_mode;
628 
629 	immu = list_head(&immu_list);
630 	for (; immu; immu = list_next(&immu_list, immu)) {
631 		if ((immu->immu_intrmap_running == B_TRUE) &&
632 		    IMMU_ECAP_GET_IR(immu->immu_regs_excap)) {
633 			if (init_unit(immu) == DDI_SUCCESS) {
634 				error = DDI_SUCCESS;
635 			}
636 		}
637 	}
638 
639 	/*
640 	 * if all IOMMU units disable intr remapping,
641 	 * return FAILURE
642 	 */
643 	return (error);
644 }
645 
646 
647 
648 /* enable interrupt remapping */
649 static void
650 immu_intrmap_switchon(int suppress_brdcst_eoi)
651 {
652 	immu_t *immu;
653 
654 
655 	intrmap_suppress_brdcst_eoi = suppress_brdcst_eoi;
656 
657 	immu = list_head(&immu_list);
658 	for (; immu; immu = list_next(&immu_list, immu)) {
659 		if (immu->immu_intrmap_setup == B_TRUE) {
660 			intrmap_enable(immu);
661 		}
662 	}
663 }
664 
665 /* alloc remapping entry for the interrupt */
666 static void
667 immu_intrmap_alloc(void **intrmap_private_tbl, dev_info_t *dip,
668     uint16_t type, int count, uchar_t ioapic_index)
669 {
670 	immu_t	*immu;
671 	intrmap_t *intrmap;
672 	immu_inv_wait_t *iwp;
673 	uint32_t		idx, i;
674 	uint32_t		sid_svt_sq;
675 	intrmap_private_t	*intrmap_private;
676 
677 	if (intrmap_private_tbl[0] == INTRMAP_DISABLE ||
678 	    intrmap_private_tbl[0] != NULL) {
679 		return;
680 	}
681 
682 	intrmap_private_tbl[0] =
683 	    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);
684 	intrmap_private = INTRMAP_PRIVATE(intrmap_private_tbl[0]);
685 
686 	immu = get_immu(dip, type, ioapic_index);
687 	if ((immu != NULL) && (immu->immu_intrmap_running == B_TRUE)) {
688 		intrmap_private->ir_immu = immu;
689 	} else {
690 		goto intrmap_disable;
691 	}
692 
693 	intrmap = immu->immu_intrmap;
694 
695 	if (count == 1) {
696 		idx = alloc_tbl_entry(intrmap);
697 	} else {
698 		idx = alloc_tbl_multi_entries(intrmap, count);
699 	}
700 
701 	if (idx == INTRMAP_IDX_FULL) {
702 		goto intrmap_disable;
703 	}
704 
705 	intrmap_private->ir_idx = idx;
706 
707 	sid_svt_sq = intrmap_private->ir_sid_svt_sq =
708 	    get_sid(dip, type, ioapic_index);
709 	iwp = &intrmap_private->ir_inv_wait;
710 	immu_init_inv_wait(iwp, "intrmaplocal", B_TRUE);
711 
712 	if (count == 1) {
713 		if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
714 			immu_qinv_intr_one_cache(immu, idx, iwp);
715 		} else {
716 			immu_regs_wbf_flush(immu);
717 		}
718 		return;
719 	}
720 
721 	for (i = 1; i < count; i++) {
722 		intrmap_private_tbl[i] =
723 		    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);
724 
725 		INTRMAP_PRIVATE(intrmap_private_tbl[i])->ir_immu = immu;
726 		INTRMAP_PRIVATE(intrmap_private_tbl[i])->ir_sid_svt_sq =
727 		    sid_svt_sq;
728 		INTRMAP_PRIVATE(intrmap_private_tbl[i])->ir_idx = idx + i;
729 	}
730 
731 	if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
732 		immu_qinv_intr_caches(immu, idx, count, iwp);
733 	} else {
734 		immu_regs_wbf_flush(immu);
735 	}
736 
737 	return;
738 
739 intrmap_disable:
740 	kmem_free(intrmap_private_tbl[0], sizeof (intrmap_private_t));
741 	intrmap_private_tbl[0] = INTRMAP_DISABLE;
742 }
743 
744 
745 /* remapping the interrupt */
746 static void
747 immu_intrmap_map(void *intrmap_private, void *intrmap_data, uint16_t type,
748     int count)
749 {
750 	immu_t	*immu;
751 	immu_inv_wait_t	*iwp;
752 	intrmap_t	*intrmap;
753 	ioapic_rdt_t	*irdt = (ioapic_rdt_t *)intrmap_data;
754 	msi_regs_t	*mregs = (msi_regs_t *)intrmap_data;
755 	intrmap_rte_t	irte;
756 	uint_t		idx, i;
757 	uint32_t	dst, sid_svt_sq;
758 	uchar_t		vector, dlm, tm, rh, dm;
759 
760 	if (intrmap_private == INTRMAP_DISABLE)
761 		return;
762 
763 	idx = INTRMAP_PRIVATE(intrmap_private)->ir_idx;
764 	immu = INTRMAP_PRIVATE(intrmap_private)->ir_immu;
765 	iwp = &INTRMAP_PRIVATE(intrmap_private)->ir_inv_wait;
766 	intrmap = immu->immu_intrmap;
767 	sid_svt_sq = INTRMAP_PRIVATE(intrmap_private)->ir_sid_svt_sq;
768 
769 	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
770 		dm = RDT_DM(irdt->ir_lo);
771 		rh = 0;
772 		tm = RDT_TM(irdt->ir_lo);
773 		dlm = RDT_DLM(irdt->ir_lo);
774 		dst = irdt->ir_hi;
775 
776 		/*
777 		 * Mark the IRTE's TM as Edge to suppress broadcast EOI.
778 		 */
779 		if (intrmap_suppress_brdcst_eoi) {
780 			tm = TRIGGER_MODE_EDGE;
781 		}
782 
783 		vector = RDT_VECTOR(irdt->ir_lo);
784 	} else {
785 		dm = MSI_ADDR_DM_PHYSICAL;
786 		rh = MSI_ADDR_RH_FIXED;
787 		tm = TRIGGER_MODE_EDGE;
788 		dlm = 0;
789 		dst = mregs->mr_addr;
790 
791 		vector = mregs->mr_data & 0xff;
792 	}
793 
794 	if (intrmap_apic_mode == LOCAL_APIC)
795 		dst = (dst & 0xFF) << 8;
796 
797 	if (count == 1) {
798 		irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1);
799 		irte.hi = IRTE_HIGH(sid_svt_sq);
800 
801 		/* set interrupt remapping table entry */
802 		bcopy(&irte, intrmap->intrmap_vaddr +
803 		    idx * INTRMAP_RTE_SIZE,
804 		    INTRMAP_RTE_SIZE);
805 
806 		immu_qinv_intr_one_cache(immu, idx, iwp);
807 
808 	} else {
809 		for (i = 0; i < count; i++) {
810 			irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1);
811 			irte.hi = IRTE_HIGH(sid_svt_sq);
812 
813 			/* set interrupt remapping table entry */
814 			bcopy(&irte, intrmap->intrmap_vaddr +
815 			    idx * INTRMAP_RTE_SIZE,
816 			    INTRMAP_RTE_SIZE);
817 			vector++;
818 			idx++;
819 		}
820 
821 		immu_qinv_intr_caches(immu, idx, count, iwp);
822 	}
823 }
824 
825 /* free the remapping entry */
826 static void
827 immu_intrmap_free(void **intrmap_privatep)
828 {
829 	immu_t *immu;
830 	immu_inv_wait_t *iwp;
831 	intrmap_t *intrmap;
832 	uint32_t idx;
833 
834 	if (*intrmap_privatep == INTRMAP_DISABLE || *intrmap_privatep == NULL) {
835 		*intrmap_privatep = NULL;
836 		return;
837 	}
838 
839 	immu = INTRMAP_PRIVATE(*intrmap_privatep)->ir_immu;
840 	iwp = &INTRMAP_PRIVATE(*intrmap_privatep)->ir_inv_wait;
841 	intrmap = immu->immu_intrmap;
842 	idx = INTRMAP_PRIVATE(*intrmap_privatep)->ir_idx;
843 
844 	bzero(intrmap->intrmap_vaddr + idx * INTRMAP_RTE_SIZE,
845 	    INTRMAP_RTE_SIZE);
846 
847 	immu_qinv_intr_one_cache(immu, idx, iwp);
848 
849 	mutex_enter(&intrmap->intrmap_lock);
850 	bitset_del(&intrmap->intrmap_map, idx);
851 	if (intrmap->intrmap_free == INTRMAP_IDX_FULL) {
852 		intrmap->intrmap_free = idx;
853 	}
854 	mutex_exit(&intrmap->intrmap_lock);
855 
856 	kmem_free(*intrmap_privatep, sizeof (intrmap_private_t));
857 	*intrmap_privatep = NULL;
858 }
859 
860 /* record the ioapic rdt entry */
861 static void
862 immu_intrmap_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
863 {
864 	uint32_t rdt_entry, tm, pol, idx, vector;
865 
866 	rdt_entry = irdt->ir_lo;
867 
868 	if (intrmap_private != INTRMAP_DISABLE && intrmap_private != NULL) {
869 		idx = INTRMAP_PRIVATE(intrmap_private)->ir_idx;
870 		tm = RDT_TM(rdt_entry);
871 		pol = RDT_POL(rdt_entry);
872 		vector = RDT_VECTOR(rdt_entry);
873 		irdt->ir_lo = (tm << INTRMAP_IOAPIC_TM_SHIFT) |
874 		    (pol << INTRMAP_IOAPIC_POL_SHIFT) |
875 		    ((idx >> 15) << INTRMAP_IOAPIC_IDX15_SHIFT) |
876 		    vector;
877 		irdt->ir_hi = (idx << INTRMAP_IOAPIC_IDX_SHIFT) |
878 		    (1 << INTRMAP_IOAPIC_FORMAT_SHIFT);
879 	} else {
880 		irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
881 	}
882 }
883 
884 /* record the msi interrupt structure */
885 /*ARGSUSED*/
886 static void
887 immu_intrmap_msi(void *intrmap_private, msi_regs_t *mregs)
888 {
889 	uint_t	idx;
890 
891 	if (intrmap_private != INTRMAP_DISABLE && intrmap_private != NULL) {
892 		idx = INTRMAP_PRIVATE(intrmap_private)->ir_idx;
893 
894 		mregs->mr_data = 0;
895 		mregs->mr_addr = MSI_ADDR_HDR |
896 		    ((idx & 0x7fff) << INTRMAP_MSI_IDX_SHIFT) |
897 		    (1 << INTRMAP_MSI_FORMAT_SHIFT) |
898 		    (1 << INTRMAP_MSI_SHV_SHIFT) |
899 		    ((idx >> 15) << INTRMAP_MSI_IDX15_SHIFT);
900 	} else {
901 		mregs->mr_addr = MSI_ADDR_HDR |
902 		    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
903 		    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
904 		    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
905 		mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
906 		    mregs->mr_data;
907 	}
908 }
909 
910 /* ######################################################################### */
911 /*
912  * Functions exported by immu_intr.c
913  */
914 void
915 immu_intrmap_setup(list_t *listp)
916 {
917 	immu_t *immu;
918 
919 	/*
920 	 * Check if ACPI DMAR tables say that
921 	 * interrupt remapping is supported
922 	 */
923 	if (immu_dmar_intrmap_supported() == B_FALSE) {
924 		return;
925 	}
926 
927 	/*
928 	 * Check if interrupt remapping is disabled.
929 	 */
930 	if (immu_intrmap_enable == B_FALSE) {
931 		return;
932 	}
933 
934 	psm_vt_ops = &intrmap_ops;
935 
936 	immu = list_head(listp);
937 	for (; immu; immu = list_next(listp, immu)) {
938 		mutex_init(&(immu->immu_intrmap_lock), NULL,
939 		    MUTEX_DEFAULT, NULL);
940 		mutex_enter(&(immu->immu_intrmap_lock));
941 		immu_init_inv_wait(&immu->immu_intrmap_inv_wait,
942 		    "intrmapglobal", B_TRUE);
943 		immu->immu_intrmap_setup = B_TRUE;
944 		mutex_exit(&(immu->immu_intrmap_lock));
945 	}
946 }
947 
948 void
949 immu_intrmap_startup(immu_t *immu)
950 {
951 	/* do nothing */
952 	mutex_enter(&(immu->immu_intrmap_lock));
953 	if (immu->immu_intrmap_setup == B_TRUE) {
954 		immu->immu_intrmap_running = B_TRUE;
955 	}
956 	mutex_exit(&(immu->immu_intrmap_lock));
957 }
958 
959 /*
960  * Register a Intel IOMMU unit (i.e. DMAR unit's)
961  * interrupt handler
962  */
963 void
964 immu_intr_register(immu_t *immu)
965 {
966 	int irq, vect;
967 	char intr_handler_name[IMMU_MAXNAMELEN];
968 	uint32_t msi_data;
969 	uint32_t uaddr;
970 	uint32_t msi_addr;
971 	uint32_t localapic_id = 0;
972 
973 	if (psm_get_localapicid)
974 		localapic_id = psm_get_localapicid(0);
975 
976 	msi_addr = (MSI_ADDR_HDR |
977 	    ((localapic_id & 0xFF) << MSI_ADDR_DEST_SHIFT) |
978 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
979 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
980 
981 	if (intrmap_apic_mode == LOCAL_X2APIC) {
982 		uaddr = localapic_id & 0xFFFFFF00;
983 	} else {
984 		uaddr = 0;
985 	}
986 
987 	/* Dont need to hold immu_intr_lock since we are in boot */
988 	irq = vect = psm_get_ipivect(IMMU_INTR_IPL, -1);
989 	if (psm_xlate_vector_by_irq != NULL)
990 		vect = psm_xlate_vector_by_irq(irq);
991 
992 	msi_data = ((MSI_DATA_DELIVERY_FIXED <<
993 	    MSI_DATA_DELIVERY_SHIFT) | vect);
994 
995 	(void) snprintf(intr_handler_name, sizeof (intr_handler_name),
996 	    "%s-intr-handler", immu->immu_name);
997 
998 	(void) add_avintr((void *)NULL, IMMU_INTR_IPL,
999 	    immu_intr_handler, intr_handler_name, irq,
1000 	    (caddr_t)immu, NULL, NULL, NULL);
1001 
1002 	immu_regs_intr_enable(immu, msi_addr, msi_data, uaddr);
1003 
1004 	(void) immu_intr_handler((caddr_t)immu, NULL);
1005 }
1006