/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/apic.h>
#include <vm/hat_i86.h>
#include <sys/sysmacros.h>
#include <sys/smp_impldefs.h>
#include <sys/immu.h>


typedef struct intrmap_private {
	immu_t		*ir_immu;
	immu_inv_wait_t	ir_inv_wait;
	uint16_t	ir_idx;
	uint32_t	ir_sid_svt_sq;
} intrmap_private_t;

#define	INTRMAP_PRIVATE(intrmap) ((intrmap_private_t *)intrmap)

/* interrupt remapping table entry */
typedef struct intrmap_rte {
	uint64_t	lo;
	uint64_t	hi;
} intrmap_rte_t;

#define	IRTE_HIGH(sid_svt_sq) (sid_svt_sq)
#define	IRTE_LOW(dst, vector, dlm, tm, rh, dm, fpd, p)	\
	    (((uint64_t)(dst) << 32) |  \
	    ((uint64_t)(vector) << 16) | \
	    ((uint64_t)(dlm) << 5) | \
	    ((uint64_t)(tm) << 4) | \
	    ((uint64_t)(rh) << 3) | \
	    ((uint64_t)(dm) << 2) | \
	    ((uint64_t)(fpd) << 1) | \
	    (p))
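
/*
 * Field layout of the low 64 bits of an IRTE as encoded by IRTE_LOW().
 * The field names are per the VT-d interrupt-remapping spec; the bit
 * positions simply restate the shifts used in the macro above:
 *	bit 0		P (present)
 *	bit 1		FPD (fault processing disable)
 *	bit 2		DM (destination mode)
 *	bit 3		RH (redirection hint)
 *	bit 4		TM (trigger mode)
 *	bits 5-7	DLM (delivery mode)
 *	bits 16-23	vector
 *	bits 32-63	destination id
 */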

typedef enum {
	SVT_NO_VERIFY = 0,	/* no verification */
	SVT_ALL_VERIFY,		/* verify using the full sid and sq */
	SVT_BUS_VERIFY,		/* verify the bus number against a bus range */
	SVT_RSVD
} intrmap_svt_t;

typedef enum {
	SQ_VERIFY_ALL = 0,	/* verify all 16 bits */
	SQ_VERIFY_IGR_1,	/* ignore bit 3 */
	SQ_VERIFY_IGR_2,	/* ignore bits 2-3 */
	SQ_VERIFY_IGR_3		/* ignore bits 1-3 */
} intrmap_sq_t;

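/*
 * get_sid() below packs these into the value programmed via IRTE_HIGH():
 * the 16-bit source id occupies bits 0-15, SQ bits 16-17 and SVT
 * bits 18-19 of the IRTE's high 64 bits.
 */
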
/*
 * S field of the Interrupt Remapping Table Address Register.
 * The size of the interrupt remapping table is 1 << (intrmap_irta_s + 1).
 */
static uint_t intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
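
/*
 * For example (hypothetical value): with intrmap_irta_s == 8, init_unit()
 * below sizes the table at 1 << 9 == 512 entries.
 */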

/*
 * If true, arrange to suppress broadcast EOI by setting edge-triggered mode
 * even for level-triggered interrupts in the interrupt-remapping engine.
 * If false, broadcast EOI can still be suppressed if the CPU supports the
 * APIC_SVR_SUPPRESS_BROADCAST_EOI bit.  In both cases, the IOAPIC is still
 * programmed with the correct trigger mode, and pcplusmp must send an EOI
 * to the IOAPIC by writing to the IOAPIC's EOI register to make up for the
 * missing broadcast EOI.
 */
static int intrmap_suppress_brdcst_eoi = 0;

/*
 * whether to verify the source id of an interrupt request
 */
static int intrmap_enable_sid_verify = 0;

/* fault types for DVMA remapping */
static char *immu_dvma_faults[] = {
	"Reserved",
	"The present field in root-entry is Clear",
	"The present field in context-entry is Clear",
	"Hardware detected invalid programming of a context-entry",
	"The DMA request attempted to access an address beyond max support",
	"The Write field in a page-table entry is Clear when DMA write",
	"The Read field in a page-table entry is Clear when DMA read",
	"Access to the next level page table resulted in error",
	"Access to the root-entry table resulted in error",
	"Access to the context-entry table resulted in error",
	"Reserved field not initialized to zero in a present root-entry",
	"Reserved field not initialized to zero in a present context-entry",
	"Reserved field not initialized to zero in a present page-table entry",
	"DMA blocked due to the Translation Type field in context-entry",
	"Incorrect fault event reason number",
};
#define	DVMA_MAX_FAULTS \
	((sizeof (immu_dvma_faults) / sizeof (char *)) - 1)
/* fault types for interrupt remapping */
static char *immu_intrmap_faults[] = {
	"reserved field set in IRTE",
	"interrupt_index exceeds the intr-remap table size",
	"present field in IRTE is clear",
	"hardware access to the intr-remap table address resulted in error",
	"reserved field set in IRTE, including various conditions",
	"hardware blocked an interrupt request in Compatibility format",
	"remappable interrupt request blocked due to verification failure"
};
#define	INTRMAP_MAX_FAULTS \
	((sizeof (immu_intrmap_faults) / sizeof (char *)) - 1)

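/*
 * Fault reason codes are split into two ranges (see immu_intr_handler()):
 * reasons below 0x20 index immu_dvma_faults[], and reasons 0x20-0x26 map
 * to immu_intrmap_faults[] after subtracting 0x20.
 */
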
/* Function prototypes */
static int immu_intrmap_init(int apic_mode);
static void immu_intrmap_switchon(int suppress_brdcst_eoi);
static void immu_intrmap_alloc(void **intrmap_private_tbl, dev_info_t *dip,
    uint16_t type, int count, uchar_t ioapic_index);
static void immu_intrmap_map(void *intrmap_private, void *intrmap_data,
    uint16_t type, int count);
static void immu_intrmap_free(void **intrmap_privatep);
static void immu_intrmap_rdt(void *intrmap_private, ioapic_rdt_t *irdt);
static void immu_intrmap_msi(void *intrmap_private, msi_regs_t *mregs);

static struct apic_intrmap_ops intrmap_ops = {
	immu_intrmap_init,
	immu_intrmap_switchon,
	immu_intrmap_alloc,
	immu_intrmap_map,
	immu_intrmap_free,
	immu_intrmap_rdt,
	immu_intrmap_msi,
};

/* apic mode, APIC/X2APIC */
static int intrmap_apic_mode = LOCAL_APIC;


/*
 * helper functions
 */
static uint_t
bitset_find_free(bitset_t *b, uint_t post)
{
	uint_t	i;
	uint_t	cap = bitset_capacity(b);

	if (post == cap)
		post = 0;

	ASSERT(post < cap);

	for (i = post; i < cap; i++) {
		if (!bitset_in_set(b, i))
			return (i);
	}

	for (i = 0; i < post; i++) {
		if (!bitset_in_set(b, i))
			return (i);
	}

	return (INTRMAP_IDX_FULL);	/* no free index */
}

/*
 * helper function to find 'count' contiguous free
 * interrupt remapping table entries
 */
static uint_t
bitset_find_multi_free(bitset_t *b, uint_t post, uint_t count)
{
	uint_t	i, j;
	uint_t	cap = bitset_capacity(b);

	if (post == INTRMAP_IDX_FULL) {
		return (INTRMAP_IDX_FULL);
	}

	if (count > cap)
		return (INTRMAP_IDX_FULL);

	ASSERT(post < cap);

	for (i = post; (i + count) <= cap; i++) {
		for (j = 0; j < count; j++) {
			if (bitset_in_set(b, (i + j))) {
				i = i + j;
				break;
			}
			if (j == count - 1)
				return (i);
		}
	}

	for (i = 0; (i < post) && ((i + count) <= cap); i++) {
		for (j = 0; j < count; j++) {
			if (bitset_in_set(b, (i + j))) {
				i = i + j;
				break;
			}
			if (j == count - 1)
				return (i);
		}
	}

	return (INTRMAP_IDX_FULL);	/* no free index */
}
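
/*
 * Allocation protocol: intrmap_free caches a hint to the next free index
 * (or INTRMAP_IDX_FULL when the table is exhausted).  Both search helpers
 * above scan forward from the hint and then wrap around to the start, so
 * a successful multi-entry search may return an index below the hint;
 * alloc_tbl_multi_entries() only recomputes the hint when the allocated
 * range actually swallows it.
 */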

/* alloc one interrupt remapping table entry */
static int
alloc_tbl_entry(intrmap_t *intrmap)
{
	uint32_t idx;

	for (;;) {
		mutex_enter(&intrmap->intrmap_lock);
		idx = intrmap->intrmap_free;
		if (idx != INTRMAP_IDX_FULL) {
			bitset_add(&intrmap->intrmap_map, idx);
			intrmap->intrmap_free =
			    bitset_find_free(&intrmap->intrmap_map, idx + 1);
			mutex_exit(&intrmap->intrmap_lock);
			break;
		}

		/* no free intr entry, use compatibility-format intr */
		mutex_exit(&intrmap->intrmap_lock);

		if (intrmap_apic_mode != LOCAL_X2APIC) {
			break;
		}

		/*
		 * x2apic mode does not allow compatibility-format
		 * interrupts; wait for an entry to be freed
		 */
		delay(IMMU_ALLOC_RESOURCE_DELAY);
	}

	return (idx);
}

/* alloc 'cnt' contiguous interrupt remapping table entries */
static int
alloc_tbl_multi_entries(intrmap_t *intrmap, uint_t cnt)
{
	uint_t idx, pos, i;

	for (;;) {
		mutex_enter(&intrmap->intrmap_lock);
		pos = intrmap->intrmap_free;
		idx = bitset_find_multi_free(&intrmap->intrmap_map, pos, cnt);

		if (idx != INTRMAP_IDX_FULL) {
			if (idx <= pos && pos < (idx + cnt)) {
				intrmap->intrmap_free = bitset_find_free(
				    &intrmap->intrmap_map, idx + cnt);
			}
			for (i = 0; i < cnt; i++) {
				bitset_add(&intrmap->intrmap_map, idx + i);
			}
			mutex_exit(&intrmap->intrmap_lock);
			break;
		}

		mutex_exit(&intrmap->intrmap_lock);

		if (intrmap_apic_mode != LOCAL_X2APIC) {
			break;
		}

		/* x2apic mode does not allow compatibility-format intrs */
		delay(IMMU_ALLOC_RESOURCE_DELAY);
	}

	return (idx);
}

/* init the interrupt remapping table for one IOMMU unit */
static int
init_unit(immu_t *immu)
{
	intrmap_t *intrmap;
	size_t size;

	ddi_dma_attr_t intrmap_dma_attr = {
		DMA_ATTR_V0,
		0U,			/* dma_attr_addr_lo */
		0xffffffffffffffffULL,	/* dma_attr_addr_hi */
		0xffffffffU,		/* dma_attr_count_max */
		MMU_PAGESIZE,		/* dma_attr_align, page aligned */
		0x1,			/* dma_attr_burstsizes */
		0x1,			/* dma_attr_minxfer */
		0xffffffffU,		/* dma_attr_maxxfer */
		0xffffffffffffffffULL,	/* dma_attr_seg */
		1,			/* dma_attr_sgllen */
		4,			/* dma_attr_granular */
		0			/* dma_attr_flags */
	};

	ddi_device_acc_attr_t intrmap_acc_attr = {
		DDI_DEVICE_ATTR_V0,
		DDI_NEVERSWAP_ACC,
		DDI_STRICTORDER_ACC
	};

	/*
	 * Using interrupt remapping implies using the queued
	 * invalidation interface. According to Intel,
	 * hardware that supports interrupt remapping should
	 * also support QI.
	 */
	ASSERT(IMMU_ECAP_GET_QI(immu->immu_regs_excap));

	if (intrmap_apic_mode == LOCAL_X2APIC) {
		if (!IMMU_ECAP_GET_EIM(immu->immu_regs_excap)) {
			return (DDI_FAILURE);
		}
	}

	if (intrmap_irta_s > INTRMAP_MAX_IRTA_SIZE) {
		intrmap_irta_s = INTRMAP_MAX_IRTA_SIZE;
	}

	intrmap = kmem_zalloc(sizeof (intrmap_t), KM_SLEEP);

	if (ddi_dma_alloc_handle(immu->immu_dip,
	    &intrmap_dma_attr,
	    DDI_DMA_SLEEP,
	    NULL,
	    &(intrmap->intrmap_dma_hdl)) != DDI_SUCCESS) {
		kmem_free(intrmap, sizeof (intrmap_t));
		return (DDI_FAILURE);
	}

	intrmap->intrmap_size = 1 << (intrmap_irta_s + 1);
	size = intrmap->intrmap_size * INTRMAP_RTE_SIZE;
	if (ddi_dma_mem_alloc(intrmap->intrmap_dma_hdl,
	    size,
	    &intrmap_acc_attr,
	    DDI_DMA_CONSISTENT | IOMEM_DATA_UNCACHED,
	    DDI_DMA_SLEEP,
	    NULL,
	    &(intrmap->intrmap_vaddr),
	    &size,
	    &(intrmap->intrmap_acc_hdl)) != DDI_SUCCESS) {
		ddi_dma_free_handle(&(intrmap->intrmap_dma_hdl));
		kmem_free(intrmap, sizeof (intrmap_t));
		return (DDI_FAILURE);
	}

	ASSERT(!((uintptr_t)intrmap->intrmap_vaddr & MMU_PAGEOFFSET));
	bzero(intrmap->intrmap_vaddr, size);
	intrmap->intrmap_paddr = pfn_to_pa(
	    hat_getpfnum(kas.a_hat, intrmap->intrmap_vaddr));

	mutex_init(&(intrmap->intrmap_lock), NULL, MUTEX_DRIVER, NULL);
	bitset_init(&intrmap->intrmap_map);
	bitset_resize(&intrmap->intrmap_map, intrmap->intrmap_size);
	intrmap->intrmap_free = 0;

	immu->immu_intrmap = intrmap;

	return (DDI_SUCCESS);
}

static immu_t *
get_immu(dev_info_t *dip, uint16_t type, uchar_t ioapic_index)
{
	immu_t	*immu = NULL;

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
		immu = immu_dmar_ioapic_immu(ioapic_index);
	} else {
		if (dip != NULL)
			immu = immu_dmar_get_immu(dip);
	}

	return (immu);
}

static int
get_top_pcibridge(dev_info_t *dip, void *arg)
{
	dev_info_t **topdipp = arg;
	immu_devi_t *immu_devi;

	mutex_enter(&(DEVI(dip)->devi_lock));
	immu_devi = DEVI(dip)->devi_iommu;
	mutex_exit(&(DEVI(dip)->devi_lock));

	if (immu_devi == NULL || immu_devi->imd_pcib_type == IMMU_PCIB_BAD ||
	    immu_devi->imd_pcib_type == IMMU_PCIB_ENDPOINT) {
		return (DDI_WALK_CONTINUE);
	}

	*topdipp = dip;

	return (DDI_WALK_CONTINUE);
}

static dev_info_t *
intrmap_top_pcibridge(dev_info_t *rdip)
{
	dev_info_t *top_pcibridge = NULL;

	if (immu_walk_ancestor(rdip, NULL, get_top_pcibridge,
	    &top_pcibridge, NULL, 0) != DDI_SUCCESS) {
		return (NULL);
	}

	return (top_pcibridge);
}

/* get the source-id verification info for an interrupt request */
static uint32_t
get_sid(dev_info_t *dip, uint16_t type, uchar_t ioapic_index)
{
	dev_info_t	*pdip;
	immu_devi_t	*immu_devi;
	uint16_t	sid;
	uchar_t		svt, sq;

	if (!intrmap_enable_sid_verify) {
		return (0);
	}

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
		/* for interrupts routed through an I/O APIC */
		sid = immu_dmar_ioapic_sid(ioapic_index);
		svt = SVT_ALL_VERIFY;
		sq = SQ_VERIFY_ALL;
	} else {
		/* MSI/MSI-X interrupt */
		ASSERT(dip);
		pdip = intrmap_top_pcibridge(dip);
		ASSERT(pdip);
		immu_devi = DEVI(pdip)->devi_iommu;
		ASSERT(immu_devi);
		if (immu_devi->imd_pcib_type == IMMU_PCIB_PCIE_PCI) {
			/* device behind a pcie to pci bridge */
			sid = (immu_devi->imd_bus << 8) | immu_devi->imd_sec;
			svt = SVT_BUS_VERIFY;
			sq = SQ_VERIFY_ALL;
		} else {
			/* pcie device or device behind a pci to pci bridge */
			sid = (immu_devi->imd_bus << 8) |
			    immu_devi->imd_devfunc;
			svt = SVT_ALL_VERIFY;
			sq = SQ_VERIFY_ALL;
		}
	}

	return (sid | (svt << 18) | (sq << 16));
}
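
/*
 * For example (hypothetical values): a PCIe endpoint at bus 3, dev 0,
 * func 0 has sid 0x0300, so with SVT_ALL_VERIFY (1) and SQ_VERIFY_ALL (0)
 * the function returns 0x0300 | (1 << 18) | (0 << 16) == 0x40300.
 */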

static void
intrmap_enable(immu_t *immu)
{
	intrmap_t *intrmap;
	uint64_t irta_reg;

	intrmap = immu->immu_intrmap;

	irta_reg = intrmap->intrmap_paddr | intrmap_irta_s;
	if (intrmap_apic_mode == LOCAL_X2APIC) {
		/* bit 11: extended interrupt mode enable (EIME) */
		irta_reg |= (0x1 << 11);
	}

	immu_regs_intrmap_enable(immu, irta_reg);
}
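
/*
 * Layout of the IRTA register value built above, per the VT-d spec:
 * bits 0-3 hold the size field S (the table has 1 << (S + 1) entries),
 * bit 11 is EIME, and the upper bits hold the page-aligned physical
 * address of the table.  Since intrmap_paddr is page aligned, its low
 * 12 bits are free to carry S and EIME.  For a hypothetical example,
 * intrmap_paddr == 0x12340000 with intrmap_irta_s == 8 yields
 * irta_reg == 0x12340008, or 0x12340808 in x2APIC mode.
 */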

/* ####################################################################### */

/*
 * immu_intr_handler()
 * 	the fault event handler for a single immu unit
 */
int
immu_intr_handler(immu_t *immu)
{
	uint32_t status;
	int index, fault_reg_offset;
	int max_fault_index;
	boolean_t found_fault;
	dev_info_t *idip;

	mutex_enter(&(immu->immu_intr_lock));
	mutex_enter(&(immu->immu_regs_lock));

	/* read the fault status */
	status = immu_regs_get32(immu, IMMU_REG_FAULT_STS);

	idip = immu->immu_dip;
	ASSERT(idip);

	/* check if we have a pending fault for this immu unit */
	if ((status & IMMU_FAULT_STS_PPF) == 0) {
		mutex_exit(&(immu->immu_regs_lock));
		mutex_exit(&(immu->immu_intr_lock));
		return (DDI_INTR_UNCLAIMED);
	}

	/*
	 * handle all primary pending faults
	 */
	index = IMMU_FAULT_GET_INDEX(status);
	max_fault_index = IMMU_CAP_GET_NFR(immu->immu_regs_cap) - 1;
	fault_reg_offset = IMMU_CAP_GET_FRO(immu->immu_regs_cap);

	found_fault = B_FALSE;
	_NOTE(CONSTCOND)
	while (1) {
		uint64_t val;
		uint8_t fault_reason;
		uint8_t fault_type;
		uint16_t sid;
		uint64_t pg_addr;
		uint64_t idx;

		/* read the upper 64 bits of the 128-bit fault record */
		val = immu_regs_get64(immu, fault_reg_offset + index * 16 + 8);

		/* check if this fault register has a pending fault */
		if (!IMMU_FRR_GET_F(val)) {
			break;
		}

		found_fault = B_TRUE;

		/* get the fault reason, fault type and sid */
		fault_reason = IMMU_FRR_GET_FR(val);
		fault_type = IMMU_FRR_GET_FT(val);
		sid = IMMU_FRR_GET_SID(val);

		/* read the lower 64 bits of the fault record */
		val = immu_regs_get64(immu, fault_reg_offset + index * 16);
		pg_addr = val & IMMU_PAGEMASK;
		idx = val >> 48;

		/* clear the fault by writing 1 to the F bit */
		immu_regs_put32(immu, fault_reg_offset + index * 16 + 12,
		    (((uint32_t)1) << 31));

		/* report the fault info */
		if (fault_reason < 0x20) {
			/* DMA-remapping fault */
			ddi_err(DER_WARN, idip,
			    "generated a fault event when translating DMA %s\n"
			    "\t on address 0x%" PRIx64 " for PCI(%d, %d, %d), "
			    "the reason is:\n\t %s",
			    fault_type ? "read" : "write", pg_addr,
			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
			    immu_dvma_faults[MIN(fault_reason,
			    DVMA_MAX_FAULTS)]);
			immu_print_fault_info(sid, pg_addr);
		} else if (fault_reason < 0x27) {
			/* interrupt-remapping fault */
			ddi_err(DER_WARN, idip,
			    "generated a fault event when translating "
			    "interrupt request\n"
			    "\t on index 0x%" PRIx64 " for PCI(%d, %d, %d), "
			    "the reason is:\n\t %s",
			    idx,
			    (sid >> 8) & 0xff, (sid >> 3) & 0x1f, sid & 0x7,
			    immu_intrmap_faults[MIN((fault_reason - 0x20),
			    INTRMAP_MAX_FAULTS)]);
		} else {
			ddi_err(DER_WARN, idip, "Unknown fault reason: 0x%x",
			    fault_reason);
		}

		index++;
		if (index > max_fault_index)
			index = 0;
	}

	/* clear the overall fault status */
	if (!found_fault) {
		ddi_err(DER_MODE, idip,
		    "Fault register set but no fault present");
	}
	immu_regs_put32(immu, IMMU_REG_FAULT_STS, 1);
	mutex_exit(&(immu->immu_regs_lock));
	mutex_exit(&(immu->immu_intr_lock));
	return (DDI_INTR_CLAIMED);
}
/* ######################################################################### */

/*
 * Interrupt remap entry points
 */

/* initialize interrupt remapping */
static int
immu_intrmap_init(int apic_mode)
{
	immu_t *immu;
	int error = DDI_FAILURE;

	if (immu_intrmap_enable == B_FALSE) {
		return (DDI_SUCCESS);
	}

	intrmap_apic_mode = apic_mode;

	immu = list_head(&immu_list);
	for (; immu; immu = list_next(&immu_list, immu)) {
		if ((immu->immu_intrmap_running == B_TRUE) &&
		    IMMU_ECAP_GET_IR(immu->immu_regs_excap)) {
			if (init_unit(immu) == DDI_SUCCESS) {
				error = DDI_SUCCESS;
			}
		}
	}

	/*
	 * return FAILURE if interrupt remapping could not be
	 * initialized on any IOMMU unit
	 */
	return (error);
}

/* enable interrupt remapping */
static void
immu_intrmap_switchon(int suppress_brdcst_eoi)
{
	immu_t *immu;

	intrmap_suppress_brdcst_eoi = suppress_brdcst_eoi;

	immu = list_head(&immu_list);
	for (; immu; immu = list_next(&immu_list, immu)) {
		if (immu->immu_intrmap_setup == B_TRUE) {
			intrmap_enable(immu);
		}
	}
}

/* alloc remapping entry for the interrupt */
static void
immu_intrmap_alloc(void **intrmap_private_tbl, dev_info_t *dip,
    uint16_t type, int count, uchar_t ioapic_index)
{
	immu_t	*immu;
	intrmap_t *intrmap;
	immu_inv_wait_t *iwp;
	uint32_t		idx, i;
	uint32_t		sid_svt_sq;
	intrmap_private_t	*intrmap_private;

	if (intrmap_private_tbl[0] == INTRMAP_DISABLE ||
	    intrmap_private_tbl[0] != NULL) {
		return;
	}

	intrmap_private_tbl[0] =
	    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);
	intrmap_private = INTRMAP_PRIVATE(intrmap_private_tbl[0]);

	immu = get_immu(dip, type, ioapic_index);
	if ((immu != NULL) && (immu->immu_intrmap_running == B_TRUE)) {
		intrmap_private->ir_immu = immu;
	} else {
		goto intrmap_disable;
	}

	intrmap = immu->immu_intrmap;

	if (count == 1) {
		idx = alloc_tbl_entry(intrmap);
	} else {
		idx = alloc_tbl_multi_entries(intrmap, count);
	}

	if (idx == INTRMAP_IDX_FULL) {
		goto intrmap_disable;
	}

	intrmap_private->ir_idx = idx;

	sid_svt_sq = intrmap_private->ir_sid_svt_sq =
	    get_sid(dip, type, ioapic_index);
	iwp = &intrmap_private->ir_inv_wait;
	immu_init_inv_wait(iwp, "intrmaplocal", B_TRUE);

	if (count == 1) {
		if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
			immu_qinv_intr_one_cache(immu, idx, iwp);
		} else {
			immu_regs_wbf_flush(immu);
		}
		return;
	}

	for (i = 1; i < count; i++) {
		intrmap_private_tbl[i] =
		    kmem_zalloc(sizeof (intrmap_private_t), KM_SLEEP);

		INTRMAP_PRIVATE(intrmap_private_tbl[i])->ir_immu = immu;
		INTRMAP_PRIVATE(intrmap_private_tbl[i])->ir_sid_svt_sq =
		    sid_svt_sq;
		INTRMAP_PRIVATE(intrmap_private_tbl[i])->ir_idx = idx + i;
	}

	if (IMMU_CAP_GET_CM(immu->immu_regs_cap)) {
		immu_qinv_intr_caches(immu, idx, count, iwp);
	} else {
		immu_regs_wbf_flush(immu);
	}

	return;

intrmap_disable:
	kmem_free(intrmap_private_tbl[0], sizeof (intrmap_private_t));
	intrmap_private_tbl[0] = INTRMAP_DISABLE;
}
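
/*
 * Note that for a multi-count (MSI) allocation, the table entries must be
 * contiguous: immu_intrmap_msi() programs only the base handle into the
 * MSI address, and immu_intrmap_map() derives each entry's vector and
 * index by incrementing from the base.
 */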

/* program the remapping table entry (or entries) for the interrupt */
static void
immu_intrmap_map(void *intrmap_private, void *intrmap_data, uint16_t type,
    int count)
{
	immu_t	*immu;
	immu_inv_wait_t	*iwp;
	intrmap_t	*intrmap;
	ioapic_rdt_t	*irdt = (ioapic_rdt_t *)intrmap_data;
	msi_regs_t	*mregs = (msi_regs_t *)intrmap_data;
	intrmap_rte_t	irte;
	uint_t		idx, i;
	uint32_t	dst, sid_svt_sq;
	uchar_t		vector, dlm, tm, rh, dm;

	if (intrmap_private == INTRMAP_DISABLE)
		return;

	idx = INTRMAP_PRIVATE(intrmap_private)->ir_idx;
	immu = INTRMAP_PRIVATE(intrmap_private)->ir_immu;
	iwp = &INTRMAP_PRIVATE(intrmap_private)->ir_inv_wait;
	intrmap = immu->immu_intrmap;
	sid_svt_sq = INTRMAP_PRIVATE(intrmap_private)->ir_sid_svt_sq;

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
		dm = RDT_DM(irdt->ir_lo);
		rh = 0;
		tm = RDT_TM(irdt->ir_lo);
		dlm = RDT_DLM(irdt->ir_lo);
		dst = irdt->ir_hi;

		/*
		 * Mark the IRTE's TM as Edge to suppress broadcast EOI.
		 */
		if (intrmap_suppress_brdcst_eoi) {
			tm = TRIGGER_MODE_EDGE;
		}

		vector = RDT_VECTOR(irdt->ir_lo);
	} else {
		dm = MSI_ADDR_DM_PHYSICAL;
		rh = MSI_ADDR_RH_FIXED;
		tm = TRIGGER_MODE_EDGE;
		dlm = 0;
		dst = mregs->mr_addr;

		vector = mregs->mr_data & 0xff;
	}

	if (intrmap_apic_mode == LOCAL_APIC)
		dst = (dst & 0xFF) << 8;

	if (count == 1) {
		irte.lo = IRTE_LOW(dst, vector, dlm, tm, rh, dm, 0, 1);
		irte.hi = IRTE_HIGH(sid_svt_sq);

		/* set interrupt remapping table entry */
		bcopy(&irte, intrmap->intrmap_vaddr +
		    idx * INTRMAP_RTE_SIZE,
		    INTRMAP_RTE_SIZE);

		immu_qinv_intr_one_cache(immu, idx, iwp);

	} else {
		for (i = 0; i < count; i++) {
			irte.lo = IRTE_LOW(dst, vector + i, dlm, tm, rh, dm,
			    0, 1);
			irte.hi = IRTE_HIGH(sid_svt_sq);

			/* set interrupt remapping table entry */
			bcopy(&irte, intrmap->intrmap_vaddr +
			    (idx + i) * INTRMAP_RTE_SIZE,
			    INTRMAP_RTE_SIZE);
		}

		/*
		 * invalidate the cached entries for the whole range,
		 * starting at the base index (advancing idx past the
		 * range before this call would flush the wrong entries)
		 */
		immu_qinv_intr_caches(immu, idx, count, iwp);
	}
}

/* free the remapping entry */
static void
immu_intrmap_free(void **intrmap_privatep)
{
	immu_t *immu;
	immu_inv_wait_t *iwp;
	intrmap_t *intrmap;
	uint32_t idx;

	if (*intrmap_privatep == INTRMAP_DISABLE || *intrmap_privatep == NULL) {
		*intrmap_privatep = NULL;
		return;
	}

	immu = INTRMAP_PRIVATE(*intrmap_privatep)->ir_immu;
	iwp = &INTRMAP_PRIVATE(*intrmap_privatep)->ir_inv_wait;
	intrmap = immu->immu_intrmap;
	idx = INTRMAP_PRIVATE(*intrmap_privatep)->ir_idx;

	bzero(intrmap->intrmap_vaddr + idx * INTRMAP_RTE_SIZE,
	    INTRMAP_RTE_SIZE);

	immu_qinv_intr_one_cache(immu, idx, iwp);

	mutex_enter(&intrmap->intrmap_lock);
	bitset_del(&intrmap->intrmap_map, idx);
	if (intrmap->intrmap_free == INTRMAP_IDX_FULL) {
		intrmap->intrmap_free = idx;
	}
	mutex_exit(&intrmap->intrmap_lock);

	kmem_free(*intrmap_privatep, sizeof (intrmap_private_t));
	*intrmap_privatep = NULL;
}

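/*
 * In remappable format (per the VT-d spec), the I/O APIC RTE carries the
 * IRTE handle instead of a destination: the format bit is set, the handle
 * goes in the high dword and handle bit 15 rides separately in the low
 * dword (see the INTRMAP_IOAPIC_* shifts used below).
 */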
/* record the ioapic rdt entry */
static void
immu_intrmap_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
{
	uint32_t rdt_entry, tm, pol, idx, vector;

	rdt_entry = irdt->ir_lo;

	if (intrmap_private != INTRMAP_DISABLE && intrmap_private != NULL) {
		idx = INTRMAP_PRIVATE(intrmap_private)->ir_idx;
		tm = RDT_TM(rdt_entry);
		pol = RDT_POL(rdt_entry);
		vector = RDT_VECTOR(rdt_entry);
		irdt->ir_lo = (tm << INTRMAP_IOAPIC_TM_SHIFT) |
		    (pol << INTRMAP_IOAPIC_POL_SHIFT) |
		    ((idx >> 15) << INTRMAP_IOAPIC_IDX15_SHIFT) |
		    vector;
		irdt->ir_hi = (idx << INTRMAP_IOAPIC_IDX_SHIFT) |
		    (1 << INTRMAP_IOAPIC_FORMAT_SHIFT);
	} else {
		irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
	}
}

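/*
 * In remappable format, the MSI address encodes the IRTE handle rather
 * than a destination APIC id: handle bits 0-14 at INTRMAP_MSI_IDX_SHIFT,
 * handle bit 15 at INTRMAP_MSI_IDX15_SHIFT, plus the format and SHV bits.
 * Per the VT-d spec, with SHV set the subhandle in the MSI data (0 here)
 * is added to the handle, which is how a block of MSI vectors shares one
 * base handle.
 */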
/* record the msi interrupt structure */
/*ARGSUSED*/
static void
immu_intrmap_msi(void *intrmap_private, msi_regs_t *mregs)
{
	uint_t	idx;

	if (intrmap_private != INTRMAP_DISABLE && intrmap_private != NULL) {
		idx = INTRMAP_PRIVATE(intrmap_private)->ir_idx;

		mregs->mr_data = 0;
		mregs->mr_addr = MSI_ADDR_HDR |
		    ((idx & 0x7fff) << INTRMAP_MSI_IDX_SHIFT) |
		    (1 << INTRMAP_MSI_FORMAT_SHIFT) |
		    (1 << INTRMAP_MSI_SHV_SHIFT) |
		    ((idx >> 15) << INTRMAP_MSI_IDX15_SHIFT);
	} else {
		mregs->mr_addr = MSI_ADDR_HDR |
		    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
		    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
		    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
		mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
		    mregs->mr_data;
	}
}

/* ######################################################################### */
/*
 * Functions exported by immu_intrmap.c
 */
void
immu_intrmap_setup(list_t *listp)
{
	immu_t *immu;

	/*
	 * Check if the ACPI DMAR tables say that
	 * interrupt remapping is supported
	 */
	if (immu_dmar_intrmap_supported() == B_FALSE) {
		return;
	}

	/*
	 * Check if interrupt remapping is disabled.
	 */
	if (immu_intrmap_enable == B_FALSE) {
		return;
	}

	psm_vt_ops = &intrmap_ops;

	immu = list_head(listp);
	for (; immu; immu = list_next(listp, immu)) {
		mutex_init(&(immu->immu_intrmap_lock), NULL,
		    MUTEX_DEFAULT, NULL);
		mutex_enter(&(immu->immu_intrmap_lock));
		immu_init_inv_wait(&immu->immu_intrmap_inv_wait,
		    "intrmapglobal", B_TRUE);
		immu->immu_intrmap_setup = B_TRUE;
		mutex_exit(&(immu->immu_intrmap_lock));
	}
}

void
immu_intrmap_startup(immu_t *immu)
{
	/* mark the unit as running if setup completed */
	mutex_enter(&(immu->immu_intrmap_lock));
	if (immu->immu_intrmap_setup == B_TRUE) {
		immu->immu_intrmap_running = B_TRUE;
	}
	mutex_exit(&(immu->immu_intrmap_lock));
}

/*
 * Register an Intel IOMMU unit's (i.e. a DMAR unit's)
 * fault-event interrupt handler
 */
void
immu_intr_register(immu_t *immu)
{
	int irq, vect;
	char intr_handler_name[IMMU_MAXNAMELEN];
	uint32_t msi_data;
	uint32_t uaddr;
	uint32_t msi_addr;
	uint32_t localapic_id = 0;

	if (psm_get_localapicid)
		localapic_id = psm_get_localapicid(0);

	msi_addr = (MSI_ADDR_HDR |
	    ((localapic_id & 0xFF) << MSI_ADDR_DEST_SHIFT) |
	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));

	if (intrmap_apic_mode == LOCAL_X2APIC) {
		uaddr = localapic_id & 0xFFFFFF00;
	} else {
		uaddr = 0;
	}

	/* Don't need to hold immu_intr_lock since we are in boot */
	irq = vect = psm_get_ipivect(IMMU_INTR_IPL, -1);
	if (psm_xlate_vector_by_irq != NULL)
		vect = psm_xlate_vector_by_irq(irq);

	msi_data = ((MSI_DATA_DELIVERY_FIXED <<
	    MSI_DATA_DELIVERY_SHIFT) | vect);

	(void) snprintf(intr_handler_name, sizeof (intr_handler_name),
	    "%s-intr-handler", immu->immu_name);

	(void) add_avintr((void *)NULL, IMMU_INTR_IPL,
	    (avfunc)(immu_intr_handler), intr_handler_name, irq,
	    (caddr_t)immu, NULL, NULL, NULL);

	immu_regs_intr_enable(immu, msi_addr, msi_data, uaddr);

	(void) immu_intr_handler(immu);
}
1005