xref: /illumos-gate/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision 99114ab6663dd12ed5ff3c0da58de645e7ebaff4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * apic_introp.c:
28  *	Has code for Advanced DDI interrupt framework support.
29  */
30 
31 #include <sys/cpuvar.h>
32 #include <sys/psm.h>
33 #include <sys/archsystm.h>
34 #include <sys/apic.h>
35 #include <sys/sunddi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/mach_intr.h>
38 #include <sys/sysmacros.h>
39 #include <sys/trap.h>
40 #include <sys/pci.h>
41 #include <sys/pci_intr_lib.h>
42 
/*
 * Autovector table, defined outside this file.  It is indexed by irq
 * number in apic_get_vector_intr_info() to walk the handlers on an irq.
 */
extern struct av_head autovect[];

/*
 *	Local Function Prototypes
 */
apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);

/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *  0 = default value - don't know and need to call apic_check_msi_support()
 *      to find out then set it accordingly
 *  1 = supported
 * -1 = not supported
 *
 * The lazy probe and the resulting 1 / -1 assignment happen in the
 * PSM_INTR_OP_CHECK_MSI case of apic_intr_ops().
 */
int	apic_support_msi = 0;

/*
 * Multiple vector support for MSI.
 * Not referenced in this file; presumably consulted by the MSI vector
 * allocation code elsewhere (TODO confirm).
 */
#if !defined(__xpv)
int	apic_multi_msi_enable = 1;
#else
/*
 * Xen hypervisor does not seem to properly support multi-MSI
 */
int	apic_multi_msi_enable = 0;
#endif	/* __xpv */

/*
 * Multiple vector support for MSI-X.
 * When zero, PSM_INTR_OP_CHECK_MSI strips DDI_INTR_TYPE_MSIX from the set
 * of interrupt types it reports as supported.
 */
int	apic_msix_enable = 1;
74 
75 /*
76  * apic_pci_msi_enable_vector:
77  *	Set the address/data fields in the MSI/X capability structure
78  *	XXX: MSI-X support
79  */
80 /* ARGSUSED */
81 void
82 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
83     int count, int target_apic_id)
84 {
85 	uint64_t		msi_addr, msi_data;
86 	ushort_t		msi_ctrl;
87 	dev_info_t		*dip = irq_ptr->airq_dip;
88 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
89 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
90 #if !defined(__xpv)
91 	msi_regs_t		msi_regs;
92 #endif	/* ! __xpv */
93 
94 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
95 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
96 	    ddi_driver_name(dip), inum, vector, target_apic_id));
97 
98 	ASSERT((handle != NULL) && (cap_ptr != 0));
99 
100 #if !defined(__xpv)
101 	msi_regs.mr_data = vector;
102 	msi_regs.mr_addr = target_apic_id;
103 
104 	apic_vt_ops->apic_intrr_alloc_entry(irq_ptr);
105 	apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&msi_regs);
106 	apic_vt_ops->apic_intrr_record_msi(irq_ptr, &msi_regs);
107 
108 	/* MSI Address */
109 	msi_addr = msi_regs.mr_addr;
110 
111 	/* MSI Data: MSI is edge triggered according to spec */
112 	msi_data = msi_regs.mr_data;
113 #else
114 	/* MSI Address */
115 	msi_addr = (MSI_ADDR_HDR |
116 	    (target_apic_id << MSI_ADDR_DEST_SHIFT));
117 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
118 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
119 
120 	/* MSI Data: MSI is edge triggered according to spec */
121 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
122 #endif	/* ! __xpv */
123 
124 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
125 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
126 
127 	if (type == DDI_INTR_TYPE_MSI) {
128 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
129 
130 		/* Set the bits to inform how many MSIs are enabled */
131 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
132 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
133 
134 #if !defined(__xpv)
135 		/*
136 		 * Only set vector if not on hypervisor
137 		 */
138 		pci_config_put32(handle,
139 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
140 
141 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
142 			pci_config_put32(handle,
143 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
144 			pci_config_put16(handle,
145 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
146 		} else {
147 			pci_config_put16(handle,
148 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
149 		}
150 
151 	} else if (type == DDI_INTR_TYPE_MSIX) {
152 		uintptr_t	off;
153 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
154 
155 		/* Offset into the "inum"th entry in the MSI-X table */
156 		off = (uintptr_t)msix_p->msix_tbl_addr +
157 		    (inum  * PCI_MSIX_VECTOR_SIZE);
158 
159 		ddi_put32(msix_p->msix_tbl_hdl,
160 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
161 		ddi_put64(msix_p->msix_tbl_hdl,
162 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
163 #endif	/* ! __xpv */
164 	}
165 }
166 
167 
168 #if !defined(__xpv)
169 
170 /*
171  * This function returns the no. of vectors available for the pri.
172  * dip is not used at this moment.  If we really don't need that,
173  * it will be removed.
174  */
175 /*ARGSUSED*/
176 int
177 apic_navail_vector(dev_info_t *dip, int pri)
178 {
179 	int	lowest, highest, i, navail, count;
180 
181 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
182 	    (void *)dip, pri));
183 
184 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
185 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
186 	navail = count = 0;
187 
188 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
189 		lowest -= APIC_VECTOR_PER_IPL;
190 
191 	/* It has to be contiguous */
192 	for (i = lowest; i < highest; i++) {
193 		count = 0;
194 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
195 		    (i < highest)) {
196 			if (APIC_CHECK_RESERVE_VECTORS(i))
197 				break;
198 			count++;
199 			i++;
200 		}
201 		if (count > navail)
202 			navail = count;
203 	}
204 	return (navail);
205 }
206 
207 #endif	/* ! __xpv */
208 
209 /*
210  * Finds "count" contiguous MSI vectors starting at the proper alignment
211  * at "pri".
212  * Caller needs to make sure that count has to be power of 2 and should not
213  * be < 1.
214  */
215 uchar_t
216 apic_find_multi_vectors(int pri, int count)
217 {
218 	int	lowest, highest, i, navail, start, msibits;
219 
220 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
221 	    pri, count));
222 
223 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
224 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
225 	navail = 0;
226 
227 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
228 		lowest -= APIC_VECTOR_PER_IPL;
229 
230 	/*
231 	 * msibits is the no. of lower order message data bits for the
232 	 * allocated MSI vectors and is used to calculate the aligned
233 	 * starting vector
234 	 */
235 	msibits = count - 1;
236 
237 	/* It has to be contiguous */
238 	for (i = lowest; i < highest; i++) {
239 		navail = 0;
240 
241 		/*
242 		 * starting vector has to be aligned accordingly for
243 		 * multiple MSIs
244 		 */
245 		if (msibits)
246 			i = (i + msibits) & ~msibits;
247 		start = i;
248 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
249 		    (i < highest)) {
250 			if (APIC_CHECK_RESERVE_VECTORS(i))
251 				break;
252 			navail++;
253 			if (navail >= count)
254 				return (start);
255 			i++;
256 		}
257 	}
258 	return (0);
259 }
260 
261 
262 /*
263  * It finds the apic_irq_t associates with the dip, ispec and type.
264  */
265 apic_irq_t *
266 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
267 {
268 	apic_irq_t	*irqp;
269 	int i;
270 
271 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
272 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
273 	    ispec->intrspec_pri, type));
274 
275 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
276 		if ((irqp = apic_irq_table[i]) == NULL)
277 			continue;
278 		if ((irqp->airq_dip == dip) &&
279 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
280 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
281 			if (type == DDI_INTR_TYPE_MSI) {
282 				if (irqp->airq_mps_intr_index == MSI_INDEX)
283 					return (irqp);
284 			} else if (type == DDI_INTR_TYPE_MSIX) {
285 				if (irqp->airq_mps_intr_index == MSIX_INDEX)
286 					return (irqp);
287 			} else
288 				return (irqp);
289 		}
290 	}
291 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
292 	return (NULL);
293 }
294 
295 
296 #if !defined(__xpv)
297 
298 /*
299  * This function will return the pending bit of the irqp.
300  * It either comes from the IRR register of the APIC or the RDT
301  * entry of the I/O APIC.
302  * For the IRR to work, it needs to be to its binding CPU
303  */
304 static int
305 apic_get_pending(apic_irq_t *irqp, int type)
306 {
307 	int			bit, index, irr, pending;
308 	int			intin_no;
309 	int			apic_ix;
310 
311 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
312 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
313 	    type));
314 
315 	/* need to get on the bound cpu */
316 	mutex_enter(&cpu_lock);
317 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
318 
319 	index = irqp->airq_vector / 32;
320 	bit = irqp->airq_vector % 32;
321 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
322 
323 	affinity_clear();
324 	mutex_exit(&cpu_lock);
325 
326 	pending = (irr & (1 << bit)) ? 1 : 0;
327 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
328 		/* check I/O APIC for fixed interrupt */
329 		intin_no = irqp->airq_intin_no;
330 		apic_ix = irqp->airq_ioapicindex;
331 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
332 		    AV_PENDING) ? 1 : 0;
333 	}
334 	return (pending);
335 }
336 
337 
338 /*
339  * This function will clear the mask for the interrupt on the I/O APIC
340  */
341 static void
342 apic_clear_mask(apic_irq_t *irqp)
343 {
344 	int			intin_no;
345 	ulong_t			iflag;
346 	int32_t			rdt_entry;
347 	int 			apic_ix;
348 
349 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
350 	    (void *)irqp));
351 
352 	intin_no = irqp->airq_intin_no;
353 	apic_ix = irqp->airq_ioapicindex;
354 
355 	iflag = intr_clear();
356 	lock_set(&apic_ioapic_lock);
357 
358 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
359 
360 	/* clear mask */
361 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
362 	    ((~AV_MASK) & rdt_entry));
363 
364 	lock_clear(&apic_ioapic_lock);
365 	intr_restore(iflag);
366 }
367 
368 
369 /*
370  * This function will mask the interrupt on the I/O APIC
371  */
372 static void
373 apic_set_mask(apic_irq_t *irqp)
374 {
375 	int			intin_no;
376 	int 			apic_ix;
377 	ulong_t			iflag;
378 	int32_t			rdt_entry;
379 
380 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
381 
382 	intin_no = irqp->airq_intin_no;
383 	apic_ix = irqp->airq_ioapicindex;
384 
385 	iflag = intr_clear();
386 
387 	lock_set(&apic_ioapic_lock);
388 
389 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
390 
391 	/* mask it */
392 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
393 	    (AV_MASK | rdt_entry));
394 
395 	lock_clear(&apic_ioapic_lock);
396 	intr_restore(iflag);
397 }
398 
399 
400 void
401 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
402 {
403 	int i;
404 	apic_irq_t *irqptr;
405 	struct intrspec ispec;
406 
407 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
408 	    "count: %x pri: %x type: %x\n",
409 	    (void *)dip, inum, count, pri, type));
410 
411 	/* for MSI/X only */
412 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
413 		return;
414 
415 	for (i = 0; i < count; i++) {
416 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
417 		    "pri=0x%x count=0x%x\n", inum, pri, count));
418 		ispec.intrspec_vec = inum + i;
419 		ispec.intrspec_pri = pri;
420 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
421 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
422 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
423 			    "failed\n", (void *)dip, inum, pri));
424 			continue;
425 		}
426 		irqptr->airq_mps_intr_index = FREE_INDEX;
427 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
428 	}
429 }
430 
431 #endif	/* ! __xpv */
432 
433 /*
434  * check whether the system supports MSI
435  *
436  * If PCI-E capability is found, then this must be a PCI-E system.
437  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
438  * to indicate this system supports MSI.
439  */
440 int
441 apic_check_msi_support()
442 {
443 	dev_info_t *cdip;
444 	char dev_type[16];
445 	int dev_len;
446 
447 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
448 
449 	/*
450 	 * check whether the first level children of root_node have
451 	 * PCI-E capability
452 	 */
453 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
454 	    cdip = ddi_get_next_sibling(cdip)) {
455 
456 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
457 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
458 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
459 		    ddi_node_name(cdip)));
460 		dev_len = sizeof (dev_type);
461 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
462 		    "device_type", (caddr_t)dev_type, &dev_len)
463 		    != DDI_PROP_SUCCESS)
464 			continue;
465 		if (strcmp(dev_type, "pciex") == 0)
466 			return (PSM_SUCCESS);
467 	}
468 
469 	/* MSI is not supported on this system */
470 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
471 	    "device_type found\n"));
472 	return (PSM_FAILURE);
473 }
474 
475 #if !defined(__xpv)
476 
477 /*
478  * apic_pci_msi_unconfigure:
479  *
480  * This and next two interfaces are copied from pci_intr_lib.c
481  * Do ensure that these two files stay in sync.
482  * These needed to be copied over here to avoid a deadlock situation on
483  * certain mp systems that use MSI interrupts.
484  *
485  * IMPORTANT regards next three interfaces:
486  * i) are called only for MSI/X interrupts.
487  * ii) called with interrupts disabled, and must not block
488  */
489 void
490 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
491 {
492 	ushort_t		msi_ctrl;
493 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
494 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
495 
496 	ASSERT((handle != NULL) && (cap_ptr != 0));
497 
498 	if (type == DDI_INTR_TYPE_MSI) {
499 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
500 		msi_ctrl &= (~PCI_MSI_MME_MASK);
501 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
502 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
503 
504 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
505 			pci_config_put16(handle,
506 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
507 			pci_config_put32(handle,
508 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
509 		} else {
510 			pci_config_put16(handle,
511 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
512 		}
513 
514 	} else if (type == DDI_INTR_TYPE_MSIX) {
515 		uintptr_t	off;
516 		uint32_t	mask;
517 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
518 
519 		/* Offset into "inum"th entry in the MSI-X table & mask it */
520 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
521 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
522 
523 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
524 
525 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
526 
527 		/* Offset into the "inum"th entry in the MSI-X table */
528 		off = (uintptr_t)msix_p->msix_tbl_addr +
529 		    (inum * PCI_MSIX_VECTOR_SIZE);
530 
531 		/* Reset the "data" and "addr" bits */
532 		ddi_put32(msix_p->msix_tbl_hdl,
533 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
534 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
535 	}
536 }
537 
538 
539 /*
540  * apic_pci_msi_enable_mode:
541  */
542 void
543 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
544 {
545 	ushort_t		msi_ctrl;
546 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
547 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
548 
549 	ASSERT((handle != NULL) && (cap_ptr != 0));
550 
551 	if (type == DDI_INTR_TYPE_MSI) {
552 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
553 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
554 			return;
555 
556 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
557 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
558 
559 	} else if (type == DDI_INTR_TYPE_MSIX) {
560 		uintptr_t	off;
561 		uint32_t	mask;
562 		ddi_intr_msix_t	*msix_p;
563 
564 		msix_p = i_ddi_get_msix(rdip);
565 
566 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
567 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
568 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
569 
570 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
571 
572 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
573 
574 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
575 
576 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
577 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
578 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
579 			    msi_ctrl);
580 		}
581 	}
582 }
583 
584 /*
585  * apic_pci_msi_disable_mode:
586  */
587 void
588 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
589 {
590 	ushort_t		msi_ctrl;
591 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
592 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
593 
594 	ASSERT((handle != NULL) && (cap_ptr != 0));
595 
596 	if (type == DDI_INTR_TYPE_MSI) {
597 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
598 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
599 			return;
600 
601 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
602 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
603 
604 	} else if (type == DDI_INTR_TYPE_MSIX) {
605 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
606 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
607 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
608 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
609 			    msi_ctrl);
610 		}
611 	}
612 }
613 
614 
615 static int
616 apic_set_cpu(int irqno, int cpu, int *result)
617 {
618 	apic_irq_t *irqp;
619 	ulong_t iflag;
620 	int ret;
621 
622 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
623 
624 	mutex_enter(&airq_mutex);
625 	irqp = apic_irq_table[irqno];
626 	mutex_exit(&airq_mutex);
627 
628 	if (irqp == NULL) {
629 		*result = ENXIO;
630 		return (PSM_FAILURE);
631 	}
632 
633 	/* Fail if this is an MSI intr and is part of a group. */
634 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
635 	    (irqp->airq_intin_no > 1)) {
636 		*result = ENXIO;
637 		return (PSM_FAILURE);
638 	}
639 
640 	iflag = intr_clear();
641 	lock_set(&apic_ioapic_lock);
642 
643 	ret = apic_rebind_all(irqp, cpu);
644 
645 	lock_clear(&apic_ioapic_lock);
646 	intr_restore(iflag);
647 
648 	if (ret) {
649 		*result = EIO;
650 		return (PSM_FAILURE);
651 	}
652 	/*
653 	 * keep tracking the default interrupt cpu binding
654 	 */
655 	irqp->airq_cpu = cpu;
656 
657 	*result = 0;
658 	return (PSM_SUCCESS);
659 }
660 
661 static int
662 apic_grp_set_cpu(int irqno, int new_cpu, int *result)
663 {
664 	dev_info_t *orig_dip;
665 	uint32_t orig_cpu;
666 	ulong_t iflag;
667 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
668 	int i;
669 	int cap_ptr;
670 	int msi_mask_off;
671 	ushort_t msi_ctrl;
672 	uint32_t msi_pvm;
673 	ddi_acc_handle_t handle;
674 	int num_vectors = 0;
675 	uint32_t vector;
676 
677 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
678 
679 	/*
680 	 * Take mutex to insure that table doesn't change out from underneath
681 	 * us while we're playing with it.
682 	 */
683 	mutex_enter(&airq_mutex);
684 	irqps[0] = apic_irq_table[irqno];
685 	orig_cpu = irqps[0]->airq_temp_cpu;
686 	orig_dip = irqps[0]->airq_dip;
687 	num_vectors = irqps[0]->airq_intin_no;
688 	vector = irqps[0]->airq_vector;
689 
690 	/* A "group" of 1 */
691 	if (num_vectors == 1) {
692 		mutex_exit(&airq_mutex);
693 		return (apic_set_cpu(irqno, new_cpu, result));
694 	}
695 
696 	*result = ENXIO;
697 
698 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
699 		mutex_exit(&airq_mutex);
700 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
701 		goto set_grp_intr_done;
702 	}
703 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
704 		mutex_exit(&airq_mutex);
705 		DDI_INTR_IMPLDBG((CE_CONT,
706 		    "set_grp: base vec not part of a grp or not aligned: "
707 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
708 		goto set_grp_intr_done;
709 	}
710 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
711 	    num_vectors));
712 
713 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
714 
715 	*result = EIO;
716 
717 	/*
718 	 * All IRQ entries in the table for the given device will be not
719 	 * shared.  Since they are not shared, the dip in the table will
720 	 * be true to the device of interest.
721 	 */
722 	for (i = 1; i < num_vectors; i++) {
723 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
724 		if (irqps[i] == NULL) {
725 			mutex_exit(&airq_mutex);
726 			goto set_grp_intr_done;
727 		}
728 #ifdef DEBUG
729 		/* Sanity check: CPU and dip is the same for all entries. */
730 		if ((irqps[i]->airq_dip != orig_dip) ||
731 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
732 			mutex_exit(&airq_mutex);
733 			DDI_INTR_IMPLDBG((CE_CONT,
734 			    "set_grp: cpu or dip for vec 0x%x difft than for "
735 			    "vec 0x%x\n", vector, vector + i));
736 			DDI_INTR_IMPLDBG((CE_CONT,
737 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
738 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
739 			    (void *)irqps[i]->airq_dip));
740 			goto set_grp_intr_done;
741 		}
742 #endif /* DEBUG */
743 	}
744 	mutex_exit(&airq_mutex);
745 
746 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
747 	handle = i_ddi_get_pci_config_handle(orig_dip);
748 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
749 
750 	/* MSI Per vector masking is supported. */
751 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
752 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
753 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
754 		else
755 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
756 		msi_pvm = pci_config_get32(handle, msi_mask_off);
757 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
758 		DDI_INTR_IMPLDBG((CE_CONT,
759 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
760 		    pci_config_get32(handle, msi_mask_off)));
761 	}
762 
763 	iflag = intr_clear();
764 	lock_set(&apic_ioapic_lock);
765 
766 	/*
767 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
768 	 * an error if the CPU is not accepting interrupts.  If the first one
769 	 * succeeds they all will.
770 	 */
771 	if (apic_rebind_all(irqps[0], new_cpu))
772 		(void) apic_rebind_all(irqps[0], orig_cpu);
773 	else {
774 		irqps[0]->airq_cpu = new_cpu;
775 
776 		for (i = 1; i < num_vectors; i++) {
777 			(void) apic_rebind_all(irqps[i], new_cpu);
778 			irqps[i]->airq_cpu = new_cpu;
779 		}
780 		*result = 0;	/* SUCCESS */
781 	}
782 
783 	lock_clear(&apic_ioapic_lock);
784 	intr_restore(iflag);
785 
786 	/* Reenable vectors if per vector masking is supported. */
787 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
788 		pci_config_put32(handle, msi_mask_off, msi_pvm);
789 		DDI_INTR_IMPLDBG((CE_CONT,
790 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
791 		    pci_config_get32(handle, msi_mask_off)));
792 	}
793 
794 set_grp_intr_done:
795 	if (*result != 0)
796 		return (PSM_FAILURE);
797 
798 	return (PSM_SUCCESS);
799 }
800 
801 #else	/* !__xpv */
802 
/*
 * We let the hypervisor deal with MSI configuration
 * so just stub these out.
 */
807 
/*
 * Stub used on the __xpv build: intentionally does nothing.
 */
/* ARGSUSED */
void
apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
{
}
813 
/*
 * Stub used on the __xpv build: intentionally does nothing.
 */
/* ARGSUSED */
void
apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
{
}
819 
/*
 * Stub used on the __xpv build: intentionally does nothing.
 */
/* ARGSUSED */
void
apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
{
}
825 
826 #endif	/* __xpv */
827 
828 int
829 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
830 {
831 	struct autovec *av_dev;
832 	uchar_t irqno;
833 	int i;
834 	apic_irq_t *irq_p;
835 
836 	/* Sanity check the vector/irq argument. */
837 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
838 
839 	mutex_enter(&airq_mutex);
840 
841 	/*
842 	 * Convert the vecirq arg to an irq using vector_to_irq table
843 	 * if the arg is a vector.  Pass thru if already an irq.
844 	 */
845 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
846 	    PSMGI_INTRBY_VEC)
847 		irqno = apic_vector_to_irq[vecirq];
848 	else
849 		irqno = vecirq;
850 
851 	irq_p = apic_irq_table[irqno];
852 
853 	if ((irq_p == NULL) ||
854 	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
855 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
856 		mutex_exit(&airq_mutex);
857 		return (PSM_FAILURE);
858 	}
859 
860 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
861 
862 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
863 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
864 
865 		/* Return user bound info for intrd. */
866 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
867 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
868 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
869 		}
870 	}
871 
872 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
873 		intr_params_p->avgi_vector = irq_p->airq_vector;
874 
875 	if (intr_params_p->avgi_req_flags &
876 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
877 		/* Get number of devices from apic_irq table shared field. */
878 		intr_params_p->avgi_num_devs = irq_p->airq_share;
879 
880 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
881 
882 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
883 
884 		/* Some devices have NULL dip.  Don't count these. */
885 		if (intr_params_p->avgi_num_devs > 0) {
886 			for (i = 0, av_dev = autovect[irqno].avh_link;
887 			    av_dev; av_dev = av_dev->av_link)
888 				if (av_dev->av_vector && av_dev->av_dip)
889 					i++;
890 			intr_params_p->avgi_num_devs =
891 			    MIN(intr_params_p->avgi_num_devs, i);
892 		}
893 
894 		/* There are no viable dips to return. */
895 		if (intr_params_p->avgi_num_devs == 0)
896 			intr_params_p->avgi_dip_list = NULL;
897 
898 		else {	/* Return list of dips */
899 
900 			/* Allocate space in array for that number of devs. */
901 			intr_params_p->avgi_dip_list = kmem_zalloc(
902 			    intr_params_p->avgi_num_devs *
903 			    sizeof (dev_info_t *),
904 			    KM_SLEEP);
905 
906 			/*
907 			 * Loop through the device list of the autovec table
908 			 * filling in the dip array.
909 			 *
910 			 * Note that the autovect table may have some special
911 			 * entries which contain NULL dips.  These will be
912 			 * ignored.
913 			 */
914 			for (i = 0, av_dev = autovect[irqno].avh_link;
915 			    av_dev; av_dev = av_dev->av_link)
916 				if (av_dev->av_vector && av_dev->av_dip)
917 					intr_params_p->avgi_dip_list[i++] =
918 					    av_dev->av_dip;
919 		}
920 	}
921 
922 	mutex_exit(&airq_mutex);
923 
924 	return (PSM_SUCCESS);
925 }
926 
927 
928 #if !defined(__xpv)
929 
/*
 * This function provides external interface to the nexus for all
 * functionalities related to the new DDI interrupt framework.
 *
 * Input:
 * dip     - pointer to the dev_info structure of the requested device
 * hdlp    - pointer to the internal interrupt handle structure for the
 *	     requested interrupt
 * intr_op - opcode for this call
 * result  - pointer to the integer that will hold the result to be
 *	     passed back if return value is PSM_SUCCESS
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 *
 * Notes:
 * - result is also an input for PSM_INTR_OP_SET_PRI (the new priority),
 *   and the [GRP_]SET_CPU ops store an errno value in it on failure.
 * - Some ops reuse hdlp->ih_private: it carries the target CPU for
 *   [GRP_]SET_CPU, an apic_get_intr_t pointer for GET_INTR, and receives
 *   the APIC type for APIC_TYPE.
 * - PSM_INTR_OP_SET_CAP and unknown opcodes return PSM_FAILURE.
 */
int
apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
    psm_intr_op_t intr_op, int *result)
{
	int		cap;
	int		count_vec;
	int		old_priority;
	int		new_priority;
	int		new_cpu;
	apic_irq_t	*irqp;
	struct intrspec *ispec, intr_spec;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));

	/*
	 * Build a default lookup key from the handle.  XLATE_VECTOR and
	 * GET_SHARED replace it with the ispec stored in ih_private.
	 */
	ispec = &intr_spec;
	ispec->intrspec_pri = hdlp->ih_pri;
	ispec->intrspec_vec = hdlp->ih_inum;
	ispec->intrspec_func = hdlp->ih_cb_func;

	switch (intr_op) {
	case PSM_INTR_OP_CHECK_MSI:
		/*
		 * Check MSI/X is supported or not at APIC level and
		 * masked off the MSI/X bits in hdlp->ih_type if not
		 * supported before return.  If MSI/X is supported,
		 * leave the ih_type unchanged and return.
		 *
		 * hdlp->ih_type passed in from the nexus has all the
		 * interrupt types supported by the device.
		 */
		if (apic_support_msi == 0) {
			/*
			 * if apic_support_msi is not set, call
			 * apic_check_msi_support() to check whether msi
			 * is supported first
			 */
			if (apic_check_msi_support() == PSM_SUCCESS)
				apic_support_msi = 1;
			else
				apic_support_msi = -1;
		}
		if (apic_support_msi == 1) {
			/* MSI-X can still be switched off on its own. */
			if (apic_msix_enable)
				*result = hdlp->ih_type;
			else
				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
		} else
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
		break;
	case PSM_INTR_OP_ALLOC_VECTORS:
		/*
		 * ih_scratch1 is passed as the third argument and
		 * ih_scratch2 as the last; presumably the requested count
		 * and the allocation behavior flag (the SET_PRI case below
		 * passes 1 and DDI_INTR_ALLOC_STRICT in those positions).
		 * Any type other than MSI goes to the MSI-X allocator.
		 */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		else
			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		break;
	case PSM_INTR_OP_FREE_VECTORS:
		/* ih_scratch1 is the number of vectors to free. */
		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
		    hdlp->ih_pri, hdlp->ih_type);
		break;
	case PSM_INTR_OP_NAVAIL_VECTORS:
		*result = apic_navail_vector(dip, hdlp->ih_pri);
		break;
	case PSM_INTR_OP_XLATE_VECTOR:
		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
		break;
	case PSM_INTR_OP_GET_PENDING:
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		*result = apic_get_pending(irqp, hdlp->ih_type);
		break;
	case PSM_INTR_OP_CLEAR_MASK:
		/* Masking is done at the I/O APIC: fixed interrupts only. */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
		if (irqp == NULL)
			return (PSM_FAILURE);
		apic_clear_mask(irqp);
		break;
	case PSM_INTR_OP_SET_MASK:
		/* Masking is done at the I/O APIC: fixed interrupts only. */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		apic_set_mask(irqp);
		break;
	case PSM_INTR_OP_GET_CAP:
		/*
		 * Every type reports PENDING; FIXED adds MASKABLE and
		 * MSI-X adds RETARGETABLE.
		 */
		cap = DDI_INTR_FLAG_PENDING;
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
			cap |= DDI_INTR_FLAG_MASKABLE;
		else if (hdlp->ih_type == DDI_INTR_TYPE_MSIX)
			cap |= DDI_INTR_FLAG_RETARGETABLE;
		*result = cap;
		break;
	case PSM_INTR_OP_GET_SHARED:
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		/* Shared means more than one user of the irq entry. */
		*result = (irqp->airq_share > 1) ? 1: 0;
		break;
	case PSM_INTR_OP_SET_PRI:
		old_priority = hdlp->ih_pri;	/* save old value */
		new_priority = *(int *)result;	/* try the new value */

		/* Nothing is reallocated here for fixed interrupts. */
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
			return (PSM_SUCCESS);
		}

		/* Now allocate the vectors */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI) {
			/* SET_PRI does not support the case of multiple MSI */
			if (i_ddi_intr_get_current_nintrs(hdlp->ih_dip) > 1)
				return (PSM_FAILURE);

			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    1, new_priority,
			    DDI_INTR_ALLOC_STRICT);
		} else {
			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    1, new_priority,
			    DDI_INTR_ALLOC_STRICT);
		}

		/* Did we get new vectors? */
		if (!count_vec)
			return (PSM_FAILURE);

		/* Finally, free the previously allocated vectors */
		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
		    old_priority, hdlp->ih_type);
		break;
	case PSM_INTR_OP_SET_CPU:
	case PSM_INTR_OP_GRP_SET_CPU:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a CPU value.
		 */
		new_cpu = (int)(intptr_t)hdlp->ih_private;
		if (!apic_cpu_in_range(new_cpu)) {
			DDI_INTR_IMPLDBG((CE_CONT,
			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
			*result = EINVAL;
			return (PSM_FAILURE);
		}
		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
			DDI_INTR_IMPLDBG((CE_CONT,
			    "[grp_]set_cpu: vector out of range: %d\n",
			    hdlp->ih_vector));
			*result = EINVAL;
			return (PSM_FAILURE);
		}
		/*
		 * Unless the caller flagged ih_vector as already being an
		 * irq, translate it from a vector to an irq in place.
		 */
		if (!(hdlp->ih_flags & PSMGI_INTRBY_IRQ))
			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
		if (intr_op == PSM_INTR_OP_SET_CPU) {
			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
			    PSM_SUCCESS)
				return (PSM_FAILURE);
		} else {
			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
			    result) != PSM_SUCCESS)
				return (PSM_FAILURE);
		}
		break;
	case PSM_INTR_OP_GET_INTR:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a pointer to a apic_get_intr_t.
		 */
		if (apic_get_vector_intr_info(
		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_APIC_TYPE:
		/* Answers go back through the handle, not through result. */
		hdlp->ih_private = apic_get_apic_type();
		hdlp->ih_ver = apic_get_apic_version();
		break;
	case PSM_INTR_OP_SET_CAP:
	default:
		return (PSM_FAILURE);
	}
	return (PSM_SUCCESS);
}
1137 #endif	/* !__xpv */
1138