xref: /titanic_50/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision aecfc01d1bad84e66649703f7fc2926ef70b34ba)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * apic_introp.c:
30  *	Has code for Advanced DDI interrupt framework support.
31  */
32 
33 #include <sys/cpuvar.h>
34 #include <sys/psm.h>
35 #include <sys/archsystm.h>
36 #include <sys/apic.h>
37 #include <sys/sunddi.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/mach_intr.h>
40 #include <sys/sysmacros.h>
41 #include <sys/trap.h>
42 #include <sys/pci.h>
43 #include <sys/pci_intr_lib.h>
44 
45 extern struct av_head autovect[];
46 
47 /*
48  *	Local Function Prototypes
49  */
50 apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);
51 
/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *  0 = default value - don't know and need to call apic_check_msi_support()
 *      to find out then set it accordingly
 *  1 = supported
 * -1 = not supported
 *
 * The flag is resolved lazily by the PSM_INTR_OP_CHECK_MSI case of
 * apic_intr_ops().
 */
int	apic_support_msi = 0;

/*
 * Multiple vector support for MSI.
 * NOTE(review): neither variable is referenced in this file; presumably
 * they are consumed by the MSI vector allocator - confirm against
 * apic_alloc_msi_vectors().
 */
int	apic_multi_msi_enable = 1;
int	apic_multi_msi_max = 2;

/*
 * MSI-X support.
 * apic_msix_enable: when zero, apic_intr_ops() masks DDI_INTR_TYPE_MSIX out
 * of the types reported for PSM_INTR_OP_CHECK_MSI.
 * apic_msix_max: maximum no. of MSI-X vectors supported (not referenced in
 * this file; presumably used by the MSI-X allocator - confirm).
 */
int	apic_msix_enable = 1;
int	apic_msix_max = 2;
71 
72 /*
73  * apic_pci_msi_enable_vector:
74  *	Set the address/data fields in the MSI/X capability structure
75  *	XXX: MSI-X support
76  */
77 /* ARGSUSED */
78 void
79 apic_pci_msi_enable_vector(dev_info_t *dip, int type, int inum, int vector,
80     int count, int target_apic_id)
81 {
82 	uint64_t		msi_addr, msi_data;
83 	ushort_t		msi_ctrl;
84 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
85 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
86 
87 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
88 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
89 	    ddi_driver_name(dip), inum, vector, target_apic_id));
90 
91 	ASSERT((handle != NULL) && (cap_ptr != 0));
92 
93 	/* MSI Address */
94 	msi_addr = (MSI_ADDR_HDR | (target_apic_id << MSI_ADDR_DEST_SHIFT));
95 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
96 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
97 
98 	/* MSI Data: MSI is edge triggered according to spec */
99 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
100 
101 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
102 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
103 
104 	if (type == DDI_INTR_TYPE_MSI) {
105 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
106 
107 		/* Set the bits to inform how many MSIs are enabled */
108 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
109 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
110 
111 		pci_config_put32(handle,
112 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
113 
114 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
115 			pci_config_put32(handle,
116 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
117 			pci_config_put16(handle,
118 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
119 		} else {
120 			pci_config_put16(handle,
121 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
122 		}
123 
124 	} else if (type == DDI_INTR_TYPE_MSIX) {
125 		uintptr_t	off;
126 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
127 
128 		/* Offset into the "inum"th entry in the MSI-X table */
129 		off = (uintptr_t)msix_p->msix_tbl_addr +
130 		    (inum  * PCI_MSIX_VECTOR_SIZE);
131 
132 		ddi_put32(msix_p->msix_tbl_hdl,
133 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
134 		ddi_put64(msix_p->msix_tbl_hdl,
135 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
136 	}
137 }
138 
139 
140 /*
141  * This function returns the no. of vectors available for the pri.
142  * dip is not used at this moment.  If we really don't need that,
143  * it will be removed.
144  */
145 /*ARGSUSED*/
146 int
147 apic_navail_vector(dev_info_t *dip, int pri)
148 {
149 	int	lowest, highest, i, navail, count;
150 
151 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
152 	    (void *)dip, pri));
153 
154 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
155 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
156 	navail = count = 0;
157 
158 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
159 		lowest -= APIC_VECTOR_PER_IPL;
160 
161 	/* It has to be contiguous */
162 	for (i = lowest; i < highest; i++) {
163 		count = 0;
164 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
165 		    (i < highest)) {
166 			if (APIC_CHECK_RESERVE_VECTORS(i))
167 				break;
168 			count++;
169 			i++;
170 		}
171 		if (count > navail)
172 			navail = count;
173 	}
174 	return (navail);
175 }
176 
177 /*
178  * Finds "count" contiguous MSI vectors starting at the proper alignment
179  * at "pri".
180  * Caller needs to make sure that count has to be power of 2 and should not
181  * be < 1.
182  */
183 uchar_t
184 apic_find_multi_vectors(int pri, int count)
185 {
186 	int	lowest, highest, i, navail, start, msibits;
187 
188 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
189 	    pri, count));
190 
191 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
192 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
193 	navail = 0;
194 
195 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
196 		lowest -= APIC_VECTOR_PER_IPL;
197 
198 	/*
199 	 * msibits is the no. of lower order message data bits for the
200 	 * allocated MSI vectors and is used to calculate the aligned
201 	 * starting vector
202 	 */
203 	msibits = count - 1;
204 
205 	/* It has to be contiguous */
206 	for (i = lowest; i < highest; i++) {
207 		navail = 0;
208 
209 		/*
210 		 * starting vector has to be aligned accordingly for
211 		 * multiple MSIs
212 		 */
213 		if (msibits)
214 			i = (i + msibits) & ~msibits;
215 		start = i;
216 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
217 		    (i < highest)) {
218 			if (APIC_CHECK_RESERVE_VECTORS(i))
219 				break;
220 			navail++;
221 			if (navail >= count)
222 				return (start);
223 			i++;
224 		}
225 	}
226 	return (0);
227 }
228 
229 
230 /*
231  * It finds the apic_irq_t associates with the dip, ispec and type.
232  */
233 apic_irq_t *
234 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
235 {
236 	apic_irq_t	*irqp;
237 	int i;
238 
239 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
240 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
241 	    ispec->intrspec_pri, type));
242 
243 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
244 		if ((irqp = apic_irq_table[i]) == NULL)
245 			continue;
246 		if ((irqp->airq_dip == dip) &&
247 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
248 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
249 			if (type == DDI_INTR_TYPE_MSI) {
250 				if (irqp->airq_mps_intr_index == MSI_INDEX)
251 					return (irqp);
252 			} else if (type == DDI_INTR_TYPE_MSIX) {
253 				if (irqp->airq_mps_intr_index == MSIX_INDEX)
254 					return (irqp);
255 			} else
256 				return (irqp);
257 		}
258 	}
259 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
260 	return (NULL);
261 }
262 
263 
264 #if !defined(__xpv)
265 
266 /*
267  * This function will return the pending bit of the irqp.
268  * It either comes from the IRR register of the APIC or the RDT
269  * entry of the I/O APIC.
270  * For the IRR to work, it needs to be to its binding CPU
271  */
272 static int
273 apic_get_pending(apic_irq_t *irqp, int type)
274 {
275 	int			bit, index, irr, pending;
276 	int			intin_no;
277 	int			apic_ix;
278 
279 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
280 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
281 	    type));
282 
283 	/* need to get on the bound cpu */
284 	mutex_enter(&cpu_lock);
285 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
286 
287 	index = irqp->airq_vector / 32;
288 	bit = irqp->airq_vector % 32;
289 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
290 
291 	affinity_clear();
292 	mutex_exit(&cpu_lock);
293 
294 	pending = (irr & (1 << bit)) ? 1 : 0;
295 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
296 		/* check I/O APIC for fixed interrupt */
297 		intin_no = irqp->airq_intin_no;
298 		apic_ix = irqp->airq_ioapicindex;
299 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
300 		    AV_PENDING) ? 1 : 0;
301 	}
302 	return (pending);
303 }
304 
305 
306 /*
307  * This function will clear the mask for the interrupt on the I/O APIC
308  */
309 static void
310 apic_clear_mask(apic_irq_t *irqp)
311 {
312 	int			intin_no;
313 	ulong_t			iflag;
314 	int32_t			rdt_entry;
315 	int 			apic_ix;
316 
317 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
318 	    (void *)irqp));
319 
320 	intin_no = irqp->airq_intin_no;
321 	apic_ix = irqp->airq_ioapicindex;
322 
323 	iflag = intr_clear();
324 	lock_set(&apic_ioapic_lock);
325 
326 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
327 
328 	/* clear mask */
329 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
330 	    ((~AV_MASK) & rdt_entry));
331 
332 	lock_clear(&apic_ioapic_lock);
333 	intr_restore(iflag);
334 }
335 
336 
337 /*
338  * This function will mask the interrupt on the I/O APIC
339  */
340 static void
341 apic_set_mask(apic_irq_t *irqp)
342 {
343 	int			intin_no;
344 	int 			apic_ix;
345 	ulong_t			iflag;
346 	int32_t			rdt_entry;
347 
348 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
349 
350 	intin_no = irqp->airq_intin_no;
351 	apic_ix = irqp->airq_ioapicindex;
352 
353 	iflag = intr_clear();
354 
355 	lock_set(&apic_ioapic_lock);
356 
357 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
358 
359 	/* mask it */
360 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
361 	    (AV_MASK | rdt_entry));
362 
363 	lock_clear(&apic_ioapic_lock);
364 	intr_restore(iflag);
365 }
366 
367 #endif	/* ! __xpv */
368 
369 void
370 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
371 {
372 	int i;
373 	apic_irq_t *irqptr;
374 	struct intrspec ispec;
375 
376 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
377 	    "count: %x pri: %x type: %x\n",
378 	    (void *)dip, inum, count, pri, type));
379 
380 	/* for MSI/X only */
381 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
382 		return;
383 
384 	for (i = 0; i < count; i++) {
385 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
386 		    "pri=0x%x count=0x%x\n", inum, pri, count));
387 		ispec.intrspec_vec = inum + i;
388 		ispec.intrspec_pri = pri;
389 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
390 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
391 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
392 			    "failed\n", (void *)dip, inum, pri));
393 			continue;
394 		}
395 		irqptr->airq_mps_intr_index = FREE_INDEX;
396 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
397 	}
398 }
399 
400 
401 /*
402  * check whether the system supports MSI
403  *
404  * If PCI-E capability is found, then this must be a PCI-E system.
405  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
406  * to indicate this system supports MSI.
407  */
408 int
409 apic_check_msi_support()
410 {
411 	dev_info_t *cdip;
412 	char dev_type[16];
413 	int dev_len;
414 
415 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
416 
417 	/*
418 	 * check whether the first level children of root_node have
419 	 * PCI-E capability
420 	 */
421 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
422 	    cdip = ddi_get_next_sibling(cdip)) {
423 
424 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
425 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
426 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
427 		    ddi_node_name(cdip)));
428 		dev_len = sizeof (dev_type);
429 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
430 		    "device_type", (caddr_t)dev_type, &dev_len)
431 		    != DDI_PROP_SUCCESS)
432 			continue;
433 		if (strcmp(dev_type, "pciex") == 0)
434 			return (PSM_SUCCESS);
435 	}
436 
437 	/* MSI is not supported on this system */
438 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
439 	    "device_type found\n"));
440 	return (PSM_FAILURE);
441 }
442 
443 /*
444  * apic_pci_msi_unconfigure:
445  *
446  * This and next two interfaces are copied from pci_intr_lib.c
447  * Do ensure that these two files stay in sync.
448  * These needed to be copied over here to avoid a deadlock situation on
449  * certain mp systems that use MSI interrupts.
450  *
451  * IMPORTANT regards next three interfaces:
452  * i) are called only for MSI/X interrupts.
453  * ii) called with interrupts disabled, and must not block
454  */
455 void
456 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
457 {
458 	ushort_t		msi_ctrl;
459 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
460 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
461 
462 	ASSERT((handle != NULL) && (cap_ptr != 0));
463 
464 	if (type == DDI_INTR_TYPE_MSI) {
465 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
466 		msi_ctrl &= (~PCI_MSI_MME_MASK);
467 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
468 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
469 
470 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
471 			pci_config_put16(handle,
472 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
473 			pci_config_put32(handle,
474 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
475 		} else {
476 			pci_config_put16(handle,
477 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
478 		}
479 
480 	} else if (type == DDI_INTR_TYPE_MSIX) {
481 		uintptr_t	off;
482 		uint32_t	mask;
483 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
484 
485 		/* Offset into "inum"th entry in the MSI-X table & mask it */
486 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
487 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
488 
489 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
490 
491 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
492 
493 		/* Offset into the "inum"th entry in the MSI-X table */
494 		off = (uintptr_t)msix_p->msix_tbl_addr +
495 		    (inum * PCI_MSIX_VECTOR_SIZE);
496 
497 		/* Reset the "data" and "addr" bits */
498 		ddi_put32(msix_p->msix_tbl_hdl,
499 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
500 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
501 	}
502 }
503 
504 
505 /*
506  * apic_pci_msi_enable_mode:
507  */
508 void
509 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
510 {
511 	ushort_t		msi_ctrl;
512 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
513 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
514 
515 	ASSERT((handle != NULL) && (cap_ptr != 0));
516 
517 	if (type == DDI_INTR_TYPE_MSI) {
518 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
519 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
520 			return;
521 
522 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
523 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
524 
525 	} else if (type == DDI_INTR_TYPE_MSIX) {
526 		uintptr_t	off;
527 		uint32_t	mask;
528 		ddi_intr_msix_t	*msix_p;
529 
530 		msix_p = i_ddi_get_msix(rdip);
531 
532 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
533 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
534 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
535 
536 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
537 
538 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
539 
540 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
541 
542 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
543 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
544 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
545 			    msi_ctrl);
546 		}
547 	}
548 }
549 
550 /*
551  * apic_pci_msi_disable_mode:
552  */
553 void
554 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
555 {
556 	ushort_t		msi_ctrl;
557 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
558 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
559 
560 	ASSERT((handle != NULL) && (cap_ptr != 0));
561 
562 	if (type == DDI_INTR_TYPE_MSI) {
563 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
564 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
565 			return;
566 
567 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
568 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
569 
570 	} else if (type == DDI_INTR_TYPE_MSIX) {
571 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
572 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
573 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
574 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
575 			    msi_ctrl);
576 		}
577 	}
578 }
579 
580 #if !defined(__xpv)
581 
582 static int
583 apic_set_cpu(uint32_t vector, int cpu, int *result)
584 {
585 	apic_irq_t *irqp;
586 	ulong_t iflag;
587 	int ret;
588 
589 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
590 
591 	/* Convert the vector to the irq using vector_to_irq table. */
592 	mutex_enter(&airq_mutex);
593 	irqp = apic_irq_table[apic_vector_to_irq[vector]];
594 	mutex_exit(&airq_mutex);
595 
596 	if (irqp == NULL) {
597 		*result = ENXIO;
598 		return (PSM_FAILURE);
599 	}
600 
601 	/* Fail if this is an MSI intr and is part of a group. */
602 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
603 	    (irqp->airq_intin_no > 1)) {
604 		*result = ENXIO;
605 		return (PSM_FAILURE);
606 	}
607 
608 	iflag = intr_clear();
609 	lock_set(&apic_ioapic_lock);
610 
611 	ret = apic_rebind_all(irqp, cpu);
612 
613 	lock_clear(&apic_ioapic_lock);
614 	intr_restore(iflag);
615 
616 	if (ret) {
617 		*result = EIO;
618 		return (PSM_FAILURE);
619 	}
620 	*result = 0;
621 	return (PSM_SUCCESS);
622 }
623 
624 static int
625 apic_grp_set_cpu(uint32_t vector, int new_cpu, int *result)
626 {
627 	dev_info_t *orig_dip;
628 	uint32_t orig_cpu;
629 	ulong_t iflag;
630 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
631 	int i;
632 	int cap_ptr;
633 	int msi_mask_off;
634 	ushort_t msi_ctrl;
635 	uint32_t msi_pvm;
636 	ddi_acc_handle_t handle;
637 	int num_vectors = 0;
638 
639 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
640 
641 	/*
642 	 * Take mutex to insure that table doesn't change out from underneath
643 	 * us while we're playing with it.
644 	 */
645 	mutex_enter(&airq_mutex);
646 	irqps[0] = apic_irq_table[apic_vector_to_irq[vector]];
647 	orig_cpu = irqps[0]->airq_temp_cpu;
648 	orig_dip = irqps[0]->airq_dip;
649 	num_vectors = irqps[0]->airq_intin_no;
650 
651 	/* A "group" of 1 */
652 	if (num_vectors == 1) {
653 		mutex_exit(&airq_mutex);
654 		return (apic_set_cpu(vector, new_cpu, result));
655 	}
656 
657 	*result = ENXIO;
658 
659 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
660 		mutex_exit(&airq_mutex);
661 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
662 		goto set_grp_intr_done;
663 	}
664 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
665 		mutex_exit(&airq_mutex);
666 		DDI_INTR_IMPLDBG((CE_CONT,
667 		    "set_grp: base vec not part of a grp or not aligned: "
668 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
669 		goto set_grp_intr_done;
670 	}
671 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
672 	    num_vectors));
673 
674 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
675 
676 	*result = EIO;
677 
678 	/*
679 	 * All IRQ entries in the table for the given device will be not
680 	 * shared.  Since they are not shared, the dip in the table will
681 	 * be true to the device of interest.
682 	 */
683 	for (i = 1; i < num_vectors; i++) {
684 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
685 		if (irqps[i] == NULL) {
686 			mutex_exit(&airq_mutex);
687 			goto set_grp_intr_done;
688 		}
689 #ifdef DEBUG
690 		/* Sanity check: CPU and dip is the same for all entries. */
691 		if ((irqps[i]->airq_dip != orig_dip) ||
692 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
693 			mutex_exit(&airq_mutex);
694 			DDI_INTR_IMPLDBG((CE_CONT,
695 			    "set_grp: cpu or dip for vec 0x%x difft than for "
696 			    "vec 0x%x\n", vector, vector + i));
697 			DDI_INTR_IMPLDBG((CE_CONT,
698 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
699 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
700 			    (void *)irqps[i]->airq_dip));
701 			goto set_grp_intr_done;
702 		}
703 #endif /* DEBUG */
704 	}
705 	mutex_exit(&airq_mutex);
706 
707 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
708 	handle = i_ddi_get_pci_config_handle(orig_dip);
709 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
710 
711 	/* MSI Per vector masking is supported. */
712 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
713 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
714 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
715 		else
716 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
717 		msi_pvm = pci_config_get32(handle, msi_mask_off);
718 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
719 		DDI_INTR_IMPLDBG((CE_CONT,
720 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
721 		    pci_config_get32(handle, msi_mask_off)));
722 	}
723 
724 	iflag = intr_clear();
725 	lock_set(&apic_ioapic_lock);
726 
727 	/*
728 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
729 	 * an error if the CPU is not accepting interrupts.  If the first one
730 	 * succeeds they all will.
731 	 */
732 	if (apic_rebind_all(irqps[0], new_cpu))
733 		(void) apic_rebind_all(irqps[0], orig_cpu);
734 	else {
735 		for (i = 1; i < num_vectors; i++)
736 			(void) apic_rebind_all(irqps[i], new_cpu);
737 		*result = 0;	/* SUCCESS */
738 	}
739 
740 	lock_clear(&apic_ioapic_lock);
741 	intr_restore(iflag);
742 
743 	/* Reenable vectors if per vector masking is supported. */
744 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
745 		pci_config_put32(handle, msi_mask_off, msi_pvm);
746 		DDI_INTR_IMPLDBG((CE_CONT,
747 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
748 		    pci_config_get32(handle, msi_mask_off)));
749 	}
750 
751 set_grp_intr_done:
752 	if (*result != 0)
753 		return (PSM_FAILURE);
754 
755 	return (PSM_SUCCESS);
756 }
757 
758 #endif	/* !__xpv */
759 
760 int
761 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
762 {
763 	struct autovec *av_dev;
764 	uchar_t irqno;
765 	int i;
766 	apic_irq_t *irq_p;
767 
768 	/* Sanity check the vector/irq argument. */
769 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
770 
771 	mutex_enter(&airq_mutex);
772 
773 	/*
774 	 * Convert the vecirq arg to an irq using vector_to_irq table
775 	 * if the arg is a vector.  Pass thru if already an irq.
776 	 */
777 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
778 	    PSMGI_INTRBY_VEC)
779 		irqno = apic_vector_to_irq[vecirq];
780 	else
781 		irqno = vecirq;
782 
783 	irq_p = apic_irq_table[irqno];
784 
785 	if ((irq_p == NULL) ||
786 	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
787 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
788 		mutex_exit(&airq_mutex);
789 		return (PSM_FAILURE);
790 	}
791 
792 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
793 
794 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
795 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
796 
797 		/* Return user bound info for intrd. */
798 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
799 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
800 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
801 		}
802 	}
803 
804 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
805 		intr_params_p->avgi_vector = irq_p->airq_vector;
806 
807 	if (intr_params_p->avgi_req_flags &
808 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
809 		/* Get number of devices from apic_irq table shared field. */
810 		intr_params_p->avgi_num_devs = irq_p->airq_share;
811 
812 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
813 
814 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
815 
816 		/* Some devices have NULL dip.  Don't count these. */
817 		if (intr_params_p->avgi_num_devs > 0) {
818 			for (i = 0, av_dev = autovect[irqno].avh_link;
819 			    av_dev; av_dev = av_dev->av_link)
820 				if (av_dev->av_vector && av_dev->av_dip)
821 					i++;
822 			intr_params_p->avgi_num_devs =
823 			    MIN(intr_params_p->avgi_num_devs, i);
824 		}
825 
826 		/* There are no viable dips to return. */
827 		if (intr_params_p->avgi_num_devs == 0)
828 			intr_params_p->avgi_dip_list = NULL;
829 
830 		else {	/* Return list of dips */
831 
832 			/* Allocate space in array for that number of devs. */
833 			intr_params_p->avgi_dip_list = kmem_zalloc(
834 			    intr_params_p->avgi_num_devs *
835 			    sizeof (dev_info_t *),
836 			    KM_SLEEP);
837 
838 			/*
839 			 * Loop through the device list of the autovec table
840 			 * filling in the dip array.
841 			 *
842 			 * Note that the autovect table may have some special
843 			 * entries which contain NULL dips.  These will be
844 			 * ignored.
845 			 */
846 			for (i = 0, av_dev = autovect[irqno].avh_link;
847 			    av_dev; av_dev = av_dev->av_link)
848 				if (av_dev->av_vector && av_dev->av_dip)
849 					intr_params_p->avgi_dip_list[i++] =
850 					    av_dev->av_dip;
851 		}
852 	}
853 
854 	mutex_exit(&airq_mutex);
855 
856 	return (PSM_SUCCESS);
857 }
858 
859 
860 #if !defined(__xpv)
861 
862 /*
863  * This function provides external interface to the nexus for all
864  * functionalities related to the new DDI interrupt framework.
865  *
866  * Input:
867  * dip     - pointer to the dev_info structure of the requested device
868  * hdlp    - pointer to the internal interrupt handle structure for the
869  *	     requested interrupt
870  * intr_op - opcode for this call
871  * result  - pointer to the integer that will hold the result to be
872  *	     passed back if return value is PSM_SUCCESS
873  *
874  * Output:
875  * return value is either PSM_SUCCESS or PSM_FAILURE
876  */
877 int
878 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
879     psm_intr_op_t intr_op, int *result)
880 {
881 	int		cap;
882 	int		count_vec;
883 	int		old_priority;
884 	int		new_priority;
885 	int		new_cpu;
886 	apic_irq_t	*irqp;
887 	struct intrspec *ispec, intr_spec;
888 
889 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
890 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
891 
892 	ispec = &intr_spec;
893 	ispec->intrspec_pri = hdlp->ih_pri;
894 	ispec->intrspec_vec = hdlp->ih_inum;
895 	ispec->intrspec_func = hdlp->ih_cb_func;
896 
897 	switch (intr_op) {
898 	case PSM_INTR_OP_CHECK_MSI:
899 		/*
900 		 * Check MSI/X is supported or not at APIC level and
901 		 * masked off the MSI/X bits in hdlp->ih_type if not
902 		 * supported before return.  If MSI/X is supported,
903 		 * leave the ih_type unchanged and return.
904 		 *
905 		 * hdlp->ih_type passed in from the nexus has all the
906 		 * interrupt types supported by the device.
907 		 */
908 		if (apic_support_msi == 0) {
909 			/*
910 			 * if apic_support_msi is not set, call
911 			 * apic_check_msi_support() to check whether msi
912 			 * is supported first
913 			 */
914 			if (apic_check_msi_support() == PSM_SUCCESS)
915 				apic_support_msi = 1;
916 			else
917 				apic_support_msi = -1;
918 		}
919 		if (apic_support_msi == 1) {
920 			if (apic_msix_enable)
921 				*result = hdlp->ih_type;
922 			else
923 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
924 		} else
925 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
926 			    DDI_INTR_TYPE_MSIX);
927 		break;
928 	case PSM_INTR_OP_ALLOC_VECTORS:
929 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
930 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
931 			    hdlp->ih_scratch1, hdlp->ih_pri,
932 			    (int)(uintptr_t)hdlp->ih_scratch2);
933 		else
934 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
935 			    hdlp->ih_scratch1, hdlp->ih_pri,
936 			    (int)(uintptr_t)hdlp->ih_scratch2);
937 		break;
938 	case PSM_INTR_OP_FREE_VECTORS:
939 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
940 		    hdlp->ih_pri, hdlp->ih_type);
941 		break;
942 	case PSM_INTR_OP_NAVAIL_VECTORS:
943 		*result = apic_navail_vector(dip, hdlp->ih_pri);
944 		break;
945 	case PSM_INTR_OP_XLATE_VECTOR:
946 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
947 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
948 		break;
949 	case PSM_INTR_OP_GET_PENDING:
950 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
951 			return (PSM_FAILURE);
952 		*result = apic_get_pending(irqp, hdlp->ih_type);
953 		break;
954 	case PSM_INTR_OP_CLEAR_MASK:
955 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
956 			return (PSM_FAILURE);
957 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
958 		if (irqp == NULL)
959 			return (PSM_FAILURE);
960 		apic_clear_mask(irqp);
961 		break;
962 	case PSM_INTR_OP_SET_MASK:
963 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
964 			return (PSM_FAILURE);
965 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
966 			return (PSM_FAILURE);
967 		apic_set_mask(irqp);
968 		break;
969 	case PSM_INTR_OP_GET_CAP:
970 		cap = DDI_INTR_FLAG_PENDING;
971 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
972 			cap |= DDI_INTR_FLAG_MASKABLE;
973 		*result = cap;
974 		break;
975 	case PSM_INTR_OP_GET_SHARED:
976 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
977 			return (PSM_FAILURE);
978 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
979 			return (PSM_FAILURE);
980 		*result = irqp->airq_share ? 1: 0;
981 		break;
982 	case PSM_INTR_OP_SET_PRI:
983 		old_priority = hdlp->ih_pri;	/* save old value */
984 		new_priority = *(int *)result;	/* try the new value */
985 
986 		/* First, check if "hdlp->ih_scratch1" vectors exist? */
987 		if (apic_navail_vector(dip, new_priority) < hdlp->ih_scratch1)
988 			return (PSM_FAILURE);
989 
990 		/* Now allocate the vectors */
991 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
992 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
993 			    hdlp->ih_scratch1, new_priority,
994 			    DDI_INTR_ALLOC_STRICT);
995 		else
996 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
997 			    hdlp->ih_scratch1, new_priority,
998 			    DDI_INTR_ALLOC_STRICT);
999 
1000 		/* Did we get new vectors? */
1001 		if (!count_vec)
1002 			return (PSM_FAILURE);
1003 
1004 		/* Finally, free the previously allocated vectors */
1005 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
1006 		    old_priority, hdlp->ih_type);
1007 		hdlp->ih_pri = new_priority; /* set the new value */
1008 		break;
1009 	case PSM_INTR_OP_SET_CPU:
1010 	case PSM_INTR_OP_GRP_SET_CPU:
1011 		/*
1012 		 * The interrupt handle given here has been allocated
1013 		 * specifically for this command, and ih_private carries
1014 		 * a CPU value.
1015 		 */
1016 		new_cpu = (int)(intptr_t)hdlp->ih_private;
1017 		if (!apic_cpu_in_range(new_cpu)) {
1018 			DDI_INTR_IMPLDBG((CE_CONT,
1019 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
1020 			*result = EINVAL;
1021 			return (PSM_FAILURE);
1022 		}
1023 		if (intr_op == PSM_INTR_OP_SET_CPU) {
1024 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
1025 			    PSM_SUCCESS)
1026 				return (PSM_FAILURE);
1027 		} else {
1028 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
1029 			    result) != PSM_SUCCESS)
1030 				return (PSM_FAILURE);
1031 		}
1032 		break;
1033 	case PSM_INTR_OP_GET_INTR:
1034 		/*
1035 		 * The interrupt handle given here has been allocated
1036 		 * specifically for this command, and ih_private carries
1037 		 * a pointer to a apic_get_intr_t.
1038 		 */
1039 		if (apic_get_vector_intr_info(
1040 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
1041 			return (PSM_FAILURE);
1042 		break;
1043 	case PSM_INTR_OP_APIC_TYPE:
1044 		hdlp->ih_private = apic_get_apic_type();
1045 		hdlp->ih_ver = apic_get_apic_version();
1046 		break;
1047 	case PSM_INTR_OP_SET_CAP:
1048 	default:
1049 		return (PSM_FAILURE);
1050 	}
1051 	return (PSM_SUCCESS);
1052 }
1053 #endif	/* !__xpv */
1054