xref: /titanic_50/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision 5e3e415a5bedb7bb9ff36b225630e414258e6ece)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * apic_introp.c:
30  *	Has code for Advanced DDI interrupt framework support.
31  */
32 
33 #include <sys/cpuvar.h>
34 #include <sys/psm.h>
35 #include <sys/archsystm.h>
36 #include <sys/apic.h>
37 #include <sys/sunddi.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/mach_intr.h>
40 #include <sys/sysmacros.h>
41 #include <sys/trap.h>
42 #include <sys/pci.h>
43 #include <sys/pci_intr_lib.h>
44 
/*
 * Autovector table defined outside this file.  The handler list hanging off
 * each entry is walked by apic_get_vector_intr_info() to collect the dips
 * sharing an irq.
 */
extern struct av_head autovect[];

/*
 *	Local Function Prototypes
 */
apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);
static int	apic_get_pending(apic_irq_t *, int);
static void	apic_clear_mask(apic_irq_t *);
static void	apic_set_mask(apic_irq_t *);

/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *  0 = default value - don't know and need to call apic_check_msi_support()
 *      to find out then set it accordingly
 *  1 = supported
 * -1 = not supported
 *
 * The 0 -> 1/-1 transition is performed by the PSM_INTR_OP_CHECK_MSI case
 * of apic_intr_ops().
 */
int	apic_support_msi = 0;

/*
 * Multiple vector support for MSI.
 * These two tunables are not referenced in this file.
 * NOTE(review): presumably consumed by the MSI vector allocator - confirm.
 */
int	apic_multi_msi_enable = 1;
int	apic_multi_msi_max = 2;

/*
 * MSI-X tunables.
 * apic_msix_enable: when zero, the PSM_INTR_OP_CHECK_MSI case of
 * apic_intr_ops() removes DDI_INTR_TYPE_MSIX from the reported types.
 * apic_msix_max: not referenced in this file.
 * NOTE(review): presumably the maximum no. of MSI-X vectors handed out
 * per device - confirm against the allocator.
 */
int	apic_msix_enable = 1;
int	apic_msix_max = 2;
74 
75 /*
76  * apic_pci_msi_enable_vector:
77  *	Set the address/data fields in the MSI/X capability structure
78  *	XXX: MSI-X support
79  */
80 /* ARGSUSED */
81 void
82 apic_pci_msi_enable_vector(dev_info_t *dip, int type, int inum, int vector,
83     int count, int target_apic_id)
84 {
85 	uint64_t		msi_addr, msi_data;
86 	ushort_t		msi_ctrl;
87 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
88 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
89 
90 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
91 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
92 	    ddi_driver_name(dip), inum, vector, target_apic_id));
93 
94 	ASSERT((handle != NULL) && (cap_ptr != 0));
95 
96 	/* MSI Address */
97 	msi_addr = (MSI_ADDR_HDR | (target_apic_id << MSI_ADDR_DEST_SHIFT));
98 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
99 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
100 
101 	/* MSI Data: MSI is edge triggered according to spec */
102 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
103 
104 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
105 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
106 
107 	if (type == DDI_INTR_TYPE_MSI) {
108 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
109 
110 		/* Set the bits to inform how many MSIs are enabled */
111 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
112 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
113 
114 		pci_config_put32(handle,
115 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
116 
117 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
118 			pci_config_put32(handle,
119 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
120 			pci_config_put16(handle,
121 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
122 		} else {
123 			pci_config_put16(handle,
124 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
125 		}
126 
127 	} else if (type == DDI_INTR_TYPE_MSIX) {
128 		uintptr_t	off;
129 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
130 
131 		/* Offset into the "inum"th entry in the MSI-X table */
132 		off = (uintptr_t)msix_p->msix_tbl_addr +
133 		    (inum  * PCI_MSIX_VECTOR_SIZE);
134 
135 		ddi_put32(msix_p->msix_tbl_hdl,
136 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
137 		ddi_put64(msix_p->msix_tbl_hdl,
138 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
139 	}
140 }
141 
142 
143 /*
144  * This function returns the no. of vectors available for the pri.
145  * dip is not used at this moment.  If we really don't need that,
146  * it will be removed.
147  */
148 /*ARGSUSED*/
149 int
150 apic_navail_vector(dev_info_t *dip, int pri)
151 {
152 	int	lowest, highest, i, navail, count;
153 
154 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
155 	    (void *)dip, pri));
156 
157 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
158 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
159 	navail = count = 0;
160 
161 	/* It has to be contiguous */
162 	for (i = lowest; i < highest; i++) {
163 		count = 0;
164 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
165 		    (i < highest)) {
166 			if (APIC_CHECK_RESERVE_VECTORS(i))
167 				break;
168 			count++;
169 			i++;
170 		}
171 		if (count > navail)
172 			navail = count;
173 	}
174 	return (navail);
175 }
176 
177 /*
178  * Finds "count" contiguous MSI vectors starting at the proper alignment
179  * at "pri".
180  * Caller needs to make sure that count has to be power of 2 and should not
181  * be < 1.
182  */
183 uchar_t
184 apic_find_multi_vectors(int pri, int count)
185 {
186 	int	lowest, highest, i, navail, start, msibits;
187 
188 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
189 	    pri, count));
190 
191 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
192 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
193 	navail = 0;
194 
195 	/*
196 	 * msibits is the no. of lower order message data bits for the
197 	 * allocated MSI vectors and is used to calculate the aligned
198 	 * starting vector
199 	 */
200 	msibits = count - 1;
201 
202 	/* It has to be contiguous */
203 	for (i = lowest; i < highest; i++) {
204 		navail = 0;
205 
206 		/*
207 		 * starting vector has to be aligned accordingly for
208 		 * multiple MSIs
209 		 */
210 		if (msibits)
211 			i = (i + msibits) & ~msibits;
212 		start = i;
213 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
214 		    (i < highest)) {
215 			if (APIC_CHECK_RESERVE_VECTORS(i))
216 				break;
217 			navail++;
218 			if (navail >= count)
219 				return (start);
220 			i++;
221 		}
222 	}
223 	return (0);
224 }
225 
226 
227 /*
228  * It finds the apic_irq_t associates with the dip, ispec and type.
229  */
230 apic_irq_t *
231 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
232 {
233 	apic_irq_t	*irqp;
234 	int i;
235 
236 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
237 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
238 	    ispec->intrspec_pri, type));
239 
240 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
241 		if ((irqp = apic_irq_table[i]) == NULL)
242 			continue;
243 		if ((irqp->airq_dip == dip) &&
244 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
245 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
246 			if (type == DDI_INTR_TYPE_MSI) {
247 				if (irqp->airq_mps_intr_index == MSI_INDEX)
248 					return (irqp);
249 			} else if (type == DDI_INTR_TYPE_MSIX) {
250 				if (irqp->airq_mps_intr_index == MSIX_INDEX)
251 					return (irqp);
252 			} else
253 				return (irqp);
254 		}
255 	}
256 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
257 	return (NULL);
258 }
259 
260 
261 /*
262  * This function will return the pending bit of the irqp.
263  * It either comes from the IRR register of the APIC or the RDT
264  * entry of the I/O APIC.
265  * For the IRR to work, it needs to be to its binding CPU
266  */
267 static int
268 apic_get_pending(apic_irq_t *irqp, int type)
269 {
270 	int			bit, index, irr, pending;
271 	int			intin_no;
272 	int			apic_ix;
273 
274 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
275 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
276 	    type));
277 
278 	/* need to get on the bound cpu */
279 	mutex_enter(&cpu_lock);
280 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
281 
282 	index = irqp->airq_vector / 32;
283 	bit = irqp->airq_vector % 32;
284 	irr = apicadr[APIC_IRR_REG + index];
285 
286 	affinity_clear();
287 	mutex_exit(&cpu_lock);
288 
289 	pending = (irr & (1 << bit)) ? 1 : 0;
290 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
291 		/* check I/O APIC for fixed interrupt */
292 		intin_no = irqp->airq_intin_no;
293 		apic_ix = irqp->airq_ioapicindex;
294 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
295 		    AV_PENDING) ? 1 : 0;
296 	}
297 	return (pending);
298 }
299 
300 
301 /*
302  * This function will clear the mask for the interrupt on the I/O APIC
303  */
304 static void
305 apic_clear_mask(apic_irq_t *irqp)
306 {
307 	int			intin_no;
308 	ulong_t			iflag;
309 	int32_t			rdt_entry;
310 	int 			apic_ix;
311 
312 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
313 	    (void *)irqp));
314 
315 	intin_no = irqp->airq_intin_no;
316 	apic_ix = irqp->airq_ioapicindex;
317 
318 	iflag = intr_clear();
319 	lock_set(&apic_ioapic_lock);
320 
321 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
322 
323 	/* clear mask */
324 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
325 	    ((~AV_MASK) & rdt_entry));
326 
327 	lock_clear(&apic_ioapic_lock);
328 	intr_restore(iflag);
329 }
330 
331 
332 /*
333  * This function will mask the interrupt on the I/O APIC
334  */
335 static void
336 apic_set_mask(apic_irq_t *irqp)
337 {
338 	int			intin_no;
339 	int 			apic_ix;
340 	ulong_t			iflag;
341 	int32_t			rdt_entry;
342 
343 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
344 
345 	intin_no = irqp->airq_intin_no;
346 	apic_ix = irqp->airq_ioapicindex;
347 
348 	iflag = intr_clear();
349 
350 	lock_set(&apic_ioapic_lock);
351 
352 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
353 
354 	/* mask it */
355 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
356 	    (AV_MASK | rdt_entry));
357 
358 	lock_clear(&apic_ioapic_lock);
359 	intr_restore(iflag);
360 }
361 
362 
363 void
364 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
365 {
366 	int i;
367 	apic_irq_t *irqptr;
368 	struct intrspec ispec;
369 
370 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
371 	    "count: %x pri: %x type: %x\n",
372 	    (void *)dip, inum, count, pri, type));
373 
374 	/* for MSI/X only */
375 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
376 		return;
377 
378 	for (i = 0; i < count; i++) {
379 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
380 		    "pri=0x%x count=0x%x\n", inum, pri, count));
381 		ispec.intrspec_vec = inum + i;
382 		ispec.intrspec_pri = pri;
383 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
384 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
385 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
386 			    "failed\n", (void *)dip, inum, pri));
387 			continue;
388 		}
389 		irqptr->airq_mps_intr_index = FREE_INDEX;
390 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
391 	}
392 }
393 
394 
395 /*
396  * check whether the system supports MSI
397  *
398  * If PCI-E capability is found, then this must be a PCI-E system.
399  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
400  * to indicate this system supports MSI.
401  */
402 int
403 apic_check_msi_support()
404 {
405 	dev_info_t *cdip;
406 	char dev_type[16];
407 	int dev_len;
408 
409 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
410 
411 	/*
412 	 * check whether the first level children of root_node have
413 	 * PCI-E capability
414 	 */
415 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
416 	    cdip = ddi_get_next_sibling(cdip)) {
417 
418 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
419 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
420 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
421 		    ddi_node_name(cdip)));
422 		dev_len = sizeof (dev_type);
423 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
424 		    "device_type", (caddr_t)dev_type, &dev_len)
425 		    != DDI_PROP_SUCCESS)
426 			continue;
427 		if (strcmp(dev_type, "pciex") == 0)
428 			return (PSM_SUCCESS);
429 	}
430 
431 	/* MSI is not supported on this system */
432 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
433 	    "device_type found\n"));
434 	return (PSM_FAILURE);
435 }
436 
437 /*
438  * apic_pci_msi_unconfigure:
439  *
440  * This and next two interfaces are copied from pci_intr_lib.c
441  * Do ensure that these two files stay in sync.
442  * These needed to be copied over here to avoid a deadlock situation on
443  * certain mp systems that use MSI interrupts.
444  *
445  * IMPORTANT regards next three interfaces:
446  * i) are called only for MSI/X interrupts.
447  * ii) called with interrupts disabled, and must not block
448  */
449 void
450 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
451 {
452 	ushort_t		msi_ctrl;
453 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
454 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
455 
456 	ASSERT((handle != NULL) && (cap_ptr != 0));
457 
458 	if (type == DDI_INTR_TYPE_MSI) {
459 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
460 		msi_ctrl &= (~PCI_MSI_MME_MASK);
461 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
462 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
463 
464 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
465 			pci_config_put16(handle,
466 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
467 			pci_config_put32(handle,
468 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
469 		} else {
470 			pci_config_put16(handle,
471 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
472 		}
473 
474 	} else if (type == DDI_INTR_TYPE_MSIX) {
475 		uintptr_t	off;
476 		uint32_t	mask;
477 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
478 
479 		/* Offset into "inum"th entry in the MSI-X table & mask it */
480 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
481 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
482 
483 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
484 
485 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
486 
487 		/* Offset into the "inum"th entry in the MSI-X table */
488 		off = (uintptr_t)msix_p->msix_tbl_addr +
489 		    (inum * PCI_MSIX_VECTOR_SIZE);
490 
491 		/* Reset the "data" and "addr" bits */
492 		ddi_put32(msix_p->msix_tbl_hdl,
493 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
494 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
495 	}
496 }
497 
498 
499 /*
500  * apic_pci_msi_enable_mode:
501  */
502 void
503 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
504 {
505 	ushort_t		msi_ctrl;
506 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
507 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
508 
509 	ASSERT((handle != NULL) && (cap_ptr != 0));
510 
511 	if (type == DDI_INTR_TYPE_MSI) {
512 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
513 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
514 			return;
515 
516 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
517 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
518 
519 	} else if (type == DDI_INTR_TYPE_MSIX) {
520 		uintptr_t	off;
521 		uint32_t	mask;
522 		ddi_intr_msix_t	*msix_p;
523 
524 		msix_p = i_ddi_get_msix(rdip);
525 
526 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
527 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
528 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
529 
530 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
531 
532 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
533 
534 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
535 
536 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
537 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
538 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
539 			    msi_ctrl);
540 		}
541 	}
542 }
543 
544 /*
545  * apic_pci_msi_disable_mode:
546  */
547 void
548 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
549 {
550 	ushort_t		msi_ctrl;
551 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
552 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
553 
554 	ASSERT((handle != NULL) && (cap_ptr != 0));
555 
556 	if (type == DDI_INTR_TYPE_MSI) {
557 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
558 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
559 			return;
560 
561 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
562 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
563 
564 	} else if (type == DDI_INTR_TYPE_MSIX) {
565 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
566 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
567 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
568 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
569 			    msi_ctrl);
570 		}
571 	}
572 }
573 
574 static int
575 apic_set_cpu(uint32_t vector, int cpu, int *result)
576 {
577 	apic_irq_t *irqp;
578 	int iflag;
579 	int ret;
580 
581 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
582 
583 	/* Convert the vector to the irq using vector_to_irq table. */
584 	mutex_enter(&airq_mutex);
585 	irqp = apic_irq_table[apic_vector_to_irq[vector]];
586 	mutex_exit(&airq_mutex);
587 
588 	if (irqp == NULL) {
589 		*result = ENXIO;
590 		return (PSM_FAILURE);
591 	}
592 
593 	/* Fail if this is an MSI intr and is part of a group. */
594 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
595 	    (irqp->airq_intin_no > 1)) {
596 		*result = ENXIO;
597 		return (PSM_FAILURE);
598 	}
599 
600 	iflag = intr_clear();
601 	lock_set(&apic_ioapic_lock);
602 
603 	ret = apic_rebind_all(irqp, cpu);
604 
605 	lock_clear(&apic_ioapic_lock);
606 	intr_restore(iflag);
607 
608 	if (ret) {
609 		*result = EIO;
610 		return (PSM_FAILURE);
611 	}
612 	*result = 0;
613 	return (PSM_SUCCESS);
614 }
615 
616 static int
617 apic_grp_set_cpu(uint32_t vector, int new_cpu, int *result)
618 {
619 	dev_info_t *orig_dip;
620 	uchar_t orig_cpu;
621 	int iflag;
622 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
623 	int i;
624 	int cap_ptr;
625 	int msi_mask_off;
626 	ushort_t msi_ctrl;
627 	uint32_t msi_pvm;
628 	ddi_acc_handle_t handle;
629 	int num_vectors = 0;
630 
631 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
632 
633 	/*
634 	 * Take mutex to insure that table doesn't change out from underneath
635 	 * us while we're playing with it.
636 	 */
637 	mutex_enter(&airq_mutex);
638 	irqps[0] = apic_irq_table[apic_vector_to_irq[vector]];
639 	orig_cpu = irqps[0]->airq_temp_cpu;
640 	orig_dip = irqps[0]->airq_dip;
641 	num_vectors = irqps[0]->airq_intin_no;
642 
643 	/* A "group" of 1 */
644 	if (num_vectors == 1) {
645 		mutex_exit(&airq_mutex);
646 		return (apic_set_cpu(vector, new_cpu, result));
647 	}
648 
649 	*result = ENXIO;
650 
651 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
652 		mutex_exit(&airq_mutex);
653 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
654 		goto set_grp_intr_done;
655 	}
656 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
657 		mutex_exit(&airq_mutex);
658 		DDI_INTR_IMPLDBG((CE_CONT,
659 		    "set_grp: base vec not part of a grp or not aligned: "
660 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
661 		goto set_grp_intr_done;
662 	}
663 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
664 	    num_vectors));
665 
666 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
667 
668 	*result = EIO;
669 
670 	/*
671 	 * All IRQ entries in the table for the given device will be not
672 	 * shared.  Since they are not shared, the dip in the table will
673 	 * be true to the device of interest.
674 	 */
675 	for (i = 1; i < num_vectors; i++) {
676 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
677 		if (irqps[i] == NULL) {
678 			mutex_exit(&airq_mutex);
679 			goto set_grp_intr_done;
680 		}
681 #ifdef DEBUG
682 		/* Sanity check: CPU and dip is the same for all entries. */
683 		if ((irqps[i]->airq_dip != orig_dip) ||
684 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
685 			mutex_exit(&airq_mutex);
686 			DDI_INTR_IMPLDBG((CE_CONT,
687 			    "set_grp: cpu or dip for vec 0x%x difft than for "
688 			    "vec 0x%x\n", vector, vector + i));
689 			DDI_INTR_IMPLDBG((CE_CONT,
690 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
691 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
692 			    (void *)irqps[i]->airq_dip));
693 			goto set_grp_intr_done;
694 		}
695 #endif /* DEBUG */
696 	}
697 	mutex_exit(&airq_mutex);
698 
699 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
700 	handle = i_ddi_get_pci_config_handle(orig_dip);
701 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
702 
703 	/* MSI Per vector masking is supported. */
704 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
705 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
706 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
707 		else
708 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
709 		msi_pvm = pci_config_get32(handle, msi_mask_off);
710 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
711 		DDI_INTR_IMPLDBG((CE_CONT,
712 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
713 		    pci_config_get32(handle, msi_mask_off)));
714 	}
715 
716 	iflag = intr_clear();
717 	lock_set(&apic_ioapic_lock);
718 
719 	/*
720 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
721 	 * an error if the CPU is not accepting interrupts.  If the first one
722 	 * succeeds they all will.
723 	 */
724 	if (apic_rebind_all(irqps[0], new_cpu))
725 		(void) apic_rebind_all(irqps[0], orig_cpu);
726 	else {
727 		for (i = 1; i < num_vectors; i++)
728 			(void) apic_rebind_all(irqps[i], new_cpu);
729 		*result = 0;	/* SUCCESS */
730 	}
731 
732 	lock_clear(&apic_ioapic_lock);
733 	intr_restore(iflag);
734 
735 	/* Reenable vectors if per vector masking is supported. */
736 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
737 		pci_config_put32(handle, msi_mask_off, msi_pvm);
738 		DDI_INTR_IMPLDBG((CE_CONT,
739 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
740 		    pci_config_get32(handle, msi_mask_off)));
741 	}
742 
743 set_grp_intr_done:
744 	if (*result != 0)
745 		return (PSM_FAILURE);
746 
747 	return (PSM_SUCCESS);
748 }
749 
750 static int
751 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
752 {
753 	struct autovec *av_dev;
754 	uchar_t irqno;
755 	int i;
756 	apic_irq_t *irq_p;
757 
758 	/* Sanity check the vector/irq argument. */
759 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
760 
761 	mutex_enter(&airq_mutex);
762 
763 	/*
764 	 * Convert the vecirq arg to an irq using vector_to_irq table
765 	 * if the arg is a vector.  Pass thru if already an irq.
766 	 */
767 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
768 	    PSMGI_INTRBY_VEC)
769 		irqno = apic_vector_to_irq[vecirq];
770 	else
771 		irqno = vecirq;
772 
773 	irq_p = apic_irq_table[irqno];
774 
775 	if ((irq_p == NULL) ||
776 	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
777 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
778 		mutex_exit(&airq_mutex);
779 		return (PSM_FAILURE);
780 	}
781 
782 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
783 
784 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
785 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
786 
787 		/* Return user bound info for intrd. */
788 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
789 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
790 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
791 		}
792 	}
793 
794 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
795 		intr_params_p->avgi_vector = irq_p->airq_vector;
796 
797 	if (intr_params_p->avgi_req_flags &
798 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
799 		/* Get number of devices from apic_irq table shared field. */
800 		intr_params_p->avgi_num_devs = irq_p->airq_share;
801 
802 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
803 
804 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
805 
806 		/* Some devices have NULL dip.  Don't count these. */
807 		if (intr_params_p->avgi_num_devs > 0) {
808 			for (i = 0, av_dev = autovect[irqno].avh_link;
809 			    av_dev; av_dev = av_dev->av_link)
810 				if (av_dev->av_vector && av_dev->av_dip)
811 					i++;
812 			intr_params_p->avgi_num_devs =
813 			    MIN(intr_params_p->avgi_num_devs, i);
814 		}
815 
816 		/* There are no viable dips to return. */
817 		if (intr_params_p->avgi_num_devs == 0)
818 			intr_params_p->avgi_dip_list = NULL;
819 
820 		else {	/* Return list of dips */
821 
822 			/* Allocate space in array for that number of devs. */
823 			intr_params_p->avgi_dip_list = kmem_zalloc(
824 			    intr_params_p->avgi_num_devs *
825 			    sizeof (dev_info_t *),
826 			    KM_SLEEP);
827 
828 			/*
829 			 * Loop through the device list of the autovec table
830 			 * filling in the dip array.
831 			 *
832 			 * Note that the autovect table may have some special
833 			 * entries which contain NULL dips.  These will be
834 			 * ignored.
835 			 */
836 			for (i = 0, av_dev = autovect[irqno].avh_link;
837 			    av_dev; av_dev = av_dev->av_link)
838 				if (av_dev->av_vector && av_dev->av_dip)
839 					intr_params_p->avgi_dip_list[i++] =
840 					    av_dev->av_dip;
841 		}
842 	}
843 
844 	mutex_exit(&airq_mutex);
845 
846 	return (PSM_SUCCESS);
847 }
848 
849 
/*
 * This function provides external interface to the nexus for all
 * functionalities related to the new DDI interrupt framework.
 *
 * Input:
 * dip     - pointer to the dev_info structure of the requested device
 * hdlp    - pointer to the internal interrupt handle structure for the
 *	     requested interrupt
 * intr_op - opcode for this call
 * result  - pointer to the integer that will hold the result to be
 *	     passed back if return value is PSM_SUCCESS
 *	     (for PSM_INTR_OP_SET_PRI it is also an input: the requested
 *	     new priority is read from it; for the [GRP_]SET_CPU ops it
 *	     receives an errno value on failure as well)
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 * (PSM_INTR_OP_SET_CAP and unknown opcodes always yield PSM_FAILURE)
 */
int
apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
    psm_intr_op_t intr_op, int *result)
{
	int		cap;
	int		count_vec;
	int		old_priority;
	int		new_priority;
	int		new_cpu;
	apic_irq_t	*irqp;
	struct intrspec *ispec, intr_spec;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));

	/*
	 * Build a local intrspec from the handle; it is the lookup key for
	 * apic_find_irq() in the cases below.  PSM_INTR_OP_XLATE_VECTOR
	 * replaces it with the ispec stored in the handle's private data.
	 */
	ispec = &intr_spec;
	ispec->intrspec_pri = hdlp->ih_pri;
	ispec->intrspec_vec = hdlp->ih_inum;
	ispec->intrspec_func = hdlp->ih_cb_func;

	switch (intr_op) {
	case PSM_INTR_OP_CHECK_MSI:
		/*
		 * Check MSI/X is supported or not at APIC level and
		 * masked off the MSI/X bits in hdlp->ih_type if not
		 * supported before return.  If MSI/X is supported,
		 * leave the ih_type unchanged and return.
		 *
		 * hdlp->ih_type passed in from the nexus has all the
		 * interrupt types supported by the device.
		 */
		if (apic_support_msi == 0) {
			/*
			 * if apic_support_msi is not set, call
			 * apic_check_msi_support() to check whether msi
			 * is supported first
			 */
			if (apic_check_msi_support() == PSM_SUCCESS)
				apic_support_msi = 1;
			else
				apic_support_msi = -1;
		}
		if (apic_support_msi == 1) {
			/* MSI-X is additionally gated by apic_msix_enable */
			if (apic_msix_enable)
				*result = hdlp->ih_type;
			else
				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
		} else
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
		break;
	case PSM_INTR_OP_ALLOC_VECTORS:
		/*
		 * ih_scratch1 and ih_scratch2 are passed on to the allocator
		 * as its 3rd and 5th arguments.
		 * NOTE(review): every type other than MSI is routed to the
		 * MSI-X allocator - confirm callers never pass FIXED here.
		 */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		else
			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, hdlp->ih_pri,
			    (int)(uintptr_t)hdlp->ih_scratch2);
		break;
	case PSM_INTR_OP_FREE_VECTORS:
		/* ih_scratch1 is the number of vectors to release */
		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
		    hdlp->ih_pri, hdlp->ih_type);
		break;
	case PSM_INTR_OP_NAVAIL_VECTORS:
		*result = apic_navail_vector(dip, hdlp->ih_pri);
		break;
	case PSM_INTR_OP_XLATE_VECTOR:
		/* use the ispec kept in the handle's platform private data */
		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
		break;
	case PSM_INTR_OP_GET_PENDING:
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		*result = apic_get_pending(irqp, hdlp->ih_type);
		break;
	case PSM_INTR_OP_CLEAR_MASK:
		/* masking via the I/O APIC is offered for FIXED only */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
		if (irqp == NULL)
			return (PSM_FAILURE);
		apic_clear_mask(irqp);
		break;
	case PSM_INTR_OP_SET_MASK:
		/* masking via the I/O APIC is offered for FIXED only */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		apic_set_mask(irqp);
		break;
	case PSM_INTR_OP_GET_CAP:
		/* pending is always reported; maskable only for FIXED */
		cap = DDI_INTR_FLAG_PENDING;
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
			cap |= DDI_INTR_FLAG_MASKABLE;
		*result = cap;
		break;
	case PSM_INTR_OP_GET_SHARED:
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);
		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
			return (PSM_FAILURE);
		/* 1 whenever the share count of the irq is non-zero */
		*result = irqp->airq_share ? 1: 0;
		break;
	case PSM_INTR_OP_SET_PRI:
		old_priority = hdlp->ih_pri;	/* save old value */
		new_priority = *(int *)result;	/* try the new value */

		/* First, check if "hdlp->ih_scratch1" vectors exist? */
		if (apic_navail_vector(dip, new_priority) < hdlp->ih_scratch1)
			return (PSM_FAILURE);

		/* Now allocate the vectors */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, new_priority,
			    DDI_INTR_ALLOC_STRICT);
		else
			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1, new_priority,
			    DDI_INTR_ALLOC_STRICT);

		/* Did we get new vectors? */
		if (!count_vec)
			return (PSM_FAILURE);

		/* Finally, free the previously allocated vectors */
		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
		    old_priority, hdlp->ih_type);
		hdlp->ih_pri = new_priority; /* set the new value */
		break;
	case PSM_INTR_OP_SET_CPU:
	case PSM_INTR_OP_GRP_SET_CPU:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a CPU value.
		 */
		new_cpu = (int)(intptr_t)hdlp->ih_private;
		if (!apic_cpu_in_range(new_cpu)) {
			DDI_INTR_IMPLDBG((CE_CONT,
			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
			*result = EINVAL;
			return (PSM_FAILURE);
		}
		/* on failure the callee has already stored an errno in result */
		if (intr_op == PSM_INTR_OP_SET_CPU) {
			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
			    PSM_SUCCESS)
				return (PSM_FAILURE);
		} else {
			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
			    result) != PSM_SUCCESS)
				return (PSM_FAILURE);
		}
		break;
	case PSM_INTR_OP_GET_INTR:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a pointer to a apic_get_intr_t.
		 */
		if (apic_get_vector_intr_info(
		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_APIC_TYPE:
		/* answers are handed back through the handle, not result */
		hdlp->ih_private = apic_get_apic_type();
		hdlp->ih_ver = apic_get_apic_version();
		break;
	case PSM_INTR_OP_SET_CAP:
	default:
		return (PSM_FAILURE);
	}
	return (PSM_SUCCESS);
}
1041