xref: /titanic_50/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision ce0bfb39c0479ba97372eb0e5bf2ef4275d0876e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * apic_introp.c:
28  *	Has code for Advanced DDI interrupt framework support.
29  */
30 
31 #include <sys/cpuvar.h>
32 #include <sys/psm.h>
33 #include <sys/archsystm.h>
34 #include <sys/apic.h>
35 #include <sys/sunddi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/mach_intr.h>
38 #include <sys/sysmacros.h>
39 #include <sys/trap.h>
40 #include <sys/pci.h>
41 #include <sys/pci_intr_lib.h>
42 
43 extern struct av_head autovect[];
44 
45 /*
46  *	Local Function Prototypes
47  */
48 apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);
49 
50 /*
51  * MSI support flag:
52  * reflects whether MSI is supported at APIC level
53  * it can also be patched through /etc/system
54  *
55  *  0 = default value - don't know and need to call apic_check_msi_support()
56  *      to find out then set it accordingly
57  *  1 = supported
58  * -1 = not supported
59  */
60 int	apic_support_msi = 0;
61 
62 /* Multiple vector support for MSI */
63 int	apic_multi_msi_enable = 1;
64 
/* MSI-X support: when 0, PSM_INTR_OP_CHECK_MSI masks MSI-X out of the types */
66 int	apic_msix_enable = 1;
67 
68 /*
69  * apic_pci_msi_enable_vector:
70  *	Set the address/data fields in the MSI/X capability structure
71  *	XXX: MSI-X support
72  */
73 /* ARGSUSED */
74 void
75 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
76     int count, int target_apic_id)
77 {
78 	uint64_t		msi_addr, msi_data;
79 	ushort_t		msi_ctrl;
80 	dev_info_t		*dip = irq_ptr->airq_dip;
81 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
82 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
83 #if !defined(__xpv)
84 	msi_regs_t		msi_regs;
85 #endif	/* ! __xpv */
86 
87 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
88 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
89 	    ddi_driver_name(dip), inum, vector, target_apic_id));
90 
91 	ASSERT((handle != NULL) && (cap_ptr != 0));
92 
93 #if !defined(__xpv)
94 	msi_regs.mr_data = vector;
95 	msi_regs.mr_addr = target_apic_id;
96 
97 	apic_vt_ops->apic_intrr_alloc_entry(irq_ptr);
98 	apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&msi_regs);
99 	apic_vt_ops->apic_intrr_record_msi(irq_ptr, &msi_regs);
100 
101 	/* MSI Address */
102 	msi_addr = msi_regs.mr_addr;
103 
104 	/* MSI Data: MSI is edge triggered according to spec */
105 	msi_data = msi_regs.mr_data;
106 #else
107 	/* MSI Address */
108 	msi_addr = (MSI_ADDR_HDR |
109 	    (target_apic_id << MSI_ADDR_DEST_SHIFT));
110 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
111 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
112 
113 	/* MSI Data: MSI is edge triggered according to spec */
114 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
115 #endif	/* ! __xpv */
116 
117 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
118 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
119 
120 	if (type == DDI_INTR_TYPE_MSI) {
121 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
122 
123 		/* Set the bits to inform how many MSIs are enabled */
124 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
125 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
126 
127 		pci_config_put32(handle,
128 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
129 
130 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
131 			pci_config_put32(handle,
132 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
133 			pci_config_put16(handle,
134 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
135 		} else {
136 			pci_config_put16(handle,
137 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
138 		}
139 
140 	} else if (type == DDI_INTR_TYPE_MSIX) {
141 		uintptr_t	off;
142 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
143 
144 		/* Offset into the "inum"th entry in the MSI-X table */
145 		off = (uintptr_t)msix_p->msix_tbl_addr +
146 		    (inum  * PCI_MSIX_VECTOR_SIZE);
147 
148 		ddi_put32(msix_p->msix_tbl_hdl,
149 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
150 		ddi_put64(msix_p->msix_tbl_hdl,
151 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
152 	}
153 }
154 
155 
156 /*
157  * This function returns the no. of vectors available for the pri.
158  * dip is not used at this moment.  If we really don't need that,
159  * it will be removed.
160  */
161 /*ARGSUSED*/
162 int
163 apic_navail_vector(dev_info_t *dip, int pri)
164 {
165 	int	lowest, highest, i, navail, count;
166 
167 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
168 	    (void *)dip, pri));
169 
170 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
171 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
172 	navail = count = 0;
173 
174 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
175 		lowest -= APIC_VECTOR_PER_IPL;
176 
177 	/* It has to be contiguous */
178 	for (i = lowest; i < highest; i++) {
179 		count = 0;
180 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
181 		    (i < highest)) {
182 			if (APIC_CHECK_RESERVE_VECTORS(i))
183 				break;
184 			count++;
185 			i++;
186 		}
187 		if (count > navail)
188 			navail = count;
189 	}
190 	return (navail);
191 }
192 
193 /*
194  * Finds "count" contiguous MSI vectors starting at the proper alignment
195  * at "pri".
196  * Caller needs to make sure that count has to be power of 2 and should not
197  * be < 1.
198  */
199 uchar_t
200 apic_find_multi_vectors(int pri, int count)
201 {
202 	int	lowest, highest, i, navail, start, msibits;
203 
204 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
205 	    pri, count));
206 
207 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
208 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
209 	navail = 0;
210 
211 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
212 		lowest -= APIC_VECTOR_PER_IPL;
213 
214 	/*
215 	 * msibits is the no. of lower order message data bits for the
216 	 * allocated MSI vectors and is used to calculate the aligned
217 	 * starting vector
218 	 */
219 	msibits = count - 1;
220 
221 	/* It has to be contiguous */
222 	for (i = lowest; i < highest; i++) {
223 		navail = 0;
224 
225 		/*
226 		 * starting vector has to be aligned accordingly for
227 		 * multiple MSIs
228 		 */
229 		if (msibits)
230 			i = (i + msibits) & ~msibits;
231 		start = i;
232 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
233 		    (i < highest)) {
234 			if (APIC_CHECK_RESERVE_VECTORS(i))
235 				break;
236 			navail++;
237 			if (navail >= count)
238 				return (start);
239 			i++;
240 		}
241 	}
242 	return (0);
243 }
244 
245 
246 /*
247  * It finds the apic_irq_t associates with the dip, ispec and type.
248  */
249 apic_irq_t *
250 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
251 {
252 	apic_irq_t	*irqp;
253 	int i;
254 
255 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
256 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
257 	    ispec->intrspec_pri, type));
258 
259 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
260 		if ((irqp = apic_irq_table[i]) == NULL)
261 			continue;
262 		if ((irqp->airq_dip == dip) &&
263 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
264 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
265 			if (type == DDI_INTR_TYPE_MSI) {
266 				if (irqp->airq_mps_intr_index == MSI_INDEX)
267 					return (irqp);
268 			} else if (type == DDI_INTR_TYPE_MSIX) {
269 				if (irqp->airq_mps_intr_index == MSIX_INDEX)
270 					return (irqp);
271 			} else
272 				return (irqp);
273 		}
274 	}
275 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
276 	return (NULL);
277 }
278 
279 
280 #if !defined(__xpv)
281 
282 /*
283  * This function will return the pending bit of the irqp.
284  * It either comes from the IRR register of the APIC or the RDT
285  * entry of the I/O APIC.
286  * For the IRR to work, it needs to be to its binding CPU
287  */
288 static int
289 apic_get_pending(apic_irq_t *irqp, int type)
290 {
291 	int			bit, index, irr, pending;
292 	int			intin_no;
293 	int			apic_ix;
294 
295 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
296 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
297 	    type));
298 
299 	/* need to get on the bound cpu */
300 	mutex_enter(&cpu_lock);
301 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
302 
303 	index = irqp->airq_vector / 32;
304 	bit = irqp->airq_vector % 32;
305 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
306 
307 	affinity_clear();
308 	mutex_exit(&cpu_lock);
309 
310 	pending = (irr & (1 << bit)) ? 1 : 0;
311 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
312 		/* check I/O APIC for fixed interrupt */
313 		intin_no = irqp->airq_intin_no;
314 		apic_ix = irqp->airq_ioapicindex;
315 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
316 		    AV_PENDING) ? 1 : 0;
317 	}
318 	return (pending);
319 }
320 
321 
322 /*
323  * This function will clear the mask for the interrupt on the I/O APIC
324  */
325 static void
326 apic_clear_mask(apic_irq_t *irqp)
327 {
328 	int			intin_no;
329 	ulong_t			iflag;
330 	int32_t			rdt_entry;
331 	int 			apic_ix;
332 
333 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
334 	    (void *)irqp));
335 
336 	intin_no = irqp->airq_intin_no;
337 	apic_ix = irqp->airq_ioapicindex;
338 
339 	iflag = intr_clear();
340 	lock_set(&apic_ioapic_lock);
341 
342 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
343 
344 	/* clear mask */
345 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
346 	    ((~AV_MASK) & rdt_entry));
347 
348 	lock_clear(&apic_ioapic_lock);
349 	intr_restore(iflag);
350 }
351 
352 
353 /*
354  * This function will mask the interrupt on the I/O APIC
355  */
356 static void
357 apic_set_mask(apic_irq_t *irqp)
358 {
359 	int			intin_no;
360 	int 			apic_ix;
361 	ulong_t			iflag;
362 	int32_t			rdt_entry;
363 
364 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
365 
366 	intin_no = irqp->airq_intin_no;
367 	apic_ix = irqp->airq_ioapicindex;
368 
369 	iflag = intr_clear();
370 
371 	lock_set(&apic_ioapic_lock);
372 
373 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
374 
375 	/* mask it */
376 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
377 	    (AV_MASK | rdt_entry));
378 
379 	lock_clear(&apic_ioapic_lock);
380 	intr_restore(iflag);
381 }
382 
383 #endif	/* ! __xpv */
384 
385 void
386 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
387 {
388 	int i;
389 	apic_irq_t *irqptr;
390 	struct intrspec ispec;
391 
392 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
393 	    "count: %x pri: %x type: %x\n",
394 	    (void *)dip, inum, count, pri, type));
395 
396 	/* for MSI/X only */
397 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
398 		return;
399 
400 	for (i = 0; i < count; i++) {
401 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
402 		    "pri=0x%x count=0x%x\n", inum, pri, count));
403 		ispec.intrspec_vec = inum + i;
404 		ispec.intrspec_pri = pri;
405 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
406 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
407 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
408 			    "failed\n", (void *)dip, inum, pri));
409 			continue;
410 		}
411 		irqptr->airq_mps_intr_index = FREE_INDEX;
412 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
413 	}
414 }
415 
416 
417 /*
418  * check whether the system supports MSI
419  *
420  * If PCI-E capability is found, then this must be a PCI-E system.
421  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
422  * to indicate this system supports MSI.
423  */
424 int
425 apic_check_msi_support()
426 {
427 	dev_info_t *cdip;
428 	char dev_type[16];
429 	int dev_len;
430 
431 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
432 
433 	/*
434 	 * check whether the first level children of root_node have
435 	 * PCI-E capability
436 	 */
437 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
438 	    cdip = ddi_get_next_sibling(cdip)) {
439 
440 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
441 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
442 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
443 		    ddi_node_name(cdip)));
444 		dev_len = sizeof (dev_type);
445 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
446 		    "device_type", (caddr_t)dev_type, &dev_len)
447 		    != DDI_PROP_SUCCESS)
448 			continue;
449 		if (strcmp(dev_type, "pciex") == 0)
450 			return (PSM_SUCCESS);
451 	}
452 
453 	/* MSI is not supported on this system */
454 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
455 	    "device_type found\n"));
456 	return (PSM_FAILURE);
457 }
458 
459 /*
460  * apic_pci_msi_unconfigure:
461  *
462  * This and next two interfaces are copied from pci_intr_lib.c
463  * Do ensure that these two files stay in sync.
464  * These needed to be copied over here to avoid a deadlock situation on
465  * certain mp systems that use MSI interrupts.
466  *
467  * IMPORTANT regards next three interfaces:
468  * i) are called only for MSI/X interrupts.
469  * ii) called with interrupts disabled, and must not block
470  */
471 void
472 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
473 {
474 	ushort_t		msi_ctrl;
475 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
476 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
477 
478 	ASSERT((handle != NULL) && (cap_ptr != 0));
479 
480 	if (type == DDI_INTR_TYPE_MSI) {
481 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
482 		msi_ctrl &= (~PCI_MSI_MME_MASK);
483 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
484 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
485 
486 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
487 			pci_config_put16(handle,
488 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
489 			pci_config_put32(handle,
490 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
491 		} else {
492 			pci_config_put16(handle,
493 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
494 		}
495 
496 	} else if (type == DDI_INTR_TYPE_MSIX) {
497 		uintptr_t	off;
498 		uint32_t	mask;
499 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
500 
501 		/* Offset into "inum"th entry in the MSI-X table & mask it */
502 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
503 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
504 
505 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
506 
507 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
508 
509 		/* Offset into the "inum"th entry in the MSI-X table */
510 		off = (uintptr_t)msix_p->msix_tbl_addr +
511 		    (inum * PCI_MSIX_VECTOR_SIZE);
512 
513 		/* Reset the "data" and "addr" bits */
514 		ddi_put32(msix_p->msix_tbl_hdl,
515 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
516 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
517 	}
518 }
519 
520 
521 /*
522  * apic_pci_msi_enable_mode:
523  */
524 void
525 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
526 {
527 	ushort_t		msi_ctrl;
528 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
529 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
530 
531 	ASSERT((handle != NULL) && (cap_ptr != 0));
532 
533 	if (type == DDI_INTR_TYPE_MSI) {
534 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
535 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
536 			return;
537 
538 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
539 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
540 
541 	} else if (type == DDI_INTR_TYPE_MSIX) {
542 		uintptr_t	off;
543 		uint32_t	mask;
544 		ddi_intr_msix_t	*msix_p;
545 
546 		msix_p = i_ddi_get_msix(rdip);
547 
548 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
549 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
550 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
551 
552 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
553 
554 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
555 
556 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
557 
558 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
559 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
560 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
561 			    msi_ctrl);
562 		}
563 	}
564 }
565 
566 /*
567  * apic_pci_msi_disable_mode:
568  */
569 void
570 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
571 {
572 	ushort_t		msi_ctrl;
573 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
574 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
575 
576 	ASSERT((handle != NULL) && (cap_ptr != 0));
577 
578 	if (type == DDI_INTR_TYPE_MSI) {
579 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
580 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
581 			return;
582 
583 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
584 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
585 
586 	} else if (type == DDI_INTR_TYPE_MSIX) {
587 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
588 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
589 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
590 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
591 			    msi_ctrl);
592 		}
593 	}
594 }
595 
596 #if !defined(__xpv)
597 
598 static int
599 apic_set_cpu(int irqno, int cpu, int *result)
600 {
601 	apic_irq_t *irqp;
602 	ulong_t iflag;
603 	int ret;
604 
605 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
606 
607 	mutex_enter(&airq_mutex);
608 	irqp = apic_irq_table[irqno];
609 	mutex_exit(&airq_mutex);
610 
611 	if (irqp == NULL) {
612 		*result = ENXIO;
613 		return (PSM_FAILURE);
614 	}
615 
616 	/* Fail if this is an MSI intr and is part of a group. */
617 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
618 	    (irqp->airq_intin_no > 1)) {
619 		*result = ENXIO;
620 		return (PSM_FAILURE);
621 	}
622 
623 	iflag = intr_clear();
624 	lock_set(&apic_ioapic_lock);
625 
626 	ret = apic_rebind_all(irqp, cpu);
627 
628 	lock_clear(&apic_ioapic_lock);
629 	intr_restore(iflag);
630 
631 	if (ret) {
632 		*result = EIO;
633 		return (PSM_FAILURE);
634 	}
635 	/*
636 	 * keep tracking the default interrupt cpu binding
637 	 */
638 	irqp->airq_cpu = cpu;
639 
640 	*result = 0;
641 	return (PSM_SUCCESS);
642 }
643 
644 static int
645 apic_grp_set_cpu(int irqno, int new_cpu, int *result)
646 {
647 	dev_info_t *orig_dip;
648 	uint32_t orig_cpu;
649 	ulong_t iflag;
650 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
651 	int i;
652 	int cap_ptr;
653 	int msi_mask_off;
654 	ushort_t msi_ctrl;
655 	uint32_t msi_pvm;
656 	ddi_acc_handle_t handle;
657 	int num_vectors = 0;
658 	uint32_t vector;
659 
660 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
661 
662 	/*
663 	 * Take mutex to insure that table doesn't change out from underneath
664 	 * us while we're playing with it.
665 	 */
666 	mutex_enter(&airq_mutex);
667 	irqps[0] = apic_irq_table[irqno];
668 	orig_cpu = irqps[0]->airq_temp_cpu;
669 	orig_dip = irqps[0]->airq_dip;
670 	num_vectors = irqps[0]->airq_intin_no;
671 	vector = irqps[0]->airq_vector;
672 
673 	/* A "group" of 1 */
674 	if (num_vectors == 1) {
675 		mutex_exit(&airq_mutex);
676 		return (apic_set_cpu(irqno, new_cpu, result));
677 	}
678 
679 	*result = ENXIO;
680 
681 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
682 		mutex_exit(&airq_mutex);
683 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
684 		goto set_grp_intr_done;
685 	}
686 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
687 		mutex_exit(&airq_mutex);
688 		DDI_INTR_IMPLDBG((CE_CONT,
689 		    "set_grp: base vec not part of a grp or not aligned: "
690 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
691 		goto set_grp_intr_done;
692 	}
693 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
694 	    num_vectors));
695 
696 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
697 
698 	*result = EIO;
699 
700 	/*
701 	 * All IRQ entries in the table for the given device will be not
702 	 * shared.  Since they are not shared, the dip in the table will
703 	 * be true to the device of interest.
704 	 */
705 	for (i = 1; i < num_vectors; i++) {
706 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
707 		if (irqps[i] == NULL) {
708 			mutex_exit(&airq_mutex);
709 			goto set_grp_intr_done;
710 		}
711 #ifdef DEBUG
712 		/* Sanity check: CPU and dip is the same for all entries. */
713 		if ((irqps[i]->airq_dip != orig_dip) ||
714 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
715 			mutex_exit(&airq_mutex);
716 			DDI_INTR_IMPLDBG((CE_CONT,
717 			    "set_grp: cpu or dip for vec 0x%x difft than for "
718 			    "vec 0x%x\n", vector, vector + i));
719 			DDI_INTR_IMPLDBG((CE_CONT,
720 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
721 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
722 			    (void *)irqps[i]->airq_dip));
723 			goto set_grp_intr_done;
724 		}
725 #endif /* DEBUG */
726 	}
727 	mutex_exit(&airq_mutex);
728 
729 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
730 	handle = i_ddi_get_pci_config_handle(orig_dip);
731 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
732 
733 	/* MSI Per vector masking is supported. */
734 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
735 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
736 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
737 		else
738 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
739 		msi_pvm = pci_config_get32(handle, msi_mask_off);
740 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
741 		DDI_INTR_IMPLDBG((CE_CONT,
742 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
743 		    pci_config_get32(handle, msi_mask_off)));
744 	}
745 
746 	iflag = intr_clear();
747 	lock_set(&apic_ioapic_lock);
748 
749 	/*
750 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
751 	 * an error if the CPU is not accepting interrupts.  If the first one
752 	 * succeeds they all will.
753 	 */
754 	if (apic_rebind_all(irqps[0], new_cpu))
755 		(void) apic_rebind_all(irqps[0], orig_cpu);
756 	else {
757 		irqps[0]->airq_cpu = new_cpu;
758 
759 		for (i = 1; i < num_vectors; i++) {
760 			(void) apic_rebind_all(irqps[i], new_cpu);
761 			irqps[i]->airq_cpu = new_cpu;
762 		}
763 		*result = 0;	/* SUCCESS */
764 	}
765 
766 	lock_clear(&apic_ioapic_lock);
767 	intr_restore(iflag);
768 
769 	/* Reenable vectors if per vector masking is supported. */
770 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
771 		pci_config_put32(handle, msi_mask_off, msi_pvm);
772 		DDI_INTR_IMPLDBG((CE_CONT,
773 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
774 		    pci_config_get32(handle, msi_mask_off)));
775 	}
776 
777 set_grp_intr_done:
778 	if (*result != 0)
779 		return (PSM_FAILURE);
780 
781 	return (PSM_SUCCESS);
782 }
783 
784 #endif	/* !__xpv */
785 
786 int
787 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
788 {
789 	struct autovec *av_dev;
790 	uchar_t irqno;
791 	int i;
792 	apic_irq_t *irq_p;
793 
794 	/* Sanity check the vector/irq argument. */
795 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
796 
797 	mutex_enter(&airq_mutex);
798 
799 	/*
800 	 * Convert the vecirq arg to an irq using vector_to_irq table
801 	 * if the arg is a vector.  Pass thru if already an irq.
802 	 */
803 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
804 	    PSMGI_INTRBY_VEC)
805 		irqno = apic_vector_to_irq[vecirq];
806 	else
807 		irqno = vecirq;
808 
809 	irq_p = apic_irq_table[irqno];
810 
811 	if ((irq_p == NULL) ||
812 	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
813 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
814 		mutex_exit(&airq_mutex);
815 		return (PSM_FAILURE);
816 	}
817 
818 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
819 
820 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
821 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
822 
823 		/* Return user bound info for intrd. */
824 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
825 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
826 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
827 		}
828 	}
829 
830 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
831 		intr_params_p->avgi_vector = irq_p->airq_vector;
832 
833 	if (intr_params_p->avgi_req_flags &
834 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
835 		/* Get number of devices from apic_irq table shared field. */
836 		intr_params_p->avgi_num_devs = irq_p->airq_share;
837 
838 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
839 
840 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
841 
842 		/* Some devices have NULL dip.  Don't count these. */
843 		if (intr_params_p->avgi_num_devs > 0) {
844 			for (i = 0, av_dev = autovect[irqno].avh_link;
845 			    av_dev; av_dev = av_dev->av_link)
846 				if (av_dev->av_vector && av_dev->av_dip)
847 					i++;
848 			intr_params_p->avgi_num_devs =
849 			    MIN(intr_params_p->avgi_num_devs, i);
850 		}
851 
852 		/* There are no viable dips to return. */
853 		if (intr_params_p->avgi_num_devs == 0)
854 			intr_params_p->avgi_dip_list = NULL;
855 
856 		else {	/* Return list of dips */
857 
858 			/* Allocate space in array for that number of devs. */
859 			intr_params_p->avgi_dip_list = kmem_zalloc(
860 			    intr_params_p->avgi_num_devs *
861 			    sizeof (dev_info_t *),
862 			    KM_SLEEP);
863 
864 			/*
865 			 * Loop through the device list of the autovec table
866 			 * filling in the dip array.
867 			 *
868 			 * Note that the autovect table may have some special
869 			 * entries which contain NULL dips.  These will be
870 			 * ignored.
871 			 */
872 			for (i = 0, av_dev = autovect[irqno].avh_link;
873 			    av_dev; av_dev = av_dev->av_link)
874 				if (av_dev->av_vector && av_dev->av_dip)
875 					intr_params_p->avgi_dip_list[i++] =
876 					    av_dev->av_dip;
877 		}
878 	}
879 
880 	mutex_exit(&airq_mutex);
881 
882 	return (PSM_SUCCESS);
883 }
884 
885 
886 #if !defined(__xpv)
887 
888 /*
889  * This function provides external interface to the nexus for all
890  * functionalities related to the new DDI interrupt framework.
891  *
892  * Input:
893  * dip     - pointer to the dev_info structure of the requested device
894  * hdlp    - pointer to the internal interrupt handle structure for the
895  *	     requested interrupt
896  * intr_op - opcode for this call
897  * result  - pointer to the integer that will hold the result to be
898  *	     passed back if return value is PSM_SUCCESS
899  *
900  * Output:
901  * return value is either PSM_SUCCESS or PSM_FAILURE
902  */
903 int
904 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
905     psm_intr_op_t intr_op, int *result)
906 {
907 	int		cap;
908 	int		count_vec;
909 	int		old_priority;
910 	int		new_priority;
911 	int		new_cpu;
912 	apic_irq_t	*irqp;
913 	struct intrspec *ispec, intr_spec;
914 
915 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
916 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
917 
918 	ispec = &intr_spec;
919 	ispec->intrspec_pri = hdlp->ih_pri;
920 	ispec->intrspec_vec = hdlp->ih_inum;
921 	ispec->intrspec_func = hdlp->ih_cb_func;
922 
923 	switch (intr_op) {
924 	case PSM_INTR_OP_CHECK_MSI:
925 		/*
926 		 * Check MSI/X is supported or not at APIC level and
927 		 * masked off the MSI/X bits in hdlp->ih_type if not
928 		 * supported before return.  If MSI/X is supported,
929 		 * leave the ih_type unchanged and return.
930 		 *
931 		 * hdlp->ih_type passed in from the nexus has all the
932 		 * interrupt types supported by the device.
933 		 */
934 		if (apic_support_msi == 0) {
935 			/*
936 			 * if apic_support_msi is not set, call
937 			 * apic_check_msi_support() to check whether msi
938 			 * is supported first
939 			 */
940 			if (apic_check_msi_support() == PSM_SUCCESS)
941 				apic_support_msi = 1;
942 			else
943 				apic_support_msi = -1;
944 		}
945 		if (apic_support_msi == 1) {
946 			if (apic_msix_enable)
947 				*result = hdlp->ih_type;
948 			else
949 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
950 		} else
951 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
952 			    DDI_INTR_TYPE_MSIX);
953 		break;
954 	case PSM_INTR_OP_ALLOC_VECTORS:
955 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
956 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
957 			    hdlp->ih_scratch1, hdlp->ih_pri,
958 			    (int)(uintptr_t)hdlp->ih_scratch2);
959 		else
960 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
961 			    hdlp->ih_scratch1, hdlp->ih_pri,
962 			    (int)(uintptr_t)hdlp->ih_scratch2);
963 		break;
964 	case PSM_INTR_OP_FREE_VECTORS:
965 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
966 		    hdlp->ih_pri, hdlp->ih_type);
967 		break;
968 	case PSM_INTR_OP_NAVAIL_VECTORS:
969 		*result = apic_navail_vector(dip, hdlp->ih_pri);
970 		break;
971 	case PSM_INTR_OP_XLATE_VECTOR:
972 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
973 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
974 		break;
975 	case PSM_INTR_OP_GET_PENDING:
976 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
977 			return (PSM_FAILURE);
978 		*result = apic_get_pending(irqp, hdlp->ih_type);
979 		break;
980 	case PSM_INTR_OP_CLEAR_MASK:
981 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
982 			return (PSM_FAILURE);
983 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
984 		if (irqp == NULL)
985 			return (PSM_FAILURE);
986 		apic_clear_mask(irqp);
987 		break;
988 	case PSM_INTR_OP_SET_MASK:
989 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
990 			return (PSM_FAILURE);
991 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
992 			return (PSM_FAILURE);
993 		apic_set_mask(irqp);
994 		break;
995 	case PSM_INTR_OP_GET_CAP:
996 		cap = DDI_INTR_FLAG_PENDING;
997 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
998 			cap |= DDI_INTR_FLAG_MASKABLE;
999 		else if (hdlp->ih_type == DDI_INTR_TYPE_MSIX)
1000 			cap |= DDI_INTR_FLAG_RETARGETABLE;
1001 		*result = cap;
1002 		break;
1003 	case PSM_INTR_OP_GET_SHARED:
1004 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1005 			return (PSM_FAILURE);
1006 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
1007 			return (PSM_FAILURE);
1008 		*result = irqp->airq_share ? 1: 0;
1009 		break;
1010 	case PSM_INTR_OP_SET_PRI:
1011 		old_priority = hdlp->ih_pri;	/* save old value */
1012 		new_priority = *(int *)result;	/* try the new value */
1013 
1014 		/* First, check if "hdlp->ih_scratch1" vectors exist? */
1015 		if (apic_navail_vector(dip, new_priority) < hdlp->ih_scratch1)
1016 			return (PSM_FAILURE);
1017 
1018 		/* Now allocate the vectors */
1019 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
1020 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
1021 			    hdlp->ih_scratch1, new_priority,
1022 			    DDI_INTR_ALLOC_STRICT);
1023 		else
1024 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
1025 			    hdlp->ih_scratch1, new_priority,
1026 			    DDI_INTR_ALLOC_STRICT);
1027 
1028 		/* Did we get new vectors? */
1029 		if (!count_vec)
1030 			return (PSM_FAILURE);
1031 
1032 		/* Finally, free the previously allocated vectors */
1033 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
1034 		    old_priority, hdlp->ih_type);
1035 		hdlp->ih_pri = new_priority; /* set the new value */
1036 		break;
1037 	case PSM_INTR_OP_SET_CPU:
1038 	case PSM_INTR_OP_GRP_SET_CPU:
1039 		/*
1040 		 * The interrupt handle given here has been allocated
1041 		 * specifically for this command, and ih_private carries
1042 		 * a CPU value.
1043 		 */
1044 		new_cpu = (int)(intptr_t)hdlp->ih_private;
1045 		if (!apic_cpu_in_range(new_cpu)) {
1046 			DDI_INTR_IMPLDBG((CE_CONT,
1047 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
1048 			*result = EINVAL;
1049 			return (PSM_FAILURE);
1050 		}
1051 		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
1052 			DDI_INTR_IMPLDBG((CE_CONT,
1053 			    "[grp_]set_cpu: vector out of range: %d\n",
1054 			    hdlp->ih_vector));
1055 			*result = EINVAL;
1056 			return (PSM_FAILURE);
1057 		}
1058 		if (!(hdlp->ih_flags & PSMGI_INTRBY_IRQ))
1059 			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
1060 		if (intr_op == PSM_INTR_OP_SET_CPU) {
1061 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
1062 			    PSM_SUCCESS)
1063 				return (PSM_FAILURE);
1064 		} else {
1065 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
1066 			    result) != PSM_SUCCESS)
1067 				return (PSM_FAILURE);
1068 		}
1069 		break;
1070 	case PSM_INTR_OP_GET_INTR:
1071 		/*
1072 		 * The interrupt handle given here has been allocated
1073 		 * specifically for this command, and ih_private carries
1074 		 * a pointer to a apic_get_intr_t.
1075 		 */
1076 		if (apic_get_vector_intr_info(
1077 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
1078 			return (PSM_FAILURE);
1079 		break;
1080 	case PSM_INTR_OP_APIC_TYPE:
1081 		hdlp->ih_private = apic_get_apic_type();
1082 		hdlp->ih_ver = apic_get_apic_version();
1083 		break;
1084 	case PSM_INTR_OP_SET_CAP:
1085 	default:
1086 		return (PSM_FAILURE);
1087 	}
1088 	return (PSM_SUCCESS);
1089 }
1090 #endif	/* !__xpv */
1091