xref: /titanic_50/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision 9844da31e6f9a1bffcbbb9ec7926f759ee04c460)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * apic_introp.c:
28  *	Has code for Advanced DDI interrupt framework support.
29  */
30 
31 #include <sys/cpuvar.h>
32 #include <sys/psm.h>
33 #include <sys/archsystm.h>
34 #include <sys/apic.h>
35 #include <sys/sunddi.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/mach_intr.h>
38 #include <sys/sysmacros.h>
39 #include <sys/trap.h>
40 #include <sys/pci.h>
41 #include <sys/pci_intr_lib.h>
42 
/* Autovector table; walked by apic_get_vector_intr_info() to list dips. */
extern struct av_head autovect[];

/*
 *	Local Function Prototypes
 */
apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);

/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *  0 = default value - don't know and need to call apic_check_msi_support()
 *      to find out then set it accordingly
 *  1 = supported
 * -1 = not supported
 *
 * The lazy evaluation happens in apic_intr_ops(PSM_INTR_OP_CHECK_MSI).
 */
int	apic_support_msi = 0;

/*
 * Multiple vector support for MSI.
 * NOTE(review): neither tunable is referenced in this file; presumably
 * they are consumed by the MSI vector allocation code - confirm there.
 */
int	apic_multi_msi_enable = 1;
int	apic_multi_msi_max = 2;

/*
 * MSI-X support.
 * apic_msix_enable: when zero, apic_intr_ops(PSM_INTR_OP_CHECK_MSI) strips
 * DDI_INTR_TYPE_MSIX from the interrupt types it reports.
 * apic_msix_max: maximum no. of MSI-X vectors supported (not referenced in
 * this file).
 */
int	apic_msix_enable = 1;
int	apic_msix_max = 2;
69 
70 /*
71  * apic_pci_msi_enable_vector:
72  *	Set the address/data fields in the MSI/X capability structure
73  *	XXX: MSI-X support
74  */
75 /* ARGSUSED */
76 void
77 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
78     int count, int target_apic_id)
79 {
80 	uint64_t		msi_addr, msi_data;
81 	ushort_t		msi_ctrl;
82 	dev_info_t		*dip = irq_ptr->airq_dip;
83 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
84 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
85 #if !defined(__xpv)
86 	msi_regs_t		msi_regs;
87 #endif	/* ! __xpv */
88 
89 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
90 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
91 	    ddi_driver_name(dip), inum, vector, target_apic_id));
92 
93 	ASSERT((handle != NULL) && (cap_ptr != 0));
94 
95 #if !defined(__xpv)
96 	msi_regs.mr_data = vector;
97 	msi_regs.mr_addr = target_apic_id;
98 
99 	apic_vt_ops->apic_intrr_alloc_entry(irq_ptr);
100 	apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&msi_regs);
101 	apic_vt_ops->apic_intrr_record_msi(irq_ptr, &msi_regs);
102 
103 	/* MSI Address */
104 	msi_addr = msi_regs.mr_addr;
105 
106 	/* MSI Data: MSI is edge triggered according to spec */
107 	msi_data = msi_regs.mr_data;
108 #else
109 	/* MSI Address */
110 	msi_addr = (MSI_ADDR_HDR |
111 	    (target_apic_id << MSI_ADDR_DEST_SHIFT));
112 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
113 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
114 
115 	/* MSI Data: MSI is edge triggered according to spec */
116 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
117 #endif	/* ! __xpv */
118 
119 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
120 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
121 
122 	if (type == DDI_INTR_TYPE_MSI) {
123 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
124 
125 		/* Set the bits to inform how many MSIs are enabled */
126 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
127 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
128 
129 		pci_config_put32(handle,
130 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
131 
132 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
133 			pci_config_put32(handle,
134 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
135 			pci_config_put16(handle,
136 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
137 		} else {
138 			pci_config_put16(handle,
139 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
140 		}
141 
142 	} else if (type == DDI_INTR_TYPE_MSIX) {
143 		uintptr_t	off;
144 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
145 
146 		/* Offset into the "inum"th entry in the MSI-X table */
147 		off = (uintptr_t)msix_p->msix_tbl_addr +
148 		    (inum  * PCI_MSIX_VECTOR_SIZE);
149 
150 		ddi_put32(msix_p->msix_tbl_hdl,
151 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
152 		ddi_put64(msix_p->msix_tbl_hdl,
153 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
154 	}
155 }
156 
157 
158 /*
159  * This function returns the no. of vectors available for the pri.
160  * dip is not used at this moment.  If we really don't need that,
161  * it will be removed.
162  */
163 /*ARGSUSED*/
164 int
165 apic_navail_vector(dev_info_t *dip, int pri)
166 {
167 	int	lowest, highest, i, navail, count;
168 
169 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
170 	    (void *)dip, pri));
171 
172 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
173 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
174 	navail = count = 0;
175 
176 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
177 		lowest -= APIC_VECTOR_PER_IPL;
178 
179 	/* It has to be contiguous */
180 	for (i = lowest; i < highest; i++) {
181 		count = 0;
182 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
183 		    (i < highest)) {
184 			if (APIC_CHECK_RESERVE_VECTORS(i))
185 				break;
186 			count++;
187 			i++;
188 		}
189 		if (count > navail)
190 			navail = count;
191 	}
192 	return (navail);
193 }
194 
195 /*
196  * Finds "count" contiguous MSI vectors starting at the proper alignment
197  * at "pri".
198  * Caller needs to make sure that count has to be power of 2 and should not
199  * be < 1.
200  */
201 uchar_t
202 apic_find_multi_vectors(int pri, int count)
203 {
204 	int	lowest, highest, i, navail, start, msibits;
205 
206 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
207 	    pri, count));
208 
209 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
210 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
211 	navail = 0;
212 
213 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
214 		lowest -= APIC_VECTOR_PER_IPL;
215 
216 	/*
217 	 * msibits is the no. of lower order message data bits for the
218 	 * allocated MSI vectors and is used to calculate the aligned
219 	 * starting vector
220 	 */
221 	msibits = count - 1;
222 
223 	/* It has to be contiguous */
224 	for (i = lowest; i < highest; i++) {
225 		navail = 0;
226 
227 		/*
228 		 * starting vector has to be aligned accordingly for
229 		 * multiple MSIs
230 		 */
231 		if (msibits)
232 			i = (i + msibits) & ~msibits;
233 		start = i;
234 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
235 		    (i < highest)) {
236 			if (APIC_CHECK_RESERVE_VECTORS(i))
237 				break;
238 			navail++;
239 			if (navail >= count)
240 				return (start);
241 			i++;
242 		}
243 	}
244 	return (0);
245 }
246 
247 
248 /*
249  * It finds the apic_irq_t associates with the dip, ispec and type.
250  */
251 apic_irq_t *
252 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
253 {
254 	apic_irq_t	*irqp;
255 	int i;
256 
257 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
258 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
259 	    ispec->intrspec_pri, type));
260 
261 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
262 		if ((irqp = apic_irq_table[i]) == NULL)
263 			continue;
264 		if ((irqp->airq_dip == dip) &&
265 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
266 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
267 			if (type == DDI_INTR_TYPE_MSI) {
268 				if (irqp->airq_mps_intr_index == MSI_INDEX)
269 					return (irqp);
270 			} else if (type == DDI_INTR_TYPE_MSIX) {
271 				if (irqp->airq_mps_intr_index == MSIX_INDEX)
272 					return (irqp);
273 			} else
274 				return (irqp);
275 		}
276 	}
277 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
278 	return (NULL);
279 }
280 
281 
282 #if !defined(__xpv)
283 
284 /*
285  * This function will return the pending bit of the irqp.
286  * It either comes from the IRR register of the APIC or the RDT
287  * entry of the I/O APIC.
288  * For the IRR to work, it needs to be to its binding CPU
289  */
290 static int
291 apic_get_pending(apic_irq_t *irqp, int type)
292 {
293 	int			bit, index, irr, pending;
294 	int			intin_no;
295 	int			apic_ix;
296 
297 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
298 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
299 	    type));
300 
301 	/* need to get on the bound cpu */
302 	mutex_enter(&cpu_lock);
303 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
304 
305 	index = irqp->airq_vector / 32;
306 	bit = irqp->airq_vector % 32;
307 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
308 
309 	affinity_clear();
310 	mutex_exit(&cpu_lock);
311 
312 	pending = (irr & (1 << bit)) ? 1 : 0;
313 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
314 		/* check I/O APIC for fixed interrupt */
315 		intin_no = irqp->airq_intin_no;
316 		apic_ix = irqp->airq_ioapicindex;
317 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
318 		    AV_PENDING) ? 1 : 0;
319 	}
320 	return (pending);
321 }
322 
323 
324 /*
325  * This function will clear the mask for the interrupt on the I/O APIC
326  */
327 static void
328 apic_clear_mask(apic_irq_t *irqp)
329 {
330 	int			intin_no;
331 	ulong_t			iflag;
332 	int32_t			rdt_entry;
333 	int 			apic_ix;
334 
335 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
336 	    (void *)irqp));
337 
338 	intin_no = irqp->airq_intin_no;
339 	apic_ix = irqp->airq_ioapicindex;
340 
341 	iflag = intr_clear();
342 	lock_set(&apic_ioapic_lock);
343 
344 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
345 
346 	/* clear mask */
347 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
348 	    ((~AV_MASK) & rdt_entry));
349 
350 	lock_clear(&apic_ioapic_lock);
351 	intr_restore(iflag);
352 }
353 
354 
355 /*
356  * This function will mask the interrupt on the I/O APIC
357  */
358 static void
359 apic_set_mask(apic_irq_t *irqp)
360 {
361 	int			intin_no;
362 	int 			apic_ix;
363 	ulong_t			iflag;
364 	int32_t			rdt_entry;
365 
366 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
367 
368 	intin_no = irqp->airq_intin_no;
369 	apic_ix = irqp->airq_ioapicindex;
370 
371 	iflag = intr_clear();
372 
373 	lock_set(&apic_ioapic_lock);
374 
375 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
376 
377 	/* mask it */
378 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
379 	    (AV_MASK | rdt_entry));
380 
381 	lock_clear(&apic_ioapic_lock);
382 	intr_restore(iflag);
383 }
384 
385 #endif	/* ! __xpv */
386 
387 void
388 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
389 {
390 	int i;
391 	apic_irq_t *irqptr;
392 	struct intrspec ispec;
393 
394 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
395 	    "count: %x pri: %x type: %x\n",
396 	    (void *)dip, inum, count, pri, type));
397 
398 	/* for MSI/X only */
399 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
400 		return;
401 
402 	for (i = 0; i < count; i++) {
403 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
404 		    "pri=0x%x count=0x%x\n", inum, pri, count));
405 		ispec.intrspec_vec = inum + i;
406 		ispec.intrspec_pri = pri;
407 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
408 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
409 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
410 			    "failed\n", (void *)dip, inum, pri));
411 			continue;
412 		}
413 		irqptr->airq_mps_intr_index = FREE_INDEX;
414 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
415 	}
416 }
417 
418 
419 /*
420  * check whether the system supports MSI
421  *
422  * If PCI-E capability is found, then this must be a PCI-E system.
423  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
424  * to indicate this system supports MSI.
425  */
426 int
427 apic_check_msi_support()
428 {
429 	dev_info_t *cdip;
430 	char dev_type[16];
431 	int dev_len;
432 
433 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
434 
435 	/*
436 	 * check whether the first level children of root_node have
437 	 * PCI-E capability
438 	 */
439 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
440 	    cdip = ddi_get_next_sibling(cdip)) {
441 
442 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
443 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
444 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
445 		    ddi_node_name(cdip)));
446 		dev_len = sizeof (dev_type);
447 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
448 		    "device_type", (caddr_t)dev_type, &dev_len)
449 		    != DDI_PROP_SUCCESS)
450 			continue;
451 		if (strcmp(dev_type, "pciex") == 0)
452 			return (PSM_SUCCESS);
453 	}
454 
455 	/* MSI is not supported on this system */
456 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
457 	    "device_type found\n"));
458 	return (PSM_FAILURE);
459 }
460 
461 /*
462  * apic_pci_msi_unconfigure:
463  *
464  * This and next two interfaces are copied from pci_intr_lib.c
465  * Do ensure that these two files stay in sync.
466  * These needed to be copied over here to avoid a deadlock situation on
467  * certain mp systems that use MSI interrupts.
468  *
469  * IMPORTANT regards next three interfaces:
470  * i) are called only for MSI/X interrupts.
471  * ii) called with interrupts disabled, and must not block
472  */
473 void
474 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
475 {
476 	ushort_t		msi_ctrl;
477 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
478 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
479 
480 	ASSERT((handle != NULL) && (cap_ptr != 0));
481 
482 	if (type == DDI_INTR_TYPE_MSI) {
483 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
484 		msi_ctrl &= (~PCI_MSI_MME_MASK);
485 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
486 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
487 
488 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
489 			pci_config_put16(handle,
490 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
491 			pci_config_put32(handle,
492 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
493 		} else {
494 			pci_config_put16(handle,
495 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
496 		}
497 
498 	} else if (type == DDI_INTR_TYPE_MSIX) {
499 		uintptr_t	off;
500 		uint32_t	mask;
501 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
502 
503 		/* Offset into "inum"th entry in the MSI-X table & mask it */
504 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
505 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
506 
507 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
508 
509 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
510 
511 		/* Offset into the "inum"th entry in the MSI-X table */
512 		off = (uintptr_t)msix_p->msix_tbl_addr +
513 		    (inum * PCI_MSIX_VECTOR_SIZE);
514 
515 		/* Reset the "data" and "addr" bits */
516 		ddi_put32(msix_p->msix_tbl_hdl,
517 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
518 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
519 	}
520 }
521 
522 
523 /*
524  * apic_pci_msi_enable_mode:
525  */
526 void
527 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
528 {
529 	ushort_t		msi_ctrl;
530 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
531 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
532 
533 	ASSERT((handle != NULL) && (cap_ptr != 0));
534 
535 	if (type == DDI_INTR_TYPE_MSI) {
536 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
537 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
538 			return;
539 
540 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
541 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
542 
543 	} else if (type == DDI_INTR_TYPE_MSIX) {
544 		uintptr_t	off;
545 		uint32_t	mask;
546 		ddi_intr_msix_t	*msix_p;
547 
548 		msix_p = i_ddi_get_msix(rdip);
549 
550 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
551 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
552 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
553 
554 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
555 
556 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
557 
558 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
559 
560 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
561 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
562 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
563 			    msi_ctrl);
564 		}
565 	}
566 }
567 
568 /*
569  * apic_pci_msi_disable_mode:
570  */
571 void
572 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
573 {
574 	ushort_t		msi_ctrl;
575 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
576 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
577 
578 	ASSERT((handle != NULL) && (cap_ptr != 0));
579 
580 	if (type == DDI_INTR_TYPE_MSI) {
581 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
582 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
583 			return;
584 
585 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
586 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
587 
588 	} else if (type == DDI_INTR_TYPE_MSIX) {
589 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
590 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
591 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
592 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
593 			    msi_ctrl);
594 		}
595 	}
596 }
597 
598 #if !defined(__xpv)
599 
600 static int
601 apic_set_cpu(uint32_t vector, int cpu, int *result)
602 {
603 	apic_irq_t *irqp;
604 	ulong_t iflag;
605 	int ret;
606 
607 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
608 
609 	/* Convert the vector to the irq using vector_to_irq table. */
610 	mutex_enter(&airq_mutex);
611 	irqp = apic_irq_table[apic_vector_to_irq[vector]];
612 	mutex_exit(&airq_mutex);
613 
614 	if (irqp == NULL) {
615 		*result = ENXIO;
616 		return (PSM_FAILURE);
617 	}
618 
619 	/* Fail if this is an MSI intr and is part of a group. */
620 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
621 	    (irqp->airq_intin_no > 1)) {
622 		*result = ENXIO;
623 		return (PSM_FAILURE);
624 	}
625 
626 	iflag = intr_clear();
627 	lock_set(&apic_ioapic_lock);
628 
629 	ret = apic_rebind_all(irqp, cpu);
630 
631 	lock_clear(&apic_ioapic_lock);
632 	intr_restore(iflag);
633 
634 	if (ret) {
635 		*result = EIO;
636 		return (PSM_FAILURE);
637 	}
638 	*result = 0;
639 	return (PSM_SUCCESS);
640 }
641 
642 static int
643 apic_grp_set_cpu(uint32_t vector, int new_cpu, int *result)
644 {
645 	dev_info_t *orig_dip;
646 	uint32_t orig_cpu;
647 	ulong_t iflag;
648 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
649 	int i;
650 	int cap_ptr;
651 	int msi_mask_off;
652 	ushort_t msi_ctrl;
653 	uint32_t msi_pvm;
654 	ddi_acc_handle_t handle;
655 	int num_vectors = 0;
656 
657 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
658 
659 	/*
660 	 * Take mutex to insure that table doesn't change out from underneath
661 	 * us while we're playing with it.
662 	 */
663 	mutex_enter(&airq_mutex);
664 	irqps[0] = apic_irq_table[apic_vector_to_irq[vector]];
665 	orig_cpu = irqps[0]->airq_temp_cpu;
666 	orig_dip = irqps[0]->airq_dip;
667 	num_vectors = irqps[0]->airq_intin_no;
668 
669 	/* A "group" of 1 */
670 	if (num_vectors == 1) {
671 		mutex_exit(&airq_mutex);
672 		return (apic_set_cpu(vector, new_cpu, result));
673 	}
674 
675 	*result = ENXIO;
676 
677 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
678 		mutex_exit(&airq_mutex);
679 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
680 		goto set_grp_intr_done;
681 	}
682 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
683 		mutex_exit(&airq_mutex);
684 		DDI_INTR_IMPLDBG((CE_CONT,
685 		    "set_grp: base vec not part of a grp or not aligned: "
686 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
687 		goto set_grp_intr_done;
688 	}
689 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
690 	    num_vectors));
691 
692 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
693 
694 	*result = EIO;
695 
696 	/*
697 	 * All IRQ entries in the table for the given device will be not
698 	 * shared.  Since they are not shared, the dip in the table will
699 	 * be true to the device of interest.
700 	 */
701 	for (i = 1; i < num_vectors; i++) {
702 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
703 		if (irqps[i] == NULL) {
704 			mutex_exit(&airq_mutex);
705 			goto set_grp_intr_done;
706 		}
707 #ifdef DEBUG
708 		/* Sanity check: CPU and dip is the same for all entries. */
709 		if ((irqps[i]->airq_dip != orig_dip) ||
710 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
711 			mutex_exit(&airq_mutex);
712 			DDI_INTR_IMPLDBG((CE_CONT,
713 			    "set_grp: cpu or dip for vec 0x%x difft than for "
714 			    "vec 0x%x\n", vector, vector + i));
715 			DDI_INTR_IMPLDBG((CE_CONT,
716 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
717 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
718 			    (void *)irqps[i]->airq_dip));
719 			goto set_grp_intr_done;
720 		}
721 #endif /* DEBUG */
722 	}
723 	mutex_exit(&airq_mutex);
724 
725 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
726 	handle = i_ddi_get_pci_config_handle(orig_dip);
727 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
728 
729 	/* MSI Per vector masking is supported. */
730 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
731 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
732 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
733 		else
734 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
735 		msi_pvm = pci_config_get32(handle, msi_mask_off);
736 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
737 		DDI_INTR_IMPLDBG((CE_CONT,
738 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
739 		    pci_config_get32(handle, msi_mask_off)));
740 	}
741 
742 	iflag = intr_clear();
743 	lock_set(&apic_ioapic_lock);
744 
745 	/*
746 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
747 	 * an error if the CPU is not accepting interrupts.  If the first one
748 	 * succeeds they all will.
749 	 */
750 	if (apic_rebind_all(irqps[0], new_cpu))
751 		(void) apic_rebind_all(irqps[0], orig_cpu);
752 	else {
753 		for (i = 1; i < num_vectors; i++)
754 			(void) apic_rebind_all(irqps[i], new_cpu);
755 		*result = 0;	/* SUCCESS */
756 	}
757 
758 	lock_clear(&apic_ioapic_lock);
759 	intr_restore(iflag);
760 
761 	/* Reenable vectors if per vector masking is supported. */
762 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
763 		pci_config_put32(handle, msi_mask_off, msi_pvm);
764 		DDI_INTR_IMPLDBG((CE_CONT,
765 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
766 		    pci_config_get32(handle, msi_mask_off)));
767 	}
768 
769 set_grp_intr_done:
770 	if (*result != 0)
771 		return (PSM_FAILURE);
772 
773 	return (PSM_SUCCESS);
774 }
775 
776 #endif	/* !__xpv */
777 
778 int
779 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
780 {
781 	struct autovec *av_dev;
782 	uchar_t irqno;
783 	int i;
784 	apic_irq_t *irq_p;
785 
786 	/* Sanity check the vector/irq argument. */
787 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
788 
789 	mutex_enter(&airq_mutex);
790 
791 	/*
792 	 * Convert the vecirq arg to an irq using vector_to_irq table
793 	 * if the arg is a vector.  Pass thru if already an irq.
794 	 */
795 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
796 	    PSMGI_INTRBY_VEC)
797 		irqno = apic_vector_to_irq[vecirq];
798 	else
799 		irqno = vecirq;
800 
801 	irq_p = apic_irq_table[irqno];
802 
803 	if ((irq_p == NULL) ||
804 	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
805 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
806 		mutex_exit(&airq_mutex);
807 		return (PSM_FAILURE);
808 	}
809 
810 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
811 
812 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
813 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
814 
815 		/* Return user bound info for intrd. */
816 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
817 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
818 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
819 		}
820 	}
821 
822 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
823 		intr_params_p->avgi_vector = irq_p->airq_vector;
824 
825 	if (intr_params_p->avgi_req_flags &
826 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
827 		/* Get number of devices from apic_irq table shared field. */
828 		intr_params_p->avgi_num_devs = irq_p->airq_share;
829 
830 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
831 
832 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
833 
834 		/* Some devices have NULL dip.  Don't count these. */
835 		if (intr_params_p->avgi_num_devs > 0) {
836 			for (i = 0, av_dev = autovect[irqno].avh_link;
837 			    av_dev; av_dev = av_dev->av_link)
838 				if (av_dev->av_vector && av_dev->av_dip)
839 					i++;
840 			intr_params_p->avgi_num_devs =
841 			    MIN(intr_params_p->avgi_num_devs, i);
842 		}
843 
844 		/* There are no viable dips to return. */
845 		if (intr_params_p->avgi_num_devs == 0)
846 			intr_params_p->avgi_dip_list = NULL;
847 
848 		else {	/* Return list of dips */
849 
850 			/* Allocate space in array for that number of devs. */
851 			intr_params_p->avgi_dip_list = kmem_zalloc(
852 			    intr_params_p->avgi_num_devs *
853 			    sizeof (dev_info_t *),
854 			    KM_SLEEP);
855 
856 			/*
857 			 * Loop through the device list of the autovec table
858 			 * filling in the dip array.
859 			 *
860 			 * Note that the autovect table may have some special
861 			 * entries which contain NULL dips.  These will be
862 			 * ignored.
863 			 */
864 			for (i = 0, av_dev = autovect[irqno].avh_link;
865 			    av_dev; av_dev = av_dev->av_link)
866 				if (av_dev->av_vector && av_dev->av_dip)
867 					intr_params_p->avgi_dip_list[i++] =
868 					    av_dev->av_dip;
869 		}
870 	}
871 
872 	mutex_exit(&airq_mutex);
873 
874 	return (PSM_SUCCESS);
875 }
876 
877 
878 #if !defined(__xpv)
879 
880 /*
881  * This function provides external interface to the nexus for all
882  * functionalities related to the new DDI interrupt framework.
883  *
884  * Input:
885  * dip     - pointer to the dev_info structure of the requested device
886  * hdlp    - pointer to the internal interrupt handle structure for the
887  *	     requested interrupt
888  * intr_op - opcode for this call
889  * result  - pointer to the integer that will hold the result to be
890  *	     passed back if return value is PSM_SUCCESS
891  *
892  * Output:
893  * return value is either PSM_SUCCESS or PSM_FAILURE
894  */
895 int
896 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
897     psm_intr_op_t intr_op, int *result)
898 {
899 	int		cap;
900 	int		count_vec;
901 	int		old_priority;
902 	int		new_priority;
903 	int		new_cpu;
904 	apic_irq_t	*irqp;
905 	struct intrspec *ispec, intr_spec;
906 
907 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
908 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
909 
910 	ispec = &intr_spec;
911 	ispec->intrspec_pri = hdlp->ih_pri;
912 	ispec->intrspec_vec = hdlp->ih_inum;
913 	ispec->intrspec_func = hdlp->ih_cb_func;
914 
915 	switch (intr_op) {
916 	case PSM_INTR_OP_CHECK_MSI:
917 		/*
918 		 * Check MSI/X is supported or not at APIC level and
919 		 * masked off the MSI/X bits in hdlp->ih_type if not
920 		 * supported before return.  If MSI/X is supported,
921 		 * leave the ih_type unchanged and return.
922 		 *
923 		 * hdlp->ih_type passed in from the nexus has all the
924 		 * interrupt types supported by the device.
925 		 */
926 		if (apic_support_msi == 0) {
927 			/*
928 			 * if apic_support_msi is not set, call
929 			 * apic_check_msi_support() to check whether msi
930 			 * is supported first
931 			 */
932 			if (apic_check_msi_support() == PSM_SUCCESS)
933 				apic_support_msi = 1;
934 			else
935 				apic_support_msi = -1;
936 		}
937 		if (apic_support_msi == 1) {
938 			if (apic_msix_enable)
939 				*result = hdlp->ih_type;
940 			else
941 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
942 		} else
943 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
944 			    DDI_INTR_TYPE_MSIX);
945 		break;
946 	case PSM_INTR_OP_ALLOC_VECTORS:
947 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
948 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
949 			    hdlp->ih_scratch1, hdlp->ih_pri,
950 			    (int)(uintptr_t)hdlp->ih_scratch2);
951 		else
952 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
953 			    hdlp->ih_scratch1, hdlp->ih_pri,
954 			    (int)(uintptr_t)hdlp->ih_scratch2);
955 		break;
956 	case PSM_INTR_OP_FREE_VECTORS:
957 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
958 		    hdlp->ih_pri, hdlp->ih_type);
959 		break;
960 	case PSM_INTR_OP_NAVAIL_VECTORS:
961 		*result = apic_navail_vector(dip, hdlp->ih_pri);
962 		break;
963 	case PSM_INTR_OP_XLATE_VECTOR:
964 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
965 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
966 		break;
967 	case PSM_INTR_OP_GET_PENDING:
968 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
969 			return (PSM_FAILURE);
970 		*result = apic_get_pending(irqp, hdlp->ih_type);
971 		break;
972 	case PSM_INTR_OP_CLEAR_MASK:
973 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
974 			return (PSM_FAILURE);
975 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
976 		if (irqp == NULL)
977 			return (PSM_FAILURE);
978 		apic_clear_mask(irqp);
979 		break;
980 	case PSM_INTR_OP_SET_MASK:
981 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
982 			return (PSM_FAILURE);
983 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
984 			return (PSM_FAILURE);
985 		apic_set_mask(irqp);
986 		break;
987 	case PSM_INTR_OP_GET_CAP:
988 		cap = DDI_INTR_FLAG_PENDING;
989 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
990 			cap |= DDI_INTR_FLAG_MASKABLE;
991 		*result = cap;
992 		break;
993 	case PSM_INTR_OP_GET_SHARED:
994 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
995 			return (PSM_FAILURE);
996 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
997 			return (PSM_FAILURE);
998 		*result = irqp->airq_share ? 1: 0;
999 		break;
1000 	case PSM_INTR_OP_SET_PRI:
1001 		old_priority = hdlp->ih_pri;	/* save old value */
1002 		new_priority = *(int *)result;	/* try the new value */
1003 
1004 		/* First, check if "hdlp->ih_scratch1" vectors exist? */
1005 		if (apic_navail_vector(dip, new_priority) < hdlp->ih_scratch1)
1006 			return (PSM_FAILURE);
1007 
1008 		/* Now allocate the vectors */
1009 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
1010 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
1011 			    hdlp->ih_scratch1, new_priority,
1012 			    DDI_INTR_ALLOC_STRICT);
1013 		else
1014 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
1015 			    hdlp->ih_scratch1, new_priority,
1016 			    DDI_INTR_ALLOC_STRICT);
1017 
1018 		/* Did we get new vectors? */
1019 		if (!count_vec)
1020 			return (PSM_FAILURE);
1021 
1022 		/* Finally, free the previously allocated vectors */
1023 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
1024 		    old_priority, hdlp->ih_type);
1025 		hdlp->ih_pri = new_priority; /* set the new value */
1026 		break;
1027 	case PSM_INTR_OP_SET_CPU:
1028 	case PSM_INTR_OP_GRP_SET_CPU:
1029 		/*
1030 		 * The interrupt handle given here has been allocated
1031 		 * specifically for this command, and ih_private carries
1032 		 * a CPU value.
1033 		 */
1034 		new_cpu = (int)(intptr_t)hdlp->ih_private;
1035 		if (!apic_cpu_in_range(new_cpu)) {
1036 			DDI_INTR_IMPLDBG((CE_CONT,
1037 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
1038 			*result = EINVAL;
1039 			return (PSM_FAILURE);
1040 		}
1041 		if (intr_op == PSM_INTR_OP_SET_CPU) {
1042 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
1043 			    PSM_SUCCESS)
1044 				return (PSM_FAILURE);
1045 		} else {
1046 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
1047 			    result) != PSM_SUCCESS)
1048 				return (PSM_FAILURE);
1049 		}
1050 		break;
1051 	case PSM_INTR_OP_GET_INTR:
1052 		/*
1053 		 * The interrupt handle given here has been allocated
1054 		 * specifically for this command, and ih_private carries
1055 		 * a pointer to a apic_get_intr_t.
1056 		 */
1057 		if (apic_get_vector_intr_info(
1058 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
1059 			return (PSM_FAILURE);
1060 		break;
1061 	case PSM_INTR_OP_APIC_TYPE:
1062 		hdlp->ih_private = apic_get_apic_type();
1063 		hdlp->ih_ver = apic_get_apic_version();
1064 		break;
1065 	case PSM_INTR_OP_SET_CAP:
1066 	default:
1067 		return (PSM_FAILURE);
1068 	}
1069 	return (PSM_SUCCESS);
1070 }
1071 #endif	/* !__xpv */
1072