xref: /titanic_51/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision a60fc142342386d0b786e65fba901234400d7020)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * apic_introp.c:
30  *	Has code for Advanced DDI interrupt framework support.
31  */
32 
33 #include <sys/cpuvar.h>
34 #include <sys/psm.h>
35 #include <sys/archsystm.h>
36 #include <sys/apic.h>
37 #include <sys/sunddi.h>
38 #include <sys/ddi_impldefs.h>
39 #include <sys/mach_intr.h>
40 #include <sys/sysmacros.h>
41 #include <sys/trap.h>
42 #include <sys/pci.h>
43 #include <sys/pci_intr_lib.h>
44 
45 extern struct av_head autovect[];
46 
47 /*
48  *	Local Function Prototypes
49  */
50 int		apic_pci_msi_enable_vector(dev_info_t *, int, int,
51 		    int, int, int);
52 apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);
53 static int	apic_get_pending(apic_irq_t *, int);
54 static void	apic_clear_mask(apic_irq_t *);
55 static void	apic_set_mask(apic_irq_t *);
56 
57 /*
58  * MSI support flag:
59  * reflects whether MSI is supported at APIC level
60  * it can also be patched through /etc/system
61  *
62  *  0 = default value - don't know and need to call apic_check_msi_support()
63  *      to find out then set it accordingly
64  *  1 = supported
65  * -1 = not supported
66  */
67 int	apic_support_msi = 0;
68 
69 /* Multiple vector support for MSI */
70 int	apic_multi_msi_enable = 1;
71 int	apic_multi_msi_max = 2;
72 
73 /*
74  * apic_pci_msi_enable_vector:
75  *	Set the address/data fields in the MSI/X capability structure
76  *	XXX: MSI-X support
77  */
78 /* ARGSUSED */
79 int
80 apic_pci_msi_enable_vector(dev_info_t *dip, int type, int inum, int vector,
81     int count, int target_apic_id)
82 {
83 	uint64_t		msi_addr, msi_data;
84 	ushort_t		msi_ctrl;
85 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
86 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
87 
88 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
89 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
90 	    ddi_driver_name(dip), inum, vector, target_apic_id));
91 
92 	if (handle == NULL || cap_ptr == 0)
93 		return (PSM_FAILURE);
94 
95 	/* MSI Address */
96 	msi_addr = (MSI_ADDR_HDR | (target_apic_id << MSI_ADDR_DEST_SHIFT));
97 	msi_addr |= ((MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
98 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT));
99 
100 	/* MSI Data: MSI is edge triggered according to spec */
101 	msi_data = ((MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | vector);
102 
103 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
104 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
105 
106 	if (type == DDI_INTR_TYPE_MSI) {
107 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
108 
109 		/* Set the bits to inform how many MSIs are enabled */
110 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
111 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
112 
113 		pci_config_put32(handle,
114 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
115 
116 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
117 			pci_config_put32(handle,
118 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
119 			pci_config_put16(handle,
120 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
121 		} else {
122 			pci_config_put16(handle,
123 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
124 		}
125 
126 	} else if (type == DDI_INTR_TYPE_MSIX) {
127 		uintptr_t	off;
128 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
129 
130 		/* Offset into the "inum"th entry in the MSI-X table */
131 		off = (uintptr_t)msix_p->msix_tbl_addr +
132 		    (inum  * PCI_MSIX_VECTOR_SIZE);
133 
134 		ddi_put32(msix_p->msix_tbl_hdl,
135 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
136 		ddi_put64(msix_p->msix_tbl_hdl,
137 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
138 	}
139 
140 	return (PSM_SUCCESS);
141 }
142 
143 
144 /*
145  * This function returns the no. of vectors available for the pri.
146  * dip is not used at this moment.  If we really don't need that,
147  * it will be removed.
148  */
149 /*ARGSUSED*/
150 int
151 apic_navail_vector(dev_info_t *dip, int pri)
152 {
153 	int	lowest, highest, i, navail, count;
154 
155 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
156 	    (void *)dip, pri));
157 
158 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
159 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
160 	navail = count = 0;
161 
162 	/* It has to be contiguous */
163 	for (i = lowest; i < highest; i++) {
164 		count = 0;
165 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
166 		    (i < highest)) {
167 			if (APIC_CHECK_RESERVE_VECTORS(i))
168 				break;
169 			count++;
170 			i++;
171 		}
172 		if (count > navail)
173 			navail = count;
174 	}
175 	return (navail);
176 }
177 
178 /*
179  * Finds "count" contiguous MSI vectors starting at the proper alignment
180  * at "pri".
181  * Caller needs to make sure that count has to be power of 2 and should not
182  * be < 1.
183  */
184 uchar_t
185 apic_find_multi_vectors(int pri, int count)
186 {
187 	int	lowest, highest, i, navail, start, msibits;
188 
189 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
190 	    pri, count));
191 
192 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
193 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
194 	navail = 0;
195 
196 	/*
197 	 * msibits is the no. of lower order message data bits for the
198 	 * allocated MSI vectors and is used to calculate the aligned
199 	 * starting vector
200 	 */
201 	msibits = count - 1;
202 
203 	/* It has to be contiguous */
204 	for (i = lowest; i < highest; i++) {
205 		navail = 0;
206 
207 		/*
208 		 * starting vector has to be aligned accordingly for
209 		 * multiple MSIs
210 		 */
211 		if (msibits)
212 			i = (i + msibits) & ~msibits;
213 		start = i;
214 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
215 		    (i < highest)) {
216 			if (APIC_CHECK_RESERVE_VECTORS(i))
217 				break;
218 			navail++;
219 			if (navail >= count)
220 				return (start);
221 			i++;
222 		}
223 	}
224 	return (0);
225 }
226 
227 
228 /*
229  * It finds the apic_irq_t associates with the dip, ispec and type.
230  */
231 apic_irq_t *
232 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
233 {
234 	apic_irq_t	*irqp;
235 	int i;
236 
237 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
238 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
239 	    ispec->intrspec_pri, type));
240 
241 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
242 		if ((irqp = apic_irq_table[i]) == NULL)
243 			continue;
244 		if ((irqp->airq_dip == dip) &&
245 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
246 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
247 			if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
248 				if (APIC_IS_MSI_OR_MSIX_INDEX(irqp->
249 				    airq_mps_intr_index))
250 					return (irqp);
251 			} else
252 				return (irqp);
253 		}
254 	}
255 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
256 	return (NULL);
257 }
258 
259 
260 /*
261  * This function will return the pending bit of the irqp.
262  * It either comes from the IRR register of the APIC or the RDT
263  * entry of the I/O APIC.
264  * For the IRR to work, it needs to be to its binding CPU
265  */
266 static int
267 apic_get_pending(apic_irq_t *irqp, int type)
268 {
269 	int			bit, index, irr, pending;
270 	int			intin_no;
271 	int			apic_ix;
272 
273 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
274 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
275 	    type));
276 
277 	/* need to get on the bound cpu */
278 	mutex_enter(&cpu_lock);
279 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
280 
281 	index = irqp->airq_vector / 32;
282 	bit = irqp->airq_vector % 32;
283 	irr = apicadr[APIC_IRR_REG + index];
284 
285 	affinity_clear();
286 	mutex_exit(&cpu_lock);
287 
288 	pending = (irr & (1 << bit)) ? 1 : 0;
289 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
290 		/* check I/O APIC for fixed interrupt */
291 		intin_no = irqp->airq_intin_no;
292 		apic_ix = irqp->airq_ioapicindex;
293 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
294 		    AV_PENDING) ? 1 : 0;
295 	}
296 	return (pending);
297 }
298 
299 
300 /*
301  * This function will clear the mask for the interrupt on the I/O APIC
302  */
303 static void
304 apic_clear_mask(apic_irq_t *irqp)
305 {
306 	int			intin_no;
307 	ulong_t			iflag;
308 	int32_t			rdt_entry;
309 	int 			apic_ix;
310 
311 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
312 	    (void *)irqp));
313 
314 	intin_no = irqp->airq_intin_no;
315 	apic_ix = irqp->airq_ioapicindex;
316 
317 	iflag = intr_clear();
318 	lock_set(&apic_ioapic_lock);
319 
320 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
321 
322 	/* clear mask */
323 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
324 	    ((~AV_MASK) & rdt_entry));
325 
326 	lock_clear(&apic_ioapic_lock);
327 	intr_restore(iflag);
328 }
329 
330 
331 /*
332  * This function will mask the interrupt on the I/O APIC
333  */
334 static void
335 apic_set_mask(apic_irq_t *irqp)
336 {
337 	int			intin_no;
338 	int 			apic_ix;
339 	ulong_t			iflag;
340 	int32_t			rdt_entry;
341 
342 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
343 
344 	intin_no = irqp->airq_intin_no;
345 	apic_ix = irqp->airq_ioapicindex;
346 
347 	iflag = intr_clear();
348 
349 	lock_set(&apic_ioapic_lock);
350 
351 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
352 
353 	/* mask it */
354 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
355 	    (AV_MASK | rdt_entry));
356 
357 	lock_clear(&apic_ioapic_lock);
358 	intr_restore(iflag);
359 }
360 
361 
362 void
363 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
364 {
365 	int i;
366 	apic_irq_t *irqptr;
367 	struct intrspec ispec;
368 
369 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
370 	    "count: %x pri: %x type: %x\n",
371 	    (void *)dip, inum, count, pri, type));
372 
373 	/* for MSI/X only */
374 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
375 		return;
376 
377 	for (i = 0; i < count; i++) {
378 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
379 		    "pri=0x%x count=0x%x\n", inum, pri, count));
380 		ispec.intrspec_vec = inum + i;
381 		ispec.intrspec_pri = pri;
382 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
383 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
384 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
385 			    "failed\n", (void *)dip, inum, pri));
386 			continue;
387 		}
388 		irqptr->airq_mps_intr_index = FREE_INDEX;
389 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
390 	}
391 }
392 
393 
394 /*
395  * check whether the system supports MSI
396  *
397  * If PCI-E capability is found, then this must be a PCI-E system.
398  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
399  * to indicate this system supports MSI.
400  */
401 int
402 apic_check_msi_support()
403 {
404 	dev_info_t *cdip;
405 	char dev_type[16];
406 	int dev_len;
407 
408 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
409 
410 	/*
411 	 * check whether the first level children of root_node have
412 	 * PCI-E capability
413 	 */
414 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
415 	    cdip = ddi_get_next_sibling(cdip)) {
416 
417 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
418 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
419 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
420 		    ddi_node_name(cdip)));
421 		dev_len = sizeof (dev_type);
422 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
423 		    "device_type", (caddr_t)dev_type, &dev_len)
424 		    != DDI_PROP_SUCCESS)
425 			continue;
426 		if (strcmp(dev_type, "pciex") == 0)
427 			return (PSM_SUCCESS);
428 	}
429 
430 	/* MSI is not supported on this system */
431 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
432 	    "device_type found\n"));
433 	return (PSM_FAILURE);
434 }
435 
436 /*
437  * apic_pci_msi_unconfigure:
438  *
439  * This and next two interfaces are copied from pci_intr_lib.c
440  * Do ensure that these two files stay in sync.
441  * These needed to be copied over here to avoid a deadlock situation on
442  * certain mp systems that use MSI interrupts.
443  *
444  * IMPORTANT regards next three interfaces:
445  * i) are called only for MSI/X interrupts.
446  * ii) called with interrupts disabled, and must not block
447  */
448 int
449 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
450 {
451 	ushort_t		msi_ctrl;
452 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
453 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
454 
455 	if (handle == NULL || cap_ptr == 0)
456 		return (PSM_FAILURE);
457 
458 	if (type == DDI_INTR_TYPE_MSI) {
459 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
460 		msi_ctrl &= (~PCI_MSI_MME_MASK);
461 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
462 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
463 
464 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
465 			pci_config_put16(handle,
466 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
467 			pci_config_put32(handle,
468 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
469 		} else {
470 			pci_config_put16(handle,
471 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
472 		}
473 
474 	} else if (type == DDI_INTR_TYPE_MSIX) {
475 		uintptr_t	off;
476 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
477 
478 		/* Offset into the "inum"th entry in the MSI-X table */
479 		off = (uintptr_t)msix_p->msix_tbl_addr +
480 		    (inum * PCI_MSIX_VECTOR_SIZE);
481 
482 		/* Reset the "data" and "addr" bits */
483 		ddi_put32(msix_p->msix_tbl_hdl,
484 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
485 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
486 	}
487 
488 	return (PSM_SUCCESS);
489 }
490 
491 
492 /*
493  * apic_pci_msi_enable_mode:
494  */
495 int
496 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
497 {
498 	ushort_t		msi_ctrl;
499 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
500 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
501 
502 	if (handle == NULL || cap_ptr == 0)
503 		return (PSM_FAILURE);
504 
505 	if (type == DDI_INTR_TYPE_MSI) {
506 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
507 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
508 			return (PSM_SUCCESS);
509 
510 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
511 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
512 
513 	} else if (type == DDI_INTR_TYPE_MSIX) {
514 		uintptr_t	off;
515 		ddi_intr_msix_t	*msix_p;
516 
517 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
518 
519 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT)
520 			return (PSM_SUCCESS);
521 
522 		msi_ctrl |= PCI_MSIX_ENABLE_BIT;
523 		pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL, msi_ctrl);
524 
525 		msix_p = i_ddi_get_msix(rdip);
526 
527 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
528 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
529 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
530 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, 0);
531 	}
532 
533 	return (PSM_SUCCESS);
534 }
535 
536 /*
537  * apic_pci_msi_disable_mode:
538  */
539 int
540 apic_pci_msi_disable_mode(dev_info_t *rdip, int type, int inum)
541 {
542 	ushort_t		msi_ctrl;
543 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
544 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
545 
546 	if (handle == NULL || cap_ptr == 0)
547 		return (PSM_FAILURE);
548 
549 	if (type == DDI_INTR_TYPE_MSI) {
550 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
551 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
552 			return (PSM_SUCCESS);
553 
554 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
555 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
556 
557 	} else if (type == DDI_INTR_TYPE_MSIX) {
558 		uintptr_t	off;
559 		ddi_intr_msix_t	*msix_p;
560 
561 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
562 
563 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT))
564 			return (PSM_SUCCESS);
565 
566 		msix_p = i_ddi_get_msix(rdip);
567 
568 		/* Offset into "inum"th entry in the MSI-X table & mask it */
569 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
570 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
571 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, 0x1);
572 	}
573 
574 	return (PSM_SUCCESS);
575 }
576 
577 
578 static int
579 apic_set_cpu(uint32_t vector, int cpu, int *result)
580 {
581 	apic_irq_t *irqp;
582 	int iflag;
583 	int ret;
584 
585 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
586 
587 	/* Convert the vector to the irq using vector_to_irq table. */
588 	mutex_enter(&airq_mutex);
589 	irqp = apic_irq_table[apic_vector_to_irq[vector]];
590 	mutex_exit(&airq_mutex);
591 
592 	if (irqp == NULL) {
593 		*result = ENXIO;
594 		return (PSM_FAILURE);
595 	}
596 
597 	/* Fail if this is an MSI intr and is part of a group. */
598 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
599 	    (irqp->airq_intin_no > 1)) {
600 		*result = ENXIO;
601 		return (PSM_FAILURE);
602 	}
603 
604 	iflag = intr_clear();
605 	lock_set(&apic_ioapic_lock);
606 
607 	ret = apic_rebind_all(irqp, cpu);
608 
609 	lock_clear(&apic_ioapic_lock);
610 	intr_restore(iflag);
611 
612 	if (ret) {
613 		*result = EIO;
614 		return (PSM_FAILURE);
615 	}
616 	*result = 0;
617 	return (PSM_SUCCESS);
618 }
619 
620 static int
621 apic_grp_set_cpu(uint32_t vector, int new_cpu, int *result)
622 {
623 	dev_info_t *orig_dip;
624 	uchar_t orig_cpu;
625 	int iflag;
626 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
627 	int i;
628 	int cap_ptr;
629 	int msi_mask_off;
630 	ushort_t msi_ctrl;
631 	uint32_t msi_pvm;
632 	ddi_acc_handle_t handle;
633 	int num_vectors = 0;
634 
635 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
636 
637 	/*
638 	 * Take mutex to insure that table doesn't change out from underneath
639 	 * us while we're playing with it.
640 	 */
641 	mutex_enter(&airq_mutex);
642 	irqps[0] = apic_irq_table[apic_vector_to_irq[vector]];
643 	orig_cpu = irqps[0]->airq_temp_cpu;
644 	orig_dip = irqps[0]->airq_dip;
645 	num_vectors = irqps[0]->airq_intin_no;
646 
647 	/* A "group" of 1 */
648 	if (num_vectors == 1) {
649 		mutex_exit(&airq_mutex);
650 		return (apic_set_cpu(vector, new_cpu, result));
651 	}
652 
653 	*result = ENXIO;
654 
655 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
656 		mutex_exit(&airq_mutex);
657 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
658 		goto set_grp_intr_done;
659 	}
660 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
661 		mutex_exit(&airq_mutex);
662 		DDI_INTR_IMPLDBG((CE_CONT,
663 		    "set_grp: base vec not part of a grp or not aligned: "
664 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
665 		goto set_grp_intr_done;
666 	}
667 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
668 	    num_vectors));
669 
670 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
671 
672 	*result = EIO;
673 
674 	/*
675 	 * All IRQ entries in the table for the given device will be not
676 	 * shared.  Since they are not shared, the dip in the table will
677 	 * be true to the device of interest.
678 	 */
679 	for (i = 1; i < num_vectors; i++) {
680 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
681 		if (irqps[i] == NULL) {
682 			mutex_exit(&airq_mutex);
683 			goto set_grp_intr_done;
684 		}
685 #ifdef DEBUG
686 		/* Sanity check: CPU and dip is the same for all entries. */
687 		if ((irqps[i]->airq_dip != orig_dip) ||
688 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
689 			mutex_exit(&airq_mutex);
690 			DDI_INTR_IMPLDBG((CE_CONT,
691 			    "set_grp: cpu or dip for vec 0x%x difft than for "
692 			    "vec 0x%x\n", vector, vector + i));
693 			DDI_INTR_IMPLDBG((CE_CONT,
694 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
695 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
696 			    (void *)irqps[i]->airq_dip));
697 			goto set_grp_intr_done;
698 		}
699 #endif /* DEBUG */
700 	}
701 	mutex_exit(&airq_mutex);
702 
703 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
704 	handle = i_ddi_get_pci_config_handle(orig_dip);
705 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
706 
707 	/* MSI Per vector masking is supported. */
708 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
709 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
710 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
711 		else
712 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
713 		msi_pvm = pci_config_get32(handle, msi_mask_off);
714 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
715 		DDI_INTR_IMPLDBG((CE_CONT,
716 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
717 		    pci_config_get32(handle, msi_mask_off)));
718 	}
719 
720 	iflag = intr_clear();
721 	lock_set(&apic_ioapic_lock);
722 
723 	/*
724 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
725 	 * an error if the CPU is not accepting interrupts.  If the first one
726 	 * succeeds they all will.
727 	 */
728 	if (apic_rebind_all(irqps[0], new_cpu))
729 		(void) apic_rebind_all(irqps[0], orig_cpu);
730 	else {
731 		for (i = 1; i < num_vectors; i++)
732 			(void) apic_rebind_all(irqps[i], new_cpu);
733 		*result = 0;	/* SUCCESS */
734 	}
735 
736 	lock_clear(&apic_ioapic_lock);
737 	intr_restore(iflag);
738 
739 	/* Reenable vectors if per vector masking is supported. */
740 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
741 		pci_config_put32(handle, msi_mask_off, msi_pvm);
742 		DDI_INTR_IMPLDBG((CE_CONT,
743 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
744 		    pci_config_get32(handle, msi_mask_off)));
745 	}
746 
747 set_grp_intr_done:
748 	if (*result != 0)
749 		return (PSM_FAILURE);
750 
751 	return (PSM_SUCCESS);
752 }
753 
754 static int
755 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
756 {
757 	struct autovec *av_dev;
758 	uchar_t irqno;
759 	int i;
760 	apic_irq_t *irq_p;
761 
762 	/* Sanity check the vector/irq argument. */
763 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
764 
765 	mutex_enter(&airq_mutex);
766 
767 	/*
768 	 * Convert the vecirq arg to an irq using vector_to_irq table
769 	 * if the arg is a vector.  Pass thru if already an irq.
770 	 */
771 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
772 	    PSMGI_INTRBY_VEC)
773 		irqno = apic_vector_to_irq[vecirq];
774 	else
775 		irqno = vecirq;
776 
777 	irq_p = apic_irq_table[irqno];
778 
779 	if ((irq_p == NULL) ||
780 	    (irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
781 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)) {
782 		mutex_exit(&airq_mutex);
783 		return (PSM_FAILURE);
784 	}
785 
786 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
787 
788 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
789 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
790 
791 		/* Return user bound info for intrd. */
792 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
793 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
794 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
795 		}
796 	}
797 
798 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
799 		intr_params_p->avgi_vector = irq_p->airq_vector;
800 
801 	if (intr_params_p->avgi_req_flags &
802 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
803 		/* Get number of devices from apic_irq table shared field. */
804 		intr_params_p->avgi_num_devs = irq_p->airq_share;
805 
806 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
807 
808 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
809 
810 		/* Some devices have NULL dip.  Don't count these. */
811 		if (intr_params_p->avgi_num_devs > 0) {
812 			for (i = 0, av_dev = autovect[irqno].avh_link;
813 			    av_dev; av_dev = av_dev->av_link)
814 				if (av_dev->av_vector && av_dev->av_dip)
815 					i++;
816 			intr_params_p->avgi_num_devs =
817 			    MIN(intr_params_p->avgi_num_devs, i);
818 		}
819 
820 		/* There are no viable dips to return. */
821 		if (intr_params_p->avgi_num_devs == 0)
822 			intr_params_p->avgi_dip_list = NULL;
823 
824 		else {	/* Return list of dips */
825 
826 			/* Allocate space in array for that number of devs. */
827 			intr_params_p->avgi_dip_list = kmem_zalloc(
828 			    intr_params_p->avgi_num_devs *
829 			    sizeof (dev_info_t *),
830 			    KM_SLEEP);
831 
832 			/*
833 			 * Loop through the device list of the autovec table
834 			 * filling in the dip array.
835 			 *
836 			 * Note that the autovect table may have some special
837 			 * entries which contain NULL dips.  These will be
838 			 * ignored.
839 			 */
840 			for (i = 0, av_dev = autovect[irqno].avh_link;
841 			    av_dev; av_dev = av_dev->av_link)
842 				if (av_dev->av_vector && av_dev->av_dip)
843 					intr_params_p->avgi_dip_list[i++] =
844 					    av_dev->av_dip;
845 		}
846 	}
847 
848 	mutex_exit(&airq_mutex);
849 
850 	return (PSM_SUCCESS);
851 }
852 
853 
854 /*
855  * This function provides external interface to the nexus for all
856  * functionalities related to the new DDI interrupt framework.
857  *
858  * Input:
859  * dip     - pointer to the dev_info structure of the requested device
860  * hdlp    - pointer to the internal interrupt handle structure for the
861  *	     requested interrupt
862  * intr_op - opcode for this call
863  * result  - pointer to the integer that will hold the result to be
864  *	     passed back if return value is PSM_SUCCESS
865  *
866  * Output:
867  * return value is either PSM_SUCCESS or PSM_FAILURE
868  */
869 int
870 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
871     psm_intr_op_t intr_op, int *result)
872 {
873 	int		cap;
874 	int		count_vec;
875 	int		old_priority;
876 	int		new_priority;
877 	int		new_cpu;
878 	apic_irq_t	*irqp;
879 	struct intrspec *ispec, intr_spec;
880 
881 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
882 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
883 
884 	ispec = &intr_spec;
885 	ispec->intrspec_pri = hdlp->ih_pri;
886 	ispec->intrspec_vec = hdlp->ih_inum;
887 	ispec->intrspec_func = hdlp->ih_cb_func;
888 
889 	switch (intr_op) {
890 	case PSM_INTR_OP_CHECK_MSI:
891 		/*
892 		 * Check MSI/X is supported or not at APIC level and
893 		 * masked off the MSI/X bits in hdlp->ih_type if not
894 		 * supported before return.  If MSI/X is supported,
895 		 * leave the ih_type unchanged and return.
896 		 *
897 		 * hdlp->ih_type passed in from the nexus has all the
898 		 * interrupt types supported by the device.
899 		 */
900 		if (apic_support_msi == 0) {
901 			/*
902 			 * if apic_support_msi is not set, call
903 			 * apic_check_msi_support() to check whether msi
904 			 * is supported first
905 			 */
906 			if (apic_check_msi_support() == PSM_SUCCESS)
907 				apic_support_msi = 1;
908 			else
909 				apic_support_msi = -1;
910 		}
911 		if (apic_support_msi == 1)
912 			*result = hdlp->ih_type;
913 		else
914 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
915 			    DDI_INTR_TYPE_MSIX);
916 		break;
917 	case PSM_INTR_OP_ALLOC_VECTORS:
918 		*result = apic_alloc_vectors(dip, hdlp->ih_inum,
919 		    hdlp->ih_scratch1, hdlp->ih_pri, hdlp->ih_type,
920 		    (int)(uintptr_t)hdlp->ih_scratch2);
921 		break;
922 	case PSM_INTR_OP_FREE_VECTORS:
923 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
924 		    hdlp->ih_pri, hdlp->ih_type);
925 		break;
926 	case PSM_INTR_OP_NAVAIL_VECTORS:
927 		*result = apic_navail_vector(dip, hdlp->ih_pri);
928 		break;
929 	case PSM_INTR_OP_XLATE_VECTOR:
930 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
931 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
932 		break;
933 	case PSM_INTR_OP_GET_PENDING:
934 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
935 			return (PSM_FAILURE);
936 		*result = apic_get_pending(irqp, hdlp->ih_type);
937 		break;
938 	case PSM_INTR_OP_CLEAR_MASK:
939 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
940 			return (PSM_FAILURE);
941 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
942 		if (irqp == NULL)
943 			return (PSM_FAILURE);
944 		apic_clear_mask(irqp);
945 		break;
946 	case PSM_INTR_OP_SET_MASK:
947 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
948 			return (PSM_FAILURE);
949 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
950 			return (PSM_FAILURE);
951 		apic_set_mask(irqp);
952 		break;
953 	case PSM_INTR_OP_GET_CAP:
954 		cap = DDI_INTR_FLAG_PENDING;
955 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
956 			cap |= DDI_INTR_FLAG_MASKABLE;
957 		*result = cap;
958 		break;
959 	case PSM_INTR_OP_GET_SHARED:
960 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
961 			return (PSM_FAILURE);
962 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
963 			return (PSM_FAILURE);
964 		*result = irqp->airq_share ? 1: 0;
965 		break;
966 	case PSM_INTR_OP_SET_PRI:
967 		old_priority = hdlp->ih_pri;	/* save old value */
968 		new_priority = *(int *)result;	/* try the new value */
969 
970 		/* First, check if "hdlp->ih_scratch1" vectors exist? */
971 		if (apic_navail_vector(dip, new_priority) < hdlp->ih_scratch1)
972 			return (PSM_FAILURE);
973 
974 		/* Now allocate the vectors */
975 		count_vec = apic_alloc_vectors(dip, hdlp->ih_inum,
976 		    hdlp->ih_scratch1, new_priority, hdlp->ih_type,
977 		    DDI_INTR_ALLOC_STRICT);
978 
979 		/* Did we get new vectors? */
980 		if (!count_vec)
981 			return (PSM_FAILURE);
982 
983 		/* Finally, free the previously allocated vectors */
984 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
985 		    old_priority, hdlp->ih_type);
986 		hdlp->ih_pri = new_priority; /* set the new value */
987 		break;
988 	case PSM_INTR_OP_SET_CPU:
989 	case PSM_INTR_OP_GRP_SET_CPU:
990 		/*
991 		 * The interrupt handle given here has been allocated
992 		 * specifically for this command, and ih_private carries
993 		 * a CPU value.
994 		 */
995 		new_cpu = (int)(intptr_t)hdlp->ih_private;
996 		if (!apic_cpu_in_range(new_cpu)) {
997 			DDI_INTR_IMPLDBG((CE_CONT,
998 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
999 			*result = EINVAL;
1000 			return (PSM_FAILURE);
1001 		}
1002 		if (intr_op == PSM_INTR_OP_SET_CPU) {
1003 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
1004 			    PSM_SUCCESS)
1005 				return (PSM_FAILURE);
1006 		} else {
1007 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
1008 			    result) != PSM_SUCCESS)
1009 				return (PSM_FAILURE);
1010 		}
1011 		break;
1012 	case PSM_INTR_OP_GET_INTR:
1013 		/*
1014 		 * The interrupt handle given here has been allocated
1015 		 * specifically for this command, and ih_private carries
1016 		 * a pointer to a apic_get_intr_t.
1017 		 */
1018 		if (apic_get_vector_intr_info(
1019 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
1020 			return (PSM_FAILURE);
1021 		break;
1022 	case PSM_INTR_OP_APIC_TYPE:
1023 		hdlp->ih_private = apic_get_apic_type();
1024 		hdlp->ih_ver = apic_get_apic_version();
1025 		break;
1026 	case PSM_INTR_OP_SET_CAP:
1027 	default:
1028 		return (PSM_FAILURE);
1029 	}
1030 	return (PSM_SUCCESS);
1031 }
1032