xref: /illumos-gate/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision e5803b76927480e8f9b67b22201c484ccf4c2bcf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * apic_introp.c:
27  *	Has code for Advanced DDI interrupt framework support.
28  */
29 
30 #include <sys/cpuvar.h>
31 #include <sys/psm.h>
32 #include <sys/archsystm.h>
33 #include <sys/apic.h>
34 #include <sys/sunddi.h>
35 #include <sys/ddi_impldefs.h>
36 #include <sys/mach_intr.h>
37 #include <sys/sysmacros.h>
38 #include <sys/trap.h>
39 #include <sys/pci.h>
40 #include <sys/pci_intr_lib.h>
41 #include <sys/apic_common.h>
42 
43 extern struct av_head autovect[];
44 
45 /*
46  *	Local Function Prototypes
47  */
48 apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);
49 
50 /*
51  * apic_pci_msi_enable_vector:
52  *	Set the address/data fields in the MSI/X capability structure
53  *	XXX: MSI-X support
54  */
55 /* ARGSUSED */
56 void
57 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
58     int count, int target_apic_id)
59 {
60 	uint64_t		msi_addr, msi_data;
61 	ushort_t		msi_ctrl;
62 	dev_info_t		*dip = irq_ptr->airq_dip;
63 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
64 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
65 	msi_regs_t		msi_regs;
66 	int			irqno, i;
67 	void			*intrmap_tbl[PCI_MSI_MAX_INTRS];
68 
69 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
70 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
71 	    ddi_driver_name(dip), inum, vector, target_apic_id));
72 
73 	ASSERT((handle != NULL) && (cap_ptr != 0));
74 
75 	msi_regs.mr_data = vector;
76 	msi_regs.mr_addr = target_apic_id;
77 
78 	for (i = 0; i < count; i++) {
79 		irqno = apic_vector_to_irq[vector + i];
80 		intrmap_tbl[i] = apic_irq_table[irqno]->airq_intrmap_private;
81 	}
82 	apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
83 	    count, 0xff);
84 	for (i = 0; i < count; i++) {
85 		irqno = apic_vector_to_irq[vector + i];
86 		apic_irq_table[irqno]->airq_intrmap_private =
87 		    intrmap_tbl[i];
88 	}
89 
90 	apic_vt_ops->apic_intrmap_map_entry(irq_ptr->airq_intrmap_private,
91 	    (void *)&msi_regs, type, count);
92 	apic_vt_ops->apic_intrmap_record_msi(irq_ptr->airq_intrmap_private,
93 	    &msi_regs);
94 
95 	/* MSI Address */
96 	msi_addr = msi_regs.mr_addr;
97 
98 	/* MSI Data: MSI is edge triggered according to spec */
99 	msi_data = msi_regs.mr_data;
100 
101 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
102 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
103 
104 	if (type == DDI_INTR_TYPE_MSI) {
105 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
106 
107 		/* Set the bits to inform how many MSIs are enabled */
108 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
109 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
110 
111 		/*
112 		 * Only set vector if not on hypervisor
113 		 */
114 		pci_config_put32(handle,
115 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
116 
117 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
118 			pci_config_put32(handle,
119 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
120 			pci_config_put16(handle,
121 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
122 		} else {
123 			pci_config_put16(handle,
124 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
125 		}
126 
127 	} else if (type == DDI_INTR_TYPE_MSIX) {
128 		uintptr_t	off;
129 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
130 
131 		ASSERT(msix_p != NULL);
132 
133 		/* Offset into the "inum"th entry in the MSI-X table */
134 		off = (uintptr_t)msix_p->msix_tbl_addr +
135 		    (inum  * PCI_MSIX_VECTOR_SIZE);
136 
137 		ddi_put32(msix_p->msix_tbl_hdl,
138 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
139 		ddi_put64(msix_p->msix_tbl_hdl,
140 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
141 	}
142 }
143 
144 /*
145  * This function returns the no. of vectors available for the pri.
146  * dip is not used at this moment.  If we really don't need that,
147  * it will be removed.
148  */
149 /*ARGSUSED*/
150 int
151 apic_navail_vector(dev_info_t *dip, int pri)
152 {
153 	int	lowest, highest, i, navail, count;
154 
155 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
156 	    (void *)dip, pri));
157 
158 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
159 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
160 	navail = count = 0;
161 
162 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
163 		lowest -= APIC_VECTOR_PER_IPL;
164 
165 	/* It has to be contiguous */
166 	for (i = lowest; i <= highest; i++) {
167 		count = 0;
168 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
169 		    (i <= highest)) {
170 			if (APIC_CHECK_RESERVE_VECTORS(i))
171 				break;
172 			count++;
173 			i++;
174 		}
175 		if (count > navail)
176 			navail = count;
177 	}
178 	return (navail);
179 }
180 
181 /*
182  * Finds "count" contiguous MSI vectors starting at the proper alignment
183  * at "pri".
184  * Caller needs to make sure that count has to be power of 2 and should not
185  * be < 1.
186  */
187 uchar_t
188 apic_find_multi_vectors(int pri, int count)
189 {
190 	int	lowest, highest, i, navail, start, msibits;
191 
192 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
193 	    pri, count));
194 
195 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
196 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
197 	navail = 0;
198 
199 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
200 		lowest -= APIC_VECTOR_PER_IPL;
201 
202 	/*
203 	 * msibits is the no. of lower order message data bits for the
204 	 * allocated MSI vectors and is used to calculate the aligned
205 	 * starting vector
206 	 */
207 	msibits = count - 1;
208 
209 	/* It has to be contiguous */
210 	for (i = lowest; i <= highest; i++) {
211 		navail = 0;
212 
213 		/*
214 		 * starting vector has to be aligned accordingly for
215 		 * multiple MSIs
216 		 */
217 		if (msibits)
218 			i = (i + msibits) & ~msibits;
219 		start = i;
220 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
221 		    (i <= highest)) {
222 			if (APIC_CHECK_RESERVE_VECTORS(i))
223 				break;
224 			navail++;
225 			if (navail >= count)
226 				return (start);
227 			i++;
228 		}
229 	}
230 	return (0);
231 }
232 
233 
234 /*
235  * It finds the apic_irq_t associates with the dip, ispec and type.
236  */
237 apic_irq_t *
238 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
239 {
240 	apic_irq_t	*irqp;
241 	int i;
242 
243 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
244 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
245 	    ispec->intrspec_pri, type));
246 
247 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
248 		for (irqp = apic_irq_table[i]; irqp; irqp = irqp->airq_next) {
249 			if ((irqp->airq_dip == dip) &&
250 			    (irqp->airq_origirq == ispec->intrspec_vec) &&
251 			    (irqp->airq_ipl == ispec->intrspec_pri)) {
252 				if (type == DDI_INTR_TYPE_MSI) {
253 					if (irqp->airq_mps_intr_index ==
254 					    MSI_INDEX)
255 						return (irqp);
256 				} else if (type == DDI_INTR_TYPE_MSIX) {
257 					if (irqp->airq_mps_intr_index ==
258 					    MSIX_INDEX)
259 						return (irqp);
260 				} else
261 					return (irqp);
262 			}
263 		}
264 	}
265 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
266 	return (NULL);
267 }
268 
269 /*
270  * This function will return the pending bit of the irqp.
271  * It either comes from the IRR register of the APIC or the RDT
272  * entry of the I/O APIC.
273  * For the IRR to work, it needs to be to its binding CPU
274  */
275 static int
276 apic_get_pending(apic_irq_t *irqp, int type)
277 {
278 	int			bit, index, irr, pending;
279 	int			intin_no;
280 	int			apic_ix;
281 
282 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
283 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
284 	    type));
285 
286 	/* need to get on the bound cpu */
287 	mutex_enter(&cpu_lock);
288 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
289 
290 	index = irqp->airq_vector / 32;
291 	bit = irqp->airq_vector % 32;
292 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
293 
294 	affinity_clear();
295 	mutex_exit(&cpu_lock);
296 
297 	pending = (irr & (1 << bit)) ? 1 : 0;
298 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
299 		/* check I/O APIC for fixed interrupt */
300 		intin_no = irqp->airq_intin_no;
301 		apic_ix = irqp->airq_ioapicindex;
302 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
303 		    AV_PENDING) ? 1 : 0;
304 	}
305 	return (pending);
306 }
307 
308 
309 /*
310  * This function will clear the mask for the interrupt on the I/O APIC
311  */
312 static void
313 apic_clear_mask(apic_irq_t *irqp)
314 {
315 	int			intin_no;
316 	ulong_t			iflag;
317 	int32_t			rdt_entry;
318 	int 			apic_ix;
319 
320 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
321 	    (void *)irqp));
322 
323 	intin_no = irqp->airq_intin_no;
324 	apic_ix = irqp->airq_ioapicindex;
325 
326 	iflag = intr_clear();
327 	lock_set(&apic_ioapic_lock);
328 
329 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
330 
331 	/* clear mask */
332 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
333 	    ((~AV_MASK) & rdt_entry));
334 
335 	lock_clear(&apic_ioapic_lock);
336 	intr_restore(iflag);
337 }
338 
339 
340 /*
341  * This function will mask the interrupt on the I/O APIC
342  */
343 static void
344 apic_set_mask(apic_irq_t *irqp)
345 {
346 	int			intin_no;
347 	int 			apic_ix;
348 	ulong_t			iflag;
349 	int32_t			rdt_entry;
350 
351 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
352 
353 	intin_no = irqp->airq_intin_no;
354 	apic_ix = irqp->airq_ioapicindex;
355 
356 	iflag = intr_clear();
357 
358 	lock_set(&apic_ioapic_lock);
359 
360 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
361 
362 	/* mask it */
363 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
364 	    (AV_MASK | rdt_entry));
365 
366 	lock_clear(&apic_ioapic_lock);
367 	intr_restore(iflag);
368 }
369 
370 
371 void
372 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
373 {
374 	int i;
375 	apic_irq_t *irqptr;
376 	struct intrspec ispec;
377 
378 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
379 	    "count: %x pri: %x type: %x\n",
380 	    (void *)dip, inum, count, pri, type));
381 
382 	/* for MSI/X only */
383 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
384 		return;
385 
386 	for (i = 0; i < count; i++) {
387 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
388 		    "pri=0x%x count=0x%x\n", inum, pri, count));
389 		ispec.intrspec_vec = inum + i;
390 		ispec.intrspec_pri = pri;
391 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
392 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
393 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
394 			    "failed\n", (void *)dip, inum, pri));
395 			continue;
396 		}
397 		irqptr->airq_mps_intr_index = FREE_INDEX;
398 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
399 	}
400 }
401 
402 /*
403  * apic_pci_msi_enable_mode:
404  */
405 void
406 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
407 {
408 	ushort_t		msi_ctrl;
409 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
410 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
411 
412 	ASSERT((handle != NULL) && (cap_ptr != 0));
413 
414 	if (type == DDI_INTR_TYPE_MSI) {
415 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
416 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
417 			return;
418 
419 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
420 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
421 
422 	} else if (type == DDI_INTR_TYPE_MSIX) {
423 		uintptr_t	off;
424 		uint32_t	mask;
425 		ddi_intr_msix_t	*msix_p;
426 
427 		msix_p = i_ddi_get_msix(rdip);
428 
429 		ASSERT(msix_p != NULL);
430 
431 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
432 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
433 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
434 
435 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
436 
437 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
438 
439 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
440 
441 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
442 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
443 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
444 			    msi_ctrl);
445 		}
446 	}
447 }
448 
449 static int
450 apic_set_cpu(int irqno, int cpu, int *result)
451 {
452 	apic_irq_t *irqp;
453 	ulong_t iflag;
454 	int ret;
455 
456 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
457 
458 	mutex_enter(&airq_mutex);
459 	irqp = apic_irq_table[irqno];
460 	mutex_exit(&airq_mutex);
461 
462 	if (irqp == NULL) {
463 		*result = ENXIO;
464 		return (PSM_FAILURE);
465 	}
466 
467 	/* Fail if this is an MSI intr and is part of a group. */
468 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
469 	    (irqp->airq_intin_no > 1)) {
470 		*result = ENXIO;
471 		return (PSM_FAILURE);
472 	}
473 
474 	iflag = intr_clear();
475 	lock_set(&apic_ioapic_lock);
476 
477 	ret = apic_rebind_all(irqp, cpu);
478 
479 	lock_clear(&apic_ioapic_lock);
480 	intr_restore(iflag);
481 
482 	if (ret) {
483 		*result = EIO;
484 		return (PSM_FAILURE);
485 	}
486 	/*
487 	 * keep tracking the default interrupt cpu binding
488 	 */
489 	irqp->airq_cpu = cpu;
490 
491 	*result = 0;
492 	return (PSM_SUCCESS);
493 }
494 
495 static int
496 apic_grp_set_cpu(int irqno, int new_cpu, int *result)
497 {
498 	dev_info_t *orig_dip;
499 	uint32_t orig_cpu;
500 	ulong_t iflag;
501 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
502 	int i;
503 	int cap_ptr;
504 	int msi_mask_off;
505 	ushort_t msi_ctrl;
506 	uint32_t msi_pvm;
507 	ddi_acc_handle_t handle;
508 	int num_vectors = 0;
509 	uint32_t vector;
510 
511 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
512 
513 	/*
514 	 * Take mutex to insure that table doesn't change out from underneath
515 	 * us while we're playing with it.
516 	 */
517 	mutex_enter(&airq_mutex);
518 	irqps[0] = apic_irq_table[irqno];
519 	orig_cpu = irqps[0]->airq_temp_cpu;
520 	orig_dip = irqps[0]->airq_dip;
521 	num_vectors = irqps[0]->airq_intin_no;
522 	vector = irqps[0]->airq_vector;
523 
524 	/* A "group" of 1 */
525 	if (num_vectors == 1) {
526 		mutex_exit(&airq_mutex);
527 		return (apic_set_cpu(irqno, new_cpu, result));
528 	}
529 
530 	*result = ENXIO;
531 
532 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
533 		mutex_exit(&airq_mutex);
534 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
535 		goto set_grp_intr_done;
536 	}
537 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
538 		mutex_exit(&airq_mutex);
539 		DDI_INTR_IMPLDBG((CE_CONT,
540 		    "set_grp: base vec not part of a grp or not aligned: "
541 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
542 		goto set_grp_intr_done;
543 	}
544 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
545 	    num_vectors));
546 
547 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
548 
549 	*result = EIO;
550 
551 	/*
552 	 * All IRQ entries in the table for the given device will be not
553 	 * shared.  Since they are not shared, the dip in the table will
554 	 * be true to the device of interest.
555 	 */
556 	for (i = 1; i < num_vectors; i++) {
557 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
558 		if (irqps[i] == NULL) {
559 			mutex_exit(&airq_mutex);
560 			goto set_grp_intr_done;
561 		}
562 #ifdef DEBUG
563 		/* Sanity check: CPU and dip is the same for all entries. */
564 		if ((irqps[i]->airq_dip != orig_dip) ||
565 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
566 			mutex_exit(&airq_mutex);
567 			DDI_INTR_IMPLDBG((CE_CONT,
568 			    "set_grp: cpu or dip for vec 0x%x difft than for "
569 			    "vec 0x%x\n", vector, vector + i));
570 			DDI_INTR_IMPLDBG((CE_CONT,
571 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
572 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
573 			    (void *)irqps[i]->airq_dip));
574 			goto set_grp_intr_done;
575 		}
576 #endif /* DEBUG */
577 	}
578 	mutex_exit(&airq_mutex);
579 
580 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
581 	handle = i_ddi_get_pci_config_handle(orig_dip);
582 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
583 
584 	/* MSI Per vector masking is supported. */
585 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
586 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
587 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
588 		else
589 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
590 		msi_pvm = pci_config_get32(handle, msi_mask_off);
591 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
592 		DDI_INTR_IMPLDBG((CE_CONT,
593 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
594 		    pci_config_get32(handle, msi_mask_off)));
595 	}
596 
597 	iflag = intr_clear();
598 	lock_set(&apic_ioapic_lock);
599 
600 	/*
601 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
602 	 * an error if the CPU is not accepting interrupts.  If the first one
603 	 * succeeds they all will.
604 	 */
605 	if (apic_rebind_all(irqps[0], new_cpu))
606 		(void) apic_rebind_all(irqps[0], orig_cpu);
607 	else {
608 		irqps[0]->airq_cpu = new_cpu;
609 
610 		for (i = 1; i < num_vectors; i++) {
611 			(void) apic_rebind_all(irqps[i], new_cpu);
612 			irqps[i]->airq_cpu = new_cpu;
613 		}
614 		*result = 0;	/* SUCCESS */
615 	}
616 
617 	lock_clear(&apic_ioapic_lock);
618 	intr_restore(iflag);
619 
620 	/* Reenable vectors if per vector masking is supported. */
621 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
622 		pci_config_put32(handle, msi_mask_off, msi_pvm);
623 		DDI_INTR_IMPLDBG((CE_CONT,
624 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
625 		    pci_config_get32(handle, msi_mask_off)));
626 	}
627 
628 set_grp_intr_done:
629 	if (*result != 0)
630 		return (PSM_FAILURE);
631 
632 	return (PSM_SUCCESS);
633 }
634 
635 int
636 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
637 {
638 	struct autovec *av_dev;
639 	uchar_t irqno;
640 	int i;
641 	apic_irq_t *irq_p;
642 
643 	/* Sanity check the vector/irq argument. */
644 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
645 
646 	mutex_enter(&airq_mutex);
647 
648 	/*
649 	 * Convert the vecirq arg to an irq using vector_to_irq table
650 	 * if the arg is a vector.  Pass thru if already an irq.
651 	 */
652 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
653 	    PSMGI_INTRBY_VEC)
654 		irqno = apic_vector_to_irq[vecirq];
655 	else
656 		irqno = vecirq;
657 
658 	irq_p = apic_irq_table[irqno];
659 
660 	if ((irq_p == NULL) ||
661 	    ((irq_p->airq_mps_intr_index != RESERVE_INDEX) &&
662 	    ((irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
663 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)))) {
664 		mutex_exit(&airq_mutex);
665 		return (PSM_FAILURE);
666 	}
667 
668 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
669 
670 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
671 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
672 
673 		/* Return user bound info for intrd. */
674 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
675 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
676 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
677 		}
678 	}
679 
680 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
681 		intr_params_p->avgi_vector = irq_p->airq_vector;
682 
683 	if (intr_params_p->avgi_req_flags &
684 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
685 		/* Get number of devices from apic_irq table shared field. */
686 		intr_params_p->avgi_num_devs = irq_p->airq_share;
687 
688 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
689 
690 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
691 
692 		/* Some devices have NULL dip.  Don't count these. */
693 		if (intr_params_p->avgi_num_devs > 0) {
694 			for (i = 0, av_dev = autovect[irqno].avh_link;
695 			    av_dev; av_dev = av_dev->av_link)
696 				if (av_dev->av_vector && av_dev->av_dip)
697 					i++;
698 			intr_params_p->avgi_num_devs =
699 			    MIN(intr_params_p->avgi_num_devs, i);
700 		}
701 
702 		/* There are no viable dips to return. */
703 		if (intr_params_p->avgi_num_devs == 0)
704 			intr_params_p->avgi_dip_list = NULL;
705 
706 		else {	/* Return list of dips */
707 
708 			/* Allocate space in array for that number of devs. */
709 			intr_params_p->avgi_dip_list = kmem_zalloc(
710 			    intr_params_p->avgi_num_devs *
711 			    sizeof (dev_info_t *),
712 			    KM_SLEEP);
713 
714 			/*
715 			 * Loop through the device list of the autovec table
716 			 * filling in the dip array.
717 			 *
718 			 * Note that the autovect table may have some special
719 			 * entries which contain NULL dips.  These will be
720 			 * ignored.
721 			 */
722 			for (i = 0, av_dev = autovect[irqno].avh_link;
723 			    av_dev; av_dev = av_dev->av_link)
724 				if (av_dev->av_vector && av_dev->av_dip)
725 					intr_params_p->avgi_dip_list[i++] =
726 					    av_dev->av_dip;
727 		}
728 	}
729 
730 	mutex_exit(&airq_mutex);
731 
732 	return (PSM_SUCCESS);
733 }
734 
735 /*
736  * This function provides external interface to the nexus for all
737  * functionalities related to the new DDI interrupt framework.
738  *
739  * Input:
740  * dip     - pointer to the dev_info structure of the requested device
741  * hdlp    - pointer to the internal interrupt handle structure for the
742  *	     requested interrupt
743  * intr_op - opcode for this call
744  * result  - pointer to the integer that will hold the result to be
745  *	     passed back if return value is PSM_SUCCESS
746  *
747  * Output:
748  * return value is either PSM_SUCCESS or PSM_FAILURE
749  */
750 int
751 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
752     psm_intr_op_t intr_op, int *result)
753 {
754 	int		cap;
755 	int		count_vec;
756 	int		old_priority;
757 	int		new_priority;
758 	int		new_cpu;
759 	apic_irq_t	*irqp;
760 	struct intrspec *ispec, intr_spec;
761 
762 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
763 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
764 
765 	ispec = &intr_spec;
766 	ispec->intrspec_pri = hdlp->ih_pri;
767 	ispec->intrspec_vec = hdlp->ih_inum;
768 	ispec->intrspec_func = hdlp->ih_cb_func;
769 
770 	switch (intr_op) {
771 	case PSM_INTR_OP_CHECK_MSI:
772 		/*
773 		 * Check MSI/X is supported or not at APIC level and
774 		 * masked off the MSI/X bits in hdlp->ih_type if not
775 		 * supported before return.  If MSI/X is supported,
776 		 * leave the ih_type unchanged and return.
777 		 *
778 		 * hdlp->ih_type passed in from the nexus has all the
779 		 * interrupt types supported by the device.
780 		 */
781 		if (apic_support_msi == 0) {
782 			/*
783 			 * if apic_support_msi is not set, call
784 			 * apic_check_msi_support() to check whether msi
785 			 * is supported first
786 			 */
787 			if (apic_check_msi_support() == PSM_SUCCESS)
788 				apic_support_msi = 1;
789 			else
790 				apic_support_msi = -1;
791 		}
792 		if (apic_support_msi == 1) {
793 			if (apic_msix_enable)
794 				*result = hdlp->ih_type;
795 			else
796 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
797 		} else
798 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
799 			    DDI_INTR_TYPE_MSIX);
800 		break;
801 	case PSM_INTR_OP_ALLOC_VECTORS:
802 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
803 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
804 			    hdlp->ih_scratch1, hdlp->ih_pri,
805 			    (int)(uintptr_t)hdlp->ih_scratch2);
806 		else
807 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
808 			    hdlp->ih_scratch1, hdlp->ih_pri,
809 			    (int)(uintptr_t)hdlp->ih_scratch2);
810 		break;
811 	case PSM_INTR_OP_FREE_VECTORS:
812 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
813 		    hdlp->ih_pri, hdlp->ih_type);
814 		break;
815 	case PSM_INTR_OP_NAVAIL_VECTORS:
816 		*result = apic_navail_vector(dip, hdlp->ih_pri);
817 		break;
818 	case PSM_INTR_OP_XLATE_VECTOR:
819 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
820 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
821 		if (*result == -1)
822 			return (PSM_FAILURE);
823 		break;
824 	case PSM_INTR_OP_GET_PENDING:
825 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
826 			return (PSM_FAILURE);
827 		*result = apic_get_pending(irqp, hdlp->ih_type);
828 		break;
829 	case PSM_INTR_OP_CLEAR_MASK:
830 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
831 			return (PSM_FAILURE);
832 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
833 		if (irqp == NULL)
834 			return (PSM_FAILURE);
835 		apic_clear_mask(irqp);
836 		break;
837 	case PSM_INTR_OP_SET_MASK:
838 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
839 			return (PSM_FAILURE);
840 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
841 			return (PSM_FAILURE);
842 		apic_set_mask(irqp);
843 		break;
844 	case PSM_INTR_OP_GET_CAP:
845 		cap = DDI_INTR_FLAG_PENDING;
846 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
847 			cap |= DDI_INTR_FLAG_MASKABLE;
848 		*result = cap;
849 		break;
850 	case PSM_INTR_OP_GET_SHARED:
851 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
852 			return (PSM_FAILURE);
853 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
854 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
855 			return (PSM_FAILURE);
856 		*result = (irqp->airq_share > 1) ? 1: 0;
857 		break;
858 	case PSM_INTR_OP_SET_PRI:
859 		old_priority = hdlp->ih_pri;	/* save old value */
860 		new_priority = *(int *)result;	/* try the new value */
861 
862 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
863 			return (PSM_SUCCESS);
864 		}
865 
866 		/* Now allocate the vectors */
867 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI) {
868 			/* SET_PRI does not support the case of multiple MSI */
869 			if (i_ddi_intr_get_current_nintrs(hdlp->ih_dip) > 1)
870 				return (PSM_FAILURE);
871 
872 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
873 			    1, new_priority,
874 			    DDI_INTR_ALLOC_STRICT);
875 		} else {
876 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
877 			    1, new_priority,
878 			    DDI_INTR_ALLOC_STRICT);
879 		}
880 
881 		/* Did we get new vectors? */
882 		if (!count_vec)
883 			return (PSM_FAILURE);
884 
885 		/* Finally, free the previously allocated vectors */
886 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
887 		    old_priority, hdlp->ih_type);
888 		break;
889 	case PSM_INTR_OP_SET_CPU:
890 	case PSM_INTR_OP_GRP_SET_CPU:
891 		/*
892 		 * The interrupt handle given here has been allocated
893 		 * specifically for this command, and ih_private carries
894 		 * a CPU value.
895 		 */
896 		new_cpu = (int)(intptr_t)hdlp->ih_private;
897 		if (!apic_cpu_in_range(new_cpu)) {
898 			DDI_INTR_IMPLDBG((CE_CONT,
899 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
900 			*result = EINVAL;
901 			return (PSM_FAILURE);
902 		}
903 		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
904 			DDI_INTR_IMPLDBG((CE_CONT,
905 			    "[grp_]set_cpu: vector out of range: %d\n",
906 			    hdlp->ih_vector));
907 			*result = EINVAL;
908 			return (PSM_FAILURE);
909 		}
910 		if ((hdlp->ih_flags & PSMGI_INTRBY_FLAGS) == PSMGI_INTRBY_VEC)
911 			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
912 		if (intr_op == PSM_INTR_OP_SET_CPU) {
913 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
914 			    PSM_SUCCESS)
915 				return (PSM_FAILURE);
916 		} else {
917 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
918 			    result) != PSM_SUCCESS)
919 				return (PSM_FAILURE);
920 		}
921 		break;
922 	case PSM_INTR_OP_GET_INTR:
923 		/*
924 		 * The interrupt handle given here has been allocated
925 		 * specifically for this command, and ih_private carries
926 		 * a pointer to a apic_get_intr_t.
927 		 */
928 		if (apic_get_vector_intr_info(
929 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
930 			return (PSM_FAILURE);
931 		break;
932 	case PSM_INTR_OP_APIC_TYPE:
933 		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
934 		    apic_get_apic_type();
935 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
936 		    APIC_MAX_VECTOR;
937 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
938 		    boot_ncpus;
939 		hdlp->ih_ver = apic_get_apic_version();
940 		break;
941 	case PSM_INTR_OP_SET_CAP:
942 	default:
943 		return (PSM_FAILURE);
944 	}
945 	return (PSM_SUCCESS);
946 }
947