xref: /illumos-gate/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision 3fe455549728ac525df3be56130ad8e075d645d7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Pluribus Networks, Inc.
24  * Copyright 2017 Joyent, Inc.
25  */
26 
27 /*
28  * apic_introp.c:
29  *	Has code for Advanced DDI interrupt framework support.
30  */
31 
32 #include <sys/cpuvar.h>
33 #include <sys/psm.h>
34 #include <sys/archsystm.h>
35 #include <sys/apic.h>
36 #include <sys/sunddi.h>
37 #include <sys/ddi_impldefs.h>
38 #include <sys/mach_intr.h>
39 #include <sys/sysmacros.h>
40 #include <sys/trap.h>
41 #include <sys/pci.h>
42 #include <sys/pci_intr_lib.h>
43 #include <sys/apic_common.h>
44 
45 extern struct av_head autovect[];
46 
47 /*
48  *	Local Function Prototypes
49  */
50 apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);
51 
52 /*
53  * apic_pci_msi_enable_vector:
54  *	Set the address/data fields in the MSI/X capability structure
55  *	XXX: MSI-X support
56  */
57 /* ARGSUSED */
58 void
59 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
60     int count, int target_apic_id)
61 {
62 	uint64_t		msi_addr, msi_data;
63 	ushort_t		msi_ctrl;
64 	dev_info_t		*dip = irq_ptr->airq_dip;
65 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
66 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
67 	msi_regs_t		msi_regs;
68 	int			irqno, i;
69 	void			*intrmap_tbl[PCI_MSI_MAX_INTRS];
70 
71 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
72 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
73 	    ddi_driver_name(dip), inum, vector, target_apic_id));
74 
75 	ASSERT((handle != NULL) && (cap_ptr != 0));
76 
77 	msi_regs.mr_data = vector;
78 	msi_regs.mr_addr = target_apic_id;
79 
80 	for (i = 0; i < count; i++) {
81 		irqno = apic_vector_to_irq[vector + i];
82 		intrmap_tbl[i] = apic_irq_table[irqno]->airq_intrmap_private;
83 	}
84 	apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
85 	    count, 0xff);
86 	for (i = 0; i < count; i++) {
87 		irqno = apic_vector_to_irq[vector + i];
88 		apic_irq_table[irqno]->airq_intrmap_private =
89 		    intrmap_tbl[i];
90 	}
91 
92 	apic_vt_ops->apic_intrmap_map_entry(irq_ptr->airq_intrmap_private,
93 	    (void *)&msi_regs, type, count);
94 	apic_vt_ops->apic_intrmap_record_msi(irq_ptr->airq_intrmap_private,
95 	    &msi_regs);
96 
97 	/* MSI Address */
98 	msi_addr = msi_regs.mr_addr;
99 
100 	/* MSI Data: MSI is edge triggered according to spec */
101 	msi_data = msi_regs.mr_data;
102 
103 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
104 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
105 
106 	if (type == DDI_INTR_TYPE_MSI) {
107 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
108 
109 		/* Set the bits to inform how many MSIs are enabled */
110 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
111 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
112 
113 		/*
114 		 * Only set vector if not on hypervisor
115 		 */
116 		pci_config_put32(handle,
117 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
118 
119 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
120 			pci_config_put32(handle,
121 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
122 			pci_config_put16(handle,
123 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
124 		} else {
125 			pci_config_put16(handle,
126 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
127 		}
128 
129 	} else if (type == DDI_INTR_TYPE_MSIX) {
130 		uintptr_t	off;
131 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
132 
133 		ASSERT(msix_p != NULL);
134 
135 		/* Offset into the "inum"th entry in the MSI-X table */
136 		off = (uintptr_t)msix_p->msix_tbl_addr +
137 		    (inum  * PCI_MSIX_VECTOR_SIZE);
138 
139 		ddi_put32(msix_p->msix_tbl_hdl,
140 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
141 		ddi_put32(msix_p->msix_tbl_hdl,
142 		    (uint32_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
143 		ddi_put32(msix_p->msix_tbl_hdl,
144 		    (uint32_t *)(off + PCI_MSIX_UPPER_ADDR_OFFSET),
145 		    msi_addr >> 32);
146 	}
147 }
148 
149 /*
150  * This function returns the no. of vectors available for the pri.
151  * dip is not used at this moment.  If we really don't need that,
152  * it will be removed.
153  */
154 /*ARGSUSED*/
155 int
156 apic_navail_vector(dev_info_t *dip, int pri)
157 {
158 	int	lowest, highest, i, navail, count;
159 
160 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
161 	    (void *)dip, pri));
162 
163 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
164 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
165 	navail = count = 0;
166 
167 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
168 		lowest -= APIC_VECTOR_PER_IPL;
169 
170 	/* It has to be contiguous */
171 	for (i = lowest; i <= highest; i++) {
172 		count = 0;
173 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
174 		    (i <= highest)) {
175 			if (APIC_CHECK_RESERVE_VECTORS(i))
176 				break;
177 			count++;
178 			i++;
179 		}
180 		if (count > navail)
181 			navail = count;
182 	}
183 	return (navail);
184 }
185 
186 /*
187  * Finds "count" contiguous MSI vectors starting at the proper alignment
188  * at "pri".
189  * Caller needs to make sure that count has to be power of 2 and should not
190  * be < 1.
191  */
192 uchar_t
193 apic_find_multi_vectors(int pri, int count)
194 {
195 	int	lowest, highest, i, navail, start, msibits;
196 
197 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
198 	    pri, count));
199 
200 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
201 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
202 	navail = 0;
203 
204 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
205 		lowest -= APIC_VECTOR_PER_IPL;
206 
207 	/*
208 	 * msibits is the no. of lower order message data bits for the
209 	 * allocated MSI vectors and is used to calculate the aligned
210 	 * starting vector
211 	 */
212 	msibits = count - 1;
213 
214 	/* It has to be contiguous */
215 	for (i = lowest; i <= highest; i++) {
216 		navail = 0;
217 
218 		/*
219 		 * starting vector has to be aligned accordingly for
220 		 * multiple MSIs
221 		 */
222 		if (msibits)
223 			i = (i + msibits) & ~msibits;
224 		start = i;
225 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
226 		    (i <= highest)) {
227 			if (APIC_CHECK_RESERVE_VECTORS(i))
228 				break;
229 			navail++;
230 			if (navail >= count) {
231 				ASSERT(start >= 0 && start <= UCHAR_MAX);
232 				return ((uchar_t)start);
233 			}
234 			i++;
235 		}
236 	}
237 	return (0);
238 }
239 
240 
241 /*
242  * It finds the apic_irq_t associates with the dip, ispec and type.
243  */
244 apic_irq_t *
245 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
246 {
247 	apic_irq_t	*irqp;
248 	int i;
249 
250 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
251 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
252 	    ispec->intrspec_pri, type));
253 
254 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
255 		for (irqp = apic_irq_table[i]; irqp; irqp = irqp->airq_next) {
256 			if ((irqp->airq_dip == dip) &&
257 			    (irqp->airq_origirq == ispec->intrspec_vec) &&
258 			    (irqp->airq_ipl == ispec->intrspec_pri)) {
259 				if (type == DDI_INTR_TYPE_MSI) {
260 					if (irqp->airq_mps_intr_index ==
261 					    MSI_INDEX)
262 						return (irqp);
263 				} else if (type == DDI_INTR_TYPE_MSIX) {
264 					if (irqp->airq_mps_intr_index ==
265 					    MSIX_INDEX)
266 						return (irqp);
267 				} else
268 					return (irqp);
269 			}
270 		}
271 	}
272 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
273 	return (NULL);
274 }
275 
276 /*
277  * This function will return the pending bit of the irqp.
278  * It either comes from the IRR register of the APIC or the RDT
279  * entry of the I/O APIC.
280  * For the IRR to work, it needs to be to its binding CPU
281  */
282 static int
283 apic_get_pending(apic_irq_t *irqp, int type)
284 {
285 	int			bit, index, irr, pending;
286 	int			intin_no;
287 	int			apic_ix;
288 
289 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
290 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
291 	    type));
292 
293 	/* need to get on the bound cpu */
294 	mutex_enter(&cpu_lock);
295 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
296 
297 	index = irqp->airq_vector / 32;
298 	bit = irqp->airq_vector % 32;
299 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
300 
301 	affinity_clear();
302 	mutex_exit(&cpu_lock);
303 
304 	pending = (irr & (1 << bit)) ? 1 : 0;
305 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
306 		/* check I/O APIC for fixed interrupt */
307 		intin_no = irqp->airq_intin_no;
308 		apic_ix = irqp->airq_ioapicindex;
309 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
310 		    AV_PENDING) ? 1 : 0;
311 	}
312 	return (pending);
313 }
314 
315 
316 /*
317  * This function will clear the mask for the interrupt on the I/O APIC
318  */
319 static void
320 apic_clear_mask(apic_irq_t *irqp)
321 {
322 	int			intin_no;
323 	ulong_t			iflag;
324 	int32_t			rdt_entry;
325 	int			apic_ix;
326 
327 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
328 	    (void *)irqp));
329 
330 	intin_no = irqp->airq_intin_no;
331 	apic_ix = irqp->airq_ioapicindex;
332 
333 	iflag = intr_clear();
334 	lock_set(&apic_ioapic_lock);
335 
336 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
337 
338 	/* clear mask */
339 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
340 	    ((~AV_MASK) & rdt_entry));
341 
342 	lock_clear(&apic_ioapic_lock);
343 	intr_restore(iflag);
344 }
345 
346 
347 /*
348  * This function will mask the interrupt on the I/O APIC
349  */
350 static void
351 apic_set_mask(apic_irq_t *irqp)
352 {
353 	int			intin_no;
354 	int			apic_ix;
355 	ulong_t			iflag;
356 	int32_t			rdt_entry;
357 
358 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
359 
360 	intin_no = irqp->airq_intin_no;
361 	apic_ix = irqp->airq_ioapicindex;
362 
363 	iflag = intr_clear();
364 
365 	lock_set(&apic_ioapic_lock);
366 
367 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
368 
369 	/* mask it */
370 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
371 	    (AV_MASK | rdt_entry));
372 
373 	lock_clear(&apic_ioapic_lock);
374 	intr_restore(iflag);
375 }
376 
377 
378 void
379 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
380 {
381 	int i;
382 	apic_irq_t *irqptr;
383 	struct intrspec ispec;
384 
385 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
386 	    "count: %x pri: %x type: %x\n",
387 	    (void *)dip, inum, count, pri, type));
388 
389 	/* for MSI/X only */
390 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
391 		return;
392 
393 	for (i = 0; i < count; i++) {
394 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
395 		    "pri=0x%x count=0x%x\n", inum, pri, count));
396 		ispec.intrspec_vec = inum + i;
397 		ispec.intrspec_pri = pri;
398 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
399 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
400 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
401 			    "failed\n", (void *)dip, inum, pri));
402 			continue;
403 		}
404 		irqptr->airq_mps_intr_index = FREE_INDEX;
405 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
406 	}
407 }
408 
409 /*
410  * apic_pci_msi_enable_mode:
411  */
412 void
413 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
414 {
415 	ushort_t		msi_ctrl;
416 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
417 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
418 
419 	ASSERT((handle != NULL) && (cap_ptr != 0));
420 
421 	if (type == DDI_INTR_TYPE_MSI) {
422 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
423 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
424 			return;
425 
426 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
427 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
428 
429 	} else if (type == DDI_INTR_TYPE_MSIX) {
430 		uintptr_t	off;
431 		uint32_t	mask;
432 		ddi_intr_msix_t	*msix_p;
433 
434 		msix_p = i_ddi_get_msix(rdip);
435 
436 		ASSERT(msix_p != NULL);
437 
438 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
439 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
440 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
441 
442 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
443 
444 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
445 
446 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
447 
448 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
449 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
450 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
451 			    msi_ctrl);
452 		}
453 	}
454 }
455 
456 static int
457 apic_set_cpu(int irqno, int cpu, int *result)
458 {
459 	apic_irq_t *irqp;
460 	ulong_t iflag;
461 	int ret;
462 
463 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
464 
465 	mutex_enter(&airq_mutex);
466 	irqp = apic_irq_table[irqno];
467 	mutex_exit(&airq_mutex);
468 
469 	if (irqp == NULL) {
470 		*result = ENXIO;
471 		return (PSM_FAILURE);
472 	}
473 
474 	/* Fail if this is an MSI intr and is part of a group. */
475 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
476 	    (irqp->airq_intin_no > 1)) {
477 		*result = ENXIO;
478 		return (PSM_FAILURE);
479 	}
480 
481 	iflag = intr_clear();
482 	lock_set(&apic_ioapic_lock);
483 
484 	ret = apic_rebind_all(irqp, cpu);
485 
486 	lock_clear(&apic_ioapic_lock);
487 	intr_restore(iflag);
488 
489 	if (ret) {
490 		*result = EIO;
491 		return (PSM_FAILURE);
492 	}
493 	/*
494 	 * keep tracking the default interrupt cpu binding
495 	 */
496 	irqp->airq_cpu = cpu;
497 
498 	*result = 0;
499 	return (PSM_SUCCESS);
500 }
501 
502 static int
503 apic_grp_set_cpu(int irqno, int new_cpu, int *result)
504 {
505 	dev_info_t *orig_dip;
506 	uint32_t orig_cpu;
507 	ulong_t iflag;
508 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
509 	int i;
510 	int cap_ptr;
511 	int msi_mask_off = 0;
512 	ushort_t msi_ctrl;
513 	uint32_t msi_pvm = 0;
514 	ddi_acc_handle_t handle;
515 	int num_vectors = 0;
516 	uint32_t vector;
517 
518 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
519 
520 	/*
521 	 * Take mutex to insure that table doesn't change out from underneath
522 	 * us while we're playing with it.
523 	 */
524 	mutex_enter(&airq_mutex);
525 	irqps[0] = apic_irq_table[irqno];
526 	orig_cpu = irqps[0]->airq_temp_cpu;
527 	orig_dip = irqps[0]->airq_dip;
528 	num_vectors = irqps[0]->airq_intin_no;
529 	vector = irqps[0]->airq_vector;
530 
531 	/* A "group" of 1 */
532 	if (num_vectors == 1) {
533 		mutex_exit(&airq_mutex);
534 		return (apic_set_cpu(irqno, new_cpu, result));
535 	}
536 
537 	*result = ENXIO;
538 
539 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
540 		mutex_exit(&airq_mutex);
541 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
542 		goto set_grp_intr_done;
543 	}
544 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
545 		mutex_exit(&airq_mutex);
546 		DDI_INTR_IMPLDBG((CE_CONT,
547 		    "set_grp: base vec not part of a grp or not aligned: "
548 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
549 		goto set_grp_intr_done;
550 	}
551 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
552 	    num_vectors));
553 
554 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
555 
556 	*result = EIO;
557 
558 	/*
559 	 * All IRQ entries in the table for the given device will be not
560 	 * shared.  Since they are not shared, the dip in the table will
561 	 * be true to the device of interest.
562 	 */
563 	for (i = 1; i < num_vectors; i++) {
564 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
565 		if (irqps[i] == NULL) {
566 			mutex_exit(&airq_mutex);
567 			goto set_grp_intr_done;
568 		}
569 #ifdef DEBUG
570 		/* Sanity check: CPU and dip is the same for all entries. */
571 		if ((irqps[i]->airq_dip != orig_dip) ||
572 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
573 			mutex_exit(&airq_mutex);
574 			DDI_INTR_IMPLDBG((CE_CONT,
575 			    "set_grp: cpu or dip for vec 0x%x difft than for "
576 			    "vec 0x%x\n", vector, vector + i));
577 			DDI_INTR_IMPLDBG((CE_CONT,
578 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
579 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
580 			    (void *)irqps[i]->airq_dip));
581 			goto set_grp_intr_done;
582 		}
583 #endif /* DEBUG */
584 	}
585 	mutex_exit(&airq_mutex);
586 
587 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
588 	handle = i_ddi_get_pci_config_handle(orig_dip);
589 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
590 
591 	/* MSI Per vector masking is supported. */
592 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
593 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
594 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
595 		else
596 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
597 		msi_pvm = pci_config_get32(handle, msi_mask_off);
598 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
599 		DDI_INTR_IMPLDBG((CE_CONT,
600 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
601 		    pci_config_get32(handle, msi_mask_off)));
602 	}
603 
604 	iflag = intr_clear();
605 	lock_set(&apic_ioapic_lock);
606 
607 	/*
608 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
609 	 * an error if the CPU is not accepting interrupts.  If the first one
610 	 * succeeds they all will.
611 	 */
612 	if (apic_rebind_all(irqps[0], new_cpu))
613 		(void) apic_rebind_all(irqps[0], orig_cpu);
614 	else {
615 		irqps[0]->airq_cpu = new_cpu;
616 
617 		for (i = 1; i < num_vectors; i++) {
618 			(void) apic_rebind_all(irqps[i], new_cpu);
619 			irqps[i]->airq_cpu = new_cpu;
620 		}
621 		*result = 0;	/* SUCCESS */
622 	}
623 
624 	lock_clear(&apic_ioapic_lock);
625 	intr_restore(iflag);
626 
627 	/* Reenable vectors if per vector masking is supported. */
628 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
629 		pci_config_put32(handle, msi_mask_off, msi_pvm);
630 		DDI_INTR_IMPLDBG((CE_CONT,
631 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
632 		    pci_config_get32(handle, msi_mask_off)));
633 	}
634 
635 set_grp_intr_done:
636 	if (*result != 0)
637 		return (PSM_FAILURE);
638 
639 	return (PSM_SUCCESS);
640 }
641 
642 int
643 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
644 {
645 	struct autovec *av_dev;
646 	uchar_t irqno;
647 	uint_t i;
648 	apic_irq_t *irq_p;
649 
650 	/* Sanity check the vector/irq argument. */
651 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
652 
653 	mutex_enter(&airq_mutex);
654 
655 	/*
656 	 * Convert the vecirq arg to an irq using vector_to_irq table
657 	 * if the arg is a vector.  Pass thru if already an irq.
658 	 */
659 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
660 	    PSMGI_INTRBY_VEC)
661 		irqno = apic_vector_to_irq[vecirq];
662 	else
663 		irqno = (uchar_t)vecirq;
664 
665 	irq_p = apic_irq_table[irqno];
666 
667 	if ((irq_p == NULL) ||
668 	    ((irq_p->airq_mps_intr_index != RESERVE_INDEX) &&
669 	    ((irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
670 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)))) {
671 		mutex_exit(&airq_mutex);
672 		return (PSM_FAILURE);
673 	}
674 
675 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
676 
677 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
678 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
679 
680 		/* Return user bound info for intrd. */
681 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
682 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
683 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
684 		}
685 	}
686 
687 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
688 		intr_params_p->avgi_vector = irq_p->airq_vector;
689 
690 	if (intr_params_p->avgi_req_flags &
691 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
692 		/* Get number of devices from apic_irq table shared field. */
693 		intr_params_p->avgi_num_devs = irq_p->airq_share;
694 
695 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
696 
697 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
698 
699 		/* Some devices have NULL dip.  Don't count these. */
700 		if (intr_params_p->avgi_num_devs > 0) {
701 			for (i = 0, av_dev = autovect[irqno].avh_link;
702 			    av_dev; av_dev = av_dev->av_link)
703 				if (av_dev->av_vector && av_dev->av_dip)
704 					i++;
705 			intr_params_p->avgi_num_devs =
706 			    (uchar_t)MIN(intr_params_p->avgi_num_devs, i);
707 		}
708 
709 		/* There are no viable dips to return. */
710 		if (intr_params_p->avgi_num_devs == 0)
711 			intr_params_p->avgi_dip_list = NULL;
712 
713 		else {	/* Return list of dips */
714 
715 			/* Allocate space in array for that number of devs. */
716 			intr_params_p->avgi_dip_list = kmem_zalloc(
717 			    intr_params_p->avgi_num_devs *
718 			    sizeof (dev_info_t *),
719 			    KM_SLEEP);
720 
721 			/*
722 			 * Loop through the device list of the autovec table
723 			 * filling in the dip array.
724 			 *
725 			 * Note that the autovect table may have some special
726 			 * entries which contain NULL dips.  These will be
727 			 * ignored.
728 			 */
729 			for (i = 0, av_dev = autovect[irqno].avh_link;
730 			    av_dev; av_dev = av_dev->av_link)
731 				if (av_dev->av_vector && av_dev->av_dip)
732 					intr_params_p->avgi_dip_list[i++] =
733 					    av_dev->av_dip;
734 		}
735 	}
736 
737 	mutex_exit(&airq_mutex);
738 
739 	return (PSM_SUCCESS);
740 }
741 
742 /*
743  * This function provides external interface to the nexus for all
744  * functionalities related to the new DDI interrupt framework.
745  *
746  * Input:
747  * dip     - pointer to the dev_info structure of the requested device
748  * hdlp    - pointer to the internal interrupt handle structure for the
749  *	     requested interrupt
750  * intr_op - opcode for this call
751  * result  - pointer to the integer that will hold the result to be
752  *	     passed back if return value is PSM_SUCCESS
753  *
754  * Output:
755  * return value is either PSM_SUCCESS or PSM_FAILURE
756  */
757 int
758 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
759     psm_intr_op_t intr_op, int *result)
760 {
761 	int		cap;
762 	int		count_vec;
763 	int		old_priority;
764 	int		new_priority;
765 	int		new_cpu;
766 	apic_irq_t	*irqp;
767 	struct intrspec *ispec, intr_spec;
768 
769 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
770 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
771 
772 	ispec = &intr_spec;
773 	ispec->intrspec_pri = hdlp->ih_pri;
774 	ispec->intrspec_vec = hdlp->ih_inum;
775 	ispec->intrspec_func = hdlp->ih_cb_func;
776 
777 	switch (intr_op) {
778 	case PSM_INTR_OP_CHECK_MSI:
779 		/*
780 		 * Check MSI/X is supported or not at APIC level and
781 		 * masked off the MSI/X bits in hdlp->ih_type if not
782 		 * supported before return.  If MSI/X is supported,
783 		 * leave the ih_type unchanged and return.
784 		 *
785 		 * hdlp->ih_type passed in from the nexus has all the
786 		 * interrupt types supported by the device.
787 		 */
788 		if (apic_support_msi == 0) {
789 			/*
790 			 * if apic_support_msi is not set, call
791 			 * apic_check_msi_support() to check whether msi
792 			 * is supported first
793 			 */
794 			if (apic_check_msi_support() == PSM_SUCCESS)
795 				apic_support_msi = 1;
796 			else
797 				apic_support_msi = -1;
798 		}
799 		if (apic_support_msi == 1) {
800 			if (apic_msix_enable)
801 				*result = hdlp->ih_type;
802 			else
803 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
804 		} else
805 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
806 			    DDI_INTR_TYPE_MSIX);
807 		break;
808 	case PSM_INTR_OP_ALLOC_VECTORS:
809 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
810 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
811 			    hdlp->ih_scratch1, hdlp->ih_pri,
812 			    (int)(uintptr_t)hdlp->ih_scratch2);
813 		else
814 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
815 			    hdlp->ih_scratch1, hdlp->ih_pri,
816 			    (int)(uintptr_t)hdlp->ih_scratch2);
817 		break;
818 	case PSM_INTR_OP_FREE_VECTORS:
819 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
820 		    hdlp->ih_pri, hdlp->ih_type);
821 		break;
822 	case PSM_INTR_OP_NAVAIL_VECTORS:
823 		*result = apic_navail_vector(dip, hdlp->ih_pri);
824 		break;
825 	case PSM_INTR_OP_XLATE_VECTOR:
826 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
827 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
828 		if (*result == -1)
829 			return (PSM_FAILURE);
830 		break;
831 	case PSM_INTR_OP_GET_PENDING:
832 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
833 			return (PSM_FAILURE);
834 		*result = apic_get_pending(irqp, hdlp->ih_type);
835 		break;
836 	case PSM_INTR_OP_CLEAR_MASK:
837 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
838 			return (PSM_FAILURE);
839 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
840 		if (irqp == NULL)
841 			return (PSM_FAILURE);
842 		apic_clear_mask(irqp);
843 		break;
844 	case PSM_INTR_OP_SET_MASK:
845 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
846 			return (PSM_FAILURE);
847 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
848 			return (PSM_FAILURE);
849 		apic_set_mask(irqp);
850 		break;
851 	case PSM_INTR_OP_GET_CAP:
852 		cap = DDI_INTR_FLAG_PENDING;
853 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
854 			cap |= DDI_INTR_FLAG_MASKABLE;
855 		*result = cap;
856 		break;
857 	case PSM_INTR_OP_GET_SHARED:
858 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
859 			return (PSM_FAILURE);
860 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
861 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
862 			return (PSM_FAILURE);
863 		*result = (irqp->airq_share > 1) ? 1: 0;
864 		break;
865 	case PSM_INTR_OP_SET_PRI:
866 		old_priority = hdlp->ih_pri;	/* save old value */
867 		new_priority = *(int *)result;	/* try the new value */
868 
869 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
870 			return (PSM_SUCCESS);
871 		}
872 
873 		/* Now allocate the vectors */
874 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI) {
875 			/* SET_PRI does not support the case of multiple MSI */
876 			if (i_ddi_intr_get_current_nintrs(hdlp->ih_dip) > 1)
877 				return (PSM_FAILURE);
878 
879 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
880 			    1, new_priority,
881 			    DDI_INTR_ALLOC_STRICT);
882 		} else {
883 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
884 			    1, new_priority,
885 			    DDI_INTR_ALLOC_STRICT);
886 		}
887 
888 		/* Did we get new vectors? */
889 		if (!count_vec)
890 			return (PSM_FAILURE);
891 
892 		/* Finally, free the previously allocated vectors */
893 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
894 		    old_priority, hdlp->ih_type);
895 		break;
896 	case PSM_INTR_OP_SET_CPU:
897 	case PSM_INTR_OP_GRP_SET_CPU:
898 		/*
899 		 * The interrupt handle given here has been allocated
900 		 * specifically for this command, and ih_private carries
901 		 * a CPU value.
902 		 */
903 		new_cpu = (int)(intptr_t)hdlp->ih_private;
904 		if (!apic_cpu_in_range(new_cpu)) {
905 			DDI_INTR_IMPLDBG((CE_CONT,
906 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
907 			*result = EINVAL;
908 			return (PSM_FAILURE);
909 		}
910 		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
911 			DDI_INTR_IMPLDBG((CE_CONT,
912 			    "[grp_]set_cpu: vector out of range: %d\n",
913 			    hdlp->ih_vector));
914 			*result = EINVAL;
915 			return (PSM_FAILURE);
916 		}
917 		if ((hdlp->ih_flags & PSMGI_INTRBY_FLAGS) == PSMGI_INTRBY_VEC)
918 			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
919 		if (intr_op == PSM_INTR_OP_SET_CPU) {
920 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
921 			    PSM_SUCCESS)
922 				return (PSM_FAILURE);
923 		} else {
924 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
925 			    result) != PSM_SUCCESS)
926 				return (PSM_FAILURE);
927 		}
928 		break;
929 	case PSM_INTR_OP_GET_INTR:
930 		/*
931 		 * The interrupt handle given here has been allocated
932 		 * specifically for this command, and ih_private carries
933 		 * a pointer to a apic_get_intr_t.
934 		 */
935 		if (apic_get_vector_intr_info(
936 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
937 			return (PSM_FAILURE);
938 		break;
939 	case PSM_INTR_OP_APIC_TYPE:
940 		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
941 		    apic_get_apic_type();
942 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
943 		    APIC_MAX_VECTOR;
944 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
945 		    boot_ncpus;
946 		hdlp->ih_ver = apic_get_apic_version();
947 		break;
948 	case PSM_INTR_OP_SET_CAP:
949 	default:
950 		return (PSM_FAILURE);
951 	}
952 	return (PSM_SUCCESS);
953 }
954