xref: /illumos-gate/usr/src/uts/i86pc/io/pcplusmp/apic_introp.c (revision bde334a8dbd66dfa70ce4d7fc9dcad6e1ae45fe4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Pluribus Networks, Inc.
24  * Copyright 2017 Joyent, Inc.
25  */
26 
27 /*
28  * apic_introp.c:
29  *	Has code for Advanced DDI interrupt framework support.
30  */
31 
32 #include <sys/cpuvar.h>
33 #include <sys/psm.h>
34 #include <sys/archsystm.h>
35 #include <sys/apic.h>
36 #include <sys/sunddi.h>
37 #include <sys/ddi_impldefs.h>
38 #include <sys/mach_intr.h>
39 #include <sys/sysmacros.h>
40 #include <sys/trap.h>
41 #include <sys/pci.h>
42 #include <sys/pci_intr_lib.h>
43 #include <sys/apic_common.h>
44 
/*
 * Largest value representable in a uchar_t.  Used in this file by the
 * range ASSERT in apic_find_multi_vectors(), which returns its starting
 * vector as a uchar_t.
 */
#define	UCHAR_MAX	UINT8_MAX

/*
 * Autovector table, defined elsewhere.  apic_get_vector_intr_info() walks
 * autovect[irq].avh_link to enumerate the devices attached to an irq.
 */
extern struct av_head autovect[];

/*
 *	Function Prototypes
 *
 * apic_find_irq() is defined later in this file and is not declared
 * static.
 */
apic_irq_t	*apic_find_irq(dev_info_t *, struct intrspec *, int);
53 
54 /*
55  * apic_pci_msi_enable_vector:
56  *	Set the address/data fields in the MSI/X capability structure
57  *	XXX: MSI-X support
58  */
59 /* ARGSUSED */
60 void
61 apic_pci_msi_enable_vector(apic_irq_t *irq_ptr, int type, int inum, int vector,
62     int count, int target_apic_id)
63 {
64 	uint64_t		msi_addr, msi_data;
65 	ushort_t		msi_ctrl;
66 	dev_info_t		*dip = irq_ptr->airq_dip;
67 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
68 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
69 	msi_regs_t		msi_regs;
70 	int			irqno, i;
71 	void			*intrmap_tbl[PCI_MSI_MAX_INTRS];
72 
73 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: dip=0x%p\n"
74 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
75 	    ddi_driver_name(dip), inum, vector, target_apic_id));
76 
77 	ASSERT((handle != NULL) && (cap_ptr != 0));
78 
79 	msi_regs.mr_data = vector;
80 	msi_regs.mr_addr = target_apic_id;
81 
82 	for (i = 0; i < count; i++) {
83 		irqno = apic_vector_to_irq[vector + i];
84 		intrmap_tbl[i] = apic_irq_table[irqno]->airq_intrmap_private;
85 	}
86 	apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
87 	    count, 0xff);
88 	for (i = 0; i < count; i++) {
89 		irqno = apic_vector_to_irq[vector + i];
90 		apic_irq_table[irqno]->airq_intrmap_private =
91 		    intrmap_tbl[i];
92 	}
93 
94 	apic_vt_ops->apic_intrmap_map_entry(irq_ptr->airq_intrmap_private,
95 	    (void *)&msi_regs, type, count);
96 	apic_vt_ops->apic_intrmap_record_msi(irq_ptr->airq_intrmap_private,
97 	    &msi_regs);
98 
99 	/* MSI Address */
100 	msi_addr = msi_regs.mr_addr;
101 
102 	/* MSI Data: MSI is edge triggered according to spec */
103 	msi_data = msi_regs.mr_data;
104 
105 	DDI_INTR_IMPLDBG((CE_CONT, "apic_pci_msi_enable_vector: addr=0x%lx "
106 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
107 
108 	if (type == DDI_INTR_TYPE_MSI) {
109 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
110 
111 		/* Set the bits to inform how many MSIs are enabled */
112 		msi_ctrl |= ((highbit(count) -1) << PCI_MSI_MME_SHIFT);
113 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
114 
115 		/*
116 		 * Only set vector if not on hypervisor
117 		 */
118 		pci_config_put32(handle,
119 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
120 
121 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
122 			pci_config_put32(handle,
123 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
124 			pci_config_put16(handle,
125 			    cap_ptr + PCI_MSI_64BIT_DATA, msi_data);
126 		} else {
127 			pci_config_put16(handle,
128 			    cap_ptr + PCI_MSI_32BIT_DATA, msi_data);
129 		}
130 
131 	} else if (type == DDI_INTR_TYPE_MSIX) {
132 		uintptr_t	off;
133 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
134 
135 		ASSERT(msix_p != NULL);
136 
137 		/* Offset into the "inum"th entry in the MSI-X table */
138 		off = (uintptr_t)msix_p->msix_tbl_addr +
139 		    (inum  * PCI_MSIX_VECTOR_SIZE);
140 
141 		ddi_put32(msix_p->msix_tbl_hdl,
142 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
143 		ddi_put32(msix_p->msix_tbl_hdl,
144 		    (uint32_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
145 		ddi_put32(msix_p->msix_tbl_hdl,
146 		    (uint32_t *)(off + PCI_MSIX_UPPER_ADDR_OFFSET),
147 		    msi_addr >> 32);
148 	}
149 }
150 
151 /*
152  * This function returns the no. of vectors available for the pri.
153  * dip is not used at this moment.  If we really don't need that,
154  * it will be removed.
155  */
156 /*ARGSUSED*/
157 int
158 apic_navail_vector(dev_info_t *dip, int pri)
159 {
160 	int	lowest, highest, i, navail, count;
161 
162 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
163 	    (void *)dip, pri));
164 
165 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
166 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
167 	navail = count = 0;
168 
169 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
170 		lowest -= APIC_VECTOR_PER_IPL;
171 
172 	/* It has to be contiguous */
173 	for (i = lowest; i <= highest; i++) {
174 		count = 0;
175 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
176 		    (i <= highest)) {
177 			if (APIC_CHECK_RESERVE_VECTORS(i))
178 				break;
179 			count++;
180 			i++;
181 		}
182 		if (count > navail)
183 			navail = count;
184 	}
185 	return (navail);
186 }
187 
188 /*
189  * Finds "count" contiguous MSI vectors starting at the proper alignment
190  * at "pri".
191  * Caller needs to make sure that count has to be power of 2 and should not
192  * be < 1.
193  */
194 uchar_t
195 apic_find_multi_vectors(int pri, int count)
196 {
197 	int	lowest, highest, i, navail, start, msibits;
198 
199 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_mult: pri: %x, count: %x\n",
200 	    pri, count));
201 
202 	highest = apic_ipltopri[pri] + APIC_VECTOR_MASK;
203 	lowest = apic_ipltopri[pri - 1] + APIC_VECTOR_PER_IPL;
204 	navail = 0;
205 
206 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
207 		lowest -= APIC_VECTOR_PER_IPL;
208 
209 	/*
210 	 * msibits is the no. of lower order message data bits for the
211 	 * allocated MSI vectors and is used to calculate the aligned
212 	 * starting vector
213 	 */
214 	msibits = count - 1;
215 
216 	/* It has to be contiguous */
217 	for (i = lowest; i <= highest; i++) {
218 		navail = 0;
219 
220 		/*
221 		 * starting vector has to be aligned accordingly for
222 		 * multiple MSIs
223 		 */
224 		if (msibits)
225 			i = (i + msibits) & ~msibits;
226 		start = i;
227 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
228 		    (i <= highest)) {
229 			if (APIC_CHECK_RESERVE_VECTORS(i))
230 				break;
231 			navail++;
232 			if (navail >= count) {
233 				ASSERT(start >= 0 && start <= UCHAR_MAX);
234 				return ((uchar_t)start);
235 			}
236 			i++;
237 		}
238 	}
239 	return (0);
240 }
241 
242 
243 /*
244  * It finds the apic_irq_t associates with the dip, ispec and type.
245  */
246 apic_irq_t *
247 apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type)
248 {
249 	apic_irq_t	*irqp;
250 	int i;
251 
252 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: dip=0x%p vec=0x%x "
253 	    "ipl=0x%x type=0x%x\n", (void *)dip, ispec->intrspec_vec,
254 	    ispec->intrspec_pri, type));
255 
256 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
257 		for (irqp = apic_irq_table[i]; irqp; irqp = irqp->airq_next) {
258 			if ((irqp->airq_dip == dip) &&
259 			    (irqp->airq_origirq == ispec->intrspec_vec) &&
260 			    (irqp->airq_ipl == ispec->intrspec_pri)) {
261 				if (type == DDI_INTR_TYPE_MSI) {
262 					if (irqp->airq_mps_intr_index ==
263 					    MSI_INDEX)
264 						return (irqp);
265 				} else if (type == DDI_INTR_TYPE_MSIX) {
266 					if (irqp->airq_mps_intr_index ==
267 					    MSIX_INDEX)
268 						return (irqp);
269 				} else
270 					return (irqp);
271 			}
272 		}
273 	}
274 	DDI_INTR_IMPLDBG((CE_CONT, "apic_find_irq: return NULL\n"));
275 	return (NULL);
276 }
277 
278 /*
279  * This function will return the pending bit of the irqp.
280  * It either comes from the IRR register of the APIC or the RDT
281  * entry of the I/O APIC.
282  * For the IRR to work, it needs to be to its binding CPU
283  */
284 static int
285 apic_get_pending(apic_irq_t *irqp, int type)
286 {
287 	int			bit, index, irr, pending;
288 	int			intin_no;
289 	int			apic_ix;
290 
291 	DDI_INTR_IMPLDBG((CE_CONT, "apic_get_pending: irqp: %p, cpuid: %x "
292 	    "type: %x\n", (void *)irqp, irqp->airq_cpu & ~IRQ_USER_BOUND,
293 	    type));
294 
295 	/* need to get on the bound cpu */
296 	mutex_enter(&cpu_lock);
297 	affinity_set(irqp->airq_cpu & ~IRQ_USER_BOUND);
298 
299 	index = irqp->airq_vector / 32;
300 	bit = irqp->airq_vector % 32;
301 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
302 
303 	affinity_clear();
304 	mutex_exit(&cpu_lock);
305 
306 	pending = (irr & (1 << bit)) ? 1 : 0;
307 	if (!pending && (type == DDI_INTR_TYPE_FIXED)) {
308 		/* check I/O APIC for fixed interrupt */
309 		intin_no = irqp->airq_intin_no;
310 		apic_ix = irqp->airq_ioapicindex;
311 		pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no) &
312 		    AV_PENDING) ? 1 : 0;
313 	}
314 	return (pending);
315 }
316 
317 
318 /*
319  * This function will clear the mask for the interrupt on the I/O APIC
320  */
321 static void
322 apic_clear_mask(apic_irq_t *irqp)
323 {
324 	int			intin_no;
325 	ulong_t			iflag;
326 	int32_t			rdt_entry;
327 	int 			apic_ix;
328 
329 	DDI_INTR_IMPLDBG((CE_CONT, "apic_clear_mask: irqp: %p\n",
330 	    (void *)irqp));
331 
332 	intin_no = irqp->airq_intin_no;
333 	apic_ix = irqp->airq_ioapicindex;
334 
335 	iflag = intr_clear();
336 	lock_set(&apic_ioapic_lock);
337 
338 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
339 
340 	/* clear mask */
341 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
342 	    ((~AV_MASK) & rdt_entry));
343 
344 	lock_clear(&apic_ioapic_lock);
345 	intr_restore(iflag);
346 }
347 
348 
349 /*
350  * This function will mask the interrupt on the I/O APIC
351  */
352 static void
353 apic_set_mask(apic_irq_t *irqp)
354 {
355 	int			intin_no;
356 	int 			apic_ix;
357 	ulong_t			iflag;
358 	int32_t			rdt_entry;
359 
360 	DDI_INTR_IMPLDBG((CE_CONT, "apic_set_mask: irqp: %p\n", (void *)irqp));
361 
362 	intin_no = irqp->airq_intin_no;
363 	apic_ix = irqp->airq_ioapicindex;
364 
365 	iflag = intr_clear();
366 
367 	lock_set(&apic_ioapic_lock);
368 
369 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no);
370 
371 	/* mask it */
372 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_no,
373 	    (AV_MASK | rdt_entry));
374 
375 	lock_clear(&apic_ioapic_lock);
376 	intr_restore(iflag);
377 }
378 
379 
380 void
381 apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
382 {
383 	int i;
384 	apic_irq_t *irqptr;
385 	struct intrspec ispec;
386 
387 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
388 	    "count: %x pri: %x type: %x\n",
389 	    (void *)dip, inum, count, pri, type));
390 
391 	/* for MSI/X only */
392 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
393 		return;
394 
395 	for (i = 0; i < count; i++) {
396 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
397 		    "pri=0x%x count=0x%x\n", inum, pri, count));
398 		ispec.intrspec_vec = inum + i;
399 		ispec.intrspec_pri = pri;
400 		if ((irqptr = apic_find_irq(dip, &ispec, type)) == NULL) {
401 			DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: "
402 			    "dip=0x%p inum=0x%x pri=0x%x apic_find_irq() "
403 			    "failed\n", (void *)dip, inum, pri));
404 			continue;
405 		}
406 		irqptr->airq_mps_intr_index = FREE_INDEX;
407 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
408 	}
409 }
410 
411 /*
412  * apic_pci_msi_enable_mode:
413  */
414 void
415 apic_pci_msi_enable_mode(dev_info_t *rdip, int type, int inum)
416 {
417 	ushort_t		msi_ctrl;
418 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
419 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
420 
421 	ASSERT((handle != NULL) && (cap_ptr != 0));
422 
423 	if (type == DDI_INTR_TYPE_MSI) {
424 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
425 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
426 			return;
427 
428 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
429 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
430 
431 	} else if (type == DDI_INTR_TYPE_MSIX) {
432 		uintptr_t	off;
433 		uint32_t	mask;
434 		ddi_intr_msix_t	*msix_p;
435 
436 		msix_p = i_ddi_get_msix(rdip);
437 
438 		ASSERT(msix_p != NULL);
439 
440 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
441 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
442 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
443 
444 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
445 
446 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
447 
448 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
449 
450 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
451 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
452 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
453 			    msi_ctrl);
454 		}
455 	}
456 }
457 
458 static int
459 apic_set_cpu(int irqno, int cpu, int *result)
460 {
461 	apic_irq_t *irqp;
462 	ulong_t iflag;
463 	int ret;
464 
465 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_SET_CPU\n"));
466 
467 	mutex_enter(&airq_mutex);
468 	irqp = apic_irq_table[irqno];
469 	mutex_exit(&airq_mutex);
470 
471 	if (irqp == NULL) {
472 		*result = ENXIO;
473 		return (PSM_FAILURE);
474 	}
475 
476 	/* Fail if this is an MSI intr and is part of a group. */
477 	if ((irqp->airq_mps_intr_index == MSI_INDEX) &&
478 	    (irqp->airq_intin_no > 1)) {
479 		*result = ENXIO;
480 		return (PSM_FAILURE);
481 	}
482 
483 	iflag = intr_clear();
484 	lock_set(&apic_ioapic_lock);
485 
486 	ret = apic_rebind_all(irqp, cpu);
487 
488 	lock_clear(&apic_ioapic_lock);
489 	intr_restore(iflag);
490 
491 	if (ret) {
492 		*result = EIO;
493 		return (PSM_FAILURE);
494 	}
495 	/*
496 	 * keep tracking the default interrupt cpu binding
497 	 */
498 	irqp->airq_cpu = cpu;
499 
500 	*result = 0;
501 	return (PSM_SUCCESS);
502 }
503 
504 static int
505 apic_grp_set_cpu(int irqno, int new_cpu, int *result)
506 {
507 	dev_info_t *orig_dip;
508 	uint32_t orig_cpu;
509 	ulong_t iflag;
510 	apic_irq_t *irqps[PCI_MSI_MAX_INTRS];
511 	int i;
512 	int cap_ptr;
513 	int msi_mask_off = 0;
514 	ushort_t msi_ctrl;
515 	uint32_t msi_pvm = 0;
516 	ddi_acc_handle_t handle;
517 	int num_vectors = 0;
518 	uint32_t vector;
519 
520 	DDI_INTR_IMPLDBG((CE_CONT, "APIC_GRP_SET_CPU\n"));
521 
522 	/*
523 	 * Take mutex to insure that table doesn't change out from underneath
524 	 * us while we're playing with it.
525 	 */
526 	mutex_enter(&airq_mutex);
527 	irqps[0] = apic_irq_table[irqno];
528 	orig_cpu = irqps[0]->airq_temp_cpu;
529 	orig_dip = irqps[0]->airq_dip;
530 	num_vectors = irqps[0]->airq_intin_no;
531 	vector = irqps[0]->airq_vector;
532 
533 	/* A "group" of 1 */
534 	if (num_vectors == 1) {
535 		mutex_exit(&airq_mutex);
536 		return (apic_set_cpu(irqno, new_cpu, result));
537 	}
538 
539 	*result = ENXIO;
540 
541 	if (irqps[0]->airq_mps_intr_index != MSI_INDEX) {
542 		mutex_exit(&airq_mutex);
543 		DDI_INTR_IMPLDBG((CE_CONT, "set_grp: intr not MSI\n"));
544 		goto set_grp_intr_done;
545 	}
546 	if ((num_vectors < 1) || ((num_vectors - 1) & vector)) {
547 		mutex_exit(&airq_mutex);
548 		DDI_INTR_IMPLDBG((CE_CONT,
549 		    "set_grp: base vec not part of a grp or not aligned: "
550 		    "vec:0x%x, num_vec:0x%x\n", vector, num_vectors));
551 		goto set_grp_intr_done;
552 	}
553 	DDI_INTR_IMPLDBG((CE_CONT, "set_grp: num intrs in grp: %d\n",
554 	    num_vectors));
555 
556 	ASSERT((num_vectors + vector) < APIC_MAX_VECTOR);
557 
558 	*result = EIO;
559 
560 	/*
561 	 * All IRQ entries in the table for the given device will be not
562 	 * shared.  Since they are not shared, the dip in the table will
563 	 * be true to the device of interest.
564 	 */
565 	for (i = 1; i < num_vectors; i++) {
566 		irqps[i] = apic_irq_table[apic_vector_to_irq[vector + i]];
567 		if (irqps[i] == NULL) {
568 			mutex_exit(&airq_mutex);
569 			goto set_grp_intr_done;
570 		}
571 #ifdef DEBUG
572 		/* Sanity check: CPU and dip is the same for all entries. */
573 		if ((irqps[i]->airq_dip != orig_dip) ||
574 		    (irqps[i]->airq_temp_cpu != orig_cpu)) {
575 			mutex_exit(&airq_mutex);
576 			DDI_INTR_IMPLDBG((CE_CONT,
577 			    "set_grp: cpu or dip for vec 0x%x difft than for "
578 			    "vec 0x%x\n", vector, vector + i));
579 			DDI_INTR_IMPLDBG((CE_CONT,
580 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
581 			    irqps[i]->airq_temp_cpu, (void *)orig_dip,
582 			    (void *)irqps[i]->airq_dip));
583 			goto set_grp_intr_done;
584 		}
585 #endif /* DEBUG */
586 	}
587 	mutex_exit(&airq_mutex);
588 
589 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(orig_dip);
590 	handle = i_ddi_get_pci_config_handle(orig_dip);
591 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
592 
593 	/* MSI Per vector masking is supported. */
594 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
595 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
596 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
597 		else
598 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
599 		msi_pvm = pci_config_get32(handle, msi_mask_off);
600 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
601 		DDI_INTR_IMPLDBG((CE_CONT,
602 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
603 		    pci_config_get32(handle, msi_mask_off)));
604 	}
605 
606 	iflag = intr_clear();
607 	lock_set(&apic_ioapic_lock);
608 
609 	/*
610 	 * Do the first rebind and check for errors.  Apic_rebind_all returns
611 	 * an error if the CPU is not accepting interrupts.  If the first one
612 	 * succeeds they all will.
613 	 */
614 	if (apic_rebind_all(irqps[0], new_cpu))
615 		(void) apic_rebind_all(irqps[0], orig_cpu);
616 	else {
617 		irqps[0]->airq_cpu = new_cpu;
618 
619 		for (i = 1; i < num_vectors; i++) {
620 			(void) apic_rebind_all(irqps[i], new_cpu);
621 			irqps[i]->airq_cpu = new_cpu;
622 		}
623 		*result = 0;	/* SUCCESS */
624 	}
625 
626 	lock_clear(&apic_ioapic_lock);
627 	intr_restore(iflag);
628 
629 	/* Reenable vectors if per vector masking is supported. */
630 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
631 		pci_config_put32(handle, msi_mask_off, msi_pvm);
632 		DDI_INTR_IMPLDBG((CE_CONT,
633 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
634 		    pci_config_get32(handle, msi_mask_off)));
635 	}
636 
637 set_grp_intr_done:
638 	if (*result != 0)
639 		return (PSM_FAILURE);
640 
641 	return (PSM_SUCCESS);
642 }
643 
644 int
645 apic_get_vector_intr_info(int vecirq, apic_get_intr_t *intr_params_p)
646 {
647 	struct autovec *av_dev;
648 	uchar_t irqno;
649 	uint_t i;
650 	apic_irq_t *irq_p;
651 
652 	/* Sanity check the vector/irq argument. */
653 	ASSERT((vecirq >= 0) || (vecirq <= APIC_MAX_VECTOR));
654 
655 	mutex_enter(&airq_mutex);
656 
657 	/*
658 	 * Convert the vecirq arg to an irq using vector_to_irq table
659 	 * if the arg is a vector.  Pass thru if already an irq.
660 	 */
661 	if ((intr_params_p->avgi_req_flags & PSMGI_INTRBY_FLAGS) ==
662 	    PSMGI_INTRBY_VEC)
663 		irqno = apic_vector_to_irq[vecirq];
664 	else
665 		irqno = (uchar_t)vecirq;
666 
667 	irq_p = apic_irq_table[irqno];
668 
669 	if ((irq_p == NULL) ||
670 	    ((irq_p->airq_mps_intr_index != RESERVE_INDEX) &&
671 	    ((irq_p->airq_temp_cpu == IRQ_UNBOUND) ||
672 	    (irq_p->airq_temp_cpu == IRQ_UNINIT)))) {
673 		mutex_exit(&airq_mutex);
674 		return (PSM_FAILURE);
675 	}
676 
677 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
678 
679 		/* Get the (temp) cpu from apic_irq table, indexed by irq. */
680 		intr_params_p->avgi_cpu_id = irq_p->airq_temp_cpu;
681 
682 		/* Return user bound info for intrd. */
683 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
684 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
685 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
686 		}
687 	}
688 
689 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
690 		intr_params_p->avgi_vector = irq_p->airq_vector;
691 
692 	if (intr_params_p->avgi_req_flags &
693 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
694 		/* Get number of devices from apic_irq table shared field. */
695 		intr_params_p->avgi_num_devs = irq_p->airq_share;
696 
697 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
698 
699 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
700 
701 		/* Some devices have NULL dip.  Don't count these. */
702 		if (intr_params_p->avgi_num_devs > 0) {
703 			for (i = 0, av_dev = autovect[irqno].avh_link;
704 			    av_dev; av_dev = av_dev->av_link)
705 				if (av_dev->av_vector && av_dev->av_dip)
706 					i++;
707 			intr_params_p->avgi_num_devs =
708 			    (uchar_t)MIN(intr_params_p->avgi_num_devs, i);
709 		}
710 
711 		/* There are no viable dips to return. */
712 		if (intr_params_p->avgi_num_devs == 0)
713 			intr_params_p->avgi_dip_list = NULL;
714 
715 		else {	/* Return list of dips */
716 
717 			/* Allocate space in array for that number of devs. */
718 			intr_params_p->avgi_dip_list = kmem_zalloc(
719 			    intr_params_p->avgi_num_devs *
720 			    sizeof (dev_info_t *),
721 			    KM_SLEEP);
722 
723 			/*
724 			 * Loop through the device list of the autovec table
725 			 * filling in the dip array.
726 			 *
727 			 * Note that the autovect table may have some special
728 			 * entries which contain NULL dips.  These will be
729 			 * ignored.
730 			 */
731 			for (i = 0, av_dev = autovect[irqno].avh_link;
732 			    av_dev; av_dev = av_dev->av_link)
733 				if (av_dev->av_vector && av_dev->av_dip)
734 					intr_params_p->avgi_dip_list[i++] =
735 					    av_dev->av_dip;
736 		}
737 	}
738 
739 	mutex_exit(&airq_mutex);
740 
741 	return (PSM_SUCCESS);
742 }
743 
744 /*
745  * This function provides external interface to the nexus for all
746  * functionalities related to the new DDI interrupt framework.
747  *
748  * Input:
749  * dip     - pointer to the dev_info structure of the requested device
750  * hdlp    - pointer to the internal interrupt handle structure for the
751  *	     requested interrupt
752  * intr_op - opcode for this call
753  * result  - pointer to the integer that will hold the result to be
754  *	     passed back if return value is PSM_SUCCESS
755  *
756  * Output:
757  * return value is either PSM_SUCCESS or PSM_FAILURE
758  */
759 int
760 apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
761     psm_intr_op_t intr_op, int *result)
762 {
763 	int		cap;
764 	int		count_vec;
765 	int		old_priority;
766 	int		new_priority;
767 	int		new_cpu;
768 	apic_irq_t	*irqp;
769 	struct intrspec *ispec, intr_spec;
770 
771 	DDI_INTR_IMPLDBG((CE_CONT, "apic_intr_ops: dip: %p hdlp: %p "
772 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
773 
774 	ispec = &intr_spec;
775 	ispec->intrspec_pri = hdlp->ih_pri;
776 	ispec->intrspec_vec = hdlp->ih_inum;
777 	ispec->intrspec_func = hdlp->ih_cb_func;
778 
779 	switch (intr_op) {
780 	case PSM_INTR_OP_CHECK_MSI:
781 		/*
782 		 * Check MSI/X is supported or not at APIC level and
783 		 * masked off the MSI/X bits in hdlp->ih_type if not
784 		 * supported before return.  If MSI/X is supported,
785 		 * leave the ih_type unchanged and return.
786 		 *
787 		 * hdlp->ih_type passed in from the nexus has all the
788 		 * interrupt types supported by the device.
789 		 */
790 		if (apic_support_msi == 0) {
791 			/*
792 			 * if apic_support_msi is not set, call
793 			 * apic_check_msi_support() to check whether msi
794 			 * is supported first
795 			 */
796 			if (apic_check_msi_support() == PSM_SUCCESS)
797 				apic_support_msi = 1;
798 			else
799 				apic_support_msi = -1;
800 		}
801 		if (apic_support_msi == 1) {
802 			if (apic_msix_enable)
803 				*result = hdlp->ih_type;
804 			else
805 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
806 		} else
807 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
808 			    DDI_INTR_TYPE_MSIX);
809 		break;
810 	case PSM_INTR_OP_ALLOC_VECTORS:
811 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
812 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
813 			    hdlp->ih_scratch1, hdlp->ih_pri,
814 			    (int)(uintptr_t)hdlp->ih_scratch2);
815 		else
816 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
817 			    hdlp->ih_scratch1, hdlp->ih_pri,
818 			    (int)(uintptr_t)hdlp->ih_scratch2);
819 		break;
820 	case PSM_INTR_OP_FREE_VECTORS:
821 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
822 		    hdlp->ih_pri, hdlp->ih_type);
823 		break;
824 	case PSM_INTR_OP_NAVAIL_VECTORS:
825 		*result = apic_navail_vector(dip, hdlp->ih_pri);
826 		break;
827 	case PSM_INTR_OP_XLATE_VECTOR:
828 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
829 		*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
830 		if (*result == -1)
831 			return (PSM_FAILURE);
832 		break;
833 	case PSM_INTR_OP_GET_PENDING:
834 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
835 			return (PSM_FAILURE);
836 		*result = apic_get_pending(irqp, hdlp->ih_type);
837 		break;
838 	case PSM_INTR_OP_CLEAR_MASK:
839 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
840 			return (PSM_FAILURE);
841 		irqp = apic_find_irq(dip, ispec, hdlp->ih_type);
842 		if (irqp == NULL)
843 			return (PSM_FAILURE);
844 		apic_clear_mask(irqp);
845 		break;
846 	case PSM_INTR_OP_SET_MASK:
847 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
848 			return (PSM_FAILURE);
849 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
850 			return (PSM_FAILURE);
851 		apic_set_mask(irqp);
852 		break;
853 	case PSM_INTR_OP_GET_CAP:
854 		cap = DDI_INTR_FLAG_PENDING;
855 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
856 			cap |= DDI_INTR_FLAG_MASKABLE;
857 		*result = cap;
858 		break;
859 	case PSM_INTR_OP_GET_SHARED:
860 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
861 			return (PSM_FAILURE);
862 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
863 		if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type)) == NULL)
864 			return (PSM_FAILURE);
865 		*result = (irqp->airq_share > 1) ? 1: 0;
866 		break;
867 	case PSM_INTR_OP_SET_PRI:
868 		old_priority = hdlp->ih_pri;	/* save old value */
869 		new_priority = *(int *)result;	/* try the new value */
870 
871 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
872 			return (PSM_SUCCESS);
873 		}
874 
875 		/* Now allocate the vectors */
876 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI) {
877 			/* SET_PRI does not support the case of multiple MSI */
878 			if (i_ddi_intr_get_current_nintrs(hdlp->ih_dip) > 1)
879 				return (PSM_FAILURE);
880 
881 			count_vec = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
882 			    1, new_priority,
883 			    DDI_INTR_ALLOC_STRICT);
884 		} else {
885 			count_vec = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
886 			    1, new_priority,
887 			    DDI_INTR_ALLOC_STRICT);
888 		}
889 
890 		/* Did we get new vectors? */
891 		if (!count_vec)
892 			return (PSM_FAILURE);
893 
894 		/* Finally, free the previously allocated vectors */
895 		apic_free_vectors(dip, hdlp->ih_inum, count_vec,
896 		    old_priority, hdlp->ih_type);
897 		break;
898 	case PSM_INTR_OP_SET_CPU:
899 	case PSM_INTR_OP_GRP_SET_CPU:
900 		/*
901 		 * The interrupt handle given here has been allocated
902 		 * specifically for this command, and ih_private carries
903 		 * a CPU value.
904 		 */
905 		new_cpu = (int)(intptr_t)hdlp->ih_private;
906 		if (!apic_cpu_in_range(new_cpu)) {
907 			DDI_INTR_IMPLDBG((CE_CONT,
908 			    "[grp_]set_cpu: cpu out of range: %d\n", new_cpu));
909 			*result = EINVAL;
910 			return (PSM_FAILURE);
911 		}
912 		if (hdlp->ih_vector > APIC_MAX_VECTOR) {
913 			DDI_INTR_IMPLDBG((CE_CONT,
914 			    "[grp_]set_cpu: vector out of range: %d\n",
915 			    hdlp->ih_vector));
916 			*result = EINVAL;
917 			return (PSM_FAILURE);
918 		}
919 		if ((hdlp->ih_flags & PSMGI_INTRBY_FLAGS) == PSMGI_INTRBY_VEC)
920 			hdlp->ih_vector = apic_vector_to_irq[hdlp->ih_vector];
921 		if (intr_op == PSM_INTR_OP_SET_CPU) {
922 			if (apic_set_cpu(hdlp->ih_vector, new_cpu, result) !=
923 			    PSM_SUCCESS)
924 				return (PSM_FAILURE);
925 		} else {
926 			if (apic_grp_set_cpu(hdlp->ih_vector, new_cpu,
927 			    result) != PSM_SUCCESS)
928 				return (PSM_FAILURE);
929 		}
930 		break;
931 	case PSM_INTR_OP_GET_INTR:
932 		/*
933 		 * The interrupt handle given here has been allocated
934 		 * specifically for this command, and ih_private carries
935 		 * a pointer to a apic_get_intr_t.
936 		 */
937 		if (apic_get_vector_intr_info(
938 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
939 			return (PSM_FAILURE);
940 		break;
941 	case PSM_INTR_OP_APIC_TYPE:
942 		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
943 		    apic_get_apic_type();
944 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
945 		    APIC_MAX_VECTOR;
946 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
947 		    boot_ncpus;
948 		hdlp->ih_ver = apic_get_apic_version();
949 		break;
950 	case PSM_INTR_OP_SET_CAP:
951 	default:
952 		return (PSM_FAILURE);
953 	}
954 	return (PSM_SUCCESS);
955 }
956