xref: /illumos-gate/usr/src/uts/i86pc/io/apix/apix.c (revision 50f7888b60b9fee4c775b56966f02e23da2deef5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  */
29 /*
30  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
31  */
32 
33 /*
34  * To understand how the apix module interacts with the interrupt subsystem read
35  * the theory statement in uts/i86pc/os/intr.c.
36  */
37 
38 /*
39  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
40  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
41  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
42  * PSMI 1.5 extensions are supported in Solaris Nevada.
43  * PSMI 1.6 extensions are supported in Solaris Nevada.
44  * PSMI 1.7 extensions are supported in Solaris Nevada.
45  */
46 #define	PSMI_1_7
47 
48 #include <sys/processor.h>
49 #include <sys/time.h>
50 #include <sys/psm.h>
51 #include <sys/smp_impldefs.h>
52 #include <sys/cram.h>
53 #include <sys/acpi/acpi.h>
54 #include <sys/acpica.h>
55 #include <sys/psm_common.h>
56 #include <sys/pit.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/ddi_impldefs.h>
60 #include <sys/pci.h>
61 #include <sys/promif.h>
62 #include <sys/x86_archext.h>
63 #include <sys/cpc_impl.h>
64 #include <sys/uadmin.h>
65 #include <sys/panic.h>
66 #include <sys/debug.h>
67 #include <sys/archsystm.h>
68 #include <sys/trap.h>
69 #include <sys/machsystm.h>
70 #include <sys/sysmacros.h>
71 #include <sys/cpuvar.h>
72 #include <sys/rm_platter.h>
73 #include <sys/privregs.h>
74 #include <sys/note.h>
75 #include <sys/pci_intr_lib.h>
76 #include <sys/spl.h>
77 #include <sys/clock.h>
78 #include <sys/cyclic.h>
79 #include <sys/dditypes.h>
80 #include <sys/sunddi.h>
81 #include <sys/x_call.h>
82 #include <sys/reboot.h>
83 #include <sys/mach_intr.h>
84 #include <sys/apix.h>
85 #include <sys/apix_irm_impl.h>
86 
87 static int apix_probe();
88 static void apix_init();
89 static void apix_picinit(void);
90 static int apix_intr_enter(int, int *);
91 static void apix_intr_exit(int, int);
92 static void apix_setspl(int);
93 static int apix_disable_intr(processorid_t);
94 static void apix_enable_intr(processorid_t);
95 static int apix_get_clkvect(int);
96 static int apix_get_ipivect(int, int);
97 static void apix_post_cyclic_setup(void *);
98 static int apix_post_cpu_start();
99 static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
100     psm_intr_op_t, int *);
101 
102 /*
103  * Helper functions for apix_intr_ops()
104  */
105 static void apix_redistribute_compute(void);
106 static int apix_get_pending(apix_vector_t *);
107 static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
108 static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
109 static char *apix_get_apic_type(void);
110 static int apix_intx_get_pending(int);
111 static void apix_intx_set_mask(int irqno);
112 static void apix_intx_clear_mask(int irqno);
113 static int apix_intx_get_shared(int irqno);
114 static void apix_intx_set_shared(int irqno, int delta);
115 static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
116     struct intrspec *);
117 static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
118 
119 extern int apic_clkinit(int);
120 
121 /* IRM initialization for APIX PSM module */
122 extern void apix_irm_init(void);
123 
124 extern int irm_enable;
125 
126 /*
127  *	Local static data
128  */
129 static struct	psm_ops apix_ops = {
130 	apix_probe,
131 
132 	apix_init,
133 	apix_picinit,
134 	apix_intr_enter,
135 	apix_intr_exit,
136 	apix_setspl,
137 	apix_addspl,
138 	apix_delspl,
139 	apix_disable_intr,
140 	apix_enable_intr,
141 	NULL,			/* psm_softlvl_to_irq */
142 	NULL,			/* psm_set_softintr */
143 
144 	apic_set_idlecpu,
145 	apic_unset_idlecpu,
146 
147 	apic_clkinit,
148 	apix_get_clkvect,
149 	NULL,			/* psm_hrtimeinit */
150 	apic_gethrtime,
151 
152 	apic_get_next_processorid,
153 	apic_cpu_start,
154 	apix_post_cpu_start,
155 	apic_shutdown,
156 	apix_get_ipivect,
157 	apic_send_ipi,
158 
159 	NULL,			/* psm_translate_irq */
160 	NULL,			/* psm_notify_error */
161 	NULL,			/* psm_notify_func */
162 	apic_timer_reprogram,
163 	apic_timer_enable,
164 	apic_timer_disable,
165 	apix_post_cyclic_setup,
166 	apic_preshutdown,
167 	apix_intr_ops,		/* Advanced DDI Interrupt framework */
168 	apic_state,		/* save, restore apic state for S3 */
169 	apic_cpu_ops,		/* CPU control interface. */
170 };
171 
172 struct psm_ops *psmops = &apix_ops;
173 
174 static struct	psm_info apix_psm_info = {
175 	PSM_INFO_VER01_7,			/* version */
176 	PSM_OWN_EXCLUSIVE,			/* ownership */
177 	&apix_ops,				/* operation */
178 	APIX_NAME,				/* machine name */
179 	"apix MPv1.4 compatible",
180 };
181 
182 static void *apix_hdlp;
183 
184 static int apix_is_enabled = 0;
185 
186 /*
187  * Flag to indicate if APIX is to be enabled only for platforms
188  * with specific hw feature(s).
189  */
190 int apix_hw_chk_enable = 1;
191 
192 /*
193  * Hw features that are checked for enabling APIX support.
194  */
195 #define	APIX_SUPPORT_X2APIC	0x00000001
196 uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;
197 
198 /*
199  * apix_lock is used for cpu selection and vector re-binding
200  */
201 lock_t apix_lock;
202 apix_impl_t *apixs[NCPU];
203 /*
204  * Mapping between device interrupt and the allocated vector. Indexed
205  * by major number.
206  */
207 apix_dev_vector_t **apix_dev_vector;
208 /*
209  * Mapping between device major number and cpu id. It gets used
210  * when interrupt binding policy round robin with affinity is
211  * applied. With that policy, devices with the same major number
212  * will be bound to the same CPU.
213  */
214 processorid_t *apix_major_to_cpu;	/* major to cpu mapping */
215 kmutex_t apix_mutex;	/* for apix_dev_vector & apix_major_to_cpu */
216 
217 int apix_nipis = 16;	/* Maximum number of IPIs */
218 /*
219  * Maximum number of vectors in a CPU that can be used for interrupt
220  * allocation (including IPIs and the reserved vectors).
221  */
222 int apix_cpu_nvectors = APIX_NVECTOR;
223 
224 /* gcpu.h */
225 
226 extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
227 extern void apic_change_eoi();
228 
229 /*
230  *	This is the loadable module wrapper
231  */
232 
233 int
234 _init(void)
235 {
236 	if (apic_coarse_hrtime)
237 		apix_ops.psm_gethrtime = &apic_gettime;
238 	return (psm_mod_init(&apix_hdlp, &apix_psm_info));
239 }
240 
241 int
242 _fini(void)
243 {
244 	return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
245 }
246 
247 int
248 _info(struct modinfo *modinfop)
249 {
250 	return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
251 }
252 
253 static int
254 apix_probe()
255 {
256 	int rval;
257 
258 	if (apix_enable == 0)
259 		return (PSM_FAILURE);
260 
261 	/*
262 	 * FIXME Temporarily disable apix module on Xen HVM platform due to
263 	 * known hang during boot (see #3605).
264 	 *
265 	 * Please remove when/if the issue is resolved.
266 	 */
267 	if (get_hwenv() == HW_XEN_HVM)
268 		return (PSM_FAILURE);
269 
270 	/* check for hw features if specified  */
271 	if (apix_hw_chk_enable) {
272 		/* check if x2APIC mode is supported */
273 		if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
274 		    APIX_SUPPORT_X2APIC) {
275 			if (!((apic_local_mode() == LOCAL_X2APIC) ||
276 			    apic_detect_x2apic())) {
277 				/* x2APIC mode is not supported in the hw */
278 				apix_enable = 0;
279 			}
280 		}
281 		if (apix_enable == 0)
282 			return (PSM_FAILURE);
283 	}
284 
285 	rval = apic_probe_common(apix_psm_info.p_mach_idstring);
286 	if (rval == PSM_SUCCESS)
287 		apix_is_enabled = 1;
288 	else
289 		apix_is_enabled = 0;
290 	return (rval);
291 }
292 
293 /*
294  * Initialize the data structures needed by pcplusmpx module.
295  * Specifically, the data structures used by addspl() and delspl()
296  * routines.
297  */
298 static void
299 apix_softinit()
300 {
301 	int i, *iptr;
302 	apix_impl_t *hdlp;
303 	int nproc;
304 
305 	nproc = max(apic_nproc, apic_max_nproc);
306 
307 	hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
308 	for (i = 0; i < nproc; i++) {
309 		apixs[i] = &hdlp[i];
310 		apixs[i]->x_cpuid = i;
311 		LOCK_INIT_CLEAR(&apixs[i]->x_lock);
312 	}
313 
314 	/* cpu 0 is always up (for now) */
315 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
316 
317 	iptr = (int *)&apic_irq_table[0];
318 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
319 		apic_level_intr[i] = 0;
320 		*iptr++ = NULL;
321 	}
322 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
323 
324 	apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
325 	    KM_SLEEP);
326 
327 	if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
328 		apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
329 		    KM_SLEEP);
330 		for (i = 0; i < devcnt; i++)
331 			apix_major_to_cpu[i] = IRQ_UNINIT;
332 	}
333 
334 	mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
335 }
336 
337 static int
338 apix_get_pending_spl(void)
339 {
340 	int cpuid = CPU->cpu_id;
341 
342 	return (bsrw_insn(apixs[cpuid]->x_intr_pending));
343 }
344 
345 static uintptr_t
346 apix_get_intr_handler(int cpu, short vec)
347 {
348 	apix_vector_t *apix_vector;
349 
350 	ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
351 	if (cpu >= apic_nproc)
352 		return (NULL);
353 
354 	apix_vector = apixs[cpu]->x_vectbl[vec];
355 
356 	return ((uintptr_t)(apix_vector->v_autovect));
357 }
358 
359 static void
360 apix_init()
361 {
362 	extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
363 
364 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
365 
366 	do_interrupt_common = apix_do_interrupt;
367 	addintr = apix_add_avintr;
368 	remintr = apix_rem_avintr;
369 	get_pending_spl = apix_get_pending_spl;
370 	get_intr_handler = apix_get_intr_handler;
371 	psm_get_localapicid = apic_get_localapicid;
372 	psm_get_ioapicid = apic_get_ioapicid;
373 
374 	apix_softinit();
375 
376 #if !defined(__amd64)
377 	if (cpuid_have_cr8access(CPU))
378 		apic_have_32bit_cr8 = 1;
379 #endif
380 
381 	/*
382 	 * Initialize IRM pool parameters
383 	 */
384 	if (irm_enable) {
385 		int	i;
386 		int	lowest_irq;
387 		int	highest_irq;
388 
389 		/* number of CPUs present */
390 		apix_irminfo.apix_ncpus = apic_nproc;
391 		/* total number of entries in all of the IOAPICs present */
392 		lowest_irq = apic_io_vectbase[0];
393 		highest_irq = apic_io_vectend[0];
394 		for (i = 1; i < apic_io_max; i++) {
395 			if (apic_io_vectbase[i] < lowest_irq)
396 				lowest_irq = apic_io_vectbase[i];
397 			if (apic_io_vectend[i] > highest_irq)
398 				highest_irq = apic_io_vectend[i];
399 		}
400 		apix_irminfo.apix_ioapic_max_vectors =
401 		    highest_irq - lowest_irq + 1;
402 		/*
403 		 * Number of available per-CPU vectors excluding
404 		 * reserved vectors for Dtrace, int80, system-call,
405 		 * fast-trap, etc.
406 		 */
407 		apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
408 		    APIX_SW_RESERVED_VECTORS;
409 
410 		/* Number of vectors (pre) allocated (SCI and HPET) */
411 		apix_irminfo.apix_vectors_allocated = 0;
412 		if (apic_hpet_vect != -1)
413 			apix_irminfo.apix_vectors_allocated++;
414 		if (apic_sci_vect != -1)
415 			apix_irminfo.apix_vectors_allocated++;
416 	}
417 }
418 
419 static void
420 apix_init_intr()
421 {
422 	processorid_t	cpun = psm_get_cpu_id();
423 	uint_t nlvt;
424 	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
425 	extern void cmi_cmci_trap(void);
426 
427 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
428 
429 	if (apic_mode == LOCAL_APIC) {
430 		/*
431 		 * We are running APIC in MMIO mode.
432 		 */
433 		if (apic_flat_model) {
434 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
435 			    APIC_FLAT_MODEL);
436 		} else {
437 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
438 			    APIC_CLUSTER_MODEL);
439 		}
440 
441 		apic_reg_ops->apic_write(APIC_DEST_REG,
442 		    AV_HIGH_ORDER >> cpun);
443 	}
444 
445 	if (apic_directed_EOI_supported()) {
446 		/*
447 		 * Setting the 12th bit in the Spurious Interrupt Vector
448 		 * Register suppresses broadcast EOIs generated by the local
449 		 * APIC. The suppression of broadcast EOIs happens only when
450 		 * interrupts are level-triggered.
451 		 */
452 		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
453 	}
454 
455 	/* need to enable APIC before unmasking NMI */
456 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
457 
458 	/*
459 	 * Presence of an invalid vector with delivery mode AV_FIXED can
460 	 * cause an error interrupt, even if the entry is masked...so
461 	 * write a valid vector to LVT entries along with the mask bit
462 	 */
463 
464 	/* All APICs have timer and LINT0/1 */
465 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
466 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
467 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */
468 
469 	/*
470 	 * On integrated APICs, the number of LVT entries is
471 	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
472 	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
473 	 */
474 
475 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
476 		nlvt = 3;
477 	} else {
478 		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
479 		    0xFF) + 1;
480 	}
481 
482 	if (nlvt >= 5) {
483 		/* Enable performance counter overflow interrupt */
484 
485 		if (!is_x86_feature(x86_featureset, X86FSET_MSR))
486 			apic_enable_cpcovf_intr = 0;
487 		if (apic_enable_cpcovf_intr) {
488 			if (apic_cpcovf_vect == 0) {
489 				int ipl = APIC_PCINT_IPL;
490 
491 				apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
492 				ASSERT(apic_cpcovf_vect);
493 
494 				(void) add_avintr(NULL, ipl,
495 				    (avfunc)kcpc_hw_overflow_intr,
496 				    "apic pcint", apic_cpcovf_vect,
497 				    NULL, NULL, NULL, NULL);
498 				kcpc_hw_overflow_intr_installed = 1;
499 				kcpc_hw_enable_cpc_intr =
500 				    apic_cpcovf_mask_clear;
501 			}
502 			apic_reg_ops->apic_write(APIC_PCINT_VECT,
503 			    apic_cpcovf_vect);
504 		}
505 	}
506 
507 	if (nlvt >= 6) {
508 		/* Only mask TM intr if the BIOS apparently doesn't use it */
509 
510 		uint32_t lvtval;
511 
512 		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
513 		if (((lvtval & AV_MASK) == AV_MASK) ||
514 		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
515 			apic_reg_ops->apic_write(APIC_THERM_VECT,
516 			    AV_MASK|APIC_RESV_IRQ);
517 		}
518 	}
519 
520 	/* Enable error interrupt */
521 
522 	if (nlvt >= 4 && apic_enable_error_intr) {
523 		if (apic_errvect == 0) {
524 			int ipl = 0xf;	/* get highest priority intr */
525 			apic_errvect = apix_get_ipivect(ipl, -1);
526 			ASSERT(apic_errvect);
527 			/*
528 			 * Not PSMI compliant, but we are going to merge
529 			 * with ON anyway
530 			 */
531 			(void) add_avintr(NULL, ipl,
532 			    (avfunc)apic_error_intr, "apic error intr",
533 			    apic_errvect, NULL, NULL, NULL, NULL);
534 		}
535 		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
536 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
537 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
538 	}
539 
540 	/* Enable CMCI interrupt */
541 	if (cmi_enable_cmci) {
542 		mutex_enter(&cmci_cpu_setup_lock);
543 		if (cmci_cpu_setup_registered == 0) {
544 			mutex_enter(&cpu_lock);
545 			register_cpu_setup_func(cmci_cpu_setup, NULL);
546 			mutex_exit(&cpu_lock);
547 			cmci_cpu_setup_registered = 1;
548 		}
549 		mutex_exit(&cmci_cpu_setup_lock);
550 
551 		if (apic_cmci_vect == 0) {
552 			int ipl = 0x2;
553 			apic_cmci_vect = apix_get_ipivect(ipl, -1);
554 			ASSERT(apic_cmci_vect);
555 
556 			(void) add_avintr(NULL, ipl,
557 			    (avfunc)cmi_cmci_trap, "apic cmci intr",
558 			    apic_cmci_vect, NULL, NULL, NULL, NULL);
559 		}
560 		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
561 	}
562 
563 	apic_reg_ops->apic_write_task_reg(0);
564 }
565 
566 static void
567 apix_picinit(void)
568 {
569 	int i, j;
570 	uint_t isr;
571 
572 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
573 
574 	/*
575 	 * initialize interrupt remapping before apic
576 	 * hardware initialization
577 	 */
578 	apic_intrmap_init(apic_mode);
579 	if (apic_vt_ops == psm_vt_ops)
580 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
581 
582 	/*
583 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
584 	 * bit on without clearing it with EOI.  Since softint
585 	 * uses vector 0x20 to interrupt itself, so softint will
586 	 * not work on this machine.  In order to fix this problem
587 	 * a check is made to verify all the isr bits are clear.
588 	 * If not, EOIs are issued to clear the bits.
589 	 */
590 	for (i = 7; i >= 1; i--) {
591 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
592 		if (isr != 0)
593 			for (j = 0; ((j < 32) && (isr != 0)); j++)
594 				if (isr & (1 << j)) {
595 					apic_reg_ops->apic_write(
596 					    APIC_EOI_REG, 0);
597 					isr &= ~(1 << j);
598 					apic_error |= APIC_ERR_BOOT_EOI;
599 				}
600 	}
601 
602 	/* set a flag so we know we have run apic_picinit() */
603 	apic_picinit_called = 1;
604 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
605 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
606 	LOCK_INIT_CLEAR(&apic_error_lock);
607 	LOCK_INIT_CLEAR(&apic_mode_switch_lock);
608 
609 	picsetup();	 /* initialise the 8259 */
610 
611 	/* add nmi handler - least priority nmi handler */
612 	LOCK_INIT_CLEAR(&apic_nmi_lock);
613 
614 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
615 	    "apix NMI handler", (caddr_t)NULL))
616 		cmn_err(CE_WARN, "apix: Unable to add nmi handler");
617 
618 	apix_init_intr();
619 
620 	/* enable apic mode if imcr present */
621 	if (apic_imcrp) {
622 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
623 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
624 	}
625 
626 	ioapix_init_intr(IOAPIC_MASK);
627 
628 	/* setup global IRM pool if applicable */
629 	if (irm_enable)
630 		apix_irm_init();
631 }
632 
633 static __inline__ void
634 apix_send_eoi(void)
635 {
636 	if (apic_mode == LOCAL_APIC)
637 		LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
638 	else
639 		X2APIC_WRITE(APIC_EOI_REG, 0);
640 }
641 
642 /*
643  * platform_intr_enter
644  *
645  *	Called at the beginning of the interrupt service routine, but unlike
646  *	pcplusmp, does not mask interrupts. An EOI is given to the interrupt
647  *	controller to enable other HW interrupts but interrupts are still
648  * 	masked by the IF flag.
649  *
650  *	Return -1 for spurious interrupts
651  *
652  */
653 static int
654 apix_intr_enter(int ipl, int *vectorp)
655 {
656 	struct cpu *cpu = CPU;
657 	uint32_t cpuid = CPU->cpu_id;
658 	apic_cpus_info_t *cpu_infop;
659 	uchar_t vector;
660 	apix_vector_t *vecp;
661 	int nipl = -1;
662 
663 	/*
664 	 * The real vector delivered is (*vectorp + 0x20), but our caller
665 	 * subtracts 0x20 from the vector before passing it to us.
666 	 * (That's why APIC_BASE_VECT is 0x20.)
667 	 */
668 	vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;
669 
670 	cpu_infop = &apic_cpus[cpuid];
671 	if (vector == APIC_SPUR_INTR) {
672 		cpu_infop->aci_spur_cnt++;
673 		return (APIC_INT_SPURIOUS);
674 	}
675 
676 	vecp = xv_vector(cpuid, vector);
677 	if (vecp == NULL) {
678 		if (APIX_IS_FAKE_INTR(vector))
679 			nipl = apix_rebindinfo.i_pri;
680 		apix_send_eoi();
681 		return (nipl);
682 	}
683 	nipl = vecp->v_pri;
684 
685 	/* if interrupted by the clock, increment apic_nsec_since_boot */
686 	if (vector == (apic_clkvect + APIC_BASE_VECT)) {
687 		if (!apic_oneshot) {
688 			/* NOTE: this is not MT aware */
689 			apic_hrtime_stamp++;
690 			apic_nsec_since_boot += apic_nsec_per_intr;
691 			apic_hrtime_stamp++;
692 			last_count_read = apic_hertz_count;
693 			apix_redistribute_compute();
694 		}
695 
696 		apix_send_eoi();
697 
698 		return (nipl);
699 	}
700 
701 	ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);
702 
703 	/* pre-EOI handling for level-triggered interrupts */
704 	if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
705 	    (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
706 		apix_level_intr_pre_eoi(vecp->v_inum);
707 
708 	/* send back EOI */
709 	apix_send_eoi();
710 
711 	cpu_infop->aci_current[nipl] = vector;
712 	if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
713 		cpu_infop->aci_curipl = (uchar_t)nipl;
714 		cpu_infop->aci_ISR_in_progress |= 1 << nipl;
715 	}
716 
717 #ifdef	DEBUG
718 	if (vector >= APIX_IPI_MIN)
719 		return (nipl);	/* skip IPI */
720 
721 	APIC_DEBUG_BUF_PUT(vector);
722 	APIC_DEBUG_BUF_PUT(vecp->v_inum);
723 	APIC_DEBUG_BUF_PUT(nipl);
724 	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
725 	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
726 		drv_usecwait(apic_stretch_interrupts);
727 #endif /* DEBUG */
728 
729 	return (nipl);
730 }
731 
732 /*
733  * Any changes made to this function must also change X2APIC
734  * version of intr_exit.
735  */
736 static void
737 apix_intr_exit(int prev_ipl, int arg2)
738 {
739 	int cpuid = psm_get_cpu_id();
740 	apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
741 	apix_impl_t *apixp = apixs[cpuid];
742 
743 	UNREFERENCED_1PARAMETER(arg2);
744 
745 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
746 	/* ISR above current pri could not be in progress */
747 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
748 
749 	if (apixp->x_obsoletes != NULL) {
750 		if (APIX_CPU_LOCK_HELD(cpuid))
751 			return;
752 
753 		APIX_ENTER_CPU_LOCK(cpuid);
754 		(void) apix_obsolete_vector(apixp->x_obsoletes);
755 		APIX_LEAVE_CPU_LOCK(cpuid);
756 	}
757 }
758 
759 /*
760  * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
761  * given ipl, but apix never uses the TPR and we never mask a subset of the
762  * interrupts. They are either all blocked by the IF flag or all can come in.
763  *
764  * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
765  * can come in if currently enabled by the IF flag. This table shows the state
766  * of the IF flag when we leave this function.
767  *
768  *    curr IF |	ipl == 15	ipl != 15
769  *    --------+---------------------------
770  *       0    |    0		    0
771  *       1    |    0		    1
772  */
773 static void
774 apix_setspl(int ipl)
775 {
776 	/*
777 	 * Interrupts at ipl above this cannot be in progress, so the following
778 	 * mask is ok.
779 	 */
780 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
781 
782 	if (ipl == XC_HI_PIL)
783 		cli();
784 }
785 
786 int
787 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
788 {
789 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
790 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
791 	apix_vector_t *vecp = xv_vector(cpuid, vector);
792 
793 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
794 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
795 
796 	if (vecp->v_type == APIX_TYPE_FIXED)
797 		apix_intx_set_shared(vecp->v_inum, 1);
798 
799 	/* There are more interrupts, so it's already been enabled */
800 	if (vecp->v_share > 1)
801 		return (PSM_SUCCESS);
802 
803 	/* return if it is not hardware interrupt */
804 	if (vecp->v_type == APIX_TYPE_IPI)
805 		return (PSM_SUCCESS);
806 
807 	/*
808 	 * if apix_picinit() has not been called yet, just return.
809 	 * At the end of apic_picinit(), we will call setup_io_intr().
810 	 */
811 	if (!apic_picinit_called)
812 		return (PSM_SUCCESS);
813 
814 	(void) apix_setup_io_intr(vecp);
815 
816 	return (PSM_SUCCESS);
817 }
818 
819 int
820 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
821 {
822 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
823 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
824 	apix_vector_t *vecp = xv_vector(cpuid, vector);
825 
826 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
827 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
828 
829 	if (vecp->v_type == APIX_TYPE_FIXED)
830 		apix_intx_set_shared(vecp->v_inum, -1);
831 
832 	/* There are more interrupts */
833 	if (vecp->v_share > 1)
834 		return (PSM_SUCCESS);
835 
836 	/* return if it is not hardware interrupt */
837 	if (vecp->v_type == APIX_TYPE_IPI)
838 		return (PSM_SUCCESS);
839 
840 	if (!apic_picinit_called) {
841 		cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
842 		    virtvec);
843 		return (PSM_SUCCESS);
844 	}
845 
846 	apix_disable_vector(vecp);
847 
848 	return (PSM_SUCCESS);
849 }
850 
851 /*
852  * Try and disable all interrupts. We just assign interrupts to other
853  * processors based on policy. If any were bound by user request, we
854  * let them continue and return failure. We do not bother to check
855  * for cache affinity while rebinding.
856  */
857 static int
858 apix_disable_intr(processorid_t cpun)
859 {
860 	apix_impl_t *apixp = apixs[cpun];
861 	apix_vector_t *vecp, *newp;
862 	int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
863 
864 	lock_set(&apix_lock);
865 
866 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
867 	apic_cpus[cpun].aci_curipl = 0;
868 
869 	/* if this is for SUSPEND operation, skip rebinding */
870 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
871 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
872 			vecp = apixp->x_vectbl[i];
873 			if (!IS_VECT_ENABLED(vecp))
874 				continue;
875 
876 			apix_disable_vector(vecp);
877 		}
878 		lock_clear(&apix_lock);
879 		return (PSM_SUCCESS);
880 	}
881 
882 	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
883 		vecp = apixp->x_vectbl[i];
884 		if (!IS_VECT_ENABLED(vecp))
885 			continue;
886 
887 		if (vecp->v_flags & APIX_VECT_USER_BOUND) {
888 			hardbound++;
889 			continue;
890 		}
891 		type = vecp->v_type;
892 
893 		/*
894 		 * If there are bound interrupts on this cpu, then
895 		 * rebind them to other processors.
896 		 */
897 		loop = 0;
898 		do {
899 			bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
900 
901 			if (type != APIX_TYPE_MSI)
902 				newp = apix_set_cpu(vecp, bindcpu, &ret);
903 			else
904 				newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
905 		} while ((newp == NULL) && (loop++ < apic_nproc));
906 
907 		if (loop >= apic_nproc) {
908 			errbound++;
909 			cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
910 			    vecp->v_cpuid, vecp->v_vector);
911 		}
912 	}
913 
914 	lock_clear(&apix_lock);
915 
916 	if (hardbound || errbound) {
917 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
918 		    "due to user bound interrupts or failed operation",
919 		    cpun);
920 		return (PSM_FAILURE);
921 	}
922 
923 	return (PSM_SUCCESS);
924 }
925 
926 /*
927  * Bind interrupts to specified CPU
928  */
929 static void
930 apix_enable_intr(processorid_t cpun)
931 {
932 	apix_vector_t *vecp;
933 	int i, ret;
934 	processorid_t n;
935 
936 	lock_set(&apix_lock);
937 
938 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
939 
940 	/* interrupt enabling for system resume */
941 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
942 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
943 			vecp = xv_vector(cpun, i);
944 			if (!IS_VECT_ENABLED(vecp))
945 				continue;
946 
947 			apix_enable_vector(vecp);
948 		}
949 		apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
950 	}
951 
952 	for (n = 0; n < apic_nproc; n++) {
953 		if (!apic_cpu_in_range(n) || n == cpun ||
954 		    (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
955 			continue;
956 
957 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
958 			vecp = xv_vector(n, i);
959 			if (!IS_VECT_ENABLED(vecp) ||
960 			    vecp->v_bound_cpuid != cpun)
961 				continue;
962 
963 			if (vecp->v_type != APIX_TYPE_MSI)
964 				(void) apix_set_cpu(vecp, cpun, &ret);
965 			else
966 				(void) apix_grp_set_cpu(vecp, cpun, &ret);
967 		}
968 	}
969 
970 	lock_clear(&apix_lock);
971 }
972 
973 /*
974  * Allocate vector for IPI
975  * type == -1 indicates it is an internal request. Do not change
976  * resv_vector for these requests.
977  */
978 static int
979 apix_get_ipivect(int ipl, int type)
980 {
981 	uchar_t vector;
982 
983 	if ((vector = apix_alloc_ipi(ipl)) > 0) {
984 		if (type != -1)
985 			apic_resv_vector[ipl] = vector;
986 		return (vector);
987 	}
988 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
989 	return (-1);	/* shouldn't happen */
990 }
991 
992 static int
993 apix_get_clkvect(int ipl)
994 {
995 	int vector;
996 
997 	if ((vector = apix_get_ipivect(ipl, -1)) == -1)
998 		return (-1);
999 
1000 	apic_clkvect = vector - APIC_BASE_VECT;
1001 	APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1002 	    apic_clkvect));
1003 	return (vector);
1004 }
1005 
1006 static int
1007 apix_post_cpu_start()
1008 {
1009 	int cpun;
1010 	static int cpus_started = 1;
1011 
1012 	/* We know this CPU + BSP  started successfully. */
1013 	cpus_started++;
1014 
1015 	/*
1016 	 * On BSP we would have enabled X2APIC, if supported by processor,
1017 	 * in acpi_probe(), but on AP we do it here.
1018 	 *
1019 	 * We enable X2APIC mode only if BSP is running in X2APIC & the
1020 	 * local APIC mode of the current CPU is MMIO (xAPIC).
1021 	 */
1022 	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1023 	    apic_local_mode() == LOCAL_APIC) {
1024 		apic_enable_x2apic();
1025 	}
1026 
1027 	/*
1028 	 * Switch back to x2apic IPI sending method for performance when target
1029 	 * CPU has entered x2apic mode.
1030 	 */
1031 	if (apic_mode == LOCAL_X2APIC) {
1032 		apic_switch_ipi_callback(B_FALSE);
1033 	}
1034 
1035 	splx(ipltospl(LOCK_LEVEL));
1036 	apix_init_intr();
1037 
1038 	/*
1039 	 * since some systems don't enable the internal cache on the non-boot
1040 	 * cpus, so we have to enable them here
1041 	 */
1042 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1043 
1044 #ifdef	DEBUG
1045 	APIC_AV_PENDING_SET();
1046 #else
1047 	if (apic_mode == LOCAL_APIC)
1048 		APIC_AV_PENDING_SET();
1049 #endif	/* DEBUG */
1050 
1051 	/*
1052 	 * We may be booting, or resuming from suspend; aci_status will
1053 	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1054 	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1055 	 */
1056 	cpun = psm_get_cpu_id();
1057 	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1058 
1059 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1060 
1061 	return (PSM_SUCCESS);
1062 }
1063 
1064 /*
1065  * If this module needs a periodic handler for the interrupt distribution, it
1066  * can be added here. The argument to the periodic handler is not currently
1067  * used, but is reserved for future.
1068  */
1069 static void
1070 apix_post_cyclic_setup(void *arg)
1071 {
1072 	UNREFERENCED_1PARAMETER(arg);
1073 
1074 	cyc_handler_t cyh;
1075 	cyc_time_t cyt;
1076 
1077 	/* cpu_lock is held */
1078 	/* set up a periodic handler for intr redistribution */
1079 
1080 	/*
1081 	 * In peridoc mode intr redistribution processing is done in
1082 	 * apic_intr_enter during clk intr processing
1083 	 */
1084 	if (!apic_oneshot)
1085 		return;
1086 
1087 	/*
1088 	 * Register a periodical handler for the redistribution processing.
1089 	 * Though we would generally prefer to use the DDI interface for
1090 	 * periodic handler invocation, ddi_periodic_add(9F), we are
1091 	 * unfortunately already holding cpu_lock, which ddi_periodic_add will
1092 	 * attempt to take for us.  Thus, we add our own cyclic directly:
1093 	 */
1094 	cyh.cyh_func = (void (*)(void *))apix_redistribute_compute;
1095 	cyh.cyh_arg = NULL;
1096 	cyh.cyh_level = CY_LOW_LEVEL;
1097 
1098 	cyt.cyt_when = 0;
1099 	cyt.cyt_interval = apic_redistribute_sample_interval;
1100 
1101 	apic_cyclic_id = cyclic_add(&cyh, &cyt);
1102 }
1103 
1104 /*
1105  * Called the first time we enable x2apic mode on this cpu.
1106  * Update some of the function pointers to use x2apic routines.
1107  */
1108 void
1109 x2apic_update_psm()
1110 {
1111 	struct psm_ops *pops = &apix_ops;
1112 
1113 	ASSERT(pops != NULL);
1114 
1115 	/*
1116 	 * The pcplusmp module's version of x2apic_update_psm makes additional
1117 	 * changes that we do not have to make here. It needs to make those
1118 	 * changes because pcplusmp relies on the TPR register and the means of
1119 	 * addressing that changes when using the local apic versus the x2apic.
1120 	 * It's also worth noting that the apix driver specific function end up
1121 	 * being apix_foo as opposed to apic_foo and x2apic_foo.
1122 	 */
1123 	pops->psm_send_ipi = x2apic_send_ipi;
1124 
1125 	send_dirintf = pops->psm_send_ipi;
1126 
1127 	apic_mode = LOCAL_X2APIC;
1128 	apic_change_ops();
1129 }
1130 
1131 /*
1132  * This function provides external interface to the nexus for all
1133  * functionalities related to the new DDI interrupt framework.
1134  *
1135  * Input:
1136  * dip     - pointer to the dev_info structure of the requested device
1137  * hdlp    - pointer to the internal interrupt handle structure for the
1138  *	     requested interrupt
1139  * intr_op - opcode for this call
1140  * result  - pointer to the integer that will hold the result to be
1141  *	     passed back if return value is PSM_SUCCESS
1142  *
1143  * Output:
1144  * return value is either PSM_SUCCESS or PSM_FAILURE
1145  */
1146 static int
1147 apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1148     psm_intr_op_t intr_op, int *result)
1149 {
1150 	int		cap;
1151 	apix_vector_t	*vecp, *newvecp;
1152 	struct intrspec *ispec, intr_spec;
1153 	processorid_t target;
1154 
1155 	ispec = &intr_spec;
1156 	ispec->intrspec_pri = hdlp->ih_pri;
1157 	ispec->intrspec_vec = hdlp->ih_inum;
1158 	ispec->intrspec_func = hdlp->ih_cb_func;
1159 
1160 	switch (intr_op) {
1161 	case PSM_INTR_OP_ALLOC_VECTORS:
1162 		switch (hdlp->ih_type) {
1163 		case DDI_INTR_TYPE_MSI:
1164 			/* allocate MSI vectors */
1165 			*result = apix_alloc_msi(dip, hdlp->ih_inum,
1166 			    hdlp->ih_scratch1,
1167 			    (int)(uintptr_t)hdlp->ih_scratch2);
1168 			break;
1169 		case DDI_INTR_TYPE_MSIX:
1170 			/* allocate MSI-X vectors */
1171 			*result = apix_alloc_msix(dip, hdlp->ih_inum,
1172 			    hdlp->ih_scratch1,
1173 			    (int)(uintptr_t)hdlp->ih_scratch2);
1174 			break;
1175 		case DDI_INTR_TYPE_FIXED:
1176 			/* allocate or share vector for fixed */
1177 			if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1178 				return (PSM_FAILURE);
1179 			}
1180 			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1181 			*result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1182 			    ispec);
1183 			break;
1184 		default:
1185 			return (PSM_FAILURE);
1186 		}
1187 		break;
1188 	case PSM_INTR_OP_FREE_VECTORS:
1189 		apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1190 		    hdlp->ih_type);
1191 		break;
1192 	case PSM_INTR_OP_XLATE_VECTOR:
1193 		/*
1194 		 * Vectors are allocated by ALLOC and freed by FREE.
1195 		 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1196 		 */
1197 		*result = APIX_INVALID_VECT;
1198 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1199 		if (vecp != NULL) {
1200 			*result = APIX_VIRTVECTOR(vecp->v_cpuid,
1201 			    vecp->v_vector);
1202 			break;
1203 		}
1204 
1205 		/*
1206 		 * No vector to device mapping exists. If this is FIXED type
1207 		 * then check if this IRQ is already mapped for another device
1208 		 * then return the vector number for it (i.e. shared IRQ case).
1209 		 * Otherwise, return PSM_FAILURE.
1210 		 */
1211 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1212 			vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1213 			    ispec);
1214 			*result = (vecp == NULL) ? APIX_INVALID_VECT :
1215 			    APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1216 		}
1217 		if (*result == APIX_INVALID_VECT)
1218 			return (PSM_FAILURE);
1219 		break;
1220 	case PSM_INTR_OP_GET_PENDING:
1221 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1222 		if (vecp == NULL)
1223 			return (PSM_FAILURE);
1224 
1225 		*result = apix_get_pending(vecp);
1226 		break;
1227 	case PSM_INTR_OP_CLEAR_MASK:
1228 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1229 			return (PSM_FAILURE);
1230 
1231 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1232 		if (vecp == NULL)
1233 			return (PSM_FAILURE);
1234 
1235 		apix_intx_clear_mask(vecp->v_inum);
1236 		break;
1237 	case PSM_INTR_OP_SET_MASK:
1238 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1239 			return (PSM_FAILURE);
1240 
1241 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1242 		if (vecp == NULL)
1243 			return (PSM_FAILURE);
1244 
1245 		apix_intx_set_mask(vecp->v_inum);
1246 		break;
1247 	case PSM_INTR_OP_GET_SHARED:
1248 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1249 			return (PSM_FAILURE);
1250 
1251 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1252 		if (vecp == NULL)
1253 			return (PSM_FAILURE);
1254 
1255 		*result = apix_intx_get_shared(vecp->v_inum);
1256 		break;
1257 	case PSM_INTR_OP_SET_PRI:
1258 		/*
1259 		 * Called prior to adding the interrupt handler or when
1260 		 * an interrupt handler is unassigned.
1261 		 */
1262 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1263 			return (PSM_SUCCESS);
1264 
1265 		if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1266 			return (PSM_FAILURE);
1267 
1268 		break;
1269 	case PSM_INTR_OP_SET_CPU:
1270 	case PSM_INTR_OP_GRP_SET_CPU:
1271 		/*
1272 		 * The interrupt handle given here has been allocated
1273 		 * specifically for this command, and ih_private carries
1274 		 * a CPU value.
1275 		 */
1276 		*result = EINVAL;
1277 		target = (int)(intptr_t)hdlp->ih_private;
1278 		if (!apic_cpu_in_range(target)) {
1279 			DDI_INTR_IMPLDBG((CE_WARN,
1280 			    "[grp_]set_cpu: cpu out of range: %d\n", target));
1281 			return (PSM_FAILURE);
1282 		}
1283 
1284 		lock_set(&apix_lock);
1285 
1286 		vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1287 		if (!IS_VECT_ENABLED(vecp)) {
1288 			DDI_INTR_IMPLDBG((CE_WARN,
1289 			    "[grp]_set_cpu: invalid vector 0x%x\n",
1290 			    hdlp->ih_vector));
1291 			lock_clear(&apix_lock);
1292 			return (PSM_FAILURE);
1293 		}
1294 
1295 		*result = 0;
1296 
1297 		if (intr_op == PSM_INTR_OP_SET_CPU)
1298 			newvecp = apix_set_cpu(vecp, target, result);
1299 		else
1300 			newvecp = apix_grp_set_cpu(vecp, target, result);
1301 
1302 		lock_clear(&apix_lock);
1303 
1304 		if (newvecp == NULL) {
1305 			*result = EIO;
1306 			return (PSM_FAILURE);
1307 		}
1308 		newvecp->v_bound_cpuid = target;
1309 		hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1310 		    newvecp->v_vector);
1311 		break;
1312 
1313 	case PSM_INTR_OP_GET_INTR:
1314 		/*
1315 		 * The interrupt handle given here has been allocated
1316 		 * specifically for this command, and ih_private carries
1317 		 * a pointer to a apic_get_intr_t.
1318 		 */
1319 		if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1320 			return (PSM_FAILURE);
1321 		break;
1322 
1323 	case PSM_INTR_OP_CHECK_MSI:
1324 		/*
1325 		 * Check MSI/X is supported or not at APIC level and
1326 		 * masked off the MSI/X bits in hdlp->ih_type if not
1327 		 * supported before return.  If MSI/X is supported,
1328 		 * leave the ih_type unchanged and return.
1329 		 *
1330 		 * hdlp->ih_type passed in from the nexus has all the
1331 		 * interrupt types supported by the device.
1332 		 */
1333 		if (apic_support_msi == 0) {	/* uninitialized */
1334 			/*
1335 			 * if apic_support_msi is not set, call
1336 			 * apic_check_msi_support() to check whether msi
1337 			 * is supported first
1338 			 */
1339 			if (apic_check_msi_support() == PSM_SUCCESS)
1340 				apic_support_msi = 1;	/* supported */
1341 			else
1342 				apic_support_msi = -1;	/* not-supported */
1343 		}
1344 		if (apic_support_msi == 1) {
1345 			if (apic_msix_enable)
1346 				*result = hdlp->ih_type;
1347 			else
1348 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1349 		} else
1350 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1351 			    DDI_INTR_TYPE_MSIX);
1352 		break;
1353 	case PSM_INTR_OP_GET_CAP:
1354 		cap = DDI_INTR_FLAG_PENDING;
1355 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1356 			cap |= DDI_INTR_FLAG_MASKABLE;
1357 		*result = cap;
1358 		break;
1359 	case PSM_INTR_OP_APIC_TYPE:
1360 		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1361 		    apix_get_apic_type();
1362 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1363 		    APIX_IPI_MIN;
1364 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1365 		    apic_nproc;
1366 		hdlp->ih_ver = apic_get_apic_version();
1367 		break;
1368 	case PSM_INTR_OP_SET_CAP:
1369 	default:
1370 		return (PSM_FAILURE);
1371 	}
1372 
1373 	return (PSM_SUCCESS);
1374 }
1375 
1376 static void
1377 apix_cleanup_busy(void)
1378 {
1379 	int i, j;
1380 	apix_vector_t *vecp;
1381 
1382 	for (i = 0; i < apic_nproc; i++) {
1383 		if (!apic_cpu_in_range(i))
1384 			continue;
1385 		apic_cpus[i].aci_busy = 0;
1386 		for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1387 			if ((vecp = xv_vector(i, j)) != NULL)
1388 				vecp->v_busy = 0;
1389 		}
1390 	}
1391 }
1392 
1393 static void
1394 apix_redistribute_compute(void)
1395 {
1396 	int	i, j, max_busy;
1397 
1398 	if (!apic_enable_dynamic_migration)
1399 		return;
1400 
1401 	if (++apic_nticks == apic_sample_factor_redistribution) {
1402 		/*
1403 		 * Time to call apic_intr_redistribute().
1404 		 * reset apic_nticks. This will cause max_busy
1405 		 * to be calculated below and if it is more than
1406 		 * apic_int_busy, we will do the whole thing
1407 		 */
1408 		apic_nticks = 0;
1409 	}
1410 	max_busy = 0;
1411 	for (i = 0; i < apic_nproc; i++) {
1412 		if (!apic_cpu_in_range(i))
1413 			continue;
1414 		/*
1415 		 * Check if curipl is non zero & if ISR is in
1416 		 * progress
1417 		 */
1418 		if (((j = apic_cpus[i].aci_curipl) != 0) &&
1419 		    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1420 
1421 			int	vect;
1422 			apic_cpus[i].aci_busy++;
1423 			vect = apic_cpus[i].aci_current[j];
1424 			apixs[i]->x_vectbl[vect]->v_busy++;
1425 		}
1426 
1427 		if (!apic_nticks &&
1428 		    (apic_cpus[i].aci_busy > max_busy))
1429 			max_busy = apic_cpus[i].aci_busy;
1430 	}
1431 	if (!apic_nticks) {
1432 		if (max_busy > apic_int_busy_mark) {
1433 		/*
1434 		 * We could make the following check be
1435 		 * skipped > 1 in which case, we get a
1436 		 * redistribution at half the busy mark (due to
1437 		 * double interval). Need to be able to collect
1438 		 * more empirical data to decide if that is a
1439 		 * good strategy. Punt for now.
1440 		 */
1441 			apix_cleanup_busy();
1442 			apic_skipped_redistribute = 0;
1443 		} else
1444 			apic_skipped_redistribute++;
1445 	}
1446 }
1447 
1448 /*
1449  * intr_ops() service routines
1450  */
1451 
1452 static int
1453 apix_get_pending(apix_vector_t *vecp)
1454 {
1455 	int bit, index, irr, pending;
1456 
1457 	/* need to get on the bound cpu */
1458 	mutex_enter(&cpu_lock);
1459 	affinity_set(vecp->v_cpuid);
1460 
1461 	index = vecp->v_vector / 32;
1462 	bit = vecp->v_vector % 32;
1463 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1464 
1465 	affinity_clear();
1466 	mutex_exit(&cpu_lock);
1467 
1468 	pending = (irr & (1 << bit)) ? 1 : 0;
1469 	if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1470 		pending = apix_intx_get_pending(vecp->v_inum);
1471 
1472 	return (pending);
1473 }
1474 
1475 static apix_vector_t *
1476 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1477 {
1478 	apix_vector_t *vecp;
1479 	processorid_t cpuid;
1480 	int32_t virt_vec = 0;
1481 
1482 	switch (flags & PSMGI_INTRBY_FLAGS) {
1483 	case PSMGI_INTRBY_IRQ:
1484 		return (apix_intx_get_vector(hdlp->ih_vector));
1485 	case PSMGI_INTRBY_VEC:
1486 		virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1487 
1488 		cpuid = APIX_VIRTVEC_CPU(virt_vec);
1489 		if (!apic_cpu_in_range(cpuid))
1490 			return (NULL);
1491 
1492 		vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1493 		break;
1494 	case PSMGI_INTRBY_DEFAULT:
1495 		vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1496 		    hdlp->ih_type);
1497 		break;
1498 	default:
1499 		return (NULL);
1500 	}
1501 
1502 	return (vecp);
1503 }
1504 
1505 static int
1506 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1507     apic_get_intr_t *intr_params_p)
1508 {
1509 	apix_vector_t *vecp;
1510 	struct autovec *av_dev;
1511 	int i;
1512 
1513 	vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1514 	if (IS_VECT_FREE(vecp)) {
1515 		intr_params_p->avgi_num_devs = 0;
1516 		intr_params_p->avgi_cpu_id = 0;
1517 		intr_params_p->avgi_req_flags = 0;
1518 		return (PSM_SUCCESS);
1519 	}
1520 
1521 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1522 		intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1523 
1524 		/* Return user bound info for intrd. */
1525 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1526 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1527 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1528 		}
1529 	}
1530 
1531 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1532 		intr_params_p->avgi_vector = vecp->v_vector;
1533 
1534 	if (intr_params_p->avgi_req_flags &
1535 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1536 		/* Get number of devices from apic_irq table shared field. */
1537 		intr_params_p->avgi_num_devs = vecp->v_share;
1538 
1539 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
1540 
1541 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
1542 
1543 		/* Some devices have NULL dip.  Don't count these. */
1544 		if (intr_params_p->avgi_num_devs > 0) {
1545 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1546 			    av_dev = av_dev->av_link) {
1547 				if (av_dev->av_vector && av_dev->av_dip)
1548 					i++;
1549 			}
1550 			intr_params_p->avgi_num_devs =
1551 			    (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1552 		}
1553 
1554 		/* There are no viable dips to return. */
1555 		if (intr_params_p->avgi_num_devs == 0) {
1556 			intr_params_p->avgi_dip_list = NULL;
1557 
1558 		} else {	/* Return list of dips */
1559 
1560 			/* Allocate space in array for that number of devs. */
1561 			intr_params_p->avgi_dip_list = kmem_zalloc(
1562 			    intr_params_p->avgi_num_devs *
1563 			    sizeof (dev_info_t *),
1564 			    KM_NOSLEEP);
1565 			if (intr_params_p->avgi_dip_list == NULL) {
1566 				DDI_INTR_IMPLDBG((CE_WARN,
1567 				    "apix_get_vector_intr_info: no memory"));
1568 				return (PSM_FAILURE);
1569 			}
1570 
1571 			/*
1572 			 * Loop through the device list of the autovec table
1573 			 * filling in the dip array.
1574 			 *
1575 			 * Note that the autovect table may have some special
1576 			 * entries which contain NULL dips.  These will be
1577 			 * ignored.
1578 			 */
1579 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1580 			    av_dev = av_dev->av_link) {
1581 				if (av_dev->av_vector && av_dev->av_dip)
1582 					intr_params_p->avgi_dip_list[i++] =
1583 					    av_dev->av_dip;
1584 			}
1585 		}
1586 	}
1587 
1588 	return (PSM_SUCCESS);
1589 }
1590 
1591 static char *
1592 apix_get_apic_type(void)
1593 {
1594 	return (apix_psm_info.p_mach_idstring);
1595 }
1596 
1597 apix_vector_t *
1598 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1599 {
1600 	apix_vector_t *newp = NULL;
1601 	dev_info_t *dip;
1602 	int inum, cap_ptr;
1603 	ddi_acc_handle_t handle;
1604 	ddi_intr_msix_t *msix_p = NULL;
1605 	ushort_t msix_ctrl;
1606 	uintptr_t off;
1607 	uint32_t mask;
1608 
1609 	ASSERT(LOCK_HELD(&apix_lock));
1610 	*result = ENXIO;
1611 
1612 	/* Fail if this is an MSI intr and is part of a group. */
1613 	if (vecp->v_type == APIX_TYPE_MSI) {
1614 		if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1615 			return (NULL);
1616 		else
1617 			return (apix_grp_set_cpu(vecp, new_cpu, result));
1618 	}
1619 
1620 	/*
1621 	 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1622 	 */
1623 	if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1624 		if ((dip = APIX_GET_DIP(vecp)) == NULL)
1625 			return (NULL);
1626 		inum = vecp->v_devp->dv_inum;
1627 
1628 		handle = i_ddi_get_pci_config_handle(dip);
1629 		cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1630 		msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1631 		if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1632 			/*
1633 			 * Function is not masked, then mask "inum"th
1634 			 * entry in the MSI-X table
1635 			 */
1636 			msix_p = i_ddi_get_msix(dip);
1637 			off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1638 			    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1639 			mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1640 			ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1641 			    mask | 1);
1642 		}
1643 	}
1644 
1645 	*result = 0;
1646 	if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1647 		*result = EIO;
1648 
1649 	/* Restore mask bit */
1650 	if (msix_p != NULL)
1651 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1652 
1653 	return (newp);
1654 }
1655 
1656 /*
1657  * Set cpu for MSIs
1658  */
1659 apix_vector_t *
1660 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1661 {
1662 	apix_vector_t *newp, *vp;
1663 	uint32_t orig_cpu = vecp->v_cpuid;
1664 	int orig_vect = vecp->v_vector;
1665 	int i, num_vectors, cap_ptr, msi_mask_off;
1666 	uint32_t msi_pvm;
1667 	ushort_t msi_ctrl;
1668 	ddi_acc_handle_t handle;
1669 	dev_info_t *dip;
1670 
1671 	APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1672 	    " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1673 
1674 	ASSERT(LOCK_HELD(&apix_lock));
1675 
1676 	*result = ENXIO;
1677 
1678 	if (vecp->v_type != APIX_TYPE_MSI) {
1679 		DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1680 		return (NULL);
1681 	}
1682 
1683 	if ((dip = APIX_GET_DIP(vecp)) == NULL)
1684 		return (NULL);
1685 
1686 	num_vectors = i_ddi_intr_get_current_nintrs(dip);
1687 	if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1688 		APIC_VERBOSE(INTR, (CE_WARN,
1689 		    "set_grp: base vec not part of a grp or not aligned: "
1690 		    "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1691 		return (NULL);
1692 	}
1693 
1694 	if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1695 		return (NULL);
1696 
1697 	*result = EIO;
1698 	for (i = 1; i < num_vectors; i++) {
1699 		if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1700 			return (NULL);
1701 #ifdef DEBUG
1702 		/*
1703 		 * Sanity check: CPU and dip is the same for all entries.
1704 		 * May be called when first msi to be enabled, at this time
1705 		 * add_avintr() is not called for other msi
1706 		 */
1707 		if ((vp->v_share != 0) &&
1708 		    ((APIX_GET_DIP(vp) != dip) ||
1709 		    (vp->v_cpuid != vecp->v_cpuid))) {
1710 			APIC_VERBOSE(INTR, (CE_WARN,
1711 			    "set_grp: cpu or dip for vec 0x%x difft than for "
1712 			    "vec 0x%x\n", orig_vect, orig_vect + i));
1713 			APIC_VERBOSE(INTR, (CE_WARN,
1714 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1715 			    vp->v_cpuid, (void *)dip,
1716 			    (void *)APIX_GET_DIP(vp)));
1717 			return (NULL);
1718 		}
1719 #endif /* DEBUG */
1720 	}
1721 
1722 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1723 	handle = i_ddi_get_pci_config_handle(dip);
1724 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1725 
1726 	/* MSI Per vector masking is supported. */
1727 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1728 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
1729 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1730 		else
1731 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1732 		msi_pvm = pci_config_get32(handle, msi_mask_off);
1733 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1734 		APIC_VERBOSE(INTR, (CE_CONT,
1735 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
1736 		    pci_config_get32(handle, msi_mask_off)));
1737 	}
1738 
1739 	if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1740 		*result = 0;
1741 
1742 	/* Reenable vectors if per vector masking is supported. */
1743 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1744 		pci_config_put32(handle, msi_mask_off, msi_pvm);
1745 		APIC_VERBOSE(INTR, (CE_CONT,
1746 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
1747 		    pci_config_get32(handle, msi_mask_off)));
1748 	}
1749 
1750 	return (newp);
1751 }
1752 
1753 void
1754 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1755 {
1756 	apic_irq_t *irqp;
1757 
1758 	mutex_enter(&airq_mutex);
1759 	irqp = apic_irq_table[irqno];
1760 	irqp->airq_cpu = cpuid;
1761 	irqp->airq_vector = vector;
1762 	apic_record_rdt_entry(irqp, irqno);
1763 	mutex_exit(&airq_mutex);
1764 }
1765 
1766 apix_vector_t *
1767 apix_intx_get_vector(int irqno)
1768 {
1769 	apic_irq_t *irqp;
1770 	uint32_t cpuid;
1771 	uchar_t vector;
1772 
1773 	mutex_enter(&airq_mutex);
1774 	irqp = apic_irq_table[irqno & 0xff];
1775 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1776 		mutex_exit(&airq_mutex);
1777 		return (NULL);
1778 	}
1779 	cpuid = irqp->airq_cpu;
1780 	vector = irqp->airq_vector;
1781 	mutex_exit(&airq_mutex);
1782 
1783 	return (xv_vector(cpuid, vector));
1784 }
1785 
1786 /*
1787  * Must called with interrupts disabled and apic_ioapic_lock held
1788  */
1789 void
1790 apix_intx_enable(int irqno)
1791 {
1792 	uchar_t ioapicindex, intin;
1793 	apic_irq_t *irqp = apic_irq_table[irqno];
1794 	ioapic_rdt_t irdt;
1795 	apic_cpus_info_t *cpu_infop;
1796 	apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1797 
1798 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1799 
1800 	ioapicindex = irqp->airq_ioapicindex;
1801 	intin = irqp->airq_intin_no;
1802 	cpu_infop =  &apic_cpus[irqp->airq_cpu];
1803 
1804 	irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1805 	irdt.ir_hi = cpu_infop->aci_local_id;
1806 
1807 	apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1808 	    vecp->v_type, 1, ioapicindex);
1809 	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1810 	    (void *)&irdt, vecp->v_type, 1);
1811 	apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1812 
1813 	/* write RDT entry high dword - destination */
1814 	WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1815 	    irdt.ir_hi);
1816 
1817 	/* Write the vector, trigger, and polarity portion of the RDT */
1818 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1819 
1820 	vecp->v_state = APIX_STATE_ENABLED;
1821 
1822 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1823 	    " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1824 	    ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1825 }
1826 
1827 /*
1828  * Must called with interrupts disabled and apic_ioapic_lock held
1829  */
1830 void
1831 apix_intx_disable(int irqno)
1832 {
1833 	apic_irq_t *irqp = apic_irq_table[irqno];
1834 	int ioapicindex, intin;
1835 
1836 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1837 	/*
1838 	 * The assumption here is that this is safe, even for
1839 	 * systems with IOAPICs that suffer from the hardware
1840 	 * erratum because all devices have been quiesced before
1841 	 * they unregister their interrupt handlers.  If that
1842 	 * assumption turns out to be false, this mask operation
1843 	 * can induce the same erratum result we're trying to
1844 	 * avoid.
1845 	 */
1846 	ioapicindex = irqp->airq_ioapicindex;
1847 	intin = irqp->airq_intin_no;
1848 	ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1849 
1850 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1851 	    " intin 0x%x\n", ioapicindex, intin));
1852 }
1853 
1854 void
1855 apix_intx_free(int irqno)
1856 {
1857 	apic_irq_t *irqp;
1858 
1859 	mutex_enter(&airq_mutex);
1860 	irqp = apic_irq_table[irqno];
1861 
1862 	if (IS_IRQ_FREE(irqp)) {
1863 		mutex_exit(&airq_mutex);
1864 		return;
1865 	}
1866 
1867 	irqp->airq_mps_intr_index = FREE_INDEX;
1868 	irqp->airq_cpu = IRQ_UNINIT;
1869 	irqp->airq_vector = APIX_INVALID_VECT;
1870 	mutex_exit(&airq_mutex);
1871 }
1872 
1873 #ifdef DEBUG
1874 int apix_intr_deliver_timeouts = 0;
1875 int apix_intr_rirr_timeouts = 0;
1876 int apix_intr_rirr_reset_failure = 0;
1877 #endif
1878 int apix_max_reps_irr_pending = 10;
1879 
1880 #define	GET_RDT_BITS(ioapic, intin, bits)	\
1881 	(READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))
1882 #define	APIX_CHECK_IRR_DELAY	drv_usectohz(5000)
1883 
1884 int
1885 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1886 {
1887 	apic_irq_t *irqp = apic_irq_table[irqno];
1888 	ulong_t iflag;
1889 	int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1890 
1891 	ASSERT(irqp != NULL);
1892 
1893 	iflag = intr_clear();
1894 	lock_set(&apic_ioapic_lock);
1895 
1896 	ioapic_ix = irqp->airq_ioapicindex;
1897 	intin_no = irqp->airq_intin_no;
1898 	level = apic_level_intr[irqno];
1899 
1900 	/*
1901 	 * Wait for the delivery status bit to be cleared. This should
1902 	 * be a very small amount of time.
1903 	 */
1904 	repeats = 0;
1905 	do {
1906 		repeats++;
1907 
1908 		for (waited = 0; waited < apic_max_reps_clear_pending;
1909 		    waited++) {
1910 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1911 				break;
1912 		}
1913 		if (!level)
1914 			break;
1915 
1916 		/*
1917 		 * Mask the RDT entry for level-triggered interrupts.
1918 		 */
1919 		irqp->airq_rdt_entry |= AV_MASK;
1920 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1921 		    intin_no);
1922 		if ((masked = (rdt_entry & AV_MASK)) == 0) {
1923 			/* Mask it */
1924 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1925 			    AV_MASK | rdt_entry);
1926 		}
1927 
1928 		/*
1929 		 * If there was a race and an interrupt was injected
1930 		 * just before we masked, check for that case here.
1931 		 * Then, unmask the RDT entry and try again.  If we're
1932 		 * on our last try, don't unmask (because we want the
1933 		 * RDT entry to remain masked for the rest of the
1934 		 * function).
1935 		 */
1936 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1937 		    intin_no);
1938 		if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1939 		    (repeats < apic_max_reps_clear_pending)) {
1940 			/* Unmask it */
1941 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1942 			    intin_no, rdt_entry & ~AV_MASK);
1943 			irqp->airq_rdt_entry &= ~AV_MASK;
1944 		}
1945 	} while ((rdt_entry & AV_PENDING) &&
1946 	    (repeats < apic_max_reps_clear_pending));
1947 
1948 #ifdef DEBUG
1949 	if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1950 		apix_intr_deliver_timeouts++;
1951 #endif
1952 
1953 	if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1954 		goto done;
1955 
1956 	/*
1957 	 * wait for remote IRR to be cleared for level-triggered
1958 	 * interrupts
1959 	 */
1960 	repeats = 0;
1961 	do {
1962 		repeats++;
1963 
1964 		for (waited = 0; waited < apic_max_reps_clear_pending;
1965 		    waited++) {
1966 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1967 			    == 0)
1968 				break;
1969 		}
1970 
1971 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1972 			lock_clear(&apic_ioapic_lock);
1973 			intr_restore(iflag);
1974 
1975 			delay(APIX_CHECK_IRR_DELAY);
1976 
1977 			iflag = intr_clear();
1978 			lock_set(&apic_ioapic_lock);
1979 		}
1980 	} while (repeats < apix_max_reps_irr_pending);
1981 
1982 	if (repeats >= apix_max_reps_irr_pending) {
1983 #ifdef DEBUG
1984 		apix_intr_rirr_timeouts++;
1985 #endif
1986 
1987 		/*
1988 		 * If we waited and the Remote IRR bit is still not cleared,
1989 		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
1990 		 * times for this interrupt, try the last-ditch workaround:
1991 		 */
1992 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1993 			/*
1994 			 * Trying to clear the bit through normal
1995 			 * channels has failed.  So as a last-ditch
1996 			 * effort, try to set the trigger mode to
1997 			 * edge, then to level.  This has been
1998 			 * observed to work on many systems.
1999 			 */
2000 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2001 			    intin_no,
2002 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2003 			    intin_no) & ~AV_LEVEL);
2004 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2005 			    intin_no,
2006 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2007 			    intin_no) | AV_LEVEL);
2008 		}
2009 
2010 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2011 #ifdef DEBUG
2012 			apix_intr_rirr_reset_failure++;
2013 #endif
2014 			lock_clear(&apic_ioapic_lock);
2015 			intr_restore(iflag);
2016 			prom_printf("apix: Remote IRR still "
2017 			    "not clear for IOAPIC %d intin %d.\n"
2018 			    "\tInterrupts to this pin may cease "
2019 			    "functioning.\n", ioapic_ix, intin_no);
2020 			return (1);	/* return failure */
2021 		}
2022 	}
2023 
2024 done:
2025 	/* change apic_irq_table */
2026 	lock_clear(&apic_ioapic_lock);
2027 	intr_restore(iflag);
2028 	apix_intx_set_vector(irqno, cpuid, vector);
2029 	iflag = intr_clear();
2030 	lock_set(&apic_ioapic_lock);
2031 
2032 	/* reprogramme IO-APIC RDT entry */
2033 	apix_intx_enable(irqno);
2034 
2035 	lock_clear(&apic_ioapic_lock);
2036 	intr_restore(iflag);
2037 
2038 	return (0);
2039 }
2040 
2041 static int
2042 apix_intx_get_pending(int irqno)
2043 {
2044 	apic_irq_t *irqp;
2045 	int intin, ioapicindex, pending;
2046 	ulong_t iflag;
2047 
2048 	mutex_enter(&airq_mutex);
2049 	irqp = apic_irq_table[irqno];
2050 	if (IS_IRQ_FREE(irqp)) {
2051 		mutex_exit(&airq_mutex);
2052 		return (0);
2053 	}
2054 
2055 	/* check IO-APIC delivery status */
2056 	intin = irqp->airq_intin_no;
2057 	ioapicindex = irqp->airq_ioapicindex;
2058 	mutex_exit(&airq_mutex);
2059 
2060 	iflag = intr_clear();
2061 	lock_set(&apic_ioapic_lock);
2062 
2063 	pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2064 	    AV_PENDING) ? 1 : 0;
2065 
2066 	lock_clear(&apic_ioapic_lock);
2067 	intr_restore(iflag);
2068 
2069 	return (pending);
2070 }
2071 
2072 /*
2073  * This function will mask the interrupt on the I/O APIC
2074  */
2075 static void
2076 apix_intx_set_mask(int irqno)
2077 {
2078 	int intin, ioapixindex, rdt_entry;
2079 	ulong_t iflag;
2080 	apic_irq_t *irqp;
2081 
2082 	mutex_enter(&airq_mutex);
2083 	irqp = apic_irq_table[irqno];
2084 
2085 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2086 
2087 	intin = irqp->airq_intin_no;
2088 	ioapixindex = irqp->airq_ioapicindex;
2089 	mutex_exit(&airq_mutex);
2090 
2091 	iflag = intr_clear();
2092 	lock_set(&apic_ioapic_lock);
2093 
2094 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2095 
2096 	/* clear mask */
2097 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2098 	    (AV_MASK | rdt_entry));
2099 
2100 	lock_clear(&apic_ioapic_lock);
2101 	intr_restore(iflag);
2102 }
2103 
2104 /*
2105  * This function will clear the mask for the interrupt on the I/O APIC
2106  */
2107 static void
2108 apix_intx_clear_mask(int irqno)
2109 {
2110 	int intin, ioapixindex, rdt_entry;
2111 	ulong_t iflag;
2112 	apic_irq_t *irqp;
2113 
2114 	mutex_enter(&airq_mutex);
2115 	irqp = apic_irq_table[irqno];
2116 
2117 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2118 
2119 	intin = irqp->airq_intin_no;
2120 	ioapixindex = irqp->airq_ioapicindex;
2121 	mutex_exit(&airq_mutex);
2122 
2123 	iflag = intr_clear();
2124 	lock_set(&apic_ioapic_lock);
2125 
2126 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2127 
2128 	/* clear mask */
2129 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2130 	    ((~AV_MASK) & rdt_entry));
2131 
2132 	lock_clear(&apic_ioapic_lock);
2133 	intr_restore(iflag);
2134 }
2135 
2136 /*
2137  * For level-triggered interrupt, mask the IRQ line. Mask means
2138  * new interrupts will not be delivered. The interrupt already
2139  * accepted by a local APIC is not affected
2140  */
2141 void
2142 apix_level_intr_pre_eoi(int irq)
2143 {
2144 	apic_irq_t *irqp = apic_irq_table[irq];
2145 	int apic_ix, intin_ix;
2146 
2147 	if (irqp == NULL)
2148 		return;
2149 
2150 	ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2151 
2152 	lock_set(&apic_ioapic_lock);
2153 
2154 	intin_ix = irqp->airq_intin_no;
2155 	apic_ix = irqp->airq_ioapicindex;
2156 
2157 	if (irqp->airq_cpu != CPU->cpu_id) {
2158 		if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2159 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2160 		lock_clear(&apic_ioapic_lock);
2161 		return;
2162 	}
2163 
2164 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2165 		/*
2166 		 * This is a IOxAPIC and there is EOI register:
2167 		 * 	Change the vector to reserved unused vector, so that
2168 		 * 	the EOI	from Local APIC won't clear the Remote IRR for
2169 		 * 	this level trigger interrupt. Instead, we'll manually
2170 		 * 	clear it in apix_post_hardint() after ISR handling.
2171 		 */
2172 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2173 		    (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2174 	} else {
2175 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2176 		    AV_MASK | irqp->airq_rdt_entry);
2177 	}
2178 
2179 	lock_clear(&apic_ioapic_lock);
2180 }
2181 
2182 /*
2183  * For level-triggered interrupt, unmask the IRQ line
2184  * or restore the original vector number.
2185  */
2186 void
2187 apix_level_intr_post_dispatch(int irq)
2188 {
2189 	apic_irq_t *irqp = apic_irq_table[irq];
2190 	int apic_ix, intin_ix;
2191 
2192 	if (irqp == NULL)
2193 		return;
2194 
2195 	lock_set(&apic_ioapic_lock);
2196 
2197 	intin_ix = irqp->airq_intin_no;
2198 	apic_ix = irqp->airq_ioapicindex;
2199 
2200 	if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2201 		/*
2202 		 * Already sent EOI back to Local APIC.
2203 		 * Send EOI to IO-APIC
2204 		 */
2205 		ioapic_write_eoi(apic_ix, irqp->airq_vector);
2206 	} else {
2207 		/* clear the mask or restore the vector */
2208 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2209 		    irqp->airq_rdt_entry);
2210 
2211 		/* send EOI to IOxAPIC */
2212 		if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2213 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2214 	}
2215 
2216 	lock_clear(&apic_ioapic_lock);
2217 }
2218 
2219 static int
2220 apix_intx_get_shared(int irqno)
2221 {
2222 	apic_irq_t *irqp;
2223 	int share;
2224 
2225 	mutex_enter(&airq_mutex);
2226 	irqp = apic_irq_table[irqno];
2227 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2228 		mutex_exit(&airq_mutex);
2229 		return (0);
2230 	}
2231 	share = irqp->airq_share;
2232 	mutex_exit(&airq_mutex);
2233 
2234 	return (share);
2235 }
2236 
2237 static void
2238 apix_intx_set_shared(int irqno, int delta)
2239 {
2240 	apic_irq_t *irqp;
2241 
2242 	mutex_enter(&airq_mutex);
2243 	irqp = apic_irq_table[irqno];
2244 	if (IS_IRQ_FREE(irqp)) {
2245 		mutex_exit(&airq_mutex);
2246 		return;
2247 	}
2248 	irqp->airq_share += delta;
2249 	mutex_exit(&airq_mutex);
2250 }
2251 
2252 /*
2253  * Setup IRQ table. Return IRQ no or -1 on failure
2254  */
2255 static int
2256 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2257     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2258 {
2259 	int origirq = ispec->intrspec_vec;
2260 	int newirq;
2261 	short intr_index;
2262 	uchar_t ipin, ioapic, ioapicindex;
2263 	apic_irq_t *irqp;
2264 
2265 	UNREFERENCED_1PARAMETER(inum);
2266 
2267 	if (intrp != NULL) {
2268 		intr_index = (short)(intrp - apic_io_intrp);
2269 		ioapic = intrp->intr_destid;
2270 		ipin = intrp->intr_destintin;
2271 
2272 		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
2273 		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2274 			if (apic_io_id[ioapicindex] == ioapic)
2275 				break;
2276 		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2277 		    (ioapic == INTR_ALL_APIC));
2278 
2279 		/* check whether this intin# has been used by another irqno */
2280 		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2281 			return (newirq);
2282 
2283 	} else if (iflagp != NULL) {	/* ACPI */
2284 		intr_index = ACPI_INDEX;
2285 		ioapicindex = acpi_find_ioapic(irqno);
2286 		ASSERT(ioapicindex != 0xFF);
2287 		ioapic = apic_io_id[ioapicindex];
2288 		ipin = irqno - apic_io_vectbase[ioapicindex];
2289 
2290 		if (apic_irq_table[irqno] &&
2291 		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2292 			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2293 			    apic_irq_table[irqno]->airq_ioapicindex ==
2294 			    ioapicindex);
2295 			return (irqno);
2296 		}
2297 
2298 	} else {	/* default configuration */
2299 		intr_index = DEFAULT_INDEX;
2300 		ioapicindex = 0;
2301 		ioapic = apic_io_id[ioapicindex];
2302 		ipin = (uchar_t)irqno;
2303 	}
2304 
2305 	/* allocate a new IRQ no */
2306 	if ((irqp = apic_irq_table[irqno]) == NULL) {
2307 		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2308 		apic_irq_table[irqno] = irqp;
2309 	} else {
2310 		if (irqp->airq_mps_intr_index != FREE_INDEX) {
2311 			newirq = apic_allocate_irq(apic_first_avail_irq);
2312 			if (newirq == -1) {
2313 				return (-1);
2314 			}
2315 			irqno = newirq;
2316 			irqp = apic_irq_table[irqno];
2317 			ASSERT(irqp != NULL);
2318 		}
2319 	}
2320 	apic_max_device_irq = max(irqno, apic_max_device_irq);
2321 	apic_min_device_irq = min(irqno, apic_min_device_irq);
2322 
2323 	irqp->airq_mps_intr_index = intr_index;
2324 	irqp->airq_ioapicindex = ioapicindex;
2325 	irqp->airq_intin_no = ipin;
2326 	irqp->airq_dip = dip;
2327 	irqp->airq_origirq = (uchar_t)origirq;
2328 	if (iflagp != NULL)
2329 		irqp->airq_iflag = *iflagp;
2330 	irqp->airq_cpu = IRQ_UNINIT;
2331 	irqp->airq_vector = 0;
2332 
2333 	return (irqno);
2334 }
2335 
2336 /*
2337  * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2338  */
2339 static int
2340 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2341     struct intrspec *ispec)
2342 {
2343 	int irqno = ispec->intrspec_vec;
2344 	int newirq, i;
2345 	iflag_t intr_flag;
2346 	ACPI_SUBTABLE_HEADER	*hp;
2347 	ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2348 	struct apic_io_intr *intrp;
2349 
2350 	if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2351 		int busid;
2352 
2353 		if (bustype == 0)
2354 			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2355 
2356 		/* loop checking BUS_ISA/BUS_EISA */
2357 		for (i = 0; i < 2; i++) {
2358 			if (((busid = apic_find_bus_id(bustype)) != -1) &&
2359 			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2360 			    != NULL)) {
2361 				return (apix_intx_setup(dip, inum, irqno,
2362 				    intrp, ispec, NULL));
2363 			}
2364 			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2365 		}
2366 
2367 		/* fall back to default configuration */
2368 		return (-1);
2369 	}
2370 
2371 	/* search iso entries first */
2372 	if (acpi_iso_cnt != 0) {
2373 		hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2374 		i = 0;
2375 		while (i < acpi_iso_cnt) {
2376 			if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2377 				isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2378 				if (isop->Bus == 0 &&
2379 				    isop->SourceIrq == irqno) {
2380 					newirq = isop->GlobalIrq;
2381 					intr_flag.intr_po = isop->IntiFlags &
2382 					    ACPI_MADT_POLARITY_MASK;
2383 					intr_flag.intr_el = (isop->IntiFlags &
2384 					    ACPI_MADT_TRIGGER_MASK) >> 2;
2385 					intr_flag.bustype = BUS_ISA;
2386 
2387 					return (apix_intx_setup(dip, inum,
2388 					    newirq, NULL, ispec, &intr_flag));
2389 				}
2390 				i++;
2391 			}
2392 			hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2393 			    hp->Length);
2394 		}
2395 	}
2396 	intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2397 	intr_flag.intr_el = INTR_EL_EDGE;
2398 	intr_flag.bustype = BUS_ISA;
2399 	return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2400 }
2401 
2402 
2403 /*
2404  * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2405  */
2406 static int
2407 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2408     struct intrspec *ispec)
2409 {
2410 	int busid, devid, pci_irq;
2411 	ddi_acc_handle_t cfg_handle;
2412 	uchar_t ipin;
2413 	iflag_t intr_flag;
2414 	struct apic_io_intr *intrp;
2415 
2416 	if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2417 		return (-1);
2418 
2419 	if (busid == 0 && apic_pci_bus_total == 1)
2420 		busid = (int)apic_single_pci_busid;
2421 
2422 	if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2423 		return (-1);
2424 	ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2425 	pci_config_teardown(&cfg_handle);
2426 
2427 	if (apic_enable_acpi && !apic_use_acpi_madt_only) {	/* ACPI */
2428 		if (apic_acpi_translate_pci_irq(dip, busid, devid,
2429 		    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2430 			return (-1);
2431 
2432 		intr_flag.bustype = (uchar_t)bustype;
2433 		return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2434 		    &intr_flag));
2435 	}
2436 
2437 	/* MP configuration table */
2438 	pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2439 	if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2440 		pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2441 		if (pci_irq == -1)
2442 			return (-1);
2443 	}
2444 
2445 	return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2446 }
2447 
2448 /*
2449  * Translate and return IRQ no
2450  */
2451 static int
2452 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2453 {
2454 	int newirq, irqno = ispec->intrspec_vec;
2455 	int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2456 	int bustype = 0, dev_len;
2457 	char dev_type[16];
2458 
2459 	if (apic_defconf) {
2460 		mutex_enter(&airq_mutex);
2461 		goto defconf;
2462 	}
2463 
2464 	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2465 		mutex_enter(&airq_mutex);
2466 		goto nonpci;
2467 	}
2468 
2469 	/*
2470 	 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2471 	 * to avoid extra buffer allocation.
2472 	 */
2473 	dev_len = sizeof (dev_type);
2474 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2475 	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2476 	    &dev_len) == DDI_PROP_SUCCESS) {
2477 		if ((strcmp(dev_type, "pci") == 0) ||
2478 		    (strcmp(dev_type, "pciex") == 0))
2479 			parent_is_pci_or_pciex = 1;
2480 	}
2481 
2482 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2483 	    DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2484 	    &dev_len) == DDI_PROP_SUCCESS) {
2485 		if (strstr(dev_type, "pciex"))
2486 			child_is_pciex = 1;
2487 	}
2488 
2489 	mutex_enter(&airq_mutex);
2490 
2491 	if (parent_is_pci_or_pciex) {
2492 		bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2493 		newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2494 		if (newirq != -1)
2495 			goto done;
2496 		bustype = 0;
2497 	} else if (strcmp(dev_type, "isa") == 0)
2498 		bustype = BUS_ISA;
2499 	else if (strcmp(dev_type, "eisa") == 0)
2500 		bustype = BUS_EISA;
2501 
2502 nonpci:
2503 	newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2504 	if (newirq != -1)
2505 		goto done;
2506 
2507 defconf:
2508 	newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2509 	if (newirq == -1) {
2510 		mutex_exit(&airq_mutex);
2511 		return (-1);
2512 	}
2513 done:
2514 	ASSERT(apic_irq_table[newirq]);
2515 	mutex_exit(&airq_mutex);
2516 	return (newirq);
2517 }
2518 
2519 static int
2520 apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2521 {
2522 	int irqno;
2523 	apix_vector_t *vecp;
2524 
2525 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2526 		return (0);
2527 
2528 	if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2529 		return (0);
2530 
2531 	DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2532 	    "irqno=0x%x cpuid=%d vector=0x%x\n",
2533 	    (void *)dip, ddi_driver_name(dip), irqno,
2534 	    vecp->v_cpuid, vecp->v_vector));
2535 
2536 	return (1);
2537 }
2538 
2539 /*
2540  * Return the vector number if the translated IRQ for this device
2541  * has a vector mapping setup. If no IRQ setup exists or no vector is
2542  * allocated to it then return 0.
2543  */
2544 static apix_vector_t *
2545 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2546 {
2547 	int irqno;
2548 	apix_vector_t *vecp;
2549 
2550 	/* get the IRQ number */
2551 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2552 		return (NULL);
2553 
2554 	/* get the vector number if a vector is allocated to this irqno */
2555 	vecp = apix_intx_get_vector(irqno);
2556 
2557 	return (vecp);
2558 }
2559 
2560 /* stub function */
2561 int
2562 apix_loaded(void)
2563 {
2564 	return (apix_is_enabled);
2565 }
2566