xref: /illumos-gate/usr/src/uts/i86pc/io/apix/apix.c (revision a92282e44f968185a6bba094d1e5fece2da819cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  * Copyright 2018 Joyent, Inc.
29  */
30 
31 /*
32  * To understand how the apix module interacts with the interrupt subsystem read
33  * the theory statement in uts/i86pc/os/intr.c.
34  */
35 
36 /*
37  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
38  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
39  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
40  * PSMI 1.5 extensions are supported in Solaris Nevada.
41  * PSMI 1.6 extensions are supported in Solaris Nevada.
42  * PSMI 1.7 extensions are supported in Solaris Nevada.
43  */
44 #define	PSMI_1_7
45 
46 #include <sys/processor.h>
47 #include <sys/time.h>
48 #include <sys/psm.h>
49 #include <sys/smp_impldefs.h>
50 #include <sys/cram.h>
51 #include <sys/acpi/acpi.h>
52 #include <sys/acpica.h>
53 #include <sys/psm_common.h>
54 #include <sys/pit.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/ddi_impldefs.h>
58 #include <sys/pci.h>
59 #include <sys/promif.h>
60 #include <sys/x86_archext.h>
61 #include <sys/cpc_impl.h>
62 #include <sys/uadmin.h>
63 #include <sys/panic.h>
64 #include <sys/debug.h>
65 #include <sys/archsystm.h>
66 #include <sys/trap.h>
67 #include <sys/machsystm.h>
68 #include <sys/sysmacros.h>
69 #include <sys/cpuvar.h>
70 #include <sys/rm_platter.h>
71 #include <sys/privregs.h>
72 #include <sys/note.h>
73 #include <sys/pci_intr_lib.h>
74 #include <sys/spl.h>
75 #include <sys/clock.h>
76 #include <sys/cyclic.h>
77 #include <sys/dditypes.h>
78 #include <sys/sunddi.h>
79 #include <sys/x_call.h>
80 #include <sys/reboot.h>
81 #include <sys/mach_intr.h>
82 #include <sys/apix.h>
83 #include <sys/apix_irm_impl.h>
84 
85 static int apix_probe();
86 static void apix_init();
87 static void apix_picinit(void);
88 static int apix_intr_enter(int, int *);
89 static void apix_intr_exit(int, int);
90 static void apix_setspl(int);
91 static int apix_disable_intr(processorid_t);
92 static void apix_enable_intr(processorid_t);
93 static int apix_get_clkvect(int);
94 static int apix_get_ipivect(int, int);
95 static void apix_post_cyclic_setup(void *);
96 static int apix_post_cpu_start();
97 static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
98     psm_intr_op_t, int *);
99 
100 /*
101  * Helper functions for apix_intr_ops()
102  */
103 static void apix_redistribute_compute(void);
104 static int apix_get_pending(apix_vector_t *);
105 static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
106 static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
107 static char *apix_get_apic_type(void);
108 static int apix_intx_get_pending(int);
109 static void apix_intx_set_mask(int irqno);
110 static void apix_intx_clear_mask(int irqno);
111 static int apix_intx_get_shared(int irqno);
112 static void apix_intx_set_shared(int irqno, int delta);
113 static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
114     struct intrspec *);
115 static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
116 
117 extern int apic_clkinit(int);
118 
119 /* IRM initialization for APIX PSM module */
120 extern void apix_irm_init(void);
121 
122 extern int irm_enable;
123 
124 /*
125  *	Local static data
126  */
127 static struct	psm_ops apix_ops = {
128 	apix_probe,
129 
130 	apix_init,
131 	apix_picinit,
132 	apix_intr_enter,
133 	apix_intr_exit,
134 	apix_setspl,
135 	apix_addspl,
136 	apix_delspl,
137 	apix_disable_intr,
138 	apix_enable_intr,
139 	NULL,			/* psm_softlvl_to_irq */
140 	NULL,			/* psm_set_softintr */
141 
142 	apic_set_idlecpu,
143 	apic_unset_idlecpu,
144 
145 	apic_clkinit,
146 	apix_get_clkvect,
147 	NULL,			/* psm_hrtimeinit */
148 	apic_gethrtime,
149 
150 	apic_get_next_processorid,
151 	apic_cpu_start,
152 	apix_post_cpu_start,
153 	apic_shutdown,
154 	apix_get_ipivect,
155 	apic_send_ipi,
156 
157 	NULL,			/* psm_translate_irq */
158 	NULL,			/* psm_notify_error */
159 	NULL,			/* psm_notify_func */
160 	apic_timer_reprogram,
161 	apic_timer_enable,
162 	apic_timer_disable,
163 	apix_post_cyclic_setup,
164 	apic_preshutdown,
165 	apix_intr_ops,		/* Advanced DDI Interrupt framework */
166 	apic_state,		/* save, restore apic state for S3 */
167 	apic_cpu_ops,		/* CPU control interface. */
168 
169 	apic_get_pir_ipivect,
170 	apic_send_pir_ipi,
171 	apic_cmci_setup
172 };
173 
174 struct psm_ops *psmops = &apix_ops;
175 
176 static struct	psm_info apix_psm_info = {
177 	PSM_INFO_VER01_7,			/* version */
178 	PSM_OWN_EXCLUSIVE,			/* ownership */
179 	&apix_ops,				/* operation */
180 	APIX_NAME,				/* machine name */
181 	"apix MPv1.4 compatible",
182 };
183 
184 static void *apix_hdlp;
185 
186 static int apix_is_enabled = 0;
187 
188 /*
189  * apix_lock is used for cpu selection and vector re-binding
190  */
191 lock_t apix_lock;
192 apix_impl_t *apixs[NCPU];
193 /*
194  * Mapping between device interrupt and the allocated vector. Indexed
195  * by major number.
196  */
197 apix_dev_vector_t **apix_dev_vector;
198 /*
199  * Mapping between device major number and cpu id. It gets used
200  * when interrupt binding policy round robin with affinity is
201  * applied. With that policy, devices with the same major number
202  * will be bound to the same CPU.
203  */
204 processorid_t *apix_major_to_cpu;	/* major to cpu mapping */
205 kmutex_t apix_mutex;	/* for apix_dev_vector & apix_major_to_cpu */
206 
207 int apix_nipis = 16;	/* Maximum number of IPIs */
208 /*
209  * Maximum number of vectors in a CPU that can be used for interrupt
210  * allocation (including IPIs and the reserved vectors).
211  */
212 int apix_cpu_nvectors = APIX_NVECTOR;
213 
214 /* number of CPUs in power-on transition state */
215 static int apic_poweron_cnt = 0;
216 
217 /* gcpu.h */
218 
219 extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
220 extern void apic_change_eoi();
221 
222 /*
223  *	This is the loadable module wrapper
224  */
225 
226 int
227 _init(void)
228 {
229 	if (apic_coarse_hrtime)
230 		apix_ops.psm_gethrtime = &apic_gettime;
231 	return (psm_mod_init(&apix_hdlp, &apix_psm_info));
232 }
233 
234 int
235 _fini(void)
236 {
237 	return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
238 }
239 
240 int
241 _info(struct modinfo *modinfop)
242 {
243 	return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
244 }
245 
246 static int
247 apix_probe()
248 {
249 	int rval;
250 
251 	if (apix_enable == 0)
252 		return (PSM_FAILURE);
253 
254 	/*
255 	 * FIXME Temporarily disable apix module on Xen HVM platform due to
256 	 * known hang during boot (see #3605).
257 	 *
258 	 * Please remove when/if the issue is resolved.
259 	 */
260 	if (get_hwenv() & HW_XEN_HVM)
261 		return (PSM_FAILURE);
262 
263 	if (apic_local_mode() == LOCAL_X2APIC) {
264 		/* x2APIC mode activated by BIOS, switch ops */
265 		apic_mode = LOCAL_X2APIC;
266 		apic_change_ops();
267 	}
268 
269 	rval = apic_probe_common(apix_psm_info.p_mach_idstring);
270 	if (rval == PSM_SUCCESS)
271 		apix_is_enabled = 1;
272 	else
273 		apix_is_enabled = 0;
274 	return (rval);
275 }
276 
277 /*
278  * Initialize the data structures needed by pcplusmpx module.
279  * Specifically, the data structures used by addspl() and delspl()
280  * routines.
281  */
282 static void
283 apix_softinit()
284 {
285 	int i, *iptr;
286 	apix_impl_t *hdlp;
287 	int nproc;
288 
289 	nproc = max(apic_nproc, apic_max_nproc);
290 
291 	hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
292 	for (i = 0; i < nproc; i++) {
293 		apixs[i] = &hdlp[i];
294 		apixs[i]->x_cpuid = i;
295 		LOCK_INIT_CLEAR(&apixs[i]->x_lock);
296 	}
297 
298 	/* cpu 0 is always up (for now) */
299 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
300 
301 	iptr = (int *)&apic_irq_table[0];
302 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
303 		apic_level_intr[i] = 0;
304 		*iptr++ = 0;
305 	}
306 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
307 
308 	apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
309 	    KM_SLEEP);
310 
311 	if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
312 		apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
313 		    KM_SLEEP);
314 		for (i = 0; i < devcnt; i++)
315 			apix_major_to_cpu[i] = IRQ_UNINIT;
316 	}
317 
318 	mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
319 }
320 
321 static int
322 apix_get_pending_spl(void)
323 {
324 	int cpuid = CPU->cpu_id;
325 
326 	return (bsrw_insn(apixs[cpuid]->x_intr_pending));
327 }
328 
329 static uintptr_t
330 apix_get_intr_handler(int cpu, short vec)
331 {
332 	apix_vector_t *apix_vector;
333 
334 	ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
335 	if (cpu >= apic_nproc || vec >= APIX_NVECTOR)
336 		return (0);
337 
338 	apix_vector = apixs[cpu]->x_vectbl[vec];
339 
340 	return ((uintptr_t)(apix_vector->v_autovect));
341 }
342 
343 static void
344 apix_init()
345 {
346 	extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
347 
348 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
349 
350 	do_interrupt_common = apix_do_interrupt;
351 	addintr = apix_add_avintr;
352 	remintr = apix_rem_avintr;
353 	get_pending_spl = apix_get_pending_spl;
354 	get_intr_handler = apix_get_intr_handler;
355 	psm_get_localapicid = apic_get_localapicid;
356 	psm_get_ioapicid = apic_get_ioapicid;
357 
358 	apix_softinit();
359 
360 #if !defined(__amd64)
361 	if (cpuid_have_cr8access(CPU))
362 		apic_have_32bit_cr8 = 1;
363 #endif
364 
365 	apic_pir_vect = apix_get_ipivect(XC_CPUPOKE_PIL, -1);
366 
367 	/*
368 	 * Initialize IRM pool parameters
369 	 */
370 	if (irm_enable) {
371 		int	i;
372 		int	lowest_irq;
373 		int	highest_irq;
374 
375 		/* number of CPUs present */
376 		apix_irminfo.apix_ncpus = apic_nproc;
377 		/* total number of entries in all of the IOAPICs present */
378 		lowest_irq = apic_io_vectbase[0];
379 		highest_irq = apic_io_vectend[0];
380 		for (i = 1; i < apic_io_max; i++) {
381 			if (apic_io_vectbase[i] < lowest_irq)
382 				lowest_irq = apic_io_vectbase[i];
383 			if (apic_io_vectend[i] > highest_irq)
384 				highest_irq = apic_io_vectend[i];
385 		}
386 		apix_irminfo.apix_ioapic_max_vectors =
387 		    highest_irq - lowest_irq + 1;
388 		/*
389 		 * Number of available per-CPU vectors excluding
390 		 * reserved vectors for Dtrace, int80, system-call,
391 		 * fast-trap, etc.
392 		 */
393 		apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
394 		    APIX_SW_RESERVED_VECTORS;
395 
396 		/* Number of vectors (pre) allocated (SCI and HPET) */
397 		apix_irminfo.apix_vectors_allocated = 0;
398 		if (apic_hpet_vect != -1)
399 			apix_irminfo.apix_vectors_allocated++;
400 		if (apic_sci_vect != -1)
401 			apix_irminfo.apix_vectors_allocated++;
402 	}
403 }
404 
405 static void
406 apix_init_intr()
407 {
408 	processorid_t	cpun = psm_get_cpu_id();
409 	uint_t nlvt;
410 	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
411 	extern void cmi_cmci_trap(void);
412 
413 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
414 
415 	if (apic_mode == LOCAL_APIC) {
416 		/*
417 		 * We are running APIC in MMIO mode.
418 		 */
419 		if (apic_flat_model) {
420 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
421 			    APIC_FLAT_MODEL);
422 		} else {
423 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
424 			    APIC_CLUSTER_MODEL);
425 		}
426 
427 		apic_reg_ops->apic_write(APIC_DEST_REG,
428 		    AV_HIGH_ORDER >> cpun);
429 	}
430 
431 	if (apic_directed_EOI_supported()) {
432 		/*
433 		 * Setting the 12th bit in the Spurious Interrupt Vector
434 		 * Register suppresses broadcast EOIs generated by the local
435 		 * APIC. The suppression of broadcast EOIs happens only when
436 		 * interrupts are level-triggered.
437 		 */
438 		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
439 	}
440 
441 	/* need to enable APIC before unmasking NMI */
442 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
443 
444 	/*
445 	 * Presence of an invalid vector with delivery mode AV_FIXED can
446 	 * cause an error interrupt, even if the entry is masked...so
447 	 * write a valid vector to LVT entries along with the mask bit
448 	 */
449 
450 	/* All APICs have timer and LINT0/1 */
451 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
452 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
453 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */
454 
455 	/*
456 	 * On integrated APICs, the number of LVT entries is
457 	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
458 	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
459 	 */
460 
461 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
462 		nlvt = 3;
463 	} else {
464 		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
465 		    0xFF) + 1;
466 	}
467 
468 	if (nlvt >= 5) {
469 		/* Enable performance counter overflow interrupt */
470 
471 		if (!is_x86_feature(x86_featureset, X86FSET_MSR))
472 			apic_enable_cpcovf_intr = 0;
473 		if (apic_enable_cpcovf_intr) {
474 			if (apic_cpcovf_vect == 0) {
475 				int ipl = APIC_PCINT_IPL;
476 
477 				apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
478 				ASSERT(apic_cpcovf_vect);
479 
480 				(void) add_avintr(NULL, ipl,
481 				    (avfunc)kcpc_hw_overflow_intr,
482 				    "apic pcint", apic_cpcovf_vect,
483 				    NULL, NULL, NULL, NULL);
484 				kcpc_hw_overflow_intr_installed = 1;
485 				kcpc_hw_enable_cpc_intr =
486 				    apic_cpcovf_mask_clear;
487 			}
488 			apic_reg_ops->apic_write(APIC_PCINT_VECT,
489 			    apic_cpcovf_vect);
490 		}
491 	}
492 
493 	if (nlvt >= 6) {
494 		/* Only mask TM intr if the BIOS apparently doesn't use it */
495 
496 		uint32_t lvtval;
497 
498 		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
499 		if (((lvtval & AV_MASK) == AV_MASK) ||
500 		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
501 			apic_reg_ops->apic_write(APIC_THERM_VECT,
502 			    AV_MASK|APIC_RESV_IRQ);
503 		}
504 	}
505 
506 	/* Enable error interrupt */
507 
508 	if (nlvt >= 4 && apic_enable_error_intr) {
509 		if (apic_errvect == 0) {
510 			int ipl = 0xf;	/* get highest priority intr */
511 			apic_errvect = apix_get_ipivect(ipl, -1);
512 			ASSERT(apic_errvect);
513 			/*
514 			 * Not PSMI compliant, but we are going to merge
515 			 * with ON anyway
516 			 */
517 			(void) add_avintr(NULL, ipl,
518 			    (avfunc)apic_error_intr, "apic error intr",
519 			    apic_errvect, NULL, NULL, NULL, NULL);
520 		}
521 		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
522 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
523 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
524 	}
525 
526 	/*
527 	 * Ensure a CMCI interrupt is allocated, regardless of whether it is
528 	 * enabled or not.
529 	 */
530 	if (apic_cmci_vect == 0) {
531 		const int ipl = 0x2;
532 		apic_cmci_vect = apix_get_ipivect(ipl, -1);
533 		ASSERT(apic_cmci_vect);
534 
535 		(void) add_avintr(NULL, ipl,
536 		    (avfunc)cmi_cmci_trap, "apic cmci intr",
537 		    apic_cmci_vect, NULL, NULL, NULL, NULL);
538 	}
539 
540 	apic_reg_ops->apic_write_task_reg(0);
541 }
542 
543 static void
544 apix_picinit(void)
545 {
546 	int i, j;
547 	uint_t isr;
548 
549 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
550 
551 	/*
552 	 * initialize interrupt remapping before apic
553 	 * hardware initialization
554 	 */
555 	apic_intrmap_init(apic_mode);
556 	if (apic_vt_ops == psm_vt_ops)
557 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
558 
559 	/*
560 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
561 	 * bit on without clearing it with EOI.  Since softint
562 	 * uses vector 0x20 to interrupt itself, so softint will
563 	 * not work on this machine.  In order to fix this problem
564 	 * a check is made to verify all the isr bits are clear.
565 	 * If not, EOIs are issued to clear the bits.
566 	 */
567 	for (i = 7; i >= 1; i--) {
568 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
569 		if (isr != 0)
570 			for (j = 0; ((j < 32) && (isr != 0)); j++)
571 				if (isr & (1 << j)) {
572 					apic_reg_ops->apic_write(
573 					    APIC_EOI_REG, 0);
574 					isr &= ~(1 << j);
575 					apic_error |= APIC_ERR_BOOT_EOI;
576 				}
577 	}
578 
579 	/* set a flag so we know we have run apic_picinit() */
580 	apic_picinit_called = 1;
581 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
582 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
583 	LOCK_INIT_CLEAR(&apic_error_lock);
584 	LOCK_INIT_CLEAR(&apic_mode_switch_lock);
585 
586 	picsetup();	 /* initialise the 8259 */
587 
588 	/* add nmi handler - least priority nmi handler */
589 	LOCK_INIT_CLEAR(&apic_nmi_lock);
590 
591 	if (!psm_add_nmintr(0, apic_nmi_intr,
592 	    "apix NMI handler", (caddr_t)NULL))
593 		cmn_err(CE_WARN, "apix: Unable to add nmi handler");
594 
595 	apix_init_intr();
596 
597 	/* enable apic mode if imcr present */
598 	if (apic_imcrp) {
599 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
600 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
601 	}
602 
603 	ioapix_init_intr(IOAPIC_MASK);
604 
605 	/* setup global IRM pool if applicable */
606 	if (irm_enable)
607 		apix_irm_init();
608 }
609 
610 static __inline__ void
611 apix_send_eoi(void)
612 {
613 	if (apic_mode == LOCAL_APIC)
614 		LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
615 	else
616 		X2APIC_WRITE(APIC_EOI_REG, 0);
617 }
618 
619 /*
620  * platform_intr_enter
621  *
622  *	Called at the beginning of the interrupt service routine, but unlike
623  *	pcplusmp, does not mask interrupts. An EOI is given to the interrupt
624  *	controller to enable other HW interrupts but interrupts are still
625  *	masked by the IF flag.
626  *
627  *	Return -1 for spurious interrupts
628  *
629  */
630 static int
631 apix_intr_enter(int ipl, int *vectorp)
632 {
633 	struct cpu *cpu = CPU;
634 	uint32_t cpuid = CPU->cpu_id;
635 	apic_cpus_info_t *cpu_infop;
636 	uchar_t vector;
637 	apix_vector_t *vecp;
638 	int nipl = -1;
639 
640 	/*
641 	 * The real vector delivered is (*vectorp + 0x20), but our caller
642 	 * subtracts 0x20 from the vector before passing it to us.
643 	 * (That's why APIC_BASE_VECT is 0x20.)
644 	 */
645 	vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;
646 
647 	cpu_infop = &apic_cpus[cpuid];
648 	if (vector == APIC_SPUR_INTR) {
649 		cpu_infop->aci_spur_cnt++;
650 		return (APIC_INT_SPURIOUS);
651 	}
652 
653 	vecp = xv_vector(cpuid, vector);
654 	if (vecp == NULL) {
655 		if (APIX_IS_FAKE_INTR(vector))
656 			nipl = apix_rebindinfo.i_pri;
657 		apix_send_eoi();
658 		return (nipl);
659 	}
660 	nipl = vecp->v_pri;
661 
662 	/* if interrupted by the clock, increment apic_nsec_since_boot */
663 	if (vector == (apic_clkvect + APIC_BASE_VECT)) {
664 		if (!apic_oneshot) {
665 			/* NOTE: this is not MT aware */
666 			apic_hrtime_stamp++;
667 			apic_nsec_since_boot += apic_nsec_per_intr;
668 			apic_hrtime_stamp++;
669 			last_count_read = apic_hertz_count;
670 			apix_redistribute_compute();
671 		}
672 
673 		apix_send_eoi();
674 
675 		return (nipl);
676 	}
677 
678 	ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);
679 
680 	/* pre-EOI handling for level-triggered interrupts */
681 	if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
682 	    (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
683 		apix_level_intr_pre_eoi(vecp->v_inum);
684 
685 	/* send back EOI */
686 	apix_send_eoi();
687 
688 	cpu_infop->aci_current[nipl] = vector;
689 	if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
690 		cpu_infop->aci_curipl = (uchar_t)nipl;
691 		cpu_infop->aci_ISR_in_progress |= 1 << nipl;
692 	}
693 
694 #ifdef	DEBUG
695 	if (vector >= APIX_IPI_MIN)
696 		return (nipl);	/* skip IPI */
697 
698 	APIC_DEBUG_BUF_PUT(vector);
699 	APIC_DEBUG_BUF_PUT(vecp->v_inum);
700 	APIC_DEBUG_BUF_PUT(nipl);
701 	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
702 	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
703 		drv_usecwait(apic_stretch_interrupts);
704 #endif /* DEBUG */
705 
706 	return (nipl);
707 }
708 
709 /*
710  * Any changes made to this function must also change X2APIC
711  * version of intr_exit.
712  */
713 static void
714 apix_intr_exit(int prev_ipl, int arg2)
715 {
716 	int cpuid = psm_get_cpu_id();
717 	apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
718 	apix_impl_t *apixp = apixs[cpuid];
719 
720 	UNREFERENCED_1PARAMETER(arg2);
721 
722 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
723 	/* ISR above current pri could not be in progress */
724 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
725 
726 	if (apixp->x_obsoletes != NULL) {
727 		if (APIX_CPU_LOCK_HELD(cpuid))
728 			return;
729 
730 		APIX_ENTER_CPU_LOCK(cpuid);
731 		(void) apix_obsolete_vector(apixp->x_obsoletes);
732 		APIX_LEAVE_CPU_LOCK(cpuid);
733 	}
734 }
735 
736 /*
737  * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
738  * given ipl, but apix never uses the TPR and we never mask a subset of the
739  * interrupts. They are either all blocked by the IF flag or all can come in.
740  *
741  * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
742  * can come in if currently enabled by the IF flag. This table shows the state
743  * of the IF flag when we leave this function.
744  *
745  *    curr IF |	ipl == 15	ipl != 15
746  *    --------+---------------------------
747  *       0    |    0		    0
748  *       1    |    0		    1
749  */
750 static void
751 apix_setspl(int ipl)
752 {
753 	/*
754 	 * Interrupts at ipl above this cannot be in progress, so the following
755 	 * mask is ok.
756 	 */
757 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
758 
759 	if (ipl == XC_HI_PIL)
760 		cli();
761 }
762 
763 int
764 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
765 {
766 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
767 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
768 	apix_vector_t *vecp = xv_vector(cpuid, vector);
769 
770 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
771 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
772 
773 	if (vecp->v_type == APIX_TYPE_FIXED)
774 		apix_intx_set_shared(vecp->v_inum, 1);
775 
776 	/* There are more interrupts, so it's already been enabled */
777 	if (vecp->v_share > 1)
778 		return (PSM_SUCCESS);
779 
780 	/* return if it is not hardware interrupt */
781 	if (vecp->v_type == APIX_TYPE_IPI)
782 		return (PSM_SUCCESS);
783 
784 	/*
785 	 * if apix_picinit() has not been called yet, just return.
786 	 * At the end of apic_picinit(), we will call setup_io_intr().
787 	 */
788 	if (!apic_picinit_called)
789 		return (PSM_SUCCESS);
790 
791 	(void) apix_setup_io_intr(vecp);
792 
793 	return (PSM_SUCCESS);
794 }
795 
796 int
797 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
798 {
799 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
800 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
801 	apix_vector_t *vecp = xv_vector(cpuid, vector);
802 
803 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
804 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
805 
806 	if (vecp->v_type == APIX_TYPE_FIXED)
807 		apix_intx_set_shared(vecp->v_inum, -1);
808 
809 	/* There are more interrupts */
810 	if (vecp->v_share > 1)
811 		return (PSM_SUCCESS);
812 
813 	/* return if it is not hardware interrupt */
814 	if (vecp->v_type == APIX_TYPE_IPI)
815 		return (PSM_SUCCESS);
816 
817 	if (!apic_picinit_called) {
818 		cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
819 		    virtvec);
820 		return (PSM_SUCCESS);
821 	}
822 
823 	apix_disable_vector(vecp);
824 
825 	return (PSM_SUCCESS);
826 }
827 
828 /*
829  * Try and disable all interrupts. We just assign interrupts to other
830  * processors based on policy. If any were bound by user request, we
831  * let them continue and return failure. We do not bother to check
832  * for cache affinity while rebinding.
833  */
834 static int
835 apix_disable_intr(processorid_t cpun)
836 {
837 	apix_impl_t *apixp = apixs[cpun];
838 	apix_vector_t *vecp, *newp;
839 	int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
840 
841 	lock_set(&apix_lock);
842 
843 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
844 	apic_cpus[cpun].aci_curipl = 0;
845 
846 	/* if this is for SUSPEND operation, skip rebinding */
847 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
848 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
849 			vecp = apixp->x_vectbl[i];
850 			if (!IS_VECT_ENABLED(vecp))
851 				continue;
852 
853 			apix_disable_vector(vecp);
854 		}
855 		lock_clear(&apix_lock);
856 		return (PSM_SUCCESS);
857 	}
858 
859 	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
860 		vecp = apixp->x_vectbl[i];
861 		if (!IS_VECT_ENABLED(vecp))
862 			continue;
863 
864 		if (vecp->v_flags & APIX_VECT_USER_BOUND) {
865 			hardbound++;
866 			continue;
867 		}
868 		type = vecp->v_type;
869 
870 		/*
871 		 * If there are bound interrupts on this cpu, then
872 		 * rebind them to other processors.
873 		 */
874 		loop = 0;
875 		do {
876 			bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
877 
878 			if (type != APIX_TYPE_MSI)
879 				newp = apix_set_cpu(vecp, bindcpu, &ret);
880 			else
881 				newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
882 		} while ((newp == NULL) && (loop++ < apic_nproc));
883 
884 		if (loop >= apic_nproc) {
885 			errbound++;
886 			cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
887 			    vecp->v_cpuid, vecp->v_vector);
888 		}
889 	}
890 
891 	lock_clear(&apix_lock);
892 
893 	if (hardbound || errbound) {
894 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
895 		    "due to user bound interrupts or failed operation",
896 		    cpun);
897 		return (PSM_FAILURE);
898 	}
899 
900 	return (PSM_SUCCESS);
901 }
902 
903 /*
904  * Bind interrupts to specified CPU
905  */
906 static void
907 apix_enable_intr(processorid_t cpun)
908 {
909 	apix_vector_t *vecp;
910 	int i, ret;
911 	processorid_t n;
912 
913 	lock_set(&apix_lock);
914 
915 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
916 
917 	/* interrupt enabling for system resume */
918 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
919 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
920 			vecp = xv_vector(cpun, i);
921 			if (!IS_VECT_ENABLED(vecp))
922 				continue;
923 
924 			apix_enable_vector(vecp);
925 		}
926 		apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
927 	}
928 
929 	for (n = 0; n < apic_nproc; n++) {
930 		if (!apic_cpu_in_range(n) || n == cpun ||
931 		    (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
932 			continue;
933 
934 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
935 			vecp = xv_vector(n, i);
936 			if (!IS_VECT_ENABLED(vecp) ||
937 			    vecp->v_bound_cpuid != cpun)
938 				continue;
939 
940 			if (vecp->v_type != APIX_TYPE_MSI)
941 				(void) apix_set_cpu(vecp, cpun, &ret);
942 			else
943 				(void) apix_grp_set_cpu(vecp, cpun, &ret);
944 		}
945 	}
946 
947 	lock_clear(&apix_lock);
948 }
949 
950 /*
951  * Allocate vector for IPI
952  * type == -1 indicates it is an internal request. Do not change
953  * resv_vector for these requests.
954  */
955 static int
956 apix_get_ipivect(int ipl, int type)
957 {
958 	uchar_t vector;
959 
960 	if ((vector = apix_alloc_ipi(ipl)) > 0) {
961 		if (type != -1)
962 			apic_resv_vector[ipl] = vector;
963 		return (vector);
964 	}
965 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
966 	return (-1);	/* shouldn't happen */
967 }
968 
969 static int
970 apix_get_clkvect(int ipl)
971 {
972 	int vector;
973 
974 	if ((vector = apix_get_ipivect(ipl, -1)) == -1)
975 		return (-1);
976 
977 	apic_clkvect = vector - APIC_BASE_VECT;
978 	APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
979 	    apic_clkvect));
980 	return (vector);
981 }
982 
983 static int
984 apix_post_cpu_start()
985 {
986 	int cpun;
987 	static int cpus_started = 1;
988 
989 	/* We know this CPU + BSP  started successfully. */
990 	cpus_started++;
991 
992 	/*
993 	 * On BSP we would have enabled X2APIC, if supported by processor,
994 	 * in acpi_probe(), but on AP we do it here.
995 	 *
996 	 * We enable X2APIC mode only if BSP is running in X2APIC & the
997 	 * local APIC mode of the current CPU is MMIO (xAPIC).
998 	 */
999 	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1000 	    apic_local_mode() == LOCAL_APIC) {
1001 		apic_enable_x2apic();
1002 	}
1003 
1004 	/*
1005 	 * Switch back to x2apic IPI sending method for performance when target
1006 	 * CPU has entered x2apic mode.
1007 	 */
1008 	if (apic_mode == LOCAL_X2APIC) {
1009 		apic_switch_ipi_callback(B_FALSE);
1010 	}
1011 
1012 	splx(ipltospl(LOCK_LEVEL));
1013 	apix_init_intr();
1014 
1015 	/*
1016 	 * since some systems don't enable the internal cache on the non-boot
1017 	 * cpus, so we have to enable them here
1018 	 */
1019 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1020 
1021 #ifdef	DEBUG
1022 	APIC_AV_PENDING_SET();
1023 #else
1024 	if (apic_mode == LOCAL_APIC)
1025 		APIC_AV_PENDING_SET();
1026 #endif	/* DEBUG */
1027 
1028 	/*
1029 	 * We may be booting, or resuming from suspend; aci_status will
1030 	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1031 	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1032 	 */
1033 	cpun = psm_get_cpu_id();
1034 	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1035 
1036 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1037 
1038 	return (PSM_SUCCESS);
1039 }
1040 
1041 /*
1042  * If this module needs a periodic handler for the interrupt distribution, it
1043  * can be added here. The argument to the periodic handler is not currently
1044  * used, but is reserved for future.
1045  */
1046 static void
1047 apix_post_cyclic_setup(void *arg)
1048 {
1049 	UNREFERENCED_1PARAMETER(arg);
1050 
1051 	cyc_handler_t cyh;
1052 	cyc_time_t cyt;
1053 
1054 	/* cpu_lock is held */
1055 	/* set up a periodic handler for intr redistribution */
1056 
1057 	/*
1058 	 * In peridoc mode intr redistribution processing is done in
1059 	 * apic_intr_enter during clk intr processing
1060 	 */
1061 	if (!apic_oneshot)
1062 		return;
1063 
1064 	/*
1065 	 * Register a periodical handler for the redistribution processing.
1066 	 * Though we would generally prefer to use the DDI interface for
1067 	 * periodic handler invocation, ddi_periodic_add(9F), we are
1068 	 * unfortunately already holding cpu_lock, which ddi_periodic_add will
1069 	 * attempt to take for us.  Thus, we add our own cyclic directly:
1070 	 */
1071 	cyh.cyh_func = (void (*)(void *))apix_redistribute_compute;
1072 	cyh.cyh_arg = NULL;
1073 	cyh.cyh_level = CY_LOW_LEVEL;
1074 
1075 	cyt.cyt_when = 0;
1076 	cyt.cyt_interval = apic_redistribute_sample_interval;
1077 
1078 	apic_cyclic_id = cyclic_add(&cyh, &cyt);
1079 }
1080 
1081 /*
1082  * Called the first time we enable x2apic mode on this cpu.
1083  * Update some of the function pointers to use x2apic routines.
1084  */
1085 void
1086 x2apic_update_psm()
1087 {
1088 	struct psm_ops *pops = &apix_ops;
1089 
1090 	ASSERT(pops != NULL);
1091 
1092 	/*
1093 	 * The pcplusmp module's version of x2apic_update_psm makes additional
1094 	 * changes that we do not have to make here. It needs to make those
1095 	 * changes because pcplusmp relies on the TPR register and the means of
1096 	 * addressing that changes when using the local apic versus the x2apic.
1097 	 * It's also worth noting that the apix driver specific function end up
1098 	 * being apix_foo as opposed to apic_foo and x2apic_foo.
1099 	 */
1100 	pops->psm_send_ipi = x2apic_send_ipi;
1101 	send_dirintf = pops->psm_send_ipi;
1102 
1103 	pops->psm_send_pir_ipi = x2apic_send_pir_ipi;
1104 	psm_send_pir_ipi = pops->psm_send_pir_ipi;
1105 
1106 	apic_mode = LOCAL_X2APIC;
1107 	apic_change_ops();
1108 }
1109 
1110 /*
1111  * This function provides external interface to the nexus for all
1112  * functionalities related to the new DDI interrupt framework.
1113  *
1114  * Input:
1115  * dip     - pointer to the dev_info structure of the requested device
1116  * hdlp    - pointer to the internal interrupt handle structure for the
1117  *	     requested interrupt
1118  * intr_op - opcode for this call
1119  * result  - pointer to the integer that will hold the result to be
1120  *	     passed back if return value is PSM_SUCCESS
1121  *
1122  * Output:
1123  * return value is either PSM_SUCCESS or PSM_FAILURE
1124  */
1125 static int
1126 apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1127     psm_intr_op_t intr_op, int *result)
1128 {
1129 	int		cap;
1130 	apix_vector_t	*vecp, *newvecp;
1131 	struct intrspec *ispec, intr_spec;
1132 	processorid_t target;
1133 
1134 	ispec = &intr_spec;
1135 	ispec->intrspec_pri = hdlp->ih_pri;
1136 	ispec->intrspec_vec = hdlp->ih_inum;
1137 	ispec->intrspec_func = hdlp->ih_cb_func;
1138 
1139 	switch (intr_op) {
1140 	case PSM_INTR_OP_ALLOC_VECTORS:
1141 		switch (hdlp->ih_type) {
1142 		case DDI_INTR_TYPE_MSI:
1143 			/* allocate MSI vectors */
1144 			*result = apix_alloc_msi(dip, hdlp->ih_inum,
1145 			    hdlp->ih_scratch1,
1146 			    (int)(uintptr_t)hdlp->ih_scratch2);
1147 			break;
1148 		case DDI_INTR_TYPE_MSIX:
1149 			/* allocate MSI-X vectors */
1150 			*result = apix_alloc_msix(dip, hdlp->ih_inum,
1151 			    hdlp->ih_scratch1,
1152 			    (int)(uintptr_t)hdlp->ih_scratch2);
1153 			break;
1154 		case DDI_INTR_TYPE_FIXED:
1155 			/* allocate or share vector for fixed */
1156 			if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1157 				return (PSM_FAILURE);
1158 			}
1159 			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1160 			*result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1161 			    ispec);
1162 			break;
1163 		default:
1164 			return (PSM_FAILURE);
1165 		}
1166 		break;
1167 	case PSM_INTR_OP_FREE_VECTORS:
1168 		apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1169 		    hdlp->ih_type);
1170 		break;
1171 	case PSM_INTR_OP_XLATE_VECTOR:
1172 		/*
1173 		 * Vectors are allocated by ALLOC and freed by FREE.
1174 		 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1175 		 */
1176 		*result = APIX_INVALID_VECT;
1177 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1178 		if (vecp != NULL) {
1179 			*result = APIX_VIRTVECTOR(vecp->v_cpuid,
1180 			    vecp->v_vector);
1181 			break;
1182 		}
1183 
1184 		/*
1185 		 * No vector to device mapping exists. If this is FIXED type
1186 		 * then check if this IRQ is already mapped for another device
1187 		 * then return the vector number for it (i.e. shared IRQ case).
1188 		 * Otherwise, return PSM_FAILURE.
1189 		 */
1190 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1191 			vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1192 			    ispec);
1193 			*result = (vecp == NULL) ? APIX_INVALID_VECT :
1194 			    APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1195 		}
1196 		if (*result == APIX_INVALID_VECT)
1197 			return (PSM_FAILURE);
1198 		break;
1199 	case PSM_INTR_OP_GET_PENDING:
1200 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1201 		if (vecp == NULL)
1202 			return (PSM_FAILURE);
1203 
1204 		*result = apix_get_pending(vecp);
1205 		break;
1206 	case PSM_INTR_OP_CLEAR_MASK:
1207 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1208 			return (PSM_FAILURE);
1209 
1210 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1211 		if (vecp == NULL)
1212 			return (PSM_FAILURE);
1213 
1214 		apix_intx_clear_mask(vecp->v_inum);
1215 		break;
1216 	case PSM_INTR_OP_SET_MASK:
1217 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1218 			return (PSM_FAILURE);
1219 
1220 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1221 		if (vecp == NULL)
1222 			return (PSM_FAILURE);
1223 
1224 		apix_intx_set_mask(vecp->v_inum);
1225 		break;
1226 	case PSM_INTR_OP_GET_SHARED:
1227 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1228 			return (PSM_FAILURE);
1229 
1230 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1231 		if (vecp == NULL)
1232 			return (PSM_FAILURE);
1233 
1234 		*result = apix_intx_get_shared(vecp->v_inum);
1235 		break;
1236 	case PSM_INTR_OP_SET_PRI:
1237 		/*
1238 		 * Called prior to adding the interrupt handler or when
1239 		 * an interrupt handler is unassigned.
1240 		 */
1241 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1242 			return (PSM_SUCCESS);
1243 
1244 		if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1245 			return (PSM_FAILURE);
1246 
1247 		break;
1248 	case PSM_INTR_OP_SET_CPU:
1249 	case PSM_INTR_OP_GRP_SET_CPU:
1250 		/*
1251 		 * The interrupt handle given here has been allocated
1252 		 * specifically for this command, and ih_private carries
1253 		 * a CPU value.
1254 		 */
1255 		*result = EINVAL;
1256 		target = (int)(intptr_t)hdlp->ih_private;
1257 		if (!apic_cpu_in_range(target)) {
1258 			DDI_INTR_IMPLDBG((CE_WARN,
1259 			    "[grp_]set_cpu: cpu out of range: %d\n", target));
1260 			return (PSM_FAILURE);
1261 		}
1262 
1263 		lock_set(&apix_lock);
1264 
1265 		vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1266 		if (!IS_VECT_ENABLED(vecp)) {
1267 			DDI_INTR_IMPLDBG((CE_WARN,
1268 			    "[grp]_set_cpu: invalid vector 0x%x\n",
1269 			    hdlp->ih_vector));
1270 			lock_clear(&apix_lock);
1271 			return (PSM_FAILURE);
1272 		}
1273 
1274 		*result = 0;
1275 
1276 		if (intr_op == PSM_INTR_OP_SET_CPU)
1277 			newvecp = apix_set_cpu(vecp, target, result);
1278 		else
1279 			newvecp = apix_grp_set_cpu(vecp, target, result);
1280 
1281 		lock_clear(&apix_lock);
1282 
1283 		if (newvecp == NULL) {
1284 			*result = EIO;
1285 			return (PSM_FAILURE);
1286 		}
1287 		newvecp->v_bound_cpuid = target;
1288 		hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1289 		    newvecp->v_vector);
1290 		break;
1291 
1292 	case PSM_INTR_OP_GET_INTR:
1293 		/*
1294 		 * The interrupt handle given here has been allocated
1295 		 * specifically for this command, and ih_private carries
1296 		 * a pointer to a apic_get_intr_t.
1297 		 */
1298 		if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1299 			return (PSM_FAILURE);
1300 		break;
1301 
1302 	case PSM_INTR_OP_CHECK_MSI:
1303 		/*
1304 		 * Check MSI/X is supported or not at APIC level and
1305 		 * masked off the MSI/X bits in hdlp->ih_type if not
1306 		 * supported before return.  If MSI/X is supported,
1307 		 * leave the ih_type unchanged and return.
1308 		 *
1309 		 * hdlp->ih_type passed in from the nexus has all the
1310 		 * interrupt types supported by the device.
1311 		 */
1312 		if (apic_support_msi == 0) {	/* uninitialized */
1313 			/*
1314 			 * if apic_support_msi is not set, call
1315 			 * apic_check_msi_support() to check whether msi
1316 			 * is supported first
1317 			 */
1318 			if (apic_check_msi_support() == PSM_SUCCESS)
1319 				apic_support_msi = 1;	/* supported */
1320 			else
1321 				apic_support_msi = -1;	/* not-supported */
1322 		}
1323 		if (apic_support_msi == 1) {
1324 			if (apic_msix_enable)
1325 				*result = hdlp->ih_type;
1326 			else
1327 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1328 		} else
1329 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1330 			    DDI_INTR_TYPE_MSIX);
1331 		break;
1332 	case PSM_INTR_OP_GET_CAP:
1333 		cap = DDI_INTR_FLAG_PENDING;
1334 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1335 			cap |= DDI_INTR_FLAG_MASKABLE;
1336 		*result = cap;
1337 		break;
1338 	case PSM_INTR_OP_APIC_TYPE:
1339 		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1340 		    apix_get_apic_type();
1341 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1342 		    APIX_IPI_MIN;
1343 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1344 		    apic_nproc;
1345 		hdlp->ih_ver = apic_get_apic_version();
1346 		break;
1347 	case PSM_INTR_OP_SET_CAP:
1348 	default:
1349 		return (PSM_FAILURE);
1350 	}
1351 
1352 	return (PSM_SUCCESS);
1353 }
1354 
1355 static void
1356 apix_cleanup_busy(void)
1357 {
1358 	int i, j;
1359 	apix_vector_t *vecp;
1360 
1361 	for (i = 0; i < apic_nproc; i++) {
1362 		if (!apic_cpu_in_range(i))
1363 			continue;
1364 		apic_cpus[i].aci_busy = 0;
1365 		for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1366 			if ((vecp = xv_vector(i, j)) != NULL)
1367 				vecp->v_busy = 0;
1368 		}
1369 	}
1370 }
1371 
1372 static void
1373 apix_redistribute_compute(void)
1374 {
1375 	int	i, j, max_busy;
1376 
1377 	if (!apic_enable_dynamic_migration)
1378 		return;
1379 
1380 	if (++apic_nticks == apic_sample_factor_redistribution) {
1381 		/*
1382 		 * Time to call apic_intr_redistribute().
1383 		 * reset apic_nticks. This will cause max_busy
1384 		 * to be calculated below and if it is more than
1385 		 * apic_int_busy, we will do the whole thing
1386 		 */
1387 		apic_nticks = 0;
1388 	}
1389 	max_busy = 0;
1390 	for (i = 0; i < apic_nproc; i++) {
1391 		if (!apic_cpu_in_range(i))
1392 			continue;
1393 		/*
1394 		 * Check if curipl is non zero & if ISR is in
1395 		 * progress
1396 		 */
1397 		if (((j = apic_cpus[i].aci_curipl) != 0) &&
1398 		    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1399 
1400 			int	vect;
1401 			apic_cpus[i].aci_busy++;
1402 			vect = apic_cpus[i].aci_current[j];
1403 			apixs[i]->x_vectbl[vect]->v_busy++;
1404 		}
1405 
1406 		if (!apic_nticks &&
1407 		    (apic_cpus[i].aci_busy > max_busy))
1408 			max_busy = apic_cpus[i].aci_busy;
1409 	}
1410 	if (!apic_nticks) {
1411 		if (max_busy > apic_int_busy_mark) {
1412 		/*
1413 		 * We could make the following check be
1414 		 * skipped > 1 in which case, we get a
1415 		 * redistribution at half the busy mark (due to
1416 		 * double interval). Need to be able to collect
1417 		 * more empirical data to decide if that is a
1418 		 * good strategy. Punt for now.
1419 		 */
1420 			apix_cleanup_busy();
1421 			apic_skipped_redistribute = 0;
1422 		} else
1423 			apic_skipped_redistribute++;
1424 	}
1425 }
1426 
1427 /*
1428  * intr_ops() service routines
1429  */
1430 
1431 static int
1432 apix_get_pending(apix_vector_t *vecp)
1433 {
1434 	int bit, index, irr, pending;
1435 
1436 	/* need to get on the bound cpu */
1437 	mutex_enter(&cpu_lock);
1438 	affinity_set(vecp->v_cpuid);
1439 
1440 	index = vecp->v_vector / 32;
1441 	bit = vecp->v_vector % 32;
1442 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1443 
1444 	affinity_clear();
1445 	mutex_exit(&cpu_lock);
1446 
1447 	pending = (irr & (1 << bit)) ? 1 : 0;
1448 	if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1449 		pending = apix_intx_get_pending(vecp->v_inum);
1450 
1451 	return (pending);
1452 }
1453 
1454 static apix_vector_t *
1455 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1456 {
1457 	apix_vector_t *vecp;
1458 	processorid_t cpuid;
1459 	int32_t virt_vec = 0;
1460 
1461 	switch (flags & PSMGI_INTRBY_FLAGS) {
1462 	case PSMGI_INTRBY_IRQ:
1463 		return (apix_intx_get_vector(hdlp->ih_vector));
1464 	case PSMGI_INTRBY_VEC:
1465 		virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1466 
1467 		cpuid = APIX_VIRTVEC_CPU(virt_vec);
1468 		if (!apic_cpu_in_range(cpuid))
1469 			return (NULL);
1470 
1471 		vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1472 		break;
1473 	case PSMGI_INTRBY_DEFAULT:
1474 		vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1475 		    hdlp->ih_type);
1476 		break;
1477 	default:
1478 		return (NULL);
1479 	}
1480 
1481 	return (vecp);
1482 }
1483 
1484 static int
1485 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1486     apic_get_intr_t *intr_params_p)
1487 {
1488 	apix_vector_t *vecp;
1489 	struct autovec *av_dev;
1490 	int i;
1491 
1492 	vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1493 	if (IS_VECT_FREE(vecp)) {
1494 		intr_params_p->avgi_num_devs = 0;
1495 		intr_params_p->avgi_cpu_id = 0;
1496 		intr_params_p->avgi_req_flags = 0;
1497 		return (PSM_SUCCESS);
1498 	}
1499 
1500 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1501 		intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1502 
1503 		/* Return user bound info for intrd. */
1504 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1505 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1506 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1507 		}
1508 	}
1509 
1510 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1511 		intr_params_p->avgi_vector = vecp->v_vector;
1512 
1513 	if (intr_params_p->avgi_req_flags &
1514 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1515 		/* Get number of devices from apic_irq table shared field. */
1516 		intr_params_p->avgi_num_devs = vecp->v_share;
1517 
1518 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
1519 
1520 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
1521 
1522 		/* Some devices have NULL dip.  Don't count these. */
1523 		if (intr_params_p->avgi_num_devs > 0) {
1524 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1525 			    av_dev = av_dev->av_link) {
1526 				if (av_dev->av_vector && av_dev->av_dip)
1527 					i++;
1528 			}
1529 			intr_params_p->avgi_num_devs =
1530 			    (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1531 		}
1532 
1533 		/* There are no viable dips to return. */
1534 		if (intr_params_p->avgi_num_devs == 0) {
1535 			intr_params_p->avgi_dip_list = NULL;
1536 
1537 		} else {	/* Return list of dips */
1538 
1539 			/* Allocate space in array for that number of devs. */
1540 			intr_params_p->avgi_dip_list = kmem_zalloc(
1541 			    intr_params_p->avgi_num_devs *
1542 			    sizeof (dev_info_t *),
1543 			    KM_NOSLEEP);
1544 			if (intr_params_p->avgi_dip_list == NULL) {
1545 				DDI_INTR_IMPLDBG((CE_WARN,
1546 				    "apix_get_vector_intr_info: no memory"));
1547 				return (PSM_FAILURE);
1548 			}
1549 
1550 			/*
1551 			 * Loop through the device list of the autovec table
1552 			 * filling in the dip array.
1553 			 *
1554 			 * Note that the autovect table may have some special
1555 			 * entries which contain NULL dips.  These will be
1556 			 * ignored.
1557 			 */
1558 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1559 			    av_dev = av_dev->av_link) {
1560 				if (av_dev->av_vector && av_dev->av_dip)
1561 					intr_params_p->avgi_dip_list[i++] =
1562 					    av_dev->av_dip;
1563 			}
1564 		}
1565 	}
1566 
1567 	return (PSM_SUCCESS);
1568 }
1569 
1570 static char *
1571 apix_get_apic_type(void)
1572 {
1573 	return (apix_psm_info.p_mach_idstring);
1574 }
1575 
1576 apix_vector_t *
1577 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1578 {
1579 	apix_vector_t *newp = NULL;
1580 	dev_info_t *dip;
1581 	int inum, cap_ptr;
1582 	ddi_acc_handle_t handle;
1583 	ddi_intr_msix_t *msix_p = NULL;
1584 	ushort_t msix_ctrl;
1585 	uintptr_t off = 0;
1586 	uint32_t mask = 0;
1587 
1588 	ASSERT(LOCK_HELD(&apix_lock));
1589 	*result = ENXIO;
1590 
1591 	/* Fail if this is an MSI intr and is part of a group. */
1592 	if (vecp->v_type == APIX_TYPE_MSI) {
1593 		if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1594 			return (NULL);
1595 		else
1596 			return (apix_grp_set_cpu(vecp, new_cpu, result));
1597 	}
1598 
1599 	/*
1600 	 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1601 	 */
1602 	if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1603 		if ((dip = APIX_GET_DIP(vecp)) == NULL)
1604 			return (NULL);
1605 		inum = vecp->v_devp->dv_inum;
1606 
1607 		handle = i_ddi_get_pci_config_handle(dip);
1608 		cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1609 		msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1610 		if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1611 			/*
1612 			 * Function is not masked, then mask "inum"th
1613 			 * entry in the MSI-X table
1614 			 */
1615 			msix_p = i_ddi_get_msix(dip);
1616 			off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1617 			    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1618 			mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1619 			ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1620 			    mask | 1);
1621 		}
1622 	}
1623 
1624 	*result = 0;
1625 	if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1626 		*result = EIO;
1627 
1628 	/* Restore mask bit */
1629 	if (msix_p != NULL)
1630 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1631 
1632 	return (newp);
1633 }
1634 
1635 /*
1636  * Set cpu for MSIs
1637  */
1638 apix_vector_t *
1639 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1640 {
1641 	apix_vector_t *newp, *vp;
1642 	uint32_t orig_cpu = vecp->v_cpuid;
1643 	int orig_vect = vecp->v_vector;
1644 	int i, num_vectors, cap_ptr, msi_mask_off = 0;
1645 	uint32_t msi_pvm = 0;
1646 	ushort_t msi_ctrl;
1647 	ddi_acc_handle_t handle;
1648 	dev_info_t *dip;
1649 
1650 	APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1651 	    " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1652 
1653 	ASSERT(LOCK_HELD(&apix_lock));
1654 
1655 	*result = ENXIO;
1656 
1657 	if (vecp->v_type != APIX_TYPE_MSI) {
1658 		DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1659 		return (NULL);
1660 	}
1661 
1662 	if ((dip = APIX_GET_DIP(vecp)) == NULL)
1663 		return (NULL);
1664 
1665 	num_vectors = i_ddi_intr_get_current_nintrs(dip);
1666 	if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1667 		APIC_VERBOSE(INTR, (CE_WARN,
1668 		    "set_grp: base vec not part of a grp or not aligned: "
1669 		    "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1670 		return (NULL);
1671 	}
1672 
1673 	if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1674 		return (NULL);
1675 
1676 	*result = EIO;
1677 	for (i = 1; i < num_vectors; i++) {
1678 		if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1679 			return (NULL);
1680 #ifdef DEBUG
1681 		/*
1682 		 * Sanity check: CPU and dip is the same for all entries.
1683 		 * May be called when first msi to be enabled, at this time
1684 		 * add_avintr() is not called for other msi
1685 		 */
1686 		if ((vp->v_share != 0) &&
1687 		    ((APIX_GET_DIP(vp) != dip) ||
1688 		    (vp->v_cpuid != vecp->v_cpuid))) {
1689 			APIC_VERBOSE(INTR, (CE_WARN,
1690 			    "set_grp: cpu or dip for vec 0x%x difft than for "
1691 			    "vec 0x%x\n", orig_vect, orig_vect + i));
1692 			APIC_VERBOSE(INTR, (CE_WARN,
1693 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1694 			    vp->v_cpuid, (void *)dip,
1695 			    (void *)APIX_GET_DIP(vp)));
1696 			return (NULL);
1697 		}
1698 #endif /* DEBUG */
1699 	}
1700 
1701 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1702 	handle = i_ddi_get_pci_config_handle(dip);
1703 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1704 
1705 	/* MSI Per vector masking is supported. */
1706 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1707 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
1708 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1709 		else
1710 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1711 		msi_pvm = pci_config_get32(handle, msi_mask_off);
1712 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1713 		APIC_VERBOSE(INTR, (CE_CONT,
1714 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
1715 		    pci_config_get32(handle, msi_mask_off)));
1716 	}
1717 
1718 	if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1719 		*result = 0;
1720 
1721 	/* Reenable vectors if per vector masking is supported. */
1722 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1723 		pci_config_put32(handle, msi_mask_off, msi_pvm);
1724 		APIC_VERBOSE(INTR, (CE_CONT,
1725 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
1726 		    pci_config_get32(handle, msi_mask_off)));
1727 	}
1728 
1729 	return (newp);
1730 }
1731 
1732 void
1733 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1734 {
1735 	apic_irq_t *irqp;
1736 
1737 	mutex_enter(&airq_mutex);
1738 	irqp = apic_irq_table[irqno];
1739 	irqp->airq_cpu = cpuid;
1740 	irqp->airq_vector = vector;
1741 	apic_record_rdt_entry(irqp, irqno);
1742 	mutex_exit(&airq_mutex);
1743 }
1744 
1745 apix_vector_t *
1746 apix_intx_get_vector(int irqno)
1747 {
1748 	apic_irq_t *irqp;
1749 	uint32_t cpuid;
1750 	uchar_t vector;
1751 
1752 	mutex_enter(&airq_mutex);
1753 	irqp = apic_irq_table[irqno & 0xff];
1754 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1755 		mutex_exit(&airq_mutex);
1756 		return (NULL);
1757 	}
1758 	cpuid = irqp->airq_cpu;
1759 	vector = irqp->airq_vector;
1760 	mutex_exit(&airq_mutex);
1761 
1762 	return (xv_vector(cpuid, vector));
1763 }
1764 
1765 /*
1766  * Must called with interrupts disabled and apic_ioapic_lock held
1767  */
1768 void
1769 apix_intx_enable(int irqno)
1770 {
1771 	uchar_t ioapicindex, intin;
1772 	apic_irq_t *irqp = apic_irq_table[irqno];
1773 	ioapic_rdt_t irdt;
1774 	apic_cpus_info_t *cpu_infop;
1775 	apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1776 
1777 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1778 
1779 	ioapicindex = irqp->airq_ioapicindex;
1780 	intin = irqp->airq_intin_no;
1781 	cpu_infop =  &apic_cpus[irqp->airq_cpu];
1782 
1783 	irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1784 	irdt.ir_hi = cpu_infop->aci_local_id;
1785 
1786 	apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1787 	    vecp->v_type, 1, ioapicindex);
1788 	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1789 	    (void *)&irdt, vecp->v_type, 1);
1790 	apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1791 
1792 	/* write RDT entry high dword - destination */
1793 	WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1794 	    irdt.ir_hi);
1795 
1796 	/* Write the vector, trigger, and polarity portion of the RDT */
1797 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1798 
1799 	vecp->v_state = APIX_STATE_ENABLED;
1800 
1801 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1802 	    " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1803 	    ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1804 }
1805 
1806 /*
1807  * Must called with interrupts disabled and apic_ioapic_lock held
1808  */
1809 void
1810 apix_intx_disable(int irqno)
1811 {
1812 	apic_irq_t *irqp = apic_irq_table[irqno];
1813 	int ioapicindex, intin;
1814 
1815 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1816 	/*
1817 	 * The assumption here is that this is safe, even for
1818 	 * systems with IOAPICs that suffer from the hardware
1819 	 * erratum because all devices have been quiesced before
1820 	 * they unregister their interrupt handlers.  If that
1821 	 * assumption turns out to be false, this mask operation
1822 	 * can induce the same erratum result we're trying to
1823 	 * avoid.
1824 	 */
1825 	ioapicindex = irqp->airq_ioapicindex;
1826 	intin = irqp->airq_intin_no;
1827 	ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1828 
1829 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1830 	    " intin 0x%x\n", ioapicindex, intin));
1831 }
1832 
1833 void
1834 apix_intx_free(int irqno)
1835 {
1836 	apic_irq_t *irqp;
1837 
1838 	mutex_enter(&airq_mutex);
1839 	irqp = apic_irq_table[irqno];
1840 
1841 	if (IS_IRQ_FREE(irqp)) {
1842 		mutex_exit(&airq_mutex);
1843 		return;
1844 	}
1845 
1846 	irqp->airq_mps_intr_index = FREE_INDEX;
1847 	irqp->airq_cpu = IRQ_UNINIT;
1848 	irqp->airq_vector = APIX_INVALID_VECT;
1849 	mutex_exit(&airq_mutex);
1850 }
1851 
#if defined(DEBUG)
/* Event counters maintained by apix_intx_rebind() on DEBUG kernels. */
int apix_intr_deliver_timeouts = 0;	/* delivery status still pending */
int apix_intr_rirr_timeouts = 0;	/* gave up waiting for Remote IRR */
int apix_intr_rirr_reset_failure = 0;	/* trigger-mode toggle did not help */
#endif	/* DEBUG */

/* Number of wait-and-delay rounds spent waiting for Remote IRR to clear. */
int apix_max_reps_irr_pending = 10;

/* Read the low RDT dword of (ix, pin) and keep only the bits in "mask". */
#define	GET_RDT_BITS(ix, pin, mask)	\
	(READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ix), (pin)) & (mask))

/* Pause between Remote IRR checks: 5000 microseconds, in clock ticks. */
#define	APIX_CHECK_IRR_DELAY	drv_usectohz(5000)
1862 
1863 int
1864 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1865 {
1866 	apic_irq_t *irqp = apic_irq_table[irqno];
1867 	ulong_t iflag;
1868 	int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1869 
1870 	ASSERT(irqp != NULL);
1871 
1872 	iflag = intr_clear();
1873 	lock_set(&apic_ioapic_lock);
1874 
1875 	ioapic_ix = irqp->airq_ioapicindex;
1876 	intin_no = irqp->airq_intin_no;
1877 	level = apic_level_intr[irqno];
1878 
1879 	/*
1880 	 * Wait for the delivery status bit to be cleared. This should
1881 	 * be a very small amount of time.
1882 	 */
1883 	repeats = 0;
1884 	do {
1885 		repeats++;
1886 
1887 		for (waited = 0; waited < apic_max_reps_clear_pending;
1888 		    waited++) {
1889 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1890 				break;
1891 		}
1892 		if (!level)
1893 			break;
1894 
1895 		/*
1896 		 * Mask the RDT entry for level-triggered interrupts.
1897 		 */
1898 		irqp->airq_rdt_entry |= AV_MASK;
1899 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1900 		    intin_no);
1901 		if ((masked = (rdt_entry & AV_MASK)) == 0) {
1902 			/* Mask it */
1903 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1904 			    AV_MASK | rdt_entry);
1905 		}
1906 
1907 		/*
1908 		 * If there was a race and an interrupt was injected
1909 		 * just before we masked, check for that case here.
1910 		 * Then, unmask the RDT entry and try again.  If we're
1911 		 * on our last try, don't unmask (because we want the
1912 		 * RDT entry to remain masked for the rest of the
1913 		 * function).
1914 		 */
1915 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1916 		    intin_no);
1917 		if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1918 		    (repeats < apic_max_reps_clear_pending)) {
1919 			/* Unmask it */
1920 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1921 			    intin_no, rdt_entry & ~AV_MASK);
1922 			irqp->airq_rdt_entry &= ~AV_MASK;
1923 		}
1924 	} while ((rdt_entry & AV_PENDING) &&
1925 	    (repeats < apic_max_reps_clear_pending));
1926 
1927 #ifdef DEBUG
1928 	if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1929 		apix_intr_deliver_timeouts++;
1930 #endif
1931 
1932 	if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1933 		goto done;
1934 
1935 	/*
1936 	 * wait for remote IRR to be cleared for level-triggered
1937 	 * interrupts
1938 	 */
1939 	repeats = 0;
1940 	do {
1941 		repeats++;
1942 
1943 		for (waited = 0; waited < apic_max_reps_clear_pending;
1944 		    waited++) {
1945 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1946 			    == 0)
1947 				break;
1948 		}
1949 
1950 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1951 			lock_clear(&apic_ioapic_lock);
1952 			intr_restore(iflag);
1953 
1954 			delay(APIX_CHECK_IRR_DELAY);
1955 
1956 			iflag = intr_clear();
1957 			lock_set(&apic_ioapic_lock);
1958 		}
1959 	} while (repeats < apix_max_reps_irr_pending);
1960 
1961 	if (repeats >= apix_max_reps_irr_pending) {
1962 #ifdef DEBUG
1963 		apix_intr_rirr_timeouts++;
1964 #endif
1965 
1966 		/*
1967 		 * If we waited and the Remote IRR bit is still not cleared,
1968 		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
1969 		 * times for this interrupt, try the last-ditch workaround:
1970 		 */
1971 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1972 			/*
1973 			 * Trying to clear the bit through normal
1974 			 * channels has failed.  So as a last-ditch
1975 			 * effort, try to set the trigger mode to
1976 			 * edge, then to level.  This has been
1977 			 * observed to work on many systems.
1978 			 */
1979 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1980 			    intin_no,
1981 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1982 			    intin_no) & ~AV_LEVEL);
1983 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1984 			    intin_no,
1985 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1986 			    intin_no) | AV_LEVEL);
1987 		}
1988 
1989 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1990 #ifdef DEBUG
1991 			apix_intr_rirr_reset_failure++;
1992 #endif
1993 			lock_clear(&apic_ioapic_lock);
1994 			intr_restore(iflag);
1995 			prom_printf("apix: Remote IRR still "
1996 			    "not clear for IOAPIC %d intin %d.\n"
1997 			    "\tInterrupts to this pin may cease "
1998 			    "functioning.\n", ioapic_ix, intin_no);
1999 			return (1);	/* return failure */
2000 		}
2001 	}
2002 
2003 done:
2004 	/* change apic_irq_table */
2005 	lock_clear(&apic_ioapic_lock);
2006 	intr_restore(iflag);
2007 	apix_intx_set_vector(irqno, cpuid, vector);
2008 	iflag = intr_clear();
2009 	lock_set(&apic_ioapic_lock);
2010 
2011 	/* reprogramme IO-APIC RDT entry */
2012 	apix_intx_enable(irqno);
2013 
2014 	lock_clear(&apic_ioapic_lock);
2015 	intr_restore(iflag);
2016 
2017 	return (0);
2018 }
2019 
2020 static int
2021 apix_intx_get_pending(int irqno)
2022 {
2023 	apic_irq_t *irqp;
2024 	int intin, ioapicindex, pending;
2025 	ulong_t iflag;
2026 
2027 	mutex_enter(&airq_mutex);
2028 	irqp = apic_irq_table[irqno];
2029 	if (IS_IRQ_FREE(irqp)) {
2030 		mutex_exit(&airq_mutex);
2031 		return (0);
2032 	}
2033 
2034 	/* check IO-APIC delivery status */
2035 	intin = irqp->airq_intin_no;
2036 	ioapicindex = irqp->airq_ioapicindex;
2037 	mutex_exit(&airq_mutex);
2038 
2039 	iflag = intr_clear();
2040 	lock_set(&apic_ioapic_lock);
2041 
2042 	pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2043 	    AV_PENDING) ? 1 : 0;
2044 
2045 	lock_clear(&apic_ioapic_lock);
2046 	intr_restore(iflag);
2047 
2048 	return (pending);
2049 }
2050 
2051 /*
2052  * This function will mask the interrupt on the I/O APIC
2053  */
2054 static void
2055 apix_intx_set_mask(int irqno)
2056 {
2057 	int intin, ioapixindex, rdt_entry;
2058 	ulong_t iflag;
2059 	apic_irq_t *irqp;
2060 
2061 	mutex_enter(&airq_mutex);
2062 	irqp = apic_irq_table[irqno];
2063 
2064 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2065 
2066 	intin = irqp->airq_intin_no;
2067 	ioapixindex = irqp->airq_ioapicindex;
2068 	mutex_exit(&airq_mutex);
2069 
2070 	iflag = intr_clear();
2071 	lock_set(&apic_ioapic_lock);
2072 
2073 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2074 
2075 	/* clear mask */
2076 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2077 	    (AV_MASK | rdt_entry));
2078 
2079 	lock_clear(&apic_ioapic_lock);
2080 	intr_restore(iflag);
2081 }
2082 
2083 /*
2084  * This function will clear the mask for the interrupt on the I/O APIC
2085  */
2086 static void
2087 apix_intx_clear_mask(int irqno)
2088 {
2089 	int intin, ioapixindex, rdt_entry;
2090 	ulong_t iflag;
2091 	apic_irq_t *irqp;
2092 
2093 	mutex_enter(&airq_mutex);
2094 	irqp = apic_irq_table[irqno];
2095 
2096 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2097 
2098 	intin = irqp->airq_intin_no;
2099 	ioapixindex = irqp->airq_ioapicindex;
2100 	mutex_exit(&airq_mutex);
2101 
2102 	iflag = intr_clear();
2103 	lock_set(&apic_ioapic_lock);
2104 
2105 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2106 
2107 	/* clear mask */
2108 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2109 	    ((~AV_MASK) & rdt_entry));
2110 
2111 	lock_clear(&apic_ioapic_lock);
2112 	intr_restore(iflag);
2113 }
2114 
2115 /*
2116  * For level-triggered interrupt, mask the IRQ line. Mask means
2117  * new interrupts will not be delivered. The interrupt already
2118  * accepted by a local APIC is not affected
2119  */
2120 void
2121 apix_level_intr_pre_eoi(int irq)
2122 {
2123 	apic_irq_t *irqp = apic_irq_table[irq];
2124 	int apic_ix, intin_ix;
2125 
2126 	if (irqp == NULL)
2127 		return;
2128 
2129 	ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2130 
2131 	lock_set(&apic_ioapic_lock);
2132 
2133 	intin_ix = irqp->airq_intin_no;
2134 	apic_ix = irqp->airq_ioapicindex;
2135 
2136 	if (irqp->airq_cpu != CPU->cpu_id) {
2137 		if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2138 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2139 		lock_clear(&apic_ioapic_lock);
2140 		return;
2141 	}
2142 
2143 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2144 		/*
2145 		 * This is a IOxAPIC and there is EOI register:
2146 		 *	Change the vector to reserved unused vector, so that
2147 		 *	the EOI	from Local APIC won't clear the Remote IRR for
2148 		 *	this level trigger interrupt. Instead, we'll manually
2149 		 *	clear it in apix_post_hardint() after ISR handling.
2150 		 */
2151 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2152 		    (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2153 	} else {
2154 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2155 		    AV_MASK | irqp->airq_rdt_entry);
2156 	}
2157 
2158 	lock_clear(&apic_ioapic_lock);
2159 }
2160 
2161 /*
2162  * For level-triggered interrupt, unmask the IRQ line
2163  * or restore the original vector number.
2164  */
2165 void
2166 apix_level_intr_post_dispatch(int irq)
2167 {
2168 	apic_irq_t *irqp = apic_irq_table[irq];
2169 	int apic_ix, intin_ix;
2170 
2171 	if (irqp == NULL)
2172 		return;
2173 
2174 	lock_set(&apic_ioapic_lock);
2175 
2176 	intin_ix = irqp->airq_intin_no;
2177 	apic_ix = irqp->airq_ioapicindex;
2178 
2179 	if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2180 		/*
2181 		 * Already sent EOI back to Local APIC.
2182 		 * Send EOI to IO-APIC
2183 		 */
2184 		ioapic_write_eoi(apic_ix, irqp->airq_vector);
2185 	} else {
2186 		/* clear the mask or restore the vector */
2187 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2188 		    irqp->airq_rdt_entry);
2189 
2190 		/* send EOI to IOxAPIC */
2191 		if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2192 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2193 	}
2194 
2195 	lock_clear(&apic_ioapic_lock);
2196 }
2197 
2198 static int
2199 apix_intx_get_shared(int irqno)
2200 {
2201 	apic_irq_t *irqp;
2202 	int share;
2203 
2204 	mutex_enter(&airq_mutex);
2205 	irqp = apic_irq_table[irqno];
2206 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2207 		mutex_exit(&airq_mutex);
2208 		return (0);
2209 	}
2210 	share = irqp->airq_share;
2211 	mutex_exit(&airq_mutex);
2212 
2213 	return (share);
2214 }
2215 
2216 static void
2217 apix_intx_set_shared(int irqno, int delta)
2218 {
2219 	apic_irq_t *irqp;
2220 
2221 	mutex_enter(&airq_mutex);
2222 	irqp = apic_irq_table[irqno];
2223 	if (IS_IRQ_FREE(irqp)) {
2224 		mutex_exit(&airq_mutex);
2225 		return;
2226 	}
2227 	irqp->airq_share += delta;
2228 	mutex_exit(&airq_mutex);
2229 }
2230 
2231 /*
2232  * Setup IRQ table. Return IRQ no or -1 on failure
2233  */
2234 static int
2235 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2236     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2237 {
2238 	int origirq = ispec->intrspec_vec;
2239 	int newirq;
2240 	short intr_index;
2241 	uchar_t ipin, ioapic, ioapicindex;
2242 	apic_irq_t *irqp;
2243 
2244 	UNREFERENCED_1PARAMETER(inum);
2245 
2246 	if (intrp != NULL) {
2247 		intr_index = (short)(intrp - apic_io_intrp);
2248 		ioapic = intrp->intr_destid;
2249 		ipin = intrp->intr_destintin;
2250 
2251 		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
2252 		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2253 			if (apic_io_id[ioapicindex] == ioapic)
2254 				break;
2255 		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2256 		    (ioapic == INTR_ALL_APIC));
2257 
2258 		/* check whether this intin# has been used by another irqno */
2259 		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2260 			return (newirq);
2261 
2262 	} else if (iflagp != NULL) {	/* ACPI */
2263 		intr_index = ACPI_INDEX;
2264 		ioapicindex = acpi_find_ioapic(irqno);
2265 		ASSERT(ioapicindex != 0xFF);
2266 		ioapic = apic_io_id[ioapicindex];
2267 		ipin = irqno - apic_io_vectbase[ioapicindex];
2268 
2269 		if (apic_irq_table[irqno] &&
2270 		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2271 			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2272 			    apic_irq_table[irqno]->airq_ioapicindex ==
2273 			    ioapicindex);
2274 			return (irqno);
2275 		}
2276 
2277 	} else {	/* default configuration */
2278 		intr_index = DEFAULT_INDEX;
2279 		ioapicindex = 0;
2280 		ioapic = apic_io_id[ioapicindex];
2281 		ipin = (uchar_t)irqno;
2282 	}
2283 
2284 	/* allocate a new IRQ no */
2285 	if ((irqp = apic_irq_table[irqno]) == NULL) {
2286 		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2287 		apic_irq_table[irqno] = irqp;
2288 	} else {
2289 		if (irqp->airq_mps_intr_index != FREE_INDEX) {
2290 			newirq = apic_allocate_irq(apic_first_avail_irq);
2291 			if (newirq == -1) {
2292 				return (-1);
2293 			}
2294 			irqno = newirq;
2295 			irqp = apic_irq_table[irqno];
2296 			ASSERT(irqp != NULL);
2297 		}
2298 	}
2299 	apic_max_device_irq = max(irqno, apic_max_device_irq);
2300 	apic_min_device_irq = min(irqno, apic_min_device_irq);
2301 
2302 	irqp->airq_mps_intr_index = intr_index;
2303 	irqp->airq_ioapicindex = ioapicindex;
2304 	irqp->airq_intin_no = ipin;
2305 	irqp->airq_dip = dip;
2306 	irqp->airq_origirq = (uchar_t)origirq;
2307 	if (iflagp != NULL)
2308 		irqp->airq_iflag = *iflagp;
2309 	irqp->airq_cpu = IRQ_UNINIT;
2310 	irqp->airq_vector = 0;
2311 
2312 	return (irqno);
2313 }
2314 
2315 /*
2316  * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2317  */
2318 static int
2319 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2320     struct intrspec *ispec)
2321 {
2322 	int irqno = ispec->intrspec_vec;
2323 	int newirq, i;
2324 	iflag_t intr_flag;
2325 	ACPI_SUBTABLE_HEADER	*hp;
2326 	ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2327 	struct apic_io_intr *intrp;
2328 
2329 	if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2330 		int busid;
2331 
2332 		if (bustype == 0)
2333 			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2334 
2335 		/* loop checking BUS_ISA/BUS_EISA */
2336 		for (i = 0; i < 2; i++) {
2337 			if (((busid = apic_find_bus_id(bustype)) != -1) &&
2338 			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2339 			    != NULL)) {
2340 				return (apix_intx_setup(dip, inum, irqno,
2341 				    intrp, ispec, NULL));
2342 			}
2343 			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2344 		}
2345 
2346 		/* fall back to default configuration */
2347 		return (-1);
2348 	}
2349 
2350 	/* search iso entries first */
2351 	if (acpi_iso_cnt != 0) {
2352 		hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2353 		i = 0;
2354 		while (i < acpi_iso_cnt) {
2355 			if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2356 				isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2357 				if (isop->Bus == 0 &&
2358 				    isop->SourceIrq == irqno) {
2359 					newirq = isop->GlobalIrq;
2360 					intr_flag.intr_po = isop->IntiFlags &
2361 					    ACPI_MADT_POLARITY_MASK;
2362 					intr_flag.intr_el = (isop->IntiFlags &
2363 					    ACPI_MADT_TRIGGER_MASK) >> 2;
2364 					intr_flag.bustype = BUS_ISA;
2365 
2366 					return (apix_intx_setup(dip, inum,
2367 					    newirq, NULL, ispec, &intr_flag));
2368 				}
2369 				i++;
2370 			}
2371 			hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2372 			    hp->Length);
2373 		}
2374 	}
2375 	intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2376 	intr_flag.intr_el = INTR_EL_EDGE;
2377 	intr_flag.bustype = BUS_ISA;
2378 	return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2379 }
2380 
2381 
2382 /*
2383  * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2384  */
2385 static int
2386 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2387     struct intrspec *ispec)
2388 {
2389 	int busid, devid, pci_irq;
2390 	ddi_acc_handle_t cfg_handle;
2391 	uchar_t ipin;
2392 	iflag_t intr_flag;
2393 	struct apic_io_intr *intrp;
2394 
2395 	if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2396 		return (-1);
2397 
2398 	if (busid == 0 && apic_pci_bus_total == 1)
2399 		busid = (int)apic_single_pci_busid;
2400 
2401 	if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2402 		return (-1);
2403 	ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2404 	pci_config_teardown(&cfg_handle);
2405 
2406 	if (apic_enable_acpi && !apic_use_acpi_madt_only) {	/* ACPI */
2407 		if (apic_acpi_translate_pci_irq(dip, busid, devid,
2408 		    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2409 			return (-1);
2410 
2411 		intr_flag.bustype = (uchar_t)bustype;
2412 		return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2413 		    &intr_flag));
2414 	}
2415 
2416 	/* MP configuration table */
2417 	pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2418 	if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2419 		pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2420 		if (pci_irq == -1)
2421 			return (-1);
2422 	}
2423 
2424 	return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2425 }
2426 
2427 /*
2428  * Translate and return IRQ no
2429  */
2430 static int
2431 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2432 {
2433 	int newirq, irqno = ispec->intrspec_vec;
2434 	int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2435 	int bustype = 0, dev_len;
2436 	char dev_type[16];
2437 
2438 	if (apic_defconf) {
2439 		mutex_enter(&airq_mutex);
2440 		goto defconf;
2441 	}
2442 
2443 	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2444 		mutex_enter(&airq_mutex);
2445 		goto nonpci;
2446 	}
2447 
2448 	/*
2449 	 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2450 	 * to avoid extra buffer allocation.
2451 	 */
2452 	dev_len = sizeof (dev_type);
2453 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2454 	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2455 	    &dev_len) == DDI_PROP_SUCCESS) {
2456 		if ((strcmp(dev_type, "pci") == 0) ||
2457 		    (strcmp(dev_type, "pciex") == 0))
2458 			parent_is_pci_or_pciex = 1;
2459 	}
2460 
2461 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2462 	    DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2463 	    &dev_len) == DDI_PROP_SUCCESS) {
2464 		if (strstr(dev_type, "pciex"))
2465 			child_is_pciex = 1;
2466 	}
2467 
2468 	mutex_enter(&airq_mutex);
2469 
2470 	if (parent_is_pci_or_pciex) {
2471 		bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2472 		newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2473 		if (newirq != -1)
2474 			goto done;
2475 		bustype = 0;
2476 	} else if (strcmp(dev_type, "isa") == 0)
2477 		bustype = BUS_ISA;
2478 	else if (strcmp(dev_type, "eisa") == 0)
2479 		bustype = BUS_EISA;
2480 
2481 nonpci:
2482 	newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2483 	if (newirq != -1)
2484 		goto done;
2485 
2486 defconf:
2487 	newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2488 	if (newirq == -1) {
2489 		mutex_exit(&airq_mutex);
2490 		return (-1);
2491 	}
2492 done:
2493 	ASSERT(apic_irq_table[newirq]);
2494 	mutex_exit(&airq_mutex);
2495 	return (newirq);
2496 }
2497 
2498 static int
2499 apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2500 {
2501 	int irqno;
2502 	apix_vector_t *vecp;
2503 
2504 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2505 		return (0);
2506 
2507 	if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2508 		return (0);
2509 
2510 	DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2511 	    "irqno=0x%x cpuid=%d vector=0x%x\n",
2512 	    (void *)dip, ddi_driver_name(dip), irqno,
2513 	    vecp->v_cpuid, vecp->v_vector));
2514 
2515 	return (1);
2516 }
2517 
2518 /*
2519  * Return the vector number if the translated IRQ for this device
2520  * has a vector mapping setup. If no IRQ setup exists or no vector is
2521  * allocated to it then return 0.
2522  */
2523 static apix_vector_t *
2524 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2525 {
2526 	int irqno;
2527 	apix_vector_t *vecp;
2528 
2529 	/* get the IRQ number */
2530 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2531 		return (NULL);
2532 
2533 	/* get the vector number if a vector is allocated to this irqno */
2534 	vecp = apix_intx_get_vector(irqno);
2535 
2536 	return (vecp);
2537 }
2538 
2539 /*
2540  * Switch between safe and x2APIC IPI sending method.
2541  * The CPU may power on in xapic mode or x2apic mode. If the CPU needs to send
2542  * an IPI to other CPUs before entering x2APIC mode, it still needs to use the
2543  * xAPIC method. Before sending a StartIPI to the target CPU, psm_send_ipi will
2544  * be changed to apic_common_send_ipi, which detects current local APIC mode and
2545  * use the right method to send an IPI. If some CPUs fail to start up,
2546  * apic_poweron_cnt won't return to zero, so apic_common_send_ipi will always be
2547  * used. psm_send_ipi can't be simply changed back to x2apic_send_ipi if some
2548  * CPUs failed to start up because those failed CPUs may recover itself later at
2549  * unpredictable time.
2550  */
2551 void
2552 apic_switch_ipi_callback(boolean_t enter)
2553 {
2554 	ulong_t iflag;
2555 	struct psm_ops *pops = psmops;
2556 
2557 	iflag = intr_clear();
2558 	lock_set(&apic_mode_switch_lock);
2559 	if (enter) {
2560 		ASSERT(apic_poweron_cnt >= 0);
2561 		if (apic_poweron_cnt == 0) {
2562 			pops->psm_send_ipi = apic_common_send_ipi;
2563 			send_dirintf = pops->psm_send_ipi;
2564 			pops->psm_send_pir_ipi = apic_common_send_pir_ipi;
2565 			psm_send_pir_ipi = pops->psm_send_pir_ipi;
2566 		}
2567 		apic_poweron_cnt++;
2568 	} else {
2569 		ASSERT(apic_poweron_cnt > 0);
2570 		apic_poweron_cnt--;
2571 		if (apic_poweron_cnt == 0) {
2572 			pops->psm_send_ipi = x2apic_send_ipi;
2573 			send_dirintf = pops->psm_send_ipi;
2574 			pops->psm_send_pir_ipi = x2apic_send_pir_ipi;
2575 			psm_send_pir_ipi = pops->psm_send_pir_ipi;
2576 		}
2577 	}
2578 	lock_clear(&apic_mode_switch_lock);
2579 	intr_restore(iflag);
2580 }
2581 
/*
 * Stub function: returns the current value of apix_is_enabled.
 */
int
apix_loaded(void)
{
	return (apix_is_enabled);
}
2588