xref: /illumos-gate/usr/src/uts/i86pc/io/apix/apix.c (revision bde334a8dbd66dfa70ce4d7fc9dcad6e1ae45fe4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  * Copyright 2018 Joyent, Inc.
29  */
30 
31 /*
32  * To understand how the apix module interacts with the interrupt subsystem read
33  * the theory statement in uts/i86pc/os/intr.c.
34  */
35 
36 /*
37  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
38  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
39  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
40  * PSMI 1.5 extensions are supported in Solaris Nevada.
41  * PSMI 1.6 extensions are supported in Solaris Nevada.
42  * PSMI 1.7 extensions are supported in Solaris Nevada.
43  */
44 #define	PSMI_1_7
45 
46 #include <sys/processor.h>
47 #include <sys/time.h>
48 #include <sys/psm.h>
49 #include <sys/smp_impldefs.h>
50 #include <sys/cram.h>
51 #include <sys/acpi/acpi.h>
52 #include <sys/acpica.h>
53 #include <sys/psm_common.h>
54 #include <sys/pit.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/ddi_impldefs.h>
58 #include <sys/pci.h>
59 #include <sys/promif.h>
60 #include <sys/x86_archext.h>
61 #include <sys/cpc_impl.h>
62 #include <sys/uadmin.h>
63 #include <sys/panic.h>
64 #include <sys/debug.h>
65 #include <sys/archsystm.h>
66 #include <sys/trap.h>
67 #include <sys/machsystm.h>
68 #include <sys/sysmacros.h>
69 #include <sys/cpuvar.h>
70 #include <sys/rm_platter.h>
71 #include <sys/privregs.h>
72 #include <sys/note.h>
73 #include <sys/pci_intr_lib.h>
74 #include <sys/spl.h>
75 #include <sys/clock.h>
76 #include <sys/cyclic.h>
77 #include <sys/dditypes.h>
78 #include <sys/sunddi.h>
79 #include <sys/x_call.h>
80 #include <sys/reboot.h>
81 #include <sys/mach_intr.h>
82 #include <sys/apix.h>
83 #include <sys/apix_irm_impl.h>
84 
/*
 * Forward declarations for the static routines that are installed in the
 * apix_ops vector defined later in this file.
 */
static int apix_probe();
static void apix_init();
static void apix_picinit(void);
static int apix_intr_enter(int, int *);
static void apix_intr_exit(int, int);
static void apix_setspl(int);
static int apix_disable_intr(processorid_t);
static void apix_enable_intr(processorid_t);
static int apix_get_clkvect(int);
static int apix_get_ipivect(int, int);
static void apix_post_cyclic_setup(void *);
static int apix_post_cpu_start();
static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);
99 
/*
 * Helper functions for apix_intr_ops().
 *
 * apix_redistribute_compute() is additionally used by apix_intr_enter()
 * and apix_post_cyclic_setup(), and apix_intx_set_shared() by
 * apix_addspl() and apix_delspl().
 */
static void apix_redistribute_compute(void);
static int apix_get_pending(apix_vector_t *);
static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
static char *apix_get_apic_type(void);
static int apix_intx_get_pending(int);
static void apix_intx_set_mask(int irqno);
static void apix_intx_clear_mask(int irqno);
static int apix_intx_get_shared(int irqno);
static void apix_intx_set_shared(int irqno, int delta);
static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
    struct intrspec *);
static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
116 
/* Clock initialization routine installed in apix_ops; defined elsewhere. */
extern int apic_clkinit(int);

/* IRM initialization for APIX PSM module */
extern void apix_irm_init(void);

/*
 * When non-zero, apix_init() fills in the IRM pool parameters and
 * apix_picinit() calls apix_irm_init().
 */
extern int irm_enable;
123 
/*
 *	Local static data
 */

/*
 * PSM operations vector for the apix module.  The initializer is
 * positional, so the order of the entries must match the member order of
 * struct psm_ops.  The apix_* entries are this file's own routines; the
 * apic_* entries are not defined in this file.  NULL marks an operation
 * for which no routine is supplied.
 *
 * Some members are rewritten at run time: _init() may replace
 * psm_gethrtime, and x2apic_update_psm() replaces psm_send_ipi and
 * psm_send_pir_ipi.
 */
static struct	psm_ops apix_ops = {
	apix_probe,

	apix_init,
	apix_picinit,
	apix_intr_enter,
	apix_intr_exit,
	apix_setspl,
	apix_addspl,
	apix_delspl,
	apix_disable_intr,
	apix_enable_intr,
	NULL,			/* psm_softlvl_to_irq */
	NULL,			/* psm_set_softintr */

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apix_get_clkvect,
	NULL,			/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apix_post_cpu_start,
	apic_shutdown,
	apix_get_ipivect,
	apic_send_ipi,

	NULL,			/* psm_translate_irq */
	NULL,			/* psm_notify_error */
	NULL,			/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apix_post_cyclic_setup,
	apic_preshutdown,
	apix_intr_ops,		/* Advanced DDI Interrupt framework */
	apic_state,		/* save, restore apic state for S3 */
	apic_cpu_ops,		/* CPU control interface. */

	apic_get_pir_ipivect,
	apic_send_pir_ipi,
	apic_cmci_setup
};
173 
/* Non-static pointer to this module's operations vector. */
struct psm_ops *psmops = &apix_ops;

/*
 * Module description handed to psm_mod_init(), psm_mod_fini() and
 * psm_mod_info().  apix_probe() passes its p_mach_idstring member to
 * apic_probe_common().
 */
static struct	psm_info apix_psm_info = {
	PSM_INFO_VER01_7,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	&apix_ops,				/* operation */
	APIX_NAME,				/* machine name */
	/* NOTE(review): presumably the description string - confirm */
	"apix MPv1.4 compatible",
};
183 
/* Handle passed to psm_mod_init(), psm_mod_fini() and psm_mod_info(). */
static void *apix_hdlp;

/*
 * Set by apix_probe(): 1 when apic_probe_common() returns PSM_SUCCESS,
 * 0 when it returns anything else.
 */
static int apix_is_enabled = 0;

/*
 * Flag to indicate if APIX is to be enabled only for platforms
 * with specific hw feature(s).  When it is non-zero, apix_probe() checks
 * the features selected in apix_supported_hw.
 */
int apix_hw_chk_enable = 1;

/*
 * Hw features that are checked for enabling APIX support.
 */
#define	APIX_SUPPORT_X2APIC	0x00000001
uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;

/*
 * apix_lock is used for cpu selection and vector re-binding
 */
lock_t apix_lock;
/* Per-CPU apix state, indexed by CPU id; filled in by apix_softinit(). */
apix_impl_t *apixs[NCPU];
/*
 * Mapping between device interrupt and the allocated vector. Indexed
 * by major number.
 */
apix_dev_vector_t **apix_dev_vector;
/*
 * Mapping between device major number and cpu id. It gets used
 * when interrupt binding policy round robin with affinity is
 * applied. With that policy, devices with the same major number
 * will be bound to the same CPU.
 */
processorid_t *apix_major_to_cpu;	/* major to cpu mapping */
kmutex_t apix_mutex;	/* for apix_dev_vector & apix_major_to_cpu */

int apix_nipis = 16;	/* Maximum number of IPIs */
/*
 * Maximum number of vectors in a CPU that can be used for interrupt
 * allocation (including IPIs and the reserved vectors).
 */
int apix_cpu_nvectors = APIX_NVECTOR;

/* number of CPUs in power-on transition state */
static int apic_poweron_cnt = 0;
228 
/* gcpu.h */

/*
 * Routines defined outside this file.
 * NOTE(review): neither name is referenced in the part of this file that
 * was reviewed (apix_init() installs apix_do_interrupt, not
 * apic_do_interrupt) - confirm these declarations are still needed.
 */
extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
extern void apic_change_eoi();
233 
234 /*
235  *	This is the loadable module wrapper
236  */
237 
238 int
239 _init(void)
240 {
241 	if (apic_coarse_hrtime)
242 		apix_ops.psm_gethrtime = &apic_gettime;
243 	return (psm_mod_init(&apix_hdlp, &apix_psm_info));
244 }
245 
246 int
247 _fini(void)
248 {
249 	return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
250 }
251 
252 int
253 _info(struct modinfo *modinfop)
254 {
255 	return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
256 }
257 
258 static int
259 apix_probe()
260 {
261 	int rval;
262 
263 	if (apix_enable == 0)
264 		return (PSM_FAILURE);
265 
266 	/*
267 	 * FIXME Temporarily disable apix module on Xen HVM platform due to
268 	 * known hang during boot (see #3605).
269 	 *
270 	 * Please remove when/if the issue is resolved.
271 	 */
272 	if (get_hwenv() & HW_XEN_HVM)
273 		return (PSM_FAILURE);
274 
275 	/* check for hw features if specified  */
276 	if (apix_hw_chk_enable) {
277 		/* check if x2APIC mode is supported */
278 		if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
279 		    APIX_SUPPORT_X2APIC) {
280 			if (apic_local_mode() == LOCAL_X2APIC) {
281 				/* x2APIC mode activated by BIOS, switch ops */
282 				apic_mode = LOCAL_X2APIC;
283 				apic_change_ops();
284 			} else if (!apic_detect_x2apic()) {
285 				/* x2APIC mode is not supported in the hw */
286 				apix_enable = 0;
287 			}
288 		}
289 		if (apix_enable == 0)
290 			return (PSM_FAILURE);
291 	}
292 
293 	rval = apic_probe_common(apix_psm_info.p_mach_idstring);
294 	if (rval == PSM_SUCCESS)
295 		apix_is_enabled = 1;
296 	else
297 		apix_is_enabled = 0;
298 	return (rval);
299 }
300 
301 /*
302  * Initialize the data structures needed by pcplusmpx module.
303  * Specifically, the data structures used by addspl() and delspl()
304  * routines.
305  */
306 static void
307 apix_softinit()
308 {
309 	int i, *iptr;
310 	apix_impl_t *hdlp;
311 	int nproc;
312 
313 	nproc = max(apic_nproc, apic_max_nproc);
314 
315 	hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
316 	for (i = 0; i < nproc; i++) {
317 		apixs[i] = &hdlp[i];
318 		apixs[i]->x_cpuid = i;
319 		LOCK_INIT_CLEAR(&apixs[i]->x_lock);
320 	}
321 
322 	/* cpu 0 is always up (for now) */
323 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
324 
325 	iptr = (int *)&apic_irq_table[0];
326 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
327 		apic_level_intr[i] = 0;
328 		*iptr++ = 0;
329 	}
330 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
331 
332 	apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
333 	    KM_SLEEP);
334 
335 	if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
336 		apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
337 		    KM_SLEEP);
338 		for (i = 0; i < devcnt; i++)
339 			apix_major_to_cpu[i] = IRQ_UNINIT;
340 	}
341 
342 	mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
343 }
344 
345 static int
346 apix_get_pending_spl(void)
347 {
348 	int cpuid = CPU->cpu_id;
349 
350 	return (bsrw_insn(apixs[cpuid]->x_intr_pending));
351 }
352 
353 static uintptr_t
354 apix_get_intr_handler(int cpu, short vec)
355 {
356 	apix_vector_t *apix_vector;
357 
358 	ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
359 	if (cpu >= apic_nproc || vec >= APIX_NVECTOR)
360 		return (0);
361 
362 	apix_vector = apixs[cpu]->x_vectbl[vec];
363 
364 	return ((uintptr_t)(apix_vector->v_autovect));
365 }
366 
367 static void
368 apix_init()
369 {
370 	extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
371 
372 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
373 
374 	do_interrupt_common = apix_do_interrupt;
375 	addintr = apix_add_avintr;
376 	remintr = apix_rem_avintr;
377 	get_pending_spl = apix_get_pending_spl;
378 	get_intr_handler = apix_get_intr_handler;
379 	psm_get_localapicid = apic_get_localapicid;
380 	psm_get_ioapicid = apic_get_ioapicid;
381 
382 	apix_softinit();
383 
384 #if !defined(__amd64)
385 	if (cpuid_have_cr8access(CPU))
386 		apic_have_32bit_cr8 = 1;
387 #endif
388 
389 	apic_pir_vect = apix_get_ipivect(XC_CPUPOKE_PIL, -1);
390 
391 	/*
392 	 * Initialize IRM pool parameters
393 	 */
394 	if (irm_enable) {
395 		int	i;
396 		int	lowest_irq;
397 		int	highest_irq;
398 
399 		/* number of CPUs present */
400 		apix_irminfo.apix_ncpus = apic_nproc;
401 		/* total number of entries in all of the IOAPICs present */
402 		lowest_irq = apic_io_vectbase[0];
403 		highest_irq = apic_io_vectend[0];
404 		for (i = 1; i < apic_io_max; i++) {
405 			if (apic_io_vectbase[i] < lowest_irq)
406 				lowest_irq = apic_io_vectbase[i];
407 			if (apic_io_vectend[i] > highest_irq)
408 				highest_irq = apic_io_vectend[i];
409 		}
410 		apix_irminfo.apix_ioapic_max_vectors =
411 		    highest_irq - lowest_irq + 1;
412 		/*
413 		 * Number of available per-CPU vectors excluding
414 		 * reserved vectors for Dtrace, int80, system-call,
415 		 * fast-trap, etc.
416 		 */
417 		apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
418 		    APIX_SW_RESERVED_VECTORS;
419 
420 		/* Number of vectors (pre) allocated (SCI and HPET) */
421 		apix_irminfo.apix_vectors_allocated = 0;
422 		if (apic_hpet_vect != -1)
423 			apix_irminfo.apix_vectors_allocated++;
424 		if (apic_sci_vect != -1)
425 			apix_irminfo.apix_vectors_allocated++;
426 	}
427 }
428 
/*
 * Program the local APIC of the calling CPU: the logical destination
 * registers (MMIO mode only), the spurious interrupt vector register and
 * the local vector table entries.  Called from apix_picinit() and from
 * apix_post_cpu_start().
 *
 * The task priority register is written with APIC_MASK_ALL on entry and
 * with 0 on exit.  The performance counter overflow, error and CMCI
 * sections each allocate an IPI vector and register a handler only while
 * the corresponding global vector is still 0; later callers reuse the
 * stored vector.
 */
static void
apix_init_intr()
{
	processorid_t	cpun = psm_get_cpu_id();
	uint_t nlvt;
	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
	extern void cmi_cmci_trap(void);

	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);

	if (apic_mode == LOCAL_APIC) {
		/*
		 * We are running APIC in MMIO mode.
		 */
		if (apic_flat_model) {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_FLAT_MODEL);
		} else {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_CLUSTER_MODEL);
		}

		apic_reg_ops->apic_write(APIC_DEST_REG,
		    AV_HIGH_ORDER >> cpun);
	}

	if (apic_directed_EOI_supported()) {
		/*
		 * Setting the 12th bit in the Spurious Interrupt Vector
		 * Register suppresses broadcast EOIs generated by the local
		 * APIC. The suppression of broadcast EOIs happens only when
		 * interrupts are level-triggered.
		 */
		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
	}

	/* need to enable APIC before unmasking NMI */
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);

	/*
	 * Presence of an invalid vector with delivery mode AV_FIXED can
	 * cause an error interrupt, even if the entry is masked...so
	 * write a valid vector to LVT entries along with the mask bit
	 */

	/* All APICs have timer and LINT0/1 */
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */

	/*
	 * On integrated APICs, the number of LVT entries is
	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
	 */

	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
		nlvt = 3;
	} else {
		/* bits 16-23 of the version register, plus one */
		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
		    0xFF) + 1;
	}

	if (nlvt >= 5) {
		/* Enable performance counter overflow interrupt */

		/* without the MSR feature the overflow interrupt is not used */
		if (!is_x86_feature(x86_featureset, X86FSET_MSR))
			apic_enable_cpcovf_intr = 0;
		if (apic_enable_cpcovf_intr) {
			if (apic_cpcovf_vect == 0) {
				int ipl = APIC_PCINT_IPL;

				apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
				ASSERT(apic_cpcovf_vect);

				(void) add_avintr(NULL, ipl,
				    (avfunc)kcpc_hw_overflow_intr,
				    "apic pcint", apic_cpcovf_vect,
				    NULL, NULL, NULL, NULL);
				kcpc_hw_overflow_intr_installed = 1;
				kcpc_hw_enable_cpc_intr =
				    apic_cpcovf_mask_clear;
			}
			apic_reg_ops->apic_write(APIC_PCINT_VECT,
			    apic_cpcovf_vect);
		}
	}

	if (nlvt >= 6) {
		/* Only mask TM intr if the BIOS apparently doesn't use it */

		uint32_t lvtval;

		/*
		 * The entry is rewritten as masked unless it is currently
		 * unmasked with SMI delivery mode.
		 */
		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
		if (((lvtval & AV_MASK) == AV_MASK) ||
		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
			apic_reg_ops->apic_write(APIC_THERM_VECT,
			    AV_MASK|APIC_RESV_IRQ);
		}
	}

	/* Enable error interrupt */

	if (nlvt >= 4 && apic_enable_error_intr) {
		if (apic_errvect == 0) {
			int ipl = 0xf;	/* get highest priority intr */
			apic_errvect = apix_get_ipivect(ipl, -1);
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr(NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    apic_errvect, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
		/* the error status register is written twice in a row */
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
	}

	/*
	 * Ensure a CMCI interrupt is allocated, regardless of whether it is
	 * enabled or not.
	 */
	if (apic_cmci_vect == 0) {
		const int ipl = 0x2;
		apic_cmci_vect = apix_get_ipivect(ipl, -1);
		ASSERT(apic_cmci_vect);

		(void) add_avintr(NULL, ipl,
		    (avfunc)cmi_cmci_trap, "apic cmci intr",
		    apic_cmci_vect, NULL, NULL, NULL, NULL);
	}

	/* drop the task priority back to 0 now that the LVT is programmed */
	apic_reg_ops->apic_write_task_reg(0);
}
566 
567 static void
568 apix_picinit(void)
569 {
570 	int i, j;
571 	uint_t isr;
572 
573 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
574 
575 	/*
576 	 * initialize interrupt remapping before apic
577 	 * hardware initialization
578 	 */
579 	apic_intrmap_init(apic_mode);
580 	if (apic_vt_ops == psm_vt_ops)
581 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
582 
583 	/*
584 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
585 	 * bit on without clearing it with EOI.  Since softint
586 	 * uses vector 0x20 to interrupt itself, so softint will
587 	 * not work on this machine.  In order to fix this problem
588 	 * a check is made to verify all the isr bits are clear.
589 	 * If not, EOIs are issued to clear the bits.
590 	 */
591 	for (i = 7; i >= 1; i--) {
592 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
593 		if (isr != 0)
594 			for (j = 0; ((j < 32) && (isr != 0)); j++)
595 				if (isr & (1 << j)) {
596 					apic_reg_ops->apic_write(
597 					    APIC_EOI_REG, 0);
598 					isr &= ~(1 << j);
599 					apic_error |= APIC_ERR_BOOT_EOI;
600 				}
601 	}
602 
603 	/* set a flag so we know we have run apic_picinit() */
604 	apic_picinit_called = 1;
605 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
606 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
607 	LOCK_INIT_CLEAR(&apic_error_lock);
608 	LOCK_INIT_CLEAR(&apic_mode_switch_lock);
609 
610 	picsetup();	 /* initialise the 8259 */
611 
612 	/* add nmi handler - least priority nmi handler */
613 	LOCK_INIT_CLEAR(&apic_nmi_lock);
614 
615 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
616 	    "apix NMI handler", (caddr_t)NULL))
617 		cmn_err(CE_WARN, "apix: Unable to add nmi handler");
618 
619 	apix_init_intr();
620 
621 	/* enable apic mode if imcr present */
622 	if (apic_imcrp) {
623 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
624 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
625 	}
626 
627 	ioapix_init_intr(IOAPIC_MASK);
628 
629 	/* setup global IRM pool if applicable */
630 	if (irm_enable)
631 		apix_irm_init();
632 }
633 
634 static __inline__ void
635 apix_send_eoi(void)
636 {
637 	if (apic_mode == LOCAL_APIC)
638 		LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
639 	else
640 		X2APIC_WRITE(APIC_EOI_REG, 0);
641 }
642 
/*
 * platform_intr_enter
 *
 *	Called at the beginning of the interrupt service routine, but unlike
 *	pcplusmp, does not mask interrupts. An EOI is given to the interrupt
 *	controller to enable other HW interrupts but interrupts are still
 *	masked by the IF flag.
 *
 *	ipl	- priority level that the new interrupt's level is
 *		  compared against
 *	vectorp	- in: vector number less APIC_BASE_VECT;
 *		  out: vector number with APIC_BASE_VECT added back
 *
 *	Returns APIC_INT_SPURIOUS for the spurious vector (no EOI is sent
 *	in that case).  When no vector entry is installed, returns
 *	apix_rebindinfo.i_pri for a fake interrupt and -1 otherwise.
 *	In all other cases returns the v_pri of the vector entry.
 *
 */
static int
apix_intr_enter(int ipl, int *vectorp)
{
	struct cpu *cpu = CPU;
	uint32_t cpuid = CPU->cpu_id;
	apic_cpus_info_t *cpu_infop;
	uchar_t vector;
	apix_vector_t *vecp;
	int nipl = -1;

	/*
	 * The real vector delivered is (*vectorp + 0x20), but our caller
	 * subtracts 0x20 from the vector before passing it to us.
	 * (That's why APIC_BASE_VECT is 0x20.)
	 */
	vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;

	cpu_infop = &apic_cpus[cpuid];
	if (vector == APIC_SPUR_INTR) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* no vector entry: acknowledge and return without dispatch state */
	vecp = xv_vector(cpuid, vector);
	if (vecp == NULL) {
		if (APIX_IS_FAKE_INTR(vector))
			nipl = apix_rebindinfo.i_pri;
		apix_send_eoi();
		return (nipl);
	}
	nipl = vecp->v_pri;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == (apic_clkvect + APIC_BASE_VECT)) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/* the stamp is bumped before and after the update */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apix_redistribute_compute();
		}

		apix_send_eoi();

		return (nipl);
	}

	ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);

	/* pre-EOI handling for level-triggered interrupts */
	if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
	    (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
		apix_level_intr_pre_eoi(vecp->v_inum);

	/* send back EOI */
	apix_send_eoi();

	/*
	 * Record the vector for this priority level; the current ipl and
	 * the in-progress mask are only raised when the new level is above
	 * both the caller's ipl and the CPU's base spl.
	 */
	cpu_infop->aci_current[nipl] = vector;
	if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
		cpu_infop->aci_curipl = (uchar_t)nipl;
		cpu_infop->aci_ISR_in_progress |= 1 << nipl;
	}

#ifdef	DEBUG
	if (vector >= APIX_IPI_MIN)
		return (nipl);	/* skip IPI */

	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(vecp->v_inum);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);
#endif /* DEBUG */

	return (nipl);
}
732 
733 /*
734  * Any changes made to this function must also change X2APIC
735  * version of intr_exit.
736  */
737 static void
738 apix_intr_exit(int prev_ipl, int arg2)
739 {
740 	int cpuid = psm_get_cpu_id();
741 	apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
742 	apix_impl_t *apixp = apixs[cpuid];
743 
744 	UNREFERENCED_1PARAMETER(arg2);
745 
746 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
747 	/* ISR above current pri could not be in progress */
748 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
749 
750 	if (apixp->x_obsoletes != NULL) {
751 		if (APIX_CPU_LOCK_HELD(cpuid))
752 			return;
753 
754 		APIX_ENTER_CPU_LOCK(cpuid);
755 		(void) apix_obsolete_vector(apixp->x_obsoletes);
756 		APIX_LEAVE_CPU_LOCK(cpuid);
757 	}
758 }
759 
760 /*
761  * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
762  * given ipl, but apix never uses the TPR and we never mask a subset of the
763  * interrupts. They are either all blocked by the IF flag or all can come in.
764  *
765  * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
766  * can come in if currently enabled by the IF flag. This table shows the state
767  * of the IF flag when we leave this function.
768  *
769  *    curr IF |	ipl == 15	ipl != 15
770  *    --------+---------------------------
771  *       0    |    0		    0
772  *       1    |    0		    1
773  */
774 static void
775 apix_setspl(int ipl)
776 {
777 	/*
778 	 * Interrupts at ipl above this cannot be in progress, so the following
779 	 * mask is ok.
780 	 */
781 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
782 
783 	if (ipl == XC_HI_PIL)
784 		cli();
785 }
786 
787 int
788 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
789 {
790 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
791 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
792 	apix_vector_t *vecp = xv_vector(cpuid, vector);
793 
794 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
795 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
796 
797 	if (vecp->v_type == APIX_TYPE_FIXED)
798 		apix_intx_set_shared(vecp->v_inum, 1);
799 
800 	/* There are more interrupts, so it's already been enabled */
801 	if (vecp->v_share > 1)
802 		return (PSM_SUCCESS);
803 
804 	/* return if it is not hardware interrupt */
805 	if (vecp->v_type == APIX_TYPE_IPI)
806 		return (PSM_SUCCESS);
807 
808 	/*
809 	 * if apix_picinit() has not been called yet, just return.
810 	 * At the end of apic_picinit(), we will call setup_io_intr().
811 	 */
812 	if (!apic_picinit_called)
813 		return (PSM_SUCCESS);
814 
815 	(void) apix_setup_io_intr(vecp);
816 
817 	return (PSM_SUCCESS);
818 }
819 
820 int
821 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
822 {
823 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
824 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
825 	apix_vector_t *vecp = xv_vector(cpuid, vector);
826 
827 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
828 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
829 
830 	if (vecp->v_type == APIX_TYPE_FIXED)
831 		apix_intx_set_shared(vecp->v_inum, -1);
832 
833 	/* There are more interrupts */
834 	if (vecp->v_share > 1)
835 		return (PSM_SUCCESS);
836 
837 	/* return if it is not hardware interrupt */
838 	if (vecp->v_type == APIX_TYPE_IPI)
839 		return (PSM_SUCCESS);
840 
841 	if (!apic_picinit_called) {
842 		cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
843 		    virtvec);
844 		return (PSM_SUCCESS);
845 	}
846 
847 	apix_disable_vector(vecp);
848 
849 	return (PSM_SUCCESS);
850 }
851 
852 /*
853  * Try and disable all interrupts. We just assign interrupts to other
854  * processors based on policy. If any were bound by user request, we
855  * let them continue and return failure. We do not bother to check
856  * for cache affinity while rebinding.
857  */
858 static int
859 apix_disable_intr(processorid_t cpun)
860 {
861 	apix_impl_t *apixp = apixs[cpun];
862 	apix_vector_t *vecp, *newp;
863 	int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
864 
865 	lock_set(&apix_lock);
866 
867 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
868 	apic_cpus[cpun].aci_curipl = 0;
869 
870 	/* if this is for SUSPEND operation, skip rebinding */
871 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
872 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
873 			vecp = apixp->x_vectbl[i];
874 			if (!IS_VECT_ENABLED(vecp))
875 				continue;
876 
877 			apix_disable_vector(vecp);
878 		}
879 		lock_clear(&apix_lock);
880 		return (PSM_SUCCESS);
881 	}
882 
883 	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
884 		vecp = apixp->x_vectbl[i];
885 		if (!IS_VECT_ENABLED(vecp))
886 			continue;
887 
888 		if (vecp->v_flags & APIX_VECT_USER_BOUND) {
889 			hardbound++;
890 			continue;
891 		}
892 		type = vecp->v_type;
893 
894 		/*
895 		 * If there are bound interrupts on this cpu, then
896 		 * rebind them to other processors.
897 		 */
898 		loop = 0;
899 		do {
900 			bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
901 
902 			if (type != APIX_TYPE_MSI)
903 				newp = apix_set_cpu(vecp, bindcpu, &ret);
904 			else
905 				newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
906 		} while ((newp == NULL) && (loop++ < apic_nproc));
907 
908 		if (loop >= apic_nproc) {
909 			errbound++;
910 			cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
911 			    vecp->v_cpuid, vecp->v_vector);
912 		}
913 	}
914 
915 	lock_clear(&apix_lock);
916 
917 	if (hardbound || errbound) {
918 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
919 		    "due to user bound interrupts or failed operation",
920 		    cpun);
921 		return (PSM_FAILURE);
922 	}
923 
924 	return (PSM_SUCCESS);
925 }
926 
927 /*
928  * Bind interrupts to specified CPU
929  */
930 static void
931 apix_enable_intr(processorid_t cpun)
932 {
933 	apix_vector_t *vecp;
934 	int i, ret;
935 	processorid_t n;
936 
937 	lock_set(&apix_lock);
938 
939 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
940 
941 	/* interrupt enabling for system resume */
942 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
943 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
944 			vecp = xv_vector(cpun, i);
945 			if (!IS_VECT_ENABLED(vecp))
946 				continue;
947 
948 			apix_enable_vector(vecp);
949 		}
950 		apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
951 	}
952 
953 	for (n = 0; n < apic_nproc; n++) {
954 		if (!apic_cpu_in_range(n) || n == cpun ||
955 		    (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
956 			continue;
957 
958 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
959 			vecp = xv_vector(n, i);
960 			if (!IS_VECT_ENABLED(vecp) ||
961 			    vecp->v_bound_cpuid != cpun)
962 				continue;
963 
964 			if (vecp->v_type != APIX_TYPE_MSI)
965 				(void) apix_set_cpu(vecp, cpun, &ret);
966 			else
967 				(void) apix_grp_set_cpu(vecp, cpun, &ret);
968 		}
969 	}
970 
971 	lock_clear(&apix_lock);
972 }
973 
974 /*
975  * Allocate vector for IPI
976  * type == -1 indicates it is an internal request. Do not change
977  * resv_vector for these requests.
978  */
979 static int
980 apix_get_ipivect(int ipl, int type)
981 {
982 	uchar_t vector;
983 
984 	if ((vector = apix_alloc_ipi(ipl)) > 0) {
985 		if (type != -1)
986 			apic_resv_vector[ipl] = vector;
987 		return (vector);
988 	}
989 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
990 	return (-1);	/* shouldn't happen */
991 }
992 
993 static int
994 apix_get_clkvect(int ipl)
995 {
996 	int vector;
997 
998 	if ((vector = apix_get_ipivect(ipl, -1)) == -1)
999 		return (-1);
1000 
1001 	apic_clkvect = vector - APIC_BASE_VECT;
1002 	APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1003 	    apic_clkvect));
1004 	return (vector);
1005 }
1006 
1007 static int
1008 apix_post_cpu_start()
1009 {
1010 	int cpun;
1011 	static int cpus_started = 1;
1012 
1013 	/* We know this CPU + BSP  started successfully. */
1014 	cpus_started++;
1015 
1016 	/*
1017 	 * On BSP we would have enabled X2APIC, if supported by processor,
1018 	 * in acpi_probe(), but on AP we do it here.
1019 	 *
1020 	 * We enable X2APIC mode only if BSP is running in X2APIC & the
1021 	 * local APIC mode of the current CPU is MMIO (xAPIC).
1022 	 */
1023 	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1024 	    apic_local_mode() == LOCAL_APIC) {
1025 		apic_enable_x2apic();
1026 	}
1027 
1028 	/*
1029 	 * Switch back to x2apic IPI sending method for performance when target
1030 	 * CPU has entered x2apic mode.
1031 	 */
1032 	if (apic_mode == LOCAL_X2APIC) {
1033 		apic_switch_ipi_callback(B_FALSE);
1034 	}
1035 
1036 	splx(ipltospl(LOCK_LEVEL));
1037 	apix_init_intr();
1038 
1039 	/*
1040 	 * since some systems don't enable the internal cache on the non-boot
1041 	 * cpus, so we have to enable them here
1042 	 */
1043 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1044 
1045 #ifdef	DEBUG
1046 	APIC_AV_PENDING_SET();
1047 #else
1048 	if (apic_mode == LOCAL_APIC)
1049 		APIC_AV_PENDING_SET();
1050 #endif	/* DEBUG */
1051 
1052 	/*
1053 	 * We may be booting, or resuming from suspend; aci_status will
1054 	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1055 	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1056 	 */
1057 	cpun = psm_get_cpu_id();
1058 	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1059 
1060 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1061 
1062 	return (PSM_SUCCESS);
1063 }
1064 
1065 /*
1066  * If this module needs a periodic handler for the interrupt distribution, it
1067  * can be added here. The argument to the periodic handler is not currently
1068  * used, but is reserved for future.
1069  */
1070 static void
1071 apix_post_cyclic_setup(void *arg)
1072 {
1073 	UNREFERENCED_1PARAMETER(arg);
1074 
1075 	cyc_handler_t cyh;
1076 	cyc_time_t cyt;
1077 
1078 	/* cpu_lock is held */
1079 	/* set up a periodic handler for intr redistribution */
1080 
1081 	/*
1082 	 * In peridoc mode intr redistribution processing is done in
1083 	 * apic_intr_enter during clk intr processing
1084 	 */
1085 	if (!apic_oneshot)
1086 		return;
1087 
1088 	/*
1089 	 * Register a periodical handler for the redistribution processing.
1090 	 * Though we would generally prefer to use the DDI interface for
1091 	 * periodic handler invocation, ddi_periodic_add(9F), we are
1092 	 * unfortunately already holding cpu_lock, which ddi_periodic_add will
1093 	 * attempt to take for us.  Thus, we add our own cyclic directly:
1094 	 */
1095 	cyh.cyh_func = (void (*)(void *))apix_redistribute_compute;
1096 	cyh.cyh_arg = NULL;
1097 	cyh.cyh_level = CY_LOW_LEVEL;
1098 
1099 	cyt.cyt_when = 0;
1100 	cyt.cyt_interval = apic_redistribute_sample_interval;
1101 
1102 	apic_cyclic_id = cyclic_add(&cyh, &cyt);
1103 }
1104 
1105 /*
1106  * Called the first time we enable x2apic mode on this cpu.
1107  * Update some of the function pointers to use x2apic routines.
1108  */
1109 void
1110 x2apic_update_psm()
1111 {
1112 	struct psm_ops *pops = &apix_ops;
1113 
1114 	ASSERT(pops != NULL);
1115 
1116 	/*
1117 	 * The pcplusmp module's version of x2apic_update_psm makes additional
1118 	 * changes that we do not have to make here. It needs to make those
1119 	 * changes because pcplusmp relies on the TPR register and the means of
1120 	 * addressing that changes when using the local apic versus the x2apic.
1121 	 * It's also worth noting that the apix driver specific function end up
1122 	 * being apix_foo as opposed to apic_foo and x2apic_foo.
1123 	 */
1124 	pops->psm_send_ipi = x2apic_send_ipi;
1125 	send_dirintf = pops->psm_send_ipi;
1126 
1127 	pops->psm_send_pir_ipi = x2apic_send_pir_ipi;
1128 	psm_send_pir_ipi = pops->psm_send_pir_ipi;
1129 
1130 	apic_mode = LOCAL_X2APIC;
1131 	apic_change_ops();
1132 }
1133 
1134 /*
1135  * This function provides external interface to the nexus for all
1136  * functionalities related to the new DDI interrupt framework.
1137  *
1138  * Input:
1139  * dip     - pointer to the dev_info structure of the requested device
1140  * hdlp    - pointer to the internal interrupt handle structure for the
1141  *	     requested interrupt
1142  * intr_op - opcode for this call
1143  * result  - pointer to the integer that will hold the result to be
1144  *	     passed back if return value is PSM_SUCCESS
1145  *
1146  * Output:
1147  * return value is either PSM_SUCCESS or PSM_FAILURE
1148  */
1149 static int
1150 apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1151     psm_intr_op_t intr_op, int *result)
1152 {
1153 	int		cap;
1154 	apix_vector_t	*vecp, *newvecp;
1155 	struct intrspec *ispec, intr_spec;
1156 	processorid_t target;
1157 
1158 	ispec = &intr_spec;
1159 	ispec->intrspec_pri = hdlp->ih_pri;
1160 	ispec->intrspec_vec = hdlp->ih_inum;
1161 	ispec->intrspec_func = hdlp->ih_cb_func;
1162 
1163 	switch (intr_op) {
1164 	case PSM_INTR_OP_ALLOC_VECTORS:
1165 		switch (hdlp->ih_type) {
1166 		case DDI_INTR_TYPE_MSI:
1167 			/* allocate MSI vectors */
1168 			*result = apix_alloc_msi(dip, hdlp->ih_inum,
1169 			    hdlp->ih_scratch1,
1170 			    (int)(uintptr_t)hdlp->ih_scratch2);
1171 			break;
1172 		case DDI_INTR_TYPE_MSIX:
1173 			/* allocate MSI-X vectors */
1174 			*result = apix_alloc_msix(dip, hdlp->ih_inum,
1175 			    hdlp->ih_scratch1,
1176 			    (int)(uintptr_t)hdlp->ih_scratch2);
1177 			break;
1178 		case DDI_INTR_TYPE_FIXED:
1179 			/* allocate or share vector for fixed */
1180 			if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1181 				return (PSM_FAILURE);
1182 			}
1183 			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1184 			*result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1185 			    ispec);
1186 			break;
1187 		default:
1188 			return (PSM_FAILURE);
1189 		}
1190 		break;
1191 	case PSM_INTR_OP_FREE_VECTORS:
1192 		apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1193 		    hdlp->ih_type);
1194 		break;
1195 	case PSM_INTR_OP_XLATE_VECTOR:
1196 		/*
1197 		 * Vectors are allocated by ALLOC and freed by FREE.
1198 		 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1199 		 */
1200 		*result = APIX_INVALID_VECT;
1201 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1202 		if (vecp != NULL) {
1203 			*result = APIX_VIRTVECTOR(vecp->v_cpuid,
1204 			    vecp->v_vector);
1205 			break;
1206 		}
1207 
1208 		/*
1209 		 * No vector to device mapping exists. If this is FIXED type
1210 		 * then check if this IRQ is already mapped for another device
1211 		 * then return the vector number for it (i.e. shared IRQ case).
1212 		 * Otherwise, return PSM_FAILURE.
1213 		 */
1214 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1215 			vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1216 			    ispec);
1217 			*result = (vecp == NULL) ? APIX_INVALID_VECT :
1218 			    APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1219 		}
1220 		if (*result == APIX_INVALID_VECT)
1221 			return (PSM_FAILURE);
1222 		break;
1223 	case PSM_INTR_OP_GET_PENDING:
1224 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1225 		if (vecp == NULL)
1226 			return (PSM_FAILURE);
1227 
1228 		*result = apix_get_pending(vecp);
1229 		break;
1230 	case PSM_INTR_OP_CLEAR_MASK:
1231 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1232 			return (PSM_FAILURE);
1233 
1234 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1235 		if (vecp == NULL)
1236 			return (PSM_FAILURE);
1237 
1238 		apix_intx_clear_mask(vecp->v_inum);
1239 		break;
1240 	case PSM_INTR_OP_SET_MASK:
1241 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1242 			return (PSM_FAILURE);
1243 
1244 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1245 		if (vecp == NULL)
1246 			return (PSM_FAILURE);
1247 
1248 		apix_intx_set_mask(vecp->v_inum);
1249 		break;
1250 	case PSM_INTR_OP_GET_SHARED:
1251 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1252 			return (PSM_FAILURE);
1253 
1254 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1255 		if (vecp == NULL)
1256 			return (PSM_FAILURE);
1257 
1258 		*result = apix_intx_get_shared(vecp->v_inum);
1259 		break;
1260 	case PSM_INTR_OP_SET_PRI:
1261 		/*
1262 		 * Called prior to adding the interrupt handler or when
1263 		 * an interrupt handler is unassigned.
1264 		 */
1265 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1266 			return (PSM_SUCCESS);
1267 
1268 		if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1269 			return (PSM_FAILURE);
1270 
1271 		break;
1272 	case PSM_INTR_OP_SET_CPU:
1273 	case PSM_INTR_OP_GRP_SET_CPU:
1274 		/*
1275 		 * The interrupt handle given here has been allocated
1276 		 * specifically for this command, and ih_private carries
1277 		 * a CPU value.
1278 		 */
1279 		*result = EINVAL;
1280 		target = (int)(intptr_t)hdlp->ih_private;
1281 		if (!apic_cpu_in_range(target)) {
1282 			DDI_INTR_IMPLDBG((CE_WARN,
1283 			    "[grp_]set_cpu: cpu out of range: %d\n", target));
1284 			return (PSM_FAILURE);
1285 		}
1286 
1287 		lock_set(&apix_lock);
1288 
1289 		vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1290 		if (!IS_VECT_ENABLED(vecp)) {
1291 			DDI_INTR_IMPLDBG((CE_WARN,
1292 			    "[grp]_set_cpu: invalid vector 0x%x\n",
1293 			    hdlp->ih_vector));
1294 			lock_clear(&apix_lock);
1295 			return (PSM_FAILURE);
1296 		}
1297 
1298 		*result = 0;
1299 
1300 		if (intr_op == PSM_INTR_OP_SET_CPU)
1301 			newvecp = apix_set_cpu(vecp, target, result);
1302 		else
1303 			newvecp = apix_grp_set_cpu(vecp, target, result);
1304 
1305 		lock_clear(&apix_lock);
1306 
1307 		if (newvecp == NULL) {
1308 			*result = EIO;
1309 			return (PSM_FAILURE);
1310 		}
1311 		newvecp->v_bound_cpuid = target;
1312 		hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1313 		    newvecp->v_vector);
1314 		break;
1315 
1316 	case PSM_INTR_OP_GET_INTR:
1317 		/*
1318 		 * The interrupt handle given here has been allocated
1319 		 * specifically for this command, and ih_private carries
1320 		 * a pointer to a apic_get_intr_t.
1321 		 */
1322 		if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1323 			return (PSM_FAILURE);
1324 		break;
1325 
1326 	case PSM_INTR_OP_CHECK_MSI:
1327 		/*
1328 		 * Check MSI/X is supported or not at APIC level and
1329 		 * masked off the MSI/X bits in hdlp->ih_type if not
1330 		 * supported before return.  If MSI/X is supported,
1331 		 * leave the ih_type unchanged and return.
1332 		 *
1333 		 * hdlp->ih_type passed in from the nexus has all the
1334 		 * interrupt types supported by the device.
1335 		 */
1336 		if (apic_support_msi == 0) {	/* uninitialized */
1337 			/*
1338 			 * if apic_support_msi is not set, call
1339 			 * apic_check_msi_support() to check whether msi
1340 			 * is supported first
1341 			 */
1342 			if (apic_check_msi_support() == PSM_SUCCESS)
1343 				apic_support_msi = 1;	/* supported */
1344 			else
1345 				apic_support_msi = -1;	/* not-supported */
1346 		}
1347 		if (apic_support_msi == 1) {
1348 			if (apic_msix_enable)
1349 				*result = hdlp->ih_type;
1350 			else
1351 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1352 		} else
1353 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1354 			    DDI_INTR_TYPE_MSIX);
1355 		break;
1356 	case PSM_INTR_OP_GET_CAP:
1357 		cap = DDI_INTR_FLAG_PENDING;
1358 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1359 			cap |= DDI_INTR_FLAG_MASKABLE;
1360 		*result = cap;
1361 		break;
1362 	case PSM_INTR_OP_APIC_TYPE:
1363 		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1364 		    apix_get_apic_type();
1365 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1366 		    APIX_IPI_MIN;
1367 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1368 		    apic_nproc;
1369 		hdlp->ih_ver = apic_get_apic_version();
1370 		break;
1371 	case PSM_INTR_OP_SET_CAP:
1372 	default:
1373 		return (PSM_FAILURE);
1374 	}
1375 
1376 	return (PSM_SUCCESS);
1377 }
1378 
1379 static void
1380 apix_cleanup_busy(void)
1381 {
1382 	int i, j;
1383 	apix_vector_t *vecp;
1384 
1385 	for (i = 0; i < apic_nproc; i++) {
1386 		if (!apic_cpu_in_range(i))
1387 			continue;
1388 		apic_cpus[i].aci_busy = 0;
1389 		for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1390 			if ((vecp = xv_vector(i, j)) != NULL)
1391 				vecp->v_busy = 0;
1392 		}
1393 	}
1394 }
1395 
1396 static void
1397 apix_redistribute_compute(void)
1398 {
1399 	int	i, j, max_busy;
1400 
1401 	if (!apic_enable_dynamic_migration)
1402 		return;
1403 
1404 	if (++apic_nticks == apic_sample_factor_redistribution) {
1405 		/*
1406 		 * Time to call apic_intr_redistribute().
1407 		 * reset apic_nticks. This will cause max_busy
1408 		 * to be calculated below and if it is more than
1409 		 * apic_int_busy, we will do the whole thing
1410 		 */
1411 		apic_nticks = 0;
1412 	}
1413 	max_busy = 0;
1414 	for (i = 0; i < apic_nproc; i++) {
1415 		if (!apic_cpu_in_range(i))
1416 			continue;
1417 		/*
1418 		 * Check if curipl is non zero & if ISR is in
1419 		 * progress
1420 		 */
1421 		if (((j = apic_cpus[i].aci_curipl) != 0) &&
1422 		    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1423 
1424 			int	vect;
1425 			apic_cpus[i].aci_busy++;
1426 			vect = apic_cpus[i].aci_current[j];
1427 			apixs[i]->x_vectbl[vect]->v_busy++;
1428 		}
1429 
1430 		if (!apic_nticks &&
1431 		    (apic_cpus[i].aci_busy > max_busy))
1432 			max_busy = apic_cpus[i].aci_busy;
1433 	}
1434 	if (!apic_nticks) {
1435 		if (max_busy > apic_int_busy_mark) {
1436 		/*
1437 		 * We could make the following check be
1438 		 * skipped > 1 in which case, we get a
1439 		 * redistribution at half the busy mark (due to
1440 		 * double interval). Need to be able to collect
1441 		 * more empirical data to decide if that is a
1442 		 * good strategy. Punt for now.
1443 		 */
1444 			apix_cleanup_busy();
1445 			apic_skipped_redistribute = 0;
1446 		} else
1447 			apic_skipped_redistribute++;
1448 	}
1449 }
1450 
1451 /*
1452  * intr_ops() service routines
1453  */
1454 
1455 static int
1456 apix_get_pending(apix_vector_t *vecp)
1457 {
1458 	int bit, index, irr, pending;
1459 
1460 	/* need to get on the bound cpu */
1461 	mutex_enter(&cpu_lock);
1462 	affinity_set(vecp->v_cpuid);
1463 
1464 	index = vecp->v_vector / 32;
1465 	bit = vecp->v_vector % 32;
1466 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1467 
1468 	affinity_clear();
1469 	mutex_exit(&cpu_lock);
1470 
1471 	pending = (irr & (1 << bit)) ? 1 : 0;
1472 	if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1473 		pending = apix_intx_get_pending(vecp->v_inum);
1474 
1475 	return (pending);
1476 }
1477 
1478 static apix_vector_t *
1479 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1480 {
1481 	apix_vector_t *vecp;
1482 	processorid_t cpuid;
1483 	int32_t virt_vec = 0;
1484 
1485 	switch (flags & PSMGI_INTRBY_FLAGS) {
1486 	case PSMGI_INTRBY_IRQ:
1487 		return (apix_intx_get_vector(hdlp->ih_vector));
1488 	case PSMGI_INTRBY_VEC:
1489 		virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1490 
1491 		cpuid = APIX_VIRTVEC_CPU(virt_vec);
1492 		if (!apic_cpu_in_range(cpuid))
1493 			return (NULL);
1494 
1495 		vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1496 		break;
1497 	case PSMGI_INTRBY_DEFAULT:
1498 		vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1499 		    hdlp->ih_type);
1500 		break;
1501 	default:
1502 		return (NULL);
1503 	}
1504 
1505 	return (vecp);
1506 }
1507 
1508 static int
1509 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1510     apic_get_intr_t *intr_params_p)
1511 {
1512 	apix_vector_t *vecp;
1513 	struct autovec *av_dev;
1514 	int i;
1515 
1516 	vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1517 	if (IS_VECT_FREE(vecp)) {
1518 		intr_params_p->avgi_num_devs = 0;
1519 		intr_params_p->avgi_cpu_id = 0;
1520 		intr_params_p->avgi_req_flags = 0;
1521 		return (PSM_SUCCESS);
1522 	}
1523 
1524 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1525 		intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1526 
1527 		/* Return user bound info for intrd. */
1528 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1529 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1530 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1531 		}
1532 	}
1533 
1534 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1535 		intr_params_p->avgi_vector = vecp->v_vector;
1536 
1537 	if (intr_params_p->avgi_req_flags &
1538 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1539 		/* Get number of devices from apic_irq table shared field. */
1540 		intr_params_p->avgi_num_devs = vecp->v_share;
1541 
1542 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
1543 
1544 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
1545 
1546 		/* Some devices have NULL dip.  Don't count these. */
1547 		if (intr_params_p->avgi_num_devs > 0) {
1548 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1549 			    av_dev = av_dev->av_link) {
1550 				if (av_dev->av_vector && av_dev->av_dip)
1551 					i++;
1552 			}
1553 			intr_params_p->avgi_num_devs =
1554 			    (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1555 		}
1556 
1557 		/* There are no viable dips to return. */
1558 		if (intr_params_p->avgi_num_devs == 0) {
1559 			intr_params_p->avgi_dip_list = NULL;
1560 
1561 		} else {	/* Return list of dips */
1562 
1563 			/* Allocate space in array for that number of devs. */
1564 			intr_params_p->avgi_dip_list = kmem_zalloc(
1565 			    intr_params_p->avgi_num_devs *
1566 			    sizeof (dev_info_t *),
1567 			    KM_NOSLEEP);
1568 			if (intr_params_p->avgi_dip_list == NULL) {
1569 				DDI_INTR_IMPLDBG((CE_WARN,
1570 				    "apix_get_vector_intr_info: no memory"));
1571 				return (PSM_FAILURE);
1572 			}
1573 
1574 			/*
1575 			 * Loop through the device list of the autovec table
1576 			 * filling in the dip array.
1577 			 *
1578 			 * Note that the autovect table may have some special
1579 			 * entries which contain NULL dips.  These will be
1580 			 * ignored.
1581 			 */
1582 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1583 			    av_dev = av_dev->av_link) {
1584 				if (av_dev->av_vector && av_dev->av_dip)
1585 					intr_params_p->avgi_dip_list[i++] =
1586 					    av_dev->av_dip;
1587 			}
1588 		}
1589 	}
1590 
1591 	return (PSM_SUCCESS);
1592 }
1593 
1594 static char *
1595 apix_get_apic_type(void)
1596 {
1597 	return (apix_psm_info.p_mach_idstring);
1598 }
1599 
1600 apix_vector_t *
1601 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1602 {
1603 	apix_vector_t *newp = NULL;
1604 	dev_info_t *dip;
1605 	int inum, cap_ptr;
1606 	ddi_acc_handle_t handle;
1607 	ddi_intr_msix_t *msix_p = NULL;
1608 	ushort_t msix_ctrl;
1609 	uintptr_t off = 0;
1610 	uint32_t mask = 0;
1611 
1612 	ASSERT(LOCK_HELD(&apix_lock));
1613 	*result = ENXIO;
1614 
1615 	/* Fail if this is an MSI intr and is part of a group. */
1616 	if (vecp->v_type == APIX_TYPE_MSI) {
1617 		if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1618 			return (NULL);
1619 		else
1620 			return (apix_grp_set_cpu(vecp, new_cpu, result));
1621 	}
1622 
1623 	/*
1624 	 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1625 	 */
1626 	if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1627 		if ((dip = APIX_GET_DIP(vecp)) == NULL)
1628 			return (NULL);
1629 		inum = vecp->v_devp->dv_inum;
1630 
1631 		handle = i_ddi_get_pci_config_handle(dip);
1632 		cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1633 		msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1634 		if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1635 			/*
1636 			 * Function is not masked, then mask "inum"th
1637 			 * entry in the MSI-X table
1638 			 */
1639 			msix_p = i_ddi_get_msix(dip);
1640 			off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1641 			    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1642 			mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1643 			ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1644 			    mask | 1);
1645 		}
1646 	}
1647 
1648 	*result = 0;
1649 	if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1650 		*result = EIO;
1651 
1652 	/* Restore mask bit */
1653 	if (msix_p != NULL)
1654 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1655 
1656 	return (newp);
1657 }
1658 
1659 /*
1660  * Set cpu for MSIs
1661  */
1662 apix_vector_t *
1663 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1664 {
1665 	apix_vector_t *newp, *vp;
1666 	uint32_t orig_cpu = vecp->v_cpuid;
1667 	int orig_vect = vecp->v_vector;
1668 	int i, num_vectors, cap_ptr, msi_mask_off = 0;
1669 	uint32_t msi_pvm = 0;
1670 	ushort_t msi_ctrl;
1671 	ddi_acc_handle_t handle;
1672 	dev_info_t *dip;
1673 
1674 	APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1675 	    " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1676 
1677 	ASSERT(LOCK_HELD(&apix_lock));
1678 
1679 	*result = ENXIO;
1680 
1681 	if (vecp->v_type != APIX_TYPE_MSI) {
1682 		DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1683 		return (NULL);
1684 	}
1685 
1686 	if ((dip = APIX_GET_DIP(vecp)) == NULL)
1687 		return (NULL);
1688 
1689 	num_vectors = i_ddi_intr_get_current_nintrs(dip);
1690 	if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1691 		APIC_VERBOSE(INTR, (CE_WARN,
1692 		    "set_grp: base vec not part of a grp or not aligned: "
1693 		    "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1694 		return (NULL);
1695 	}
1696 
1697 	if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1698 		return (NULL);
1699 
1700 	*result = EIO;
1701 	for (i = 1; i < num_vectors; i++) {
1702 		if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1703 			return (NULL);
1704 #ifdef DEBUG
1705 		/*
1706 		 * Sanity check: CPU and dip is the same for all entries.
1707 		 * May be called when first msi to be enabled, at this time
1708 		 * add_avintr() is not called for other msi
1709 		 */
1710 		if ((vp->v_share != 0) &&
1711 		    ((APIX_GET_DIP(vp) != dip) ||
1712 		    (vp->v_cpuid != vecp->v_cpuid))) {
1713 			APIC_VERBOSE(INTR, (CE_WARN,
1714 			    "set_grp: cpu or dip for vec 0x%x difft than for "
1715 			    "vec 0x%x\n", orig_vect, orig_vect + i));
1716 			APIC_VERBOSE(INTR, (CE_WARN,
1717 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1718 			    vp->v_cpuid, (void *)dip,
1719 			    (void *)APIX_GET_DIP(vp)));
1720 			return (NULL);
1721 		}
1722 #endif /* DEBUG */
1723 	}
1724 
1725 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1726 	handle = i_ddi_get_pci_config_handle(dip);
1727 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1728 
1729 	/* MSI Per vector masking is supported. */
1730 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1731 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
1732 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1733 		else
1734 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1735 		msi_pvm = pci_config_get32(handle, msi_mask_off);
1736 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1737 		APIC_VERBOSE(INTR, (CE_CONT,
1738 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
1739 		    pci_config_get32(handle, msi_mask_off)));
1740 	}
1741 
1742 	if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1743 		*result = 0;
1744 
1745 	/* Reenable vectors if per vector masking is supported. */
1746 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1747 		pci_config_put32(handle, msi_mask_off, msi_pvm);
1748 		APIC_VERBOSE(INTR, (CE_CONT,
1749 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
1750 		    pci_config_get32(handle, msi_mask_off)));
1751 	}
1752 
1753 	return (newp);
1754 }
1755 
1756 void
1757 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1758 {
1759 	apic_irq_t *irqp;
1760 
1761 	mutex_enter(&airq_mutex);
1762 	irqp = apic_irq_table[irqno];
1763 	irqp->airq_cpu = cpuid;
1764 	irqp->airq_vector = vector;
1765 	apic_record_rdt_entry(irqp, irqno);
1766 	mutex_exit(&airq_mutex);
1767 }
1768 
1769 apix_vector_t *
1770 apix_intx_get_vector(int irqno)
1771 {
1772 	apic_irq_t *irqp;
1773 	uint32_t cpuid;
1774 	uchar_t vector;
1775 
1776 	mutex_enter(&airq_mutex);
1777 	irqp = apic_irq_table[irqno & 0xff];
1778 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1779 		mutex_exit(&airq_mutex);
1780 		return (NULL);
1781 	}
1782 	cpuid = irqp->airq_cpu;
1783 	vector = irqp->airq_vector;
1784 	mutex_exit(&airq_mutex);
1785 
1786 	return (xv_vector(cpuid, vector));
1787 }
1788 
1789 /*
1790  * Must called with interrupts disabled and apic_ioapic_lock held
1791  */
1792 void
1793 apix_intx_enable(int irqno)
1794 {
1795 	uchar_t ioapicindex, intin;
1796 	apic_irq_t *irqp = apic_irq_table[irqno];
1797 	ioapic_rdt_t irdt;
1798 	apic_cpus_info_t *cpu_infop;
1799 	apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1800 
1801 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1802 
1803 	ioapicindex = irqp->airq_ioapicindex;
1804 	intin = irqp->airq_intin_no;
1805 	cpu_infop =  &apic_cpus[irqp->airq_cpu];
1806 
1807 	irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1808 	irdt.ir_hi = cpu_infop->aci_local_id;
1809 
1810 	apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1811 	    vecp->v_type, 1, ioapicindex);
1812 	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1813 	    (void *)&irdt, vecp->v_type, 1);
1814 	apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1815 
1816 	/* write RDT entry high dword - destination */
1817 	WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1818 	    irdt.ir_hi);
1819 
1820 	/* Write the vector, trigger, and polarity portion of the RDT */
1821 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1822 
1823 	vecp->v_state = APIX_STATE_ENABLED;
1824 
1825 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1826 	    " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1827 	    ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1828 }
1829 
1830 /*
1831  * Must called with interrupts disabled and apic_ioapic_lock held
1832  */
1833 void
1834 apix_intx_disable(int irqno)
1835 {
1836 	apic_irq_t *irqp = apic_irq_table[irqno];
1837 	int ioapicindex, intin;
1838 
1839 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1840 	/*
1841 	 * The assumption here is that this is safe, even for
1842 	 * systems with IOAPICs that suffer from the hardware
1843 	 * erratum because all devices have been quiesced before
1844 	 * they unregister their interrupt handlers.  If that
1845 	 * assumption turns out to be false, this mask operation
1846 	 * can induce the same erratum result we're trying to
1847 	 * avoid.
1848 	 */
1849 	ioapicindex = irqp->airq_ioapicindex;
1850 	intin = irqp->airq_intin_no;
1851 	ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1852 
1853 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1854 	    " intin 0x%x\n", ioapicindex, intin));
1855 }
1856 
1857 void
1858 apix_intx_free(int irqno)
1859 {
1860 	apic_irq_t *irqp;
1861 
1862 	mutex_enter(&airq_mutex);
1863 	irqp = apic_irq_table[irqno];
1864 
1865 	if (IS_IRQ_FREE(irqp)) {
1866 		mutex_exit(&airq_mutex);
1867 		return;
1868 	}
1869 
1870 	irqp->airq_mps_intr_index = FREE_INDEX;
1871 	irqp->airq_cpu = IRQ_UNINIT;
1872 	irqp->airq_vector = APIX_INVALID_VECT;
1873 	mutex_exit(&airq_mutex);
1874 }
1875 
1876 #ifdef DEBUG
1877 int apix_intr_deliver_timeouts = 0;
1878 int apix_intr_rirr_timeouts = 0;
1879 int apix_intr_rirr_reset_failure = 0;
1880 #endif
1881 int apix_max_reps_irr_pending = 10;
1882 
1883 #define	GET_RDT_BITS(ioapic, intin, bits)	\
1884 	(READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))
1885 #define	APIX_CHECK_IRR_DELAY	drv_usectohz(5000)
1886 
1887 int
1888 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1889 {
1890 	apic_irq_t *irqp = apic_irq_table[irqno];
1891 	ulong_t iflag;
1892 	int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1893 
1894 	ASSERT(irqp != NULL);
1895 
1896 	iflag = intr_clear();
1897 	lock_set(&apic_ioapic_lock);
1898 
1899 	ioapic_ix = irqp->airq_ioapicindex;
1900 	intin_no = irqp->airq_intin_no;
1901 	level = apic_level_intr[irqno];
1902 
1903 	/*
1904 	 * Wait for the delivery status bit to be cleared. This should
1905 	 * be a very small amount of time.
1906 	 */
1907 	repeats = 0;
1908 	do {
1909 		repeats++;
1910 
1911 		for (waited = 0; waited < apic_max_reps_clear_pending;
1912 		    waited++) {
1913 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1914 				break;
1915 		}
1916 		if (!level)
1917 			break;
1918 
1919 		/*
1920 		 * Mask the RDT entry for level-triggered interrupts.
1921 		 */
1922 		irqp->airq_rdt_entry |= AV_MASK;
1923 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1924 		    intin_no);
1925 		if ((masked = (rdt_entry & AV_MASK)) == 0) {
1926 			/* Mask it */
1927 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1928 			    AV_MASK | rdt_entry);
1929 		}
1930 
1931 		/*
1932 		 * If there was a race and an interrupt was injected
1933 		 * just before we masked, check for that case here.
1934 		 * Then, unmask the RDT entry and try again.  If we're
1935 		 * on our last try, don't unmask (because we want the
1936 		 * RDT entry to remain masked for the rest of the
1937 		 * function).
1938 		 */
1939 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1940 		    intin_no);
1941 		if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1942 		    (repeats < apic_max_reps_clear_pending)) {
1943 			/* Unmask it */
1944 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1945 			    intin_no, rdt_entry & ~AV_MASK);
1946 			irqp->airq_rdt_entry &= ~AV_MASK;
1947 		}
1948 	} while ((rdt_entry & AV_PENDING) &&
1949 	    (repeats < apic_max_reps_clear_pending));
1950 
1951 #ifdef DEBUG
1952 	if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1953 		apix_intr_deliver_timeouts++;
1954 #endif
1955 
1956 	if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1957 		goto done;
1958 
1959 	/*
1960 	 * wait for remote IRR to be cleared for level-triggered
1961 	 * interrupts
1962 	 */
1963 	repeats = 0;
1964 	do {
1965 		repeats++;
1966 
1967 		for (waited = 0; waited < apic_max_reps_clear_pending;
1968 		    waited++) {
1969 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1970 			    == 0)
1971 				break;
1972 		}
1973 
1974 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1975 			lock_clear(&apic_ioapic_lock);
1976 			intr_restore(iflag);
1977 
1978 			delay(APIX_CHECK_IRR_DELAY);
1979 
1980 			iflag = intr_clear();
1981 			lock_set(&apic_ioapic_lock);
1982 		}
1983 	} while (repeats < apix_max_reps_irr_pending);
1984 
1985 	if (repeats >= apix_max_reps_irr_pending) {
1986 #ifdef DEBUG
1987 		apix_intr_rirr_timeouts++;
1988 #endif
1989 
1990 		/*
1991 		 * If we waited and the Remote IRR bit is still not cleared,
1992 		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
1993 		 * times for this interrupt, try the last-ditch workaround:
1994 		 */
1995 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1996 			/*
1997 			 * Trying to clear the bit through normal
1998 			 * channels has failed.  So as a last-ditch
1999 			 * effort, try to set the trigger mode to
2000 			 * edge, then to level.  This has been
2001 			 * observed to work on many systems.
2002 			 */
2003 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2004 			    intin_no,
2005 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2006 			    intin_no) & ~AV_LEVEL);
2007 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2008 			    intin_no,
2009 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2010 			    intin_no) | AV_LEVEL);
2011 		}
2012 
2013 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2014 #ifdef DEBUG
2015 			apix_intr_rirr_reset_failure++;
2016 #endif
2017 			lock_clear(&apic_ioapic_lock);
2018 			intr_restore(iflag);
2019 			prom_printf("apix: Remote IRR still "
2020 			    "not clear for IOAPIC %d intin %d.\n"
2021 			    "\tInterrupts to this pin may cease "
2022 			    "functioning.\n", ioapic_ix, intin_no);
2023 			return (1);	/* return failure */
2024 		}
2025 	}
2026 
2027 done:
2028 	/* change apic_irq_table */
2029 	lock_clear(&apic_ioapic_lock);
2030 	intr_restore(iflag);
2031 	apix_intx_set_vector(irqno, cpuid, vector);
2032 	iflag = intr_clear();
2033 	lock_set(&apic_ioapic_lock);
2034 
2035 	/* reprogramme IO-APIC RDT entry */
2036 	apix_intx_enable(irqno);
2037 
2038 	lock_clear(&apic_ioapic_lock);
2039 	intr_restore(iflag);
2040 
2041 	return (0);
2042 }
2043 
2044 static int
2045 apix_intx_get_pending(int irqno)
2046 {
2047 	apic_irq_t *irqp;
2048 	int intin, ioapicindex, pending;
2049 	ulong_t iflag;
2050 
2051 	mutex_enter(&airq_mutex);
2052 	irqp = apic_irq_table[irqno];
2053 	if (IS_IRQ_FREE(irqp)) {
2054 		mutex_exit(&airq_mutex);
2055 		return (0);
2056 	}
2057 
2058 	/* check IO-APIC delivery status */
2059 	intin = irqp->airq_intin_no;
2060 	ioapicindex = irqp->airq_ioapicindex;
2061 	mutex_exit(&airq_mutex);
2062 
2063 	iflag = intr_clear();
2064 	lock_set(&apic_ioapic_lock);
2065 
2066 	pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2067 	    AV_PENDING) ? 1 : 0;
2068 
2069 	lock_clear(&apic_ioapic_lock);
2070 	intr_restore(iflag);
2071 
2072 	return (pending);
2073 }
2074 
2075 /*
2076  * This function will mask the interrupt on the I/O APIC
2077  */
2078 static void
2079 apix_intx_set_mask(int irqno)
2080 {
2081 	int intin, ioapixindex, rdt_entry;
2082 	ulong_t iflag;
2083 	apic_irq_t *irqp;
2084 
2085 	mutex_enter(&airq_mutex);
2086 	irqp = apic_irq_table[irqno];
2087 
2088 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2089 
2090 	intin = irqp->airq_intin_no;
2091 	ioapixindex = irqp->airq_ioapicindex;
2092 	mutex_exit(&airq_mutex);
2093 
2094 	iflag = intr_clear();
2095 	lock_set(&apic_ioapic_lock);
2096 
2097 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2098 
2099 	/* clear mask */
2100 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2101 	    (AV_MASK | rdt_entry));
2102 
2103 	lock_clear(&apic_ioapic_lock);
2104 	intr_restore(iflag);
2105 }
2106 
2107 /*
2108  * This function will clear the mask for the interrupt on the I/O APIC
2109  */
2110 static void
2111 apix_intx_clear_mask(int irqno)
2112 {
2113 	int intin, ioapixindex, rdt_entry;
2114 	ulong_t iflag;
2115 	apic_irq_t *irqp;
2116 
2117 	mutex_enter(&airq_mutex);
2118 	irqp = apic_irq_table[irqno];
2119 
2120 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2121 
2122 	intin = irqp->airq_intin_no;
2123 	ioapixindex = irqp->airq_ioapicindex;
2124 	mutex_exit(&airq_mutex);
2125 
2126 	iflag = intr_clear();
2127 	lock_set(&apic_ioapic_lock);
2128 
2129 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2130 
2131 	/* clear mask */
2132 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2133 	    ((~AV_MASK) & rdt_entry));
2134 
2135 	lock_clear(&apic_ioapic_lock);
2136 	intr_restore(iflag);
2137 }
2138 
2139 /*
2140  * For level-triggered interrupt, mask the IRQ line. Mask means
2141  * new interrupts will not be delivered. The interrupt already
2142  * accepted by a local APIC is not affected
2143  */
2144 void
2145 apix_level_intr_pre_eoi(int irq)
2146 {
2147 	apic_irq_t *irqp = apic_irq_table[irq];
2148 	int apic_ix, intin_ix;
2149 
2150 	if (irqp == NULL)
2151 		return;
2152 
2153 	ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2154 
2155 	lock_set(&apic_ioapic_lock);
2156 
2157 	intin_ix = irqp->airq_intin_no;
2158 	apic_ix = irqp->airq_ioapicindex;
2159 
2160 	if (irqp->airq_cpu != CPU->cpu_id) {
2161 		if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2162 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2163 		lock_clear(&apic_ioapic_lock);
2164 		return;
2165 	}
2166 
2167 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2168 		/*
2169 		 * This is a IOxAPIC and there is EOI register:
2170 		 *	Change the vector to reserved unused vector, so that
2171 		 *	the EOI	from Local APIC won't clear the Remote IRR for
2172 		 *	this level trigger interrupt. Instead, we'll manually
2173 		 *	clear it in apix_post_hardint() after ISR handling.
2174 		 */
2175 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2176 		    (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2177 	} else {
2178 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2179 		    AV_MASK | irqp->airq_rdt_entry);
2180 	}
2181 
2182 	lock_clear(&apic_ioapic_lock);
2183 }
2184 
2185 /*
2186  * For level-triggered interrupt, unmask the IRQ line
2187  * or restore the original vector number.
2188  */
2189 void
2190 apix_level_intr_post_dispatch(int irq)
2191 {
2192 	apic_irq_t *irqp = apic_irq_table[irq];
2193 	int apic_ix, intin_ix;
2194 
2195 	if (irqp == NULL)
2196 		return;
2197 
2198 	lock_set(&apic_ioapic_lock);
2199 
2200 	intin_ix = irqp->airq_intin_no;
2201 	apic_ix = irqp->airq_ioapicindex;
2202 
2203 	if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2204 		/*
2205 		 * Already sent EOI back to Local APIC.
2206 		 * Send EOI to IO-APIC
2207 		 */
2208 		ioapic_write_eoi(apic_ix, irqp->airq_vector);
2209 	} else {
2210 		/* clear the mask or restore the vector */
2211 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2212 		    irqp->airq_rdt_entry);
2213 
2214 		/* send EOI to IOxAPIC */
2215 		if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2216 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2217 	}
2218 
2219 	lock_clear(&apic_ioapic_lock);
2220 }
2221 
2222 static int
2223 apix_intx_get_shared(int irqno)
2224 {
2225 	apic_irq_t *irqp;
2226 	int share;
2227 
2228 	mutex_enter(&airq_mutex);
2229 	irqp = apic_irq_table[irqno];
2230 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2231 		mutex_exit(&airq_mutex);
2232 		return (0);
2233 	}
2234 	share = irqp->airq_share;
2235 	mutex_exit(&airq_mutex);
2236 
2237 	return (share);
2238 }
2239 
2240 static void
2241 apix_intx_set_shared(int irqno, int delta)
2242 {
2243 	apic_irq_t *irqp;
2244 
2245 	mutex_enter(&airq_mutex);
2246 	irqp = apic_irq_table[irqno];
2247 	if (IS_IRQ_FREE(irqp)) {
2248 		mutex_exit(&airq_mutex);
2249 		return;
2250 	}
2251 	irqp->airq_share += delta;
2252 	mutex_exit(&airq_mutex);
2253 }
2254 
2255 /*
2256  * Setup IRQ table. Return IRQ no or -1 on failure
2257  */
2258 static int
2259 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2260     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2261 {
2262 	int origirq = ispec->intrspec_vec;
2263 	int newirq;
2264 	short intr_index;
2265 	uchar_t ipin, ioapic, ioapicindex;
2266 	apic_irq_t *irqp;
2267 
2268 	UNREFERENCED_1PARAMETER(inum);
2269 
2270 	if (intrp != NULL) {
2271 		intr_index = (short)(intrp - apic_io_intrp);
2272 		ioapic = intrp->intr_destid;
2273 		ipin = intrp->intr_destintin;
2274 
2275 		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
2276 		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2277 			if (apic_io_id[ioapicindex] == ioapic)
2278 				break;
2279 		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2280 		    (ioapic == INTR_ALL_APIC));
2281 
2282 		/* check whether this intin# has been used by another irqno */
2283 		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2284 			return (newirq);
2285 
2286 	} else if (iflagp != NULL) {	/* ACPI */
2287 		intr_index = ACPI_INDEX;
2288 		ioapicindex = acpi_find_ioapic(irqno);
2289 		ASSERT(ioapicindex != 0xFF);
2290 		ioapic = apic_io_id[ioapicindex];
2291 		ipin = irqno - apic_io_vectbase[ioapicindex];
2292 
2293 		if (apic_irq_table[irqno] &&
2294 		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2295 			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2296 			    apic_irq_table[irqno]->airq_ioapicindex ==
2297 			    ioapicindex);
2298 			return (irqno);
2299 		}
2300 
2301 	} else {	/* default configuration */
2302 		intr_index = DEFAULT_INDEX;
2303 		ioapicindex = 0;
2304 		ioapic = apic_io_id[ioapicindex];
2305 		ipin = (uchar_t)irqno;
2306 	}
2307 
2308 	/* allocate a new IRQ no */
2309 	if ((irqp = apic_irq_table[irqno]) == NULL) {
2310 		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2311 		apic_irq_table[irqno] = irqp;
2312 	} else {
2313 		if (irqp->airq_mps_intr_index != FREE_INDEX) {
2314 			newirq = apic_allocate_irq(apic_first_avail_irq);
2315 			if (newirq == -1) {
2316 				return (-1);
2317 			}
2318 			irqno = newirq;
2319 			irqp = apic_irq_table[irqno];
2320 			ASSERT(irqp != NULL);
2321 		}
2322 	}
2323 	apic_max_device_irq = max(irqno, apic_max_device_irq);
2324 	apic_min_device_irq = min(irqno, apic_min_device_irq);
2325 
2326 	irqp->airq_mps_intr_index = intr_index;
2327 	irqp->airq_ioapicindex = ioapicindex;
2328 	irqp->airq_intin_no = ipin;
2329 	irqp->airq_dip = dip;
2330 	irqp->airq_origirq = (uchar_t)origirq;
2331 	if (iflagp != NULL)
2332 		irqp->airq_iflag = *iflagp;
2333 	irqp->airq_cpu = IRQ_UNINIT;
2334 	irqp->airq_vector = 0;
2335 
2336 	return (irqno);
2337 }
2338 
2339 /*
2340  * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2341  */
2342 static int
2343 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2344     struct intrspec *ispec)
2345 {
2346 	int irqno = ispec->intrspec_vec;
2347 	int newirq, i;
2348 	iflag_t intr_flag;
2349 	ACPI_SUBTABLE_HEADER	*hp;
2350 	ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2351 	struct apic_io_intr *intrp;
2352 
2353 	if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2354 		int busid;
2355 
2356 		if (bustype == 0)
2357 			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2358 
2359 		/* loop checking BUS_ISA/BUS_EISA */
2360 		for (i = 0; i < 2; i++) {
2361 			if (((busid = apic_find_bus_id(bustype)) != -1) &&
2362 			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2363 			    != NULL)) {
2364 				return (apix_intx_setup(dip, inum, irqno,
2365 				    intrp, ispec, NULL));
2366 			}
2367 			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2368 		}
2369 
2370 		/* fall back to default configuration */
2371 		return (-1);
2372 	}
2373 
2374 	/* search iso entries first */
2375 	if (acpi_iso_cnt != 0) {
2376 		hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2377 		i = 0;
2378 		while (i < acpi_iso_cnt) {
2379 			if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2380 				isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2381 				if (isop->Bus == 0 &&
2382 				    isop->SourceIrq == irqno) {
2383 					newirq = isop->GlobalIrq;
2384 					intr_flag.intr_po = isop->IntiFlags &
2385 					    ACPI_MADT_POLARITY_MASK;
2386 					intr_flag.intr_el = (isop->IntiFlags &
2387 					    ACPI_MADT_TRIGGER_MASK) >> 2;
2388 					intr_flag.bustype = BUS_ISA;
2389 
2390 					return (apix_intx_setup(dip, inum,
2391 					    newirq, NULL, ispec, &intr_flag));
2392 				}
2393 				i++;
2394 			}
2395 			hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2396 			    hp->Length);
2397 		}
2398 	}
2399 	intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2400 	intr_flag.intr_el = INTR_EL_EDGE;
2401 	intr_flag.bustype = BUS_ISA;
2402 	return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2403 }
2404 
2405 
2406 /*
2407  * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2408  */
2409 static int
2410 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2411     struct intrspec *ispec)
2412 {
2413 	int busid, devid, pci_irq;
2414 	ddi_acc_handle_t cfg_handle;
2415 	uchar_t ipin;
2416 	iflag_t intr_flag;
2417 	struct apic_io_intr *intrp;
2418 
2419 	if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2420 		return (-1);
2421 
2422 	if (busid == 0 && apic_pci_bus_total == 1)
2423 		busid = (int)apic_single_pci_busid;
2424 
2425 	if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2426 		return (-1);
2427 	ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2428 	pci_config_teardown(&cfg_handle);
2429 
2430 	if (apic_enable_acpi && !apic_use_acpi_madt_only) {	/* ACPI */
2431 		if (apic_acpi_translate_pci_irq(dip, busid, devid,
2432 		    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2433 			return (-1);
2434 
2435 		intr_flag.bustype = (uchar_t)bustype;
2436 		return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2437 		    &intr_flag));
2438 	}
2439 
2440 	/* MP configuration table */
2441 	pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2442 	if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2443 		pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2444 		if (pci_irq == -1)
2445 			return (-1);
2446 	}
2447 
2448 	return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2449 }
2450 
2451 /*
2452  * Translate and return IRQ no
2453  */
2454 static int
2455 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2456 {
2457 	int newirq, irqno = ispec->intrspec_vec;
2458 	int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2459 	int bustype = 0, dev_len;
2460 	char dev_type[16];
2461 
2462 	if (apic_defconf) {
2463 		mutex_enter(&airq_mutex);
2464 		goto defconf;
2465 	}
2466 
2467 	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2468 		mutex_enter(&airq_mutex);
2469 		goto nonpci;
2470 	}
2471 
2472 	/*
2473 	 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2474 	 * to avoid extra buffer allocation.
2475 	 */
2476 	dev_len = sizeof (dev_type);
2477 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2478 	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2479 	    &dev_len) == DDI_PROP_SUCCESS) {
2480 		if ((strcmp(dev_type, "pci") == 0) ||
2481 		    (strcmp(dev_type, "pciex") == 0))
2482 			parent_is_pci_or_pciex = 1;
2483 	}
2484 
2485 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2486 	    DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2487 	    &dev_len) == DDI_PROP_SUCCESS) {
2488 		if (strstr(dev_type, "pciex"))
2489 			child_is_pciex = 1;
2490 	}
2491 
2492 	mutex_enter(&airq_mutex);
2493 
2494 	if (parent_is_pci_or_pciex) {
2495 		bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2496 		newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2497 		if (newirq != -1)
2498 			goto done;
2499 		bustype = 0;
2500 	} else if (strcmp(dev_type, "isa") == 0)
2501 		bustype = BUS_ISA;
2502 	else if (strcmp(dev_type, "eisa") == 0)
2503 		bustype = BUS_EISA;
2504 
2505 nonpci:
2506 	newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2507 	if (newirq != -1)
2508 		goto done;
2509 
2510 defconf:
2511 	newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2512 	if (newirq == -1) {
2513 		mutex_exit(&airq_mutex);
2514 		return (-1);
2515 	}
2516 done:
2517 	ASSERT(apic_irq_table[newirq]);
2518 	mutex_exit(&airq_mutex);
2519 	return (newirq);
2520 }
2521 
2522 static int
2523 apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2524 {
2525 	int irqno;
2526 	apix_vector_t *vecp;
2527 
2528 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2529 		return (0);
2530 
2531 	if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2532 		return (0);
2533 
2534 	DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2535 	    "irqno=0x%x cpuid=%d vector=0x%x\n",
2536 	    (void *)dip, ddi_driver_name(dip), irqno,
2537 	    vecp->v_cpuid, vecp->v_vector));
2538 
2539 	return (1);
2540 }
2541 
2542 /*
2543  * Return the vector number if the translated IRQ for this device
2544  * has a vector mapping setup. If no IRQ setup exists or no vector is
2545  * allocated to it then return 0.
2546  */
2547 static apix_vector_t *
2548 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2549 {
2550 	int irqno;
2551 	apix_vector_t *vecp;
2552 
2553 	/* get the IRQ number */
2554 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2555 		return (NULL);
2556 
2557 	/* get the vector number if a vector is allocated to this irqno */
2558 	vecp = apix_intx_get_vector(irqno);
2559 
2560 	return (vecp);
2561 }
2562 
2563 /*
2564  * Switch between safe and x2APIC IPI sending method.
2565  * The CPU may power on in xapic mode or x2apic mode. If the CPU needs to send
2566  * an IPI to other CPUs before entering x2APIC mode, it still needs to use the
2567  * xAPIC method. Before sending a StartIPI to the target CPU, psm_send_ipi will
2568  * be changed to apic_common_send_ipi, which detects current local APIC mode and
2569  * use the right method to send an IPI. If some CPUs fail to start up,
2570  * apic_poweron_cnt won't return to zero, so apic_common_send_ipi will always be
2571  * used. psm_send_ipi can't be simply changed back to x2apic_send_ipi if some
2572  * CPUs failed to start up because those failed CPUs may recover itself later at
2573  * unpredictable time.
2574  */
2575 void
2576 apic_switch_ipi_callback(boolean_t enter)
2577 {
2578 	ulong_t iflag;
2579 	struct psm_ops *pops = psmops;
2580 
2581 	iflag = intr_clear();
2582 	lock_set(&apic_mode_switch_lock);
2583 	if (enter) {
2584 		ASSERT(apic_poweron_cnt >= 0);
2585 		if (apic_poweron_cnt == 0) {
2586 			pops->psm_send_ipi = apic_common_send_ipi;
2587 			send_dirintf = pops->psm_send_ipi;
2588 			pops->psm_send_pir_ipi = apic_common_send_pir_ipi;
2589 			psm_send_pir_ipi = pops->psm_send_pir_ipi;
2590 		}
2591 		apic_poweron_cnt++;
2592 	} else {
2593 		ASSERT(apic_poweron_cnt > 0);
2594 		apic_poweron_cnt--;
2595 		if (apic_poweron_cnt == 0) {
2596 			pops->psm_send_ipi = x2apic_send_ipi;
2597 			send_dirintf = pops->psm_send_ipi;
2598 			pops->psm_send_pir_ipi = x2apic_send_pir_ipi;
2599 			psm_send_pir_ipi = pops->psm_send_pir_ipi;
2600 		}
2601 	}
2602 	lock_clear(&apic_mode_switch_lock);
2603 	intr_restore(iflag);
2604 }
2605 
/*
 * Stub function: returns the current value of apix_is_enabled.
 */
int
apix_loaded(void)
{
	return (apix_is_enabled);
}
2612