xref: /illumos-gate/usr/src/uts/i86pc/io/apix/apix.c (revision c5749750a3e052f1194f65a303456224c51dea63)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  * Copyright 2018 Joyent, Inc.
29  */
30 
31 /*
32  * To understand how the apix module interacts with the interrupt subsystem read
33  * the theory statement in uts/i86pc/os/intr.c.
34  */
35 
36 /*
37  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
38  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
39  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
40  * PSMI 1.5 extensions are supported in Solaris Nevada.
41  * PSMI 1.6 extensions are supported in Solaris Nevada.
42  * PSMI 1.7 extensions are supported in Solaris Nevada.
43  */
44 #define	PSMI_1_7
45 
46 #include <sys/processor.h>
47 #include <sys/time.h>
48 #include <sys/psm.h>
49 #include <sys/smp_impldefs.h>
50 #include <sys/cram.h>
51 #include <sys/acpi/acpi.h>
52 #include <sys/acpica.h>
53 #include <sys/psm_common.h>
54 #include <sys/pit.h>
55 #include <sys/ddi.h>
56 #include <sys/sunddi.h>
57 #include <sys/ddi_impldefs.h>
58 #include <sys/pci.h>
59 #include <sys/promif.h>
60 #include <sys/x86_archext.h>
61 #include <sys/cpc_impl.h>
62 #include <sys/uadmin.h>
63 #include <sys/panic.h>
64 #include <sys/debug.h>
65 #include <sys/archsystm.h>
66 #include <sys/trap.h>
67 #include <sys/machsystm.h>
68 #include <sys/sysmacros.h>
69 #include <sys/cpuvar.h>
70 #include <sys/rm_platter.h>
71 #include <sys/privregs.h>
72 #include <sys/note.h>
73 #include <sys/pci_intr_lib.h>
74 #include <sys/spl.h>
75 #include <sys/clock.h>
76 #include <sys/cyclic.h>
77 #include <sys/dditypes.h>
78 #include <sys/sunddi.h>
79 #include <sys/x_call.h>
80 #include <sys/reboot.h>
81 #include <sys/mach_intr.h>
82 #include <sys/apix.h>
83 #include <sys/apix_irm_impl.h>
84 
85 static int apix_probe();
86 static void apix_init();
87 static void apix_picinit(void);
88 static int apix_intr_enter(int, int *);
89 static void apix_intr_exit(int, int);
90 static void apix_setspl(int);
91 static int apix_disable_intr(processorid_t);
92 static void apix_enable_intr(processorid_t);
93 static int apix_get_clkvect(int);
94 static int apix_get_ipivect(int, int);
95 static void apix_post_cyclic_setup(void *);
96 static int apix_post_cpu_start();
97 static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
98     psm_intr_op_t, int *);
99 
100 /*
101  * Helper functions for apix_intr_ops()
102  */
103 static void apix_redistribute_compute(void);
104 static int apix_get_pending(apix_vector_t *);
105 static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
106 static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
107 static char *apix_get_apic_type(void);
108 static int apix_intx_get_pending(int);
109 static void apix_intx_set_mask(int irqno);
110 static void apix_intx_clear_mask(int irqno);
111 static int apix_intx_get_shared(int irqno);
112 static void apix_intx_set_shared(int irqno, int delta);
113 static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
114     struct intrspec *);
115 static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
116 
117 extern int apic_clkinit(int);
118 
119 /* IRM initialization for APIX PSM module */
120 extern void apix_irm_init(void);
121 
122 extern int irm_enable;
123 
124 /*
125  *	Local static data
126  */
/*
 * PSM operations vector for the apix module.  It is published through
 * psmops and apix_psm_info (both defined below).  The initializer is
 * positional, so entries must stay in the order of struct psm_ops; slots
 * this module does not implement are NULL and carry the slot name in a
 * trailing comment.
 *
 * Some slots are rewritten at run time elsewhere in this file:
 *   - _init() replaces psm_gethrtime with apic_gettime when
 *     apic_coarse_hrtime is set;
 *   - x2apic_update_psm() replaces psm_send_ipi and psm_send_pir_ipi
 *     with their x2apic counterparts.
 */
static struct	psm_ops apix_ops = {
	apix_probe,

	apix_init,
	apix_picinit,
	apix_intr_enter,
	apix_intr_exit,
	apix_setspl,
	apix_addspl,
	apix_delspl,
	apix_disable_intr,
	apix_enable_intr,
	NULL,			/* psm_softlvl_to_irq */
	NULL,			/* psm_set_softintr */

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apix_get_clkvect,
	NULL,			/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apix_post_cpu_start,
	apic_shutdown,
	apix_get_ipivect,
	apic_send_ipi,

	NULL,			/* psm_translate_irq */
	NULL,			/* psm_notify_error */
	NULL,			/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apix_post_cyclic_setup,
	apic_preshutdown,
	apix_intr_ops,		/* Advanced DDI Interrupt framework */
	apic_state,		/* save, restore apic state for S3 */
	apic_cpu_ops,		/* CPU control interface. */

	apic_get_pir_ipivect,
	apic_send_pir_ipi,
};
172 
173 struct psm_ops *psmops = &apix_ops;
174 
175 static struct	psm_info apix_psm_info = {
176 	PSM_INFO_VER01_7,			/* version */
177 	PSM_OWN_EXCLUSIVE,			/* ownership */
178 	&apix_ops,				/* operation */
179 	APIX_NAME,				/* machine name */
180 	"apix MPv1.4 compatible",
181 };
182 
183 static void *apix_hdlp;
184 
185 static int apix_is_enabled = 0;
186 
187 /*
188  * Flag to indicate if APIX is to be enabled only for platforms
189  * with specific hw feature(s).
190  */
191 int apix_hw_chk_enable = 1;
192 
193 /*
194  * Hw features that are checked for enabling APIX support.
195  */
196 #define	APIX_SUPPORT_X2APIC	0x00000001
197 uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;
198 
199 /*
200  * apix_lock is used for cpu selection and vector re-binding
201  */
202 lock_t apix_lock;
203 apix_impl_t *apixs[NCPU];
204 /*
205  * Mapping between device interrupt and the allocated vector. Indexed
206  * by major number.
207  */
208 apix_dev_vector_t **apix_dev_vector;
209 /*
210  * Mapping between device major number and cpu id. It gets used
211  * when interrupt binding policy round robin with affinity is
212  * applied. With that policy, devices with the same major number
213  * will be bound to the same CPU.
214  */
215 processorid_t *apix_major_to_cpu;	/* major to cpu mapping */
216 kmutex_t apix_mutex;	/* for apix_dev_vector & apix_major_to_cpu */
217 
218 int apix_nipis = 16;	/* Maximum number of IPIs */
219 /*
220  * Maximum number of vectors in a CPU that can be used for interrupt
221  * allocation (including IPIs and the reserved vectors).
222  */
223 int apix_cpu_nvectors = APIX_NVECTOR;
224 
225 /* number of CPUs in power-on transition state */
226 static int apic_poweron_cnt = 0;
227 
228 /* gcpu.h */
229 
230 extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
231 extern void apic_change_eoi();
232 
233 /*
234  *	This is the loadable module wrapper
235  */
236 
237 int
238 _init(void)
239 {
240 	if (apic_coarse_hrtime)
241 		apix_ops.psm_gethrtime = &apic_gettime;
242 	return (psm_mod_init(&apix_hdlp, &apix_psm_info));
243 }
244 
245 int
246 _fini(void)
247 {
248 	return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
249 }
250 
251 int
252 _info(struct modinfo *modinfop)
253 {
254 	return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
255 }
256 
257 static int
258 apix_probe()
259 {
260 	int rval;
261 
262 	if (apix_enable == 0)
263 		return (PSM_FAILURE);
264 
265 	/*
266 	 * FIXME Temporarily disable apix module on Xen HVM platform due to
267 	 * known hang during boot (see #3605).
268 	 *
269 	 * Please remove when/if the issue is resolved.
270 	 */
271 	if (get_hwenv() & HW_XEN_HVM)
272 		return (PSM_FAILURE);
273 
274 	/* check for hw features if specified  */
275 	if (apix_hw_chk_enable) {
276 		/* check if x2APIC mode is supported */
277 		if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
278 		    APIX_SUPPORT_X2APIC) {
279 			if (apic_local_mode() == LOCAL_X2APIC) {
280 				/* x2APIC mode activated by BIOS, switch ops */
281 				apic_mode = LOCAL_X2APIC;
282 				apic_change_ops();
283 			} else if (!apic_detect_x2apic()) {
284 				/* x2APIC mode is not supported in the hw */
285 				apix_enable = 0;
286 			}
287 		}
288 		if (apix_enable == 0)
289 			return (PSM_FAILURE);
290 	}
291 
292 	rval = apic_probe_common(apix_psm_info.p_mach_idstring);
293 	if (rval == PSM_SUCCESS)
294 		apix_is_enabled = 1;
295 	else
296 		apix_is_enabled = 0;
297 	return (rval);
298 }
299 
300 /*
301  * Initialize the data structures needed by pcplusmpx module.
302  * Specifically, the data structures used by addspl() and delspl()
303  * routines.
304  */
305 static void
306 apix_softinit()
307 {
308 	int i, *iptr;
309 	apix_impl_t *hdlp;
310 	int nproc;
311 
312 	nproc = max(apic_nproc, apic_max_nproc);
313 
314 	hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
315 	for (i = 0; i < nproc; i++) {
316 		apixs[i] = &hdlp[i];
317 		apixs[i]->x_cpuid = i;
318 		LOCK_INIT_CLEAR(&apixs[i]->x_lock);
319 	}
320 
321 	/* cpu 0 is always up (for now) */
322 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
323 
324 	iptr = (int *)&apic_irq_table[0];
325 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
326 		apic_level_intr[i] = 0;
327 		*iptr++ = NULL;
328 	}
329 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
330 
331 	apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
332 	    KM_SLEEP);
333 
334 	if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
335 		apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
336 		    KM_SLEEP);
337 		for (i = 0; i < devcnt; i++)
338 			apix_major_to_cpu[i] = IRQ_UNINIT;
339 	}
340 
341 	mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
342 }
343 
344 static int
345 apix_get_pending_spl(void)
346 {
347 	int cpuid = CPU->cpu_id;
348 
349 	return (bsrw_insn(apixs[cpuid]->x_intr_pending));
350 }
351 
352 static uintptr_t
353 apix_get_intr_handler(int cpu, short vec)
354 {
355 	apix_vector_t *apix_vector;
356 
357 	ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
358 	if (cpu >= apic_nproc || vec >= APIX_NVECTOR)
359 		return (NULL);
360 
361 	apix_vector = apixs[cpu]->x_vectbl[vec];
362 
363 	return ((uintptr_t)(apix_vector->v_autovect));
364 }
365 
366 static void
367 apix_init()
368 {
369 	extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
370 
371 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
372 
373 	do_interrupt_common = apix_do_interrupt;
374 	addintr = apix_add_avintr;
375 	remintr = apix_rem_avintr;
376 	get_pending_spl = apix_get_pending_spl;
377 	get_intr_handler = apix_get_intr_handler;
378 	psm_get_localapicid = apic_get_localapicid;
379 	psm_get_ioapicid = apic_get_ioapicid;
380 
381 	apix_softinit();
382 
383 #if !defined(__amd64)
384 	if (cpuid_have_cr8access(CPU))
385 		apic_have_32bit_cr8 = 1;
386 #endif
387 
388 	apic_pir_vect = apix_get_ipivect(XC_CPUPOKE_PIL, -1);
389 
390 	/*
391 	 * Initialize IRM pool parameters
392 	 */
393 	if (irm_enable) {
394 		int	i;
395 		int	lowest_irq;
396 		int	highest_irq;
397 
398 		/* number of CPUs present */
399 		apix_irminfo.apix_ncpus = apic_nproc;
400 		/* total number of entries in all of the IOAPICs present */
401 		lowest_irq = apic_io_vectbase[0];
402 		highest_irq = apic_io_vectend[0];
403 		for (i = 1; i < apic_io_max; i++) {
404 			if (apic_io_vectbase[i] < lowest_irq)
405 				lowest_irq = apic_io_vectbase[i];
406 			if (apic_io_vectend[i] > highest_irq)
407 				highest_irq = apic_io_vectend[i];
408 		}
409 		apix_irminfo.apix_ioapic_max_vectors =
410 		    highest_irq - lowest_irq + 1;
411 		/*
412 		 * Number of available per-CPU vectors excluding
413 		 * reserved vectors for Dtrace, int80, system-call,
414 		 * fast-trap, etc.
415 		 */
416 		apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
417 		    APIX_SW_RESERVED_VECTORS;
418 
419 		/* Number of vectors (pre) allocated (SCI and HPET) */
420 		apix_irminfo.apix_vectors_allocated = 0;
421 		if (apic_hpet_vect != -1)
422 			apix_irminfo.apix_vectors_allocated++;
423 		if (apic_sci_vect != -1)
424 			apix_irminfo.apix_vectors_allocated++;
425 	}
426 }
427 
428 static void
429 apix_init_intr()
430 {
431 	processorid_t	cpun = psm_get_cpu_id();
432 	uint_t nlvt;
433 	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
434 	extern void cmi_cmci_trap(void);
435 
436 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
437 
438 	if (apic_mode == LOCAL_APIC) {
439 		/*
440 		 * We are running APIC in MMIO mode.
441 		 */
442 		if (apic_flat_model) {
443 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
444 			    APIC_FLAT_MODEL);
445 		} else {
446 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
447 			    APIC_CLUSTER_MODEL);
448 		}
449 
450 		apic_reg_ops->apic_write(APIC_DEST_REG,
451 		    AV_HIGH_ORDER >> cpun);
452 	}
453 
454 	if (apic_directed_EOI_supported()) {
455 		/*
456 		 * Setting the 12th bit in the Spurious Interrupt Vector
457 		 * Register suppresses broadcast EOIs generated by the local
458 		 * APIC. The suppression of broadcast EOIs happens only when
459 		 * interrupts are level-triggered.
460 		 */
461 		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
462 	}
463 
464 	/* need to enable APIC before unmasking NMI */
465 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
466 
467 	/*
468 	 * Presence of an invalid vector with delivery mode AV_FIXED can
469 	 * cause an error interrupt, even if the entry is masked...so
470 	 * write a valid vector to LVT entries along with the mask bit
471 	 */
472 
473 	/* All APICs have timer and LINT0/1 */
474 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
475 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
476 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */
477 
478 	/*
479 	 * On integrated APICs, the number of LVT entries is
480 	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
481 	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
482 	 */
483 
484 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
485 		nlvt = 3;
486 	} else {
487 		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
488 		    0xFF) + 1;
489 	}
490 
491 	if (nlvt >= 5) {
492 		/* Enable performance counter overflow interrupt */
493 
494 		if (!is_x86_feature(x86_featureset, X86FSET_MSR))
495 			apic_enable_cpcovf_intr = 0;
496 		if (apic_enable_cpcovf_intr) {
497 			if (apic_cpcovf_vect == 0) {
498 				int ipl = APIC_PCINT_IPL;
499 
500 				apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
501 				ASSERT(apic_cpcovf_vect);
502 
503 				(void) add_avintr(NULL, ipl,
504 				    (avfunc)kcpc_hw_overflow_intr,
505 				    "apic pcint", apic_cpcovf_vect,
506 				    NULL, NULL, NULL, NULL);
507 				kcpc_hw_overflow_intr_installed = 1;
508 				kcpc_hw_enable_cpc_intr =
509 				    apic_cpcovf_mask_clear;
510 			}
511 			apic_reg_ops->apic_write(APIC_PCINT_VECT,
512 			    apic_cpcovf_vect);
513 		}
514 	}
515 
516 	if (nlvt >= 6) {
517 		/* Only mask TM intr if the BIOS apparently doesn't use it */
518 
519 		uint32_t lvtval;
520 
521 		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
522 		if (((lvtval & AV_MASK) == AV_MASK) ||
523 		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
524 			apic_reg_ops->apic_write(APIC_THERM_VECT,
525 			    AV_MASK|APIC_RESV_IRQ);
526 		}
527 	}
528 
529 	/* Enable error interrupt */
530 
531 	if (nlvt >= 4 && apic_enable_error_intr) {
532 		if (apic_errvect == 0) {
533 			int ipl = 0xf;	/* get highest priority intr */
534 			apic_errvect = apix_get_ipivect(ipl, -1);
535 			ASSERT(apic_errvect);
536 			/*
537 			 * Not PSMI compliant, but we are going to merge
538 			 * with ON anyway
539 			 */
540 			(void) add_avintr(NULL, ipl,
541 			    (avfunc)apic_error_intr, "apic error intr",
542 			    apic_errvect, NULL, NULL, NULL, NULL);
543 		}
544 		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
545 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
546 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
547 	}
548 
549 	/* Enable CMCI interrupt */
550 	if (cmi_enable_cmci) {
551 		mutex_enter(&cmci_cpu_setup_lock);
552 		if (cmci_cpu_setup_registered == 0) {
553 			mutex_enter(&cpu_lock);
554 			register_cpu_setup_func(cmci_cpu_setup, NULL);
555 			mutex_exit(&cpu_lock);
556 			cmci_cpu_setup_registered = 1;
557 		}
558 		mutex_exit(&cmci_cpu_setup_lock);
559 
560 		if (apic_cmci_vect == 0) {
561 			int ipl = 0x2;
562 			apic_cmci_vect = apix_get_ipivect(ipl, -1);
563 			ASSERT(apic_cmci_vect);
564 
565 			(void) add_avintr(NULL, ipl,
566 			    (avfunc)cmi_cmci_trap, "apic cmci intr",
567 			    apic_cmci_vect, NULL, NULL, NULL, NULL);
568 		}
569 		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
570 	}
571 
572 	apic_reg_ops->apic_write_task_reg(0);
573 }
574 
575 static void
576 apix_picinit(void)
577 {
578 	int i, j;
579 	uint_t isr;
580 
581 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
582 
583 	/*
584 	 * initialize interrupt remapping before apic
585 	 * hardware initialization
586 	 */
587 	apic_intrmap_init(apic_mode);
588 	if (apic_vt_ops == psm_vt_ops)
589 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
590 
591 	/*
592 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
593 	 * bit on without clearing it with EOI.  Since softint
594 	 * uses vector 0x20 to interrupt itself, so softint will
595 	 * not work on this machine.  In order to fix this problem
596 	 * a check is made to verify all the isr bits are clear.
597 	 * If not, EOIs are issued to clear the bits.
598 	 */
599 	for (i = 7; i >= 1; i--) {
600 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
601 		if (isr != 0)
602 			for (j = 0; ((j < 32) && (isr != 0)); j++)
603 				if (isr & (1 << j)) {
604 					apic_reg_ops->apic_write(
605 					    APIC_EOI_REG, 0);
606 					isr &= ~(1 << j);
607 					apic_error |= APIC_ERR_BOOT_EOI;
608 				}
609 	}
610 
611 	/* set a flag so we know we have run apic_picinit() */
612 	apic_picinit_called = 1;
613 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
614 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
615 	LOCK_INIT_CLEAR(&apic_error_lock);
616 	LOCK_INIT_CLEAR(&apic_mode_switch_lock);
617 
618 	picsetup();	 /* initialise the 8259 */
619 
620 	/* add nmi handler - least priority nmi handler */
621 	LOCK_INIT_CLEAR(&apic_nmi_lock);
622 
623 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
624 	    "apix NMI handler", (caddr_t)NULL))
625 		cmn_err(CE_WARN, "apix: Unable to add nmi handler");
626 
627 	apix_init_intr();
628 
629 	/* enable apic mode if imcr present */
630 	if (apic_imcrp) {
631 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
632 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
633 	}
634 
635 	ioapix_init_intr(IOAPIC_MASK);
636 
637 	/* setup global IRM pool if applicable */
638 	if (irm_enable)
639 		apix_irm_init();
640 }
641 
642 static __inline__ void
643 apix_send_eoi(void)
644 {
645 	if (apic_mode == LOCAL_APIC)
646 		LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
647 	else
648 		X2APIC_WRITE(APIC_EOI_REG, 0);
649 }
650 
651 /*
652  * platform_intr_enter
653  *
654  *	Called at the beginning of the interrupt service routine, but unlike
655  *	pcplusmp, does not mask interrupts. An EOI is given to the interrupt
656  *	controller to enable other HW interrupts but interrupts are still
657  *	masked by the IF flag.
658  *
659  *	Return -1 for spurious interrupts
660  *
661  */
662 static int
663 apix_intr_enter(int ipl, int *vectorp)
664 {
665 	struct cpu *cpu = CPU;
666 	uint32_t cpuid = CPU->cpu_id;
667 	apic_cpus_info_t *cpu_infop;
668 	uchar_t vector;
669 	apix_vector_t *vecp;
670 	int nipl = -1;
671 
672 	/*
673 	 * The real vector delivered is (*vectorp + 0x20), but our caller
674 	 * subtracts 0x20 from the vector before passing it to us.
675 	 * (That's why APIC_BASE_VECT is 0x20.)
676 	 */
677 	vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;
678 
679 	cpu_infop = &apic_cpus[cpuid];
680 	if (vector == APIC_SPUR_INTR) {
681 		cpu_infop->aci_spur_cnt++;
682 		return (APIC_INT_SPURIOUS);
683 	}
684 
685 	vecp = xv_vector(cpuid, vector);
686 	if (vecp == NULL) {
687 		if (APIX_IS_FAKE_INTR(vector))
688 			nipl = apix_rebindinfo.i_pri;
689 		apix_send_eoi();
690 		return (nipl);
691 	}
692 	nipl = vecp->v_pri;
693 
694 	/* if interrupted by the clock, increment apic_nsec_since_boot */
695 	if (vector == (apic_clkvect + APIC_BASE_VECT)) {
696 		if (!apic_oneshot) {
697 			/* NOTE: this is not MT aware */
698 			apic_hrtime_stamp++;
699 			apic_nsec_since_boot += apic_nsec_per_intr;
700 			apic_hrtime_stamp++;
701 			last_count_read = apic_hertz_count;
702 			apix_redistribute_compute();
703 		}
704 
705 		apix_send_eoi();
706 
707 		return (nipl);
708 	}
709 
710 	ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);
711 
712 	/* pre-EOI handling for level-triggered interrupts */
713 	if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
714 	    (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
715 		apix_level_intr_pre_eoi(vecp->v_inum);
716 
717 	/* send back EOI */
718 	apix_send_eoi();
719 
720 	cpu_infop->aci_current[nipl] = vector;
721 	if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
722 		cpu_infop->aci_curipl = (uchar_t)nipl;
723 		cpu_infop->aci_ISR_in_progress |= 1 << nipl;
724 	}
725 
726 #ifdef	DEBUG
727 	if (vector >= APIX_IPI_MIN)
728 		return (nipl);	/* skip IPI */
729 
730 	APIC_DEBUG_BUF_PUT(vector);
731 	APIC_DEBUG_BUF_PUT(vecp->v_inum);
732 	APIC_DEBUG_BUF_PUT(nipl);
733 	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
734 	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
735 		drv_usecwait(apic_stretch_interrupts);
736 #endif /* DEBUG */
737 
738 	return (nipl);
739 }
740 
741 /*
742  * Any changes made to this function must also change X2APIC
743  * version of intr_exit.
744  */
745 static void
746 apix_intr_exit(int prev_ipl, int arg2)
747 {
748 	int cpuid = psm_get_cpu_id();
749 	apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
750 	apix_impl_t *apixp = apixs[cpuid];
751 
752 	UNREFERENCED_1PARAMETER(arg2);
753 
754 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
755 	/* ISR above current pri could not be in progress */
756 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
757 
758 	if (apixp->x_obsoletes != NULL) {
759 		if (APIX_CPU_LOCK_HELD(cpuid))
760 			return;
761 
762 		APIX_ENTER_CPU_LOCK(cpuid);
763 		(void) apix_obsolete_vector(apixp->x_obsoletes);
764 		APIX_LEAVE_CPU_LOCK(cpuid);
765 	}
766 }
767 
768 /*
769  * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
770  * given ipl, but apix never uses the TPR and we never mask a subset of the
771  * interrupts. They are either all blocked by the IF flag or all can come in.
772  *
773  * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
774  * can come in if currently enabled by the IF flag. This table shows the state
775  * of the IF flag when we leave this function.
776  *
777  *    curr IF |	ipl == 15	ipl != 15
778  *    --------+---------------------------
779  *       0    |    0		    0
780  *       1    |    0		    1
781  */
782 static void
783 apix_setspl(int ipl)
784 {
785 	/*
786 	 * Interrupts at ipl above this cannot be in progress, so the following
787 	 * mask is ok.
788 	 */
789 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
790 
791 	if (ipl == XC_HI_PIL)
792 		cli();
793 }
794 
795 int
796 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
797 {
798 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
799 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
800 	apix_vector_t *vecp = xv_vector(cpuid, vector);
801 
802 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
803 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
804 
805 	if (vecp->v_type == APIX_TYPE_FIXED)
806 		apix_intx_set_shared(vecp->v_inum, 1);
807 
808 	/* There are more interrupts, so it's already been enabled */
809 	if (vecp->v_share > 1)
810 		return (PSM_SUCCESS);
811 
812 	/* return if it is not hardware interrupt */
813 	if (vecp->v_type == APIX_TYPE_IPI)
814 		return (PSM_SUCCESS);
815 
816 	/*
817 	 * if apix_picinit() has not been called yet, just return.
818 	 * At the end of apic_picinit(), we will call setup_io_intr().
819 	 */
820 	if (!apic_picinit_called)
821 		return (PSM_SUCCESS);
822 
823 	(void) apix_setup_io_intr(vecp);
824 
825 	return (PSM_SUCCESS);
826 }
827 
828 int
829 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
830 {
831 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
832 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
833 	apix_vector_t *vecp = xv_vector(cpuid, vector);
834 
835 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
836 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
837 
838 	if (vecp->v_type == APIX_TYPE_FIXED)
839 		apix_intx_set_shared(vecp->v_inum, -1);
840 
841 	/* There are more interrupts */
842 	if (vecp->v_share > 1)
843 		return (PSM_SUCCESS);
844 
845 	/* return if it is not hardware interrupt */
846 	if (vecp->v_type == APIX_TYPE_IPI)
847 		return (PSM_SUCCESS);
848 
849 	if (!apic_picinit_called) {
850 		cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
851 		    virtvec);
852 		return (PSM_SUCCESS);
853 	}
854 
855 	apix_disable_vector(vecp);
856 
857 	return (PSM_SUCCESS);
858 }
859 
860 /*
861  * Try and disable all interrupts. We just assign interrupts to other
862  * processors based on policy. If any were bound by user request, we
863  * let them continue and return failure. We do not bother to check
864  * for cache affinity while rebinding.
865  */
866 static int
867 apix_disable_intr(processorid_t cpun)
868 {
869 	apix_impl_t *apixp = apixs[cpun];
870 	apix_vector_t *vecp, *newp;
871 	int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
872 
873 	lock_set(&apix_lock);
874 
875 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
876 	apic_cpus[cpun].aci_curipl = 0;
877 
878 	/* if this is for SUSPEND operation, skip rebinding */
879 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
880 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
881 			vecp = apixp->x_vectbl[i];
882 			if (!IS_VECT_ENABLED(vecp))
883 				continue;
884 
885 			apix_disable_vector(vecp);
886 		}
887 		lock_clear(&apix_lock);
888 		return (PSM_SUCCESS);
889 	}
890 
891 	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
892 		vecp = apixp->x_vectbl[i];
893 		if (!IS_VECT_ENABLED(vecp))
894 			continue;
895 
896 		if (vecp->v_flags & APIX_VECT_USER_BOUND) {
897 			hardbound++;
898 			continue;
899 		}
900 		type = vecp->v_type;
901 
902 		/*
903 		 * If there are bound interrupts on this cpu, then
904 		 * rebind them to other processors.
905 		 */
906 		loop = 0;
907 		do {
908 			bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
909 
910 			if (type != APIX_TYPE_MSI)
911 				newp = apix_set_cpu(vecp, bindcpu, &ret);
912 			else
913 				newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
914 		} while ((newp == NULL) && (loop++ < apic_nproc));
915 
916 		if (loop >= apic_nproc) {
917 			errbound++;
918 			cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
919 			    vecp->v_cpuid, vecp->v_vector);
920 		}
921 	}
922 
923 	lock_clear(&apix_lock);
924 
925 	if (hardbound || errbound) {
926 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
927 		    "due to user bound interrupts or failed operation",
928 		    cpun);
929 		return (PSM_FAILURE);
930 	}
931 
932 	return (PSM_SUCCESS);
933 }
934 
935 /*
936  * Bind interrupts to specified CPU
937  */
938 static void
939 apix_enable_intr(processorid_t cpun)
940 {
941 	apix_vector_t *vecp;
942 	int i, ret;
943 	processorid_t n;
944 
945 	lock_set(&apix_lock);
946 
947 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
948 
949 	/* interrupt enabling for system resume */
950 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
951 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
952 			vecp = xv_vector(cpun, i);
953 			if (!IS_VECT_ENABLED(vecp))
954 				continue;
955 
956 			apix_enable_vector(vecp);
957 		}
958 		apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
959 	}
960 
961 	for (n = 0; n < apic_nproc; n++) {
962 		if (!apic_cpu_in_range(n) || n == cpun ||
963 		    (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
964 			continue;
965 
966 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
967 			vecp = xv_vector(n, i);
968 			if (!IS_VECT_ENABLED(vecp) ||
969 			    vecp->v_bound_cpuid != cpun)
970 				continue;
971 
972 			if (vecp->v_type != APIX_TYPE_MSI)
973 				(void) apix_set_cpu(vecp, cpun, &ret);
974 			else
975 				(void) apix_grp_set_cpu(vecp, cpun, &ret);
976 		}
977 	}
978 
979 	lock_clear(&apix_lock);
980 }
981 
982 /*
983  * Allocate vector for IPI
984  * type == -1 indicates it is an internal request. Do not change
985  * resv_vector for these requests.
986  */
987 static int
988 apix_get_ipivect(int ipl, int type)
989 {
990 	uchar_t vector;
991 
992 	if ((vector = apix_alloc_ipi(ipl)) > 0) {
993 		if (type != -1)
994 			apic_resv_vector[ipl] = vector;
995 		return (vector);
996 	}
997 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
998 	return (-1);	/* shouldn't happen */
999 }
1000 
1001 static int
1002 apix_get_clkvect(int ipl)
1003 {
1004 	int vector;
1005 
1006 	if ((vector = apix_get_ipivect(ipl, -1)) == -1)
1007 		return (-1);
1008 
1009 	apic_clkvect = vector - APIC_BASE_VECT;
1010 	APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1011 	    apic_clkvect));
1012 	return (vector);
1013 }
1014 
1015 static int
1016 apix_post_cpu_start()
1017 {
1018 	int cpun;
1019 	static int cpus_started = 1;
1020 
1021 	/* We know this CPU + BSP  started successfully. */
1022 	cpus_started++;
1023 
1024 	/*
1025 	 * On BSP we would have enabled X2APIC, if supported by processor,
1026 	 * in acpi_probe(), but on AP we do it here.
1027 	 *
1028 	 * We enable X2APIC mode only if BSP is running in X2APIC & the
1029 	 * local APIC mode of the current CPU is MMIO (xAPIC).
1030 	 */
1031 	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1032 	    apic_local_mode() == LOCAL_APIC) {
1033 		apic_enable_x2apic();
1034 	}
1035 
1036 	/*
1037 	 * Switch back to x2apic IPI sending method for performance when target
1038 	 * CPU has entered x2apic mode.
1039 	 */
1040 	if (apic_mode == LOCAL_X2APIC) {
1041 		apic_switch_ipi_callback(B_FALSE);
1042 	}
1043 
1044 	splx(ipltospl(LOCK_LEVEL));
1045 	apix_init_intr();
1046 
1047 	/*
1048 	 * since some systems don't enable the internal cache on the non-boot
1049 	 * cpus, so we have to enable them here
1050 	 */
1051 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1052 
1053 #ifdef	DEBUG
1054 	APIC_AV_PENDING_SET();
1055 #else
1056 	if (apic_mode == LOCAL_APIC)
1057 		APIC_AV_PENDING_SET();
1058 #endif	/* DEBUG */
1059 
1060 	/*
1061 	 * We may be booting, or resuming from suspend; aci_status will
1062 	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1063 	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1064 	 */
1065 	cpun = psm_get_cpu_id();
1066 	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1067 
1068 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1069 
1070 	return (PSM_SUCCESS);
1071 }
1072 
1073 /*
1074  * If this module needs a periodic handler for the interrupt distribution, it
1075  * can be added here. The argument to the periodic handler is not currently
1076  * used, but is reserved for future.
1077  */
1078 static void
1079 apix_post_cyclic_setup(void *arg)
1080 {
1081 	UNREFERENCED_1PARAMETER(arg);
1082 
1083 	cyc_handler_t cyh;
1084 	cyc_time_t cyt;
1085 
1086 	/* cpu_lock is held */
1087 	/* set up a periodic handler for intr redistribution */
1088 
1089 	/*
1090 	 * In peridoc mode intr redistribution processing is done in
1091 	 * apic_intr_enter during clk intr processing
1092 	 */
1093 	if (!apic_oneshot)
1094 		return;
1095 
1096 	/*
1097 	 * Register a periodical handler for the redistribution processing.
1098 	 * Though we would generally prefer to use the DDI interface for
1099 	 * periodic handler invocation, ddi_periodic_add(9F), we are
1100 	 * unfortunately already holding cpu_lock, which ddi_periodic_add will
1101 	 * attempt to take for us.  Thus, we add our own cyclic directly:
1102 	 */
1103 	cyh.cyh_func = (void (*)(void *))apix_redistribute_compute;
1104 	cyh.cyh_arg = NULL;
1105 	cyh.cyh_level = CY_LOW_LEVEL;
1106 
1107 	cyt.cyt_when = 0;
1108 	cyt.cyt_interval = apic_redistribute_sample_interval;
1109 
1110 	apic_cyclic_id = cyclic_add(&cyh, &cyt);
1111 }
1112 
1113 /*
1114  * Called the first time we enable x2apic mode on this cpu.
1115  * Update some of the function pointers to use x2apic routines.
1116  */
1117 void
1118 x2apic_update_psm()
1119 {
1120 	struct psm_ops *pops = &apix_ops;
1121 
1122 	ASSERT(pops != NULL);
1123 
1124 	/*
1125 	 * The pcplusmp module's version of x2apic_update_psm makes additional
1126 	 * changes that we do not have to make here. It needs to make those
1127 	 * changes because pcplusmp relies on the TPR register and the means of
1128 	 * addressing that changes when using the local apic versus the x2apic.
1129 	 * It's also worth noting that the apix driver specific function end up
1130 	 * being apix_foo as opposed to apic_foo and x2apic_foo.
1131 	 */
1132 	pops->psm_send_ipi = x2apic_send_ipi;
1133 	send_dirintf = pops->psm_send_ipi;
1134 
1135 	pops->psm_send_pir_ipi = x2apic_send_pir_ipi;
1136 	psm_send_pir_ipi = pops->psm_send_pir_ipi;
1137 
1138 	apic_mode = LOCAL_X2APIC;
1139 	apic_change_ops();
1140 }
1141 
1142 /*
1143  * This function provides external interface to the nexus for all
1144  * functionalities related to the new DDI interrupt framework.
1145  *
1146  * Input:
1147  * dip     - pointer to the dev_info structure of the requested device
1148  * hdlp    - pointer to the internal interrupt handle structure for the
1149  *	     requested interrupt
1150  * intr_op - opcode for this call
1151  * result  - pointer to the integer that will hold the result to be
1152  *	     passed back if return value is PSM_SUCCESS
1153  *
1154  * Output:
1155  * return value is either PSM_SUCCESS or PSM_FAILURE
1156  */
1157 static int
1158 apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1159     psm_intr_op_t intr_op, int *result)
1160 {
1161 	int		cap;
1162 	apix_vector_t	*vecp, *newvecp;
1163 	struct intrspec *ispec, intr_spec;
1164 	processorid_t target;
1165 
1166 	ispec = &intr_spec;
1167 	ispec->intrspec_pri = hdlp->ih_pri;
1168 	ispec->intrspec_vec = hdlp->ih_inum;
1169 	ispec->intrspec_func = hdlp->ih_cb_func;
1170 
1171 	switch (intr_op) {
1172 	case PSM_INTR_OP_ALLOC_VECTORS:
1173 		switch (hdlp->ih_type) {
1174 		case DDI_INTR_TYPE_MSI:
1175 			/* allocate MSI vectors */
1176 			*result = apix_alloc_msi(dip, hdlp->ih_inum,
1177 			    hdlp->ih_scratch1,
1178 			    (int)(uintptr_t)hdlp->ih_scratch2);
1179 			break;
1180 		case DDI_INTR_TYPE_MSIX:
1181 			/* allocate MSI-X vectors */
1182 			*result = apix_alloc_msix(dip, hdlp->ih_inum,
1183 			    hdlp->ih_scratch1,
1184 			    (int)(uintptr_t)hdlp->ih_scratch2);
1185 			break;
1186 		case DDI_INTR_TYPE_FIXED:
1187 			/* allocate or share vector for fixed */
1188 			if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1189 				return (PSM_FAILURE);
1190 			}
1191 			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1192 			*result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1193 			    ispec);
1194 			break;
1195 		default:
1196 			return (PSM_FAILURE);
1197 		}
1198 		break;
1199 	case PSM_INTR_OP_FREE_VECTORS:
1200 		apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1201 		    hdlp->ih_type);
1202 		break;
1203 	case PSM_INTR_OP_XLATE_VECTOR:
1204 		/*
1205 		 * Vectors are allocated by ALLOC and freed by FREE.
1206 		 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1207 		 */
1208 		*result = APIX_INVALID_VECT;
1209 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1210 		if (vecp != NULL) {
1211 			*result = APIX_VIRTVECTOR(vecp->v_cpuid,
1212 			    vecp->v_vector);
1213 			break;
1214 		}
1215 
1216 		/*
1217 		 * No vector to device mapping exists. If this is FIXED type
1218 		 * then check if this IRQ is already mapped for another device
1219 		 * then return the vector number for it (i.e. shared IRQ case).
1220 		 * Otherwise, return PSM_FAILURE.
1221 		 */
1222 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1223 			vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1224 			    ispec);
1225 			*result = (vecp == NULL) ? APIX_INVALID_VECT :
1226 			    APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1227 		}
1228 		if (*result == APIX_INVALID_VECT)
1229 			return (PSM_FAILURE);
1230 		break;
1231 	case PSM_INTR_OP_GET_PENDING:
1232 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1233 		if (vecp == NULL)
1234 			return (PSM_FAILURE);
1235 
1236 		*result = apix_get_pending(vecp);
1237 		break;
1238 	case PSM_INTR_OP_CLEAR_MASK:
1239 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1240 			return (PSM_FAILURE);
1241 
1242 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1243 		if (vecp == NULL)
1244 			return (PSM_FAILURE);
1245 
1246 		apix_intx_clear_mask(vecp->v_inum);
1247 		break;
1248 	case PSM_INTR_OP_SET_MASK:
1249 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1250 			return (PSM_FAILURE);
1251 
1252 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1253 		if (vecp == NULL)
1254 			return (PSM_FAILURE);
1255 
1256 		apix_intx_set_mask(vecp->v_inum);
1257 		break;
1258 	case PSM_INTR_OP_GET_SHARED:
1259 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1260 			return (PSM_FAILURE);
1261 
1262 		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1263 		if (vecp == NULL)
1264 			return (PSM_FAILURE);
1265 
1266 		*result = apix_intx_get_shared(vecp->v_inum);
1267 		break;
1268 	case PSM_INTR_OP_SET_PRI:
1269 		/*
1270 		 * Called prior to adding the interrupt handler or when
1271 		 * an interrupt handler is unassigned.
1272 		 */
1273 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1274 			return (PSM_SUCCESS);
1275 
1276 		if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1277 			return (PSM_FAILURE);
1278 
1279 		break;
1280 	case PSM_INTR_OP_SET_CPU:
1281 	case PSM_INTR_OP_GRP_SET_CPU:
1282 		/*
1283 		 * The interrupt handle given here has been allocated
1284 		 * specifically for this command, and ih_private carries
1285 		 * a CPU value.
1286 		 */
1287 		*result = EINVAL;
1288 		target = (int)(intptr_t)hdlp->ih_private;
1289 		if (!apic_cpu_in_range(target)) {
1290 			DDI_INTR_IMPLDBG((CE_WARN,
1291 			    "[grp_]set_cpu: cpu out of range: %d\n", target));
1292 			return (PSM_FAILURE);
1293 		}
1294 
1295 		lock_set(&apix_lock);
1296 
1297 		vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1298 		if (!IS_VECT_ENABLED(vecp)) {
1299 			DDI_INTR_IMPLDBG((CE_WARN,
1300 			    "[grp]_set_cpu: invalid vector 0x%x\n",
1301 			    hdlp->ih_vector));
1302 			lock_clear(&apix_lock);
1303 			return (PSM_FAILURE);
1304 		}
1305 
1306 		*result = 0;
1307 
1308 		if (intr_op == PSM_INTR_OP_SET_CPU)
1309 			newvecp = apix_set_cpu(vecp, target, result);
1310 		else
1311 			newvecp = apix_grp_set_cpu(vecp, target, result);
1312 
1313 		lock_clear(&apix_lock);
1314 
1315 		if (newvecp == NULL) {
1316 			*result = EIO;
1317 			return (PSM_FAILURE);
1318 		}
1319 		newvecp->v_bound_cpuid = target;
1320 		hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1321 		    newvecp->v_vector);
1322 		break;
1323 
1324 	case PSM_INTR_OP_GET_INTR:
1325 		/*
1326 		 * The interrupt handle given here has been allocated
1327 		 * specifically for this command, and ih_private carries
1328 		 * a pointer to a apic_get_intr_t.
1329 		 */
1330 		if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1331 			return (PSM_FAILURE);
1332 		break;
1333 
1334 	case PSM_INTR_OP_CHECK_MSI:
1335 		/*
1336 		 * Check MSI/X is supported or not at APIC level and
1337 		 * masked off the MSI/X bits in hdlp->ih_type if not
1338 		 * supported before return.  If MSI/X is supported,
1339 		 * leave the ih_type unchanged and return.
1340 		 *
1341 		 * hdlp->ih_type passed in from the nexus has all the
1342 		 * interrupt types supported by the device.
1343 		 */
1344 		if (apic_support_msi == 0) {	/* uninitialized */
1345 			/*
1346 			 * if apic_support_msi is not set, call
1347 			 * apic_check_msi_support() to check whether msi
1348 			 * is supported first
1349 			 */
1350 			if (apic_check_msi_support() == PSM_SUCCESS)
1351 				apic_support_msi = 1;	/* supported */
1352 			else
1353 				apic_support_msi = -1;	/* not-supported */
1354 		}
1355 		if (apic_support_msi == 1) {
1356 			if (apic_msix_enable)
1357 				*result = hdlp->ih_type;
1358 			else
1359 				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1360 		} else
1361 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1362 			    DDI_INTR_TYPE_MSIX);
1363 		break;
1364 	case PSM_INTR_OP_GET_CAP:
1365 		cap = DDI_INTR_FLAG_PENDING;
1366 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1367 			cap |= DDI_INTR_FLAG_MASKABLE;
1368 		*result = cap;
1369 		break;
1370 	case PSM_INTR_OP_APIC_TYPE:
1371 		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1372 		    apix_get_apic_type();
1373 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1374 		    APIX_IPI_MIN;
1375 		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1376 		    apic_nproc;
1377 		hdlp->ih_ver = apic_get_apic_version();
1378 		break;
1379 	case PSM_INTR_OP_SET_CAP:
1380 	default:
1381 		return (PSM_FAILURE);
1382 	}
1383 
1384 	return (PSM_SUCCESS);
1385 }
1386 
1387 static void
1388 apix_cleanup_busy(void)
1389 {
1390 	int i, j;
1391 	apix_vector_t *vecp;
1392 
1393 	for (i = 0; i < apic_nproc; i++) {
1394 		if (!apic_cpu_in_range(i))
1395 			continue;
1396 		apic_cpus[i].aci_busy = 0;
1397 		for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1398 			if ((vecp = xv_vector(i, j)) != NULL)
1399 				vecp->v_busy = 0;
1400 		}
1401 	}
1402 }
1403 
1404 static void
1405 apix_redistribute_compute(void)
1406 {
1407 	int	i, j, max_busy;
1408 
1409 	if (!apic_enable_dynamic_migration)
1410 		return;
1411 
1412 	if (++apic_nticks == apic_sample_factor_redistribution) {
1413 		/*
1414 		 * Time to call apic_intr_redistribute().
1415 		 * reset apic_nticks. This will cause max_busy
1416 		 * to be calculated below and if it is more than
1417 		 * apic_int_busy, we will do the whole thing
1418 		 */
1419 		apic_nticks = 0;
1420 	}
1421 	max_busy = 0;
1422 	for (i = 0; i < apic_nproc; i++) {
1423 		if (!apic_cpu_in_range(i))
1424 			continue;
1425 		/*
1426 		 * Check if curipl is non zero & if ISR is in
1427 		 * progress
1428 		 */
1429 		if (((j = apic_cpus[i].aci_curipl) != 0) &&
1430 		    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1431 
1432 			int	vect;
1433 			apic_cpus[i].aci_busy++;
1434 			vect = apic_cpus[i].aci_current[j];
1435 			apixs[i]->x_vectbl[vect]->v_busy++;
1436 		}
1437 
1438 		if (!apic_nticks &&
1439 		    (apic_cpus[i].aci_busy > max_busy))
1440 			max_busy = apic_cpus[i].aci_busy;
1441 	}
1442 	if (!apic_nticks) {
1443 		if (max_busy > apic_int_busy_mark) {
1444 		/*
1445 		 * We could make the following check be
1446 		 * skipped > 1 in which case, we get a
1447 		 * redistribution at half the busy mark (due to
1448 		 * double interval). Need to be able to collect
1449 		 * more empirical data to decide if that is a
1450 		 * good strategy. Punt for now.
1451 		 */
1452 			apix_cleanup_busy();
1453 			apic_skipped_redistribute = 0;
1454 		} else
1455 			apic_skipped_redistribute++;
1456 	}
1457 }
1458 
1459 /*
1460  * intr_ops() service routines
1461  */
1462 
1463 static int
1464 apix_get_pending(apix_vector_t *vecp)
1465 {
1466 	int bit, index, irr, pending;
1467 
1468 	/* need to get on the bound cpu */
1469 	mutex_enter(&cpu_lock);
1470 	affinity_set(vecp->v_cpuid);
1471 
1472 	index = vecp->v_vector / 32;
1473 	bit = vecp->v_vector % 32;
1474 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1475 
1476 	affinity_clear();
1477 	mutex_exit(&cpu_lock);
1478 
1479 	pending = (irr & (1 << bit)) ? 1 : 0;
1480 	if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1481 		pending = apix_intx_get_pending(vecp->v_inum);
1482 
1483 	return (pending);
1484 }
1485 
1486 static apix_vector_t *
1487 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1488 {
1489 	apix_vector_t *vecp;
1490 	processorid_t cpuid;
1491 	int32_t virt_vec = 0;
1492 
1493 	switch (flags & PSMGI_INTRBY_FLAGS) {
1494 	case PSMGI_INTRBY_IRQ:
1495 		return (apix_intx_get_vector(hdlp->ih_vector));
1496 	case PSMGI_INTRBY_VEC:
1497 		virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1498 
1499 		cpuid = APIX_VIRTVEC_CPU(virt_vec);
1500 		if (!apic_cpu_in_range(cpuid))
1501 			return (NULL);
1502 
1503 		vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1504 		break;
1505 	case PSMGI_INTRBY_DEFAULT:
1506 		vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1507 		    hdlp->ih_type);
1508 		break;
1509 	default:
1510 		return (NULL);
1511 	}
1512 
1513 	return (vecp);
1514 }
1515 
1516 static int
1517 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1518     apic_get_intr_t *intr_params_p)
1519 {
1520 	apix_vector_t *vecp;
1521 	struct autovec *av_dev;
1522 	int i;
1523 
1524 	vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1525 	if (IS_VECT_FREE(vecp)) {
1526 		intr_params_p->avgi_num_devs = 0;
1527 		intr_params_p->avgi_cpu_id = 0;
1528 		intr_params_p->avgi_req_flags = 0;
1529 		return (PSM_SUCCESS);
1530 	}
1531 
1532 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1533 		intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1534 
1535 		/* Return user bound info for intrd. */
1536 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1537 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1538 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1539 		}
1540 	}
1541 
1542 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1543 		intr_params_p->avgi_vector = vecp->v_vector;
1544 
1545 	if (intr_params_p->avgi_req_flags &
1546 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1547 		/* Get number of devices from apic_irq table shared field. */
1548 		intr_params_p->avgi_num_devs = vecp->v_share;
1549 
1550 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
1551 
1552 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
1553 
1554 		/* Some devices have NULL dip.  Don't count these. */
1555 		if (intr_params_p->avgi_num_devs > 0) {
1556 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1557 			    av_dev = av_dev->av_link) {
1558 				if (av_dev->av_vector && av_dev->av_dip)
1559 					i++;
1560 			}
1561 			intr_params_p->avgi_num_devs =
1562 			    (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1563 		}
1564 
1565 		/* There are no viable dips to return. */
1566 		if (intr_params_p->avgi_num_devs == 0) {
1567 			intr_params_p->avgi_dip_list = NULL;
1568 
1569 		} else {	/* Return list of dips */
1570 
1571 			/* Allocate space in array for that number of devs. */
1572 			intr_params_p->avgi_dip_list = kmem_zalloc(
1573 			    intr_params_p->avgi_num_devs *
1574 			    sizeof (dev_info_t *),
1575 			    KM_NOSLEEP);
1576 			if (intr_params_p->avgi_dip_list == NULL) {
1577 				DDI_INTR_IMPLDBG((CE_WARN,
1578 				    "apix_get_vector_intr_info: no memory"));
1579 				return (PSM_FAILURE);
1580 			}
1581 
1582 			/*
1583 			 * Loop through the device list of the autovec table
1584 			 * filling in the dip array.
1585 			 *
1586 			 * Note that the autovect table may have some special
1587 			 * entries which contain NULL dips.  These will be
1588 			 * ignored.
1589 			 */
1590 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1591 			    av_dev = av_dev->av_link) {
1592 				if (av_dev->av_vector && av_dev->av_dip)
1593 					intr_params_p->avgi_dip_list[i++] =
1594 					    av_dev->av_dip;
1595 			}
1596 		}
1597 	}
1598 
1599 	return (PSM_SUCCESS);
1600 }
1601 
1602 static char *
1603 apix_get_apic_type(void)
1604 {
1605 	return (apix_psm_info.p_mach_idstring);
1606 }
1607 
1608 apix_vector_t *
1609 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1610 {
1611 	apix_vector_t *newp = NULL;
1612 	dev_info_t *dip;
1613 	int inum, cap_ptr;
1614 	ddi_acc_handle_t handle;
1615 	ddi_intr_msix_t *msix_p = NULL;
1616 	ushort_t msix_ctrl;
1617 	uintptr_t off = 0;
1618 	uint32_t mask = 0;
1619 
1620 	ASSERT(LOCK_HELD(&apix_lock));
1621 	*result = ENXIO;
1622 
1623 	/* Fail if this is an MSI intr and is part of a group. */
1624 	if (vecp->v_type == APIX_TYPE_MSI) {
1625 		if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1626 			return (NULL);
1627 		else
1628 			return (apix_grp_set_cpu(vecp, new_cpu, result));
1629 	}
1630 
1631 	/*
1632 	 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1633 	 */
1634 	if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1635 		if ((dip = APIX_GET_DIP(vecp)) == NULL)
1636 			return (NULL);
1637 		inum = vecp->v_devp->dv_inum;
1638 
1639 		handle = i_ddi_get_pci_config_handle(dip);
1640 		cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1641 		msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1642 		if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1643 			/*
1644 			 * Function is not masked, then mask "inum"th
1645 			 * entry in the MSI-X table
1646 			 */
1647 			msix_p = i_ddi_get_msix(dip);
1648 			off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1649 			    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1650 			mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1651 			ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1652 			    mask | 1);
1653 		}
1654 	}
1655 
1656 	*result = 0;
1657 	if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1658 		*result = EIO;
1659 
1660 	/* Restore mask bit */
1661 	if (msix_p != NULL)
1662 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1663 
1664 	return (newp);
1665 }
1666 
1667 /*
1668  * Set cpu for MSIs
1669  */
1670 apix_vector_t *
1671 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1672 {
1673 	apix_vector_t *newp, *vp;
1674 	uint32_t orig_cpu = vecp->v_cpuid;
1675 	int orig_vect = vecp->v_vector;
1676 	int i, num_vectors, cap_ptr, msi_mask_off = 0;
1677 	uint32_t msi_pvm = 0;
1678 	ushort_t msi_ctrl;
1679 	ddi_acc_handle_t handle;
1680 	dev_info_t *dip;
1681 
1682 	APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1683 	    " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1684 
1685 	ASSERT(LOCK_HELD(&apix_lock));
1686 
1687 	*result = ENXIO;
1688 
1689 	if (vecp->v_type != APIX_TYPE_MSI) {
1690 		DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1691 		return (NULL);
1692 	}
1693 
1694 	if ((dip = APIX_GET_DIP(vecp)) == NULL)
1695 		return (NULL);
1696 
1697 	num_vectors = i_ddi_intr_get_current_nintrs(dip);
1698 	if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1699 		APIC_VERBOSE(INTR, (CE_WARN,
1700 		    "set_grp: base vec not part of a grp or not aligned: "
1701 		    "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1702 		return (NULL);
1703 	}
1704 
1705 	if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1706 		return (NULL);
1707 
1708 	*result = EIO;
1709 	for (i = 1; i < num_vectors; i++) {
1710 		if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1711 			return (NULL);
1712 #ifdef DEBUG
1713 		/*
1714 		 * Sanity check: CPU and dip is the same for all entries.
1715 		 * May be called when first msi to be enabled, at this time
1716 		 * add_avintr() is not called for other msi
1717 		 */
1718 		if ((vp->v_share != 0) &&
1719 		    ((APIX_GET_DIP(vp) != dip) ||
1720 		    (vp->v_cpuid != vecp->v_cpuid))) {
1721 			APIC_VERBOSE(INTR, (CE_WARN,
1722 			    "set_grp: cpu or dip for vec 0x%x difft than for "
1723 			    "vec 0x%x\n", orig_vect, orig_vect + i));
1724 			APIC_VERBOSE(INTR, (CE_WARN,
1725 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1726 			    vp->v_cpuid, (void *)dip,
1727 			    (void *)APIX_GET_DIP(vp)));
1728 			return (NULL);
1729 		}
1730 #endif /* DEBUG */
1731 	}
1732 
1733 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1734 	handle = i_ddi_get_pci_config_handle(dip);
1735 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1736 
1737 	/* MSI Per vector masking is supported. */
1738 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1739 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
1740 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1741 		else
1742 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1743 		msi_pvm = pci_config_get32(handle, msi_mask_off);
1744 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1745 		APIC_VERBOSE(INTR, (CE_CONT,
1746 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
1747 		    pci_config_get32(handle, msi_mask_off)));
1748 	}
1749 
1750 	if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1751 		*result = 0;
1752 
1753 	/* Reenable vectors if per vector masking is supported. */
1754 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1755 		pci_config_put32(handle, msi_mask_off, msi_pvm);
1756 		APIC_VERBOSE(INTR, (CE_CONT,
1757 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
1758 		    pci_config_get32(handle, msi_mask_off)));
1759 	}
1760 
1761 	return (newp);
1762 }
1763 
1764 void
1765 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1766 {
1767 	apic_irq_t *irqp;
1768 
1769 	mutex_enter(&airq_mutex);
1770 	irqp = apic_irq_table[irqno];
1771 	irqp->airq_cpu = cpuid;
1772 	irqp->airq_vector = vector;
1773 	apic_record_rdt_entry(irqp, irqno);
1774 	mutex_exit(&airq_mutex);
1775 }
1776 
1777 apix_vector_t *
1778 apix_intx_get_vector(int irqno)
1779 {
1780 	apic_irq_t *irqp;
1781 	uint32_t cpuid;
1782 	uchar_t vector;
1783 
1784 	mutex_enter(&airq_mutex);
1785 	irqp = apic_irq_table[irqno & 0xff];
1786 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1787 		mutex_exit(&airq_mutex);
1788 		return (NULL);
1789 	}
1790 	cpuid = irqp->airq_cpu;
1791 	vector = irqp->airq_vector;
1792 	mutex_exit(&airq_mutex);
1793 
1794 	return (xv_vector(cpuid, vector));
1795 }
1796 
1797 /*
1798  * Must called with interrupts disabled and apic_ioapic_lock held
1799  */
1800 void
1801 apix_intx_enable(int irqno)
1802 {
1803 	uchar_t ioapicindex, intin;
1804 	apic_irq_t *irqp = apic_irq_table[irqno];
1805 	ioapic_rdt_t irdt;
1806 	apic_cpus_info_t *cpu_infop;
1807 	apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1808 
1809 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1810 
1811 	ioapicindex = irqp->airq_ioapicindex;
1812 	intin = irqp->airq_intin_no;
1813 	cpu_infop =  &apic_cpus[irqp->airq_cpu];
1814 
1815 	irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1816 	irdt.ir_hi = cpu_infop->aci_local_id;
1817 
1818 	apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1819 	    vecp->v_type, 1, ioapicindex);
1820 	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1821 	    (void *)&irdt, vecp->v_type, 1);
1822 	apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1823 
1824 	/* write RDT entry high dword - destination */
1825 	WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1826 	    irdt.ir_hi);
1827 
1828 	/* Write the vector, trigger, and polarity portion of the RDT */
1829 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1830 
1831 	vecp->v_state = APIX_STATE_ENABLED;
1832 
1833 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1834 	    " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1835 	    ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1836 }
1837 
1838 /*
1839  * Must called with interrupts disabled and apic_ioapic_lock held
1840  */
1841 void
1842 apix_intx_disable(int irqno)
1843 {
1844 	apic_irq_t *irqp = apic_irq_table[irqno];
1845 	int ioapicindex, intin;
1846 
1847 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1848 	/*
1849 	 * The assumption here is that this is safe, even for
1850 	 * systems with IOAPICs that suffer from the hardware
1851 	 * erratum because all devices have been quiesced before
1852 	 * they unregister their interrupt handlers.  If that
1853 	 * assumption turns out to be false, this mask operation
1854 	 * can induce the same erratum result we're trying to
1855 	 * avoid.
1856 	 */
1857 	ioapicindex = irqp->airq_ioapicindex;
1858 	intin = irqp->airq_intin_no;
1859 	ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1860 
1861 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1862 	    " intin 0x%x\n", ioapicindex, intin));
1863 }
1864 
1865 void
1866 apix_intx_free(int irqno)
1867 {
1868 	apic_irq_t *irqp;
1869 
1870 	mutex_enter(&airq_mutex);
1871 	irqp = apic_irq_table[irqno];
1872 
1873 	if (IS_IRQ_FREE(irqp)) {
1874 		mutex_exit(&airq_mutex);
1875 		return;
1876 	}
1877 
1878 	irqp->airq_mps_intr_index = FREE_INDEX;
1879 	irqp->airq_cpu = IRQ_UNINIT;
1880 	irqp->airq_vector = APIX_INVALID_VECT;
1881 	mutex_exit(&airq_mutex);
1882 }
1883 
#ifdef DEBUG
/* Debug-only event counters incremented by apix_intx_rebind(). */
int apix_intr_deliver_timeouts = 0;
int apix_intr_rirr_timeouts = 0;
int apix_intr_rirr_reset_failure = 0;
#endif	/* DEBUG */

/* Bound on the Remote IRR wait passes made by apix_intx_rebind(). */
int apix_max_reps_irr_pending = 10;

/* Selects `bits' from the low dword of the RDT entry for (ioapic, intin). */
#define	GET_RDT_BITS(ioapic, intin, bits)	\
	(READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))
/* Pause between Remote IRR polls: 5000 microseconds expressed in ticks. */
#define	APIX_CHECK_IRR_DELAY	drv_usectohz(5000)
1895 int
1896 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1897 {
1898 	apic_irq_t *irqp = apic_irq_table[irqno];
1899 	ulong_t iflag;
1900 	int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1901 
1902 	ASSERT(irqp != NULL);
1903 
1904 	iflag = intr_clear();
1905 	lock_set(&apic_ioapic_lock);
1906 
1907 	ioapic_ix = irqp->airq_ioapicindex;
1908 	intin_no = irqp->airq_intin_no;
1909 	level = apic_level_intr[irqno];
1910 
1911 	/*
1912 	 * Wait for the delivery status bit to be cleared. This should
1913 	 * be a very small amount of time.
1914 	 */
1915 	repeats = 0;
1916 	do {
1917 		repeats++;
1918 
1919 		for (waited = 0; waited < apic_max_reps_clear_pending;
1920 		    waited++) {
1921 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1922 				break;
1923 		}
1924 		if (!level)
1925 			break;
1926 
1927 		/*
1928 		 * Mask the RDT entry for level-triggered interrupts.
1929 		 */
1930 		irqp->airq_rdt_entry |= AV_MASK;
1931 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1932 		    intin_no);
1933 		if ((masked = (rdt_entry & AV_MASK)) == 0) {
1934 			/* Mask it */
1935 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1936 			    AV_MASK | rdt_entry);
1937 		}
1938 
1939 		/*
1940 		 * If there was a race and an interrupt was injected
1941 		 * just before we masked, check for that case here.
1942 		 * Then, unmask the RDT entry and try again.  If we're
1943 		 * on our last try, don't unmask (because we want the
1944 		 * RDT entry to remain masked for the rest of the
1945 		 * function).
1946 		 */
1947 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1948 		    intin_no);
1949 		if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1950 		    (repeats < apic_max_reps_clear_pending)) {
1951 			/* Unmask it */
1952 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1953 			    intin_no, rdt_entry & ~AV_MASK);
1954 			irqp->airq_rdt_entry &= ~AV_MASK;
1955 		}
1956 	} while ((rdt_entry & AV_PENDING) &&
1957 	    (repeats < apic_max_reps_clear_pending));
1958 
1959 #ifdef DEBUG
1960 	if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1961 		apix_intr_deliver_timeouts++;
1962 #endif
1963 
1964 	if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1965 		goto done;
1966 
1967 	/*
1968 	 * wait for remote IRR to be cleared for level-triggered
1969 	 * interrupts
1970 	 */
1971 	repeats = 0;
1972 	do {
1973 		repeats++;
1974 
1975 		for (waited = 0; waited < apic_max_reps_clear_pending;
1976 		    waited++) {
1977 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1978 			    == 0)
1979 				break;
1980 		}
1981 
1982 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1983 			lock_clear(&apic_ioapic_lock);
1984 			intr_restore(iflag);
1985 
1986 			delay(APIX_CHECK_IRR_DELAY);
1987 
1988 			iflag = intr_clear();
1989 			lock_set(&apic_ioapic_lock);
1990 		}
1991 	} while (repeats < apix_max_reps_irr_pending);
1992 
1993 	if (repeats >= apix_max_reps_irr_pending) {
1994 #ifdef DEBUG
1995 		apix_intr_rirr_timeouts++;
1996 #endif
1997 
1998 		/*
1999 		 * If we waited and the Remote IRR bit is still not cleared,
2000 		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
2001 		 * times for this interrupt, try the last-ditch workaround:
2002 		 */
2003 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2004 			/*
2005 			 * Trying to clear the bit through normal
2006 			 * channels has failed.  So as a last-ditch
2007 			 * effort, try to set the trigger mode to
2008 			 * edge, then to level.  This has been
2009 			 * observed to work on many systems.
2010 			 */
2011 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2012 			    intin_no,
2013 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2014 			    intin_no) & ~AV_LEVEL);
2015 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2016 			    intin_no,
2017 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2018 			    intin_no) | AV_LEVEL);
2019 		}
2020 
2021 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2022 #ifdef DEBUG
2023 			apix_intr_rirr_reset_failure++;
2024 #endif
2025 			lock_clear(&apic_ioapic_lock);
2026 			intr_restore(iflag);
2027 			prom_printf("apix: Remote IRR still "
2028 			    "not clear for IOAPIC %d intin %d.\n"
2029 			    "\tInterrupts to this pin may cease "
2030 			    "functioning.\n", ioapic_ix, intin_no);
2031 			return (1);	/* return failure */
2032 		}
2033 	}
2034 
2035 done:
2036 	/* change apic_irq_table */
2037 	lock_clear(&apic_ioapic_lock);
2038 	intr_restore(iflag);
2039 	apix_intx_set_vector(irqno, cpuid, vector);
2040 	iflag = intr_clear();
2041 	lock_set(&apic_ioapic_lock);
2042 
2043 	/* reprogramme IO-APIC RDT entry */
2044 	apix_intx_enable(irqno);
2045 
2046 	lock_clear(&apic_ioapic_lock);
2047 	intr_restore(iflag);
2048 
2049 	return (0);
2050 }
2051 
2052 static int
2053 apix_intx_get_pending(int irqno)
2054 {
2055 	apic_irq_t *irqp;
2056 	int intin, ioapicindex, pending;
2057 	ulong_t iflag;
2058 
2059 	mutex_enter(&airq_mutex);
2060 	irqp = apic_irq_table[irqno];
2061 	if (IS_IRQ_FREE(irqp)) {
2062 		mutex_exit(&airq_mutex);
2063 		return (0);
2064 	}
2065 
2066 	/* check IO-APIC delivery status */
2067 	intin = irqp->airq_intin_no;
2068 	ioapicindex = irqp->airq_ioapicindex;
2069 	mutex_exit(&airq_mutex);
2070 
2071 	iflag = intr_clear();
2072 	lock_set(&apic_ioapic_lock);
2073 
2074 	pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2075 	    AV_PENDING) ? 1 : 0;
2076 
2077 	lock_clear(&apic_ioapic_lock);
2078 	intr_restore(iflag);
2079 
2080 	return (pending);
2081 }
2082 
2083 /*
2084  * This function will mask the interrupt on the I/O APIC
2085  */
2086 static void
2087 apix_intx_set_mask(int irqno)
2088 {
2089 	int intin, ioapixindex, rdt_entry;
2090 	ulong_t iflag;
2091 	apic_irq_t *irqp;
2092 
2093 	mutex_enter(&airq_mutex);
2094 	irqp = apic_irq_table[irqno];
2095 
2096 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2097 
2098 	intin = irqp->airq_intin_no;
2099 	ioapixindex = irqp->airq_ioapicindex;
2100 	mutex_exit(&airq_mutex);
2101 
2102 	iflag = intr_clear();
2103 	lock_set(&apic_ioapic_lock);
2104 
2105 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2106 
2107 	/* clear mask */
2108 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2109 	    (AV_MASK | rdt_entry));
2110 
2111 	lock_clear(&apic_ioapic_lock);
2112 	intr_restore(iflag);
2113 }
2114 
2115 /*
2116  * This function will clear the mask for the interrupt on the I/O APIC
2117  */
2118 static void
2119 apix_intx_clear_mask(int irqno)
2120 {
2121 	int intin, ioapixindex, rdt_entry;
2122 	ulong_t iflag;
2123 	apic_irq_t *irqp;
2124 
2125 	mutex_enter(&airq_mutex);
2126 	irqp = apic_irq_table[irqno];
2127 
2128 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2129 
2130 	intin = irqp->airq_intin_no;
2131 	ioapixindex = irqp->airq_ioapicindex;
2132 	mutex_exit(&airq_mutex);
2133 
2134 	iflag = intr_clear();
2135 	lock_set(&apic_ioapic_lock);
2136 
2137 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2138 
2139 	/* clear mask */
2140 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2141 	    ((~AV_MASK) & rdt_entry));
2142 
2143 	lock_clear(&apic_ioapic_lock);
2144 	intr_restore(iflag);
2145 }
2146 
2147 /*
2148  * For level-triggered interrupt, mask the IRQ line. Mask means
2149  * new interrupts will not be delivered. The interrupt already
2150  * accepted by a local APIC is not affected
2151  */
2152 void
2153 apix_level_intr_pre_eoi(int irq)
2154 {
2155 	apic_irq_t *irqp = apic_irq_table[irq];
2156 	int apic_ix, intin_ix;
2157 
2158 	if (irqp == NULL)
2159 		return;
2160 
2161 	ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2162 
2163 	lock_set(&apic_ioapic_lock);
2164 
2165 	intin_ix = irqp->airq_intin_no;
2166 	apic_ix = irqp->airq_ioapicindex;
2167 
2168 	if (irqp->airq_cpu != CPU->cpu_id) {
2169 		if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2170 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2171 		lock_clear(&apic_ioapic_lock);
2172 		return;
2173 	}
2174 
2175 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2176 		/*
2177 		 * This is a IOxAPIC and there is EOI register:
2178 		 *	Change the vector to reserved unused vector, so that
2179 		 *	the EOI	from Local APIC won't clear the Remote IRR for
2180 		 *	this level trigger interrupt. Instead, we'll manually
2181 		 *	clear it in apix_post_hardint() after ISR handling.
2182 		 */
2183 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2184 		    (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2185 	} else {
2186 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2187 		    AV_MASK | irqp->airq_rdt_entry);
2188 	}
2189 
2190 	lock_clear(&apic_ioapic_lock);
2191 }
2192 
2193 /*
2194  * For level-triggered interrupt, unmask the IRQ line
2195  * or restore the original vector number.
2196  */
2197 void
2198 apix_level_intr_post_dispatch(int irq)
2199 {
2200 	apic_irq_t *irqp = apic_irq_table[irq];
2201 	int apic_ix, intin_ix;
2202 
2203 	if (irqp == NULL)
2204 		return;
2205 
2206 	lock_set(&apic_ioapic_lock);
2207 
2208 	intin_ix = irqp->airq_intin_no;
2209 	apic_ix = irqp->airq_ioapicindex;
2210 
2211 	if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2212 		/*
2213 		 * Already sent EOI back to Local APIC.
2214 		 * Send EOI to IO-APIC
2215 		 */
2216 		ioapic_write_eoi(apic_ix, irqp->airq_vector);
2217 	} else {
2218 		/* clear the mask or restore the vector */
2219 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2220 		    irqp->airq_rdt_entry);
2221 
2222 		/* send EOI to IOxAPIC */
2223 		if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2224 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2225 	}
2226 
2227 	lock_clear(&apic_ioapic_lock);
2228 }
2229 
2230 static int
2231 apix_intx_get_shared(int irqno)
2232 {
2233 	apic_irq_t *irqp;
2234 	int share;
2235 
2236 	mutex_enter(&airq_mutex);
2237 	irqp = apic_irq_table[irqno];
2238 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2239 		mutex_exit(&airq_mutex);
2240 		return (0);
2241 	}
2242 	share = irqp->airq_share;
2243 	mutex_exit(&airq_mutex);
2244 
2245 	return (share);
2246 }
2247 
2248 static void
2249 apix_intx_set_shared(int irqno, int delta)
2250 {
2251 	apic_irq_t *irqp;
2252 
2253 	mutex_enter(&airq_mutex);
2254 	irqp = apic_irq_table[irqno];
2255 	if (IS_IRQ_FREE(irqp)) {
2256 		mutex_exit(&airq_mutex);
2257 		return;
2258 	}
2259 	irqp->airq_share += delta;
2260 	mutex_exit(&airq_mutex);
2261 }
2262 
2263 /*
2264  * Setup IRQ table. Return IRQ no or -1 on failure
2265  */
2266 static int
2267 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2268     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2269 {
2270 	int origirq = ispec->intrspec_vec;
2271 	int newirq;
2272 	short intr_index;
2273 	uchar_t ipin, ioapic, ioapicindex;
2274 	apic_irq_t *irqp;
2275 
2276 	UNREFERENCED_1PARAMETER(inum);
2277 
2278 	if (intrp != NULL) {
2279 		intr_index = (short)(intrp - apic_io_intrp);
2280 		ioapic = intrp->intr_destid;
2281 		ipin = intrp->intr_destintin;
2282 
2283 		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
2284 		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2285 			if (apic_io_id[ioapicindex] == ioapic)
2286 				break;
2287 		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2288 		    (ioapic == INTR_ALL_APIC));
2289 
2290 		/* check whether this intin# has been used by another irqno */
2291 		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2292 			return (newirq);
2293 
2294 	} else if (iflagp != NULL) {	/* ACPI */
2295 		intr_index = ACPI_INDEX;
2296 		ioapicindex = acpi_find_ioapic(irqno);
2297 		ASSERT(ioapicindex != 0xFF);
2298 		ioapic = apic_io_id[ioapicindex];
2299 		ipin = irqno - apic_io_vectbase[ioapicindex];
2300 
2301 		if (apic_irq_table[irqno] &&
2302 		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2303 			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2304 			    apic_irq_table[irqno]->airq_ioapicindex ==
2305 			    ioapicindex);
2306 			return (irqno);
2307 		}
2308 
2309 	} else {	/* default configuration */
2310 		intr_index = DEFAULT_INDEX;
2311 		ioapicindex = 0;
2312 		ioapic = apic_io_id[ioapicindex];
2313 		ipin = (uchar_t)irqno;
2314 	}
2315 
2316 	/* allocate a new IRQ no */
2317 	if ((irqp = apic_irq_table[irqno]) == NULL) {
2318 		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2319 		apic_irq_table[irqno] = irqp;
2320 	} else {
2321 		if (irqp->airq_mps_intr_index != FREE_INDEX) {
2322 			newirq = apic_allocate_irq(apic_first_avail_irq);
2323 			if (newirq == -1) {
2324 				return (-1);
2325 			}
2326 			irqno = newirq;
2327 			irqp = apic_irq_table[irqno];
2328 			ASSERT(irqp != NULL);
2329 		}
2330 	}
2331 	apic_max_device_irq = max(irqno, apic_max_device_irq);
2332 	apic_min_device_irq = min(irqno, apic_min_device_irq);
2333 
2334 	irqp->airq_mps_intr_index = intr_index;
2335 	irqp->airq_ioapicindex = ioapicindex;
2336 	irqp->airq_intin_no = ipin;
2337 	irqp->airq_dip = dip;
2338 	irqp->airq_origirq = (uchar_t)origirq;
2339 	if (iflagp != NULL)
2340 		irqp->airq_iflag = *iflagp;
2341 	irqp->airq_cpu = IRQ_UNINIT;
2342 	irqp->airq_vector = 0;
2343 
2344 	return (irqno);
2345 }
2346 
2347 /*
2348  * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2349  */
2350 static int
2351 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2352     struct intrspec *ispec)
2353 {
2354 	int irqno = ispec->intrspec_vec;
2355 	int newirq, i;
2356 	iflag_t intr_flag;
2357 	ACPI_SUBTABLE_HEADER	*hp;
2358 	ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2359 	struct apic_io_intr *intrp;
2360 
2361 	if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2362 		int busid;
2363 
2364 		if (bustype == 0)
2365 			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2366 
2367 		/* loop checking BUS_ISA/BUS_EISA */
2368 		for (i = 0; i < 2; i++) {
2369 			if (((busid = apic_find_bus_id(bustype)) != -1) &&
2370 			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2371 			    != NULL)) {
2372 				return (apix_intx_setup(dip, inum, irqno,
2373 				    intrp, ispec, NULL));
2374 			}
2375 			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2376 		}
2377 
2378 		/* fall back to default configuration */
2379 		return (-1);
2380 	}
2381 
2382 	/* search iso entries first */
2383 	if (acpi_iso_cnt != 0) {
2384 		hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2385 		i = 0;
2386 		while (i < acpi_iso_cnt) {
2387 			if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2388 				isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2389 				if (isop->Bus == 0 &&
2390 				    isop->SourceIrq == irqno) {
2391 					newirq = isop->GlobalIrq;
2392 					intr_flag.intr_po = isop->IntiFlags &
2393 					    ACPI_MADT_POLARITY_MASK;
2394 					intr_flag.intr_el = (isop->IntiFlags &
2395 					    ACPI_MADT_TRIGGER_MASK) >> 2;
2396 					intr_flag.bustype = BUS_ISA;
2397 
2398 					return (apix_intx_setup(dip, inum,
2399 					    newirq, NULL, ispec, &intr_flag));
2400 				}
2401 				i++;
2402 			}
2403 			hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2404 			    hp->Length);
2405 		}
2406 	}
2407 	intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2408 	intr_flag.intr_el = INTR_EL_EDGE;
2409 	intr_flag.bustype = BUS_ISA;
2410 	return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2411 }
2412 
2413 
2414 /*
2415  * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2416  */
2417 static int
2418 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2419     struct intrspec *ispec)
2420 {
2421 	int busid, devid, pci_irq;
2422 	ddi_acc_handle_t cfg_handle;
2423 	uchar_t ipin;
2424 	iflag_t intr_flag;
2425 	struct apic_io_intr *intrp;
2426 
2427 	if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2428 		return (-1);
2429 
2430 	if (busid == 0 && apic_pci_bus_total == 1)
2431 		busid = (int)apic_single_pci_busid;
2432 
2433 	if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2434 		return (-1);
2435 	ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2436 	pci_config_teardown(&cfg_handle);
2437 
2438 	if (apic_enable_acpi && !apic_use_acpi_madt_only) {	/* ACPI */
2439 		if (apic_acpi_translate_pci_irq(dip, busid, devid,
2440 		    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2441 			return (-1);
2442 
2443 		intr_flag.bustype = (uchar_t)bustype;
2444 		return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2445 		    &intr_flag));
2446 	}
2447 
2448 	/* MP configuration table */
2449 	pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2450 	if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2451 		pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2452 		if (pci_irq == -1)
2453 			return (-1);
2454 	}
2455 
2456 	return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2457 }
2458 
2459 /*
2460  * Translate and return IRQ no
2461  */
2462 static int
2463 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2464 {
2465 	int newirq, irqno = ispec->intrspec_vec;
2466 	int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2467 	int bustype = 0, dev_len;
2468 	char dev_type[16];
2469 
2470 	if (apic_defconf) {
2471 		mutex_enter(&airq_mutex);
2472 		goto defconf;
2473 	}
2474 
2475 	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2476 		mutex_enter(&airq_mutex);
2477 		goto nonpci;
2478 	}
2479 
2480 	/*
2481 	 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2482 	 * to avoid extra buffer allocation.
2483 	 */
2484 	dev_len = sizeof (dev_type);
2485 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2486 	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2487 	    &dev_len) == DDI_PROP_SUCCESS) {
2488 		if ((strcmp(dev_type, "pci") == 0) ||
2489 		    (strcmp(dev_type, "pciex") == 0))
2490 			parent_is_pci_or_pciex = 1;
2491 	}
2492 
2493 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2494 	    DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2495 	    &dev_len) == DDI_PROP_SUCCESS) {
2496 		if (strstr(dev_type, "pciex"))
2497 			child_is_pciex = 1;
2498 	}
2499 
2500 	mutex_enter(&airq_mutex);
2501 
2502 	if (parent_is_pci_or_pciex) {
2503 		bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2504 		newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2505 		if (newirq != -1)
2506 			goto done;
2507 		bustype = 0;
2508 	} else if (strcmp(dev_type, "isa") == 0)
2509 		bustype = BUS_ISA;
2510 	else if (strcmp(dev_type, "eisa") == 0)
2511 		bustype = BUS_EISA;
2512 
2513 nonpci:
2514 	newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2515 	if (newirq != -1)
2516 		goto done;
2517 
2518 defconf:
2519 	newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2520 	if (newirq == -1) {
2521 		mutex_exit(&airq_mutex);
2522 		return (-1);
2523 	}
2524 done:
2525 	ASSERT(apic_irq_table[newirq]);
2526 	mutex_exit(&airq_mutex);
2527 	return (newirq);
2528 }
2529 
2530 static int
2531 apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2532 {
2533 	int irqno;
2534 	apix_vector_t *vecp;
2535 
2536 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2537 		return (0);
2538 
2539 	if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2540 		return (0);
2541 
2542 	DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2543 	    "irqno=0x%x cpuid=%d vector=0x%x\n",
2544 	    (void *)dip, ddi_driver_name(dip), irqno,
2545 	    vecp->v_cpuid, vecp->v_vector));
2546 
2547 	return (1);
2548 }
2549 
2550 /*
2551  * Return the vector number if the translated IRQ for this device
2552  * has a vector mapping setup. If no IRQ setup exists or no vector is
2553  * allocated to it then return 0.
2554  */
2555 static apix_vector_t *
2556 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2557 {
2558 	int irqno;
2559 	apix_vector_t *vecp;
2560 
2561 	/* get the IRQ number */
2562 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2563 		return (NULL);
2564 
2565 	/* get the vector number if a vector is allocated to this irqno */
2566 	vecp = apix_intx_get_vector(irqno);
2567 
2568 	return (vecp);
2569 }
2570 
2571 /*
2572  * Switch between safe and x2APIC IPI sending method.
2573  * The CPU may power on in xapic mode or x2apic mode. If the CPU needs to send
2574  * an IPI to other CPUs before entering x2APIC mode, it still needs to use the
2575  * xAPIC method. Before sending a StartIPI to the target CPU, psm_send_ipi will
2576  * be changed to apic_common_send_ipi, which detects current local APIC mode and
2577  * use the right method to send an IPI. If some CPUs fail to start up,
2578  * apic_poweron_cnt won't return to zero, so apic_common_send_ipi will always be
2579  * used. psm_send_ipi can't be simply changed back to x2apic_send_ipi if some
2580  * CPUs failed to start up because those failed CPUs may recover itself later at
2581  * unpredictable time.
2582  */
2583 void
2584 apic_switch_ipi_callback(boolean_t enter)
2585 {
2586 	ulong_t iflag;
2587 	struct psm_ops *pops = psmops;
2588 
2589 	iflag = intr_clear();
2590 	lock_set(&apic_mode_switch_lock);
2591 	if (enter) {
2592 		ASSERT(apic_poweron_cnt >= 0);
2593 		if (apic_poweron_cnt == 0) {
2594 			pops->psm_send_ipi = apic_common_send_ipi;
2595 			send_dirintf = pops->psm_send_ipi;
2596 			pops->psm_send_pir_ipi = apic_common_send_pir_ipi;
2597 			psm_send_pir_ipi = pops->psm_send_pir_ipi;
2598 		}
2599 		apic_poweron_cnt++;
2600 	} else {
2601 		ASSERT(apic_poweron_cnt > 0);
2602 		apic_poweron_cnt--;
2603 		if (apic_poweron_cnt == 0) {
2604 			pops->psm_send_ipi = x2apic_send_ipi;
2605 			send_dirintf = pops->psm_send_ipi;
2606 			pops->psm_send_pir_ipi = x2apic_send_pir_ipi;
2607 			psm_send_pir_ipi = pops->psm_send_pir_ipi;
2608 		}
2609 	}
2610 	lock_clear(&apic_mode_switch_lock);
2611 	intr_restore(iflag);
2612 }
2613 
2614 /* stub function */
2615 int
2616 apix_loaded(void)
2617 {
2618 	return (apix_is_enabled);
2619 }
2620