xref: /titanic_51/usr/src/uts/i86pc/io/apix/apix.c (revision b533f56bf95137d3de6666bd923e15ec373ea611)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  */
29 
30 /*
31  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
32  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
33  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
34  * PSMI 1.5 extensions are supported in Solaris Nevada.
35  * PSMI 1.6 extensions are supported in Solaris Nevada.
36  * PSMI 1.7 extensions are supported in Solaris Nevada.
37  */
38 #define	PSMI_1_7
39 
40 #include <sys/processor.h>
41 #include <sys/time.h>
42 #include <sys/psm.h>
43 #include <sys/smp_impldefs.h>
44 #include <sys/cram.h>
45 #include <sys/acpi/acpi.h>
46 #include <sys/acpica.h>
47 #include <sys/psm_common.h>
48 #include <sys/pit.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/ddi_impldefs.h>
52 #include <sys/pci.h>
53 #include <sys/promif.h>
54 #include <sys/x86_archext.h>
55 #include <sys/cpc_impl.h>
56 #include <sys/uadmin.h>
57 #include <sys/panic.h>
58 #include <sys/debug.h>
59 #include <sys/archsystm.h>
60 #include <sys/trap.h>
61 #include <sys/machsystm.h>
62 #include <sys/sysmacros.h>
63 #include <sys/cpuvar.h>
64 #include <sys/rm_platter.h>
65 #include <sys/privregs.h>
66 #include <sys/note.h>
67 #include <sys/pci_intr_lib.h>
68 #include <sys/spl.h>
69 #include <sys/clock.h>
70 #include <sys/dditypes.h>
71 #include <sys/sunddi.h>
72 #include <sys/x_call.h>
73 #include <sys/reboot.h>
74 #include <sys/mach_intr.h>
75 #include <sys/apix.h>
76 #include <sys/apix_irm_impl.h>
77 
/*
 * Forward declarations of the PSM entry points implemented in this file.
 * Most of them are installed in the apix_ops table; apix_addspl() and
 * apix_delspl() are also installed there but are not declared here.
 */
static int apix_probe();
static void apix_init();
static void apix_picinit(void);
static int apix_intr_enter(int, int *);
static void apix_intr_exit(int, int);
static void apix_setspl(int);
static int apix_disable_intr(processorid_t);
static void apix_enable_intr(processorid_t);
static int apix_get_clkvect(int);
static int apix_get_ipivect(int, int);
static void apix_post_cyclic_setup(void *);
static int apix_post_cpu_start();
static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);

/*
 * Helper functions for apix_intr_ops()
 *
 * Note that apix_redistribute_compute() is also called from
 * apix_intr_enter() on the periodic clock interrupt and is registered as
 * a periodic handler in apix_post_cyclic_setup().
 */
static void apix_redistribute_compute(void);
static int apix_get_pending(apix_vector_t *);
static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
static char *apix_get_apic_type(void);
static int apix_intx_get_pending(int);
static void apix_intx_set_mask(int irqno);
static void apix_intx_clear_mask(int irqno);
static int apix_intx_get_shared(int irqno);
static void apix_intx_set_shared(int irqno, int delta);
static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
    struct intrspec *);
static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);

/* Clock initialization routine; installed in the apix_ops table. */
extern int apic_clkinit(int);

/* IRM initialization for APIX PSM module */
extern void apix_irm_init(void);

/*
 * When non-zero, apix_init() fills in the IRM pool parameters and
 * apix_picinit() calls apix_irm_init().
 */
extern int irm_enable;
116 
/*
 *	Local static data
 */

/*
 * PSM operations vector for the apix module.  The initializer is
 * positional, so the order of the entries must match the layout of
 * struct psm_ops.
 *
 * Three members are rewritten at run time elsewhere in this file:
 *   - psm_gethrtime is pointed at apic_gettime by _init() when
 *     apic_coarse_hrtime is set;
 *   - psm_setspl and psm_send_ipi are replaced by x2apic_update_psm().
 */
static struct	psm_ops apix_ops = {
	apix_probe,

	apix_init,
	apix_picinit,
	apix_intr_enter,
	apix_intr_exit,
	apix_setspl,
	apix_addspl,
	apix_delspl,
	apix_disable_intr,
	apix_enable_intr,
	NULL,			/* psm_softlvl_to_irq */
	NULL,			/* psm_set_softintr */

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apix_get_clkvect,
	NULL,			/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apix_post_cpu_start,
	apic_shutdown,
	apix_get_ipivect,
	apic_send_ipi,

	NULL,			/* psm_translate_irq */
	NULL,			/* psm_notify_error */
	NULL,			/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apix_post_cyclic_setup,
	apic_preshutdown,
	apix_intr_ops,		/* Advanced DDI Interrupt framework */
	apic_state,		/* save, restore apic state for S3 */
	apic_cpu_ops,		/* CPU control interface. */
};
162 
/* Non-static pointer to this module's operations vector. */
struct psm_ops *psmops = &apix_ops;

/*
 * Module description handed to psm_mod_init(), psm_mod_fini() and
 * psm_mod_info().  Its p_mach_idstring member is passed to
 * apic_probe_common() by apix_probe().
 */
static struct	psm_info apix_psm_info = {
	PSM_INFO_VER01_7,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	&apix_ops,				/* operation */
	APIX_NAME,				/* machine name */
	"apix MPv1.4 compatible",
};

/* Handle passed by address to the psm_mod_*() routines. */
static void *apix_hdlp;

/* Set to 1 by apix_probe() when apic_probe_common() succeeds, else 0. */
static int apix_is_enabled = 0;
176 
/*
 * Flag to indicate if APIX is to be enabled only for platforms
 * with specific hw feature(s).  Consulted by apix_probe().
 */
int apix_hw_chk_enable = 1;

/*
 * Hw features that are checked for enabling APIX support.
 * apix_probe() fails when APIX_SUPPORT_X2APIC is set here and the
 * hardware neither runs in nor supports x2APIC mode.
 */
#define	APIX_SUPPORT_X2APIC	0x00000001
uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;

/*
 * apix_lock is used for cpu selection and vector re-binding
 */
lock_t apix_lock;
/* Per-CPU apix state, indexed by cpu id; populated by apix_softinit(). */
apix_impl_t *apixs[NCPU];
/*
 * Mapping between device interrupt and the allocated vector. Indexed
 * by major number.
 */
apix_dev_vector_t **apix_dev_vector;
/*
 * Mapping between device major number and cpu id. It gets used
 * when interrupt binding policy round robin with affinity is
 * applied. With that policy, devices with the same major number
 * will be bound to the same CPU.
 *
 * Only allocated by apix_softinit() when apic_intr_policy is
 * INTR_ROUND_ROBIN_WITH_AFFINITY; every entry starts as IRQ_UNINIT.
 */
processorid_t *apix_major_to_cpu;	/* major to cpu mapping */
kmutex_t apix_mutex;	/* for apix_dev_vector & apix_major_to_cpu */

int apix_nipis = 16;	/* Maximum number of IPIs */
/*
 * Maximum number of vectors in a CPU that can be used for interrupt
 * allocation (including IPIs and the reserved vectors).
 */
int apix_cpu_nvectors = APIX_NVECTOR;

/* gcpu.h */

/*
 * NOTE(review): neither of these externs is referenced in the part of the
 * file reviewed here; confirm they are still needed.
 */
extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
extern void apic_change_eoi();
219 
220 /*
221  *	This is the loadable module wrapper
222  */
223 
224 int
225 _init(void)
226 {
227 	if (apic_coarse_hrtime)
228 		apix_ops.psm_gethrtime = &apic_gettime;
229 	return (psm_mod_init(&apix_hdlp, &apix_psm_info));
230 }
231 
232 int
233 _fini(void)
234 {
235 	return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
236 }
237 
238 int
239 _info(struct modinfo *modinfop)
240 {
241 	return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
242 }
243 
244 static int
245 apix_probe()
246 {
247 	int rval;
248 
249 	if (apix_enable == 0)
250 		return (PSM_FAILURE);
251 
252 	/* check for hw features if specified  */
253 	if (apix_hw_chk_enable) {
254 		/* check if x2APIC mode is supported */
255 		if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
256 		    APIX_SUPPORT_X2APIC) {
257 			if (!((apic_local_mode() == LOCAL_X2APIC) ||
258 			    apic_detect_x2apic())) {
259 				/* x2APIC mode is not supported in the hw */
260 				apix_enable = 0;
261 			}
262 		}
263 		if (apix_enable == 0)
264 			return (PSM_FAILURE);
265 	}
266 
267 	rval = apic_probe_common(apix_psm_info.p_mach_idstring);
268 	if (rval == PSM_SUCCESS)
269 		apix_is_enabled = 1;
270 	else
271 		apix_is_enabled = 0;
272 	return (rval);
273 }
274 
275 /*
276  * Initialize the data structures needed by pcplusmpx module.
277  * Specifically, the data structures used by addspl() and delspl()
278  * routines.
279  */
280 static void
281 apix_softinit()
282 {
283 	int i, *iptr;
284 	apix_impl_t *hdlp;
285 	int nproc;
286 
287 	nproc = max(apic_nproc, apic_max_nproc);
288 
289 	hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
290 	for (i = 0; i < nproc; i++) {
291 		apixs[i] = &hdlp[i];
292 		apixs[i]->x_cpuid = i;
293 		LOCK_INIT_CLEAR(&apixs[i]->x_lock);
294 	}
295 
296 	/* cpu 0 is always up (for now) */
297 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
298 
299 	iptr = (int *)&apic_irq_table[0];
300 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
301 		apic_level_intr[i] = 0;
302 		*iptr++ = NULL;
303 	}
304 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
305 
306 	apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
307 	    KM_SLEEP);
308 
309 	if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
310 		apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
311 		    KM_SLEEP);
312 		for (i = 0; i < devcnt; i++)
313 			apix_major_to_cpu[i] = IRQ_UNINIT;
314 	}
315 
316 	mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
317 }
318 
319 static int
320 apix_get_pending_spl(void)
321 {
322 	int cpuid = CPU->cpu_id;
323 
324 	return (bsrw_insn(apixs[cpuid]->x_intr_pending));
325 }
326 
327 static uintptr_t
328 apix_get_intr_handler(int cpu, short vec)
329 {
330 	apix_vector_t *apix_vector;
331 
332 	ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
333 	if (cpu >= apic_nproc)
334 		return (NULL);
335 
336 	apix_vector = apixs[cpu]->x_vectbl[vec];
337 
338 	return ((uintptr_t)(apix_vector->v_autovect));
339 }
340 
341 #if defined(__amd64)
342 static unsigned char dummy_cpu_pri[MAXIPL + 1] = {
343 	0, 0, 0, 0, 0, 0, 0, 0,
344 	0, 0, 0, 0, 0, 0, 0, 0, 0
345 };
346 #endif
347 
348 static void
349 apix_init()
350 {
351 	extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
352 
353 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
354 
355 	do_interrupt_common = apix_do_interrupt;
356 	addintr = apix_add_avintr;
357 	remintr = apix_rem_avintr;
358 	get_pending_spl = apix_get_pending_spl;
359 	get_intr_handler = apix_get_intr_handler;
360 	psm_get_localapicid = apic_get_localapicid;
361 	psm_get_ioapicid = apic_get_ioapicid;
362 
363 	apix_softinit();
364 #if defined(__amd64)
365 	/*
366 	 * Make cpu-specific interrupt info point to cr8pri vector
367 	 */
368 	CPU->cpu_pri_data = dummy_cpu_pri;
369 #else
370 	if (cpuid_have_cr8access(CPU))
371 		apic_have_32bit_cr8 = 1;
372 #endif	/* __amd64 */
373 
374 	/*
375 	 * Initialize IRM pool parameters
376 	 */
377 	if (irm_enable) {
378 		int	i;
379 		int	lowest_irq;
380 		int	highest_irq;
381 
382 		/* number of CPUs present */
383 		apix_irminfo.apix_ncpus = apic_nproc;
384 		/* total number of entries in all of the IOAPICs present */
385 		lowest_irq = apic_io_vectbase[0];
386 		highest_irq = apic_io_vectend[0];
387 		for (i = 1; i < apic_io_max; i++) {
388 			if (apic_io_vectbase[i] < lowest_irq)
389 				lowest_irq = apic_io_vectbase[i];
390 			if (apic_io_vectend[i] > highest_irq)
391 				highest_irq = apic_io_vectend[i];
392 		}
393 		apix_irminfo.apix_ioapic_max_vectors =
394 		    highest_irq - lowest_irq + 1;
395 		/*
396 		 * Number of available per-CPU vectors excluding
397 		 * reserved vectors for Dtrace, int80, system-call,
398 		 * fast-trap, etc.
399 		 */
400 		apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
401 		    APIX_SW_RESERVED_VECTORS;
402 
403 		/* Number of vectors (pre) allocated (SCI and HPET) */
404 		apix_irminfo.apix_vectors_allocated = 0;
405 		if (apic_hpet_vect != -1)
406 			apix_irminfo.apix_vectors_allocated++;
407 		if (apic_sci_vect != -1)
408 			apix_irminfo.apix_vectors_allocated++;
409 	}
410 }
411 
/*
 * Program the local APIC of the calling CPU.  Called from apix_picinit()
 * and from apix_post_cpu_start().
 *
 * All interrupts are masked through the task priority register while the
 * registers are programmed and unmasked again (task register written to
 * 0) on the way out.  In between, in order: the logical destination
 * setup (xAPIC/MMIO mode only), the spurious interrupt vector register,
 * the timer/LINT0/LINT1 LVT entries, and -- depending on the number of
 * LVT entries and on tunables -- the performance counter overflow,
 * thermal, error and CMCI entries.
 *
 * The vectors for the optional interrupts are allocated and their
 * handlers registered only once, by the first caller that finds the
 * corresponding global (apic_cpcovf_vect, apic_errvect, apic_cmci_vect)
 * still 0.
 *
 * NOTE(review): apix_get_ipivect() returns -1 on failure, which the
 * ASSERT(vector) checks below do not catch since -1 is non-zero.
 */
static void
apix_init_intr()
{
	processorid_t	cpun = psm_get_cpu_id();
	uint_t nlvt;
	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
	extern void cmi_cmci_trap(void);

	/* block everything while the local APIC is being set up */
	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);

	if (apic_mode == LOCAL_APIC) {
		/*
		 * We are running APIC in MMIO mode.
		 */
		if (apic_flat_model) {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_FLAT_MODEL);
		} else {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_CLUSTER_MODEL);
		}

		apic_reg_ops->apic_write(APIC_DEST_REG,
		    AV_HIGH_ORDER >> cpun);
	}

	if (apic_directed_EOI_supported()) {
		/*
		 * Setting the 12th bit in the Spurious Interrupt Vector
		 * Register suppresses broadcast EOIs generated by the local
		 * APIC. The suppression of broadcast EOIs happens only when
		 * interrupts are level-triggered.
		 */
		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
	}

	/* need to enable APIC before unmasking NMI */
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);

	/*
	 * Presence of an invalid vector with delivery mode AV_FIXED can
	 * cause an error interrupt, even if the entry is masked...so
	 * write a valid vector to LVT entries along with the mask bit
	 */

	/* All APICs have timer and LINT0/1 */
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */

	/*
	 * On integrated APICs, the number of LVT entries is
	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
	 */

	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
		nlvt = 3;
	} else {
		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
		    0xFF) + 1;
	}

	if (nlvt >= 5) {
		/* Enable performance counter overflow interrupt */

		/* without MSR support the overflow interrupt is turned off */
		if (!is_x86_feature(x86_featureset, X86FSET_MSR))
			apic_enable_cpcovf_intr = 0;
		if (apic_enable_cpcovf_intr) {
			if (apic_cpcovf_vect == 0) {
				int ipl = APIC_PCINT_IPL;

				apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
				ASSERT(apic_cpcovf_vect);

				(void) add_avintr(NULL, ipl,
				    (avfunc)kcpc_hw_overflow_intr,
				    "apic pcint", apic_cpcovf_vect,
				    NULL, NULL, NULL, NULL);
				kcpc_hw_overflow_intr_installed = 1;
				kcpc_hw_enable_cpc_intr =
				    apic_cpcovf_mask_clear;
			}
			apic_reg_ops->apic_write(APIC_PCINT_VECT,
			    apic_cpcovf_vect);
		}
	}

	if (nlvt >= 6) {
		/* Only mask TM intr if the BIOS apparently doesn't use it */

		uint32_t lvtval;

		/* leave an unmasked entry in SMI delivery mode untouched */
		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
		if (((lvtval & AV_MASK) == AV_MASK) ||
		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
			apic_reg_ops->apic_write(APIC_THERM_VECT,
			    AV_MASK|APIC_RESV_IRQ);
		}
	}

	/* Enable error interrupt */

	if (nlvt >= 4 && apic_enable_error_intr) {
		if (apic_errvect == 0) {
			int ipl = 0xf;	/* get highest priority intr */
			apic_errvect = apix_get_ipivect(ipl, -1);
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr(NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    apic_errvect, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
		/* the error status register is deliberately written twice */
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
	}

	/* Enable CMCI interrupt */
	if (cmi_enable_cmci) {
		/* register the CMCI cpu setup callback exactly once */
		mutex_enter(&cmci_cpu_setup_lock);
		if (cmci_cpu_setup_registered == 0) {
			mutex_enter(&cpu_lock);
			register_cpu_setup_func(cmci_cpu_setup, NULL);
			mutex_exit(&cpu_lock);
			cmci_cpu_setup_registered = 1;
		}
		mutex_exit(&cmci_cpu_setup_lock);

		if (apic_cmci_vect == 0) {
			int ipl = 0x2;
			apic_cmci_vect = apix_get_ipivect(ipl, -1);
			ASSERT(apic_cmci_vect);

			(void) add_avintr(NULL, ipl,
			    (avfunc)cmi_cmci_trap, "apic cmci intr",
			    apic_cmci_vect, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	}

	/* setup done: let interrupts through again */
	apic_reg_ops->apic_write_task_reg(0);
}
558 
559 static void
560 apix_picinit(void)
561 {
562 	int i, j;
563 	uint_t isr;
564 
565 	APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
566 
567 	/*
568 	 * initialize interrupt remapping before apic
569 	 * hardware initialization
570 	 */
571 	apic_intrmap_init(apic_mode);
572 	if (apic_vt_ops == psm_vt_ops)
573 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
574 
575 	/*
576 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
577 	 * bit on without clearing it with EOI.  Since softint
578 	 * uses vector 0x20 to interrupt itself, so softint will
579 	 * not work on this machine.  In order to fix this problem
580 	 * a check is made to verify all the isr bits are clear.
581 	 * If not, EOIs are issued to clear the bits.
582 	 */
583 	for (i = 7; i >= 1; i--) {
584 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
585 		if (isr != 0)
586 			for (j = 0; ((j < 32) && (isr != 0)); j++)
587 				if (isr & (1 << j)) {
588 					apic_reg_ops->apic_write(
589 					    APIC_EOI_REG, 0);
590 					isr &= ~(1 << j);
591 					apic_error |= APIC_ERR_BOOT_EOI;
592 				}
593 	}
594 
595 	/* set a flag so we know we have run apic_picinit() */
596 	apic_picinit_called = 1;
597 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
598 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
599 	LOCK_INIT_CLEAR(&apic_error_lock);
600 	LOCK_INIT_CLEAR(&apic_mode_switch_lock);
601 
602 	picsetup();	 /* initialise the 8259 */
603 
604 	/* add nmi handler - least priority nmi handler */
605 	LOCK_INIT_CLEAR(&apic_nmi_lock);
606 
607 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
608 	    "apix NMI handler", (caddr_t)NULL))
609 		cmn_err(CE_WARN, "apix: Unable to add nmi handler");
610 
611 	apix_init_intr();
612 
613 	/* enable apic mode if imcr present */
614 	if (apic_imcrp) {
615 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
616 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
617 	}
618 
619 	ioapix_init_intr(IOAPIC_MASK);
620 
621 	/* setup global IRM pool if applicable */
622 	if (irm_enable)
623 		apix_irm_init();
624 }
625 
626 static __inline__ void
627 apix_send_eoi(void)
628 {
629 	if (apic_mode == LOCAL_APIC)
630 		LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
631 	else
632 		X2APIC_WRITE(APIC_EOI_REG, 0);
633 }
634 
/*
 * platform_intr_enter
 *
 *	Called at the beginning of the interrupt service routine to
 *	mask all level equal to and below the interrupt priority
 *	of the interrupting vector.  An EOI should be given to
 *	the interrupt controller to enable other HW interrupts.
 *
 *	Return -1 for spurious interrupts
 *
 * Input:
 * ipl     - priority compared against the priority of the new interrupt
 *	     when deciding whether to update the per-CPU in-progress state
 * vectorp - in/out; on entry holds the delivered vector minus
 *	     APIC_BASE_VECT, on return holds the real vector
 *
 * Output:
 * APIC_INT_SPURIOUS for the spurious vector.  For a vector with no
 * apix_vector_t on this CPU, apix_rebindinfo.i_pri if it is a fake
 * interrupt and -1 otherwise.  In all other cases the v_pri of the
 * vector.
 *
 * The EOI is sent here, before the handler runs, on every path except
 * the spurious one.
 */
static int
apix_intr_enter(int ipl, int *vectorp)
{
	struct cpu *cpu = CPU;
	uint32_t cpuid = CPU->cpu_id;
	apic_cpus_info_t *cpu_infop;
	uchar_t vector;
	apix_vector_t *vecp;
	int nipl = -1;

	/*
	 * The real vector delivered is (*vectorp + 0x20), but our caller
	 * subtracts 0x20 from the vector before passing it to us.
	 * (That's why APIC_BASE_VECT is 0x20.)
	 */
	vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;

	cpu_infop = &apic_cpus[cpuid];
	if (vector == APIC_SPUR_INTR) {
		/* spurious interrupts are only counted; no EOI is sent */
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	vecp = xv_vector(cpuid, vector);
	if (vecp == NULL) {
		/* nothing is allocated at this vector on this CPU */
		if (APIX_IS_FAKE_INTR(vector))
			nipl = apix_rebindinfo.i_pri;
		apix_send_eoi();
		return (nipl);
	}
	nipl = vecp->v_pri;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == (apic_clkvect + APIC_BASE_VECT)) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/* the stamp is bumped on both sides of the update */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apix_redistribute_compute();
		}

		apix_send_eoi();

		/* the per-CPU in-progress state is not updated for the clock */
		return (nipl);
	}

	ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);

	/* pre-EOI handling for level-triggered interrupts */
	if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
	    (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
		apix_level_intr_pre_eoi(vecp->v_inum);

	/* send back EOI */
	apix_send_eoi();

	/* remember which vector is being serviced at this priority */
	cpu_infop->aci_current[nipl] = vector;
	if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
		cpu_infop->aci_curipl = (uchar_t)nipl;
		cpu_infop->aci_ISR_in_progress |= 1 << nipl;
	}

#ifdef	DEBUG
	if (vector >= APIX_IPI_MIN)
		return (nipl);	/* skip IPI */

	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(vecp->v_inum);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);
#endif /* DEBUG */

	return (nipl);
}
724 
725 /*
726  * Any changes made to this function must also change X2APIC
727  * version of intr_exit.
728  */
729 static void
730 apix_intr_exit(int prev_ipl, int arg2)
731 {
732 	int cpuid = psm_get_cpu_id();
733 	apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
734 	apix_impl_t *apixp = apixs[cpuid];
735 
736 	UNREFERENCED_1PARAMETER(arg2);
737 
738 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
739 	/* ISR above current pri could not be in progress */
740 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
741 
742 	if (apixp->x_obsoletes != NULL) {
743 		if (APIX_CPU_LOCK_HELD(cpuid))
744 			return;
745 
746 		APIX_ENTER_CPU_LOCK(cpuid);
747 		(void) apix_obsolete_vector(apixp->x_obsoletes);
748 		APIX_LEAVE_CPU_LOCK(cpuid);
749 	}
750 }
751 
752 /*
753  * Mask all interrupts below or equal to the given IPL.
754  * Any changes made to this function must also change X2APIC
755  * version of setspl.
756  */
757 static void
758 apix_setspl(int ipl)
759 {
760 	/* interrupts at ipl above this cannot be in progress */
761 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
762 
763 	/*
764 	 * Mask all interrupts for XC_HI_PIL (i.e set TPR to 0xf).
765 	 * Otherwise, enable all interrupts (i.e. set TPR to 0).
766 	 */
767 	if (ipl != XC_HI_PIL)
768 		ipl = 0;
769 
770 #if defined(__amd64)
771 	setcr8((ulong_t)ipl);
772 #else
773 	if (apic_have_32bit_cr8)
774 		setcr8((ulong_t)ipl);
775 	else
776 		apicadr[APIC_TASK_REG] = ipl << APIC_IPL_SHIFT;
777 #endif
778 
779 	/*
780 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
781 	 * have enough time to come in before the priority is raised again
782 	 * during the idle() loop.
783 	 */
784 	if (apic_setspl_delay)
785 		(void) apic_reg_ops->apic_get_pri();
786 }
787 
788 /*
789  * X2APIC version of setspl.
790  */
791 static void
792 x2apix_setspl(int ipl)
793 {
794 	/* interrupts at ipl above this cannot be in progress */
795 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
796 
797 	/*
798 	 * Mask all interrupts for XC_HI_PIL (i.e set TPR to 0xf).
799 	 * Otherwise, enable all interrupts (i.e. set TPR to 0).
800 	 */
801 	if (ipl != XC_HI_PIL)
802 		ipl = 0;
803 
804 	X2APIC_WRITE(APIC_TASK_REG, ipl << APIC_IPL_SHIFT);
805 }
806 
807 int
808 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
809 {
810 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
811 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
812 	apix_vector_t *vecp = xv_vector(cpuid, vector);
813 
814 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
815 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
816 
817 	if (vecp->v_type == APIX_TYPE_FIXED)
818 		apix_intx_set_shared(vecp->v_inum, 1);
819 
820 	/* There are more interrupts, so it's already been enabled */
821 	if (vecp->v_share > 1)
822 		return (PSM_SUCCESS);
823 
824 	/* return if it is not hardware interrupt */
825 	if (vecp->v_type == APIX_TYPE_IPI)
826 		return (PSM_SUCCESS);
827 
828 	/*
829 	 * if apix_picinit() has not been called yet, just return.
830 	 * At the end of apic_picinit(), we will call setup_io_intr().
831 	 */
832 	if (!apic_picinit_called)
833 		return (PSM_SUCCESS);
834 
835 	(void) apix_setup_io_intr(vecp);
836 
837 	return (PSM_SUCCESS);
838 }
839 
840 int
841 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
842 {
843 	uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
844 	uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
845 	apix_vector_t *vecp = xv_vector(cpuid, vector);
846 
847 	UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
848 	ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
849 
850 	if (vecp->v_type == APIX_TYPE_FIXED)
851 		apix_intx_set_shared(vecp->v_inum, -1);
852 
853 	/* There are more interrupts */
854 	if (vecp->v_share > 1)
855 		return (PSM_SUCCESS);
856 
857 	/* return if it is not hardware interrupt */
858 	if (vecp->v_type == APIX_TYPE_IPI)
859 		return (PSM_SUCCESS);
860 
861 	if (!apic_picinit_called) {
862 		cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
863 		    virtvec);
864 		return (PSM_SUCCESS);
865 	}
866 
867 	apix_disable_vector(vecp);
868 
869 	return (PSM_SUCCESS);
870 }
871 
872 /*
873  * Try and disable all interrupts. We just assign interrupts to other
874  * processors based on policy. If any were bound by user request, we
875  * let them continue and return failure. We do not bother to check
876  * for cache affinity while rebinding.
877  */
878 static int
879 apix_disable_intr(processorid_t cpun)
880 {
881 	apix_impl_t *apixp = apixs[cpun];
882 	apix_vector_t *vecp, *newp;
883 	int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
884 
885 	lock_set(&apix_lock);
886 
887 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
888 	apic_cpus[cpun].aci_curipl = 0;
889 
890 	/* if this is for SUSPEND operation, skip rebinding */
891 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
892 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
893 			vecp = apixp->x_vectbl[i];
894 			if (!IS_VECT_ENABLED(vecp))
895 				continue;
896 
897 			apix_disable_vector(vecp);
898 		}
899 		lock_clear(&apix_lock);
900 		return (PSM_SUCCESS);
901 	}
902 
903 	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
904 		vecp = apixp->x_vectbl[i];
905 		if (!IS_VECT_ENABLED(vecp))
906 			continue;
907 
908 		if (vecp->v_flags & APIX_VECT_USER_BOUND) {
909 			hardbound++;
910 			continue;
911 		}
912 		type = vecp->v_type;
913 
914 		/*
915 		 * If there are bound interrupts on this cpu, then
916 		 * rebind them to other processors.
917 		 */
918 		loop = 0;
919 		do {
920 			bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
921 
922 			if (type != APIX_TYPE_MSI)
923 				newp = apix_set_cpu(vecp, bindcpu, &ret);
924 			else
925 				newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
926 		} while ((newp == NULL) && (loop++ < apic_nproc));
927 
928 		if (loop >= apic_nproc) {
929 			errbound++;
930 			cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
931 			    vecp->v_cpuid, vecp->v_vector);
932 		}
933 	}
934 
935 	lock_clear(&apix_lock);
936 
937 	if (hardbound || errbound) {
938 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
939 		    "due to user bound interrupts or failed operation",
940 		    cpun);
941 		return (PSM_FAILURE);
942 	}
943 
944 	return (PSM_SUCCESS);
945 }
946 
947 /*
948  * Bind interrupts to specified CPU
949  */
950 static void
951 apix_enable_intr(processorid_t cpun)
952 {
953 	apix_vector_t *vecp;
954 	int i, ret;
955 	processorid_t n;
956 
957 	lock_set(&apix_lock);
958 
959 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
960 
961 	/* interrupt enabling for system resume */
962 	if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
963 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
964 			vecp = xv_vector(cpun, i);
965 			if (!IS_VECT_ENABLED(vecp))
966 				continue;
967 
968 			apix_enable_vector(vecp);
969 		}
970 		apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
971 	}
972 
973 	for (n = 0; n < apic_nproc; n++) {
974 		if (!apic_cpu_in_range(n) || n == cpun ||
975 		    (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
976 			continue;
977 
978 		for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
979 			vecp = xv_vector(n, i);
980 			if (!IS_VECT_ENABLED(vecp) ||
981 			    vecp->v_bound_cpuid != cpun)
982 				continue;
983 
984 			if (vecp->v_type != APIX_TYPE_MSI)
985 				(void) apix_set_cpu(vecp, cpun, &ret);
986 			else
987 				(void) apix_grp_set_cpu(vecp, cpun, &ret);
988 		}
989 	}
990 
991 	lock_clear(&apix_lock);
992 }
993 
994 /*
995  * Allocate vector for IPI
996  * type == -1 indicates it is an internal request. Do not change
997  * resv_vector for these requests.
998  */
999 static int
1000 apix_get_ipivect(int ipl, int type)
1001 {
1002 	uchar_t vector;
1003 
1004 	if ((vector = apix_alloc_ipi(ipl)) > 0) {
1005 		if (type != -1)
1006 			apic_resv_vector[ipl] = vector;
1007 		return (vector);
1008 	}
1009 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
1010 	return (-1);	/* shouldn't happen */
1011 }
1012 
1013 static int
1014 apix_get_clkvect(int ipl)
1015 {
1016 	int vector;
1017 
1018 	if ((vector = apix_get_ipivect(ipl, -1)) == -1)
1019 		return (-1);
1020 
1021 	apic_clkvect = vector - APIC_BASE_VECT;
1022 	APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1023 	    apic_clkvect));
1024 	return (vector);
1025 }
1026 
1027 static int
1028 apix_post_cpu_start()
1029 {
1030 	int cpun;
1031 	static int cpus_started = 1;
1032 
1033 	/* We know this CPU + BSP  started successfully. */
1034 	cpus_started++;
1035 
1036 	/*
1037 	 * On BSP we would have enabled X2APIC, if supported by processor,
1038 	 * in acpi_probe(), but on AP we do it here.
1039 	 *
1040 	 * We enable X2APIC mode only if BSP is running in X2APIC & the
1041 	 * local APIC mode of the current CPU is MMIO (xAPIC).
1042 	 */
1043 	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1044 	    apic_local_mode() == LOCAL_APIC) {
1045 		apic_enable_x2apic();
1046 	}
1047 
1048 	/*
1049 	 * Switch back to x2apic IPI sending method for performance when target
1050 	 * CPU has entered x2apic mode.
1051 	 */
1052 	if (apic_mode == LOCAL_X2APIC) {
1053 		apic_switch_ipi_callback(B_FALSE);
1054 	}
1055 
1056 	splx(ipltospl(LOCK_LEVEL));
1057 	apix_init_intr();
1058 
1059 	/*
1060 	 * since some systems don't enable the internal cache on the non-boot
1061 	 * cpus, so we have to enable them here
1062 	 */
1063 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1064 
1065 #ifdef	DEBUG
1066 	APIC_AV_PENDING_SET();
1067 #else
1068 	if (apic_mode == LOCAL_APIC)
1069 		APIC_AV_PENDING_SET();
1070 #endif	/* DEBUG */
1071 
1072 	/*
1073 	 * We may be booting, or resuming from suspend; aci_status will
1074 	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1075 	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1076 	 */
1077 	cpun = psm_get_cpu_id();
1078 	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1079 
1080 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1081 
1082 	return (PSM_SUCCESS);
1083 }
1084 
1085 /*
1086  * If this module needs a periodic handler for the interrupt distribution, it
1087  * can be added here. The argument to the periodic handler is not currently
1088  * used, but is reserved for future.
1089  */
1090 static void
1091 apix_post_cyclic_setup(void *arg)
1092 {
1093 	UNREFERENCED_1PARAMETER(arg);
1094 
1095 	/* cpu_lock is held */
1096 	/* set up a periodic handler for intr redistribution */
1097 
1098 	/*
1099 	 * In peridoc mode intr redistribution processing is done in
1100 	 * apic_intr_enter during clk intr processing
1101 	 */
1102 	if (!apic_oneshot)
1103 		return;
1104 
1105 	/*
1106 	 * Register a periodical handler for the redistribution processing.
1107 	 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
1108 	 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
1109 	 */
1110 	apic_periodic_id = ddi_periodic_add(
1111 	    (void (*)(void *))apix_redistribute_compute, NULL,
1112 	    apic_redistribute_sample_interval, DDI_IPL_2);
1113 }
1114 
1115 void
1116 x2apic_update_psm()
1117 {
1118 	struct psm_ops *pops = &apix_ops;
1119 
1120 	ASSERT(pops != NULL);
1121 
1122 	/*
1123 	 * The xxx_intr_exit() sets TPR and sends back EOI. The
1124 	 * xxx_setspl() sets TPR. These two routines are not
1125 	 * needed in new design.
1126 	 *
1127 	 * pops->psm_intr_exit = x2apic_intr_exit;
1128 	 * pops->psm_setspl = x2apic_setspl;
1129 	 */
1130 	pops->psm_setspl = x2apix_setspl;
1131 	pops->psm_send_ipi = x2apic_send_ipi;
1132 
1133 	send_dirintf = pops->psm_send_ipi;
1134 
1135 	apic_mode = LOCAL_X2APIC;
1136 	apic_change_ops();
1137 }
1138 
/*
 * This function provides external interface to the nexus for all
 * functionalities related to the new DDI interrupt framework.
 *
 * Input:
 * dip     - pointer to the dev_info structure of the requested device
 * hdlp    - pointer to the internal interrupt handle structure for the
 *	     requested interrupt
 * intr_op - opcode for this call
 * result  - pointer to the integer that will hold the result to be
 *	     passed back if return value is PSM_SUCCESS
 *
 * Output:
 * return value is either PSM_SUCCESS or PSM_FAILURE
 *
 * Notes:
 * - For PSM_INTR_OP_SET_CPU / PSM_INTR_OP_GRP_SET_CPU, *result carries an
 *   errno value (EINVAL or EIO on the failure paths, otherwise whatever
 *   the set-cpu routine stored) and hdlp->ih_vector is updated on success.
 * - PSM_INTR_OP_SET_CAP and unknown opcodes return PSM_FAILURE.
 * - Several opcodes (FREE_VECTORS, CLEAR_MASK, SET_MASK, SET_PRI,
 *   GET_INTR, APIC_TYPE) do not write *result.
 */
static int
apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
    psm_intr_op_t intr_op, int *result)
{
	int		cap;
	apix_vector_t	*vecp, *newvecp;
	struct intrspec *ispec, intr_spec;
	processorid_t target;

	/*
	 * Build a local interrupt spec from the handle.  It is used by the
	 * XLATE_VECTOR path; ALLOC_VECTORS for FIXED interrupts replaces
	 * ispec with the one stored in the handle's private data.
	 */
	ispec = &intr_spec;
	ispec->intrspec_pri = hdlp->ih_pri;
	ispec->intrspec_vec = hdlp->ih_inum;
	ispec->intrspec_func = hdlp->ih_cb_func;

	switch (intr_op) {
	case PSM_INTR_OP_ALLOC_VECTORS:
		switch (hdlp->ih_type) {
		case DDI_INTR_TYPE_MSI:
			/* allocate MSI vectors */
			*result = apix_alloc_msi(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1,
			    (int)(uintptr_t)hdlp->ih_scratch2);
			break;
		case DDI_INTR_TYPE_MSIX:
			/* allocate MSI-X vectors */
			*result = apix_alloc_msix(dip, hdlp->ih_inum,
			    hdlp->ih_scratch1,
			    (int)(uintptr_t)hdlp->ih_scratch2);
			break;
		case DDI_INTR_TYPE_FIXED:
			/* allocate or share vector for fixed */
			if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
				return (PSM_FAILURE);
			}
			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
			*result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
			    ispec);
			break;
		default:
			return (PSM_FAILURE);
		}
		break;
	case PSM_INTR_OP_FREE_VECTORS:
		apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
		    hdlp->ih_type);
		break;
	case PSM_INTR_OP_XLATE_VECTOR:
		/*
		 * Vectors are allocated by ALLOC and freed by FREE.
		 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
		 */
		*result = APIX_INVALID_VECT;
		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
		if (vecp != NULL) {
			*result = APIX_VIRTVECTOR(vecp->v_cpuid,
			    vecp->v_vector);
			break;
		}

		/*
		 * No vector to device mapping exists. If this is FIXED type
		 * then check if this IRQ is already mapped for another device
		 * then return the vector number for it (i.e. shared IRQ case).
		 * Otherwise, return PSM_FAILURE.
		 */
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
			vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
			    ispec);
			*result = (vecp == NULL) ? APIX_INVALID_VECT :
			    APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
		}
		if (*result == APIX_INVALID_VECT)
			return (PSM_FAILURE);
		break;
	case PSM_INTR_OP_GET_PENDING:
		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
		if (vecp == NULL)
			return (PSM_FAILURE);

		*result = apix_get_pending(vecp);
		break;
	case PSM_INTR_OP_CLEAR_MASK:
		/* Masking is only handled for FIXED interrupts here. */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);

		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
		if (vecp == NULL)
			return (PSM_FAILURE);

		apix_intx_clear_mask(vecp->v_inum);
		break;
	case PSM_INTR_OP_SET_MASK:
		/* Masking is only handled for FIXED interrupts here. */
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);

		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
		if (vecp == NULL)
			return (PSM_FAILURE);

		apix_intx_set_mask(vecp->v_inum);
		break;
	case PSM_INTR_OP_GET_SHARED:
		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
			return (PSM_FAILURE);

		vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
		if (vecp == NULL)
			return (PSM_FAILURE);

		*result = apix_intx_get_shared(vecp->v_inum);
		break;
	case PSM_INTR_OP_SET_PRI:
		/*
		 * Called prior to adding the interrupt handler or when
		 * an interrupt handler is unassigned.
		 *
		 * FIXED interrupts succeed unconditionally; other types
		 * succeed only if a device-to-vector mapping exists.
		 */
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
			return (PSM_SUCCESS);

		if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
			return (PSM_FAILURE);

		break;
	case PSM_INTR_OP_SET_CPU:
	case PSM_INTR_OP_GRP_SET_CPU:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a CPU value.
		 */
		*result = EINVAL;
		target = (int)(intptr_t)hdlp->ih_private;
		if (!apic_cpu_in_range(target)) {
			DDI_INTR_IMPLDBG((CE_WARN,
			    "[grp_]set_cpu: cpu out of range: %d\n", target));
			return (PSM_FAILURE);
		}

		/* apix_lock covers the vector lookup and the rebind. */
		lock_set(&apix_lock);

		vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
		if (!IS_VECT_ENABLED(vecp)) {
			DDI_INTR_IMPLDBG((CE_WARN,
			    "[grp]_set_cpu: invalid vector 0x%x\n",
			    hdlp->ih_vector));
			lock_clear(&apix_lock);
			return (PSM_FAILURE);
		}

		*result = 0;

		if (intr_op == PSM_INTR_OP_SET_CPU)
			newvecp = apix_set_cpu(vecp, target, result);
		else
			newvecp = apix_grp_set_cpu(vecp, target, result);

		lock_clear(&apix_lock);

		/*
		 * NOTE(review): any errno stored in *result by the set-cpu
		 * routine is replaced with EIO here; confirm this is
		 * intended before relying on finer-grained error codes.
		 */
		if (newvecp == NULL) {
			*result = EIO;
			return (PSM_FAILURE);
		}
		/* Record the binding and hand the new virtual vector back. */
		newvecp->v_bound_cpuid = target;
		hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
		    newvecp->v_vector);
		break;

	case PSM_INTR_OP_GET_INTR:
		/*
		 * The interrupt handle given here has been allocated
		 * specifically for this command, and ih_private carries
		 * a pointer to a apic_get_intr_t.
		 */
		if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
			return (PSM_FAILURE);
		break;

	case PSM_INTR_OP_CHECK_MSI:
		/*
		 * Check MSI/X is supported or not at APIC level and
		 * masked off the MSI/X bits in hdlp->ih_type if not
		 * supported before return.  If MSI/X is supported,
		 * leave the ih_type unchanged and return.
		 *
		 * hdlp->ih_type passed in from the nexus has all the
		 * interrupt types supported by the device.
		 */
		if (apic_support_msi == 0) {	/* uninitialized */
			/*
			 * if apic_support_msi is not set, call
			 * apic_check_msi_support() to check whether msi
			 * is supported first
			 */
			if (apic_check_msi_support() == PSM_SUCCESS)
				apic_support_msi = 1;	/* supported */
			else
				apic_support_msi = -1;	/* not-supported */
		}
		if (apic_support_msi == 1) {
			/* MSI is usable; MSI-X additionally needs the knob. */
			if (apic_msix_enable)
				*result = hdlp->ih_type;
			else
				*result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
		} else
			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
			    DDI_INTR_TYPE_MSIX);
		break;
	case PSM_INTR_OP_GET_CAP:
		/* All types report PENDING; only FIXED reports MASKABLE. */
		cap = DDI_INTR_FLAG_PENDING;
		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
			cap |= DDI_INTR_FLAG_MASKABLE;
		*result = cap;
		break;
	case PSM_INTR_OP_APIC_TYPE:
		/*
		 * ih_private is treated as a pointer to an apic_get_type_t
		 * that is filled in here; the APIC version goes in ih_ver.
		 */
		((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
		    apix_get_apic_type();
		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
		    APIX_IPI_MIN;
		((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
		    apic_nproc;
		hdlp->ih_ver = apic_get_apic_version();
		break;
	case PSM_INTR_OP_SET_CAP:
	default:
		return (PSM_FAILURE);
	}

	return (PSM_SUCCESS);
}
1383 
1384 static void
1385 apix_cleanup_busy(void)
1386 {
1387 	int i, j;
1388 	apix_vector_t *vecp;
1389 
1390 	for (i = 0; i < apic_nproc; i++) {
1391 		if (!apic_cpu_in_range(i))
1392 			continue;
1393 		apic_cpus[i].aci_busy = 0;
1394 		for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1395 			if ((vecp = xv_vector(i, j)) != NULL)
1396 				vecp->v_busy = 0;
1397 		}
1398 	}
1399 }
1400 
1401 static void
1402 apix_redistribute_compute(void)
1403 {
1404 	int	i, j, max_busy;
1405 
1406 	if (!apic_enable_dynamic_migration)
1407 		return;
1408 
1409 	if (++apic_nticks == apic_sample_factor_redistribution) {
1410 		/*
1411 		 * Time to call apic_intr_redistribute().
1412 		 * reset apic_nticks. This will cause max_busy
1413 		 * to be calculated below and if it is more than
1414 		 * apic_int_busy, we will do the whole thing
1415 		 */
1416 		apic_nticks = 0;
1417 	}
1418 	max_busy = 0;
1419 	for (i = 0; i < apic_nproc; i++) {
1420 		if (!apic_cpu_in_range(i))
1421 			continue;
1422 		/*
1423 		 * Check if curipl is non zero & if ISR is in
1424 		 * progress
1425 		 */
1426 		if (((j = apic_cpus[i].aci_curipl) != 0) &&
1427 		    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1428 
1429 			int	vect;
1430 			apic_cpus[i].aci_busy++;
1431 			vect = apic_cpus[i].aci_current[j];
1432 			apixs[i]->x_vectbl[vect]->v_busy++;
1433 		}
1434 
1435 		if (!apic_nticks &&
1436 		    (apic_cpus[i].aci_busy > max_busy))
1437 			max_busy = apic_cpus[i].aci_busy;
1438 	}
1439 	if (!apic_nticks) {
1440 		if (max_busy > apic_int_busy_mark) {
1441 		/*
1442 		 * We could make the following check be
1443 		 * skipped > 1 in which case, we get a
1444 		 * redistribution at half the busy mark (due to
1445 		 * double interval). Need to be able to collect
1446 		 * more empirical data to decide if that is a
1447 		 * good strategy. Punt for now.
1448 		 */
1449 			apix_cleanup_busy();
1450 			apic_skipped_redistribute = 0;
1451 		} else
1452 			apic_skipped_redistribute++;
1453 	}
1454 }
1455 
1456 /*
1457  * intr_ops() service routines
1458  */
1459 
1460 static int
1461 apix_get_pending(apix_vector_t *vecp)
1462 {
1463 	int bit, index, irr, pending;
1464 
1465 	/* need to get on the bound cpu */
1466 	mutex_enter(&cpu_lock);
1467 	affinity_set(vecp->v_cpuid);
1468 
1469 	index = vecp->v_vector / 32;
1470 	bit = vecp->v_vector % 32;
1471 	irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1472 
1473 	affinity_clear();
1474 	mutex_exit(&cpu_lock);
1475 
1476 	pending = (irr & (1 << bit)) ? 1 : 0;
1477 	if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1478 		pending = apix_intx_get_pending(vecp->v_inum);
1479 
1480 	return (pending);
1481 }
1482 
1483 static apix_vector_t *
1484 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1485 {
1486 	apix_vector_t *vecp;
1487 	processorid_t cpuid;
1488 	int32_t virt_vec = 0;
1489 
1490 	switch (flags & PSMGI_INTRBY_FLAGS) {
1491 	case PSMGI_INTRBY_IRQ:
1492 		return (apix_intx_get_vector(hdlp->ih_vector));
1493 	case PSMGI_INTRBY_VEC:
1494 		virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1495 
1496 		cpuid = APIX_VIRTVEC_CPU(virt_vec);
1497 		if (!apic_cpu_in_range(cpuid))
1498 			return (NULL);
1499 
1500 		vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1501 		break;
1502 	case PSMGI_INTRBY_DEFAULT:
1503 		vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1504 		    hdlp->ih_type);
1505 		break;
1506 	default:
1507 		return (NULL);
1508 	}
1509 
1510 	return (vecp);
1511 }
1512 
1513 static int
1514 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1515     apic_get_intr_t *intr_params_p)
1516 {
1517 	apix_vector_t *vecp;
1518 	struct autovec *av_dev;
1519 	int i;
1520 
1521 	vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1522 	if (IS_VECT_FREE(vecp)) {
1523 		intr_params_p->avgi_num_devs = 0;
1524 		intr_params_p->avgi_cpu_id = 0;
1525 		intr_params_p->avgi_req_flags = 0;
1526 		return (PSM_SUCCESS);
1527 	}
1528 
1529 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1530 		intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1531 
1532 		/* Return user bound info for intrd. */
1533 		if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1534 			intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1535 			intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1536 		}
1537 	}
1538 
1539 	if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1540 		intr_params_p->avgi_vector = vecp->v_vector;
1541 
1542 	if (intr_params_p->avgi_req_flags &
1543 	    (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1544 		/* Get number of devices from apic_irq table shared field. */
1545 		intr_params_p->avgi_num_devs = vecp->v_share;
1546 
1547 	if (intr_params_p->avgi_req_flags &  PSMGI_REQ_GET_DEVS) {
1548 
1549 		intr_params_p->avgi_req_flags  |= PSMGI_REQ_NUM_DEVS;
1550 
1551 		/* Some devices have NULL dip.  Don't count these. */
1552 		if (intr_params_p->avgi_num_devs > 0) {
1553 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1554 			    av_dev = av_dev->av_link) {
1555 				if (av_dev->av_vector && av_dev->av_dip)
1556 					i++;
1557 			}
1558 			intr_params_p->avgi_num_devs =
1559 			    (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1560 		}
1561 
1562 		/* There are no viable dips to return. */
1563 		if (intr_params_p->avgi_num_devs == 0) {
1564 			intr_params_p->avgi_dip_list = NULL;
1565 
1566 		} else {	/* Return list of dips */
1567 
1568 			/* Allocate space in array for that number of devs. */
1569 			intr_params_p->avgi_dip_list = kmem_zalloc(
1570 			    intr_params_p->avgi_num_devs *
1571 			    sizeof (dev_info_t *),
1572 			    KM_NOSLEEP);
1573 			if (intr_params_p->avgi_dip_list == NULL) {
1574 				DDI_INTR_IMPLDBG((CE_WARN,
1575 				    "apix_get_vector_intr_info: no memory"));
1576 				return (PSM_FAILURE);
1577 			}
1578 
1579 			/*
1580 			 * Loop through the device list of the autovec table
1581 			 * filling in the dip array.
1582 			 *
1583 			 * Note that the autovect table may have some special
1584 			 * entries which contain NULL dips.  These will be
1585 			 * ignored.
1586 			 */
1587 			for (i = 0, av_dev = vecp->v_autovect; av_dev;
1588 			    av_dev = av_dev->av_link) {
1589 				if (av_dev->av_vector && av_dev->av_dip)
1590 					intr_params_p->avgi_dip_list[i++] =
1591 					    av_dev->av_dip;
1592 			}
1593 		}
1594 	}
1595 
1596 	return (PSM_SUCCESS);
1597 }
1598 
1599 static char *
1600 apix_get_apic_type(void)
1601 {
1602 	return (apix_psm_info.p_mach_idstring);
1603 }
1604 
1605 apix_vector_t *
1606 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1607 {
1608 	apix_vector_t *newp = NULL;
1609 	dev_info_t *dip;
1610 	int inum, cap_ptr;
1611 	ddi_acc_handle_t handle;
1612 	ddi_intr_msix_t *msix_p = NULL;
1613 	ushort_t msix_ctrl;
1614 	uintptr_t off;
1615 	uint32_t mask;
1616 
1617 	ASSERT(LOCK_HELD(&apix_lock));
1618 	*result = ENXIO;
1619 
1620 	/* Fail if this is an MSI intr and is part of a group. */
1621 	if (vecp->v_type == APIX_TYPE_MSI) {
1622 		if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1623 			return (NULL);
1624 		else
1625 			return (apix_grp_set_cpu(vecp, new_cpu, result));
1626 	}
1627 
1628 	/*
1629 	 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1630 	 */
1631 	if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1632 		if ((dip = APIX_GET_DIP(vecp)) == NULL)
1633 			return (NULL);
1634 		inum = vecp->v_devp->dv_inum;
1635 
1636 		handle = i_ddi_get_pci_config_handle(dip);
1637 		cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1638 		msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1639 		if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1640 			/*
1641 			 * Function is not masked, then mask "inum"th
1642 			 * entry in the MSI-X table
1643 			 */
1644 			msix_p = i_ddi_get_msix(dip);
1645 			off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1646 			    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1647 			mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1648 			ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1649 			    mask | 1);
1650 		}
1651 	}
1652 
1653 	*result = 0;
1654 	if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1655 		*result = EIO;
1656 
1657 	/* Restore mask bit */
1658 	if (msix_p != NULL)
1659 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1660 
1661 	return (newp);
1662 }
1663 
1664 /*
1665  * Set cpu for MSIs
1666  */
1667 apix_vector_t *
1668 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1669 {
1670 	apix_vector_t *newp, *vp;
1671 	uint32_t orig_cpu = vecp->v_cpuid;
1672 	int orig_vect = vecp->v_vector;
1673 	int i, num_vectors, cap_ptr, msi_mask_off;
1674 	uint32_t msi_pvm;
1675 	ushort_t msi_ctrl;
1676 	ddi_acc_handle_t handle;
1677 	dev_info_t *dip;
1678 
1679 	APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1680 	    " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1681 
1682 	ASSERT(LOCK_HELD(&apix_lock));
1683 
1684 	*result = ENXIO;
1685 
1686 	if (vecp->v_type != APIX_TYPE_MSI) {
1687 		DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1688 		return (NULL);
1689 	}
1690 
1691 	if ((dip = APIX_GET_DIP(vecp)) == NULL)
1692 		return (NULL);
1693 
1694 	num_vectors = i_ddi_intr_get_current_nintrs(dip);
1695 	if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1696 		APIC_VERBOSE(INTR, (CE_WARN,
1697 		    "set_grp: base vec not part of a grp or not aligned: "
1698 		    "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1699 		return (NULL);
1700 	}
1701 
1702 	if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1703 		return (NULL);
1704 
1705 	*result = EIO;
1706 	for (i = 1; i < num_vectors; i++) {
1707 		if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1708 			return (NULL);
1709 #ifdef DEBUG
1710 		/*
1711 		 * Sanity check: CPU and dip is the same for all entries.
1712 		 * May be called when first msi to be enabled, at this time
1713 		 * add_avintr() is not called for other msi
1714 		 */
1715 		if ((vp->v_share != 0) &&
1716 		    ((APIX_GET_DIP(vp) != dip) ||
1717 		    (vp->v_cpuid != vecp->v_cpuid))) {
1718 			APIC_VERBOSE(INTR, (CE_WARN,
1719 			    "set_grp: cpu or dip for vec 0x%x difft than for "
1720 			    "vec 0x%x\n", orig_vect, orig_vect + i));
1721 			APIC_VERBOSE(INTR, (CE_WARN,
1722 			    "  cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1723 			    vp->v_cpuid, (void *)dip,
1724 			    (void *)APIX_GET_DIP(vp)));
1725 			return (NULL);
1726 		}
1727 #endif /* DEBUG */
1728 	}
1729 
1730 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1731 	handle = i_ddi_get_pci_config_handle(dip);
1732 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1733 
1734 	/* MSI Per vector masking is supported. */
1735 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1736 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
1737 			msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1738 		else
1739 			msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1740 		msi_pvm = pci_config_get32(handle, msi_mask_off);
1741 		pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1742 		APIC_VERBOSE(INTR, (CE_CONT,
1743 		    "set_grp: pvm supported.  Mask set to 0x%x\n",
1744 		    pci_config_get32(handle, msi_mask_off)));
1745 	}
1746 
1747 	if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1748 		*result = 0;
1749 
1750 	/* Reenable vectors if per vector masking is supported. */
1751 	if (msi_ctrl & PCI_MSI_PVM_MASK) {
1752 		pci_config_put32(handle, msi_mask_off, msi_pvm);
1753 		APIC_VERBOSE(INTR, (CE_CONT,
1754 		    "set_grp: pvm supported.  Mask restored to 0x%x\n",
1755 		    pci_config_get32(handle, msi_mask_off)));
1756 	}
1757 
1758 	return (newp);
1759 }
1760 
1761 void
1762 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1763 {
1764 	apic_irq_t *irqp;
1765 
1766 	mutex_enter(&airq_mutex);
1767 	irqp = apic_irq_table[irqno];
1768 	irqp->airq_cpu = cpuid;
1769 	irqp->airq_vector = vector;
1770 	apic_record_rdt_entry(irqp, irqno);
1771 	mutex_exit(&airq_mutex);
1772 }
1773 
1774 apix_vector_t *
1775 apix_intx_get_vector(int irqno)
1776 {
1777 	apic_irq_t *irqp;
1778 	uint32_t cpuid;
1779 	uchar_t vector;
1780 
1781 	mutex_enter(&airq_mutex);
1782 	irqp = apic_irq_table[irqno & 0xff];
1783 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1784 		mutex_exit(&airq_mutex);
1785 		return (NULL);
1786 	}
1787 	cpuid = irqp->airq_cpu;
1788 	vector = irqp->airq_vector;
1789 	mutex_exit(&airq_mutex);
1790 
1791 	return (xv_vector(cpuid, vector));
1792 }
1793 
1794 /*
1795  * Must called with interrupts disabled and apic_ioapic_lock held
1796  */
1797 void
1798 apix_intx_enable(int irqno)
1799 {
1800 	uchar_t ioapicindex, intin;
1801 	apic_irq_t *irqp = apic_irq_table[irqno];
1802 	ioapic_rdt_t irdt;
1803 	apic_cpus_info_t *cpu_infop;
1804 	apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1805 
1806 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1807 
1808 	ioapicindex = irqp->airq_ioapicindex;
1809 	intin = irqp->airq_intin_no;
1810 	cpu_infop =  &apic_cpus[irqp->airq_cpu];
1811 
1812 	irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1813 	irdt.ir_hi = cpu_infop->aci_local_id;
1814 
1815 	apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1816 	    vecp->v_type, 1, ioapicindex);
1817 	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1818 	    (void *)&irdt, vecp->v_type, 1);
1819 	apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1820 
1821 	/* write RDT entry high dword - destination */
1822 	WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1823 	    irdt.ir_hi);
1824 
1825 	/* Write the vector, trigger, and polarity portion of the RDT */
1826 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1827 
1828 	vecp->v_state = APIX_STATE_ENABLED;
1829 
1830 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1831 	    " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1832 	    ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1833 }
1834 
1835 /*
1836  * Must called with interrupts disabled and apic_ioapic_lock held
1837  */
1838 void
1839 apix_intx_disable(int irqno)
1840 {
1841 	apic_irq_t *irqp = apic_irq_table[irqno];
1842 	int ioapicindex, intin;
1843 
1844 	ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1845 	/*
1846 	 * The assumption here is that this is safe, even for
1847 	 * systems with IOAPICs that suffer from the hardware
1848 	 * erratum because all devices have been quiesced before
1849 	 * they unregister their interrupt handlers.  If that
1850 	 * assumption turns out to be false, this mask operation
1851 	 * can induce the same erratum result we're trying to
1852 	 * avoid.
1853 	 */
1854 	ioapicindex = irqp->airq_ioapicindex;
1855 	intin = irqp->airq_intin_no;
1856 	ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1857 
1858 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1859 	    " intin 0x%x\n", ioapicindex, intin));
1860 }
1861 
1862 void
1863 apix_intx_free(int irqno)
1864 {
1865 	apic_irq_t *irqp;
1866 
1867 	mutex_enter(&airq_mutex);
1868 	irqp = apic_irq_table[irqno];
1869 
1870 	if (IS_IRQ_FREE(irqp)) {
1871 		mutex_exit(&airq_mutex);
1872 		return;
1873 	}
1874 
1875 	irqp->airq_mps_intr_index = FREE_INDEX;
1876 	irqp->airq_cpu = IRQ_UNINIT;
1877 	irqp->airq_vector = APIX_INVALID_VECT;
1878 	mutex_exit(&airq_mutex);
1879 }
1880 
1881 #ifdef DEBUG
1882 int apix_intr_deliver_timeouts = 0;
1883 int apix_intr_rirr_timeouts = 0;
1884 int apix_intr_rirr_reset_failure = 0;
1885 #endif
1886 int apix_max_reps_irr_pending = 10;
1887 
1888 #define	GET_RDT_BITS(ioapic, intin, bits)	\
1889 	(READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))
1890 #define	APIX_CHECK_IRR_DELAY	drv_usectohz(5000)
1891 
1892 int
1893 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1894 {
1895 	apic_irq_t *irqp = apic_irq_table[irqno];
1896 	ulong_t iflag;
1897 	int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1898 
1899 	ASSERT(irqp != NULL);
1900 
1901 	iflag = intr_clear();
1902 	lock_set(&apic_ioapic_lock);
1903 
1904 	ioapic_ix = irqp->airq_ioapicindex;
1905 	intin_no = irqp->airq_intin_no;
1906 	level = apic_level_intr[irqno];
1907 
1908 	/*
1909 	 * Wait for the delivery status bit to be cleared. This should
1910 	 * be a very small amount of time.
1911 	 */
1912 	repeats = 0;
1913 	do {
1914 		repeats++;
1915 
1916 		for (waited = 0; waited < apic_max_reps_clear_pending;
1917 		    waited++) {
1918 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1919 				break;
1920 		}
1921 		if (!level)
1922 			break;
1923 
1924 		/*
1925 		 * Mask the RDT entry for level-triggered interrupts.
1926 		 */
1927 		irqp->airq_rdt_entry |= AV_MASK;
1928 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1929 		    intin_no);
1930 		if ((masked = (rdt_entry & AV_MASK)) == 0) {
1931 			/* Mask it */
1932 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1933 			    AV_MASK | rdt_entry);
1934 		}
1935 
1936 		/*
1937 		 * If there was a race and an interrupt was injected
1938 		 * just before we masked, check for that case here.
1939 		 * Then, unmask the RDT entry and try again.  If we're
1940 		 * on our last try, don't unmask (because we want the
1941 		 * RDT entry to remain masked for the rest of the
1942 		 * function).
1943 		 */
1944 		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1945 		    intin_no);
1946 		if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1947 		    (repeats < apic_max_reps_clear_pending)) {
1948 			/* Unmask it */
1949 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1950 			    intin_no, rdt_entry & ~AV_MASK);
1951 			irqp->airq_rdt_entry &= ~AV_MASK;
1952 		}
1953 	} while ((rdt_entry & AV_PENDING) &&
1954 	    (repeats < apic_max_reps_clear_pending));
1955 
1956 #ifdef DEBUG
1957 	if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1958 		apix_intr_deliver_timeouts++;
1959 #endif
1960 
1961 	if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1962 		goto done;
1963 
1964 	/*
1965 	 * wait for remote IRR to be cleared for level-triggered
1966 	 * interrupts
1967 	 */
1968 	repeats = 0;
1969 	do {
1970 		repeats++;
1971 
1972 		for (waited = 0; waited < apic_max_reps_clear_pending;
1973 		    waited++) {
1974 			if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1975 			    == 0)
1976 				break;
1977 		}
1978 
1979 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1980 			lock_clear(&apic_ioapic_lock);
1981 			intr_restore(iflag);
1982 
1983 			delay(APIX_CHECK_IRR_DELAY);
1984 
1985 			iflag = intr_clear();
1986 			lock_set(&apic_ioapic_lock);
1987 		}
1988 	} while (repeats < apix_max_reps_irr_pending);
1989 
1990 	if (repeats >= apix_max_reps_irr_pending) {
1991 #ifdef DEBUG
1992 		apix_intr_rirr_timeouts++;
1993 #endif
1994 
1995 		/*
1996 		 * If we waited and the Remote IRR bit is still not cleared,
1997 		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
1998 		 * times for this interrupt, try the last-ditch workaround:
1999 		 */
2000 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2001 			/*
2002 			 * Trying to clear the bit through normal
2003 			 * channels has failed.  So as a last-ditch
2004 			 * effort, try to set the trigger mode to
2005 			 * edge, then to level.  This has been
2006 			 * observed to work on many systems.
2007 			 */
2008 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2009 			    intin_no,
2010 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2011 			    intin_no) & ~AV_LEVEL);
2012 			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2013 			    intin_no,
2014 			    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2015 			    intin_no) | AV_LEVEL);
2016 		}
2017 
2018 		if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2019 #ifdef DEBUG
2020 			apix_intr_rirr_reset_failure++;
2021 #endif
2022 			lock_clear(&apic_ioapic_lock);
2023 			intr_restore(iflag);
2024 			prom_printf("apix: Remote IRR still "
2025 			    "not clear for IOAPIC %d intin %d.\n"
2026 			    "\tInterrupts to this pin may cease "
2027 			    "functioning.\n", ioapic_ix, intin_no);
2028 			return (1);	/* return failure */
2029 		}
2030 	}
2031 
2032 done:
2033 	/* change apic_irq_table */
2034 	lock_clear(&apic_ioapic_lock);
2035 	intr_restore(iflag);
2036 	apix_intx_set_vector(irqno, cpuid, vector);
2037 	iflag = intr_clear();
2038 	lock_set(&apic_ioapic_lock);
2039 
2040 	/* reprogramme IO-APIC RDT entry */
2041 	apix_intx_enable(irqno);
2042 
2043 	lock_clear(&apic_ioapic_lock);
2044 	intr_restore(iflag);
2045 
2046 	return (0);
2047 }
2048 
2049 static int
2050 apix_intx_get_pending(int irqno)
2051 {
2052 	apic_irq_t *irqp;
2053 	int intin, ioapicindex, pending;
2054 	ulong_t iflag;
2055 
2056 	mutex_enter(&airq_mutex);
2057 	irqp = apic_irq_table[irqno];
2058 	if (IS_IRQ_FREE(irqp)) {
2059 		mutex_exit(&airq_mutex);
2060 		return (0);
2061 	}
2062 
2063 	/* check IO-APIC delivery status */
2064 	intin = irqp->airq_intin_no;
2065 	ioapicindex = irqp->airq_ioapicindex;
2066 	mutex_exit(&airq_mutex);
2067 
2068 	iflag = intr_clear();
2069 	lock_set(&apic_ioapic_lock);
2070 
2071 	pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2072 	    AV_PENDING) ? 1 : 0;
2073 
2074 	lock_clear(&apic_ioapic_lock);
2075 	intr_restore(iflag);
2076 
2077 	return (pending);
2078 }
2079 
2080 static void
2081 apix_intx_set_mask(int irqno)
2082 {
2083 	int intin, ioapixindex, rdt_entry;
2084 	ulong_t iflag;
2085 	apic_irq_t *irqp;
2086 
2087 	mutex_enter(&airq_mutex);
2088 	irqp = apic_irq_table[irqno];
2089 
2090 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2091 
2092 	intin = irqp->airq_intin_no;
2093 	ioapixindex = irqp->airq_ioapicindex;
2094 	mutex_exit(&airq_mutex);
2095 
2096 	iflag = intr_clear();
2097 	lock_set(&apic_ioapic_lock);
2098 
2099 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2100 
2101 	/* clear mask */
2102 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2103 	    (AV_MASK | rdt_entry));
2104 
2105 	lock_clear(&apic_ioapic_lock);
2106 	intr_restore(iflag);
2107 }
2108 
2109 static void
2110 apix_intx_clear_mask(int irqno)
2111 {
2112 	int intin, ioapixindex, rdt_entry;
2113 	ulong_t iflag;
2114 	apic_irq_t *irqp;
2115 
2116 	mutex_enter(&airq_mutex);
2117 	irqp = apic_irq_table[irqno];
2118 
2119 	ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2120 
2121 	intin = irqp->airq_intin_no;
2122 	ioapixindex = irqp->airq_ioapicindex;
2123 	mutex_exit(&airq_mutex);
2124 
2125 	iflag = intr_clear();
2126 	lock_set(&apic_ioapic_lock);
2127 
2128 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2129 
2130 	/* clear mask */
2131 	WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2132 	    ((~AV_MASK) & rdt_entry));
2133 
2134 	lock_clear(&apic_ioapic_lock);
2135 	intr_restore(iflag);
2136 }
2137 
2138 /*
2139  * For level-triggered interrupt, mask the IRQ line. Mask means
2140  * new interrupts will not be delivered. The interrupt already
2141  * accepted by a local APIC is not affected
2142  */
2143 void
2144 apix_level_intr_pre_eoi(int irq)
2145 {
2146 	apic_irq_t *irqp = apic_irq_table[irq];
2147 	int apic_ix, intin_ix;
2148 
2149 	if (irqp == NULL)
2150 		return;
2151 
2152 	ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2153 
2154 	lock_set(&apic_ioapic_lock);
2155 
2156 	intin_ix = irqp->airq_intin_no;
2157 	apic_ix = irqp->airq_ioapicindex;
2158 
2159 	if (irqp->airq_cpu != CPU->cpu_id) {
2160 		if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2161 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2162 		lock_clear(&apic_ioapic_lock);
2163 		return;
2164 	}
2165 
2166 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2167 		/*
2168 		 * This is a IOxAPIC and there is EOI register:
2169 		 * 	Change the vector to reserved unused vector, so that
2170 		 * 	the EOI	from Local APIC won't clear the Remote IRR for
2171 		 * 	this level trigger interrupt. Instead, we'll manually
2172 		 * 	clear it in apix_post_hardint() after ISR handling.
2173 		 */
2174 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2175 		    (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2176 	} else {
2177 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2178 		    AV_MASK | irqp->airq_rdt_entry);
2179 	}
2180 
2181 	lock_clear(&apic_ioapic_lock);
2182 }
2183 
2184 /*
2185  * For level-triggered interrupt, unmask the IRQ line
2186  * or restore the original vector number.
2187  */
2188 void
2189 apix_level_intr_post_dispatch(int irq)
2190 {
2191 	apic_irq_t *irqp = apic_irq_table[irq];
2192 	int apic_ix, intin_ix;
2193 
2194 	if (irqp == NULL)
2195 		return;
2196 
2197 	lock_set(&apic_ioapic_lock);
2198 
2199 	intin_ix = irqp->airq_intin_no;
2200 	apic_ix = irqp->airq_ioapicindex;
2201 
2202 	if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2203 		/*
2204 		 * Already sent EOI back to Local APIC.
2205 		 * Send EOI to IO-APIC
2206 		 */
2207 		ioapic_write_eoi(apic_ix, irqp->airq_vector);
2208 	} else {
2209 		/* clear the mask or restore the vector */
2210 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2211 		    irqp->airq_rdt_entry);
2212 
2213 		/* send EOI to IOxAPIC */
2214 		if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2215 			ioapic_write_eoi(apic_ix, irqp->airq_vector);
2216 	}
2217 
2218 	lock_clear(&apic_ioapic_lock);
2219 }
2220 
2221 static int
2222 apix_intx_get_shared(int irqno)
2223 {
2224 	apic_irq_t *irqp;
2225 	int share;
2226 
2227 	mutex_enter(&airq_mutex);
2228 	irqp = apic_irq_table[irqno];
2229 	if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2230 		mutex_exit(&airq_mutex);
2231 		return (0);
2232 	}
2233 	share = irqp->airq_share;
2234 	mutex_exit(&airq_mutex);
2235 
2236 	return (share);
2237 }
2238 
2239 static void
2240 apix_intx_set_shared(int irqno, int delta)
2241 {
2242 	apic_irq_t *irqp;
2243 
2244 	mutex_enter(&airq_mutex);
2245 	irqp = apic_irq_table[irqno];
2246 	if (IS_IRQ_FREE(irqp)) {
2247 		mutex_exit(&airq_mutex);
2248 		return;
2249 	}
2250 	irqp->airq_share += delta;
2251 	mutex_exit(&airq_mutex);
2252 }
2253 
2254 /*
2255  * Setup IRQ table. Return IRQ no or -1 on failure
2256  */
2257 static int
2258 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2259     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2260 {
2261 	int origirq = ispec->intrspec_vec;
2262 	int newirq;
2263 	short intr_index;
2264 	uchar_t ipin, ioapic, ioapicindex;
2265 	apic_irq_t *irqp;
2266 
2267 	UNREFERENCED_1PARAMETER(inum);
2268 
2269 	if (intrp != NULL) {
2270 		intr_index = (short)(intrp - apic_io_intrp);
2271 		ioapic = intrp->intr_destid;
2272 		ipin = intrp->intr_destintin;
2273 
2274 		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
2275 		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2276 			if (apic_io_id[ioapicindex] == ioapic)
2277 				break;
2278 		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2279 		    (ioapic == INTR_ALL_APIC));
2280 
2281 		/* check whether this intin# has been used by another irqno */
2282 		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2283 			return (newirq);
2284 
2285 	} else if (iflagp != NULL) {	/* ACPI */
2286 		intr_index = ACPI_INDEX;
2287 		ioapicindex = acpi_find_ioapic(irqno);
2288 		ASSERT(ioapicindex != 0xFF);
2289 		ioapic = apic_io_id[ioapicindex];
2290 		ipin = irqno - apic_io_vectbase[ioapicindex];
2291 
2292 		if (apic_irq_table[irqno] &&
2293 		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2294 			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2295 			    apic_irq_table[irqno]->airq_ioapicindex ==
2296 			    ioapicindex);
2297 			return (irqno);
2298 		}
2299 
2300 	} else {	/* default configuration */
2301 		intr_index = DEFAULT_INDEX;
2302 		ioapicindex = 0;
2303 		ioapic = apic_io_id[ioapicindex];
2304 		ipin = (uchar_t)irqno;
2305 	}
2306 
2307 	/* allocate a new IRQ no */
2308 	if ((irqp = apic_irq_table[irqno]) == NULL) {
2309 		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2310 		apic_irq_table[irqno] = irqp;
2311 	} else {
2312 		if (irqp->airq_mps_intr_index != FREE_INDEX) {
2313 			newirq = apic_allocate_irq(apic_first_avail_irq);
2314 			if (newirq == -1) {
2315 				return (-1);
2316 			}
2317 			irqno = newirq;
2318 			irqp = apic_irq_table[irqno];
2319 			ASSERT(irqp != NULL);
2320 		}
2321 	}
2322 	apic_max_device_irq = max(irqno, apic_max_device_irq);
2323 	apic_min_device_irq = min(irqno, apic_min_device_irq);
2324 
2325 	irqp->airq_mps_intr_index = intr_index;
2326 	irqp->airq_ioapicindex = ioapicindex;
2327 	irqp->airq_intin_no = ipin;
2328 	irqp->airq_dip = dip;
2329 	irqp->airq_origirq = (uchar_t)origirq;
2330 	if (iflagp != NULL)
2331 		irqp->airq_iflag = *iflagp;
2332 	irqp->airq_cpu = IRQ_UNINIT;
2333 	irqp->airq_vector = 0;
2334 
2335 	return (irqno);
2336 }
2337 
2338 /*
2339  * Setup IRQ table for non-pci devices. Return IRQ no or -1 on error
2340  */
2341 static int
2342 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2343     struct intrspec *ispec)
2344 {
2345 	int irqno = ispec->intrspec_vec;
2346 	int newirq, i;
2347 	iflag_t intr_flag;
2348 	ACPI_SUBTABLE_HEADER	*hp;
2349 	ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2350 	struct apic_io_intr *intrp;
2351 
2352 	if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2353 		int busid;
2354 
2355 		if (bustype == 0)
2356 			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2357 
2358 		/* loop checking BUS_ISA/BUS_EISA */
2359 		for (i = 0; i < 2; i++) {
2360 			if (((busid = apic_find_bus_id(bustype)) != -1) &&
2361 			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2362 			    != NULL)) {
2363 				return (apix_intx_setup(dip, inum, irqno,
2364 				    intrp, ispec, NULL));
2365 			}
2366 			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2367 		}
2368 
2369 		/* fall back to default configuration */
2370 		return (-1);
2371 	}
2372 
2373 	/* search iso entries first */
2374 	if (acpi_iso_cnt != 0) {
2375 		hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2376 		i = 0;
2377 		while (i < acpi_iso_cnt) {
2378 			if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2379 				isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2380 				if (isop->Bus == 0 &&
2381 				    isop->SourceIrq == irqno) {
2382 					newirq = isop->GlobalIrq;
2383 					intr_flag.intr_po = isop->IntiFlags &
2384 					    ACPI_MADT_POLARITY_MASK;
2385 					intr_flag.intr_el = (isop->IntiFlags &
2386 					    ACPI_MADT_TRIGGER_MASK) >> 2;
2387 					intr_flag.bustype = BUS_ISA;
2388 
2389 					return (apix_intx_setup(dip, inum,
2390 					    newirq, NULL, ispec, &intr_flag));
2391 				}
2392 				i++;
2393 			}
2394 			hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2395 			    hp->Length);
2396 		}
2397 	}
2398 	intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2399 	intr_flag.intr_el = INTR_EL_EDGE;
2400 	intr_flag.bustype = BUS_ISA;
2401 	return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2402 }
2403 
2404 
2405 /*
2406  * Setup IRQ table for pci devices. Return IRQ no or -1 on error
2407  */
2408 static int
2409 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2410     struct intrspec *ispec)
2411 {
2412 	int busid, devid, pci_irq;
2413 	ddi_acc_handle_t cfg_handle;
2414 	uchar_t ipin;
2415 	iflag_t intr_flag;
2416 	struct apic_io_intr *intrp;
2417 
2418 	if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2419 		return (-1);
2420 
2421 	if (busid == 0 && apic_pci_bus_total == 1)
2422 		busid = (int)apic_single_pci_busid;
2423 
2424 	if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2425 		return (-1);
2426 	ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2427 	pci_config_teardown(&cfg_handle);
2428 
2429 	if (apic_enable_acpi && !apic_use_acpi_madt_only) {	/* ACPI */
2430 		if (apic_acpi_translate_pci_irq(dip, busid, devid,
2431 		    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2432 			return (-1);
2433 
2434 		intr_flag.bustype = (uchar_t)bustype;
2435 		return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2436 		    &intr_flag));
2437 	}
2438 
2439 	/* MP configuration table */
2440 	pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2441 	if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2442 		pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2443 		if (pci_irq == -1)
2444 			return (-1);
2445 	}
2446 
2447 	return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2448 }
2449 
2450 /*
2451  * Translate and return IRQ no
2452  */
2453 static int
2454 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2455 {
2456 	int newirq, irqno = ispec->intrspec_vec;
2457 	int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2458 	int bustype = 0, dev_len;
2459 	char dev_type[16];
2460 
2461 	if (apic_defconf) {
2462 		mutex_enter(&airq_mutex);
2463 		goto defconf;
2464 	}
2465 
2466 	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2467 		mutex_enter(&airq_mutex);
2468 		goto nonpci;
2469 	}
2470 
2471 	/*
2472 	 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2473 	 * to avoid extra buffer allocation.
2474 	 */
2475 	dev_len = sizeof (dev_type);
2476 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2477 	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2478 	    &dev_len) == DDI_PROP_SUCCESS) {
2479 		if ((strcmp(dev_type, "pci") == 0) ||
2480 		    (strcmp(dev_type, "pciex") == 0))
2481 			parent_is_pci_or_pciex = 1;
2482 	}
2483 
2484 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2485 	    DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2486 	    &dev_len) == DDI_PROP_SUCCESS) {
2487 		if (strstr(dev_type, "pciex"))
2488 			child_is_pciex = 1;
2489 	}
2490 
2491 	mutex_enter(&airq_mutex);
2492 
2493 	if (parent_is_pci_or_pciex) {
2494 		bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2495 		newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2496 		if (newirq != -1)
2497 			goto done;
2498 		bustype = 0;
2499 	} else if (strcmp(dev_type, "isa") == 0)
2500 		bustype = BUS_ISA;
2501 	else if (strcmp(dev_type, "eisa") == 0)
2502 		bustype = BUS_EISA;
2503 
2504 nonpci:
2505 	newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2506 	if (newirq != -1)
2507 		goto done;
2508 
2509 defconf:
2510 	newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2511 	if (newirq == -1) {
2512 		mutex_exit(&airq_mutex);
2513 		return (-1);
2514 	}
2515 done:
2516 	ASSERT(apic_irq_table[newirq]);
2517 	mutex_exit(&airq_mutex);
2518 	return (newirq);
2519 }
2520 
2521 static int
2522 apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2523 {
2524 	int irqno;
2525 	apix_vector_t *vecp;
2526 
2527 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2528 		return (0);
2529 
2530 	if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2531 		return (0);
2532 
2533 	DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2534 	    "irqno=0x%x cpuid=%d vector=0x%x\n",
2535 	    (void *)dip, ddi_driver_name(dip), irqno,
2536 	    vecp->v_cpuid, vecp->v_vector));
2537 
2538 	return (1);
2539 }
2540 
2541 /*
2542  * Return the vector number if the translated IRQ for this device
2543  * has a vector mapping setup. If no IRQ setup exists or no vector is
2544  * allocated to it then return 0.
2545  */
2546 static apix_vector_t *
2547 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2548 {
2549 	int irqno;
2550 	apix_vector_t *vecp;
2551 
2552 	/* get the IRQ number */
2553 	if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2554 		return (NULL);
2555 
2556 	/* get the vector number if a vector is allocated to this irqno */
2557 	vecp = apix_intx_get_vector(irqno);
2558 
2559 	return (vecp);
2560 }
2561 
2562 /* stub function */
2563 int
2564 apix_loaded(void)
2565 {
2566 	return (apix_is_enabled);
2567 }
2568