xref: /titanic_52/usr/src/uts/i86pc/io/pcplusmp/apic.c (revision 03831d35f7499c87d51205817c93e9a8d42c4bae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
30  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
31  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
32  * PSMI 1.5 extensions are supported in Solaris Nevada.
33  */
34 #define	PSMI_1_5
35 
36 #include <sys/processor.h>
37 #include <sys/time.h>
38 #include <sys/psm.h>
39 #include <sys/smp_impldefs.h>
40 #include <sys/cram.h>
41 #include <sys/acpi/acpi.h>
42 #include <sys/acpica.h>
43 #include <sys/psm_common.h>
44 #include "apic.h"
45 #include <sys/pit.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/pci.h>
50 #include <sys/promif.h>
51 #include <sys/x86_archext.h>
52 #include <sys/cpc_impl.h>
53 #include <sys/uadmin.h>
54 #include <sys/panic.h>
55 #include <sys/debug.h>
56 #include <sys/archsystm.h>
57 #include <sys/trap.h>
58 #include <sys/machsystm.h>
59 #include <sys/cpuvar.h>
60 #include <sys/rm_platter.h>
61 #include <sys/privregs.h>
62 #include <sys/cyclic.h>
63 #include <sys/note.h>
64 #include <sys/pci_intr_lib.h>
65 
66 /*
67  *	Local Function Prototypes
68  */
69 static void apic_init_intr();
70 static void apic_ret();
71 static int apic_handle_defconf();
72 static int apic_parse_mpct(caddr_t mpct, int bypass);
73 static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
74 static int apic_checksum(caddr_t bptr, int len);
75 static int get_apic_cmd1();
76 static int get_apic_pri();
77 static int apic_find_bus_type(char *bus);
78 static int apic_find_bus(int busid);
79 static int apic_find_bus_id(int bustype);
80 static struct apic_io_intr *apic_find_io_intr(int irqno);
81 int apic_allocate_irq(int irq);
82 static int apic_find_free_irq(int start, int end);
83 static uchar_t apic_allocate_vector(int ipl, int irq, int pri);
84 static void apic_modify_vector(uchar_t vector, int irq);
85 static void apic_mark_vector(uchar_t oldvector, uchar_t newvector);
86 static uchar_t apic_xlate_vector(uchar_t oldvector);
87 static void apic_xlate_vector_free_timeout_handler(void *arg);
88 static void apic_free_vector(uchar_t vector);
89 static void apic_reprogram_timeout_handler(void *arg);
90 static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
91     int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq);
92 static int apic_setup_io_intr(apic_irq_t *irqptr, int irq);
93 static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq);
94 static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq);
95 static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid);
96 static int apic_find_intin(uchar_t ioapic, uchar_t intin);
97 static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno,
98     int child_ipin, struct apic_io_intr **intrp);
99 static int apic_setup_irq_table(dev_info_t *dip, int irqno,
100     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
101     int type);
102 static int apic_setup_sci_irq_table(int irqno, uchar_t ipl,
103     iflag_t *intr_flagp);
104 static void apic_nmi_intr(caddr_t arg);
105 uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid,
106     uchar_t intin);
107 static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock,
108     int when);
109 int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe);
110 static void apic_intr_redistribute();
111 static void apic_cleanup_busy();
112 static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
113 int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type);
114 
115 /* ACPI support routines */
116 static int acpi_probe(void);
117 static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
118     int *pci_irqp, iflag_t *intr_flagp);
119 
120 static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
121     int ipin, int *pci_irqp, iflag_t *intr_flagp);
122 static uchar_t acpi_find_ioapic(int irq);
123 static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2);
124 
125 /*
126  *	standard MP entries
127  */
128 static int	apic_probe();
129 static int	apic_clkinit();
130 static int	apic_getclkirq(int ipl);
131 static uint_t	apic_calibrate(volatile uint32_t *addr,
132     uint16_t *pit_ticks_adj);
133 static hrtime_t apic_gettime();
134 static hrtime_t apic_gethrtime();
135 static void	apic_init();
136 static void	apic_picinit(void);
137 static void	apic_cpu_start(processorid_t cpun, caddr_t rm_code);
138 static int	apic_post_cpu_start(void);
139 static void	apic_send_ipi(int cpun, int ipl);
140 static void	apic_set_softintr(int softintr);
141 static void	apic_set_idlecpu(processorid_t cpun);
142 static void	apic_unset_idlecpu(processorid_t cpun);
143 static int	apic_softlvl_to_irq(int ipl);
144 static int	apic_intr_enter(int ipl, int *vect);
145 static void	apic_intr_exit(int ipl, int vect);
146 static void	apic_setspl(int ipl);
147 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
148 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
149 static void	apic_shutdown(int cmd, int fcn);
150 static void	apic_preshutdown(int cmd, int fcn);
151 static int	apic_disable_intr(processorid_t cpun);
152 static void	apic_enable_intr(processorid_t cpun);
153 static processorid_t	apic_get_next_processorid(processorid_t cpun);
154 static int		apic_get_ipivect(int ipl, int type);
155 static void	apic_timer_reprogram(hrtime_t time);
156 static void	apic_timer_enable(void);
157 static void	apic_timer_disable(void);
158 static void	apic_post_cyclic_setup(void *arg);
159 extern int	apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
160 		    psm_intr_op_t, int *);
161 
162 static int	apic_oneshot = 0;
163 int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
164 
165 /*
166  * These variables are frequently accessed in apic_intr_enter(),
167  * apic_intr_exit and apic_setspl, so group them together
168  */
169 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
170 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
171 int apic_clkvect;
172 
173 /* ACPI SCI interrupt configuration; -1 if SCI not used */
174 int apic_sci_vect = -1;
175 iflag_t apic_sci_flags;
176 
177 /* vector at which error interrupts come in */
178 int apic_errvect;
179 int apic_enable_error_intr = 1;
180 int apic_error_display_delay = 100;
181 
182 /* vector at which performance counter overflow interrupts come in */
183 int apic_cpcovf_vect;
184 int apic_enable_cpcovf_intr = 1;
185 
186 /* Max wait time (in microsecs) for flags to clear in an RDT entry. */
187 static int apic_max_usecs_clear_pending = 1000;
188 
189 /* Amt of usecs to wait before checking if RDT flags have reset. */
190 #define	APIC_USECS_PER_WAIT_INTERVAL 100
191 
192 /* Maximum number of times to retry reprogramming via the timeout */
193 #define	APIC_REPROGRAM_MAX_TIMEOUTS 10
194 
195 /* timeout delay for IOAPIC delayed reprogramming */
196 #define	APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */
197 
198 /* Parameter to apic_rebind(): Should reprogramming be done now or later? */
199 #define	DEFERRED 1
200 #define	IMMEDIATE 0
201 
202 /*
203  * number of bits per byte, from <sys/param.h>
204  */
205 #define	UCHAR_MAX	((1 << NBBY) - 1)
206 
207 uchar_t	apic_reserved_irqlist[MAX_ISA_IRQ + 1];
208 
209 /*
210  * The following vector assignments influence the value of ipltopri and
211  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
212  * idle to 0 and IPL 0 to 0x10 to differentiate idle in case
213  * we care to do so in future. Note some IPLs which are rarely used
214  * will share the vector ranges and heavily used IPLs (5 and 6) have
215  * a wide range.
216  *	IPL		Vector range.		as passed to intr_enter
217  *	0		none.
218  *	1,2,3		0x20-0x2f		0x0-0xf
219  *	4		0x30-0x3f		0x10-0x1f
220  *	5		0x40-0x5f		0x20-0x3f
221  *	6		0x60-0x7f		0x40-0x5f
222  *	7,8,9		0x80-0x8f		0x60-0x6f
223  *	10		0x90-0x9f		0x70-0x7f
224  *	11		0xa0-0xaf		0x80-0x8f
225  *	...		...
226  *	16		0xf0-0xff		0xd0-0xdf
227  */
228 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
229 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16
230 };
	/*
	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
	 * (one table entry per group of 16 vectors).
	 * NOTE that this is vector as passed into intr_enter which is
	 * programmed vector - 0x20 (APIC_BASE_VECT)
	 */
236 
237 uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
238 	/* The taskpri to be programmed into apic to mask given ipl */
239 
240 #if defined(__amd64)
241 uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
242 #endif
243 
244 /*
245  * Patchable global variables.
246  */
247 int	apic_forceload = 0;
248 
249 #define	INTR_ROUND_ROBIN_WITH_AFFINITY	0
250 #define	INTR_ROUND_ROBIN		1
251 #define	INTR_LOWEST_PRIORITY		2
252 
253 int	apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
254 
255 static int	apic_next_bind_cpu = 1; /* For round robin assignment */
256 					/* start with cpu 1 */
257 
258 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
259 					/* 1 - use gettime() for performance */
260 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
261 int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
262 int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
263 int	apic_panic_on_nmi = 0;
264 int	apic_panic_on_apic_error = 0;
265 
266 int	apic_verbose = 0;
267 
268 /* Flag definitions for apic_verbose */
269 #define	APIC_VERBOSE_IOAPIC_FLAG		0x00000001
270 #define	APIC_VERBOSE_IRQ_FLAG			0x00000002
271 #define	APIC_VERBOSE_POWEROFF_FLAG		0x00000004
272 #define	APIC_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000008
273 
274 
275 #define	APIC_VERBOSE_IOAPIC(fmt) \
276 	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
277 		cmn_err fmt;
278 
279 #define	APIC_VERBOSE_IRQ(fmt) \
280 	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
281 		cmn_err fmt;
282 
283 #define	APIC_VERBOSE_POWEROFF(fmt) \
284 	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \
285 		prom_printf fmt;
286 
287 
288 /* Now the ones for Dynamic Interrupt distribution */
289 int	apic_enable_dynamic_migration = 0;
290 
291 /*
292  * If enabled, the distribution works as follows:
293  * On every interrupt entry, the current ipl for the CPU is set in cpu_info
294  * and the irq corresponding to the ipl is also set in the aci_current array.
 * interrupt exit and setspl (due to soft interrupts) will cause the current
 * ipl to be changed. This is cache friendly as these frequently used
297  * paths write into a per cpu structure.
298  *
299  * Sampling is done by checking the structures for all CPUs and incrementing
300  * the busy field of the irq (if any) executing on each CPU and the busy field
301  * of the corresponding CPU.
302  * In periodic mode this is done on every clock interrupt.
303  * In one-shot mode, this is done thru a cyclic with an interval of
304  * apic_redistribute_sample_interval (default 10 milli sec).
305  *
306  * Every apic_sample_factor_redistribution times we sample, we do computations
 * to decide which interrupt needs to be migrated (see comments
 * before apic_intr_redistribute()).
309  */
310 
311 /*
312  * Following 3 variables start as % and can be patched or set using an
313  * API to be defined in future. They will be scaled to
314  * sample_factor_redistribution which is in turn set to hertz+1 (in periodic
315  * mode), or 101 in one-shot mode to stagger it away from one sec processing
316  */
317 
318 int	apic_int_busy_mark = 60;
319 int	apic_int_free_mark = 20;
320 int	apic_diff_for_redistribution = 10;
321 
322 /* sampling interval for interrupt redistribution for dynamic migration */
323 int	apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */
324 
325 /*
326  * number of times we sample before deciding to redistribute interrupts
327  * for dynamic migration
328  */
329 int	apic_sample_factor_redistribution = 101;
330 
331 /* timeout for xlate_vector, mark_vector */
332 int	apic_revector_timeout = 16 * 10000; /* 160 millisec */
333 
334 int	apic_redist_cpu_skip = 0;
335 int	apic_num_imbalance = 0;
336 int	apic_num_rebind = 0;
337 
338 int	apic_nproc = 0;
339 int	apic_defconf = 0;
340 int	apic_irq_translate = 0;
341 int	apic_spec_rev = 0;
342 int	apic_imcrp = 0;
343 
344 int	apic_use_acpi = 1;	/* 1 = use ACPI, 0 = don't use ACPI */
345 int	apic_use_acpi_madt_only = 0;	/* 1=ONLY use MADT from ACPI */
346 
347 /*
348  * For interrupt link devices, if apic_unconditional_srs is set, an irq resource
349  * will be assigned (via _SRS). If it is not set, use the current
350  * irq setting (via _CRS), but only if that irq is in the set of possible
351  * irqs (returned by _PRS) for the device.
352  */
353 int	apic_unconditional_srs = 1;
354 
355 /*
356  * For interrupt link devices, if apic_prefer_crs is set when we are
357  * assigning an IRQ resource to a device, prefer the current IRQ setting
358  * over other possible irq settings under same conditions.
359  */
360 
361 int	apic_prefer_crs = 1;
362 
363 
364 /* minimum number of timer ticks to program to */
365 int apic_min_timer_ticks = 1;
366 /*
367  *	Local static data
368  */
369 static struct	psm_ops apic_ops = {
370 	apic_probe,
371 
372 	apic_init,
373 	apic_picinit,
374 	apic_intr_enter,
375 	apic_intr_exit,
376 	apic_setspl,
377 	apic_addspl,
378 	apic_delspl,
379 	apic_disable_intr,
380 	apic_enable_intr,
381 	apic_softlvl_to_irq,
382 	apic_set_softintr,
383 
384 	apic_set_idlecpu,
385 	apic_unset_idlecpu,
386 
387 	apic_clkinit,
388 	apic_getclkirq,
389 	(void (*)(void))NULL,		/* psm_hrtimeinit */
390 	apic_gethrtime,
391 
392 	apic_get_next_processorid,
393 	apic_cpu_start,
394 	apic_post_cpu_start,
395 	apic_shutdown,
396 	apic_get_ipivect,
397 	apic_send_ipi,
398 
399 	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
400 	(int (*)(todinfo_t *))NULL,	/* psm_tod_get */
401 	(int (*)(todinfo_t *))NULL,	/* psm_tod_set */
402 	(void (*)(int, char *))NULL,	/* psm_notify_error */
403 	(void (*)(int))NULL,		/* psm_notify_func */
404 	apic_timer_reprogram,
405 	apic_timer_enable,
406 	apic_timer_disable,
407 	apic_post_cyclic_setup,
408 	apic_preshutdown,
409 	apic_intr_ops			/* Advanced DDI Interrupt framework */
410 };
411 
412 
413 static struct	psm_info apic_psm_info = {
414 	PSM_INFO_VER01_5,			/* version */
415 	PSM_OWN_EXCLUSIVE,			/* ownership */
416 	(struct psm_ops *)&apic_ops,		/* operation */
417 	"pcplusmp",				/* machine name */
418 	"pcplusmp v1.4 compatible %I%",
419 };
420 
421 static void *apic_hdlp;
422 
423 #ifdef DEBUG
424 #define	DENT		0x0001
425 int	apic_debug = 0;
426 /*
427  * set apic_restrict_vector to the # of vectors we want to allow per range
428  * useful in testing shared interrupt logic by setting it to 2 or 3
429  */
430 int	apic_restrict_vector = 0;
431 
432 #define	APIC_DEBUG_MSGBUFSIZE	2048
433 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
434 int	apic_debug_msgbufindex = 0;
435 
436 /*
437  * Put "int" info into debug buffer. No MP consistency, but light weight.
438  * Good enough for most debugging.
439  */
440 #define	APIC_DEBUG_BUF_PUT(x) \
441 	apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \
442 	if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \
443 		apic_debug_msgbufindex = 0;
444 
445 #endif /* DEBUG */
446 
447 apic_cpus_info_t	*apic_cpus;
448 
449 static uint_t	apic_cpumask = 0;
450 static uint_t	apic_flag;
451 
452 /* Flag to indicate that we need to shut down all processors */
453 static uint_t	apic_shutdown_processors;
454 
455 uint_t apic_nsec_per_intr = 0;
456 
457 /*
458  * apic_let_idle_redistribute can have the following values:
459  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
460  * apic_redistribute_lock prevents multiple idle cpus from redistributing
461  */
462 int	apic_num_idle_redistributions = 0;
463 static	int apic_let_idle_redistribute = 0;
464 static	uint_t apic_nticks = 0;
465 static	uint_t apic_skipped_redistribute = 0;
466 
467 /* to gather intr data and redistribute */
468 static void apic_redistribute_compute(void);
469 
470 static	uint_t last_count_read = 0;
471 static	lock_t	apic_gethrtime_lock;
472 volatile int	apic_hrtime_stamp = 0;
473 volatile hrtime_t apic_nsec_since_boot = 0;
474 static uint_t apic_hertz_count, apic_nsec_per_tick;
475 static hrtime_t apic_nsec_max;
476 
477 static	hrtime_t	apic_last_hrtime = 0;
478 int		apic_hrtime_error = 0;
479 int		apic_remote_hrterr = 0;
480 int		apic_num_nmis = 0;
481 int		apic_apic_error = 0;
482 int		apic_num_apic_errors = 0;
483 int		apic_num_cksum_errors = 0;
484 
485 static	uchar_t	apic_io_id[MAX_IO_APIC];
486 static	uchar_t	apic_io_ver[MAX_IO_APIC];
487 static	uchar_t	apic_io_vectbase[MAX_IO_APIC];
488 static	uchar_t	apic_io_vectend[MAX_IO_APIC];
489 volatile int32_t *apicioadr[MAX_IO_APIC];
490 
491 /*
492  * First available slot to be used as IRQ index into the apic_irq_table
493  * for those interrupts (like MSI/X) that don't have a physical IRQ.
494  */
495 int apic_first_avail_irq  = APIC_FIRST_FREE_IRQ;
496 
497 /*
498  * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
499  * and bound elements of cpus_info and the temp_cpu element of irq_struct
500  */
501 lock_t	apic_ioapic_lock;
502 
503 /*
504  * apic_ioapic_reprogram_lock prevents a CPU from exiting
505  * apic_intr_exit before IOAPIC reprogramming information
506  * is collected.
507  */
508 static	lock_t	apic_ioapic_reprogram_lock;
509 static	int	apic_io_max = 0;	/* no. of i/o apics enabled */
510 
511 static	struct apic_io_intr *apic_io_intrp = 0;
512 static	struct apic_bus	*apic_busp;
513 
514 uchar_t	apic_vector_to_irq[APIC_MAX_VECTOR+1];
515 static	uchar_t	apic_resv_vector[MAXIPL+1];
516 
517 static	char	apic_level_intr[APIC_MAX_VECTOR+1];
518 static	int	apic_error = 0;
519 /* values which apic_error can take. Not catastrophic, but may help debug */
520 #define	APIC_ERR_BOOT_EOI		0x1
521 #define	APIC_ERR_GET_IPIVECT_FAIL	0x2
522 #define	APIC_ERR_INVALID_INDEX		0x4
523 #define	APIC_ERR_MARK_VECTOR_FAIL	0x8
524 #define	APIC_ERR_APIC_ERROR		0x40000000
525 #define	APIC_ERR_NMI			0x80000000
526 
527 static	int	apic_cmos_ssb_set = 0;
528 
529 static	uint32_t	eisa_level_intr_mask = 0;
530 	/* At least MSB will be set if EISA bus */
531 
532 static	int	apic_pci_bus_total = 0;
533 static	uchar_t	apic_single_pci_busid = 0;
534 
535 
536 /*
537  * airq_mutex protects additions to the apic_irq_table - the first
538  * pointer and any airq_nexts off of that one. It also protects
539  * apic_max_device_irq & apic_min_device_irq. It also guarantees
540  * that share_id is unique as new ids are generated only when new
541  * irq_t structs are linked in. Once linked in the structs are never
542  * deleted. temp_cpu & mps_intr_index field indicate if it is programmed
543  * or allocated. Note that there is a slight gap between allocating in
544  * apic_introp_xlate and programming in addspl.
545  */
546 kmutex_t	airq_mutex;
547 apic_irq_t	*apic_irq_table[APIC_MAX_VECTOR+1];
548 int		apic_max_device_irq = 0;
549 int		apic_min_device_irq = APIC_MAX_VECTOR;
550 
551 /* use to make sure only one cpu handles the nmi */
552 static	lock_t	apic_nmi_lock;
553 /* use to make sure only one cpu handles the error interrupt */
554 static	lock_t	apic_error_lock;
555 
556 /*
557  * Following declarations are for revectoring; used when ISRs at different
558  * IPLs share an irq.
559  */
560 static	lock_t	apic_revector_lock;
561 static	int	apic_revector_pending = 0;
562 static	uchar_t	*apic_oldvec_to_newvec;
563 static	uchar_t	*apic_newvec_to_oldvec;
564 
565 /* Ensures that the IOAPIC-reprogramming timeout is not reentrant */
566 static	kmutex_t	apic_reprogram_timeout_mutex;
567 
568 static	struct	ioapic_reprogram_data {
569 	int		valid;	 /* This entry is valid */
570 	int		bindcpu; /* The CPU to which the int will be bound */
571 	unsigned	timeouts; /* # times the reprogram timeout was called */
572 } apic_reprogram_info[APIC_MAX_VECTOR+1];
573 /*
574  * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info
575  * is indexed by IRQ number, NOT by vector number.
576  */
577 
578 
579 /*
580  * The following added to identify a software poweroff method if available.
581  */
582 
583 static struct {
584 	int	poweroff_method;
585 	char	oem_id[APIC_MPS_OEM_ID_LEN + 1];	/* MAX + 1 for NULL */
586 	char	prod_id[APIC_MPS_PROD_ID_LEN + 1];	/* MAX + 1 for NULL */
587 } apic_mps_ids[] = {
588 	{ APIC_POWEROFF_VIA_RTC,	"INTEL",	"ALDER" },   /* 4300 */
589 	{ APIC_POWEROFF_VIA_RTC,	"NCR",		"AMC" },    /* 4300 */
590 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"A450NX" },  /* 4400? */
591 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AD450NX" }, /* 4400 */
592 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AC450NX" }, /* 4400R */
593 	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"S450NX" },  /* S50  */
594 	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"SC450NX" }  /* S50? */
595 };
596 
597 int	apic_poweroff_method = APIC_POWEROFF_NONE;
598 
599 static	struct {
600 	uchar_t	cntl;
601 	uchar_t	data;
602 } aspen_bmc[] = {
603 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
604 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
605 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
606 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
607 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
608 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
609 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
610 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
611 
612 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
613 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
614 };
615 
616 static	struct {
617 	int	port;
618 	uchar_t	data;
619 } sitka_bmc[] = {
620 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
621 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
622 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
623 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
624 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
625 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
626 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
627 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
628 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
629 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
630 
631 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
632 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
633 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
634 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
635 };
636 
637 
638 /* Patchable global variables. */
639 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
640 int		apic_debug_mps_id = 0;		/* 1 - print MPS ID strings */
641 
642 /*
643  * ACPI definitions
644  */
645 /* _PIC method arguments */
646 #define	ACPI_PIC_MODE	0
647 #define	ACPI_APIC_MODE	1
648 
649 /* APIC error flags we care about */
650 #define	APIC_SEND_CS_ERROR	0x01
651 #define	APIC_RECV_CS_ERROR	0x02
652 #define	APIC_CS_ERRORS		(APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR)
653 
654 /*
655  * ACPI variables
656  */
657 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */
658 static	int apic_enable_acpi = 0;
659 
660 /* ACPI Multiple APIC Description Table ptr */
661 static	MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL;
662 
663 /* ACPI Interrupt Source Override Structure ptr */
664 static	MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL;
665 static	int acpi_iso_cnt = 0;
666 
667 /* ACPI Non-maskable Interrupt Sources ptr */
668 static	MADT_NMI_SOURCE *acpi_nmi_sp = NULL;
669 static	int acpi_nmi_scnt = 0;
670 static	MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL;
671 static	int acpi_nmi_ccnt = 0;
672 
673 /*
674  * extern declarations
675  */
676 extern	int	intr_clear(void);
677 extern	void	intr_restore(uint_t);
678 #if defined(__amd64)
679 extern	int	intpri_use_cr8;
680 #endif	/* __amd64 */
681 
682 extern int	apic_pci_msi_enable_vector(dev_info_t *, int, int,
683 		    int, int, int);
684 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int);
685 
686 /*
687  *	This is the loadable module wrapper
688  */
689 
690 int
691 _init(void)
692 {
693 	if (apic_coarse_hrtime)
694 		apic_ops.psm_gethrtime = &apic_gettime;
695 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
696 }
697 
698 int
699 _fini(void)
700 {
701 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
702 }
703 
704 int
705 _info(struct modinfo *modinfop)
706 {
707 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
708 }
709 
710 /*
711  * Auto-configuration routines
712  */
713 
714 /*
715  * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do here
716  * May work with 1.1 - but not guaranteed.
717  * According to the MP Spec, the MP floating pointer structure
718  * will be searched in the order described below:
719  * 1. In the first kilobyte of Extended BIOS Data Area (EBDA)
720  * 2. Within the last kilobyte of system base memory
721  * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh
722  * Once we find the right signature with proper checksum, we call
723  * either handle_defconf or parse_mpct to get all info necessary for
724  * subsequent operations.
725  */
726 static int
727 apic_probe()
728 {
729 	uint32_t mpct_addr, ebda_start = 0, base_mem_end;
730 	caddr_t	biosdatap;
731 	caddr_t	mpct;
732 	caddr_t	fptr;
733 	int	i, mpct_size, mapsize, retval = PSM_FAILURE;
734 	ushort_t	ebda_seg, base_mem_size;
735 	struct	apic_mpfps_hdr	*fpsp;
736 	struct	apic_mp_cnf_hdr	*hdrp;
737 	int bypass_cpu_and_ioapics_in_mptables;
738 	int acpi_user_options;
739 
740 	if (apic_forceload < 0)
741 		return (retval);
742 
743 	/* Allow override for MADT-only mode */
744 	acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0,
745 	    "acpi-user-options", 0);
746 	apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0);
747 
748 	/* Allow apic_use_acpi to override MADT-only mode */
749 	if (!apic_use_acpi)
750 		apic_use_acpi_madt_only = 0;
751 
752 	retval = acpi_probe();
753 
754 	/*
755 	 * mapin the bios data area 40:0
756 	 * 40:13h - two-byte location reports the base memory size
757 	 * 40:0Eh - two-byte location for the exact starting address of
758 	 *	    the EBDA segment for EISA
759 	 */
760 	biosdatap = psm_map_phys(0x400, 0x20, PROT_READ);
761 	if (!biosdatap)
762 		return (retval);
763 	fpsp = (struct apic_mpfps_hdr *)NULL;
764 	mapsize = MPFPS_RAM_WIN_LEN;
765 	/*LINTED: pointer cast may result in improper alignment */
766 	ebda_seg = *((ushort_t *)(biosdatap+0xe));
767 	/* check the 1k of EBDA */
768 	if (ebda_seg) {
769 		ebda_start = ((uint32_t)ebda_seg) << 4;
770 		fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ);
771 		if (fptr) {
772 			if (!(fpsp =
773 			    apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN)))
774 				psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
775 		}
776 	}
777 	/* If not in EBDA, check the last k of system base memory */
778 	if (!fpsp) {
779 		/*LINTED: pointer cast may result in improper alignment */
780 		base_mem_size = *((ushort_t *)(biosdatap + 0x13));
781 
782 		if (base_mem_size > 512)
783 			base_mem_end = 639 * 1024;
784 		else
785 			base_mem_end = 511 * 1024;
786 		/* if ebda == last k of base mem, skip to check BIOS ROM */
787 		if (base_mem_end != ebda_start) {
788 
789 			fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN,
790 			    PROT_READ);
791 
792 			if (fptr) {
793 				if (!(fpsp = apic_find_fps_sig(fptr,
794 				    MPFPS_RAM_WIN_LEN)))
795 					psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
796 			}
797 		}
798 	}
799 	psm_unmap_phys(biosdatap, 0x20);
800 
801 	/* If still cannot find it, check the BIOS ROM space */
802 	if (!fpsp) {
803 		mapsize = MPFPS_ROM_WIN_LEN;
804 		fptr = psm_map_phys(MPFPS_ROM_WIN_START,
805 		    MPFPS_ROM_WIN_LEN, PROT_READ);
806 		if (fptr) {
807 			if (!(fpsp =
808 			    apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) {
809 				psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
810 				return (retval);
811 			}
812 		}
813 	}
814 
815 	if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) {
816 		psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
817 		return (retval);
818 	}
819 
820 	apic_spec_rev = fpsp->mpfps_spec_rev;
821 	if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) {
822 		psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
823 		return (retval);
824 	}
825 
826 	/* check IMCR is present or not */
827 	apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP;
828 
829 	/* check default configuration (dual CPUs) */
830 	if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) {
831 		psm_unmap_phys(fptr, mapsize);
832 		return (apic_handle_defconf());
833 	}
834 
835 	/* MP Configuration Table */
836 	mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr);
837 
838 	psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */
839 
840 	/*
841 	 * Map in enough memory for the MP Configuration Table Header.
842 	 * Use this table to read the total length of the BIOS data and
843 	 * map in all the info
844 	 */
845 	/*LINTED: pointer cast may result in improper alignment */
846 	hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr,
847 	    sizeof (struct apic_mp_cnf_hdr), PROT_READ);
848 	if (!hdrp)
849 		return (retval);
850 
851 	/* check mp configuration table signature PCMP */
852 	if (hdrp->mpcnf_sig != 0x504d4350) {
853 		psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
854 		return (retval);
855 	}
856 	mpct_size = (int)hdrp->mpcnf_tbl_length;
857 
858 	apic_set_pwroff_method_from_mpcnfhdr(hdrp);
859 
860 	psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
861 
862 	if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) {
863 		/* This is an ACPI machine No need for further checks */
864 		return (retval);
865 	}
866 
867 	/*
868 	 * Map in the entries for this machine, ie. Processor
869 	 * Entry Tables, Bus Entry Tables, etc.
870 	 * They are in fixed order following one another
871 	 */
872 	mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ);
873 	if (!mpct)
874 		return (retval);
875 
876 	if (apic_checksum(mpct, mpct_size) != 0)
877 		goto apic_fail1;
878 
879 
880 	/*LINTED: pointer cast may result in improper alignment */
881 	hdrp = (struct apic_mp_cnf_hdr *)mpct;
882 	/*LINTED: pointer cast may result in improper alignment */
883 	apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic,
884 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
885 	if (!apicadr)
886 		goto apic_fail1;
887 
888 	/* Parse all information in the tables */
889 	bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS);
890 	if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) ==
891 	    PSM_SUCCESS)
892 		return (PSM_SUCCESS);
893 
894 	for (i = 0; i < apic_io_max; i++)
895 		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
896 	if (apic_cpus)
897 		kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc);
898 	if (apicadr)
899 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
900 apic_fail1:
901 	psm_unmap_phys(mpct, mpct_size);
902 	return (retval);
903 }
904 
905 static void
906 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
907 {
908 	int	i;
909 
910 	for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0]));
911 	    i++) {
912 		if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id,
913 		    strlen(apic_mps_ids[i].oem_id)) == 0) &&
914 		    (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id,
915 		    strlen(apic_mps_ids[i].prod_id)) == 0)) {
916 
917 			apic_poweroff_method = apic_mps_ids[i].poweroff_method;
918 			break;
919 		}
920 	}
921 
922 	if (apic_debug_mps_id != 0) {
923 		cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'"
924 		    "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n",
925 		    hdrp->mpcnf_oem_str[0],
926 		    hdrp->mpcnf_oem_str[1],
927 		    hdrp->mpcnf_oem_str[2],
928 		    hdrp->mpcnf_oem_str[3],
929 		    hdrp->mpcnf_oem_str[4],
930 		    hdrp->mpcnf_oem_str[5],
931 		    hdrp->mpcnf_oem_str[6],
932 		    hdrp->mpcnf_oem_str[7],
933 		    hdrp->mpcnf_prod_str[0],
934 		    hdrp->mpcnf_prod_str[1],
935 		    hdrp->mpcnf_prod_str[2],
936 		    hdrp->mpcnf_prod_str[3],
937 		    hdrp->mpcnf_prod_str[4],
938 		    hdrp->mpcnf_prod_str[5],
939 		    hdrp->mpcnf_prod_str[6],
940 		    hdrp->mpcnf_prod_str[7],
941 		    hdrp->mpcnf_prod_str[8],
942 		    hdrp->mpcnf_prod_str[9],
943 		    hdrp->mpcnf_prod_str[10],
944 		    hdrp->mpcnf_prod_str[11]);
945 	}
946 }
947 
948 static int
949 acpi_probe(void)
950 {
951 	int			i, id, intmax, ver, index, rv;
952 	int			acpi_verboseflags = 0;
953 	int			madt_seen, madt_size;
954 	APIC_HEADER		*ap;
955 	MADT_PROCESSOR_APIC	*mpa;
956 	MADT_IO_APIC		*mia;
957 	MADT_IO_SAPIC		*misa;
958 	MADT_INTERRUPT_OVERRIDE	*mio;
959 	MADT_NMI_SOURCE		*mns;
960 	MADT_INTERRUPT_SOURCE	*mis;
961 	MADT_LOCAL_APIC_NMI	*mlan;
962 	MADT_ADDRESS_OVERRIDE	*mao;
963 	ACPI_OBJECT_LIST 	arglist;
964 	ACPI_OBJECT		arg;
965 	int			sci;
966 	iflag_t			sci_flags;
967 	volatile int32_t	*ioapic;
968 	char			local_ids[NCPU];
969 	char			proc_ids[NCPU];
970 	uchar_t			hid;
971 
972 	if (!apic_use_acpi)
973 		return (PSM_FAILURE);
974 
975 	if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING,
976 	    (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK)
977 		return (PSM_FAILURE);
978 
979 	apicadr = (uint32_t *)psm_map_phys(
980 	    (uint32_t)acpi_mapic_dtp->LocalApicAddress,
981 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
982 	if (!apicadr)
983 		return (PSM_FAILURE);
984 
985 	id = apicadr[APIC_LID_REG];
986 	local_ids[0] = (uchar_t)(((uint_t)id) >> 24);
987 	apic_nproc = index = 1;
988 	apic_io_max = 0;
989 
990 	ap = (APIC_HEADER *) (acpi_mapic_dtp + 1);
991 	madt_size = acpi_mapic_dtp->Length;
992 	madt_seen = sizeof (*acpi_mapic_dtp);
993 
994 	while (madt_seen < madt_size) {
995 		switch (ap->Type) {
996 		case APIC_PROCESSOR:
997 			mpa = (MADT_PROCESSOR_APIC *) ap;
998 			if (mpa->ProcessorEnabled) {
999 				if (mpa->LocalApicId == local_ids[0])
1000 					proc_ids[0] = mpa->ProcessorId;
1001 				else if (apic_nproc < NCPU) {
1002 					local_ids[index] = mpa->LocalApicId;
1003 					proc_ids[index] = mpa->ProcessorId;
1004 					index++;
1005 					apic_nproc++;
1006 				} else
1007 					cmn_err(CE_WARN, "pcplusmp: exceeded "
1008 					    "maximum no. of CPUs (= %d)", NCPU);
1009 			}
1010 			break;
1011 
1012 		case APIC_IO:
1013 			mia = (MADT_IO_APIC *) ap;
1014 			if (apic_io_max < MAX_IO_APIC) {
1015 				apic_io_id[apic_io_max] = mia->IoApicId;
1016 				apic_io_vectbase[apic_io_max] =
1017 				    mia->Interrupt;
1018 				ioapic = apicioadr[apic_io_max] =
1019 				    (int32_t *)psm_map_phys(
1020 				    (uint32_t)mia->Address,
1021 				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1022 				if (!ioapic)
1023 					goto cleanup;
1024 				apic_io_max++;
1025 			}
1026 			break;
1027 
1028 		case APIC_XRUPT_OVERRIDE:
1029 			mio = (MADT_INTERRUPT_OVERRIDE *) ap;
1030 			if (acpi_isop == NULL)
1031 				acpi_isop = mio;
1032 			acpi_iso_cnt++;
1033 			break;
1034 
1035 		case APIC_NMI:
1036 			/* UNIMPLEMENTED */
1037 			mns = (MADT_NMI_SOURCE *) ap;
1038 			if (acpi_nmi_sp == NULL)
1039 				acpi_nmi_sp = mns;
1040 			acpi_nmi_scnt++;
1041 
1042 			cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n",
1043 				mns->Interrupt, mns->Polarity,
1044 				mns->TriggerMode);
1045 			break;
1046 
1047 		case APIC_LOCAL_NMI:
1048 			/* UNIMPLEMENTED */
1049 			mlan = (MADT_LOCAL_APIC_NMI *) ap;
1050 			if (acpi_nmi_cp == NULL)
1051 				acpi_nmi_cp = mlan;
1052 			acpi_nmi_ccnt++;
1053 
1054 			cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n",
1055 				mlan->ProcessorId, mlan->Polarity,
1056 				mlan->TriggerMode, mlan->Lint);
1057 			break;
1058 
1059 		case APIC_ADDRESS_OVERRIDE:
1060 			/* UNIMPLEMENTED */
1061 			mao = (MADT_ADDRESS_OVERRIDE *) ap;
1062 			cmn_err(CE_NOTE, "!apic: address override: %lx\n",
1063 				(long)mao->Address);
1064 			break;
1065 
1066 		case APIC_IO_SAPIC:
1067 			/* UNIMPLEMENTED */
1068 			misa = (MADT_IO_SAPIC *) ap;
1069 
1070 			cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n",
1071 				misa->IoSapicId, misa->InterruptBase,
1072 				(long)misa->Address);
1073 			break;
1074 
1075 		case APIC_XRUPT_SOURCE:
1076 			/* UNIMPLEMENTED */
1077 			mis = (MADT_INTERRUPT_SOURCE *) ap;
1078 
1079 			cmn_err(CE_NOTE,
1080 				"!apic: irq source: %d %d %d %d %d %d %d\n",
1081 				mis->ProcessorId, mis->ProcessorEid,
1082 				mis->Interrupt, mis->Polarity,
1083 				mis->TriggerMode, mis->InterruptType,
1084 				mis->IoSapicVector);
1085 			break;
1086 		case APIC_RESERVED:
1087 		default:
1088 			break;	/* ignore unknown items as per ACPI spec */
1089 		}
1090 
1091 		/* advance to next entry */
1092 		madt_seen += ap->Length;
1093 		ap = (APIC_HEADER *)(((char *)ap) + ap->Length);
1094 	}
1095 
1096 	if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc,
1097 	    KM_NOSLEEP)) == NULL)
1098 		goto cleanup;
1099 
1100 	apic_cpumask = (1 << apic_nproc) - 1;
1101 
1102 	/*
1103 	 * ACPI doesn't provide the local apic ver, get it directly from the
1104 	 * local apic
1105 	 */
1106 	ver = apicadr[APIC_VERS_REG];
1107 	for (i = 0; i < apic_nproc; i++) {
1108 		apic_cpus[i].aci_local_id = local_ids[i];
1109 		apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
1110 	}
1111 	for (i = 0; i < apic_io_max; i++) {
1112 		ioapic = apicioadr[i];
1113 
1114 		/*
1115 		 * need to check Sitka on the following acpi problem
1116 		 * On the Sitka, the ioapic's apic_id field isn't reporting
1117 		 * the actual io apic id. We have reported this problem
1118 		 * to Intel. Until they fix the problem, we will get the
1119 		 * actual id directly from the ioapic.
1120 		 */
1121 		ioapic[APIC_IO_REG] = APIC_ID_CMD;
1122 		id = ioapic[APIC_IO_DATA];
1123 		hid = (uchar_t)(((uint_t)id) >> 24);
1124 
1125 		if (hid != apic_io_id[i]) {
1126 			if (apic_io_id[i] == 0)
1127 				apic_io_id[i] = hid;
1128 			else { /* set ioapic id to whatever reported by ACPI */
1129 				id = ((int32_t)apic_io_id[i]) << 24;
1130 				ioapic[APIC_IO_REG] = APIC_ID_CMD;
1131 				ioapic[APIC_IO_DATA] = id;
1132 			}
1133 		}
1134 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
1135 		ver = ioapic[APIC_IO_DATA];
1136 		apic_io_ver[i] = (uchar_t)(ver & 0xff);
1137 		intmax = (ver >> 16) & 0xff;
1138 		apic_io_vectend[i] = apic_io_vectbase[i] + intmax;
1139 		if (apic_first_avail_irq <= apic_io_vectend[i])
1140 			apic_first_avail_irq = apic_io_vectend[i] + 1;
1141 	}
1142 
1143 
1144 	/*
1145 	 * Process SCI configuration here
1146 	 * An error may be returned here if
1147 	 * acpi-user-options specifies legacy mode
1148 	 * (no SCI, no ACPI mode)
1149 	 */
1150 	if (acpica_get_sci(&sci, &sci_flags) != AE_OK)
1151 		sci = -1;
1152 
1153 	/*
1154 	 * Now call acpi_init() to generate namespaces
1155 	 * If this fails, we don't attempt to use ACPI
1156 	 * even if we were able to get a MADT above
1157 	 */
1158 	if (acpica_init() != AE_OK)
1159 		goto cleanup;
1160 
1161 	/*
1162 	 * Squirrel away the SCI and flags for later on
1163 	 * in apic_picinit() when we're ready
1164 	 */
1165 	apic_sci_vect = sci;
1166 	apic_sci_flags = sci_flags;
1167 
1168 	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG)
1169 		acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG;
1170 
1171 	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG)
1172 		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG;
1173 
1174 	if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG)
1175 		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG;
1176 
1177 	if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) ==
1178 	    ACPI_PSM_FAILURE)
1179 		goto cleanup;
1180 
1181 	/* Enable ACPI APIC interrupt routing */
1182 	arglist.Count = 1;
1183 	arglist.Pointer = &arg;
1184 	arg.Type = ACPI_TYPE_INTEGER;
1185 	arg.Integer.Value = ACPI_APIC_MODE;	/* 1 */
1186 	rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL);
1187 	if (rv == AE_OK) {
1188 		build_reserved_irqlist((uchar_t *)apic_reserved_irqlist);
1189 		apic_enable_acpi = 1;
1190 		if (apic_use_acpi_madt_only) {
1191 			cmn_err(CE_CONT,
1192 			    "?Using ACPI for CPU/IOAPIC information ONLY\n");
1193 		}
1194 		return (PSM_SUCCESS);
1195 	}
1196 	/* if setting APIC mode failed above, we fall through to cleanup */
1197 
1198 cleanup:
1199 	if (apicadr != NULL) {
1200 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
1201 		apicadr = NULL;
1202 	}
1203 	apic_nproc = 0;
1204 	for (i = 0; i < apic_io_max; i++) {
1205 		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
1206 		apicioadr[i] = NULL;
1207 	}
1208 	apic_io_max = 0;
1209 	acpi_isop = NULL;
1210 	acpi_iso_cnt = 0;
1211 	acpi_nmi_sp = NULL;
1212 	acpi_nmi_scnt = 0;
1213 	acpi_nmi_cp = NULL;
1214 	acpi_nmi_ccnt = 0;
1215 	return (PSM_FAILURE);
1216 }
1217 
1218 /*
1219  * Handle default configuration. Fill in reqd global variables & tables
1220  * Fill all details as MP table does not give any more info
1221  */
1222 static int
1223 apic_handle_defconf()
1224 {
1225 	uint_t	lid;
1226 
1227 	/*LINTED: pointer cast may result in improper alignment */
1228 	apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR,
1229 	    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1230 	/*LINTED: pointer cast may result in improper alignment */
1231 	apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR,
1232 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
1233 	apic_cpus = (apic_cpus_info_t *)
1234 	    kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP);
1235 	if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus))
1236 		goto apic_handle_defconf_fail;
1237 	apic_cpumask = 3;
1238 	apic_nproc = 2;
1239 	lid = apicadr[APIC_LID_REG];
1240 	apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET);
1241 	/*
1242 	 * According to the PC+MP spec 1.1, the local ids
1243 	 * for the default configuration has to be 0 or 1
1244 	 */
1245 	if (apic_cpus[0].aci_local_id == 1)
1246 		apic_cpus[1].aci_local_id = 0;
1247 	else if (apic_cpus[0].aci_local_id == 0)
1248 		apic_cpus[1].aci_local_id = 1;
1249 	else
1250 		goto apic_handle_defconf_fail;
1251 
1252 	apic_io_id[0] = 2;
1253 	apic_io_max = 1;
1254 	if (apic_defconf >= 5) {
1255 		apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS;
1256 		apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS;
1257 		apic_io_ver[0] = APIC_INTEGRATED_VERS;
1258 	} else {
1259 		apic_cpus[0].aci_local_ver = 0;		/* 82489 DX */
1260 		apic_cpus[1].aci_local_ver = 0;
1261 		apic_io_ver[0] = 0;
1262 	}
1263 	if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6)
1264 		eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
1265 		    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
1266 	return (PSM_SUCCESS);
1267 
1268 apic_handle_defconf_fail:
1269 	if (apic_cpus)
1270 		kmem_free(apic_cpus, sizeof (*apic_cpus) * 2);
1271 	if (apicadr)
1272 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
1273 	if (apicioadr[0])
1274 		psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN);
1275 	return (PSM_FAILURE);
1276 }
1277 
1278 /* Parse the entries in MP configuration table and collect info that we need */
1279 static int
1280 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
1281 {
1282 	struct	apic_procent	*procp;
1283 	struct	apic_bus	*busp;
1284 	struct	apic_io_entry	*ioapicp;
1285 	struct	apic_io_intr	*intrp;
1286 	volatile int32_t	*ioapic;
1287 	uint_t	lid;
1288 	int	id;
1289 	uchar_t hid;
1290 
1291 	/*LINTED: pointer cast may result in improper alignment */
1292 	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));
1293 
1294 	/* No need to count cpu entries if we won't use them */
1295 	if (!bypass_cpus_and_ioapics) {
1296 
1297 		/* Find max # of CPUS and allocate structure accordingly */
1298 		apic_nproc = 0;
1299 		while (procp->proc_entry == APIC_CPU_ENTRY) {
1300 			if (procp->proc_cpuflags & CPUFLAGS_EN) {
1301 				apic_nproc++;
1302 			}
1303 			procp++;
1304 		}
1305 		if (apic_nproc > NCPU)
1306 			cmn_err(CE_WARN, "pcplusmp: exceeded "
1307 			    "maximum no. of CPUs (= %d)", NCPU);
1308 		if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *)
1309 		    kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP)))
1310 			return (PSM_FAILURE);
1311 	}
1312 
1313 	/*LINTED: pointer cast may result in improper alignment */
1314 	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));
1315 
1316 	/*
1317 	 * start with index 1 as 0 needs to be filled in with Boot CPU, but
1318 	 * if we're bypassing this information, it has already been filled
1319 	 * in by acpi_probe(), so don't overwrite it.
1320 	 */
1321 	if (!bypass_cpus_and_ioapics)
1322 		apic_nproc = 1;
1323 
1324 	while (procp->proc_entry == APIC_CPU_ENTRY) {
1325 		/* check whether the cpu exists or not */
1326 		if (!bypass_cpus_and_ioapics &&
1327 		    procp->proc_cpuflags & CPUFLAGS_EN) {
1328 			if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */
1329 				lid = apicadr[APIC_LID_REG];
1330 				apic_cpus[0].aci_local_id = procp->proc_apicid;
1331 				if (apic_cpus[0].aci_local_id !=
1332 				    (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) {
1333 					return (PSM_FAILURE);
1334 				}
1335 				apic_cpus[0].aci_local_ver =
1336 				    procp->proc_version;
1337 			} else {
1338 
1339 				apic_cpus[apic_nproc].aci_local_id =
1340 				    procp->proc_apicid;
1341 				apic_cpus[apic_nproc].aci_local_ver =
1342 				    procp->proc_version;
1343 				apic_nproc++;
1344 
1345 			}
1346 		}
1347 		procp++;
1348 	}
1349 
1350 	if (!bypass_cpus_and_ioapics) {
1351 		/* convert the number of processors into a cpumask */
1352 		apic_cpumask = (1 << apic_nproc) - 1;
1353 	}
1354 
1355 	/*
1356 	 * Save start of bus entries for later use.
1357 	 * Get EISA level cntrl if EISA bus is present.
1358 	 * Also get the CPI bus id for single CPI bus case
1359 	 */
1360 	apic_busp = busp = (struct apic_bus *)procp;
1361 	while (busp->bus_entry == APIC_BUS_ENTRY) {
1362 		lid = apic_find_bus_type((char *)&busp->bus_str1);
1363 		if (lid	== BUS_EISA) {
1364 			eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
1365 			    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
1366 		} else if (lid == BUS_PCI) {
1367 			/*
1368 			 * apic_single_pci_busid will be used only if
1369 			 * apic_pic_bus_total is equal to 1
1370 			 */
1371 			apic_pci_bus_total++;
1372 			apic_single_pci_busid = busp->bus_id;
1373 		}
1374 		busp++;
1375 	}
1376 
1377 	ioapicp = (struct apic_io_entry *)busp;
1378 
1379 	if (!bypass_cpus_and_ioapics)
1380 		apic_io_max = 0;
1381 	do {
1382 		if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) {
1383 			if (ioapicp->io_flags & IOAPIC_FLAGS_EN) {
1384 				apic_io_id[apic_io_max] = ioapicp->io_apicid;
1385 				apic_io_ver[apic_io_max] = ioapicp->io_version;
1386 		/*LINTED: pointer cast may result in improper alignment */
1387 				apicioadr[apic_io_max] =
1388 				    (int32_t *)psm_map_phys(
1389 				    (uint32_t)ioapicp->io_apic_addr,
1390 				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1391 
1392 				if (!apicioadr[apic_io_max])
1393 					return (PSM_FAILURE);
1394 
1395 				ioapic = apicioadr[apic_io_max];
1396 				ioapic[APIC_IO_REG] = APIC_ID_CMD;
1397 				id = ioapic[APIC_IO_DATA];
1398 				hid = (uchar_t)(((uint_t)id) >> 24);
1399 
1400 				if (hid != apic_io_id[apic_io_max]) {
1401 					if (apic_io_id[apic_io_max] == 0)
1402 						apic_io_id[apic_io_max] = hid;
1403 					else {
1404 						/*
1405 						 * set ioapic id to whatever
1406 						 * reported by MPS
1407 						 *
1408 						 * may not need to set index
1409 						 * again ???
1410 						 * take it out and try
1411 						 */
1412 
1413 						id = ((int32_t)
1414 						    apic_io_id[apic_io_max]) <<
1415 						    24;
1416 
1417 						ioapic[APIC_IO_REG] =
1418 						    APIC_ID_CMD;
1419 
1420 						ioapic[APIC_IO_DATA] = id;
1421 
1422 					}
1423 				}
1424 				apic_io_max++;
1425 			}
1426 		}
1427 		ioapicp++;
1428 	} while (ioapicp->io_entry == APIC_IO_ENTRY);
1429 
1430 	apic_io_intrp = (struct apic_io_intr *)ioapicp;
1431 
1432 	intrp = apic_io_intrp;
1433 	while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
1434 		if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) ||
1435 		    (apic_find_bus(intrp->intr_busid) == BUS_PCI)) {
1436 			apic_irq_translate = 1;
1437 			break;
1438 		}
1439 		intrp++;
1440 	}
1441 
1442 	return (PSM_SUCCESS);
1443 }
1444 
1445 boolean_t
1446 apic_cpu_in_range(int cpu)
1447 {
1448 	return ((cpu & ~IRQ_USER_BOUND) < apic_nproc);
1449 }
1450 
/*
 * Search a mapped region for the MP floating pointer signature "_MP_".
 *
 * cptr	- start of the region to search
 * len	- size of the region in bytes
 *
 * Only offsets that are multiples of 16 from cptr are examined, and a
 * candidate is considered only if all four signature bytes lie inside
 * the region, so nothing beyond cptr + len is ever read.
 *
 * Returns a pointer to the first match, or NULL when there is none.
 */
static struct apic_mpfps_hdr *
apic_find_fps_sig(caddr_t cptr, int len)
{
	int	i;

	/* Look for the pattern "_MP_" */
	for (i = 0; i + 4 <= len; i += 16) {
		if ((*(cptr+i) == '_') &&
		    (*(cptr+i+1) == 'M') &&
		    (*(cptr+i+2) == 'P') &&
		    (*(cptr+i+3) == '_'))
		    /*LINTED: pointer cast may result in improper alignment */
			return ((struct apic_mpfps_hdr *)(cptr + i));
	}
	return (NULL);
}
1467 
/*
 * Add up len bytes starting at bptr, keeping only the low 8 bits of the
 * running total, and return that value (0 - 255).  A non-positive len
 * yields 0.
 */
static int
apic_checksum(caddr_t bptr, int len)
{
	uchar_t	total = 0;
	int	idx = 0;

	while (idx < len) {
		total = (uchar_t)(total + bptr[idx]);
		idx++;
	}

	return ((int)total);
}
1479 
1480 
1481 /*
1482  * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable
1483  * are also set to NULL. vector->irq is set to a value which cannot map
1484  * to a real irq to show that it is free.
1485  */
1486 void
1487 apic_init()
1488 {
1489 	int	i;
1490 	int	*iptr;
1491 
1492 	int	j = 1;
1493 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
1494 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
1495 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
1496 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
1497 			/* get to highest vector at the same ipl */
1498 			continue;
1499 		for (; j <= apic_vectortoipl[i]; j++) {
1500 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
1501 			    APIC_BASE_VECT;
1502 		}
1503 	}
1504 	for (; j < MAXIPL + 1; j++)
1505 		/* fill up any empty ipltopri slots */
1506 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
1507 
1508 	/* cpu 0 is always up */
1509 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
1510 
1511 	iptr = (int *)&apic_irq_table[0];
1512 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
1513 		apic_level_intr[i] = 0;
1514 		*iptr++ = NULL;
1515 		apic_vector_to_irq[i] = APIC_RESV_IRQ;
1516 		apic_reprogram_info[i].valid = 0;
1517 		apic_reprogram_info[i].bindcpu = 0;
1518 		apic_reprogram_info[i].timeouts = 0;
1519 	}
1520 
1521 	/*
1522 	 * Allocate a dummy irq table entry for the reserved entry.
1523 	 * This takes care of the race between removing an irq and
1524 	 * clock detecting a CPU in that irq during interrupt load
1525 	 * sampling.
1526 	 */
1527 	apic_irq_table[APIC_RESV_IRQ] =
1528 	    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
1529 
1530 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
1531 	mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL);
1532 #if defined(__amd64)
1533 	/*
1534 	 * Make cpu-specific interrupt info point to cr8pri vector
1535 	 */
1536 	for (i = 0; i <= MAXIPL; i++)
1537 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
1538 	CPU->cpu_pri_data = apic_cr8pri;
1539 	intpri_use_cr8 = 1;
1540 #endif	/* __amd64 */
1541 }
1542 
1543 /*
1544  * handler for APIC Error interrupt. Just print a warning and continue
1545  */
1546 static int
1547 apic_error_intr()
1548 {
1549 	uint_t	error0, error1, error;
1550 	uint_t	i;
1551 
1552 	/*
1553 	 * We need to write before read as per 7.4.17 of system prog manual.
1554 	 * We do both and or the results to be safe
1555 	 */
1556 	error0 = apicadr[APIC_ERROR_STATUS];
1557 	apicadr[APIC_ERROR_STATUS] = 0;
1558 	error1 = apicadr[APIC_ERROR_STATUS];
1559 	error = error0 | error1;
1560 
1561 	/*
1562 	 * Clear the APIC error status (do this on all cpus that enter here)
1563 	 * (two writes are required due to the semantics of accessing the
1564 	 * error status register.)
1565 	 */
1566 	apicadr[APIC_ERROR_STATUS] = 0;
1567 	apicadr[APIC_ERROR_STATUS] = 0;
1568 
1569 	/*
1570 	 * Prevent more than 1 CPU from handling error interrupt causing
1571 	 * double printing (interleave of characters from multiple
1572 	 * CPU's when using prom_printf)
1573 	 */
1574 	if (lock_try(&apic_error_lock) == 0)
1575 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
1576 	if (error) {
1577 #if	DEBUG
1578 		if (apic_debug)
1579 			debug_enter("pcplusmp: APIC Error interrupt received");
1580 #endif /* DEBUG */
1581 		if (apic_panic_on_apic_error)
1582 			cmn_err(CE_PANIC,
1583 			    "APIC Error interrupt on CPU %d. Status = %x\n",
1584 			    psm_get_cpu_id(), error);
1585 		else {
1586 			if ((error & ~APIC_CS_ERRORS) == 0) {
1587 				/* cksum error only */
1588 				apic_error |= APIC_ERR_APIC_ERROR;
1589 				apic_apic_error |= error;
1590 				apic_num_apic_errors++;
1591 				apic_num_cksum_errors++;
1592 			} else {
1593 				/*
1594 				 * prom_printf is the best shot we have of
1595 				 * something which is problem free from
1596 				 * high level/NMI type of interrupts
1597 				 */
1598 				prom_printf("APIC Error interrupt on CPU %d. "
1599 				    "Status 0 = %x, Status 1 = %x\n",
1600 				    psm_get_cpu_id(), error0, error1);
1601 				apic_error |= APIC_ERR_APIC_ERROR;
1602 				apic_apic_error |= error;
1603 				apic_num_apic_errors++;
1604 				for (i = 0; i < apic_error_display_delay; i++) {
1605 					tenmicrosec();
1606 				}
1607 				/*
1608 				 * provide more delay next time limited to
1609 				 * roughly 1 clock tick time
1610 				 */
1611 				if (apic_error_display_delay < 500)
1612 					apic_error_display_delay *= 2;
1613 			}
1614 		}
1615 		lock_clear(&apic_error_lock);
1616 		return (DDI_INTR_CLAIMED);
1617 	} else {
1618 		lock_clear(&apic_error_lock);
1619 		return (DDI_INTR_UNCLAIMED);
1620 	}
1621 	/* NOTREACHED */
1622 }
1623 
1624 /*
1625  * Turn off the mask bit in the performance counter Local Vector Table entry.
1626  */
1627 static void
1628 apic_cpcovf_mask_clear(void)
1629 {
1630 	apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK;
1631 }
1632 
1633 static void
1634 apic_init_intr()
1635 {
1636 	processorid_t	cpun = psm_get_cpu_id();
1637 
1638 #if defined(__amd64)
1639 	setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT));
1640 #else
1641 	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
1642 #endif
1643 
1644 	if (apic_flat_model)
1645 		apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL;
1646 	else
1647 		apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL;
1648 	apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun;
1649 
1650 	/* need to enable APIC before unmasking NMI */
1651 	apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR;
1652 
1653 	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
1654 	apicadr[APIC_INT_VECT0]	= AV_MASK;	/* local intr reg 0 */
1655 	apicadr[APIC_INT_VECT1] = AV_NMI;	/* enable NMI */
1656 
1657 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS)
1658 		return;
1659 
1660 	/* Enable performance counter overflow interrupt */
1661 
1662 	if ((x86_feature & X86_MSR) != X86_MSR)
1663 		apic_enable_cpcovf_intr = 0;
1664 	if (apic_enable_cpcovf_intr) {
1665 		if (apic_cpcovf_vect == 0) {
1666 			int ipl = APIC_PCINT_IPL;
1667 			int irq = apic_get_ipivect(ipl, -1);
1668 
1669 			ASSERT(irq != -1);
1670 			apic_cpcovf_vect = apic_irq_table[irq]->airq_vector;
1671 			ASSERT(apic_cpcovf_vect);
1672 			(void) add_avintr(NULL, ipl,
1673 			    (avfunc)kcpc_hw_overflow_intr,
1674 			    "apic pcint", irq, NULL, NULL, NULL, NULL);
1675 			kcpc_hw_overflow_intr_installed = 1;
1676 			kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear;
1677 		}
1678 		apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect;
1679 	}
1680 
1681 	/* Enable error interrupt */
1682 
1683 	if (apic_enable_error_intr) {
1684 		if (apic_errvect == 0) {
1685 			int ipl = 0xf;	/* get highest priority intr */
1686 			int irq = apic_get_ipivect(ipl, -1);
1687 
1688 			ASSERT(irq != -1);
1689 			apic_errvect = apic_irq_table[irq]->airq_vector;
1690 			ASSERT(apic_errvect);
1691 			/*
1692 			 * Not PSMI compliant, but we are going to merge
1693 			 * with ON anyway
1694 			 */
1695 			(void) add_avintr((void *)NULL, ipl,
1696 			    (avfunc)apic_error_intr, "apic error intr",
1697 			    irq, NULL, NULL, NULL, NULL);
1698 		}
1699 		apicadr[APIC_ERR_VECT] = apic_errvect;
1700 		apicadr[APIC_ERROR_STATUS] = 0;
1701 		apicadr[APIC_ERROR_STATUS] = 0;
1702 	}
1703 }
1704 
1705 static void
1706 apic_disable_local_apic()
1707 {
1708 	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
1709 	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
1710 	apicadr[APIC_INT_VECT0] = AV_MASK;	/* local intr reg 0 */
1711 	apicadr[APIC_INT_VECT1] = AV_MASK;	/* disable NMI */
1712 	apicadr[APIC_ERR_VECT] = AV_MASK;	/* and error interrupt */
1713 	apicadr[APIC_PCINT_VECT] = AV_MASK;	/* and perf counter intr */
1714 	apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR;
1715 }
1716 
1717 static void
1718 apic_picinit(void)
1719 {
1720 	int i, j;
1721 	uint_t isr;
1722 	volatile int32_t *ioapic;
1723 	apic_irq_t	*irqptr;
1724 	struct intrspec ispec;
1725 
1726 	/*
1727 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
1728 	 * bit on without clearing it with EOI.  Since softint
1729 	 * uses vector 0x20 to interrupt itself, so softint will
1730 	 * not work on this machine.  In order to fix this problem
1731 	 * a check is made to verify all the isr bits are clear.
1732 	 * If not, EOIs are issued to clear the bits.
1733 	 */
1734 	for (i = 7; i >= 1; i--) {
1735 		if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0)
1736 			for (j = 0; ((j < 32) && (isr != 0)); j++)
1737 				if (isr & (1 << j)) {
1738 					apicadr[APIC_EOI_REG] = 0;
1739 					isr &= ~(1 << j);
1740 					apic_error |= APIC_ERR_BOOT_EOI;
1741 				}
1742 	}
1743 
1744 	/* set a flag so we know we have run apic_picinit() */
1745 	apic_flag = 1;
1746 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
1747 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
1748 	LOCK_INIT_CLEAR(&apic_revector_lock);
1749 	LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock);
1750 	LOCK_INIT_CLEAR(&apic_error_lock);
1751 
1752 	picsetup();	 /* initialise the 8259 */
1753 
1754 	/* add nmi handler - least priority nmi handler */
1755 	LOCK_INIT_CLEAR(&apic_nmi_lock);
1756 
1757 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
1758 	    "pcplusmp NMI handler", (caddr_t)NULL))
1759 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
1760 
1761 	apic_init_intr();
1762 
1763 	/* enable apic mode if imcr present */
1764 	if (apic_imcrp) {
1765 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1766 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
1767 	}
1768 
1769 	/* mask interrupt vectors					*/
1770 	for (j = 0; j < apic_io_max; j++) {
1771 		int intin_max;
1772 		ioapic = apicioadr[j];
1773 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
1774 		/* Bits 23-16 define the maximum redirection entries */
1775 		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
1776 		for (i = 0; i < intin_max; i++) {
1777 			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
1778 			ioapic[APIC_IO_DATA] = AV_MASK;
1779 		}
1780 	}
1781 
1782 	/*
1783 	 * Hack alert: deal with ACPI SCI interrupt chicken/egg here
1784 	 */
1785 	if (apic_sci_vect > 0) {
1786 		/*
1787 		 * acpica has already done add_avintr(); we just
1788 		 * to finish the job by mimicing translate_irq()
1789 		 *
1790 		 * Fake up an intrspec and setup the tables
1791 		 */
1792 		ispec.intrspec_vec = apic_sci_vect;
1793 		ispec.intrspec_pri = SCI_IPL;
1794 
1795 		if (apic_setup_irq_table(NULL, apic_sci_vect, NULL,
1796 		    &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) {
1797 			cmn_err(CE_WARN, "!apic: SCI setup failed");
1798 			return;
1799 		}
1800 		irqptr = apic_irq_table[apic_sci_vect];
1801 
1802 		/* Program I/O APIC */
1803 		(void) apic_setup_io_intr(irqptr, apic_sci_vect);
1804 
1805 		irqptr->airq_share++;
1806 	}
1807 }
1808 
1809 
1810 static void
1811 apic_cpu_start(processorid_t cpun, caddr_t rm_code)
1812 {
1813 	int		loop_count;
1814 	uint32_t	vector;
1815 	uint_t		cpu_id, iflag;
1816 
1817 	cpu_id = apic_cpus[cpun].aci_local_id;
1818 
1819 	apic_cmos_ssb_set = 1;
1820 
1821 	/*
1822 	 * Interrupts on BSP cpu will be disabled during these startup
1823 	 * steps in order to avoid unwanted side effects from
1824 	 * executing interrupt handlers on a problematic BIOS.
1825 	 */
1826 
1827 	iflag = intr_clear();
1828 	outb(CMOS_ADDR, SSB);
1829 	outb(CMOS_DATA, BIOS_SHUTDOWN);
1830 
1831 	while (get_apic_cmd1() & AV_PENDING)
1832 		apic_ret();
1833 
1834 	/* for integrated - make sure there is one INIT IPI in buffer */
1835 	/* for external - it will wake up the cpu */
1836 	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
1837 	apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET;
1838 
1839 	/* If only 1 CPU is installed, PENDING bit will not go low */
1840 	for (loop_count = 0x1000; loop_count; loop_count--)
1841 		if (get_apic_cmd1() & AV_PENDING)
1842 			apic_ret();
1843 		else
1844 			break;
1845 
1846 	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
1847 	apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET;
1848 
1849 	drv_usecwait(20000);		/* 20 milli sec */
1850 
1851 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
1852 		/* integrated apic */
1853 
1854 		rm_code = (caddr_t)(uintptr_t)rm_platter_pa;
1855 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
1856 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
1857 
1858 		/* to offset the INIT IPI queue up in the buffer */
1859 		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
1860 		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;
1861 
1862 		drv_usecwait(200);		/* 20 micro sec */
1863 
1864 		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
1865 		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;
1866 
1867 		drv_usecwait(200);		/* 20 micro sec */
1868 	}
1869 	intr_restore(iflag);
1870 }
1871 
1872 
#ifdef	DEBUG
/*
 * Debug-only variables, compiled in only when DEBUG is defined.
 * NOTE(review): their consumers are not visible in this part of the
 * file; the names suggest they control breaking on a chosen cpu and
 * artificially stretching interrupts -- confirm against the users.
 */
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = (1 << 3);	/* IPL of 3 matches nothing now */

/*
 * Intentionally empty.
 * NOTE(review): presumably a convenient place to set a debugger
 * breakpoint -- confirm.
 */
void
apic_break()
{
}
#endif /* DEBUG */
1883 
1884 /*
1885  * platform_intr_enter
1886  *
1887  *	Called at the beginning of the interrupt service routine to
1888  *	mask all level equal to and below the interrupt priority
1889  *	of the interrupting vector.  An EOI should be given to
1890  *	the interrupt controller to enable other HW interrupts.
1891  *
1892  *	Return -1 for spurious interrupts
1893  *
1894  */
1895 /*ARGSUSED*/
1896 static int
1897 apic_intr_enter(int ipl, int *vectorp)
1898 {
1899 	uchar_t vector;
1900 	int nipl;
1901 	int irq, iflag;
1902 	apic_cpus_info_t *cpu_infop;
1903 
1904 	/*
1905 	 * The real vector programmed in APIC is *vectorp + 0x20
1906 	 * But, cmnint code subtracts 0x20 before pushing it.
1907 	 * Hence APIC_BASE_VECT is 0x20.
1908 	 */
1909 
1910 	vector = (uchar_t)*vectorp;
1911 
1912 	/* if interrupted by the clock, increment apic_nsec_since_boot */
1913 	if (vector == apic_clkvect) {
1914 		if (!apic_oneshot) {
1915 			/* NOTE: this is not MT aware */
1916 			apic_hrtime_stamp++;
1917 			apic_nsec_since_boot += apic_nsec_per_intr;
1918 			apic_hrtime_stamp++;
1919 			last_count_read = apic_hertz_count;
1920 			apic_redistribute_compute();
1921 		}
1922 
1923 		/* We will avoid all the book keeping overhead for clock */
1924 		nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
1925 #if defined(__amd64)
1926 		setcr8((ulong_t)apic_cr8pri[nipl]);
1927 #else
1928 		apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
1929 #endif
1930 		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
1931 		apicadr[APIC_EOI_REG] = 0;
1932 		return (nipl);
1933 	}
1934 
1935 	cpu_infop = &apic_cpus[psm_get_cpu_id()];
1936 
1937 	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
1938 		cpu_infop->aci_spur_cnt++;
1939 		return (APIC_INT_SPURIOUS);
1940 	}
1941 
1942 	/* Check if the vector we got is really what we need */
1943 	if (apic_revector_pending) {
1944 		/*
1945 		 * Disable interrupts for the duration of
1946 		 * the vector translation to prevent a self-race for
1947 		 * the apic_revector_lock.  This cannot be done
1948 		 * in apic_xlate_vector because it is recursive and
1949 		 * we want the vector translation to be atomic with
1950 		 * respect to other (higher-priority) interrupts.
1951 		 */
1952 		iflag = intr_clear();
1953 		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
1954 		    APIC_BASE_VECT;
1955 		intr_restore(iflag);
1956 	}
1957 
1958 	nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
1959 	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];
1960 
1961 #if defined(__amd64)
1962 	setcr8((ulong_t)apic_cr8pri[nipl]);
1963 #else
1964 	apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
1965 #endif
1966 
1967 	cpu_infop->aci_current[nipl] = (uchar_t)irq;
1968 	cpu_infop->aci_curipl = (uchar_t)nipl;
1969 	cpu_infop->aci_ISR_in_progress |= 1 << nipl;
1970 
1971 	/*
1972 	 * apic_level_intr could have been assimilated into the irq struct.
1973 	 * but, having it as a character array is more efficient in terms of
1974 	 * cache usage. So, we leave it as is.
1975 	 */
1976 	if (!apic_level_intr[irq])
1977 		apicadr[APIC_EOI_REG] = 0;
1978 
1979 #ifdef	DEBUG
1980 	APIC_DEBUG_BUF_PUT(vector);
1981 	APIC_DEBUG_BUF_PUT(irq);
1982 	APIC_DEBUG_BUF_PUT(nipl);
1983 	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
1984 	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
1985 		drv_usecwait(apic_stretch_interrupts);
1986 
1987 	if (apic_break_on_cpu == psm_get_cpu_id())
1988 		apic_break();
1989 #endif /* DEBUG */
1990 	return (nipl);
1991 }
1992 
1993 static void
1994 apic_intr_exit(int prev_ipl, int irq)
1995 {
1996 	apic_cpus_info_t *cpu_infop;
1997 
1998 #if defined(__amd64)
1999 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
2000 #else
2001 	apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
2002 #endif
2003 
2004 	cpu_infop = &apic_cpus[psm_get_cpu_id()];
2005 	if (apic_level_intr[irq])
2006 		apicadr[APIC_EOI_REG] = 0;
2007 
2008 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
2009 	/* ISR above current pri could not be in progress */
2010 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
2011 }
2012 
2013 /*
2014  * Mask all interrupts below or equal to the given IPL
2015  */
2016 static void
2017 apic_setspl(int ipl)
2018 {
2019 
2020 #if defined(__amd64)
2021 	setcr8((ulong_t)apic_cr8pri[ipl]);
2022 #else
2023 	apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
2024 #endif
2025 
2026 	/* interrupts at ipl above this cannot be in progress */
2027 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
2028 	/*
2029 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
2030 	 * have enough time to come in before the priority is raised again
2031 	 * during the idle() loop.
2032 	 */
2033 	if (apic_setspl_delay)
2034 		(void) get_apic_pri();
2035 }
2036 
2037 /*
2038  * trigger a software interrupt at the given IPL
2039  */
2040 static void
2041 apic_set_softintr(int ipl)
2042 {
2043 	int vector;
2044 	uint_t flag;
2045 
2046 	vector = apic_resv_vector[ipl];
2047 
2048 	flag = intr_clear();
2049 
2050 	while (get_apic_cmd1() & AV_PENDING)
2051 		apic_ret();
2052 
2053 	/* generate interrupt at vector on itself only */
2054 	apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector;
2055 
2056 	intr_restore(flag);
2057 }
2058 
2059 /*
2060  * generates an interprocessor interrupt to another CPU
2061  */
2062 static void
2063 apic_send_ipi(int cpun, int ipl)
2064 {
2065 	int vector;
2066 	uint_t flag;
2067 
2068 	vector = apic_resv_vector[ipl];
2069 
2070 	flag = intr_clear();
2071 
2072 	while (get_apic_cmd1() & AV_PENDING)
2073 		apic_ret();
2074 
2075 	apicadr[APIC_INT_CMD2] =
2076 	    apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET;
2077 	apicadr[APIC_INT_CMD1] = vector;
2078 
2079 	intr_restore(flag);
2080 }
2081 
2082 
2083 /*ARGSUSED*/
2084 static void
2085 apic_set_idlecpu(processorid_t cpun)
2086 {
2087 }
2088 
2089 /*ARGSUSED*/
2090 static void
2091 apic_unset_idlecpu(processorid_t cpun)
2092 {
2093 }
2094 
2095 
/*
 * apic_ret: empty function called from the body of the busy-wait loops
 * in this file.
 */
static void
apic_ret()
{
	/* intentionally empty */
}
2100 
2101 static int
2102 get_apic_cmd1()
2103 {
2104 	return (apicadr[APIC_INT_CMD1]);
2105 }
2106 
/*
 * get_apic_pri: read back the current task priority -- %cr8 on amd64,
 * the local APIC task register otherwise.
 */
static int
get_apic_pri()
{
	int pri;

#if defined(__amd64)
	pri = (int)getcr8();
#else
	pri = apicadr[APIC_TASK_REG];
#endif
	return (pri);
}
2116 
2117 /*
2118  * If apic_coarse_time == 1, then apic_gettime() is used instead of
2119  * apic_gethrtime().  This is used for performance instead of accuracy.
2120  */
2121 
2122 static hrtime_t
2123 apic_gettime()
2124 {
2125 	int old_hrtime_stamp;
2126 	hrtime_t temp;
2127 
2128 	/*
2129 	 * In one-shot mode, we do not keep time, so if anyone
2130 	 * calls psm_gettime() directly, we vector over to
2131 	 * gethrtime().
2132 	 * one-shot mode MUST NOT be enabled if this psm is the source of
2133 	 * hrtime.
2134 	 */
2135 
2136 	if (apic_oneshot)
2137 		return (gethrtime());
2138 
2139 
2140 gettime_again:
2141 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
2142 		apic_ret();
2143 
2144 	temp = apic_nsec_since_boot;
2145 
2146 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
2147 		goto gettime_again;
2148 	}
2149 	return (temp);
2150 }
2151 
2152 /*
2153  * Here we return the number of nanoseconds since booting.  Note every
2154  * clock interrupt increments apic_nsec_since_boot by the appropriate
2155  * amount.
2156  */
2157 static hrtime_t
2158 apic_gethrtime()
2159 {
2160 	int curr_timeval, countval, elapsed_ticks, oflags;
2161 	int old_hrtime_stamp, status;
2162 	hrtime_t temp;
2163 	uchar_t	cpun;
2164 
2165 
2166 	/*
2167 	 * In one-shot mode, we do not keep time, so if anyone
2168 	 * calls psm_gethrtime() directly, we vector over to
2169 	 * gethrtime().
2170 	 * one-shot mode MUST NOT be enabled if this psm is the source of
2171 	 * hrtime.
2172 	 */
2173 
2174 	if (apic_oneshot)
2175 		return (gethrtime());
2176 
2177 	oflags = intr_clear();	/* prevent migration */
2178 
2179 	cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET);
2180 
2181 	lock_set(&apic_gethrtime_lock);
2182 
2183 gethrtime_again:
2184 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
2185 		apic_ret();
2186 
2187 	/*
2188 	 * Check to see which CPU we are on.  Note the time is kept on
2189 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
2190 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
2191 	 */
2192 	if (cpun == apic_cpus[0].aci_local_id) {
2193 		countval = apicadr[APIC_CURR_COUNT];
2194 	} else {
2195 		while (get_apic_cmd1() & AV_PENDING)
2196 			apic_ret();
2197 
2198 		apicadr[APIC_INT_CMD2] =
2199 		    apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET;
2200 		apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE;
2201 
2202 		while ((status = get_apic_cmd1()) & AV_READ_PENDING)
2203 			apic_ret();
2204 
2205 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
2206 			countval = apicadr[APIC_REMOTE_READ];
2207 		else {	/* 0 = invalid */
2208 			apic_remote_hrterr++;
2209 			/*
2210 			 * return last hrtime right now, will need more
2211 			 * testing if change to retry
2212 			 */
2213 			temp = apic_last_hrtime;
2214 
2215 			lock_clear(&apic_gethrtime_lock);
2216 
2217 			intr_restore(oflags);
2218 
2219 			return (temp);
2220 		}
2221 	}
2222 	if (countval > last_count_read)
2223 		countval = 0;
2224 	else
2225 		last_count_read = countval;
2226 
2227 	elapsed_ticks = apic_hertz_count - countval;
2228 
2229 	curr_timeval = elapsed_ticks * apic_nsec_per_tick;
2230 	temp = apic_nsec_since_boot + curr_timeval;
2231 
2232 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
2233 		/* we might have clobbered last_count_read. Restore it */
2234 		last_count_read = apic_hertz_count;
2235 		goto gethrtime_again;
2236 	}
2237 
2238 	if (temp < apic_last_hrtime) {
2239 		/* return last hrtime if error occurs */
2240 		apic_hrtime_error++;
2241 		temp = apic_last_hrtime;
2242 	}
2243 	else
2244 		apic_last_hrtime = temp;
2245 
2246 	lock_clear(&apic_gethrtime_lock);
2247 	intr_restore(oflags);
2248 
2249 	return (temp);
2250 }
2251 
2252 /* apic NMI handler */
2253 /*ARGSUSED*/
2254 static void
2255 apic_nmi_intr(caddr_t arg)
2256 {
2257 	if (apic_shutdown_processors) {
2258 		apic_disable_local_apic();
2259 		return;
2260 	}
2261 
2262 	if (lock_try(&apic_nmi_lock)) {
2263 		if (apic_kmdb_on_nmi) {
2264 			if (psm_debugger() == 0) {
2265 				cmn_err(CE_PANIC,
2266 				    "NMI detected, kmdb is not available.");
2267 			} else {
2268 				debug_enter("\nNMI detected, entering kmdb.\n");
2269 			}
2270 		} else {
2271 			if (apic_panic_on_nmi) {
2272 				/* Keep panic from entering kmdb. */
2273 				nopanicdebug = 1;
2274 				cmn_err(CE_PANIC, "pcplusmp: NMI received");
2275 			} else {
2276 				/*
2277 				 * prom_printf is the best shot we have
2278 				 * of something which is problem free from
2279 				 * high level/NMI type of interrupts
2280 				 */
2281 				prom_printf("pcplusmp: NMI received\n");
2282 				apic_error |= APIC_ERR_NMI;
2283 				apic_num_nmis++;
2284 			}
2285 		}
2286 		lock_clear(&apic_nmi_lock);
2287 	}
2288 }
2289 
2290 /*
2291  * Add mask bits to disable interrupt vector from happening
2292  * at or above IPL. In addition, it should remove mask bits
2293  * to enable interrupt vectors below the given IPL.
2294  *
2295  * Both add and delspl are complicated by the fact that different interrupts
2296  * may share IRQs. This can happen in two ways.
2297  * 1. The same H/W line is shared by more than 1 device
2298  * 1a. with interrupts at different IPLs
2299  * 1b. with interrupts at same IPL
2300  * 2. We ran out of vectors at a given IPL and started sharing vectors.
2301  * 1b and 2 should be handled gracefully, except for the fact some ISRs
2302  * will get called often when no interrupt is pending for the device.
2303  * For 1a, we just hope that the machine blows up with the person who
2304  * set it up that way!. In the meantime, we handle it at the higher IPL.
2305  */
2306 /*ARGSUSED*/
2307 static int
2308 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
2309 {
2310 	uchar_t vector;
2311 	int iflag;
2312 	apic_irq_t *irqptr, *irqheadptr;
2313 	int irqindex;
2314 
2315 	ASSERT(max_ipl <= UCHAR_MAX);
2316 	irqindex = IRQINDEX(irqno);
2317 
2318 	if ((irqindex == -1) || (!apic_irq_table[irqindex]))
2319 		return (PSM_FAILURE);
2320 
2321 	irqptr = irqheadptr = apic_irq_table[irqindex];
2322 
2323 	DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x "
2324 	    "vector=0x%x\n", (void *)irqptr->airq_dip,
2325 	    irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));
2326 
2327 	while (irqptr) {
2328 		if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
2329 			break;
2330 		irqptr = irqptr->airq_next;
2331 	}
2332 	irqptr->airq_share++;
2333 
2334 	/* return if it is not hardware interrupt */
2335 	if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
2336 		return (PSM_SUCCESS);
2337 
2338 	/* Or if there are more interupts at a higher IPL */
2339 	if (ipl != max_ipl)
2340 		return (PSM_SUCCESS);
2341 
2342 	/*
2343 	 * if apic_picinit() has not been called yet, just return.
2344 	 * At the end of apic_picinit(), we will call setup_io_intr().
2345 	 */
2346 
2347 	if (!apic_flag)
2348 		return (PSM_SUCCESS);
2349 
2350 	iflag = intr_clear();
2351 
2352 	/*
2353 	 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate,
2354 	 * return failure. Not very elegant, but then we hope the
2355 	 * machine will blow up with ...
2356 	 */
2357 	if (irqptr->airq_ipl != max_ipl) {
2358 		vector = apic_allocate_vector(max_ipl, irqindex, 1);
2359 		if (vector == 0) {
2360 			intr_restore(iflag);
2361 			irqptr->airq_share--;
2362 			return (PSM_FAILURE);
2363 		}
2364 		irqptr = irqheadptr;
2365 		apic_mark_vector(irqptr->airq_vector, vector);
2366 		while (irqptr) {
2367 			irqptr->airq_vector = vector;
2368 			irqptr->airq_ipl = (uchar_t)max_ipl;
2369 			/*
2370 			 * reprogram irq being added and every one else
2371 			 * who is not in the UNINIT state
2372 			 */
2373 			if ((VIRTIRQ(irqindex, irqptr->airq_share_id) ==
2374 			    irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) {
2375 				apic_record_rdt_entry(irqptr, irqindex);
2376 				(void) apic_setup_io_intr(irqptr, irqindex);
2377 			}
2378 			irqptr = irqptr->airq_next;
2379 		}
2380 		intr_restore(iflag);
2381 		return (PSM_SUCCESS);
2382 	}
2383 
2384 	ASSERT(irqptr);
2385 	(void) apic_setup_io_intr(irqptr, irqindex);
2386 	intr_restore(iflag);
2387 	return (PSM_SUCCESS);
2388 }
2389 
2390 /*
2391  * Recompute mask bits for the given interrupt vector.
2392  * If there is no interrupt servicing routine for this
2393  * vector, this function should disable interrupt vector
2394  * from happening at all IPLs. If there are still
2395  * handlers using the given vector, this function should
2396  * disable the given vector from happening below the lowest
2397  * IPL of the remaining hadlers.
2398  */
2399 /*ARGSUSED*/
2400 static int
2401 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
2402 {
2403 	uchar_t vector, bind_cpu;
2404 	int	iflag, intin, irqindex;
2405 	volatile int32_t *ioapic;
2406 	apic_irq_t	*irqptr, *irqheadptr;
2407 
2408 	irqindex = IRQINDEX(irqno);
2409 	irqptr = irqheadptr = apic_irq_table[irqindex];
2410 
2411 	DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x "
2412 	    "vector=0x%x\n", (void *)irqptr->airq_dip,
2413 	    irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));
2414 
2415 	while (irqptr) {
2416 		if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
2417 			break;
2418 		irqptr = irqptr->airq_next;
2419 	}
2420 	ASSERT(irqptr);
2421 
2422 	irqptr->airq_share--;
2423 
2424 	if (ipl < max_ipl)
2425 		return (PSM_SUCCESS);
2426 
2427 	/* return if it is not hardware interrupt */
2428 	if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
2429 		return (PSM_SUCCESS);
2430 
2431 	if (!apic_flag) {
2432 		/*
2433 		 * Clear irq_struct. If two devices shared an intpt
2434 		 * line & 1 unloaded before picinit, we are hosed. But, then
2435 		 * we hope the machine will ...
2436 		 */
2437 		irqptr->airq_mps_intr_index = FREE_INDEX;
2438 		irqptr->airq_temp_cpu = IRQ_UNINIT;
2439 		apic_free_vector(irqptr->airq_vector);
2440 		return (PSM_SUCCESS);
2441 	}
2442 	/*
2443 	 * Downgrade vector to new max_ipl if needed.If we cannot allocate,
2444 	 * use old IPL. Not very elegant, but then we hope ...
2445 	 */
2446 	if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) {
2447 		apic_irq_t	*irqp;
2448 		if (vector = apic_allocate_vector(max_ipl, irqno, 1)) {
2449 			apic_mark_vector(irqheadptr->airq_vector, vector);
2450 			irqp = irqheadptr;
2451 			while (irqp) {
2452 				irqp->airq_vector = vector;
2453 				irqp->airq_ipl = (uchar_t)max_ipl;
2454 				if (irqp->airq_temp_cpu != IRQ_UNINIT) {
2455 					apic_record_rdt_entry(irqp, irqindex);
2456 					(void) apic_setup_io_intr(irqp,
2457 					    irqindex);
2458 				}
2459 				irqp = irqp->airq_next;
2460 			}
2461 		}
2462 	}
2463 
2464 	if (irqptr->airq_share)
2465 		return (PSM_SUCCESS);
2466 
2467 	ioapic = apicioadr[irqptr->airq_ioapicindex];
2468 	intin = irqptr->airq_intin_no;
2469 	iflag = intr_clear();
2470 	lock_set(&apic_ioapic_lock);
2471 	ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin;
2472 	ioapic[APIC_IO_DATA] = AV_MASK;
2473 
2474 	/* Disable the MSI/X vector */
2475 	if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) {
2476 		int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ?
2477 		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
2478 
2479 		/*
2480 		 * Make sure we only disable on the last
2481 		 * of the multi-MSI support
2482 		 */
2483 		if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) {
2484 			(void) pci_msi_unconfigure(irqptr->airq_dip, type,
2485 			    irqptr->airq_ioapicindex);
2486 
2487 			(void) pci_msi_disable_mode(irqptr->airq_dip, type,
2488 			    irqptr->airq_ioapicindex);
2489 		}
2490 	}
2491 
2492 	if (max_ipl == PSM_INVALID_IPL) {
2493 		ASSERT(irqheadptr == irqptr);
2494 		bind_cpu = irqptr->airq_temp_cpu;
2495 		if (((uchar_t)bind_cpu != IRQ_UNBOUND) &&
2496 		    ((uchar_t)bind_cpu != IRQ_UNINIT)) {
2497 			ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
2498 			if (bind_cpu & IRQ_USER_BOUND) {
2499 				/* If hardbound, temp_cpu == cpu */
2500 				bind_cpu &= ~IRQ_USER_BOUND;
2501 				apic_cpus[bind_cpu].aci_bound--;
2502 			} else
2503 				apic_cpus[bind_cpu].aci_temp_bound--;
2504 		}
2505 		lock_clear(&apic_ioapic_lock);
2506 		intr_restore(iflag);
2507 		irqptr->airq_temp_cpu = IRQ_UNINIT;
2508 		irqptr->airq_mps_intr_index = FREE_INDEX;
2509 		apic_free_vector(irqptr->airq_vector);
2510 		return (PSM_SUCCESS);
2511 	}
2512 	lock_clear(&apic_ioapic_lock);
2513 	intr_restore(iflag);
2514 
2515 	mutex_enter(&airq_mutex);
2516 	if ((irqptr == apic_irq_table[irqindex])) {
2517 		apic_irq_t	*oldirqptr;
2518 		/* Move valid irq entry to the head */
2519 		irqheadptr = oldirqptr = irqptr;
2520 		irqptr = irqptr->airq_next;
2521 		ASSERT(irqptr);
2522 		while (irqptr) {
2523 			if (irqptr->airq_mps_intr_index != FREE_INDEX)
2524 				break;
2525 			oldirqptr = irqptr;
2526 			irqptr = irqptr->airq_next;
2527 		}
2528 		/* remove all invalid ones from the beginning */
2529 		apic_irq_table[irqindex] = irqptr;
2530 		/*
2531 		 * and link them back after the head. The invalid ones
2532 		 * begin with irqheadptr and end at oldirqptr
2533 		 */
2534 		oldirqptr->airq_next = irqptr->airq_next;
2535 		irqptr->airq_next = irqheadptr;
2536 	}
2537 	mutex_exit(&airq_mutex);
2538 
2539 	irqptr->airq_temp_cpu = IRQ_UNINIT;
2540 	irqptr->airq_mps_intr_index = FREE_INDEX;
2541 	return (PSM_SUCCESS);
2542 }
2543 
2544 /*
2545  * Return HW interrupt number corresponding to the given IPL
2546  */
2547 /*ARGSUSED*/
2548 static int
2549 apic_softlvl_to_irq(int ipl)
2550 {
2551 	/*
2552 	 * Do not use apic to trigger soft interrupt.
2553 	 * It will cause the system to hang when 2 hardware interrupts
2554 	 * at the same priority with the softint are already accepted
2555 	 * by the apic.  Cause the AV_PENDING bit will not be cleared
2556 	 * until one of the hardware interrupt is eoi'ed.  If we need
2557 	 * to send an ipi at this time, we will end up looping forever
2558 	 * to wait for the AV_PENDING bit to clear.
2559 	 */
2560 	return (PSM_SV_SOFTWARE);
2561 }
2562 
2563 static int
2564 apic_post_cpu_start()
2565 {
2566 	int i, cpun;
2567 	apic_irq_t *irq_ptr;
2568 
2569 	apic_init_intr();
2570 
2571 	/*
2572 	 * since some systems don't enable the internal cache on the non-boot
2573 	 * cpus, so we have to enable them here
2574 	 */
2575 	setcr0(getcr0() & ~(0x60000000));
2576 
2577 	while (get_apic_cmd1() & AV_PENDING)
2578 		apic_ret();
2579 
2580 	cpun = psm_get_cpu_id();
2581 	apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
2582 
2583 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
2584 		irq_ptr = apic_irq_table[i];
2585 		if ((irq_ptr == NULL) ||
2586 		    ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun))
2587 			continue;
2588 
2589 		while (irq_ptr) {
2590 			if (irq_ptr->airq_temp_cpu != IRQ_UNINIT)
2591 				(void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE);
2592 			irq_ptr = irq_ptr->airq_next;
2593 		}
2594 	}
2595 
2596 	return (PSM_SUCCESS);
2597 }
2598 
2599 processorid_t
2600 apic_get_next_processorid(processorid_t cpu_id)
2601 {
2602 
2603 	int i;
2604 
2605 	if (cpu_id == -1)
2606 		return ((processorid_t)0);
2607 
2608 	for (i = cpu_id + 1; i < NCPU; i++) {
2609 		if (apic_cpumask & (1 << i))
2610 			return (i);
2611 	}
2612 
2613 	return ((processorid_t)-1);
2614 }
2615 
2616 
2617 /*
2618  * type == -1 indicates it is an internal request. Do not change
2619  * resv_vector for these requests
2620  */
2621 static int
2622 apic_get_ipivect(int ipl, int type)
2623 {
2624 	uchar_t vector;
2625 	int irq;
2626 
2627 	if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) {
2628 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
2629 			apic_irq_table[irq]->airq_mps_intr_index =
2630 			    RESERVE_INDEX;
2631 			apic_irq_table[irq]->airq_vector = vector;
2632 			if (type != -1) {
2633 				apic_resv_vector[ipl] = vector;
2634 			}
2635 			return (irq);
2636 		}
2637 	}
2638 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
2639 	return (-1);	/* shouldn't happen */
2640 }
2641 
2642 static int
2643 apic_getclkirq(int ipl)
2644 {
2645 	int	irq;
2646 
2647 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
2648 		return (-1);
2649 	/*
2650 	 * Note the vector in apic_clkvect for per clock handling.
2651 	 */
2652 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
2653 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
2654 	    apic_clkvect));
2655 	return (irq);
2656 }
2657 
2658 /*
2659  * Return the number of APIC clock ticks elapsed for 8245 to decrement
2660  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
2661  */
2662 static uint_t
2663 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
2664 {
2665 	uint8_t		pit_tick_lo;
2666 	uint16_t	pit_tick, target_pit_tick;
2667 	uint32_t	start_apic_tick, end_apic_tick;
2668 	int		iflag;
2669 
2670 	addr += APIC_CURR_COUNT;
2671 
2672 	iflag = intr_clear();
2673 
2674 	do {
2675 		pit_tick_lo = inb(PITCTR0_PORT);
2676 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
2677 	} while (pit_tick < APIC_TIME_MIN ||
2678 	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
2679 
2680 	/*
2681 	 * Wait for the 8254 to decrement by 5 ticks to ensure
2682 	 * we didn't start in the middle of a tick.
2683 	 * Compare with 0x10 for the wrap around case.
2684 	 */
2685 	target_pit_tick = pit_tick - 5;
2686 	do {
2687 		pit_tick_lo = inb(PITCTR0_PORT);
2688 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
2689 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
2690 
2691 	start_apic_tick = *addr;
2692 
2693 	/*
2694 	 * Wait for the 8254 to decrement by
2695 	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
2696 	 */
2697 	target_pit_tick = pit_tick - APIC_TIME_COUNT;
2698 	do {
2699 		pit_tick_lo = inb(PITCTR0_PORT);
2700 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
2701 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
2702 
2703 	end_apic_tick = *addr;
2704 
2705 	*pit_ticks_adj = target_pit_tick - pit_tick;
2706 
2707 	intr_restore(iflag);
2708 
2709 	return (start_apic_tick - end_apic_tick);
2710 }
2711 
2712 /*
2713  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
2714  * frequency.  Note at this stage in the boot sequence, the boot processor
2715  * is the only active processor.
2716  * hertz value of 0 indicates a one-shot mode request.  In this case
2717  * the function returns the resolution (in nanoseconds) for the hardware
2718  * timer interrupt.  If one-shot mode capability is not available,
2719  * the return value will be 0. apic_enable_oneshot is a global switch
2720  * for disabling the functionality.
2721  * A non-zero positive value for hertz indicates a periodic mode request.
2722  * In this case the hardware will be programmed to generate clock interrupts
2723  * at hertz frequency and returns the resolution of interrupts in
2724  * nanosecond.
2725  */
2726 
2727 static int
2728 apic_clkinit(int hertz)
2729 {
2730 
2731 	uint_t		apic_ticks = 0;
2732 	uint_t		pit_time;
2733 	int		ret;
2734 	uint16_t	pit_ticks_adj;
2735 	static int	firsttime = 1;
2736 
2737 	if (firsttime) {
2738 		/* first time calibrate */
2739 
2740 		apicadr[APIC_DIVIDE_REG] = 0x0;
2741 		apicadr[APIC_INIT_COUNT] = APIC_MAXVAL;
2742 
2743 		/* set periodic interrupt based on CLKIN */
2744 		apicadr[APIC_LOCAL_TIMER] =
2745 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
2746 		tenmicrosec();
2747 
2748 		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);
2749 
2750 		apicadr[APIC_LOCAL_TIMER] =
2751 		    (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
2752 		/*
2753 		 * pit time is the amount of real time (in nanoseconds ) it took
2754 		 * the 8254 to decrement (APIC_TIME_COUNT + pit_ticks_adj) ticks
2755 		 */
2756 		pit_time = ((longlong_t)(APIC_TIME_COUNT +
2757 		    pit_ticks_adj) * NANOSEC) / PIT_HZ;
2758 
2759 		/*
2760 		 * Determine the number of nanoseconds per APIC clock tick
2761 		 * and then determine how many APIC ticks to interrupt at the
2762 		 * desired frequency
2763 		 */
2764 		apic_nsec_per_tick = pit_time / apic_ticks;
2765 		if (apic_nsec_per_tick == 0)
2766 			apic_nsec_per_tick = 1;
2767 
2768 		/* the interval timer initial count is 32 bit max */
2769 		apic_nsec_max = (hrtime_t)apic_nsec_per_tick * APIC_MAXVAL;
2770 		firsttime = 0;
2771 	}
2772 
2773 	if (hertz != 0) {
2774 		/* periodic */
2775 		apic_nsec_per_intr = NANOSEC / hertz;
2776 		apic_hertz_count = (longlong_t)apic_nsec_per_intr /
2777 		    apic_nsec_per_tick;
2778 		apic_sample_factor_redistribution = hertz + 1;
2779 	}
2780 
2781 	apic_int_busy_mark = (apic_int_busy_mark *
2782 	    apic_sample_factor_redistribution) / 100;
2783 	apic_int_free_mark = (apic_int_free_mark *
2784 	    apic_sample_factor_redistribution) / 100;
2785 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
2786 	    apic_sample_factor_redistribution) / 100;
2787 
2788 	if (hertz == 0) {
2789 		/* requested one_shot */
2790 		if (!apic_oneshot_enable)
2791 			return (0);
2792 		apic_oneshot = 1;
2793 		ret = (int)apic_nsec_per_tick;
2794 	} else {
2795 		/* program the local APIC to interrupt at the given frequency */
2796 		apicadr[APIC_INIT_COUNT] = apic_hertz_count;
2797 		apicadr[APIC_LOCAL_TIMER] =
2798 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
2799 		apic_oneshot = 0;
2800 		ret = NANOSEC / hertz;
2801 	}
2802 
2803 	return (ret);
2804 
2805 }
2806 
2807 /*
2808  * apic_preshutdown:
2809  * Called early in shutdown whilst we can still access filesystems to do
2810  * things like loading modules which will be required to complete shutdown
2811  * after filesystems are all unmounted.
2812  */
2813 static void
2814 apic_preshutdown(int cmd, int fcn)
2815 {
2816 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
2817 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
2818 
2819 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
2820 		return;
2821 	}
2822 }
2823 
2824 static void
2825 apic_shutdown(int cmd, int fcn)
2826 {
2827 	int iflag, restarts, attempts;
2828 	int i, j;
2829 	volatile int32_t *ioapic;
2830 	uchar_t	byte;
2831 
2832 	/* Send NMI to all CPUs except self to do per processor shutdown */
2833 	iflag = intr_clear();
2834 	while (get_apic_cmd1() & AV_PENDING)
2835 		apic_ret();
2836 	apic_shutdown_processors = 1;
2837 	apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF;
2838 
2839 	/* restore cmos shutdown byte before reboot */
2840 	if (apic_cmos_ssb_set) {
2841 		outb(CMOS_ADDR, SSB);
2842 		outb(CMOS_DATA, 0);
2843 	}
2844 	/* Disable the I/O APIC redirection entries */
2845 	for (j = 0; j < apic_io_max; j++) {
2846 		int intin_max;
2847 		ioapic = apicioadr[j];
2848 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
2849 		/* Bits 23-16 define the maximum redirection entries */
2850 		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
2851 		for (i = 0; i < intin_max; i++) {
2852 			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
2853 			ioapic[APIC_IO_DATA] = AV_MASK;
2854 		}
2855 	}
2856 
2857 	/*	disable apic mode if imcr present	*/
2858 	if (apic_imcrp) {
2859 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
2860 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
2861 	}
2862 
2863 	apic_disable_local_apic();
2864 
2865 	intr_restore(iflag);
2866 
2867 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
2868 		return;
2869 	}
2870 
2871 	switch (apic_poweroff_method) {
2872 		case APIC_POWEROFF_VIA_RTC:
2873 
2874 			/* select the extended NVRAM bank in the RTC */
2875 			outb(CMOS_ADDR, RTC_REGA);
2876 			byte = inb(CMOS_DATA);
2877 			outb(CMOS_DATA, (byte | EXT_BANK));
2878 
2879 			outb(CMOS_ADDR, PFR_REG);
2880 
2881 			/* for Predator must toggle the PAB bit */
2882 			byte = inb(CMOS_DATA);
2883 
2884 			/*
2885 			 * clear power active bar, wakeup alarm and
2886 			 * kickstart
2887 			 */
2888 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
2889 			outb(CMOS_DATA, byte);
2890 
2891 			/* delay before next write */
2892 			drv_usecwait(1000);
2893 
2894 			/* for S40 the following would suffice */
2895 			byte = inb(CMOS_DATA);
2896 
2897 			/* power active bar control bit */
2898 			byte |= PAB_CBIT;
2899 			outb(CMOS_DATA, byte);
2900 
2901 			break;
2902 
2903 		case APIC_POWEROFF_VIA_ASPEN_BMC:
2904 			restarts = 0;
2905 restart_aspen_bmc:
2906 			if (++restarts == 3)
2907 				break;
2908 			attempts = 0;
2909 			do {
2910 				byte = inb(MISMIC_FLAG_REGISTER);
2911 				byte &= MISMIC_BUSY_MASK;
2912 				if (byte != 0) {
2913 					drv_usecwait(1000);
2914 					if (attempts >= 3)
2915 						goto restart_aspen_bmc;
2916 					++attempts;
2917 				}
2918 			} while (byte != 0);
2919 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
2920 			byte = inb(MISMIC_FLAG_REGISTER);
2921 			byte |= 0x1;
2922 			outb(MISMIC_FLAG_REGISTER, byte);
2923 			i = 0;
2924 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
2925 			    i++) {
2926 				attempts = 0;
2927 				do {
2928 					byte = inb(MISMIC_FLAG_REGISTER);
2929 					byte &= MISMIC_BUSY_MASK;
2930 					if (byte != 0) {
2931 						drv_usecwait(1000);
2932 						if (attempts >= 3)
2933 							goto restart_aspen_bmc;
2934 						++attempts;
2935 					}
2936 				} while (byte != 0);
2937 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
2938 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
2939 				byte = inb(MISMIC_FLAG_REGISTER);
2940 				byte |= 0x1;
2941 				outb(MISMIC_FLAG_REGISTER, byte);
2942 			}
2943 			break;
2944 
2945 		case APIC_POWEROFF_VIA_SITKA_BMC:
2946 			restarts = 0;
2947 restart_sitka_bmc:
2948 			if (++restarts == 3)
2949 				break;
2950 			attempts = 0;
2951 			do {
2952 				byte = inb(SMS_STATUS_REGISTER);
2953 				byte &= SMS_STATE_MASK;
2954 				if ((byte == SMS_READ_STATE) ||
2955 				    (byte == SMS_WRITE_STATE)) {
2956 					drv_usecwait(1000);
2957 					if (attempts >= 3)
2958 						goto restart_sitka_bmc;
2959 					++attempts;
2960 				}
2961 			} while ((byte == SMS_READ_STATE) ||
2962 			    (byte == SMS_WRITE_STATE));
2963 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
2964 			i = 0;
2965 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
2966 			    i++) {
2967 				attempts = 0;
2968 				do {
2969 					byte = inb(SMS_STATUS_REGISTER);
2970 					byte &= SMS_IBF_MASK;
2971 					if (byte != 0) {
2972 						drv_usecwait(1000);
2973 						if (attempts >= 3)
2974 							goto restart_sitka_bmc;
2975 						++attempts;
2976 					}
2977 				} while (byte != 0);
2978 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
2979 			}
2980 			break;
2981 
2982 		case APIC_POWEROFF_NONE:
2983 
2984 			/* If no APIC direct method, we will try using ACPI */
2985 			if (apic_enable_acpi) {
2986 				if (acpi_poweroff() == 1)
2987 					return;
2988 			} else
2989 				return;
2990 
2991 			break;
2992 	}
2993 	/*
2994 	 * Wait a limited time here for power to go off.
2995 	 * If the power does not go off, then there was a
2996 	 * problem and we should continue to the halt which
2997 	 * prints a message for the user to press a key to
2998 	 * reboot.
2999 	 */
3000 	drv_usecwait(7000000); /* wait seven seconds */
3001 
3002 }
3003 
3004 /*
3005  * Try and disable all interrupts. We just assign interrupts to other
3006  * processors based on policy. If any were bound by user request, we
3007  * let them continue and return failure. We do not bother to check
3008  * for cache affinity while rebinding.
3009  */
3010 
3011 static int
3012 apic_disable_intr(processorid_t cpun)
3013 {
3014 	int bind_cpu = 0, i, hardbound = 0, iflag;
3015 	apic_irq_t *irq_ptr;
3016 
3017 	iflag = intr_clear();
3018 	lock_set(&apic_ioapic_lock);
3019 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
3020 	lock_clear(&apic_ioapic_lock);
3021 	intr_restore(iflag);
3022 	apic_cpus[cpun].aci_curipl = 0;
3023 	i = apic_min_device_irq;
3024 	for (; i <= apic_max_device_irq; i++) {
3025 		/*
3026 		 * If there are bound interrupts on this cpu, then
3027 		 * rebind them to other processors.
3028 		 */
3029 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
3030 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
3031 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
3032 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
3033 			    apic_nproc));
3034 
3035 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
3036 				hardbound = 1;
3037 				continue;
3038 			}
3039 
3040 			if (irq_ptr->airq_temp_cpu == cpun) {
3041 				do {
3042 					apic_next_bind_cpu += 2;
3043 					bind_cpu = apic_next_bind_cpu / 2;
3044 					if (bind_cpu >= apic_nproc) {
3045 						apic_next_bind_cpu = 1;
3046 						bind_cpu = 0;
3047 
3048 					}
3049 				} while (apic_rebind_all(irq_ptr, bind_cpu, 1));
3050 			}
3051 		}
3052 	}
3053 	if (hardbound) {
3054 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
3055 		    "due to user bound interrupts", cpun);
3056 		return (PSM_FAILURE);
3057 	}
3058 	else
3059 		return (PSM_SUCCESS);
3060 }
3061 
3062 static void
3063 apic_enable_intr(processorid_t cpun)
3064 {
3065 	int	i, iflag;
3066 	apic_irq_t *irq_ptr;
3067 
3068 	iflag = intr_clear();
3069 	lock_set(&apic_ioapic_lock);
3070 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
3071 	lock_clear(&apic_ioapic_lock);
3072 	intr_restore(iflag);
3073 
3074 	i = apic_min_device_irq;
3075 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
3076 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
3077 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
3078 				(void) apic_rebind_all(irq_ptr,
3079 				    irq_ptr->airq_cpu, 1);
3080 			}
3081 		}
3082 	}
3083 }
3084 
3085 /*
3086  * apic_introp_xlate() replaces apic_translate_irq() and is
3087  * called only from apic_intr_ops().  With the new ADII framework,
3088  * the priority can no longer be retrived through i_ddi_get_intrspec().
3089  * It has to be passed in from the caller.
3090  */
3091 int
3092 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type)
3093 {
3094 	char dev_type[16];
3095 	int dev_len, pci_irq, newirq, bustype, devid, busid, i;
3096 	int irqno = ispec->intrspec_vec;
3097 	ddi_acc_handle_t cfg_handle;
3098 	uchar_t ipin;
3099 	struct apic_io_intr *intrp;
3100 	iflag_t intr_flag;
3101 	APIC_HEADER	*hp;
3102 	MADT_INTERRUPT_OVERRIDE	*isop;
3103 	apic_irq_t *airqp;
3104 
3105 	DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s "
3106 	    "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type,
3107 	    irqno));
3108 
3109 	if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
3110 		if ((airqp = apic_find_irq(dip, ispec, type)) != NULL)
3111 			return (apic_vector_to_irq[airqp->airq_vector]);
3112 		return (apic_setup_irq_table(dip, irqno, NULL, ispec,
3113 		    NULL, type));
3114 	}
3115 
3116 	bustype = 0;
3117 
3118 	/* check if we have already translated this irq */
3119 	mutex_enter(&airq_mutex);
3120 	newirq = apic_min_device_irq;
3121 	for (; newirq <= apic_max_device_irq; newirq++) {
3122 		airqp = apic_irq_table[newirq];
3123 		while (airqp) {
3124 			if ((airqp->airq_dip == dip) &&
3125 			    (airqp->airq_origirq == irqno) &&
3126 			    (airqp->airq_mps_intr_index != FREE_INDEX)) {
3127 
3128 				mutex_exit(&airq_mutex);
3129 				return (VIRTIRQ(newirq, airqp->airq_share_id));
3130 			}
3131 			airqp = airqp->airq_next;
3132 		}
3133 	}
3134 	mutex_exit(&airq_mutex);
3135 
3136 	if (apic_defconf)
3137 		goto defconf;
3138 
3139 	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi))
3140 		goto nonpci;
3141 
3142 	dev_len = sizeof (dev_type);
3143 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
3144 	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
3145 	    &dev_len) != DDI_PROP_SUCCESS) {
3146 		goto nonpci;
3147 	}
3148 
3149 	if ((strcmp(dev_type, "pci") == 0) ||
3150 	    (strcmp(dev_type, "pciex") == 0)) {
3151 		/* pci device */
3152 		if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
3153 			goto nonpci;
3154 		if (busid == 0 && apic_pci_bus_total == 1)
3155 			busid = (int)apic_single_pci_busid;
3156 
3157 		if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
3158 			goto nonpci;
3159 		ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
3160 		pci_config_teardown(&cfg_handle);
3161 		if (apic_enable_acpi && !apic_use_acpi_madt_only) {
3162 			if (apic_acpi_translate_pci_irq(dip, busid, devid,
3163 			    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
3164 				goto nonpci;
3165 
3166 			intr_flag.bustype = BUS_PCI;
3167 			if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL,
3168 			    ispec, &intr_flag, type)) == -1)
3169 				goto nonpci;
3170 			return (newirq);
3171 		} else {
3172 			pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
3173 			if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid))
3174 			    == NULL) {
3175 				if ((pci_irq = apic_handle_pci_pci_bridge(dip,
3176 				    devid, ipin, &intrp)) == -1)
3177 					goto nonpci;
3178 			}
3179 			if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp,
3180 			    ispec, NULL, type)) == -1)
3181 				goto nonpci;
3182 			return (newirq);
3183 		}
3184 	} else if (strcmp(dev_type, "isa") == 0)
3185 		bustype = BUS_ISA;
3186 	else if (strcmp(dev_type, "eisa") == 0)
3187 		bustype = BUS_EISA;
3188 
3189 nonpci:
3190 	if (apic_enable_acpi && !apic_use_acpi_madt_only) {
3191 		/* search iso entries first */
3192 		if (acpi_iso_cnt != 0) {
3193 			hp = (APIC_HEADER *)acpi_isop;
3194 			i = 0;
3195 			while (i < acpi_iso_cnt) {
3196 				if (hp->Type == APIC_XRUPT_OVERRIDE) {
3197 					isop = (MADT_INTERRUPT_OVERRIDE *)hp;
3198 					if (isop->Bus == 0 &&
3199 					    isop->Source == irqno) {
3200 						newirq = isop->Interrupt;
3201 						intr_flag.intr_po =
3202 						    isop->Polarity;
3203 						intr_flag.intr_el =
3204 						    isop->TriggerMode;
3205 						intr_flag.bustype = BUS_ISA;
3206 
3207 						return (apic_setup_irq_table(
3208 						    dip, newirq, NULL, ispec,
3209 						    &intr_flag, type));
3210 
3211 					}
3212 					i++;
3213 				}
3214 				hp = (APIC_HEADER *)(((char *)hp) +
3215 				    hp->Length);
3216 			}
3217 		}
3218 		intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
3219 		intr_flag.intr_el = INTR_EL_EDGE;
3220 		intr_flag.bustype = BUS_ISA;
3221 		return (apic_setup_irq_table(dip, irqno, NULL, ispec,
3222 		    &intr_flag, type));
3223 	} else {
3224 		if (bustype == 0)
3225 			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
3226 		for (i = 0; i < 2; i++) {
3227 			if (((busid = apic_find_bus_id(bustype)) != -1) &&
3228 			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
3229 			    != NULL)) {
3230 				if ((newirq = apic_setup_irq_table(dip, irqno,
3231 				    intrp, ispec, NULL, type)) != -1) {
3232 					return (newirq);
3233 				}
3234 				goto defconf;
3235 			}
3236 			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
3237 		}
3238 	}
3239 
3240 /* MPS default configuration */
3241 defconf:
3242 	newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type);
3243 	if (newirq == -1)
3244 		return (newirq);
3245 	ASSERT(IRQINDEX(newirq) == irqno);
3246 	ASSERT(apic_irq_table[irqno]);
3247 	return (newirq);
3248 }
3249 
3250 
3251 
3252 
3253 
3254 
3255 /*
3256  * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge
3257  * needs special handling.  We may need to chase up the device tree,
3258  * using the PCI-PCI Bridge specification's "rotating IPIN assumptions",
3259  * to find the IPIN at the root bus that relates to the IPIN on the
3260  * subsidiary bus (for ACPI or MP).  We may, however, have an entry
3261  * in the MP table or the ACPI namespace for this device itself.
3262  * We handle both cases in the search below.
3263  */
3264 /* this is the non-acpi version */
3265 static int
3266 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin,
3267 			struct apic_io_intr **intrp)
3268 {
3269 	dev_info_t *dipp, *dip;
3270 	int pci_irq;
3271 	ddi_acc_handle_t cfg_handle;
3272 	int bridge_devno, bridge_bus;
3273 	int ipin;
3274 
3275 	dip = idip;
3276 
3277 	/*CONSTCOND*/
3278 	while (1) {
3279 		if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL)
3280 			return (-1);
3281 		if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) &&
3282 		    (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) ==
3283 		    PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle,
3284 		    PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) {
3285 			pci_config_teardown(&cfg_handle);
3286 			if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno,
3287 			    NULL) != 0)
3288 				return (-1);
3289 			/*
3290 			 * This is the rotating scheme that Compaq is using
3291 			 * and documented in the pci to pci spec.  Also, if
3292 			 * the pci to pci bridge is behind another pci to
3293 			 * pci bridge, then it need to keep transversing
3294 			 * up until an interrupt entry is found or reach
3295 			 * the top of the tree
3296 			 */
3297 			ipin = (child_devno + child_ipin) % PCI_INTD;
3298 				if (bridge_bus == 0 && apic_pci_bus_total == 1)
3299 					bridge_bus = (int)apic_single_pci_busid;
3300 				pci_irq = ((bridge_devno & 0x1f) << 2) |
3301 				    (ipin & 0x3);
3302 				if ((*intrp = apic_find_io_intr_w_busid(pci_irq,
3303 				    bridge_bus)) != NULL) {
3304 					return (pci_irq);
3305 				}
3306 			dip = dipp;
3307 			child_devno = bridge_devno;
3308 			child_ipin = ipin;
3309 		} else
3310 			return (-1);
3311 	}
3312 	/*LINTED: function will not fall off the bottom */
3313 }
3314 
3315 
3316 
3317 
3318 static uchar_t
3319 acpi_find_ioapic(int irq)
3320 {
3321 	int i;
3322 
3323 	for (i = 0; i < apic_io_max; i++) {
3324 		if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i])
3325 			return (i);
3326 	}
3327 	return (0xFF);	/* shouldn't happen */
3328 }
3329 
3330 /*
3331  * See if two irqs are compatible for sharing a vector.
3332  * Currently we only support sharing of PCI devices.
3333  */
3334 static int
3335 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2)
3336 {
3337 	uint_t	level1, po1;
3338 	uint_t	level2, po2;
3339 
3340 	/* Assume active high by default */
3341 	po1 = 0;
3342 	po2 = 0;
3343 
3344 	if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI)
3345 		return (0);
3346 
3347 	if (iflag1.intr_el == INTR_EL_CONFORM)
3348 		level1 = AV_LEVEL;
3349 	else
3350 		level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0;
3351 
3352 	if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) ||
3353 	    (iflag1.intr_po == INTR_PO_CONFORM)))
3354 		po1 = AV_ACTIVE_LOW;
3355 
3356 	if (iflag2.intr_el == INTR_EL_CONFORM)
3357 		level2 = AV_LEVEL;
3358 	else
3359 		level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0;
3360 
3361 	if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) ||
3362 	    (iflag2.intr_po == INTR_PO_CONFORM)))
3363 		po2 = AV_ACTIVE_LOW;
3364 
3365 	if ((level1 == level2) && (po1 == po2))
3366 		return (1);
3367 
3368 	return (0);
3369 }
3370 
3371 /*
3372  * Attempt to share vector with someone else
3373  */
3374 static int
3375 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl,
3376 	uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp)
3377 {
3378 #ifdef DEBUG
3379 	apic_irq_t *tmpirqp = NULL;
3380 #endif /* DEBUG */
3381 	apic_irq_t *irqptr, dummyirq;
3382 	int	newirq, chosen_irq = -1, share = 127;
3383 	int	lowest, highest, i;
3384 	uchar_t	share_id;
3385 
3386 	DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x "
3387 	    "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl));
3388 
3389 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
3390 	lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL;
3391 
3392 	if (highest < lowest) /* Both ipl and ipl-1 map to same pri */
3393 		lowest -= APIC_VECTOR_PER_IPL;
3394 	dummyirq.airq_mps_intr_index = intr_index;
3395 	dummyirq.airq_ioapicindex = ioapicindex;
3396 	dummyirq.airq_intin_no = ipin;
3397 	if (intr_flagp)
3398 		dummyirq.airq_iflag = *intr_flagp;
3399 	apic_record_rdt_entry(&dummyirq, irqno);
3400 	for (i = lowest; i <= highest; i++) {
3401 		newirq = apic_vector_to_irq[i];
3402 		if (newirq == APIC_RESV_IRQ)
3403 			continue;
3404 		irqptr = apic_irq_table[newirq];
3405 
3406 		if ((dummyirq.airq_rdt_entry & 0xFF00) !=
3407 		    (irqptr->airq_rdt_entry & 0xFF00))
3408 			/* not compatible */
3409 			continue;
3410 
3411 		if (irqptr->airq_share < share) {
3412 			share = irqptr->airq_share;
3413 			chosen_irq = newirq;
3414 		}
3415 	}
3416 	if (chosen_irq != -1) {
3417 		/*
3418 		 * Assign a share id which is free or which is larger
3419 		 * than the largest one.
3420 		 */
3421 		share_id = 1;
3422 		mutex_enter(&airq_mutex);
3423 		irqptr = apic_irq_table[chosen_irq];
3424 		while (irqptr) {
3425 			if (irqptr->airq_mps_intr_index == FREE_INDEX) {
3426 				share_id = irqptr->airq_share_id;
3427 				break;
3428 			}
3429 			if (share_id <= irqptr->airq_share_id)
3430 				share_id = irqptr->airq_share_id + 1;
3431 #ifdef DEBUG
3432 			tmpirqp = irqptr;
3433 #endif /* DEBUG */
3434 			irqptr = irqptr->airq_next;
3435 		}
3436 		if (!irqptr) {
3437 			irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
3438 			irqptr->airq_temp_cpu = IRQ_UNINIT;
3439 			irqptr->airq_next =
3440 			    apic_irq_table[chosen_irq]->airq_next;
3441 			apic_irq_table[chosen_irq]->airq_next = irqptr;
3442 #ifdef	DEBUG
3443 			tmpirqp = apic_irq_table[chosen_irq];
3444 #endif /* DEBUG */
3445 		}
3446 		irqptr->airq_mps_intr_index = intr_index;
3447 		irqptr->airq_ioapicindex = ioapicindex;
3448 		irqptr->airq_intin_no = ipin;
3449 		if (intr_flagp)
3450 			irqptr->airq_iflag = *intr_flagp;
3451 		irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector;
3452 		irqptr->airq_share_id = share_id;
3453 		apic_record_rdt_entry(irqptr, irqno);
3454 		*irqptrp = irqptr;
3455 #ifdef	DEBUG
3456 		/* shuffle the pointers to test apic_delspl path */
3457 		if (tmpirqp) {
3458 			tmpirqp->airq_next = irqptr->airq_next;
3459 			irqptr->airq_next = apic_irq_table[chosen_irq];
3460 			apic_irq_table[chosen_irq] = irqptr;
3461 		}
3462 #endif /* DEBUG */
3463 		mutex_exit(&airq_mutex);
3464 		return (VIRTIRQ(chosen_irq, share_id));
3465 	}
3466 	return (-1);
3467 }
3468 
3469 /*
3470  *
3471  */
3472 static int
3473 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp,
3474     struct intrspec *ispec, iflag_t *intr_flagp, int type)
3475 {
3476 	int origirq = ispec->intrspec_vec;
3477 	uchar_t ipl = ispec->intrspec_pri;
3478 	int	newirq, intr_index;
3479 	uchar_t	ipin, ioapic, ioapicindex, vector;
3480 	apic_irq_t *irqptr;
3481 	major_t	major;
3482 	dev_info_t	*sdip;
3483 
3484 	DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d "
3485 	    "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq));
3486 
3487 	ASSERT(ispec != NULL);
3488 
3489 	major =  (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
3490 
3491 	if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
3492 		/* MSI/X doesn't need to setup ioapic stuffs */
3493 		ioapicindex = 0xff;
3494 		ioapic = 0xff;
3495 		ipin = (uchar_t)0xff;
3496 		intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX :
3497 		    MSIX_INDEX;
3498 		mutex_enter(&airq_mutex);
3499 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) {
3500 			mutex_exit(&airq_mutex);
3501 			/* need an irq for MSI/X to index into autovect[] */
3502 			cmn_err(CE_WARN, "No interrupt irq: %s instance %d",
3503 			    ddi_get_name(dip), ddi_get_instance(dip));
3504 			return (-1);
3505 		}
3506 		mutex_exit(&airq_mutex);
3507 
3508 	} else if (intrp != NULL) {
3509 		intr_index = (int)(intrp - apic_io_intrp);
3510 		ioapic = intrp->intr_destid;
3511 		ipin = intrp->intr_destintin;
3512 		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
3513 		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
3514 			if (apic_io_id[ioapicindex] == ioapic)
3515 				break;
3516 		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
3517 		    (ioapic == INTR_ALL_APIC));
3518 
3519 		/* check whether this intin# has been used by another irqno */
3520 		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) {
3521 			return (newirq);
3522 		}
3523 
3524 	} else if (intr_flagp != NULL) {
3525 		/* ACPI case */
3526 		intr_index = ACPI_INDEX;
3527 		ioapicindex = acpi_find_ioapic(irqno);
3528 		ASSERT(ioapicindex != 0xFF);
3529 		ioapic = apic_io_id[ioapicindex];
3530 		ipin = irqno - apic_io_vectbase[ioapicindex];
3531 		if (apic_irq_table[irqno] &&
3532 		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
3533 			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
3534 			    apic_irq_table[irqno]->airq_ioapicindex ==
3535 			    ioapicindex);
3536 			return (irqno);
3537 		}
3538 
3539 	} else {
3540 		/* default configuration */
3541 		ioapicindex = 0;
3542 		ioapic = apic_io_id[ioapicindex];
3543 		ipin = (uchar_t)irqno;
3544 		intr_index = DEFAULT_INDEX;
3545 	}
3546 
3547 	if (ispec == NULL) {
3548 		APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n",
3549 		    irqno));
3550 	} else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) {
3551 		if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index,
3552 		    ipl, ioapicindex, ipin, &irqptr)) != -1) {
3553 			irqptr->airq_ipl = ipl;
3554 			irqptr->airq_origirq = (uchar_t)origirq;
3555 			irqptr->airq_dip = dip;
3556 			irqptr->airq_major = major;
3557 			sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip;
3558 			/* This is OK to do really */
3559 			if (sdip == NULL) {
3560 				cmn_err(CE_WARN, "Sharing vectors: %s"
3561 				    " instance %d and SCI",
3562 				    ddi_get_name(dip), ddi_get_instance(dip));
3563 			} else {
3564 				cmn_err(CE_WARN, "Sharing vectors: %s"
3565 				    " instance %d and %s instance %d",
3566 				    ddi_get_name(sdip), ddi_get_instance(sdip),
3567 				    ddi_get_name(dip), ddi_get_instance(dip));
3568 			}
3569 			return (newirq);
3570 		}
3571 		/* try high priority allocation now  that share has failed */
3572 		if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) {
3573 			cmn_err(CE_WARN, "No interrupt vector: %s instance %d",
3574 			    ddi_get_name(dip), ddi_get_instance(dip));
3575 			return (-1);
3576 		}
3577 	}
3578 
3579 	mutex_enter(&airq_mutex);
3580 	if (apic_irq_table[irqno] == NULL) {
3581 		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
3582 		irqptr->airq_temp_cpu = IRQ_UNINIT;
3583 		apic_irq_table[irqno] = irqptr;
3584 	} else {
3585 		irqptr = apic_irq_table[irqno];
3586 		if (irqptr->airq_mps_intr_index != FREE_INDEX) {
3587 			/*
3588 			 * The slot is used by another irqno, so allocate
3589 			 * a free irqno for this interrupt
3590 			 */
3591 			newirq = apic_allocate_irq(apic_first_avail_irq);
3592 			if (newirq == -1) {
3593 				mutex_exit(&airq_mutex);
3594 				return (-1);
3595 			}
3596 			irqno = newirq;
3597 			irqptr = apic_irq_table[irqno];
3598 			if (irqptr == NULL) {
3599 				irqptr = kmem_zalloc(sizeof (apic_irq_t),
3600 				    KM_SLEEP);
3601 				irqptr->airq_temp_cpu = IRQ_UNINIT;
3602 				apic_irq_table[irqno] = irqptr;
3603 			}
3604 			apic_modify_vector(vector, newirq);
3605 		}
3606 	}
3607 	apic_max_device_irq = max(irqno, apic_max_device_irq);
3608 	apic_min_device_irq = min(irqno, apic_min_device_irq);
3609 	mutex_exit(&airq_mutex);
3610 	irqptr->airq_ioapicindex = ioapicindex;
3611 	irqptr->airq_intin_no = ipin;
3612 	irqptr->airq_ipl = ipl;
3613 	irqptr->airq_vector = vector;
3614 	irqptr->airq_origirq = (uchar_t)origirq;
3615 	irqptr->airq_share_id = 0;
3616 	irqptr->airq_mps_intr_index = (short)intr_index;
3617 	irqptr->airq_dip = dip;
3618 	irqptr->airq_major = major;
3619 	irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin);
3620 	if (intr_flagp)
3621 		irqptr->airq_iflag = *intr_flagp;
3622 
3623 	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
3624 		/* setup I/O APIC entry for non-MSI/X interrupts */
3625 		apic_record_rdt_entry(irqptr, irqno);
3626 	}
3627 	return (irqno);
3628 }
3629 
3630 /*
3631  * return the cpu to which this intr should be bound.
3632  * Check properties or any other mechanism to see if user wants it
3633  * bound to a specific CPU. If so, return the cpu id with high bit set.
3634  * If not, use the policy to choose a cpu and return the id.
3635  */
3636 uchar_t
3637 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
3638 {
3639 	int	instance, instno, prop_len, bind_cpu, count;
3640 	uint_t	i, rc;
3641 	uchar_t	cpu;
3642 	major_t	major;
3643 	char	*name, *drv_name, *prop_val, *cptr;
3644 	char	prop_name[32];
3645 
3646 
3647 	if (apic_intr_policy == INTR_LOWEST_PRIORITY)
3648 		return (IRQ_UNBOUND);
3649 
3650 	drv_name = NULL;
3651 	rc = DDI_PROP_NOT_FOUND;
3652 	major = (major_t)-1;
3653 	if (dip != NULL) {
3654 		name = ddi_get_name(dip);
3655 		major = ddi_name_to_major(name);
3656 		drv_name = ddi_major_to_name(major);
3657 		instance = ddi_get_instance(dip);
3658 		if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
3659 			i = apic_min_device_irq;
3660 			for (; i <= apic_max_device_irq; i++) {
3661 
3662 				if ((i == irq) || (apic_irq_table[i] == NULL) ||
3663 				    (apic_irq_table[i]->airq_mps_intr_index
3664 				    == FREE_INDEX))
3665 					continue;
3666 
3667 				if ((apic_irq_table[i]->airq_major == major) &&
3668 				    (!(apic_irq_table[i]->airq_cpu &
3669 				    IRQ_USER_BOUND))) {
3670 
3671 					cpu = apic_irq_table[i]->airq_cpu;
3672 
3673 					cmn_err(CE_CONT,
3674 					    "!pcplusmp: %s (%s) instance #%d "
3675 					    "vector 0x%x ioapic 0x%x "
3676 					    "intin 0x%x is bound to cpu %d\n",
3677 					    name, drv_name, instance, irq,
3678 					    ioapicid, intin, cpu);
3679 					return (cpu);
3680 				}
3681 			}
3682 		}
3683 		/*
3684 		 * search for "drvname"_intpt_bind_cpus property first, the
3685 		 * syntax of the property should be "a[,b,c,...]" where
3686 		 * instance 0 binds to cpu a, instance 1 binds to cpu b,
3687 		 * instance 3 binds to cpu c...
3688 		 * ddi_getlongprop() will search /option first, then /
3689 		 * if "drvname"_intpt_bind_cpus doesn't exist, then find
3690 		 * intpt_bind_cpus property.  The syntax is the same, and
3691 		 * it applies to all the devices if its "drvname" specific
3692 		 * property doesn't exist
3693 		 */
3694 		(void) strcpy(prop_name, drv_name);
3695 		(void) strcat(prop_name, "_intpt_bind_cpus");
3696 		rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
3697 		    (caddr_t)&prop_val, &prop_len);
3698 		if (rc != DDI_PROP_SUCCESS) {
3699 			rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
3700 			    "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
3701 		}
3702 	}
3703 	if (rc == DDI_PROP_SUCCESS) {
3704 		for (i = count = 0; i < (prop_len - 1); i++)
3705 			if (prop_val[i] == ',')
3706 				count++;
3707 		if (prop_val[i-1] != ',')
3708 			count++;
3709 		/*
3710 		 * if somehow the binding instances defined in the
3711 		 * property are not enough for this instno., then
3712 		 * reuse the pattern for the next instance until
3713 		 * it reaches the requested instno
3714 		 */
3715 		instno = instance % count;
3716 		i = 0;
3717 		cptr = prop_val;
3718 		while (i < instno)
3719 			if (*cptr++ == ',')
3720 				i++;
3721 		bind_cpu = stoi(&cptr);
3722 		kmem_free(prop_val, prop_len);
3723 		/* if specific cpu is bogus, then default to cpu 0 */
3724 		if (bind_cpu >= apic_nproc) {
3725 			cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present",
3726 			    prop_name, prop_val, bind_cpu);
3727 			bind_cpu = 0;
3728 		} else {
3729 			/* indicate that we are bound at user request */
3730 			bind_cpu |= IRQ_USER_BOUND;
3731 		}
3732 		/*
3733 		 * no need to check apic_cpus[].aci_status, if specific cpu is
3734 		 * not up, then post_cpu_start will handle it.
3735 		 */
3736 	} else {
3737 		bind_cpu = apic_next_bind_cpu++;
3738 		if (bind_cpu >= apic_nproc) {
3739 			apic_next_bind_cpu = 1;
3740 			bind_cpu = 0;
3741 		}
3742 	}
3743 	if (drv_name != NULL)
3744 		cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d "
3745 		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
3746 		    name, drv_name, instance,
3747 		    irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND);
3748 	else
3749 		cmn_err(CE_CONT, "!pcplusmp: "
3750 		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
3751 		    irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND);
3752 
3753 	return ((uchar_t)bind_cpu);
3754 }
3755 
3756 static struct apic_io_intr *
3757 apic_find_io_intr_w_busid(int irqno, int busid)
3758 {
3759 	struct	apic_io_intr	*intrp;
3760 
3761 	/*
3762 	 * It can have more than 1 entry with same source bus IRQ,
3763 	 * but unique with the source bus id
3764 	 */
3765 	intrp = apic_io_intrp;
3766 	if (intrp != NULL) {
3767 		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
3768 			if (intrp->intr_irq == irqno &&
3769 			    intrp->intr_busid == busid &&
3770 			    intrp->intr_type == IO_INTR_INT)
3771 				return (intrp);
3772 			intrp++;
3773 		}
3774 	}
3775 	APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:"
3776 	    "busid %x:%x\n", irqno, busid));
3777 	return ((struct apic_io_intr *)NULL);
3778 }
3779 
3780 
3781 struct mps_bus_info {
3782 	char	*bus_name;
3783 	int	bus_id;
3784 } bus_info_array[] = {
3785 	"ISA ", BUS_ISA,
3786 	"PCI ", BUS_PCI,
3787 	"EISA ", BUS_EISA,
3788 	"XPRESS", BUS_XPRESS,
3789 	"PCMCIA", BUS_PCMCIA,
3790 	"VL ", BUS_VL,
3791 	"CBUS ", BUS_CBUS,
3792 	"CBUSII", BUS_CBUSII,
3793 	"FUTURE", BUS_FUTURE,
3794 	"INTERN", BUS_INTERN,
3795 	"MBI ", BUS_MBI,
3796 	"MBII ", BUS_MBII,
3797 	"MPI ", BUS_MPI,
3798 	"MPSA ", BUS_MPSA,
3799 	"NUBUS ", BUS_NUBUS,
3800 	"TC ", BUS_TC,
3801 	"VME ", BUS_VME
3802 };
3803 
3804 static int
3805 apic_find_bus_type(char *bus)
3806 {
3807 	int	i = 0;
3808 
3809 	for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++)
3810 		if (strncmp(bus, bus_info_array[i].bus_name,
3811 		    strlen(bus_info_array[i].bus_name)) == 0)
3812 			return (bus_info_array[i].bus_id);
3813 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus));
3814 	return (0);
3815 }
3816 
3817 static int
3818 apic_find_bus(int busid)
3819 {
3820 	struct	apic_bus	*busp;
3821 
3822 	busp = apic_busp;
3823 	while (busp->bus_entry == APIC_BUS_ENTRY) {
3824 		if (busp->bus_id == busid)
3825 			return (apic_find_bus_type((char *)&busp->bus_str1));
3826 		busp++;
3827 	}
3828 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid));
3829 	return (0);
3830 }
3831 
3832 static int
3833 apic_find_bus_id(int bustype)
3834 {
3835 	struct	apic_bus	*busp;
3836 
3837 	busp = apic_busp;
3838 	while (busp->bus_entry == APIC_BUS_ENTRY) {
3839 		if (apic_find_bus_type((char *)&busp->bus_str1) == bustype)
3840 			return (busp->bus_id);
3841 		busp++;
3842 	}
3843 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x",
3844 	    bustype));
3845 	return (-1);
3846 }
3847 
3848 /*
3849  * Check if a particular irq need to be reserved for any io_intr
3850  */
3851 static struct apic_io_intr *
3852 apic_find_io_intr(int irqno)
3853 {
3854 	struct	apic_io_intr	*intrp;
3855 
3856 	intrp = apic_io_intrp;
3857 	if (intrp != NULL) {
3858 		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
3859 			if (intrp->intr_irq == irqno &&
3860 			    intrp->intr_type == IO_INTR_INT)
3861 				return (intrp);
3862 			intrp++;
3863 		}
3864 	}
3865 	return ((struct apic_io_intr *)NULL);
3866 }
3867 
3868 /*
3869  * Check if the given ioapicindex intin combination has already been assigned
3870  * an irq. If so return irqno. Else -1
3871  */
3872 static int
3873 apic_find_intin(uchar_t ioapic, uchar_t intin)
3874 {
3875 	apic_irq_t *irqptr;
3876 	int	i;
3877 
3878 	/* find ioapic and intin in the apic_irq_table[] and return the index */
3879 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
3880 		irqptr = apic_irq_table[i];
3881 		while (irqptr) {
3882 			if ((irqptr->airq_mps_intr_index >= 0) &&
3883 			    (irqptr->airq_intin_no == intin) &&
3884 			    (irqptr->airq_ioapicindex == ioapic)) {
3885 				APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq "
3886 				    "entry for ioapic:intin %x:%x "
3887 				    "shared interrupts ?", ioapic, intin));
3888 				return (i);
3889 			}
3890 			irqptr = irqptr->airq_next;
3891 		}
3892 	}
3893 	return (-1);
3894 }
3895 
3896 int
3897 apic_allocate_irq(int irq)
3898 {
3899 	int	freeirq, i;
3900 
3901 	if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1)
3902 		if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ,
3903 		    (irq - 1))) == -1) {
3904 			/*
3905 			 * if BIOS really defines every single irq in the mps
3906 			 * table, then don't worry about conflicting with
3907 			 * them, just use any free slot in apic_irq_table
3908 			 */
3909 			for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
3910 				if ((apic_irq_table[i] == NULL) ||
3911 				    apic_irq_table[i]->airq_mps_intr_index ==
3912 				    FREE_INDEX) {
3913 				freeirq = i;
3914 				break;
3915 			}
3916 		}
3917 		if (freeirq == -1) {
3918 			/* This shouldn't happen, but just in case */
3919 			cmn_err(CE_WARN, "pcplusmp: NO available IRQ");
3920 			return (-1);
3921 		}
3922 	}
3923 	if (apic_irq_table[freeirq] == NULL) {
3924 		apic_irq_table[freeirq] =
3925 		    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
3926 		if (apic_irq_table[freeirq] == NULL) {
3927 			cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ");
3928 			return (-1);
3929 		}
3930 		apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX;
3931 	}
3932 	return (freeirq);
3933 }
3934 
3935 static int
3936 apic_find_free_irq(int start, int end)
3937 {
3938 	int	i;
3939 
3940 	for (i = start; i <= end; i++)
3941 		/* Check if any I/O entry needs this IRQ */
3942 		if (apic_find_io_intr(i) == NULL) {
3943 			/* Then see if it is free */
3944 			if ((apic_irq_table[i] == NULL) ||
3945 			    (apic_irq_table[i]->airq_mps_intr_index ==
3946 			    FREE_INDEX)) {
3947 				return (i);
3948 			}
3949 		}
3950 	return (-1);
3951 }
3952 
3953 /*
3954  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
3955  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
3956  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
3957  * requests and allocated only when pri is set.
3958  */
3959 static uchar_t
3960 apic_allocate_vector(int ipl, int irq, int pri)
3961 {
3962 	int	lowest, highest, i;
3963 
3964 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
3965 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
3966 
3967 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
3968 		lowest -= APIC_VECTOR_PER_IPL;
3969 
3970 #ifdef	DEBUG
3971 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
3972 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
3973 #endif /* DEBUG */
3974 	if (pri == 0)
3975 		highest -= APIC_HI_PRI_VECTS;
3976 
3977 	for (i = lowest; i < highest; i++) {
3978 		if ((i == T_FASTTRAP) || (i == APIC_SPUR_INTR) ||
3979 			(i == T_SYSCALLINT) || (i == T_DTRACE_PROBE) ||
3980 			(i == T_DTRACE_RET))
3981 			continue;
3982 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
3983 			apic_vector_to_irq[i] = (uchar_t)irq;
3984 			return (i);
3985 		}
3986 	}
3987 
3988 	return (0);
3989 }
3990 
3991 static void
3992 apic_modify_vector(uchar_t vector, int irq)
3993 {
3994 	apic_vector_to_irq[vector] = (uchar_t)irq;
3995 }
3996 
3997 /*
3998  * Mark vector as being in the process of being deleted. Interrupts
3999  * may still come in on some CPU. The moment an interrupt comes with
4000  * the new vector, we know we can free the old one. Called only from
4001  * addspl and delspl with interrupts disabled. Because an interrupt
4002  * can be shared, but no interrupt from either device may come in,
4003  * we also use a timeout mechanism, which we arbitrarily set to
4004  * apic_revector_timeout microseconds.
4005  */
4006 static void
4007 apic_mark_vector(uchar_t oldvector, uchar_t newvector)
4008 {
4009 	int iflag = intr_clear();
4010 	lock_set(&apic_revector_lock);
4011 	if (!apic_oldvec_to_newvec) {
4012 		apic_oldvec_to_newvec =
4013 		    kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2,
4014 		    KM_NOSLEEP);
4015 
4016 		if (!apic_oldvec_to_newvec) {
4017 			/*
4018 			 * This failure is not catastrophic.
4019 			 * But, the oldvec will never be freed.
4020 			 */
4021 			apic_error |= APIC_ERR_MARK_VECTOR_FAIL;
4022 			lock_clear(&apic_revector_lock);
4023 			intr_restore(iflag);
4024 			return;
4025 		}
4026 		apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR];
4027 	}
4028 
4029 	/* See if we already did this for drivers which do double addintrs */
4030 	if (apic_oldvec_to_newvec[oldvector] != newvector) {
4031 		apic_oldvec_to_newvec[oldvector] = newvector;
4032 		apic_newvec_to_oldvec[newvector] = oldvector;
4033 		apic_revector_pending++;
4034 	}
4035 	lock_clear(&apic_revector_lock);
4036 	intr_restore(iflag);
4037 	(void) timeout(apic_xlate_vector_free_timeout_handler,
4038 	    (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout));
4039 }
4040 
4041 /*
4042  * xlate_vector is called from intr_enter if revector_pending is set.
4043  * It will xlate it if needed and mark the old vector as free.
4044  */
4045 static uchar_t
4046 apic_xlate_vector(uchar_t vector)
4047 {
4048 	uchar_t	newvector, oldvector = 0;
4049 
4050 	lock_set(&apic_revector_lock);
4051 	/* Do we really need to do this ? */
4052 	if (!apic_revector_pending) {
4053 		lock_clear(&apic_revector_lock);
4054 		return (vector);
4055 	}
4056 	if ((newvector = apic_oldvec_to_newvec[vector]) != 0)
4057 		oldvector = vector;
4058 	else {
4059 		/*
4060 		 * The incoming vector is new . See if a stale entry is
4061 		 * remaining
4062 		 */
4063 		if ((oldvector = apic_newvec_to_oldvec[vector]) != 0)
4064 			newvector = vector;
4065 	}
4066 
4067 	if (oldvector) {
4068 		apic_revector_pending--;
4069 		apic_oldvec_to_newvec[oldvector] = 0;
4070 		apic_newvec_to_oldvec[newvector] = 0;
4071 		apic_free_vector(oldvector);
4072 		lock_clear(&apic_revector_lock);
4073 		/* There could have been more than one reprogramming! */
4074 		return (apic_xlate_vector(newvector));
4075 	}
4076 	lock_clear(&apic_revector_lock);
4077 	return (vector);
4078 }
4079 
4080 void
4081 apic_xlate_vector_free_timeout_handler(void *arg)
4082 {
4083 	int iflag;
4084 	uchar_t oldvector, newvector;
4085 
4086 	oldvector = (uchar_t)(uintptr_t)arg;
4087 	iflag = intr_clear();
4088 	lock_set(&apic_revector_lock);
4089 	if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) {
4090 		apic_free_vector(oldvector);
4091 		apic_oldvec_to_newvec[oldvector] = 0;
4092 		apic_newvec_to_oldvec[newvector] = 0;
4093 		apic_revector_pending--;
4094 	}
4095 
4096 	lock_clear(&apic_revector_lock);
4097 	intr_restore(iflag);
4098 }
4099 
4100 
4101 /* Mark vector as not being used by any irq */
4102 static void
4103 apic_free_vector(uchar_t vector)
4104 {
4105 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
4106 }
4107 
4108 /*
4109  * compute the polarity, trigger mode and vector for programming into
4110  * the I/O apic and record in airq_rdt_entry.
4111  */
4112 static void
4113 apic_record_rdt_entry(apic_irq_t *irqptr, int irq)
4114 {
4115 	int	ioapicindex, bus_type, vector;
4116 	short	intr_index;
4117 	uint_t	level, po, io_po;
4118 	struct apic_io_intr *iointrp;
4119 
4120 	intr_index = irqptr->airq_mps_intr_index;
4121 	DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d "
4122 	    "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq,
4123 	    (void *)irqptr->airq_dip, irqptr->airq_vector));
4124 
4125 	if (intr_index == RESERVE_INDEX) {
4126 		apic_error |= APIC_ERR_INVALID_INDEX;
4127 		return;
4128 	} else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) {
4129 		return;
4130 	}
4131 
4132 	vector = irqptr->airq_vector;
4133 	ioapicindex = irqptr->airq_ioapicindex;
4134 	/* Assume edge triggered by default */
4135 	level = 0;
4136 	/* Assume active high by default */
4137 	po = 0;
4138 
4139 	if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) {
4140 		ASSERT(irq < 16);
4141 		if (eisa_level_intr_mask & (1 << irq))
4142 			level = AV_LEVEL;
4143 		if (intr_index == FREE_INDEX && apic_defconf == 0)
4144 			apic_error |= APIC_ERR_INVALID_INDEX;
4145 	} else if (intr_index == ACPI_INDEX) {
4146 		bus_type = irqptr->airq_iflag.bustype;
4147 		if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) {
4148 			if (bus_type == BUS_PCI)
4149 				level = AV_LEVEL;
4150 		} else
4151 			level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ?
4152 			    AV_LEVEL : 0;
4153 		if (level &&
4154 		    ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) ||
4155 		    (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM &&
4156 		    bus_type == BUS_PCI)))
4157 			po = AV_ACTIVE_LOW;
4158 	} else {
4159 		iointrp = apic_io_intrp + intr_index;
4160 		bus_type = apic_find_bus(iointrp->intr_busid);
4161 		if (iointrp->intr_el == INTR_EL_CONFORM) {
4162 			if ((irq < 16) && (eisa_level_intr_mask & (1 << irq)))
4163 				level = AV_LEVEL;
4164 			else if (bus_type == BUS_PCI)
4165 				level = AV_LEVEL;
4166 		} else
4167 			level = (iointrp->intr_el == INTR_EL_LEVEL) ?
4168 			    AV_LEVEL : 0;
4169 		if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) ||
4170 		    (iointrp->intr_po == INTR_PO_CONFORM &&
4171 		    bus_type == BUS_PCI)))
4172 			po = AV_ACTIVE_LOW;
4173 	}
4174 	if (level)
4175 		apic_level_intr[irq] = 1;
4176 	/*
4177 	 * The 82489DX External APIC cannot do active low polarity interrupts.
4178 	 */
4179 	if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX))
4180 		io_po = po;
4181 	else
4182 		io_po = 0;
4183 
4184 	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG)
4185 		printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n",
4186 		    ioapicindex, irqptr->airq_intin_no, level, io_po, vector);
4187 
4188 	irqptr->airq_rdt_entry = level|io_po|vector;
4189 }
4190 
4191 /*
4192  * Call rebind to do the actual programming.
4193  */
4194 static int
4195 apic_setup_io_intr(apic_irq_t *irqptr, int irq)
4196 {
4197 	int rv;
4198 
4199 	if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1,
4200 	    IMMEDIATE))
4201 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
4202 		rv = apic_rebind(irqptr, 0, 1, IMMEDIATE);
4203 
4204 	return (rv);
4205 }
4206 
4207 /*
4208  * Deferred reprogramming: Call apic_rebind to do the real work.
4209  */
4210 static int
4211 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq)
4212 {
4213 	int rv;
4214 
4215 	if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1,
4216 	    DEFERRED))
4217 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
4218 		rv = apic_rebind(irqptr, 0, 1, DEFERRED);
4219 
4220 	return (rv);
4221 }
4222 
4223 /*
4224  * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock
4225  * if false (0) means lock is already held (e.g: in rebind_all).
4226  */
4227 static int
4228 apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when)
4229 {
4230 	int			intin_no;
4231 	volatile int32_t	*ioapic;
4232 	uchar_t			airq_temp_cpu;
4233 	apic_cpus_info_t	*cpu_infop;
4234 	int			iflag;
4235 	int		which_irq = apic_vector_to_irq[irq_ptr->airq_vector];
4236 
4237 	intin_no = irq_ptr->airq_intin_no;
4238 	ioapic = apicioadr[irq_ptr->airq_ioapicindex];
4239 	airq_temp_cpu = irq_ptr->airq_temp_cpu;
4240 	if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) {
4241 		if (airq_temp_cpu & IRQ_USER_BOUND)
4242 			/* Mask off high bit so it can be used as array index */
4243 			airq_temp_cpu &= ~IRQ_USER_BOUND;
4244 
4245 		ASSERT(airq_temp_cpu < apic_nproc);
4246 	}
4247 
4248 	iflag = intr_clear();
4249 
4250 	if (acquire_lock)
4251 		lock_set(&apic_ioapic_lock);
4252 
4253 	/*
4254 	 * Can't bind to a CPU that's not online:
4255 	 */
4256 	cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND];
4257 	if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) {
4258 
4259 		if (acquire_lock)
4260 			lock_clear(&apic_ioapic_lock);
4261 
4262 		intr_restore(iflag);
4263 		return (1);
4264 	}
4265 
4266 	/*
4267 	 * If this is a deferred reprogramming attempt, ensure we have
4268 	 * not been passed stale data:
4269 	 */
4270 	if ((when == DEFERRED) &&
4271 	    (apic_reprogram_info[which_irq].valid == 0)) {
4272 		/* stale info, so just return */
4273 		if (acquire_lock)
4274 			lock_clear(&apic_ioapic_lock);
4275 
4276 		intr_restore(iflag);
4277 		return (0);
4278 	}
4279 
4280 	/*
4281 	 * If this interrupt has been delivered to a CPU and that CPU
4282 	 * has not handled it yet, we cannot reprogram the IOAPIC now:
4283 	 */
4284 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) &&
4285 	    apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu,
4286 	    ioapic, intin_no, which_irq) != 0) {
4287 
4288 		if (acquire_lock)
4289 			lock_clear(&apic_ioapic_lock);
4290 
4291 		intr_restore(iflag);
4292 		return (0);
4293 	}
4294 
4295 	/*
4296 	 * NOTE: We do not unmask the RDT here, as an interrupt MAY still
4297 	 * come in before we have a chance to reprogram it below.  The
4298 	 * reprogramming below will simultaneously change and unmask the
4299 	 * RDT entry.
4300 	 */
4301 
4302 	if ((uchar_t)bind_cpu == IRQ_UNBOUND) {
4303 		/* Write the RDT entry -- no specific CPU binding */
4304 		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL);
4305 
4306 		if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND)
4307 			apic_cpus[airq_temp_cpu].aci_temp_bound--;
4308 
4309 		/* Write the vector, trigger, and polarity portion of the RDT */
4310 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
4311 		    AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry);
4312 		if (acquire_lock)
4313 			lock_clear(&apic_ioapic_lock);
4314 		irq_ptr->airq_temp_cpu = IRQ_UNBOUND;
4315 		intr_restore(iflag);
4316 		return (0);
4317 	}
4318 
4319 	if (bind_cpu & IRQ_USER_BOUND) {
4320 		cpu_infop->aci_bound++;
4321 	} else {
4322 		cpu_infop->aci_temp_bound++;
4323 	}
4324 	ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
4325 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
4326 		/* Write the RDT entry -- bind to a specific CPU: */
4327 		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no,
4328 		    cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET);
4329 	}
4330 	if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
4331 		apic_cpus[airq_temp_cpu].aci_temp_bound--;
4332 	}
4333 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
4334 		/* Write the vector, trigger, and polarity portion of the RDT */
4335 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
4336 		    AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry);
4337 	} else {
4338 		int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
4339 		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
4340 		(void) pci_msi_disable_mode(irq_ptr->airq_dip, type,
4341 		    irq_ptr->airq_ioapicindex);
4342 		if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) {
4343 			/* first one */
4344 			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
4345 			    "apic_pci_msi_enable_vector\n"));
4346 			if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type,
4347 			    which_irq, irq_ptr->airq_vector,
4348 			    irq_ptr->airq_intin_no,
4349 			    cpu_infop->aci_local_id) != PSM_SUCCESS) {
4350 				cmn_err(CE_WARN, "pcplusmp: "
4351 					"apic_pci_msi_enable_vector "
4352 					"returned PSM_FAILURE");
4353 			}
4354 		}
4355 		if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) ==
4356 		    irq_ptr->airq_origirq) { /* last one */
4357 			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
4358 			    "pci_msi_enable_mode\n"));
4359 			if (pci_msi_enable_mode(irq_ptr->airq_dip, type,
4360 			    which_irq) != DDI_SUCCESS) {
4361 				DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: "
4362 				    "pci_msi_enable failed\n"));
4363 				(void) pci_msi_unconfigure(irq_ptr->airq_dip,
4364 				(irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
4365 				DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX,
4366 				which_irq);
4367 			}
4368 		}
4369 	}
4370 	if (acquire_lock)
4371 		lock_clear(&apic_ioapic_lock);
4372 	irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu;
4373 	apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND));
4374 	intr_restore(iflag);
4375 	return (0);
4376 }
4377 
4378 /*
4379  * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR
4380  * bit set.  Sets up a timeout to perform the reprogramming at a later time
4381  * if it cannot wait for the Remote IRR bit to clear (or if waiting did not
4382  * result in the bit's clearing).
4383  *
4384  * This function will mask the RDT entry if the Remote IRR bit is set.
4385  *
4386  * Returns non-zero if the caller should defer IOAPIC reprogramming.
4387  */
4388 static int
4389 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
4390 	int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq)
4391 {
4392 	int32_t			rdt_entry;
4393 	int			waited;
4394 
4395 	/* Mask the RDT entry, but only if it's a level-triggered interrupt */
4396 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no);
4397 	if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) {
4398 
4399 		/* Mask it */
4400 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
4401 		    AV_MASK | rdt_entry);
4402 	}
4403 
4404 	/*
4405 	 * Wait for the delivery pending bit to clear.
4406 	 */
4407 	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
4408 	    (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) {
4409 
4410 		/*
4411 		 * If we're still waiting on the delivery of this interrupt,
4412 		 * continue to wait here until it is delivered (this should be
4413 		 * a very small amount of time, but include a timeout just in
4414 		 * case).
4415 		 */
4416 		for (waited = 0; waited < apic_max_usecs_clear_pending;
4417 		    waited += APIC_USECS_PER_WAIT_INTERVAL) {
4418 			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
4419 			    & AV_PENDING) == 0) {
4420 				break;
4421 			}
4422 			drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
4423 		}
4424 
4425 		if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
4426 		    AV_PENDING) != 0) {
4427 			cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not "
4428 			    "deliver interrupt to local APIC within "
4429 			    "%d usecs.", irq_ptr->airq_ioapicindex,
4430 			    irq_ptr->airq_intin_no,
4431 			    apic_max_usecs_clear_pending);
4432 		}
4433 	}
4434 
4435 	/*
4436 	 * If the remote IRR bit is set, then the interrupt has been sent
4437 	 * to a CPU for processing.  We have no choice but to wait for
4438 	 * that CPU to process the interrupt, at which point the remote IRR
4439 	 * bit will be cleared.
4440 	 */
4441 	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
4442 	    (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) {
4443 
4444 		/*
4445 		 * If the CPU that this RDT is bound to is NOT the current
4446 		 * CPU, wait until that CPU handles the interrupt and ACKs
4447 		 * it.  If this interrupt is not bound to any CPU (that is,
4448 		 * if it's bound to the logical destination of "anyone"), it
4449 		 * may have been delivered to the current CPU so handle that
4450 		 * case by deferring the reprogramming (below).
4451 		 */
4452 		kpreempt_disable();
4453 		if ((old_bind_cpu != IRQ_UNBOUND) &&
4454 		    (old_bind_cpu != IRQ_UNINIT) &&
4455 		    (old_bind_cpu != psm_get_cpu_id())) {
4456 			for (waited = 0; waited < apic_max_usecs_clear_pending;
4457 			    waited += APIC_USECS_PER_WAIT_INTERVAL) {
4458 				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4459 				    intin_no) & AV_REMOTE_IRR) == 0) {
4460 
4461 					/* Clear the reprogramming state: */
4462 					lock_set(&apic_ioapic_reprogram_lock);
4463 
4464 					apic_reprogram_info[which_irq].valid
4465 					    = 0;
4466 					apic_reprogram_info[which_irq].bindcpu
4467 					    = 0;
4468 					apic_reprogram_info[which_irq].timeouts
4469 					    = 0;
4470 
4471 					lock_clear(&apic_ioapic_reprogram_lock);
4472 
4473 					/* Remote IRR has cleared! */
4474 					kpreempt_enable();
4475 					return (0);
4476 				}
4477 				drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
4478 			}
4479 		}
4480 		kpreempt_enable();
4481 
4482 		/*
4483 		 * If we waited and the Remote IRR bit is still not cleared,
4484 		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
4485 		 * times for this interrupt, try the last-ditch workarounds:
4486 		 */
4487 		if (apic_reprogram_info[which_irq].timeouts >=
4488 		    APIC_REPROGRAM_MAX_TIMEOUTS) {
4489 
4490 			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
4491 			    & AV_REMOTE_IRR) != 0) {
4492 				/*
4493 				 * Trying to clear the bit through normal
4494 				 * channels has failed.  So as a last-ditch
4495 				 * effort, try to set the trigger mode to
4496 				 * edge, then to level.  This has been
4497 				 * observed to work on many systems.
4498 				 */
4499 				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4500 				    intin_no,
4501 				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4502 				    intin_no) & ~AV_LEVEL);
4503 
4504 				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4505 				    intin_no,
4506 				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4507 				    intin_no) | AV_LEVEL);
4508 
4509 				/*
4510 				 * If the bit's STILL set, declare total and
4511 				 * utter failure
4512 				 */
4513 				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4514 				    intin_no) & AV_REMOTE_IRR) != 0) {
4515 					cmn_err(CE_WARN, "!IOAPIC %d intin %d: "
4516 					    "Remote IRR failed to reset "
4517 					    "within %d usecs.  Interrupts to "
4518 					    "this pin may cease to function.",
4519 					    irq_ptr->airq_ioapicindex,
4520 					    irq_ptr->airq_intin_no,
4521 					    apic_max_usecs_clear_pending);
4522 				}
4523 			}
4524 			/* Clear the reprogramming state: */
4525 			lock_set(&apic_ioapic_reprogram_lock);
4526 
4527 			apic_reprogram_info[which_irq].valid = 0;
4528 			apic_reprogram_info[which_irq].bindcpu = 0;
4529 			apic_reprogram_info[which_irq].timeouts = 0;
4530 
4531 			lock_clear(&apic_ioapic_reprogram_lock);
4532 		} else {
4533 #ifdef DEBUG
4534 			cmn_err(CE_WARN, "Deferring reprogramming of irq %d",
4535 			    which_irq);
4536 #endif	/* DEBUG */
4537 			/*
4538 			 * If waiting for the Remote IRR bit (above) didn't
4539 			 * allow it to clear, defer the reprogramming:
4540 			 */
4541 			lock_set(&apic_ioapic_reprogram_lock);
4542 
4543 			apic_reprogram_info[which_irq].valid = 1;
4544 			apic_reprogram_info[which_irq].bindcpu = new_bind_cpu;
4545 			apic_reprogram_info[which_irq].timeouts++;
4546 
4547 			lock_clear(&apic_ioapic_reprogram_lock);
4548 
4549 			/* Fire up a timeout to handle this later */
4550 			(void) timeout(apic_reprogram_timeout_handler,
4551 			    (void *) 0,
4552 			    drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY));
4553 
4554 			/* Inform caller to defer IOAPIC programming: */
4555 			return (1);
4556 		}
4557 	}
4558 	return (0);
4559 }
4560 
4561 /*
4562  * Timeout handler that performs the APIC reprogramming
4563  */
4564 /*ARGSUSED*/
4565 static void
4566 apic_reprogram_timeout_handler(void *arg)
4567 {
4568 	/*LINTED: set but not used in function*/
4569 	int i, result;
4570 
4571 	/* Serialize access to this function */
4572 	mutex_enter(&apic_reprogram_timeout_mutex);
4573 
4574 	/*
4575 	 * For each entry in the reprogramming state that's valid,
4576 	 * try the reprogramming again:
4577 	 */
4578 	for (i = 0; i < APIC_MAX_VECTOR; i++) {
4579 		if (apic_reprogram_info[i].valid == 0)
4580 			continue;
4581 		/*
4582 		 * Though we can't really do anything about errors
4583 		 * at this point, keep track of them for reporting.
4584 		 * Note that it is very possible for apic_setup_io_intr
4585 		 * to re-register this very timeout if the Remote IRR bit
4586 		 * has not yet cleared.
4587 		 */
4588 		result = apic_setup_io_intr_deferred(apic_irq_table[i], i);
4589 
4590 #ifdef DEBUG
4591 		if (result)
4592 			cmn_err(CE_WARN, "apic_reprogram_timeout: "
4593 			    "apic_setup_io_intr returned nonzero for "
4594 			    "irq=%d!", i);
4595 #endif	/* DEBUG */
4596 	}
4597 
4598 	mutex_exit(&apic_reprogram_timeout_mutex);
4599 }
4600 
4601 
4602 /*
4603  * Called to migrate all interrupts at an irq to another cpu. safe
4604  * if true means we are not being called from an interrupt
4605  * context and hence it is safe to do a lock_set. If false
4606  * do only a lock_try and return failure ( non 0 ) if we cannot get it
4607  */
4608 int
4609 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe)
4610 {
4611 	apic_irq_t	*irqptr = irq_ptr;
4612 	int		retval = 0;
4613 	int		iflag;
4614 
4615 	iflag = intr_clear();
4616 	if (!safe) {
4617 		if (lock_try(&apic_ioapic_lock) == 0) {
4618 			intr_restore(iflag);
4619 			return (1);
4620 		}
4621 	} else
4622 		lock_set(&apic_ioapic_lock);
4623 
4624 	while (irqptr) {
4625 		if (irqptr->airq_temp_cpu != IRQ_UNINIT)
4626 			retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE);
4627 		irqptr = irqptr->airq_next;
4628 	}
4629 	lock_clear(&apic_ioapic_lock);
4630 	intr_restore(iflag);
4631 	return (retval);
4632 }
4633 
4634 /*
4635  * apic_intr_redistribute does all the messy computations for identifying
4636  * which interrupt to move to which CPU. Currently we do just one interrupt
4637  * at a time. This reduces the time we spent doing all this within clock
4638  * interrupt. When it is done in idle, we could do more than 1.
4639  * First we find the most busy and the most free CPU (time in ISR only)
4640  * skipping those CPUs that has been identified as being ineligible (cpu_skip)
4641  * Then we look for IRQs which are closest to the difference between the
4642  * most busy CPU and the average ISR load. We try to find one whose load
4643  * is less than difference.If none exists, then we chose one larger than the
4644  * difference, provided it does not make the most idle CPU worse than the
4645  * most busy one. In the end, we clear all the busy fields for CPUs. For
4646  * IRQs, they are cleared as they are scanned.
4647  */
4648 static void
4649 apic_intr_redistribute()
4650 {
4651 	int busiest_cpu, most_free_cpu;
4652 	int cpu_free, cpu_busy, max_busy, min_busy;
4653 	int min_free, diff;
4654 	int	average_busy, cpus_online;
4655 	int i, busy;
4656 	apic_cpus_info_t *cpu_infop;
4657 	apic_irq_t *min_busy_irq = NULL;
4658 	apic_irq_t *max_busy_irq = NULL;
4659 
4660 	busiest_cpu = most_free_cpu = -1;
4661 	cpu_free = cpu_busy = max_busy = average_busy = 0;
4662 	min_free = apic_sample_factor_redistribution;
4663 	cpus_online = 0;
4664 	/*
4665 	 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu
4666 	 * without ioapic_lock. That is OK as we are just doing statistical
4667 	 * sampling anyway and any inaccuracy now will get corrected next time
4668 	 * The call to rebind which actually changes things will make sure
4669 	 * we are consistent.
4670 	 */
4671 	for (i = 0; i < apic_nproc; i++) {
4672 		if (!(apic_redist_cpu_skip & (1 << i)) &&
4673 		    (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {
4674 
4675 			cpu_infop = &apic_cpus[i];
4676 			/*
4677 			 * If no unbound interrupts or only 1 total on this
4678 			 * CPU, skip
4679 			 */
4680 			if (!cpu_infop->aci_temp_bound ||
4681 			    (cpu_infop->aci_bound + cpu_infop->aci_temp_bound)
4682 			    == 1) {
4683 				apic_redist_cpu_skip |= 1 << i;
4684 				continue;
4685 			}
4686 
4687 			busy = cpu_infop->aci_busy;
4688 			average_busy += busy;
4689 			cpus_online++;
4690 			if (max_busy < busy) {
4691 				max_busy = busy;
4692 				busiest_cpu = i;
4693 			}
4694 			if (min_free > busy) {
4695 				min_free = busy;
4696 				most_free_cpu = i;
4697 			}
4698 			if (busy > apic_int_busy_mark) {
4699 				cpu_busy |= 1 << i;
4700 			} else {
4701 				if (busy < apic_int_free_mark)
4702 					cpu_free |= 1 << i;
4703 			}
4704 		}
4705 	}
4706 	if ((cpu_busy && cpu_free) ||
4707 	    (max_busy >= (min_free + apic_diff_for_redistribution))) {
4708 
4709 		apic_num_imbalance++;
4710 #ifdef	DEBUG
4711 		if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4712 			prom_printf(
4713 			    "redistribute busy=%x free=%x max=%x min=%x",
4714 			    cpu_busy, cpu_free, max_busy, min_free);
4715 		}
4716 #endif /* DEBUG */
4717 
4718 
4719 		average_busy /= cpus_online;
4720 
4721 		diff = max_busy - average_busy;
4722 		min_busy = max_busy; /* start with the max possible value */
4723 		max_busy = 0;
4724 		min_busy_irq = max_busy_irq = NULL;
4725 		i = apic_min_device_irq;
4726 		for (; i < apic_max_device_irq; i++) {
4727 			apic_irq_t *irq_ptr;
4728 			/* Change to linked list per CPU ? */
4729 			if ((irq_ptr = apic_irq_table[i]) == NULL)
4730 				continue;
4731 			/* Check for irq_busy & decide which one to move */
4732 			/* Also zero them for next round */
4733 			if ((irq_ptr->airq_temp_cpu == busiest_cpu) &&
4734 			    irq_ptr->airq_busy) {
4735 				if (irq_ptr->airq_busy < diff) {
4736 					/*
4737 					 * Check for least busy CPU,
4738 					 * best fit or what ?
4739 					 */
4740 					if (max_busy < irq_ptr->airq_busy) {
4741 						/*
4742 						 * Most busy within the
4743 						 * required differential
4744 						 */
4745 						max_busy = irq_ptr->airq_busy;
4746 						max_busy_irq = irq_ptr;
4747 					}
4748 				} else {
4749 					if (min_busy > irq_ptr->airq_busy) {
4750 						/*
4751 						 * least busy, but more than
4752 						 * the reqd diff
4753 						 */
4754 						if (min_busy <
4755 						    (diff + average_busy -
4756 						    min_free)) {
4757 							/*
4758 							 * Making sure new cpu
4759 							 * will not end up
4760 							 * worse
4761 							 */
4762 							min_busy =
4763 							    irq_ptr->airq_busy;
4764 
4765 							min_busy_irq = irq_ptr;
4766 						}
4767 					}
4768 				}
4769 			}
4770 			irq_ptr->airq_busy = 0;
4771 		}
4772 
4773 		if (max_busy_irq != NULL) {
4774 #ifdef	DEBUG
4775 			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4776 				prom_printf("rebinding %x to %x",
4777 				    max_busy_irq->airq_vector, most_free_cpu);
4778 			}
4779 #endif /* DEBUG */
4780 			if (apic_rebind_all(max_busy_irq, most_free_cpu, 0)
4781 			    == 0)
4782 				/* Make change permenant */
4783 				max_busy_irq->airq_cpu = (uchar_t)most_free_cpu;
4784 		} else if (min_busy_irq != NULL) {
4785 #ifdef	DEBUG
4786 			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4787 				prom_printf("rebinding %x to %x",
4788 				    min_busy_irq->airq_vector, most_free_cpu);
4789 			}
4790 #endif /* DEBUG */
4791 
4792 			if (apic_rebind_all(min_busy_irq, most_free_cpu, 0) ==
4793 			    0)
4794 				/* Make change permenant */
4795 				min_busy_irq->airq_cpu = (uchar_t)most_free_cpu;
4796 		} else {
4797 			if (cpu_busy != (1 << busiest_cpu)) {
4798 				apic_redist_cpu_skip |= 1 << busiest_cpu;
4799 				/*
4800 				 * We leave cpu_skip set so that next time we
4801 				 * can choose another cpu
4802 				 */
4803 			}
4804 		}
4805 		apic_num_rebind++;
4806 	} else {
4807 		/*
4808 		 * found nothing. Could be that we skipped over valid CPUs
4809 		 * or we have balanced everything. If we had a variable
4810 		 * ticks_for_redistribution, it could be increased here.
4811 		 * apic_int_busy, int_free etc would also need to be
4812 		 * changed.
4813 		 */
4814 		if (apic_redist_cpu_skip)
4815 			apic_redist_cpu_skip = 0;
4816 	}
4817 	for (i = 0; i < apic_nproc; i++) {
4818 		apic_cpus[i].aci_busy = 0;
4819 	}
4820 }
4821 
4822 static void
4823 apic_cleanup_busy()
4824 {
4825 	int i;
4826 	apic_irq_t *irq_ptr;
4827 
4828 	for (i = 0; i < apic_nproc; i++) {
4829 		apic_cpus[i].aci_busy = 0;
4830 	}
4831 
4832 	for (i = apic_min_device_irq; i < apic_max_device_irq; i++) {
4833 		if ((irq_ptr = apic_irq_table[i]) != NULL)
4834 			irq_ptr->airq_busy = 0;
4835 	}
4836 	apic_skipped_redistribute = 0;
4837 }
4838 
4839 
4840 /*
4841  * This function will reprogram the timer.
4842  *
4843  * When in oneshot mode the argument is the absolute time in future to
4844  * generate the interrupt at.
4845  *
4846  * When in periodic mode, the argument is the interval at which the
4847  * interrupts should be generated. There is no need to support the periodic
4848  * mode timer change at this time.
4849  */
4850 static void
4851 apic_timer_reprogram(hrtime_t time)
4852 {
4853 	hrtime_t now;
4854 	uint_t ticks;
4855 
4856 	/*
4857 	 * We should be called from high PIL context (CBE_HIGH_PIL),
4858 	 * so kpreempt is disabled.
4859 	 */
4860 
4861 	if (!apic_oneshot) {
4862 		/* time is the interval for periodic mode */
4863 		ticks = (uint_t)((time) / apic_nsec_per_tick);
4864 	} else {
4865 		/* one shot mode */
4866 
4867 		now = gethrtime();
4868 
4869 		if (time <= now) {
4870 			/*
4871 			 * requested to generate an interrupt in the past
4872 			 * generate an interrupt as soon as possible
4873 			 */
4874 			ticks = apic_min_timer_ticks;
4875 		} else if ((time - now) > apic_nsec_max) {
4876 			/*
4877 			 * requested to generate an interrupt at a time
4878 			 * further than what we are capable of. Set to max
4879 			 * the hardware can handle
4880 			 */
4881 
4882 			ticks = APIC_MAXVAL;
4883 #ifdef DEBUG
4884 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
4885 			    "  %lld  too far in future, current time"
4886 			    "  %lld \n", time, now);
4887 #endif	/* DEBUG */
4888 		} else
4889 			ticks = (uint_t)((time - now) / apic_nsec_per_tick);
4890 	}
4891 
4892 	if (ticks < apic_min_timer_ticks)
4893 		ticks = apic_min_timer_ticks;
4894 
4895 	apicadr[APIC_INIT_COUNT] = ticks;
4896 
4897 }
4898 
4899 /*
4900  * This function will enable timer interrupts.
4901  */
4902 static void
4903 apic_timer_enable(void)
4904 {
4905 	/*
4906 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
4907 	 * so kpreempt is disabled.
4908 	 */
4909 
4910 	if (!apic_oneshot)
4911 		apicadr[APIC_LOCAL_TIMER] =
4912 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
4913 	else {
4914 		/* one shot */
4915 		apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT);
4916 	}
4917 }
4918 
4919 /*
4920  * This function will disable timer interrupts.
4921  */
4922 static void
4923 apic_timer_disable(void)
4924 {
4925 	/*
4926 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
4927 	 * so kpreempt is disabled.
4928 	 */
4929 
4930 	apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
4931 }
4932 
4933 
4934 cyclic_id_t apic_cyclic_id;
4935 
4936 /*
4937  * If this module needs to be a consumer of cyclic subsystem, they
4938  * can be added here, since at this time kernel cyclic subsystem is initialized
4939  * argument is not currently used, and is reserved for future.
4940  */
4941 static void
4942 apic_post_cyclic_setup(void *arg)
4943 {
4944 _NOTE(ARGUNUSED(arg))
4945 	cyc_handler_t hdlr;
4946 	cyc_time_t when;
4947 
4948 	/* cpu_lock is held */
4949 
4950 	/* set up cyclics for intr redistribution */
4951 
4952 	/*
4953 	 * In peridoc mode intr redistribution processing is done in
4954 	 * apic_intr_enter during clk intr processing
4955 	 */
4956 	if (!apic_oneshot)
4957 		return;
4958 
4959 	hdlr.cyh_level = CY_LOW_LEVEL;
4960 	hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute;
4961 	hdlr.cyh_arg = NULL;
4962 
4963 	when.cyt_when = 0;
4964 	when.cyt_interval = apic_redistribute_sample_interval;
4965 	apic_cyclic_id = cyclic_add(&hdlr, &when);
4966 
4967 
4968 }
4969 
4970 static void
4971 apic_redistribute_compute(void)
4972 {
4973 	int	i, j, max_busy;
4974 
4975 	if (apic_enable_dynamic_migration) {
4976 		if (++apic_nticks == apic_sample_factor_redistribution) {
4977 			/*
4978 			 * Time to call apic_intr_redistribute().
4979 			 * reset apic_nticks. This will cause max_busy
4980 			 * to be calculated below and if it is more than
4981 			 * apic_int_busy, we will do the whole thing
4982 			 */
4983 			apic_nticks = 0;
4984 		}
4985 		max_busy = 0;
4986 		for (i = 0; i < apic_nproc; i++) {
4987 
4988 			/*
4989 			 * Check if curipl is non zero & if ISR is in
4990 			 * progress
4991 			 */
4992 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
4993 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
4994 
4995 				int	irq;
4996 				apic_cpus[i].aci_busy++;
4997 				irq = apic_cpus[i].aci_current[j];
4998 				apic_irq_table[irq]->airq_busy++;
4999 			}
5000 
5001 			if (!apic_nticks &&
5002 			    (apic_cpus[i].aci_busy > max_busy))
5003 				max_busy = apic_cpus[i].aci_busy;
5004 		}
5005 		if (!apic_nticks) {
5006 			if (max_busy > apic_int_busy_mark) {
5007 			/*
5008 			 * We could make the following check be
5009 			 * skipped > 1 in which case, we get a
5010 			 * redistribution at half the busy mark (due to
5011 			 * double interval). Need to be able to collect
5012 			 * more empirical data to decide if that is a
5013 			 * good strategy. Punt for now.
5014 			 */
5015 				if (apic_skipped_redistribute)
5016 					apic_cleanup_busy();
5017 				else
5018 					apic_intr_redistribute();
5019 			} else
5020 				apic_skipped_redistribute++;
5021 		}
5022 	}
5023 }
5024 
5025 
5026 static int
5027 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
5028     int ipin, int *pci_irqp, iflag_t *intr_flagp)
5029 {
5030 
5031 	int status;
5032 	acpi_psm_lnk_t acpipsmlnk;
5033 
5034 	if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp,
5035 	    intr_flagp)) == ACPI_PSM_SUCCESS) {
5036 		APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d "
5037 		    "from cache for device %s, instance #%d\n", *pci_irqp,
5038 		    ddi_get_name(dip), ddi_get_instance(dip)));
5039 		return (status);
5040 	}
5041 
5042 	bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t));
5043 
5044 	if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp,
5045 	    &acpipsmlnk)) == ACPI_PSM_FAILURE) {
5046 		APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: "
5047 		    " acpi_translate_pci_irq failed for device %s, instance"
5048 		    " #%d", ddi_get_name(dip), ddi_get_instance(dip)));
5049 		return (status);
5050 	}
5051 
5052 	if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) {
5053 		status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp,
5054 		    intr_flagp);
5055 		if (status != ACPI_PSM_SUCCESS) {
5056 			status = acpi_get_current_irq_resource(&acpipsmlnk,
5057 			    pci_irqp, intr_flagp);
5058 		}
5059 	}
5060 
5061 	if (status == ACPI_PSM_SUCCESS) {
5062 		acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp,
5063 		    intr_flagp, &acpipsmlnk);
5064 
5065 		APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] "
5066 		    "new irq %d for device %s, instance #%d\n",
5067 		    *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip)));
5068 	}
5069 
5070 	return (status);
5071 }
5072 
5073 /*
5074  * Configures the irq for the interrupt link device identified by
5075  * acpipsmlnkp.
5076  *
5077  * Gets the current and the list of possible irq settings for the
5078  * device. If apic_unconditional_srs is not set, and the current
5079  * resource setting is in the list of possible irq settings,
5080  * current irq resource setting is passed to the caller.
5081  *
5082  * Otherwise, picks an irq number from the list of possible irq
5083  * settings, and sets the irq of the device to this value.
5084  * If prefer_crs is set, among a set of irq numbers in the list that have
5085  * the least number of devices sharing the interrupt, we pick current irq
5086  * resource setting if it is a member of this set.
5087  *
5088  * Passes the irq number in the value pointed to by pci_irqp, and
5089  * polarity and sensitivity in the structure pointed to by dipintrflagp
5090  * to the caller.
5091  *
5092  * Note that if setting the irq resource failed, but successfuly obtained
5093  * the current irq resource settings, passes the current irq resources
5094  * and considers it a success.
5095  *
5096  * Returns:
5097  * ACPI_PSM_SUCCESS on success.
5098  *
5099  * ACPI_PSM_FAILURE if an error occured during the configuration or
5100  * if a suitable irq was not found for this device, or if setting the
5101  * irq resource and obtaining the current resource fails.
5102  *
5103  */
5104 static int
5105 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
5106     int *pci_irqp, iflag_t *dipintr_flagp)
5107 {
5108 
5109 	int i, min_share, foundnow, done = 0;
5110 	int32_t irq;
5111 	int32_t share_irq = -1;
5112 	int32_t chosen_irq = -1;
5113 	int cur_irq = -1;
5114 	acpi_irqlist_t *irqlistp;
5115 	acpi_irqlist_t *irqlistent;
5116 
5117 	if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp))
5118 	    == ACPI_PSM_FAILURE) {
5119 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine "
5120 		    "or assign IRQ for device %s, instance #%d: The system was "
5121 		    "unable to get the list of potential IRQs from ACPI.",
5122 		    ddi_get_name(dip), ddi_get_instance(dip)));
5123 
5124 		return (ACPI_PSM_FAILURE);
5125 	}
5126 
5127 	if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
5128 	    dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) &&
5129 	    (cur_irq > 0)) {
5130 		/*
5131 		 * If an IRQ is set in CRS and that IRQ exists in the set
5132 		 * returned from _PRS, return that IRQ, otherwise print
5133 		 * a warning
5134 		 */
5135 
5136 		if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL)
5137 		    == ACPI_PSM_SUCCESS) {
5138 
5139 			acpi_free_irqlist(irqlistp);
5140 			ASSERT(pci_irqp != NULL);
5141 			*pci_irqp = cur_irq;
5142 			return (ACPI_PSM_SUCCESS);
5143 		}
5144 
5145 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the "
5146 		    "current irq %d for device %s, instance #%d in ACPI's "
5147 		    "list of possible irqs for this device. Picking one from "
5148 		    " the latter list.", cur_irq, ddi_get_name(dip),
5149 		    ddi_get_instance(dip)));
5150 	}
5151 
5152 	irqlistent = irqlistp;
5153 	min_share = 255;
5154 
5155 	while (irqlistent != NULL) {
5156 		irqlistent->intr_flags.bustype = BUS_PCI;
5157 
5158 		for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) {
5159 
5160 			irq = irqlistent->irqs[i];
5161 
5162 			if ((irq < 16) && (apic_reserved_irqlist[irq]))
5163 				continue;
5164 
5165 			if (irq == 0) {
5166 				/* invalid irq number */
5167 				continue;
5168 			}
5169 
5170 			if ((apic_irq_table[irq] == NULL) ||
5171 			    (apic_irq_table[irq]->airq_dip == dip)) {
5172 				chosen_irq = irq;
5173 				foundnow = 1;
5174 				/*
5175 				 * If we do not prefer current irq from crs
5176 				 * or if we do and this irq is the same as
5177 				 * current irq from crs, this is the one
5178 				 * to pick.
5179 				 */
5180 				if (!(apic_prefer_crs) || (irq == cur_irq)) {
5181 					done = 1;
5182 					break;
5183 				}
5184 				continue;
5185 			}
5186 
5187 			if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE)
5188 				continue;
5189 
5190 			if (!acpi_intr_compatible(irqlistent->intr_flags,
5191 			    apic_irq_table[irq]->airq_iflag))
5192 				continue;
5193 
5194 			if ((apic_irq_table[irq]->airq_share < min_share) ||
5195 			    ((apic_irq_table[irq]->airq_share == min_share) &&
5196 			    (cur_irq == irq) && (apic_prefer_crs))) {
5197 				min_share = apic_irq_table[irq]->airq_share;
5198 				share_irq = irq;
5199 				foundnow = 1;
5200 			}
5201 		}
5202 
5203 		/*
5204 		 * If we found an IRQ in the inner loop this time, save the
5205 		 * details from the irqlist for later use.
5206 		 */
5207 		if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) {
5208 			/*
5209 			 * Copy the acpi_prs_private_t and flags from this
5210 			 * irq list entry, since we found an irq from this
5211 			 * entry.
5212 			 */
5213 			acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv;
5214 			*dipintr_flagp = irqlistent->intr_flags;
5215 		}
5216 
5217 		if (done)
5218 			break;
5219 
5220 		/* Go to the next irqlist entry */
5221 		irqlistent = irqlistent->next;
5222 	}
5223 
5224 
5225 	acpi_free_irqlist(irqlistp);
5226 	if (chosen_irq != -1)
5227 		irq = chosen_irq;
5228 	else if (share_irq != -1)
5229 		irq = share_irq;
5230 	else {
5231 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a "
5232 		    "suitable irq from the list of possible irqs for device "
5233 		    "%s, instance #%d in ACPI's list of possible irqs",
5234 		    ddi_get_name(dip), ddi_get_instance(dip)));
5235 		return (ACPI_PSM_FAILURE);
5236 	}
5237 
5238 	APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s "
5239 	    "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip)));
5240 
5241 	if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) {
5242 		/*
5243 		 * setting irq was successful, check to make sure CRS
5244 		 * reflects that. If CRS does not agree with what we
5245 		 * set, return the irq that was set.
5246 		 */
5247 
5248 		if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
5249 		    dipintr_flagp) == ACPI_PSM_SUCCESS) {
5250 
5251 			if (cur_irq != irq)
5252 				APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: "
5253 				    "IRQ resource set (irqno %d) for device %s "
5254 				    "instance #%d, differs from current "
5255 				    "setting irqno %d",
5256 				    irq, ddi_get_name(dip),
5257 				    ddi_get_instance(dip), cur_irq));
5258 		}
5259 
5260 		/*
5261 		 * return the irq that was set, and not what CRS reports,
5262 		 * since CRS has been seen to be bogus on some systems
5263 		 */
5264 		cur_irq = irq;
5265 	} else {
5266 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d "
5267 		    "failed for device %s instance #%d",
5268 		    irq, ddi_get_name(dip), ddi_get_instance(dip)));
5269 
5270 		if (cur_irq == -1)
5271 			return (ACPI_PSM_FAILURE);
5272 	}
5273 
5274 	ASSERT(pci_irqp != NULL);
5275 	*pci_irqp = cur_irq;
5276 	return (ACPI_PSM_SUCCESS);
5277 }
5278