xref: /titanic_51/usr/src/uts/i86pc/io/pcplusmp/apic.c (revision 1a7c1b724419d3cb5fa6eea75123c6b2060ba31b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
31  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
32  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
33  * PSMI 1.5 extensions are supported in Solaris Nevada.
34  */
35 #define	PSMI_1_5
36 
37 #include <sys/processor.h>
38 #include <sys/time.h>
39 #include <sys/psm.h>
40 #include <sys/smp_impldefs.h>
41 #include <sys/cram.h>
42 #include <sys/acpi/acpi.h>
43 #include <sys/acpica.h>
44 #include <sys/psm_common.h>
45 #include "apic.h"
46 #include <sys/pit.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/ddi_impldefs.h>
50 #include <sys/pci.h>
51 #include <sys/promif.h>
52 #include <sys/x86_archext.h>
53 #include <sys/cpc_impl.h>
54 #include <sys/uadmin.h>
55 #include <sys/panic.h>
56 #include <sys/debug.h>
57 #include <sys/archsystm.h>
58 #include <sys/trap.h>
59 #include <sys/machsystm.h>
60 #include <sys/cpuvar.h>
61 #include <sys/rm_platter.h>
62 #include <sys/privregs.h>
63 #include <sys/cyclic.h>
64 #include <sys/note.h>
65 #include <sys/pci_intr_lib.h>
66 
67 /*
68  *	Local Function Prototypes
69  */
70 static void apic_init_intr();
71 static void apic_ret();
72 static int apic_handle_defconf();
73 static int apic_parse_mpct(caddr_t mpct, int bypass);
74 static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
75 static int apic_checksum(caddr_t bptr, int len);
76 static int get_apic_cmd1();
77 static int get_apic_pri();
78 static int apic_find_bus_type(char *bus);
79 static int apic_find_bus(int busid);
80 static int apic_find_bus_id(int bustype);
81 static struct apic_io_intr *apic_find_io_intr(int irqno);
82 int apic_allocate_irq(int irq);
83 static int apic_find_free_irq(int start, int end);
84 static uchar_t apic_allocate_vector(int ipl, int irq, int pri);
85 static void apic_modify_vector(uchar_t vector, int irq);
86 static void apic_mark_vector(uchar_t oldvector, uchar_t newvector);
87 static uchar_t apic_xlate_vector(uchar_t oldvector);
88 static void apic_xlate_vector_free_timeout_handler(void *arg);
89 static void apic_free_vector(uchar_t vector);
90 static void apic_reprogram_timeout_handler(void *arg);
91 static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
92     int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq);
93 static int apic_setup_io_intr(apic_irq_t *irqptr, int irq);
94 static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq);
95 static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq);
96 static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid);
97 static int apic_find_intin(uchar_t ioapic, uchar_t intin);
98 static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno,
99     int child_ipin, struct apic_io_intr **intrp);
100 static int apic_setup_irq_table(dev_info_t *dip, int irqno,
101     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
102     int type);
103 static int apic_setup_sci_irq_table(int irqno, uchar_t ipl,
104     iflag_t *intr_flagp);
105 static void apic_nmi_intr(caddr_t arg);
106 uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid,
107     uchar_t intin);
108 static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock,
109     int when);
110 static int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe);
111 static void apic_intr_redistribute();
112 static void apic_cleanup_busy();
113 static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
114 int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type);
115 
116 /* ACPI support routines */
117 static int acpi_probe(void);
118 static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
119     int *pci_irqp, iflag_t *intr_flagp);
120 
121 static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
122     int ipin, int *pci_irqp, iflag_t *intr_flagp);
123 static uchar_t acpi_find_ioapic(int irq);
124 static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2);
125 
126 /*
127  *	standard MP entries
128  */
129 static int	apic_probe();
130 static int	apic_clkinit();
131 static int	apic_getclkirq(int ipl);
132 static uint_t	apic_calibrate(volatile uint32_t *addr,
133     uint16_t *pit_ticks_adj);
134 static hrtime_t apic_gettime();
135 static hrtime_t apic_gethrtime();
136 static void	apic_init();
137 static void	apic_picinit(void);
138 static void	apic_cpu_start(processorid_t cpun, caddr_t rm_code);
139 static int	apic_post_cpu_start(void);
140 static void	apic_send_ipi(int cpun, int ipl);
141 static void	apic_set_softintr(int softintr);
142 static void	apic_set_idlecpu(processorid_t cpun);
143 static void	apic_unset_idlecpu(processorid_t cpun);
144 static int	apic_softlvl_to_irq(int ipl);
145 static int	apic_intr_enter(int ipl, int *vect);
146 static void	apic_intr_exit(int ipl, int vect);
147 static void	apic_setspl(int ipl);
148 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
149 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
150 static void	apic_shutdown(int cmd, int fcn);
151 static void	apic_preshutdown(int cmd, int fcn);
152 static int	apic_disable_intr(processorid_t cpun);
153 static void	apic_enable_intr(processorid_t cpun);
154 static processorid_t	apic_get_next_processorid(processorid_t cpun);
155 static int		apic_get_ipivect(int ipl, int type);
156 static void	apic_timer_reprogram(hrtime_t time);
157 static void	apic_timer_enable(void);
158 static void	apic_timer_disable(void);
159 static void	apic_post_cyclic_setup(void *arg);
160 extern int	apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
161 		    psm_intr_op_t, int *);
162 
163 static int	apic_oneshot = 0;
164 int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
165 
166 /*
167  * These variables are frequently accessed in apic_intr_enter(),
168  * apic_intr_exit and apic_setspl, so group them together
169  */
170 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
171 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
172 int apic_clkvect;
173 
174 /* ACPI SCI interrupt configuration; -1 if SCI not used */
175 int apic_sci_vect = -1;
176 iflag_t apic_sci_flags;
177 
178 /* vector at which error interrupts come in */
179 int apic_errvect;
180 int apic_enable_error_intr = 1;
181 int apic_error_display_delay = 100;
182 
183 /* vector at which performance counter overflow interrupts come in */
184 int apic_cpcovf_vect;
185 int apic_enable_cpcovf_intr = 1;
186 
187 /* Max wait time (in microsecs) for flags to clear in an RDT entry. */
188 static int apic_max_usecs_clear_pending = 1000;
189 
190 /* Amt of usecs to wait before checking if RDT flags have reset. */
191 #define	APIC_USECS_PER_WAIT_INTERVAL 100
192 
193 /* Maximum number of times to retry reprogramming via the timeout */
194 #define	APIC_REPROGRAM_MAX_TIMEOUTS 10
195 
196 /* timeout delay for IOAPIC delayed reprogramming */
197 #define	APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */
198 
199 /* Parameter to apic_rebind(): Should reprogramming be done now or later? */
200 #define	DEFERRED 1
201 #define	IMMEDIATE 0
202 
/*
 * Largest value that fits in a uchar_t. NBBY (the number of bits per
 * byte) comes from <sys/param.h>.
 */
206 #define	UCHAR_MAX	((1 << NBBY) - 1)
207 
208 uchar_t	apic_reserved_irqlist[MAX_ISA_IRQ];
209 
210 /*
211  * The following vector assignments influence the value of ipltopri and
212  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
213  * idle to 0 and IPL 0 to 0x10 to differentiate idle in case
214  * we care to do so in future. Note some IPLs which are rarely used
215  * will share the vector ranges and heavily used IPLs (5 and 6) have
216  * a wide range.
217  *	IPL		Vector range.		as passed to intr_enter
218  *	0		none.
219  *	1,2,3		0x20-0x2f		0x0-0xf
220  *	4		0x30-0x3f		0x10-0x1f
221  *	5		0x40-0x5f		0x20-0x3f
222  *	6		0x60-0x7f		0x40-0x5f
223  *	7,8,9		0x80-0x8f		0x60-0x6f
224  *	10		0x90-0x9f		0x70-0x7f
225  *	11		0xa0-0xaf		0x80-0x8f
226  *	...		...
227  *	16		0xf0-0xff		0xd0-0xdf
228  */
229 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
230 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16
231 };
	/*
	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
	 * (one table entry per group of 16 vectors).
	 * NOTE that this is vector as passed into intr_enter which is
	 * programmed vector - 0x20 (APIC_BASE_VECT)
	 */
237 
238 uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
239 	/* The taskpri to be programmed into apic to mask given ipl */
240 
241 #if defined(__amd64)
242 uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
243 #endif
244 
245 /*
246  * Patchable global variables.
247  */
248 int	apic_forceload = 0;
249 
250 #define	INTR_ROUND_ROBIN_WITH_AFFINITY	0
251 #define	INTR_ROUND_ROBIN		1
252 #define	INTR_LOWEST_PRIORITY		2
253 
254 int	apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
255 
256 static int	apic_next_bind_cpu = 2; /* For round robin assignment */
257 					/* start with cpu 1 */
258 
259 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
260 					/* 1 - use gettime() for performance */
261 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
262 int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
263 int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
264 int	apic_panic_on_nmi = 0;
265 int	apic_panic_on_apic_error = 0;
266 
267 int	apic_verbose = 0;
268 
269 /* Flag definitions for apic_verbose */
270 #define	APIC_VERBOSE_IOAPIC_FLAG		0x00000001
271 #define	APIC_VERBOSE_IRQ_FLAG			0x00000002
272 #define	APIC_VERBOSE_POWEROFF_FLAG		0x00000004
273 #define	APIC_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000008
274 
275 
/*
 * Conditional message macros keyed off the bits in apic_verbose.
 *
 * "fmt" is a complete, parenthesized argument list for the underlying
 * print routine, so callers use doubled parentheses, e.g.
 *	APIC_VERBOSE_IOAPIC((CE_WARN, "..."));
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and can safely be used as the body of an unbraced
 * if/else without capturing a following "else".
 */
#define	APIC_VERBOSE_IOAPIC(fmt) \
	do { \
		if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
			cmn_err fmt; \
	} while (0)

#define	APIC_VERBOSE_IRQ(fmt) \
	do { \
		if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
			cmn_err fmt; \
	} while (0)

#define	APIC_VERBOSE_POWEROFF(fmt) \
	do { \
		if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \
			prom_printf fmt; \
	} while (0)
287 
288 
289 /* Now the ones for Dynamic Interrupt distribution */
290 int	apic_enable_dynamic_migration = 1;
291 
292 /*
293  * If enabled, the distribution works as follows:
294  * On every interrupt entry, the current ipl for the CPU is set in cpu_info
295  * and the irq corresponding to the ipl is also set in the aci_current array.
296  * interrupt exit and setspl (due to soft interrupts) will cause the current
 * ipl to be changed. This is cache friendly as these frequently used
298  * paths write into a per cpu structure.
299  *
300  * Sampling is done by checking the structures for all CPUs and incrementing
301  * the busy field of the irq (if any) executing on each CPU and the busy field
302  * of the corresponding CPU.
303  * In periodic mode this is done on every clock interrupt.
304  * In one-shot mode, this is done thru a cyclic with an interval of
305  * apic_redistribute_sample_interval (default 10 milli sec).
306  *
307  * Every apic_sample_factor_redistribution times we sample, we do computations
308  * to decide which interrupt needs to be migrated (see comments
 * before apic_intr_redistribute()).
310  */
311 
312 /*
313  * Following 3 variables start as % and can be patched or set using an
314  * API to be defined in future. They will be scaled to
315  * sample_factor_redistribution which is in turn set to hertz+1 (in periodic
316  * mode), or 101 in one-shot mode to stagger it away from one sec processing
317  */
318 
319 int	apic_int_busy_mark = 60;
320 int	apic_int_free_mark = 20;
321 int	apic_diff_for_redistribution = 10;
322 
323 /* sampling interval for interrupt redistribution for dynamic migration */
324 int	apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */
325 
326 /*
327  * number of times we sample before deciding to redistribute interrupts
328  * for dynamic migration
329  */
330 int	apic_sample_factor_redistribution = 101;
331 
332 /* timeout for xlate_vector, mark_vector */
333 int	apic_revector_timeout = 16 * 10000; /* 160 millisec */
334 
335 int	apic_redist_cpu_skip = 0;
336 int	apic_num_imbalance = 0;
337 int	apic_num_rebind = 0;
338 
339 int	apic_nproc = 0;
340 int	apic_defconf = 0;
341 int	apic_irq_translate = 0;
342 int	apic_spec_rev = 0;
343 int	apic_imcrp = 0;
344 
345 int	apic_use_acpi = 1;	/* 1 = use ACPI, 0 = don't use ACPI */
346 int	apic_use_acpi_madt_only = 0;	/* 1=ONLY use MADT from ACPI */
347 
348 /*
349  * For interrupt link devices, if apic_unconditional_srs is set, an irq resource
350  * will be assigned (via _SRS). If it is not set, use the current
351  * irq setting (via _CRS), but only if that irq is in the set of possible
352  * irqs (returned by _PRS) for the device.
353  */
354 int	apic_unconditional_srs = 1;
355 
356 /*
357  * For interrupt link devices, if apic_prefer_crs is set when we are
358  * assigning an IRQ resource to a device, prefer the current IRQ setting
359  * over other possible irq settings under same conditions.
360  */
361 
362 int	apic_prefer_crs = 1;
363 
364 
365 /* minimum number of timer ticks to program to */
366 int apic_min_timer_ticks = 1;
367 /*
368  *	Local static data
369  */
370 static struct	psm_ops apic_ops = {
371 	apic_probe,
372 
373 	apic_init,
374 	apic_picinit,
375 	apic_intr_enter,
376 	apic_intr_exit,
377 	apic_setspl,
378 	apic_addspl,
379 	apic_delspl,
380 	apic_disable_intr,
381 	apic_enable_intr,
382 	apic_softlvl_to_irq,
383 	apic_set_softintr,
384 
385 	apic_set_idlecpu,
386 	apic_unset_idlecpu,
387 
388 	apic_clkinit,
389 	apic_getclkirq,
390 	(void (*)(void))NULL,		/* psm_hrtimeinit */
391 	apic_gethrtime,
392 
393 	apic_get_next_processorid,
394 	apic_cpu_start,
395 	apic_post_cpu_start,
396 	apic_shutdown,
397 	apic_get_ipivect,
398 	apic_send_ipi,
399 
400 	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
401 	(int (*)(todinfo_t *))NULL,	/* psm_tod_get */
402 	(int (*)(todinfo_t *))NULL,	/* psm_tod_set */
403 	(void (*)(int, char *))NULL,	/* psm_notify_error */
404 	(void (*)(int))NULL,		/* psm_notify_func */
405 	apic_timer_reprogram,
406 	apic_timer_enable,
407 	apic_timer_disable,
408 	apic_post_cyclic_setup,
409 	apic_preshutdown,
410 	apic_intr_ops			/* Advanced DDI Interrupt framework */
411 };
412 
413 
414 static struct	psm_info apic_psm_info = {
415 	PSM_INFO_VER01_5,			/* version */
416 	PSM_OWN_EXCLUSIVE,			/* ownership */
417 	(struct psm_ops *)&apic_ops,		/* operation */
418 	"pcplusmp",				/* machine name */
419 	"pcplusmp v1.4 compatible %I%",
420 };
421 
422 static void *apic_hdlp;
423 
424 #ifdef DEBUG
425 #define	DENT		0x0001
426 int	apic_debug = 0;
427 /*
428  * set apic_restrict_vector to the # of vectors we want to allow per range
429  * useful in testing shared interrupt logic by setting it to 2 or 3
430  */
431 int	apic_restrict_vector = 0;
432 
433 #define	APIC_DEBUG_MSGBUFSIZE	2048
434 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
435 int	apic_debug_msgbufindex = 0;
436 
/*
 * Put "int" info into debug buffer. No MP consistency, but light weight.
 * Good enough for most debugging.
 *
 * The value is stored at the current index, the index is advanced, and it
 * is reset to 0 once it reaches (APIC_DEBUG_MSGBUFSIZE - NCPU).
 *
 * Wrapped in do { } while (0) so both steps stay together when the macro
 * is used as the body of an unbraced if/else; the argument is
 * parenthesized so that any expression may be passed.
 */
#define	APIC_DEBUG_BUF_PUT(x) \
	do { \
		apic_debug_msgbuf[apic_debug_msgbufindex++] = (x); \
		if (apic_debug_msgbufindex >= \
		    (APIC_DEBUG_MSGBUFSIZE - NCPU)) \
			apic_debug_msgbufindex = 0; \
	} while (0)
445 
446 #endif /* DEBUG */
447 
448 apic_cpus_info_t	*apic_cpus;
449 
450 static uint_t	apic_cpumask = 0;
451 static uint_t	apic_flag;
452 
453 /* Flag to indicate that we need to shut down all processors */
454 static uint_t	apic_shutdown_processors;
455 
456 uint_t apic_nsec_per_intr = 0;
457 
458 /*
459  * apic_let_idle_redistribute can have the following values:
460  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
461  * apic_redistribute_lock prevents multiple idle cpus from redistributing
462  */
463 int	apic_num_idle_redistributions = 0;
464 static	int apic_let_idle_redistribute = 0;
465 static	uint_t apic_nticks = 0;
466 static	uint_t apic_skipped_redistribute = 0;
467 
468 /* to gather intr data and redistribute */
469 static void apic_redistribute_compute(void);
470 
471 static	uint_t last_count_read = 0;
472 static	lock_t	apic_gethrtime_lock;
473 volatile int	apic_hrtime_stamp = 0;
474 volatile hrtime_t apic_nsec_since_boot = 0;
475 static uint_t apic_hertz_count, apic_nsec_per_tick;
476 static hrtime_t apic_nsec_max;
477 
478 static	hrtime_t	apic_last_hrtime = 0;
479 int		apic_hrtime_error = 0;
480 int		apic_remote_hrterr = 0;
481 int		apic_num_nmis = 0;
482 int		apic_apic_error = 0;
483 int		apic_num_apic_errors = 0;
484 int		apic_num_cksum_errors = 0;
485 
486 static	uchar_t	apic_io_id[MAX_IO_APIC];
487 static	uchar_t	apic_io_ver[MAX_IO_APIC];
488 static	uchar_t	apic_io_vectbase[MAX_IO_APIC];
489 static	uchar_t	apic_io_vectend[MAX_IO_APIC];
490 volatile int32_t *apicioadr[MAX_IO_APIC];
491 /*
492  * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
493  * and bound elements of cpus_info and the temp_cpu element of irq_struct
494  */
495 lock_t	apic_ioapic_lock;
496 
497 /*
498  * apic_ioapic_reprogram_lock prevents a CPU from exiting
499  * apic_intr_exit before IOAPIC reprogramming information
500  * is collected.
501  */
502 static	lock_t	apic_ioapic_reprogram_lock;
503 static	int	apic_io_max = 0;	/* no. of i/o apics enabled */
504 
505 static	struct apic_io_intr *apic_io_intrp = 0;
506 static	struct apic_bus	*apic_busp;
507 
508 uchar_t	apic_vector_to_irq[APIC_MAX_VECTOR+1];
509 static	uchar_t	apic_resv_vector[MAXIPL+1];
510 
511 static	char	apic_level_intr[APIC_MAX_VECTOR+1];
512 static	int	apic_error = 0;
513 /* values which apic_error can take. Not catastrophic, but may help debug */
514 #define	APIC_ERR_BOOT_EOI		0x1
515 #define	APIC_ERR_GET_IPIVECT_FAIL	0x2
516 #define	APIC_ERR_INVALID_INDEX		0x4
517 #define	APIC_ERR_MARK_VECTOR_FAIL	0x8
518 #define	APIC_ERR_APIC_ERROR		0x40000000
519 #define	APIC_ERR_NMI			0x80000000
520 
521 static	int	apic_cmos_ssb_set = 0;
522 
523 static	uint32_t	eisa_level_intr_mask = 0;
524 	/* At least MSB will be set if EISA bus */
525 
526 static	int	apic_pci_bus_total = 0;
527 static	uchar_t	apic_single_pci_busid = 0;
528 
529 
530 /*
531  * airq_mutex protects additions to the apic_irq_table - the first
532  * pointer and any airq_nexts off of that one. It also protects
533  * apic_max_device_irq & apic_min_device_irq. It also guarantees
534  * that share_id is unique as new ids are generated only when new
535  * irq_t structs are linked in. Once linked in the structs are never
536  * deleted. temp_cpu & mps_intr_index field indicate if it is programmed
537  * or allocated. Note that there is a slight gap between allocating in
538  * apic_introp_xlate and programming in addspl.
539  */
540 kmutex_t	airq_mutex;
541 apic_irq_t	*apic_irq_table[APIC_MAX_VECTOR+1];
542 int		apic_max_device_irq = 0;
543 int		apic_min_device_irq = APIC_MAX_VECTOR;
544 
545 /* use to make sure only one cpu handles the nmi */
546 static	lock_t	apic_nmi_lock;
547 /* use to make sure only one cpu handles the error interrupt */
548 static	lock_t	apic_error_lock;
549 
550 /*
551  * Following declarations are for revectoring; used when ISRs at different
552  * IPLs share an irq.
553  */
554 static	lock_t	apic_revector_lock;
555 static	int	apic_revector_pending = 0;
556 static	uchar_t	*apic_oldvec_to_newvec;
557 static	uchar_t	*apic_newvec_to_oldvec;
558 
559 /* Ensures that the IOAPIC-reprogramming timeout is not reentrant */
560 static	kmutex_t	apic_reprogram_timeout_mutex;
561 
562 static	struct	ioapic_reprogram_data {
563 	int		valid;	 /* This entry is valid */
564 	int		bindcpu; /* The CPU to which the int will be bound */
565 	unsigned	timeouts; /* # times the reprogram timeout was called */
566 } apic_reprogram_info[APIC_MAX_VECTOR+1];
567 /*
568  * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info
569  * is indexed by IRQ number, NOT by vector number.
570  */
571 
572 
573 /*
574  * The following added to identify a software poweroff method if available.
575  */
576 
577 static struct {
578 	int	poweroff_method;
579 	char	oem_id[APIC_MPS_OEM_ID_LEN + 1];	/* MAX + 1 for NULL */
580 	char	prod_id[APIC_MPS_PROD_ID_LEN + 1];	/* MAX + 1 for NULL */
581 } apic_mps_ids[] = {
582 	{ APIC_POWEROFF_VIA_RTC,	"INTEL",	"ALDER" },   /* 4300 */
583 	{ APIC_POWEROFF_VIA_RTC,	"NCR",		"AMC" },    /* 4300 */
584 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"A450NX" },  /* 4400? */
585 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AD450NX" }, /* 4400 */
586 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AC450NX" }, /* 4400R */
587 	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"S450NX" },  /* S50  */
588 	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"SC450NX" }  /* S50? */
589 };
590 
591 int	apic_poweroff_method = APIC_POWEROFF_NONE;
592 
593 static	struct {
594 	uchar_t	cntl;
595 	uchar_t	data;
596 } aspen_bmc[] = {
597 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
598 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
599 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
600 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
601 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
602 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
603 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
604 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
605 
606 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
607 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
608 };
609 
610 static	struct {
611 	int	port;
612 	uchar_t	data;
613 } sitka_bmc[] = {
614 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
615 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
616 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
617 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
618 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
619 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
620 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
621 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
622 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
623 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
624 
625 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
626 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
627 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
628 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
629 };
630 
631 
632 /* Patchable global variables. */
633 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
634 int		apic_debug_mps_id = 0;		/* 1 - print MPS ID strings */
635 
636 /*
637  * ACPI definitions
638  */
639 /* _PIC method arguments */
640 #define	ACPI_PIC_MODE	0
641 #define	ACPI_APIC_MODE	1
642 
643 /* APIC error flags we care about */
644 #define	APIC_SEND_CS_ERROR	0x01
645 #define	APIC_RECV_CS_ERROR	0x02
646 #define	APIC_CS_ERRORS		(APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR)
647 
648 /*
649  * ACPI variables
650  */
651 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */
652 static	int apic_enable_acpi = 0;
653 
654 /* ACPI Multiple APIC Description Table ptr */
655 static	MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL;
656 
657 /* ACPI Interrupt Source Override Structure ptr */
658 static	MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL;
659 static	int acpi_iso_cnt = 0;
660 
661 /* ACPI Non-maskable Interrupt Sources ptr */
662 static	MADT_NMI_SOURCE *acpi_nmi_sp = NULL;
663 static	int acpi_nmi_scnt = 0;
664 static	MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL;
665 static	int acpi_nmi_ccnt = 0;
666 
667 /*
668  * extern declarations
669  */
670 extern	int	intr_clear(void);
671 extern	void	intr_restore(uint_t);
672 #if defined(__amd64)
673 extern	int	intpri_use_cr8;
674 #endif	/* __amd64 */
675 
676 extern int	apic_pci_msi_enable_vector(dev_info_t *, int, int,
677 		    int, int, int);
678 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int);
679 
680 /*
681  *	This is the loadable module wrapper
682  */
683 
684 int
685 _init(void)
686 {
687 	if (apic_coarse_hrtime)
688 		apic_ops.psm_gethrtime = &apic_gettime;
689 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
690 }
691 
692 int
693 _fini(void)
694 {
695 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
696 }
697 
698 int
699 _info(struct modinfo *modinfop)
700 {
701 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
702 }
703 
704 /*
705  * Auto-configuration routines
706  */
707 
708 /*
709  * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do here
710  * May work with 1.1 - but not guaranteed.
711  * According to the MP Spec, the MP floating pointer structure
712  * will be searched in the order described below:
713  * 1. In the first kilobyte of Extended BIOS Data Area (EBDA)
714  * 2. Within the last kilobyte of system base memory
715  * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh
716  * Once we find the right signature with proper checksum, we call
717  * either handle_defconf or parse_mpct to get all info necessary for
718  * subsequent operations.
719  */
720 static int
721 apic_probe()
722 {
723 	uint32_t mpct_addr, ebda_start = 0, base_mem_end;
724 	caddr_t	biosdatap;
725 	caddr_t	mpct;
726 	caddr_t	fptr;
727 	int	i, mpct_size, mapsize, retval = PSM_FAILURE;
728 	ushort_t	ebda_seg, base_mem_size;
729 	struct	apic_mpfps_hdr	*fpsp;
730 	struct	apic_mp_cnf_hdr	*hdrp;
731 	int bypass_cpu_and_ioapics_in_mptables;
732 	int acpi_user_options;
733 
734 	if (apic_forceload < 0)
735 		return (retval);
736 
737 	/* Allow override for MADT-only mode */
738 	acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0,
739 	    "acpi-user-options", 0);
740 	apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0);
741 
742 	/* Allow apic_use_acpi to override MADT-only mode */
743 	if (!apic_use_acpi)
744 		apic_use_acpi_madt_only = 0;
745 
746 	retval = acpi_probe();
747 
748 	/*
749 	 * mapin the bios data area 40:0
750 	 * 40:13h - two-byte location reports the base memory size
751 	 * 40:0Eh - two-byte location for the exact starting address of
752 	 *	    the EBDA segment for EISA
753 	 */
754 	biosdatap = psm_map_phys(0x400, 0x20, PROT_READ);
755 	if (!biosdatap)
756 		return (retval);
757 	fpsp = (struct apic_mpfps_hdr *)NULL;
758 	mapsize = MPFPS_RAM_WIN_LEN;
759 	/*LINTED: pointer cast may result in improper alignment */
760 	ebda_seg = *((ushort_t *)(biosdatap+0xe));
761 	/* check the 1k of EBDA */
762 	if (ebda_seg) {
763 		ebda_start = ((uint32_t)ebda_seg) << 4;
764 		fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ);
765 		if (fptr) {
766 			if (!(fpsp =
767 			    apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN)))
768 				psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
769 		}
770 	}
771 	/* If not in EBDA, check the last k of system base memory */
772 	if (!fpsp) {
773 		/*LINTED: pointer cast may result in improper alignment */
774 		base_mem_size = *((ushort_t *)(biosdatap + 0x13));
775 
776 		if (base_mem_size > 512)
777 			base_mem_end = 639 * 1024;
778 		else
779 			base_mem_end = 511 * 1024;
780 		/* if ebda == last k of base mem, skip to check BIOS ROM */
781 		if (base_mem_end != ebda_start) {
782 
783 			fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN,
784 			    PROT_READ);
785 
786 			if (fptr) {
787 				if (!(fpsp = apic_find_fps_sig(fptr,
788 				    MPFPS_RAM_WIN_LEN)))
789 					psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
790 			}
791 		}
792 	}
793 	psm_unmap_phys(biosdatap, 0x20);
794 
795 	/* If still cannot find it, check the BIOS ROM space */
796 	if (!fpsp) {
797 		mapsize = MPFPS_ROM_WIN_LEN;
798 		fptr = psm_map_phys(MPFPS_ROM_WIN_START,
799 		    MPFPS_ROM_WIN_LEN, PROT_READ);
800 		if (fptr) {
801 			if (!(fpsp =
802 			    apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) {
803 				psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
804 				return (retval);
805 			}
806 		}
807 	}
808 
809 	if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) {
810 		psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
811 		return (retval);
812 	}
813 
814 	apic_spec_rev = fpsp->mpfps_spec_rev;
815 	if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) {
816 		psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
817 		return (retval);
818 	}
819 
820 	/* check IMCR is present or not */
821 	apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP;
822 
823 	/* check default configuration (dual CPUs) */
824 	if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) {
825 		psm_unmap_phys(fptr, mapsize);
826 		return (apic_handle_defconf());
827 	}
828 
829 	/* MP Configuration Table */
830 	mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr);
831 
832 	psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */
833 
834 	/*
835 	 * Map in enough memory for the MP Configuration Table Header.
836 	 * Use this table to read the total length of the BIOS data and
837 	 * map in all the info
838 	 */
839 	/*LINTED: pointer cast may result in improper alignment */
840 	hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr,
841 	    sizeof (struct apic_mp_cnf_hdr), PROT_READ);
842 	if (!hdrp)
843 		return (retval);
844 
845 	/* check mp configuration table signature PCMP */
846 	if (hdrp->mpcnf_sig != 0x504d4350) {
847 		psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
848 		return (retval);
849 	}
850 	mpct_size = (int)hdrp->mpcnf_tbl_length;
851 
852 	apic_set_pwroff_method_from_mpcnfhdr(hdrp);
853 
854 	psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
855 
856 	if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) {
857 		/* This is an ACPI machine No need for further checks */
858 		return (retval);
859 	}
860 
861 	/*
862 	 * Map in the entries for this machine, ie. Processor
863 	 * Entry Tables, Bus Entry Tables, etc.
864 	 * They are in fixed order following one another
865 	 */
866 	mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ);
867 	if (!mpct)
868 		return (retval);
869 
870 	if (apic_checksum(mpct, mpct_size) != 0)
871 		goto apic_fail1;
872 
873 
874 	/*LINTED: pointer cast may result in improper alignment */
875 	hdrp = (struct apic_mp_cnf_hdr *)mpct;
876 	/*LINTED: pointer cast may result in improper alignment */
877 	apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic,
878 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
879 	if (!apicadr)
880 		goto apic_fail1;
881 
882 	/* Parse all information in the tables */
883 	bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS);
884 	if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) ==
885 	    PSM_SUCCESS)
886 		return (PSM_SUCCESS);
887 
888 	for (i = 0; i < apic_io_max; i++)
889 		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
890 	if (apic_cpus)
891 		kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc);
892 	if (apicadr)
893 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
894 apic_fail1:
895 	psm_unmap_phys(mpct, mpct_size);
896 	return (retval);
897 }
898 
899 static void
900 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
901 {
902 	int	i;
903 
904 	for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0]));
905 	    i++) {
906 		if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id,
907 		    strlen(apic_mps_ids[i].oem_id)) == 0) &&
908 		    (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id,
909 		    strlen(apic_mps_ids[i].prod_id)) == 0)) {
910 
911 			apic_poweroff_method = apic_mps_ids[i].poweroff_method;
912 			break;
913 		}
914 	}
915 
916 	if (apic_debug_mps_id != 0) {
917 		cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'"
918 		    "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n",
919 		    hdrp->mpcnf_oem_str[0],
920 		    hdrp->mpcnf_oem_str[1],
921 		    hdrp->mpcnf_oem_str[2],
922 		    hdrp->mpcnf_oem_str[3],
923 		    hdrp->mpcnf_oem_str[4],
924 		    hdrp->mpcnf_oem_str[5],
925 		    hdrp->mpcnf_oem_str[6],
926 		    hdrp->mpcnf_oem_str[7],
927 		    hdrp->mpcnf_prod_str[0],
928 		    hdrp->mpcnf_prod_str[1],
929 		    hdrp->mpcnf_prod_str[2],
930 		    hdrp->mpcnf_prod_str[3],
931 		    hdrp->mpcnf_prod_str[4],
932 		    hdrp->mpcnf_prod_str[5],
933 		    hdrp->mpcnf_prod_str[6],
934 		    hdrp->mpcnf_prod_str[7],
935 		    hdrp->mpcnf_prod_str[8],
936 		    hdrp->mpcnf_prod_str[9],
937 		    hdrp->mpcnf_prod_str[10],
938 		    hdrp->mpcnf_prod_str[11]);
939 	}
940 }
941 
942 static int
943 acpi_probe(void)
944 {
945 	int			i, id, intmax, ver, index, rv;
946 	int			acpi_verboseflags = 0;
947 	int			madt_seen, madt_size;
948 	APIC_HEADER		*ap;
949 	MADT_PROCESSOR_APIC	*mpa;
950 	MADT_IO_APIC		*mia;
951 	MADT_IO_SAPIC		*misa;
952 	MADT_INTERRUPT_OVERRIDE	*mio;
953 	MADT_NMI_SOURCE		*mns;
954 	MADT_INTERRUPT_SOURCE	*mis;
955 	MADT_LOCAL_APIC_NMI	*mlan;
956 	MADT_ADDRESS_OVERRIDE	*mao;
957 	ACPI_OBJECT_LIST 	arglist;
958 	ACPI_OBJECT		arg;
959 	int			sci;
960 	iflag_t			sci_flags;
961 	volatile int32_t	*ioapic;
962 	char			local_ids[NCPU];
963 	char			proc_ids[NCPU];
964 	uchar_t			hid;
965 
966 	if (!apic_use_acpi)
967 		return (PSM_FAILURE);
968 
969 	if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING,
970 	    (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK)
971 		return (PSM_FAILURE);
972 
973 	apicadr = (uint32_t *)psm_map_phys(
974 	    (uint32_t)acpi_mapic_dtp->LocalApicAddress,
975 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
976 	if (!apicadr)
977 		return (PSM_FAILURE);
978 
979 	id = apicadr[APIC_LID_REG];
980 	local_ids[0] = (uchar_t)(((uint_t)id) >> 24);
981 	apic_nproc = index = 1;
982 	apic_io_max = 0;
983 
984 	ap = (APIC_HEADER *) (acpi_mapic_dtp + 1);
985 	madt_size = acpi_mapic_dtp->Length;
986 	madt_seen = sizeof (*acpi_mapic_dtp);
987 
988 	while (madt_seen < madt_size) {
989 		switch (ap->Type) {
990 		case APIC_PROCESSOR:
991 			mpa = (MADT_PROCESSOR_APIC *) ap;
992 			if (mpa->ProcessorEnabled) {
993 				if (mpa->LocalApicId == local_ids[0])
994 					proc_ids[0] = mpa->ProcessorId;
995 				else if (apic_nproc < NCPU) {
996 					local_ids[index] = mpa->LocalApicId;
997 					proc_ids[index] = mpa->ProcessorId;
998 					index++;
999 					apic_nproc++;
1000 				} else
1001 					cmn_err(CE_WARN, "pcplusmp: exceeded "
1002 					    "maximum no. of CPUs (= %d)", NCPU);
1003 			}
1004 			break;
1005 
1006 		case APIC_IO:
1007 			mia = (MADT_IO_APIC *) ap;
1008 			if (apic_io_max < MAX_IO_APIC) {
1009 				apic_io_id[apic_io_max] = mia->IoApicId;
1010 				apic_io_vectbase[apic_io_max] =
1011 				    mia->Interrupt;
1012 				ioapic = apicioadr[apic_io_max] =
1013 				    (int32_t *)psm_map_phys(
1014 				    (uint32_t)mia->Address,
1015 				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1016 				if (!ioapic)
1017 					goto cleanup;
1018 				apic_io_max++;
1019 			}
1020 			break;
1021 
1022 		case APIC_XRUPT_OVERRIDE:
1023 			mio = (MADT_INTERRUPT_OVERRIDE *) ap;
1024 			if (acpi_isop == NULL)
1025 				acpi_isop = mio;
1026 			acpi_iso_cnt++;
1027 			break;
1028 
1029 		case APIC_NMI:
1030 			/* UNIMPLEMENTED */
1031 			mns = (MADT_NMI_SOURCE *) ap;
1032 			if (acpi_nmi_sp == NULL)
1033 				acpi_nmi_sp = mns;
1034 			acpi_nmi_scnt++;
1035 
1036 			cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n",
1037 				mns->Interrupt, mns->Polarity,
1038 				mns->TriggerMode);
1039 			break;
1040 
1041 		case APIC_LOCAL_NMI:
1042 			/* UNIMPLEMENTED */
1043 			mlan = (MADT_LOCAL_APIC_NMI *) ap;
1044 			if (acpi_nmi_cp == NULL)
1045 				acpi_nmi_cp = mlan;
1046 			acpi_nmi_ccnt++;
1047 
1048 			cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n",
1049 				mlan->ProcessorId, mlan->Polarity,
1050 				mlan->TriggerMode, mlan->Lint);
1051 			break;
1052 
1053 		case APIC_ADDRESS_OVERRIDE:
1054 			/* UNIMPLEMENTED */
1055 			mao = (MADT_ADDRESS_OVERRIDE *) ap;
1056 			cmn_err(CE_NOTE, "!apic: address override: %lx\n",
1057 				(long)mao->Address);
1058 			break;
1059 
1060 		case APIC_IO_SAPIC:
1061 			/* UNIMPLEMENTED */
1062 			misa = (MADT_IO_SAPIC *) ap;
1063 
1064 			cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n",
1065 				misa->IoSapicId, misa->InterruptBase,
1066 				(long)misa->Address);
1067 			break;
1068 
1069 		case APIC_XRUPT_SOURCE:
1070 			/* UNIMPLEMENTED */
1071 			mis = (MADT_INTERRUPT_SOURCE *) ap;
1072 
1073 			cmn_err(CE_NOTE,
1074 				"!apic: irq source: %d %d %d %d %d %d %d\n",
1075 				mis->ProcessorId, mis->ProcessorEid,
1076 				mis->Interrupt, mis->Polarity,
1077 				mis->TriggerMode, mis->InterruptType,
1078 				mis->IoSapicVector);
1079 			break;
1080 		case APIC_RESERVED:
1081 		default:
1082 			goto cleanup;
1083 		}
1084 
1085 		/* advance to next entry */
1086 		madt_seen += ap->Length;
1087 		ap = (APIC_HEADER *)(((char *)ap) + ap->Length);
1088 	}
1089 
1090 	if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc,
1091 	    KM_NOSLEEP)) == NULL)
1092 		goto cleanup;
1093 
1094 	apic_cpumask = (1 << apic_nproc) - 1;
1095 
1096 	/*
1097 	 * ACPI doesn't provide the local apic ver, get it directly from the
1098 	 * local apic
1099 	 */
1100 	ver = apicadr[APIC_VERS_REG];
1101 	for (i = 0; i < apic_nproc; i++) {
1102 		apic_cpus[i].aci_local_id = local_ids[i];
1103 		apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
1104 	}
1105 	for (i = 0; i < apic_io_max; i++) {
1106 		ioapic = apicioadr[i];
1107 
1108 		/*
1109 		 * need to check Sitka on the following acpi problem
1110 		 * On the Sitka, the ioapic's apic_id field isn't reporting
1111 		 * the actual io apic id. We have reported this problem
1112 		 * to Intel. Until they fix the problem, we will get the
1113 		 * actual id directly from the ioapic.
1114 		 */
1115 		ioapic[APIC_IO_REG] = APIC_ID_CMD;
1116 		id = ioapic[APIC_IO_DATA];
1117 		hid = (uchar_t)(((uint_t)id) >> 24);
1118 
1119 		if (hid != apic_io_id[i]) {
1120 			if (apic_io_id[i] == 0)
1121 				apic_io_id[i] = hid;
1122 			else { /* set ioapic id to whatever reported by ACPI */
1123 				id = ((int32_t)apic_io_id[i]) << 24;
1124 				ioapic[APIC_IO_REG] = APIC_ID_CMD;
1125 				ioapic[APIC_IO_DATA] = id;
1126 			}
1127 		}
1128 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
1129 		ver = ioapic[APIC_IO_DATA];
1130 		apic_io_ver[i] = (uchar_t)(ver & 0xff);
1131 		intmax = (ver >> 16) & 0xff;
1132 		apic_io_vectend[i] = apic_io_vectbase[i] + intmax;
1133 	}
1134 
1135 
1136 	/*
1137 	 * Process SCI configuration here
1138 	 * An error may be returned here if
1139 	 * acpi-user-options specifies legacy mode
1140 	 * (no SCI, no ACPI mode)
1141 	 */
1142 	if (acpica_get_sci(&sci, &sci_flags) != AE_OK)
1143 		sci = -1;
1144 
1145 	/*
1146 	 * Now call acpi_init() to generate namespaces
1147 	 * If this fails, we don't attempt to use ACPI
1148 	 * even if we were able to get a MADT above
1149 	 */
1150 	if (acpica_init() != AE_OK)
1151 		goto cleanup;
1152 
1153 	/*
1154 	 * Squirrel away the SCI and flags for later on
1155 	 * in apic_picinit() when we're ready
1156 	 */
1157 	apic_sci_vect = sci;
1158 	apic_sci_flags = sci_flags;
1159 
1160 	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG)
1161 		acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG;
1162 
1163 	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG)
1164 		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG;
1165 
1166 	if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG)
1167 		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG;
1168 
1169 	if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) ==
1170 	    ACPI_PSM_FAILURE)
1171 		goto cleanup;
1172 
1173 	/* Enable ACPI APIC interrupt routing */
1174 	arglist.Count = 1;
1175 	arglist.Pointer = &arg;
1176 	arg.Type = ACPI_TYPE_INTEGER;
1177 	arg.Integer.Value = ACPI_APIC_MODE;	/* 1 */
1178 	rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL);
1179 	if (rv == AE_OK) {
1180 		build_reserved_irqlist((uchar_t *)apic_reserved_irqlist);
1181 		apic_enable_acpi = 1;
1182 		if (apic_use_acpi_madt_only) {
1183 			cmn_err(CE_CONT,
1184 			    "?Using ACPI for CPU/IOAPIC information ONLY\n");
1185 		}
1186 		return (PSM_SUCCESS);
1187 	}
1188 	/* if setting APIC mode failed above, we fall through to cleanup */
1189 
1190 cleanup:
1191 	if (apicadr != NULL) {
1192 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
1193 		apicadr = NULL;
1194 	}
1195 	apic_nproc = 0;
1196 	for (i = 0; i < apic_io_max; i++) {
1197 		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
1198 		apicioadr[i] = NULL;
1199 	}
1200 	apic_io_max = 0;
1201 	acpi_isop = NULL;
1202 	acpi_iso_cnt = 0;
1203 	acpi_nmi_sp = NULL;
1204 	acpi_nmi_scnt = 0;
1205 	acpi_nmi_cp = NULL;
1206 	acpi_nmi_ccnt = 0;
1207 	return (PSM_FAILURE);
1208 }
1209 
1210 /*
1211  * Handle default configuration. Fill in reqd global variables & tables
1212  * Fill all details as MP table does not give any more info
1213  */
1214 static int
1215 apic_handle_defconf()
1216 {
1217 	uint_t	lid;
1218 
1219 	/*LINTED: pointer cast may result in improper alignment */
1220 	apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR,
1221 	    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1222 	/*LINTED: pointer cast may result in improper alignment */
1223 	apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR,
1224 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
1225 	apic_cpus = (apic_cpus_info_t *)
1226 	    kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP);
1227 	if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus))
1228 		goto apic_handle_defconf_fail;
1229 	apic_cpumask = 3;
1230 	apic_nproc = 2;
1231 	lid = apicadr[APIC_LID_REG];
1232 	apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET);
1233 	/*
1234 	 * According to the PC+MP spec 1.1, the local ids
1235 	 * for the default configuration has to be 0 or 1
1236 	 */
1237 	if (apic_cpus[0].aci_local_id == 1)
1238 		apic_cpus[1].aci_local_id = 0;
1239 	else if (apic_cpus[0].aci_local_id == 0)
1240 		apic_cpus[1].aci_local_id = 1;
1241 	else
1242 		goto apic_handle_defconf_fail;
1243 
1244 	apic_io_id[0] = 2;
1245 	apic_io_max = 1;
1246 	if (apic_defconf >= 5) {
1247 		apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS;
1248 		apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS;
1249 		apic_io_ver[0] = APIC_INTEGRATED_VERS;
1250 	} else {
1251 		apic_cpus[0].aci_local_ver = 0;		/* 82489 DX */
1252 		apic_cpus[1].aci_local_ver = 0;
1253 		apic_io_ver[0] = 0;
1254 	}
1255 	if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6)
1256 		eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
1257 		    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
1258 	return (PSM_SUCCESS);
1259 
1260 apic_handle_defconf_fail:
1261 	if (apic_cpus)
1262 		kmem_free(apic_cpus, sizeof (*apic_cpus) * 2);
1263 	if (apicadr)
1264 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
1265 	if (apicioadr[0])
1266 		psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN);
1267 	return (PSM_FAILURE);
1268 }
1269 
1270 /* Parse the entries in MP configuration table and collect info that we need */
1271 static int
1272 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
1273 {
1274 	struct	apic_procent	*procp;
1275 	struct	apic_bus	*busp;
1276 	struct	apic_io_entry	*ioapicp;
1277 	struct	apic_io_intr	*intrp;
1278 	volatile int32_t	*ioapic;
1279 	uint_t	lid;
1280 	int	id;
1281 	uchar_t hid;
1282 
1283 	/*LINTED: pointer cast may result in improper alignment */
1284 	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));
1285 
1286 	/* No need to count cpu entries if we won't use them */
1287 	if (!bypass_cpus_and_ioapics) {
1288 
1289 		/* Find max # of CPUS and allocate structure accordingly */
1290 		apic_nproc = 0;
1291 		while (procp->proc_entry == APIC_CPU_ENTRY) {
1292 			if (procp->proc_cpuflags & CPUFLAGS_EN) {
1293 				apic_nproc++;
1294 			}
1295 			procp++;
1296 		}
1297 		if (apic_nproc > NCPU)
1298 			cmn_err(CE_WARN, "pcplusmp: exceeded "
1299 			    "maximum no. of CPUs (= %d)", NCPU);
1300 		if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *)
1301 		    kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP)))
1302 			return (PSM_FAILURE);
1303 	}
1304 
1305 	/*LINTED: pointer cast may result in improper alignment */
1306 	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));
1307 
1308 	/*
1309 	 * start with index 1 as 0 needs to be filled in with Boot CPU, but
1310 	 * if we're bypassing this information, it has already been filled
1311 	 * in by acpi_probe(), so don't overwrite it.
1312 	 */
1313 	if (!bypass_cpus_and_ioapics)
1314 		apic_nproc = 1;
1315 
1316 	while (procp->proc_entry == APIC_CPU_ENTRY) {
1317 		/* check whether the cpu exists or not */
1318 		if (!bypass_cpus_and_ioapics &&
1319 		    procp->proc_cpuflags & CPUFLAGS_EN) {
1320 			if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */
1321 				lid = apicadr[APIC_LID_REG];
1322 				apic_cpus[0].aci_local_id = procp->proc_apicid;
1323 				if (apic_cpus[0].aci_local_id !=
1324 				    (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) {
1325 					return (PSM_FAILURE);
1326 				}
1327 				apic_cpus[0].aci_local_ver =
1328 				    procp->proc_version;
1329 			} else {
1330 
1331 				apic_cpus[apic_nproc].aci_local_id =
1332 				    procp->proc_apicid;
1333 				apic_cpus[apic_nproc].aci_local_ver =
1334 				    procp->proc_version;
1335 				apic_nproc++;
1336 
1337 			}
1338 		}
1339 		procp++;
1340 	}
1341 
1342 	if (!bypass_cpus_and_ioapics) {
1343 		/* convert the number of processors into a cpumask */
1344 		apic_cpumask = (1 << apic_nproc) - 1;
1345 	}
1346 
1347 	/*
1348 	 * Save start of bus entries for later use.
1349 	 * Get EISA level cntrl if EISA bus is present.
1350 	 * Also get the CPI bus id for single CPI bus case
1351 	 */
1352 	apic_busp = busp = (struct apic_bus *)procp;
1353 	while (busp->bus_entry == APIC_BUS_ENTRY) {
1354 		lid = apic_find_bus_type((char *)&busp->bus_str1);
1355 		if (lid	== BUS_EISA) {
1356 			eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
1357 			    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
1358 		} else if (lid == BUS_PCI) {
1359 			/*
1360 			 * apic_single_pci_busid will be used only if
1361 			 * apic_pic_bus_total is equal to 1
1362 			 */
1363 			apic_pci_bus_total++;
1364 			apic_single_pci_busid = busp->bus_id;
1365 		}
1366 		busp++;
1367 	}
1368 
1369 	ioapicp = (struct apic_io_entry *)busp;
1370 
1371 	if (!bypass_cpus_and_ioapics)
1372 		apic_io_max = 0;
1373 	do {
1374 		if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) {
1375 			if (ioapicp->io_flags & IOAPIC_FLAGS_EN) {
1376 				apic_io_id[apic_io_max] = ioapicp->io_apicid;
1377 				apic_io_ver[apic_io_max] = ioapicp->io_version;
1378 		/*LINTED: pointer cast may result in improper alignment */
1379 				apicioadr[apic_io_max] =
1380 				    (int32_t *)psm_map_phys(
1381 				    (uint32_t)ioapicp->io_apic_addr,
1382 				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1383 
1384 				if (!apicioadr[apic_io_max])
1385 					return (PSM_FAILURE);
1386 
1387 				ioapic = apicioadr[apic_io_max];
1388 				ioapic[APIC_IO_REG] = APIC_ID_CMD;
1389 				id = ioapic[APIC_IO_DATA];
1390 				hid = (uchar_t)(((uint_t)id) >> 24);
1391 
1392 				if (hid != apic_io_id[apic_io_max]) {
1393 					if (apic_io_id[apic_io_max] == 0)
1394 						apic_io_id[apic_io_max] = hid;
1395 					else {
1396 						/*
1397 						 * set ioapic id to whatever
1398 						 * reported by MPS
1399 						 *
1400 						 * may not need to set index
1401 						 * again ???
1402 						 * take it out and try
1403 						 */
1404 
1405 						id = ((int32_t)
1406 						    apic_io_id[apic_io_max]) <<
1407 						    24;
1408 
1409 						ioapic[APIC_IO_REG] =
1410 						    APIC_ID_CMD;
1411 
1412 						ioapic[APIC_IO_DATA] = id;
1413 
1414 					}
1415 				}
1416 				apic_io_max++;
1417 			}
1418 		}
1419 		ioapicp++;
1420 	} while (ioapicp->io_entry == APIC_IO_ENTRY);
1421 
1422 	apic_io_intrp = (struct apic_io_intr *)ioapicp;
1423 
1424 	intrp = apic_io_intrp;
1425 	while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
1426 		if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) ||
1427 		    (apic_find_bus(intrp->intr_busid) == BUS_PCI)) {
1428 			apic_irq_translate = 1;
1429 			break;
1430 		}
1431 		intrp++;
1432 	}
1433 
1434 	return (PSM_SUCCESS);
1435 }
1436 
/*
 * Search a memory range for the MP floating pointer signature "_MP_".
 *
 * cptr - start of the range to scan
 * len  - number of bytes of the range that may be read
 *
 * Only offsets that are multiples of 16 from cptr are examined.  Returns a
 * pointer to the first position where the four signature bytes are found,
 * or NULL when there is none.  A candidate position is examined only if
 * all four signature bytes lie inside the range, so nothing beyond
 * cptr + len is ever read, even when len is not a multiple of 16.
 */
static struct apic_mpfps_hdr *
apic_find_fps_sig(caddr_t cptr, int len)
{
	int	i;

	/* Look for the pattern "_MP_" */
	for (i = 0; i <= len - 4; i += 16) {
		if ((*(cptr+i) == '_') &&
		    (*(cptr+i+1) == 'M') &&
		    (*(cptr+i+2) == 'P') &&
		    (*(cptr+i+3) == '_'))
		    /*LINTED: pointer cast may result in improper alignment */
			return ((struct apic_mpfps_hdr *)(cptr + i));
	}
	return (NULL);
}
1453 
/*
 * Add up len bytes starting at bptr in an 8-bit accumulator and return the
 * result (0 - 255).  The sum wraps modulo 256, so a block whose bytes add
 * up to a multiple of 256 yields 0.  A len of zero or less yields 0.
 */
static int
apic_checksum(caddr_t bptr, int len)
{
	uchar_t	sum = 0;
	int	left;

	for (left = len; left > 0; left--) {
		sum += *bptr;
		bptr++;
	}
	return ((int)sum);
}
1465 
1466 
1467 /*
1468  * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable
1469  * are also set to NULL. vector->irq is set to a value which cannot map
1470  * to a real irq to show that it is free.
1471  */
1472 void
1473 apic_init()
1474 {
1475 	int	i;
1476 	int	*iptr;
1477 
1478 	int	j = 1;
1479 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
1480 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
1481 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
1482 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
1483 			/* get to highest vector at the same ipl */
1484 			continue;
1485 		for (; j <= apic_vectortoipl[i]; j++) {
1486 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
1487 			    APIC_BASE_VECT;
1488 		}
1489 	}
1490 	for (; j < MAXIPL + 1; j++)
1491 		/* fill up any empty ipltopri slots */
1492 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
1493 
1494 	/* cpu 0 is always up */
1495 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
1496 
1497 	iptr = (int *)&apic_irq_table[0];
1498 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
1499 		apic_level_intr[i] = 0;
1500 		*iptr++ = NULL;
1501 		apic_vector_to_irq[i] = APIC_RESV_IRQ;
1502 		apic_reprogram_info[i].valid = 0;
1503 		apic_reprogram_info[i].bindcpu = 0;
1504 		apic_reprogram_info[i].timeouts = 0;
1505 	}
1506 
1507 	/*
1508 	 * Allocate a dummy irq table entry for the reserved entry.
1509 	 * This takes care of the race between removing an irq and
1510 	 * clock detecting a CPU in that irq during interrupt load
1511 	 * sampling.
1512 	 */
1513 	apic_irq_table[APIC_RESV_IRQ] =
1514 	    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
1515 
1516 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
1517 	mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL);
1518 #if defined(__amd64)
1519 	/*
1520 	 * Make cpu-specific interrupt info point to cr8pri vector
1521 	 */
1522 	for (i = 0; i <= MAXIPL; i++)
1523 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
1524 	CPU->cpu_pri_data = apic_cr8pri;
1525 	intpri_use_cr8 = 1;
1526 #endif	/* __amd64 */
1527 }
1528 
1529 /*
1530  * handler for APIC Error interrupt. Just print a warning and continue
1531  */
1532 static int
1533 apic_error_intr()
1534 {
1535 	uint_t	error0, error1, error;
1536 	uint_t	i;
1537 
1538 	/*
1539 	 * We need to write before read as per 7.4.17 of system prog manual.
1540 	 * We do both and or the results to be safe
1541 	 */
1542 	error0 = apicadr[APIC_ERROR_STATUS];
1543 	apicadr[APIC_ERROR_STATUS] = 0;
1544 	error1 = apicadr[APIC_ERROR_STATUS];
1545 	error = error0 | error1;
1546 
1547 	/*
1548 	 * Prevent more than 1 CPU from handling error interrupt causing
1549 	 * double printing (interleave of characters from multiple
1550 	 * CPU's when using prom_printf)
1551 	 */
1552 	if (lock_try(&apic_error_lock) == 0)
1553 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
1554 	if (error) {
1555 #if	DEBUG
1556 		if (apic_debug)
1557 			debug_enter("pcplusmp: APIC Error interrupt received");
1558 #endif /* DEBUG */
1559 		if (apic_panic_on_apic_error)
1560 			cmn_err(CE_PANIC,
1561 			    "APIC Error interrupt on CPU %d. Status = %x\n",
1562 			    psm_get_cpu_id(), error);
1563 		else {
1564 			if ((error & ~APIC_CS_ERRORS) == 0) {
1565 				/* cksum error only */
1566 				apic_error |= APIC_ERR_APIC_ERROR;
1567 				apic_apic_error |= error;
1568 				apic_num_apic_errors++;
1569 				apic_num_cksum_errors++;
1570 			} else {
1571 				/*
1572 				 * prom_printf is the best shot we have of
1573 				 * something which is problem free from
1574 				 * high level/NMI type of interrupts
1575 				 */
1576 				prom_printf("APIC Error interrupt on CPU %d. "
1577 				    "Status 0 = %x, Status 1 = %x\n",
1578 				    psm_get_cpu_id(), error0, error1);
1579 				apic_error |= APIC_ERR_APIC_ERROR;
1580 				apic_apic_error |= error;
1581 				apic_num_apic_errors++;
1582 				for (i = 0; i < apic_error_display_delay; i++) {
1583 					tenmicrosec();
1584 				}
1585 				/*
1586 				 * provide more delay next time limited to
1587 				 * roughly 1 clock tick time
1588 				 */
1589 				if (apic_error_display_delay < 500)
1590 					apic_error_display_delay *= 2;
1591 			}
1592 		}
1593 		lock_clear(&apic_error_lock);
1594 		return (DDI_INTR_CLAIMED);
1595 	} else {
1596 		lock_clear(&apic_error_lock);
1597 		return (DDI_INTR_UNCLAIMED);
1598 	}
1599 	/* NOTREACHED */
1600 }
1601 
1602 /*
1603  * Turn off the mask bit in the performance counter Local Vector Table entry.
1604  */
1605 static void
1606 apic_cpcovf_mask_clear(void)
1607 {
1608 	apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK;
1609 }
1610 
1611 static void
1612 apic_init_intr()
1613 {
1614 	processorid_t	cpun = psm_get_cpu_id();
1615 
1616 #if defined(__amd64)
1617 	setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT));
1618 #else
1619 	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
1620 #endif
1621 
1622 	if (apic_flat_model)
1623 		apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL;
1624 	else
1625 		apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL;
1626 	apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun;
1627 
1628 	/* need to enable APIC before unmasking NMI */
1629 	apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR;
1630 
1631 	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
1632 	apicadr[APIC_INT_VECT0]	= AV_MASK;	/* local intr reg 0 */
1633 	apicadr[APIC_INT_VECT1] = AV_NMI;	/* enable NMI */
1634 
1635 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS)
1636 		return;
1637 
1638 	/* Enable performance counter overflow interrupt */
1639 
1640 	if ((x86_feature & X86_MSR) != X86_MSR)
1641 		apic_enable_cpcovf_intr = 0;
1642 	if (apic_enable_cpcovf_intr) {
1643 		if (apic_cpcovf_vect == 0) {
1644 			int ipl = APIC_PCINT_IPL;
1645 			int irq = apic_get_ipivect(ipl, -1);
1646 
1647 			ASSERT(irq != -1);
1648 			apic_cpcovf_vect = apic_irq_table[irq]->airq_vector;
1649 			ASSERT(apic_cpcovf_vect);
1650 			(void) add_avintr(NULL, ipl,
1651 			    (avfunc)kcpc_hw_overflow_intr,
1652 			    "apic pcint", irq, NULL, NULL, NULL);
1653 			kcpc_hw_overflow_intr_installed = 1;
1654 			kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear;
1655 		}
1656 		apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect;
1657 	}
1658 
1659 	/* Enable error interrupt */
1660 
1661 	if (apic_enable_error_intr) {
1662 		if (apic_errvect == 0) {
1663 			int ipl = 0xf;	/* get highest priority intr */
1664 			int irq = apic_get_ipivect(ipl, -1);
1665 
1666 			ASSERT(irq != -1);
1667 			apic_errvect = apic_irq_table[irq]->airq_vector;
1668 			ASSERT(apic_errvect);
1669 			/*
1670 			 * Not PSMI compliant, but we are going to merge
1671 			 * with ON anyway
1672 			 */
1673 			(void) add_avintr((void *)NULL, ipl,
1674 			    (avfunc)apic_error_intr, "apic error intr",
1675 			    irq, NULL, NULL, NULL);
1676 		}
1677 		apicadr[APIC_ERR_VECT] = apic_errvect;
1678 		apicadr[APIC_ERROR_STATUS] = 0;
1679 		apicadr[APIC_ERROR_STATUS] = 0;
1680 	}
1681 }
1682 
/*
 * Quiesce the local APIC reached through apicadr: set the task priority
 * register to APIC_MASK_ALL, mask the timer, local interrupt 0, local
 * interrupt 1 (NMI), error and performance counter vector entries, and
 * finally rewrite the spurious interrupt register.
 */
static void
apic_disable_local_apic()
{
	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
	apicadr[APIC_INT_VECT0] = AV_MASK;	/* local intr reg 0 */
	apicadr[APIC_INT_VECT1] = AV_MASK;	/* disable NMI */
	apicadr[APIC_ERR_VECT] = AV_MASK;	/* and error interrupt */
	apicadr[APIC_PCINT_VECT] = AV_MASK;	/* and perf counter intr */
	/*
	 * apic_init_intr() writes AV_UNIT_ENABLE | APIC_SPUR_INTR to this
	 * register; leaving AV_UNIT_ENABLE out here presumably turns the
	 * APIC off again.
	 */
	apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR;
}
1694 
1695 static void
1696 apic_picinit(void)
1697 {
1698 	int i, j;
1699 	uint_t isr;
1700 	volatile int32_t *ioapic;
1701 	apic_irq_t	*irqptr;
1702 
1703 	/*
1704 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
1705 	 * bit on without clearing it with EOI.  Since softint
1706 	 * uses vector 0x20 to interrupt itself, so softint will
1707 	 * not work on this machine.  In order to fix this problem
1708 	 * a check is made to verify all the isr bits are clear.
1709 	 * If not, EOIs are issued to clear the bits.
1710 	 */
1711 	for (i = 7; i >= 1; i--) {
1712 		if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0)
1713 			for (j = 0; ((j < 32) && (isr != 0)); j++)
1714 				if (isr & (1 << j)) {
1715 					apicadr[APIC_EOI_REG] = 0;
1716 					isr &= ~(1 << j);
1717 					apic_error |= APIC_ERR_BOOT_EOI;
1718 				}
1719 	}
1720 
1721 	/* set a flag so we know we have run apic_picinit() */
1722 	apic_flag = 1;
1723 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
1724 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
1725 	LOCK_INIT_CLEAR(&apic_revector_lock);
1726 	LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock);
1727 	LOCK_INIT_CLEAR(&apic_error_lock);
1728 
1729 	picsetup();	 /* initialise the 8259 */
1730 
1731 	/* add nmi handler - least priority nmi handler */
1732 	LOCK_INIT_CLEAR(&apic_nmi_lock);
1733 
1734 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
1735 	    "pcplusmp NMI handler", (caddr_t)NULL))
1736 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
1737 
1738 	apic_init_intr();
1739 
1740 	/* enable apic mode if imcr present */
1741 	if (apic_imcrp) {
1742 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1743 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
1744 	}
1745 
1746 	/* mask interrupt vectors					*/
1747 	for (j = 0; j < apic_io_max; j++) {
1748 		int intin_max;
1749 		ioapic = apicioadr[j];
1750 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
1751 		/* Bits 23-16 define the maximum redirection entries */
1752 		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
1753 		for (i = 0; i < intin_max; i++) {
1754 			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
1755 			ioapic[APIC_IO_DATA] = AV_MASK;
1756 		}
1757 	}
1758 
1759 	/*
1760 	 * Hack alert: deal with ACPI SCI interrupt chicken/egg here
1761 	 */
1762 	if (apic_sci_vect >= 0) {
1763 		/*
1764 		 * acpica has already done add_avintr(); we just
1765 		 * to finish the job by mimicing translate_irq()
1766 		 */
1767 		if (apic_setup_sci_irq_table(apic_sci_vect, SCI_IPL,
1768 		    &apic_sci_flags) < 0) {
1769 			cmn_err(CE_WARN, "!apic: SCI setup failed");
1770 			return;
1771 		}
1772 		irqptr = apic_irq_table[apic_sci_vect];
1773 
1774 		/* Assert we're the sole entry in the list */
1775 		ASSERT(irqptr != NULL);
1776 		ASSERT(irqptr->airq_next == NULL);
1777 
1778 		/* Program I/O APIC */
1779 		(void) apic_setup_io_intr(irqptr, apic_sci_vect);
1780 	}
1781 }
1782 
1783 
/*
 * Start another CPU.
 *
 * cpun    - index into apic_cpus[] of the CPU to start; its local APIC id
 *	     is the destination of the IPIs sent below.
 * rm_code - only overwritten locally with rm_platter_pa; the value passed
 *	     in is not used by this function.
 *
 * The CMOS shutdown status byte is set to BIOS_SHUTDOWN, then an INIT IPI
 * is asserted and deasserted.  After a 20 ms wait, CPUs whose local APIC
 * version is at least APIC_INTEGRATED_VERS are additionally sent two
 * STARTUP IPIs whose vector is derived from the page number of
 * rm_platter_pa.  Interrupts are disabled on the calling CPU for the whole
 * sequence.
 */
static void
apic_cpu_start(processorid_t cpun, caddr_t rm_code)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		cpu_id, iflag;

	cpu_id = apic_cpus[cpun].aci_local_id;

	/* record that the CMOS shutdown status byte is being modified */
	apic_cmos_ssb_set = 1;

	/*
	 * Interrupts on BSP cpu will be disabled during these startup
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */

	iflag = intr_clear();
	outb(CMOS_ADDR, SSB);
	outb(CMOS_DATA, BIOS_SHUTDOWN);

	/* wait until any previously issued IPI has been sent */
	while (get_apic_cmd1() & AV_PENDING)
		apic_ret();

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
	apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET;

	/* If only 1 CPU is installed, PENDING bit will not go low */
	for (loop_count = 0x1000; loop_count; loop_count--)
		if (get_apic_cmd1() & AV_PENDING)
			apic_ret();
		else
			break;

	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
	apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET;

	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		/* NOTE(review): rm_code is not read after this assignment */
		rm_code = (caddr_t)(uintptr_t)rm_platter_pa;
		/* the STARTUP vector is the page number of the platter */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;

		drv_usecwait(200);		/* 200 micro sec */

		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;

		drv_usecwait(200);		/* 200 micro sec */
	}
	intr_restore(iflag);
}
1845 
1846 
#ifdef	DEBUG
/*
 * Debugging knobs used by apic_intr_enter() in DEBUG kernels.
 */
/* apic_break() is called for interrupts taken on the CPU with this id */
int	apic_break_on_cpu = 9;
/* when non-zero, passed to drv_usecwait() to stretch selected interrupts */
int	apic_stretch_interrupts = 0;
/* bit mask, indexed by IPL, selecting which interrupts get stretched */
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */

/*
 * Deliberately empty; presumably a convenient place to set a debugger
 * breakpoint.
 */
void
apic_break()
{
}
#endif /* DEBUG */
1857 
/*
 * platform_intr_enter
 *
 *	Called at the beginning of the interrupt service routine to
 *	mask all level equal to and below the interrupt priority
 *	of the interrupting vector.  An EOI should be given to
 *	the interrupt controller to enable other HW interrupts.
 *
 *	ipl	- not used by this implementation
 *	vectorp	- in: the vector that interrupted, with APIC_BASE_VECT
 *		  already subtracted; out: the irq the vector maps to
 *		  (left untouched for spurious interrupts)
 *
 *	Returns the new ipl of the interrupt, or APIC_INT_SPURIOUS
 *	for spurious interrupts
 *
 */
/*ARGSUSED*/
static int
apic_intr_enter(int ipl, int *vectorp)
{
	uchar_t vector;
	int nipl;
	int irq, iflag;
	apic_cpus_info_t *cpu_infop;

	/*
	 * The real vector programmed in APIC is *vectorp + 0x20
	 * But, cmnint code subtracts 0x20 before pushing it.
	 * Hence APIC_BASE_VECT is 0x20.
	 */

	vector = (uchar_t)*vectorp;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == apic_clkvect) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/*
			 * apic_hrtime_stamp is bumped before and after the
			 * update, presumably so that a reader can detect an
			 * update in progress.
			 */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apic_redistribute_compute();
		}

		/* We will avoid all the book keeping overhead for clock */
		nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
#if defined(__amd64)
		setcr8((ulong_t)apic_cr8pri[nipl]);
#else
		apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
#endif
		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
		apicadr[APIC_EOI_REG] = 0;
		return (nipl);
	}

	cpu_infop = &apic_cpus[psm_get_cpu_id()];

	/* spurious interrupts are only counted; no EOI is issued for them */
	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* Check if the vector we got is really what we need */
	if (apic_revector_pending) {
		/*
		 * Disable interrupts for the duration of
		 * the vector translation to prevent a self-race for
		 * the apic_revector_lock.  This cannot be done
		 * in apic_xlate_vector because it is recursive and
		 * we want the vector translation to be atomic with
		 * respect to other (higher-priority) interrupts.
		 */
		iflag = intr_clear();
		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
		    APIC_BASE_VECT;
		intr_restore(iflag);
	}

	nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];

	/* raise the task priority to that of the new ipl */
#if defined(__amd64)
	setcr8((ulong_t)apic_cr8pri[nipl]);
#else
	apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
#endif

	/* per-CPU record of what is being serviced at which ipl */
	cpu_infop->aci_current[nipl] = (uchar_t)irq;
	cpu_infop->aci_curipl = (uchar_t)nipl;
	cpu_infop->aci_ISR_in_progress |= 1 << nipl;

	/*
	 * apic_level_intr could have been assimilated into the irq struct.
	 * but, having it as a character array is more efficient in terms of
	 * cache usage. So, we leave it as is.
	 *
	 * Only irqs not marked in apic_level_intr get their EOI here.
	 */
	if (!apic_level_intr[irq])
		apicadr[APIC_EOI_REG] = 0;

#ifdef	DEBUG
	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(irq);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);

	if (apic_break_on_cpu == psm_get_cpu_id())
		apic_break();
#endif /* DEBUG */
	return (nipl);
}
1966 
1967 static void
1968 apic_intr_exit(int prev_ipl, int irq)
1969 {
1970 	apic_cpus_info_t *cpu_infop;
1971 
1972 #if defined(__amd64)
1973 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
1974 #else
1975 	apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
1976 #endif
1977 
1978 	cpu_infop = &apic_cpus[psm_get_cpu_id()];
1979 	if (apic_level_intr[irq])
1980 		apicadr[APIC_EOI_REG] = 0;
1981 
1982 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
1983 	/* ISR above current pri could not be in progress */
1984 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
1985 }
1986 
1987 /*
1988  * Mask all interrupts below or equal to the given IPL
1989  */
1990 static void
1991 apic_setspl(int ipl)
1992 {
1993 
1994 #if defined(__amd64)
1995 	setcr8((ulong_t)apic_cr8pri[ipl]);
1996 #else
1997 	apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
1998 #endif
1999 
2000 	/* interrupts at ipl above this cannot be in progress */
2001 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
2002 	/*
2003 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
2004 	 * have enough time to come in before the priority is raised again
2005 	 * during the idle() loop.
2006 	 */
2007 	if (apic_setspl_delay)
2008 		(void) get_apic_pri();
2009 }
2010 
2011 /*
2012  * trigger a software interrupt at the given IPL
2013  */
2014 static void
2015 apic_set_softintr(int ipl)
2016 {
2017 	int vector;
2018 	uint_t flag;
2019 
2020 	vector = apic_resv_vector[ipl];
2021 
2022 	flag = intr_clear();
2023 
2024 	while (get_apic_cmd1() & AV_PENDING)
2025 		apic_ret();
2026 
2027 	/* generate interrupt at vector on itself only */
2028 	apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector;
2029 
2030 	intr_restore(flag);
2031 }
2032 
2033 /*
2034  * generates an interprocessor interrupt to another CPU
2035  */
2036 static void
2037 apic_send_ipi(int cpun, int ipl)
2038 {
2039 	int vector;
2040 	uint_t flag;
2041 
2042 	vector = apic_resv_vector[ipl];
2043 
2044 	flag = intr_clear();
2045 
2046 	while (get_apic_cmd1() & AV_PENDING)
2047 		apic_ret();
2048 
2049 	apicadr[APIC_INT_CMD2] =
2050 	    apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET;
2051 	apicadr[APIC_INT_CMD1] = vector;
2052 
2053 	intr_restore(flag);
2054 }
2055 
2056 
2057 /*ARGSUSED*/
2058 static void
2059 apic_set_idlecpu(processorid_t cpun)
2060 {
2061 }
2062 
2063 /*ARGSUSED*/
2064 static void
2065 apic_unset_idlecpu(processorid_t cpun)
2066 {
2067 }
2068 
2069 
/*
 * Empty function used as the body of the busy-wait loops in this file
 * (e.g. while polling AV_PENDING or apic_hrtime_stamp).
 */
static void
apic_ret()
{
}
2074 
2075 static int
2076 get_apic_cmd1()
2077 {
2078 	return (apicadr[APIC_INT_CMD1]);
2079 }
2080 
/*
 * Read back the current priority: %cr8 on amd64, the local APIC task
 * register otherwise.
 */
static int
get_apic_pri()
{
	int pri;

#if defined(__amd64)
	pri = (int)getcr8();
#else
	pri = apicadr[APIC_TASK_REG];
#endif
	return (pri);
}
2090 
2091 /*
2092  * If apic_coarse_time == 1, then apic_gettime() is used instead of
2093  * apic_gethrtime().  This is used for performance instead of accuracy.
2094  */
2095 
2096 static hrtime_t
2097 apic_gettime()
2098 {
2099 	int old_hrtime_stamp;
2100 	hrtime_t temp;
2101 
2102 	/*
2103 	 * In one-shot mode, we do not keep time, so if anyone
2104 	 * calls psm_gettime() directly, we vector over to
2105 	 * gethrtime().
2106 	 * one-shot mode MUST NOT be enabled if this psm is the source of
2107 	 * hrtime.
2108 	 */
2109 
2110 	if (apic_oneshot)
2111 		return (gethrtime());
2112 
2113 
2114 gettime_again:
2115 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
2116 		apic_ret();
2117 
2118 	temp = apic_nsec_since_boot;
2119 
2120 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
2121 		goto gettime_again;
2122 	}
2123 	return (temp);
2124 }
2125 
2126 /*
2127  * Here we return the number of nanoseconds since booting.  Note every
2128  * clock interrupt increments apic_nsec_since_boot by the appropriate
2129  * amount.
2130  */
2131 static hrtime_t
2132 apic_gethrtime()
2133 {
2134 	int curr_timeval, countval, elapsed_ticks, oflags;
2135 	int old_hrtime_stamp, status;
2136 	hrtime_t temp;
2137 	uchar_t	cpun;
2138 
2139 
2140 	/*
2141 	 * In one-shot mode, we do not keep time, so if anyone
2142 	 * calls psm_gethrtime() directly, we vector over to
2143 	 * gethrtime().
2144 	 * one-shot mode MUST NOT be enabled if this psm is the source of
2145 	 * hrtime.
2146 	 */
2147 
2148 	if (apic_oneshot)
2149 		return (gethrtime());
2150 
2151 	oflags = intr_clear();	/* prevent migration */
2152 
2153 	cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET);
2154 
2155 	lock_set(&apic_gethrtime_lock);
2156 
2157 gethrtime_again:
2158 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
2159 		apic_ret();
2160 
2161 	/*
2162 	 * Check to see which CPU we are on.  Note the time is kept on
2163 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
2164 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
2165 	 */
2166 	if (cpun == apic_cpus[0].aci_local_id) {
2167 		countval = apicadr[APIC_CURR_COUNT];
2168 	} else {
2169 		while (get_apic_cmd1() & AV_PENDING)
2170 			apic_ret();
2171 
2172 		apicadr[APIC_INT_CMD2] =
2173 		    apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET;
2174 		apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE;
2175 
2176 		while ((status = get_apic_cmd1()) & AV_READ_PENDING)
2177 			apic_ret();
2178 
2179 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
2180 			countval = apicadr[APIC_REMOTE_READ];
2181 		else {	/* 0 = invalid */
2182 			apic_remote_hrterr++;
2183 			/*
2184 			 * return last hrtime right now, will need more
2185 			 * testing if change to retry
2186 			 */
2187 			temp = apic_last_hrtime;
2188 
2189 			lock_clear(&apic_gethrtime_lock);
2190 
2191 			intr_restore(oflags);
2192 
2193 			return (temp);
2194 		}
2195 	}
2196 	if (countval > last_count_read)
2197 		countval = 0;
2198 	else
2199 		last_count_read = countval;
2200 
2201 	elapsed_ticks = apic_hertz_count - countval;
2202 
2203 	curr_timeval = elapsed_ticks * apic_nsec_per_tick;
2204 	temp = apic_nsec_since_boot + curr_timeval;
2205 
2206 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
2207 		/* we might have clobbered last_count_read. Restore it */
2208 		last_count_read = apic_hertz_count;
2209 		goto gethrtime_again;
2210 	}
2211 
2212 	if (temp < apic_last_hrtime) {
2213 		/* return last hrtime if error occurs */
2214 		apic_hrtime_error++;
2215 		temp = apic_last_hrtime;
2216 	}
2217 	else
2218 		apic_last_hrtime = temp;
2219 
2220 	lock_clear(&apic_gethrtime_lock);
2221 	intr_restore(oflags);
2222 
2223 	return (temp);
2224 }
2225 
2226 /* apic NMI handler */
2227 /*ARGSUSED*/
2228 static void
2229 apic_nmi_intr(caddr_t arg)
2230 {
2231 	if (apic_shutdown_processors) {
2232 		apic_disable_local_apic();
2233 		return;
2234 	}
2235 
2236 	if (lock_try(&apic_nmi_lock)) {
2237 		if (apic_kmdb_on_nmi) {
2238 			if (psm_debugger() == 0) {
2239 				cmn_err(CE_PANIC,
2240 				    "NMI detected, kmdb is not available.");
2241 			} else {
2242 				debug_enter("\nNMI detected, entering kmdb.\n");
2243 			}
2244 		} else {
2245 			if (apic_panic_on_nmi) {
2246 				/* Keep panic from entering kmdb. */
2247 				nopanicdebug = 1;
2248 				cmn_err(CE_PANIC, "pcplusmp: NMI received");
2249 			} else {
2250 				/*
2251 				 * prom_printf is the best shot we have
2252 				 * of something which is problem free from
2253 				 * high level/NMI type of interrupts
2254 				 */
2255 				prom_printf("pcplusmp: NMI received\n");
2256 				apic_error |= APIC_ERR_NMI;
2257 				apic_num_nmis++;
2258 			}
2259 		}
2260 		lock_clear(&apic_nmi_lock);
2261 	}
2262 }
2263 
2264 /*
2265  * Add mask bits to disable interrupt vector from happening
2266  * at or above IPL. In addition, it should remove mask bits
2267  * to enable interrupt vectors below the given IPL.
2268  *
2269  * Both add and delspl are complicated by the fact that different interrupts
2270  * may share IRQs. This can happen in two ways.
2271  * 1. The same H/W line is shared by more than 1 device
2272  * 1a. with interrupts at different IPLs
2273  * 1b. with interrupts at same IPL
2274  * 2. We ran out of vectors at a given IPL and started sharing vectors.
2275  * 1b and 2 should be handled gracefully, except for the fact some ISRs
2276  * will get called often when no interrupt is pending for the device.
2277  * For 1a, we just hope that the machine blows up with the person who
2278  * set it up that way!. In the meantime, we handle it at the higher IPL.
2279  */
2280 /*ARGSUSED*/
2281 static int
2282 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
2283 {
2284 	uchar_t vector;
2285 	int iflag;
2286 	apic_irq_t *irqptr, *irqheadptr;
2287 	int irqindex;
2288 
2289 	ASSERT(max_ipl <= UCHAR_MAX);
2290 	irqindex = IRQINDEX(irqno);
2291 
2292 	if ((irqindex == -1) || (!apic_irq_table[irqindex]))
2293 		return (PSM_FAILURE);
2294 
2295 	irqptr = irqheadptr = apic_irq_table[irqindex];
2296 
2297 	DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x "
2298 	    "vector=0x%x\n", (void *)irqptr->airq_dip,
2299 	    irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));
2300 
2301 	while (irqptr) {
2302 		if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
2303 			break;
2304 		irqptr = irqptr->airq_next;
2305 	}
2306 	irqptr->airq_share++;
2307 
2308 	/* return if it is not hardware interrupt */
2309 	if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
2310 		return (PSM_SUCCESS);
2311 
2312 	/* Or if there are more interupts at a higher IPL */
2313 	if (ipl != max_ipl)
2314 		return (PSM_SUCCESS);
2315 
2316 	/*
2317 	 * if apic_picinit() has not been called yet, just return.
2318 	 * At the end of apic_picinit(), we will call setup_io_intr().
2319 	 */
2320 
2321 	if (!apic_flag)
2322 		return (PSM_SUCCESS);
2323 
2324 	iflag = intr_clear();
2325 
2326 	/*
2327 	 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate,
2328 	 * return failure. Not very elegant, but then we hope the
2329 	 * machine will blow up with ...
2330 	 */
2331 	if (irqptr->airq_ipl != max_ipl) {
2332 		vector = apic_allocate_vector(max_ipl, irqindex, 1);
2333 		if (vector == 0) {
2334 			intr_restore(iflag);
2335 			irqptr->airq_share--;
2336 			return (PSM_FAILURE);
2337 		}
2338 		irqptr = irqheadptr;
2339 		apic_mark_vector(irqptr->airq_vector, vector);
2340 		while (irqptr) {
2341 			irqptr->airq_vector = vector;
2342 			irqptr->airq_ipl = (uchar_t)max_ipl;
2343 			/*
2344 			 * reprogram irq being added and every one else
2345 			 * who is not in the UNINIT state
2346 			 */
2347 			if ((VIRTIRQ(irqindex, irqptr->airq_share_id) ==
2348 			    irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) {
2349 				apic_record_rdt_entry(irqptr, irqindex);
2350 				(void) apic_setup_io_intr(irqptr, irqindex);
2351 			}
2352 			irqptr = irqptr->airq_next;
2353 		}
2354 		intr_restore(iflag);
2355 		return (PSM_SUCCESS);
2356 	}
2357 
2358 	ASSERT(irqptr);
2359 	(void) apic_setup_io_intr(irqptr, irqindex);
2360 	intr_restore(iflag);
2361 	return (PSM_SUCCESS);
2362 }
2363 
2364 /*
2365  * Recompute mask bits for the given interrupt vector.
2366  * If there is no interrupt servicing routine for this
2367  * vector, this function should disable interrupt vector
2368  * from happening at all IPLs. If there are still
2369  * handlers using the given vector, this function should
2370  * disable the given vector from happening below the lowest
2371  * IPL of the remaining hadlers.
2372  */
2373 /*ARGSUSED*/
2374 static int
2375 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
2376 {
2377 	uchar_t vector, bind_cpu;
2378 	int	iflag, intin, irqindex;
2379 	volatile int32_t *ioapic;
2380 	apic_irq_t	*irqptr, *irqheadptr;
2381 
2382 	irqindex = IRQINDEX(irqno);
2383 	irqptr = irqheadptr = apic_irq_table[irqindex];
2384 
2385 	DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x "
2386 	    "vector=0x%x\n", (void *)irqptr->airq_dip,
2387 	    irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));
2388 
2389 	while (irqptr) {
2390 		if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
2391 			break;
2392 		irqptr = irqptr->airq_next;
2393 	}
2394 	ASSERT(irqptr);
2395 
2396 	irqptr->airq_share--;
2397 
2398 	if (ipl < max_ipl)
2399 		return (PSM_SUCCESS);
2400 
2401 	/* return if it is not hardware interrupt */
2402 	if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
2403 		return (PSM_SUCCESS);
2404 
2405 	if (!apic_flag) {
2406 		/*
2407 		 * Clear irq_struct. If two devices shared an intpt
2408 		 * line & 1 unloaded before picinit, we are hosed. But, then
2409 		 * we hope the machine will ...
2410 		 */
2411 		irqptr->airq_mps_intr_index = FREE_INDEX;
2412 		irqptr->airq_temp_cpu = IRQ_UNINIT;
2413 		apic_free_vector(irqptr->airq_vector);
2414 		return (PSM_SUCCESS);
2415 	}
2416 	/*
2417 	 * Downgrade vector to new max_ipl if needed.If we cannot allocate,
2418 	 * use old IPL. Not very elegant, but then we hope ...
2419 	 */
2420 	if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) {
2421 		apic_irq_t	*irqp;
2422 		if (vector = apic_allocate_vector(max_ipl, irqno, 1)) {
2423 			apic_mark_vector(irqheadptr->airq_vector, vector);
2424 			irqp = irqheadptr;
2425 			while (irqp) {
2426 				irqp->airq_vector = vector;
2427 				irqp->airq_ipl = (uchar_t)max_ipl;
2428 				if (irqp->airq_temp_cpu != IRQ_UNINIT) {
2429 					apic_record_rdt_entry(irqp, irqindex);
2430 					(void) apic_setup_io_intr(irqp,
2431 					    irqindex);
2432 				}
2433 				irqp = irqp->airq_next;
2434 			}
2435 		}
2436 	}
2437 
2438 	if (irqptr->airq_share)
2439 		return (PSM_SUCCESS);
2440 
2441 	ioapic = apicioadr[irqptr->airq_ioapicindex];
2442 	intin = irqptr->airq_intin_no;
2443 	iflag = intr_clear();
2444 	lock_set(&apic_ioapic_lock);
2445 	ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin;
2446 	ioapic[APIC_IO_DATA] = AV_MASK;
2447 
2448 	/* Disable the MSI/X vector */
2449 	if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) {
2450 		int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ?
2451 		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
2452 
2453 		/*
2454 		 * Make sure we only disable on the last
2455 		 * of the multi-MSI support
2456 		 */
2457 		if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) {
2458 			(void) pci_msi_unconfigure(irqptr->airq_dip, type,
2459 			    irqptr->airq_ioapicindex);
2460 
2461 			(void) pci_msi_disable_mode(irqptr->airq_dip, type,
2462 			    irqptr->airq_ioapicindex);
2463 		}
2464 	}
2465 
2466 	if (max_ipl == PSM_INVALID_IPL) {
2467 		ASSERT(irqheadptr == irqptr);
2468 		bind_cpu = irqptr->airq_temp_cpu;
2469 		if (((uchar_t)bind_cpu != IRQ_UNBOUND) &&
2470 		    ((uchar_t)bind_cpu != IRQ_UNINIT)) {
2471 			ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
2472 			if (bind_cpu & IRQ_USER_BOUND) {
2473 				/* If hardbound, temp_cpu == cpu */
2474 				bind_cpu &= ~IRQ_USER_BOUND;
2475 				apic_cpus[bind_cpu].aci_bound--;
2476 			} else
2477 				apic_cpus[bind_cpu].aci_temp_bound--;
2478 		}
2479 		lock_clear(&apic_ioapic_lock);
2480 		intr_restore(iflag);
2481 		irqptr->airq_temp_cpu = IRQ_UNINIT;
2482 		irqptr->airq_mps_intr_index = FREE_INDEX;
2483 		apic_free_vector(irqptr->airq_vector);
2484 		return (PSM_SUCCESS);
2485 	}
2486 	lock_clear(&apic_ioapic_lock);
2487 	intr_restore(iflag);
2488 
2489 	mutex_enter(&airq_mutex);
2490 	if ((irqptr == apic_irq_table[irqindex])) {
2491 		apic_irq_t	*oldirqptr;
2492 		/* Move valid irq entry to the head */
2493 		irqheadptr = oldirqptr = irqptr;
2494 		irqptr = irqptr->airq_next;
2495 		ASSERT(irqptr);
2496 		while (irqptr) {
2497 			if (irqptr->airq_mps_intr_index != FREE_INDEX)
2498 				break;
2499 			oldirqptr = irqptr;
2500 			irqptr = irqptr->airq_next;
2501 		}
2502 		/* remove all invalid ones from the beginning */
2503 		apic_irq_table[irqindex] = irqptr;
2504 		/*
2505 		 * and link them back after the head. The invalid ones
2506 		 * begin with irqheadptr and end at oldirqptr
2507 		 */
2508 		oldirqptr->airq_next = irqptr->airq_next;
2509 		irqptr->airq_next = irqheadptr;
2510 	}
2511 	mutex_exit(&airq_mutex);
2512 
2513 	irqptr->airq_temp_cpu = IRQ_UNINIT;
2514 	irqptr->airq_mps_intr_index = FREE_INDEX;
2515 	return (PSM_SUCCESS);
2516 }
2517 
2518 /*
2519  * Return HW interrupt number corresponding to the given IPL
2520  */
2521 /*ARGSUSED*/
2522 static int
2523 apic_softlvl_to_irq(int ipl)
2524 {
2525 	/*
2526 	 * Do not use apic to trigger soft interrupt.
2527 	 * It will cause the system to hang when 2 hardware interrupts
2528 	 * at the same priority with the softint are already accepted
2529 	 * by the apic.  Cause the AV_PENDING bit will not be cleared
2530 	 * until one of the hardware interrupt is eoi'ed.  If we need
2531 	 * to send an ipi at this time, we will end up looping forever
2532 	 * to wait for the AV_PENDING bit to clear.
2533 	 */
2534 	return (PSM_SV_SOFTWARE);
2535 }
2536 
2537 static int
2538 apic_post_cpu_start()
2539 {
2540 	int i, cpun;
2541 	apic_irq_t *irq_ptr;
2542 
2543 	apic_init_intr();
2544 
2545 	/*
2546 	 * since some systems don't enable the internal cache on the non-boot
2547 	 * cpus, so we have to enable them here
2548 	 */
2549 	setcr0(getcr0() & ~(0x60000000));
2550 
2551 	while (get_apic_cmd1() & AV_PENDING)
2552 		apic_ret();
2553 
2554 	cpun = psm_get_cpu_id();
2555 	apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
2556 
2557 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
2558 		irq_ptr = apic_irq_table[i];
2559 		if ((irq_ptr == NULL) ||
2560 		    ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun))
2561 			continue;
2562 
2563 		while (irq_ptr) {
2564 			if (irq_ptr->airq_temp_cpu != IRQ_UNINIT)
2565 				(void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE);
2566 			irq_ptr = irq_ptr->airq_next;
2567 		}
2568 	}
2569 
2570 	return (PSM_SUCCESS);
2571 }
2572 
2573 processorid_t
2574 apic_get_next_processorid(processorid_t cpu_id)
2575 {
2576 
2577 	int i;
2578 
2579 	if (cpu_id == -1)
2580 		return ((processorid_t)0);
2581 
2582 	for (i = cpu_id + 1; i < NCPU; i++) {
2583 		if (apic_cpumask & (1 << i))
2584 			return (i);
2585 	}
2586 
2587 	return ((processorid_t)-1);
2588 }
2589 
2590 
2591 /*
2592  * type == -1 indicates it is an internal request. Do not change
2593  * resv_vector for these requests
2594  */
2595 static int
2596 apic_get_ipivect(int ipl, int type)
2597 {
2598 	uchar_t vector;
2599 	int irq;
2600 
2601 	if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) {
2602 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
2603 			apic_irq_table[irq]->airq_mps_intr_index =
2604 			    RESERVE_INDEX;
2605 			apic_irq_table[irq]->airq_vector = vector;
2606 			if (type != -1) {
2607 				apic_resv_vector[ipl] = vector;
2608 			}
2609 			return (irq);
2610 		}
2611 	}
2612 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
2613 	return (-1);	/* shouldn't happen */
2614 }
2615 
2616 static int
2617 apic_getclkirq(int ipl)
2618 {
2619 	int	irq;
2620 
2621 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
2622 		return (-1);
2623 	/*
2624 	 * Note the vector in apic_clkvect for per clock handling.
2625 	 */
2626 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
2627 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
2628 	    apic_clkvect));
2629 	return (irq);
2630 }
2631 
2632 /*
2633  * Return the number of APIC clock ticks elapsed for 8245 to decrement
2634  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
2635  */
2636 static uint_t
2637 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
2638 {
2639 	uint8_t		pit_tick_lo;
2640 	uint16_t	pit_tick, target_pit_tick;
2641 	uint32_t	start_apic_tick, end_apic_tick;
2642 	int		iflag;
2643 
2644 	addr += APIC_CURR_COUNT;
2645 
2646 	iflag = intr_clear();
2647 
2648 	do {
2649 		pit_tick_lo = inb(PITCTR0_PORT);
2650 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
2651 	} while (pit_tick < APIC_TIME_MIN ||
2652 	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
2653 
2654 	/*
2655 	 * Wait for the 8254 to decrement by 5 ticks to ensure
2656 	 * we didn't start in the middle of a tick.
2657 	 * Compare with 0x10 for the wrap around case.
2658 	 */
2659 	target_pit_tick = pit_tick - 5;
2660 	do {
2661 		pit_tick_lo = inb(PITCTR0_PORT);
2662 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
2663 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
2664 
2665 	start_apic_tick = *addr;
2666 
2667 	/*
2668 	 * Wait for the 8254 to decrement by
2669 	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
2670 	 */
2671 	target_pit_tick = pit_tick - APIC_TIME_COUNT;
2672 	do {
2673 		pit_tick_lo = inb(PITCTR0_PORT);
2674 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
2675 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
2676 
2677 	end_apic_tick = *addr;
2678 
2679 	*pit_ticks_adj = target_pit_tick - pit_tick;
2680 
2681 	intr_restore(iflag);
2682 
2683 	return (start_apic_tick - end_apic_tick);
2684 }
2685 
2686 /*
2687  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
2688  * frequency.  Note at this stage in the boot sequence, the boot processor
2689  * is the only active processor.
2690  * hertz value of 0 indicates a one-shot mode request.  In this case
2691  * the function returns the resolution (in nanoseconds) for the hardware
2692  * timer interrupt.  If one-shot mode capability is not available,
2693  * the return value will be 0. apic_enable_oneshot is a global switch
2694  * for disabling the functionality.
2695  * A non-zero positive value for hertz indicates a periodic mode request.
2696  * In this case the hardware will be programmed to generate clock interrupts
2697  * at hertz frequency and returns the resolution of interrupts in
2698  * nanosecond.
2699  */
2700 
2701 static int
2702 apic_clkinit(int hertz)
2703 {
2704 
2705 	uint_t		apic_ticks = 0;
2706 	uint_t		pit_time;
2707 	int		ret;
2708 	uint16_t	pit_ticks_adj;
2709 	static int	firsttime = 1;
2710 
2711 	if (firsttime) {
2712 		/* first time calibrate */
2713 
2714 		apicadr[APIC_DIVIDE_REG] = 0x0;
2715 		apicadr[APIC_INIT_COUNT] = APIC_MAXVAL;
2716 
2717 		/* set periodic interrupt based on CLKIN */
2718 		apicadr[APIC_LOCAL_TIMER] =
2719 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
2720 		tenmicrosec();
2721 
2722 		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);
2723 
2724 		apicadr[APIC_LOCAL_TIMER] =
2725 		    (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
2726 		/*
2727 		 * pit time is the amount of real time (in nanoseconds ) it took
2728 		 * the 8254 to decrement (APIC_TIME_COUNT + pit_ticks_adj) ticks
2729 		 */
2730 		pit_time = ((longlong_t)(APIC_TIME_COUNT +
2731 		    pit_ticks_adj) * NANOSEC) / PIT_HZ;
2732 
2733 		/*
2734 		 * Determine the number of nanoseconds per APIC clock tick
2735 		 * and then determine how many APIC ticks to interrupt at the
2736 		 * desired frequency
2737 		 */
2738 		apic_nsec_per_tick = pit_time / apic_ticks;
2739 		if (apic_nsec_per_tick == 0)
2740 			apic_nsec_per_tick = 1;
2741 
2742 		/* the interval timer initial count is 32 bit max */
2743 		apic_nsec_max = (hrtime_t)apic_nsec_per_tick * APIC_MAXVAL;
2744 		firsttime = 0;
2745 	}
2746 
2747 	if (hertz != 0) {
2748 		/* periodic */
2749 		apic_nsec_per_intr = NANOSEC / hertz;
2750 		apic_hertz_count = (longlong_t)apic_nsec_per_intr /
2751 		    apic_nsec_per_tick;
2752 		apic_sample_factor_redistribution = hertz + 1;
2753 	}
2754 
2755 	apic_int_busy_mark = (apic_int_busy_mark *
2756 	    apic_sample_factor_redistribution) / 100;
2757 	apic_int_free_mark = (apic_int_free_mark *
2758 	    apic_sample_factor_redistribution) / 100;
2759 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
2760 	    apic_sample_factor_redistribution) / 100;
2761 
2762 	if (hertz == 0) {
2763 		/* requested one_shot */
2764 		if (!apic_oneshot_enable)
2765 			return (0);
2766 		apic_oneshot = 1;
2767 		ret = (int)apic_nsec_per_tick;
2768 	} else {
2769 		/* program the local APIC to interrupt at the given frequency */
2770 		apicadr[APIC_INIT_COUNT] = apic_hertz_count;
2771 		apicadr[APIC_LOCAL_TIMER] =
2772 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
2773 		apic_oneshot = 0;
2774 		ret = NANOSEC / hertz;
2775 	}
2776 
2777 	return (ret);
2778 
2779 }
2780 
2781 /*
2782  * apic_preshutdown:
2783  * Called early in shutdown whilst we can still access filesystems to do
2784  * things like loading modules which will be required to complete shutdown
2785  * after filesystems are all unmounted.
2786  */
2787 static void
2788 apic_preshutdown(int cmd, int fcn)
2789 {
2790 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
2791 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
2792 
2793 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
2794 		return;
2795 	}
2796 }
2797 
2798 static void
2799 apic_shutdown(int cmd, int fcn)
2800 {
2801 	int iflag, restarts, attempts;
2802 	int i, j;
2803 	volatile int32_t *ioapic;
2804 	uchar_t	byte;
2805 
2806 	/* Send NMI to all CPUs except self to do per processor shutdown */
2807 	iflag = intr_clear();
2808 	while (get_apic_cmd1() & AV_PENDING)
2809 		apic_ret();
2810 	apic_shutdown_processors = 1;
2811 	apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF;
2812 
2813 	/* restore cmos shutdown byte before reboot */
2814 	if (apic_cmos_ssb_set) {
2815 		outb(CMOS_ADDR, SSB);
2816 		outb(CMOS_DATA, 0);
2817 	}
2818 	/* Disable the I/O APIC redirection entries */
2819 	for (j = 0; j < apic_io_max; j++) {
2820 		int intin_max;
2821 		ioapic = apicioadr[j];
2822 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
2823 		/* Bits 23-16 define the maximum redirection entries */
2824 		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
2825 		for (i = 0; i < intin_max; i++) {
2826 			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
2827 			ioapic[APIC_IO_DATA] = AV_MASK;
2828 		}
2829 	}
2830 
2831 	/*	disable apic mode if imcr present	*/
2832 	if (apic_imcrp) {
2833 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
2834 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
2835 	}
2836 
2837 	apic_disable_local_apic();
2838 
2839 	intr_restore(iflag);
2840 
2841 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
2842 		return;
2843 	}
2844 
2845 	switch (apic_poweroff_method) {
2846 		case APIC_POWEROFF_VIA_RTC:
2847 
2848 			/* select the extended NVRAM bank in the RTC */
2849 			outb(CMOS_ADDR, RTC_REGA);
2850 			byte = inb(CMOS_DATA);
2851 			outb(CMOS_DATA, (byte | EXT_BANK));
2852 
2853 			outb(CMOS_ADDR, PFR_REG);
2854 
2855 			/* for Predator must toggle the PAB bit */
2856 			byte = inb(CMOS_DATA);
2857 
2858 			/*
2859 			 * clear power active bar, wakeup alarm and
2860 			 * kickstart
2861 			 */
2862 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
2863 			outb(CMOS_DATA, byte);
2864 
2865 			/* delay before next write */
2866 			drv_usecwait(1000);
2867 
2868 			/* for S40 the following would suffice */
2869 			byte = inb(CMOS_DATA);
2870 
2871 			/* power active bar control bit */
2872 			byte |= PAB_CBIT;
2873 			outb(CMOS_DATA, byte);
2874 
2875 			break;
2876 
2877 		case APIC_POWEROFF_VIA_ASPEN_BMC:
2878 			restarts = 0;
2879 restart_aspen_bmc:
2880 			if (++restarts == 3)
2881 				break;
2882 			attempts = 0;
2883 			do {
2884 				byte = inb(MISMIC_FLAG_REGISTER);
2885 				byte &= MISMIC_BUSY_MASK;
2886 				if (byte != 0) {
2887 					drv_usecwait(1000);
2888 					if (attempts >= 3)
2889 						goto restart_aspen_bmc;
2890 					++attempts;
2891 				}
2892 			} while (byte != 0);
2893 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
2894 			byte = inb(MISMIC_FLAG_REGISTER);
2895 			byte |= 0x1;
2896 			outb(MISMIC_FLAG_REGISTER, byte);
2897 			i = 0;
2898 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
2899 			    i++) {
2900 				attempts = 0;
2901 				do {
2902 					byte = inb(MISMIC_FLAG_REGISTER);
2903 					byte &= MISMIC_BUSY_MASK;
2904 					if (byte != 0) {
2905 						drv_usecwait(1000);
2906 						if (attempts >= 3)
2907 							goto restart_aspen_bmc;
2908 						++attempts;
2909 					}
2910 				} while (byte != 0);
2911 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
2912 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
2913 				byte = inb(MISMIC_FLAG_REGISTER);
2914 				byte |= 0x1;
2915 				outb(MISMIC_FLAG_REGISTER, byte);
2916 			}
2917 			break;
2918 
2919 		case APIC_POWEROFF_VIA_SITKA_BMC:
2920 			restarts = 0;
2921 restart_sitka_bmc:
2922 			if (++restarts == 3)
2923 				break;
2924 			attempts = 0;
2925 			do {
2926 				byte = inb(SMS_STATUS_REGISTER);
2927 				byte &= SMS_STATE_MASK;
2928 				if ((byte == SMS_READ_STATE) ||
2929 				    (byte == SMS_WRITE_STATE)) {
2930 					drv_usecwait(1000);
2931 					if (attempts >= 3)
2932 						goto restart_sitka_bmc;
2933 					++attempts;
2934 				}
2935 			} while ((byte == SMS_READ_STATE) ||
2936 			    (byte == SMS_WRITE_STATE));
2937 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
2938 			i = 0;
2939 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
2940 			    i++) {
2941 				attempts = 0;
2942 				do {
2943 					byte = inb(SMS_STATUS_REGISTER);
2944 					byte &= SMS_IBF_MASK;
2945 					if (byte != 0) {
2946 						drv_usecwait(1000);
2947 						if (attempts >= 3)
2948 							goto restart_sitka_bmc;
2949 						++attempts;
2950 					}
2951 				} while (byte != 0);
2952 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
2953 			}
2954 			break;
2955 
2956 		case APIC_POWEROFF_NONE:
2957 
2958 			/* If no APIC direct method, we will try using ACPI */
2959 			if (apic_enable_acpi) {
2960 				if (acpi_poweroff() == 1)
2961 					return;
2962 			} else
2963 				return;
2964 
2965 			break;
2966 	}
2967 	/*
2968 	 * Wait a limited time here for power to go off.
2969 	 * If the power does not go off, then there was a
2970 	 * problem and we should continue to the halt which
2971 	 * prints a message for the user to press a key to
2972 	 * reboot.
2973 	 */
2974 	drv_usecwait(7000000); /* wait seven seconds */
2975 
2976 }
2977 
2978 /*
2979  * Try and disable all interrupts. We just assign interrupts to other
2980  * processors based on policy. If any were bound by user request, we
2981  * let them continue and return failure. We do not bother to check
2982  * for cache affinity while rebinding.
2983  */
2984 
2985 static int
2986 apic_disable_intr(processorid_t cpun)
2987 {
2988 	int bind_cpu = 0, i, hardbound = 0, iflag;
2989 	apic_irq_t *irq_ptr;
2990 
2991 	if (cpun == 0)
2992 		return (PSM_FAILURE);
2993 
2994 	iflag = intr_clear();
2995 	lock_set(&apic_ioapic_lock);
2996 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
2997 	lock_clear(&apic_ioapic_lock);
2998 	intr_restore(iflag);
2999 	apic_cpus[cpun].aci_curipl = 0;
3000 	i = apic_min_device_irq;
3001 	for (; i <= apic_max_device_irq; i++) {
3002 		/*
3003 		 * If there are bound interrupts on this cpu, then
3004 		 * rebind them to other processors.
3005 		 */
3006 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
3007 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
3008 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
3009 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
3010 			    apic_nproc));
3011 
3012 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
3013 				hardbound = 1;
3014 				continue;
3015 			}
3016 
3017 			if (irq_ptr->airq_temp_cpu == cpun) {
3018 				do {
3019 					apic_next_bind_cpu += 2;
3020 					bind_cpu = apic_next_bind_cpu / 2;
3021 					if (bind_cpu >= apic_nproc) {
3022 						apic_next_bind_cpu = 1;
3023 						bind_cpu = 0;
3024 
3025 					}
3026 				} while (apic_rebind_all(irq_ptr, bind_cpu, 1));
3027 			}
3028 		}
3029 	}
3030 	if (hardbound) {
3031 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
3032 		    "due to user bound interrupts", cpun);
3033 		return (PSM_FAILURE);
3034 	}
3035 	else
3036 		return (PSM_SUCCESS);
3037 }
3038 
3039 static void
3040 apic_enable_intr(processorid_t cpun)
3041 {
3042 	int	i, iflag;
3043 	apic_irq_t *irq_ptr;
3044 
3045 	iflag = intr_clear();
3046 	lock_set(&apic_ioapic_lock);
3047 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
3048 	lock_clear(&apic_ioapic_lock);
3049 	intr_restore(iflag);
3050 
3051 	i = apic_min_device_irq;
3052 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
3053 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
3054 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
3055 				(void) apic_rebind_all(irq_ptr,
3056 				    irq_ptr->airq_cpu, 1);
3057 			}
3058 		}
3059 	}
3060 }
3061 
/*
 * apic_introp_xlate() replaces apic_translate_irq() and is
 * called only from apic_intr_ops().  With the new ADII framework,
 * the priority can no longer be retrieved through i_ddi_get_intrspec().
 * It has to be passed in from the caller.
 *
 * Translates the interrupt described by ispec (ispec->intrspec_vec is
 * the original irq number) for device dip.  The steps are tried in
 * this order:
 *   1. MSI/MSI-X: reuse the entry found by apic_find_irq(), else set up
 *	a new one.
 *   2. An existing apic_irq_table[] entry with the same dip and
 *	original irq.
 *   3. apic_defconf set: go straight to the default configuration.
 *   4. Parent device_type "pci": ACPI translation, or the MP table
 *	(walking up PCI-PCI bridges when there is no direct entry).
 *   5. nonpci: ACPI interrupt source overrides, or the MP table entries
 *	for the ISA/EISA bus.
 *   6. defconf: MPS default configuration.
 *
 * Returns the irq value from the step that succeeded, or -1 when
 * apic_setup_irq_table() fails in the MSI/MSI-X, ACPI or default
 * configuration step.
 */
int
apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type)
{
	char dev_type[16];
	int dev_len, pci_irq, newirq, bustype, devid, busid, i;
	int irqno = ispec->intrspec_vec;
	ddi_acc_handle_t cfg_handle;
	uchar_t ipin;
	struct apic_io_intr *intrp;
	iflag_t intr_flag;
	APIC_HEADER	*hp;
	MADT_INTERRUPT_OVERRIDE	*isop;
	apic_irq_t *airqp;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s "
	    "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type,
	    irqno));

	/* MSI and MSI-X do not go through bus/ioapic translation */
	if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
		if ((airqp = apic_find_irq(dip, ispec, type)) != NULL)
			return (apic_vector_to_irq[airqp->airq_vector]);
		return (apic_setup_irq_table(dip, irqno, NULL, ispec,
		    NULL, type));
	}

	/* 0 means "bus type not determined yet" for the MP table search */
	bustype = 0;

	/* check if we have already translated this irq */
	mutex_enter(&airq_mutex);
	newirq = apic_min_device_irq;
	for (; newirq <= apic_max_device_irq; newirq++) {
		/* walk the chain of entries sharing this irq slot */
		airqp = apic_irq_table[newirq];
		while (airqp) {
			if ((airqp->airq_dip == dip) &&
			    (airqp->airq_origirq == irqno) &&
			    (airqp->airq_mps_intr_index != FREE_INDEX)) {

				mutex_exit(&airq_mutex);
				return (VIRTIRQ(newirq, airqp->airq_share_id));
			}
			airqp = airqp->airq_next;
		}
	}
	mutex_exit(&airq_mutex);

	if (apic_defconf)
		goto defconf;

	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi))
		goto nonpci;

	/* The parent's "device_type" property tells us which bus we sit on */
	dev_len = sizeof (dev_type);
	if (ddi_getlongprop_buf(DDI_DEV_T_NONE, ddi_get_parent(dip),
	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
	    &dev_len) != DDI_PROP_SUCCESS) {
		goto nonpci;
	}

	if (strcmp(dev_type, "pci") == 0) {
		/* pci device */
		if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
			goto nonpci;
		if (busid == 0 && apic_pci_bus_total == 1)
			busid = (int)apic_single_pci_busid;

		/* read the interrupt pin, expressed relative to PCI_INTA */
		if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
			goto nonpci;
		ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
		pci_config_teardown(&cfg_handle);
		if (apic_enable_acpi && !apic_use_acpi_madt_only) {
			/* ACPI supplies both the irq and its flags */
			if (apic_acpi_translate_pci_irq(dip, busid, devid,
			    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
				goto nonpci;

			intr_flag.bustype = BUS_PCI;
			if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL,
			    ispec, &intr_flag, type)) == -1)
				goto nonpci;
			return (newirq);
		} else {
			/*
			 * MP table lookup: the source bus irq is built from
			 * the device number (bits 2-6) and the pin (bits 0-1).
			 */
			pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
			if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid))
			    == NULL) {
				/* no direct entry, try the bridges above us */
				if ((pci_irq = apic_handle_pci_pci_bridge(dip,
				    devid, ipin, &intrp)) == -1)
					goto nonpci;
			}
			if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp,
			    ispec, NULL, type)) == -1)
				goto nonpci;
			return (newirq);
		}
	} else if (strcmp(dev_type, "isa") == 0)
		bustype = BUS_ISA;
	else if (strcmp(dev_type, "eisa") == 0)
		bustype = BUS_EISA;

nonpci:
	if (apic_enable_acpi && !apic_use_acpi_madt_only) {
		/* search iso entries first */
		if (acpi_iso_cnt != 0) {
			hp = (APIC_HEADER *)acpi_isop;
			i = 0;
			/*
			 * i counts only APIC_XRUPT_OVERRIDE entries; entries
			 * of any other type are stepped over by Length
			 * without being counted.
			 */
			while (i < acpi_iso_cnt) {
				if (hp->Type == APIC_XRUPT_OVERRIDE) {
					isop = (MADT_INTERRUPT_OVERRIDE *)hp;
					if (isop->Bus == 0 &&
					    isop->Source == irqno) {
						newirq = isop->Interrupt;
						intr_flag.intr_po =
						    isop->Polarity;
						intr_flag.intr_el =
						    isop->TriggerMode;
						intr_flag.bustype = BUS_ISA;

						return (apic_setup_irq_table(
						    dip, newirq, NULL, ispec,
						    &intr_flag, type));

					}
					i++;
				}
				hp = (APIC_HEADER *)(((char *)hp) +
				    hp->Length);
			}
		}
		/* no override found: active high, edge triggered, ISA bus */
		intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
		intr_flag.intr_el = INTR_EL_EDGE;
		intr_flag.bustype = BUS_ISA;
		return (apic_setup_irq_table(dip, irqno, NULL, ispec,
		    &intr_flag, type));
	} else {
		if (bustype == 0)
			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
		/* two passes: the chosen bus type first, then the other one */
		for (i = 0; i < 2; i++) {
			if (((busid = apic_find_bus_id(bustype)) != -1) &&
			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
			    != NULL)) {
				if ((newirq = apic_setup_irq_table(dip, irqno,
				    intrp, ispec, NULL, type)) != -1) {
					return (newirq);
				}
				goto defconf;
			}
			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
		}
	}

/* MPS default configuration */
defconf:
	newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type);
	if (newirq == -1)
		return (newirq);
	ASSERT(IRQINDEX(newirq) == irqno);
	ASSERT(apic_irq_table[irqno]);
	return (newirq);
}
3225 
3226 
3227 
3228 
3229 
3230 
3231 /*
3232  * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge
3233  * needs special handling.  We may need to chase up the device tree,
3234  * using the PCI-PCI Bridge specification's "rotating IPIN assumptions",
3235  * to find the IPIN at the root bus that relates to the IPIN on the
3236  * subsidiary bus (for ACPI or MP).  We may, however, have an entry
3237  * in the MP table or the ACPI namespace for this device itself.
3238  * We handle both cases in the search below.
3239  */
3240 /* this is the non-acpi version */
3241 static int
3242 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin,
3243 			struct apic_io_intr **intrp)
3244 {
3245 	dev_info_t *dipp, *dip;
3246 	int pci_irq;
3247 	ddi_acc_handle_t cfg_handle;
3248 	int bridge_devno, bridge_bus;
3249 	int ipin;
3250 
3251 	dip = idip;
3252 
3253 	/*CONSTCOND*/
3254 	while (1) {
3255 		if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL)
3256 			return (-1);
3257 		if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) &&
3258 		    (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) ==
3259 		    PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle,
3260 		    PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) {
3261 			pci_config_teardown(&cfg_handle);
3262 			if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno,
3263 			    NULL) != 0)
3264 				return (-1);
3265 			/*
3266 			 * This is the rotating scheme that Compaq is using
3267 			 * and documented in the pci to pci spec.  Also, if
3268 			 * the pci to pci bridge is behind another pci to
3269 			 * pci bridge, then it need to keep transversing
3270 			 * up until an interrupt entry is found or reach
3271 			 * the top of the tree
3272 			 */
3273 			ipin = (child_devno + child_ipin) % PCI_INTD;
3274 				if (bridge_bus == 0 && apic_pci_bus_total == 1)
3275 					bridge_bus = (int)apic_single_pci_busid;
3276 				pci_irq = ((bridge_devno & 0x1f) << 2) |
3277 				    (ipin & 0x3);
3278 				if ((*intrp = apic_find_io_intr_w_busid(pci_irq,
3279 				    bridge_bus)) != NULL) {
3280 					return (pci_irq);
3281 				}
3282 			dip = dipp;
3283 			child_devno = bridge_devno;
3284 			child_ipin = ipin;
3285 		} else
3286 			return (-1);
3287 	}
3288 	/*LINTED: function will not fall off the bottom */
3289 }
3290 
3291 
3292 
3293 
3294 static uchar_t
3295 acpi_find_ioapic(int irq)
3296 {
3297 	int i;
3298 
3299 	for (i = 0; i < apic_io_max; i++) {
3300 		if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i])
3301 			return (i);
3302 	}
3303 	return (0xFF);	/* shouldn't happen */
3304 }
3305 
3306 /*
3307  * See if two irqs are compatible for sharing a vector.
3308  * Currently we only support sharing of PCI devices.
3309  */
3310 static int
3311 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2)
3312 {
3313 	uint_t	level1, po1;
3314 	uint_t	level2, po2;
3315 
3316 	/* Assume active high by default */
3317 	po1 = 0;
3318 	po2 = 0;
3319 
3320 	if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI)
3321 		return (0);
3322 
3323 	if (iflag1.intr_el == INTR_EL_CONFORM)
3324 		level1 = AV_LEVEL;
3325 	else
3326 		level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0;
3327 
3328 	if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) ||
3329 	    (iflag1.intr_po == INTR_PO_CONFORM)))
3330 		po1 = AV_ACTIVE_LOW;
3331 
3332 	if (iflag2.intr_el == INTR_EL_CONFORM)
3333 		level2 = AV_LEVEL;
3334 	else
3335 		level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0;
3336 
3337 	if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) ||
3338 	    (iflag2.intr_po == INTR_PO_CONFORM)))
3339 		po2 = AV_ACTIVE_LOW;
3340 
3341 	if ((level1 == level2) && (po1 == po2))
3342 		return (1);
3343 
3344 	return (0);
3345 }
3346 
/*
 * Attempt to share vector with someone else
 *
 * Scans the vectors that belong to ipl for an irq whose redirection
 * table entry matches (in bits 8-15) the entry this interrupt would
 * use, and picks the candidate with the smallest airq_share.  A new
 * (or recycled FREE_INDEX) apic_irq_t is then linked into the chosen
 * irq's chain and given its own share id.
 *
 * irqno, intr_flagp, intr_index, ioapicindex and ipin describe the new
 * interrupt; intr_flagp may be NULL.  On success the entry is returned
 * through irqptrp and the function returns VIRTIRQ(chosen irq, share
 * id).  Returns -1 when no compatible irq exists.
 */
static int
apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl,
	uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp)
{
#ifdef DEBUG
	apic_irq_t *tmpirqp = NULL;
#endif /* DEBUG */
	apic_irq_t *irqptr, dummyirq;
	int	newirq, chosen_irq = -1, share = 127;
	int	lowest, highest, i;
	uchar_t	share_id;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x "
	    "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl));

	/* vector range that serves this ipl */
	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
	lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL;

	if (highest < lowest) /* Both ipl and ipl-1 map to same pri */
		lowest -= APIC_VECTOR_PER_IPL;

	/*
	 * Fill in a scratch entry and let apic_record_rdt_entry() compute
	 * the rdt entry this interrupt would get, for comparison below.
	 */
	dummyirq.airq_mps_intr_index = intr_index;
	dummyirq.airq_ioapicindex = ioapicindex;
	dummyirq.airq_intin_no = ipin;
	if (intr_flagp)
		dummyirq.airq_iflag = *intr_flagp;
	apic_record_rdt_entry(&dummyirq, irqno);
	for (i = lowest; i <= highest; i++) {
		newirq = apic_vector_to_irq[i];
		if (newirq == APIC_RESV_IRQ)
			continue;
		irqptr = apic_irq_table[newirq];

		/* don't share SCI */
		if (irqptr->airq_mps_intr_index == SCI_INDEX)
			continue;

		if ((dummyirq.airq_rdt_entry & 0xFF00) !=
		    (irqptr->airq_rdt_entry & 0xFF00))
			/* not compatible */
			continue;

		/* remember the least shared candidate seen so far */
		if (irqptr->airq_share < share) {
			share = irqptr->airq_share;
			chosen_irq = newirq;
		}
	}
	if (chosen_irq != -1) {
		/*
		 * Assign a share id which is free or which is larger
		 * than the largest one.
		 */
		share_id = 1;
		mutex_enter(&airq_mutex);
		irqptr = apic_irq_table[chosen_irq];
		while (irqptr) {
			/* a FREE_INDEX entry is recycled with its share id */
			if (irqptr->airq_mps_intr_index == FREE_INDEX) {
				share_id = irqptr->airq_share_id;
				break;
			}
			if (share_id <= irqptr->airq_share_id)
				share_id = irqptr->airq_share_id + 1;
#ifdef DEBUG
			/* tmpirqp trails one entry behind irqptr */
			tmpirqp = irqptr;
#endif /* DEBUG */
			irqptr = irqptr->airq_next;
		}
		if (!irqptr) {
			/* nothing to recycle: insert a new entry after head */
			irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
			irqptr->airq_temp_cpu = IRQ_UNINIT;
			irqptr->airq_next =
			    apic_irq_table[chosen_irq]->airq_next;
			apic_irq_table[chosen_irq]->airq_next = irqptr;
#ifdef	DEBUG
			tmpirqp = apic_irq_table[chosen_irq];
#endif /* DEBUG */
		}
		irqptr->airq_mps_intr_index = intr_index;
		irqptr->airq_ioapicindex = ioapicindex;
		irqptr->airq_intin_no = ipin;
		if (intr_flagp)
			irqptr->airq_iflag = *intr_flagp;
		/* the sharer uses the vector already owned by the chain head */
		irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector;
		irqptr->airq_share_id = share_id;
		apic_record_rdt_entry(irqptr, irqno);
		*irqptrp = irqptr;
#ifdef	DEBUG
		/* shuffle the pointers to test apic_delspl path */
		if (tmpirqp) {
			/* unlink irqptr and make it the new head of the chain */
			tmpirqp->airq_next = irqptr->airq_next;
			irqptr->airq_next = apic_irq_table[chosen_irq];
			apic_irq_table[chosen_irq] = irqptr;
		}
#endif /* DEBUG */
		mutex_exit(&airq_mutex);
		return (VIRTIRQ(chosen_irq, share_id));
	}
	return (-1);
}
3448 
3449 /*
3450  *
3451  */
3452 static int
3453 apic_setup_sci_irq_table(int irqno, uchar_t ipl, iflag_t *intr_flagp)
3454 {
3455 	int	intr_index;
3456 	uchar_t	ipin, ioapicindex, vector;
3457 	apic_irq_t *irqptr;
3458 
3459 	ASSERT(intr_flagp != NULL);
3460 
3461 	intr_index = SCI_INDEX;
3462 	ioapicindex = acpi_find_ioapic(irqno);
3463 	ASSERT(ioapicindex != 0xFF);
3464 	ipin = irqno - apic_io_vectbase[ioapicindex];
3465 	if (apic_irq_table[irqno] &&
3466 	    apic_irq_table[irqno]->airq_mps_intr_index == SCI_INDEX) {
3467 		ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
3468 		    apic_irq_table[irqno]->airq_ioapicindex ==
3469 		    ioapicindex);
3470 		return (irqno);
3471 	}
3472 
3473 	if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) {
3474 		cmn_err(CE_WARN, "!apic: failed to allocate vector for SCI");
3475 		return (-1);
3476 	}
3477 	mutex_enter(&airq_mutex);
3478 	if (apic_irq_table[irqno] == NULL) {
3479 		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
3480 		irqptr->airq_temp_cpu = IRQ_UNINIT;
3481 		apic_irq_table[irqno] = irqptr;
3482 	} else {
3483 		/*
3484 		 *  We assume that SCI is the first to attach this IRQ
3485 		 */
3486 		cmn_err(CE_WARN, "!acpi: apic_irq_t not empty for SCI");
3487 		return (-1);
3488 	}
3489 
3490 	apic_max_device_irq = max(irqno, apic_max_device_irq);
3491 	apic_min_device_irq = min(irqno, apic_min_device_irq);
3492 	mutex_exit(&airq_mutex);
3493 	irqptr->airq_ioapicindex = ioapicindex;
3494 	irqptr->airq_intin_no = ipin;
3495 	irqptr->airq_ipl = ipl;
3496 	irqptr->airq_vector = vector;
3497 	irqptr->airq_origirq = (uchar_t)irqno;
3498 	irqptr->airq_share_id = 0;
3499 	irqptr->airq_mps_intr_index = (short)intr_index;
3500 	irqptr->airq_dip = NULL;
3501 	irqptr->airq_major = 0;
3502 	irqptr->airq_cpu = 0;	/* SCI always on CPU 0 */
3503 	irqptr->airq_iflag = *intr_flagp;
3504 	apic_record_rdt_entry(irqptr, irqno);
3505 	return (irqno);
3506 }
3507 
/*
 * Create or reuse the apic_irq_table[] entry for an interrupt and
 * allocate a vector for it.
 *
 * The source of the ioapic/intin information depends on the arguments:
 *   - type is MSI or MSI-X: no ioapic is involved and a free irq is
 *     allocated to hold the entry;
 *   - intrp != NULL: values come from the MP table I/O interrupt entry;
 *   - intr_flagp != NULL: ACPI case, the ioapic is derived from irqno;
 *   - otherwise: default configuration on ioapic index 0.
 *
 * If no vector is available at normal priority, the function first
 * tries to share a vector (apic_share_vector()) and then falls back to
 * the high priority vector pool.
 *
 * Returns the irq (possibly different from irqno, or a VIRTIRQ value
 * when a vector is shared), or -1 on failure.
 */
static int
apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp,
    struct intrspec *ispec, iflag_t *intr_flagp, int type)
{
	/*
	 * NOTE(review): ispec is dereferenced by these two initializers
	 * before the ASSERT and before the ispec == NULL test further down.
	 */
	int origirq = ispec->intrspec_vec;
	uchar_t ipl = ispec->intrspec_pri;
	int	newirq, intr_index;
	uchar_t	ipin, ioapic, ioapicindex, vector;
	apic_irq_t *irqptr;
	major_t	major;
	dev_info_t	*sdip;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d "
	    "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq));

	ASSERT(ispec != NULL);

	major =  (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;

	if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
		/* MSI/X doesn't need to setup ioapic stuffs */
		ioapicindex = 0xff;
		ioapic = 0xff;
		ipin = (uchar_t)0xff;
		intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX :
		    MSIX_INDEX;
		mutex_enter(&airq_mutex);
		if ((irqno = apic_allocate_irq(APIC_FIRST_FREE_IRQ)) == -1) {
			mutex_exit(&airq_mutex);
			/* need an irq for MSI/X to index into autovect[] */
			cmn_err(CE_WARN, "No interrupt irq: %s instance %d",
			    ddi_get_name(dip), ddi_get_instance(dip));
			return (-1);
		}
		mutex_exit(&airq_mutex);

	} else if (intrp != NULL) {
		/* MP table case: the index is the entry's position */
		intr_index = (int)(intrp - apic_io_intrp);
		ioapic = intrp->intr_destid;
		ipin = intrp->intr_destintin;
		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
			if (apic_io_id[ioapicindex] == ioapic)
				break;
		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
		    (ioapic == INTR_ALL_APIC));

		/* check whether this intin# has been used by another irqno */
		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) {
			return (newirq);
		}

	} else if (intr_flagp != NULL) {
		/* ACPI case */
		intr_index = ACPI_INDEX;
		ioapicindex = acpi_find_ioapic(irqno);
		ASSERT(ioapicindex != 0xFF);
		ioapic = apic_io_id[ioapicindex];
		ipin = irqno - apic_io_vectbase[ioapicindex];
		/* an existing ACPI entry for this irq is simply reused */
		if (apic_irq_table[irqno] &&
		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
			    apic_irq_table[irqno]->airq_ioapicindex ==
			    ioapicindex);
			return (irqno);
		}

	} else {
		/* default configuration */
		ioapicindex = 0;
		ioapic = apic_io_id[ioapicindex];
		ipin = (uchar_t)irqno;
		intr_index = DEFAULT_INDEX;
	}

	if (ispec == NULL) {
		APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n",
		    irqno));
	} else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) {
		/* no free vector at normal priority: try to share one */
		if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index,
		    ipl, ioapicindex, ipin, &irqptr)) != -1) {
			irqptr->airq_ipl = ipl;
			irqptr->airq_origirq = (uchar_t)origirq;
			irqptr->airq_dip = dip;
			irqptr->airq_major = major;
			/* a head entry without a dip is reported as the SCI */
			sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip;
			if (sdip == NULL) {
				cmn_err(CE_WARN, "Sharing vectors: %s"
				    " instance %d and SCI",
				    ddi_get_name(dip), ddi_get_instance(dip));
			} else {
				cmn_err(CE_WARN, "Sharing vectors: %s"
				    " instance %d and %s instance %d",
				    ddi_get_name(sdip), ddi_get_instance(sdip),
				    ddi_get_name(dip), ddi_get_instance(dip));
			}
			return (newirq);
		}
		/* try high priority allocation now  that share has failed */
		if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) {
			cmn_err(CE_WARN, "No interrupt vector: %s instance %d",
			    ddi_get_name(dip), ddi_get_instance(dip));
			return (-1);
		}
	}

	mutex_enter(&airq_mutex);
	if (apic_irq_table[irqno] == NULL) {
		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
		irqptr->airq_temp_cpu = IRQ_UNINIT;
		apic_irq_table[irqno] = irqptr;
	} else {
		irqptr = apic_irq_table[irqno];
		if (irqptr->airq_mps_intr_index != FREE_INDEX) {
			/*
			 * The slot is used by another irqno, so allocate
			 * a free irqno for this interrupt
			 */
			newirq = apic_allocate_irq(APIC_FIRST_FREE_IRQ);
			if (newirq == -1) {
				mutex_exit(&airq_mutex);
				return (-1);
			}
			irqno = newirq;
			irqptr = apic_irq_table[irqno];
			if (irqptr == NULL) {
				irqptr = kmem_zalloc(sizeof (apic_irq_t),
				    KM_SLEEP);
				irqptr->airq_temp_cpu = IRQ_UNINIT;
				apic_irq_table[irqno] = irqptr;
			}
			/* the vector was allocated for the old irqno */
			apic_modify_vector(vector, newirq);
		}
	}
	apic_max_device_irq = max(irqno, apic_max_device_irq);
	apic_min_device_irq = min(irqno, apic_min_device_irq);
	mutex_exit(&airq_mutex);
	irqptr->airq_ioapicindex = ioapicindex;
	irqptr->airq_intin_no = ipin;
	irqptr->airq_ipl = ipl;
	irqptr->airq_vector = vector;
	irqptr->airq_origirq = (uchar_t)origirq;
	irqptr->airq_share_id = 0;
	irqptr->airq_mps_intr_index = (short)intr_index;
	irqptr->airq_dip = dip;
	irqptr->airq_major = major;
	irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin);
	if (intr_flagp)
		irqptr->airq_iflag = *intr_flagp;

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
		/* setup I/O APIC entry for non-MSI/X interrupts */
		apic_record_rdt_entry(irqptr, irqno);
	}
	return (irqno);
}
3667 
3668 /*
3669  * return the cpu to which this intr should be bound.
3670  * Check properties or any other mechanism to see if user wants it
3671  * bound to a specific CPU. If so, return the cpu id with high bit set.
3672  * If not, use the policy to choose a cpu and return the id.
3673  */
3674 uchar_t
3675 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
3676 {
3677 	int	instance, instno, prop_len, bind_cpu, count;
3678 	uint_t	i, rc;
3679 	uchar_t	cpu;
3680 	major_t	major;
3681 	char	*name, *drv_name, *prop_val, *cptr;
3682 	char	prop_name[32];
3683 
3684 
3685 	if (apic_intr_policy == INTR_LOWEST_PRIORITY)
3686 		return (IRQ_UNBOUND);
3687 
3688 	drv_name = NULL;
3689 	rc = DDI_PROP_NOT_FOUND;
3690 	major = (major_t)-1;
3691 	if (dip != NULL) {
3692 		name = ddi_get_name(dip);
3693 		major = ddi_name_to_major(name);
3694 		drv_name = ddi_major_to_name(major);
3695 		instance = ddi_get_instance(dip);
3696 		if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
3697 			i = apic_min_device_irq;
3698 			for (; i <= apic_max_device_irq; i++) {
3699 
3700 				if ((i == irq) || (apic_irq_table[i] == NULL) ||
3701 				    (apic_irq_table[i]->airq_mps_intr_index
3702 				    == FREE_INDEX))
3703 					continue;
3704 
3705 				if ((apic_irq_table[i]->airq_major == major) &&
3706 				    (!(apic_irq_table[i]->airq_cpu &
3707 				    IRQ_USER_BOUND))) {
3708 
3709 					cpu = apic_irq_table[i]->airq_cpu;
3710 
3711 					cmn_err(CE_CONT,
3712 					    "!pcplusmp: %s (%s) instance #%d "
3713 					    "vector 0x%x ioapic 0x%x "
3714 					    "intin 0x%x is bound to cpu %d\n",
3715 					    name, drv_name, instance, irq,
3716 					    ioapicid, intin, cpu);
3717 					return (cpu);
3718 				}
3719 			}
3720 		}
3721 		/*
3722 		 * search for "drvname"_intpt_bind_cpus property first, the
3723 		 * syntax of the property should be "a[,b,c,...]" where
3724 		 * instance 0 binds to cpu a, instance 1 binds to cpu b,
3725 		 * instance 3 binds to cpu c...
3726 		 * ddi_getlongprop() will search /option first, then /
3727 		 * if "drvname"_intpt_bind_cpus doesn't exist, then find
3728 		 * intpt_bind_cpus property.  The syntax is the same, and
3729 		 * it applies to all the devices if its "drvname" specific
3730 		 * property doesn't exist
3731 		 */
3732 		(void) strcpy(prop_name, drv_name);
3733 		(void) strcat(prop_name, "_intpt_bind_cpus");
3734 		rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
3735 		    (caddr_t)&prop_val, &prop_len);
3736 		if (rc != DDI_PROP_SUCCESS) {
3737 			rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
3738 			    "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
3739 		}
3740 	}
3741 	if (rc == DDI_PROP_SUCCESS) {
3742 		for (i = count = 0; i < (prop_len - 1); i++)
3743 			if (prop_val[i] == ',')
3744 				count++;
3745 		if (prop_val[i-1] != ',')
3746 			count++;
3747 		/*
3748 		 * if somehow the binding instances defined in the
3749 		 * property are not enough for this instno., then
3750 		 * reuse the pattern for the next instance until
3751 		 * it reaches the requested instno
3752 		 */
3753 		instno = instance % count;
3754 		i = 0;
3755 		cptr = prop_val;
3756 		while (i < instno)
3757 			if (*cptr++ == ',')
3758 				i++;
3759 		bind_cpu = stoi(&cptr);
3760 		kmem_free(prop_val, prop_len);
3761 		/* if specific cpu is bogus, then default to cpu 0 */
3762 		if (bind_cpu >= apic_nproc) {
3763 			cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present",
3764 			    prop_name, prop_val, bind_cpu);
3765 			bind_cpu = 0;
3766 		} else {
3767 			/* indicate that we are bound at user request */
3768 			bind_cpu |= IRQ_USER_BOUND;
3769 		}
3770 		/*
3771 		 * no need to check apic_cpus[].aci_status, if specific cpu is
3772 		 * not up, then post_cpu_start will handle it.
3773 		 */
3774 	} else {
3775 		/*
3776 		 * We change bind_cpu only for every two calls
3777 		 * as most drivers still do 2 add_intrs for every
3778 		 * interrupt
3779 		 */
3780 		bind_cpu = (apic_next_bind_cpu++) / 2;
3781 		if (bind_cpu >= apic_nproc) {
3782 			apic_next_bind_cpu = 1;
3783 			bind_cpu = 0;
3784 		}
3785 	}
3786 	if (drv_name != NULL)
3787 		cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d "
3788 		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
3789 		    name, drv_name, instance,
3790 		    irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND);
3791 	else
3792 		cmn_err(CE_CONT, "!pcplusmp: "
3793 		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
3794 		    irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND);
3795 
3796 	return ((uchar_t)bind_cpu);
3797 }
3798 
3799 static struct apic_io_intr *
3800 apic_find_io_intr_w_busid(int irqno, int busid)
3801 {
3802 	struct	apic_io_intr	*intrp;
3803 
3804 	/*
3805 	 * It can have more than 1 entry with same source bus IRQ,
3806 	 * but unique with the source bus id
3807 	 */
3808 	intrp = apic_io_intrp;
3809 	if (intrp != NULL) {
3810 		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
3811 			if (intrp->intr_irq == irqno &&
3812 			    intrp->intr_busid == busid &&
3813 			    intrp->intr_type == IO_INTR_INT)
3814 				return (intrp);
3815 			intrp++;
3816 		}
3817 	}
3818 	APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:"
3819 	    "busid %x:%x\n", irqno, busid));
3820 	return ((struct apic_io_intr *)NULL);
3821 }
3822 
3823 
3824 struct mps_bus_info {
3825 	char	*bus_name;
3826 	int	bus_id;
3827 } bus_info_array[] = {
3828 	"ISA ", BUS_ISA,
3829 	"PCI ", BUS_PCI,
3830 	"EISA ", BUS_EISA,
3831 	"XPRESS", BUS_XPRESS,
3832 	"PCMCIA", BUS_PCMCIA,
3833 	"VL ", BUS_VL,
3834 	"CBUS ", BUS_CBUS,
3835 	"CBUSII", BUS_CBUSII,
3836 	"FUTURE", BUS_FUTURE,
3837 	"INTERN", BUS_INTERN,
3838 	"MBI ", BUS_MBI,
3839 	"MBII ", BUS_MBII,
3840 	"MPI ", BUS_MPI,
3841 	"MPSA ", BUS_MPSA,
3842 	"NUBUS ", BUS_NUBUS,
3843 	"TC ", BUS_TC,
3844 	"VME ", BUS_VME
3845 };
3846 
3847 static int
3848 apic_find_bus_type(char *bus)
3849 {
3850 	int	i = 0;
3851 
3852 	for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++)
3853 		if (strncmp(bus, bus_info_array[i].bus_name,
3854 		    strlen(bus_info_array[i].bus_name)) == 0)
3855 			return (bus_info_array[i].bus_id);
3856 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus));
3857 	return (0);
3858 }
3859 
3860 static int
3861 apic_find_bus(int busid)
3862 {
3863 	struct	apic_bus	*busp;
3864 
3865 	busp = apic_busp;
3866 	while (busp->bus_entry == APIC_BUS_ENTRY) {
3867 		if (busp->bus_id == busid)
3868 			return (apic_find_bus_type((char *)&busp->bus_str1));
3869 		busp++;
3870 	}
3871 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid));
3872 	return (0);
3873 }
3874 
3875 static int
3876 apic_find_bus_id(int bustype)
3877 {
3878 	struct	apic_bus	*busp;
3879 
3880 	busp = apic_busp;
3881 	while (busp->bus_entry == APIC_BUS_ENTRY) {
3882 		if (apic_find_bus_type((char *)&busp->bus_str1) == bustype)
3883 			return (busp->bus_id);
3884 		busp++;
3885 	}
3886 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x",
3887 	    bustype));
3888 	return (-1);
3889 }
3890 
3891 /*
3892  * Check if a particular irq need to be reserved for any io_intr
3893  */
3894 static struct apic_io_intr *
3895 apic_find_io_intr(int irqno)
3896 {
3897 	struct	apic_io_intr	*intrp;
3898 
3899 	intrp = apic_io_intrp;
3900 	if (intrp != NULL) {
3901 		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
3902 			if (intrp->intr_irq == irqno &&
3903 			    intrp->intr_type == IO_INTR_INT)
3904 				return (intrp);
3905 			intrp++;
3906 		}
3907 	}
3908 	return ((struct apic_io_intr *)NULL);
3909 }
3910 
3911 /*
3912  * Check if the given ioapicindex intin combination has already been assigned
3913  * an irq. If so return irqno. Else -1
3914  */
3915 static int
3916 apic_find_intin(uchar_t ioapic, uchar_t intin)
3917 {
3918 	apic_irq_t *irqptr;
3919 	int	i;
3920 
3921 	/* find ioapic and intin in the apic_irq_table[] and return the index */
3922 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
3923 		irqptr = apic_irq_table[i];
3924 		while (irqptr) {
3925 			if ((irqptr->airq_mps_intr_index >= 0) &&
3926 			    (irqptr->airq_intin_no == intin) &&
3927 			    (irqptr->airq_ioapicindex == ioapic)) {
3928 				APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq "
3929 				    "entry for ioapic:intin %x:%x "
3930 				    "shared interrupts ?", ioapic, intin));
3931 				return (i);
3932 			}
3933 			irqptr = irqptr->airq_next;
3934 		}
3935 	}
3936 	return (-1);
3937 }
3938 
3939 int
3940 apic_allocate_irq(int irq)
3941 {
3942 	int	freeirq, i;
3943 
3944 	if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1)
3945 		if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ,
3946 		    (irq - 1))) == -1) {
3947 			/*
3948 			 * if BIOS really defines every single irq in the mps
3949 			 * table, then don't worry about conflicting with
3950 			 * them, just use any free slot in apic_irq_table
3951 			 */
3952 			for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
3953 				if ((apic_irq_table[i] == NULL) ||
3954 				    apic_irq_table[i]->airq_mps_intr_index ==
3955 				    FREE_INDEX) {
3956 				freeirq = i;
3957 				break;
3958 			}
3959 		}
3960 		if (freeirq == -1) {
3961 			/* This shouldn't happen, but just in case */
3962 			cmn_err(CE_WARN, "pcplusmp: NO available IRQ");
3963 			return (-1);
3964 		}
3965 	}
3966 	if (apic_irq_table[freeirq] == NULL) {
3967 		apic_irq_table[freeirq] =
3968 		    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
3969 		if (apic_irq_table[freeirq] == NULL) {
3970 			cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ");
3971 			return (-1);
3972 		}
3973 		apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX;
3974 	}
3975 	return (freeirq);
3976 }
3977 
3978 static int
3979 apic_find_free_irq(int start, int end)
3980 {
3981 	int	i;
3982 
3983 	for (i = start; i <= end; i++)
3984 		/* Check if any I/O entry needs this IRQ */
3985 		if (apic_find_io_intr(i) == NULL) {
3986 			/* Then see if it is free */
3987 			if ((apic_irq_table[i] == NULL) ||
3988 			    (apic_irq_table[i]->airq_mps_intr_index ==
3989 			    FREE_INDEX)) {
3990 				return (i);
3991 			}
3992 		}
3993 	return (-1);
3994 }
3995 
3996 /*
3997  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
3998  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
3999  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
4000  * requests and allocated only when pri is set.
4001  */
4002 static uchar_t
4003 apic_allocate_vector(int ipl, int irq, int pri)
4004 {
4005 	int	lowest, highest, i;
4006 
4007 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
4008 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
4009 
4010 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
4011 		lowest -= APIC_VECTOR_PER_IPL;
4012 
4013 #ifdef	DEBUG
4014 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
4015 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
4016 #endif /* DEBUG */
4017 	if (pri == 0)
4018 		highest -= APIC_HI_PRI_VECTS;
4019 
4020 	for (i = lowest; i < highest; i++) {
4021 		if ((i == T_FASTTRAP) || (i == APIC_SPUR_INTR) ||
4022 			(i == T_SYSCALLINT) || (i == T_DTRACE_PROBE) ||
4023 			(i == T_DTRACE_RET))
4024 			continue;
4025 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
4026 			apic_vector_to_irq[i] = (uchar_t)irq;
4027 			return (i);
4028 		}
4029 	}
4030 
4031 	return (0);
4032 }
4033 
4034 static void
4035 apic_modify_vector(uchar_t vector, int irq)
4036 {
4037 	apic_vector_to_irq[vector] = (uchar_t)irq;
4038 }
4039 
4040 /*
4041  * Mark vector as being in the process of being deleted. Interrupts
4042  * may still come in on some CPU. The moment an interrupt comes with
4043  * the new vector, we know we can free the old one. Called only from
4044  * addspl and delspl with interrupts disabled. Because an interrupt
4045  * can be shared, but no interrupt from either device may come in,
4046  * we also use a timeout mechanism, which we arbitrarily set to
4047  * apic_revector_timeout microseconds.
4048  */
4049 static void
4050 apic_mark_vector(uchar_t oldvector, uchar_t newvector)
4051 {
4052 	int iflag = intr_clear();
4053 	lock_set(&apic_revector_lock);
4054 	if (!apic_oldvec_to_newvec) {
4055 		apic_oldvec_to_newvec =
4056 		    kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2,
4057 		    KM_NOSLEEP);
4058 
4059 		if (!apic_oldvec_to_newvec) {
4060 			/*
4061 			 * This failure is not catastrophic.
4062 			 * But, the oldvec will never be freed.
4063 			 */
4064 			apic_error |= APIC_ERR_MARK_VECTOR_FAIL;
4065 			lock_clear(&apic_revector_lock);
4066 			intr_restore(iflag);
4067 			return;
4068 		}
4069 		apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR];
4070 	}
4071 
4072 	/* See if we already did this for drivers which do double addintrs */
4073 	if (apic_oldvec_to_newvec[oldvector] != newvector) {
4074 		apic_oldvec_to_newvec[oldvector] = newvector;
4075 		apic_newvec_to_oldvec[newvector] = oldvector;
4076 		apic_revector_pending++;
4077 	}
4078 	lock_clear(&apic_revector_lock);
4079 	intr_restore(iflag);
4080 	(void) timeout(apic_xlate_vector_free_timeout_handler,
4081 	    (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout));
4082 }
4083 
4084 /*
4085  * xlate_vector is called from intr_enter if revector_pending is set.
4086  * It will xlate it if needed and mark the old vector as free.
4087  */
4088 static uchar_t
4089 apic_xlate_vector(uchar_t vector)
4090 {
4091 	uchar_t	newvector, oldvector = 0;
4092 
4093 	lock_set(&apic_revector_lock);
4094 	/* Do we really need to do this ? */
4095 	if (!apic_revector_pending) {
4096 		lock_clear(&apic_revector_lock);
4097 		return (vector);
4098 	}
4099 	if ((newvector = apic_oldvec_to_newvec[vector]) != 0)
4100 		oldvector = vector;
4101 	else {
4102 		/*
4103 		 * The incoming vector is new . See if a stale entry is
4104 		 * remaining
4105 		 */
4106 		if ((oldvector = apic_newvec_to_oldvec[vector]) != 0)
4107 			newvector = vector;
4108 	}
4109 
4110 	if (oldvector) {
4111 		apic_revector_pending--;
4112 		apic_oldvec_to_newvec[oldvector] = 0;
4113 		apic_newvec_to_oldvec[newvector] = 0;
4114 		apic_free_vector(oldvector);
4115 		lock_clear(&apic_revector_lock);
4116 		/* There could have been more than one reprogramming! */
4117 		return (apic_xlate_vector(newvector));
4118 	}
4119 	lock_clear(&apic_revector_lock);
4120 	return (vector);
4121 }
4122 
4123 void
4124 apic_xlate_vector_free_timeout_handler(void *arg)
4125 {
4126 	int iflag;
4127 	uchar_t oldvector, newvector;
4128 
4129 	oldvector = (uchar_t)(uintptr_t)arg;
4130 	iflag = intr_clear();
4131 	lock_set(&apic_revector_lock);
4132 	if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) {
4133 		apic_free_vector(oldvector);
4134 		apic_oldvec_to_newvec[oldvector] = 0;
4135 		apic_newvec_to_oldvec[newvector] = 0;
4136 		apic_revector_pending--;
4137 	}
4138 
4139 	lock_clear(&apic_revector_lock);
4140 	intr_restore(iflag);
4141 }
4142 
4143 
4144 /* Mark vector as not being used by any irq */
4145 static void
4146 apic_free_vector(uchar_t vector)
4147 {
4148 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
4149 }
4150 
4151 /*
4152  * compute the polarity, trigger mode and vector for programming into
4153  * the I/O apic and record in airq_rdt_entry.
4154  */
4155 static void
4156 apic_record_rdt_entry(apic_irq_t *irqptr, int irq)
4157 {
4158 	int	ioapicindex, bus_type, vector;
4159 	short	intr_index;
4160 	uint_t	level, po, io_po;
4161 	struct apic_io_intr *iointrp;
4162 
4163 	intr_index = irqptr->airq_mps_intr_index;
4164 	DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d "
4165 	    "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq,
4166 	    (void *)irqptr->airq_dip, irqptr->airq_vector));
4167 
4168 	if (intr_index == RESERVE_INDEX) {
4169 		apic_error |= APIC_ERR_INVALID_INDEX;
4170 		return;
4171 	} else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) {
4172 		return;
4173 	}
4174 
4175 	vector = irqptr->airq_vector;
4176 	ioapicindex = irqptr->airq_ioapicindex;
4177 	/* Assume edge triggered by default */
4178 	level = 0;
4179 	/* Assume active high by default */
4180 	po = 0;
4181 
4182 	if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) {
4183 		ASSERT(irq < 16);
4184 		if (eisa_level_intr_mask & (1 << irq))
4185 			level = AV_LEVEL;
4186 		if (intr_index == FREE_INDEX && apic_defconf == 0)
4187 			apic_error |= APIC_ERR_INVALID_INDEX;
4188 	} else if (intr_index == ACPI_INDEX || intr_index == SCI_INDEX) {
4189 		bus_type = irqptr->airq_iflag.bustype;
4190 		if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) {
4191 			if (bus_type == BUS_PCI)
4192 				level = AV_LEVEL;
4193 		} else
4194 			level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ?
4195 			    AV_LEVEL : 0;
4196 		if (level &&
4197 		    ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) ||
4198 		    (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM &&
4199 		    bus_type == BUS_PCI)))
4200 			po = AV_ACTIVE_LOW;
4201 	} else {
4202 		iointrp = apic_io_intrp + intr_index;
4203 		bus_type = apic_find_bus(iointrp->intr_busid);
4204 		if (iointrp->intr_el == INTR_EL_CONFORM) {
4205 			if ((irq < 16) && (eisa_level_intr_mask & (1 << irq)))
4206 				level = AV_LEVEL;
4207 			else if (bus_type == BUS_PCI)
4208 				level = AV_LEVEL;
4209 		} else
4210 			level = (iointrp->intr_el == INTR_EL_LEVEL) ?
4211 			    AV_LEVEL : 0;
4212 		if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) ||
4213 		    (iointrp->intr_po == INTR_PO_CONFORM &&
4214 		    bus_type == BUS_PCI)))
4215 			po = AV_ACTIVE_LOW;
4216 	}
4217 	if (level)
4218 		apic_level_intr[irq] = 1;
4219 	/*
4220 	 * The 82489DX External APIC cannot do active low polarity interrupts.
4221 	 */
4222 	if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX))
4223 		io_po = po;
4224 	else
4225 		io_po = 0;
4226 
4227 	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG)
4228 		printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n",
4229 		    ioapicindex, irqptr->airq_intin_no, level, io_po, vector);
4230 
4231 	irqptr->airq_rdt_entry = level|io_po|vector;
4232 }
4233 
4234 /*
4235  * Call rebind to do the actual programming.
4236  */
4237 static int
4238 apic_setup_io_intr(apic_irq_t *irqptr, int irq)
4239 {
4240 	int rv;
4241 
4242 	if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1,
4243 	    IMMEDIATE))
4244 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
4245 		rv = apic_rebind(irqptr, 0, 1, IMMEDIATE);
4246 
4247 	return (rv);
4248 }
4249 
4250 /*
4251  * Deferred reprogramming: Call apic_rebind to do the real work.
4252  */
4253 static int
4254 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq)
4255 {
4256 	int rv;
4257 
4258 	if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1,
4259 	    DEFERRED))
4260 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
4261 		rv = apic_rebind(irqptr, 0, 1, DEFERRED);
4262 
4263 	return (rv);
4264 }
4265 
4266 /*
4267  * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock
4268  * if false (0) means lock is already held (e.g: in rebind_all).
4269  */
4270 static int
4271 apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when)
4272 {
4273 	int			intin_no;
4274 	volatile int32_t	*ioapic;
4275 	uchar_t			airq_temp_cpu;
4276 	apic_cpus_info_t	*cpu_infop;
4277 	int			iflag;
4278 	int		which_irq = apic_vector_to_irq[irq_ptr->airq_vector];
4279 
4280 	intin_no = irq_ptr->airq_intin_no;
4281 	ioapic = apicioadr[irq_ptr->airq_ioapicindex];
4282 	airq_temp_cpu = irq_ptr->airq_temp_cpu;
4283 	if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) {
4284 		if (airq_temp_cpu & IRQ_USER_BOUND)
4285 			/* Mask off high bit so it can be used as array index */
4286 			airq_temp_cpu &= ~IRQ_USER_BOUND;
4287 
4288 		ASSERT(airq_temp_cpu < apic_nproc);
4289 	}
4290 
4291 	iflag = intr_clear();
4292 
4293 	if (acquire_lock)
4294 		lock_set(&apic_ioapic_lock);
4295 
4296 	/*
4297 	 * Can't bind to a CPU that's not online:
4298 	 */
4299 	cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND];
4300 	if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) {
4301 
4302 		if (acquire_lock)
4303 			lock_clear(&apic_ioapic_lock);
4304 
4305 		intr_restore(iflag);
4306 		return (1);
4307 	}
4308 
4309 	/*
4310 	 * If this is a deferred reprogramming attempt, ensure we have
4311 	 * not been passed stale data:
4312 	 */
4313 	if ((when == DEFERRED) &&
4314 	    (apic_reprogram_info[which_irq].valid == 0)) {
4315 		/* stale info, so just return */
4316 		if (acquire_lock)
4317 			lock_clear(&apic_ioapic_lock);
4318 
4319 		intr_restore(iflag);
4320 		return (0);
4321 	}
4322 
4323 	/*
4324 	 * If this interrupt has been delivered to a CPU and that CPU
4325 	 * has not handled it yet, we cannot reprogram the IOAPIC now:
4326 	 */
4327 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) &&
4328 	    apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu,
4329 	    ioapic, intin_no, which_irq) != 0) {
4330 
4331 		if (acquire_lock)
4332 			lock_clear(&apic_ioapic_lock);
4333 
4334 		intr_restore(iflag);
4335 		return (0);
4336 	}
4337 
4338 	/*
4339 	 * NOTE: We do not unmask the RDT here, as an interrupt MAY still
4340 	 * come in before we have a chance to reprogram it below.  The
4341 	 * reprogramming below will simultaneously change and unmask the
4342 	 * RDT entry.
4343 	 */
4344 
4345 	if ((uchar_t)bind_cpu == IRQ_UNBOUND) {
4346 		/* Write the RDT entry -- no specific CPU binding */
4347 		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL);
4348 
4349 		if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND)
4350 			apic_cpus[airq_temp_cpu].aci_temp_bound--;
4351 
4352 		/* Write the vector, trigger, and polarity portion of the RDT */
4353 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
4354 		    AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry);
4355 		if (acquire_lock)
4356 			lock_clear(&apic_ioapic_lock);
4357 		irq_ptr->airq_temp_cpu = IRQ_UNBOUND;
4358 		intr_restore(iflag);
4359 		return (0);
4360 	}
4361 
4362 	if (bind_cpu & IRQ_USER_BOUND) {
4363 		cpu_infop->aci_bound++;
4364 	} else {
4365 		cpu_infop->aci_temp_bound++;
4366 	}
4367 	ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
4368 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
4369 		/* Write the RDT entry -- bind to a specific CPU: */
4370 		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no,
4371 		    cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET);
4372 	}
4373 	if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
4374 		apic_cpus[airq_temp_cpu].aci_temp_bound--;
4375 	}
4376 	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
4377 		/* Write the vector, trigger, and polarity portion of the RDT */
4378 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
4379 		    AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry);
4380 	} else {
4381 		if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) {
4382 			/* first one */
4383 			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
4384 			    "apic_pci_msi_enable_vector\n"));
4385 			if (apic_pci_msi_enable_vector(irq_ptr->airq_dip,
4386 			    (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
4387 			    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX, which_irq,
4388 			    irq_ptr->airq_vector, irq_ptr->airq_intin_no,
4389 			    cpu_infop->aci_local_id) != PSM_SUCCESS) {
4390 				cmn_err(CE_WARN, "pcplusmp: "
4391 					"apic_pci_msi_enable_vector "
4392 					"returned PSM_FAILURE");
4393 			}
4394 		}
4395 		if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) ==
4396 		    irq_ptr->airq_origirq) { /* last one */
4397 			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
4398 			    "pci_msi_enable_mode\n"));
4399 			if (pci_msi_enable_mode(irq_ptr->airq_dip,
4400 			    (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
4401 			    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX,
4402 			    which_irq) != DDI_SUCCESS) {
4403 				DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: "
4404 				    "pci_msi_enable failed\n"));
4405 				(void) pci_msi_unconfigure(irq_ptr->airq_dip,
4406 				(irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
4407 				DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX,
4408 				which_irq);
4409 			}
4410 		}
4411 	}
4412 	if (acquire_lock)
4413 		lock_clear(&apic_ioapic_lock);
4414 	irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu;
4415 	apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND));
4416 	intr_restore(iflag);
4417 	return (0);
4418 }
4419 
4420 /*
4421  * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR
4422  * bit set.  Sets up a timeout to perform the reprogramming at a later time
4423  * if it cannot wait for the Remote IRR bit to clear (or if waiting did not
4424  * result in the bit's clearing).
4425  *
4426  * This function will mask the RDT entry if the Remote IRR bit is set.
4427  *
4428  * Returns non-zero if the caller should defer IOAPIC reprogramming.
4429  */
4430 static int
4431 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
4432 	int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq)
4433 {
4434 	int32_t			rdt_entry;
4435 	int			waited;
4436 
4437 	/* Mask the RDT entry, but only if it's a level-triggered interrupt */
4438 	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no);
4439 	if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) {
4440 
4441 		/* Mask it */
4442 		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
4443 		    AV_MASK | rdt_entry);
4444 	}
4445 
4446 	/*
4447 	 * Wait for the delivery pending bit to clear.
4448 	 */
4449 	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
4450 	    (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) {
4451 
4452 		/*
4453 		 * If we're still waiting on the delivery of this interrupt,
4454 		 * continue to wait here until it is delivered (this should be
4455 		 * a very small amount of time, but include a timeout just in
4456 		 * case).
4457 		 */
4458 		for (waited = 0; waited < apic_max_usecs_clear_pending;
4459 		    waited += APIC_USECS_PER_WAIT_INTERVAL) {
4460 			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
4461 			    & AV_PENDING) == 0) {
4462 				break;
4463 			}
4464 			drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
4465 		}
4466 
4467 		if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
4468 		    AV_PENDING) != 0) {
4469 			cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not "
4470 			    "deliver interrupt to local APIC within "
4471 			    "%d usecs.", irq_ptr->airq_ioapicindex,
4472 			    irq_ptr->airq_intin_no,
4473 			    apic_max_usecs_clear_pending);
4474 		}
4475 	}
4476 
4477 	/*
4478 	 * If the remote IRR bit is set, then the interrupt has been sent
4479 	 * to a CPU for processing.  We have no choice but to wait for
4480 	 * that CPU to process the interrupt, at which point the remote IRR
4481 	 * bit will be cleared.
4482 	 */
4483 	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
4484 	    (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) {
4485 
4486 		/*
4487 		 * If the CPU that this RDT is bound to is NOT the current
4488 		 * CPU, wait until that CPU handles the interrupt and ACKs
4489 		 * it.  If this interrupt is not bound to any CPU (that is,
4490 		 * if it's bound to the logical destination of "anyone"), it
4491 		 * may have been delivered to the current CPU so handle that
4492 		 * case by deferring the reprogramming (below).
4493 		 */
4494 		kpreempt_disable();
4495 		if ((old_bind_cpu != IRQ_UNBOUND) &&
4496 		    (old_bind_cpu != IRQ_UNINIT) &&
4497 		    (old_bind_cpu != psm_get_cpu_id())) {
4498 			for (waited = 0; waited < apic_max_usecs_clear_pending;
4499 			    waited += APIC_USECS_PER_WAIT_INTERVAL) {
4500 				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4501 				    intin_no) & AV_REMOTE_IRR) == 0) {
4502 
4503 					/* Clear the reprogramming state: */
4504 					lock_set(&apic_ioapic_reprogram_lock);
4505 
4506 					apic_reprogram_info[which_irq].valid
4507 					    = 0;
4508 					apic_reprogram_info[which_irq].bindcpu
4509 					    = 0;
4510 					apic_reprogram_info[which_irq].timeouts
4511 					    = 0;
4512 
4513 					lock_clear(&apic_ioapic_reprogram_lock);
4514 
4515 					/* Remote IRR has cleared! */
4516 					kpreempt_enable();
4517 					return (0);
4518 				}
4519 				drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
4520 			}
4521 		}
4522 		kpreempt_enable();
4523 
4524 		/*
4525 		 * If we waited and the Remote IRR bit is still not cleared,
4526 		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
4527 		 * times for this interrupt, try the last-ditch workarounds:
4528 		 */
4529 		if (apic_reprogram_info[which_irq].timeouts >=
4530 		    APIC_REPROGRAM_MAX_TIMEOUTS) {
4531 
4532 			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
4533 			    & AV_REMOTE_IRR) != 0) {
4534 				/*
4535 				 * Trying to clear the bit through normal
4536 				 * channels has failed.  So as a last-ditch
4537 				 * effort, try to set the trigger mode to
4538 				 * edge, then to level.  This has been
4539 				 * observed to work on many systems.
4540 				 */
4541 				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4542 				    intin_no,
4543 				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4544 				    intin_no) & ~AV_LEVEL);
4545 
4546 				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4547 				    intin_no,
4548 				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4549 				    intin_no) | AV_LEVEL);
4550 
4551 				/*
4552 				 * If the bit's STILL set, declare total and
4553 				 * utter failure
4554 				 */
4555 				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
4556 				    intin_no) & AV_REMOTE_IRR) != 0) {
4557 					cmn_err(CE_WARN, "!IOAPIC %d intin %d: "
4558 					    "Remote IRR failed to reset "
4559 					    "within %d usecs.  Interrupts to "
4560 					    "this pin may cease to function.",
4561 					    irq_ptr->airq_ioapicindex,
4562 					    irq_ptr->airq_intin_no,
4563 					    apic_max_usecs_clear_pending);
4564 				}
4565 			}
4566 			/* Clear the reprogramming state: */
4567 			lock_set(&apic_ioapic_reprogram_lock);
4568 
4569 			apic_reprogram_info[which_irq].valid = 0;
4570 			apic_reprogram_info[which_irq].bindcpu = 0;
4571 			apic_reprogram_info[which_irq].timeouts = 0;
4572 
4573 			lock_clear(&apic_ioapic_reprogram_lock);
4574 		} else {
4575 #ifdef DEBUG
4576 			cmn_err(CE_WARN, "Deferring reprogramming of irq %d",
4577 			    which_irq);
4578 #endif	/* DEBUG */
4579 			/*
4580 			 * If waiting for the Remote IRR bit (above) didn't
4581 			 * allow it to clear, defer the reprogramming:
4582 			 */
4583 			lock_set(&apic_ioapic_reprogram_lock);
4584 
4585 			apic_reprogram_info[which_irq].valid = 1;
4586 			apic_reprogram_info[which_irq].bindcpu = new_bind_cpu;
4587 			apic_reprogram_info[which_irq].timeouts++;
4588 
4589 			lock_clear(&apic_ioapic_reprogram_lock);
4590 
4591 			/* Fire up a timeout to handle this later */
4592 			(void) timeout(apic_reprogram_timeout_handler,
4593 			    (void *) 0,
4594 			    drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY));
4595 
4596 			/* Inform caller to defer IOAPIC programming: */
4597 			return (1);
4598 		}
4599 	}
4600 	return (0);
4601 }
4602 
4603 /*
4604  * Timeout handler that performs the APIC reprogramming
4605  */
4606 /*ARGSUSED*/
4607 static void
4608 apic_reprogram_timeout_handler(void *arg)
4609 {
4610 	/*LINTED: set but not used in function*/
4611 	int i, result;
4612 
4613 	/* Serialize access to this function */
4614 	mutex_enter(&apic_reprogram_timeout_mutex);
4615 
4616 	/*
4617 	 * For each entry in the reprogramming state that's valid,
4618 	 * try the reprogramming again:
4619 	 */
4620 	for (i = 0; i < APIC_MAX_VECTOR; i++) {
4621 		if (apic_reprogram_info[i].valid == 0)
4622 			continue;
4623 		/*
4624 		 * Though we can't really do anything about errors
4625 		 * at this point, keep track of them for reporting.
4626 		 * Note that it is very possible for apic_setup_io_intr
4627 		 * to re-register this very timeout if the Remote IRR bit
4628 		 * has not yet cleared.
4629 		 */
4630 		result = apic_setup_io_intr_deferred(apic_irq_table[i], i);
4631 
4632 #ifdef DEBUG
4633 		if (result)
4634 			cmn_err(CE_WARN, "apic_reprogram_timeout: "
4635 			    "apic_setup_io_intr returned nonzero for "
4636 			    "irq=%d!", i);
4637 #endif	/* DEBUG */
4638 	}
4639 
4640 	mutex_exit(&apic_reprogram_timeout_mutex);
4641 }
4642 
4643 
4644 /*
4645  * Called to migrate all interrupts at an irq to another cpu. safe
4646  * if true means we are not being called from an interrupt
4647  * context and hence it is safe to do a lock_set. If false
4648  * do only a lock_try and return failure ( non 0 ) if we cannot get it
4649  */
4650 static int
4651 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe)
4652 {
4653 	apic_irq_t	*irqptr = irq_ptr;
4654 	int		retval = 0;
4655 	int		iflag;
4656 
4657 	iflag = intr_clear();
4658 	if (!safe) {
4659 		if (lock_try(&apic_ioapic_lock) == 0) {
4660 			intr_restore(iflag);
4661 			return (1);
4662 		}
4663 	} else
4664 		lock_set(&apic_ioapic_lock);
4665 
4666 	while (irqptr) {
4667 		if (irqptr->airq_temp_cpu != IRQ_UNINIT)
4668 			retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE);
4669 		irqptr = irqptr->airq_next;
4670 	}
4671 	lock_clear(&apic_ioapic_lock);
4672 	intr_restore(iflag);
4673 	return (retval);
4674 }
4675 
4676 /*
4677  * apic_intr_redistribute does all the messy computations for identifying
4678  * which interrupt to move to which CPU. Currently we do just one interrupt
4679  * at a time. This reduces the time we spent doing all this within clock
4680  * interrupt. When it is done in idle, we could do more than 1.
4681  * First we find the most busy and the most free CPU (time in ISR only)
4682  * skipping those CPUs that has been identified as being ineligible (cpu_skip)
4683  * Then we look for IRQs which are closest to the difference between the
4684  * most busy CPU and the average ISR load. We try to find one whose load
4685  * is less than difference.If none exists, then we chose one larger than the
4686  * difference, provided it does not make the most idle CPU worse than the
4687  * most busy one. In the end, we clear all the busy fields for CPUs. For
4688  * IRQs, they are cleared as they are scanned.
4689  */
4690 static void
4691 apic_intr_redistribute()
4692 {
4693 	int busiest_cpu, most_free_cpu;
4694 	int cpu_free, cpu_busy, max_busy, min_busy;
4695 	int min_free, diff;
4696 	int	average_busy, cpus_online;
4697 	int i, busy;
4698 	apic_cpus_info_t *cpu_infop;
4699 	apic_irq_t *min_busy_irq = NULL;
4700 	apic_irq_t *max_busy_irq = NULL;
4701 
4702 	busiest_cpu = most_free_cpu = -1;
4703 	cpu_free = cpu_busy = max_busy = average_busy = 0;
4704 	min_free = apic_sample_factor_redistribution;
4705 	cpus_online = 0;
4706 	/*
4707 	 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu
4708 	 * without ioapic_lock. That is OK as we are just doing statistical
4709 	 * sampling anyway and any inaccuracy now will get corrected next time
4710 	 * The call to rebind which actually changes things will make sure
4711 	 * we are consistent.
4712 	 */
4713 	for (i = 0; i < apic_nproc; i++) {
4714 		if (!(apic_redist_cpu_skip & (1 << i)) &&
4715 		    (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {
4716 
4717 			cpu_infop = &apic_cpus[i];
4718 			/*
4719 			 * If no unbound interrupts or only 1 total on this
4720 			 * CPU, skip
4721 			 */
4722 			if (!cpu_infop->aci_temp_bound ||
4723 			    (cpu_infop->aci_bound + cpu_infop->aci_temp_bound)
4724 			    == 1) {
4725 				apic_redist_cpu_skip |= 1 << i;
4726 				continue;
4727 			}
4728 
4729 			busy = cpu_infop->aci_busy;
4730 			average_busy += busy;
4731 			cpus_online++;
4732 			if (max_busy < busy) {
4733 				max_busy = busy;
4734 				busiest_cpu = i;
4735 			}
4736 			if (min_free > busy) {
4737 				min_free = busy;
4738 				most_free_cpu = i;
4739 			}
4740 			if (busy > apic_int_busy_mark) {
4741 				cpu_busy |= 1 << i;
4742 			} else {
4743 				if (busy < apic_int_free_mark)
4744 					cpu_free |= 1 << i;
4745 			}
4746 		}
4747 	}
4748 	if ((cpu_busy && cpu_free) ||
4749 	    (max_busy >= (min_free + apic_diff_for_redistribution))) {
4750 
4751 		apic_num_imbalance++;
4752 #ifdef	DEBUG
4753 		if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4754 			prom_printf(
4755 			    "redistribute busy=%x free=%x max=%x min=%x",
4756 			    cpu_busy, cpu_free, max_busy, min_free);
4757 		}
4758 #endif /* DEBUG */
4759 
4760 
4761 		average_busy /= cpus_online;
4762 
4763 		diff = max_busy - average_busy;
4764 		min_busy = max_busy; /* start with the max possible value */
4765 		max_busy = 0;
4766 		min_busy_irq = max_busy_irq = NULL;
4767 		i = apic_min_device_irq;
4768 		for (; i < apic_max_device_irq; i++) {
4769 			apic_irq_t *irq_ptr;
4770 			/* Change to linked list per CPU ? */
4771 			if ((irq_ptr = apic_irq_table[i]) == NULL)
4772 				continue;
4773 			/* Check for irq_busy & decide which one to move */
4774 			/* Also zero them for next round */
4775 			if ((irq_ptr->airq_temp_cpu == busiest_cpu) &&
4776 			    irq_ptr->airq_busy) {
4777 				if (irq_ptr->airq_busy < diff) {
4778 					/*
4779 					 * Check for least busy CPU,
4780 					 * best fit or what ?
4781 					 */
4782 					if (max_busy < irq_ptr->airq_busy) {
4783 						/*
4784 						 * Most busy within the
4785 						 * required differential
4786 						 */
4787 						max_busy = irq_ptr->airq_busy;
4788 						max_busy_irq = irq_ptr;
4789 					}
4790 				} else {
4791 					if (min_busy > irq_ptr->airq_busy) {
4792 						/*
4793 						 * least busy, but more than
4794 						 * the reqd diff
4795 						 */
4796 						if (min_busy <
4797 						    (diff + average_busy -
4798 						    min_free)) {
4799 							/*
4800 							 * Making sure new cpu
4801 							 * will not end up
4802 							 * worse
4803 							 */
4804 							min_busy =
4805 							    irq_ptr->airq_busy;
4806 
4807 							min_busy_irq = irq_ptr;
4808 						}
4809 					}
4810 				}
4811 			}
4812 			irq_ptr->airq_busy = 0;
4813 		}
4814 
4815 		if (max_busy_irq != NULL) {
4816 #ifdef	DEBUG
4817 			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4818 				prom_printf("rebinding %x to %x",
4819 				    max_busy_irq->airq_vector, most_free_cpu);
4820 			}
4821 #endif /* DEBUG */
4822 			if (apic_rebind_all(max_busy_irq, most_free_cpu, 0)
4823 			    == 0)
4824 				/* Make change permenant */
4825 				max_busy_irq->airq_cpu = (uchar_t)most_free_cpu;
4826 		} else if (min_busy_irq != NULL) {
4827 #ifdef	DEBUG
4828 			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4829 				prom_printf("rebinding %x to %x",
4830 				    min_busy_irq->airq_vector, most_free_cpu);
4831 			}
4832 #endif /* DEBUG */
4833 
4834 			if (apic_rebind_all(min_busy_irq, most_free_cpu, 0) ==
4835 			    0)
4836 				/* Make change permenant */
4837 				min_busy_irq->airq_cpu = (uchar_t)most_free_cpu;
4838 		} else {
4839 			if (cpu_busy != (1 << busiest_cpu)) {
4840 				apic_redist_cpu_skip |= 1 << busiest_cpu;
4841 				/*
4842 				 * We leave cpu_skip set so that next time we
4843 				 * can choose another cpu
4844 				 */
4845 			}
4846 		}
4847 		apic_num_rebind++;
4848 	} else {
4849 		/*
4850 		 * found nothing. Could be that we skipped over valid CPUs
4851 		 * or we have balanced everything. If we had a variable
4852 		 * ticks_for_redistribution, it could be increased here.
4853 		 * apic_int_busy, int_free etc would also need to be
4854 		 * changed.
4855 		 */
4856 		if (apic_redist_cpu_skip)
4857 			apic_redist_cpu_skip = 0;
4858 	}
4859 	for (i = 0; i < apic_nproc; i++) {
4860 		apic_cpus[i].aci_busy = 0;
4861 	}
4862 }
4863 
4864 static void
4865 apic_cleanup_busy()
4866 {
4867 	int i;
4868 	apic_irq_t *irq_ptr;
4869 
4870 	for (i = 0; i < apic_nproc; i++) {
4871 		apic_cpus[i].aci_busy = 0;
4872 	}
4873 
4874 	for (i = apic_min_device_irq; i < apic_max_device_irq; i++) {
4875 		if ((irq_ptr = apic_irq_table[i]) != NULL)
4876 			irq_ptr->airq_busy = 0;
4877 	}
4878 	apic_skipped_redistribute = 0;
4879 }
4880 
4881 
4882 /*
4883  * This function will reprogram the timer.
4884  *
4885  * When in oneshot mode the argument is the absolute time in future to
4886  * generate the interrupt at.
4887  *
4888  * When in periodic mode, the argument is the interval at which the
4889  * interrupts should be generated. There is no need to support the periodic
4890  * mode timer change at this time.
4891  */
4892 static void
4893 apic_timer_reprogram(hrtime_t time)
4894 {
4895 	hrtime_t now;
4896 	uint_t ticks;
4897 
4898 	/*
4899 	 * We should be called from high PIL context (CBE_HIGH_PIL),
4900 	 * so kpreempt is disabled.
4901 	 */
4902 
4903 	if (!apic_oneshot) {
4904 		/* time is the interval for periodic mode */
4905 		ticks = (uint_t)((time) / apic_nsec_per_tick);
4906 	} else {
4907 		/* one shot mode */
4908 
4909 		now = gethrtime();
4910 
4911 		if (time <= now) {
4912 			/*
4913 			 * requested to generate an interrupt in the past
4914 			 * generate an interrupt as soon as possible
4915 			 */
4916 			ticks = apic_min_timer_ticks;
4917 		} else if ((time - now) > apic_nsec_max) {
4918 			/*
4919 			 * requested to generate an interrupt at a time
4920 			 * further than what we are capable of. Set to max
4921 			 * the hardware can handle
4922 			 */
4923 
4924 			ticks = APIC_MAXVAL;
4925 #ifdef DEBUG
4926 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
4927 			    "  %lld  too far in future, current time"
4928 			    "  %lld \n", time, now);
4929 #endif	/* DEBUG */
4930 		} else
4931 			ticks = (uint_t)((time - now) / apic_nsec_per_tick);
4932 	}
4933 
4934 	if (ticks < apic_min_timer_ticks)
4935 		ticks = apic_min_timer_ticks;
4936 
4937 	apicadr[APIC_INIT_COUNT] = ticks;
4938 
4939 }
4940 
4941 /*
4942  * This function will enable timer interrupts.
4943  */
4944 static void
4945 apic_timer_enable(void)
4946 {
4947 	/*
4948 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
4949 	 * so kpreempt is disabled.
4950 	 */
4951 
4952 	if (!apic_oneshot)
4953 		apicadr[APIC_LOCAL_TIMER] =
4954 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
4955 	else {
4956 		/* one shot */
4957 		apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT);
4958 	}
4959 }
4960 
4961 /*
4962  * This function will disable timer interrupts.
4963  */
4964 static void
4965 apic_timer_disable(void)
4966 {
4967 	/*
4968 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
4969 	 * so kpreempt is disabled.
4970 	 */
4971 
4972 	apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
4973 }
4974 
4975 
4976 cyclic_id_t apic_cyclic_id;
4977 
4978 /*
4979  * If this module needs to be a consumer of cyclic subsystem, they
4980  * can be added here, since at this time kernel cyclic subsystem is initialized
4981  * argument is not currently used, and is reserved for future.
4982  */
4983 static void
4984 apic_post_cyclic_setup(void *arg)
4985 {
4986 _NOTE(ARGUNUSED(arg))
4987 	cyc_handler_t hdlr;
4988 	cyc_time_t when;
4989 
4990 	/* cpu_lock is held */
4991 
4992 	/* set up cyclics for intr redistribution */
4993 
4994 	/*
4995 	 * In peridoc mode intr redistribution processing is done in
4996 	 * apic_intr_enter during clk intr processing
4997 	 */
4998 	if (!apic_oneshot)
4999 		return;
5000 
5001 	hdlr.cyh_level = CY_LOW_LEVEL;
5002 	hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute;
5003 	hdlr.cyh_arg = NULL;
5004 
5005 	when.cyt_when = 0;
5006 	when.cyt_interval = apic_redistribute_sample_interval;
5007 	apic_cyclic_id = cyclic_add(&hdlr, &when);
5008 
5009 
5010 }
5011 
5012 static void
5013 apic_redistribute_compute(void)
5014 {
5015 	int	i, j, max_busy;
5016 
5017 	if (apic_enable_dynamic_migration) {
5018 		if (++apic_nticks == apic_sample_factor_redistribution) {
5019 			/*
5020 			 * Time to call apic_intr_redistribute().
5021 			 * reset apic_nticks. This will cause max_busy
5022 			 * to be calculated below and if it is more than
5023 			 * apic_int_busy, we will do the whole thing
5024 			 */
5025 			apic_nticks = 0;
5026 		}
5027 		max_busy = 0;
5028 		for (i = 0; i < apic_nproc; i++) {
5029 
5030 			/*
5031 			 * Check if curipl is non zero & if ISR is in
5032 			 * progress
5033 			 */
5034 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
5035 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
5036 
5037 				int	irq;
5038 				apic_cpus[i].aci_busy++;
5039 				irq = apic_cpus[i].aci_current[j];
5040 				apic_irq_table[irq]->airq_busy++;
5041 			}
5042 
5043 			if (!apic_nticks &&
5044 			    (apic_cpus[i].aci_busy > max_busy))
5045 				max_busy = apic_cpus[i].aci_busy;
5046 		}
5047 		if (!apic_nticks) {
5048 			if (max_busy > apic_int_busy_mark) {
5049 			/*
5050 			 * We could make the following check be
5051 			 * skipped > 1 in which case, we get a
5052 			 * redistribution at half the busy mark (due to
5053 			 * double interval). Need to be able to collect
5054 			 * more empirical data to decide if that is a
5055 			 * good strategy. Punt for now.
5056 			 */
5057 				if (apic_skipped_redistribute)
5058 					apic_cleanup_busy();
5059 				else
5060 					apic_intr_redistribute();
5061 			} else
5062 				apic_skipped_redistribute++;
5063 		}
5064 	}
5065 }
5066 
5067 
5068 static int
5069 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
5070     int ipin, int *pci_irqp, iflag_t *intr_flagp)
5071 {
5072 
5073 	int status;
5074 	acpi_psm_lnk_t acpipsmlnk;
5075 
5076 	if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp,
5077 	    intr_flagp)) == ACPI_PSM_SUCCESS) {
5078 		APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d "
5079 		    "from cache for device %s, instance #%d\n", *pci_irqp,
5080 		    ddi_get_name(dip), ddi_get_instance(dip)));
5081 		return (status);
5082 	}
5083 
5084 	bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t));
5085 
5086 	if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp,
5087 	    &acpipsmlnk)) == ACPI_PSM_FAILURE) {
5088 		APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: "
5089 		    " acpi_translate_pci_irq failed for device %s, instance"
5090 		    " #%d", ddi_get_name(dip), ddi_get_instance(dip)));
5091 		return (status);
5092 	}
5093 
5094 	if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) {
5095 		status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp,
5096 		    intr_flagp);
5097 		if (status != ACPI_PSM_SUCCESS) {
5098 			status = acpi_get_current_irq_resource(&acpipsmlnk,
5099 			    pci_irqp, intr_flagp);
5100 		}
5101 	}
5102 
5103 	if (status == ACPI_PSM_SUCCESS) {
5104 		acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp,
5105 		    intr_flagp, &acpipsmlnk);
5106 
5107 		APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] "
5108 		    "new irq %d for device %s, instance #%d\n",
5109 		    *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip)));
5110 	}
5111 
5112 	return (status);
5113 }
5114 
5115 /*
5116  * Configures the irq for the interrupt link device identified by
5117  * acpipsmlnkp.
5118  *
5119  * Gets the current and the list of possible irq settings for the
5120  * device. If apic_unconditional_srs is not set, and the current
5121  * resource setting is in the list of possible irq settings,
5122  * current irq resource setting is passed to the caller.
5123  *
5124  * Otherwise, picks an irq number from the list of possible irq
5125  * settings, and sets the irq of the device to this value.
5126  * If prefer_crs is set, among a set of irq numbers in the list that have
5127  * the least number of devices sharing the interrupt, we pick current irq
5128  * resource setting if it is a member of this set.
5129  *
5130  * Passes the irq number in the value pointed to by pci_irqp, and
5131  * polarity and sensitivity in the structure pointed to by dipintrflagp
5132  * to the caller.
5133  *
5134  * Note that if setting the irq resource failed, but successfuly obtained
5135  * the current irq resource settings, passes the current irq resources
5136  * and considers it a success.
5137  *
5138  * Returns:
5139  * ACPI_PSM_SUCCESS on success.
5140  *
5141  * ACPI_PSM_FAILURE if an error occured during the configuration or
5142  * if a suitable irq was not found for this device, or if setting the
5143  * irq resource and obtaining the current resource fails.
5144  *
5145  */
5146 static int
5147 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
5148     int *pci_irqp, iflag_t *dipintr_flagp)
5149 {
5150 
5151 	int i, min_share, foundnow, done = 0;
5152 	int32_t irq;
5153 	int32_t share_irq = -1;
5154 	int32_t chosen_irq = -1;
5155 	int cur_irq = -1;
5156 	acpi_irqlist_t *irqlistp;
5157 	acpi_irqlist_t *irqlistent;
5158 
5159 	if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp))
5160 	    == ACPI_PSM_FAILURE) {
5161 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine "
5162 		    "or assign IRQ for device %s, instance #%d: The system was "
5163 		    "unable to get the list of potential IRQs from ACPI.",
5164 		    ddi_get_name(dip), ddi_get_instance(dip)));
5165 
5166 		return (ACPI_PSM_FAILURE);
5167 	}
5168 
5169 	if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
5170 	    dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) &&
5171 	    (cur_irq > 0)) {
5172 		/*
5173 		 * If an IRQ is set in CRS and that IRQ exists in the set
5174 		 * returned from _PRS, return that IRQ, otherwise print
5175 		 * a warning
5176 		 */
5177 
5178 		if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL)
5179 		    == ACPI_PSM_SUCCESS) {
5180 
5181 			acpi_free_irqlist(irqlistp);
5182 			ASSERT(pci_irqp != NULL);
5183 			*pci_irqp = cur_irq;
5184 			return (ACPI_PSM_SUCCESS);
5185 		}
5186 
5187 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the "
5188 		    "current irq %d for device %s, instance #%d in ACPI's "
5189 		    "list of possible irqs for this device. Picking one from "
5190 		    " the latter list.", cur_irq, ddi_get_name(dip),
5191 		    ddi_get_instance(dip)));
5192 	}
5193 
5194 	irqlistent = irqlistp;
5195 	min_share = 255;
5196 
5197 	while (irqlistent != NULL) {
5198 		irqlistent->intr_flags.bustype = BUS_PCI;
5199 
5200 		for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) {
5201 
5202 			irq = irqlistent->irqs[i];
5203 
5204 			if ((irq < 16) && (apic_reserved_irqlist[irq]))
5205 				continue;
5206 
5207 			if (irq == 0) {
5208 				/* invalid irq number */
5209 				continue;
5210 			}
5211 
5212 			if ((apic_irq_table[irq] == NULL) ||
5213 			    (apic_irq_table[irq]->airq_dip == dip)) {
5214 				chosen_irq = irq;
5215 				foundnow = 1;
5216 				/*
5217 				 * If we do not prefer current irq from crs
5218 				 * or if we do and this irq is the same as
5219 				 * current irq from crs, this is the one
5220 				 * to pick.
5221 				 */
5222 				if (!(apic_prefer_crs) || (irq == cur_irq)) {
5223 					done = 1;
5224 					break;
5225 				}
5226 				continue;
5227 			}
5228 
5229 			if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE)
5230 				continue;
5231 
5232 			if (!acpi_intr_compatible(irqlistent->intr_flags,
5233 			    apic_irq_table[irq]->airq_iflag))
5234 				continue;
5235 
5236 			if ((apic_irq_table[irq]->airq_share < min_share) ||
5237 			    ((apic_irq_table[irq]->airq_share == min_share) &&
5238 			    (cur_irq == irq) && (apic_prefer_crs))) {
5239 				min_share = apic_irq_table[irq]->airq_share;
5240 				share_irq = irq;
5241 				foundnow = 1;
5242 			}
5243 		}
5244 
5245 		/*
5246 		 * If we found an IRQ in the inner loop this time, save the
5247 		 * details from the irqlist for later use.
5248 		 */
5249 		if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) {
5250 			/*
5251 			 * Copy the acpi_prs_private_t and flags from this
5252 			 * irq list entry, since we found an irq from this
5253 			 * entry.
5254 			 */
5255 			acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv;
5256 			*dipintr_flagp = irqlistent->intr_flags;
5257 		}
5258 
5259 		if (done)
5260 			break;
5261 
5262 		/* Go to the next irqlist entry */
5263 		irqlistent = irqlistent->next;
5264 	}
5265 
5266 
5267 	acpi_free_irqlist(irqlistp);
5268 	if (chosen_irq != -1)
5269 		irq = chosen_irq;
5270 	else if (share_irq != -1)
5271 		irq = share_irq;
5272 	else {
5273 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a "
5274 		    "suitable irq from the list of possible irqs for device "
5275 		    "%s, instance #%d in ACPI's list of possible irqs",
5276 		    ddi_get_name(dip), ddi_get_instance(dip)));
5277 		return (ACPI_PSM_FAILURE);
5278 	}
5279 
5280 	APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s "
5281 	    "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip)));
5282 
5283 	if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) {
5284 		/*
5285 		 * setting irq was successful, check to make sure CRS
5286 		 * reflects that. If CRS does not agree with what we
5287 		 * set, return the irq that was set.
5288 		 */
5289 
5290 		if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
5291 		    dipintr_flagp) == ACPI_PSM_SUCCESS) {
5292 
5293 			if (cur_irq != irq)
5294 				APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: "
5295 				    "IRQ resource set (irqno %d) for device %s "
5296 				    "instance #%d, differs from current "
5297 				    "setting irqno %d",
5298 				    irq, ddi_get_name(dip),
5299 				    ddi_get_instance(dip), cur_irq));
5300 		}
5301 
5302 		/*
5303 		 * return the irq that was set, and not what CRS reports,
5304 		 * since CRS has been seen to be bogus on some systems
5305 		 */
5306 		cur_irq = irq;
5307 	} else {
5308 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d "
5309 		    "failed for device %s instance #%d",
5310 		    irq, ddi_get_name(dip), ddi_get_instance(dip)));
5311 
5312 		if (cur_irq == -1)
5313 			return (ACPI_PSM_FAILURE);
5314 	}
5315 
5316 	ASSERT(pci_irqp != NULL);
5317 	*pci_irqp = cur_irq;
5318 	return (ACPI_PSM_SUCCESS);
5319 }
5320