xref: /titanic_52/usr/src/uts/i86pc/io/pcplusmp/apic.c (revision 7d968cb8b4b6274092771b93e94bf88d1ee31c6c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
30  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
31  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
32  * PSMI 1.5 extensions are supported in Solaris Nevada.
33  */
34 #define	PSMI_1_5
35 
36 #include <sys/processor.h>
37 #include <sys/time.h>
38 #include <sys/psm.h>
39 #include <sys/smp_impldefs.h>
40 #include <sys/cram.h>
41 #include <sys/acpi/acpi.h>
42 #include <sys/acpica.h>
43 #include <sys/psm_common.h>
44 #include "apic.h"
45 #include <sys/pit.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/pci.h>
50 #include <sys/promif.h>
51 #include <sys/x86_archext.h>
52 #include <sys/cpc_impl.h>
53 #include <sys/uadmin.h>
54 #include <sys/panic.h>
55 #include <sys/debug.h>
56 #include <sys/archsystm.h>
57 #include <sys/trap.h>
58 #include <sys/machsystm.h>
59 #include <sys/cpuvar.h>
60 #include <sys/rm_platter.h>
61 #include <sys/privregs.h>
62 #include <sys/cyclic.h>
63 #include <sys/note.h>
64 #include <sys/pci_intr_lib.h>
65 
66 /*
67  *	Local Function Prototypes
68  */
69 static void apic_init_intr();
70 static void apic_ret();
71 static int apic_handle_defconf();
72 static int apic_parse_mpct(caddr_t mpct, int bypass);
73 static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
74 static int apic_checksum(caddr_t bptr, int len);
75 static int get_apic_cmd1();
76 static int get_apic_pri();
77 static int apic_find_bus_type(char *bus);
78 static int apic_find_bus(int busid);
79 static int apic_find_bus_id(int bustype);
80 static struct apic_io_intr *apic_find_io_intr(int irqno);
81 int apic_allocate_irq(int irq);
82 static int apic_find_free_irq(int start, int end);
83 static uchar_t apic_allocate_vector(int ipl, int irq, int pri);
84 static void apic_modify_vector(uchar_t vector, int irq);
85 static void apic_mark_vector(uchar_t oldvector, uchar_t newvector);
86 static uchar_t apic_xlate_vector(uchar_t oldvector);
87 static void apic_xlate_vector_free_timeout_handler(void *arg);
88 static void apic_free_vector(uchar_t vector);
89 static void apic_reprogram_timeout_handler(void *arg);
90 static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
91     int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq,
92     int iflag, boolean_t *restore_intrp);
93 static int apic_setup_io_intr(apic_irq_t *irqptr, int irq);
94 static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq);
95 static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq);
96 static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid);
97 static int apic_find_intin(uchar_t ioapic, uchar_t intin);
98 static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno,
99     int child_ipin, struct apic_io_intr **intrp);
100 static int apic_setup_irq_table(dev_info_t *dip, int irqno,
101     struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
102     int type);
103 static int apic_setup_sci_irq_table(int irqno, uchar_t ipl,
104     iflag_t *intr_flagp);
105 static void apic_nmi_intr(caddr_t arg);
106 uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid,
107     uchar_t intin);
108 static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock,
109     int when);
110 int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe);
111 static void apic_intr_redistribute();
112 static void apic_cleanup_busy();
113 static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
114 int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type);
115 
116 /* ACPI support routines */
117 static int acpi_probe(void);
118 static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
119     int *pci_irqp, iflag_t *intr_flagp);
120 
121 static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
122     int ipin, int *pci_irqp, iflag_t *intr_flagp);
123 static uchar_t acpi_find_ioapic(int irq);
124 static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2);
125 
126 /*
127  *	standard MP entries
128  */
129 static int	apic_probe();
130 static int	apic_clkinit();
131 static int	apic_getclkirq(int ipl);
132 static uint_t	apic_calibrate(volatile uint32_t *addr,
133     uint16_t *pit_ticks_adj);
134 static hrtime_t apic_gettime();
135 static hrtime_t apic_gethrtime();
136 static void	apic_init();
137 static void	apic_picinit(void);
138 static void	apic_cpu_start(processorid_t cpun, caddr_t rm_code);
139 static int	apic_post_cpu_start(void);
140 static void	apic_send_ipi(int cpun, int ipl);
141 static void	apic_set_softintr(int softintr);
142 static void	apic_set_idlecpu(processorid_t cpun);
143 static void	apic_unset_idlecpu(processorid_t cpun);
144 static int	apic_softlvl_to_irq(int ipl);
145 static int	apic_intr_enter(int ipl, int *vect);
146 static void	apic_intr_exit(int ipl, int vect);
147 static void	apic_setspl(int ipl);
148 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
149 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
150 static void	apic_shutdown(int cmd, int fcn);
151 static void	apic_preshutdown(int cmd, int fcn);
152 static int	apic_disable_intr(processorid_t cpun);
153 static void	apic_enable_intr(processorid_t cpun);
154 static processorid_t	apic_get_next_processorid(processorid_t cpun);
155 static int		apic_get_ipivect(int ipl, int type);
156 static void	apic_timer_reprogram(hrtime_t time);
157 static void	apic_timer_enable(void);
158 static void	apic_timer_disable(void);
159 static void	apic_post_cyclic_setup(void *arg);
160 extern int	apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
161 		    psm_intr_op_t, int *);
162 
163 static int	apic_oneshot = 0;
164 int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
165 
166 /*
167  * These variables are frequently accessed in apic_intr_enter(),
168  * apic_intr_exit and apic_setspl, so group them together
169  */
170 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
171 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
172 int apic_clkvect;
173 
174 /* ACPI SCI interrupt configuration; -1 if SCI not used */
175 int apic_sci_vect = -1;
176 iflag_t apic_sci_flags;
177 
178 /* vector at which error interrupts come in */
179 int apic_errvect;
180 int apic_enable_error_intr = 1;
181 int apic_error_display_delay = 100;
182 
183 /* vector at which performance counter overflow interrupts come in */
184 int apic_cpcovf_vect;
185 int apic_enable_cpcovf_intr = 1;
186 
187 /* Max wait time (in microsecs) for flags to clear in an RDT entry. */
188 static int apic_max_usecs_clear_pending = 1000;
189 
190 /* Amt of usecs to wait before checking if RDT flags have reset. */
191 #define	APIC_USECS_PER_WAIT_INTERVAL 100
192 
193 /* Maximum number of times to retry reprogramming via the timeout */
194 #define	APIC_REPROGRAM_MAX_TIMEOUTS 10
195 
196 /* timeout delay for IOAPIC delayed reprogramming */
197 #define	APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */
198 
199 /* Parameter to apic_rebind(): Should reprogramming be done now or later? */
200 #define	DEFERRED 1
201 #define	IMMEDIATE 0
202 
203 /*
204  * largest value of a uchar_t; NBBY (bits per byte) is from <sys/param.h>
205  */
206 #define	UCHAR_MAX	((1 << NBBY) - 1)
207 
208 uchar_t	apic_reserved_irqlist[MAX_ISA_IRQ + 1];
209 
210 /*
211  * The following vector assignments influence the value of ipltopri and
212  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
213  * idle to 0 and IPL 0 to 0x10 to differentiate idle in case
214  * we care to do so in future. Note some IPLs which are rarely used
215  * will share the vector ranges and heavily used IPLs (5 and 6) have
216  * a wide range.
217  *	IPL		Vector range.		as passed to intr_enter
218  *	0		none.
219  *	1,2,3		0x20-0x2f		0x0-0xf
220  *	4		0x30-0x3f		0x10-0x1f
221  *	5		0x40-0x5f		0x20-0x3f
222  *	6		0x60-0x7f		0x40-0x5f
223  *	7,8,9		0x80-0x8f		0x60-0x6f
224  *	10		0x90-0x9f		0x70-0x7f
225  *	11		0xa0-0xaf		0x80-0x8f
226  *	...		...
227  *	16		0xf0-0xff		0xd0-0xdf
228  */
229 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
230 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16
231 };
232 	/*
233 	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
234 	 * NOTE that this is vector as passed into intr_enter which is
235 	 * programmed vector - 0x20 (APIC_BASE_VECT)
236 	 */
237 
238 uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
239 	/* The taskpri to be programmed into apic to mask given ipl */
240 
241 #if defined(__amd64)
242 uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
243 #endif
244 
245 /*
246  * Patchable global variables.
247  */
248 int	apic_forceload = 0;
249 
250 #define	INTR_ROUND_ROBIN_WITH_AFFINITY	0
251 #define	INTR_ROUND_ROBIN		1
252 #define	INTR_LOWEST_PRIORITY		2
253 
254 int	apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
255 
256 static int	apic_next_bind_cpu = 1; /* For round robin assignment */
257 					/* start with cpu 1 */
258 
259 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
260 					/* 1 - use gettime() for performance */
261 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
262 int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
263 int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
264 int	apic_panic_on_nmi = 0;
265 int	apic_panic_on_apic_error = 0;
266 
267 int	apic_verbose = 0;
268 
/* Flag definitions for apic_verbose */
#define	APIC_VERBOSE_IOAPIC_FLAG		0x00000001
#define	APIC_VERBOSE_IRQ_FLAG			0x00000002
#define	APIC_VERBOSE_POWEROFF_FLAG		0x00000004
#define	APIC_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000008

/*
 * Conditional diagnostics, enabled by setting the matching flag in the
 * patchable apic_verbose bitmask.
 *
 * "fmt" is the complete, parenthesized argument list of the underlying
 * print routine, e.g. APIC_VERBOSE_IRQ((CE_CONT, "irq %d\n", irq));
 *
 * Each macro expands to a single statement (do { } while (0)) so that it
 * can be used as the body of an unbraced if/else without capturing the
 * caller's "else". The invocation must be terminated with a semicolon.
 */
#define	APIC_VERBOSE_IOAPIC(fmt) \
	do { \
		if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
			cmn_err fmt; \
	} while (0)

#define	APIC_VERBOSE_IRQ(fmt) \
	do { \
		if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
			cmn_err fmt; \
	} while (0)

#define	APIC_VERBOSE_POWEROFF(fmt) \
	do { \
		if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \
			prom_printf fmt; \
	} while (0)
287 
288 
289 /* Now the ones for Dynamic Interrupt distribution */
290 int	apic_enable_dynamic_migration = 0;
291 
292 /*
293  * If enabled, the distribution works as follows:
294  * On every interrupt entry, the current ipl for the CPU is set in cpu_info
295  * and the irq corresponding to the ipl is also set in the aci_current array.
296  * interrupt exit and setspl (due to soft interrupts) will cause the current
297  * ipl to be changed. This is cache friendly as these frequently used
298  * paths write into a per cpu structure.
299  *
300  * Sampling is done by checking the structures for all CPUs and incrementing
301  * the busy field of the irq (if any) executing on each CPU and the busy field
302  * of the corresponding CPU.
303  * In periodic mode this is done on every clock interrupt.
304  * In one-shot mode, this is done thru a cyclic with an interval of
305  * apic_redistribute_sample_interval (default 10 milli sec).
306  *
307  * Every apic_sample_factor_redistribution times we sample, we do computations
308  * to decide which interrupt needs to be migrated (see comments
309  * before apic_intr_redistribute()).
310  */
311 
312 /*
313  * Following 3 variables start as % and can be patched or set using an
314  * API to be defined in future. They will be scaled to
315  * sample_factor_redistribution which is in turn set to hertz+1 (in periodic
316  * mode), or 101 in one-shot mode to stagger it away from one sec processing
317  */
318 
319 int	apic_int_busy_mark = 60;
320 int	apic_int_free_mark = 20;
321 int	apic_diff_for_redistribution = 10;
322 
323 /* sampling interval for interrupt redistribution for dynamic migration */
324 int	apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */
325 
326 /*
327  * number of times we sample before deciding to redistribute interrupts
328  * for dynamic migration
329  */
330 int	apic_sample_factor_redistribution = 101;
331 
332 /* timeout for xlate_vector, mark_vector */
333 int	apic_revector_timeout = 16 * 10000; /* 160 millisec */
334 
335 int	apic_redist_cpu_skip = 0;
336 int	apic_num_imbalance = 0;
337 int	apic_num_rebind = 0;
338 
339 int	apic_nproc = 0;
340 int	apic_defconf = 0;
341 int	apic_irq_translate = 0;
342 int	apic_spec_rev = 0;
343 int	apic_imcrp = 0;
344 
345 int	apic_use_acpi = 1;	/* 1 = use ACPI, 0 = don't use ACPI */
346 int	apic_use_acpi_madt_only = 0;	/* 1=ONLY use MADT from ACPI */
347 
348 /*
349  * For interrupt link devices, if apic_unconditional_srs is set, an irq resource
350  * will be assigned (via _SRS). If it is not set, use the current
351  * irq setting (via _CRS), but only if that irq is in the set of possible
352  * irqs (returned by _PRS) for the device.
353  */
354 int	apic_unconditional_srs = 1;
355 
356 /*
357  * For interrupt link devices, if apic_prefer_crs is set when we are
358  * assigning an IRQ resource to a device, prefer the current IRQ setting
359  * over other possible irq settings under same conditions.
360  */
361 
362 int	apic_prefer_crs = 1;
363 
364 
365 /* minimum number of timer ticks to program to */
366 int apic_min_timer_ticks = 1;
367 /*
368  *	Local static data
369  */
/*
 * PSM operations vector for this module. It is published through
 * apic_psm_info below. The initializer is positional, so the order of the
 * entries must follow the member order of struct psm_ops; unimplemented
 * entry points are explicit, correctly typed NULL pointers.
 *
 * _init() overwrites the psm_gethrtime member with apic_gettime when
 * apic_coarse_hrtime is non-zero.
 */
370 static struct	psm_ops apic_ops = {
371 	apic_probe,
372 
373 	apic_init,
374 	apic_picinit,
375 	apic_intr_enter,
376 	apic_intr_exit,
377 	apic_setspl,
378 	apic_addspl,
379 	apic_delspl,
380 	apic_disable_intr,
381 	apic_enable_intr,
382 	apic_softlvl_to_irq,
383 	apic_set_softintr,
384 
385 	apic_set_idlecpu,
386 	apic_unset_idlecpu,
387 
388 	apic_clkinit,
389 	apic_getclkirq,
390 	(void (*)(void))NULL,		/* psm_hrtimeinit */
391 	apic_gethrtime,			/* presumably psm_gethrtime - confirm */
392 
393 	apic_get_next_processorid,
394 	apic_cpu_start,
395 	apic_post_cpu_start,
396 	apic_shutdown,
397 	apic_get_ipivect,
398 	apic_send_ipi,
399 
400 	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
401 	(int (*)(todinfo_t *))NULL,	/* psm_tod_get */
402 	(int (*)(todinfo_t *))NULL,	/* psm_tod_set */
403 	(void (*)(int, char *))NULL,	/* psm_notify_error */
404 	(void (*)(int))NULL,		/* psm_notify_func */
405 	apic_timer_reprogram,
406 	apic_timer_enable,
407 	apic_timer_disable,
408 	apic_post_cyclic_setup,
409 	apic_preshutdown,
410 	apic_intr_ops			/* Advanced DDI Interrupt framework */
411 };
412 
413 
/*
 * Module description handed, together with apic_hdlp, to psm_mod_init(),
 * psm_mod_fini() and psm_mod_info() by _init(), _fini() and _info().
 *
 * NOTE(review): the description string says "v1.4 compatible" while the
 * version field is PSM_INFO_VER01_5 - confirm whether that is intentional.
 */
414 static struct	psm_info apic_psm_info = {
415 	PSM_INFO_VER01_5,			/* version */
416 	PSM_OWN_EXCLUSIVE,			/* ownership */
417 	(struct psm_ops *)&apic_ops,		/* operation */
418 	"pcplusmp",				/* machine name */
419 	"pcplusmp v1.4 compatible %I%",
420 };
421 
422 static void *apic_hdlp;
423 
424 #ifdef DEBUG
425 #define	DENT		0x0001
426 int	apic_debug = 0;
427 /*
428  * set apic_restrict_vector to the # of vectors we want to allow per range
429  * useful in testing shared interrupt logic by setting it to 2 or 3
430  */
431 int	apic_restrict_vector = 0;
432 
#define	APIC_DEBUG_MSGBUFSIZE	2048
int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

/*
 * Put "int" info into debug buffer. No MP consistency, but light weight.
 * Good enough for most debugging.
 *
 * The index wraps back to 0 once it reaches APIC_DEBUG_MSGBUFSIZE - NCPU,
 * so the last NCPU slots act as slack for unsynchronized writers.
 *
 * The store and the wrap check are wrapped in do { } while (0) so the macro
 * is one statement: under an unbraced "if" both steps are guarded together
 * and a following "else" still pairs with the caller's "if". The argument
 * is parenthesized so any expression may be passed. The invocation must be
 * terminated with a semicolon.
 */
#define	APIC_DEBUG_BUF_PUT(x) \
	do { \
		apic_debug_msgbuf[apic_debug_msgbufindex++] = (x); \
		if (apic_debug_msgbufindex >= \
		    (APIC_DEBUG_MSGBUFSIZE - NCPU)) \
			apic_debug_msgbufindex = 0; \
	} while (0)
445 
446 #endif /* DEBUG */
447 
448 apic_cpus_info_t	*apic_cpus;
449 
450 static cpuset_t	apic_cpumask;
451 static uint_t	apic_flag;
452 
453 /* Flag to indicate that we need to shut down all processors */
454 static uint_t	apic_shutdown_processors;
455 
456 uint_t apic_nsec_per_intr = 0;
457 
458 /*
459  * apic_let_idle_redistribute can have the following values:
460  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
461  * apic_redistribute_lock prevents multiple idle cpus from redistributing
462  */
463 int	apic_num_idle_redistributions = 0;
464 static	int apic_let_idle_redistribute = 0;
465 static	uint_t apic_nticks = 0;
466 static	uint_t apic_skipped_redistribute = 0;
467 
468 /* to gather intr data and redistribute */
469 static void apic_redistribute_compute(void);
470 
471 static	uint_t last_count_read = 0;
472 static	lock_t	apic_gethrtime_lock;
473 volatile int	apic_hrtime_stamp = 0;
474 volatile hrtime_t apic_nsec_since_boot = 0;
475 static uint_t apic_hertz_count;
476 
477 uint64_t apic_ticks_per_SFnsecs;	/* # of ticks in SF nsecs */
478 
479 static hrtime_t apic_nsec_max;
480 
481 static	hrtime_t	apic_last_hrtime = 0;
482 int		apic_hrtime_error = 0;
483 int		apic_remote_hrterr = 0;
484 int		apic_num_nmis = 0;
485 int		apic_apic_error = 0;
486 int		apic_num_apic_errors = 0;
487 int		apic_num_cksum_errors = 0;
488 
489 static	uchar_t	apic_io_id[MAX_IO_APIC];
490 static	uchar_t	apic_io_ver[MAX_IO_APIC];
491 static	uchar_t	apic_io_vectbase[MAX_IO_APIC];
492 static	uchar_t	apic_io_vectend[MAX_IO_APIC];
493 volatile int32_t *apicioadr[MAX_IO_APIC];
494 
495 /*
496  * First available slot to be used as IRQ index into the apic_irq_table
497  * for those interrupts (like MSI/X) that don't have a physical IRQ.
498  */
499 int apic_first_avail_irq  = APIC_FIRST_FREE_IRQ;
500 
501 /*
502  * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
503  * and bound elements of cpus_info and the temp_cpu element of irq_struct
504  */
505 lock_t	apic_ioapic_lock;
506 
507 /*
508  * apic_ioapic_reprogram_lock prevents a CPU from exiting
509  * apic_intr_exit before IOAPIC reprogramming information
510  * is collected.
511  */
512 static	lock_t	apic_ioapic_reprogram_lock;
513 static	int	apic_io_max = 0;	/* no. of i/o apics enabled */
514 
515 static	struct apic_io_intr *apic_io_intrp = 0;
516 static	struct apic_bus	*apic_busp;
517 
518 uchar_t	apic_vector_to_irq[APIC_MAX_VECTOR+1];
519 static	uchar_t	apic_resv_vector[MAXIPL+1];
520 
521 static	char	apic_level_intr[APIC_MAX_VECTOR+1];
522 static	int	apic_error = 0;
523 /* values which apic_error can take. Not catastrophic, but may help debug */
524 #define	APIC_ERR_BOOT_EOI		0x1
525 #define	APIC_ERR_GET_IPIVECT_FAIL	0x2
526 #define	APIC_ERR_INVALID_INDEX		0x4
527 #define	APIC_ERR_MARK_VECTOR_FAIL	0x8
528 #define	APIC_ERR_APIC_ERROR		0x40000000
529 #define	APIC_ERR_NMI			0x80000000
530 
531 static	int	apic_cmos_ssb_set = 0;
532 
533 static	uint32_t	eisa_level_intr_mask = 0;
534 	/* At least MSB will be set if EISA bus */
535 
536 static	int	apic_pci_bus_total = 0;
537 static	uchar_t	apic_single_pci_busid = 0;
538 
539 
540 /*
541  * airq_mutex protects additions to the apic_irq_table - the first
542  * pointer and any airq_nexts off of that one. It also protects
543  * apic_max_device_irq & apic_min_device_irq. It also guarantees
544  * that share_id is unique as new ids are generated only when new
545  * irq_t structs are linked in. Once linked in the structs are never
546  * deleted. temp_cpu & mps_intr_index field indicate if it is programmed
547  * or allocated. Note that there is a slight gap between allocating in
548  * apic_introp_xlate and programming in addspl.
549  */
550 kmutex_t	airq_mutex;
551 apic_irq_t	*apic_irq_table[APIC_MAX_VECTOR+1];
552 int		apic_max_device_irq = 0;
553 int		apic_min_device_irq = APIC_MAX_VECTOR;
554 
555 /* use to make sure only one cpu handles the nmi */
556 static	lock_t	apic_nmi_lock;
557 /* use to make sure only one cpu handles the error interrupt */
558 static	lock_t	apic_error_lock;
559 
560 /*
561  * Following declarations are for revectoring; used when ISRs at different
562  * IPLs share an irq.
563  */
564 static	lock_t	apic_revector_lock;
565 static	int	apic_revector_pending = 0;
566 static	uchar_t	*apic_oldvec_to_newvec;
567 static	uchar_t	*apic_newvec_to_oldvec;
568 
569 /* Ensures that the IOAPIC-reprogramming timeout is not reentrant */
570 static	kmutex_t	apic_reprogram_timeout_mutex;
571 
572 static	struct	ioapic_reprogram_data {
573 	int		valid;	 /* This entry is valid */
574 	int		bindcpu; /* The CPU to which the int will be bound */
575 	unsigned	timeouts; /* # times the reprogram timeout was called */
576 } apic_reprogram_info[APIC_MAX_VECTOR+1];
577 /*
578  * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info
579  * is indexed by IRQ number, NOT by vector number.
580  */
581 
582 
583 /*
584  * The following added to identify a software poweroff method if available.
585  */
586 
587 static struct {
588 	int	poweroff_method;
589 	char	oem_id[APIC_MPS_OEM_ID_LEN + 1];	/* MAX + 1 for NULL */
590 	char	prod_id[APIC_MPS_PROD_ID_LEN + 1];	/* MAX + 1 for NULL */
591 } apic_mps_ids[] = {
592 	{ APIC_POWEROFF_VIA_RTC,	"INTEL",	"ALDER" },   /* 4300 */
593 	{ APIC_POWEROFF_VIA_RTC,	"NCR",		"AMC" },    /* 4300 */
594 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"A450NX" },  /* 4400? */
595 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AD450NX" }, /* 4400 */
596 	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AC450NX" }, /* 4400R */
597 	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"S450NX" },  /* S50  */
598 	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"SC450NX" }  /* S50? */
599 };
600 
601 int	apic_poweroff_method = APIC_POWEROFF_NONE;
602 
603 static	struct {
604 	uchar_t	cntl;
605 	uchar_t	data;
606 } aspen_bmc[] = {
607 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
608 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
609 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
610 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
611 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
612 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
613 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
614 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
615 
616 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
617 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
618 };
619 
620 static	struct {
621 	int	port;
622 	uchar_t	data;
623 } sitka_bmc[] = {
624 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
625 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
626 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
627 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
628 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
629 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
630 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
631 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
632 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
633 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
634 
635 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
636 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
637 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
638 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
639 };
640 
641 
642 /* Patchable global variables. */
643 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
644 int		apic_debug_mps_id = 0;		/* 1 - print MPS ID strings */
645 uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */
646 
647 /*
648  * ACPI definitions
649  */
650 /* _PIC method arguments */
651 #define	ACPI_PIC_MODE	0
652 #define	ACPI_APIC_MODE	1
653 
654 /* APIC error flags we care about */
655 #define	APIC_SEND_CS_ERROR	0x01
656 #define	APIC_RECV_CS_ERROR	0x02
657 #define	APIC_CS_ERRORS		(APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR)
658 
659 /*
660  * ACPI variables
661  */
662 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */
663 static	int apic_enable_acpi = 0;
664 
665 /* ACPI Multiple APIC Description Table ptr */
666 static	MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL;
667 
668 /* ACPI Interrupt Source Override Structure ptr */
669 static	MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL;
670 static	int acpi_iso_cnt = 0;
671 
672 /* ACPI Non-maskable Interrupt Sources ptr */
673 static	MADT_NMI_SOURCE *acpi_nmi_sp = NULL;
674 static	int acpi_nmi_scnt = 0;
675 static	MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL;
676 static	int acpi_nmi_ccnt = 0;
677 
678 /*
679  * extern declarations
680  */
681 extern	int	intr_clear(void);
682 extern	void	intr_restore(uint_t);
683 #if defined(__amd64)
684 extern	int	intpri_use_cr8;
685 #endif	/* __amd64 */
686 
687 extern int	apic_pci_msi_enable_vector(dev_info_t *, int, int,
688 		    int, int, int);
689 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int);
690 extern int	apic_pci_msi_unconfigure(dev_info_t *, int, int);
691 extern int	apic_pci_msi_disable_mode(dev_info_t *, int, int);
692 extern int	apic_pci_msi_enable_mode(dev_info_t *, int, int);
693 
694 /*
695  *	This is the loadable module wrapper
696  */
697 
698 int
699 _init(void)
700 {
701 	if (apic_coarse_hrtime)
702 		apic_ops.psm_gethrtime = &apic_gettime;
703 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
704 }
705 
706 int
707 _fini(void)
708 {
709 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
710 }
711 
712 int
713 _info(struct modinfo *modinfop)
714 {
715 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
716 }
717 
718 /*
719  * Auto-configuration routines
720  */
721 
722 /*
723  * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do here
724  * May work with 1.1 - but not guaranteed.
725  * According to the MP Spec, the MP floating pointer structure
726  * will be searched in the order described below:
727  * 1. In the first kilobyte of Extended BIOS Data Area (EBDA)
728  * 2. Within the last kilobyte of system base memory
729  * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh
730  * Once we find the right signature with proper checksum, we call
731  * either handle_defconf or parse_mpct to get all info necessary for
732  * subsequent operations.
733  */
734 static int
735 apic_probe()
736 {
737 	uint32_t mpct_addr, ebda_start = 0, base_mem_end;
738 	caddr_t	biosdatap;
739 	caddr_t	mpct;
740 	caddr_t	fptr;
741 	int	i, mpct_size, mapsize, retval = PSM_FAILURE;
742 	ushort_t	ebda_seg, base_mem_size;
743 	struct	apic_mpfps_hdr	*fpsp;
744 	struct	apic_mp_cnf_hdr	*hdrp;
745 	int bypass_cpu_and_ioapics_in_mptables;
746 	int acpi_user_options;
747 
748 	if (apic_forceload < 0)
749 		return (retval);
750 
751 	/* Allow override for MADT-only mode */
752 	acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0,
753 	    "acpi-user-options", 0);
754 	apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0);
755 
756 	/* Allow apic_use_acpi to override MADT-only mode */
757 	if (!apic_use_acpi)
758 		apic_use_acpi_madt_only = 0;
759 
760 	retval = acpi_probe();
761 
762 	/*
763 	 * mapin the bios data area 40:0
764 	 * 40:13h - two-byte location reports the base memory size
765 	 * 40:0Eh - two-byte location for the exact starting address of
766 	 *	    the EBDA segment for EISA
767 	 */
768 	biosdatap = psm_map_phys(0x400, 0x20, PROT_READ);
769 	if (!biosdatap)
770 		return (retval);
771 	fpsp = (struct apic_mpfps_hdr *)NULL;
772 	mapsize = MPFPS_RAM_WIN_LEN;
773 	/*LINTED: pointer cast may result in improper alignment */
774 	ebda_seg = *((ushort_t *)(biosdatap+0xe));
775 	/* check the 1k of EBDA */
776 	if (ebda_seg) {
777 		ebda_start = ((uint32_t)ebda_seg) << 4;
778 		fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ);
779 		if (fptr) {
780 			if (!(fpsp =
781 			    apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN)))
782 				psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
783 		}
784 	}
785 	/* If not in EBDA, check the last k of system base memory */
786 	if (!fpsp) {
787 		/*LINTED: pointer cast may result in improper alignment */
788 		base_mem_size = *((ushort_t *)(biosdatap + 0x13));
789 
790 		if (base_mem_size > 512)
791 			base_mem_end = 639 * 1024;
792 		else
793 			base_mem_end = 511 * 1024;
794 		/* if ebda == last k of base mem, skip to check BIOS ROM */
795 		if (base_mem_end != ebda_start) {
796 
797 			fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN,
798 			    PROT_READ);
799 
800 			if (fptr) {
801 				if (!(fpsp = apic_find_fps_sig(fptr,
802 				    MPFPS_RAM_WIN_LEN)))
803 					psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
804 			}
805 		}
806 	}
807 	psm_unmap_phys(biosdatap, 0x20);
808 
809 	/* If still cannot find it, check the BIOS ROM space */
810 	if (!fpsp) {
811 		mapsize = MPFPS_ROM_WIN_LEN;
812 		fptr = psm_map_phys(MPFPS_ROM_WIN_START,
813 		    MPFPS_ROM_WIN_LEN, PROT_READ);
814 		if (fptr) {
815 			if (!(fpsp =
816 			    apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) {
817 				psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
818 				return (retval);
819 			}
820 		}
821 	}
822 
823 	if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) {
824 		psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
825 		return (retval);
826 	}
827 
828 	apic_spec_rev = fpsp->mpfps_spec_rev;
829 	if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) {
830 		psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
831 		return (retval);
832 	}
833 
834 	/* check IMCR is present or not */
835 	apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP;
836 
837 	/* check default configuration (dual CPUs) */
838 	if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) {
839 		psm_unmap_phys(fptr, mapsize);
840 		return (apic_handle_defconf());
841 	}
842 
843 	/* MP Configuration Table */
844 	mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr);
845 
846 	psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */
847 
848 	/*
849 	 * Map in enough memory for the MP Configuration Table Header.
850 	 * Use this table to read the total length of the BIOS data and
851 	 * map in all the info
852 	 */
853 	/*LINTED: pointer cast may result in improper alignment */
854 	hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr,
855 	    sizeof (struct apic_mp_cnf_hdr), PROT_READ);
856 	if (!hdrp)
857 		return (retval);
858 
859 	/* check mp configuration table signature PCMP */
860 	if (hdrp->mpcnf_sig != 0x504d4350) {
861 		psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
862 		return (retval);
863 	}
864 	mpct_size = (int)hdrp->mpcnf_tbl_length;
865 
866 	apic_set_pwroff_method_from_mpcnfhdr(hdrp);
867 
868 	psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
869 
870 	if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) {
871 		/* This is an ACPI machine No need for further checks */
872 		return (retval);
873 	}
874 
875 	/*
876 	 * Map in the entries for this machine, ie. Processor
877 	 * Entry Tables, Bus Entry Tables, etc.
878 	 * They are in fixed order following one another
879 	 */
880 	mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ);
881 	if (!mpct)
882 		return (retval);
883 
884 	if (apic_checksum(mpct, mpct_size) != 0)
885 		goto apic_fail1;
886 
887 
888 	/*LINTED: pointer cast may result in improper alignment */
889 	hdrp = (struct apic_mp_cnf_hdr *)mpct;
890 	/*LINTED: pointer cast may result in improper alignment */
891 	apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic,
892 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
893 	if (!apicadr)
894 		goto apic_fail1;
895 
896 	/* Parse all information in the tables */
897 	bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS);
898 	if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) ==
899 	    PSM_SUCCESS)
900 		return (PSM_SUCCESS);
901 
902 	for (i = 0; i < apic_io_max; i++)
903 		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
904 	if (apic_cpus)
905 		kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc);
906 	if (apicadr)
907 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
908 apic_fail1:
909 	psm_unmap_phys(mpct, mpct_size);
910 	return (retval);
911 }
912 
913 static void
914 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
915 {
916 	int	i;
917 
918 	for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0]));
919 	    i++) {
920 		if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id,
921 		    strlen(apic_mps_ids[i].oem_id)) == 0) &&
922 		    (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id,
923 		    strlen(apic_mps_ids[i].prod_id)) == 0)) {
924 
925 			apic_poweroff_method = apic_mps_ids[i].poweroff_method;
926 			break;
927 		}
928 	}
929 
930 	if (apic_debug_mps_id != 0) {
931 		cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'"
932 		    "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n",
933 		    hdrp->mpcnf_oem_str[0],
934 		    hdrp->mpcnf_oem_str[1],
935 		    hdrp->mpcnf_oem_str[2],
936 		    hdrp->mpcnf_oem_str[3],
937 		    hdrp->mpcnf_oem_str[4],
938 		    hdrp->mpcnf_oem_str[5],
939 		    hdrp->mpcnf_oem_str[6],
940 		    hdrp->mpcnf_oem_str[7],
941 		    hdrp->mpcnf_prod_str[0],
942 		    hdrp->mpcnf_prod_str[1],
943 		    hdrp->mpcnf_prod_str[2],
944 		    hdrp->mpcnf_prod_str[3],
945 		    hdrp->mpcnf_prod_str[4],
946 		    hdrp->mpcnf_prod_str[5],
947 		    hdrp->mpcnf_prod_str[6],
948 		    hdrp->mpcnf_prod_str[7],
949 		    hdrp->mpcnf_prod_str[8],
950 		    hdrp->mpcnf_prod_str[9],
951 		    hdrp->mpcnf_prod_str[10],
952 		    hdrp->mpcnf_prod_str[11]);
953 	}
954 }
955 
956 static int
957 acpi_probe(void)
958 {
959 	int			i, id, intmax, ver, index, rv;
960 	int			acpi_verboseflags = 0;
961 	int			madt_seen, madt_size;
962 	APIC_HEADER		*ap;
963 	MADT_PROCESSOR_APIC	*mpa;
964 	MADT_IO_APIC		*mia;
965 	MADT_IO_SAPIC		*misa;
966 	MADT_INTERRUPT_OVERRIDE	*mio;
967 	MADT_NMI_SOURCE		*mns;
968 	MADT_INTERRUPT_SOURCE	*mis;
969 	MADT_LOCAL_APIC_NMI	*mlan;
970 	MADT_ADDRESS_OVERRIDE	*mao;
971 	ACPI_OBJECT_LIST 	arglist;
972 	ACPI_OBJECT		arg;
973 	int			sci;
974 	iflag_t			sci_flags;
975 	volatile int32_t	*ioapic;
976 	char			local_ids[NCPU];
977 	char			proc_ids[NCPU];
978 	uchar_t			hid;
979 
980 	if (!apic_use_acpi)
981 		return (PSM_FAILURE);
982 
983 	if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING,
984 	    (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK)
985 		return (PSM_FAILURE);
986 
987 	apicadr = (uint32_t *)psm_map_phys(
988 	    (uint32_t)acpi_mapic_dtp->LocalApicAddress,
989 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
990 	if (!apicadr)
991 		return (PSM_FAILURE);
992 
993 	id = apicadr[APIC_LID_REG];
994 	local_ids[0] = (uchar_t)(((uint_t)id) >> 24);
995 	apic_nproc = index = 1;
996 	CPUSET_ONLY(apic_cpumask, 0);
997 	apic_io_max = 0;
998 
999 	ap = (APIC_HEADER *) (acpi_mapic_dtp + 1);
1000 	madt_size = acpi_mapic_dtp->Length;
1001 	madt_seen = sizeof (*acpi_mapic_dtp);
1002 
1003 	while (madt_seen < madt_size) {
1004 		switch (ap->Type) {
1005 		case APIC_PROCESSOR:
1006 			mpa = (MADT_PROCESSOR_APIC *) ap;
1007 			if (mpa->ProcessorEnabled) {
1008 				if (mpa->LocalApicId == local_ids[0])
1009 					proc_ids[0] = mpa->ProcessorId;
1010 				else if (apic_nproc < NCPU) {
1011 					local_ids[index] = mpa->LocalApicId;
1012 					proc_ids[index] = mpa->ProcessorId;
1013 					CPUSET_ADD(apic_cpumask, index);
1014 					index++;
1015 					apic_nproc++;
1016 				} else
1017 					cmn_err(CE_WARN, "pcplusmp: exceeded "
1018 					    "maximum no. of CPUs (= %d)", NCPU);
1019 			}
1020 			break;
1021 
1022 		case APIC_IO:
1023 			mia = (MADT_IO_APIC *) ap;
1024 			if (apic_io_max < MAX_IO_APIC) {
1025 				apic_io_id[apic_io_max] = mia->IoApicId;
1026 				apic_io_vectbase[apic_io_max] =
1027 				    mia->Interrupt;
1028 				ioapic = apicioadr[apic_io_max] =
1029 				    (int32_t *)psm_map_phys(
1030 				    (uint32_t)mia->Address,
1031 				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1032 				if (!ioapic)
1033 					goto cleanup;
1034 				apic_io_max++;
1035 			}
1036 			break;
1037 
1038 		case APIC_XRUPT_OVERRIDE:
1039 			mio = (MADT_INTERRUPT_OVERRIDE *) ap;
1040 			if (acpi_isop == NULL)
1041 				acpi_isop = mio;
1042 			acpi_iso_cnt++;
1043 			break;
1044 
1045 		case APIC_NMI:
1046 			/* UNIMPLEMENTED */
1047 			mns = (MADT_NMI_SOURCE *) ap;
1048 			if (acpi_nmi_sp == NULL)
1049 				acpi_nmi_sp = mns;
1050 			acpi_nmi_scnt++;
1051 
1052 			cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n",
1053 				mns->Interrupt, mns->Polarity,
1054 				mns->TriggerMode);
1055 			break;
1056 
1057 		case APIC_LOCAL_NMI:
1058 			/* UNIMPLEMENTED */
1059 			mlan = (MADT_LOCAL_APIC_NMI *) ap;
1060 			if (acpi_nmi_cp == NULL)
1061 				acpi_nmi_cp = mlan;
1062 			acpi_nmi_ccnt++;
1063 
1064 			cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n",
1065 				mlan->ProcessorId, mlan->Polarity,
1066 				mlan->TriggerMode, mlan->Lint);
1067 			break;
1068 
1069 		case APIC_ADDRESS_OVERRIDE:
1070 			/* UNIMPLEMENTED */
1071 			mao = (MADT_ADDRESS_OVERRIDE *) ap;
1072 			cmn_err(CE_NOTE, "!apic: address override: %lx\n",
1073 				(long)mao->Address);
1074 			break;
1075 
1076 		case APIC_IO_SAPIC:
1077 			/* UNIMPLEMENTED */
1078 			misa = (MADT_IO_SAPIC *) ap;
1079 
1080 			cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n",
1081 				misa->IoSapicId, misa->InterruptBase,
1082 				(long)misa->Address);
1083 			break;
1084 
1085 		case APIC_XRUPT_SOURCE:
1086 			/* UNIMPLEMENTED */
1087 			mis = (MADT_INTERRUPT_SOURCE *) ap;
1088 
1089 			cmn_err(CE_NOTE,
1090 				"!apic: irq source: %d %d %d %d %d %d %d\n",
1091 				mis->ProcessorId, mis->ProcessorEid,
1092 				mis->Interrupt, mis->Polarity,
1093 				mis->TriggerMode, mis->InterruptType,
1094 				mis->IoSapicVector);
1095 			break;
1096 		case APIC_RESERVED:
1097 		default:
1098 			break;	/* ignore unknown items as per ACPI spec */
1099 		}
1100 
1101 		/* advance to next entry */
1102 		madt_seen += ap->Length;
1103 		ap = (APIC_HEADER *)(((char *)ap) + ap->Length);
1104 	}
1105 
1106 	if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc,
1107 	    KM_NOSLEEP)) == NULL)
1108 		goto cleanup;
1109 
1110 	/*
1111 	 * ACPI doesn't provide the local apic ver, get it directly from the
1112 	 * local apic
1113 	 */
1114 	ver = apicadr[APIC_VERS_REG];
1115 	for (i = 0; i < apic_nproc; i++) {
1116 		apic_cpus[i].aci_local_id = local_ids[i];
1117 		apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
1118 	}
1119 	for (i = 0; i < apic_io_max; i++) {
1120 		ioapic = apicioadr[i];
1121 
1122 		/*
1123 		 * need to check Sitka on the following acpi problem
1124 		 * On the Sitka, the ioapic's apic_id field isn't reporting
1125 		 * the actual io apic id. We have reported this problem
1126 		 * to Intel. Until they fix the problem, we will get the
1127 		 * actual id directly from the ioapic.
1128 		 */
1129 		ioapic[APIC_IO_REG] = APIC_ID_CMD;
1130 		id = ioapic[APIC_IO_DATA];
1131 		hid = (uchar_t)(((uint_t)id) >> 24);
1132 
1133 		if (hid != apic_io_id[i]) {
1134 			if (apic_io_id[i] == 0)
1135 				apic_io_id[i] = hid;
1136 			else { /* set ioapic id to whatever reported by ACPI */
1137 				id = ((int32_t)apic_io_id[i]) << 24;
1138 				ioapic[APIC_IO_REG] = APIC_ID_CMD;
1139 				ioapic[APIC_IO_DATA] = id;
1140 			}
1141 		}
1142 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
1143 		ver = ioapic[APIC_IO_DATA];
1144 		apic_io_ver[i] = (uchar_t)(ver & 0xff);
1145 		intmax = (ver >> 16) & 0xff;
1146 		apic_io_vectend[i] = apic_io_vectbase[i] + intmax;
1147 		if (apic_first_avail_irq <= apic_io_vectend[i])
1148 			apic_first_avail_irq = apic_io_vectend[i] + 1;
1149 	}
1150 
1151 
1152 	/*
1153 	 * Process SCI configuration here
1154 	 * An error may be returned here if
1155 	 * acpi-user-options specifies legacy mode
1156 	 * (no SCI, no ACPI mode)
1157 	 */
1158 	if (acpica_get_sci(&sci, &sci_flags) != AE_OK)
1159 		sci = -1;
1160 
1161 	/*
1162 	 * Now call acpi_init() to generate namespaces
1163 	 * If this fails, we don't attempt to use ACPI
1164 	 * even if we were able to get a MADT above
1165 	 */
1166 	if (acpica_init() != AE_OK)
1167 		goto cleanup;
1168 
1169 	/*
1170 	 * Squirrel away the SCI and flags for later on
1171 	 * in apic_picinit() when we're ready
1172 	 */
1173 	apic_sci_vect = sci;
1174 	apic_sci_flags = sci_flags;
1175 
1176 	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG)
1177 		acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG;
1178 
1179 	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG)
1180 		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG;
1181 
1182 	if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG)
1183 		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG;
1184 
1185 	if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) ==
1186 	    ACPI_PSM_FAILURE)
1187 		goto cleanup;
1188 
1189 	/* Enable ACPI APIC interrupt routing */
1190 	arglist.Count = 1;
1191 	arglist.Pointer = &arg;
1192 	arg.Type = ACPI_TYPE_INTEGER;
1193 	arg.Integer.Value = ACPI_APIC_MODE;	/* 1 */
1194 	rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL);
1195 	if (rv == AE_OK) {
1196 		build_reserved_irqlist((uchar_t *)apic_reserved_irqlist);
1197 		apic_enable_acpi = 1;
1198 		if (apic_use_acpi_madt_only) {
1199 			cmn_err(CE_CONT,
1200 			    "?Using ACPI for CPU/IOAPIC information ONLY\n");
1201 		}
1202 		return (PSM_SUCCESS);
1203 	}
1204 	/* if setting APIC mode failed above, we fall through to cleanup */
1205 
1206 cleanup:
1207 	if (apicadr != NULL) {
1208 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
1209 		apicadr = NULL;
1210 	}
1211 	apic_nproc = 0;
1212 	for (i = 0; i < apic_io_max; i++) {
1213 		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
1214 		apicioadr[i] = NULL;
1215 	}
1216 	apic_io_max = 0;
1217 	acpi_isop = NULL;
1218 	acpi_iso_cnt = 0;
1219 	acpi_nmi_sp = NULL;
1220 	acpi_nmi_scnt = 0;
1221 	acpi_nmi_cp = NULL;
1222 	acpi_nmi_ccnt = 0;
1223 	return (PSM_FAILURE);
1224 }
1225 
1226 /*
1227  * Handle default configuration. Fill in reqd global variables & tables
1228  * Fill all details as MP table does not give any more info
1229  */
1230 static int
1231 apic_handle_defconf()
1232 {
1233 	uint_t	lid;
1234 
1235 	/*LINTED: pointer cast may result in improper alignment */
1236 	apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR,
1237 	    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1238 	/*LINTED: pointer cast may result in improper alignment */
1239 	apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR,
1240 	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
1241 	apic_cpus = (apic_cpus_info_t *)
1242 	    kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP);
1243 	if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus))
1244 		goto apic_handle_defconf_fail;
1245 	CPUSET_ONLY(apic_cpumask, 0);
1246 	CPUSET_ADD(apic_cpumask, 1);
1247 	apic_nproc = 2;
1248 	lid = apicadr[APIC_LID_REG];
1249 	apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET);
1250 	/*
1251 	 * According to the PC+MP spec 1.1, the local ids
1252 	 * for the default configuration has to be 0 or 1
1253 	 */
1254 	if (apic_cpus[0].aci_local_id == 1)
1255 		apic_cpus[1].aci_local_id = 0;
1256 	else if (apic_cpus[0].aci_local_id == 0)
1257 		apic_cpus[1].aci_local_id = 1;
1258 	else
1259 		goto apic_handle_defconf_fail;
1260 
1261 	apic_io_id[0] = 2;
1262 	apic_io_max = 1;
1263 	if (apic_defconf >= 5) {
1264 		apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS;
1265 		apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS;
1266 		apic_io_ver[0] = APIC_INTEGRATED_VERS;
1267 	} else {
1268 		apic_cpus[0].aci_local_ver = 0;		/* 82489 DX */
1269 		apic_cpus[1].aci_local_ver = 0;
1270 		apic_io_ver[0] = 0;
1271 	}
1272 	if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6)
1273 		eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
1274 		    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
1275 	return (PSM_SUCCESS);
1276 
1277 apic_handle_defconf_fail:
1278 	if (apic_cpus)
1279 		kmem_free(apic_cpus, sizeof (*apic_cpus) * 2);
1280 	if (apicadr)
1281 		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
1282 	if (apicioadr[0])
1283 		psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN);
1284 	return (PSM_FAILURE);
1285 }
1286 
1287 /* Parse the entries in MP configuration table and collect info that we need */
1288 static int
1289 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
1290 {
1291 	struct	apic_procent	*procp;
1292 	struct	apic_bus	*busp;
1293 	struct	apic_io_entry	*ioapicp;
1294 	struct	apic_io_intr	*intrp;
1295 	volatile int32_t	*ioapic;
1296 	uint_t	lid;
1297 	int	id;
1298 	uchar_t hid;
1299 
1300 	/*LINTED: pointer cast may result in improper alignment */
1301 	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));
1302 
1303 	/* No need to count cpu entries if we won't use them */
1304 	if (!bypass_cpus_and_ioapics) {
1305 
1306 		/* Find max # of CPUS and allocate structure accordingly */
1307 		apic_nproc = 0;
1308 		CPUSET_ZERO(apic_cpumask);
1309 		while (procp->proc_entry == APIC_CPU_ENTRY) {
1310 			if (procp->proc_cpuflags & CPUFLAGS_EN) {
1311 				if (apic_nproc < NCPU)
1312 					CPUSET_ADD(apic_cpumask, apic_nproc);
1313 				apic_nproc++;
1314 			}
1315 			procp++;
1316 		}
1317 		if (apic_nproc > NCPU)
1318 			cmn_err(CE_WARN, "pcplusmp: exceeded "
1319 			    "maximum no. of CPUs (= %d)", NCPU);
1320 		if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *)
1321 		    kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP)))
1322 			return (PSM_FAILURE);
1323 	}
1324 
1325 	/*LINTED: pointer cast may result in improper alignment */
1326 	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));
1327 
1328 	/*
1329 	 * start with index 1 as 0 needs to be filled in with Boot CPU, but
1330 	 * if we're bypassing this information, it has already been filled
1331 	 * in by acpi_probe(), so don't overwrite it.
1332 	 */
1333 	if (!bypass_cpus_and_ioapics)
1334 		apic_nproc = 1;
1335 
1336 	while (procp->proc_entry == APIC_CPU_ENTRY) {
1337 		/* check whether the cpu exists or not */
1338 		if (!bypass_cpus_and_ioapics &&
1339 		    procp->proc_cpuflags & CPUFLAGS_EN) {
1340 			if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */
1341 				lid = apicadr[APIC_LID_REG];
1342 				apic_cpus[0].aci_local_id = procp->proc_apicid;
1343 				if (apic_cpus[0].aci_local_id !=
1344 				    (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) {
1345 					return (PSM_FAILURE);
1346 				}
1347 				apic_cpus[0].aci_local_ver =
1348 				    procp->proc_version;
1349 			} else {
1350 
1351 				apic_cpus[apic_nproc].aci_local_id =
1352 				    procp->proc_apicid;
1353 				apic_cpus[apic_nproc].aci_local_ver =
1354 				    procp->proc_version;
1355 				apic_nproc++;
1356 
1357 			}
1358 		}
1359 		procp++;
1360 	}
1361 
1362 	/*
1363 	 * Save start of bus entries for later use.
1364 	 * Get EISA level cntrl if EISA bus is present.
1365 	 * Also get the CPI bus id for single CPI bus case
1366 	 */
1367 	apic_busp = busp = (struct apic_bus *)procp;
1368 	while (busp->bus_entry == APIC_BUS_ENTRY) {
1369 		lid = apic_find_bus_type((char *)&busp->bus_str1);
1370 		if (lid	== BUS_EISA) {
1371 			eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
1372 			    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
1373 		} else if (lid == BUS_PCI) {
1374 			/*
1375 			 * apic_single_pci_busid will be used only if
1376 			 * apic_pic_bus_total is equal to 1
1377 			 */
1378 			apic_pci_bus_total++;
1379 			apic_single_pci_busid = busp->bus_id;
1380 		}
1381 		busp++;
1382 	}
1383 
1384 	ioapicp = (struct apic_io_entry *)busp;
1385 
1386 	if (!bypass_cpus_and_ioapics)
1387 		apic_io_max = 0;
1388 	do {
1389 		if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) {
1390 			if (ioapicp->io_flags & IOAPIC_FLAGS_EN) {
1391 				apic_io_id[apic_io_max] = ioapicp->io_apicid;
1392 				apic_io_ver[apic_io_max] = ioapicp->io_version;
1393 		/*LINTED: pointer cast may result in improper alignment */
1394 				apicioadr[apic_io_max] =
1395 				    (int32_t *)psm_map_phys(
1396 				    (uint32_t)ioapicp->io_apic_addr,
1397 				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
1398 
1399 				if (!apicioadr[apic_io_max])
1400 					return (PSM_FAILURE);
1401 
1402 				ioapic = apicioadr[apic_io_max];
1403 				ioapic[APIC_IO_REG] = APIC_ID_CMD;
1404 				id = ioapic[APIC_IO_DATA];
1405 				hid = (uchar_t)(((uint_t)id) >> 24);
1406 
1407 				if (hid != apic_io_id[apic_io_max]) {
1408 					if (apic_io_id[apic_io_max] == 0)
1409 						apic_io_id[apic_io_max] = hid;
1410 					else {
1411 						/*
1412 						 * set ioapic id to whatever
1413 						 * reported by MPS
1414 						 *
1415 						 * may not need to set index
1416 						 * again ???
1417 						 * take it out and try
1418 						 */
1419 
1420 						id = ((int32_t)
1421 						    apic_io_id[apic_io_max]) <<
1422 						    24;
1423 
1424 						ioapic[APIC_IO_REG] =
1425 						    APIC_ID_CMD;
1426 
1427 						ioapic[APIC_IO_DATA] = id;
1428 
1429 					}
1430 				}
1431 				apic_io_max++;
1432 			}
1433 		}
1434 		ioapicp++;
1435 	} while (ioapicp->io_entry == APIC_IO_ENTRY);
1436 
1437 	apic_io_intrp = (struct apic_io_intr *)ioapicp;
1438 
1439 	intrp = apic_io_intrp;
1440 	while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
1441 		if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) ||
1442 		    (apic_find_bus(intrp->intr_busid) == BUS_PCI)) {
1443 			apic_irq_translate = 1;
1444 			break;
1445 		}
1446 		intrp++;
1447 	}
1448 
1449 	return (PSM_SUCCESS);
1450 }
1451 
1452 boolean_t
1453 apic_cpu_in_range(int cpu)
1454 {
1455 	return ((cpu & ~IRQ_USER_BOUND) < apic_nproc);
1456 }
1457 
/*
 * Search a buffer for the "_MP_" signature.
 *
 * Only offsets that are multiples of 16 from cptr are examined.  A
 * candidate is considered only if all four signature bytes lie within
 * the first len bytes, so the buffer is never read past its end even
 * when len is not a multiple of 16.
 *
 * Returns a pointer to the first match, or NULL if there is none.
 */
static struct apic_mpfps_hdr *
apic_find_fps_sig(caddr_t cptr, int len)
{
	int	off;

	/* Look for the pattern "_MP_" */
	for (off = 0; off <= len - 4; off += 16) {
		caddr_t	p = cptr + off;

		if ((p[0] == '_') && (p[1] == 'M') &&
		    (p[2] == 'P') && (p[3] == '_')) {
			/*LINTED: pointer cast may result in improper alignment */
			return ((struct apic_mpfps_hdr *)p);
		}
	}
	return (NULL);
}
1474 
/*
 * Add up len bytes starting at bptr, keeping only the low eight bits of
 * the running total, and return that sum.  A result of 0 is what callers
 * in this file treat as a good checksum.
 */
static int
apic_checksum(caddr_t bptr, int len)
{
	uchar_t	sum = 0;
	int	remaining;

	for (remaining = len; remaining > 0; remaining--) {
		sum = (uchar_t)(sum + *bptr);
		bptr++;
	}
	return ((int)sum);
}
1486 
1487 
1488 /*
1489  * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable
1490  * are also set to NULL. vector->irq is set to a value which cannot map
1491  * to a real irq to show that it is free.
1492  */
1493 void
1494 apic_init()
1495 {
1496 	int	i;
1497 	int	*iptr;
1498 
1499 	int	j = 1;
1500 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
1501 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
1502 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
1503 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
1504 			/* get to highest vector at the same ipl */
1505 			continue;
1506 		for (; j <= apic_vectortoipl[i]; j++) {
1507 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
1508 			    APIC_BASE_VECT;
1509 		}
1510 	}
1511 	for (; j < MAXIPL + 1; j++)
1512 		/* fill up any empty ipltopri slots */
1513 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
1514 
1515 	/* cpu 0 is always up */
1516 	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
1517 
1518 	iptr = (int *)&apic_irq_table[0];
1519 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
1520 		apic_level_intr[i] = 0;
1521 		*iptr++ = NULL;
1522 		apic_vector_to_irq[i] = APIC_RESV_IRQ;
1523 		apic_reprogram_info[i].valid = 0;
1524 		apic_reprogram_info[i].bindcpu = 0;
1525 		apic_reprogram_info[i].timeouts = 0;
1526 	}
1527 
1528 	/*
1529 	 * Allocate a dummy irq table entry for the reserved entry.
1530 	 * This takes care of the race between removing an irq and
1531 	 * clock detecting a CPU in that irq during interrupt load
1532 	 * sampling.
1533 	 */
1534 	apic_irq_table[APIC_RESV_IRQ] =
1535 	    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
1536 
1537 	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
1538 	mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL);
1539 #if defined(__amd64)
1540 	/*
1541 	 * Make cpu-specific interrupt info point to cr8pri vector
1542 	 */
1543 	for (i = 0; i <= MAXIPL; i++)
1544 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
1545 	CPU->cpu_pri_data = apic_cr8pri;
1546 	intpri_use_cr8 = 1;
1547 #endif	/* __amd64 */
1548 }
1549 
1550 /*
1551  * handler for APIC Error interrupt. Just print a warning and continue
1552  */
1553 static int
1554 apic_error_intr()
1555 {
1556 	uint_t	error0, error1, error;
1557 	uint_t	i;
1558 
1559 	/*
1560 	 * We need to write before read as per 7.4.17 of system prog manual.
1561 	 * We do both and or the results to be safe
1562 	 */
1563 	error0 = apicadr[APIC_ERROR_STATUS];
1564 	apicadr[APIC_ERROR_STATUS] = 0;
1565 	error1 = apicadr[APIC_ERROR_STATUS];
1566 	error = error0 | error1;
1567 
1568 	/*
1569 	 * Clear the APIC error status (do this on all cpus that enter here)
1570 	 * (two writes are required due to the semantics of accessing the
1571 	 * error status register.)
1572 	 */
1573 	apicadr[APIC_ERROR_STATUS] = 0;
1574 	apicadr[APIC_ERROR_STATUS] = 0;
1575 
1576 	/*
1577 	 * Prevent more than 1 CPU from handling error interrupt causing
1578 	 * double printing (interleave of characters from multiple
1579 	 * CPU's when using prom_printf)
1580 	 */
1581 	if (lock_try(&apic_error_lock) == 0)
1582 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
1583 	if (error) {
1584 #if	DEBUG
1585 		if (apic_debug)
1586 			debug_enter("pcplusmp: APIC Error interrupt received");
1587 #endif /* DEBUG */
1588 		if (apic_panic_on_apic_error)
1589 			cmn_err(CE_PANIC,
1590 			    "APIC Error interrupt on CPU %d. Status = %x\n",
1591 			    psm_get_cpu_id(), error);
1592 		else {
1593 			if ((error & ~APIC_CS_ERRORS) == 0) {
1594 				/* cksum error only */
1595 				apic_error |= APIC_ERR_APIC_ERROR;
1596 				apic_apic_error |= error;
1597 				apic_num_apic_errors++;
1598 				apic_num_cksum_errors++;
1599 			} else {
1600 				/*
1601 				 * prom_printf is the best shot we have of
1602 				 * something which is problem free from
1603 				 * high level/NMI type of interrupts
1604 				 */
1605 				prom_printf("APIC Error interrupt on CPU %d. "
1606 				    "Status 0 = %x, Status 1 = %x\n",
1607 				    psm_get_cpu_id(), error0, error1);
1608 				apic_error |= APIC_ERR_APIC_ERROR;
1609 				apic_apic_error |= error;
1610 				apic_num_apic_errors++;
1611 				for (i = 0; i < apic_error_display_delay; i++) {
1612 					tenmicrosec();
1613 				}
1614 				/*
1615 				 * provide more delay next time limited to
1616 				 * roughly 1 clock tick time
1617 				 */
1618 				if (apic_error_display_delay < 500)
1619 					apic_error_display_delay *= 2;
1620 			}
1621 		}
1622 		lock_clear(&apic_error_lock);
1623 		return (DDI_INTR_CLAIMED);
1624 	} else {
1625 		lock_clear(&apic_error_lock);
1626 		return (DDI_INTR_UNCLAIMED);
1627 	}
1628 	/* NOTREACHED */
1629 }
1630 
1631 /*
1632  * Turn off the mask bit in the performance counter Local Vector Table entry.
1633  */
1634 static void
1635 apic_cpcovf_mask_clear(void)
1636 {
1637 	apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK;
1638 }
1639 
/*
 * Program the local APIC registers through apicadr for the cpu whose id
 * psm_get_cpu_id() returns: block all priorities, select the destination
 * model and logical destination, enable the APIC, mask the timer and
 * local interrupt 0, and enable NMI on local interrupt 1.
 *
 * For APIC versions of at least APIC_INTEGRATED_VERS the performance
 * counter overflow and error interrupts are set up as well, if enabled.
 * Their vectors are allocated and their handlers registered only the
 * first time through (while apic_cpcovf_vect / apic_errvect are still 0).
 */
static void
apic_init_intr()
{
	processorid_t	cpun = psm_get_cpu_id();

	/* keep everything blocked while the APIC is being programmed */
#if defined(__amd64)
	setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT));
#else
	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
#endif

	if (apic_flat_model)
		apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL;
	else
		apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL;
	/* the destination value is derived from the cpu number */
	apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun;

	/* need to enable APIC before unmasking NMI */
	apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR;

	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
	apicadr[APIC_INT_VECT0]	= AV_MASK;	/* local intr reg 0 */
	apicadr[APIC_INT_VECT1] = AV_NMI;	/* enable NMI */

	/* the remaining setup is skipped for older APIC versions */
	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS)
		return;

	/* Enable performance counter overflow interrupt */

	/* without the X86_MSR feature the overflow interrupt is disabled */
	if ((x86_feature & X86_MSR) != X86_MSR)
		apic_enable_cpcovf_intr = 0;
	if (apic_enable_cpcovf_intr) {
		if (apic_cpcovf_vect == 0) {
			/* first caller: allocate the vector, add the handler */
			int ipl = APIC_PCINT_IPL;
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_cpcovf_vect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_cpcovf_vect);
			(void) add_avintr(NULL, ipl,
			    (avfunc)kcpc_hw_overflow_intr,
			    "apic pcint", irq, NULL, NULL, NULL, NULL);
			kcpc_hw_overflow_intr_installed = 1;
			kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear;
		}
		apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect;
	}

	/* Enable error interrupt */

	if (apic_enable_error_intr) {
		if (apic_errvect == 0) {
			/* first caller: allocate the vector, add the handler */
			int ipl = 0xf;	/* get highest priority intr */
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_errvect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr((void *)NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    irq, NULL, NULL, NULL, NULL);
		}
		apicadr[APIC_ERR_VECT] = apic_errvect;
		/* the error status register is written twice to clear it */
		apicadr[APIC_ERROR_STATUS] = 0;
		apicadr[APIC_ERROR_STATUS] = 0;
	}
}
1711 
/*
 * Quiesce the local APIC reached through apicadr: block all priorities,
 * mask every local vector table entry programmed by this module, and
 * rewrite the spurious interrupt register without AV_UNIT_ENABLE (the
 * bit apic_init_intr() sets to enable the APIC).
 */
static void
apic_disable_local_apic()
{
	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
	apicadr[APIC_INT_VECT0] = AV_MASK;	/* local intr reg 0 */
	apicadr[APIC_INT_VECT1] = AV_MASK;	/* disable NMI */
	apicadr[APIC_ERR_VECT] = AV_MASK;	/* and error interrupt */
	apicadr[APIC_PCINT_VECT] = AV_MASK;	/* and perf counter intr */
	/* spurious vector only, enable bit left clear */
	apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR;
}
1723 
1724 static void
1725 apic_picinit(void)
1726 {
1727 	int i, j;
1728 	uint_t isr;
1729 	volatile int32_t *ioapic;
1730 	apic_irq_t	*irqptr;
1731 	struct intrspec ispec;
1732 
1733 	/*
1734 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
1735 	 * bit on without clearing it with EOI.  Since softint
1736 	 * uses vector 0x20 to interrupt itself, so softint will
1737 	 * not work on this machine.  In order to fix this problem
1738 	 * a check is made to verify all the isr bits are clear.
1739 	 * If not, EOIs are issued to clear the bits.
1740 	 */
1741 	for (i = 7; i >= 1; i--) {
1742 		if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0)
1743 			for (j = 0; ((j < 32) && (isr != 0)); j++)
1744 				if (isr & (1 << j)) {
1745 					apicadr[APIC_EOI_REG] = 0;
1746 					isr &= ~(1 << j);
1747 					apic_error |= APIC_ERR_BOOT_EOI;
1748 				}
1749 	}
1750 
1751 	/* set a flag so we know we have run apic_picinit() */
1752 	apic_flag = 1;
1753 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
1754 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
1755 	LOCK_INIT_CLEAR(&apic_revector_lock);
1756 	LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock);
1757 	LOCK_INIT_CLEAR(&apic_error_lock);
1758 
1759 	picsetup();	 /* initialise the 8259 */
1760 
1761 	/* add nmi handler - least priority nmi handler */
1762 	LOCK_INIT_CLEAR(&apic_nmi_lock);
1763 
1764 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
1765 	    "pcplusmp NMI handler", (caddr_t)NULL))
1766 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
1767 
1768 	apic_init_intr();
1769 
1770 	/* enable apic mode if imcr present */
1771 	if (apic_imcrp) {
1772 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1773 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
1774 	}
1775 
1776 	/* mask interrupt vectors					*/
1777 	for (j = 0; j < apic_io_max; j++) {
1778 		int intin_max;
1779 		ioapic = apicioadr[j];
1780 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
1781 		/* Bits 23-16 define the maximum redirection entries */
1782 		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
1783 		for (i = 0; i < intin_max; i++) {
1784 			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
1785 			ioapic[APIC_IO_DATA] = AV_MASK;
1786 		}
1787 	}
1788 
1789 	/*
1790 	 * Hack alert: deal with ACPI SCI interrupt chicken/egg here
1791 	 */
1792 	if (apic_sci_vect > 0) {
1793 		/*
1794 		 * acpica has already done add_avintr(); we just
1795 		 * to finish the job by mimicing translate_irq()
1796 		 *
1797 		 * Fake up an intrspec and setup the tables
1798 		 */
1799 		ispec.intrspec_vec = apic_sci_vect;
1800 		ispec.intrspec_pri = SCI_IPL;
1801 
1802 		if (apic_setup_irq_table(NULL, apic_sci_vect, NULL,
1803 		    &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) {
1804 			cmn_err(CE_WARN, "!apic: SCI setup failed");
1805 			return;
1806 		}
1807 		irqptr = apic_irq_table[apic_sci_vect];
1808 
1809 		/* Program I/O APIC */
1810 		(void) apic_setup_io_intr(irqptr, apic_sci_vect);
1811 
1812 		irqptr->airq_share++;
1813 	}
1814 }
1815 
1816 
/*
 * Start cpu number cpun by sending IPIs to its local APIC.
 *
 * The target's local APIC id is taken from apic_cpus[cpun].  With
 * interrupts disabled, the CMOS shutdown byte is set, an INIT IPI is
 * asserted and then de-asserted, and, when the target's APIC version is
 * at least APIC_INTEGRATED_VERS, two STARTUP IPIs are sent whose vector
 * is derived from rm_platter_pa.
 *
 * The rm_code argument is overwritten below and not otherwise used in
 * this function.
 */
static void
apic_cpu_start(processorid_t cpun, caddr_t rm_code)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		cpu_id, iflag;

	cpu_id = apic_cpus[cpun].aci_local_id;

	apic_cmos_ssb_set = 1;

	/*
	 * Interrupts on BSP cpu will be disabled during these startup
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */

	iflag = intr_clear();
	outb(CMOS_ADDR, SSB);
	outb(CMOS_DATA, BIOS_SHUTDOWN);

	/* wait until no earlier command is still pending */
	while (get_apic_cmd1() & AV_PENDING)
		apic_ret();

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
	apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET;

	/* If only 1 CPU is installed, PENDING bit will not go low */
	/* so the wait is bounded instead of open-ended */
	for (loop_count = 0x1000; loop_count; loop_count--)
		if (get_apic_cmd1() & AV_PENDING)
			apic_ret();
		else
			break;

	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
	apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET;

	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		rm_code = (caddr_t)(uintptr_t)rm_platter_pa;
		/* the startup vector is the page number of rm_platter_pa */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;

		drv_usecwait(200);		/* 200 micro sec */

		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;

		drv_usecwait(200);		/* 200 micro sec */
	}
	intr_restore(iflag);
}
1878 
1879 
#ifdef	DEBUG
/*
 * Variables and a hook that exist only in DEBUG builds.
 * NOTE(review): nothing in this part of the file reads these variables;
 * their exact meaning should be confirmed against the code that uses them.
 */
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */

/*
 * Deliberately empty.  Presumably a convenient place to set a debugger
 * breakpoint -- confirm with the callers.
 */
void
apic_break()
{
}
#endif /* DEBUG */
1890 
/*
 * apic_intr_enter
 *
 *	Called at the beginning of the interrupt service routine to
 *	mask all level equal to and below the interrupt priority
 *	of the interrupting vector.  An EOI should be given to
 *	the interrupt controller to enable other HW interrupts.
 *
 *	On entry *vectorp holds the vector as pushed by the common
 *	interrupt code (hardware vector minus APIC_BASE_VECT); on a
 *	non-spurious return it has been replaced by the irq looked up
 *	in apic_vector_to_irq[].
 *
 *	Returns the new IPL, or APIC_INT_SPURIOUS for spurious interrupts.
 *	The ipl argument is not used.
 */
/*ARGSUSED*/
static int
apic_intr_enter(int ipl, int *vectorp)
{
	uchar_t vector;
	int nipl;
	int irq, iflag;
	apic_cpus_info_t *cpu_infop;

	/*
	 * The real vector programmed in APIC is *vectorp + 0x20
	 * But, cmnint code subtracts 0x20 before pushing it.
	 * Hence APIC_BASE_VECT is 0x20.
	 */

	vector = (uchar_t)*vectorp;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == apic_clkvect) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/*
			 * apic_hrtime_stamp is bumped before and after the
			 * update, so it is odd while the update is in
			 * progress; apic_gettime()/apic_gethrtime() spin on
			 * an odd stamp and retry if it changed.
			 */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apic_redistribute_compute();
		}

		/* We will avoid all the book keeping overhead for clock */
		nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
#if defined(__amd64)
		setcr8((ulong_t)apic_cr8pri[nipl]);
#else
		apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
#endif
		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
		/* the clock interrupt is always EOI'ed right here */
		apicadr[APIC_EOI_REG] = 0;
		return (nipl);
	}

	cpu_infop = &apic_cpus[psm_get_cpu_id()];

	/* spurious vector: count it and return without touching priority */
	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* Check if the vector we got is really what we need */
	if (apic_revector_pending) {
		/*
		 * Disable interrupts for the duration of
		 * the vector translation to prevent a self-race for
		 * the apic_revector_lock.  This cannot be done
		 * in apic_xlate_vector because it is recursive and
		 * we want the vector translation to be atomic with
		 * respect to other (higher-priority) interrupts.
		 */
		iflag = intr_clear();
		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
		    APIC_BASE_VECT;
		intr_restore(iflag);
	}

	nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];

	/* raise the processor priority to the new IPL */
#if defined(__amd64)
	setcr8((ulong_t)apic_cr8pri[nipl]);
#else
	apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
#endif

	/* per-CPU bookkeeping: irq in service at this IPL, current IPL */
	cpu_infop->aci_current[nipl] = (uchar_t)irq;
	cpu_infop->aci_curipl = (uchar_t)nipl;
	cpu_infop->aci_ISR_in_progress |= 1 << nipl;

	/*
	 * apic_level_intr could have been assimilated into the irq struct.
	 * but, having it as a character array is more efficient in terms of
	 * cache usage. So, we leave it as is.
	 *
	 * Irqs flagged in apic_level_intr[] are not EOI'ed here;
	 * apic_intr_exit() writes the EOI for them instead.
	 */
	if (!apic_level_intr[irq])
		apicadr[APIC_EOI_REG] = 0;

#ifdef	DEBUG
	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(irq);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	/* optionally delay to stretch interrupts at the selected IPLs */
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);

	if (apic_break_on_cpu == psm_get_cpu_id())
		apic_break();
#endif /* DEBUG */
	return (nipl);
}
1999 
2000 static void
2001 apic_intr_exit(int prev_ipl, int irq)
2002 {
2003 	apic_cpus_info_t *cpu_infop;
2004 
2005 #if defined(__amd64)
2006 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
2007 #else
2008 	apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
2009 #endif
2010 
2011 	cpu_infop = &apic_cpus[psm_get_cpu_id()];
2012 	if (apic_level_intr[irq])
2013 		apicadr[APIC_EOI_REG] = 0;
2014 
2015 	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
2016 	/* ISR above current pri could not be in progress */
2017 	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
2018 }
2019 
2020 /*
2021  * Mask all interrupts below or equal to the given IPL
2022  */
2023 static void
2024 apic_setspl(int ipl)
2025 {
2026 
2027 #if defined(__amd64)
2028 	setcr8((ulong_t)apic_cr8pri[ipl]);
2029 #else
2030 	apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
2031 #endif
2032 
2033 	/* interrupts at ipl above this cannot be in progress */
2034 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
2035 	/*
2036 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
2037 	 * have enough time to come in before the priority is raised again
2038 	 * during the idle() loop.
2039 	 */
2040 	if (apic_setspl_delay)
2041 		(void) get_apic_pri();
2042 }
2043 
2044 /*
2045  * trigger a software interrupt at the given IPL
2046  */
2047 static void
2048 apic_set_softintr(int ipl)
2049 {
2050 	int vector;
2051 	uint_t flag;
2052 
2053 	vector = apic_resv_vector[ipl];
2054 
2055 	flag = intr_clear();
2056 
2057 	while (get_apic_cmd1() & AV_PENDING)
2058 		apic_ret();
2059 
2060 	/* generate interrupt at vector on itself only */
2061 	apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector;
2062 
2063 	intr_restore(flag);
2064 }
2065 
2066 /*
2067  * generates an interprocessor interrupt to another CPU
2068  */
2069 static void
2070 apic_send_ipi(int cpun, int ipl)
2071 {
2072 	int vector;
2073 	uint_t flag;
2074 
2075 	vector = apic_resv_vector[ipl];
2076 
2077 	flag = intr_clear();
2078 
2079 	while (get_apic_cmd1() & AV_PENDING)
2080 		apic_ret();
2081 
2082 	apicadr[APIC_INT_CMD2] =
2083 	    apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET;
2084 	apicadr[APIC_INT_CMD1] = vector;
2085 
2086 	intr_restore(flag);
2087 }
2088 
2089 
/*
 * Intentionally a no-op in this module; cpun is not used.
 */
/*ARGSUSED*/
static void
apic_set_idlecpu(processorid_t cpun)
{
}
2095 
/*
 * Intentionally a no-op in this module; cpun is not used.
 */
/*ARGSUSED*/
static void
apic_unset_idlecpu(processorid_t cpun)
{
}
2101 
2102 
/*
 * Empty function used as the body of the busy-wait loops in this file
 * (e.g. while waiting for AV_PENDING to clear or for an even
 * apic_hrtime_stamp).
 */
static void
apic_ret()
{
}
2107 
2108 static int
2109 get_apic_cmd1()
2110 {
2111 	return (apicadr[APIC_INT_CMD1]);
2112 }
2113 
/*
 * Return the current processor priority: %cr8 on amd64, otherwise the
 * local APIC's APIC_TASK_REG register.
 */
static int
get_apic_pri()
{
	int pri;

#if defined(__amd64)
	pri = (int)getcr8();
#else
	pri = apicadr[APIC_TASK_REG];
#endif
	return (pri);
}
2123 
2124 /*
2125  * If apic_coarse_time == 1, then apic_gettime() is used instead of
2126  * apic_gethrtime().  This is used for performance instead of accuracy.
2127  */
2128 
2129 static hrtime_t
2130 apic_gettime()
2131 {
2132 	int old_hrtime_stamp;
2133 	hrtime_t temp;
2134 
2135 	/*
2136 	 * In one-shot mode, we do not keep time, so if anyone
2137 	 * calls psm_gettime() directly, we vector over to
2138 	 * gethrtime().
2139 	 * one-shot mode MUST NOT be enabled if this psm is the source of
2140 	 * hrtime.
2141 	 */
2142 
2143 	if (apic_oneshot)
2144 		return (gethrtime());
2145 
2146 
2147 gettime_again:
2148 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
2149 		apic_ret();
2150 
2151 	temp = apic_nsec_since_boot;
2152 
2153 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
2154 		goto gettime_again;
2155 	}
2156 	return (temp);
2157 }
2158 
/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The result is apic_nsec_since_boot plus the time elapsed in the current
 * timer period, derived from CPU 0's APIC current-count register.  The
 * value returned never goes backwards relative to apic_last_hrtime; if the
 * remote read of CPU 0's counter fails, apic_last_hrtime is returned.
 */
static hrtime_t
apic_gethrtime()
{
	int curr_timeval, countval, elapsed_ticks, oflags;
	int old_hrtime_stamp, status;
	hrtime_t temp;
	uchar_t	cpun;


	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gethrtime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());

	oflags = intr_clear();	/* prevent migration */

	/* local APIC id of the CPU we are running on */
	cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET);

	lock_set(&apic_gethrtime_lock);

gethrtime_again:
	/* an odd stamp means the clock handler is updating the time base */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	/*
	 * Check to see which CPU we are on.  Note the time is kept on
	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
	 * counter.  If on another CPU, issue a remote read command to CPU 0.
	 */
	if (cpun == apic_cpus[0].aci_local_id) {
		countval = apicadr[APIC_CURR_COUNT];
	} else {
		/* wait for any earlier command to be accepted */
		while (get_apic_cmd1() & AV_PENDING)
			apic_ret();

		apicadr[APIC_INT_CMD2] =
		    apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE;

		/* spin until the remote read has completed */
		while ((status = get_apic_cmd1()) & AV_READ_PENDING)
			apic_ret();

		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
			countval = apicadr[APIC_REMOTE_READ];
		else {	/* 0 = invalid */
			apic_remote_hrterr++;
			/*
			 * return last hrtime right now, will need more
			 * testing if change to retry
			 */
			temp = apic_last_hrtime;

			lock_clear(&apic_gethrtime_lock);

			intr_restore(oflags);

			return (temp);
		}
	}
	/*
	 * A count larger than the last one read in this period is treated
	 * as zero, so that elapsed_ticks below becomes a full period.
	 */
	if (countval > last_count_read)
		countval = 0;
	else
		last_count_read = countval;

	elapsed_ticks = apic_hertz_count - countval;

	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
	temp = apic_nsec_since_boot + curr_timeval;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		/* we might have clobbered last_count_read. Restore it */
		last_count_read = apic_hertz_count;
		goto gethrtime_again;
	}

	/* never hand out a time earlier than the previous one */
	if (temp < apic_last_hrtime) {
		/* return last hrtime if error occurs */
		apic_hrtime_error++;
		temp = apic_last_hrtime;
	}
	else
		apic_last_hrtime = temp;

	lock_clear(&apic_gethrtime_lock);
	intr_restore(oflags);

	return (temp);
}
2258 
2259 /* apic NMI handler */
2260 /*ARGSUSED*/
2261 static void
2262 apic_nmi_intr(caddr_t arg)
2263 {
2264 	if (apic_shutdown_processors) {
2265 		apic_disable_local_apic();
2266 		return;
2267 	}
2268 
2269 	if (lock_try(&apic_nmi_lock)) {
2270 		if (apic_kmdb_on_nmi) {
2271 			if (psm_debugger() == 0) {
2272 				cmn_err(CE_PANIC,
2273 				    "NMI detected, kmdb is not available.");
2274 			} else {
2275 				debug_enter("\nNMI detected, entering kmdb.\n");
2276 			}
2277 		} else {
2278 			if (apic_panic_on_nmi) {
2279 				/* Keep panic from entering kmdb. */
2280 				nopanicdebug = 1;
2281 				cmn_err(CE_PANIC, "pcplusmp: NMI received");
2282 			} else {
2283 				/*
2284 				 * prom_printf is the best shot we have
2285 				 * of something which is problem free from
2286 				 * high level/NMI type of interrupts
2287 				 */
2288 				prom_printf("pcplusmp: NMI received\n");
2289 				apic_error |= APIC_ERR_NMI;
2290 				apic_num_nmis++;
2291 			}
2292 		}
2293 		lock_clear(&apic_nmi_lock);
2294 	}
2295 }
2296 
2297 /*
2298  * Add mask bits to disable interrupt vector from happening
2299  * at or above IPL. In addition, it should remove mask bits
2300  * to enable interrupt vectors below the given IPL.
2301  *
2302  * Both add and delspl are complicated by the fact that different interrupts
2303  * may share IRQs. This can happen in two ways.
2304  * 1. The same H/W line is shared by more than 1 device
2305  * 1a. with interrupts at different IPLs
2306  * 1b. with interrupts at same IPL
2307  * 2. We ran out of vectors at a given IPL and started sharing vectors.
2308  * 1b and 2 should be handled gracefully, except for the fact some ISRs
2309  * will get called often when no interrupt is pending for the device.
2310  * For 1a, we just hope that the machine blows up with the person who
2311  * set it up that way!. In the meantime, we handle it at the higher IPL.
2312  */
2313 /*ARGSUSED*/
2314 static int
2315 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
2316 {
2317 	uchar_t vector;
2318 	int iflag;
2319 	apic_irq_t *irqptr, *irqheadptr;
2320 	int irqindex;
2321 
2322 	ASSERT(max_ipl <= UCHAR_MAX);
2323 	irqindex = IRQINDEX(irqno);
2324 
2325 	if ((irqindex == -1) || (!apic_irq_table[irqindex]))
2326 		return (PSM_FAILURE);
2327 
2328 	irqptr = irqheadptr = apic_irq_table[irqindex];
2329 
2330 	DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x "
2331 	    "vector=0x%x\n", (void *)irqptr->airq_dip,
2332 	    irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));
2333 
2334 	while (irqptr) {
2335 		if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
2336 			break;
2337 		irqptr = irqptr->airq_next;
2338 	}
2339 	irqptr->airq_share++;
2340 
2341 	/* return if it is not hardware interrupt */
2342 	if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
2343 		return (PSM_SUCCESS);
2344 
2345 	/* Or if there are more interupts at a higher IPL */
2346 	if (ipl != max_ipl)
2347 		return (PSM_SUCCESS);
2348 
2349 	/*
2350 	 * if apic_picinit() has not been called yet, just return.
2351 	 * At the end of apic_picinit(), we will call setup_io_intr().
2352 	 */
2353 
2354 	if (!apic_flag)
2355 		return (PSM_SUCCESS);
2356 
2357 	iflag = intr_clear();
2358 
2359 	/*
2360 	 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate,
2361 	 * return failure. Not very elegant, but then we hope the
2362 	 * machine will blow up with ...
2363 	 */
2364 	if (irqptr->airq_ipl != max_ipl) {
2365 		vector = apic_allocate_vector(max_ipl, irqindex, 1);
2366 		if (vector == 0) {
2367 			intr_restore(iflag);
2368 			irqptr->airq_share--;
2369 			return (PSM_FAILURE);
2370 		}
2371 		irqptr = irqheadptr;
2372 		apic_mark_vector(irqptr->airq_vector, vector);
2373 		while (irqptr) {
2374 			irqptr->airq_vector = vector;
2375 			irqptr->airq_ipl = (uchar_t)max_ipl;
2376 			/*
2377 			 * reprogram irq being added and every one else
2378 			 * who is not in the UNINIT state
2379 			 */
2380 			if ((VIRTIRQ(irqindex, irqptr->airq_share_id) ==
2381 			    irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) {
2382 				apic_record_rdt_entry(irqptr, irqindex);
2383 				(void) apic_setup_io_intr(irqptr, irqindex);
2384 			}
2385 			irqptr = irqptr->airq_next;
2386 		}
2387 		intr_restore(iflag);
2388 		return (PSM_SUCCESS);
2389 	}
2390 
2391 	ASSERT(irqptr);
2392 	(void) apic_setup_io_intr(irqptr, irqindex);
2393 	intr_restore(iflag);
2394 	return (PSM_SUCCESS);
2395 }
2396 
2397 /*
2398  * Recompute mask bits for the given interrupt vector.
2399  * If there is no interrupt servicing routine for this
2400  * vector, this function should disable interrupt vector
2401  * from happening at all IPLs. If there are still
2402  * handlers using the given vector, this function should
2403  * disable the given vector from happening below the lowest
2404  * IPL of the remaining hadlers.
2405  */
2406 /*ARGSUSED*/
2407 static int
2408 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
2409 {
2410 	uchar_t vector, bind_cpu;
2411 	int	iflag, intin, irqindex;
2412 	volatile int32_t *ioapic;
2413 	apic_irq_t	*irqptr, *irqheadptr;
2414 
2415 	irqindex = IRQINDEX(irqno);
2416 	irqptr = irqheadptr = apic_irq_table[irqindex];
2417 
2418 	DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x "
2419 	    "vector=0x%x\n", (void *)irqptr->airq_dip,
2420 	    irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));
2421 
2422 	while (irqptr) {
2423 		if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
2424 			break;
2425 		irqptr = irqptr->airq_next;
2426 	}
2427 	ASSERT(irqptr);
2428 
2429 	irqptr->airq_share--;
2430 
2431 	if (ipl < max_ipl)
2432 		return (PSM_SUCCESS);
2433 
2434 	/* return if it is not hardware interrupt */
2435 	if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
2436 		return (PSM_SUCCESS);
2437 
2438 	if (!apic_flag) {
2439 		/*
2440 		 * Clear irq_struct. If two devices shared an intpt
2441 		 * line & 1 unloaded before picinit, we are hosed. But, then
2442 		 * we hope the machine will ...
2443 		 */
2444 		irqptr->airq_mps_intr_index = FREE_INDEX;
2445 		irqptr->airq_temp_cpu = IRQ_UNINIT;
2446 		apic_free_vector(irqptr->airq_vector);
2447 		return (PSM_SUCCESS);
2448 	}
2449 	/*
2450 	 * Downgrade vector to new max_ipl if needed.If we cannot allocate,
2451 	 * use old IPL. Not very elegant, but then we hope ...
2452 	 */
2453 	if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) {
2454 		apic_irq_t	*irqp;
2455 		if (vector = apic_allocate_vector(max_ipl, irqno, 1)) {
2456 			apic_mark_vector(irqheadptr->airq_vector, vector);
2457 			irqp = irqheadptr;
2458 			while (irqp) {
2459 				irqp->airq_vector = vector;
2460 				irqp->airq_ipl = (uchar_t)max_ipl;
2461 				if (irqp->airq_temp_cpu != IRQ_UNINIT) {
2462 					apic_record_rdt_entry(irqp, irqindex);
2463 					(void) apic_setup_io_intr(irqp,
2464 					    irqindex);
2465 				}
2466 				irqp = irqp->airq_next;
2467 			}
2468 		}
2469 	}
2470 
2471 	if (irqptr->airq_share)
2472 		return (PSM_SUCCESS);
2473 
2474 	ioapic = apicioadr[irqptr->airq_ioapicindex];
2475 	intin = irqptr->airq_intin_no;
2476 	iflag = intr_clear();
2477 	lock_set(&apic_ioapic_lock);
2478 	ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin;
2479 	ioapic[APIC_IO_DATA] = AV_MASK;
2480 
2481 	/* Disable the MSI/X vector */
2482 	if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) {
2483 		int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ?
2484 		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
2485 
2486 		/*
2487 		 * Make sure we only disable on the last
2488 		 * of the multi-MSI support
2489 		 */
2490 		if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) {
2491 			(void) apic_pci_msi_unconfigure(irqptr->airq_dip,
2492 			    type, irqptr->airq_ioapicindex);
2493 			(void) apic_pci_msi_disable_mode(irqptr->airq_dip,
2494 			    type, irqptr->airq_ioapicindex);
2495 		}
2496 	}
2497 
2498 	if (max_ipl == PSM_INVALID_IPL) {
2499 		ASSERT(irqheadptr == irqptr);
2500 		bind_cpu = irqptr->airq_temp_cpu;
2501 		if (((uchar_t)bind_cpu != IRQ_UNBOUND) &&
2502 		    ((uchar_t)bind_cpu != IRQ_UNINIT)) {
2503 			ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
2504 			if (bind_cpu & IRQ_USER_BOUND) {
2505 				/* If hardbound, temp_cpu == cpu */
2506 				bind_cpu &= ~IRQ_USER_BOUND;
2507 				apic_cpus[bind_cpu].aci_bound--;
2508 			} else
2509 				apic_cpus[bind_cpu].aci_temp_bound--;
2510 		}
2511 		lock_clear(&apic_ioapic_lock);
2512 		intr_restore(iflag);
2513 		irqptr->airq_temp_cpu = IRQ_UNINIT;
2514 		irqptr->airq_mps_intr_index = FREE_INDEX;
2515 		apic_free_vector(irqptr->airq_vector);
2516 		return (PSM_SUCCESS);
2517 	}
2518 	lock_clear(&apic_ioapic_lock);
2519 	intr_restore(iflag);
2520 
2521 	mutex_enter(&airq_mutex);
2522 	if ((irqptr == apic_irq_table[irqindex])) {
2523 		apic_irq_t	*oldirqptr;
2524 		/* Move valid irq entry to the head */
2525 		irqheadptr = oldirqptr = irqptr;
2526 		irqptr = irqptr->airq_next;
2527 		ASSERT(irqptr);
2528 		while (irqptr) {
2529 			if (irqptr->airq_mps_intr_index != FREE_INDEX)
2530 				break;
2531 			oldirqptr = irqptr;
2532 			irqptr = irqptr->airq_next;
2533 		}
2534 		/* remove all invalid ones from the beginning */
2535 		apic_irq_table[irqindex] = irqptr;
2536 		/*
2537 		 * and link them back after the head. The invalid ones
2538 		 * begin with irqheadptr and end at oldirqptr
2539 		 */
2540 		oldirqptr->airq_next = irqptr->airq_next;
2541 		irqptr->airq_next = irqheadptr;
2542 	}
2543 	mutex_exit(&airq_mutex);
2544 
2545 	irqptr->airq_temp_cpu = IRQ_UNINIT;
2546 	irqptr->airq_mps_intr_index = FREE_INDEX;
2547 	return (PSM_SUCCESS);
2548 }
2549 
/*
 * Return HW interrupt number corresponding to the given IPL.
 * This implementation always returns PSM_SV_SOFTWARE; ipl is not used.
 */
/*ARGSUSED*/
static int
apic_softlvl_to_irq(int ipl)
{
	/*
	 * Do not use the apic to trigger soft interrupts.
	 * Doing so causes the system to hang when 2 hardware interrupts
	 * at the same priority as the softint have already been accepted
	 * by the apic, because the AV_PENDING bit will not be cleared
	 * until one of the hardware interrupts is eoi'ed.  If we need
	 * to send an ipi at that time, we will end up looping forever
	 * waiting for the AV_PENDING bit to clear.
	 */
	return (PSM_SV_SOFTWARE);
}
2568 
2569 static int
2570 apic_post_cpu_start()
2571 {
2572 	int i, cpun;
2573 	apic_irq_t *irq_ptr;
2574 
2575 	apic_init_intr();
2576 
2577 	/*
2578 	 * since some systems don't enable the internal cache on the non-boot
2579 	 * cpus, so we have to enable them here
2580 	 */
2581 	setcr0(getcr0() & ~(0x60000000));
2582 
2583 	while (get_apic_cmd1() & AV_PENDING)
2584 		apic_ret();
2585 
2586 	cpun = psm_get_cpu_id();
2587 	apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
2588 
2589 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
2590 		irq_ptr = apic_irq_table[i];
2591 		if ((irq_ptr == NULL) ||
2592 		    ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun))
2593 			continue;
2594 
2595 		while (irq_ptr) {
2596 			if (irq_ptr->airq_temp_cpu != IRQ_UNINIT)
2597 				(void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE);
2598 			irq_ptr = irq_ptr->airq_next;
2599 		}
2600 	}
2601 
2602 	apicadr[APIC_DIVIDE_REG] = apic_divide_reg_init;
2603 	return (PSM_SUCCESS);
2604 }
2605 
2606 processorid_t
2607 apic_get_next_processorid(processorid_t cpu_id)
2608 {
2609 
2610 	int i;
2611 
2612 	if (cpu_id == -1)
2613 		return ((processorid_t)0);
2614 
2615 	for (i = cpu_id + 1; i < NCPU; i++) {
2616 		if (CPU_IN_SET(apic_cpumask, i))
2617 			return (i);
2618 	}
2619 
2620 	return ((processorid_t)-1);
2621 }
2622 
2623 
2624 /*
2625  * type == -1 indicates it is an internal request. Do not change
2626  * resv_vector for these requests
2627  */
2628 static int
2629 apic_get_ipivect(int ipl, int type)
2630 {
2631 	uchar_t vector;
2632 	int irq;
2633 
2634 	if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) {
2635 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
2636 			apic_irq_table[irq]->airq_mps_intr_index =
2637 			    RESERVE_INDEX;
2638 			apic_irq_table[irq]->airq_vector = vector;
2639 			if (type != -1) {
2640 				apic_resv_vector[ipl] = vector;
2641 			}
2642 			return (irq);
2643 		}
2644 	}
2645 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
2646 	return (-1);	/* shouldn't happen */
2647 }
2648 
2649 static int
2650 apic_getclkirq(int ipl)
2651 {
2652 	int	irq;
2653 
2654 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
2655 		return (-1);
2656 	/*
2657 	 * Note the vector in apic_clkvect for per clock handling.
2658 	 */
2659 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
2660 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
2661 	    apic_clkvect));
2662 	return (irq);
2663 }
2664 
2665 
/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 *
 * addr is the base of the local APIC register block; the current-count
 * register is read at addr + APIC_CURR_COUNT.  *pit_ticks_adj is set to
 * the number of PIT ticks by which the measurement overshot
 * APIC_TIME_COUNT.  Interrupts are disabled for the duration of the
 * measurement.
 */
static uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	int		iflag;

	addr += APIC_CURR_COUNT;

	iflag = intr_clear();

	/*
	 * Read the PIT counter (low byte, then high byte) until it is at
	 * least APIC_TIME_MIN and its low byte lies strictly between
	 * APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	/* APIC counter value at the start of the measured interval */
	start_apic_tick = *addr;

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	/* APIC counter value at the end of the measured interval */
	end_apic_tick = *addr;

	/* how far the PIT ran past the target before we noticed */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* start minus end: the APIC timer counts down */
	return (start_apic_tick - end_apic_tick);
}
2719 
2720 /*
2721  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
2722  * frequency.  Note at this stage in the boot sequence, the boot processor
2723  * is the only active processor.
2724  * hertz value of 0 indicates a one-shot mode request.  In this case
2725  * the function returns the resolution (in nanoseconds) for the hardware
2726  * timer interrupt.  If one-shot mode capability is not available,
2727  * the return value will be 0. apic_enable_oneshot is a global switch
2728  * for disabling the functionality.
2729  * A non-zero positive value for hertz indicates a periodic mode request.
2730  * In this case the hardware will be programmed to generate clock interrupts
2731  * at hertz frequency and returns the resolution of interrupts in
2732  * nanosecond.
2733  */
2734 
2735 static int
2736 apic_clkinit(int hertz)
2737 {
2738 
2739 	uint_t		apic_ticks = 0;
2740 	uint_t		pit_ticks;
2741 	int		ret;
2742 	uint16_t	pit_ticks_adj;
2743 	static int	firsttime = 1;
2744 
2745 	if (firsttime) {
2746 		/* first time calibrate on CPU0 only */
2747 
2748 		apicadr[APIC_DIVIDE_REG] = apic_divide_reg_init;
2749 		apicadr[APIC_INIT_COUNT] = APIC_MAXVAL;	/* start counting */
2750 		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);
2751 
2752 		/* total number of PIT ticks corresponding to apic_ticks */
2753 		pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;
2754 
2755 		/*
2756 		 * Determine the number of nanoseconds per APIC clock tick
2757 		 * and then determine how many APIC ticks to interrupt at the
2758 		 * desired frequency
2759 		 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
2760 		 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
2761 		 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
2762 		 * apic_ticks_per_SFns =
2763 		 *   (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
2764 		 */
2765 		apic_ticks_per_SFnsecs =
2766 		    ((SF * apic_ticks * PIT_HZ) /
2767 		    ((uint64_t)pit_ticks * NANOSEC));
2768 
2769 		/* the interval timer initial count is 32 bit max */
2770 		apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
2771 		firsttime = 0;
2772 	}
2773 
2774 	if (hertz != 0) {
2775 		/* periodic */
2776 		apic_nsec_per_intr = NANOSEC / hertz;
2777 		apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
2778 	}
2779 
2780 	apic_int_busy_mark = (apic_int_busy_mark *
2781 	    apic_sample_factor_redistribution) / 100;
2782 	apic_int_free_mark = (apic_int_free_mark *
2783 	    apic_sample_factor_redistribution) / 100;
2784 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
2785 	    apic_sample_factor_redistribution) / 100;
2786 
2787 	if (hertz == 0) {
2788 		/* requested one_shot */
2789 		if (!apic_oneshot_enable)
2790 			return (0);
2791 		apic_oneshot = 1;
2792 		ret = (int)APIC_TICKS_TO_NSECS(1);
2793 	} else {
2794 		/* program the local APIC to interrupt at the given frequency */
2795 		apicadr[APIC_INIT_COUNT] = apic_hertz_count;
2796 		apicadr[APIC_LOCAL_TIMER] =
2797 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
2798 		apic_oneshot = 0;
2799 		ret = NANOSEC / hertz;
2800 	}
2801 
2802 	return (ret);
2803 
2804 }
2805 
2806 /*
2807  * apic_preshutdown:
2808  * Called early in shutdown whilst we can still access filesystems to do
2809  * things like loading modules which will be required to complete shutdown
2810  * after filesystems are all unmounted.
2811  */
2812 static void
2813 apic_preshutdown(int cmd, int fcn)
2814 {
2815 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
2816 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
2817 
2818 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
2819 		return;
2820 	}
2821 }
2822 
2823 static void
2824 apic_shutdown(int cmd, int fcn)
2825 {
2826 	int iflag, restarts, attempts;
2827 	int i, j;
2828 	volatile int32_t *ioapic;
2829 	uchar_t	byte;
2830 
2831 	/* Send NMI to all CPUs except self to do per processor shutdown */
2832 	iflag = intr_clear();
2833 	while (get_apic_cmd1() & AV_PENDING)
2834 		apic_ret();
2835 	apic_shutdown_processors = 1;
2836 	apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF;
2837 
2838 	/* restore cmos shutdown byte before reboot */
2839 	if (apic_cmos_ssb_set) {
2840 		outb(CMOS_ADDR, SSB);
2841 		outb(CMOS_DATA, 0);
2842 	}
2843 	/* Disable the I/O APIC redirection entries */
2844 	for (j = 0; j < apic_io_max; j++) {
2845 		int intin_max;
2846 		ioapic = apicioadr[j];
2847 		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
2848 		/* Bits 23-16 define the maximum redirection entries */
2849 		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
2850 		for (i = 0; i < intin_max; i++) {
2851 			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
2852 			ioapic[APIC_IO_DATA] = AV_MASK;
2853 		}
2854 	}
2855 
2856 	/*	disable apic mode if imcr present	*/
2857 	if (apic_imcrp) {
2858 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
2859 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
2860 	}
2861 
2862 	apic_disable_local_apic();
2863 
2864 	intr_restore(iflag);
2865 
2866 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
2867 		return;
2868 	}
2869 
2870 	switch (apic_poweroff_method) {
2871 		case APIC_POWEROFF_VIA_RTC:
2872 
2873 			/* select the extended NVRAM bank in the RTC */
2874 			outb(CMOS_ADDR, RTC_REGA);
2875 			byte = inb(CMOS_DATA);
2876 			outb(CMOS_DATA, (byte | EXT_BANK));
2877 
2878 			outb(CMOS_ADDR, PFR_REG);
2879 
2880 			/* for Predator must toggle the PAB bit */
2881 			byte = inb(CMOS_DATA);
2882 
2883 			/*
2884 			 * clear power active bar, wakeup alarm and
2885 			 * kickstart
2886 			 */
2887 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
2888 			outb(CMOS_DATA, byte);
2889 
2890 			/* delay before next write */
2891 			drv_usecwait(1000);
2892 
2893 			/* for S40 the following would suffice */
2894 			byte = inb(CMOS_DATA);
2895 
2896 			/* power active bar control bit */
2897 			byte |= PAB_CBIT;
2898 			outb(CMOS_DATA, byte);
2899 
2900 			break;
2901 
2902 		case APIC_POWEROFF_VIA_ASPEN_BMC:
2903 			restarts = 0;
2904 restart_aspen_bmc:
2905 			if (++restarts == 3)
2906 				break;
2907 			attempts = 0;
2908 			do {
2909 				byte = inb(MISMIC_FLAG_REGISTER);
2910 				byte &= MISMIC_BUSY_MASK;
2911 				if (byte != 0) {
2912 					drv_usecwait(1000);
2913 					if (attempts >= 3)
2914 						goto restart_aspen_bmc;
2915 					++attempts;
2916 				}
2917 			} while (byte != 0);
2918 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
2919 			byte = inb(MISMIC_FLAG_REGISTER);
2920 			byte |= 0x1;
2921 			outb(MISMIC_FLAG_REGISTER, byte);
2922 			i = 0;
2923 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
2924 			    i++) {
2925 				attempts = 0;
2926 				do {
2927 					byte = inb(MISMIC_FLAG_REGISTER);
2928 					byte &= MISMIC_BUSY_MASK;
2929 					if (byte != 0) {
2930 						drv_usecwait(1000);
2931 						if (attempts >= 3)
2932 							goto restart_aspen_bmc;
2933 						++attempts;
2934 					}
2935 				} while (byte != 0);
2936 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
2937 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
2938 				byte = inb(MISMIC_FLAG_REGISTER);
2939 				byte |= 0x1;
2940 				outb(MISMIC_FLAG_REGISTER, byte);
2941 			}
2942 			break;
2943 
2944 		case APIC_POWEROFF_VIA_SITKA_BMC:
2945 			restarts = 0;
2946 restart_sitka_bmc:
2947 			if (++restarts == 3)
2948 				break;
2949 			attempts = 0;
2950 			do {
2951 				byte = inb(SMS_STATUS_REGISTER);
2952 				byte &= SMS_STATE_MASK;
2953 				if ((byte == SMS_READ_STATE) ||
2954 				    (byte == SMS_WRITE_STATE)) {
2955 					drv_usecwait(1000);
2956 					if (attempts >= 3)
2957 						goto restart_sitka_bmc;
2958 					++attempts;
2959 				}
2960 			} while ((byte == SMS_READ_STATE) ||
2961 			    (byte == SMS_WRITE_STATE));
2962 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
2963 			i = 0;
2964 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
2965 			    i++) {
2966 				attempts = 0;
2967 				do {
2968 					byte = inb(SMS_STATUS_REGISTER);
2969 					byte &= SMS_IBF_MASK;
2970 					if (byte != 0) {
2971 						drv_usecwait(1000);
2972 						if (attempts >= 3)
2973 							goto restart_sitka_bmc;
2974 						++attempts;
2975 					}
2976 				} while (byte != 0);
2977 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
2978 			}
2979 			break;
2980 
2981 		case APIC_POWEROFF_NONE:
2982 
2983 			/* If no APIC direct method, we will try using ACPI */
2984 			if (apic_enable_acpi) {
2985 				if (acpi_poweroff() == 1)
2986 					return;
2987 			} else
2988 				return;
2989 
2990 			break;
2991 	}
2992 	/*
2993 	 * Wait a limited time here for power to go off.
2994 	 * If the power does not go off, then there was a
2995 	 * problem and we should continue to the halt which
2996 	 * prints a message for the user to press a key to
2997 	 * reboot.
2998 	 */
2999 	drv_usecwait(7000000); /* wait seven seconds */
3000 
3001 }
3002 
3003 /*
3004  * Try and disable all interrupts. We just assign interrupts to other
3005  * processors based on policy. If any were bound by user request, we
3006  * let them continue and return failure. We do not bother to check
3007  * for cache affinity while rebinding.
3008  */
3009 
3010 static int
3011 apic_disable_intr(processorid_t cpun)
3012 {
3013 	int bind_cpu = 0, i, hardbound = 0, iflag;
3014 	apic_irq_t *irq_ptr;
3015 
3016 	iflag = intr_clear();
3017 	lock_set(&apic_ioapic_lock);
3018 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
3019 	lock_clear(&apic_ioapic_lock);
3020 	intr_restore(iflag);
3021 	apic_cpus[cpun].aci_curipl = 0;
3022 	i = apic_min_device_irq;
3023 	for (; i <= apic_max_device_irq; i++) {
3024 		/*
3025 		 * If there are bound interrupts on this cpu, then
3026 		 * rebind them to other processors.
3027 		 */
3028 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
3029 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
3030 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
3031 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
3032 			    apic_nproc));
3033 
3034 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
3035 				hardbound = 1;
3036 				continue;
3037 			}
3038 
3039 			if (irq_ptr->airq_temp_cpu == cpun) {
3040 				do {
3041 					apic_next_bind_cpu += 2;
3042 					bind_cpu = apic_next_bind_cpu / 2;
3043 					if (bind_cpu >= apic_nproc) {
3044 						apic_next_bind_cpu = 1;
3045 						bind_cpu = 0;
3046 
3047 					}
3048 				} while (apic_rebind_all(irq_ptr, bind_cpu, 1));
3049 			}
3050 		}
3051 	}
3052 	if (hardbound) {
3053 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
3054 		    "due to user bound interrupts", cpun);
3055 		return (PSM_FAILURE);
3056 	}
3057 	else
3058 		return (PSM_SUCCESS);
3059 }
3060 
3061 static void
3062 apic_enable_intr(processorid_t cpun)
3063 {
3064 	int	i, iflag;
3065 	apic_irq_t *irq_ptr;
3066 
3067 	iflag = intr_clear();
3068 	lock_set(&apic_ioapic_lock);
3069 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
3070 	lock_clear(&apic_ioapic_lock);
3071 	intr_restore(iflag);
3072 
3073 	i = apic_min_device_irq;
3074 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
3075 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
3076 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
3077 				(void) apic_rebind_all(irq_ptr,
3078 				    irq_ptr->airq_cpu, 1);
3079 			}
3080 		}
3081 	}
3082 }
3083 
3084 /*
3085  * apic_introp_xlate() replaces apic_translate_irq() and is
3086  * called only from apic_intr_ops().  With the new ADII framework,
3087  * the priority can no longer be retrived through i_ddi_get_intrspec().
3088  * It has to be passed in from the caller.
3089  */
3090 int
3091 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type)
3092 {
3093 	char dev_type[16];
3094 	int dev_len, pci_irq, newirq, bustype, devid, busid, i;
3095 	int irqno = ispec->intrspec_vec;
3096 	ddi_acc_handle_t cfg_handle;
3097 	uchar_t ipin;
3098 	struct apic_io_intr *intrp;
3099 	iflag_t intr_flag;
3100 	APIC_HEADER	*hp;
3101 	MADT_INTERRUPT_OVERRIDE	*isop;
3102 	apic_irq_t *airqp;
3103 	int parent_is_pci_or_pciex = 0;
3104 	int child_is_pciex = 0;
3105 
3106 	DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s "
3107 	    "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type,
3108 	    irqno));
3109 
3110 	dev_len = sizeof (dev_type);
3111 	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
3112 	    DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
3113 	    &dev_len) == DDI_PROP_SUCCESS) {
3114 		if ((strcmp(dev_type, "pci") == 0) ||
3115 		    (strcmp(dev_type, "pciex") == 0))
3116 			parent_is_pci_or_pciex = 1;
3117 	}
3118 
3119 	if (parent_is_pci_or_pciex && ddi_prop_get_int(DDI_DEV_T_ANY, dip,
3120 	    DDI_PROP_DONTPASS, "pcie-capid-pointer", PCI_CAP_NEXT_PTR_NULL) !=
3121 	    PCI_CAP_NEXT_PTR_NULL) {
3122 		child_is_pciex = 1;
3123 	}
3124 
3125 	if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
3126 		if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) {
3127 			airqp->airq_iflag.bustype =
3128 			    child_is_pciex ? BUS_PCIE : BUS_PCI;
3129 			return (apic_vector_to_irq[airqp->airq_vector]);
3130 		}
3131 		return (apic_setup_irq_table(dip, irqno, NULL, ispec,
3132 		    NULL, type));
3133 	}
3134 
3135 	bustype = 0;
3136 
3137 	/* check if we have already translated this irq */
3138 	mutex_enter(&airq_mutex);
3139 	newirq = apic_min_device_irq;
3140 	for (; newirq <= apic_max_device_irq; newirq++) {
3141 		airqp = apic_irq_table[newirq];
3142 		while (airqp) {
3143 			if ((airqp->airq_dip == dip) &&
3144 			    (airqp->airq_origirq == irqno) &&
3145 			    (airqp->airq_mps_intr_index != FREE_INDEX)) {
3146 
3147 				mutex_exit(&airq_mutex);
3148 				return (VIRTIRQ(newirq, airqp->airq_share_id));
3149 			}
3150 			airqp = airqp->airq_next;
3151 		}
3152 	}
3153 	mutex_exit(&airq_mutex);
3154 
3155 	if (apic_defconf)
3156 		goto defconf;
3157 
3158 	if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi))
3159 		goto nonpci;
3160 
3161 	if (parent_is_pci_or_pciex) {
3162 		/* pci device */
3163 		if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
3164 			goto nonpci;
3165 		if (busid == 0 && apic_pci_bus_total == 1)
3166 			busid = (int)apic_single_pci_busid;
3167 
3168 		if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
3169 			goto nonpci;
3170 		ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
3171 		pci_config_teardown(&cfg_handle);
3172 		if (apic_enable_acpi && !apic_use_acpi_madt_only) {
3173 			if (apic_acpi_translate_pci_irq(dip, busid, devid,
3174 			    ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
3175 				goto nonpci;
3176 
3177 			intr_flag.bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
3178 			if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL,
3179 			    ispec, &intr_flag, type)) == -1)
3180 				goto nonpci;
3181 			return (newirq);
3182 		} else {
3183 			pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
3184 			if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid))
3185 			    == NULL) {
3186 				if ((pci_irq = apic_handle_pci_pci_bridge(dip,
3187 				    devid, ipin, &intrp)) == -1)
3188 					goto nonpci;
3189 			}
3190 			if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp,
3191 			    ispec, NULL, type)) == -1)
3192 				goto nonpci;
3193 			return (newirq);
3194 		}
3195 	} else if (strcmp(dev_type, "isa") == 0)
3196 		bustype = BUS_ISA;
3197 	else if (strcmp(dev_type, "eisa") == 0)
3198 		bustype = BUS_EISA;
3199 
3200 nonpci:
3201 	if (apic_enable_acpi && !apic_use_acpi_madt_only) {
3202 		/* search iso entries first */
3203 		if (acpi_iso_cnt != 0) {
3204 			hp = (APIC_HEADER *)acpi_isop;
3205 			i = 0;
3206 			while (i < acpi_iso_cnt) {
3207 				if (hp->Type == APIC_XRUPT_OVERRIDE) {
3208 					isop = (MADT_INTERRUPT_OVERRIDE *)hp;
3209 					if (isop->Bus == 0 &&
3210 					    isop->Source == irqno) {
3211 						newirq = isop->Interrupt;
3212 						intr_flag.intr_po =
3213 						    isop->Polarity;
3214 						intr_flag.intr_el =
3215 						    isop->TriggerMode;
3216 						intr_flag.bustype = BUS_ISA;
3217 
3218 						return (apic_setup_irq_table(
3219 						    dip, newirq, NULL, ispec,
3220 						    &intr_flag, type));
3221 
3222 					}
3223 					i++;
3224 				}
3225 				hp = (APIC_HEADER *)(((char *)hp) +
3226 				    hp->Length);
3227 			}
3228 		}
3229 		intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
3230 		intr_flag.intr_el = INTR_EL_EDGE;
3231 		intr_flag.bustype = BUS_ISA;
3232 		return (apic_setup_irq_table(dip, irqno, NULL, ispec,
3233 		    &intr_flag, type));
3234 	} else {
3235 		if (bustype == 0)
3236 			bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
3237 		for (i = 0; i < 2; i++) {
3238 			if (((busid = apic_find_bus_id(bustype)) != -1) &&
3239 			    ((intrp = apic_find_io_intr_w_busid(irqno, busid))
3240 			    != NULL)) {
3241 				if ((newirq = apic_setup_irq_table(dip, irqno,
3242 				    intrp, ispec, NULL, type)) != -1) {
3243 					return (newirq);
3244 				}
3245 				goto defconf;
3246 			}
3247 			bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
3248 		}
3249 	}
3250 
3251 /* MPS default configuration */
3252 defconf:
3253 	newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type);
3254 	if (newirq == -1)
3255 		return (newirq);
3256 	ASSERT(IRQINDEX(newirq) == irqno);
3257 	ASSERT(apic_irq_table[irqno]);
3258 	return (newirq);
3259 }
3260 
3261 
3262 
3263 
3264 
3265 
3266 /*
3267  * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge
3268  * needs special handling.  We may need to chase up the device tree,
3269  * using the PCI-PCI Bridge specification's "rotating IPIN assumptions",
3270  * to find the IPIN at the root bus that relates to the IPIN on the
3271  * subsidiary bus (for ACPI or MP).  We may, however, have an entry
3272  * in the MP table or the ACPI namespace for this device itself.
3273  * We handle both cases in the search below.
3274  */
3275 /* this is the non-acpi version */
3276 static int
3277 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin,
3278 			struct apic_io_intr **intrp)
3279 {
3280 	dev_info_t *dipp, *dip;
3281 	int pci_irq;
3282 	ddi_acc_handle_t cfg_handle;
3283 	int bridge_devno, bridge_bus;
3284 	int ipin;
3285 
3286 	dip = idip;
3287 
3288 	/*CONSTCOND*/
3289 	while (1) {
3290 		if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL)
3291 			return (-1);
3292 		if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) &&
3293 		    (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) ==
3294 		    PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle,
3295 		    PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) {
3296 			pci_config_teardown(&cfg_handle);
3297 			if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno,
3298 			    NULL) != 0)
3299 				return (-1);
3300 			/*
3301 			 * This is the rotating scheme that Compaq is using
3302 			 * and documented in the pci to pci spec.  Also, if
3303 			 * the pci to pci bridge is behind another pci to
3304 			 * pci bridge, then it need to keep transversing
3305 			 * up until an interrupt entry is found or reach
3306 			 * the top of the tree
3307 			 */
3308 			ipin = (child_devno + child_ipin) % PCI_INTD;
3309 				if (bridge_bus == 0 && apic_pci_bus_total == 1)
3310 					bridge_bus = (int)apic_single_pci_busid;
3311 				pci_irq = ((bridge_devno & 0x1f) << 2) |
3312 				    (ipin & 0x3);
3313 				if ((*intrp = apic_find_io_intr_w_busid(pci_irq,
3314 				    bridge_bus)) != NULL) {
3315 					return (pci_irq);
3316 				}
3317 			dip = dipp;
3318 			child_devno = bridge_devno;
3319 			child_ipin = ipin;
3320 		} else
3321 			return (-1);
3322 	}
3323 	/*LINTED: function will not fall off the bottom */
3324 }
3325 
3326 
3327 
3328 
3329 static uchar_t
3330 acpi_find_ioapic(int irq)
3331 {
3332 	int i;
3333 
3334 	for (i = 0; i < apic_io_max; i++) {
3335 		if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i])
3336 			return (i);
3337 	}
3338 	return (0xFF);	/* shouldn't happen */
3339 }
3340 
3341 /*
3342  * See if two irqs are compatible for sharing a vector.
3343  * Currently we only support sharing of PCI devices.
3344  */
3345 static int
3346 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2)
3347 {
3348 	uint_t	level1, po1;
3349 	uint_t	level2, po2;
3350 
3351 	/* Assume active high by default */
3352 	po1 = 0;
3353 	po2 = 0;
3354 
3355 	if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI)
3356 		return (0);
3357 
3358 	if (iflag1.intr_el == INTR_EL_CONFORM)
3359 		level1 = AV_LEVEL;
3360 	else
3361 		level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0;
3362 
3363 	if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) ||
3364 	    (iflag1.intr_po == INTR_PO_CONFORM)))
3365 		po1 = AV_ACTIVE_LOW;
3366 
3367 	if (iflag2.intr_el == INTR_EL_CONFORM)
3368 		level2 = AV_LEVEL;
3369 	else
3370 		level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0;
3371 
3372 	if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) ||
3373 	    (iflag2.intr_po == INTR_PO_CONFORM)))
3374 		po2 = AV_ACTIVE_LOW;
3375 
3376 	if ((level1 == level2) && (po1 == po2))
3377 		return (1);
3378 
3379 	return (0);
3380 }
3381 
/*
 * Attempt to share vector with someone else
 *
 * Scans the vectors covering the priority range of ipl, skipping those
 * that are unused (APIC_RESV_IRQ), and picks the irq with the smallest
 * airq_share whose redirection table entry matches the new interrupt's
 * in bits 0xFF00.  A new apic_irq_t is then linked onto the chosen irq's
 * chain (or an entry marked FREE_INDEX on that chain is reused), filled
 * in, and handed back through irqptrp.
 *
 * Returns VIRTIRQ(chosen_irq, share_id) on success, or -1 when no
 * compatible vector is in use in the range.
 */
static int
apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl,
	uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp)
{
#ifdef DEBUG
	apic_irq_t *tmpirqp = NULL;
#endif /* DEBUG */
	apic_irq_t *irqptr, dummyirq;
	int	newirq, chosen_irq = -1, share = 127;
	int	lowest, highest, i;
	uchar_t	share_id;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x "
	    "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl));

	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
	lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL;

	if (highest < lowest) /* Both ipl and ipl-1 map to same pri */
		lowest -= APIC_VECTOR_PER_IPL;

	/*
	 * Build a scratch entry describing the new interrupt so that its
	 * redirection table entry can be computed and compared below.
	 * NOTE(review): airq_iflag is left unset when intr_flagp is NULL;
	 * confirm apic_record_rdt_entry() does not read it in that case.
	 */
	dummyirq.airq_mps_intr_index = intr_index;
	dummyirq.airq_ioapicindex = ioapicindex;
	dummyirq.airq_intin_no = ipin;
	if (intr_flagp)
		dummyirq.airq_iflag = *intr_flagp;
	apic_record_rdt_entry(&dummyirq, irqno);

	/* Find the least-shared compatible irq in the vector range. */
	for (i = lowest; i <= highest; i++) {
		newirq = apic_vector_to_irq[i];
		if (newirq == APIC_RESV_IRQ)
			continue;
		irqptr = apic_irq_table[newirq];

		if ((dummyirq.airq_rdt_entry & 0xFF00) !=
		    (irqptr->airq_rdt_entry & 0xFF00))
			/* not compatible */
			continue;

		if (irqptr->airq_share < share) {
			share = irqptr->airq_share;
			chosen_irq = newirq;
		}
	}
	if (chosen_irq != -1) {
		/*
		 * Assign a share id which is free or which is larger
		 * than the largest one.
		 */
		share_id = 1;
		mutex_enter(&airq_mutex);
		irqptr = apic_irq_table[chosen_irq];
		while (irqptr) {
			/* A freed entry on the chain is reused with its id. */
			if (irqptr->airq_mps_intr_index == FREE_INDEX) {
				share_id = irqptr->airq_share_id;
				break;
			}
			if (share_id <= irqptr->airq_share_id)
				share_id = irqptr->airq_share_id + 1;
#ifdef DEBUG
			tmpirqp = irqptr;
#endif /* DEBUG */
			irqptr = irqptr->airq_next;
		}
		if (!irqptr) {
			/* Nothing to reuse: insert a new entry after the head. */
			irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
			irqptr->airq_temp_cpu = IRQ_UNINIT;
			irqptr->airq_next =
			    apic_irq_table[chosen_irq]->airq_next;
			apic_irq_table[chosen_irq]->airq_next = irqptr;
#ifdef	DEBUG
			tmpirqp = apic_irq_table[chosen_irq];
#endif /* DEBUG */
		}
		irqptr->airq_mps_intr_index = intr_index;
		irqptr->airq_ioapicindex = ioapicindex;
		irqptr->airq_intin_no = ipin;
		if (intr_flagp)
			irqptr->airq_iflag = *intr_flagp;
		/* Sharers use the same vector as the head of the chain. */
		irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector;
		irqptr->airq_share_id = share_id;
		apic_record_rdt_entry(irqptr, irqno);
		*irqptrp = irqptr;
#ifdef	DEBUG
		/* shuffle the pointers to test apic_delspl path */
		if (tmpirqp) {
			tmpirqp->airq_next = irqptr->airq_next;
			irqptr->airq_next = apic_irq_table[chosen_irq];
			apic_irq_table[chosen_irq] = irqptr;
		}
#endif /* DEBUG */
		mutex_exit(&airq_mutex);
		return (VIRTIRQ(chosen_irq, share_id));
	}
	return (-1);
}
3479 
/*
 * Set up the apic_irq_table[] entry for an interrupt and return the irq
 * it ended up on, or -1 on failure.
 *
 * Where the routing information comes from is selected by the arguments:
 *   - type is MSI or MSI-X: no I/O APIC pin is involved; a free irq is
 *     allocated and irqno is replaced by it.
 *   - intrp != NULL: I/O APIC id and pin come from the given I/O
 *     interrupt entry.  If that pin already has an irq, it is returned.
 *   - intr_flagp != NULL: ACPI case; the I/O APIC and pin are derived
 *     from irqno.  An existing ACPI entry for irqno is returned as is.
 *   - otherwise: default configuration, first I/O APIC, pin == irqno.
 *
 * A vector is then allocated for the ipl in ispec.  If none is free the
 * interrupt is made to share a vector via apic_share_vector(), and only
 * if that fails as well is a vector taken from the high priority pool.
 * If the apic_irq_table[irqno] slot is already in use by another
 * interrupt, a different free irq is allocated and the vector is pointed
 * at it.
 */
static int
apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp,
    struct intrspec *ispec, iflag_t *intr_flagp, int type)
{
	/*
	 * NOTE(review): ispec is dereferenced by these initializers before
	 * the ASSERT and the NULL test further down can run.
	 */
	int origirq = ispec->intrspec_vec;
	uchar_t ipl = ispec->intrspec_pri;
	int	newirq, intr_index;
	uchar_t	ipin, ioapic, ioapicindex, vector;
	apic_irq_t *irqptr;
	major_t	major;
	dev_info_t	*sdip;

	DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d "
	    "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq));

	ASSERT(ispec != NULL);

	major =  (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;

	if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
		/* MSI/X doesn't need to setup ioapic stuffs */
		ioapicindex = 0xff;
		ioapic = 0xff;
		ipin = (uchar_t)0xff;
		intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX :
		    MSIX_INDEX;
		mutex_enter(&airq_mutex);
		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) {
			mutex_exit(&airq_mutex);
			/* need an irq for MSI/X to index into autovect[] */
			cmn_err(CE_WARN, "No interrupt irq: %s instance %d",
			    ddi_get_name(dip), ddi_get_instance(dip));
			return (-1);
		}
		mutex_exit(&airq_mutex);

	} else if (intrp != NULL) {
		/* The entry's offset in the I/O interrupt table is its index */
		intr_index = (int)(intrp - apic_io_intrp);
		ioapic = intrp->intr_destid;
		ipin = intrp->intr_destintin;
		/* Find ioapicindex. If destid was ALL, we will exit with 0. */
		for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
			if (apic_io_id[ioapicindex] == ioapic)
				break;
		ASSERT((ioapic == apic_io_id[ioapicindex]) ||
		    (ioapic == INTR_ALL_APIC));

		/* check whether this intin# has been used by another irqno */
		if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) {
			return (newirq);
		}

	} else if (intr_flagp != NULL) {
		/* ACPI case */
		intr_index = ACPI_INDEX;
		ioapicindex = acpi_find_ioapic(irqno);
		ASSERT(ioapicindex != 0xFF);
		ioapic = apic_io_id[ioapicindex];
		/* The pin is the irq's offset within its I/O APIC's range. */
		ipin = irqno - apic_io_vectbase[ioapicindex];
		if (apic_irq_table[irqno] &&
		    apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
			ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
			    apic_irq_table[irqno]->airq_ioapicindex ==
			    ioapicindex);
			return (irqno);
		}

	} else {
		/* default configuration */
		ioapicindex = 0;
		ioapic = apic_io_id[ioapicindex];
		ipin = (uchar_t)irqno;
		intr_index = DEFAULT_INDEX;
	}

	if (ispec == NULL) {
		APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n",
		    irqno));
	} else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) {
		/* No free normal-priority vector: try to share one. */
		if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index,
		    ipl, ioapicindex, ipin, &irqptr)) != -1) {
			irqptr->airq_ipl = ipl;
			irqptr->airq_origirq = (uchar_t)origirq;
			irqptr->airq_dip = dip;
			irqptr->airq_major = major;
			sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip;
			/* This is OK to do really */
			if (sdip == NULL) {
				cmn_err(CE_WARN, "Sharing vectors: %s"
				    " instance %d and SCI",
				    ddi_get_name(dip), ddi_get_instance(dip));
			} else {
				cmn_err(CE_WARN, "Sharing vectors: %s"
				    " instance %d and %s instance %d",
				    ddi_get_name(sdip), ddi_get_instance(sdip),
				    ddi_get_name(dip), ddi_get_instance(dip));
			}
			return (newirq);
		}
		/* try high priority allocation now that share has failed */
		if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) {
			cmn_err(CE_WARN, "No interrupt vector: %s instance %d",
			    ddi_get_name(dip), ddi_get_instance(dip));
			return (-1);
		}
	}

	mutex_enter(&airq_mutex);
	if (apic_irq_table[irqno] == NULL) {
		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
		irqptr->airq_temp_cpu = IRQ_UNINIT;
		apic_irq_table[irqno] = irqptr;
	} else {
		irqptr = apic_irq_table[irqno];
		if (irqptr->airq_mps_intr_index != FREE_INDEX) {
			/*
			 * The slot is used by another irqno, so allocate
			 * a free irqno for this interrupt
			 */
			newirq = apic_allocate_irq(apic_first_avail_irq);
			if (newirq == -1) {
				mutex_exit(&airq_mutex);
				return (-1);
			}
			irqno = newirq;
			irqptr = apic_irq_table[irqno];
			if (irqptr == NULL) {
				irqptr = kmem_zalloc(sizeof (apic_irq_t),
				    KM_SLEEP);
				irqptr->airq_temp_cpu = IRQ_UNINIT;
				apic_irq_table[irqno] = irqptr;
			}
			/* The vector was recorded against the old irqno. */
			apic_modify_vector(vector, newirq);
		}
	}
	/* Keep the scan bounds used by the table walkers up to date. */
	apic_max_device_irq = max(irqno, apic_max_device_irq);
	apic_min_device_irq = min(irqno, apic_min_device_irq);
	mutex_exit(&airq_mutex);
	irqptr->airq_ioapicindex = ioapicindex;
	irqptr->airq_intin_no = ipin;
	irqptr->airq_ipl = ipl;
	irqptr->airq_vector = vector;
	irqptr->airq_origirq = (uchar_t)origirq;
	irqptr->airq_share_id = 0;
	irqptr->airq_mps_intr_index = (short)intr_index;
	irqptr->airq_dip = dip;
	irqptr->airq_major = major;
	irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin);
	if (intr_flagp)
		irqptr->airq_iflag = *intr_flagp;

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
		/* setup I/O APIC entry for non-MSI/X interrupts */
		apic_record_rdt_entry(irqptr, irqno);
	}
	return (irqno);
}
3640 
3641 /*
3642  * return the cpu to which this intr should be bound.
3643  * Check properties or any other mechanism to see if user wants it
3644  * bound to a specific CPU. If so, return the cpu id with high bit set.
3645  * If not, use the policy to choose a cpu and return the id.
3646  */
3647 uchar_t
3648 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
3649 {
3650 	int	instance, instno, prop_len, bind_cpu, count;
3651 	uint_t	i, rc;
3652 	uchar_t	cpu;
3653 	major_t	major;
3654 	char	*name, *drv_name, *prop_val, *cptr;
3655 	char	prop_name[32];
3656 
3657 
3658 	if (apic_intr_policy == INTR_LOWEST_PRIORITY)
3659 		return (IRQ_UNBOUND);
3660 
3661 	drv_name = NULL;
3662 	rc = DDI_PROP_NOT_FOUND;
3663 	major = (major_t)-1;
3664 	if (dip != NULL) {
3665 		name = ddi_get_name(dip);
3666 		major = ddi_name_to_major(name);
3667 		drv_name = ddi_major_to_name(major);
3668 		instance = ddi_get_instance(dip);
3669 		if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
3670 			i = apic_min_device_irq;
3671 			for (; i <= apic_max_device_irq; i++) {
3672 
3673 				if ((i == irq) || (apic_irq_table[i] == NULL) ||
3674 				    (apic_irq_table[i]->airq_mps_intr_index
3675 				    == FREE_INDEX))
3676 					continue;
3677 
3678 				if ((apic_irq_table[i]->airq_major == major) &&
3679 				    (!(apic_irq_table[i]->airq_cpu &
3680 				    IRQ_USER_BOUND))) {
3681 
3682 					cpu = apic_irq_table[i]->airq_cpu;
3683 
3684 					cmn_err(CE_CONT,
3685 					    "!pcplusmp: %s (%s) instance #%d "
3686 					    "vector 0x%x ioapic 0x%x "
3687 					    "intin 0x%x is bound to cpu %d\n",
3688 					    name, drv_name, instance, irq,
3689 					    ioapicid, intin, cpu);
3690 					return (cpu);
3691 				}
3692 			}
3693 		}
3694 		/*
3695 		 * search for "drvname"_intpt_bind_cpus property first, the
3696 		 * syntax of the property should be "a[,b,c,...]" where
3697 		 * instance 0 binds to cpu a, instance 1 binds to cpu b,
3698 		 * instance 3 binds to cpu c...
3699 		 * ddi_getlongprop() will search /option first, then /
3700 		 * if "drvname"_intpt_bind_cpus doesn't exist, then find
3701 		 * intpt_bind_cpus property.  The syntax is the same, and
3702 		 * it applies to all the devices if its "drvname" specific
3703 		 * property doesn't exist
3704 		 */
3705 		(void) strcpy(prop_name, drv_name);
3706 		(void) strcat(prop_name, "_intpt_bind_cpus");
3707 		rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
3708 		    (caddr_t)&prop_val, &prop_len);
3709 		if (rc != DDI_PROP_SUCCESS) {
3710 			rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
3711 			    "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
3712 		}
3713 	}
3714 	if (rc == DDI_PROP_SUCCESS) {
3715 		for (i = count = 0; i < (prop_len - 1); i++)
3716 			if (prop_val[i] == ',')
3717 				count++;
3718 		if (prop_val[i-1] != ',')
3719 			count++;
3720 		/*
3721 		 * if somehow the binding instances defined in the
3722 		 * property are not enough for this instno., then
3723 		 * reuse the pattern for the next instance until
3724 		 * it reaches the requested instno
3725 		 */
3726 		instno = instance % count;
3727 		i = 0;
3728 		cptr = prop_val;
3729 		while (i < instno)
3730 			if (*cptr++ == ',')
3731 				i++;
3732 		bind_cpu = stoi(&cptr);
3733 		kmem_free(prop_val, prop_len);
3734 		/* if specific cpu is bogus, then default to cpu 0 */
3735 		if (bind_cpu >= apic_nproc) {
3736 			cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present",
3737 			    prop_name, prop_val, bind_cpu);
3738 			bind_cpu = 0;
3739 		} else {
3740 			/* indicate that we are bound at user request */
3741 			bind_cpu |= IRQ_USER_BOUND;
3742 		}
3743 		/*
3744 		 * no need to check apic_cpus[].aci_status, if specific cpu is
3745 		 * not up, then post_cpu_start will handle it.
3746 		 */
3747 	} else {
3748 		bind_cpu = apic_next_bind_cpu++;
3749 		if (bind_cpu >= apic_nproc) {
3750 			apic_next_bind_cpu = 1;
3751 			bind_cpu = 0;
3752 		}
3753 	}
3754 	if (drv_name != NULL)
3755 		cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d "
3756 		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
3757 		    name, drv_name, instance,
3758 		    irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND);
3759 	else
3760 		cmn_err(CE_CONT, "!pcplusmp: "
3761 		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
3762 		    irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND);
3763 
3764 	return ((uchar_t)bind_cpu);
3765 }
3766 
3767 static struct apic_io_intr *
3768 apic_find_io_intr_w_busid(int irqno, int busid)
3769 {
3770 	struct	apic_io_intr	*intrp;
3771 
3772 	/*
3773 	 * It can have more than 1 entry with same source bus IRQ,
3774 	 * but unique with the source bus id
3775 	 */
3776 	intrp = apic_io_intrp;
3777 	if (intrp != NULL) {
3778 		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
3779 			if (intrp->intr_irq == irqno &&
3780 			    intrp->intr_busid == busid &&
3781 			    intrp->intr_type == IO_INTR_INT)
3782 				return (intrp);
3783 			intrp++;
3784 		}
3785 	}
3786 	APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:"
3787 	    "busid %x:%x\n", irqno, busid));
3788 	return ((struct apic_io_intr *)NULL);
3789 }
3790 
3791 
3792 struct mps_bus_info {
3793 	char	*bus_name;
3794 	int	bus_id;
3795 } bus_info_array[] = {
3796 	"ISA ", BUS_ISA,
3797 	"PCI ", BUS_PCI,
3798 	"EISA ", BUS_EISA,
3799 	"XPRESS", BUS_XPRESS,
3800 	"PCMCIA", BUS_PCMCIA,
3801 	"VL ", BUS_VL,
3802 	"CBUS ", BUS_CBUS,
3803 	"CBUSII", BUS_CBUSII,
3804 	"FUTURE", BUS_FUTURE,
3805 	"INTERN", BUS_INTERN,
3806 	"MBI ", BUS_MBI,
3807 	"MBII ", BUS_MBII,
3808 	"MPI ", BUS_MPI,
3809 	"MPSA ", BUS_MPSA,
3810 	"NUBUS ", BUS_NUBUS,
3811 	"TC ", BUS_TC,
3812 	"VME ", BUS_VME,
3813 	"PCI-E ", BUS_PCIE
3814 };
3815 
3816 static int
3817 apic_find_bus_type(char *bus)
3818 {
3819 	int	i = 0;
3820 
3821 	for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++)
3822 		if (strncmp(bus, bus_info_array[i].bus_name,
3823 		    strlen(bus_info_array[i].bus_name)) == 0)
3824 			return (bus_info_array[i].bus_id);
3825 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus));
3826 	return (0);
3827 }
3828 
3829 static int
3830 apic_find_bus(int busid)
3831 {
3832 	struct	apic_bus	*busp;
3833 
3834 	busp = apic_busp;
3835 	while (busp->bus_entry == APIC_BUS_ENTRY) {
3836 		if (busp->bus_id == busid)
3837 			return (apic_find_bus_type((char *)&busp->bus_str1));
3838 		busp++;
3839 	}
3840 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid));
3841 	return (0);
3842 }
3843 
3844 static int
3845 apic_find_bus_id(int bustype)
3846 {
3847 	struct	apic_bus	*busp;
3848 
3849 	busp = apic_busp;
3850 	while (busp->bus_entry == APIC_BUS_ENTRY) {
3851 		if (apic_find_bus_type((char *)&busp->bus_str1) == bustype)
3852 			return (busp->bus_id);
3853 		busp++;
3854 	}
3855 	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x",
3856 	    bustype));
3857 	return (-1);
3858 }
3859 
3860 /*
3861  * Check if a particular irq need to be reserved for any io_intr
3862  */
3863 static struct apic_io_intr *
3864 apic_find_io_intr(int irqno)
3865 {
3866 	struct	apic_io_intr	*intrp;
3867 
3868 	intrp = apic_io_intrp;
3869 	if (intrp != NULL) {
3870 		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
3871 			if (intrp->intr_irq == irqno &&
3872 			    intrp->intr_type == IO_INTR_INT)
3873 				return (intrp);
3874 			intrp++;
3875 		}
3876 	}
3877 	return ((struct apic_io_intr *)NULL);
3878 }
3879 
3880 /*
3881  * Check if the given ioapicindex intin combination has already been assigned
3882  * an irq. If so return irqno. Else -1
3883  */
3884 static int
3885 apic_find_intin(uchar_t ioapic, uchar_t intin)
3886 {
3887 	apic_irq_t *irqptr;
3888 	int	i;
3889 
3890 	/* find ioapic and intin in the apic_irq_table[] and return the index */
3891 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
3892 		irqptr = apic_irq_table[i];
3893 		while (irqptr) {
3894 			if ((irqptr->airq_mps_intr_index >= 0) &&
3895 			    (irqptr->airq_intin_no == intin) &&
3896 			    (irqptr->airq_ioapicindex == ioapic)) {
3897 				APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq "
3898 				    "entry for ioapic:intin %x:%x "
3899 				    "shared interrupts ?", ioapic, intin));
3900 				return (i);
3901 			}
3902 			irqptr = irqptr->airq_next;
3903 		}
3904 	}
3905 	return (-1);
3906 }
3907 
3908 int
3909 apic_allocate_irq(int irq)
3910 {
3911 	int	freeirq, i;
3912 
3913 	if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1)
3914 		if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ,
3915 		    (irq - 1))) == -1) {
3916 			/*
3917 			 * if BIOS really defines every single irq in the mps
3918 			 * table, then don't worry about conflicting with
3919 			 * them, just use any free slot in apic_irq_table
3920 			 */
3921 			for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
3922 				if ((apic_irq_table[i] == NULL) ||
3923 				    apic_irq_table[i]->airq_mps_intr_index ==
3924 				    FREE_INDEX) {
3925 				freeirq = i;
3926 				break;
3927 			}
3928 		}
3929 		if (freeirq == -1) {
3930 			/* This shouldn't happen, but just in case */
3931 			cmn_err(CE_WARN, "pcplusmp: NO available IRQ");
3932 			return (-1);
3933 		}
3934 	}
3935 	if (apic_irq_table[freeirq] == NULL) {
3936 		apic_irq_table[freeirq] =
3937 		    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
3938 		if (apic_irq_table[freeirq] == NULL) {
3939 			cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ");
3940 			return (-1);
3941 		}
3942 		apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX;
3943 	}
3944 	return (freeirq);
3945 }
3946 
3947 static int
3948 apic_find_free_irq(int start, int end)
3949 {
3950 	int	i;
3951 
3952 	for (i = start; i <= end; i++)
3953 		/* Check if any I/O entry needs this IRQ */
3954 		if (apic_find_io_intr(i) == NULL) {
3955 			/* Then see if it is free */
3956 			if ((apic_irq_table[i] == NULL) ||
3957 			    (apic_irq_table[i]->airq_mps_intr_index ==
3958 			    FREE_INDEX)) {
3959 				return (i);
3960 			}
3961 		}
3962 	return (-1);
3963 }
3964 
3965 /*
3966  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
3967  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
3968  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
3969  * requests and allocated only when pri is set.
3970  */
3971 static uchar_t
3972 apic_allocate_vector(int ipl, int irq, int pri)
3973 {
3974 	int	lowest, highest, i;
3975 
3976 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
3977 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
3978 
3979 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
3980 		lowest -= APIC_VECTOR_PER_IPL;
3981 
3982 #ifdef	DEBUG
3983 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
3984 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
3985 #endif /* DEBUG */
3986 	if (pri == 0)
3987 		highest -= APIC_HI_PRI_VECTS;
3988 
3989 	for (i = lowest; i < highest; i++) {
3990 		if (APIC_CHECK_RESERVE_VECTORS(i))
3991 			continue;
3992 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
3993 			apic_vector_to_irq[i] = (uchar_t)irq;
3994 			return (i);
3995 		}
3996 	}
3997 
3998 	return (0);
3999 }
4000 
4001 static void
4002 apic_modify_vector(uchar_t vector, int irq)
4003 {
4004 	apic_vector_to_irq[vector] = (uchar_t)irq;
4005 }
4006 
4007 /*
4008  * Mark vector as being in the process of being deleted. Interrupts
4009  * may still come in on some CPU. The moment an interrupt comes with
4010  * the new vector, we know we can free the old one. Called only from
4011  * addspl and delspl with interrupts disabled. Because an interrupt
4012  * can be shared, but no interrupt from either device may come in,
4013  * we also use a timeout mechanism, which we arbitrarily set to
4014  * apic_revector_timeout microseconds.
4015  */
4016 static void
4017 apic_mark_vector(uchar_t oldvector, uchar_t newvector)
4018 {
4019 	int iflag = intr_clear();
4020 	lock_set(&apic_revector_lock);
4021 	if (!apic_oldvec_to_newvec) {
4022 		apic_oldvec_to_newvec =
4023 		    kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2,
4024 		    KM_NOSLEEP);
4025 
4026 		if (!apic_oldvec_to_newvec) {
4027 			/*
4028 			 * This failure is not catastrophic.
4029 			 * But, the oldvec will never be freed.
4030 			 */
4031 			apic_error |= APIC_ERR_MARK_VECTOR_FAIL;
4032 			lock_clear(&apic_revector_lock);
4033 			intr_restore(iflag);
4034 			return;
4035 		}
4036 		apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR];
4037 	}
4038 
4039 	/* See if we already did this for drivers which do double addintrs */
4040 	if (apic_oldvec_to_newvec[oldvector] != newvector) {
4041 		apic_oldvec_to_newvec[oldvector] = newvector;
4042 		apic_newvec_to_oldvec[newvector] = oldvector;
4043 		apic_revector_pending++;
4044 	}
4045 	lock_clear(&apic_revector_lock);
4046 	intr_restore(iflag);
4047 	(void) timeout(apic_xlate_vector_free_timeout_handler,
4048 	    (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout));
4049 }
4050 
4051 /*
4052  * xlate_vector is called from intr_enter if revector_pending is set.
4053  * It will xlate it if needed and mark the old vector as free.
4054  */
4055 static uchar_t
4056 apic_xlate_vector(uchar_t vector)
4057 {
4058 	uchar_t	newvector, oldvector = 0;
4059 
4060 	lock_set(&apic_revector_lock);
4061 	/* Do we really need to do this ? */
4062 	if (!apic_revector_pending) {
4063 		lock_clear(&apic_revector_lock);
4064 		return (vector);
4065 	}
4066 	if ((newvector = apic_oldvec_to_newvec[vector]) != 0)
4067 		oldvector = vector;
4068 	else {
4069 		/*
4070 		 * The incoming vector is new . See if a stale entry is
4071 		 * remaining
4072 		 */
4073 		if ((oldvector = apic_newvec_to_oldvec[vector]) != 0)
4074 			newvector = vector;
4075 	}
4076 
4077 	if (oldvector) {
4078 		apic_revector_pending--;
4079 		apic_oldvec_to_newvec[oldvector] = 0;
4080 		apic_newvec_to_oldvec[newvector] = 0;
4081 		apic_free_vector(oldvector);
4082 		lock_clear(&apic_revector_lock);
4083 		/* There could have been more than one reprogramming! */
4084 		return (apic_xlate_vector(newvector));
4085 	}
4086 	lock_clear(&apic_revector_lock);
4087 	return (vector);
4088 }
4089 
4090 void
4091 apic_xlate_vector_free_timeout_handler(void *arg)
4092 {
4093 	int iflag;
4094 	uchar_t oldvector, newvector;
4095 
4096 	oldvector = (uchar_t)(uintptr_t)arg;
4097 	iflag = intr_clear();
4098 	lock_set(&apic_revector_lock);
4099 	if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) {
4100 		apic_free_vector(oldvector);
4101 		apic_oldvec_to_newvec[oldvector] = 0;
4102 		apic_newvec_to_oldvec[newvector] = 0;
4103 		apic_revector_pending--;
4104 	}
4105 
4106 	lock_clear(&apic_revector_lock);
4107 	intr_restore(iflag);
4108 }
4109 
4110 
4111 /* Mark vector as not being used by any irq */
4112 static void
4113 apic_free_vector(uchar_t vector)
4114 {
4115 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
4116 }
4117 
4118 /*
4119  * compute the polarity, trigger mode and vector for programming into
4120  * the I/O apic and record in airq_rdt_entry.
4121  */
4122 static void
4123 apic_record_rdt_entry(apic_irq_t *irqptr, int irq)
4124 {
4125 	int	ioapicindex, bus_type, vector;
4126 	short	intr_index;
4127 	uint_t	level, po, io_po;
4128 	struct apic_io_intr *iointrp;
4129 
4130 	intr_index = irqptr->airq_mps_intr_index;
4131 	DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d "
4132 	    "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq,
4133 	    (void *)irqptr->airq_dip, irqptr->airq_vector));
4134 
4135 	if (intr_index == RESERVE_INDEX) {
4136 		apic_error |= APIC_ERR_INVALID_INDEX;
4137 		return;
4138 	} else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) {
4139 		return;
4140 	}
4141 
4142 	vector = irqptr->airq_vector;
4143 	ioapicindex = irqptr->airq_ioapicindex;
4144 	/* Assume edge triggered by default */
4145 	level = 0;
4146 	/* Assume active high by default */
4147 	po = 0;
4148 
4149 	if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) {
4150 		ASSERT(irq < 16);
4151 		if (eisa_level_intr_mask & (1 << irq))
4152 			level = AV_LEVEL;
4153 		if (intr_index == FREE_INDEX && apic_defconf == 0)
4154 			apic_error |= APIC_ERR_INVALID_INDEX;
4155 	} else if (intr_index == ACPI_INDEX) {
4156 		bus_type = irqptr->airq_iflag.bustype;
4157 		if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) {
4158 			if (bus_type == BUS_PCI)
4159 				level = AV_LEVEL;
4160 		} else
4161 			level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ?
4162 			    AV_LEVEL : 0;
4163 		if (level &&
4164 		    ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) ||
4165 		    (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM &&
4166 		    bus_type == BUS_PCI)))
4167 			po = AV_ACTIVE_LOW;
4168 	} else {
4169 		iointrp = apic_io_intrp + intr_index;
4170 		bus_type = apic_find_bus(iointrp->intr_busid);
4171 		if (iointrp->intr_el == INTR_EL_CONFORM) {
4172 			if ((irq < 16) && (eisa_level_intr_mask & (1 << irq)))
4173 				level = AV_LEVEL;
4174 			else if (bus_type == BUS_PCI)
4175 				level = AV_LEVEL;
4176 		} else
4177 			level = (iointrp->intr_el == INTR_EL_LEVEL) ?
4178 			    AV_LEVEL : 0;
4179 		if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) ||
4180 		    (iointrp->intr_po == INTR_PO_CONFORM &&
4181 		    bus_type == BUS_PCI)))
4182 			po = AV_ACTIVE_LOW;
4183 	}
4184 	if (level)
4185 		apic_level_intr[irq] = 1;
4186 	/*
4187 	 * The 82489DX External APIC cannot do active low polarity interrupts.
4188 	 */
4189 	if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX))
4190 		io_po = po;
4191 	else
4192 		io_po = 0;
4193 
4194 	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG)
4195 		printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n",
4196 		    ioapicindex, irqptr->airq_intin_no, level, io_po, vector);
4197 
4198 	irqptr->airq_rdt_entry = level|io_po|vector;
4199 }
4200 
4201 /*
4202  * Call rebind to do the actual programming.
4203  */
4204 static int
4205 apic_setup_io_intr(apic_irq_t *irqptr, int irq)
4206 {
4207 	int rv;
4208 
4209 	if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1,
4210 	    IMMEDIATE))
4211 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
4212 		rv = apic_rebind(irqptr, 0, 1, IMMEDIATE);
4213 
4214 	return (rv);
4215 }
4216 
4217 /*
4218  * Deferred reprogramming: Call apic_rebind to do the real work.
4219  */
4220 static int
4221 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq)
4222 {
4223 	int rv;
4224 
4225 	if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1,
4226 	    DEFERRED))
4227 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
4228 		rv = apic_rebind(irqptr, 0, 1, DEFERRED);
4229 
4230 	return (rv);
4231 }
4232 
/*
 * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock
 * if false (0) means lock is already held (e.g: in rebind_all).
 *
 * Parameters:
 *   irq_ptr       interrupt to (re)program.
 *   bind_cpu      target CPU index, optionally with IRQ_USER_BOUND set,
 *                 or IRQ_UNBOUND to deliver to all CPUs (AV_TOALL).
 *   acquire_lock  non-zero: take and release apic_ioapic_lock here.
 *   when          IMMEDIATE or DEFERRED.  For DEFERRED, the request is
 *                 dropped if apic_reprogram_info[] no longer marks the
 *                 irq as valid.
 *
 * Returns 1 if the target CPU does not have APIC_CPU_INTR_ENABLE set.
 * Returns 0 in every other case, including when the reprogramming was
 * dropped as stale or was deferred by apic_check_stuck_interrupt().
 *
 * Interrupts are disabled for the duration of the call.  When
 * apic_check_stuck_interrupt() defers the work it restores the interrupt
 * flag itself and reports that through restore_iflag, so it is not
 * restored a second time here.
 */
static int
apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when)
{
	int			intin_no;
	volatile int32_t	*ioapic;
	uchar_t			airq_temp_cpu;
	apic_cpus_info_t	*cpu_infop;
	int			iflag;
	/* irq currently recorded as owning this entry's vector */
	int		which_irq = apic_vector_to_irq[irq_ptr->airq_vector];
	boolean_t		restore_iflag = B_TRUE;

	intin_no = irq_ptr->airq_intin_no;
	ioapic = apicioadr[irq_ptr->airq_ioapicindex];
	/* CPU the interrupt is bound to right now (before this call) */
	airq_temp_cpu = irq_ptr->airq_temp_cpu;
	if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) {
		if (airq_temp_cpu & IRQ_USER_BOUND)
			/* Mask off high bit so it can be used as array index */
			airq_temp_cpu &= ~IRQ_USER_BOUND;

		ASSERT(airq_temp_cpu < apic_nproc);
	}

	iflag = intr_clear();

	if (acquire_lock)
		lock_set(&apic_ioapic_lock);

	/*
	 * Can't bind to a CPU that's not online:
	 *
	 * NOTE(review): this index is computed before the IRQ_UNBOUND case
	 * is checked further down, so for bind_cpu == IRQ_UNBOUND it reads
	 * apic_cpus[IRQ_UNBOUND & ~IRQ_USER_BOUND] -- confirm that entry is
	 * in bounds and meaningful.
	 */
	cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND];
	if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (1);
	}

	/*
	 * If this is a deferred reprogramming attempt, ensure we have
	 * not been passed stale data:
	 */
	if ((when == DEFERRED) &&
	    (apic_reprogram_info[which_irq].valid == 0)) {
		/* stale info, so just return */
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (0);
	}

	/*
	 * If this interrupt has been delivered to a CPU and that CPU
	 * has not handled it yet, we cannot reprogram the IOAPIC now:
	 * (MSI/MSI-X entries skip this check entirely.)
	 */
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) &&
	    apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu,
	    ioapic, intin_no, which_irq, iflag, &restore_iflag) != 0) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		/* The callee already restored iflag if it cleared this */
		if (restore_iflag)
			intr_restore(iflag);
		return (0);
	}

	/*
	 * NOTE: We do not unmask the RDT here, as an interrupt MAY still
	 * come in before we have a chance to reprogram it below.  The
	 * reprogramming below will simultaneously change and unmask the
	 * RDT entry.
	 */

	if ((uchar_t)bind_cpu == IRQ_UNBOUND) {
		/* Write the RDT entry -- no specific CPU binding */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL);

		/* Drop the temporary-binding count of the previous CPU */
		if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND)
			apic_cpus[airq_temp_cpu].aci_temp_bound--;

		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry);
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);
		irq_ptr->airq_temp_cpu = IRQ_UNBOUND;
		intr_restore(iflag);
		return (0);
	}

	/* Account for the new binding on the target CPU */
	if (bind_cpu & IRQ_USER_BOUND) {
		cpu_infop->aci_bound++;
	} else {
		cpu_infop->aci_temp_bound++;
	}
	ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the RDT entry -- bind to a specific CPU: */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no,
		    cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET);
	}
	/* Drop the temporary-binding count of the previous CPU */
	if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
		apic_cpus[airq_temp_cpu].aci_temp_bound--;
	}
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry);
	} else {
		/*
		 * MSI / MSI-X: no IOAPIC RDT entry is written; the device
		 * is reprogrammed through the apic_pci_msi_* helpers.
		 */
		int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
		(void) apic_pci_msi_disable_mode(irq_ptr->airq_dip, type,
		    irq_ptr->airq_ioapicindex);
		if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) {
			/* first one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "apic_pci_msi_enable_vector\n"));
			if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type,
			    which_irq, irq_ptr->airq_vector,
			    irq_ptr->airq_intin_no,
			    cpu_infop->aci_local_id) != PSM_SUCCESS) {
				cmn_err(CE_WARN, "pcplusmp: "
					"apic_pci_msi_enable_vector "
					"returned PSM_FAILURE");
			}
		}
		if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) ==
		    irq_ptr->airq_origirq) { /* last one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "pci_msi_enable_mode\n"));
			if (apic_pci_msi_enable_mode(irq_ptr->airq_dip,
			    type, which_irq) != PSM_SUCCESS) {
				DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: "
				    "pci_msi_enable failed\n"));
				(void) apic_pci_msi_unconfigure(
				    irq_ptr->airq_dip, type, which_irq);
			}
		}
	}
	if (acquire_lock)
		lock_clear(&apic_ioapic_lock);
	/* Remember the new binding and make this CPU eligible again */
	irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu;
	apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND));
	intr_restore(iflag);
	return (0);
}
4387 
/*
 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR
 * bit set.  Sets up a timeout to perform the reprogramming at a later time
 * if it cannot wait for the Remote IRR bit to clear (or if waiting did not
 * result in the bit's clearing).
 *
 * This function masks the RDT entry when it is level-triggered and not
 * already masked (the check is made before Remote IRR is examined).
 *
 * Parameters:
 *   irq_ptr        interrupt being reprogrammed (used for messages).
 *   old_bind_cpu   CPU the interrupt is currently bound to, or
 *                  IRQ_UNBOUND / IRQ_UNINIT.
 *   new_bind_cpu   CPU the caller wants to bind to; saved in
 *                  apic_reprogram_info[] when the work is deferred.
 *   ioapic         register window of the IOAPIC owning the pin.
 *   intin_no       pin number on that IOAPIC.
 *   which_irq      index into apic_reprogram_info[].
 *   iflag          caller's saved interrupt flag.
 *   intr_restorep  set to B_FALSE when this function has already restored
 *                  iflag (the deferred path), so the caller must not.
 *
 * Returns non-zero if the caller should defer IOAPIC reprogramming.
 */
static int
apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
    int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq,
    int iflag, boolean_t *intr_restorep)
{
	int32_t			rdt_entry;
	int			waited;

	/* Mask the RDT entry, but only if it's a level-triggered interrupt */
	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no);
	if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) {

		/* Mask it */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_MASK | rdt_entry);
	}

	/*
	 * Wait for the delivery pending bit to clear.
	 */
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
	    (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) {

		/*
		 * If we're still waiting on the delivery of this interrupt,
		 * continue to wait here until it is delivered (this should be
		 * a very small amount of time, but include a timeout just in
		 * case).
		 */
		for (waited = 0; waited < apic_max_usecs_clear_pending;
		    waited += APIC_USECS_PER_WAIT_INTERVAL) {
			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
			    & AV_PENDING) == 0) {
				break;
			}
			drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
		}

		/* Still pending after the wait: warn, but carry on */
		if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
		    AV_PENDING) != 0) {
			cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not "
			    "deliver interrupt to local APIC within "
			    "%d usecs.", irq_ptr->airq_ioapicindex,
			    irq_ptr->airq_intin_no,
			    apic_max_usecs_clear_pending);
		}
	}

	/*
	 * If the remote IRR bit is set, then the interrupt has been sent
	 * to a CPU for processing.  We have no choice but to wait for
	 * that CPU to process the interrupt, at which point the remote IRR
	 * bit will be cleared.
	 */
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
	    (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) {

		/*
		 * If the CPU that this RDT is bound to is NOT the current
		 * CPU, wait until that CPU handles the interrupt and ACKs
		 * it.  If this interrupt is not bound to any CPU (that is,
		 * if it's bound to the logical destination of "anyone"), it
		 * may have been delivered to the current CPU so handle that
		 * case by deferring the reprogramming (below).
		 */
		kpreempt_disable();
		if ((old_bind_cpu != IRQ_UNBOUND) &&
		    (old_bind_cpu != IRQ_UNINIT) &&
		    (old_bind_cpu != psm_get_cpu_id())) {
			for (waited = 0; waited < apic_max_usecs_clear_pending;
			    waited += APIC_USECS_PER_WAIT_INTERVAL) {
				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) & AV_REMOTE_IRR) == 0) {

					/* Clear the reprogramming state: */
					lock_set(&apic_ioapic_reprogram_lock);

					apic_reprogram_info[which_irq].valid
					    = 0;
					apic_reprogram_info[which_irq].bindcpu
					    = 0;
					apic_reprogram_info[which_irq].timeouts
					    = 0;

					lock_clear(&apic_ioapic_reprogram_lock);

					/* Remote IRR has cleared! */
					kpreempt_enable();
					return (0);
				}
				drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
			}
		}
		kpreempt_enable();

		/*
		 * If we waited and the Remote IRR bit is still not cleared,
		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
		 * times for this interrupt, try the last-ditch workarounds:
		 */
		if (apic_reprogram_info[which_irq].timeouts >=
		    APIC_REPROGRAM_MAX_TIMEOUTS) {

			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
			    & AV_REMOTE_IRR) != 0) {
				/*
				 * Trying to clear the bit through normal
				 * channels has failed.  So as a last-ditch
				 * effort, try to set the trigger mode to
				 * edge, then to level.  This has been
				 * observed to work on many systems.
				 */
				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no,
				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) & ~AV_LEVEL);

				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no,
				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) | AV_LEVEL);

				/*
				 * If the bit's STILL set, declare total and
				 * utter failure
				 */
				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) & AV_REMOTE_IRR) != 0) {
					cmn_err(CE_WARN, "!IOAPIC %d intin %d: "
					    "Remote IRR failed to reset "
					    "within %d usecs.  Interrupts to "
					    "this pin may cease to function.",
					    irq_ptr->airq_ioapicindex,
					    irq_ptr->airq_intin_no,
					    apic_max_usecs_clear_pending);
				}
			}
			/*
			 * Clear the reprogramming state.  Falling out of this
			 * branch returns 0, so the caller goes ahead and
			 * reprograms the entry whether or not the workaround
			 * cleared the bit.
			 */
			lock_set(&apic_ioapic_reprogram_lock);

			apic_reprogram_info[which_irq].valid = 0;
			apic_reprogram_info[which_irq].bindcpu = 0;
			apic_reprogram_info[which_irq].timeouts = 0;

			lock_clear(&apic_ioapic_reprogram_lock);
		} else {
#ifdef DEBUG
			cmn_err(CE_WARN, "Deferring reprogramming of irq %d",
			    which_irq);
#endif	/* DEBUG */
			/*
			 * If waiting for the Remote IRR bit (above) didn't
			 * allow it to clear, defer the reprogramming:
			 */
			lock_set(&apic_ioapic_reprogram_lock);

			apic_reprogram_info[which_irq].valid = 1;
			apic_reprogram_info[which_irq].bindcpu = new_bind_cpu;
			apic_reprogram_info[which_irq].timeouts++;

			lock_clear(&apic_ioapic_reprogram_lock);

			/*
			 * Restore the interrupt flag here, before calling
			 * timeout(), and tell the caller not to restore it
			 * again.
			 */
			*intr_restorep = B_FALSE;
			intr_restore(iflag);

			/* Fire up a timeout to handle this later */
			(void) timeout(apic_reprogram_timeout_handler,
			    (void *) 0,
			    drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY));

			/* Inform caller to defer IOAPIC programming: */
			return (1);
		}
	}
	return (0);
}
4574 
4575 /*
4576  * Timeout handler that performs the APIC reprogramming
4577  */
4578 /*ARGSUSED*/
4579 static void
4580 apic_reprogram_timeout_handler(void *arg)
4581 {
4582 	/*LINTED: set but not used in function*/
4583 	int i, result;
4584 
4585 	/* Serialize access to this function */
4586 	mutex_enter(&apic_reprogram_timeout_mutex);
4587 
4588 	/*
4589 	 * For each entry in the reprogramming state that's valid,
4590 	 * try the reprogramming again:
4591 	 */
4592 	for (i = 0; i < APIC_MAX_VECTOR; i++) {
4593 		if (apic_reprogram_info[i].valid == 0)
4594 			continue;
4595 		/*
4596 		 * Though we can't really do anything about errors
4597 		 * at this point, keep track of them for reporting.
4598 		 * Note that it is very possible for apic_setup_io_intr
4599 		 * to re-register this very timeout if the Remote IRR bit
4600 		 * has not yet cleared.
4601 		 */
4602 		result = apic_setup_io_intr_deferred(apic_irq_table[i], i);
4603 
4604 #ifdef DEBUG
4605 		if (result)
4606 			cmn_err(CE_WARN, "apic_reprogram_timeout: "
4607 			    "apic_setup_io_intr returned nonzero for "
4608 			    "irq=%d!", i);
4609 #endif	/* DEBUG */
4610 	}
4611 
4612 	mutex_exit(&apic_reprogram_timeout_mutex);
4613 }
4614 
4615 
4616 /*
4617  * Called to migrate all interrupts at an irq to another cpu. safe
4618  * if true means we are not being called from an interrupt
4619  * context and hence it is safe to do a lock_set. If false
4620  * do only a lock_try and return failure ( non 0 ) if we cannot get it
4621  */
4622 int
4623 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe)
4624 {
4625 	apic_irq_t	*irqptr = irq_ptr;
4626 	int		retval = 0;
4627 	int		iflag;
4628 
4629 	iflag = intr_clear();
4630 	if (!safe) {
4631 		if (lock_try(&apic_ioapic_lock) == 0) {
4632 			intr_restore(iflag);
4633 			return (1);
4634 		}
4635 	} else
4636 		lock_set(&apic_ioapic_lock);
4637 
4638 	while (irqptr) {
4639 		if (irqptr->airq_temp_cpu != IRQ_UNINIT)
4640 			retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE);
4641 		irqptr = irqptr->airq_next;
4642 	}
4643 	lock_clear(&apic_ioapic_lock);
4644 	intr_restore(iflag);
4645 	return (retval);
4646 }
4647 
4648 /*
4649  * apic_intr_redistribute does all the messy computations for identifying
4650  * which interrupt to move to which CPU. Currently we do just one interrupt
4651  * at a time. This reduces the time we spent doing all this within clock
4652  * interrupt. When it is done in idle, we could do more than 1.
4653  * First we find the most busy and the most free CPU (time in ISR only)
4654  * skipping those CPUs that has been identified as being ineligible (cpu_skip)
4655  * Then we look for IRQs which are closest to the difference between the
4656  * most busy CPU and the average ISR load. We try to find one whose load
4657  * is less than difference.If none exists, then we chose one larger than the
4658  * difference, provided it does not make the most idle CPU worse than the
4659  * most busy one. In the end, we clear all the busy fields for CPUs. For
4660  * IRQs, they are cleared as they are scanned.
4661  */
4662 static void
4663 apic_intr_redistribute()
4664 {
4665 	int busiest_cpu, most_free_cpu;
4666 	int cpu_free, cpu_busy, max_busy, min_busy;
4667 	int min_free, diff;
4668 	int	average_busy, cpus_online;
4669 	int i, busy;
4670 	apic_cpus_info_t *cpu_infop;
4671 	apic_irq_t *min_busy_irq = NULL;
4672 	apic_irq_t *max_busy_irq = NULL;
4673 
4674 	busiest_cpu = most_free_cpu = -1;
4675 	cpu_free = cpu_busy = max_busy = average_busy = 0;
4676 	min_free = apic_sample_factor_redistribution;
4677 	cpus_online = 0;
4678 	/*
4679 	 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu
4680 	 * without ioapic_lock. That is OK as we are just doing statistical
4681 	 * sampling anyway and any inaccuracy now will get corrected next time
4682 	 * The call to rebind which actually changes things will make sure
4683 	 * we are consistent.
4684 	 */
4685 	for (i = 0; i < apic_nproc; i++) {
4686 		if (!(apic_redist_cpu_skip & (1 << i)) &&
4687 		    (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {
4688 
4689 			cpu_infop = &apic_cpus[i];
4690 			/*
4691 			 * If no unbound interrupts or only 1 total on this
4692 			 * CPU, skip
4693 			 */
4694 			if (!cpu_infop->aci_temp_bound ||
4695 			    (cpu_infop->aci_bound + cpu_infop->aci_temp_bound)
4696 			    == 1) {
4697 				apic_redist_cpu_skip |= 1 << i;
4698 				continue;
4699 			}
4700 
4701 			busy = cpu_infop->aci_busy;
4702 			average_busy += busy;
4703 			cpus_online++;
4704 			if (max_busy < busy) {
4705 				max_busy = busy;
4706 				busiest_cpu = i;
4707 			}
4708 			if (min_free > busy) {
4709 				min_free = busy;
4710 				most_free_cpu = i;
4711 			}
4712 			if (busy > apic_int_busy_mark) {
4713 				cpu_busy |= 1 << i;
4714 			} else {
4715 				if (busy < apic_int_free_mark)
4716 					cpu_free |= 1 << i;
4717 			}
4718 		}
4719 	}
4720 	if ((cpu_busy && cpu_free) ||
4721 	    (max_busy >= (min_free + apic_diff_for_redistribution))) {
4722 
4723 		apic_num_imbalance++;
4724 #ifdef	DEBUG
4725 		if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4726 			prom_printf(
4727 			    "redistribute busy=%x free=%x max=%x min=%x",
4728 			    cpu_busy, cpu_free, max_busy, min_free);
4729 		}
4730 #endif /* DEBUG */
4731 
4732 
4733 		average_busy /= cpus_online;
4734 
4735 		diff = max_busy - average_busy;
4736 		min_busy = max_busy; /* start with the max possible value */
4737 		max_busy = 0;
4738 		min_busy_irq = max_busy_irq = NULL;
4739 		i = apic_min_device_irq;
4740 		for (; i < apic_max_device_irq; i++) {
4741 			apic_irq_t *irq_ptr;
4742 			/* Change to linked list per CPU ? */
4743 			if ((irq_ptr = apic_irq_table[i]) == NULL)
4744 				continue;
4745 			/* Check for irq_busy & decide which one to move */
4746 			/* Also zero them for next round */
4747 			if ((irq_ptr->airq_temp_cpu == busiest_cpu) &&
4748 			    irq_ptr->airq_busy) {
4749 				if (irq_ptr->airq_busy < diff) {
4750 					/*
4751 					 * Check for least busy CPU,
4752 					 * best fit or what ?
4753 					 */
4754 					if (max_busy < irq_ptr->airq_busy) {
4755 						/*
4756 						 * Most busy within the
4757 						 * required differential
4758 						 */
4759 						max_busy = irq_ptr->airq_busy;
4760 						max_busy_irq = irq_ptr;
4761 					}
4762 				} else {
4763 					if (min_busy > irq_ptr->airq_busy) {
4764 						/*
4765 						 * least busy, but more than
4766 						 * the reqd diff
4767 						 */
4768 						if (min_busy <
4769 						    (diff + average_busy -
4770 						    min_free)) {
4771 							/*
4772 							 * Making sure new cpu
4773 							 * will not end up
4774 							 * worse
4775 							 */
4776 							min_busy =
4777 							    irq_ptr->airq_busy;
4778 
4779 							min_busy_irq = irq_ptr;
4780 						}
4781 					}
4782 				}
4783 			}
4784 			irq_ptr->airq_busy = 0;
4785 		}
4786 
4787 		if (max_busy_irq != NULL) {
4788 #ifdef	DEBUG
4789 			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4790 				prom_printf("rebinding %x to %x",
4791 				    max_busy_irq->airq_vector, most_free_cpu);
4792 			}
4793 #endif /* DEBUG */
4794 			if (apic_rebind_all(max_busy_irq, most_free_cpu, 0)
4795 			    == 0)
4796 				/* Make change permenant */
4797 				max_busy_irq->airq_cpu = (uchar_t)most_free_cpu;
4798 		} else if (min_busy_irq != NULL) {
4799 #ifdef	DEBUG
4800 			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
4801 				prom_printf("rebinding %x to %x",
4802 				    min_busy_irq->airq_vector, most_free_cpu);
4803 			}
4804 #endif /* DEBUG */
4805 
4806 			if (apic_rebind_all(min_busy_irq, most_free_cpu, 0) ==
4807 			    0)
4808 				/* Make change permenant */
4809 				min_busy_irq->airq_cpu = (uchar_t)most_free_cpu;
4810 		} else {
4811 			if (cpu_busy != (1 << busiest_cpu)) {
4812 				apic_redist_cpu_skip |= 1 << busiest_cpu;
4813 				/*
4814 				 * We leave cpu_skip set so that next time we
4815 				 * can choose another cpu
4816 				 */
4817 			}
4818 		}
4819 		apic_num_rebind++;
4820 	} else {
4821 		/*
4822 		 * found nothing. Could be that we skipped over valid CPUs
4823 		 * or we have balanced everything. If we had a variable
4824 		 * ticks_for_redistribution, it could be increased here.
4825 		 * apic_int_busy, int_free etc would also need to be
4826 		 * changed.
4827 		 */
4828 		if (apic_redist_cpu_skip)
4829 			apic_redist_cpu_skip = 0;
4830 	}
4831 	for (i = 0; i < apic_nproc; i++) {
4832 		apic_cpus[i].aci_busy = 0;
4833 	}
4834 }
4835 
4836 static void
4837 apic_cleanup_busy()
4838 {
4839 	int i;
4840 	apic_irq_t *irq_ptr;
4841 
4842 	for (i = 0; i < apic_nproc; i++) {
4843 		apic_cpus[i].aci_busy = 0;
4844 	}
4845 
4846 	for (i = apic_min_device_irq; i < apic_max_device_irq; i++) {
4847 		if ((irq_ptr = apic_irq_table[i]) != NULL)
4848 			irq_ptr->airq_busy = 0;
4849 	}
4850 	apic_skipped_redistribute = 0;
4851 }
4852 
4853 
4854 /*
4855  * This function will reprogram the timer.
4856  *
4857  * When in oneshot mode the argument is the absolute time in future to
4858  * generate the interrupt at.
4859  *
4860  * When in periodic mode, the argument is the interval at which the
4861  * interrupts should be generated. There is no need to support the periodic
4862  * mode timer change at this time.
4863  */
4864 static void
4865 apic_timer_reprogram(hrtime_t time)
4866 {
4867 	hrtime_t now;
4868 	uint_t ticks;
4869 	int64_t	delta;
4870 
4871 	/*
4872 	 * We should be called from high PIL context (CBE_HIGH_PIL),
4873 	 * so kpreempt is disabled.
4874 	 */
4875 
4876 	if (!apic_oneshot) {
4877 		/* time is the interval for periodic mode */
4878 		ticks = APIC_NSECS_TO_TICKS(time);
4879 	} else {
4880 		/* one shot mode */
4881 
4882 		now = gethrtime();
4883 		delta = time - now;
4884 
4885 		if (delta <= 0) {
4886 			/*
4887 			 * requested to generate an interrupt in the past
4888 			 * generate an interrupt as soon as possible
4889 			 */
4890 			ticks = apic_min_timer_ticks;
4891 		} else if (delta > apic_nsec_max) {
4892 			/*
4893 			 * requested to generate an interrupt at a time
4894 			 * further than what we are capable of. Set to max
4895 			 * the hardware can handle
4896 			 */
4897 
4898 			ticks = APIC_MAXVAL;
4899 #ifdef DEBUG
4900 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
4901 			    "  %lld  too far in future, current time"
4902 			    "  %lld \n", time, now);
4903 #endif
4904 		} else
4905 			ticks = APIC_NSECS_TO_TICKS(delta);
4906 	}
4907 
4908 	if (ticks < apic_min_timer_ticks)
4909 		ticks = apic_min_timer_ticks;
4910 
4911 	apicadr[APIC_INIT_COUNT] = ticks;
4912 
4913 }
4914 
4915 /*
4916  * This function will enable timer interrupts.
4917  */
4918 static void
4919 apic_timer_enable(void)
4920 {
4921 	/*
4922 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
4923 	 * so kpreempt is disabled.
4924 	 */
4925 
4926 	if (!apic_oneshot)
4927 		apicadr[APIC_LOCAL_TIMER] =
4928 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
4929 	else {
4930 		/* one shot */
4931 		apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT);
4932 	}
4933 }
4934 
4935 /*
4936  * This function will disable timer interrupts.
4937  */
4938 static void
4939 apic_timer_disable(void)
4940 {
4941 	/*
4942 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
4943 	 * so kpreempt is disabled.
4944 	 */
4945 
4946 	apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
4947 }
4948 
4949 
4950 cyclic_id_t apic_cyclic_id;
4951 
4952 /*
4953  * If this module needs to be a consumer of cyclic subsystem, they
4954  * can be added here, since at this time kernel cyclic subsystem is initialized
4955  * argument is not currently used, and is reserved for future.
4956  */
4957 static void
4958 apic_post_cyclic_setup(void *arg)
4959 {
4960 _NOTE(ARGUNUSED(arg))
4961 	cyc_handler_t hdlr;
4962 	cyc_time_t when;
4963 
4964 	/* cpu_lock is held */
4965 
4966 	/* set up cyclics for intr redistribution */
4967 
4968 	/*
4969 	 * In peridoc mode intr redistribution processing is done in
4970 	 * apic_intr_enter during clk intr processing
4971 	 */
4972 	if (!apic_oneshot)
4973 		return;
4974 
4975 	hdlr.cyh_level = CY_LOW_LEVEL;
4976 	hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute;
4977 	hdlr.cyh_arg = NULL;
4978 
4979 	when.cyt_when = 0;
4980 	when.cyt_interval = apic_redistribute_sample_interval;
4981 	apic_cyclic_id = cyclic_add(&hdlr, &when);
4982 
4983 
4984 }
4985 
4986 static void
4987 apic_redistribute_compute(void)
4988 {
4989 	int	i, j, max_busy;
4990 
4991 	if (apic_enable_dynamic_migration) {
4992 		if (++apic_nticks == apic_sample_factor_redistribution) {
4993 			/*
4994 			 * Time to call apic_intr_redistribute().
4995 			 * reset apic_nticks. This will cause max_busy
4996 			 * to be calculated below and if it is more than
4997 			 * apic_int_busy, we will do the whole thing
4998 			 */
4999 			apic_nticks = 0;
5000 		}
5001 		max_busy = 0;
5002 		for (i = 0; i < apic_nproc; i++) {
5003 
5004 			/*
5005 			 * Check if curipl is non zero & if ISR is in
5006 			 * progress
5007 			 */
5008 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
5009 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
5010 
5011 				int	irq;
5012 				apic_cpus[i].aci_busy++;
5013 				irq = apic_cpus[i].aci_current[j];
5014 				apic_irq_table[irq]->airq_busy++;
5015 			}
5016 
5017 			if (!apic_nticks &&
5018 			    (apic_cpus[i].aci_busy > max_busy))
5019 				max_busy = apic_cpus[i].aci_busy;
5020 		}
5021 		if (!apic_nticks) {
5022 			if (max_busy > apic_int_busy_mark) {
5023 			/*
5024 			 * We could make the following check be
5025 			 * skipped > 1 in which case, we get a
5026 			 * redistribution at half the busy mark (due to
5027 			 * double interval). Need to be able to collect
5028 			 * more empirical data to decide if that is a
5029 			 * good strategy. Punt for now.
5030 			 */
5031 				if (apic_skipped_redistribute)
5032 					apic_cleanup_busy();
5033 				else
5034 					apic_intr_redistribute();
5035 			} else
5036 				apic_skipped_redistribute++;
5037 		}
5038 	}
5039 }
5040 
5041 
5042 static int
5043 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
5044     int ipin, int *pci_irqp, iflag_t *intr_flagp)
5045 {
5046 
5047 	int status;
5048 	acpi_psm_lnk_t acpipsmlnk;
5049 
5050 	if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp,
5051 	    intr_flagp)) == ACPI_PSM_SUCCESS) {
5052 		APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d "
5053 		    "from cache for device %s, instance #%d\n", *pci_irqp,
5054 		    ddi_get_name(dip), ddi_get_instance(dip)));
5055 		return (status);
5056 	}
5057 
5058 	bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t));
5059 
5060 	if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp,
5061 	    &acpipsmlnk)) == ACPI_PSM_FAILURE) {
5062 		APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: "
5063 		    " acpi_translate_pci_irq failed for device %s, instance"
5064 		    " #%d", ddi_get_name(dip), ddi_get_instance(dip)));
5065 		return (status);
5066 	}
5067 
5068 	if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) {
5069 		status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp,
5070 		    intr_flagp);
5071 		if (status != ACPI_PSM_SUCCESS) {
5072 			status = acpi_get_current_irq_resource(&acpipsmlnk,
5073 			    pci_irqp, intr_flagp);
5074 		}
5075 	}
5076 
5077 	if (status == ACPI_PSM_SUCCESS) {
5078 		acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp,
5079 		    intr_flagp, &acpipsmlnk);
5080 
5081 		APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] "
5082 		    "new irq %d for device %s, instance #%d\n",
5083 		    *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip)));
5084 	}
5085 
5086 	return (status);
5087 }
5088 
5089 /*
5090  * Configures the irq for the interrupt link device identified by
5091  * acpipsmlnkp.
5092  *
5093  * Gets the current and the list of possible irq settings for the
5094  * device. If apic_unconditional_srs is not set, and the current
5095  * resource setting is in the list of possible irq settings,
5096  * current irq resource setting is passed to the caller.
5097  *
5098  * Otherwise, picks an irq number from the list of possible irq
5099  * settings, and sets the irq of the device to this value.
5100  * If prefer_crs is set, among a set of irq numbers in the list that have
5101  * the least number of devices sharing the interrupt, we pick current irq
5102  * resource setting if it is a member of this set.
5103  *
5104  * Passes the irq number in the value pointed to by pci_irqp, and
5105  * polarity and sensitivity in the structure pointed to by dipintrflagp
5106  * to the caller.
5107  *
5108  * Note that if setting the irq resource failed, but successfuly obtained
5109  * the current irq resource settings, passes the current irq resources
5110  * and considers it a success.
5111  *
5112  * Returns:
5113  * ACPI_PSM_SUCCESS on success.
5114  *
5115  * ACPI_PSM_FAILURE if an error occured during the configuration or
5116  * if a suitable irq was not found for this device, or if setting the
5117  * irq resource and obtaining the current resource fails.
5118  *
5119  */
5120 static int
5121 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
5122     int *pci_irqp, iflag_t *dipintr_flagp)
5123 {
5124 
5125 	int i, min_share, foundnow, done = 0;
5126 	int32_t irq;
5127 	int32_t share_irq = -1;
5128 	int32_t chosen_irq = -1;
5129 	int cur_irq = -1;
5130 	acpi_irqlist_t *irqlistp;
5131 	acpi_irqlist_t *irqlistent;
5132 
5133 	if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp))
5134 	    == ACPI_PSM_FAILURE) {
5135 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine "
5136 		    "or assign IRQ for device %s, instance #%d: The system was "
5137 		    "unable to get the list of potential IRQs from ACPI.",
5138 		    ddi_get_name(dip), ddi_get_instance(dip)));
5139 
5140 		return (ACPI_PSM_FAILURE);
5141 	}
5142 
5143 	if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
5144 	    dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) &&
5145 	    (cur_irq > 0)) {
5146 		/*
5147 		 * If an IRQ is set in CRS and that IRQ exists in the set
5148 		 * returned from _PRS, return that IRQ, otherwise print
5149 		 * a warning
5150 		 */
5151 
5152 		if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL)
5153 		    == ACPI_PSM_SUCCESS) {
5154 
5155 			acpi_free_irqlist(irqlistp);
5156 			ASSERT(pci_irqp != NULL);
5157 			*pci_irqp = cur_irq;
5158 			return (ACPI_PSM_SUCCESS);
5159 		}
5160 
5161 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the "
5162 		    "current irq %d for device %s, instance #%d in ACPI's "
5163 		    "list of possible irqs for this device. Picking one from "
5164 		    " the latter list.", cur_irq, ddi_get_name(dip),
5165 		    ddi_get_instance(dip)));
5166 	}
5167 
5168 	irqlistent = irqlistp;
5169 	min_share = 255;
5170 
5171 	while (irqlistent != NULL) {
5172 		irqlistent->intr_flags.bustype = BUS_PCI;
5173 
5174 		for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) {
5175 
5176 			irq = irqlistent->irqs[i];
5177 
5178 			if ((irq < 16) && (apic_reserved_irqlist[irq]))
5179 				continue;
5180 
5181 			if (irq == 0) {
5182 				/* invalid irq number */
5183 				continue;
5184 			}
5185 
5186 			if ((apic_irq_table[irq] == NULL) ||
5187 			    (apic_irq_table[irq]->airq_dip == dip)) {
5188 				chosen_irq = irq;
5189 				foundnow = 1;
5190 				/*
5191 				 * If we do not prefer current irq from crs
5192 				 * or if we do and this irq is the same as
5193 				 * current irq from crs, this is the one
5194 				 * to pick.
5195 				 */
5196 				if (!(apic_prefer_crs) || (irq == cur_irq)) {
5197 					done = 1;
5198 					break;
5199 				}
5200 				continue;
5201 			}
5202 
5203 			if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE)
5204 				continue;
5205 
5206 			if (!acpi_intr_compatible(irqlistent->intr_flags,
5207 			    apic_irq_table[irq]->airq_iflag))
5208 				continue;
5209 
5210 			if ((apic_irq_table[irq]->airq_share < min_share) ||
5211 			    ((apic_irq_table[irq]->airq_share == min_share) &&
5212 			    (cur_irq == irq) && (apic_prefer_crs))) {
5213 				min_share = apic_irq_table[irq]->airq_share;
5214 				share_irq = irq;
5215 				foundnow = 1;
5216 			}
5217 		}
5218 
5219 		/*
5220 		 * If we found an IRQ in the inner loop this time, save the
5221 		 * details from the irqlist for later use.
5222 		 */
5223 		if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) {
5224 			/*
5225 			 * Copy the acpi_prs_private_t and flags from this
5226 			 * irq list entry, since we found an irq from this
5227 			 * entry.
5228 			 */
5229 			acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv;
5230 			*dipintr_flagp = irqlistent->intr_flags;
5231 		}
5232 
5233 		if (done)
5234 			break;
5235 
5236 		/* Go to the next irqlist entry */
5237 		irqlistent = irqlistent->next;
5238 	}
5239 
5240 
5241 	acpi_free_irqlist(irqlistp);
5242 	if (chosen_irq != -1)
5243 		irq = chosen_irq;
5244 	else if (share_irq != -1)
5245 		irq = share_irq;
5246 	else {
5247 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a "
5248 		    "suitable irq from the list of possible irqs for device "
5249 		    "%s, instance #%d in ACPI's list of possible irqs",
5250 		    ddi_get_name(dip), ddi_get_instance(dip)));
5251 		return (ACPI_PSM_FAILURE);
5252 	}
5253 
5254 	APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s "
5255 	    "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip)));
5256 
5257 	if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) {
5258 		/*
5259 		 * setting irq was successful, check to make sure CRS
5260 		 * reflects that. If CRS does not agree with what we
5261 		 * set, return the irq that was set.
5262 		 */
5263 
5264 		if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
5265 		    dipintr_flagp) == ACPI_PSM_SUCCESS) {
5266 
5267 			if (cur_irq != irq)
5268 				APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: "
5269 				    "IRQ resource set (irqno %d) for device %s "
5270 				    "instance #%d, differs from current "
5271 				    "setting irqno %d",
5272 				    irq, ddi_get_name(dip),
5273 				    ddi_get_instance(dip), cur_irq));
5274 		}
5275 
5276 		/*
5277 		 * return the irq that was set, and not what CRS reports,
5278 		 * since CRS has been seen to be bogus on some systems
5279 		 */
5280 		cur_irq = irq;
5281 	} else {
5282 		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d "
5283 		    "failed for device %s instance #%d",
5284 		    irq, ddi_get_name(dip), ddi_get_instance(dip)));
5285 
5286 		if (cur_irq == -1)
5287 			return (ACPI_PSM_FAILURE);
5288 	}
5289 
5290 	ASSERT(pci_irqp != NULL);
5291 	*pci_irqp = cur_irq;
5292 	return (ACPI_PSM_SUCCESS);
5293 }
5294