xref: /titanic_51/usr/src/uts/i86pc/io/pcplusmp/apic.c (revision ce0bfb39c0479ba97372eb0e5bf2ef4275d0876e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
29  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
30  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
31  * PSMI 1.5 extensions are supported in Solaris Nevada.
32  * PSMI 1.6 extensions are supported in Solaris Nevada.
33  */
34 #define	PSMI_1_6
35 
36 #include <sys/processor.h>
37 #include <sys/time.h>
38 #include <sys/psm.h>
39 #include <sys/smp_impldefs.h>
40 #include <sys/cram.h>
41 #include <sys/acpi/acpi.h>
42 #include <sys/acpica.h>
43 #include <sys/psm_common.h>
44 #include <sys/apic.h>
45 #include <sys/pit.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/pci.h>
50 #include <sys/promif.h>
51 #include <sys/x86_archext.h>
52 #include <sys/cpc_impl.h>
53 #include <sys/uadmin.h>
54 #include <sys/panic.h>
55 #include <sys/debug.h>
56 #include <sys/archsystm.h>
57 #include <sys/trap.h>
58 #include <sys/machsystm.h>
59 #include <sys/sysmacros.h>
60 #include <sys/cpuvar.h>
61 #include <sys/rm_platter.h>
62 #include <sys/privregs.h>
63 #include <sys/note.h>
64 #include <sys/pci_intr_lib.h>
65 #include <sys/spl.h>
66 #include <sys/clock.h>
67 #include <sys/dditypes.h>
68 #include <sys/sunddi.h>
69 #include <sys/x_call.h>
70 #include <sys/reboot.h>
71 #include <sys/hpet.h>
72 
73 /*
74  *	Local Function Prototypes
75  */
76 static void apic_init_intr();
77 static void apic_nmi_intr(caddr_t arg, struct regs *rp);
78 
79 /*
80  *	standard MP entries
81  */
82 static int	apic_probe();
83 static int	apic_clkinit();
84 static int	apic_getclkirq(int ipl);
85 static uint_t	apic_calibrate(volatile uint32_t *addr,
86     uint16_t *pit_ticks_adj);
87 static hrtime_t apic_gettime();
88 static hrtime_t apic_gethrtime();
89 static void	apic_init();
90 static void	apic_picinit(void);
91 static int	apic_cpu_start(processorid_t, caddr_t);
92 static int	apic_post_cpu_start(void);
93 static void	apic_send_ipi(int cpun, int ipl);
94 static void	apic_set_idlecpu(processorid_t cpun);
95 static void	apic_unset_idlecpu(processorid_t cpun);
96 static int	apic_intr_enter(int ipl, int *vect);
97 static void	apic_setspl(int ipl);
98 static void	x2apic_setspl(int ipl);
99 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
100 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
101 static void	apic_shutdown(int cmd, int fcn);
102 static void	apic_preshutdown(int cmd, int fcn);
103 static int	apic_disable_intr(processorid_t cpun);
104 static void	apic_enable_intr(processorid_t cpun);
105 static processorid_t	apic_get_next_processorid(processorid_t cpun);
106 static int		apic_get_ipivect(int ipl, int type);
107 static void	apic_timer_reprogram(hrtime_t time);
108 static void	apic_timer_enable(void);
109 static void	apic_timer_disable(void);
110 static void	apic_post_cyclic_setup(void *arg);
111 static void	apic_intrr_init(int apic_mode);
112 static void	apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt);
113 static void	apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs);
114 
115 static int	apic_oneshot = 0;
116 int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
117 
118 /* Now the ones for Dynamic Interrupt distribution */
119 int	apic_enable_dynamic_migration = 0;
120 
121 int apic_have_32bit_cr8 = 0;
122 
123 /*
124  * These variables are frequently accessed in apic_intr_enter(),
125  * apic_intr_exit and apic_setspl, so group them together
126  */
127 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
128 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
129 int apic_clkvect;
130 
131 /* vector at which error interrupts come in */
132 int apic_errvect;
133 int apic_enable_error_intr = 1;
134 int apic_error_display_delay = 100;
135 
136 /* vector at which performance counter overflow interrupts come in */
137 int apic_cpcovf_vect;
138 int apic_enable_cpcovf_intr = 1;
139 
140 /* vector at which CMCI interrupts come in */
141 int apic_cmci_vect;
142 extern int cmi_enable_cmci;
143 extern void cmi_cmci_trap(void);
144 
145 static kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
146 static int cmci_cpu_setup_registered;
147 
148 /*
149  * The following vector assignments influence the value of ipltopri and
150  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
151  * idle to 0 and IPL 0 to 0xf to differentiate idle in case
152  * we care to do so in future. Note some IPLs which are rarely used
153  * will share the vector ranges and heavily used IPLs (5 and 6) have
154  * a wide range.
155  *
156  * This array is used to initialize apic_ipls[] (in apic_init()).
157  *
158  *	IPL		Vector range.		as passed to intr_enter
159  *	0		none.
160  *	1,2,3		0x20-0x2f		0x0-0xf
161  *	4		0x30-0x3f		0x10-0x1f
162  *	5		0x40-0x5f		0x20-0x3f
163  *	6		0x60-0x7f		0x40-0x5f
164  *	7,8,9		0x80-0x8f		0x60-0x6f
165  *	10		0x90-0x9f		0x70-0x7f
166  *	11		0xa0-0xaf		0x80-0x8f
167  *	...		...
168  *	15		0xe0-0xef		0xc0-0xcf
169  *	15		0xf0-0xff		0xd0-0xdf
170  */
171 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
172 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
173 };
174 	/*
175 	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
176 	 * NOTE that this is vector as passed into intr_enter which is
177 	 * programmed vector - 0x20 (APIC_BASE_VECT)
178 	 */
179 
180 uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
181 	/* The taskpri to be programmed into apic to mask given ipl */
182 
183 #if defined(__amd64)
184 uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
185 #endif
186 
187 /*
188  * Correlation of the hardware vector to the IPL in use, initialized
189  * from apic_vectortoipl[] in apic_init().  The final IPLs may not correlate
190  * to the IPLs in apic_vectortoipl on some systems that share interrupt lines
191  * connected to errata-stricken IOAPICs
192  */
193 uchar_t apic_ipls[APIC_AVAIL_VECTOR];
194 
195 /*
196  * Patchable global variables.
197  */
198 int	apic_forceload = 0;
199 
200 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
201 					/* 1 - use gettime() for performance */
202 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
203 int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
204 int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
205 int	apic_panic_on_nmi = 0;
206 int	apic_panic_on_apic_error = 0;
207 
208 int	apic_verbose = 0;
209 
210 /* minimum number of timer ticks to program to */
211 int apic_min_timer_ticks = 1;
212 /*
213  *	Local static data
214  */
/*
 * PSM operations vector for this module; apic_psm_info points at it.
 * The initializer is positional, so the order of the entries has to match
 * the member order of struct psm_ops (declared outside this file).
 * Entries initialized to a casted NULL are operations this module does not
 * provide.  _init() overwrites the psm_gethrtime member with apic_gettime
 * when apic_coarse_hrtime is set.
 */
static struct	psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	(int (*)(int))NULL,		/* psm_softlvl_to_irq */
	(void (*)(int))NULL,		/* psm_set_softintr */

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL,		/* psm_hrtimeinit */
	apic_gethrtime,			/* may be replaced in _init() */

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
	(void (*)(int, char *))NULL,	/* psm_notify_error */
	(void (*)(int))NULL,		/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops,			/* Advanced DDI Interrupt framework */
	apic_state,			/* save, restore apic state for S3 */
};
256 
257 
/*
 * Module description passed to psm_mod_init(), psm_mod_fini() and
 * psm_mod_info() by the loadable module wrappers below.  The machine name
 * member (p_mach_idstring) is also what apic_probe() hands to
 * apic_probe_common().
 */
static struct	psm_info apic_psm_info = {
	PSM_INFO_VER01_6,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	(struct psm_ops *)&apic_ops,		/* operation */
	APIC_PCPLUSMP_NAME,			/* machine name */
	"pcplusmp v1.4 compatible",
};
265 
266 static void *apic_hdlp;
267 
268 #ifdef DEBUG
269 int	apic_debug = 0;
270 int	apic_restrict_vector = 0;
271 
272 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
273 int	apic_debug_msgbufindex = 0;
274 
275 #endif /* DEBUG */
276 
277 apic_cpus_info_t	*apic_cpus;
278 
279 cpuset_t	apic_cpumask;
280 uint_t	apic_picinit_called;
281 
282 /* Flag to indicate that we need to shut down all processors */
283 static uint_t	apic_shutdown_processors;
284 
285 uint_t apic_nsec_per_intr = 0;
286 
287 /*
288  * apic_let_idle_redistribute can have the following values:
289  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
290  * apic_redistribute_lock prevents multiple idle cpus from redistributing
291  */
292 int	apic_num_idle_redistributions = 0;
293 static	int apic_let_idle_redistribute = 0;
294 static	uint_t apic_nticks = 0;
295 static	uint_t apic_skipped_redistribute = 0;
296 
297 /* to gather intr data and redistribute */
298 static void apic_redistribute_compute(void);
299 
300 static	uint_t last_count_read = 0;
301 static	lock_t	apic_gethrtime_lock;
302 volatile int	apic_hrtime_stamp = 0;
303 volatile hrtime_t apic_nsec_since_boot = 0;
304 static uint_t apic_hertz_count;
305 
306 uint64_t apic_ticks_per_SFnsecs;	/* # of ticks in SF nsecs */
307 
308 static hrtime_t apic_nsec_max;
309 
310 static	hrtime_t	apic_last_hrtime = 0;
311 int		apic_hrtime_error = 0;
312 int		apic_remote_hrterr = 0;
313 int		apic_num_nmis = 0;
314 int		apic_apic_error = 0;
315 int		apic_num_apic_errors = 0;
316 int		apic_num_cksum_errors = 0;
317 
318 int	apic_error = 0;
319 static	int	apic_cmos_ssb_set = 0;
320 
321 /* use to make sure only one cpu handles the nmi */
322 static	lock_t	apic_nmi_lock;
323 /* use to make sure only one cpu handles the error interrupt */
324 static	lock_t	apic_error_lock;
325 
326 static	struct {
327 	uchar_t	cntl;
328 	uchar_t	data;
329 } aspen_bmc[] = {
330 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
331 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
332 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
333 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
334 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
335 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
336 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
337 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
338 
339 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
340 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
341 };
342 
343 static	struct {
344 	int	port;
345 	uchar_t	data;
346 } sitka_bmc[] = {
347 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
348 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
349 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
350 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
351 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
352 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
353 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
354 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
355 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
356 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
357 
358 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
359 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
360 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
361 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
362 };
363 
364 /* Patchable global variables. */
365 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
366 uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */
367 
/*
 * default apic ops without interrupt remapping
 *
 * Positional initializer: the entry order has to match the member order of
 * apic_intrr_ops_t (declared outside this file).  The first five entries
 * are return_instr cast to the type of the slot -- presumably a stub that
 * simply returns; confirm against its definition.  Only the two record
 * operations have real implementations in this file.
 */
static apic_intrr_ops_t apic_nointrr_ops = {
	(int (*)(int))return_instr,
	(void (*)(void))return_instr,
	(void (*)(apic_irq_t *))return_instr,
	(void (*)(apic_irq_t *, void *))return_instr,
	(void (*)(apic_irq_t *))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};

/*
 * Interrupt remapping ops currently in effect; starts out pointing at the
 * no-remapping defaults above.
 */
apic_intrr_ops_t *apic_vt_ops = &apic_nointrr_ops;
380 
381 /*
382  *	This is the loadable module wrapper
383  */
384 
385 int
386 _init(void)
387 {
388 	if (apic_coarse_hrtime)
389 		apic_ops.psm_gethrtime = &apic_gettime;
390 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
391 }
392 
393 int
394 _fini(void)
395 {
396 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
397 }
398 
399 int
400 _info(struct modinfo *modinfop)
401 {
402 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
403 }
404 
405 
406 static int
407 apic_probe()
408 {
409 	return (apic_probe_common(apic_psm_info.p_mach_idstring));
410 }
411 
412 void
413 apic_init()
414 {
415 	int i;
416 	int	j = 1;
417 
418 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
419 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
420 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
421 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
422 			/* get to highest vector at the same ipl */
423 			continue;
424 		for (; j <= apic_vectortoipl[i]; j++) {
425 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
426 			    APIC_BASE_VECT;
427 		}
428 	}
429 	for (; j < MAXIPL + 1; j++)
430 		/* fill up any empty ipltopri slots */
431 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
432 	apic_init_common();
433 #if defined(__amd64)
434 	/*
435 	 * Make cpu-specific interrupt info point to cr8pri vector
436 	 */
437 	for (i = 0; i <= MAXIPL; i++)
438 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
439 	CPU->cpu_pri_data = apic_cr8pri;
440 #else
441 	if (cpuid_have_cr8access(CPU))
442 		apic_have_32bit_cr8 = 1;
443 #endif	/* __amd64 */
444 }
445 
446 /*
447  * handler for APIC Error interrupt. Just print a warning and continue
448  */
449 static int
450 apic_error_intr()
451 {
452 	uint_t	error0, error1, error;
453 	uint_t	i;
454 
455 	/*
456 	 * We need to write before read as per 7.4.17 of system prog manual.
457 	 * We do both and or the results to be safe
458 	 */
459 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
460 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
461 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
462 	error = error0 | error1;
463 
464 	/*
465 	 * Clear the APIC error status (do this on all cpus that enter here)
466 	 * (two writes are required due to the semantics of accessing the
467 	 * error status register.)
468 	 */
469 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
470 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
471 
472 	/*
473 	 * Prevent more than 1 CPU from handling error interrupt causing
474 	 * double printing (interleave of characters from multiple
475 	 * CPU's when using prom_printf)
476 	 */
477 	if (lock_try(&apic_error_lock) == 0)
478 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
479 	if (error) {
480 #if	DEBUG
481 		if (apic_debug)
482 			debug_enter("pcplusmp: APIC Error interrupt received");
483 #endif /* DEBUG */
484 		if (apic_panic_on_apic_error)
485 			cmn_err(CE_PANIC,
486 			    "APIC Error interrupt on CPU %d. Status = %x\n",
487 			    psm_get_cpu_id(), error);
488 		else {
489 			if ((error & ~APIC_CS_ERRORS) == 0) {
490 				/* cksum error only */
491 				apic_error |= APIC_ERR_APIC_ERROR;
492 				apic_apic_error |= error;
493 				apic_num_apic_errors++;
494 				apic_num_cksum_errors++;
495 			} else {
496 				/*
497 				 * prom_printf is the best shot we have of
498 				 * something which is problem free from
499 				 * high level/NMI type of interrupts
500 				 */
501 				prom_printf("APIC Error interrupt on CPU %d. "
502 				    "Status 0 = %x, Status 1 = %x\n",
503 				    psm_get_cpu_id(), error0, error1);
504 				apic_error |= APIC_ERR_APIC_ERROR;
505 				apic_apic_error |= error;
506 				apic_num_apic_errors++;
507 				for (i = 0; i < apic_error_display_delay; i++) {
508 					tenmicrosec();
509 				}
510 				/*
511 				 * provide more delay next time limited to
512 				 * roughly 1 clock tick time
513 				 */
514 				if (apic_error_display_delay < 500)
515 					apic_error_display_delay *= 2;
516 			}
517 		}
518 		lock_clear(&apic_error_lock);
519 		return (DDI_INTR_CLAIMED);
520 	} else {
521 		lock_clear(&apic_error_lock);
522 		return (DDI_INTR_UNCLAIMED);
523 	}
524 	/* NOTREACHED */
525 }
526 
/*
 * Turn off the mask bit in the performance counter Local Vector Table entry.
 *
 * Read-modify-write of APIC_PCINT_VECT through apic_reg_ops: all bits other
 * than APIC_LVT_MASK are written back unchanged.  apic_init_intr() installs
 * this function as kcpc_hw_enable_cpc_intr.
 */
static void
apic_cpcovf_mask_clear(void)
{
	apic_reg_ops->apic_write(APIC_PCINT_VECT,
	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
}
536 
/*
 * Cross-call handler (see cmci_cpu_setup()): program the CMCI LVT entry
 * of the executing CPU with apic_cmci_vect, without the mask bit.
 * The three arguments are ignored.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	return (0);
}
544 
/*
 * Cross-call handler (see cmci_cpu_setup()): program the CMCI LVT entry
 * of the executing CPU with apic_cmci_vect plus AV_MASK, i.e. keep a valid
 * vector in the entry but mask it.  The three arguments are ignored.
 * Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
	return (0);
}
552 
553 /*ARGSUSED*/
554 static int
555 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
556 {
557 	cpuset_t	cpu_set;
558 
559 	CPUSET_ONLY(cpu_set, cpuid);
560 
561 	switch (what) {
562 		case CPU_ON:
563 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
564 			    (xc_func_t)apic_cmci_enable);
565 			break;
566 
567 		case CPU_OFF:
568 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
569 			    (xc_func_t)apic_cmci_disable);
570 			break;
571 
572 		default:
573 			break;
574 	}
575 
576 	return (0);
577 }
578 
/*
 * Program the local APIC of the executing CPU: task priority, logical
 * destination (MMIO mode only), spurious interrupt vector register and the
 * Local Vector Table entries (timer, LINT0/1, performance counter, thermal,
 * error and CMCI).
 *
 * The vectors for the performance counter, error and CMCI interrupts are
 * allocated and their handlers installed only the first time they are
 * needed (while the corresponding apic_*_vect global is still 0); later
 * calls only program the LVT entries with the saved vectors.
 */
static void
apic_init_intr()
{
	processorid_t	cpun = psm_get_cpu_id();
	uint_t nlvt;
	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;

	/* keep everything masked while the APIC is being set up */
	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);

	if (apic_mode == LOCAL_APIC) {
		/*
		 * We are running APIC in MMIO mode.
		 */
		if (apic_flat_model) {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_FLAT_MODEL);
		} else {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_CLUSTER_MODEL);
		}

		/* destination value is derived from this CPU's id */
		apic_reg_ops->apic_write(APIC_DEST_REG,
		    AV_HIGH_ORDER >> cpun);
	}

	if (apic_direct_EOI) {
		/*
		 * Set 12th bit in Spurious Interrupt Vector
		 * Register to support level triggered interrupt
		 * directed EOI.
		 */
		svr |= (0x1 << APIC_SVR);
	}

	/* need to enable APIC before unmasking NMI */
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);

	/*
	 * Presence of an invalid vector with delivery mode AV_FIXED can
	 * cause an error interrupt, even if the entry is masked...so
	 * write a valid vector to LVT entries along with the mask bit
	 */

	/* All APICs have timer and LINT0/1 */
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */

	/*
	 * On integrated APICs, the number of LVT entries is
	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
	 */

	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
		nlvt = 3;
	} else {
		/* 'Max LVT entry' is taken from bits 16-23 of the version */
		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
		    0xFF) + 1;
	}

	if (nlvt >= 5) {
		/* Enable performance counter overflow interrupt */

		/* not enabled when the X86_MSR feature is absent */
		if ((x86_feature & X86_MSR) != X86_MSR)
			apic_enable_cpcovf_intr = 0;
		if (apic_enable_cpcovf_intr) {
			if (apic_cpcovf_vect == 0) {
				/* first time through: allocate and hook up */
				int ipl = APIC_PCINT_IPL;
				int irq = apic_get_ipivect(ipl, -1);

				ASSERT(irq != -1);
				apic_cpcovf_vect =
				    apic_irq_table[irq]->airq_vector;
				ASSERT(apic_cpcovf_vect);
				(void) add_avintr(NULL, ipl,
				    (avfunc)kcpc_hw_overflow_intr,
				    "apic pcint", irq, NULL, NULL, NULL, NULL);
				kcpc_hw_overflow_intr_installed = 1;
				kcpc_hw_enable_cpc_intr =
				    apic_cpcovf_mask_clear;
			}
			apic_reg_ops->apic_write(APIC_PCINT_VECT,
			    apic_cpcovf_vect);
		}
	}

	if (nlvt >= 6) {
		/* Only mask TM intr if the BIOS apparently doesn't use it */

		uint32_t lvtval;

		/*
		 * "In use by the BIOS" here means: unmasked and delivered
		 * as an SMI.  Anything else is replaced by a masked entry
		 * with a valid vector.
		 */
		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
		if (((lvtval & AV_MASK) == AV_MASK) ||
		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
			apic_reg_ops->apic_write(APIC_THERM_VECT,
			    AV_MASK|APIC_RESV_IRQ);
		}
	}

	/* Enable error interrupt */

	if (nlvt >= 4 && apic_enable_error_intr) {
		if (apic_errvect == 0) {
			int ipl = 0xf;	/* get highest priority intr */
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_errvect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr((void *)NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    irq, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
		/* two writes, as in apic_error_intr(), to clear the status */
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
	}

	/* Enable CMCI interrupt */
	if (cmi_enable_cmci) {

		/* register the CPU on/off callback exactly once */
		mutex_enter(&cmci_cpu_setup_lock);
		if (cmci_cpu_setup_registered == 0) {
			mutex_enter(&cpu_lock);
			register_cpu_setup_func(cmci_cpu_setup, NULL);
			mutex_exit(&cpu_lock);
			cmci_cpu_setup_registered = 1;
		}
		mutex_exit(&cmci_cpu_setup_lock);

		if (apic_cmci_vect == 0) {
			int ipl = 0x2;
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_cmci_vect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_cmci_vect);

			(void) add_avintr(NULL, ipl,
			    (avfunc)cmi_cmci_trap,
			    "apic cmci intr", irq, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	}
}
729 
/*
 * Quiesce the local APIC of the executing CPU: raise the task priority to
 * APIC_MASK_ALL, mask the timer, LINT0, LINT1 (NMI), error and performance
 * counter LVT entries, and finally rewrite the spurious interrupt register
 * without AV_UNIT_ENABLE (compare the value written in apic_init_intr()).
 */
static void
apic_disable_local_apic()
{
	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);

	/* local intr reg 0 */
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);

	/* disable NMI */
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);

	/* and error interrupt */
	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);

	/* and perf counter intr */
	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);

	/* spurious vector only: the unit enable bit is left clear */
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
}
750 
751 static void
752 apic_picinit(void)
753 {
754 	int i, j;
755 	uint_t isr;
756 	uint32_t ver;
757 
758 	/*
759 	 * initialize interrupt remapping before apic
760 	 * hardware initialization
761 	 */
762 	apic_intrr_init(apic_mode);
763 
764 	/*
765 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
766 	 * bit on without clearing it with EOI.  Since softint
767 	 * uses vector 0x20 to interrupt itself, so softint will
768 	 * not work on this machine.  In order to fix this problem
769 	 * a check is made to verify all the isr bits are clear.
770 	 * If not, EOIs are issued to clear the bits.
771 	 */
772 	for (i = 7; i >= 1; i--) {
773 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
774 		if (isr != 0)
775 			for (j = 0; ((j < 32) && (isr != 0)); j++)
776 				if (isr & (1 << j)) {
777 					apic_reg_ops->apic_write(
778 					    APIC_EOI_REG, 0);
779 					isr &= ~(1 << j);
780 					apic_error |= APIC_ERR_BOOT_EOI;
781 				}
782 	}
783 
784 	/* set a flag so we know we have run apic_picinit() */
785 	apic_picinit_called = 1;
786 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
787 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
788 	LOCK_INIT_CLEAR(&apic_error_lock);
789 
790 	picsetup();	 /* initialise the 8259 */
791 
792 	/* add nmi handler - least priority nmi handler */
793 	LOCK_INIT_CLEAR(&apic_nmi_lock);
794 
795 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
796 	    "pcplusmp NMI handler", (caddr_t)NULL))
797 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
798 
799 	ver = apic_reg_ops->apic_read(APIC_VERS_REG);
800 	/*
801 	 * In order to determine support for Directed EOI capability,
802 	 * we check for 24th bit in Local APIC Version Register.
803 	 */
804 	if (ver & (0x1 << APIC_DIRECTED_EOI)) {
805 		apic_direct_EOI = 1;
806 		apic_change_eoi();
807 	}
808 
809 	apic_init_intr();
810 
811 	/* enable apic mode if imcr present */
812 	if (apic_imcrp) {
813 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
814 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
815 	}
816 
817 	ioapic_init_intr(IOAPIC_MASK);
818 }
819 
820 
/*
 * psm_cpu_start entry: kick the processor with id cpun into life.
 *
 * With interrupts disabled on the calling CPU, the CMOS shutdown status
 * byte is set to BIOS_SHUTDOWN, an INIT IPI is asserted and then
 * deasserted at the target's local APIC id, and, for integrated APICs,
 * two STARTUP IPIs are sent whose vector is the page number of the real
 * mode platter (rm_platter_pa).
 *
 *	cpun	- processor id; index into apic_cpus[]
 *	arg	- unused
 *
 * Always returns 0; nothing here checks whether the target CPU started.
 */
/*ARGSUSED1*/
static int
apic_cpu_start(processorid_t cpun, caddr_t arg)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		cpu_id;
	ulong_t		iflag;

	/* IPIs are addressed by local APIC id, not by processor id */
	cpu_id =  apic_cpus[cpun].aci_local_id;

	/* remember that the CMOS shutdown status byte has been modified */
	apic_cmos_ssb_set = 1;

	/*
	 * Interrupts on BSP cpu will be disabled during these startup
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */

	iflag = intr_clear();
	outb(CMOS_ADDR, SSB);
	outb(CMOS_DATA, BIOS_SHUTDOWN);

	/*
	 * According to X2APIC specification in section '2.3.5.1' of
	 * Interrupt Command Register Semantics, the semantics of
	 * programming the Interrupt Command Register to dispatch an interrupt
	 * is simplified. A single MSR write to the 64-bit ICR is required
	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
	 * interface to ICR, system software is not required to check the
	 * status of the delivery status bit prior to writing to the ICR
	 * to send an IPI. With the removal of the Delivery Status bit,
	 * system software no longer has a reason to read the ICR. It remains
	 * readable only to aid in debugging.
	 */
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC) {
		APIC_AV_PENDING_SET();
	}
#endif /* DEBUG */

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);

	/*
	 * If only 1 CPU is installed, PENDING bit will not go low.
	 * The wait is therefore bounded, and it is skipped altogether
	 * when not in MMIO (LOCAL_APIC) mode.
	 */
	for (loop_count = 0x1000; loop_count; loop_count--) {
		if (apic_mode == LOCAL_APIC &&
		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
			apic_ret();
		else
			break;
	}

	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);

	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		/* STARTUP vector: page number of the real mode platter */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);

		drv_usecwait(200);		/* 200 micro sec */

		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);

		drv_usecwait(200);		/* 200 micro sec */
	}
	intr_restore(iflag);
	return (0);
}
899 
900 
#ifdef	DEBUG
/*
 * Debug-only knobs consulted at the end of apic_intr_enter():
 *   apic_break_on_cpu		- apic_break() is called for interrupts
 *				  taken on the cpu with this id
 *   apic_stretch_interrupts	- when non-zero, the value passed to
 *				  drv_usecwait() to lengthen the interrupt
 *   apic_stretch_ISR		- bit mask of ipls (bit n = ipl n) to which
 *				  the stretching applies
 */
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */

/*
 * Intentionally empty: called from apic_intr_enter() so that a breakpoint
 * can be placed here.
 */
void
apic_break()
{
}
#endif /* DEBUG */
911 
/*
 * platform_intr_enter
 *
 *	Called at the beginning of the interrupt service routine to
 *	mask all level equal to and below the interrupt priority
 *	of the interrupting vector.  An EOI should be given to
 *	the interrupt controller to enable other HW interrupts.
 *
 *	ipl	 - unused
 *	vectorp	 - in: the delivered vector minus APIC_BASE_VECT;
 *		   out: the irq that vector maps to (left untouched for
 *		   spurious interrupts)
 *
 *	Returns the ipl of the interrupt, or APIC_INT_SPURIOUS for
 *	spurious interrupts.
 *
 */
/*ARGSUSED*/
static int
apic_intr_enter(int ipl, int *vectorp)
{
	uchar_t vector;
	int nipl;
	int irq;
	ulong_t iflag;
	apic_cpus_info_t *cpu_infop;

	/*
	 * The real vector delivered is (*vectorp + 0x20), but our caller
	 * subtracts 0x20 from the vector before passing it to us.
	 * (That's why APIC_BASE_VECT is 0x20.)
	 */
	vector = (uchar_t)*vectorp;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == apic_clkvect) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/*
			 * apic_hrtime_stamp is bumped both before and
			 * after the update of apic_nsec_since_boot.
			 */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apic_redistribute_compute();
		}

		/* We will avoid all the book keeping overhead for clock */
		nipl = apic_ipls[vector];

		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
		if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
#else
			if (apic_have_32bit_cr8)
				setcr8((ulong_t)(apic_ipltopri[nipl] >>
				    APIC_IPL_SHIFT));
			else
				LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
				    (uint32_t)apic_ipltopri[nipl]);
#endif
			/* the clock interrupt is always EOI'ed right here */
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}

		return (nipl);
	}

	cpu_infop = &apic_cpus[psm_get_cpu_id()];

	/* spurious interrupts are only counted; no priority change, no EOI */
	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* Check if the vector we got is really what we need */
	if (apic_revector_pending) {
		/*
		 * Disable interrupts for the duration of
		 * the vector translation to prevent a self-race for
		 * the apic_revector_lock.  This cannot be done
		 * in apic_xlate_vector because it is recursive and
		 * we want the vector translation to be atomic with
		 * respect to other (higher-priority) interrupts.
		 */
		iflag = intr_clear();
		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
		    APIC_BASE_VECT;
		intr_restore(iflag);
	}

	nipl = apic_ipls[vector];
	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];

	/* raise the task priority to the level that masks nipl */
	if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
		setcr8((ulong_t)(apic_ipltopri[nipl] >> APIC_IPL_SHIFT));
#else
		if (apic_have_32bit_cr8)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
		else
			LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
			    (uint32_t)apic_ipltopri[nipl]);
#endif
	} else {
		X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
	}

	/* per-cpu record of what is being serviced, and at which ipl */
	cpu_infop->aci_current[nipl] = (uchar_t)irq;
	cpu_infop->aci_curipl = (uchar_t)nipl;
	cpu_infop->aci_ISR_in_progress |= 1 << nipl;

	/*
	 * apic_level_intr could have been assimilated into the irq struct.
	 * but, having it as a character array is more efficient in terms of
	 * cache usage. So, we leave it as is.
	 *
	 * Only irqs not flagged in apic_level_intr[] are EOI'ed here; for
	 * the flagged ones the EOI is sent by APIC_INTR_EXIT().
	 */
	if (!apic_level_intr[irq]) {
		if (apic_mode == LOCAL_APIC) {
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}
	}

#ifdef	DEBUG
	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(irq);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);

	if (apic_break_on_cpu == psm_get_cpu_id())
		apic_break();
#endif /* DEBUG */
	return (nipl);
}
1047 
/*
 * Common tail of the interrupt-exit path, shared by the MMIO local apic
 * and the X2APIC local apic versions of intr_exit.
 *
 * The expansion uses three names from the caller's scope:
 *	cpu_infop	pointer that is set to this CPU's apic_cpus[] entry
 *	irq		the irq whose handling is being completed
 *	prev_ipl	the priority level being returned to
 *
 * If apic_level_intr[irq] is set, the EOI is sent through
 * apic_reg_ops->apic_send_eoi().  The per-CPU current ipl is then set to
 * prev_ipl and every in-progress bit above prev_ipl is cleared.
 *
 * The body is wrapped in do { } while (0) so that "APIC_INTR_EXIT();"
 * is exactly one statement and stays correct when used as the unbraced
 * body of an if/else.
 */
#define	APIC_INTR_EXIT() \
do { \
	cpu_infop = &apic_cpus[psm_get_cpu_id()]; \
	if (apic_level_intr[irq]) \
		apic_reg_ops->apic_send_eoi(irq); \
	cpu_infop->aci_curipl = (uchar_t)prev_ipl; \
	/* ISR above current pri could not be in progress */ \
	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; \
} while (0)
1061 
1062 /*
1063  * Any changes made to this function must also change X2APIC
1064  * version of intr_exit.
1065  */
1066 void
1067 apic_intr_exit(int prev_ipl, int irq)
1068 {
1069 	apic_cpus_info_t *cpu_infop;
1070 
1071 #if defined(__amd64)
1072 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
1073 #else
1074 	if (apic_have_32bit_cr8)
1075 		setcr8((ulong_t)(apic_ipltopri[prev_ipl] >> APIC_IPL_SHIFT));
1076 	else
1077 		apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
1078 #endif
1079 
1080 	APIC_INTR_EXIT();
1081 }
1082 
1083 /*
1084  * Same as apic_intr_exit() except it uses MSR rather than MMIO
1085  * to access local apic registers.
1086  */
1087 void
1088 x2apic_intr_exit(int prev_ipl, int irq)
1089 {
1090 	apic_cpus_info_t *cpu_infop;
1091 
1092 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[prev_ipl]);
1093 	APIC_INTR_EXIT();
1094 }
1095 
1096 intr_exit_fn_t
1097 psm_intr_exit_fn(void)
1098 {
1099 	if (apic_mode == LOCAL_X2APIC)
1100 		return (x2apic_intr_exit);
1101 
1102 	return (apic_intr_exit);
1103 }
1104 
1105 /*
1106  * Mask all interrupts below or equal to the given IPL.
1107  * Any changes made to this function must also change X2APIC
1108  * version of setspl.
1109  */
1110 static void
1111 apic_setspl(int ipl)
1112 {
1113 #if defined(__amd64)
1114 	setcr8((ulong_t)apic_cr8pri[ipl]);
1115 #else
1116 	if (apic_have_32bit_cr8)
1117 		setcr8((ulong_t)(apic_ipltopri[ipl] >> APIC_IPL_SHIFT));
1118 	else
1119 		apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
1120 #endif
1121 
1122 	/* interrupts at ipl above this cannot be in progress */
1123 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
1124 	/*
1125 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
1126 	 * have enough time to come in before the priority is raised again
1127 	 * during the idle() loop.
1128 	 */
1129 	if (apic_setspl_delay)
1130 		(void) apic_reg_ops->apic_get_pri();
1131 }
1132 
1133 /*
1134  * X2APIC version of setspl.
1135  * Mask all interrupts below or equal to the given IPL
1136  */
1137 static void
1138 x2apic_setspl(int ipl)
1139 {
1140 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[ipl]);
1141 
1142 	/* interrupts at ipl above this cannot be in progress */
1143 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
1144 }
1145 
1146 /*
1147  * generates an interprocessor interrupt to another CPU. Any changes made to
1148  * this routine must be accompanied by similar changes to
1149  * apic_common_send_ipi().
1150  */
1151 static void
1152 apic_send_ipi(int cpun, int ipl)
1153 {
1154 	int vector;
1155 	ulong_t flag;
1156 
1157 	vector = apic_resv_vector[ipl];
1158 
1159 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
1160 
1161 	flag = intr_clear();
1162 
1163 	APIC_AV_PENDING_SET();
1164 
1165 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
1166 	    vector);
1167 
1168 	intr_restore(flag);
1169 }
1170 
1171 
1172 /*ARGSUSED*/
1173 static void
1174 apic_set_idlecpu(processorid_t cpun)
1175 {
1176 }
1177 
1178 /*ARGSUSED*/
1179 static void
1180 apic_unset_idlecpu(processorid_t cpun)
1181 {
1182 }
1183 
1184 
/*
 * Do-nothing function.  The polling loops in this file call it as their
 * loop body while they wait for a register or stamp to change.
 */
void
apic_ret()
{
	/* intentionally empty */
}
1189 
1190 /*
1191  * If apic_coarse_time == 1, then apic_gettime() is used instead of
1192  * apic_gethrtime().  This is used for performance instead of accuracy.
1193  */
1194 
1195 static hrtime_t
1196 apic_gettime()
1197 {
1198 	int old_hrtime_stamp;
1199 	hrtime_t temp;
1200 
1201 	/*
1202 	 * In one-shot mode, we do not keep time, so if anyone
1203 	 * calls psm_gettime() directly, we vector over to
1204 	 * gethrtime().
1205 	 * one-shot mode MUST NOT be enabled if this psm is the source of
1206 	 * hrtime.
1207 	 */
1208 
1209 	if (apic_oneshot)
1210 		return (gethrtime());
1211 
1212 
1213 gettime_again:
1214 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
1215 		apic_ret();
1216 
1217 	temp = apic_nsec_since_boot;
1218 
1219 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
1220 		goto gettime_again;
1221 	}
1222 	return (temp);
1223 }
1224 
1225 /*
1226  * Here we return the number of nanoseconds since booting.  Note every
1227  * clock interrupt increments apic_nsec_since_boot by the appropriate
1228  * amount.
1229  */
1230 static hrtime_t
1231 apic_gethrtime()
1232 {
1233 	int curr_timeval, countval, elapsed_ticks;
1234 	int old_hrtime_stamp, status;
1235 	hrtime_t temp;
1236 	uint32_t cpun;
1237 	ulong_t oflags;
1238 
1239 	/*
1240 	 * In one-shot mode, we do not keep time, so if anyone
1241 	 * calls psm_gethrtime() directly, we vector over to
1242 	 * gethrtime().
1243 	 * one-shot mode MUST NOT be enabled if this psm is the source of
1244 	 * hrtime.
1245 	 */
1246 
1247 	if (apic_oneshot)
1248 		return (gethrtime());
1249 
1250 	oflags = intr_clear();	/* prevent migration */
1251 
1252 	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
1253 	if (apic_mode == LOCAL_APIC)
1254 		cpun >>= APIC_ID_BIT_OFFSET;
1255 
1256 	lock_set(&apic_gethrtime_lock);
1257 
1258 gethrtime_again:
1259 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
1260 		apic_ret();
1261 
1262 	/*
1263 	 * Check to see which CPU we are on.  Note the time is kept on
1264 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
1265 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
1266 	 */
1267 	if (cpun == apic_cpus[0].aci_local_id) {
1268 		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
1269 	} else {
1270 #ifdef	DEBUG
1271 		APIC_AV_PENDING_SET();
1272 #else
1273 		if (apic_mode == LOCAL_APIC)
1274 			APIC_AV_PENDING_SET();
1275 #endif /* DEBUG */
1276 
1277 		apic_reg_ops->apic_write_int_cmd(
1278 		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
1279 
1280 		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
1281 		    & AV_READ_PENDING) {
1282 			apic_ret();
1283 		}
1284 
1285 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
1286 			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
1287 		else {	/* 0 = invalid */
1288 			apic_remote_hrterr++;
1289 			/*
1290 			 * return last hrtime right now, will need more
1291 			 * testing if change to retry
1292 			 */
1293 			temp = apic_last_hrtime;
1294 
1295 			lock_clear(&apic_gethrtime_lock);
1296 
1297 			intr_restore(oflags);
1298 
1299 			return (temp);
1300 		}
1301 	}
1302 	if (countval > last_count_read)
1303 		countval = 0;
1304 	else
1305 		last_count_read = countval;
1306 
1307 	elapsed_ticks = apic_hertz_count - countval;
1308 
1309 	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
1310 	temp = apic_nsec_since_boot + curr_timeval;
1311 
1312 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
1313 		/* we might have clobbered last_count_read. Restore it */
1314 		last_count_read = apic_hertz_count;
1315 		goto gethrtime_again;
1316 	}
1317 
1318 	if (temp < apic_last_hrtime) {
1319 		/* return last hrtime if error occurs */
1320 		apic_hrtime_error++;
1321 		temp = apic_last_hrtime;
1322 	}
1323 	else
1324 		apic_last_hrtime = temp;
1325 
1326 	lock_clear(&apic_gethrtime_lock);
1327 	intr_restore(oflags);
1328 
1329 	return (temp);
1330 }
1331 
1332 /* apic NMI handler */
1333 /*ARGSUSED*/
1334 static void
1335 apic_nmi_intr(caddr_t arg, struct regs *rp)
1336 {
1337 	if (apic_shutdown_processors) {
1338 		apic_disable_local_apic();
1339 		return;
1340 	}
1341 
1342 	apic_error |= APIC_ERR_NMI;
1343 
1344 	if (!lock_try(&apic_nmi_lock))
1345 		return;
1346 	apic_num_nmis++;
1347 
1348 	if (apic_kmdb_on_nmi && psm_debugger()) {
1349 		debug_enter("NMI received: entering kmdb\n");
1350 	} else if (apic_panic_on_nmi) {
1351 		/* Keep panic from entering kmdb. */
1352 		nopanicdebug = 1;
1353 		panic("NMI received\n");
1354 	} else {
1355 		/*
1356 		 * prom_printf is the best shot we have of something which is
1357 		 * problem free from high level/NMI type of interrupts
1358 		 */
1359 		prom_printf("NMI received\n");
1360 	}
1361 
1362 	lock_clear(&apic_nmi_lock);
1363 }
1364 
/*
 * addspl entry point: hands all four arguments to apic_addspl_common()
 * and returns its result.
 */
/*ARGSUSED*/
static int
apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int rv;

	rv = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);
	return (rv);
}
1371 
/*
 * delspl entry point: hands all four arguments to apic_delspl_common()
 * and returns its result.
 */
static int
apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int rv;

	rv = apic_delspl_common(irqno, ipl, min_ipl, max_ipl);
	return (rv);
}
1377 
1378 static int
1379 apic_post_cpu_start()
1380 {
1381 	int cpun;
1382 	static int cpus_started = 1;
1383 	struct psm_ops *pops = &apic_ops;
1384 
1385 	/* We know this CPU + BSP  started successfully. */
1386 	cpus_started++;
1387 
1388 	/*
1389 	 * On BSP we would have enabled X2APIC, if supported by processor,
1390 	 * in acpi_probe(), but on AP we do it here.
1391 	 *
1392 	 * We enable X2APIC mode only if BSP is running in X2APIC & the
1393 	 * local APIC mode of the current CPU is MMIO (xAPIC).
1394 	 */
1395 	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1396 	    apic_local_mode() == LOCAL_APIC) {
1397 		apic_enable_x2apic();
1398 	}
1399 
1400 	/*
1401 	 * We change psm_send_ipi and send_dirintf only if Solaris
1402 	 * is booted in kmdb & the current CPU is the last CPU being
1403 	 * brought up. We don't need to do anything if Solaris is running
1404 	 * in MMIO mode (xAPIC).
1405 	 */
1406 	if ((boothowto & RB_DEBUG) &&
1407 	    (cpus_started == boot_ncpus || cpus_started == apic_nproc) &&
1408 	    apic_mode == LOCAL_X2APIC) {
1409 		/*
1410 		 * We no longer need help from apic_common_send_ipi()
1411 		 * since we will not start any more CPUs.
1412 		 *
1413 		 * We will need to revisit this if we start supporting
1414 		 * hot-plugging of CPUs.
1415 		 */
1416 		pops->psm_send_ipi = x2apic_send_ipi;
1417 		send_dirintf = pops->psm_send_ipi;
1418 	}
1419 
1420 	splx(ipltospl(LOCK_LEVEL));
1421 	apic_init_intr();
1422 
1423 	/*
1424 	 * since some systems don't enable the internal cache on the non-boot
1425 	 * cpus, so we have to enable them here
1426 	 */
1427 	setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1428 
1429 #ifdef	DEBUG
1430 	APIC_AV_PENDING_SET();
1431 #else
1432 	if (apic_mode == LOCAL_APIC)
1433 		APIC_AV_PENDING_SET();
1434 #endif	/* DEBUG */
1435 
1436 	/*
1437 	 * We may be booting, or resuming from suspend; aci_status will
1438 	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1439 	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1440 	 */
1441 	cpun = psm_get_cpu_id();
1442 	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1443 
1444 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1445 	return (PSM_SUCCESS);
1446 }
1447 
1448 processorid_t
1449 apic_get_next_processorid(processorid_t cpu_id)
1450 {
1451 
1452 	int i;
1453 
1454 	if (cpu_id == -1)
1455 		return ((processorid_t)0);
1456 
1457 	for (i = cpu_id + 1; i < NCPU; i++) {
1458 		if (CPU_IN_SET(apic_cpumask, i))
1459 			return (i);
1460 	}
1461 
1462 	return ((processorid_t)-1);
1463 }
1464 
1465 
1466 /*
1467  * type == -1 indicates it is an internal request. Do not change
1468  * resv_vector for these requests
1469  */
1470 static int
1471 apic_get_ipivect(int ipl, int type)
1472 {
1473 	uchar_t vector;
1474 	int irq;
1475 
1476 	if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) {
1477 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
1478 			apic_irq_table[irq]->airq_mps_intr_index =
1479 			    RESERVE_INDEX;
1480 			apic_irq_table[irq]->airq_vector = vector;
1481 			if (type != -1) {
1482 				apic_resv_vector[ipl] = vector;
1483 			}
1484 			return (irq);
1485 		}
1486 	}
1487 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
1488 	return (-1);	/* shouldn't happen */
1489 }
1490 
1491 static int
1492 apic_getclkirq(int ipl)
1493 {
1494 	int	irq;
1495 
1496 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
1497 		return (-1);
1498 	/*
1499 	 * Note the vector in apic_clkvect for per clock handling.
1500 	 */
1501 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
1502 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
1503 	    apic_clkvect));
1504 	return (irq);
1505 }
1506 
1507 
1508 /*
1509  * Return the number of APIC clock ticks elapsed for 8245 to decrement
1510  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
1511  */
1512 static uint_t
1513 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
1514 {
1515 	uint8_t		pit_tick_lo;
1516 	uint16_t	pit_tick, target_pit_tick;
1517 	uint32_t	start_apic_tick, end_apic_tick;
1518 	ulong_t		iflag;
1519 	uint32_t	reg;
1520 
1521 	reg = addr + APIC_CURR_COUNT - apicadr;
1522 
1523 	iflag = intr_clear();
1524 
1525 	do {
1526 		pit_tick_lo = inb(PITCTR0_PORT);
1527 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1528 	} while (pit_tick < APIC_TIME_MIN ||
1529 	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
1530 
1531 	/*
1532 	 * Wait for the 8254 to decrement by 5 ticks to ensure
1533 	 * we didn't start in the middle of a tick.
1534 	 * Compare with 0x10 for the wrap around case.
1535 	 */
1536 	target_pit_tick = pit_tick - 5;
1537 	do {
1538 		pit_tick_lo = inb(PITCTR0_PORT);
1539 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1540 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1541 
1542 	start_apic_tick = apic_reg_ops->apic_read(reg);
1543 
1544 	/*
1545 	 * Wait for the 8254 to decrement by
1546 	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
1547 	 */
1548 	target_pit_tick = pit_tick - APIC_TIME_COUNT;
1549 	do {
1550 		pit_tick_lo = inb(PITCTR0_PORT);
1551 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1552 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1553 
1554 	end_apic_tick = apic_reg_ops->apic_read(reg);
1555 
1556 	*pit_ticks_adj = target_pit_tick - pit_tick;
1557 
1558 	intr_restore(iflag);
1559 
1560 	return (start_apic_tick - end_apic_tick);
1561 }
1562 
1563 /*
1564  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1565  * frequency.  Note at this stage in the boot sequence, the boot processor
1566  * is the only active processor.
1567  * hertz value of 0 indicates a one-shot mode request.  In this case
1568  * the function returns the resolution (in nanoseconds) for the hardware
1569  * timer interrupt.  If one-shot mode capability is not available,
1570  * the return value will be 0. apic_enable_oneshot is a global switch
1571  * for disabling the functionality.
1572  * A non-zero positive value for hertz indicates a periodic mode request.
1573  * In this case the hardware will be programmed to generate clock interrupts
1574  * at hertz frequency and returns the resolution of interrupts in
1575  * nanosecond.
1576  */
1577 
1578 static int
1579 apic_clkinit(int hertz)
1580 {
1581 	uint_t		apic_ticks = 0;
1582 	uint_t		pit_ticks;
1583 	int		ret;
1584 	uint16_t	pit_ticks_adj;
1585 	static int	firsttime = 1;
1586 
1587 	if (firsttime) {
1588 		/* first time calibrate on CPU0 only */
1589 
1590 		apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1591 		apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
1592 		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);
1593 
1594 		/* total number of PIT ticks corresponding to apic_ticks */
1595 		pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;
1596 
1597 		/*
1598 		 * Determine the number of nanoseconds per APIC clock tick
1599 		 * and then determine how many APIC ticks to interrupt at the
1600 		 * desired frequency
1601 		 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
1602 		 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
1603 		 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
1604 		 * pic_ticks_per_SFns =
1605 		 *   (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
1606 		 */
1607 		apic_ticks_per_SFnsecs =
1608 		    ((SF * apic_ticks * PIT_HZ) /
1609 		    ((uint64_t)pit_ticks * NANOSEC));
1610 
1611 		/* the interval timer initial count is 32 bit max */
1612 		apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
1613 		firsttime = 0;
1614 	}
1615 
1616 	if (hertz != 0) {
1617 		/* periodic */
1618 		apic_nsec_per_intr = NANOSEC / hertz;
1619 		apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
1620 	}
1621 
1622 	apic_int_busy_mark = (apic_int_busy_mark *
1623 	    apic_sample_factor_redistribution) / 100;
1624 	apic_int_free_mark = (apic_int_free_mark *
1625 	    apic_sample_factor_redistribution) / 100;
1626 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
1627 	    apic_sample_factor_redistribution) / 100;
1628 
1629 	if (hertz == 0) {
1630 		/* requested one_shot */
1631 		if (!tsc_gethrtime_enable || !apic_oneshot_enable)
1632 			return (0);
1633 		apic_oneshot = 1;
1634 		ret = (int)APIC_TICKS_TO_NSECS(1);
1635 	} else {
1636 		/* program the local APIC to interrupt at the given frequency */
1637 		apic_reg_ops->apic_write(APIC_INIT_COUNT, apic_hertz_count);
1638 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
1639 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
1640 		apic_oneshot = 0;
1641 		ret = NANOSEC / hertz;
1642 	}
1643 
1644 	return (ret);
1645 
1646 }
1647 
1648 /*
1649  * apic_preshutdown:
1650  * Called early in shutdown whilst we can still access filesystems to do
1651  * things like loading modules which will be required to complete shutdown
1652  * after filesystems are all unmounted.
1653  */
1654 static void
1655 apic_preshutdown(int cmd, int fcn)
1656 {
1657 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1658 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1659 
1660 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
1661 		return;
1662 	}
1663 }
1664 
1665 static void
1666 apic_shutdown(int cmd, int fcn)
1667 {
1668 	int restarts, attempts;
1669 	int i;
1670 	uchar_t	byte;
1671 	ulong_t iflag;
1672 
1673 	hpet_acpi_fini();
1674 
1675 	/* Send NMI to all CPUs except self to do per processor shutdown */
1676 	iflag = intr_clear();
1677 #ifdef	DEBUG
1678 	APIC_AV_PENDING_SET();
1679 #else
1680 	if (apic_mode == LOCAL_APIC)
1681 		APIC_AV_PENDING_SET();
1682 #endif /* DEBUG */
1683 	apic_shutdown_processors = 1;
1684 	apic_reg_ops->apic_write(APIC_INT_CMD1,
1685 	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1686 
1687 	/* restore cmos shutdown byte before reboot */
1688 	if (apic_cmos_ssb_set) {
1689 		outb(CMOS_ADDR, SSB);
1690 		outb(CMOS_DATA, 0);
1691 	}
1692 
1693 	ioapic_disable_redirection();
1694 
1695 	/*	disable apic mode if imcr present	*/
1696 	if (apic_imcrp) {
1697 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1698 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1699 	}
1700 
1701 	apic_disable_local_apic();
1702 
1703 	intr_restore(iflag);
1704 
1705 	/* remainder of function is for shutdown cases only */
1706 	if (cmd != A_SHUTDOWN)
1707 		return;
1708 
1709 	/*
1710 	 * Switch system back into Legacy-Mode if using ACPI and
1711 	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
1712 	 * for power-off to succeed (Dell Dimension 4600)
1713 	 * Do not disable ACPI while doing fastreboot
1714 	 */
1715 	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1716 		(void) AcpiDisable();
1717 
1718 	if (fcn == AD_FASTREBOOT) {
1719 		apic_reg_ops->apic_write(APIC_INT_CMD1,
1720 		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1721 	}
1722 
1723 	/* remainder of function is for shutdown+poweroff case only */
1724 	if (fcn != AD_POWEROFF)
1725 		return;
1726 
1727 	switch (apic_poweroff_method) {
1728 		case APIC_POWEROFF_VIA_RTC:
1729 
1730 			/* select the extended NVRAM bank in the RTC */
1731 			outb(CMOS_ADDR, RTC_REGA);
1732 			byte = inb(CMOS_DATA);
1733 			outb(CMOS_DATA, (byte | EXT_BANK));
1734 
1735 			outb(CMOS_ADDR, PFR_REG);
1736 
1737 			/* for Predator must toggle the PAB bit */
1738 			byte = inb(CMOS_DATA);
1739 
1740 			/*
1741 			 * clear power active bar, wakeup alarm and
1742 			 * kickstart
1743 			 */
1744 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1745 			outb(CMOS_DATA, byte);
1746 
1747 			/* delay before next write */
1748 			drv_usecwait(1000);
1749 
1750 			/* for S40 the following would suffice */
1751 			byte = inb(CMOS_DATA);
1752 
1753 			/* power active bar control bit */
1754 			byte |= PAB_CBIT;
1755 			outb(CMOS_DATA, byte);
1756 
1757 			break;
1758 
1759 		case APIC_POWEROFF_VIA_ASPEN_BMC:
1760 			restarts = 0;
1761 restart_aspen_bmc:
1762 			if (++restarts == 3)
1763 				break;
1764 			attempts = 0;
1765 			do {
1766 				byte = inb(MISMIC_FLAG_REGISTER);
1767 				byte &= MISMIC_BUSY_MASK;
1768 				if (byte != 0) {
1769 					drv_usecwait(1000);
1770 					if (attempts >= 3)
1771 						goto restart_aspen_bmc;
1772 					++attempts;
1773 				}
1774 			} while (byte != 0);
1775 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1776 			byte = inb(MISMIC_FLAG_REGISTER);
1777 			byte |= 0x1;
1778 			outb(MISMIC_FLAG_REGISTER, byte);
1779 			i = 0;
1780 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1781 			    i++) {
1782 				attempts = 0;
1783 				do {
1784 					byte = inb(MISMIC_FLAG_REGISTER);
1785 					byte &= MISMIC_BUSY_MASK;
1786 					if (byte != 0) {
1787 						drv_usecwait(1000);
1788 						if (attempts >= 3)
1789 							goto restart_aspen_bmc;
1790 						++attempts;
1791 					}
1792 				} while (byte != 0);
1793 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1794 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1795 				byte = inb(MISMIC_FLAG_REGISTER);
1796 				byte |= 0x1;
1797 				outb(MISMIC_FLAG_REGISTER, byte);
1798 			}
1799 			break;
1800 
1801 		case APIC_POWEROFF_VIA_SITKA_BMC:
1802 			restarts = 0;
1803 restart_sitka_bmc:
1804 			if (++restarts == 3)
1805 				break;
1806 			attempts = 0;
1807 			do {
1808 				byte = inb(SMS_STATUS_REGISTER);
1809 				byte &= SMS_STATE_MASK;
1810 				if ((byte == SMS_READ_STATE) ||
1811 				    (byte == SMS_WRITE_STATE)) {
1812 					drv_usecwait(1000);
1813 					if (attempts >= 3)
1814 						goto restart_sitka_bmc;
1815 					++attempts;
1816 				}
1817 			} while ((byte == SMS_READ_STATE) ||
1818 			    (byte == SMS_WRITE_STATE));
1819 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1820 			i = 0;
1821 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1822 			    i++) {
1823 				attempts = 0;
1824 				do {
1825 					byte = inb(SMS_STATUS_REGISTER);
1826 					byte &= SMS_IBF_MASK;
1827 					if (byte != 0) {
1828 						drv_usecwait(1000);
1829 						if (attempts >= 3)
1830 							goto restart_sitka_bmc;
1831 						++attempts;
1832 					}
1833 				} while (byte != 0);
1834 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
1835 			}
1836 			break;
1837 
1838 		case APIC_POWEROFF_NONE:
1839 
1840 			/* If no APIC direct method, we will try using ACPI */
1841 			if (apic_enable_acpi) {
1842 				if (acpi_poweroff() == 1)
1843 					return;
1844 			} else
1845 				return;
1846 
1847 			break;
1848 	}
1849 	/*
1850 	 * Wait a limited time here for power to go off.
1851 	 * If the power does not go off, then there was a
1852 	 * problem and we should continue to the halt which
1853 	 * prints a message for the user to press a key to
1854 	 * reboot.
1855 	 */
1856 	drv_usecwait(7000000); /* wait seven seconds */
1857 
1858 }
1859 
1860 /*
1861  * Try and disable all interrupts. We just assign interrupts to other
1862  * processors based on policy. If any were bound by user request, we
1863  * let them continue and return failure. We do not bother to check
1864  * for cache affinity while rebinding.
1865  */
1866 
1867 static int
1868 apic_disable_intr(processorid_t cpun)
1869 {
1870 	int bind_cpu = 0, i, hardbound = 0;
1871 	apic_irq_t *irq_ptr;
1872 	ulong_t iflag;
1873 
1874 	iflag = intr_clear();
1875 	lock_set(&apic_ioapic_lock);
1876 
1877 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
1878 		if (apic_reprogram_info[i].done == B_FALSE) {
1879 			if (apic_reprogram_info[i].bindcpu == cpun) {
1880 				/*
1881 				 * CPU is busy -- it's the target of
1882 				 * a pending reprogramming attempt
1883 				 */
1884 				lock_clear(&apic_ioapic_lock);
1885 				intr_restore(iflag);
1886 				return (PSM_FAILURE);
1887 			}
1888 		}
1889 	}
1890 
1891 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
1892 
1893 	apic_cpus[cpun].aci_curipl = 0;
1894 
1895 	i = apic_min_device_irq;
1896 	for (; i <= apic_max_device_irq; i++) {
1897 		/*
1898 		 * If there are bound interrupts on this cpu, then
1899 		 * rebind them to other processors.
1900 		 */
1901 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
1902 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
1903 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
1904 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
1905 			    apic_nproc));
1906 
1907 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
1908 				hardbound = 1;
1909 				continue;
1910 			}
1911 
1912 			if (irq_ptr->airq_temp_cpu == cpun) {
1913 				do {
1914 					bind_cpu = apic_next_bind_cpu++;
1915 					if (bind_cpu >= apic_nproc) {
1916 						apic_next_bind_cpu = 1;
1917 						bind_cpu = 0;
1918 
1919 					}
1920 				} while (apic_rebind_all(irq_ptr, bind_cpu));
1921 			}
1922 		}
1923 	}
1924 
1925 	lock_clear(&apic_ioapic_lock);
1926 	intr_restore(iflag);
1927 
1928 	if (hardbound) {
1929 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
1930 		    "due to user bound interrupts", cpun);
1931 		return (PSM_FAILURE);
1932 	}
1933 	else
1934 		return (PSM_SUCCESS);
1935 }
1936 
1937 /*
1938  * Bind interrupts to the CPU's local APIC.
1939  * Interrupts should not be bound to a CPU's local APIC until the CPU
1940  * is ready to receive interrupts.
1941  */
1942 static void
1943 apic_enable_intr(processorid_t cpun)
1944 {
1945 	int	i;
1946 	apic_irq_t *irq_ptr;
1947 	ulong_t iflag;
1948 
1949 	iflag = intr_clear();
1950 	lock_set(&apic_ioapic_lock);
1951 
1952 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
1953 
1954 	i = apic_min_device_irq;
1955 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
1956 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
1957 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
1958 				(void) apic_rebind_all(irq_ptr,
1959 				    irq_ptr->airq_cpu);
1960 			}
1961 		}
1962 	}
1963 
1964 	lock_clear(&apic_ioapic_lock);
1965 	intr_restore(iflag);
1966 }
1967 
1968 
1969 /*
1970  * This function will reprogram the timer.
1971  *
1972  * When in oneshot mode the argument is the absolute time in future to
1973  * generate the interrupt at.
1974  *
1975  * When in periodic mode, the argument is the interval at which the
1976  * interrupts should be generated. There is no need to support the periodic
1977  * mode timer change at this time.
1978  */
1979 static void
1980 apic_timer_reprogram(hrtime_t time)
1981 {
1982 	hrtime_t now;
1983 	uint_t ticks;
1984 	int64_t delta;
1985 
1986 	/*
1987 	 * We should be called from high PIL context (CBE_HIGH_PIL),
1988 	 * so kpreempt is disabled.
1989 	 */
1990 
1991 	if (!apic_oneshot) {
1992 		/* time is the interval for periodic mode */
1993 		ticks = APIC_NSECS_TO_TICKS(time);
1994 	} else {
1995 		/* one shot mode */
1996 
1997 		now = gethrtime();
1998 		delta = time - now;
1999 
2000 		if (delta <= 0) {
2001 			/*
2002 			 * requested to generate an interrupt in the past
2003 			 * generate an interrupt as soon as possible
2004 			 */
2005 			ticks = apic_min_timer_ticks;
2006 		} else if (delta > apic_nsec_max) {
2007 			/*
2008 			 * requested to generate an interrupt at a time
2009 			 * further than what we are capable of. Set to max
2010 			 * the hardware can handle
2011 			 */
2012 
2013 			ticks = APIC_MAXVAL;
2014 #ifdef DEBUG
2015 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
2016 			    "  %lld  too far in future, current time"
2017 			    "  %lld \n", time, now);
2018 #endif
2019 		} else
2020 			ticks = APIC_NSECS_TO_TICKS(delta);
2021 	}
2022 
2023 	if (ticks < apic_min_timer_ticks)
2024 		ticks = apic_min_timer_ticks;
2025 
2026 	apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
2027 }
2028 
2029 /*
2030  * This function will enable timer interrupts.
2031  */
2032 static void
2033 apic_timer_enable(void)
2034 {
2035 	/*
2036 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
2037 	 * so kpreempt is disabled.
2038 	 */
2039 
2040 	if (!apic_oneshot) {
2041 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
2042 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
2043 	} else {
2044 		/* one shot */
2045 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
2046 		    (apic_clkvect + APIC_BASE_VECT));
2047 	}
2048 }
2049 
2050 /*
2051  * This function will disable timer interrupts.
2052  */
2053 static void
2054 apic_timer_disable(void)
2055 {
2056 	/*
2057 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
2058 	 * so kpreempt is disabled.
2059 	 */
2060 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
2061 	    (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
2062 }
2063 
2064 /*
2065  * Set timer far into the future and return timer
2066  * current Count in nanoseconds.
2067  */
2068 hrtime_t
2069 apic_timer_stop_count(void)
2070 {
2071 	hrtime_t	ns_val;
2072 	int		enable_val, count_val;
2073 
2074 	/*
2075 	 * Should be called with interrupts disabled.
2076 	 */
2077 	ASSERT(!interrupts_enabled());
2078 
2079 	enable_val = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
2080 	if ((enable_val & AV_MASK) == AV_MASK)
2081 		return ((hrtime_t)-1);		/* timer is disabled */
2082 
2083 	count_val = apic_reg_ops->apic_read(APIC_CURR_COUNT);
2084 	ns_val = APIC_TICKS_TO_NSECS(count_val);
2085 
2086 	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
2087 
2088 	return (ns_val);
2089 }
2090 
2091 /*
2092  * Reprogram timer after Deep C-State.
2093  */
2094 void
2095 apic_timer_restart(hrtime_t time)
2096 {
2097 	apic_timer_reprogram(time);
2098 }
2099 
2100 ddi_periodic_t apic_periodic_id;
2101 
2102 /*
2103  * If this module needs a periodic handler for the interrupt distribution, it
2104  * can be added here. The argument to the periodic handler is not currently
2105  * used, but is reserved for future.
2106  */
2107 static void
2108 apic_post_cyclic_setup(void *arg)
2109 {
2110 _NOTE(ARGUNUSED(arg))
2111 	/* cpu_lock is held */
2112 	/* set up a periodic handler for intr redistribution */
2113 
2114 	/*
2115 	 * In peridoc mode intr redistribution processing is done in
2116 	 * apic_intr_enter during clk intr processing
2117 	 */
2118 	if (!apic_oneshot)
2119 		return;
2120 	/*
2121 	 * Register a periodical handler for the redistribution processing.
2122 	 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
2123 	 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
2124 	 */
2125 	apic_periodic_id = ddi_periodic_add(
2126 	    (void (*)(void *))apic_redistribute_compute, NULL,
2127 	    apic_redistribute_sample_interval, DDI_IPL_2);
2128 }
2129 
2130 static void
2131 apic_redistribute_compute(void)
2132 {
2133 	int	i, j, max_busy;
2134 
2135 	if (apic_enable_dynamic_migration) {
2136 		if (++apic_nticks == apic_sample_factor_redistribution) {
2137 			/*
2138 			 * Time to call apic_intr_redistribute().
2139 			 * reset apic_nticks. This will cause max_busy
2140 			 * to be calculated below and if it is more than
2141 			 * apic_int_busy, we will do the whole thing
2142 			 */
2143 			apic_nticks = 0;
2144 		}
2145 		max_busy = 0;
2146 		for (i = 0; i < apic_nproc; i++) {
2147 
2148 			/*
2149 			 * Check if curipl is non zero & if ISR is in
2150 			 * progress
2151 			 */
2152 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
2153 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
2154 
2155 				int	irq;
2156 				apic_cpus[i].aci_busy++;
2157 				irq = apic_cpus[i].aci_current[j];
2158 				apic_irq_table[irq]->airq_busy++;
2159 			}
2160 
2161 			if (!apic_nticks &&
2162 			    (apic_cpus[i].aci_busy > max_busy))
2163 				max_busy = apic_cpus[i].aci_busy;
2164 		}
2165 		if (!apic_nticks) {
2166 			if (max_busy > apic_int_busy_mark) {
2167 			/*
2168 			 * We could make the following check be
2169 			 * skipped > 1 in which case, we get a
2170 			 * redistribution at half the busy mark (due to
2171 			 * double interval). Need to be able to collect
2172 			 * more empirical data to decide if that is a
2173 			 * good strategy. Punt for now.
2174 			 */
2175 				if (apic_skipped_redistribute) {
2176 					apic_cleanup_busy();
2177 					apic_skipped_redistribute = 0;
2178 				} else {
2179 					apic_intr_redistribute();
2180 				}
2181 			} else
2182 				apic_skipped_redistribute++;
2183 		}
2184 	}
2185 }
2186 
2187 
2188 /*
2189  * The following functions are in the platform specific file so that they
2190  * can be different functions depending on whether we are running on
2191  * bare metal or a hypervisor.
2192  */
2193 
2194 /*
2195  * map an apic for memory-mapped access
2196  */
2197 uint32_t *
2198 mapin_apic(uint32_t addr, size_t len, int flags)
2199 {
2200 	/*LINTED: pointer cast may result in improper alignment */
2201 	return ((uint32_t *)psm_map_phys(addr, len, flags));
2202 }
2203 
2204 uint32_t *
2205 mapin_ioapic(uint32_t addr, size_t len, int flags)
2206 {
2207 	return (mapin_apic(addr, len, flags));
2208 }
2209 
2210 /*
2211  * unmap an apic
2212  */
2213 void
2214 mapout_apic(caddr_t addr, size_t len)
2215 {
2216 	psm_unmap_phys(addr, len);
2217 }
2218 
2219 void
2220 mapout_ioapic(caddr_t addr, size_t len)
2221 {
2222 	mapout_apic(addr, len);
2223 }
2224 
2225 /*
2226  * Check to make sure there are enough irq slots
2227  */
2228 int
2229 apic_check_free_irqs(int count)
2230 {
2231 	int i, avail;
2232 
2233 	avail = 0;
2234 	for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
2235 		if ((apic_irq_table[i] == NULL) ||
2236 		    apic_irq_table[i]->airq_mps_intr_index == FREE_INDEX) {
2237 			if (++avail >= count)
2238 				return (PSM_SUCCESS);
2239 		}
2240 	}
2241 	return (PSM_FAILURE);
2242 }
2243 
2244 /*
2245  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
2246  */
2247 int
2248 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
2249     int behavior)
2250 {
2251 	int	rcount, i;
2252 	uchar_t	start, irqno;
2253 	uint32_t cpu;
2254 	major_t	major;
2255 	apic_irq_t	*irqptr;
2256 
2257 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
2258 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
2259 	    (void *)dip, inum, pri, count, behavior));
2260 
2261 	if (count > 1) {
2262 		if (behavior == DDI_INTR_ALLOC_STRICT &&
2263 		    apic_multi_msi_enable == 0)
2264 			return (0);
2265 		if (apic_multi_msi_enable == 0)
2266 			count = 1;
2267 	}
2268 
2269 	if ((rcount = apic_navail_vector(dip, pri)) > count)
2270 		rcount = count;
2271 	else if (rcount == 0 || (rcount < count &&
2272 	    behavior == DDI_INTR_ALLOC_STRICT))
2273 		return (0);
2274 
2275 	/* if not ISP2, then round it down */
2276 	if (!ISP2(rcount))
2277 		rcount = 1 << (highbit(rcount) - 1);
2278 
2279 	mutex_enter(&airq_mutex);
2280 
2281 	for (start = 0; rcount > 0; rcount >>= 1) {
2282 		if ((start = apic_find_multi_vectors(pri, rcount)) != 0 ||
2283 		    behavior == DDI_INTR_ALLOC_STRICT)
2284 			break;
2285 	}
2286 
2287 	if (start == 0) {
2288 		/* no vector available */
2289 		mutex_exit(&airq_mutex);
2290 		return (0);
2291 	}
2292 
2293 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
2294 		/* not enough free irq slots available */
2295 		mutex_exit(&airq_mutex);
2296 		return (0);
2297 	}
2298 
2299 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
2300 	for (i = 0; i < rcount; i++) {
2301 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
2302 		    (uchar_t)-1) {
2303 			/*
2304 			 * shouldn't happen because of the
2305 			 * apic_check_free_irqs() check earlier
2306 			 */
2307 			mutex_exit(&airq_mutex);
2308 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
2309 			    "apic_allocate_irq failed\n"));
2310 			return (i);
2311 		}
2312 		apic_max_device_irq = max(irqno, apic_max_device_irq);
2313 		apic_min_device_irq = min(irqno, apic_min_device_irq);
2314 		irqptr = apic_irq_table[irqno];
2315 #ifdef	DEBUG
2316 		if (apic_vector_to_irq[start + i] != APIC_RESV_IRQ)
2317 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
2318 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
2319 #endif
2320 		apic_vector_to_irq[start + i] = (uchar_t)irqno;
2321 
2322 		irqptr->airq_vector = (uchar_t)(start + i);
2323 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
2324 		irqptr->airq_intin_no = (uchar_t)rcount;
2325 		irqptr->airq_ipl = pri;
2326 		irqptr->airq_vector = start + i;
2327 		irqptr->airq_origirq = (uchar_t)(inum + i);
2328 		irqptr->airq_share_id = 0;
2329 		irqptr->airq_mps_intr_index = MSI_INDEX;
2330 		irqptr->airq_dip = dip;
2331 		irqptr->airq_major = major;
2332 		if (i == 0) /* they all bound to the same cpu */
2333 			cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno,
2334 			    0xff, 0xff);
2335 		else
2336 			irqptr->airq_cpu = cpu;
2337 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
2338 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
2339 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
2340 		    irqptr->airq_origirq, pri));
2341 	}
2342 	mutex_exit(&airq_mutex);
2343 	return (rcount);
2344 }
2345 
2346 /*
2347  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
2348  */
2349 int
2350 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
2351     int behavior)
2352 {
2353 	int	rcount, i;
2354 	major_t	major;
2355 
2356 	mutex_enter(&airq_mutex);
2357 
2358 	if ((rcount = apic_navail_vector(dip, pri)) > count)
2359 		rcount = count;
2360 	else if (rcount == 0 || (rcount < count &&
2361 	    behavior == DDI_INTR_ALLOC_STRICT)) {
2362 		rcount = 0;
2363 		goto out;
2364 	}
2365 
2366 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
2367 		/* not enough free irq slots available */
2368 		rcount = 0;
2369 		goto out;
2370 	}
2371 
2372 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
2373 	for (i = 0; i < rcount; i++) {
2374 		uchar_t	vector, irqno;
2375 		apic_irq_t	*irqptr;
2376 
2377 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
2378 		    (uchar_t)-1) {
2379 			/*
2380 			 * shouldn't happen because of the
2381 			 * apic_check_free_irqs() check earlier
2382 			 */
2383 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
2384 			    "apic_allocate_irq failed\n"));
2385 			rcount = i;
2386 			goto out;
2387 		}
2388 		if ((vector = apic_allocate_vector(pri, irqno, 1)) == 0) {
2389 			/*
2390 			 * shouldn't happen because of the
2391 			 * apic_navail_vector() call earlier
2392 			 */
2393 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
2394 			    "apic_allocate_vector failed\n"));
2395 			rcount = i;
2396 			goto out;
2397 		}
2398 		apic_max_device_irq = max(irqno, apic_max_device_irq);
2399 		apic_min_device_irq = min(irqno, apic_min_device_irq);
2400 		irqptr = apic_irq_table[irqno];
2401 		irqptr->airq_vector = (uchar_t)vector;
2402 		irqptr->airq_ipl = pri;
2403 		irqptr->airq_origirq = (uchar_t)(inum + i);
2404 		irqptr->airq_share_id = 0;
2405 		irqptr->airq_mps_intr_index = MSIX_INDEX;
2406 		irqptr->airq_dip = dip;
2407 		irqptr->airq_major = major;
2408 		irqptr->airq_cpu = apic_bind_intr(dip, irqno, 0xff, 0xff);
2409 	}
2410 out:
2411 	mutex_exit(&airq_mutex);
2412 	return (rcount);
2413 }
2414 
2415 /*
2416  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
2417  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
2418  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
2419  * requests and allocated only when pri is set.
2420  */
2421 uchar_t
2422 apic_allocate_vector(int ipl, int irq, int pri)
2423 {
2424 	int	lowest, highest, i;
2425 
2426 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
2427 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
2428 
2429 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
2430 		lowest -= APIC_VECTOR_PER_IPL;
2431 
2432 #ifdef	DEBUG
2433 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
2434 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
2435 #endif /* DEBUG */
2436 	if (pri == 0)
2437 		highest -= APIC_HI_PRI_VECTS;
2438 
2439 	for (i = lowest; i < highest; i++) {
2440 		if (APIC_CHECK_RESERVE_VECTORS(i))
2441 			continue;
2442 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
2443 			apic_vector_to_irq[i] = (uchar_t)irq;
2444 			return (i);
2445 		}
2446 	}
2447 
2448 	return (0);
2449 }
2450 
2451 /* Mark vector as not being used by any irq */
2452 void
2453 apic_free_vector(uchar_t vector)
2454 {
2455 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
2456 }
2457 
2458 uint32_t
2459 ioapic_read(int ioapic_ix, uint32_t reg)
2460 {
2461 	volatile uint32_t *ioapic;
2462 
2463 	ioapic = apicioadr[ioapic_ix];
2464 	ioapic[APIC_IO_REG] = reg;
2465 	return (ioapic[APIC_IO_DATA]);
2466 }
2467 
2468 void
2469 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
2470 {
2471 	volatile uint32_t *ioapic;
2472 
2473 	ioapic = apicioadr[ioapic_ix];
2474 	ioapic[APIC_IO_REG] = reg;
2475 	ioapic[APIC_IO_DATA] = value;
2476 }
2477 
2478 void
2479 ioapic_write_eoi(int ioapic_ix, uint32_t value)
2480 {
2481 	volatile uint32_t *ioapic;
2482 
2483 	ioapic = apicioadr[ioapic_ix];
2484 	ioapic[APIC_IO_EOI] = value;
2485 }
2486 
2487 static processorid_t
2488 apic_find_cpu(int flag)
2489 {
2490 	processorid_t acid = 0;
2491 	int i;
2492 
2493 	/* Find the first CPU with the passed-in flag set */
2494 	for (i = 0; i < apic_nproc; i++) {
2495 		if (apic_cpus[i].aci_status & flag) {
2496 			acid = i;
2497 			break;
2498 		}
2499 	}
2500 
2501 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
2502 	return (acid);
2503 }
2504 
2505 /*
2506  * Call rebind to do the actual programming.
2507  * Must be called with interrupts disabled and apic_ioapic_lock held
2508  * 'p' is polymorphic -- if this function is called to process a deferred
2509  * reprogramming, p is of type 'struct ioapic_reprogram_data *', from which
2510  * the irq pointer is retrieved.  If not doing deferred reprogramming,
2511  * p is of the type 'apic_irq_t *'.
2512  *
2513  * apic_ioapic_lock must be held across this call, as it protects apic_rebind
2514  * and it protects apic_find_cpu() from a race in which a CPU can be taken
2515  * offline after a cpu is selected, but before apic_rebind is called to
2516  * bind interrupts to it.
2517  */
2518 int
2519 apic_setup_io_intr(void *p, int irq, boolean_t deferred)
2520 {
2521 	apic_irq_t *irqptr;
2522 	struct ioapic_reprogram_data *drep = NULL;
2523 	int rv;
2524 
2525 	if (deferred) {
2526 		drep = (struct ioapic_reprogram_data *)p;
2527 		ASSERT(drep != NULL);
2528 		irqptr = drep->irqp;
2529 	} else
2530 		irqptr = (apic_irq_t *)p;
2531 
2532 	ASSERT(irqptr != NULL);
2533 
2534 	rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, drep);
2535 	if (rv) {
2536 		/*
2537 		 * CPU is not up or interrupts are disabled. Fall back to
2538 		 * the first available CPU
2539 		 */
2540 		rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
2541 		    drep);
2542 	}
2543 
2544 	return (rv);
2545 }
2546 
2547 
2548 uchar_t
2549 apic_modify_vector(uchar_t vector, int irq)
2550 {
2551 	apic_vector_to_irq[vector] = (uchar_t)irq;
2552 	return (vector);
2553 }
2554 
2555 char *
2556 apic_get_apic_type()
2557 {
2558 	return (apic_psm_info.p_mach_idstring);
2559 }
2560 
2561 void
2562 x2apic_update_psm()
2563 {
2564 	struct psm_ops *pops = &apic_ops;
2565 
2566 	ASSERT(pops != NULL);
2567 
2568 	/*
2569 	 * We don't need to do any magic if one of the following
2570 	 * conditions is true :
2571 	 * - Not being run under kernel debugger.
2572 	 * - MP is not set.
2573 	 * - Booted with one CPU only.
2574 	 * - One CPU configured.
2575 	 *
2576 	 * We set apic_common_send_ipi() since kernel debuggers
2577 	 * attempt to send IPIs to other slave CPUs during
2578 	 * entry (exit) from (to) debugger.
2579 	 */
2580 	if (!(boothowto & RB_DEBUG) || use_mp == 0 ||
2581 	    apic_nproc == 1 || boot_ncpus == 1) {
2582 		pops->psm_send_ipi =  x2apic_send_ipi;
2583 	} else {
2584 		pops->psm_send_ipi =  apic_common_send_ipi;
2585 	}
2586 
2587 	pops->psm_intr_exit = x2apic_intr_exit;
2588 	pops->psm_setspl = x2apic_setspl;
2589 
2590 	send_dirintf = pops->psm_send_ipi;
2591 
2592 	apic_mode = LOCAL_X2APIC;
2593 	apic_change_ops();
2594 }
2595 
2596 static void
2597 apic_intrr_init(int apic_mode)
2598 {
2599 	if (psm_vt_ops != NULL) {
2600 		if (((apic_intrr_ops_t *)psm_vt_ops)->apic_intrr_init(apic_mode)
2601 		    == DDI_SUCCESS) {
2602 			apic_vt_ops = psm_vt_ops;
2603 			apic_vt_ops->apic_intrr_enable();
2604 		}
2605 	}
2606 }
2607 
2608 /*ARGSUSED*/
2609 static void
2610 apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt)
2611 {
2612 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
2613 }
2614 
2615 /*ARGSUSED*/
2616 static void
2617 apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs)
2618 {
2619 	mregs->mr_addr = MSI_ADDR_HDR |
2620 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
2621 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
2622 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
2623 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
2624 	    mregs->mr_data;
2625 }
2626