xref: /illumos-gate/usr/src/uts/i86pc/io/pcplusmp/apic.c (revision bb9b6b3f59b8820022416cea99b49c50fef6e391)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
29  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
30  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
31  * PSMI 1.5 extensions are supported in Solaris Nevada.
32  * PSMI 1.6 extensions are supported in Solaris Nevada.
33  */
34 #define	PSMI_1_6
35 
36 #include <sys/processor.h>
37 #include <sys/time.h>
38 #include <sys/psm.h>
39 #include <sys/smp_impldefs.h>
40 #include <sys/cram.h>
41 #include <sys/acpi/acpi.h>
42 #include <sys/acpica.h>
43 #include <sys/psm_common.h>
44 #include <sys/apic.h>
45 #include <sys/pit.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/pci.h>
50 #include <sys/promif.h>
51 #include <sys/x86_archext.h>
52 #include <sys/cpc_impl.h>
53 #include <sys/uadmin.h>
54 #include <sys/panic.h>
55 #include <sys/debug.h>
56 #include <sys/archsystm.h>
57 #include <sys/trap.h>
58 #include <sys/machsystm.h>
59 #include <sys/sysmacros.h>
60 #include <sys/cpuvar.h>
61 #include <sys/rm_platter.h>
62 #include <sys/privregs.h>
63 #include <sys/note.h>
64 #include <sys/pci_intr_lib.h>
65 #include <sys/spl.h>
66 #include <sys/clock.h>
67 #include <sys/dditypes.h>
68 #include <sys/sunddi.h>
69 #include <sys/x_call.h>
70 #include <sys/reboot.h>
71 #include <sys/hpet.h>
72 
73 /*
74  *	Local Function Prototypes
75  */
76 static void apic_init_intr();
77 static void apic_nmi_intr(caddr_t arg, struct regs *rp);
78 
79 /*
80  *	standard MP entries
81  */
82 static int	apic_probe();
83 static int	apic_clkinit();
84 static int	apic_getclkirq(int ipl);
85 static uint_t	apic_calibrate(volatile uint32_t *addr,
86     uint16_t *pit_ticks_adj);
87 static hrtime_t apic_gettime();
88 static hrtime_t apic_gethrtime();
89 static void	apic_init();
90 static void	apic_picinit(void);
91 static int	apic_cpu_start(processorid_t, caddr_t);
92 static int	apic_post_cpu_start(void);
93 static void	apic_send_ipi(int cpun, int ipl);
94 static void	apic_set_idlecpu(processorid_t cpun);
95 static void	apic_unset_idlecpu(processorid_t cpun);
96 static int	apic_intr_enter(int ipl, int *vect);
97 static void	apic_setspl(int ipl);
98 static void	x2apic_setspl(int ipl);
99 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
100 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
101 static void	apic_shutdown(int cmd, int fcn);
102 static void	apic_preshutdown(int cmd, int fcn);
103 static int	apic_disable_intr(processorid_t cpun);
104 static void	apic_enable_intr(processorid_t cpun);
105 static processorid_t	apic_get_next_processorid(processorid_t cpun);
106 static int		apic_get_ipivect(int ipl, int type);
107 static void	apic_timer_reprogram(hrtime_t time);
108 static void	apic_timer_enable(void);
109 static void	apic_timer_disable(void);
110 static void	apic_post_cyclic_setup(void *arg);
111 static void	apic_intrr_init(int apic_mode);
112 static void	apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt);
113 static void	apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs);
114 
115 static int	apic_oneshot = 0;
116 int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */
117 
118 /* Now the ones for Dynamic Interrupt distribution */
119 int	apic_enable_dynamic_migration = 0;
120 
121 extern int apic_have_32bit_cr8;
122 
123 /*
124  * These variables are frequently accessed in apic_intr_enter(),
125  * apic_intr_exit and apic_setspl, so group them together
126  */
127 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
128 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
129 int apic_clkvect;
130 
131 /* vector at which error interrupts come in */
132 int apic_errvect;
133 int apic_enable_error_intr = 1;
134 int apic_error_display_delay = 100;
135 
136 /* vector at which performance counter overflow interrupts come in */
137 int apic_cpcovf_vect;
138 int apic_enable_cpcovf_intr = 1;
139 
140 /* vector at which CMCI interrupts come in */
141 int apic_cmci_vect;
142 extern int cmi_enable_cmci;
143 extern void cmi_cmci_trap(void);
144 
145 static kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
146 static int cmci_cpu_setup_registered;
147 
148 /*
149  * The following vector assignments influence the value of ipltopri and
150  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
151  * idle to 0 and IPL 0 to 0xf to differentiate idle in case
152  * we care to do so in future. Note some IPLs which are rarely used
153  * will share the vector ranges and heavily used IPLs (5 and 6) have
154  * a wide range.
155  *
156  * This array is used to initialize apic_ipls[] (in apic_init()).
157  *
158  *	IPL		Vector range.		as passed to intr_enter
159  *	0		none.
160  *	1,2,3		0x20-0x2f		0x0-0xf
161  *	4		0x30-0x3f		0x10-0x1f
162  *	5		0x40-0x5f		0x20-0x3f
163  *	6		0x60-0x7f		0x40-0x5f
164  *	7,8,9		0x80-0x8f		0x60-0x6f
165  *	10		0x90-0x9f		0x70-0x7f
166  *	11		0xa0-0xaf		0x80-0x8f
167  *	...		...
168  *	15		0xe0-0xef		0xc0-0xcf
169  *	15		0xf0-0xff		0xd0-0xdf
170  */
171 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
172 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
173 };
174 	/*
175 	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
176 	 * NOTE that this is vector as passed into intr_enter which is
177 	 * programmed vector - 0x20 (APIC_BASE_VECT)
178 	 */
179 
180 uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
181 	/* The taskpri to be programmed into apic to mask given ipl */
182 
183 #if defined(__amd64)
184 uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
185 #endif
186 
187 /*
188  * Correlation of the hardware vector to the IPL in use, initialized
189  * from apic_vectortoipl[] in apic_init().  The final IPLs may not correlate
190  * to the IPLs in apic_vectortoipl on some systems that share interrupt lines
191  * connected to errata-stricken IOAPICs
192  */
193 uchar_t apic_ipls[APIC_AVAIL_VECTOR];
194 
195 /*
196  * Patchable global variables.
197  */
198 int	apic_forceload = 0;
199 
200 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
201 					/* 1 - use gettime() for performance */
202 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
203 int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
204 int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
205 int	apic_panic_on_nmi = 0;
206 int	apic_panic_on_apic_error = 0;
207 
208 int	apic_verbose = 0;
209 
210 /* minimum number of timer ticks to program to */
211 int apic_min_timer_ticks = 1;
212 /*
213  *	Local static data
214  */
215 static struct	psm_ops apic_ops = {
216 	apic_probe,
217 
218 	apic_init,
219 	apic_picinit,
220 	apic_intr_enter,
221 	apic_intr_exit,
222 	apic_setspl,
223 	apic_addspl,
224 	apic_delspl,
225 	apic_disable_intr,
226 	apic_enable_intr,
227 	(int (*)(int))NULL,		/* psm_softlvl_to_irq */
228 	(void (*)(int))NULL,		/* psm_set_softintr */
229 
230 	apic_set_idlecpu,
231 	apic_unset_idlecpu,
232 
233 	apic_clkinit,
234 	apic_getclkirq,
235 	(void (*)(void))NULL,		/* psm_hrtimeinit */
236 	apic_gethrtime,
237 
238 	apic_get_next_processorid,
239 	apic_cpu_start,
240 	apic_post_cpu_start,
241 	apic_shutdown,
242 	apic_get_ipivect,
243 	apic_send_ipi,
244 
245 	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
246 	(void (*)(int, char *))NULL,	/* psm_notify_error */
247 	(void (*)(int))NULL,		/* psm_notify_func */
248 	apic_timer_reprogram,
249 	apic_timer_enable,
250 	apic_timer_disable,
251 	apic_post_cyclic_setup,
252 	apic_preshutdown,
253 	apic_intr_ops,			/* Advanced DDI Interrupt framework */
254 	apic_state,			/* save, restore apic state for S3 */
255 };
256 
257 
258 static struct	psm_info apic_psm_info = {
259 	PSM_INFO_VER01_6,			/* version */
260 	PSM_OWN_EXCLUSIVE,			/* ownership */
261 	(struct psm_ops *)&apic_ops,		/* operation */
262 	APIC_PCPLUSMP_NAME,			/* machine name */
263 	"pcplusmp v1.4 compatible",
264 };
265 
266 static void *apic_hdlp;
267 
268 #ifdef DEBUG
269 int	apic_debug = 0;
270 int	apic_restrict_vector = 0;
271 
272 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
273 int	apic_debug_msgbufindex = 0;
274 
275 #endif /* DEBUG */
276 
277 apic_cpus_info_t	*apic_cpus;
278 
279 cpuset_t	apic_cpumask;
280 uint_t	apic_picinit_called;
281 
282 /* Flag to indicate that we need to shut down all processors */
283 static uint_t	apic_shutdown_processors;
284 
285 uint_t apic_nsec_per_intr = 0;
286 
287 /*
288  * apic_let_idle_redistribute can have the following values:
289  * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
290  * apic_redistribute_lock prevents multiple idle cpus from redistributing
291  */
292 int	apic_num_idle_redistributions = 0;
293 static	int apic_let_idle_redistribute = 0;
294 static	uint_t apic_nticks = 0;
295 static	uint_t apic_skipped_redistribute = 0;
296 
297 /* to gather intr data and redistribute */
298 static void apic_redistribute_compute(void);
299 
300 static	uint_t last_count_read = 0;
301 static	lock_t	apic_gethrtime_lock;
302 volatile int	apic_hrtime_stamp = 0;
303 volatile hrtime_t apic_nsec_since_boot = 0;
304 static uint_t apic_hertz_count;
305 
306 uint64_t apic_ticks_per_SFnsecs;	/* # of ticks in SF nsecs */
307 
308 static hrtime_t apic_nsec_max;
309 
310 static	hrtime_t	apic_last_hrtime = 0;
311 int		apic_hrtime_error = 0;
312 int		apic_remote_hrterr = 0;
313 int		apic_num_nmis = 0;
314 int		apic_apic_error = 0;
315 int		apic_num_apic_errors = 0;
316 int		apic_num_cksum_errors = 0;
317 
318 int	apic_error = 0;
319 static	int	apic_cmos_ssb_set = 0;
320 
321 /* use to make sure only one cpu handles the nmi */
322 static	lock_t	apic_nmi_lock;
323 /* use to make sure only one cpu handles the error interrupt */
324 static	lock_t	apic_error_lock;
325 
326 static	struct {
327 	uchar_t	cntl;
328 	uchar_t	data;
329 } aspen_bmc[] = {
330 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
331 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
332 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
333 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
334 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
335 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
336 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
337 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
338 
339 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
340 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
341 };
342 
343 static	struct {
344 	int	port;
345 	uchar_t	data;
346 } sitka_bmc[] = {
347 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
348 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
349 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
350 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
351 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
352 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
353 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
354 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
355 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
356 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
357 
358 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
359 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
360 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
361 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
362 };
363 
364 /* Patchable global variables. */
365 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
366 uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */
367 
368 /* default apic ops without interrupt remapping */
369 static apic_intrr_ops_t apic_nointrr_ops = {
370 	(int (*)(int))return_instr,
371 	(void (*)(int))return_instr,
372 	(void (*)(apic_irq_t *))return_instr,
373 	(void (*)(apic_irq_t *, void *))return_instr,
374 	(void (*)(apic_irq_t *))return_instr,
375 	apic_record_ioapic_rdt,
376 	apic_record_msi,
377 };
378 
379 apic_intrr_ops_t *apic_vt_ops = &apic_nointrr_ops;
380 
381 /*
382  *	This is the loadable module wrapper
383  */
384 
385 int
386 _init(void)
387 {
388 	if (apic_coarse_hrtime)
389 		apic_ops.psm_gethrtime = &apic_gettime;
390 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
391 }
392 
393 int
394 _fini(void)
395 {
396 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
397 }
398 
399 int
400 _info(struct modinfo *modinfop)
401 {
402 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
403 }
404 
405 
406 static int
407 apic_probe()
408 {
409 	return (apic_probe_common(apic_psm_info.p_mach_idstring));
410 }
411 
412 void
413 apic_init()
414 {
415 	int i;
416 	int	j = 1;
417 
418 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
419 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
420 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
421 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
422 			/* get to highest vector at the same ipl */
423 			continue;
424 		for (; j <= apic_vectortoipl[i]; j++) {
425 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
426 			    APIC_BASE_VECT;
427 		}
428 	}
429 	for (; j < MAXIPL + 1; j++)
430 		/* fill up any empty ipltopri slots */
431 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
432 	apic_init_common();
433 #if defined(__amd64)
434 	/*
435 	 * Make cpu-specific interrupt info point to cr8pri vector
436 	 */
437 	for (i = 0; i <= MAXIPL; i++)
438 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
439 	CPU->cpu_pri_data = apic_cr8pri;
440 #else
441 	if (cpuid_have_cr8access(CPU))
442 		apic_have_32bit_cr8 = 1;
443 #endif	/* __amd64 */
444 }
445 
446 /*
447  * handler for APIC Error interrupt. Just print a warning and continue
448  */
449 static int
450 apic_error_intr()
451 {
452 	uint_t	error0, error1, error;
453 	uint_t	i;
454 
455 	/*
456 	 * We need to write before read as per 7.4.17 of system prog manual.
457 	 * We do both and or the results to be safe
458 	 */
459 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
460 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
461 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
462 	error = error0 | error1;
463 
464 	/*
465 	 * Clear the APIC error status (do this on all cpus that enter here)
466 	 * (two writes are required due to the semantics of accessing the
467 	 * error status register.)
468 	 */
469 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
470 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
471 
472 	/*
473 	 * Prevent more than 1 CPU from handling error interrupt causing
474 	 * double printing (interleave of characters from multiple
475 	 * CPU's when using prom_printf)
476 	 */
477 	if (lock_try(&apic_error_lock) == 0)
478 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
479 	if (error) {
480 #if	DEBUG
481 		if (apic_debug)
482 			debug_enter("pcplusmp: APIC Error interrupt received");
483 #endif /* DEBUG */
484 		if (apic_panic_on_apic_error)
485 			cmn_err(CE_PANIC,
486 			    "APIC Error interrupt on CPU %d. Status = %x\n",
487 			    psm_get_cpu_id(), error);
488 		else {
489 			if ((error & ~APIC_CS_ERRORS) == 0) {
490 				/* cksum error only */
491 				apic_error |= APIC_ERR_APIC_ERROR;
492 				apic_apic_error |= error;
493 				apic_num_apic_errors++;
494 				apic_num_cksum_errors++;
495 			} else {
496 				/*
497 				 * prom_printf is the best shot we have of
498 				 * something which is problem free from
499 				 * high level/NMI type of interrupts
500 				 */
501 				prom_printf("APIC Error interrupt on CPU %d. "
502 				    "Status 0 = %x, Status 1 = %x\n",
503 				    psm_get_cpu_id(), error0, error1);
504 				apic_error |= APIC_ERR_APIC_ERROR;
505 				apic_apic_error |= error;
506 				apic_num_apic_errors++;
507 				for (i = 0; i < apic_error_display_delay; i++) {
508 					tenmicrosec();
509 				}
510 				/*
511 				 * provide more delay next time limited to
512 				 * roughly 1 clock tick time
513 				 */
514 				if (apic_error_display_delay < 500)
515 					apic_error_display_delay *= 2;
516 			}
517 		}
518 		lock_clear(&apic_error_lock);
519 		return (DDI_INTR_CLAIMED);
520 	} else {
521 		lock_clear(&apic_error_lock);
522 		return (DDI_INTR_UNCLAIMED);
523 	}
524 	/* NOTREACHED */
525 }
526 
527 /*
528  * Turn off the mask bit in the performance counter Local Vector Table entry.
529  */
530 static void
531 apic_cpcovf_mask_clear(void)
532 {
533 	apic_reg_ops->apic_write(APIC_PCINT_VECT,
534 	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
535 }
536 
537 /*ARGSUSED*/
538 static int
539 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
540 {
541 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
542 	return (0);
543 }
544 
545 /*ARGSUSED*/
546 static int
547 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
548 {
549 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
550 	return (0);
551 }
552 
553 /*ARGSUSED*/
554 static int
555 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
556 {
557 	cpuset_t	cpu_set;
558 
559 	CPUSET_ONLY(cpu_set, cpuid);
560 
561 	switch (what) {
562 		case CPU_ON:
563 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
564 			    (xc_func_t)apic_cmci_enable);
565 			break;
566 
567 		case CPU_OFF:
568 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
569 			    (xc_func_t)apic_cmci_disable);
570 			break;
571 
572 		default:
573 			break;
574 	}
575 
576 	return (0);
577 }
578 
579 static void
580 apic_init_intr()
581 {
582 	processorid_t	cpun = psm_get_cpu_id();
583 	uint_t nlvt;
584 	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
585 
586 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
587 
588 	if (apic_mode == LOCAL_APIC) {
589 		/*
590 		 * We are running APIC in MMIO mode.
591 		 */
592 		if (apic_flat_model) {
593 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
594 			    APIC_FLAT_MODEL);
595 		} else {
596 			apic_reg_ops->apic_write(APIC_FORMAT_REG,
597 			    APIC_CLUSTER_MODEL);
598 		}
599 
600 		apic_reg_ops->apic_write(APIC_DEST_REG,
601 		    AV_HIGH_ORDER >> cpun);
602 	}
603 
604 	if (apic_directed_EOI_supported()) {
605 		/*
606 		 * Setting the 12th bit in the Spurious Interrupt Vector
607 		 * Register suppresses broadcast EOIs generated by the local
608 		 * APIC. The suppression of broadcast EOIs happens only when
609 		 * interrupts are level-triggered.
610 		 */
611 		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
612 	}
613 
614 	/* need to enable APIC before unmasking NMI */
615 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
616 
617 	/*
618 	 * Presence of an invalid vector with delivery mode AV_FIXED can
619 	 * cause an error interrupt, even if the entry is masked...so
620 	 * write a valid vector to LVT entries along with the mask bit
621 	 */
622 
623 	/* All APICs have timer and LINT0/1 */
624 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
625 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
626 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */
627 
628 	/*
629 	 * On integrated APICs, the number of LVT entries is
630 	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
631 	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
632 	 */
633 
634 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
635 		nlvt = 3;
636 	} else {
637 		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
638 		    0xFF) + 1;
639 	}
640 
641 	if (nlvt >= 5) {
642 		/* Enable performance counter overflow interrupt */
643 
644 		if ((x86_feature & X86_MSR) != X86_MSR)
645 			apic_enable_cpcovf_intr = 0;
646 		if (apic_enable_cpcovf_intr) {
647 			if (apic_cpcovf_vect == 0) {
648 				int ipl = APIC_PCINT_IPL;
649 				int irq = apic_get_ipivect(ipl, -1);
650 
651 				ASSERT(irq != -1);
652 				apic_cpcovf_vect =
653 				    apic_irq_table[irq]->airq_vector;
654 				ASSERT(apic_cpcovf_vect);
655 				(void) add_avintr(NULL, ipl,
656 				    (avfunc)kcpc_hw_overflow_intr,
657 				    "apic pcint", irq, NULL, NULL, NULL, NULL);
658 				kcpc_hw_overflow_intr_installed = 1;
659 				kcpc_hw_enable_cpc_intr =
660 				    apic_cpcovf_mask_clear;
661 			}
662 			apic_reg_ops->apic_write(APIC_PCINT_VECT,
663 			    apic_cpcovf_vect);
664 		}
665 	}
666 
667 	if (nlvt >= 6) {
668 		/* Only mask TM intr if the BIOS apparently doesn't use it */
669 
670 		uint32_t lvtval;
671 
672 		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
673 		if (((lvtval & AV_MASK) == AV_MASK) ||
674 		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
675 			apic_reg_ops->apic_write(APIC_THERM_VECT,
676 			    AV_MASK|APIC_RESV_IRQ);
677 		}
678 	}
679 
680 	/* Enable error interrupt */
681 
682 	if (nlvt >= 4 && apic_enable_error_intr) {
683 		if (apic_errvect == 0) {
684 			int ipl = 0xf;	/* get highest priority intr */
685 			int irq = apic_get_ipivect(ipl, -1);
686 
687 			ASSERT(irq != -1);
688 			apic_errvect = apic_irq_table[irq]->airq_vector;
689 			ASSERT(apic_errvect);
690 			/*
691 			 * Not PSMI compliant, but we are going to merge
692 			 * with ON anyway
693 			 */
694 			(void) add_avintr((void *)NULL, ipl,
695 			    (avfunc)apic_error_intr, "apic error intr",
696 			    irq, NULL, NULL, NULL, NULL);
697 		}
698 		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
699 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
700 		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
701 	}
702 
703 	/* Enable CMCI interrupt */
704 	if (cmi_enable_cmci) {
705 
706 		mutex_enter(&cmci_cpu_setup_lock);
707 		if (cmci_cpu_setup_registered == 0) {
708 			mutex_enter(&cpu_lock);
709 			register_cpu_setup_func(cmci_cpu_setup, NULL);
710 			mutex_exit(&cpu_lock);
711 			cmci_cpu_setup_registered = 1;
712 		}
713 		mutex_exit(&cmci_cpu_setup_lock);
714 
715 		if (apic_cmci_vect == 0) {
716 			int ipl = 0x2;
717 			int irq = apic_get_ipivect(ipl, -1);
718 
719 			ASSERT(irq != -1);
720 			apic_cmci_vect = apic_irq_table[irq]->airq_vector;
721 			ASSERT(apic_cmci_vect);
722 
723 			(void) add_avintr(NULL, ipl,
724 			    (avfunc)cmi_cmci_trap,
725 			    "apic cmci intr", irq, NULL, NULL, NULL, NULL);
726 		}
727 		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
728 	}
729 }
730 
731 static void
732 apic_disable_local_apic()
733 {
734 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
735 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
736 
737 	/* local intr reg 0 */
738 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
739 
740 	/* disable NMI */
741 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
742 
743 	/* and error interrupt */
744 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
745 
746 	/* and perf counter intr */
747 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
748 
749 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
750 }
751 
752 static void
753 apic_picinit(void)
754 {
755 	int i, j;
756 	uint_t isr;
757 
758 	/*
759 	 * initialize interrupt remapping before apic
760 	 * hardware initialization
761 	 */
762 	apic_intrr_init(apic_mode);
763 
764 	/*
765 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
766 	 * bit on without clearing it with EOI.  Since softint
767 	 * uses vector 0x20 to interrupt itself, so softint will
768 	 * not work on this machine.  In order to fix this problem
769 	 * a check is made to verify all the isr bits are clear.
770 	 * If not, EOIs are issued to clear the bits.
771 	 */
772 	for (i = 7; i >= 1; i--) {
773 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
774 		if (isr != 0)
775 			for (j = 0; ((j < 32) && (isr != 0)); j++)
776 				if (isr & (1 << j)) {
777 					apic_reg_ops->apic_write(
778 					    APIC_EOI_REG, 0);
779 					isr &= ~(1 << j);
780 					apic_error |= APIC_ERR_BOOT_EOI;
781 				}
782 	}
783 
784 	/* set a flag so we know we have run apic_picinit() */
785 	apic_picinit_called = 1;
786 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
787 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
788 	LOCK_INIT_CLEAR(&apic_error_lock);
789 
790 	picsetup();	 /* initialise the 8259 */
791 
792 	/* add nmi handler - least priority nmi handler */
793 	LOCK_INIT_CLEAR(&apic_nmi_lock);
794 
795 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
796 	    "pcplusmp NMI handler", (caddr_t)NULL))
797 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
798 
799 	apic_init_intr();
800 
801 	/* enable apic mode if imcr present */
802 	if (apic_imcrp) {
803 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
804 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
805 	}
806 
807 	ioapic_init_intr(IOAPIC_MASK);
808 }
809 
810 
811 /*ARGSUSED1*/
812 static int
813 apic_cpu_start(processorid_t cpun, caddr_t arg)
814 {
815 	int		loop_count;
816 	uint32_t	vector;
817 	uint_t		cpu_id;
818 	ulong_t		iflag;
819 
820 	cpu_id =  apic_cpus[cpun].aci_local_id;
821 
822 	apic_cmos_ssb_set = 1;
823 
824 	/*
825 	 * Interrupts on BSP cpu will be disabled during these startup
826 	 * steps in order to avoid unwanted side effects from
827 	 * executing interrupt handlers on a problematic BIOS.
828 	 */
829 
830 	iflag = intr_clear();
831 	outb(CMOS_ADDR, SSB);
832 	outb(CMOS_DATA, BIOS_SHUTDOWN);
833 
834 	/*
835 	 * According to X2APIC specification in section '2.3.5.1' of
836 	 * Interrupt Command Register Semantics, the semantics of
837 	 * programming the Interrupt Command Register to dispatch an interrupt
838 	 * is simplified. A single MSR write to the 64-bit ICR is required
839 	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
840 	 * interface to ICR, system software is not required to check the
841 	 * status of the delivery status bit prior to writing to the ICR
842 	 * to send an IPI. With the removal of the Delivery Status bit,
843 	 * system software no longer has a reason to read the ICR. It remains
844 	 * readable only to aid in debugging.
845 	 */
846 #ifdef	DEBUG
847 	APIC_AV_PENDING_SET();
848 #else
849 	if (apic_mode == LOCAL_APIC) {
850 		APIC_AV_PENDING_SET();
851 	}
852 #endif /* DEBUG */
853 
854 	/* for integrated - make sure there is one INIT IPI in buffer */
855 	/* for external - it will wake up the cpu */
856 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);
857 
858 	/* If only 1 CPU is installed, PENDING bit will not go low */
859 	for (loop_count = 0x1000; loop_count; loop_count--) {
860 		if (apic_mode == LOCAL_APIC &&
861 		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
862 			apic_ret();
863 		else
864 			break;
865 	}
866 
867 	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);
868 
869 	drv_usecwait(20000);		/* 20 milli sec */
870 
871 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
872 		/* integrated apic */
873 
874 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
875 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
876 
877 		/* to offset the INIT IPI queue up in the buffer */
878 		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
879 
880 		drv_usecwait(200);		/* 20 micro sec */
881 
882 		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
883 
884 		drv_usecwait(200);		/* 20 micro sec */
885 	}
886 	intr_restore(iflag);
887 	return (0);
888 }
889 
890 
#ifdef	DEBUG
/*
 * DEBUG-only knobs read by apic_intr_enter().
 */
/* apic_break() is called when the interrupted CPU has this id */
int	apic_break_on_cpu = 9;
/* when non-zero, passed to drv_usecwait() for IPLs in apic_stretch_ISR */
int	apic_stretch_interrupts = 0;
/* bitmask indexed by IPL; IPL of 3 matches nothing now */
int	apic_stretch_ISR = 1 << 3;

/*
 * Intentionally empty; it exists only as a call made from
 * apic_intr_enter() on the CPU selected by apic_break_on_cpu.
 */
void
apic_break()
{
}
#endif /* DEBUG */
901 
902 /*
903  * platform_intr_enter
904  *
905  *	Called at the beginning of the interrupt service routine to
906  *	mask all level equal to and below the interrupt priority
907  *	of the interrupting vector.  An EOI should be given to
908  *	the interrupt controller to enable other HW interrupts.
909  *
910  *	Return -1 for spurious interrupts
911  *
912  */
913 /*ARGSUSED*/
914 static int
915 apic_intr_enter(int ipl, int *vectorp)
916 {
917 	uchar_t vector;
918 	int nipl;
919 	int irq;
920 	ulong_t iflag;
921 	apic_cpus_info_t *cpu_infop;
922 
923 	/*
924 	 * The real vector delivered is (*vectorp + 0x20), but our caller
925 	 * subtracts 0x20 from the vector before passing it to us.
926 	 * (That's why APIC_BASE_VECT is 0x20.)
927 	 */
928 	vector = (uchar_t)*vectorp;
929 
930 	/* if interrupted by the clock, increment apic_nsec_since_boot */
931 	if (vector == apic_clkvect) {
932 		if (!apic_oneshot) {
933 			/* NOTE: this is not MT aware */
934 			apic_hrtime_stamp++;
935 			apic_nsec_since_boot += apic_nsec_per_intr;
936 			apic_hrtime_stamp++;
937 			last_count_read = apic_hertz_count;
938 			apic_redistribute_compute();
939 		}
940 
941 		/* We will avoid all the book keeping overhead for clock */
942 		nipl = apic_ipls[vector];
943 
944 		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
945 		if (apic_mode == LOCAL_APIC) {
946 #if defined(__amd64)
947 			setcr8((ulong_t)(apic_ipltopri[nipl] >>
948 			    APIC_IPL_SHIFT));
949 #else
950 			if (apic_have_32bit_cr8)
951 				setcr8((ulong_t)(apic_ipltopri[nipl] >>
952 				    APIC_IPL_SHIFT));
953 			else
954 				LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
955 				    (uint32_t)apic_ipltopri[nipl]);
956 #endif
957 			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
958 		} else {
959 			X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
960 			X2APIC_WRITE(APIC_EOI_REG, 0);
961 		}
962 
963 		return (nipl);
964 	}
965 
966 	cpu_infop = &apic_cpus[psm_get_cpu_id()];
967 
968 	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
969 		cpu_infop->aci_spur_cnt++;
970 		return (APIC_INT_SPURIOUS);
971 	}
972 
973 	/* Check if the vector we got is really what we need */
974 	if (apic_revector_pending) {
975 		/*
976 		 * Disable interrupts for the duration of
977 		 * the vector translation to prevent a self-race for
978 		 * the apic_revector_lock.  This cannot be done
979 		 * in apic_xlate_vector because it is recursive and
980 		 * we want the vector translation to be atomic with
981 		 * respect to other (higher-priority) interrupts.
982 		 */
983 		iflag = intr_clear();
984 		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
985 		    APIC_BASE_VECT;
986 		intr_restore(iflag);
987 	}
988 
989 	nipl = apic_ipls[vector];
990 	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];
991 
992 	if (apic_mode == LOCAL_APIC) {
993 #if defined(__amd64)
994 		setcr8((ulong_t)(apic_ipltopri[nipl] >> APIC_IPL_SHIFT));
995 #else
996 		if (apic_have_32bit_cr8)
997 			setcr8((ulong_t)(apic_ipltopri[nipl] >>
998 			    APIC_IPL_SHIFT));
999 		else
1000 			LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
1001 			    (uint32_t)apic_ipltopri[nipl]);
1002 #endif
1003 	} else {
1004 		X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
1005 	}
1006 
1007 	cpu_infop->aci_current[nipl] = (uchar_t)irq;
1008 	cpu_infop->aci_curipl = (uchar_t)nipl;
1009 	cpu_infop->aci_ISR_in_progress |= 1 << nipl;
1010 
1011 	/*
1012 	 * apic_level_intr could have been assimilated into the irq struct.
1013 	 * but, having it as a character array is more efficient in terms of
1014 	 * cache usage. So, we leave it as is.
1015 	 */
1016 	if (!apic_level_intr[irq]) {
1017 		if (apic_mode == LOCAL_APIC) {
1018 			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
1019 		} else {
1020 			X2APIC_WRITE(APIC_EOI_REG, 0);
1021 		}
1022 	}
1023 
1024 #ifdef	DEBUG
1025 	APIC_DEBUG_BUF_PUT(vector);
1026 	APIC_DEBUG_BUF_PUT(irq);
1027 	APIC_DEBUG_BUF_PUT(nipl);
1028 	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
1029 	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
1030 		drv_usecwait(apic_stretch_interrupts);
1031 
1032 	if (apic_break_on_cpu == psm_get_cpu_id())
1033 		apic_break();
1034 #endif /* DEBUG */
1035 	return (nipl);
1036 }
1037 
/*
 * This macro is the common interrupt-exit code used by both the MMIO
 * local apic and the X2APIC local apic versions of intr_exit.
 *
 * It expands in the caller's scope and relies on three names being
 * declared there: cpu_infop (assigned by the macro), irq and prev_ipl.
 * If apic_level_intr[irq] is set it sends the EOI for irq through
 * apic_reg_ops, then records prev_ipl as the CPU's current IPL and
 * keeps only in-progress bits 0 through prev_ipl.
 */
#define	APIC_INTR_EXIT() \
{ \
	cpu_infop = &apic_cpus[psm_get_cpu_id()]; \
	if (apic_level_intr[irq]) \
		apic_reg_ops->apic_send_eoi(irq); \
	cpu_infop->aci_curipl = (uchar_t)prev_ipl; \
	/* ISR above current pri could not be in progress */ \
	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; \
}
1051 
1052 /*
1053  * Any changes made to this function must also change X2APIC
1054  * version of intr_exit.
1055  */
1056 void
1057 apic_intr_exit(int prev_ipl, int irq)
1058 {
1059 	apic_cpus_info_t *cpu_infop;
1060 
1061 #if defined(__amd64)
1062 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
1063 #else
1064 	if (apic_have_32bit_cr8)
1065 		setcr8((ulong_t)(apic_ipltopri[prev_ipl] >> APIC_IPL_SHIFT));
1066 	else
1067 		apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
1068 #endif
1069 
1070 	APIC_INTR_EXIT();
1071 }
1072 
/*
 * Same as apic_intr_exit() except it uses MSR rather than MMIO
 * to access local apic registers.
 *
 * prev_ipl indexes apic_ipltopri[] to obtain the task priority value
 * that is written back; irq is the interrupt being completed.
 */
void
x2apic_intr_exit(int prev_ipl, int irq)
{
	/* assigned and used inside APIC_INTR_EXIT() */
	apic_cpus_info_t *cpu_infop;

	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[prev_ipl]);
	APIC_INTR_EXIT();
}
1085 
1086 intr_exit_fn_t
1087 psm_intr_exit_fn(void)
1088 {
1089 	if (apic_mode == LOCAL_X2APIC)
1090 		return (x2apic_intr_exit);
1091 
1092 	return (apic_intr_exit);
1093 }
1094 
1095 /*
1096  * Mask all interrupts below or equal to the given IPL.
1097  * Any changes made to this function must also change X2APIC
1098  * version of setspl.
1099  */
1100 static void
1101 apic_setspl(int ipl)
1102 {
1103 #if defined(__amd64)
1104 	setcr8((ulong_t)apic_cr8pri[ipl]);
1105 #else
1106 	if (apic_have_32bit_cr8)
1107 		setcr8((ulong_t)(apic_ipltopri[ipl] >> APIC_IPL_SHIFT));
1108 	else
1109 		apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
1110 #endif
1111 
1112 	/* interrupts at ipl above this cannot be in progress */
1113 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
1114 	/*
1115 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
1116 	 * have enough time to come in before the priority is raised again
1117 	 * during the idle() loop.
1118 	 */
1119 	if (apic_setspl_delay)
1120 		(void) apic_reg_ops->apic_get_pri();
1121 }
1122 
1123 /*
1124  * X2APIC version of setspl.
1125  * Mask all interrupts below or equal to the given IPL
1126  */
1127 static void
1128 x2apic_setspl(int ipl)
1129 {
1130 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[ipl]);
1131 
1132 	/* interrupts at ipl above this cannot be in progress */
1133 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
1134 }
1135 
1136 /*
1137  * generates an interprocessor interrupt to another CPU. Any changes made to
1138  * this routine must be accompanied by similar changes to
1139  * apic_common_send_ipi().
1140  */
1141 static void
1142 apic_send_ipi(int cpun, int ipl)
1143 {
1144 	int vector;
1145 	ulong_t flag;
1146 
1147 	vector = apic_resv_vector[ipl];
1148 
1149 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
1150 
1151 	flag = intr_clear();
1152 
1153 	APIC_AV_PENDING_SET();
1154 
1155 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
1156 	    vector);
1157 
1158 	intr_restore(flag);
1159 }
1160 
1161 
/*
 * Intentionally empty: this module does nothing with cpun here.
 */
/*ARGSUSED*/
static void
apic_set_idlecpu(processorid_t cpun)
{
}
1167 
/*
 * Intentionally empty: this module does nothing with cpun here.
 */
/*ARGSUSED*/
static void
apic_unset_idlecpu(processorid_t cpun)
{
}
1173 
1174 
/*
 * Empty function called from the busy-wait loops in this file (for
 * example while apic_hrtime_stamp is odd, or while a remote read is
 * pending).  It has no effect of its own.
 * NOTE(review): presumably kept as a real call so that the polling
 * loops re-evaluate their condition on every pass -- confirm.
 */
void
apic_ret(void)
{
}
1179 
1180 /*
1181  * If apic_coarse_time == 1, then apic_gettime() is used instead of
1182  * apic_gethrtime().  This is used for performance instead of accuracy.
1183  */
1184 
1185 static hrtime_t
1186 apic_gettime()
1187 {
1188 	int old_hrtime_stamp;
1189 	hrtime_t temp;
1190 
1191 	/*
1192 	 * In one-shot mode, we do not keep time, so if anyone
1193 	 * calls psm_gettime() directly, we vector over to
1194 	 * gethrtime().
1195 	 * one-shot mode MUST NOT be enabled if this psm is the source of
1196 	 * hrtime.
1197 	 */
1198 
1199 	if (apic_oneshot)
1200 		return (gethrtime());
1201 
1202 
1203 gettime_again:
1204 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
1205 		apic_ret();
1206 
1207 	temp = apic_nsec_since_boot;
1208 
1209 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
1210 		goto gettime_again;
1211 	}
1212 	return (temp);
1213 }
1214 
/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The result is apic_nsec_since_boot plus the time elapsed in the
 * current timer period, computed from the current-count register of
 * CPU 0's local APIC (read directly on CPU 0, or with a remote read
 * from any other CPU).  The value returned is never less than
 * apic_last_hrtime; if the remote read fails, apic_last_hrtime itself
 * is returned.
 */
static hrtime_t
apic_gethrtime()
{
	int curr_timeval, countval, elapsed_ticks;
	int old_hrtime_stamp, status;
	hrtime_t temp;
	uint32_t cpun;
	ulong_t oflags;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gethrtime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());

	oflags = intr_clear();	/* prevent migration */

	/*
	 * Read this CPU's local APIC id; in LOCAL_APIC (MMIO) mode the
	 * register value is shifted down by APIC_ID_BIT_OFFSET first.
	 */
	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
	if (apic_mode == LOCAL_APIC)
		cpun >>= APIC_ID_BIT_OFFSET;

	/* held until return; guards last_count_read and apic_last_hrtime */
	lock_set(&apic_gethrtime_lock);

gethrtime_again:
	/* spin while the stamp is odd, remembering the even value seen */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	/*
	 * Check to see which CPU we are on.  Note the time is kept on
	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
	 * counter.  If on another CPU, issue a remote read command to CPU 0.
	 */
	if (cpun == apic_cpus[0].aci_local_id) {
		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
	} else {
#ifdef	DEBUG
		APIC_AV_PENDING_SET();
#else
		if (apic_mode == LOCAL_APIC)
			APIC_AV_PENDING_SET();
#endif /* DEBUG */

		apic_reg_ops->apic_write_int_cmd(
		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

		/* poll the command register until the remote read completes */
		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
		    & AV_READ_PENDING) {
			apic_ret();
		}

		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
		else {	/* 0 = invalid */
			apic_remote_hrterr++;
			/*
			 * return last hrtime right now, will need more
			 * testing if change to retry
			 */
			temp = apic_last_hrtime;

			lock_clear(&apic_gethrtime_lock);

			intr_restore(oflags);

			return (temp);
		}
	}
	/*
	 * A count larger than the previous reading is replaced by 0;
	 * otherwise it becomes the new last_count_read.
	 * NOTE(review): presumably a larger count means the down-counter
	 * reloaded since the last read -- confirm.
	 */
	if (countval > last_count_read)
		countval = 0;
	else
		last_count_read = countval;

	/* ticks consumed so far out of the apic_hertz_count period */
	elapsed_ticks = apic_hertz_count - countval;

	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
	temp = apic_nsec_since_boot + curr_timeval;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		/* we might have clobbered last_count_read. Restore it */
		last_count_read = apic_hertz_count;
		goto gethrtime_again;
	}

	/* never hand out a time earlier than the last one returned */
	if (temp < apic_last_hrtime) {
		/* return last hrtime if error occurs */
		apic_hrtime_error++;
		temp = apic_last_hrtime;
	}
	else
		apic_last_hrtime = temp;

	lock_clear(&apic_gethrtime_lock);
	intr_restore(oflags);

	return (temp);
}
1321 
1322 /* apic NMI handler */
1323 /*ARGSUSED*/
1324 static void
1325 apic_nmi_intr(caddr_t arg, struct regs *rp)
1326 {
1327 	if (apic_shutdown_processors) {
1328 		apic_disable_local_apic();
1329 		return;
1330 	}
1331 
1332 	apic_error |= APIC_ERR_NMI;
1333 
1334 	if (!lock_try(&apic_nmi_lock))
1335 		return;
1336 	apic_num_nmis++;
1337 
1338 	if (apic_kmdb_on_nmi && psm_debugger()) {
1339 		debug_enter("NMI received: entering kmdb\n");
1340 	} else if (apic_panic_on_nmi) {
1341 		/* Keep panic from entering kmdb. */
1342 		nopanicdebug = 1;
1343 		panic("NMI received\n");
1344 	} else {
1345 		/*
1346 		 * prom_printf is the best shot we have of something which is
1347 		 * problem free from high level/NMI type of interrupts
1348 		 */
1349 		prom_printf("NMI received\n");
1350 	}
1351 
1352 	lock_clear(&apic_nmi_lock);
1353 }
1354 
/*
 * Thin wrapper: hand all arguments to apic_addspl_common() and return
 * its result.
 */
/*ARGSUSED*/
static int
apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int rv;

	rv = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);
	return (rv);
}
1361 
/*
 * Thin wrapper: hand all arguments to apic_delspl_common() and return
 * its result.
 */
static int
apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	int rv;

	rv = apic_delspl_common(irqno, ipl, min_ipl, max_ipl);
	return (rv);
}
1367 
/*
 * Finish local APIC setup on a CPU that has just been started.
 *
 * Switches the CPU into X2APIC mode when the system runs in that mode,
 * optionally redirects IPI sending to x2apic_send_ipi, initializes
 * local interrupt handling, enables the CPU caches, marks the CPU
 * online in apic_cpus[] and programs the timer divide register.
 * Always returns PSM_SUCCESS.
 */
static int
apic_post_cpu_start()
{
	int cpun;
	/* starts at 1; each call adds one (see the comment below) */
	static int cpus_started = 1;
	struct psm_ops *pops = &apic_ops;

	/* We know this CPU + BSP  started successfully. */
	cpus_started++;

	/*
	 * On BSP we would have enabled X2APIC, if supported by processor,
	 * in acpi_probe(), but on AP we do it here.
	 *
	 * We enable X2APIC mode only if BSP is running in X2APIC & the
	 * local APIC mode of the current CPU is MMIO (xAPIC).
	 */
	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
	    apic_local_mode() == LOCAL_APIC) {
		apic_enable_x2apic();
	}

	/*
	 * We change psm_send_ipi and send_dirintf only if Solaris
	 * is booted in kmdb & the current CPU is the last CPU being
	 * brought up. We don't need to do anything if Solaris is running
	 * in MMIO mode (xAPIC).
	 */
	if ((boothowto & RB_DEBUG) &&
	    (cpus_started == boot_ncpus || cpus_started == apic_nproc) &&
	    apic_mode == LOCAL_X2APIC) {
		/*
		 * We no longer need help from apic_common_send_ipi()
		 * since we will not start any more CPUs.
		 *
		 * We will need to revisit this if we start supporting
		 * hot-plugging of CPUs.
		 */
		pops->psm_send_ipi = x2apic_send_ipi;
		send_dirintf = pops->psm_send_ipi;
	}

	/* set the priority level to LOCK_LEVEL before apic_init_intr() */
	splx(ipltospl(LOCK_LEVEL));
	apic_init_intr();

	/*
	 * since some systems don't enable the internal cache on the non-boot
	 * cpus, so we have to enable them here
	 */
	setcr0(getcr0() & ~(CR0_CD | CR0_NW));

#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif	/* DEBUG */

	/*
	 * We may be booting, or resuming from suspend; aci_status will
	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
	 */
	cpun = psm_get_cpu_id();
	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;

	/* same divide value that apic_clkinit() programs on first use */
	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
	return (PSM_SUCCESS);
}
1437 
1438 processorid_t
1439 apic_get_next_processorid(processorid_t cpu_id)
1440 {
1441 
1442 	int i;
1443 
1444 	if (cpu_id == -1)
1445 		return ((processorid_t)0);
1446 
1447 	for (i = cpu_id + 1; i < NCPU; i++) {
1448 		if (CPU_IN_SET(apic_cpumask, i))
1449 			return (i);
1450 	}
1451 
1452 	return ((processorid_t)-1);
1453 }
1454 
1455 
1456 /*
1457  * type == -1 indicates it is an internal request. Do not change
1458  * resv_vector for these requests
1459  */
1460 static int
1461 apic_get_ipivect(int ipl, int type)
1462 {
1463 	uchar_t vector;
1464 	int irq;
1465 
1466 	if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) {
1467 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
1468 			apic_irq_table[irq]->airq_mps_intr_index =
1469 			    RESERVE_INDEX;
1470 			apic_irq_table[irq]->airq_vector = vector;
1471 			if (type != -1) {
1472 				apic_resv_vector[ipl] = vector;
1473 			}
1474 			return (irq);
1475 		}
1476 	}
1477 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
1478 	return (-1);	/* shouldn't happen */
1479 }
1480 
1481 static int
1482 apic_getclkirq(int ipl)
1483 {
1484 	int	irq;
1485 
1486 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
1487 		return (-1);
1488 	/*
1489 	 * Note the vector in apic_clkvect for per clock handling.
1490 	 */
1491 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
1492 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
1493 	    apic_clkvect));
1494 	return (irq);
1495 }
1496 
1497 
/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 *
 * addr is the base of the local APIC register array being measured;
 * *pit_ticks_adj receives the number of PIT ticks by which the
 * measurement overshot APIC_TIME_COUNT.  The whole measurement runs
 * with interrupts disabled.
 */
static uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	ulong_t		iflag;
	uint32_t	reg;

	/*
	 * Turn the current-count register's position into an index
	 * relative to apicadr, which is what apic_read() is given below.
	 */
	reg = addr + APIC_CURR_COUNT - apicadr;

	iflag = intr_clear();

	/*
	 * Sample the PIT counter (low byte first, then high byte) until
	 * it is at least APIC_TIME_MIN and its low byte lies strictly
	 * between APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	start_apic_tick = apic_reg_ops->apic_read(reg);

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = apic_reg_ops->apic_read(reg);

	/* how far the PIT ran past the target before we sampled the APIC */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* start minus end: the value read at the start is the larger one */
	return (start_apic_tick - end_apic_tick);
}
1552 
/*
 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
 * frequency.  Note at this stage in the boot sequence, the boot processor
 * is the only active processor.
 * hertz value of 0 indicates a one-shot mode request.  In this case
 * the function returns the resolution (in nanoseconds) for the hardware
 * timer interrupt.  If one-shot mode capability is not available,
 * the return value will be 0. apic_enable_oneshot is a global switch
 * for disabling the functionality.
 * A non-zero positive value for hertz indicates a periodic mode request.
 * In this case the hardware will be programmed to generate clock interrupts
 * at hertz frequency and returns the resolution of interrupts in
 * nanosecond.
 */

static int
apic_clkinit(int hertz)
{
	uint_t		apic_ticks = 0;
	uint_t		pit_ticks;
	int		ret;
	uint16_t	pit_ticks_adj;
	/* calibration below is performed on the first call only */
	static int	firsttime = 1;

	if (firsttime) {
		/* first time calibrate on CPU0 only */

		apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
		apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);

		/* total number of PIT ticks corresponding to apic_ticks */
		pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;

		/*
		 * Determine the number of nanoseconds per APIC clock tick
		 * and then determine how many APIC ticks to interrupt at the
		 * desired frequency
		 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
		 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
		 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
		 * apic_ticks_per_SFns =
		 *   (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
		 */
		apic_ticks_per_SFnsecs =
		    ((SF * apic_ticks * PIT_HZ) /
		    ((uint64_t)pit_ticks * NANOSEC));

		/* the interval timer initial count is 32 bit max */
		apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
		firsttime = 0;
	}

	if (hertz != 0) {
		/* periodic */
		apic_nsec_per_intr = NANOSEC / hertz;
		apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
	}

	/*
	 * Scale the redistribution thresholds by
	 * apic_sample_factor_redistribution percent.
	 * NOTE(review): this is outside the firsttime block, so the marks
	 * are rescaled on every call -- confirm that is intended.
	 */
	apic_int_busy_mark = (apic_int_busy_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_int_free_mark = (apic_int_free_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_diff_for_redistribution = (apic_diff_for_redistribution *
	    apic_sample_factor_redistribution) / 100;

	if (hertz == 0) {
		/* requested one_shot */
		if (!tsc_gethrtime_enable || !apic_oneshot_enable)
			return (0);
		apic_oneshot = 1;
		/* resolution is the duration of a single APIC timer tick */
		ret = (int)APIC_TICKS_TO_NSECS(1);
	} else {
		/* program the local APIC to interrupt at the given frequency */
		apic_reg_ops->apic_write(APIC_INIT_COUNT, apic_hertz_count);
		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
		apic_oneshot = 0;
		ret = NANOSEC / hertz;
	}

	return (ret);

}
1637 
1638 /*
1639  * apic_preshutdown:
1640  * Called early in shutdown whilst we can still access filesystems to do
1641  * things like loading modules which will be required to complete shutdown
1642  * after filesystems are all unmounted.
1643  */
1644 static void
1645 apic_preshutdown(int cmd, int fcn)
1646 {
1647 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1648 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1649 
1650 	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
1651 		return;
1652 	}
1653 }
1654 
/*
 * Shut down the APIC subsystem and, for A_SHUTDOWN/AD_POWEROFF, try to
 * power the machine off.
 *
 * For every cmd: the other CPUs are sent an NMI (with
 * apic_shutdown_processors set so their NMI handler disables their
 * local APIC), I/O APIC redirection is disabled and this CPU's local
 * APIC is disabled.  For cmd == A_SHUTDOWN, ACPI is additionally
 * disabled unless powering off or fast-rebooting, and for
 * fcn == AD_POWEROFF the method selected by apic_poweroff_method is
 * attempted.
 */
static void
apic_shutdown(int cmd, int fcn)
{
	int restarts, attempts;
	int i;
	uchar_t	byte;
	ulong_t iflag;

	hpet_acpi_fini();

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif /* DEBUG */
	/* checked by apic_nmi_intr() on the CPUs receiving the NMI */
	apic_shutdown_processors = 1;
	apic_reg_ops->apic_write(APIC_INT_CMD1,
	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}

	ioapic_disable_redirection();

	/*	disable apic mode if imcr present	*/
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* remainder of function is for shutdown cases only */
	if (cmd != A_SHUTDOWN)
		return;

	/*
	 * Switch system back into Legacy-Mode if using ACPI and
	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
	 * for power-off to succeed (Dell Dimension 4600)
	 * Do not disable ACPI while doing fastreboot
	 */
	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
		(void) AcpiDisable();

	/* fast reboot: send a reset to all CPUs except this one */
	if (fcn == AD_FASTREBOOT) {
		apic_reg_ops->apic_write(APIC_INT_CMD1,
		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
	}

	/* remainder of function is for shutdown+poweroff case only */
	if (fcn != AD_POWEROFF)
		return;

	switch (apic_poweroff_method) {
		case APIC_POWEROFF_VIA_RTC:

			/* select the extended NVRAM bank in the RTC */
			outb(CMOS_ADDR, RTC_REGA);
			byte = inb(CMOS_DATA);
			outb(CMOS_DATA, (byte | EXT_BANK));

			outb(CMOS_ADDR, PFR_REG);

			/* for Predator must toggle the PAB bit */
			byte = inb(CMOS_DATA);

			/*
			 * clear power active bar, wakeup alarm and
			 * kickstart
			 */
			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
			outb(CMOS_DATA, byte);

			/* delay before next write */
			drv_usecwait(1000);

			/* for S40 the following would suffice */
			byte = inb(CMOS_DATA);

			/* power active bar control bit */
			byte |= PAB_CBIT;
			outb(CMOS_DATA, byte);

			break;

		case APIC_POWEROFF_VIA_ASPEN_BMC:
			/*
			 * Send the aspen_bmc[] command sequence.  Each step
			 * polls the busy flag; after the flag is seen busy
			 * more than three times the sequence is restarted,
			 * and it is abandoned once restarts reaches 3.
			 */
			restarts = 0;
restart_aspen_bmc:
			if (++restarts == 3)
				break;
			attempts = 0;
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
			i = 0;
			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(MISMIC_FLAG_REGISTER);
					byte &= MISMIC_BUSY_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_aspen_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
				byte = inb(MISMIC_FLAG_REGISTER);
				byte |= 0x1;
				outb(MISMIC_FLAG_REGISTER, byte);
			}
			break;

		case APIC_POWEROFF_VIA_SITKA_BMC:
			/*
			 * Send the sitka_bmc[] port/data sequence, using the
			 * same poll/restart scheme as the Aspen case above.
			 */
			restarts = 0;
restart_sitka_bmc:
			if (++restarts == 3)
				break;
			attempts = 0;
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_STATE_MASK;
				if ((byte == SMS_READ_STATE) ||
				    (byte == SMS_WRITE_STATE)) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE));
			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
			i = 0;
			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(SMS_STATUS_REGISTER);
					byte &= SMS_IBF_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_sitka_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(sitka_bmc[i].port, sitka_bmc[i].data);
			}
			break;

		case APIC_POWEROFF_NONE:

			/* If no APIC direct method, we will try using ACPI */
			if (apic_enable_acpi) {
				if (acpi_poweroff() == 1)
					return;
			} else
				return;

			break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}
1849 
1850 /*
1851  * Try and disable all interrupts. We just assign interrupts to other
1852  * processors based on policy. If any were bound by user request, we
1853  * let them continue and return failure. We do not bother to check
1854  * for cache affinity while rebinding.
1855  */
1856 
1857 static int
1858 apic_disable_intr(processorid_t cpun)
1859 {
1860 	int bind_cpu = 0, i, hardbound = 0;
1861 	apic_irq_t *irq_ptr;
1862 	ulong_t iflag;
1863 
1864 	iflag = intr_clear();
1865 	lock_set(&apic_ioapic_lock);
1866 
1867 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
1868 		if (apic_reprogram_info[i].done == B_FALSE) {
1869 			if (apic_reprogram_info[i].bindcpu == cpun) {
1870 				/*
1871 				 * CPU is busy -- it's the target of
1872 				 * a pending reprogramming attempt
1873 				 */
1874 				lock_clear(&apic_ioapic_lock);
1875 				intr_restore(iflag);
1876 				return (PSM_FAILURE);
1877 			}
1878 		}
1879 	}
1880 
1881 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
1882 
1883 	apic_cpus[cpun].aci_curipl = 0;
1884 
1885 	i = apic_min_device_irq;
1886 	for (; i <= apic_max_device_irq; i++) {
1887 		/*
1888 		 * If there are bound interrupts on this cpu, then
1889 		 * rebind them to other processors.
1890 		 */
1891 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
1892 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
1893 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
1894 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
1895 			    apic_nproc));
1896 
1897 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
1898 				hardbound = 1;
1899 				continue;
1900 			}
1901 
1902 			if (irq_ptr->airq_temp_cpu == cpun) {
1903 				do {
1904 					bind_cpu = apic_next_bind_cpu++;
1905 					if (bind_cpu >= apic_nproc) {
1906 						apic_next_bind_cpu = 1;
1907 						bind_cpu = 0;
1908 
1909 					}
1910 				} while (apic_rebind_all(irq_ptr, bind_cpu));
1911 			}
1912 		}
1913 	}
1914 
1915 	lock_clear(&apic_ioapic_lock);
1916 	intr_restore(iflag);
1917 
1918 	if (hardbound) {
1919 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
1920 		    "due to user bound interrupts", cpun);
1921 		return (PSM_FAILURE);
1922 	}
1923 	else
1924 		return (PSM_SUCCESS);
1925 }
1926 
1927 /*
1928  * Bind interrupts to the CPU's local APIC.
1929  * Interrupts should not be bound to a CPU's local APIC until the CPU
1930  * is ready to receive interrupts.
1931  */
1932 static void
1933 apic_enable_intr(processorid_t cpun)
1934 {
1935 	int	i;
1936 	apic_irq_t *irq_ptr;
1937 	ulong_t iflag;
1938 
1939 	iflag = intr_clear();
1940 	lock_set(&apic_ioapic_lock);
1941 
1942 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
1943 
1944 	i = apic_min_device_irq;
1945 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
1946 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
1947 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
1948 				(void) apic_rebind_all(irq_ptr,
1949 				    irq_ptr->airq_cpu);
1950 			}
1951 		}
1952 	}
1953 
1954 	lock_clear(&apic_ioapic_lock);
1955 	intr_restore(iflag);
1956 }
1957 
1958 
1959 /*
1960  * This function will reprogram the timer.
1961  *
1962  * When in oneshot mode the argument is the absolute time in future to
1963  * generate the interrupt at.
1964  *
1965  * When in periodic mode, the argument is the interval at which the
1966  * interrupts should be generated. There is no need to support the periodic
1967  * mode timer change at this time.
1968  */
1969 static void
1970 apic_timer_reprogram(hrtime_t time)
1971 {
1972 	hrtime_t now;
1973 	uint_t ticks;
1974 	int64_t delta;
1975 
1976 	/*
1977 	 * We should be called from high PIL context (CBE_HIGH_PIL),
1978 	 * so kpreempt is disabled.
1979 	 */
1980 
1981 	if (!apic_oneshot) {
1982 		/* time is the interval for periodic mode */
1983 		ticks = APIC_NSECS_TO_TICKS(time);
1984 	} else {
1985 		/* one shot mode */
1986 
1987 		now = gethrtime();
1988 		delta = time - now;
1989 
1990 		if (delta <= 0) {
1991 			/*
1992 			 * requested to generate an interrupt in the past
1993 			 * generate an interrupt as soon as possible
1994 			 */
1995 			ticks = apic_min_timer_ticks;
1996 		} else if (delta > apic_nsec_max) {
1997 			/*
1998 			 * requested to generate an interrupt at a time
1999 			 * further than what we are capable of. Set to max
2000 			 * the hardware can handle
2001 			 */
2002 
2003 			ticks = APIC_MAXVAL;
2004 #ifdef DEBUG
2005 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
2006 			    "  %lld  too far in future, current time"
2007 			    "  %lld \n", time, now);
2008 #endif
2009 		} else
2010 			ticks = APIC_NSECS_TO_TICKS(delta);
2011 	}
2012 
2013 	if (ticks < apic_min_timer_ticks)
2014 		ticks = apic_min_timer_ticks;
2015 
2016 	apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
2017 }
2018 
2019 /*
2020  * This function will enable timer interrupts.
2021  */
2022 static void
2023 apic_timer_enable(void)
2024 {
2025 	/*
2026 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
2027 	 * so kpreempt is disabled.
2028 	 */
2029 
2030 	if (!apic_oneshot) {
2031 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
2032 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
2033 	} else {
2034 		/* one shot */
2035 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
2036 		    (apic_clkvect + APIC_BASE_VECT));
2037 	}
2038 }
2039 
/*
 * This function will disable timer interrupts by writing the clock
 * vector with AV_MASK set to the local timer register.
 */
static void
apic_timer_disable(void)
{
	/*
	 * We should be Called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
	    (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
}
2053 
2054 /*
2055  * Set timer far into the future and return timer
2056  * current Count in nanoseconds.
2057  */
2058 hrtime_t
2059 apic_timer_stop_count(void)
2060 {
2061 	hrtime_t	ns_val;
2062 	int		enable_val, count_val;
2063 
2064 	/*
2065 	 * Should be called with interrupts disabled.
2066 	 */
2067 	ASSERT(!interrupts_enabled());
2068 
2069 	enable_val = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
2070 	if ((enable_val & AV_MASK) == AV_MASK)
2071 		return ((hrtime_t)-1);		/* timer is disabled */
2072 
2073 	count_val = apic_reg_ops->apic_read(APIC_CURR_COUNT);
2074 	ns_val = APIC_TICKS_TO_NSECS(count_val);
2075 
2076 	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
2077 
2078 	return (ns_val);
2079 }
2080 
/*
 * Reprogram timer after Deep C-State.
 * Simply forwards time to apic_timer_reprogram(), so it carries the
 * same meaning as that function's argument.
 */
void
apic_timer_restart(hrtime_t time)
{
	apic_timer_reprogram(time);
}
2089 
/* value returned by ddi_periodic_add() in apic_post_cyclic_setup() */
ddi_periodic_t apic_periodic_id;
2091 
2092 /*
2093  * If this module needs a periodic handler for the interrupt distribution, it
2094  * can be added here. The argument to the periodic handler is not currently
2095  * used, but is reserved for future.
2096  */
2097 static void
2098 apic_post_cyclic_setup(void *arg)
2099 {
2100 _NOTE(ARGUNUSED(arg))
2101 	/* cpu_lock is held */
2102 	/* set up a periodic handler for intr redistribution */
2103 
2104 	/*
2105 	 * In peridoc mode intr redistribution processing is done in
2106 	 * apic_intr_enter during clk intr processing
2107 	 */
2108 	if (!apic_oneshot)
2109 		return;
2110 	/*
2111 	 * Register a periodical handler for the redistribution processing.
2112 	 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
2113 	 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
2114 	 */
2115 	apic_periodic_id = ddi_periodic_add(
2116 	    (void (*)(void *))apic_redistribute_compute, NULL,
2117 	    apic_redistribute_sample_interval, DDI_IPL_2);
2118 }
2119 
2120 static void
2121 apic_redistribute_compute(void)
2122 {
2123 	int	i, j, max_busy;
2124 
2125 	if (apic_enable_dynamic_migration) {
2126 		if (++apic_nticks == apic_sample_factor_redistribution) {
2127 			/*
2128 			 * Time to call apic_intr_redistribute().
2129 			 * reset apic_nticks. This will cause max_busy
2130 			 * to be calculated below and if it is more than
2131 			 * apic_int_busy, we will do the whole thing
2132 			 */
2133 			apic_nticks = 0;
2134 		}
2135 		max_busy = 0;
2136 		for (i = 0; i < apic_nproc; i++) {
2137 
2138 			/*
2139 			 * Check if curipl is non zero & if ISR is in
2140 			 * progress
2141 			 */
2142 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
2143 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
2144 
2145 				int	irq;
2146 				apic_cpus[i].aci_busy++;
2147 				irq = apic_cpus[i].aci_current[j];
2148 				apic_irq_table[irq]->airq_busy++;
2149 			}
2150 
2151 			if (!apic_nticks &&
2152 			    (apic_cpus[i].aci_busy > max_busy))
2153 				max_busy = apic_cpus[i].aci_busy;
2154 		}
2155 		if (!apic_nticks) {
2156 			if (max_busy > apic_int_busy_mark) {
2157 			/*
2158 			 * We could make the following check be
2159 			 * skipped > 1 in which case, we get a
2160 			 * redistribution at half the busy mark (due to
2161 			 * double interval). Need to be able to collect
2162 			 * more empirical data to decide if that is a
2163 			 * good strategy. Punt for now.
2164 			 */
2165 				if (apic_skipped_redistribute) {
2166 					apic_cleanup_busy();
2167 					apic_skipped_redistribute = 0;
2168 				} else {
2169 					apic_intr_redistribute();
2170 				}
2171 			} else
2172 				apic_skipped_redistribute++;
2173 		}
2174 	}
2175 }
2176 
2177 
2178 /*
2179  * The following functions are in the platform specific file so that they
2180  * can be different functions depending on whether we are running on
2181  * bare metal or a hypervisor.
2182  */
2183 
/*
 * Map "len" bytes of APIC register space at physical address "addr" for
 * memory-mapped access.  "flags" is passed straight to psm_map_phys().
 * Returns whatever psm_map_phys() returns, viewed as a uint32_t pointer.
 */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	uint32_t	*regs;

	/*LINTED: pointer cast may result in improper alignment */
	regs = (uint32_t *)psm_map_phys(addr, len, flags);
	return (regs);
}
2193 
/*
 * Map an I/O APIC for memory-mapped access.  In this file the work is
 * delegated to mapin_apic() with the same arguments.
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	uint32_t	*regs = mapin_apic(addr, len, flags);

	return (regs);
}
2199 
2200 /*
2201  * unmap an apic
2202  */
2203 void
2204 mapout_apic(caddr_t addr, size_t len)
2205 {
2206 	psm_unmap_phys(addr, len);
2207 }
2208 
2209 void
2210 mapout_ioapic(caddr_t addr, size_t len)
2211 {
2212 	mapout_apic(addr, len);
2213 }
2214 
2215 /*
2216  * Check to make sure there are enough irq slots
2217  */
2218 int
2219 apic_check_free_irqs(int count)
2220 {
2221 	int i, avail;
2222 
2223 	avail = 0;
2224 	for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
2225 		if ((apic_irq_table[i] == NULL) ||
2226 		    apic_irq_table[i]->airq_mps_intr_index == FREE_INDEX) {
2227 			if (++avail >= count)
2228 				return (PSM_SUCCESS);
2229 		}
2230 	}
2231 	return (PSM_FAILURE);
2232 }
2233 
2234 /*
2235  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
2236  */
2237 int
2238 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
2239     int behavior)
2240 {
2241 	int	rcount, i;
2242 	uchar_t	start, irqno;
2243 	uint32_t cpu;
2244 	major_t	major;
2245 	apic_irq_t	*irqptr;
2246 
2247 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
2248 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
2249 	    (void *)dip, inum, pri, count, behavior));
2250 
2251 	if (count > 1) {
2252 		if (behavior == DDI_INTR_ALLOC_STRICT &&
2253 		    apic_multi_msi_enable == 0)
2254 			return (0);
2255 		if (apic_multi_msi_enable == 0)
2256 			count = 1;
2257 	}
2258 
2259 	if ((rcount = apic_navail_vector(dip, pri)) > count)
2260 		rcount = count;
2261 	else if (rcount == 0 || (rcount < count &&
2262 	    behavior == DDI_INTR_ALLOC_STRICT))
2263 		return (0);
2264 
2265 	/* if not ISP2, then round it down */
2266 	if (!ISP2(rcount))
2267 		rcount = 1 << (highbit(rcount) - 1);
2268 
2269 	mutex_enter(&airq_mutex);
2270 
2271 	for (start = 0; rcount > 0; rcount >>= 1) {
2272 		if ((start = apic_find_multi_vectors(pri, rcount)) != 0 ||
2273 		    behavior == DDI_INTR_ALLOC_STRICT)
2274 			break;
2275 	}
2276 
2277 	if (start == 0) {
2278 		/* no vector available */
2279 		mutex_exit(&airq_mutex);
2280 		return (0);
2281 	}
2282 
2283 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
2284 		/* not enough free irq slots available */
2285 		mutex_exit(&airq_mutex);
2286 		return (0);
2287 	}
2288 
2289 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
2290 	for (i = 0; i < rcount; i++) {
2291 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
2292 		    (uchar_t)-1) {
2293 			/*
2294 			 * shouldn't happen because of the
2295 			 * apic_check_free_irqs() check earlier
2296 			 */
2297 			mutex_exit(&airq_mutex);
2298 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
2299 			    "apic_allocate_irq failed\n"));
2300 			return (i);
2301 		}
2302 		apic_max_device_irq = max(irqno, apic_max_device_irq);
2303 		apic_min_device_irq = min(irqno, apic_min_device_irq);
2304 		irqptr = apic_irq_table[irqno];
2305 #ifdef	DEBUG
2306 		if (apic_vector_to_irq[start + i] != APIC_RESV_IRQ)
2307 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
2308 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
2309 #endif
2310 		apic_vector_to_irq[start + i] = (uchar_t)irqno;
2311 
2312 		irqptr->airq_vector = (uchar_t)(start + i);
2313 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
2314 		irqptr->airq_intin_no = (uchar_t)rcount;
2315 		irqptr->airq_ipl = pri;
2316 		irqptr->airq_vector = start + i;
2317 		irqptr->airq_origirq = (uchar_t)(inum + i);
2318 		irqptr->airq_share_id = 0;
2319 		irqptr->airq_mps_intr_index = MSI_INDEX;
2320 		irqptr->airq_dip = dip;
2321 		irqptr->airq_major = major;
2322 		if (i == 0) /* they all bound to the same cpu */
2323 			cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno,
2324 			    0xff, 0xff);
2325 		else
2326 			irqptr->airq_cpu = cpu;
2327 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
2328 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
2329 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
2330 		    irqptr->airq_origirq, pri));
2331 	}
2332 	mutex_exit(&airq_mutex);
2333 	return (rcount);
2334 }
2335 
2336 /*
2337  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
2338  */
2339 int
2340 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
2341     int behavior)
2342 {
2343 	int	rcount, i;
2344 	major_t	major;
2345 
2346 	mutex_enter(&airq_mutex);
2347 
2348 	if ((rcount = apic_navail_vector(dip, pri)) > count)
2349 		rcount = count;
2350 	else if (rcount == 0 || (rcount < count &&
2351 	    behavior == DDI_INTR_ALLOC_STRICT)) {
2352 		rcount = 0;
2353 		goto out;
2354 	}
2355 
2356 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
2357 		/* not enough free irq slots available */
2358 		rcount = 0;
2359 		goto out;
2360 	}
2361 
2362 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
2363 	for (i = 0; i < rcount; i++) {
2364 		uchar_t	vector, irqno;
2365 		apic_irq_t	*irqptr;
2366 
2367 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
2368 		    (uchar_t)-1) {
2369 			/*
2370 			 * shouldn't happen because of the
2371 			 * apic_check_free_irqs() check earlier
2372 			 */
2373 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
2374 			    "apic_allocate_irq failed\n"));
2375 			rcount = i;
2376 			goto out;
2377 		}
2378 		if ((vector = apic_allocate_vector(pri, irqno, 1)) == 0) {
2379 			/*
2380 			 * shouldn't happen because of the
2381 			 * apic_navail_vector() call earlier
2382 			 */
2383 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
2384 			    "apic_allocate_vector failed\n"));
2385 			rcount = i;
2386 			goto out;
2387 		}
2388 		apic_max_device_irq = max(irqno, apic_max_device_irq);
2389 		apic_min_device_irq = min(irqno, apic_min_device_irq);
2390 		irqptr = apic_irq_table[irqno];
2391 		irqptr->airq_vector = (uchar_t)vector;
2392 		irqptr->airq_ipl = pri;
2393 		irqptr->airq_origirq = (uchar_t)(inum + i);
2394 		irqptr->airq_share_id = 0;
2395 		irqptr->airq_mps_intr_index = MSIX_INDEX;
2396 		irqptr->airq_dip = dip;
2397 		irqptr->airq_major = major;
2398 		irqptr->airq_cpu = apic_bind_intr(dip, irqno, 0xff, 0xff);
2399 	}
2400 out:
2401 	mutex_exit(&airq_mutex);
2402 	return (rcount);
2403 }
2404 
2405 /*
2406  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
2407  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
2408  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
2409  * requests and allocated only when pri is set.
2410  */
2411 uchar_t
2412 apic_allocate_vector(int ipl, int irq, int pri)
2413 {
2414 	int	lowest, highest, i;
2415 
2416 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
2417 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
2418 
2419 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
2420 		lowest -= APIC_VECTOR_PER_IPL;
2421 
2422 #ifdef	DEBUG
2423 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
2424 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
2425 #endif /* DEBUG */
2426 	if (pri == 0)
2427 		highest -= APIC_HI_PRI_VECTS;
2428 
2429 	for (i = lowest; i < highest; i++) {
2430 		if (APIC_CHECK_RESERVE_VECTORS(i))
2431 			continue;
2432 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
2433 			apic_vector_to_irq[i] = (uchar_t)irq;
2434 			return (i);
2435 		}
2436 	}
2437 
2438 	return (0);
2439 }
2440 
2441 /* Mark vector as not being used by any irq */
2442 void
2443 apic_free_vector(uchar_t vector)
2444 {
2445 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
2446 }
2447 
2448 uint32_t
2449 ioapic_read(int ioapic_ix, uint32_t reg)
2450 {
2451 	volatile uint32_t *ioapic;
2452 
2453 	ioapic = apicioadr[ioapic_ix];
2454 	ioapic[APIC_IO_REG] = reg;
2455 	return (ioapic[APIC_IO_DATA]);
2456 }
2457 
2458 void
2459 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
2460 {
2461 	volatile uint32_t *ioapic;
2462 
2463 	ioapic = apicioadr[ioapic_ix];
2464 	ioapic[APIC_IO_REG] = reg;
2465 	ioapic[APIC_IO_DATA] = value;
2466 }
2467 
2468 void
2469 ioapic_write_eoi(int ioapic_ix, uint32_t value)
2470 {
2471 	volatile uint32_t *ioapic;
2472 
2473 	ioapic = apicioadr[ioapic_ix];
2474 	ioapic[APIC_IO_EOI] = value;
2475 }
2476 
2477 static processorid_t
2478 apic_find_cpu(int flag)
2479 {
2480 	processorid_t acid = 0;
2481 	int i;
2482 
2483 	/* Find the first CPU with the passed-in flag set */
2484 	for (i = 0; i < apic_nproc; i++) {
2485 		if (apic_cpus[i].aci_status & flag) {
2486 			acid = i;
2487 			break;
2488 		}
2489 	}
2490 
2491 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
2492 	return (acid);
2493 }
2494 
2495 /*
2496  * Call rebind to do the actual programming.
2497  * Must be called with interrupts disabled and apic_ioapic_lock held
2498  * 'p' is polymorphic -- if this function is called to process a deferred
2499  * reprogramming, p is of type 'struct ioapic_reprogram_data *', from which
2500  * the irq pointer is retrieved.  If not doing deferred reprogramming,
2501  * p is of the type 'apic_irq_t *'.
2502  *
2503  * apic_ioapic_lock must be held across this call, as it protects apic_rebind
2504  * and it protects apic_find_cpu() from a race in which a CPU can be taken
2505  * offline after a cpu is selected, but before apic_rebind is called to
2506  * bind interrupts to it.
2507  */
2508 int
2509 apic_setup_io_intr(void *p, int irq, boolean_t deferred)
2510 {
2511 	apic_irq_t *irqptr;
2512 	struct ioapic_reprogram_data *drep = NULL;
2513 	int rv;
2514 
2515 	if (deferred) {
2516 		drep = (struct ioapic_reprogram_data *)p;
2517 		ASSERT(drep != NULL);
2518 		irqptr = drep->irqp;
2519 	} else
2520 		irqptr = (apic_irq_t *)p;
2521 
2522 	ASSERT(irqptr != NULL);
2523 
2524 	rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, drep);
2525 	if (rv) {
2526 		/*
2527 		 * CPU is not up or interrupts are disabled. Fall back to
2528 		 * the first available CPU
2529 		 */
2530 		rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
2531 		    drep);
2532 	}
2533 
2534 	return (rv);
2535 }
2536 
2537 
2538 uchar_t
2539 apic_modify_vector(uchar_t vector, int irq)
2540 {
2541 	apic_vector_to_irq[vector] = (uchar_t)irq;
2542 	return (vector);
2543 }
2544 
2545 char *
2546 apic_get_apic_type()
2547 {
2548 	return (apic_psm_info.p_mach_idstring);
2549 }
2550 
2551 void
2552 x2apic_update_psm()
2553 {
2554 	struct psm_ops *pops = &apic_ops;
2555 
2556 	ASSERT(pops != NULL);
2557 
2558 	/*
2559 	 * We don't need to do any magic if one of the following
2560 	 * conditions is true :
2561 	 * - Not being run under kernel debugger.
2562 	 * - MP is not set.
2563 	 * - Booted with one CPU only.
2564 	 * - One CPU configured.
2565 	 *
2566 	 * We set apic_common_send_ipi() since kernel debuggers
2567 	 * attempt to send IPIs to other slave CPUs during
2568 	 * entry (exit) from (to) debugger.
2569 	 */
2570 	if (!(boothowto & RB_DEBUG) || use_mp == 0 ||
2571 	    apic_nproc == 1 || boot_ncpus == 1) {
2572 		pops->psm_send_ipi =  x2apic_send_ipi;
2573 	} else {
2574 		pops->psm_send_ipi =  apic_common_send_ipi;
2575 	}
2576 
2577 	pops->psm_intr_exit = x2apic_intr_exit;
2578 	pops->psm_setspl = x2apic_setspl;
2579 
2580 	send_dirintf = pops->psm_send_ipi;
2581 
2582 	apic_mode = LOCAL_X2APIC;
2583 	apic_change_ops();
2584 }
2585 
2586 static void
2587 apic_intrr_init(int apic_mode)
2588 {
2589 	int suppress_brdcst_eoi = 0;
2590 
2591 	if (psm_vt_ops != NULL) {
2592 		if (((apic_intrr_ops_t *)psm_vt_ops)->apic_intrr_init(apic_mode)
2593 		    == DDI_SUCCESS) {
2594 			apic_vt_ops = psm_vt_ops;
2595 
2596 			/*
2597 			 * We leverage the interrupt remapping engine to
2598 			 * suppress broadcast EOI; thus we must send the
2599 			 * directed EOI with the directed-EOI handler.
2600 			 */
2601 			if (apic_directed_EOI_supported() == 0) {
2602 				suppress_brdcst_eoi = 1;
2603 				apic_set_directed_EOI_handler();
2604 			}
2605 
2606 			apic_vt_ops->apic_intrr_enable(suppress_brdcst_eoi);
2607 		}
2608 	}
2609 }
2610 
2611 /*ARGSUSED*/
2612 static void
2613 apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt)
2614 {
2615 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
2616 }
2617 
2618 /*ARGSUSED*/
2619 static void
2620 apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs)
2621 {
2622 	mregs->mr_addr = MSI_ADDR_HDR |
2623 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
2624 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
2625 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
2626 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
2627 	    mregs->mr_data;
2628 }
2629