xref: /titanic_51/usr/src/uts/i86pc/io/pcplusmp/apic.c (revision 34f1a571c0d0c682a4a70b97b1e62430aa630559)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
29  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
30  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
31  * PSMI 1.5 extensions are supported in Solaris Nevada.
32  * PSMI 1.6 extensions are supported in Solaris Nevada.
33  */
34 #define	PSMI_1_6
35 
36 #include <sys/processor.h>
37 #include <sys/time.h>
38 #include <sys/psm.h>
39 #include <sys/smp_impldefs.h>
40 #include <sys/cram.h>
41 #include <sys/acpi/acpi.h>
42 #include <sys/acpica.h>
43 #include <sys/psm_common.h>
44 #include <sys/apic.h>
45 #include <sys/pit.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/pci.h>
50 #include <sys/promif.h>
51 #include <sys/x86_archext.h>
52 #include <sys/cpc_impl.h>
53 #include <sys/uadmin.h>
54 #include <sys/panic.h>
55 #include <sys/debug.h>
56 #include <sys/archsystm.h>
57 #include <sys/trap.h>
58 #include <sys/machsystm.h>
59 #include <sys/sysmacros.h>
60 #include <sys/cpuvar.h>
61 #include <sys/rm_platter.h>
62 #include <sys/privregs.h>
63 #include <sys/note.h>
64 #include <sys/pci_intr_lib.h>
65 #include <sys/spl.h>
66 #include <sys/clock.h>
67 #include <sys/dditypes.h>
68 #include <sys/sunddi.h>
69 #include <sys/x_call.h>
70 #include <sys/reboot.h>
71 #include <sys/hpet.h>
72 
73 /*
74  *	Local Function Prototypes
75  */
76 static void apic_init_intr();
77 static void apic_nmi_intr(caddr_t arg, struct regs *rp);
78 
79 /*
80  *	standard MP entries
81  */
82 static int	apic_probe();
83 static int	apic_clkinit();
84 static int	apic_getclkirq(int ipl);
85 static uint_t	apic_calibrate(volatile uint32_t *addr,
86     uint16_t *pit_ticks_adj);
87 static hrtime_t apic_gettime();
88 static hrtime_t apic_gethrtime();
89 static void	apic_init();
90 static void	apic_picinit(void);
91 static int	apic_cpu_start(processorid_t, caddr_t);
92 static int	apic_post_cpu_start(void);
93 static void	apic_send_ipi(int cpun, int ipl);
94 static void	apic_set_idlecpu(processorid_t cpun);
95 static void	apic_unset_idlecpu(processorid_t cpun);
96 static int	apic_intr_enter(int ipl, int *vect);
97 static void	apic_setspl(int ipl);
98 static void	x2apic_setspl(int ipl);
99 static int	apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
100 static int	apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
101 static void	apic_shutdown(int cmd, int fcn);
102 static void	apic_preshutdown(int cmd, int fcn);
103 static int	apic_disable_intr(processorid_t cpun);
104 static void	apic_enable_intr(processorid_t cpun);
105 static processorid_t	apic_get_next_processorid(processorid_t cpun);
106 static int		apic_get_ipivect(int ipl, int type);
107 static void	apic_timer_reprogram(hrtime_t time);
108 static void	apic_timer_enable(void);
109 static void	apic_timer_disable(void);
110 static void	apic_post_cyclic_setup(void *arg);
111 static void	apic_intrr_init(int apic_mode);
112 static void	apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt);
113 static void	apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs);
114 
/*
 * When non-zero, apic_intr_enter() skips the per-tick time accounting it
 * otherwise performs for the clock vector.  Presumably set when the local
 * APIC timer runs in one-shot mode -- TODO confirm where it is assigned.
 */
static int	apic_oneshot = 0;
int	apic_oneshot_enable = 1; /* to allow disabling one-shot capability */

/* Now the ones for Dynamic Interrupt distribution */
int	apic_enable_dynamic_migration = 0;

/*
 * Defined elsewhere.  On non-amd64 kernels apic_init() sets it when
 * cpuid_have_cr8access(CPU) is true; the interrupt entry/exit paths then
 * use setcr8() rather than writing the task priority register.
 */
extern int apic_have_32bit_cr8;

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
/* compared against the vector handed to apic_intr_enter() */
int apic_clkvect;

/*
 * vector at which error interrupts come in
 * (0 until apic_init_intr() allocates it)
 */
int apic_errvect;
int apic_enable_error_intr = 1;
/* iterations of tenmicrosec() after printing an error; doubled up to 500 */
int apic_error_display_delay = 100;

/*
 * vector at which performance counter overflow interrupts come in
 * (0 until apic_init_intr() allocates it)
 */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/*
 * vector at which CMCI interrupts come in
 * (0 until apic_init_intr() allocates it)
 */
int apic_cmci_vect;
extern int cmi_enable_cmci;
extern void cmi_cmci_trap(void);

static kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
/* set to 1 once cmci_cpu_setup() has been registered by apic_init_intr() */
static int cmci_cpu_setup_registered;
147 
148 /*
149  * The following vector assignments influence the value of ipltopri and
150  * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
151  * idle to 0 and IPL 0 to 0xf to differentiate idle in case
152  * we care to do so in future. Note some IPLs which are rarely used
153  * will share the vector ranges and heavily used IPLs (5 and 6) have
154  * a wide range.
155  *
156  * This array is used to initialize apic_ipls[] (in apic_init()).
157  *
158  *	IPL		Vector range.		as passed to intr_enter
159  *	0		none.
160  *	1,2,3		0x20-0x2f		0x0-0xf
161  *	4		0x30-0x3f		0x10-0x1f
162  *	5		0x40-0x5f		0x20-0x3f
163  *	6		0x60-0x7f		0x40-0x5f
164  *	7,8,9		0x80-0x8f		0x60-0x6f
165  *	10		0x90-0x9f		0x70-0x7f
166  *	11		0xa0-0xaf		0x80-0x8f
167  *	...		...
168  *	15		0xe0-0xef		0xc0-0xcf
169  *	15		0xf0-0xff		0xd0-0xdf
170  */
/* One entry per group of APIC_VECTOR_PER_IPL consecutive vectors. */
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
};
	/*
	 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
	 * NOTE that this is vector as passed into intr_enter which is
	 * programmed vector - 0x20 (APIC_BASE_VECT)
	 */

/* Filled in by apic_init() from apic_vectortoipl[]. */
uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
	/* The taskpri to be programmed into apic to mask given ipl */

#if defined(__amd64)
/* apic_ipltopri[] >> APIC_IPL_SHIFT, computed in apic_init(). */
uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
#endif

/*
 * Correlation of the hardware vector to the IPL in use, initialized
 * from apic_vectortoipl[] in apic_init().  The final IPLs may not correlate
 * to the IPLs in apic_vectortoipl on some systems that share interrupt lines
 * connected to errata-stricken IOAPICs
 */
uchar_t apic_ipls[APIC_AVAIL_VECTOR];
194 
/*
 * Patchable global variables.
 */
int	apic_forceload = 0;

/* Consulted by _init() to choose the psm_gethrtime implementation. */
int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
					/* 1 - use gettime() for performance */
/* Selects the value written to APIC_FORMAT_REG in apic_init_intr(). */
int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
int	apic_panic_on_nmi = 0;
/* Non-zero makes apic_error_intr() panic instead of logging. */
int	apic_panic_on_apic_error = 0;

int	apic_verbose = 0;

/* minimum number of timer ticks to program to */
int apic_min_timer_ticks = 1;
212 /*
213  *	Local static data
214  */
/*
 * PSM operations vector for this module, published through apic_psm_info.
 * The initializer is positional, so entry order must match struct psm_ops.
 * _init() may overwrite the psm_gethrtime slot with apic_gettime when
 * apic_coarse_hrtime is set.
 */
static struct	psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	(int (*)(int))NULL,		/* psm_softlvl_to_irq */
	(void (*)(int))NULL,		/* psm_set_softintr */

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL,		/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
	(void (*)(int, char *))NULL,	/* psm_notify_error */
	(void (*)(int))NULL,		/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops,			/* Advanced DDI Interrupt framework */
	apic_state,			/* save, restore apic state for S3 */
};
256 
257 
/*
 * Module description handed to psm_mod_init/psm_mod_fini/psm_mod_info.
 * apic_probe() passes its p_mach_idstring member to apic_probe_common().
 */
static struct	psm_info apic_psm_info = {
	PSM_INFO_VER01_6,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	(struct psm_ops *)&apic_ops,		/* operation */
	APIC_PCPLUSMP_NAME,			/* machine name */
	"pcplusmp v1.4 compatible",
};
265 
/* Handle shared by the psm_mod_init/psm_mod_fini/psm_mod_info calls. */
static void *apic_hdlp;

#ifdef DEBUG
int	apic_debug = 0;
int	apic_restrict_vector = 0;

int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

#endif /* DEBUG */

/* Per-CPU local APIC information, indexed by processor id. */
apic_cpus_info_t	*apic_cpus;

cpuset_t	apic_cpumask;
/* Set to 1 by apic_picinit(). */
uint_t	apic_picinit_called;

/* Flag to indicate that we need to shut down all processors */
static uint_t	apic_shutdown_processors;

/* Added to apic_nsec_since_boot on each periodic clock interrupt. */
uint_t apic_nsec_per_intr = 0;

/*
 * apic_let_idle_redistribute can have the following values:
 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
 * apic_redistribute_lock prevents multiple idle cpus from redistributing
 */
int	apic_num_idle_redistributions = 0;
static	int apic_let_idle_redistribute = 0;
static	uint_t apic_nticks = 0;
static	uint_t apic_skipped_redistribute = 0;

/* to gather intr data and redistribute */
static void apic_redistribute_compute(void);

/* Reset to apic_hertz_count on each periodic clock interrupt. */
static	uint_t last_count_read = 0;
static	lock_t	apic_gethrtime_lock;
/*
 * Incremented immediately before and immediately after
 * apic_nsec_since_boot is updated in apic_intr_enter().
 */
volatile int	apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;
static uint_t apic_hertz_count;

uint64_t apic_ticks_per_SFnsecs;	/* # of ticks in SF nsecs */

static hrtime_t apic_nsec_max;

static	hrtime_t	apic_last_hrtime = 0;
int		apic_hrtime_error = 0;
int		apic_remote_hrterr = 0;
int		apic_num_nmis = 0;
/* Accumulated (OR-ed) error status values seen by apic_error_intr(). */
int		apic_apic_error = 0;
int		apic_num_apic_errors = 0;
int		apic_num_cksum_errors = 0;

/* Bit mask of APIC_ERR_* conditions observed so far. */
int	apic_error = 0;
/* Set to 1 by apic_cpu_start() before it writes the CMOS shutdown byte. */
static	int	apic_cmos_ssb_set = 0;

/* use to make sure only one cpu handles the nmi */
static	lock_t	apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
static	lock_t	apic_error_lock;
325 
/*
 * Control/data byte pairs forming two BMC messages: a SET_WATCHDOG_TIMER
 * request followed by a RESET_WATCHDOG_TIMER request.  The code that sends
 * this table is not in this part of the file -- presumably the shutdown
 * path; confirm against the consumer.
 */
static	struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
342 
/*
 * Port/data pairs carrying the same two watchdog messages as aspen_bmc[],
 * expressed as writes to the SMS command and data registers.  The code
 * that sends this table is not in this part of the file -- confirm against
 * the consumer.
 */
static	struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
363 
/* Patchable global variables. */
int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */

/*
 * default apic ops without interrupt remapping
 *
 * The first five slots are return_instr cast to the slot's function type;
 * only the two record callbacks have real implementations.  The
 * initializer is positional, so order must match apic_intrr_ops_t.
 */
static apic_intrr_ops_t apic_nointrr_ops = {
	(int (*)(int))return_instr,
	(void (*)(int))return_instr,
	(void (*)(apic_irq_t *))return_instr,
	(void (*)(apic_irq_t *, void *))return_instr,
	(void (*)(apic_irq_t *))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};

/* Active interrupt-remapping ops; starts out pointing at the no-op set. */
apic_intrr_ops_t *apic_vt_ops = &apic_nointrr_ops;
380 
381 /*
382  *	This is the loadable module wrapper
383  */
384 
385 int
386 _init(void)
387 {
388 	if (apic_coarse_hrtime)
389 		apic_ops.psm_gethrtime = &apic_gettime;
390 	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
391 }
392 
393 int
394 _fini(void)
395 {
396 	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
397 }
398 
399 int
400 _info(struct modinfo *modinfop)
401 {
402 	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
403 }
404 
405 
406 static int
407 apic_probe()
408 {
409 	return (apic_probe_common(apic_psm_info.p_mach_idstring));
410 }
411 
412 void
413 apic_init()
414 {
415 	int i;
416 	int	j = 1;
417 
418 	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
419 	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
420 		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
421 		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
422 			/* get to highest vector at the same ipl */
423 			continue;
424 		for (; j <= apic_vectortoipl[i]; j++) {
425 			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
426 			    APIC_BASE_VECT;
427 		}
428 	}
429 	for (; j < MAXIPL + 1; j++)
430 		/* fill up any empty ipltopri slots */
431 		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;
432 	apic_init_common();
433 #if defined(__amd64)
434 	/*
435 	 * Make cpu-specific interrupt info point to cr8pri vector
436 	 */
437 	for (i = 0; i <= MAXIPL; i++)
438 		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
439 	CPU->cpu_pri_data = apic_cr8pri;
440 #else
441 	if (cpuid_have_cr8access(CPU))
442 		apic_have_32bit_cr8 = 1;
443 #endif	/* __amd64 */
444 }
445 
446 /*
447  * handler for APIC Error interrupt. Just print a warning and continue
448  */
449 static int
450 apic_error_intr()
451 {
452 	uint_t	error0, error1, error;
453 	uint_t	i;
454 
455 	/*
456 	 * We need to write before read as per 7.4.17 of system prog manual.
457 	 * We do both and or the results to be safe
458 	 */
459 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
460 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
461 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
462 	error = error0 | error1;
463 
464 	/*
465 	 * Clear the APIC error status (do this on all cpus that enter here)
466 	 * (two writes are required due to the semantics of accessing the
467 	 * error status register.)
468 	 */
469 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
470 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
471 
472 	/*
473 	 * Prevent more than 1 CPU from handling error interrupt causing
474 	 * double printing (interleave of characters from multiple
475 	 * CPU's when using prom_printf)
476 	 */
477 	if (lock_try(&apic_error_lock) == 0)
478 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
479 	if (error) {
480 #if	DEBUG
481 		if (apic_debug)
482 			debug_enter("pcplusmp: APIC Error interrupt received");
483 #endif /* DEBUG */
484 		if (apic_panic_on_apic_error)
485 			cmn_err(CE_PANIC,
486 			    "APIC Error interrupt on CPU %d. Status = %x\n",
487 			    psm_get_cpu_id(), error);
488 		else {
489 			if ((error & ~APIC_CS_ERRORS) == 0) {
490 				/* cksum error only */
491 				apic_error |= APIC_ERR_APIC_ERROR;
492 				apic_apic_error |= error;
493 				apic_num_apic_errors++;
494 				apic_num_cksum_errors++;
495 			} else {
496 				/*
497 				 * prom_printf is the best shot we have of
498 				 * something which is problem free from
499 				 * high level/NMI type of interrupts
500 				 */
501 				prom_printf("APIC Error interrupt on CPU %d. "
502 				    "Status 0 = %x, Status 1 = %x\n",
503 				    psm_get_cpu_id(), error0, error1);
504 				apic_error |= APIC_ERR_APIC_ERROR;
505 				apic_apic_error |= error;
506 				apic_num_apic_errors++;
507 				for (i = 0; i < apic_error_display_delay; i++) {
508 					tenmicrosec();
509 				}
510 				/*
511 				 * provide more delay next time limited to
512 				 * roughly 1 clock tick time
513 				 */
514 				if (apic_error_display_delay < 500)
515 					apic_error_display_delay *= 2;
516 			}
517 		}
518 		lock_clear(&apic_error_lock);
519 		return (DDI_INTR_CLAIMED);
520 	} else {
521 		lock_clear(&apic_error_lock);
522 		return (DDI_INTR_UNCLAIMED);
523 	}
524 	/* NOTREACHED */
525 }
526 
/*
 * Turn off the mask bit in the performance counter Local Vector Table entry.
 *
 * Read-modify-write of APIC_PCINT_VECT: every bit is written back as read
 * except APIC_LVT_MASK, which is cleared.  apic_init_intr() installs this
 * function as kcpc_hw_enable_cpc_intr.
 */
static void
apic_cpcovf_mask_clear(void)
{
	apic_reg_ops->apic_write(APIC_PCINT_VECT,
	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
}
536 
/*
 * Cross-call target used by cmci_cpu_setup() for CPU_ON: writes
 * apic_cmci_vect (without AV_MASK) to the CMCI LVT entry of the CPU it
 * runs on.  The three arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	return (0);
}
544 
/*
 * Cross-call target used by cmci_cpu_setup() for CPU_OFF: writes
 * apic_cmci_vect with AV_MASK set to the CMCI LVT entry of the CPU it
 * runs on.  The three arguments are unused.  Always returns 0.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
	return (0);
}
552 
553 /*ARGSUSED*/
554 static int
555 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
556 {
557 	cpuset_t	cpu_set;
558 
559 	CPUSET_ONLY(cpu_set, cpuid);
560 
561 	switch (what) {
562 		case CPU_ON:
563 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
564 			    (xc_func_t)apic_cmci_enable);
565 			break;
566 
567 		case CPU_OFF:
568 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
569 			    (xc_func_t)apic_cmci_disable);
570 			break;
571 
572 		default:
573 			break;
574 	}
575 
576 	return (0);
577 }
578 
/*
 * Program the local APIC of the CPU this runs on.
 *
 * In order: raise the task priority to APIC_MASK_ALL; in MMIO mode
 * (apic_mode == LOCAL_APIC) program the format and destination registers;
 * enable the APIC through the spurious interrupt register; mask the timer
 * and LINT0 entries and set LINT1 to NMI; then, depending on how many LVT
 * entries the APIC has, set up the performance-counter overflow, thermal
 * and error entries; finally set up CMCI when cmi_enable_cmci is set.
 *
 * The shared vectors apic_cpcovf_vect, apic_errvect and apic_cmci_vect are
 * allocated, and their handlers added with add_avintr(), only while they
 * are still 0; later calls just write the existing vector to the LVT.
 */
static void
apic_init_intr()
{
	processorid_t	cpun = psm_get_cpu_id();
	uint_t nlvt;
	uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;

	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);

	if (apic_mode == LOCAL_APIC) {
		/*
		 * We are running APIC in MMIO mode.
		 */
		if (apic_flat_model) {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_FLAT_MODEL);
		} else {
			apic_reg_ops->apic_write(APIC_FORMAT_REG,
			    APIC_CLUSTER_MODEL);
		}

		/*
		 * NOTE(review): the shift count is the raw cpu id with no
		 * range check here -- confirm callers bound cpun.
		 */
		apic_reg_ops->apic_write(APIC_DEST_REG,
		    AV_HIGH_ORDER >> cpun);
	}

	if (apic_directed_EOI_supported()) {
		/*
		 * Setting the 12th bit in the Spurious Interrupt Vector
		 * Register suppresses broadcast EOIs generated by the local
		 * APIC. The suppression of broadcast EOIs happens only when
		 * interrupts are level-triggered.
		 */
		svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
	}

	/* need to enable APIC before unmasking NMI */
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);

	/*
	 * Presence of an invalid vector with delivery mode AV_FIXED can
	 * cause an error interrupt, even if the entry is masked...so
	 * write a valid vector to LVT entries along with the mask bit
	 */

	/* All APICs have timer and LINT0/1 */
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI);	/* enable NMI */

	/*
	 * On integrated APICs, the number of LVT entries is
	 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
	 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
	 */

	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
		nlvt = 3;
	} else {
		/* bits 16-23 of the version register, plus one */
		nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
		    0xFF) + 1;
	}

	if (nlvt >= 5) {
		/* Enable performance counter overflow interrupt */

		/* without X86_MSR the overflow interrupt is turned off */
		if ((x86_feature & X86_MSR) != X86_MSR)
			apic_enable_cpcovf_intr = 0;
		if (apic_enable_cpcovf_intr) {
			if (apic_cpcovf_vect == 0) {
				int ipl = APIC_PCINT_IPL;
				int irq = apic_get_ipivect(ipl, -1);

				ASSERT(irq != -1);
				apic_cpcovf_vect =
				    apic_irq_table[irq]->airq_vector;
				ASSERT(apic_cpcovf_vect);
				(void) add_avintr(NULL, ipl,
				    (avfunc)kcpc_hw_overflow_intr,
				    "apic pcint", irq, NULL, NULL, NULL, NULL);
				kcpc_hw_overflow_intr_installed = 1;
				kcpc_hw_enable_cpc_intr =
				    apic_cpcovf_mask_clear;
			}
			apic_reg_ops->apic_write(APIC_PCINT_VECT,
			    apic_cpcovf_vect);
		}
	}

	if (nlvt >= 6) {
		/* Only mask TM intr if the BIOS apparently doesn't use it */

		uint32_t lvtval;

		/*
		 * The entry is left alone only when it is both unmasked
		 * and set to SMI delivery.
		 */
		lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
		if (((lvtval & AV_MASK) == AV_MASK) ||
		    ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
			apic_reg_ops->apic_write(APIC_THERM_VECT,
			    AV_MASK|APIC_RESV_IRQ);
		}
	}

	/* Enable error interrupt */

	if (nlvt >= 4 && apic_enable_error_intr) {
		if (apic_errvect == 0) {
			int ipl = 0xf;	/* get highest priority intr */
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_errvect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr((void *)NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    irq, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
		/* clear any stale error status (two writes) */
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
		apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
	}

	/* Enable CMCI interrupt */
	if (cmi_enable_cmci) {

		/* register the CPU on/off callback exactly once */
		mutex_enter(&cmci_cpu_setup_lock);
		if (cmci_cpu_setup_registered == 0) {
			mutex_enter(&cpu_lock);
			register_cpu_setup_func(cmci_cpu_setup, NULL);
			mutex_exit(&cpu_lock);
			cmci_cpu_setup_registered = 1;
		}
		mutex_exit(&cmci_cpu_setup_lock);

		if (apic_cmci_vect == 0) {
			int ipl = 0x2;
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_cmci_vect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_cmci_vect);

			(void) add_avintr(NULL, ipl,
			    (avfunc)cmi_cmci_trap,
			    "apic cmci intr", irq, NULL, NULL, NULL, NULL);
		}
		apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	}
}
730 
731 static void
732 apic_disable_local_apic()
733 {
734 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
735 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
736 
737 	/* local intr reg 0 */
738 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
739 
740 	/* disable NMI */
741 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
742 
743 	/* and error interrupt */
744 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
745 
746 	/* and perf counter intr */
747 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
748 
749 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
750 }
751 
752 static void
753 apic_picinit(void)
754 {
755 	int i, j;
756 	uint_t isr;
757 
758 	/*
759 	 * Initialize and enable interrupt remapping before apic
760 	 * hardware initialization
761 	 */
762 	apic_intrr_init(apic_mode);
763 
764 	/*
765 	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
766 	 * bit on without clearing it with EOI.  Since softint
767 	 * uses vector 0x20 to interrupt itself, so softint will
768 	 * not work on this machine.  In order to fix this problem
769 	 * a check is made to verify all the isr bits are clear.
770 	 * If not, EOIs are issued to clear the bits.
771 	 */
772 	for (i = 7; i >= 1; i--) {
773 		isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
774 		if (isr != 0)
775 			for (j = 0; ((j < 32) && (isr != 0)); j++)
776 				if (isr & (1 << j)) {
777 					apic_reg_ops->apic_write(
778 					    APIC_EOI_REG, 0);
779 					isr &= ~(1 << j);
780 					apic_error |= APIC_ERR_BOOT_EOI;
781 				}
782 	}
783 
784 	/* set a flag so we know we have run apic_picinit() */
785 	apic_picinit_called = 1;
786 	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
787 	LOCK_INIT_CLEAR(&apic_ioapic_lock);
788 	LOCK_INIT_CLEAR(&apic_error_lock);
789 
790 	picsetup();	 /* initialise the 8259 */
791 
792 	/* add nmi handler - least priority nmi handler */
793 	LOCK_INIT_CLEAR(&apic_nmi_lock);
794 
795 	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
796 	    "pcplusmp NMI handler", (caddr_t)NULL))
797 		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");
798 
799 	/*
800 	 * Check for directed-EOI capability in the local APIC.
801 	 */
802 	if (apic_directed_EOI_supported() == 1) {
803 		apic_set_directed_EOI_handler();
804 	}
805 
806 	apic_init_intr();
807 
808 	/* enable apic mode if imcr present */
809 	if (apic_imcrp) {
810 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
811 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
812 	}
813 
814 	ioapic_init_intr(IOAPIC_MASK);
815 }
816 
817 
/*
 * psm_ops cpu_start entry: kick processor 'cpun' out of reset.
 *
 * With interrupts disabled on the calling CPU, this writes BIOS_SHUTDOWN
 * to CMOS location SSB, sends an assert-reset IPI followed by a
 * deassert-reset IPI to the target's local APIC id, waits 20 ms, and, for
 * integrated APICs, sends two STARTUP IPIs each followed by a 200 us wait.
 * The STARTUP vector is the page number of rm_platter_pa.
 *
 * 'arg' is unused.  Always returns 0.
 */
/*ARGSUSED1*/
static int
apic_cpu_start(processorid_t cpun, caddr_t arg)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		cpu_id;
	ulong_t		iflag;

	cpu_id =  apic_cpus[cpun].aci_local_id;

	apic_cmos_ssb_set = 1;

	/*
	 * Interrupts on BSP cpu will be disabled during these startup
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */

	iflag = intr_clear();
	outb(CMOS_ADDR, SSB);
	outb(CMOS_DATA, BIOS_SHUTDOWN);

	/*
	 * According to X2APIC specification in section '2.3.5.1' of
	 * Interrupt Command Register Semantics, the semantics of
	 * programming the Interrupt Command Register to dispatch an interrupt
	 * is simplified. A single MSR write to the 64-bit ICR is required
	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
	 * interface to ICR, system software is not required to check the
	 * status of the delivery status bit prior to writing to the ICR
	 * to send an IPI. With the removal of the Delivery Status bit,
	 * system software no longer has a reason to read the ICR. It remains
	 * readable only to aid in debugging.
	 */
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC) {
		APIC_AV_PENDING_SET();
	}
#endif /* DEBUG */

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);

	/*
	 * If only 1 CPU is installed, PENDING bit will not go low, so the
	 * wait is bounded at 0x1000 polls.  The pending bit is polled in
	 * MMIO mode only.
	 */
	for (loop_count = 0x1000; loop_count; loop_count--) {
		if (apic_mode == LOCAL_APIC &&
		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
			apic_ret();
		else
			break;
	}

	apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);

	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);

		drv_usecwait(200);		/* 200 micro sec */

		apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);

		drv_usecwait(200);		/* 200 micro sec */
	}
	intr_restore(iflag);
	return (0);
}
896 
897 
#ifdef	DEBUG
/* apic_intr_enter() calls apic_break() when running on this cpu id */
int	apic_break_on_cpu = 9;
/*
 * When non-zero, apic_intr_enter() passes this value to drv_usecwait()
 * for ipls whose bit (1 << ipl) is set in apic_stretch_ISR.
 */
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */

/*
 * Deliberately empty; it exists only as a place to set a debugger
 * breakpoint (see apic_break_on_cpu).
 */
void
apic_break()
{
}
#endif /* DEBUG */
908 
/*
 * platform_intr_enter
 *
 *	Called at the beginning of the interrupt service routine to
 *	mask all level equal to and below the interrupt priority
 *	of the interrupting vector.  An EOI should be given to
 *	the interrupt controller to enable other HW interrupts.
 *
 *	Return -1 for spurious interrupts
 *
 *	ipl	- unused.
 *	vectorp	- in: vector as delivered minus APIC_BASE_VECT;
 *		  out: the irq from apic_vector_to_irq[] for that vector
 *		  (left untouched for a spurious interrupt).
 *
 *	Returns the ipl from apic_ipls[] for the vector, or
 *	APIC_INT_SPURIOUS.
 *
 *	The clock vector takes a short path: the task priority is raised
 *	and EOI is written immediately, with no per-CPU bookkeeping.  For
 *	all other vectors EOI is written here only when the irq is not
 *	marked in apic_level_intr[]; for those that are, EOI is left to
 *	the exit path.
 */
/*ARGSUSED*/
static int
apic_intr_enter(int ipl, int *vectorp)
{
	uchar_t vector;
	int nipl;
	int irq;
	ulong_t iflag;
	apic_cpus_info_t *cpu_infop;

	/*
	 * The real vector delivered is (*vectorp + 0x20), but our caller
	 * subtracts 0x20 from the vector before passing it to us.
	 * (That's why APIC_BASE_VECT is 0x20.)
	 */
	vector = (uchar_t)*vectorp;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == apic_clkvect) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/* the stamp is bumped on both sides of the update */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apic_redistribute_compute();
		}

		/* We will avoid all the book keeping overhead for clock */
		nipl = apic_ipls[vector];

		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
		if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
#else
			if (apic_have_32bit_cr8)
				setcr8((ulong_t)(apic_ipltopri[nipl] >>
				    APIC_IPL_SHIFT));
			else
				LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
				    (uint32_t)apic_ipltopri[nipl]);
#endif
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}

		return (nipl);
	}

	cpu_infop = &apic_cpus[psm_get_cpu_id()];

	/* spurious: count it; no priority change and no EOI */
	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* Check if the vector we got is really what we need */
	if (apic_revector_pending) {
		/*
		 * Disable interrupts for the duration of
		 * the vector translation to prevent a self-race for
		 * the apic_revector_lock.  This cannot be done
		 * in apic_xlate_vector because it is recursive and
		 * we want the vector translation to be atomic with
		 * respect to other (higher-priority) interrupts.
		 */
		iflag = intr_clear();
		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
		    APIC_BASE_VECT;
		intr_restore(iflag);
	}

	nipl = apic_ipls[vector];
	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];

	/* raise the task priority to the level for the new ipl */
	if (apic_mode == LOCAL_APIC) {
#if defined(__amd64)
		setcr8((ulong_t)(apic_ipltopri[nipl] >> APIC_IPL_SHIFT));
#else
		if (apic_have_32bit_cr8)
			setcr8((ulong_t)(apic_ipltopri[nipl] >>
			    APIC_IPL_SHIFT));
		else
			LOCAL_APIC_WRITE_REG(APIC_TASK_REG,
			    (uint32_t)apic_ipltopri[nipl]);
#endif
	} else {
		X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[nipl]);
	}

	/* per-CPU record of what is being serviced at this ipl */
	cpu_infop->aci_current[nipl] = (uchar_t)irq;
	cpu_infop->aci_curipl = (uchar_t)nipl;
	cpu_infop->aci_ISR_in_progress |= 1 << nipl;

	/*
	 * apic_level_intr could have been assimilated into the irq struct.
	 * but, having it as a character array is more efficient in terms of
	 * cache usage. So, we leave it as is.
	 */
	if (!apic_level_intr[irq]) {
		if (apic_mode == LOCAL_APIC) {
			LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
		} else {
			X2APIC_WRITE(APIC_EOI_REG, 0);
		}
	}

#ifdef	DEBUG
	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(irq);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);

	if (apic_break_on_cpu == psm_get_cpu_id())
		apic_break();
#endif /* DEBUG */
	return (nipl);
}
1044 
/*
 * Common tail of apic_intr_exit() (MMIO local APIC) and x2apic_intr_exit()
 * (X2APIC local APIC).
 *
 * The macro expands in the caller's scope: it expects 'irq' and 'prev_ipl'
 * to be in scope and assigns to a caller-declared 'cpu_infop' pointer.
 * It sends an EOI through apic_reg_ops->apic_send_eoi() when
 * apic_level_intr[irq] is set, records prev_ipl as the CPU's current IPL
 * and clears every aci_ISR_in_progress bit above prev_ipl.
 */
#define	APIC_INTR_EXIT() \
{ \
	cpu_infop = &apic_cpus[psm_get_cpu_id()]; \
	if (apic_level_intr[irq]) \
		apic_reg_ops->apic_send_eoi(irq); \
	cpu_infop->aci_curipl = (uchar_t)prev_ipl; \
	/* ISR above current pri could not be in progress */ \
	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; \
}
1058 
1059 /*
1060  * Any changes made to this function must also change X2APIC
1061  * version of intr_exit.
1062  */
1063 void
1064 apic_intr_exit(int prev_ipl, int irq)
1065 {
1066 	apic_cpus_info_t *cpu_infop;
1067 
1068 #if defined(__amd64)
1069 	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
1070 #else
1071 	if (apic_have_32bit_cr8)
1072 		setcr8((ulong_t)(apic_ipltopri[prev_ipl] >> APIC_IPL_SHIFT));
1073 	else
1074 		apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
1075 #endif
1076 
1077 	APIC_INTR_EXIT();
1078 }
1079 
/*
 * Same as apic_intr_exit() except it uses MSR rather than MMIO
 * to access local apic registers.
 *
 * prev_ipl: IPL to return to; its apic_ipltopri[] entry is written to the
 *           task priority register.
 * irq:      irq being exited; consumed by APIC_INTR_EXIT().
 */
void
x2apic_intr_exit(int prev_ipl, int irq)
{
	/* assigned and used by APIC_INTR_EXIT() */
	apic_cpus_info_t *cpu_infop;

	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[prev_ipl]);
	APIC_INTR_EXIT();
}
1092 
1093 intr_exit_fn_t
1094 psm_intr_exit_fn(void)
1095 {
1096 	if (apic_mode == LOCAL_X2APIC)
1097 		return (x2apic_intr_exit);
1098 
1099 	return (apic_intr_exit);
1100 }
1101 
1102 /*
1103  * Mask all interrupts below or equal to the given IPL.
1104  * Any changes made to this function must also change X2APIC
1105  * version of setspl.
1106  */
1107 static void
1108 apic_setspl(int ipl)
1109 {
1110 #if defined(__amd64)
1111 	setcr8((ulong_t)apic_cr8pri[ipl]);
1112 #else
1113 	if (apic_have_32bit_cr8)
1114 		setcr8((ulong_t)(apic_ipltopri[ipl] >> APIC_IPL_SHIFT));
1115 	else
1116 		apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
1117 #endif
1118 
1119 	/* interrupts at ipl above this cannot be in progress */
1120 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
1121 	/*
1122 	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
1123 	 * have enough time to come in before the priority is raised again
1124 	 * during the idle() loop.
1125 	 */
1126 	if (apic_setspl_delay)
1127 		(void) apic_reg_ops->apic_get_pri();
1128 }
1129 
1130 /*
1131  * X2APIC version of setspl.
1132  * Mask all interrupts below or equal to the given IPL
1133  */
1134 static void
1135 x2apic_setspl(int ipl)
1136 {
1137 	X2APIC_WRITE(APIC_TASK_REG, apic_ipltopri[ipl]);
1138 
1139 	/* interrupts at ipl above this cannot be in progress */
1140 	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
1141 }
1142 
1143 /*
1144  * generates an interprocessor interrupt to another CPU. Any changes made to
1145  * this routine must be accompanied by similar changes to
1146  * apic_common_send_ipi().
1147  */
1148 static void
1149 apic_send_ipi(int cpun, int ipl)
1150 {
1151 	int vector;
1152 	ulong_t flag;
1153 
1154 	vector = apic_resv_vector[ipl];
1155 
1156 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
1157 
1158 	flag = intr_clear();
1159 
1160 	APIC_AV_PENDING_SET();
1161 
1162 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
1163 	    vector);
1164 
1165 	intr_restore(flag);
1166 }
1167 
1168 
/*
 * Intentionally empty: this PSM takes no action for 'cpun' here.
 * NOTE(review): presumably exists only to fill a psm_ops entry -- confirm.
 */
/*ARGSUSED*/
static void
apic_set_idlecpu(processorid_t cpun)
{
}
1174 
/*
 * Intentionally empty counterpart of apic_set_idlecpu().
 * NOTE(review): presumably exists only to fill a psm_ops entry -- confirm.
 */
/*ARGSUSED*/
static void
apic_unset_idlecpu(processorid_t cpun)
{
}
1180 
1181 
/*
 * Empty function used as the body of the busy-wait loops in this file
 * (e.g. apic_gettime() and apic_gethrtime()).
 * NOTE(review): presumably an out-of-line call so the polled globals are
 * re-read on each iteration -- confirm.
 */
void
apic_ret()
{
}
1186 
1187 /*
1188  * If apic_coarse_time == 1, then apic_gettime() is used instead of
1189  * apic_gethrtime().  This is used for performance instead of accuracy.
1190  */
1191 
1192 static hrtime_t
1193 apic_gettime()
1194 {
1195 	int old_hrtime_stamp;
1196 	hrtime_t temp;
1197 
1198 	/*
1199 	 * In one-shot mode, we do not keep time, so if anyone
1200 	 * calls psm_gettime() directly, we vector over to
1201 	 * gethrtime().
1202 	 * one-shot mode MUST NOT be enabled if this psm is the source of
1203 	 * hrtime.
1204 	 */
1205 
1206 	if (apic_oneshot)
1207 		return (gethrtime());
1208 
1209 
1210 gettime_again:
1211 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
1212 		apic_ret();
1213 
1214 	temp = apic_nsec_since_boot;
1215 
1216 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
1217 		goto gettime_again;
1218 	}
1219 	return (temp);
1220 }
1221 
/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The value is apic_nsec_since_boot plus the time elapsed in the current
 * timer period, derived from the current-count register of CPU 0's local
 * APIC (read directly on CPU 0, via a remote read elsewhere).  The result
 * never goes below apic_last_hrtime.  Runs with interrupts disabled and
 * apic_gethrtime_lock held.
 */
static hrtime_t
apic_gethrtime()
{
	int curr_timeval, countval, elapsed_ticks;
	int old_hrtime_stamp, status;
	hrtime_t temp;
	uint32_t cpun;
	ulong_t oflags;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gethrtime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());

	oflags = intr_clear();	/* prevent migration */

	/* local APIC id; in MMIO mode the id sits above APIC_ID_BIT_OFFSET */
	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
	if (apic_mode == LOCAL_APIC)
		cpun >>= APIC_ID_BIT_OFFSET;

	lock_set(&apic_gethrtime_lock);

gethrtime_again:
	/* wait until the stamp is even before sampling */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	/*
	 * Check to see which CPU we are on.  Note the time is kept on
	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
	 * counter.  If on another CPU, issue a remote read command to CPU 0.
	 */
	if (cpun == apic_cpus[0].aci_local_id) {
		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
	} else {
#ifdef	DEBUG
		APIC_AV_PENDING_SET();
#else
		if (apic_mode == LOCAL_APIC)
			APIC_AV_PENDING_SET();
#endif /* DEBUG */

		apic_reg_ops->apic_write_int_cmd(
		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

		/* spin until the remote read is no longer pending */
		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
		    & AV_READ_PENDING) {
			apic_ret();
		}

		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
		else {	/* 0 = invalid */
			apic_remote_hrterr++;
			/*
			 * return last hrtime right now, will need more
			 * testing if change to retry
			 */
			temp = apic_last_hrtime;

			lock_clear(&apic_gethrtime_lock);

			intr_restore(oflags);

			return (temp);
		}
	}
	/*
	 * A count larger than the last one read is treated as 0;
	 * otherwise remember it for the next comparison.
	 */
	if (countval > last_count_read)
		countval = 0;
	else
		last_count_read = countval;

	/* ticks consumed so far out of the apic_hertz_count period */
	elapsed_ticks = apic_hertz_count - countval;

	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
	temp = apic_nsec_since_boot + curr_timeval;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		/* we might have clobbered last_count_read. Restore it */
		last_count_read = apic_hertz_count;
		goto gethrtime_again;
	}

	/* never hand out a value older than the last one returned */
	if (temp < apic_last_hrtime) {
		/* return last hrtime if error occurs */
		apic_hrtime_error++;
		temp = apic_last_hrtime;
	}
	else
		apic_last_hrtime = temp;

	lock_clear(&apic_gethrtime_lock);
	intr_restore(oflags);

	return (temp);
}
1328 
1329 /* apic NMI handler */
1330 /*ARGSUSED*/
1331 static void
1332 apic_nmi_intr(caddr_t arg, struct regs *rp)
1333 {
1334 	if (apic_shutdown_processors) {
1335 		apic_disable_local_apic();
1336 		return;
1337 	}
1338 
1339 	apic_error |= APIC_ERR_NMI;
1340 
1341 	if (!lock_try(&apic_nmi_lock))
1342 		return;
1343 	apic_num_nmis++;
1344 
1345 	if (apic_kmdb_on_nmi && psm_debugger()) {
1346 		debug_enter("NMI received: entering kmdb\n");
1347 	} else if (apic_panic_on_nmi) {
1348 		/* Keep panic from entering kmdb. */
1349 		nopanicdebug = 1;
1350 		panic("NMI received\n");
1351 	} else {
1352 		/*
1353 		 * prom_printf is the best shot we have of something which is
1354 		 * problem free from high level/NMI type of interrupts
1355 		 */
1356 		prom_printf("NMI received\n");
1357 	}
1358 
1359 	lock_clear(&apic_nmi_lock);
1360 }
1361 
/*
 * Thin wrapper: forwards all arguments unchanged to apic_addspl_common()
 * and returns its result.
 */
/*ARGSUSED*/
static int
apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	return (apic_addspl_common(irqno, ipl, min_ipl, max_ipl));
}
1368 
/*
 * Thin wrapper: forwards all arguments unchanged to apic_delspl_common()
 * and returns its result.
 */
static int
apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	return (apic_delspl_common(irqno, ipl, min_ipl,  max_ipl));
}
1374 
/*
 * Local APIC setup performed on a CPU after it has been started: enables
 * X2APIC mode when required, initializes the local APIC via
 * apic_init_intr(), makes sure the CPU cache is enabled, marks the CPU
 * APIC_CPU_ONLINE and programs the timer divide register.
 *
 * Always returns PSM_SUCCESS.
 */
static int
apic_post_cpu_start()
{
	int cpun;
	/* starts at 1, incremented once per call (see comment below) */
	static int cpus_started = 1;
	struct psm_ops *pops = &apic_ops;

	/* We know this CPU + BSP  started successfully. */
	cpus_started++;

	/*
	 * On BSP we would have enabled X2APIC, if supported by processor,
	 * in acpi_probe(), but on AP we do it here.
	 *
	 * We enable X2APIC mode only if BSP is running in X2APIC & the
	 * local APIC mode of the current CPU is MMIO (xAPIC).
	 */
	if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
	    apic_local_mode() == LOCAL_APIC) {
		apic_enable_x2apic();
	}

	/*
	 * We change psm_send_ipi and send_dirintf only if Solaris
	 * is booted in kmdb & the current CPU is the last CPU being
	 * brought up. We don't need to do anything if Solaris is running
	 * in MMIO mode (xAPIC).
	 */
	if ((boothowto & RB_DEBUG) &&
	    (cpus_started == boot_ncpus || cpus_started == apic_nproc) &&
	    apic_mode == LOCAL_X2APIC) {
		/*
		 * We no longer need help from apic_common_send_ipi()
		 * since we will not start any more CPUs.
		 *
		 * We will need to revisit this if we start supporting
		 * hot-plugging of CPUs.
		 */
		pops->psm_send_ipi = x2apic_send_ipi;
		send_dirintf = pops->psm_send_ipi;
	}

	/* set the priority level to LOCK_LEVEL, then set up the local APIC */
	splx(ipltospl(LOCK_LEVEL));
	apic_init_intr();

	/*
	 * Some systems don't enable the internal cache on the non-boot
	 * cpus, so we have to enable it here by clearing CR0_CD and CR0_NW.
	 */
	setcr0(getcr0() & ~(CR0_CD | CR0_NW));

#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif	/* DEBUG */

	/*
	 * We may be booting, or resuming from suspend; aci_status will
	 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
	 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
	 */
	cpun = psm_get_cpu_id();
	apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;

	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
	return (PSM_SUCCESS);
}
1444 
1445 processorid_t
1446 apic_get_next_processorid(processorid_t cpu_id)
1447 {
1448 
1449 	int i;
1450 
1451 	if (cpu_id == -1)
1452 		return ((processorid_t)0);
1453 
1454 	for (i = cpu_id + 1; i < NCPU; i++) {
1455 		if (CPU_IN_SET(apic_cpumask, i))
1456 			return (i);
1457 	}
1458 
1459 	return ((processorid_t)-1);
1460 }
1461 
1462 
1463 /*
1464  * type == -1 indicates it is an internal request. Do not change
1465  * resv_vector for these requests
1466  */
1467 static int
1468 apic_get_ipivect(int ipl, int type)
1469 {
1470 	uchar_t vector;
1471 	int irq;
1472 
1473 	if ((irq = apic_allocate_irq(APIC_VECTOR(ipl))) != -1) {
1474 		if (vector = apic_allocate_vector(ipl, irq, 1)) {
1475 			apic_irq_table[irq]->airq_mps_intr_index =
1476 			    RESERVE_INDEX;
1477 			apic_irq_table[irq]->airq_vector = vector;
1478 			if (type != -1) {
1479 				apic_resv_vector[ipl] = vector;
1480 			}
1481 			return (irq);
1482 		}
1483 	}
1484 	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
1485 	return (-1);	/* shouldn't happen */
1486 }
1487 
1488 static int
1489 apic_getclkirq(int ipl)
1490 {
1491 	int	irq;
1492 
1493 	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
1494 		return (-1);
1495 	/*
1496 	 * Note the vector in apic_clkvect for per clock handling.
1497 	 */
1498 	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
1499 	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
1500 	    apic_clkvect));
1501 	return (irq);
1502 }
1503 
1504 
/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 *
 * addr:          address of the local APIC register block; only its offset
 *                from apicadr is used, to form the register index that is
 *                handed to apic_reg_ops->apic_read().
 * pit_ticks_adj: out; number of PIT ticks by which the measured interval
 *                overshot APIC_TIME_COUNT.
 *
 * Interrupts are disabled for the duration of the measurement.
 */
static uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	ulong_t		iflag;
	uint32_t	reg;

	/* register index of the current-count register relative to apicadr */
	reg = addr + APIC_CURR_COUNT - apicadr;

	iflag = intr_clear();

	/*
	 * Read the PIT counter (low byte first, then high byte) until it is
	 * at least APIC_TIME_MIN and its low byte lies strictly between
	 * APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	start_apic_tick = apic_reg_ops->apic_read(reg);

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = apic_reg_ops->apic_read(reg);

	/* how far the PIT ran past the target before we noticed */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* the start reading minus the end reading is the elapsed tick count */
	return (start_apic_tick - end_apic_tick);
}
1559 
/*
 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
 * frequency.  Note at this stage in the boot sequence, the boot processor
 * is the only active processor.
 * hertz value of 0 indicates a one-shot mode request.  In this case
 * the function returns the resolution (in nanoseconds) for the hardware
 * timer interrupt.  If one-shot mode capability is not available,
 * the return value will be 0. apic_oneshot_enable is a global switch
 * for disabling the functionality.
 * A non-zero positive value for hertz indicates a periodic mode request.
 * In this case the hardware will be programmed to generate clock interrupts
 * at hertz frequency and returns the resolution of interrupts in
 * nanoseconds.
 */

static int
apic_clkinit(int hertz)
{
	uint_t		apic_ticks = 0;
	uint_t		pit_ticks;
	int		ret;
	uint16_t	pit_ticks_adj;
	/* calibration against the PIT is done on the first call only */
	static int	firsttime = 1;

	if (firsttime) {
		/* first time calibrate on CPU0 only */

		apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
		apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);

		/* total number of PIT ticks corresponding to apic_ticks */
		pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;

		/*
		 * Determine the number of nanoseconds per APIC clock tick
		 * and then determine how many APIC ticks to interrupt at the
		 * desired frequency
		 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
		 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
		 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
		 * apic_ticks_per_SFns =
		 *   (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
		 */
		apic_ticks_per_SFnsecs =
		    ((SF * apic_ticks * PIT_HZ) /
		    ((uint64_t)pit_ticks * NANOSEC));

		/* the interval timer initial count is 32 bit max */
		apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL);
		firsttime = 0;
	}

	if (hertz != 0) {
		/* periodic */
		apic_nsec_per_intr = NANOSEC / hertz;
		apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr);
	}

	/*
	 * Scale the redistribution thresholds by
	 * apic_sample_factor_redistribution percent.
	 * NOTE(review): this runs on every call, not only the first, so the
	 * thresholds are rescaled each time -- confirm that is intended.
	 */
	apic_int_busy_mark = (apic_int_busy_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_int_free_mark = (apic_int_free_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_diff_for_redistribution = (apic_diff_for_redistribution *
	    apic_sample_factor_redistribution) / 100;

	if (hertz == 0) {
		/* requested one_shot */
		if (!tsc_gethrtime_enable || !apic_oneshot_enable)
			return (0);
		apic_oneshot = 1;
		/* resolution is the length of a single APIC timer tick */
		ret = (int)APIC_TICKS_TO_NSECS(1);
	} else {
		/* program the local APIC to interrupt at the given frequency */
		apic_reg_ops->apic_write(APIC_INIT_COUNT, apic_hertz_count);
		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
		apic_oneshot = 0;
		ret = NANOSEC / hertz;
	}

	return (ret);

}
1644 
/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * As written this only emits the verbose trace; nothing follows the
 * (cmd, fcn) check, so the function has no other effect.
 */
static void
apic_preshutdown(int cmd, int fcn)
{
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));

	/* only the shutdown + poweroff combination would need preparation */
	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
		return;
	}
}
1661 
/*
 * Shut down the interrupt hardware and, for A_SHUTDOWN/AD_POWEROFF, try
 * to power the machine off.
 *
 * Steps, in order:
 *  1. With interrupts disabled: NMI every other CPU (their NMI handler
 *     sees apic_shutdown_processors and disables its local APIC), restore
 *     the CMOS shutdown byte if it was set, disable I/O APIC redirection,
 *     switch the IMCR back to PIC mode if present, and disable the local
 *     APIC of this CPU.
 *  2. For cmd == A_SHUTDOWN only: disable ACPI unless powering off or
 *     fast-rebooting; for AD_FASTREBOOT, write a reset request for all
 *     other CPUs to the interrupt command register.
 *  3. For fcn == AD_POWEROFF only: run the method selected by
 *     apic_poweroff_method and then wait up to seven seconds for power to
 *     go away before returning.
 */
static void
apic_shutdown(int cmd, int fcn)
{
	int restarts, attempts;
	int i;
	uchar_t	byte;
	ulong_t iflag;

	hpet_acpi_fini();

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif /* DEBUG */
	/* must be set before the NMI is sent; apic_nmi_intr() checks it */
	apic_shutdown_processors = 1;
	apic_reg_ops->apic_write(APIC_INT_CMD1,
	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}

	ioapic_disable_redirection();

	/*	disable apic mode if imcr present	*/
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* remainder of function is for shutdown cases only */
	if (cmd != A_SHUTDOWN)
		return;

	/*
	 * Switch system back into Legacy-Mode if using ACPI and
	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
	 * for power-off to succeed (Dell Dimension 4600)
	 * Do not disable ACPI while doing fastreboot
	 */
	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
		(void) AcpiDisable();

	if (fcn == AD_FASTREBOOT) {
		apic_reg_ops->apic_write(APIC_INT_CMD1,
		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
	}

	/* remainder of function is for shutdown+poweroff case only */
	if (fcn != AD_POWEROFF)
		return;

	switch (apic_poweroff_method) {
		case APIC_POWEROFF_VIA_RTC:

			/* select the extended NVRAM bank in the RTC */
			outb(CMOS_ADDR, RTC_REGA);
			byte = inb(CMOS_DATA);
			outb(CMOS_DATA, (byte | EXT_BANK));

			outb(CMOS_ADDR, PFR_REG);

			/* for Predator must toggle the PAB bit */
			byte = inb(CMOS_DATA);

			/*
			 * clear power active bar, wakeup alarm and
			 * kickstart
			 */
			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
			outb(CMOS_DATA, byte);

			/* delay before next write */
			drv_usecwait(1000);

			/* for S40 the following would suffice */
			byte = inb(CMOS_DATA);

			/* power active bar control bit */
			byte |= PAB_CBIT;
			outb(CMOS_DATA, byte);

			break;

		case APIC_POWEROFF_VIA_ASPEN_BMC:
			restarts = 0;
restart_aspen_bmc:
			/* give up after two full attempts at the sequence */
			if (++restarts == 3)
				break;
			attempts = 0;
			/* poll the busy bits, 1ms apart, before each write */
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
			i = 0;
			/* send each cntl/data pair of the aspen_bmc[] table */
			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(MISMIC_FLAG_REGISTER);
					byte &= MISMIC_BUSY_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_aspen_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
				byte = inb(MISMIC_FLAG_REGISTER);
				byte |= 0x1;
				outb(MISMIC_FLAG_REGISTER, byte);
			}
			break;

		case APIC_POWEROFF_VIA_SITKA_BMC:
			restarts = 0;
restart_sitka_bmc:
			/* give up after two full attempts at the sequence */
			if (++restarts == 3)
				break;
			attempts = 0;
			/* wait until the status is neither read nor write state */
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_STATE_MASK;
				if ((byte == SMS_READ_STATE) ||
				    (byte == SMS_WRITE_STATE)) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE));
			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
			i = 0;
			/* write each port/data pair of the sitka_bmc[] table */
			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(SMS_STATUS_REGISTER);
					byte &= SMS_IBF_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_sitka_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(sitka_bmc[i].port, sitka_bmc[i].data);
			}
			break;

		case APIC_POWEROFF_NONE:

			/* If no APIC direct method, we will try using ACPI */
			if (apic_enable_acpi) {
				if (acpi_poweroff() == 1)
					return;
			} else
				return;

			break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}
1856 
1857 /*
1858  * Try and disable all interrupts. We just assign interrupts to other
1859  * processors based on policy. If any were bound by user request, we
1860  * let them continue and return failure. We do not bother to check
1861  * for cache affinity while rebinding.
1862  */
1863 
1864 static int
1865 apic_disable_intr(processorid_t cpun)
1866 {
1867 	int bind_cpu = 0, i, hardbound = 0;
1868 	apic_irq_t *irq_ptr;
1869 	ulong_t iflag;
1870 
1871 	iflag = intr_clear();
1872 	lock_set(&apic_ioapic_lock);
1873 
1874 	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
1875 		if (apic_reprogram_info[i].done == B_FALSE) {
1876 			if (apic_reprogram_info[i].bindcpu == cpun) {
1877 				/*
1878 				 * CPU is busy -- it's the target of
1879 				 * a pending reprogramming attempt
1880 				 */
1881 				lock_clear(&apic_ioapic_lock);
1882 				intr_restore(iflag);
1883 				return (PSM_FAILURE);
1884 			}
1885 		}
1886 	}
1887 
1888 	apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
1889 
1890 	apic_cpus[cpun].aci_curipl = 0;
1891 
1892 	i = apic_min_device_irq;
1893 	for (; i <= apic_max_device_irq; i++) {
1894 		/*
1895 		 * If there are bound interrupts on this cpu, then
1896 		 * rebind them to other processors.
1897 		 */
1898 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
1899 			ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
1900 			    (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
1901 			    ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
1902 			    apic_nproc));
1903 
1904 			if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
1905 				hardbound = 1;
1906 				continue;
1907 			}
1908 
1909 			if (irq_ptr->airq_temp_cpu == cpun) {
1910 				do {
1911 					bind_cpu = apic_next_bind_cpu++;
1912 					if (bind_cpu >= apic_nproc) {
1913 						apic_next_bind_cpu = 1;
1914 						bind_cpu = 0;
1915 
1916 					}
1917 				} while (apic_rebind_all(irq_ptr, bind_cpu));
1918 			}
1919 		}
1920 	}
1921 
1922 	lock_clear(&apic_ioapic_lock);
1923 	intr_restore(iflag);
1924 
1925 	if (hardbound) {
1926 		cmn_err(CE_WARN, "Could not disable interrupts on %d"
1927 		    "due to user bound interrupts", cpun);
1928 		return (PSM_FAILURE);
1929 	}
1930 	else
1931 		return (PSM_SUCCESS);
1932 }
1933 
1934 /*
1935  * Bind interrupts to the CPU's local APIC.
1936  * Interrupts should not be bound to a CPU's local APIC until the CPU
1937  * is ready to receive interrupts.
1938  */
1939 static void
1940 apic_enable_intr(processorid_t cpun)
1941 {
1942 	int	i;
1943 	apic_irq_t *irq_ptr;
1944 	ulong_t iflag;
1945 
1946 	iflag = intr_clear();
1947 	lock_set(&apic_ioapic_lock);
1948 
1949 	apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
1950 
1951 	i = apic_min_device_irq;
1952 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
1953 		if ((irq_ptr = apic_irq_table[i]) != NULL) {
1954 			if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) {
1955 				(void) apic_rebind_all(irq_ptr,
1956 				    irq_ptr->airq_cpu);
1957 			}
1958 		}
1959 	}
1960 
1961 	lock_clear(&apic_ioapic_lock);
1962 	intr_restore(iflag);
1963 }
1964 
1965 
1966 /*
1967  * This function will reprogram the timer.
1968  *
1969  * When in oneshot mode the argument is the absolute time in future to
1970  * generate the interrupt at.
1971  *
1972  * When in periodic mode, the argument is the interval at which the
1973  * interrupts should be generated. There is no need to support the periodic
1974  * mode timer change at this time.
1975  */
1976 static void
1977 apic_timer_reprogram(hrtime_t time)
1978 {
1979 	hrtime_t now;
1980 	uint_t ticks;
1981 	int64_t delta;
1982 
1983 	/*
1984 	 * We should be called from high PIL context (CBE_HIGH_PIL),
1985 	 * so kpreempt is disabled.
1986 	 */
1987 
1988 	if (!apic_oneshot) {
1989 		/* time is the interval for periodic mode */
1990 		ticks = APIC_NSECS_TO_TICKS(time);
1991 	} else {
1992 		/* one shot mode */
1993 
1994 		now = gethrtime();
1995 		delta = time - now;
1996 
1997 		if (delta <= 0) {
1998 			/*
1999 			 * requested to generate an interrupt in the past
2000 			 * generate an interrupt as soon as possible
2001 			 */
2002 			ticks = apic_min_timer_ticks;
2003 		} else if (delta > apic_nsec_max) {
2004 			/*
2005 			 * requested to generate an interrupt at a time
2006 			 * further than what we are capable of. Set to max
2007 			 * the hardware can handle
2008 			 */
2009 
2010 			ticks = APIC_MAXVAL;
2011 #ifdef DEBUG
2012 			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
2013 			    "  %lld  too far in future, current time"
2014 			    "  %lld \n", time, now);
2015 #endif
2016 		} else
2017 			ticks = APIC_NSECS_TO_TICKS(delta);
2018 	}
2019 
2020 	if (ticks < apic_min_timer_ticks)
2021 		ticks = apic_min_timer_ticks;
2022 
2023 	apic_reg_ops->apic_write(APIC_INIT_COUNT, ticks);
2024 }
2025 
2026 /*
2027  * This function will enable timer interrupts.
2028  */
2029 static void
2030 apic_timer_enable(void)
2031 {
2032 	/*
2033 	 * We should be Called from high PIL context (CBE_HIGH_PIL),
2034 	 * so kpreempt is disabled.
2035 	 */
2036 
2037 	if (!apic_oneshot) {
2038 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
2039 		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME);
2040 	} else {
2041 		/* one shot */
2042 		apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
2043 		    (apic_clkvect + APIC_BASE_VECT));
2044 	}
2045 }
2046 
/*
 * This function will disable timer interrupts by writing the clock vector
 * with AV_MASK set to the local timer register.
 */
static void
apic_timer_disable(void)
{
	/*
	 * We should be Called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER,
	    (apic_clkvect + APIC_BASE_VECT) | AV_MASK);
}
2060 
2061 /*
2062  * Set timer far into the future and return timer
2063  * current Count in nanoseconds.
2064  */
2065 hrtime_t
2066 apic_timer_stop_count(void)
2067 {
2068 	hrtime_t	ns_val;
2069 	int		enable_val, count_val;
2070 
2071 	/*
2072 	 * Should be called with interrupts disabled.
2073 	 */
2074 	ASSERT(!interrupts_enabled());
2075 
2076 	enable_val = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
2077 	if ((enable_val & AV_MASK) == AV_MASK)
2078 		return ((hrtime_t)-1);		/* timer is disabled */
2079 
2080 	count_val = apic_reg_ops->apic_read(APIC_CURR_COUNT);
2081 	ns_val = APIC_TICKS_TO_NSECS(count_val);
2082 
2083 	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
2084 
2085 	return (ns_val);
2086 }
2087 
/*
 * Reprogram timer after Deep C-State.
 *
 * Simply forwards 'time' to apic_timer_reprogram(), so its meaning depends
 * on the timer mode: an absolute expiry time in one-shot mode, an interval
 * in periodic mode.
 */
void
apic_timer_restart(hrtime_t time)
{
	apic_timer_reprogram(time);
}
2096 
/* handle returned by ddi_periodic_add() in apic_post_cyclic_setup() */
ddi_periodic_t apic_periodic_id;
2098 
2099 /*
2100  * If this module needs a periodic handler for the interrupt distribution, it
2101  * can be added here. The argument to the periodic handler is not currently
2102  * used, but is reserved for future.
2103  */
2104 static void
2105 apic_post_cyclic_setup(void *arg)
2106 {
2107 _NOTE(ARGUNUSED(arg))
2108 	/* cpu_lock is held */
2109 	/* set up a periodic handler for intr redistribution */
2110 
2111 	/*
2112 	 * In peridoc mode intr redistribution processing is done in
2113 	 * apic_intr_enter during clk intr processing
2114 	 */
2115 	if (!apic_oneshot)
2116 		return;
2117 	/*
2118 	 * Register a periodical handler for the redistribution processing.
2119 	 * On X86, CY_LOW_LEVEL is mapped to the level 2 interrupt, so
2120 	 * DDI_IPL_2 should be passed to ddi_periodic_add() here.
2121 	 */
2122 	apic_periodic_id = ddi_periodic_add(
2123 	    (void (*)(void *))apic_redistribute_compute, NULL,
2124 	    apic_redistribute_sample_interval, DDI_IPL_2);
2125 }
2126 
2127 static void
2128 apic_redistribute_compute(void)
2129 {
2130 	int	i, j, max_busy;
2131 
2132 	if (apic_enable_dynamic_migration) {
2133 		if (++apic_nticks == apic_sample_factor_redistribution) {
2134 			/*
2135 			 * Time to call apic_intr_redistribute().
2136 			 * reset apic_nticks. This will cause max_busy
2137 			 * to be calculated below and if it is more than
2138 			 * apic_int_busy, we will do the whole thing
2139 			 */
2140 			apic_nticks = 0;
2141 		}
2142 		max_busy = 0;
2143 		for (i = 0; i < apic_nproc; i++) {
2144 
2145 			/*
2146 			 * Check if curipl is non zero & if ISR is in
2147 			 * progress
2148 			 */
2149 			if (((j = apic_cpus[i].aci_curipl) != 0) &&
2150 			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
2151 
2152 				int	irq;
2153 				apic_cpus[i].aci_busy++;
2154 				irq = apic_cpus[i].aci_current[j];
2155 				apic_irq_table[irq]->airq_busy++;
2156 			}
2157 
2158 			if (!apic_nticks &&
2159 			    (apic_cpus[i].aci_busy > max_busy))
2160 				max_busy = apic_cpus[i].aci_busy;
2161 		}
2162 		if (!apic_nticks) {
2163 			if (max_busy > apic_int_busy_mark) {
2164 			/*
2165 			 * We could make the following check be
2166 			 * skipped > 1 in which case, we get a
2167 			 * redistribution at half the busy mark (due to
2168 			 * double interval). Need to be able to collect
2169 			 * more empirical data to decide if that is a
2170 			 * good strategy. Punt for now.
2171 			 */
2172 				if (apic_skipped_redistribute) {
2173 					apic_cleanup_busy();
2174 					apic_skipped_redistribute = 0;
2175 				} else {
2176 					apic_intr_redistribute();
2177 				}
2178 			} else
2179 				apic_skipped_redistribute++;
2180 		}
2181 	}
2182 }
2183 
2184 
2185 /*
2186  * The following functions are in the platform specific file so that they
2187  * can be different functions depending on whether we are running on
2188  * bare metal or a hypervisor.
2189  */
2190 
/*
 * map an apic for memory-mapped access
 *
 * addr, len and flags are passed unchanged to psm_map_phys(); its result
 * is returned as a pointer to 32-bit words.
 */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	/*LINTED: pointer cast may result in improper alignment */
	return ((uint32_t *)psm_map_phys(addr, len, flags));
}
2200 
/*
 * map an I/O apic for memory-mapped access; in this file it simply
 * forwards to mapin_apic().
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	return (mapin_apic(addr, len, flags));
}
2206 
/*
 * unmap an apic
 *
 * addr and len are passed unchanged to psm_unmap_phys().
 */
void
mapout_apic(caddr_t addr, size_t len)
{
	psm_unmap_phys(addr, len);
}
2215 
/*
 * unmap an I/O apic; in this file it simply forwards to mapout_apic().
 */
void
mapout_ioapic(caddr_t addr, size_t len)
{
	mapout_apic(addr, len);
}
2221 
2222 /*
2223  * Check to make sure there are enough irq slots
2224  */
2225 int
2226 apic_check_free_irqs(int count)
2227 {
2228 	int i, avail;
2229 
2230 	avail = 0;
2231 	for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
2232 		if ((apic_irq_table[i] == NULL) ||
2233 		    apic_irq_table[i]->airq_mps_intr_index == FREE_INDEX) {
2234 			if (++avail >= count)
2235 				return (PSM_SUCCESS);
2236 		}
2237 	}
2238 	return (PSM_FAILURE);
2239 }
2240 
2241 /*
2242  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
2243  */
2244 int
2245 apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
2246     int behavior)
2247 {
2248 	int	rcount, i;
2249 	uchar_t	start, irqno;
2250 	uint32_t cpu;
2251 	major_t	major;
2252 	apic_irq_t	*irqptr;
2253 
2254 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
2255 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
2256 	    (void *)dip, inum, pri, count, behavior));
2257 
2258 	if (count > 1) {
2259 		if (behavior == DDI_INTR_ALLOC_STRICT &&
2260 		    apic_multi_msi_enable == 0)
2261 			return (0);
2262 		if (apic_multi_msi_enable == 0)
2263 			count = 1;
2264 	}
2265 
2266 	if ((rcount = apic_navail_vector(dip, pri)) > count)
2267 		rcount = count;
2268 	else if (rcount == 0 || (rcount < count &&
2269 	    behavior == DDI_INTR_ALLOC_STRICT))
2270 		return (0);
2271 
2272 	/* if not ISP2, then round it down */
2273 	if (!ISP2(rcount))
2274 		rcount = 1 << (highbit(rcount) - 1);
2275 
2276 	mutex_enter(&airq_mutex);
2277 
2278 	for (start = 0; rcount > 0; rcount >>= 1) {
2279 		if ((start = apic_find_multi_vectors(pri, rcount)) != 0 ||
2280 		    behavior == DDI_INTR_ALLOC_STRICT)
2281 			break;
2282 	}
2283 
2284 	if (start == 0) {
2285 		/* no vector available */
2286 		mutex_exit(&airq_mutex);
2287 		return (0);
2288 	}
2289 
2290 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
2291 		/* not enough free irq slots available */
2292 		mutex_exit(&airq_mutex);
2293 		return (0);
2294 	}
2295 
2296 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
2297 	for (i = 0; i < rcount; i++) {
2298 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
2299 		    (uchar_t)-1) {
2300 			/*
2301 			 * shouldn't happen because of the
2302 			 * apic_check_free_irqs() check earlier
2303 			 */
2304 			mutex_exit(&airq_mutex);
2305 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
2306 			    "apic_allocate_irq failed\n"));
2307 			return (i);
2308 		}
2309 		apic_max_device_irq = max(irqno, apic_max_device_irq);
2310 		apic_min_device_irq = min(irqno, apic_min_device_irq);
2311 		irqptr = apic_irq_table[irqno];
2312 #ifdef	DEBUG
2313 		if (apic_vector_to_irq[start + i] != APIC_RESV_IRQ)
2314 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
2315 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
2316 #endif
2317 		apic_vector_to_irq[start + i] = (uchar_t)irqno;
2318 
2319 		irqptr->airq_vector = (uchar_t)(start + i);
2320 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
2321 		irqptr->airq_intin_no = (uchar_t)rcount;
2322 		irqptr->airq_ipl = pri;
2323 		irqptr->airq_vector = start + i;
2324 		irqptr->airq_origirq = (uchar_t)(inum + i);
2325 		irqptr->airq_share_id = 0;
2326 		irqptr->airq_mps_intr_index = MSI_INDEX;
2327 		irqptr->airq_dip = dip;
2328 		irqptr->airq_major = major;
2329 		if (i == 0) /* they all bound to the same cpu */
2330 			cpu = irqptr->airq_cpu = apic_bind_intr(dip, irqno,
2331 			    0xff, 0xff);
2332 		else
2333 			irqptr->airq_cpu = cpu;
2334 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
2335 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
2336 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
2337 		    irqptr->airq_origirq, pri));
2338 	}
2339 	mutex_exit(&airq_mutex);
2340 	return (rcount);
2341 }
2342 
2343 /*
2344  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
2345  */
2346 int
2347 apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
2348     int behavior)
2349 {
2350 	int	rcount, i;
2351 	major_t	major;
2352 
2353 	mutex_enter(&airq_mutex);
2354 
2355 	if ((rcount = apic_navail_vector(dip, pri)) > count)
2356 		rcount = count;
2357 	else if (rcount == 0 || (rcount < count &&
2358 	    behavior == DDI_INTR_ALLOC_STRICT)) {
2359 		rcount = 0;
2360 		goto out;
2361 	}
2362 
2363 	if (apic_check_free_irqs(rcount) == PSM_FAILURE) {
2364 		/* not enough free irq slots available */
2365 		rcount = 0;
2366 		goto out;
2367 	}
2368 
2369 	major = (dip != NULL) ? ddi_driver_major(dip) : 0;
2370 	for (i = 0; i < rcount; i++) {
2371 		uchar_t	vector, irqno;
2372 		apic_irq_t	*irqptr;
2373 
2374 		if ((irqno = apic_allocate_irq(apic_first_avail_irq)) ==
2375 		    (uchar_t)-1) {
2376 			/*
2377 			 * shouldn't happen because of the
2378 			 * apic_check_free_irqs() check earlier
2379 			 */
2380 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
2381 			    "apic_allocate_irq failed\n"));
2382 			rcount = i;
2383 			goto out;
2384 		}
2385 		if ((vector = apic_allocate_vector(pri, irqno, 1)) == 0) {
2386 			/*
2387 			 * shouldn't happen because of the
2388 			 * apic_navail_vector() call earlier
2389 			 */
2390 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msix_vectors: "
2391 			    "apic_allocate_vector failed\n"));
2392 			rcount = i;
2393 			goto out;
2394 		}
2395 		apic_max_device_irq = max(irqno, apic_max_device_irq);
2396 		apic_min_device_irq = min(irqno, apic_min_device_irq);
2397 		irqptr = apic_irq_table[irqno];
2398 		irqptr->airq_vector = (uchar_t)vector;
2399 		irqptr->airq_ipl = pri;
2400 		irqptr->airq_origirq = (uchar_t)(inum + i);
2401 		irqptr->airq_share_id = 0;
2402 		irqptr->airq_mps_intr_index = MSIX_INDEX;
2403 		irqptr->airq_dip = dip;
2404 		irqptr->airq_major = major;
2405 		irqptr->airq_cpu = apic_bind_intr(dip, irqno, 0xff, 0xff);
2406 	}
2407 out:
2408 	mutex_exit(&airq_mutex);
2409 	return (rcount);
2410 }
2411 
2412 /*
2413  * Allocate a free vector for irq at ipl. Takes care of merging of multiple
2414  * IPLs into a single APIC level as well as stretching some IPLs onto multiple
2415  * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority
2416  * requests and allocated only when pri is set.
2417  */
2418 uchar_t
2419 apic_allocate_vector(int ipl, int irq, int pri)
2420 {
2421 	int	lowest, highest, i;
2422 
2423 	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
2424 	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;
2425 
2426 	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
2427 		lowest -= APIC_VECTOR_PER_IPL;
2428 
2429 #ifdef	DEBUG
2430 	if (apic_restrict_vector)	/* for testing shared interrupt logic */
2431 		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
2432 #endif /* DEBUG */
2433 	if (pri == 0)
2434 		highest -= APIC_HI_PRI_VECTS;
2435 
2436 	for (i = lowest; i < highest; i++) {
2437 		if (APIC_CHECK_RESERVE_VECTORS(i))
2438 			continue;
2439 		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
2440 			apic_vector_to_irq[i] = (uchar_t)irq;
2441 			return (i);
2442 		}
2443 	}
2444 
2445 	return (0);
2446 }
2447 
/*
 * Mark vector as not being used by any irq, by resetting its
 * apic_vector_to_irq[] entry to APIC_RESV_IRQ.
 */
void
apic_free_vector(uchar_t vector)
{
	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
}
2454 
2455 uint32_t
2456 ioapic_read(int ioapic_ix, uint32_t reg)
2457 {
2458 	volatile uint32_t *ioapic;
2459 
2460 	ioapic = apicioadr[ioapic_ix];
2461 	ioapic[APIC_IO_REG] = reg;
2462 	return (ioapic[APIC_IO_DATA]);
2463 }
2464 
2465 void
2466 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
2467 {
2468 	volatile uint32_t *ioapic;
2469 
2470 	ioapic = apicioadr[ioapic_ix];
2471 	ioapic[APIC_IO_REG] = reg;
2472 	ioapic[APIC_IO_DATA] = value;
2473 }
2474 
2475 void
2476 ioapic_write_eoi(int ioapic_ix, uint32_t value)
2477 {
2478 	volatile uint32_t *ioapic;
2479 
2480 	ioapic = apicioadr[ioapic_ix];
2481 	ioapic[APIC_IO_EOI] = value;
2482 }
2483 
2484 static processorid_t
2485 apic_find_cpu(int flag)
2486 {
2487 	processorid_t acid = 0;
2488 	int i;
2489 
2490 	/* Find the first CPU with the passed-in flag set */
2491 	for (i = 0; i < apic_nproc; i++) {
2492 		if (apic_cpus[i].aci_status & flag) {
2493 			acid = i;
2494 			break;
2495 		}
2496 	}
2497 
2498 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
2499 	return (acid);
2500 }
2501 
2502 /*
2503  * Call rebind to do the actual programming.
2504  * Must be called with interrupts disabled and apic_ioapic_lock held
2505  * 'p' is polymorphic -- if this function is called to process a deferred
2506  * reprogramming, p is of type 'struct ioapic_reprogram_data *', from which
2507  * the irq pointer is retrieved.  If not doing deferred reprogramming,
2508  * p is of the type 'apic_irq_t *'.
2509  *
2510  * apic_ioapic_lock must be held across this call, as it protects apic_rebind
2511  * and it protects apic_find_cpu() from a race in which a CPU can be taken
2512  * offline after a cpu is selected, but before apic_rebind is called to
2513  * bind interrupts to it.
2514  */
2515 int
2516 apic_setup_io_intr(void *p, int irq, boolean_t deferred)
2517 {
2518 	apic_irq_t *irqptr;
2519 	struct ioapic_reprogram_data *drep = NULL;
2520 	int rv;
2521 
2522 	if (deferred) {
2523 		drep = (struct ioapic_reprogram_data *)p;
2524 		ASSERT(drep != NULL);
2525 		irqptr = drep->irqp;
2526 	} else
2527 		irqptr = (apic_irq_t *)p;
2528 
2529 	ASSERT(irqptr != NULL);
2530 
2531 	rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, drep);
2532 	if (rv) {
2533 		/*
2534 		 * CPU is not up or interrupts are disabled. Fall back to
2535 		 * the first available CPU
2536 		 */
2537 		rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
2538 		    drep);
2539 	}
2540 
2541 	return (rv);
2542 }
2543 
2544 
/*
 * Point "vector" at "irq" in apic_vector_to_irq[] and return the vector
 * unchanged.  The previous mapping is overwritten without being checked.
 */
uchar_t
apic_modify_vector(uchar_t vector, int irq)
{
	apic_vector_to_irq[vector] = (uchar_t)irq;
	return (vector);
}
2551 
/*
 * Return the p_mach_idstring member of apic_psm_info.  The pointer
 * refers to that structure's string; nothing is copied.
 */
char *
apic_get_apic_type()
{
	return (apic_psm_info.p_mach_idstring);
}
2557 
2558 void
2559 x2apic_update_psm()
2560 {
2561 	struct psm_ops *pops = &apic_ops;
2562 
2563 	ASSERT(pops != NULL);
2564 
2565 	/*
2566 	 * We don't need to do any magic if one of the following
2567 	 * conditions is true :
2568 	 * - Not being run under kernel debugger.
2569 	 * - MP is not set.
2570 	 * - Booted with one CPU only.
2571 	 * - One CPU configured.
2572 	 *
2573 	 * We set apic_common_send_ipi() since kernel debuggers
2574 	 * attempt to send IPIs to other slave CPUs during
2575 	 * entry (exit) from (to) debugger.
2576 	 */
2577 	if (!(boothowto & RB_DEBUG) || use_mp == 0 ||
2578 	    apic_nproc == 1 || boot_ncpus == 1) {
2579 		pops->psm_send_ipi =  x2apic_send_ipi;
2580 	} else {
2581 		pops->psm_send_ipi =  apic_common_send_ipi;
2582 	}
2583 
2584 	pops->psm_intr_exit = x2apic_intr_exit;
2585 	pops->psm_setspl = x2apic_setspl;
2586 
2587 	send_dirintf = pops->psm_send_ipi;
2588 
2589 	apic_mode = LOCAL_X2APIC;
2590 	apic_change_ops();
2591 }
2592 
2593 static void
2594 apic_intrr_init(int apic_mode)
2595 {
2596 	int suppress_brdcst_eoi = 0;
2597 
2598 	if (psm_vt_ops != NULL) {
2599 		/*
2600 		 * Since X2APIC requires the use of interrupt remapping
2601 		 * (though this is not documented explicitly in the Intel
2602 		 * documentation (yet)), initialize interrupt remapping
2603 		 * support before initializing the X2APIC unit.
2604 		 */
2605 		if (((apic_intrr_ops_t *)psm_vt_ops)->apic_intrr_init(apic_mode)
2606 		    == DDI_SUCCESS) {
2607 			apic_vt_ops = psm_vt_ops;
2608 
2609 			/*
2610 			 * We leverage the interrupt remapping engine to
2611 			 * suppress broadcast EOI; thus we must send the
2612 			 * directed EOI with the directed-EOI handler.
2613 			 */
2614 			if (apic_directed_EOI_supported() == 0) {
2615 				suppress_brdcst_eoi = 1;
2616 			}
2617 
2618 			apic_vt_ops->apic_intrr_enable(suppress_brdcst_eoi);
2619 
2620 			if (apic_detect_x2apic()) {
2621 				apic_enable_x2apic();
2622 			}
2623 
2624 			if (apic_directed_EOI_supported() == 0) {
2625 				apic_set_directed_EOI_handler();
2626 			}
2627 		}
2628 	}
2629 }
2630 
2631 /*ARGSUSED*/
2632 static void
2633 apic_record_ioapic_rdt(apic_irq_t *irq_ptr, ioapic_rdt_t *irdt)
2634 {
2635 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
2636 }
2637 
2638 /*ARGSUSED*/
2639 static void
2640 apic_record_msi(apic_irq_t *irq_ptr, msi_regs_t *mregs)
2641 {
2642 	mregs->mr_addr = MSI_ADDR_HDR |
2643 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
2644 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
2645 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
2646 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
2647 	    mregs->mr_data;
2648 }
2649