xref: /illumos-gate/usr/src/uts/i86pc/io/pcplusmp/apic_common.c (revision 96b6509c49b81cb0d89ec222d92d421d946caa0c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
27  * Copyright (c) 2016 by Delphix. All rights reserved.
28  */
29 
30 /*
31  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
32  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
33  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
34  * PSMI 1.5 extensions are supported in Solaris Nevada.
35  * PSMI 1.6 extensions are supported in Solaris Nevada.
36  * PSMI 1.7 extensions are supported in Solaris Nevada.
37  */
38 #define	PSMI_1_7
39 
40 #include <sys/processor.h>
41 #include <sys/time.h>
42 #include <sys/psm.h>
43 #include <sys/smp_impldefs.h>
44 #include <sys/cram.h>
45 #include <sys/acpi/acpi.h>
46 #include <sys/acpica.h>
47 #include <sys/psm_common.h>
48 #include <sys/apic.h>
49 #include <sys/pit.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/ddi_impldefs.h>
53 #include <sys/pci.h>
54 #include <sys/promif.h>
55 #include <sys/x86_archext.h>
56 #include <sys/cpc_impl.h>
57 #include <sys/uadmin.h>
58 #include <sys/panic.h>
59 #include <sys/debug.h>
60 #include <sys/archsystm.h>
61 #include <sys/trap.h>
62 #include <sys/machsystm.h>
63 #include <sys/sysmacros.h>
64 #include <sys/cpuvar.h>
65 #include <sys/rm_platter.h>
66 #include <sys/privregs.h>
67 #include <sys/note.h>
68 #include <sys/pci_intr_lib.h>
69 #include <sys/spl.h>
70 #include <sys/clock.h>
71 #include <sys/dditypes.h>
72 #include <sys/sunddi.h>
73 #include <sys/x_call.h>
74 #include <sys/reboot.h>
75 #include <sys/hpet.h>
76 #include <sys/apic_common.h>
77 #include <sys/apic_timer.h>
78 
79 static void	apic_record_ioapic_rdt(void *intrmap_private,
80 		    ioapic_rdt_t *irdt);
81 static void	apic_record_msi(void *intrmap_private, msi_regs_t *mregs);
82 
83 /*
84  * Common routines between pcplusmp & apix (taken from apic.c).
85  */
86 
87 int	apic_clkinit(int);
88 hrtime_t apic_gethrtime(void);
89 void	apic_send_ipi(int, int);
90 void	apic_set_idlecpu(processorid_t);
91 void	apic_unset_idlecpu(processorid_t);
92 void	apic_shutdown(int, int);
93 void	apic_preshutdown(int, int);
94 processorid_t	apic_get_next_processorid(processorid_t);
95 
96 hrtime_t apic_gettime();
97 
98 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
99 
100 /* Now the ones for Dynamic Interrupt distribution */
101 int	apic_enable_dynamic_migration = 0;
102 
103 /* maximum loop count when sending Start IPIs. */
104 int apic_sipi_max_loop_count = 0x1000;
105 
106 /*
107  * These variables are frequently accessed in apic_intr_enter(),
108  * apic_intr_exit and apic_setspl, so group them together
109  */
110 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
111 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
112 int apic_clkvect;
113 
114 /* vector at which error interrupts come in */
115 int apic_errvect;
116 int apic_enable_error_intr = 1;
117 int apic_error_display_delay = 100;
118 
119 /* vector at which performance counter overflow interrupts come in */
120 int apic_cpcovf_vect;
121 int apic_enable_cpcovf_intr = 1;
122 
123 /* vector at which CMCI interrupts come in */
124 int apic_cmci_vect;
125 extern int cmi_enable_cmci;
126 extern void cmi_cmci_trap(void);
127 
128 kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
129 int cmci_cpu_setup_registered;
130 
131 lock_t apic_mode_switch_lock;
132 
133 /*
134  * Patchable global variables.
135  */
136 int	apic_forceload = 0;
137 
138 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
139 
140 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
141 int	apic_panic_on_nmi = 0;
142 int	apic_panic_on_apic_error = 0;
143 
144 int	apic_verbose = 0;	/* 0x1ff */
145 
146 #ifdef DEBUG
147 int	apic_debug = 0;
148 int	apic_restrict_vector = 0;
149 
150 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
151 int	apic_debug_msgbufindex = 0;
152 
153 #endif /* DEBUG */
154 
155 uint_t apic_nticks = 0;
156 uint_t apic_skipped_redistribute = 0;
157 
158 uint_t last_count_read = 0;
159 lock_t	apic_gethrtime_lock;
160 volatile int	apic_hrtime_stamp = 0;
161 volatile hrtime_t apic_nsec_since_boot = 0;
162 
163 static	hrtime_t	apic_last_hrtime = 0;
164 int		apic_hrtime_error = 0;
165 int		apic_remote_hrterr = 0;
166 int		apic_num_nmis = 0;
167 int		apic_apic_error = 0;
168 int		apic_num_apic_errors = 0;
169 int		apic_num_cksum_errors = 0;
170 
171 int	apic_error = 0;
172 
173 static	int	apic_cmos_ssb_set = 0;
174 
175 /* use to make sure only one cpu handles the nmi */
176 lock_t	apic_nmi_lock;
177 /* use to make sure only one cpu handles the error interrupt */
178 lock_t	apic_error_lock;
179 
180 static	struct {
181 	uchar_t	cntl;
182 	uchar_t	data;
183 } aspen_bmc[] = {
184 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
185 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
186 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
187 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
188 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
189 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
190 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
191 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
192 
193 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
194 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
195 };
196 
197 static	struct {
198 	int	port;
199 	uchar_t	data;
200 } sitka_bmc[] = {
201 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
202 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
203 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
204 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
205 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
206 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
207 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
208 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
209 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
210 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
211 
212 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
213 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
214 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
215 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
216 };
217 
218 /* Patchable global variables. */
219 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
220 uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */
221 
222 /* default apic ops without interrupt remapping */
223 static apic_intrmap_ops_t apic_nointrmap_ops = {
224 	(int (*)(int))return_instr,
225 	(void (*)(int))return_instr,
226 	(void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
227 	(void (*)(void *, void *, uint16_t, int))return_instr,
228 	(void (*)(void **))return_instr,
229 	apic_record_ioapic_rdt,
230 	apic_record_msi,
231 };
232 
233 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
234 apic_cpus_info_t	*apic_cpus = NULL;
235 cpuset_t	apic_cpumask;
236 uint_t		apic_picinit_called;
237 
238 /* Flag to indicate that we need to shut down all processors */
239 static uint_t	apic_shutdown_processors;
240 
241 /*
242  * Probe the ioapic method for apix module. Called in apic_probe_common()
243  */
244 int
245 apic_ioapic_method_probe()
246 {
247 	if (apix_enable == 0)
248 		return (PSM_SUCCESS);
249 
250 	/*
251 	 * Set IOAPIC EOI handling method. The priority from low to high is:
252 	 * 	1. IOxAPIC: with EOI register
253 	 * 	2. IOMMU interrupt mapping
254 	 *	3. Mask-Before-EOI method for systems without boot
255 	 *	interrupt routing, such as systems with only one IOAPIC;
256 	 *	NVIDIA CK8-04/MCP55 systems; systems with bridge solution
257 	 *	which disables the boot interrupt routing already.
258 	 * 	4. Directed EOI
259 	 */
260 	if (apic_io_ver[0] >= 0x20)
261 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
262 	if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
263 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
264 	if (apic_directed_EOI_supported())
265 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
266 
267 	/* fall back to pcplusmp */
268 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
269 		/* make sure apix is after pcplusmp in /etc/mach */
270 		apix_enable = 0; /* go ahead with pcplusmp install next */
271 		return (PSM_FAILURE);
272 	}
273 
274 	return (PSM_SUCCESS);
275 }
276 
277 /*
278  * handler for APIC Error interrupt. Just print a warning and continue
279  */
280 int
281 apic_error_intr()
282 {
283 	uint_t	error0, error1, error;
284 	uint_t	i;
285 
286 	/*
287 	 * We need to write before read as per 7.4.17 of system prog manual.
288 	 * We do both and or the results to be safe
289 	 */
290 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
291 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
292 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
293 	error = error0 | error1;
294 
295 	/*
296 	 * Clear the APIC error status (do this on all cpus that enter here)
297 	 * (two writes are required due to the semantics of accessing the
298 	 * error status register.)
299 	 */
300 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
301 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
302 
303 	/*
304 	 * Prevent more than 1 CPU from handling error interrupt causing
305 	 * double printing (interleave of characters from multiple
306 	 * CPU's when using prom_printf)
307 	 */
308 	if (lock_try(&apic_error_lock) == 0)
309 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
310 	if (error) {
311 #if	DEBUG
312 		if (apic_debug)
313 			debug_enter("pcplusmp: APIC Error interrupt received");
314 #endif /* DEBUG */
315 		if (apic_panic_on_apic_error)
316 			cmn_err(CE_PANIC,
317 			    "APIC Error interrupt on CPU %d. Status = %x",
318 			    psm_get_cpu_id(), error);
319 		else {
320 			if ((error & ~APIC_CS_ERRORS) == 0) {
321 				/* cksum error only */
322 				apic_error |= APIC_ERR_APIC_ERROR;
323 				apic_apic_error |= error;
324 				apic_num_apic_errors++;
325 				apic_num_cksum_errors++;
326 			} else {
327 				/*
328 				 * prom_printf is the best shot we have of
329 				 * something which is problem free from
330 				 * high level/NMI type of interrupts
331 				 */
332 				prom_printf("APIC Error interrupt on CPU %d. "
333 				    "Status 0 = %x, Status 1 = %x\n",
334 				    psm_get_cpu_id(), error0, error1);
335 				apic_error |= APIC_ERR_APIC_ERROR;
336 				apic_apic_error |= error;
337 				apic_num_apic_errors++;
338 				for (i = 0; i < apic_error_display_delay; i++) {
339 					tenmicrosec();
340 				}
341 				/*
342 				 * provide more delay next time limited to
343 				 * roughly 1 clock tick time
344 				 */
345 				if (apic_error_display_delay < 500)
346 					apic_error_display_delay *= 2;
347 			}
348 		}
349 		lock_clear(&apic_error_lock);
350 		return (DDI_INTR_CLAIMED);
351 	} else {
352 		lock_clear(&apic_error_lock);
353 		return (DDI_INTR_UNCLAIMED);
354 	}
355 }
356 
357 /*
358  * Turn off the mask bit in the performance counter Local Vector Table entry.
359  */
360 void
361 apic_cpcovf_mask_clear(void)
362 {
363 	apic_reg_ops->apic_write(APIC_PCINT_VECT,
364 	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
365 }
366 
367 /*ARGSUSED*/
368 static int
369 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
370 {
371 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
372 	return (0);
373 }
374 
375 /*ARGSUSED*/
376 static int
377 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
378 {
379 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
380 	return (0);
381 }
382 
383 /*ARGSUSED*/
384 int
385 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
386 {
387 	cpuset_t	cpu_set;
388 
389 	CPUSET_ONLY(cpu_set, cpuid);
390 
391 	switch (what) {
392 		case CPU_ON:
393 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
394 			    (xc_func_t)apic_cmci_enable);
395 			break;
396 
397 		case CPU_OFF:
398 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
399 			    (xc_func_t)apic_cmci_disable);
400 			break;
401 
402 		default:
403 			break;
404 	}
405 
406 	return (0);
407 }
408 
409 static void
410 apic_disable_local_apic(void)
411 {
412 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
413 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
414 
415 	/* local intr reg 0 */
416 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
417 
418 	/* disable NMI */
419 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
420 
421 	/* and error interrupt */
422 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
423 
424 	/* and perf counter intr */
425 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
426 
427 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
428 }
429 
430 static void
431 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
432 {
433 	int		loop_count;
434 	uint32_t	vector;
435 	uint_t		apicid;
436 	ulong_t		iflag;
437 
438 	apicid =  apic_cpus[cpun].aci_local_id;
439 
440 	/*
441 	 * Interrupts on current CPU will be disabled during the
442 	 * steps in order to avoid unwanted side effects from
443 	 * executing interrupt handlers on a problematic BIOS.
444 	 */
445 	iflag = intr_clear();
446 
447 	if (start) {
448 		outb(CMOS_ADDR, SSB);
449 		outb(CMOS_DATA, BIOS_SHUTDOWN);
450 	}
451 
452 	/*
453 	 * According to X2APIC specification in section '2.3.5.1' of
454 	 * Interrupt Command Register Semantics, the semantics of
455 	 * programming the Interrupt Command Register to dispatch an interrupt
456 	 * is simplified. A single MSR write to the 64-bit ICR is required
457 	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
458 	 * interface to ICR, system software is not required to check the
459 	 * status of the delivery status bit prior to writing to the ICR
460 	 * to send an IPI. With the removal of the Delivery Status bit,
461 	 * system software no longer has a reason to read the ICR. It remains
462 	 * readable only to aid in debugging.
463 	 */
464 #ifdef	DEBUG
465 	APIC_AV_PENDING_SET();
466 #else
467 	if (apic_mode == LOCAL_APIC) {
468 		APIC_AV_PENDING_SET();
469 	}
470 #endif /* DEBUG */
471 
472 	/* for integrated - make sure there is one INIT IPI in buffer */
473 	/* for external - it will wake up the cpu */
474 	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
475 
476 	/* If only 1 CPU is installed, PENDING bit will not go low */
477 	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
478 		if (apic_mode == LOCAL_APIC &&
479 		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
480 			apic_ret();
481 		else
482 			break;
483 	}
484 
485 	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
486 	drv_usecwait(20000);		/* 20 milli sec */
487 
488 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
489 		/* integrated apic */
490 
491 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
492 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
493 
494 		/* to offset the INIT IPI queue up in the buffer */
495 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
496 		drv_usecwait(200);		/* 20 micro sec */
497 
498 		/*
499 		 * send the second SIPI (Startup IPI) as recommended by Intel
500 		 * software development manual.
501 		 */
502 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
503 		drv_usecwait(200);	/* 20 micro sec */
504 	}
505 
506 	intr_restore(iflag);
507 }
508 
509 /*ARGSUSED1*/
510 int
511 apic_cpu_start(processorid_t cpun, caddr_t arg)
512 {
513 	ASSERT(MUTEX_HELD(&cpu_lock));
514 
515 	if (!apic_cpu_in_range(cpun)) {
516 		return (EINVAL);
517 	}
518 
519 	/*
520 	 * Switch to apic_common_send_ipi for safety during starting other CPUs.
521 	 */
522 	if (apic_mode == LOCAL_X2APIC) {
523 		apic_switch_ipi_callback(B_TRUE);
524 	}
525 
526 	apic_cmos_ssb_set = 1;
527 	apic_cpu_send_SIPI(cpun, B_TRUE);
528 
529 	return (0);
530 }
531 
532 /*
533  * Put CPU into halted state with interrupts disabled.
534  */
535 /*ARGSUSED1*/
536 int
537 apic_cpu_stop(processorid_t cpun, caddr_t arg)
538 {
539 	int		rc;
540 	cpu_t 		*cp;
541 	extern cpuset_t cpu_ready_set;
542 	extern void cpu_idle_intercept_cpu(cpu_t *cp);
543 
544 	ASSERT(MUTEX_HELD(&cpu_lock));
545 
546 	if (!apic_cpu_in_range(cpun)) {
547 		return (EINVAL);
548 	}
549 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
550 		return (ENOTSUP);
551 	}
552 
553 	cp = cpu_get(cpun);
554 	ASSERT(cp != NULL);
555 	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
556 	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
557 	ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
558 
559 	/* Clear CPU_READY flag to disable cross calls. */
560 	cp->cpu_flags &= ~CPU_READY;
561 	CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
562 	rc = xc_flush_cpu(cp);
563 	if (rc != 0) {
564 		CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
565 		cp->cpu_flags |= CPU_READY;
566 		return (rc);
567 	}
568 
569 	/* Intercept target CPU at a safe point before powering it off. */
570 	cpu_idle_intercept_cpu(cp);
571 
572 	apic_cpu_send_SIPI(cpun, B_FALSE);
573 	cp->cpu_flags &= ~CPU_RUNNING;
574 
575 	return (0);
576 }
577 
578 int
579 apic_cpu_ops(psm_cpu_request_t *reqp)
580 {
581 	if (reqp == NULL) {
582 		return (EINVAL);
583 	}
584 
585 	switch (reqp->pcr_cmd) {
586 	case PSM_CPU_ADD:
587 		return (apic_cpu_add(reqp));
588 
589 	case PSM_CPU_REMOVE:
590 		return (apic_cpu_remove(reqp));
591 
592 	case PSM_CPU_STOP:
593 		return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
594 		    reqp->req.cpu_stop.ctx));
595 
596 	default:
597 		return (ENOTSUP);
598 	}
599 }
600 
#ifdef	DEBUG
/*
 * Debug-only knobs.  NOTE(review): none of these is referenced in the part
 * of the file visible here; their consumers live elsewhere.
 */
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */
606 
607 /*
608  * generates an interprocessor interrupt to another CPU. Any changes made to
609  * this routine must be accompanied by similar changes to
610  * apic_common_send_ipi().
611  */
612 void
613 apic_send_ipi(int cpun, int ipl)
614 {
615 	int vector;
616 	ulong_t flag;
617 
618 	vector = apic_resv_vector[ipl];
619 
620 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
621 
622 	flag = intr_clear();
623 
624 	APIC_AV_PENDING_SET();
625 
626 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
627 	    vector);
628 
629 	intr_restore(flag);
630 }
631 
632 
633 /*ARGSUSED*/
634 void
635 apic_set_idlecpu(processorid_t cpun)
636 {
637 }
638 
639 /*ARGSUSED*/
640 void
641 apic_unset_idlecpu(processorid_t cpun)
642 {
643 }
644 
645 
/*
 * Empty function called from the body of the busy-wait loops in this file.
 */
void
apic_ret()
{
	/* deliberately empty */
}
650 
651 /*
652  * If apic_coarse_time == 1, then apic_gettime() is used instead of
653  * apic_gethrtime().  This is used for performance instead of accuracy.
654  */
655 
656 hrtime_t
657 apic_gettime()
658 {
659 	int old_hrtime_stamp;
660 	hrtime_t temp;
661 
662 	/*
663 	 * In one-shot mode, we do not keep time, so if anyone
664 	 * calls psm_gettime() directly, we vector over to
665 	 * gethrtime().
666 	 * one-shot mode MUST NOT be enabled if this psm is the source of
667 	 * hrtime.
668 	 */
669 
670 	if (apic_oneshot)
671 		return (gethrtime());
672 
673 
674 gettime_again:
675 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
676 		apic_ret();
677 
678 	temp = apic_nsec_since_boot;
679 
680 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
681 		goto gettime_again;
682 	}
683 	return (temp);
684 }
685 
686 /*
687  * Here we return the number of nanoseconds since booting.  Note every
688  * clock interrupt increments apic_nsec_since_boot by the appropriate
689  * amount.
690  */
691 hrtime_t
692 apic_gethrtime(void)
693 {
694 	int curr_timeval, countval, elapsed_ticks;
695 	int old_hrtime_stamp, status;
696 	hrtime_t temp;
697 	uint32_t cpun;
698 	ulong_t oflags;
699 
700 	/*
701 	 * In one-shot mode, we do not keep time, so if anyone
702 	 * calls psm_gethrtime() directly, we vector over to
703 	 * gethrtime().
704 	 * one-shot mode MUST NOT be enabled if this psm is the source of
705 	 * hrtime.
706 	 */
707 
708 	if (apic_oneshot)
709 		return (gethrtime());
710 
711 	oflags = intr_clear();	/* prevent migration */
712 
713 	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
714 	if (apic_mode == LOCAL_APIC)
715 		cpun >>= APIC_ID_BIT_OFFSET;
716 
717 	lock_set(&apic_gethrtime_lock);
718 
719 gethrtime_again:
720 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
721 		apic_ret();
722 
723 	/*
724 	 * Check to see which CPU we are on.  Note the time is kept on
725 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
726 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
727 	 */
728 	if (cpun == apic_cpus[0].aci_local_id) {
729 		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
730 	} else {
731 #ifdef	DEBUG
732 		APIC_AV_PENDING_SET();
733 #else
734 		if (apic_mode == LOCAL_APIC)
735 			APIC_AV_PENDING_SET();
736 #endif /* DEBUG */
737 
738 		apic_reg_ops->apic_write_int_cmd(
739 		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
740 
741 		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
742 		    & AV_READ_PENDING) {
743 			apic_ret();
744 		}
745 
746 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
747 			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
748 		else {	/* 0 = invalid */
749 			apic_remote_hrterr++;
750 			/*
751 			 * return last hrtime right now, will need more
752 			 * testing if change to retry
753 			 */
754 			temp = apic_last_hrtime;
755 
756 			lock_clear(&apic_gethrtime_lock);
757 
758 			intr_restore(oflags);
759 
760 			return (temp);
761 		}
762 	}
763 	if (countval > last_count_read)
764 		countval = 0;
765 	else
766 		last_count_read = countval;
767 
768 	elapsed_ticks = apic_hertz_count - countval;
769 
770 	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
771 	temp = apic_nsec_since_boot + curr_timeval;
772 
773 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
774 		/* we might have clobbered last_count_read. Restore it */
775 		last_count_read = apic_hertz_count;
776 		goto gethrtime_again;
777 	}
778 
779 	if (temp < apic_last_hrtime) {
780 		/* return last hrtime if error occurs */
781 		apic_hrtime_error++;
782 		temp = apic_last_hrtime;
783 	}
784 	else
785 		apic_last_hrtime = temp;
786 
787 	lock_clear(&apic_gethrtime_lock);
788 	intr_restore(oflags);
789 
790 	return (temp);
791 }
792 
793 /* apic NMI handler */
794 /*ARGSUSED*/
795 void
796 apic_nmi_intr(caddr_t arg, struct regs *rp)
797 {
798 	if (apic_shutdown_processors) {
799 		apic_disable_local_apic();
800 		return;
801 	}
802 
803 	apic_error |= APIC_ERR_NMI;
804 
805 	if (!lock_try(&apic_nmi_lock))
806 		return;
807 	apic_num_nmis++;
808 
809 	if (apic_kmdb_on_nmi && psm_debugger()) {
810 		debug_enter("NMI received: entering kmdb\n");
811 	} else if (apic_panic_on_nmi) {
812 		/* Keep panic from entering kmdb. */
813 		nopanicdebug = 1;
814 		panic("NMI received\n");
815 	} else {
816 		/*
817 		 * prom_printf is the best shot we have of something which is
818 		 * problem free from high level/NMI type of interrupts
819 		 */
820 		prom_printf("NMI received\n");
821 	}
822 
823 	lock_clear(&apic_nmi_lock);
824 }
825 
826 processorid_t
827 apic_get_next_processorid(processorid_t cpu_id)
828 {
829 
830 	int i;
831 
832 	if (cpu_id == -1)
833 		return ((processorid_t)0);
834 
835 	for (i = cpu_id + 1; i < NCPU; i++) {
836 		if (apic_cpu_in_range(i))
837 			return (i);
838 	}
839 
840 	return ((processorid_t)-1);
841 }
842 
843 int
844 apic_cpu_add(psm_cpu_request_t *reqp)
845 {
846 	int i, rv = 0;
847 	ulong_t iflag;
848 	boolean_t first = B_TRUE;
849 	uchar_t localver = 0;
850 	uint32_t localid, procid;
851 	processorid_t cpuid = (processorid_t)-1;
852 	mach_cpu_add_arg_t *ap;
853 
854 	ASSERT(reqp != NULL);
855 	reqp->req.cpu_add.cpuid = (processorid_t)-1;
856 
857 	/* Check whether CPU hotplug is supported. */
858 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
859 		return (ENOTSUP);
860 	}
861 
862 	ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
863 	switch (ap->type) {
864 	case MACH_CPU_ARG_LOCAL_APIC:
865 		localid = ap->arg.apic.apic_id;
866 		procid = ap->arg.apic.proc_id;
867 		if (localid >= 255 || procid > 255) {
868 			cmn_err(CE_WARN,
869 			    "!apic: apicid(%u) or procid(%u) is invalid.",
870 			    localid, procid);
871 			return (EINVAL);
872 		}
873 		break;
874 
875 	case MACH_CPU_ARG_LOCAL_X2APIC:
876 		localid = ap->arg.apic.apic_id;
877 		procid = ap->arg.apic.proc_id;
878 		if (localid >= UINT32_MAX) {
879 			cmn_err(CE_WARN,
880 			    "!apic: x2apicid(%u) is invalid.", localid);
881 			return (EINVAL);
882 		} else if (localid >= 255 && apic_mode == LOCAL_APIC) {
883 			cmn_err(CE_WARN, "!apic: system is in APIC mode, "
884 			    "can't support x2APIC processor.");
885 			return (ENOTSUP);
886 		}
887 		break;
888 
889 	default:
890 		cmn_err(CE_WARN,
891 		    "!apic: unknown argument type %d to apic_cpu_add().",
892 		    ap->type);
893 		return (EINVAL);
894 	}
895 
896 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
897 	iflag = intr_clear();
898 	lock_set(&apic_ioapic_lock);
899 
900 	/* Check whether local APIC id already exists. */
901 	for (i = 0; i < apic_nproc; i++) {
902 		if (!CPU_IN_SET(apic_cpumask, i))
903 			continue;
904 		if (apic_cpus[i].aci_local_id == localid) {
905 			lock_clear(&apic_ioapic_lock);
906 			intr_restore(iflag);
907 			cmn_err(CE_WARN,
908 			    "!apic: local apic id %u already exists.",
909 			    localid);
910 			return (EEXIST);
911 		} else if (apic_cpus[i].aci_processor_id == procid) {
912 			lock_clear(&apic_ioapic_lock);
913 			intr_restore(iflag);
914 			cmn_err(CE_WARN,
915 			    "!apic: processor id %u already exists.",
916 			    (int)procid);
917 			return (EEXIST);
918 		}
919 
920 		/*
921 		 * There's no local APIC version number available in MADT table,
922 		 * so assume that all CPUs are homogeneous and use local APIC
923 		 * version number of the first existing CPU.
924 		 */
925 		if (first) {
926 			first = B_FALSE;
927 			localver = apic_cpus[i].aci_local_ver;
928 		}
929 	}
930 	ASSERT(first == B_FALSE);
931 
932 	/*
933 	 * Try to assign the same cpuid if APIC id exists in the dirty cache.
934 	 */
935 	for (i = 0; i < apic_max_nproc; i++) {
936 		if (CPU_IN_SET(apic_cpumask, i)) {
937 			ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
938 			continue;
939 		}
940 		ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
941 		if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
942 		    apic_cpus[i].aci_local_id == localid &&
943 		    apic_cpus[i].aci_processor_id == procid) {
944 			cpuid = i;
945 			break;
946 		}
947 	}
948 
949 	/* Avoid the dirty cache and allocate fresh slot if possible. */
950 	if (cpuid == (processorid_t)-1) {
951 		for (i = 0; i < apic_max_nproc; i++) {
952 			if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
953 			    (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
954 				cpuid = i;
955 				break;
956 			}
957 		}
958 	}
959 
960 	/* Try to find any free slot as last resort. */
961 	if (cpuid == (processorid_t)-1) {
962 		for (i = 0; i < apic_max_nproc; i++) {
963 			if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
964 				cpuid = i;
965 				break;
966 			}
967 		}
968 	}
969 
970 	if (cpuid == (processorid_t)-1) {
971 		lock_clear(&apic_ioapic_lock);
972 		intr_restore(iflag);
973 		cmn_err(CE_NOTE,
974 		    "!apic: failed to allocate cpu id for processor %u.",
975 		    procid);
976 		rv = EAGAIN;
977 	} else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
978 		lock_clear(&apic_ioapic_lock);
979 		intr_restore(iflag);
980 		cmn_err(CE_NOTE,
981 		    "!apic: failed to build mapping for processor %u.",
982 		    procid);
983 		rv = EBUSY;
984 	} else {
985 		ASSERT(cpuid >= 0 && cpuid < NCPU);
986 		ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
987 		bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
988 		apic_cpus[cpuid].aci_processor_id = procid;
989 		apic_cpus[cpuid].aci_local_id = localid;
990 		apic_cpus[cpuid].aci_local_ver = localver;
991 		CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
992 		if (cpuid >= apic_nproc) {
993 			apic_nproc = cpuid + 1;
994 		}
995 		lock_clear(&apic_ioapic_lock);
996 		intr_restore(iflag);
997 		reqp->req.cpu_add.cpuid = cpuid;
998 	}
999 
1000 	return (rv);
1001 }
1002 
1003 int
1004 apic_cpu_remove(psm_cpu_request_t *reqp)
1005 {
1006 	int i;
1007 	ulong_t iflag;
1008 	processorid_t cpuid;
1009 
1010 	/* Check whether CPU hotplug is supported. */
1011 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1012 		return (ENOTSUP);
1013 	}
1014 
1015 	cpuid = reqp->req.cpu_remove.cpuid;
1016 
1017 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1018 	iflag = intr_clear();
1019 	lock_set(&apic_ioapic_lock);
1020 
1021 	if (!apic_cpu_in_range(cpuid)) {
1022 		lock_clear(&apic_ioapic_lock);
1023 		intr_restore(iflag);
1024 		cmn_err(CE_WARN,
1025 		    "!apic: cpuid %d doesn't exist in apic_cpus array.",
1026 		    cpuid);
1027 		return (ENODEV);
1028 	}
1029 	ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1030 
1031 	if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1032 		lock_clear(&apic_ioapic_lock);
1033 		intr_restore(iflag);
1034 		return (ENOENT);
1035 	}
1036 
1037 	if (cpuid == apic_nproc - 1) {
1038 		/*
1039 		 * We are removing the highest numbered cpuid so we need to
1040 		 * find the next highest cpuid as the new value for apic_nproc.
1041 		 */
1042 		for (i = apic_nproc; i > 0; i--) {
1043 			if (CPU_IN_SET(apic_cpumask, i - 1)) {
1044 				apic_nproc = i;
1045 				break;
1046 			}
1047 		}
1048 		/* at least one CPU left */
1049 		ASSERT(i > 0);
1050 	}
1051 	CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1052 	/* mark slot as free and keep it in the dirty cache */
1053 	apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1054 
1055 	lock_clear(&apic_ioapic_lock);
1056 	intr_restore(iflag);
1057 
1058 	return (0);
1059 }
1060 
1061 /*
1062  * Return the number of APIC clock ticks elapsed for 8245 to decrement
1063  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
1064  */
1065 uint_t
1066 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
1067 {
1068 	uint8_t		pit_tick_lo;
1069 	uint16_t	pit_tick, target_pit_tick;
1070 	uint32_t	start_apic_tick, end_apic_tick;
1071 	ulong_t		iflag;
1072 	uint32_t	reg;
1073 
1074 	reg = addr + APIC_CURR_COUNT - apicadr;
1075 
1076 	iflag = intr_clear();
1077 
1078 	do {
1079 		pit_tick_lo = inb(PITCTR0_PORT);
1080 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1081 	} while (pit_tick < APIC_TIME_MIN ||
1082 	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
1083 
1084 	/*
1085 	 * Wait for the 8254 to decrement by 5 ticks to ensure
1086 	 * we didn't start in the middle of a tick.
1087 	 * Compare with 0x10 for the wrap around case.
1088 	 */
1089 	target_pit_tick = pit_tick - 5;
1090 	do {
1091 		pit_tick_lo = inb(PITCTR0_PORT);
1092 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1093 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1094 
1095 	start_apic_tick = apic_reg_ops->apic_read(reg);
1096 
1097 	/*
1098 	 * Wait for the 8254 to decrement by
1099 	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
1100 	 */
1101 	target_pit_tick = pit_tick - APIC_TIME_COUNT;
1102 	do {
1103 		pit_tick_lo = inb(PITCTR0_PORT);
1104 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1105 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1106 
1107 	end_apic_tick = apic_reg_ops->apic_read(reg);
1108 
1109 	*pit_ticks_adj = target_pit_tick - pit_tick;
1110 
1111 	intr_restore(iflag);
1112 
1113 	return (start_apic_tick - end_apic_tick);
1114 }
1115 
1116 /*
1117  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1118  * frequency.  Note at this stage in the boot sequence, the boot processor
1119  * is the only active processor.
1120  * hertz value of 0 indicates a one-shot mode request.  In this case
1121  * the function returns the resolution (in nanoseconds) for the hardware
1122  * timer interrupt.  If one-shot mode capability is not available,
1123  * the return value will be 0. apic_enable_oneshot is a global switch
1124  * for disabling the functionality.
1125  * A non-zero positive value for hertz indicates a periodic mode request.
1126  * In this case the hardware will be programmed to generate clock interrupts
1127  * at hertz frequency and returns the resolution of interrupts in
1128  * nanosecond.
1129  */
1130 
1131 int
1132 apic_clkinit(int hertz)
1133 {
1134 	int		ret;
1135 
1136 	apic_int_busy_mark = (apic_int_busy_mark *
1137 	    apic_sample_factor_redistribution) / 100;
1138 	apic_int_free_mark = (apic_int_free_mark *
1139 	    apic_sample_factor_redistribution) / 100;
1140 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
1141 	    apic_sample_factor_redistribution) / 100;
1142 
1143 	ret = apic_timer_init(hertz);
1144 	return (ret);
1145 
1146 }
1147 
1148 /*
1149  * apic_preshutdown:
1150  * Called early in shutdown whilst we can still access filesystems to do
1151  * things like loading modules which will be required to complete shutdown
1152  * after filesystems are all unmounted.
1153  */
1154 void
1155 apic_preshutdown(int cmd, int fcn)
1156 {
1157 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1158 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1159 }
1160 
1161 void
1162 apic_shutdown(int cmd, int fcn)
1163 {
1164 	int restarts, attempts;
1165 	int i;
1166 	uchar_t	byte;
1167 	ulong_t iflag;
1168 
1169 	hpet_acpi_fini();
1170 
1171 	/* Send NMI to all CPUs except self to do per processor shutdown */
1172 	iflag = intr_clear();
1173 #ifdef	DEBUG
1174 	APIC_AV_PENDING_SET();
1175 #else
1176 	if (apic_mode == LOCAL_APIC)
1177 		APIC_AV_PENDING_SET();
1178 #endif /* DEBUG */
1179 	apic_shutdown_processors = 1;
1180 	apic_reg_ops->apic_write(APIC_INT_CMD1,
1181 	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1182 
1183 	/* restore cmos shutdown byte before reboot */
1184 	if (apic_cmos_ssb_set) {
1185 		outb(CMOS_ADDR, SSB);
1186 		outb(CMOS_DATA, 0);
1187 	}
1188 
1189 	ioapic_disable_redirection();
1190 
1191 	/*	disable apic mode if imcr present	*/
1192 	if (apic_imcrp) {
1193 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1194 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1195 	}
1196 
1197 	apic_disable_local_apic();
1198 
1199 	intr_restore(iflag);
1200 
1201 	/* remainder of function is for shutdown cases only */
1202 	if (cmd != A_SHUTDOWN)
1203 		return;
1204 
1205 	/*
1206 	 * Switch system back into Legacy-Mode if using ACPI and
1207 	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
1208 	 * for power-off to succeed (Dell Dimension 4600)
1209 	 * Do not disable ACPI while doing fastreboot
1210 	 */
1211 	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1212 		(void) AcpiDisable();
1213 
1214 	if (fcn == AD_FASTREBOOT) {
1215 		apic_reg_ops->apic_write(APIC_INT_CMD1,
1216 		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1217 	}
1218 
1219 	/* remainder of function is for shutdown+poweroff case only */
1220 	if (fcn != AD_POWEROFF)
1221 		return;
1222 
1223 	switch (apic_poweroff_method) {
1224 		case APIC_POWEROFF_VIA_RTC:
1225 
1226 			/* select the extended NVRAM bank in the RTC */
1227 			outb(CMOS_ADDR, RTC_REGA);
1228 			byte = inb(CMOS_DATA);
1229 			outb(CMOS_DATA, (byte | EXT_BANK));
1230 
1231 			outb(CMOS_ADDR, PFR_REG);
1232 
1233 			/* for Predator must toggle the PAB bit */
1234 			byte = inb(CMOS_DATA);
1235 
1236 			/*
1237 			 * clear power active bar, wakeup alarm and
1238 			 * kickstart
1239 			 */
1240 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1241 			outb(CMOS_DATA, byte);
1242 
1243 			/* delay before next write */
1244 			drv_usecwait(1000);
1245 
1246 			/* for S40 the following would suffice */
1247 			byte = inb(CMOS_DATA);
1248 
1249 			/* power active bar control bit */
1250 			byte |= PAB_CBIT;
1251 			outb(CMOS_DATA, byte);
1252 
1253 			break;
1254 
1255 		case APIC_POWEROFF_VIA_ASPEN_BMC:
1256 			restarts = 0;
1257 restart_aspen_bmc:
1258 			if (++restarts == 3)
1259 				break;
1260 			attempts = 0;
1261 			do {
1262 				byte = inb(MISMIC_FLAG_REGISTER);
1263 				byte &= MISMIC_BUSY_MASK;
1264 				if (byte != 0) {
1265 					drv_usecwait(1000);
1266 					if (attempts >= 3)
1267 						goto restart_aspen_bmc;
1268 					++attempts;
1269 				}
1270 			} while (byte != 0);
1271 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1272 			byte = inb(MISMIC_FLAG_REGISTER);
1273 			byte |= 0x1;
1274 			outb(MISMIC_FLAG_REGISTER, byte);
1275 			i = 0;
1276 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1277 			    i++) {
1278 				attempts = 0;
1279 				do {
1280 					byte = inb(MISMIC_FLAG_REGISTER);
1281 					byte &= MISMIC_BUSY_MASK;
1282 					if (byte != 0) {
1283 						drv_usecwait(1000);
1284 						if (attempts >= 3)
1285 							goto restart_aspen_bmc;
1286 						++attempts;
1287 					}
1288 				} while (byte != 0);
1289 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1290 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1291 				byte = inb(MISMIC_FLAG_REGISTER);
1292 				byte |= 0x1;
1293 				outb(MISMIC_FLAG_REGISTER, byte);
1294 			}
1295 			break;
1296 
1297 		case APIC_POWEROFF_VIA_SITKA_BMC:
1298 			restarts = 0;
1299 restart_sitka_bmc:
1300 			if (++restarts == 3)
1301 				break;
1302 			attempts = 0;
1303 			do {
1304 				byte = inb(SMS_STATUS_REGISTER);
1305 				byte &= SMS_STATE_MASK;
1306 				if ((byte == SMS_READ_STATE) ||
1307 				    (byte == SMS_WRITE_STATE)) {
1308 					drv_usecwait(1000);
1309 					if (attempts >= 3)
1310 						goto restart_sitka_bmc;
1311 					++attempts;
1312 				}
1313 			} while ((byte == SMS_READ_STATE) ||
1314 			    (byte == SMS_WRITE_STATE));
1315 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1316 			i = 0;
1317 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1318 			    i++) {
1319 				attempts = 0;
1320 				do {
1321 					byte = inb(SMS_STATUS_REGISTER);
1322 					byte &= SMS_IBF_MASK;
1323 					if (byte != 0) {
1324 						drv_usecwait(1000);
1325 						if (attempts >= 3)
1326 							goto restart_sitka_bmc;
1327 						++attempts;
1328 					}
1329 				} while (byte != 0);
1330 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
1331 			}
1332 			break;
1333 
1334 		case APIC_POWEROFF_NONE:
1335 
1336 			/* If no APIC direct method, we will try using ACPI */
1337 			if (apic_enable_acpi) {
1338 				if (acpi_poweroff() == 1)
1339 					return;
1340 			} else
1341 				return;
1342 
1343 			break;
1344 	}
1345 	/*
1346 	 * Wait a limited time here for power to go off.
1347 	 * If the power does not go off, then there was a
1348 	 * problem and we should continue to the halt which
1349 	 * prints a message for the user to press a key to
1350 	 * reboot.
1351 	 */
1352 	drv_usecwait(7000000); /* wait seven seconds */
1353 
1354 }
1355 
1356 cyclic_id_t apic_cyclic_id;
1357 
1358 /*
1359  * The following functions are in the platform specific file so that they
1360  * can be different functions depending on whether we are running on
1361  * bare metal or a hypervisor.
1362  */
1363 
1364 /*
1365  * map an apic for memory-mapped access
1366  */
1367 uint32_t *
1368 mapin_apic(uint32_t addr, size_t len, int flags)
1369 {
1370 	return ((void *)psm_map_phys(addr, len, flags));
1371 }
1372 
1373 uint32_t *
1374 mapin_ioapic(uint32_t addr, size_t len, int flags)
1375 {
1376 	return (mapin_apic(addr, len, flags));
1377 }
1378 
1379 /*
1380  * unmap an apic
1381  */
1382 void
1383 mapout_apic(caddr_t addr, size_t len)
1384 {
1385 	psm_unmap_phys(addr, len);
1386 }
1387 
1388 void
1389 mapout_ioapic(caddr_t addr, size_t len)
1390 {
1391 	mapout_apic(addr, len);
1392 }
1393 
1394 uint32_t
1395 ioapic_read(int ioapic_ix, uint32_t reg)
1396 {
1397 	volatile uint32_t *ioapic;
1398 
1399 	ioapic = apicioadr[ioapic_ix];
1400 	ioapic[APIC_IO_REG] = reg;
1401 	return (ioapic[APIC_IO_DATA]);
1402 }
1403 
1404 void
1405 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1406 {
1407 	volatile uint32_t *ioapic;
1408 
1409 	ioapic = apicioadr[ioapic_ix];
1410 	ioapic[APIC_IO_REG] = reg;
1411 	ioapic[APIC_IO_DATA] = value;
1412 }
1413 
1414 void
1415 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1416 {
1417 	volatile uint32_t *ioapic;
1418 
1419 	ioapic = apicioadr[ioapic_ix];
1420 	ioapic[APIC_IO_EOI] = value;
1421 }
1422 
1423 /*
1424  * Round-robin algorithm to find the next CPU with interrupts enabled.
1425  * It can't share the same static variable apic_next_bind_cpu with
1426  * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1427  * bound to CPU1 at boot time.  During boot, only CPU0 is online with
1428  * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1429  * are called.  However, the pcplusmp driver assumes that there will be
1430  * boot_ncpus CPUs configured eventually so it tries to distribute all
1431  * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
1432  * interrupts being targetted at CPU1, we need to use a dedicated static
1433  * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1434  */
1435 
1436 processorid_t
1437 apic_find_cpu(int flag)
1438 {
1439 	int i;
1440 	static processorid_t acid = 0;
1441 
1442 	/* Find the first CPU with the passed-in flag set */
1443 	for (i = 0; i < apic_nproc; i++) {
1444 		if (++acid >= apic_nproc) {
1445 			acid = 0;
1446 		}
1447 		if (apic_cpu_in_range(acid) &&
1448 		    (apic_cpus[acid].aci_status & flag)) {
1449 			break;
1450 		}
1451 	}
1452 
1453 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1454 	return (acid);
1455 }
1456 
1457 void
1458 apic_intrmap_init(int apic_mode)
1459 {
1460 	int suppress_brdcst_eoi = 0;
1461 
1462 	/*
1463 	 * Intel Software Developer's Manual 3A, 10.12.7:
1464 	 *
1465 	 * Routing of device interrupts to local APIC units operating in
1466 	 * x2APIC mode requires use of the interrupt-remapping architecture
1467 	 * specified in the Intel Virtualization Technology for Directed
1468 	 * I/O, Revision 1.3.  Because of this, BIOS must enumerate support
1469 	 * for and software must enable this interrupt remapping with
1470 	 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1471 	 * the local APIC units.
1472 	 *
1473 	 *
1474 	 * In other words, to use the APIC in x2APIC mode, we need interrupt
1475 	 * remapping.  Since we don't start up the IOMMU by default, we
1476 	 * won't be able to do any interrupt remapping and therefore have to
1477 	 * use the APIC in traditional 'local APIC' mode with memory mapped
1478 	 * I/O.
1479 	 */
1480 
1481 	if (psm_vt_ops != NULL) {
1482 		if (((apic_intrmap_ops_t *)psm_vt_ops)->
1483 		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1484 
1485 			apic_vt_ops = psm_vt_ops;
1486 
1487 			/*
1488 			 * We leverage the interrupt remapping engine to
1489 			 * suppress broadcast EOI; thus we must send the
1490 			 * directed EOI with the directed-EOI handler.
1491 			 */
1492 			if (apic_directed_EOI_supported() == 0) {
1493 				suppress_brdcst_eoi = 1;
1494 			}
1495 
1496 			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1497 
1498 			if (apic_detect_x2apic()) {
1499 				apic_enable_x2apic();
1500 			}
1501 
1502 			if (apic_directed_EOI_supported() == 0) {
1503 				apic_set_directed_EOI_handler();
1504 			}
1505 		}
1506 	}
1507 }
1508 
1509 /*ARGSUSED*/
1510 static void
1511 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1512 {
1513 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1514 }
1515 
1516 /*ARGSUSED*/
1517 static void
1518 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1519 {
1520 	mregs->mr_addr = MSI_ADDR_HDR |
1521 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1522 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1523 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1524 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1525 	    mregs->mr_data;
1526 }
1527 
1528 /*
1529  * Functions from apic_introp.c
1530  *
1531  * Those functions are used by apic_intr_ops().
1532  */
1533 
1534 /*
1535  * MSI support flag:
1536  * reflects whether MSI is supported at APIC level
1537  * it can also be patched through /etc/system
1538  *
1539  *  0 = default value - don't know and need to call apic_check_msi_support()
1540  *      to find out then set it accordingly
1541  *  1 = supported
1542  * -1 = not supported
1543  */
1544 int	apic_support_msi = 0;
1545 
1546 /* Multiple vector support for MSI-X */
1547 int	apic_msix_enable = 1;
1548 
1549 /* Multiple vector support for MSI */
1550 int	apic_multi_msi_enable = 1;
1551 
1552 /*
1553  * Check whether the system supports MSI.
1554  *
1555  * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1556  * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1557  * return PSM_SUCCESS to indicate this system supports MSI.
1558  *
1559  * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1560  * by detecting if we are running inside the KVM hypervisor, which guarantees
1561  * this version number.)
1562  */
1563 int
1564 apic_check_msi_support()
1565 {
1566 	dev_info_t *cdip;
1567 	char dev_type[16];
1568 	int dev_len;
1569 
1570 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1571 
1572 	/*
1573 	 * check whether the first level children of root_node have
1574 	 * PCI-E or PCI capability.
1575 	 */
1576 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1577 	    cdip = ddi_get_next_sibling(cdip)) {
1578 
1579 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1580 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1581 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
1582 		    ddi_node_name(cdip)));
1583 		dev_len = sizeof (dev_type);
1584 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1585 		    "device_type", (caddr_t)dev_type, &dev_len)
1586 		    != DDI_PROP_SUCCESS)
1587 			continue;
1588 		if (strcmp(dev_type, "pciex") == 0)
1589 			return (PSM_SUCCESS);
1590 		if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM)
1591 			return (PSM_SUCCESS);
1592 	}
1593 
1594 	/* MSI is not supported on this system */
1595 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1596 	    "device_type found\n"));
1597 	return (PSM_FAILURE);
1598 }
1599 
/*
 * apic_pci_msi_unconfigure:
 *
 * This and the next two interfaces are copied from pci_intr_lib.c.
 * Please ensure that the two files stay in sync.
 * They needed to be copied over here to avoid a deadlock situation on
 * certain MP systems that use MSI interrupts.
 *
 * IMPORTANT, regarding these interfaces:
 * i) they are called only for MSI/MSI-X interrupts;
 * ii) they are called with interrupts disabled, and must not block.
 */
1612 void
1613 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1614 {
1615 	ushort_t		msi_ctrl;
1616 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1617 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1618 
1619 	ASSERT((handle != NULL) && (cap_ptr != 0));
1620 
1621 	if (type == DDI_INTR_TYPE_MSI) {
1622 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1623 		msi_ctrl &= (~PCI_MSI_MME_MASK);
1624 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1625 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1626 
1627 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
1628 			pci_config_put16(handle,
1629 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
1630 			pci_config_put32(handle,
1631 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1632 		} else {
1633 			pci_config_put16(handle,
1634 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
1635 		}
1636 
1637 	} else if (type == DDI_INTR_TYPE_MSIX) {
1638 		uintptr_t	off;
1639 		uint32_t	mask;
1640 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
1641 
1642 		ASSERT(msix_p != NULL);
1643 
1644 		/* Offset into "inum"th entry in the MSI-X table & mask it */
1645 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1646 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1647 
1648 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1649 
1650 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1651 
1652 		/* Offset into the "inum"th entry in the MSI-X table */
1653 		off = (uintptr_t)msix_p->msix_tbl_addr +
1654 		    (inum * PCI_MSIX_VECTOR_SIZE);
1655 
1656 		/* Reset the "data" and "addr" bits */
1657 		ddi_put32(msix_p->msix_tbl_hdl,
1658 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1659 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1660 	}
1661 }
1662 
1663 /*
1664  * apic_pci_msi_disable_mode:
1665  */
1666 void
1667 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1668 {
1669 	ushort_t		msi_ctrl;
1670 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1671 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1672 
1673 	ASSERT((handle != NULL) && (cap_ptr != 0));
1674 
1675 	if (type == DDI_INTR_TYPE_MSI) {
1676 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1677 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1678 			return;
1679 
1680 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
1681 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1682 
1683 	} else if (type == DDI_INTR_TYPE_MSIX) {
1684 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1685 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1686 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1687 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1688 			    msi_ctrl);
1689 		}
1690 	}
1691 }
1692 
1693 uint32_t
1694 apic_get_localapicid(uint32_t cpuid)
1695 {
1696 	ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1697 
1698 	return (apic_cpus[cpuid].aci_local_id);
1699 }
1700 
1701 uchar_t
1702 apic_get_ioapicid(uchar_t ioapicindex)
1703 {
1704 	ASSERT(ioapicindex < MAX_IO_APIC);
1705 
1706 	return (apic_io_id[ioapicindex]);
1707 }
1708