xref: /titanic_44/usr/src/uts/i86pc/io/pcplusmp/apic_common.c (revision ad0b1ea5d69a45fe23c434277599e315f29a5fca)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
27  */
28 
29 /*
30  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
31  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
32  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
33  * PSMI 1.5 extensions are supported in Solaris Nevada.
34  * PSMI 1.6 extensions are supported in Solaris Nevada.
35  * PSMI 1.7 extensions are supported in Solaris Nevada.
36  */
37 #define	PSMI_1_7
38 
39 #include <sys/processor.h>
40 #include <sys/time.h>
41 #include <sys/psm.h>
42 #include <sys/smp_impldefs.h>
43 #include <sys/cram.h>
44 #include <sys/acpi/acpi.h>
45 #include <sys/acpica.h>
46 #include <sys/psm_common.h>
47 #include <sys/apic.h>
48 #include <sys/pit.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/ddi_impldefs.h>
52 #include <sys/pci.h>
53 #include <sys/promif.h>
54 #include <sys/x86_archext.h>
55 #include <sys/cpc_impl.h>
56 #include <sys/uadmin.h>
57 #include <sys/panic.h>
58 #include <sys/debug.h>
59 #include <sys/archsystm.h>
60 #include <sys/trap.h>
61 #include <sys/machsystm.h>
62 #include <sys/sysmacros.h>
63 #include <sys/cpuvar.h>
64 #include <sys/rm_platter.h>
65 #include <sys/privregs.h>
66 #include <sys/note.h>
67 #include <sys/pci_intr_lib.h>
68 #include <sys/spl.h>
69 #include <sys/clock.h>
70 #include <sys/dditypes.h>
71 #include <sys/sunddi.h>
72 #include <sys/x_call.h>
73 #include <sys/reboot.h>
74 #include <sys/hpet.h>
75 #include <sys/apic_common.h>
76 #include <sys/apic_timer.h>
77 
/*
 * Default (no interrupt remapping) record hooks.  They are installed in
 * apic_nointrmap_ops further down in this file; their definitions are not
 * part of this section.
 */
static void	apic_record_ioapic_rdt(void *intrmap_private,
		    ioapic_rdt_t *irdt);
static void	apic_record_msi(void *intrmap_private, msi_regs_t *mregs);

/*
 * Common routines between pcplusmp & apix (taken from apic.c).
 */

int	apic_clkinit(int);
hrtime_t apic_gethrtime(void);
void	apic_send_ipi(int, int);
void	apic_set_idlecpu(processorid_t);
void	apic_unset_idlecpu(processorid_t);
void	apic_shutdown(int, int);
void	apic_preshutdown(int, int);
processorid_t	apic_get_next_processorid(processorid_t);

/* coarse (clock-interrupt granularity) variant of apic_gethrtime() */
hrtime_t apic_gettime();
96 
/*
 * IOAPIC EOI handling method used by the apix module.  It is selected by
 * apic_ioapic_method_probe(); if it is still APIC_MUL_IOAPIC_PCPLUSMP after
 * probing, apix is disabled in favour of pcplusmp.
 */
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;

/* Now the ones for Dynamic Interrupt distribution */
int	apic_enable_dynamic_migration = 0;

/*
 * maximum loop count when sending Start IPIs: bounds the wait for the
 * delivery-pending bit in apic_cpu_send_SIPI().
 */
int apic_sipi_max_loop_count = 0x1000;

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
int apic_clkvect;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
/*
 * Number of tenmicrosec() delays apic_error_intr() inserts after printing
 * an error; that handler doubles it each time while it is below 500.
 */
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* vector at which CMCI interrupts come in */
int apic_cmci_vect;
extern int cmi_enable_cmci;
extern void cmi_cmci_trap(void);

kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
int cmci_cpu_setup_registered;

/* number of CPUs in power-on transition state */
static int apic_poweron_cnt = 0;
lock_t apic_mode_switch_lock;
133 
/*
 * Patchable global variables.
 */
int	apic_forceload = 0;

int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */

int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
int	apic_panic_on_nmi = 0;		/* non-zero: apic_nmi_intr() panics */
int	apic_panic_on_apic_error = 0;	/* non-zero: apic_error_intr() panics */

int	apic_verbose = 0;	/* 0x1ff */

#ifdef DEBUG
int	apic_debug = 0;
int	apic_restrict_vector = 0;

int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

#endif /* DEBUG */

uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;

/*
 * State shared by apic_gettime() and apic_gethrtime().  Readers spin while
 * the low bit of apic_hrtime_stamp is set and retry if the stamp changes
 * while they are sampling apic_nsec_since_boot.
 */
uint_t last_count_read = 0;
lock_t	apic_gethrtime_lock;
volatile int	apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;

/* last value handed out by apic_gethrtime(); keeps results from regressing */
static	hrtime_t	apic_last_hrtime = 0;
int		apic_hrtime_error = 0;	/* computed hrtime went backwards */
int		apic_remote_hrterr = 0;	/* remote read of CPU 0 was invalid */
int		apic_num_nmis = 0;	/* NMIs handled by apic_nmi_intr() */
int		apic_apic_error = 0;	/* accumulated APIC error status bits */
int		apic_num_apic_errors = 0;
int		apic_num_cksum_errors = 0;

/* accumulated APIC_ERR_* flags (set by the error and NMI handlers) */
int	apic_error = 0;

/* set to 1 by apic_cpu_start() before it sends the startup sequence */
static	int	apic_cmos_ssb_set = 0;

/* use to make sure only one cpu handles the nmi */
lock_t	apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
lock_t	apic_error_lock;
180 
/*
 * Byte sequence for the "aspen" BMC, as (control, data) pairs: one
 * SET_WATCHDOG_TIMER request followed by one RESET_WATCHDOG_TIMER request.
 * NOTE(review): the code that walks this table is outside this section --
 * presumably the shutdown path; confirm against apic_shutdown().
 */
static	struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
197 
/*
 * Byte sequence for the "sitka" BMC, as (port, data) pairs.  It carries the
 * same two requests as aspen_bmc (SET_WATCHDOG_TIMER, then
 * RESET_WATCHDOG_TIMER), but framing is done by writing SMS_WRITE_START /
 * SMS_WRITE_END to the command register around the data bytes.
 * NOTE(review): the code that walks this table is outside this section.
 */
static	struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
218 
/* Patchable global variables. */
int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */

/*
 * default apic ops without interrupt remapping: every hook is stubbed out
 * with return_instr except the two record routines.
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
	(int (*)(int))return_instr,
	(void (*)(int))return_instr,
	(void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
	(void (*)(void *, void *, uint16_t, int))return_instr,
	(void (*)(void **))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};

/* interrupt-remapping ops in use; starts out as the no-remapping set */
apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
/* per-CPU local APIC information, indexed by cpu id */
apic_cpus_info_t	*apic_cpus = NULL;
/* cpu ids whose apic_cpus[] slot is in use (see apic_cpu_add/remove) */
cpuset_t	apic_cpumask;
uint_t		apic_picinit_called;

/* Flag to indicate that we need to shut down all processors */
static uint_t	apic_shutdown_processors;
241 
242 /*
243  * Probe the ioapic method for apix module. Called in apic_probe_common()
244  */
245 int
246 apic_ioapic_method_probe()
247 {
248 	if (apix_enable == 0)
249 		return (PSM_SUCCESS);
250 
251 	/*
252 	 * Set IOAPIC EOI handling method. The priority from low to high is:
253 	 * 	1. IOxAPIC: with EOI register
254 	 * 	2. IOMMU interrupt mapping
255 	 *	3. Mask-Before-EOI method for systems without boot
256 	 *	interrupt routing, such as systems with only one IOAPIC;
257 	 *	NVIDIA CK8-04/MCP55 systems; systems with bridge solution
258 	 *	which disables the boot interrupt routing already.
259 	 * 	4. Directed EOI
260 	 */
261 	if (apic_io_ver[0] >= 0x20)
262 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
263 	if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
264 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
265 	if (apic_directed_EOI_supported())
266 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
267 
268 	/* fall back to pcplusmp */
269 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
270 		/* make sure apix is after pcplusmp in /etc/mach */
271 		apix_enable = 0; /* go ahead with pcplusmp install next */
272 		return (PSM_FAILURE);
273 	}
274 
275 	return (PSM_SUCCESS);
276 }
277 
278 /*
279  * handler for APIC Error interrupt. Just print a warning and continue
280  */
281 int
282 apic_error_intr()
283 {
284 	uint_t	error0, error1, error;
285 	uint_t	i;
286 
287 	/*
288 	 * We need to write before read as per 7.4.17 of system prog manual.
289 	 * We do both and or the results to be safe
290 	 */
291 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
292 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
293 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
294 	error = error0 | error1;
295 
296 	/*
297 	 * Clear the APIC error status (do this on all cpus that enter here)
298 	 * (two writes are required due to the semantics of accessing the
299 	 * error status register.)
300 	 */
301 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
302 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
303 
304 	/*
305 	 * Prevent more than 1 CPU from handling error interrupt causing
306 	 * double printing (interleave of characters from multiple
307 	 * CPU's when using prom_printf)
308 	 */
309 	if (lock_try(&apic_error_lock) == 0)
310 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
311 	if (error) {
312 #if	DEBUG
313 		if (apic_debug)
314 			debug_enter("pcplusmp: APIC Error interrupt received");
315 #endif /* DEBUG */
316 		if (apic_panic_on_apic_error)
317 			cmn_err(CE_PANIC,
318 			    "APIC Error interrupt on CPU %d. Status = %x",
319 			    psm_get_cpu_id(), error);
320 		else {
321 			if ((error & ~APIC_CS_ERRORS) == 0) {
322 				/* cksum error only */
323 				apic_error |= APIC_ERR_APIC_ERROR;
324 				apic_apic_error |= error;
325 				apic_num_apic_errors++;
326 				apic_num_cksum_errors++;
327 			} else {
328 				/*
329 				 * prom_printf is the best shot we have of
330 				 * something which is problem free from
331 				 * high level/NMI type of interrupts
332 				 */
333 				prom_printf("APIC Error interrupt on CPU %d. "
334 				    "Status 0 = %x, Status 1 = %x\n",
335 				    psm_get_cpu_id(), error0, error1);
336 				apic_error |= APIC_ERR_APIC_ERROR;
337 				apic_apic_error |= error;
338 				apic_num_apic_errors++;
339 				for (i = 0; i < apic_error_display_delay; i++) {
340 					tenmicrosec();
341 				}
342 				/*
343 				 * provide more delay next time limited to
344 				 * roughly 1 clock tick time
345 				 */
346 				if (apic_error_display_delay < 500)
347 					apic_error_display_delay *= 2;
348 			}
349 		}
350 		lock_clear(&apic_error_lock);
351 		return (DDI_INTR_CLAIMED);
352 	} else {
353 		lock_clear(&apic_error_lock);
354 		return (DDI_INTR_UNCLAIMED);
355 	}
356 }
357 
358 /*
359  * Turn off the mask bit in the performance counter Local Vector Table entry.
360  */
361 void
362 apic_cpcovf_mask_clear(void)
363 {
364 	apic_reg_ops->apic_write(APIC_PCINT_VECT,
365 	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
366 }
367 
368 /*ARGSUSED*/
369 static int
370 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
371 {
372 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
373 	return (0);
374 }
375 
376 /*ARGSUSED*/
377 static int
378 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
379 {
380 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
381 	return (0);
382 }
383 
384 /*ARGSUSED*/
385 int
386 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
387 {
388 	cpuset_t	cpu_set;
389 
390 	CPUSET_ONLY(cpu_set, cpuid);
391 
392 	switch (what) {
393 		case CPU_ON:
394 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
395 			    (xc_func_t)apic_cmci_enable);
396 			break;
397 
398 		case CPU_OFF:
399 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
400 			    (xc_func_t)apic_cmci_disable);
401 			break;
402 
403 		default:
404 			break;
405 	}
406 
407 	return (0);
408 }
409 
410 static void
411 apic_disable_local_apic(void)
412 {
413 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
414 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
415 
416 	/* local intr reg 0 */
417 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
418 
419 	/* disable NMI */
420 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
421 
422 	/* and error interrupt */
423 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
424 
425 	/* and perf counter intr */
426 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
427 
428 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
429 }
430 
431 static void
432 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
433 {
434 	int		loop_count;
435 	uint32_t	vector;
436 	uint_t		apicid;
437 	ulong_t		iflag;
438 
439 	apicid =  apic_cpus[cpun].aci_local_id;
440 
441 	/*
442 	 * Interrupts on current CPU will be disabled during the
443 	 * steps in order to avoid unwanted side effects from
444 	 * executing interrupt handlers on a problematic BIOS.
445 	 */
446 	iflag = intr_clear();
447 
448 	if (start) {
449 		outb(CMOS_ADDR, SSB);
450 		outb(CMOS_DATA, BIOS_SHUTDOWN);
451 	}
452 
453 	/*
454 	 * According to X2APIC specification in section '2.3.5.1' of
455 	 * Interrupt Command Register Semantics, the semantics of
456 	 * programming the Interrupt Command Register to dispatch an interrupt
457 	 * is simplified. A single MSR write to the 64-bit ICR is required
458 	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
459 	 * interface to ICR, system software is not required to check the
460 	 * status of the delivery status bit prior to writing to the ICR
461 	 * to send an IPI. With the removal of the Delivery Status bit,
462 	 * system software no longer has a reason to read the ICR. It remains
463 	 * readable only to aid in debugging.
464 	 */
465 #ifdef	DEBUG
466 	APIC_AV_PENDING_SET();
467 #else
468 	if (apic_mode == LOCAL_APIC) {
469 		APIC_AV_PENDING_SET();
470 	}
471 #endif /* DEBUG */
472 
473 	/* for integrated - make sure there is one INIT IPI in buffer */
474 	/* for external - it will wake up the cpu */
475 	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
476 
477 	/* If only 1 CPU is installed, PENDING bit will not go low */
478 	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
479 		if (apic_mode == LOCAL_APIC &&
480 		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
481 			apic_ret();
482 		else
483 			break;
484 	}
485 
486 	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
487 	drv_usecwait(20000);		/* 20 milli sec */
488 
489 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
490 		/* integrated apic */
491 
492 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
493 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
494 
495 		/* to offset the INIT IPI queue up in the buffer */
496 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
497 		drv_usecwait(200);		/* 20 micro sec */
498 
499 		/*
500 		 * send the second SIPI (Startup IPI) as recommended by Intel
501 		 * software development manual.
502 		 */
503 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
504 		drv_usecwait(200);	/* 20 micro sec */
505 	}
506 
507 	intr_restore(iflag);
508 }
509 
510 /*ARGSUSED1*/
511 int
512 apic_cpu_start(processorid_t cpun, caddr_t arg)
513 {
514 	ASSERT(MUTEX_HELD(&cpu_lock));
515 
516 	if (!apic_cpu_in_range(cpun)) {
517 		return (EINVAL);
518 	}
519 
520 	/*
521 	 * Switch to apic_common_send_ipi for safety during starting other CPUs.
522 	 */
523 	if (apic_mode == LOCAL_X2APIC) {
524 		apic_switch_ipi_callback(B_TRUE);
525 	}
526 
527 	apic_cmos_ssb_set = 1;
528 	apic_cpu_send_SIPI(cpun, B_TRUE);
529 
530 	return (0);
531 }
532 
533 /*
534  * Put CPU into halted state with interrupts disabled.
535  */
536 /*ARGSUSED1*/
537 int
538 apic_cpu_stop(processorid_t cpun, caddr_t arg)
539 {
540 	int		rc;
541 	cpu_t 		*cp;
542 	extern cpuset_t cpu_ready_set;
543 	extern void cpu_idle_intercept_cpu(cpu_t *cp);
544 
545 	ASSERT(MUTEX_HELD(&cpu_lock));
546 
547 	if (!apic_cpu_in_range(cpun)) {
548 		return (EINVAL);
549 	}
550 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
551 		return (ENOTSUP);
552 	}
553 
554 	cp = cpu_get(cpun);
555 	ASSERT(cp != NULL);
556 	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
557 	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
558 	ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
559 
560 	/* Clear CPU_READY flag to disable cross calls. */
561 	cp->cpu_flags &= ~CPU_READY;
562 	CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
563 	rc = xc_flush_cpu(cp);
564 	if (rc != 0) {
565 		CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
566 		cp->cpu_flags |= CPU_READY;
567 		return (rc);
568 	}
569 
570 	/* Intercept target CPU at a safe point before powering it off. */
571 	cpu_idle_intercept_cpu(cp);
572 
573 	apic_cpu_send_SIPI(cpun, B_FALSE);
574 	cp->cpu_flags &= ~CPU_RUNNING;
575 
576 	return (0);
577 }
578 
579 int
580 apic_cpu_ops(psm_cpu_request_t *reqp)
581 {
582 	if (reqp == NULL) {
583 		return (EINVAL);
584 	}
585 
586 	switch (reqp->pcr_cmd) {
587 	case PSM_CPU_ADD:
588 		return (apic_cpu_add(reqp));
589 
590 	case PSM_CPU_REMOVE:
591 		return (apic_cpu_remove(reqp));
592 
593 	case PSM_CPU_STOP:
594 		return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
595 		    reqp->req.cpu_stop.ctx));
596 
597 	default:
598 		return (ENOTSUP);
599 	}
600 }
601 
/*
 * Tunables that exist only in DEBUG kernels.
 * NOTE(review): nothing in this section of the file reads them; their
 * consumers must be confirmed elsewhere.
 */
#ifdef	DEBUG
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */
607 
608 /*
609  * generates an interprocessor interrupt to another CPU. Any changes made to
610  * this routine must be accompanied by similar changes to
611  * apic_common_send_ipi().
612  */
613 void
614 apic_send_ipi(int cpun, int ipl)
615 {
616 	int vector;
617 	ulong_t flag;
618 
619 	vector = apic_resv_vector[ipl];
620 
621 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
622 
623 	flag = intr_clear();
624 
625 	APIC_AV_PENDING_SET();
626 
627 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
628 	    vector);
629 
630 	intr_restore(flag);
631 }
632 
633 
/*
 * Idle-CPU notification hook.  Intentionally empty: nothing is done here
 * when cpun becomes idle.
 */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}
639 
/*
 * Counterpart of apic_set_idlecpu().  Intentionally empty: nothing is done
 * here when cpun stops being idle.
 */
/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}
645 
646 
/*
 * Empty function.  It is called as the body of the busy-wait loops in this
 * file (waiting on apic_hrtime_stamp and on the ICR pending bits).
 */
void
apic_ret()
{
}
651 
652 /*
653  * If apic_coarse_time == 1, then apic_gettime() is used instead of
654  * apic_gethrtime().  This is used for performance instead of accuracy.
655  */
656 
657 hrtime_t
658 apic_gettime()
659 {
660 	int old_hrtime_stamp;
661 	hrtime_t temp;
662 
663 	/*
664 	 * In one-shot mode, we do not keep time, so if anyone
665 	 * calls psm_gettime() directly, we vector over to
666 	 * gethrtime().
667 	 * one-shot mode MUST NOT be enabled if this psm is the source of
668 	 * hrtime.
669 	 */
670 
671 	if (apic_oneshot)
672 		return (gethrtime());
673 
674 
675 gettime_again:
676 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
677 		apic_ret();
678 
679 	temp = apic_nsec_since_boot;
680 
681 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
682 		goto gettime_again;
683 	}
684 	return (temp);
685 }
686 
687 /*
688  * Here we return the number of nanoseconds since booting.  Note every
689  * clock interrupt increments apic_nsec_since_boot by the appropriate
690  * amount.
691  */
692 hrtime_t
693 apic_gethrtime(void)
694 {
695 	int curr_timeval, countval, elapsed_ticks;
696 	int old_hrtime_stamp, status;
697 	hrtime_t temp;
698 	uint32_t cpun;
699 	ulong_t oflags;
700 
701 	/*
702 	 * In one-shot mode, we do not keep time, so if anyone
703 	 * calls psm_gethrtime() directly, we vector over to
704 	 * gethrtime().
705 	 * one-shot mode MUST NOT be enabled if this psm is the source of
706 	 * hrtime.
707 	 */
708 
709 	if (apic_oneshot)
710 		return (gethrtime());
711 
712 	oflags = intr_clear();	/* prevent migration */
713 
714 	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
715 	if (apic_mode == LOCAL_APIC)
716 		cpun >>= APIC_ID_BIT_OFFSET;
717 
718 	lock_set(&apic_gethrtime_lock);
719 
720 gethrtime_again:
721 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
722 		apic_ret();
723 
724 	/*
725 	 * Check to see which CPU we are on.  Note the time is kept on
726 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
727 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
728 	 */
729 	if (cpun == apic_cpus[0].aci_local_id) {
730 		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
731 	} else {
732 #ifdef	DEBUG
733 		APIC_AV_PENDING_SET();
734 #else
735 		if (apic_mode == LOCAL_APIC)
736 			APIC_AV_PENDING_SET();
737 #endif /* DEBUG */
738 
739 		apic_reg_ops->apic_write_int_cmd(
740 		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
741 
742 		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
743 		    & AV_READ_PENDING) {
744 			apic_ret();
745 		}
746 
747 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
748 			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
749 		else {	/* 0 = invalid */
750 			apic_remote_hrterr++;
751 			/*
752 			 * return last hrtime right now, will need more
753 			 * testing if change to retry
754 			 */
755 			temp = apic_last_hrtime;
756 
757 			lock_clear(&apic_gethrtime_lock);
758 
759 			intr_restore(oflags);
760 
761 			return (temp);
762 		}
763 	}
764 	if (countval > last_count_read)
765 		countval = 0;
766 	else
767 		last_count_read = countval;
768 
769 	elapsed_ticks = apic_hertz_count - countval;
770 
771 	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
772 	temp = apic_nsec_since_boot + curr_timeval;
773 
774 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
775 		/* we might have clobbered last_count_read. Restore it */
776 		last_count_read = apic_hertz_count;
777 		goto gethrtime_again;
778 	}
779 
780 	if (temp < apic_last_hrtime) {
781 		/* return last hrtime if error occurs */
782 		apic_hrtime_error++;
783 		temp = apic_last_hrtime;
784 	}
785 	else
786 		apic_last_hrtime = temp;
787 
788 	lock_clear(&apic_gethrtime_lock);
789 	intr_restore(oflags);
790 
791 	return (temp);
792 }
793 
794 /* apic NMI handler */
795 /*ARGSUSED*/
796 void
797 apic_nmi_intr(caddr_t arg, struct regs *rp)
798 {
799 	if (apic_shutdown_processors) {
800 		apic_disable_local_apic();
801 		return;
802 	}
803 
804 	apic_error |= APIC_ERR_NMI;
805 
806 	if (!lock_try(&apic_nmi_lock))
807 		return;
808 	apic_num_nmis++;
809 
810 	if (apic_kmdb_on_nmi && psm_debugger()) {
811 		debug_enter("NMI received: entering kmdb\n");
812 	} else if (apic_panic_on_nmi) {
813 		/* Keep panic from entering kmdb. */
814 		nopanicdebug = 1;
815 		panic("NMI received\n");
816 	} else {
817 		/*
818 		 * prom_printf is the best shot we have of something which is
819 		 * problem free from high level/NMI type of interrupts
820 		 */
821 		prom_printf("NMI received\n");
822 	}
823 
824 	lock_clear(&apic_nmi_lock);
825 }
826 
827 processorid_t
828 apic_get_next_processorid(processorid_t cpu_id)
829 {
830 
831 	int i;
832 
833 	if (cpu_id == -1)
834 		return ((processorid_t)0);
835 
836 	for (i = cpu_id + 1; i < NCPU; i++) {
837 		if (apic_cpu_in_range(i))
838 			return (i);
839 	}
840 
841 	return ((processorid_t)-1);
842 }
843 
844 int
845 apic_cpu_add(psm_cpu_request_t *reqp)
846 {
847 	int i, rv = 0;
848 	ulong_t iflag;
849 	boolean_t first = B_TRUE;
850 	uchar_t localver;
851 	uint32_t localid, procid;
852 	processorid_t cpuid = (processorid_t)-1;
853 	mach_cpu_add_arg_t *ap;
854 
855 	ASSERT(reqp != NULL);
856 	reqp->req.cpu_add.cpuid = (processorid_t)-1;
857 
858 	/* Check whether CPU hotplug is supported. */
859 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
860 		return (ENOTSUP);
861 	}
862 
863 	ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
864 	switch (ap->type) {
865 	case MACH_CPU_ARG_LOCAL_APIC:
866 		localid = ap->arg.apic.apic_id;
867 		procid = ap->arg.apic.proc_id;
868 		if (localid >= 255 || procid > 255) {
869 			cmn_err(CE_WARN,
870 			    "!apic: apicid(%u) or procid(%u) is invalid.",
871 			    localid, procid);
872 			return (EINVAL);
873 		}
874 		break;
875 
876 	case MACH_CPU_ARG_LOCAL_X2APIC:
877 		localid = ap->arg.apic.apic_id;
878 		procid = ap->arg.apic.proc_id;
879 		if (localid >= UINT32_MAX) {
880 			cmn_err(CE_WARN,
881 			    "!apic: x2apicid(%u) is invalid.", localid);
882 			return (EINVAL);
883 		} else if (localid >= 255 && apic_mode == LOCAL_APIC) {
884 			cmn_err(CE_WARN, "!apic: system is in APIC mode, "
885 			    "can't support x2APIC processor.");
886 			return (ENOTSUP);
887 		}
888 		break;
889 
890 	default:
891 		cmn_err(CE_WARN,
892 		    "!apic: unknown argument type %d to apic_cpu_add().",
893 		    ap->type);
894 		return (EINVAL);
895 	}
896 
897 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
898 	iflag = intr_clear();
899 	lock_set(&apic_ioapic_lock);
900 
901 	/* Check whether local APIC id already exists. */
902 	for (i = 0; i < apic_nproc; i++) {
903 		if (!CPU_IN_SET(apic_cpumask, i))
904 			continue;
905 		if (apic_cpus[i].aci_local_id == localid) {
906 			lock_clear(&apic_ioapic_lock);
907 			intr_restore(iflag);
908 			cmn_err(CE_WARN,
909 			    "!apic: local apic id %u already exists.",
910 			    localid);
911 			return (EEXIST);
912 		} else if (apic_cpus[i].aci_processor_id == procid) {
913 			lock_clear(&apic_ioapic_lock);
914 			intr_restore(iflag);
915 			cmn_err(CE_WARN,
916 			    "!apic: processor id %u already exists.",
917 			    (int)procid);
918 			return (EEXIST);
919 		}
920 
921 		/*
922 		 * There's no local APIC version number available in MADT table,
923 		 * so assume that all CPUs are homogeneous and use local APIC
924 		 * version number of the first existing CPU.
925 		 */
926 		if (first) {
927 			first = B_FALSE;
928 			localver = apic_cpus[i].aci_local_ver;
929 		}
930 	}
931 	ASSERT(first == B_FALSE);
932 
933 	/*
934 	 * Try to assign the same cpuid if APIC id exists in the dirty cache.
935 	 */
936 	for (i = 0; i < apic_max_nproc; i++) {
937 		if (CPU_IN_SET(apic_cpumask, i)) {
938 			ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
939 			continue;
940 		}
941 		ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
942 		if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
943 		    apic_cpus[i].aci_local_id == localid &&
944 		    apic_cpus[i].aci_processor_id == procid) {
945 			cpuid = i;
946 			break;
947 		}
948 	}
949 
950 	/* Avoid the dirty cache and allocate fresh slot if possible. */
951 	if (cpuid == (processorid_t)-1) {
952 		for (i = 0; i < apic_max_nproc; i++) {
953 			if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
954 			    (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
955 				cpuid = i;
956 				break;
957 			}
958 		}
959 	}
960 
961 	/* Try to find any free slot as last resort. */
962 	if (cpuid == (processorid_t)-1) {
963 		for (i = 0; i < apic_max_nproc; i++) {
964 			if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
965 				cpuid = i;
966 				break;
967 			}
968 		}
969 	}
970 
971 	if (cpuid == (processorid_t)-1) {
972 		lock_clear(&apic_ioapic_lock);
973 		intr_restore(iflag);
974 		cmn_err(CE_NOTE,
975 		    "!apic: failed to allocate cpu id for processor %u.",
976 		    procid);
977 		rv = EAGAIN;
978 	} else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
979 		lock_clear(&apic_ioapic_lock);
980 		intr_restore(iflag);
981 		cmn_err(CE_NOTE,
982 		    "!apic: failed to build mapping for processor %u.",
983 		    procid);
984 		rv = EBUSY;
985 	} else {
986 		ASSERT(cpuid >= 0 && cpuid < NCPU);
987 		ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
988 		bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
989 		apic_cpus[cpuid].aci_processor_id = procid;
990 		apic_cpus[cpuid].aci_local_id = localid;
991 		apic_cpus[cpuid].aci_local_ver = localver;
992 		CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
993 		if (cpuid >= apic_nproc) {
994 			apic_nproc = cpuid + 1;
995 		}
996 		lock_clear(&apic_ioapic_lock);
997 		intr_restore(iflag);
998 		reqp->req.cpu_add.cpuid = cpuid;
999 	}
1000 
1001 	return (rv);
1002 }
1003 
1004 int
1005 apic_cpu_remove(psm_cpu_request_t *reqp)
1006 {
1007 	int i;
1008 	ulong_t iflag;
1009 	processorid_t cpuid;
1010 
1011 	/* Check whether CPU hotplug is supported. */
1012 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1013 		return (ENOTSUP);
1014 	}
1015 
1016 	cpuid = reqp->req.cpu_remove.cpuid;
1017 
1018 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1019 	iflag = intr_clear();
1020 	lock_set(&apic_ioapic_lock);
1021 
1022 	if (!apic_cpu_in_range(cpuid)) {
1023 		lock_clear(&apic_ioapic_lock);
1024 		intr_restore(iflag);
1025 		cmn_err(CE_WARN,
1026 		    "!apic: cpuid %d doesn't exist in apic_cpus array.",
1027 		    cpuid);
1028 		return (ENODEV);
1029 	}
1030 	ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1031 
1032 	if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1033 		lock_clear(&apic_ioapic_lock);
1034 		intr_restore(iflag);
1035 		return (ENOENT);
1036 	}
1037 
1038 	if (cpuid == apic_nproc - 1) {
1039 		/*
1040 		 * We are removing the highest numbered cpuid so we need to
1041 		 * find the next highest cpuid as the new value for apic_nproc.
1042 		 */
1043 		for (i = apic_nproc; i > 0; i--) {
1044 			if (CPU_IN_SET(apic_cpumask, i - 1)) {
1045 				apic_nproc = i;
1046 				break;
1047 			}
1048 		}
1049 		/* at least one CPU left */
1050 		ASSERT(i > 0);
1051 	}
1052 	CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1053 	/* mark slot as free and keep it in the dirty cache */
1054 	apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1055 
1056 	lock_clear(&apic_ioapic_lock);
1057 	intr_restore(iflag);
1058 
1059 	return (0);
1060 }
1061 
1062 /*
1063  * Return the number of APIC clock ticks elapsed for 8245 to decrement
1064  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
1065  */
1066 uint_t
1067 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
1068 {
1069 	uint8_t		pit_tick_lo;
1070 	uint16_t	pit_tick, target_pit_tick;
1071 	uint32_t	start_apic_tick, end_apic_tick;
1072 	ulong_t		iflag;
1073 	uint32_t	reg;
1074 
1075 	reg = addr + APIC_CURR_COUNT - apicadr;
1076 
1077 	iflag = intr_clear();
1078 
1079 	do {
1080 		pit_tick_lo = inb(PITCTR0_PORT);
1081 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1082 	} while (pit_tick < APIC_TIME_MIN ||
1083 	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
1084 
1085 	/*
1086 	 * Wait for the 8254 to decrement by 5 ticks to ensure
1087 	 * we didn't start in the middle of a tick.
1088 	 * Compare with 0x10 for the wrap around case.
1089 	 */
1090 	target_pit_tick = pit_tick - 5;
1091 	do {
1092 		pit_tick_lo = inb(PITCTR0_PORT);
1093 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1094 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1095 
1096 	start_apic_tick = apic_reg_ops->apic_read(reg);
1097 
1098 	/*
1099 	 * Wait for the 8254 to decrement by
1100 	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
1101 	 */
1102 	target_pit_tick = pit_tick - APIC_TIME_COUNT;
1103 	do {
1104 		pit_tick_lo = inb(PITCTR0_PORT);
1105 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1106 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1107 
1108 	end_apic_tick = apic_reg_ops->apic_read(reg);
1109 
1110 	*pit_ticks_adj = target_pit_tick - pit_tick;
1111 
1112 	intr_restore(iflag);
1113 
1114 	return (start_apic_tick - end_apic_tick);
1115 }
1116 
1117 /*
1118  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1119  * frequency.  Note at this stage in the boot sequence, the boot processor
1120  * is the only active processor.
1121  * hertz value of 0 indicates a one-shot mode request.  In this case
1122  * the function returns the resolution (in nanoseconds) for the hardware
1123  * timer interrupt.  If one-shot mode capability is not available,
1124  * the return value will be 0. apic_enable_oneshot is a global switch
1125  * for disabling the functionality.
1126  * A non-zero positive value for hertz indicates a periodic mode request.
1127  * In this case the hardware will be programmed to generate clock interrupts
1128  * at hertz frequency and returns the resolution of interrupts in
1129  * nanosecond.
1130  */
1131 
1132 int
1133 apic_clkinit(int hertz)
1134 {
1135 	int		ret;
1136 
1137 	apic_int_busy_mark = (apic_int_busy_mark *
1138 	    apic_sample_factor_redistribution) / 100;
1139 	apic_int_free_mark = (apic_int_free_mark *
1140 	    apic_sample_factor_redistribution) / 100;
1141 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
1142 	    apic_sample_factor_redistribution) / 100;
1143 
1144 	ret = apic_timer_init(hertz);
1145 	return (ret);
1146 
1147 }
1148 
1149 /*
1150  * apic_preshutdown:
1151  * Called early in shutdown whilst we can still access filesystems to do
1152  * things like loading modules which will be required to complete shutdown
1153  * after filesystems are all unmounted.
1154  */
1155 void
1156 apic_preshutdown(int cmd, int fcn)
1157 {
1158 	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1159 	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1160 }
1161 
1162 void
1163 apic_shutdown(int cmd, int fcn)
1164 {
1165 	int restarts, attempts;
1166 	int i;
1167 	uchar_t	byte;
1168 	ulong_t iflag;
1169 
1170 	hpet_acpi_fini();
1171 
1172 	/* Send NMI to all CPUs except self to do per processor shutdown */
1173 	iflag = intr_clear();
1174 #ifdef	DEBUG
1175 	APIC_AV_PENDING_SET();
1176 #else
1177 	if (apic_mode == LOCAL_APIC)
1178 		APIC_AV_PENDING_SET();
1179 #endif /* DEBUG */
1180 	apic_shutdown_processors = 1;
1181 	apic_reg_ops->apic_write(APIC_INT_CMD1,
1182 	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1183 
1184 	/* restore cmos shutdown byte before reboot */
1185 	if (apic_cmos_ssb_set) {
1186 		outb(CMOS_ADDR, SSB);
1187 		outb(CMOS_DATA, 0);
1188 	}
1189 
1190 	ioapic_disable_redirection();
1191 
1192 	/*	disable apic mode if imcr present	*/
1193 	if (apic_imcrp) {
1194 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1195 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1196 	}
1197 
1198 	apic_disable_local_apic();
1199 
1200 	intr_restore(iflag);
1201 
1202 	/* remainder of function is for shutdown cases only */
1203 	if (cmd != A_SHUTDOWN)
1204 		return;
1205 
1206 	/*
1207 	 * Switch system back into Legacy-Mode if using ACPI and
1208 	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
1209 	 * for power-off to succeed (Dell Dimension 4600)
1210 	 * Do not disable ACPI while doing fastreboot
1211 	 */
1212 	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1213 		(void) AcpiDisable();
1214 
1215 	if (fcn == AD_FASTREBOOT) {
1216 		apic_reg_ops->apic_write(APIC_INT_CMD1,
1217 		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1218 	}
1219 
1220 	/* remainder of function is for shutdown+poweroff case only */
1221 	if (fcn != AD_POWEROFF)
1222 		return;
1223 
1224 	switch (apic_poweroff_method) {
1225 		case APIC_POWEROFF_VIA_RTC:
1226 
1227 			/* select the extended NVRAM bank in the RTC */
1228 			outb(CMOS_ADDR, RTC_REGA);
1229 			byte = inb(CMOS_DATA);
1230 			outb(CMOS_DATA, (byte | EXT_BANK));
1231 
1232 			outb(CMOS_ADDR, PFR_REG);
1233 
1234 			/* for Predator must toggle the PAB bit */
1235 			byte = inb(CMOS_DATA);
1236 
1237 			/*
1238 			 * clear power active bar, wakeup alarm and
1239 			 * kickstart
1240 			 */
1241 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1242 			outb(CMOS_DATA, byte);
1243 
1244 			/* delay before next write */
1245 			drv_usecwait(1000);
1246 
1247 			/* for S40 the following would suffice */
1248 			byte = inb(CMOS_DATA);
1249 
1250 			/* power active bar control bit */
1251 			byte |= PAB_CBIT;
1252 			outb(CMOS_DATA, byte);
1253 
1254 			break;
1255 
1256 		case APIC_POWEROFF_VIA_ASPEN_BMC:
1257 			restarts = 0;
1258 restart_aspen_bmc:
1259 			if (++restarts == 3)
1260 				break;
1261 			attempts = 0;
1262 			do {
1263 				byte = inb(MISMIC_FLAG_REGISTER);
1264 				byte &= MISMIC_BUSY_MASK;
1265 				if (byte != 0) {
1266 					drv_usecwait(1000);
1267 					if (attempts >= 3)
1268 						goto restart_aspen_bmc;
1269 					++attempts;
1270 				}
1271 			} while (byte != 0);
1272 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1273 			byte = inb(MISMIC_FLAG_REGISTER);
1274 			byte |= 0x1;
1275 			outb(MISMIC_FLAG_REGISTER, byte);
1276 			i = 0;
1277 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1278 			    i++) {
1279 				attempts = 0;
1280 				do {
1281 					byte = inb(MISMIC_FLAG_REGISTER);
1282 					byte &= MISMIC_BUSY_MASK;
1283 					if (byte != 0) {
1284 						drv_usecwait(1000);
1285 						if (attempts >= 3)
1286 							goto restart_aspen_bmc;
1287 						++attempts;
1288 					}
1289 				} while (byte != 0);
1290 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1291 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1292 				byte = inb(MISMIC_FLAG_REGISTER);
1293 				byte |= 0x1;
1294 				outb(MISMIC_FLAG_REGISTER, byte);
1295 			}
1296 			break;
1297 
1298 		case APIC_POWEROFF_VIA_SITKA_BMC:
1299 			restarts = 0;
1300 restart_sitka_bmc:
1301 			if (++restarts == 3)
1302 				break;
1303 			attempts = 0;
1304 			do {
1305 				byte = inb(SMS_STATUS_REGISTER);
1306 				byte &= SMS_STATE_MASK;
1307 				if ((byte == SMS_READ_STATE) ||
1308 				    (byte == SMS_WRITE_STATE)) {
1309 					drv_usecwait(1000);
1310 					if (attempts >= 3)
1311 						goto restart_sitka_bmc;
1312 					++attempts;
1313 				}
1314 			} while ((byte == SMS_READ_STATE) ||
1315 			    (byte == SMS_WRITE_STATE));
1316 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1317 			i = 0;
1318 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1319 			    i++) {
1320 				attempts = 0;
1321 				do {
1322 					byte = inb(SMS_STATUS_REGISTER);
1323 					byte &= SMS_IBF_MASK;
1324 					if (byte != 0) {
1325 						drv_usecwait(1000);
1326 						if (attempts >= 3)
1327 							goto restart_sitka_bmc;
1328 						++attempts;
1329 					}
1330 				} while (byte != 0);
1331 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
1332 			}
1333 			break;
1334 
1335 		case APIC_POWEROFF_NONE:
1336 
1337 			/* If no APIC direct method, we will try using ACPI */
1338 			if (apic_enable_acpi) {
1339 				if (acpi_poweroff() == 1)
1340 					return;
1341 			} else
1342 				return;
1343 
1344 			break;
1345 	}
1346 	/*
1347 	 * Wait a limited time here for power to go off.
1348 	 * If the power does not go off, then there was a
1349 	 * problem and we should continue to the halt which
1350 	 * prints a message for the user to press a key to
1351 	 * reboot.
1352 	 */
1353 	drv_usecwait(7000000); /* wait seven seconds */
1354 
1355 }
1356 
/*
 * Cyclic id kept at file scope.  None of the functions defined near this
 * declaration reference it.
 * NOTE(review): presumably identifies a cyclic registered elsewhere in the
 * APIC code -- confirm against its users.
 */
cyclic_id_t apic_cyclic_id;
1358 
1359 /*
1360  * The following functions are in the platform specific file so that they
1361  * can be different functions depending on whether we are running on
1362  * bare metal or a hypervisor.
1363  */
1364 
1365 /*
1366  * map an apic for memory-mapped access
1367  */
1368 uint32_t *
1369 mapin_apic(uint32_t addr, size_t len, int flags)
1370 {
1371 	return ((void *)psm_map_phys(addr, len, flags));
1372 }
1373 
1374 uint32_t *
1375 mapin_ioapic(uint32_t addr, size_t len, int flags)
1376 {
1377 	return (mapin_apic(addr, len, flags));
1378 }
1379 
1380 /*
1381  * unmap an apic
1382  */
1383 void
1384 mapout_apic(caddr_t addr, size_t len)
1385 {
1386 	psm_unmap_phys(addr, len);
1387 }
1388 
1389 void
1390 mapout_ioapic(caddr_t addr, size_t len)
1391 {
1392 	mapout_apic(addr, len);
1393 }
1394 
1395 uint32_t
1396 ioapic_read(int ioapic_ix, uint32_t reg)
1397 {
1398 	volatile uint32_t *ioapic;
1399 
1400 	ioapic = apicioadr[ioapic_ix];
1401 	ioapic[APIC_IO_REG] = reg;
1402 	return (ioapic[APIC_IO_DATA]);
1403 }
1404 
1405 void
1406 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1407 {
1408 	volatile uint32_t *ioapic;
1409 
1410 	ioapic = apicioadr[ioapic_ix];
1411 	ioapic[APIC_IO_REG] = reg;
1412 	ioapic[APIC_IO_DATA] = value;
1413 }
1414 
1415 void
1416 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1417 {
1418 	volatile uint32_t *ioapic;
1419 
1420 	ioapic = apicioadr[ioapic_ix];
1421 	ioapic[APIC_IO_EOI] = value;
1422 }
1423 
1424 /*
1425  * Round-robin algorithm to find the next CPU with interrupts enabled.
1426  * It can't share the same static variable apic_next_bind_cpu with
1427  * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1428  * bound to CPU1 at boot time.  During boot, only CPU0 is online with
1429  * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1430  * are called.  However, the pcplusmp driver assumes that there will be
1431  * boot_ncpus CPUs configured eventually so it tries to distribute all
1432  * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
1433  * interrupts being targetted at CPU1, we need to use a dedicated static
1434  * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1435  */
1436 
1437 processorid_t
1438 apic_find_cpu(int flag)
1439 {
1440 	int i;
1441 	static processorid_t acid = 0;
1442 
1443 	/* Find the first CPU with the passed-in flag set */
1444 	for (i = 0; i < apic_nproc; i++) {
1445 		if (++acid >= apic_nproc) {
1446 			acid = 0;
1447 		}
1448 		if (apic_cpu_in_range(acid) &&
1449 		    (apic_cpus[acid].aci_status & flag)) {
1450 			break;
1451 		}
1452 	}
1453 
1454 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1455 	return (acid);
1456 }
1457 
1458 /*
1459  * Switch between safe and x2APIC IPI sending method.
1460  * CPU may power on in xapic mode or x2apic mode. If CPU needs to send IPI to
1461  * other CPUs before entering x2APIC mode, it still needs to xAPIC method.
1462  * Before sending StartIPI to target CPU, psm_send_ipi will be changed to
1463  * apic_common_send_ipi, which detects current local APIC mode and use right
1464  * method to send IPI. If some CPUs fail to start up, apic_poweron_cnt
1465  * won't return to zero, so apic_common_send_ipi will always be used.
1466  * psm_send_ipi can't be simply changed back to x2apic_send_ipi if some CPUs
1467  * failed to start up because those failed CPUs may recover itself later at
1468  * unpredictable time.
1469  */
1470 void
1471 apic_switch_ipi_callback(boolean_t enter)
1472 {
1473 	ulong_t iflag;
1474 	struct psm_ops *pops = psmops;
1475 
1476 	iflag = intr_clear();
1477 	lock_set(&apic_mode_switch_lock);
1478 	if (enter) {
1479 		ASSERT(apic_poweron_cnt >= 0);
1480 		if (apic_poweron_cnt == 0) {
1481 			pops->psm_send_ipi = apic_common_send_ipi;
1482 			send_dirintf = pops->psm_send_ipi;
1483 		}
1484 		apic_poweron_cnt++;
1485 	} else {
1486 		ASSERT(apic_poweron_cnt > 0);
1487 		apic_poweron_cnt--;
1488 		if (apic_poweron_cnt == 0) {
1489 			pops->psm_send_ipi = x2apic_send_ipi;
1490 			send_dirintf = pops->psm_send_ipi;
1491 		}
1492 	}
1493 	lock_clear(&apic_mode_switch_lock);
1494 	intr_restore(iflag);
1495 }
1496 
1497 void
1498 apic_intrmap_init(int apic_mode)
1499 {
1500 	int suppress_brdcst_eoi = 0;
1501 
1502 	/*
1503 	 * Intel Software Developer's Manual 3A, 10.12.7:
1504 	 *
1505 	 * Routing of device interrupts to local APIC units operating in
1506 	 * x2APIC mode requires use of the interrupt-remapping architecture
1507 	 * specified in the Intel Virtualization Technology for Directed
1508 	 * I/O, Revision 1.3.  Because of this, BIOS must enumerate support
1509 	 * for and software must enable this interrupt remapping with
1510 	 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1511 	 * the local APIC units.
1512 	 *
1513 	 *
1514 	 * In other words, to use the APIC in x2APIC mode, we need interrupt
1515 	 * remapping.  Since we don't start up the IOMMU by default, we
1516 	 * won't be able to do any interrupt remapping and therefore have to
1517 	 * use the APIC in traditional 'local APIC' mode with memory mapped
1518 	 * I/O.
1519 	 */
1520 
1521 	if (psm_vt_ops != NULL) {
1522 		if (((apic_intrmap_ops_t *)psm_vt_ops)->
1523 		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1524 
1525 			apic_vt_ops = psm_vt_ops;
1526 
1527 			/*
1528 			 * We leverage the interrupt remapping engine to
1529 			 * suppress broadcast EOI; thus we must send the
1530 			 * directed EOI with the directed-EOI handler.
1531 			 */
1532 			if (apic_directed_EOI_supported() == 0) {
1533 				suppress_brdcst_eoi = 1;
1534 			}
1535 
1536 			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1537 
1538 			if (apic_detect_x2apic()) {
1539 				apic_enable_x2apic();
1540 			}
1541 
1542 			if (apic_directed_EOI_supported() == 0) {
1543 				apic_set_directed_EOI_handler();
1544 			}
1545 		}
1546 	}
1547 }
1548 
1549 /*ARGSUSED*/
1550 static void
1551 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1552 {
1553 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1554 }
1555 
1556 /*ARGSUSED*/
1557 static void
1558 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1559 {
1560 	mregs->mr_addr = MSI_ADDR_HDR |
1561 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1562 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1563 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1564 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1565 	    mregs->mr_data;
1566 }
1567 
1568 /*
1569  * Functions from apic_introp.c
1570  *
1571  * Those functions are used by apic_intr_ops().
1572  */
1573 
/*
 * MSI support flag:
 * Reflects whether MSI is supported at the APIC level.
 * It can also be patched through /etc/system.
 *
 *  0 = default value - not yet known; apic_check_msi_support() must be
 *      called to find out, and the flag is then set accordingly
 *  1 = supported
 * -1 = not supported
 */
int	apic_support_msi = 0;

/*
 * Multiple vector support for MSI-X.
 * NOTE(review): initialised to 1, presumably meaning "enabled" -- confirm
 * against the code that reads it.
 */
int	apic_msix_enable = 1;

/*
 * Multiple vector support for MSI.
 * NOTE(review): initialised to 1, presumably meaning "enabled" -- confirm
 * against the code that reads it.
 */
int	apic_multi_msi_enable = 1;
1591 
1592 /*
1593  * check whether the system supports MSI
1594  *
1595  * If PCI-E capability is found, then this must be a PCI-E system.
1596  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
1597  * to indicate this system supports MSI.
1598  */
1599 int
1600 apic_check_msi_support()
1601 {
1602 	dev_info_t *cdip;
1603 	char dev_type[16];
1604 	int dev_len;
1605 
1606 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1607 
1608 	/*
1609 	 * check whether the first level children of root_node have
1610 	 * PCI-E capability
1611 	 */
1612 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1613 	    cdip = ddi_get_next_sibling(cdip)) {
1614 
1615 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1616 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1617 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
1618 		    ddi_node_name(cdip)));
1619 		dev_len = sizeof (dev_type);
1620 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1621 		    "device_type", (caddr_t)dev_type, &dev_len)
1622 		    != DDI_PROP_SUCCESS)
1623 			continue;
1624 		if (strcmp(dev_type, "pciex") == 0)
1625 			return (PSM_SUCCESS);
1626 	}
1627 
1628 	/* MSI is not supported on this system */
1629 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1630 	    "device_type found\n"));
1631 	return (PSM_FAILURE);
1632 }
1633 
1634 /*
1635  * apic_pci_msi_unconfigure:
1636  *
1637  * This and next two interfaces are copied from pci_intr_lib.c
1638  * Do ensure that these two files stay in sync.
1639  * These needed to be copied over here to avoid a deadlock situation on
1640  * certain mp systems that use MSI interrupts.
1641  *
1642  * IMPORTANT regards next three interfaces:
1643  * i) are called only for MSI/X interrupts.
1644  * ii) called with interrupts disabled, and must not block
1645  */
1646 void
1647 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1648 {
1649 	ushort_t		msi_ctrl;
1650 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1651 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1652 
1653 	ASSERT((handle != NULL) && (cap_ptr != 0));
1654 
1655 	if (type == DDI_INTR_TYPE_MSI) {
1656 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1657 		msi_ctrl &= (~PCI_MSI_MME_MASK);
1658 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1659 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1660 
1661 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
1662 			pci_config_put16(handle,
1663 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
1664 			pci_config_put32(handle,
1665 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1666 		} else {
1667 			pci_config_put16(handle,
1668 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
1669 		}
1670 
1671 	} else if (type == DDI_INTR_TYPE_MSIX) {
1672 		uintptr_t	off;
1673 		uint32_t	mask;
1674 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
1675 
1676 		ASSERT(msix_p != NULL);
1677 
1678 		/* Offset into "inum"th entry in the MSI-X table & mask it */
1679 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1680 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1681 
1682 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1683 
1684 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1685 
1686 		/* Offset into the "inum"th entry in the MSI-X table */
1687 		off = (uintptr_t)msix_p->msix_tbl_addr +
1688 		    (inum * PCI_MSIX_VECTOR_SIZE);
1689 
1690 		/* Reset the "data" and "addr" bits */
1691 		ddi_put32(msix_p->msix_tbl_hdl,
1692 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1693 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1694 	}
1695 }
1696 
1697 /*
1698  * apic_pci_msi_disable_mode:
1699  */
1700 void
1701 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1702 {
1703 	ushort_t		msi_ctrl;
1704 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1705 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1706 
1707 	ASSERT((handle != NULL) && (cap_ptr != 0));
1708 
1709 	if (type == DDI_INTR_TYPE_MSI) {
1710 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1711 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1712 			return;
1713 
1714 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
1715 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1716 
1717 	} else if (type == DDI_INTR_TYPE_MSIX) {
1718 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1719 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1720 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1721 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1722 			    msi_ctrl);
1723 		}
1724 	}
1725 }
1726 
1727 uint32_t
1728 apic_get_localapicid(uint32_t cpuid)
1729 {
1730 	ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1731 
1732 	return (apic_cpus[cpuid].aci_local_id);
1733 }
1734 
1735 uchar_t
1736 apic_get_ioapicid(uchar_t ioapicindex)
1737 {
1738 	ASSERT(ioapicindex < MAX_IO_APIC);
1739 
1740 	return (apic_io_id[ioapicindex]);
1741 }
1742