xref: /titanic_41/usr/src/uts/i86pc/io/pcplusmp/apic_common.c (revision 5df82708d5dd3f4214863e7d3ce5a0ba6d0da2bf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
28  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
29  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
30  * PSMI 1.5 extensions are supported in Solaris Nevada.
31  * PSMI 1.6 extensions are supported in Solaris Nevada.
32  * PSMI 1.7 extensions are supported in Solaris Nevada.
33  */
34 #define	PSMI_1_7
35 
36 #include <sys/processor.h>
37 #include <sys/time.h>
38 #include <sys/psm.h>
39 #include <sys/smp_impldefs.h>
40 #include <sys/cram.h>
41 #include <sys/acpi/acpi.h>
42 #include <sys/acpica.h>
43 #include <sys/psm_common.h>
44 #include <sys/apic.h>
45 #include <sys/pit.h>
46 #include <sys/ddi.h>
47 #include <sys/sunddi.h>
48 #include <sys/ddi_impldefs.h>
49 #include <sys/pci.h>
50 #include <sys/promif.h>
51 #include <sys/x86_archext.h>
52 #include <sys/cpc_impl.h>
53 #include <sys/uadmin.h>
54 #include <sys/panic.h>
55 #include <sys/debug.h>
56 #include <sys/archsystm.h>
57 #include <sys/trap.h>
58 #include <sys/machsystm.h>
59 #include <sys/sysmacros.h>
60 #include <sys/cpuvar.h>
61 #include <sys/rm_platter.h>
62 #include <sys/privregs.h>
63 #include <sys/note.h>
64 #include <sys/pci_intr_lib.h>
65 #include <sys/spl.h>
66 #include <sys/clock.h>
67 #include <sys/dditypes.h>
68 #include <sys/sunddi.h>
69 #include <sys/x_call.h>
70 #include <sys/reboot.h>
71 #include <sys/hpet.h>
72 #include <sys/apic_common.h>
73 #include <sys/apic_timer.h>
74 
75 static void	apic_record_ioapic_rdt(void *intrmap_private,
76 		    ioapic_rdt_t *irdt);
77 static void	apic_record_msi(void *intrmap_private, msi_regs_t *mregs);
78 
79 /*
80  * Common routines between pcplusmp & apix (taken from apic.c).
81  */
82 
83 int	apic_clkinit(int);
84 hrtime_t apic_gethrtime(void);
85 void	apic_send_ipi(int, int);
86 void	apic_set_idlecpu(processorid_t);
87 void	apic_unset_idlecpu(processorid_t);
88 void	apic_shutdown(int, int);
89 void	apic_preshutdown(int, int);
90 processorid_t	apic_get_next_processorid(processorid_t);
91 
92 hrtime_t apic_gettime();
93 
/*
 * IOAPIC EOI handling method used by the apix module.  It starts out as
 * APIC_MUL_IOAPIC_PCPLUSMP and is overwritten by apic_ioapic_method_probe();
 * if it still holds the initial value after probing, apix is disabled.
 */
enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;

/* Now the ones for Dynamic Interrupt distribution */
int	apic_enable_dynamic_migration = 0;

/*
 * Maximum loop count when sending Start IPIs: bounds the poll on the ICR
 * pending bit after the INIT IPI in apic_cpu_send_SIPI().
 */
int apic_sipi_max_loop_count = 0x1000;
101 
/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
int apic_clkvect;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
/*
 * Number of tenmicrosec() calls apic_error_intr() makes after printing an
 * error; the handler doubles it each time while it is below 500.
 */
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* vector at which CMCI interrupts come in */
int apic_cmci_vect;
extern int cmi_enable_cmci;
extern void cmi_cmci_trap(void);

kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
int cmci_cpu_setup_registered;

/*
 * number of CPUs in power-on transition state
 * NOTE(review): not referenced in the portion of the file visible here.
 */
static int apic_poweron_cnt = 0;
lock_t apic_mode_switch_lock;
130 
/*
 * Patchable global variables.
 */
int	apic_forceload = 0;

int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */

int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
int	apic_panic_on_nmi = 0;		/* non-zero: apic_nmi_intr() panics */
/* non-zero: apic_error_intr() raises CE_PANIC instead of logging */
int	apic_panic_on_apic_error = 0;

int	apic_verbose = 0;	/* 0x1ff */

#ifdef DEBUG
/* non-zero: apic_error_intr() calls debug_enter() on an APIC error */
int	apic_debug = 0;
int	apic_restrict_vector = 0;

int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

#endif /* DEBUG */
152 
uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;

/* last timer current-count value accepted by apic_gethrtime() */
uint_t last_count_read = 0;
/* serializes the body of apic_gethrtime() across CPUs */
lock_t	apic_gethrtime_lock;
/*
 * Readers of apic_nsec_since_boot spin while the low bit of this stamp is
 * set and retry if the stamp changed while they were reading.
 */
volatile int	apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;

/* last value handed out by apic_gethrtime(); keeps it from going backwards */
static	hrtime_t	apic_last_hrtime = 0;
/* times apic_gethrtime() computed a value below apic_last_hrtime */
int		apic_hrtime_error = 0;
/* times the remote read of the timer count came back invalid */
int		apic_remote_hrterr = 0;
/* NMIs processed by apic_nmi_intr() while holding apic_nmi_lock */
int		apic_num_nmis = 0;
/* OR of every error status value recorded by apic_error_intr() */
int		apic_apic_error = 0;
int		apic_num_apic_errors = 0;
/* subset of apic_num_apic_errors that had only APIC_CS_ERRORS bits set */
int		apic_num_cksum_errors = 0;

/* bitmask of APIC_ERR_* conditions seen so far */
int	apic_error = 0;

/* set to 1 by apic_cpu_start() before the SIPI sequence is sent */
static	int	apic_cmos_ssb_set = 0;

/* use to make sure only one cpu handles the nmi */
lock_t	apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
lock_t	apic_error_lock;
177 
/*
 * Control/data byte pairs making up two BMC requests: a
 * SET_WATCHDOG_TIMER command followed by a RESET_WATCHDOG_TIMER command.
 * NOTE(review): the code that walks this table is outside the visible
 * portion of the file; presumably it is the shutdown path -- confirm.
 */
static	struct {
	uchar_t	cntl;		/* control code written with the data byte */
	uchar_t	data;		/* payload byte */
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
194 
/*
 * Port/data pairs carrying the same two BMC requests as aspen_bmc
 * (SET_WATCHDOG_TIMER, then RESET_WATCHDOG_TIMER), expressed as writes to
 * the SMS command and data registers.
 * NOTE(review): the consumer of this table is outside the visible portion
 * of the file -- confirm how the entries are replayed.
 */
static	struct {
	int	port;		/* register the byte is written to */
	uchar_t	data;		/* byte to write */
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};
215 
/* Patchable global variables. */
int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */

/*
 * default apic ops without interrupt remapping
 *
 * The first five entries are return_instr cast to the expected signature;
 * only the two record hooks at the end point at functions declared in
 * this file.
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
	(int (*)(int))return_instr,
	(void (*)(int))return_instr,
	(void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
	(void (*)(void *, void *, uint16_t, int))return_instr,
	(void (*)(void **))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};

/* active interrupt-remapping ops; defaults to the no-remapping table */
apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
/* per-CPU APIC information, indexed by cpuid */
apic_cpus_info_t	*apic_cpus = NULL;
/* set of cpuids whose apic_cpus[] slot is currently in use */
cpuset_t	apic_cpumask;
uint_t		apic_picinit_called;

/*
 * Flag to indicate that we need to shut down all processors.  While it is
 * non-zero, apic_nmi_intr() only disables the local APIC and returns.
 */
static uint_t	apic_shutdown_processors;
238 
239 /*
240  * Probe the ioapic method for apix module. Called in apic_probe_common()
241  */
242 int
243 apic_ioapic_method_probe()
244 {
245 	if (apix_enable == 0)
246 		return (PSM_SUCCESS);
247 
248 	/*
249 	 * Set IOAPIC EOI handling method. The priority from low to high is:
250 	 * 	1. IOxAPIC: with EOI register
251 	 * 	2. IOMMU interrupt mapping
252 	 *	3. Mask-Before-EOI method for systems without boot
253 	 *	interrupt routing, such as systems with only one IOAPIC;
254 	 *	NVIDIA CK8-04/MCP55 systems; systems with bridge solution
255 	 *	which disables the boot interrupt routing already.
256 	 * 	4. Directed EOI
257 	 */
258 	if (apic_io_ver[0] >= 0x20)
259 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
260 	if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
261 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
262 	if (apic_directed_EOI_supported())
263 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
264 
265 	/* fall back to pcplusmp */
266 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
267 		/* make sure apix is after pcplusmp in /etc/mach */
268 		apix_enable = 0; /* go ahead with pcplusmp install next */
269 		return (PSM_FAILURE);
270 	}
271 
272 	return (PSM_SUCCESS);
273 }
274 
275 /*
276  * handler for APIC Error interrupt. Just print a warning and continue
277  */
278 int
279 apic_error_intr()
280 {
281 	uint_t	error0, error1, error;
282 	uint_t	i;
283 
284 	/*
285 	 * We need to write before read as per 7.4.17 of system prog manual.
286 	 * We do both and or the results to be safe
287 	 */
288 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
289 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
290 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
291 	error = error0 | error1;
292 
293 	/*
294 	 * Clear the APIC error status (do this on all cpus that enter here)
295 	 * (two writes are required due to the semantics of accessing the
296 	 * error status register.)
297 	 */
298 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
299 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
300 
301 	/*
302 	 * Prevent more than 1 CPU from handling error interrupt causing
303 	 * double printing (interleave of characters from multiple
304 	 * CPU's when using prom_printf)
305 	 */
306 	if (lock_try(&apic_error_lock) == 0)
307 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
308 	if (error) {
309 #if	DEBUG
310 		if (apic_debug)
311 			debug_enter("pcplusmp: APIC Error interrupt received");
312 #endif /* DEBUG */
313 		if (apic_panic_on_apic_error)
314 			cmn_err(CE_PANIC,
315 			    "APIC Error interrupt on CPU %d. Status = %x",
316 			    psm_get_cpu_id(), error);
317 		else {
318 			if ((error & ~APIC_CS_ERRORS) == 0) {
319 				/* cksum error only */
320 				apic_error |= APIC_ERR_APIC_ERROR;
321 				apic_apic_error |= error;
322 				apic_num_apic_errors++;
323 				apic_num_cksum_errors++;
324 			} else {
325 				/*
326 				 * prom_printf is the best shot we have of
327 				 * something which is problem free from
328 				 * high level/NMI type of interrupts
329 				 */
330 				prom_printf("APIC Error interrupt on CPU %d. "
331 				    "Status 0 = %x, Status 1 = %x\n",
332 				    psm_get_cpu_id(), error0, error1);
333 				apic_error |= APIC_ERR_APIC_ERROR;
334 				apic_apic_error |= error;
335 				apic_num_apic_errors++;
336 				for (i = 0; i < apic_error_display_delay; i++) {
337 					tenmicrosec();
338 				}
339 				/*
340 				 * provide more delay next time limited to
341 				 * roughly 1 clock tick time
342 				 */
343 				if (apic_error_display_delay < 500)
344 					apic_error_display_delay *= 2;
345 			}
346 		}
347 		lock_clear(&apic_error_lock);
348 		return (DDI_INTR_CLAIMED);
349 	} else {
350 		lock_clear(&apic_error_lock);
351 		return (DDI_INTR_UNCLAIMED);
352 	}
353 }
354 
355 /*
356  * Turn off the mask bit in the performance counter Local Vector Table entry.
357  */
358 void
359 apic_cpcovf_mask_clear(void)
360 {
361 	apic_reg_ops->apic_write(APIC_PCINT_VECT,
362 	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
363 }
364 
365 /*ARGSUSED*/
366 static int
367 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
368 {
369 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
370 	return (0);
371 }
372 
373 /*ARGSUSED*/
374 static int
375 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
376 {
377 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
378 	return (0);
379 }
380 
381 /*ARGSUSED*/
382 int
383 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
384 {
385 	cpuset_t	cpu_set;
386 
387 	CPUSET_ONLY(cpu_set, cpuid);
388 
389 	switch (what) {
390 		case CPU_ON:
391 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
392 			    (xc_func_t)apic_cmci_enable);
393 			break;
394 
395 		case CPU_OFF:
396 			xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
397 			    (xc_func_t)apic_cmci_disable);
398 			break;
399 
400 		default:
401 			break;
402 	}
403 
404 	return (0);
405 }
406 
407 static void
408 apic_disable_local_apic(void)
409 {
410 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
411 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
412 
413 	/* local intr reg 0 */
414 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
415 
416 	/* disable NMI */
417 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
418 
419 	/* and error interrupt */
420 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
421 
422 	/* and perf counter intr */
423 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
424 
425 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
426 }
427 
428 static void
429 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
430 {
431 	int		loop_count;
432 	uint32_t	vector;
433 	uint_t		apicid;
434 	ulong_t		iflag;
435 
436 	apicid =  apic_cpus[cpun].aci_local_id;
437 
438 	/*
439 	 * Interrupts on current CPU will be disabled during the
440 	 * steps in order to avoid unwanted side effects from
441 	 * executing interrupt handlers on a problematic BIOS.
442 	 */
443 	iflag = intr_clear();
444 
445 	if (start) {
446 		outb(CMOS_ADDR, SSB);
447 		outb(CMOS_DATA, BIOS_SHUTDOWN);
448 	}
449 
450 	/*
451 	 * According to X2APIC specification in section '2.3.5.1' of
452 	 * Interrupt Command Register Semantics, the semantics of
453 	 * programming the Interrupt Command Register to dispatch an interrupt
454 	 * is simplified. A single MSR write to the 64-bit ICR is required
455 	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
456 	 * interface to ICR, system software is not required to check the
457 	 * status of the delivery status bit prior to writing to the ICR
458 	 * to send an IPI. With the removal of the Delivery Status bit,
459 	 * system software no longer has a reason to read the ICR. It remains
460 	 * readable only to aid in debugging.
461 	 */
462 #ifdef	DEBUG
463 	APIC_AV_PENDING_SET();
464 #else
465 	if (apic_mode == LOCAL_APIC) {
466 		APIC_AV_PENDING_SET();
467 	}
468 #endif /* DEBUG */
469 
470 	/* for integrated - make sure there is one INIT IPI in buffer */
471 	/* for external - it will wake up the cpu */
472 	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
473 
474 	/* If only 1 CPU is installed, PENDING bit will not go low */
475 	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
476 		if (apic_mode == LOCAL_APIC &&
477 		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
478 			apic_ret();
479 		else
480 			break;
481 	}
482 
483 	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
484 	drv_usecwait(20000);		/* 20 milli sec */
485 
486 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
487 		/* integrated apic */
488 
489 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
490 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
491 
492 		/* to offset the INIT IPI queue up in the buffer */
493 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
494 		drv_usecwait(200);		/* 20 micro sec */
495 
496 		/*
497 		 * send the second SIPI (Startup IPI) as recommended by Intel
498 		 * software development manual.
499 		 */
500 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
501 		drv_usecwait(200);	/* 20 micro sec */
502 	}
503 
504 	intr_restore(iflag);
505 }
506 
507 /*ARGSUSED1*/
508 int
509 apic_cpu_start(processorid_t cpun, caddr_t arg)
510 {
511 	ASSERT(MUTEX_HELD(&cpu_lock));
512 
513 	if (!apic_cpu_in_range(cpun)) {
514 		return (EINVAL);
515 	}
516 
517 	/*
518 	 * Switch to apic_common_send_ipi for safety during starting other CPUs.
519 	 */
520 	if (apic_mode == LOCAL_X2APIC) {
521 		apic_switch_ipi_callback(B_TRUE);
522 	}
523 
524 	apic_cmos_ssb_set = 1;
525 	apic_cpu_send_SIPI(cpun, B_TRUE);
526 
527 	return (0);
528 }
529 
530 /*
531  * Put CPU into halted state with interrupts disabled.
532  */
533 /*ARGSUSED1*/
534 int
535 apic_cpu_stop(processorid_t cpun, caddr_t arg)
536 {
537 	int		rc;
538 	cpu_t 		*cp;
539 	extern cpuset_t cpu_ready_set;
540 	extern void cpu_idle_intercept_cpu(cpu_t *cp);
541 
542 	ASSERT(MUTEX_HELD(&cpu_lock));
543 
544 	if (!apic_cpu_in_range(cpun)) {
545 		return (EINVAL);
546 	}
547 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
548 		return (ENOTSUP);
549 	}
550 
551 	cp = cpu_get(cpun);
552 	ASSERT(cp != NULL);
553 	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
554 	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
555 	ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
556 
557 	/* Clear CPU_READY flag to disable cross calls. */
558 	cp->cpu_flags &= ~CPU_READY;
559 	CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
560 	rc = xc_flush_cpu(cp);
561 	if (rc != 0) {
562 		CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
563 		cp->cpu_flags |= CPU_READY;
564 		return (rc);
565 	}
566 
567 	/* Intercept target CPU at a safe point before powering it off. */
568 	cpu_idle_intercept_cpu(cp);
569 
570 	apic_cpu_send_SIPI(cpun, B_FALSE);
571 	cp->cpu_flags &= ~CPU_RUNNING;
572 
573 	return (0);
574 }
575 
576 int
577 apic_cpu_ops(psm_cpu_request_t *reqp)
578 {
579 	if (reqp == NULL) {
580 		return (EINVAL);
581 	}
582 
583 	switch (reqp->pcr_cmd) {
584 	case PSM_CPU_ADD:
585 		return (apic_cpu_add(reqp));
586 
587 	case PSM_CPU_REMOVE:
588 		return (apic_cpu_remove(reqp));
589 
590 	case PSM_CPU_STOP:
591 		return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
592 		    reqp->req.cpu_stop.ctx));
593 
594 	default:
595 		return (ENOTSUP);
596 	}
597 }
598 
/*
 * DEBUG-only tunables.
 * NOTE(review): none of these is referenced in the portion of the file
 * visible here; confirm their consumers before changing the defaults.
 */
#ifdef	DEBUG
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */
604 
605 /*
606  * generates an interprocessor interrupt to another CPU. Any changes made to
607  * this routine must be accompanied by similar changes to
608  * apic_common_send_ipi().
609  */
610 void
611 apic_send_ipi(int cpun, int ipl)
612 {
613 	int vector;
614 	ulong_t flag;
615 
616 	vector = apic_resv_vector[ipl];
617 
618 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
619 
620 	flag = intr_clear();
621 
622 	APIC_AV_PENDING_SET();
623 
624 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
625 	    vector);
626 
627 	intr_restore(flag);
628 }
629 
630 
631 /*ARGSUSED*/
632 void
633 apic_set_idlecpu(processorid_t cpun)
634 {
635 }
636 
637 /*ARGSUSED*/
638 void
639 apic_unset_idlecpu(processorid_t cpun)
640 {
641 }
642 
643 
/*
 * Empty function called from the busy-wait loops in this file.
 */
void
apic_ret()
{
	/* intentionally empty */
}
648 
649 /*
650  * If apic_coarse_time == 1, then apic_gettime() is used instead of
651  * apic_gethrtime().  This is used for performance instead of accuracy.
652  */
653 
654 hrtime_t
655 apic_gettime()
656 {
657 	int old_hrtime_stamp;
658 	hrtime_t temp;
659 
660 	/*
661 	 * In one-shot mode, we do not keep time, so if anyone
662 	 * calls psm_gettime() directly, we vector over to
663 	 * gethrtime().
664 	 * one-shot mode MUST NOT be enabled if this psm is the source of
665 	 * hrtime.
666 	 */
667 
668 	if (apic_oneshot)
669 		return (gethrtime());
670 
671 
672 gettime_again:
673 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
674 		apic_ret();
675 
676 	temp = apic_nsec_since_boot;
677 
678 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
679 		goto gettime_again;
680 	}
681 	return (temp);
682 }
683 
684 /*
685  * Here we return the number of nanoseconds since booting.  Note every
686  * clock interrupt increments apic_nsec_since_boot by the appropriate
687  * amount.
688  */
689 hrtime_t
690 apic_gethrtime(void)
691 {
692 	int curr_timeval, countval, elapsed_ticks;
693 	int old_hrtime_stamp, status;
694 	hrtime_t temp;
695 	uint32_t cpun;
696 	ulong_t oflags;
697 
698 	/*
699 	 * In one-shot mode, we do not keep time, so if anyone
700 	 * calls psm_gethrtime() directly, we vector over to
701 	 * gethrtime().
702 	 * one-shot mode MUST NOT be enabled if this psm is the source of
703 	 * hrtime.
704 	 */
705 
706 	if (apic_oneshot)
707 		return (gethrtime());
708 
709 	oflags = intr_clear();	/* prevent migration */
710 
711 	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
712 	if (apic_mode == LOCAL_APIC)
713 		cpun >>= APIC_ID_BIT_OFFSET;
714 
715 	lock_set(&apic_gethrtime_lock);
716 
717 gethrtime_again:
718 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
719 		apic_ret();
720 
721 	/*
722 	 * Check to see which CPU we are on.  Note the time is kept on
723 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
724 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
725 	 */
726 	if (cpun == apic_cpus[0].aci_local_id) {
727 		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
728 	} else {
729 #ifdef	DEBUG
730 		APIC_AV_PENDING_SET();
731 #else
732 		if (apic_mode == LOCAL_APIC)
733 			APIC_AV_PENDING_SET();
734 #endif /* DEBUG */
735 
736 		apic_reg_ops->apic_write_int_cmd(
737 		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
738 
739 		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
740 		    & AV_READ_PENDING) {
741 			apic_ret();
742 		}
743 
744 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
745 			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
746 		else {	/* 0 = invalid */
747 			apic_remote_hrterr++;
748 			/*
749 			 * return last hrtime right now, will need more
750 			 * testing if change to retry
751 			 */
752 			temp = apic_last_hrtime;
753 
754 			lock_clear(&apic_gethrtime_lock);
755 
756 			intr_restore(oflags);
757 
758 			return (temp);
759 		}
760 	}
761 	if (countval > last_count_read)
762 		countval = 0;
763 	else
764 		last_count_read = countval;
765 
766 	elapsed_ticks = apic_hertz_count - countval;
767 
768 	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
769 	temp = apic_nsec_since_boot + curr_timeval;
770 
771 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
772 		/* we might have clobbered last_count_read. Restore it */
773 		last_count_read = apic_hertz_count;
774 		goto gethrtime_again;
775 	}
776 
777 	if (temp < apic_last_hrtime) {
778 		/* return last hrtime if error occurs */
779 		apic_hrtime_error++;
780 		temp = apic_last_hrtime;
781 	}
782 	else
783 		apic_last_hrtime = temp;
784 
785 	lock_clear(&apic_gethrtime_lock);
786 	intr_restore(oflags);
787 
788 	return (temp);
789 }
790 
791 /* apic NMI handler */
792 /*ARGSUSED*/
793 void
794 apic_nmi_intr(caddr_t arg, struct regs *rp)
795 {
796 	if (apic_shutdown_processors) {
797 		apic_disable_local_apic();
798 		return;
799 	}
800 
801 	apic_error |= APIC_ERR_NMI;
802 
803 	if (!lock_try(&apic_nmi_lock))
804 		return;
805 	apic_num_nmis++;
806 
807 	if (apic_kmdb_on_nmi && psm_debugger()) {
808 		debug_enter("NMI received: entering kmdb\n");
809 	} else if (apic_panic_on_nmi) {
810 		/* Keep panic from entering kmdb. */
811 		nopanicdebug = 1;
812 		panic("NMI received\n");
813 	} else {
814 		/*
815 		 * prom_printf is the best shot we have of something which is
816 		 * problem free from high level/NMI type of interrupts
817 		 */
818 		prom_printf("NMI received\n");
819 	}
820 
821 	lock_clear(&apic_nmi_lock);
822 }
823 
824 processorid_t
825 apic_get_next_processorid(processorid_t cpu_id)
826 {
827 
828 	int i;
829 
830 	if (cpu_id == -1)
831 		return ((processorid_t)0);
832 
833 	for (i = cpu_id + 1; i < NCPU; i++) {
834 		if (apic_cpu_in_range(i))
835 			return (i);
836 	}
837 
838 	return ((processorid_t)-1);
839 }
840 
841 int
842 apic_cpu_add(psm_cpu_request_t *reqp)
843 {
844 	int i, rv = 0;
845 	ulong_t iflag;
846 	boolean_t first = B_TRUE;
847 	uchar_t localver;
848 	uint32_t localid, procid;
849 	processorid_t cpuid = (processorid_t)-1;
850 	mach_cpu_add_arg_t *ap;
851 
852 	ASSERT(reqp != NULL);
853 	reqp->req.cpu_add.cpuid = (processorid_t)-1;
854 
855 	/* Check whether CPU hotplug is supported. */
856 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
857 		return (ENOTSUP);
858 	}
859 
860 	ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
861 	switch (ap->type) {
862 	case MACH_CPU_ARG_LOCAL_APIC:
863 		localid = ap->arg.apic.apic_id;
864 		procid = ap->arg.apic.proc_id;
865 		if (localid >= 255 || procid > 255) {
866 			cmn_err(CE_WARN,
867 			    "!apic: apicid(%u) or procid(%u) is invalid.",
868 			    localid, procid);
869 			return (EINVAL);
870 		}
871 		break;
872 
873 	case MACH_CPU_ARG_LOCAL_X2APIC:
874 		localid = ap->arg.apic.apic_id;
875 		procid = ap->arg.apic.proc_id;
876 		if (localid >= UINT32_MAX) {
877 			cmn_err(CE_WARN,
878 			    "!apic: x2apicid(%u) is invalid.", localid);
879 			return (EINVAL);
880 		} else if (localid >= 255 && apic_mode == LOCAL_APIC) {
881 			cmn_err(CE_WARN, "!apic: system is in APIC mode, "
882 			    "can't support x2APIC processor.");
883 			return (ENOTSUP);
884 		}
885 		break;
886 
887 	default:
888 		cmn_err(CE_WARN,
889 		    "!apic: unknown argument type %d to apic_cpu_add().",
890 		    ap->type);
891 		return (EINVAL);
892 	}
893 
894 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
895 	iflag = intr_clear();
896 	lock_set(&apic_ioapic_lock);
897 
898 	/* Check whether local APIC id already exists. */
899 	for (i = 0; i < apic_nproc; i++) {
900 		if (!CPU_IN_SET(apic_cpumask, i))
901 			continue;
902 		if (apic_cpus[i].aci_local_id == localid) {
903 			lock_clear(&apic_ioapic_lock);
904 			intr_restore(iflag);
905 			cmn_err(CE_WARN,
906 			    "!apic: local apic id %u already exists.",
907 			    localid);
908 			return (EEXIST);
909 		} else if (apic_cpus[i].aci_processor_id == procid) {
910 			lock_clear(&apic_ioapic_lock);
911 			intr_restore(iflag);
912 			cmn_err(CE_WARN,
913 			    "!apic: processor id %u already exists.",
914 			    (int)procid);
915 			return (EEXIST);
916 		}
917 
918 		/*
919 		 * There's no local APIC version number available in MADT table,
920 		 * so assume that all CPUs are homogeneous and use local APIC
921 		 * version number of the first existing CPU.
922 		 */
923 		if (first) {
924 			first = B_FALSE;
925 			localver = apic_cpus[i].aci_local_ver;
926 		}
927 	}
928 	ASSERT(first == B_FALSE);
929 
930 	/*
931 	 * Try to assign the same cpuid if APIC id exists in the dirty cache.
932 	 */
933 	for (i = 0; i < apic_max_nproc; i++) {
934 		if (CPU_IN_SET(apic_cpumask, i)) {
935 			ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
936 			continue;
937 		}
938 		ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
939 		if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
940 		    apic_cpus[i].aci_local_id == localid &&
941 		    apic_cpus[i].aci_processor_id == procid) {
942 			cpuid = i;
943 			break;
944 		}
945 	}
946 
947 	/* Avoid the dirty cache and allocate fresh slot if possible. */
948 	if (cpuid == (processorid_t)-1) {
949 		for (i = 0; i < apic_max_nproc; i++) {
950 			if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
951 			    (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
952 				cpuid = i;
953 				break;
954 			}
955 		}
956 	}
957 
958 	/* Try to find any free slot as last resort. */
959 	if (cpuid == (processorid_t)-1) {
960 		for (i = 0; i < apic_max_nproc; i++) {
961 			if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
962 				cpuid = i;
963 				break;
964 			}
965 		}
966 	}
967 
968 	if (cpuid == (processorid_t)-1) {
969 		lock_clear(&apic_ioapic_lock);
970 		intr_restore(iflag);
971 		cmn_err(CE_NOTE,
972 		    "!apic: failed to allocate cpu id for processor %u.",
973 		    procid);
974 		rv = EAGAIN;
975 	} else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
976 		lock_clear(&apic_ioapic_lock);
977 		intr_restore(iflag);
978 		cmn_err(CE_NOTE,
979 		    "!apic: failed to build mapping for processor %u.",
980 		    procid);
981 		rv = EBUSY;
982 	} else {
983 		ASSERT(cpuid >= 0 && cpuid < NCPU);
984 		ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
985 		bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
986 		apic_cpus[cpuid].aci_processor_id = procid;
987 		apic_cpus[cpuid].aci_local_id = localid;
988 		apic_cpus[cpuid].aci_local_ver = localver;
989 		CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
990 		if (cpuid >= apic_nproc) {
991 			apic_nproc = cpuid + 1;
992 		}
993 		lock_clear(&apic_ioapic_lock);
994 		intr_restore(iflag);
995 		reqp->req.cpu_add.cpuid = cpuid;
996 	}
997 
998 	return (rv);
999 }
1000 
1001 int
1002 apic_cpu_remove(psm_cpu_request_t *reqp)
1003 {
1004 	int i;
1005 	ulong_t iflag;
1006 	processorid_t cpuid;
1007 
1008 	/* Check whether CPU hotplug is supported. */
1009 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1010 		return (ENOTSUP);
1011 	}
1012 
1013 	cpuid = reqp->req.cpu_remove.cpuid;
1014 
1015 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1016 	iflag = intr_clear();
1017 	lock_set(&apic_ioapic_lock);
1018 
1019 	if (!apic_cpu_in_range(cpuid)) {
1020 		lock_clear(&apic_ioapic_lock);
1021 		intr_restore(iflag);
1022 		cmn_err(CE_WARN,
1023 		    "!apic: cpuid %d doesn't exist in apic_cpus array.",
1024 		    cpuid);
1025 		return (ENODEV);
1026 	}
1027 	ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1028 
1029 	if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1030 		lock_clear(&apic_ioapic_lock);
1031 		intr_restore(iflag);
1032 		return (ENOENT);
1033 	}
1034 
1035 	if (cpuid == apic_nproc - 1) {
1036 		/*
1037 		 * We are removing the highest numbered cpuid so we need to
1038 		 * find the next highest cpuid as the new value for apic_nproc.
1039 		 */
1040 		for (i = apic_nproc; i > 0; i--) {
1041 			if (CPU_IN_SET(apic_cpumask, i - 1)) {
1042 				apic_nproc = i;
1043 				break;
1044 			}
1045 		}
1046 		/* at least one CPU left */
1047 		ASSERT(i > 0);
1048 	}
1049 	CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1050 	/* mark slot as free and keep it in the dirty cache */
1051 	apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1052 
1053 	lock_clear(&apic_ioapic_lock);
1054 	intr_restore(iflag);
1055 
1056 	return (0);
1057 }
1058 
1059 /*
1060  * Return the number of APIC clock ticks elapsed for 8245 to decrement
1061  * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
1062  */
1063 uint_t
1064 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
1065 {
1066 	uint8_t		pit_tick_lo;
1067 	uint16_t	pit_tick, target_pit_tick;
1068 	uint32_t	start_apic_tick, end_apic_tick;
1069 	ulong_t		iflag;
1070 	uint32_t	reg;
1071 
1072 	reg = addr + APIC_CURR_COUNT - apicadr;
1073 
1074 	iflag = intr_clear();
1075 
1076 	do {
1077 		pit_tick_lo = inb(PITCTR0_PORT);
1078 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1079 	} while (pit_tick < APIC_TIME_MIN ||
1080 	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
1081 
1082 	/*
1083 	 * Wait for the 8254 to decrement by 5 ticks to ensure
1084 	 * we didn't start in the middle of a tick.
1085 	 * Compare with 0x10 for the wrap around case.
1086 	 */
1087 	target_pit_tick = pit_tick - 5;
1088 	do {
1089 		pit_tick_lo = inb(PITCTR0_PORT);
1090 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1091 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1092 
1093 	start_apic_tick = apic_reg_ops->apic_read(reg);
1094 
1095 	/*
1096 	 * Wait for the 8254 to decrement by
1097 	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
1098 	 */
1099 	target_pit_tick = pit_tick - APIC_TIME_COUNT;
1100 	do {
1101 		pit_tick_lo = inb(PITCTR0_PORT);
1102 		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1103 	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1104 
1105 	end_apic_tick = apic_reg_ops->apic_read(reg);
1106 
1107 	*pit_ticks_adj = target_pit_tick - pit_tick;
1108 
1109 	intr_restore(iflag);
1110 
1111 	return (start_apic_tick - end_apic_tick);
1112 }
1113 
1114 /*
1115  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1116  * frequency.  Note at this stage in the boot sequence, the boot processor
1117  * is the only active processor.
1118  * hertz value of 0 indicates a one-shot mode request.  In this case
1119  * the function returns the resolution (in nanoseconds) for the hardware
1120  * timer interrupt.  If one-shot mode capability is not available,
1121  * the return value will be 0. apic_enable_oneshot is a global switch
1122  * for disabling the functionality.
1123  * A non-zero positive value for hertz indicates a periodic mode request.
1124  * In this case the hardware will be programmed to generate clock interrupts
1125  * at hertz frequency and returns the resolution of interrupts in
1126  * nanosecond.
1127  */
1128 
1129 int
1130 apic_clkinit(int hertz)
1131 {
1132 	int		ret;
1133 
1134 	apic_int_busy_mark = (apic_int_busy_mark *
1135 	    apic_sample_factor_redistribution) / 100;
1136 	apic_int_free_mark = (apic_int_free_mark *
1137 	    apic_sample_factor_redistribution) / 100;
1138 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
1139 	    apic_sample_factor_redistribution) / 100;
1140 
1141 	ret = apic_timer_init(hertz);
1142 	return (ret);
1143 
1144 }
1145 
/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * This implementation performs no such work itself: it only reports its
 * arguments (cmd, fcn) together with the current apic_poweroff_method and
 * apic_enable_acpi settings through APIC_VERBOSE_POWEROFF.
 */
void
apic_preshutdown(int cmd, int fcn)
{
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
}
1158 
1159 void
1160 apic_shutdown(int cmd, int fcn)
1161 {
1162 	int restarts, attempts;
1163 	int i;
1164 	uchar_t	byte;
1165 	ulong_t iflag;
1166 
1167 	hpet_acpi_fini();
1168 
1169 	/* Send NMI to all CPUs except self to do per processor shutdown */
1170 	iflag = intr_clear();
1171 #ifdef	DEBUG
1172 	APIC_AV_PENDING_SET();
1173 #else
1174 	if (apic_mode == LOCAL_APIC)
1175 		APIC_AV_PENDING_SET();
1176 #endif /* DEBUG */
1177 	apic_shutdown_processors = 1;
1178 	apic_reg_ops->apic_write(APIC_INT_CMD1,
1179 	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1180 
1181 	/* restore cmos shutdown byte before reboot */
1182 	if (apic_cmos_ssb_set) {
1183 		outb(CMOS_ADDR, SSB);
1184 		outb(CMOS_DATA, 0);
1185 	}
1186 
1187 	ioapic_disable_redirection();
1188 
1189 	/*	disable apic mode if imcr present	*/
1190 	if (apic_imcrp) {
1191 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1192 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1193 	}
1194 
1195 	apic_disable_local_apic();
1196 
1197 	intr_restore(iflag);
1198 
1199 	/* remainder of function is for shutdown cases only */
1200 	if (cmd != A_SHUTDOWN)
1201 		return;
1202 
1203 	/*
1204 	 * Switch system back into Legacy-Mode if using ACPI and
1205 	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
1206 	 * for power-off to succeed (Dell Dimension 4600)
1207 	 * Do not disable ACPI while doing fastreboot
1208 	 */
1209 	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1210 		(void) AcpiDisable();
1211 
1212 	if (fcn == AD_FASTREBOOT) {
1213 		apic_reg_ops->apic_write(APIC_INT_CMD1,
1214 		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1215 	}
1216 
1217 	/* remainder of function is for shutdown+poweroff case only */
1218 	if (fcn != AD_POWEROFF)
1219 		return;
1220 
1221 	switch (apic_poweroff_method) {
1222 		case APIC_POWEROFF_VIA_RTC:
1223 
1224 			/* select the extended NVRAM bank in the RTC */
1225 			outb(CMOS_ADDR, RTC_REGA);
1226 			byte = inb(CMOS_DATA);
1227 			outb(CMOS_DATA, (byte | EXT_BANK));
1228 
1229 			outb(CMOS_ADDR, PFR_REG);
1230 
1231 			/* for Predator must toggle the PAB bit */
1232 			byte = inb(CMOS_DATA);
1233 
1234 			/*
1235 			 * clear power active bar, wakeup alarm and
1236 			 * kickstart
1237 			 */
1238 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1239 			outb(CMOS_DATA, byte);
1240 
1241 			/* delay before next write */
1242 			drv_usecwait(1000);
1243 
1244 			/* for S40 the following would suffice */
1245 			byte = inb(CMOS_DATA);
1246 
1247 			/* power active bar control bit */
1248 			byte |= PAB_CBIT;
1249 			outb(CMOS_DATA, byte);
1250 
1251 			break;
1252 
1253 		case APIC_POWEROFF_VIA_ASPEN_BMC:
1254 			restarts = 0;
1255 restart_aspen_bmc:
1256 			if (++restarts == 3)
1257 				break;
1258 			attempts = 0;
1259 			do {
1260 				byte = inb(MISMIC_FLAG_REGISTER);
1261 				byte &= MISMIC_BUSY_MASK;
1262 				if (byte != 0) {
1263 					drv_usecwait(1000);
1264 					if (attempts >= 3)
1265 						goto restart_aspen_bmc;
1266 					++attempts;
1267 				}
1268 			} while (byte != 0);
1269 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1270 			byte = inb(MISMIC_FLAG_REGISTER);
1271 			byte |= 0x1;
1272 			outb(MISMIC_FLAG_REGISTER, byte);
1273 			i = 0;
1274 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1275 			    i++) {
1276 				attempts = 0;
1277 				do {
1278 					byte = inb(MISMIC_FLAG_REGISTER);
1279 					byte &= MISMIC_BUSY_MASK;
1280 					if (byte != 0) {
1281 						drv_usecwait(1000);
1282 						if (attempts >= 3)
1283 							goto restart_aspen_bmc;
1284 						++attempts;
1285 					}
1286 				} while (byte != 0);
1287 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1288 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1289 				byte = inb(MISMIC_FLAG_REGISTER);
1290 				byte |= 0x1;
1291 				outb(MISMIC_FLAG_REGISTER, byte);
1292 			}
1293 			break;
1294 
1295 		case APIC_POWEROFF_VIA_SITKA_BMC:
1296 			restarts = 0;
1297 restart_sitka_bmc:
1298 			if (++restarts == 3)
1299 				break;
1300 			attempts = 0;
1301 			do {
1302 				byte = inb(SMS_STATUS_REGISTER);
1303 				byte &= SMS_STATE_MASK;
1304 				if ((byte == SMS_READ_STATE) ||
1305 				    (byte == SMS_WRITE_STATE)) {
1306 					drv_usecwait(1000);
1307 					if (attempts >= 3)
1308 						goto restart_sitka_bmc;
1309 					++attempts;
1310 				}
1311 			} while ((byte == SMS_READ_STATE) ||
1312 			    (byte == SMS_WRITE_STATE));
1313 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1314 			i = 0;
1315 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1316 			    i++) {
1317 				attempts = 0;
1318 				do {
1319 					byte = inb(SMS_STATUS_REGISTER);
1320 					byte &= SMS_IBF_MASK;
1321 					if (byte != 0) {
1322 						drv_usecwait(1000);
1323 						if (attempts >= 3)
1324 							goto restart_sitka_bmc;
1325 						++attempts;
1326 					}
1327 				} while (byte != 0);
1328 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
1329 			}
1330 			break;
1331 
1332 		case APIC_POWEROFF_NONE:
1333 
1334 			/* If no APIC direct method, we will try using ACPI */
1335 			if (apic_enable_acpi) {
1336 				if (acpi_poweroff() == 1)
1337 					return;
1338 			} else
1339 				return;
1340 
1341 			break;
1342 	}
1343 	/*
1344 	 * Wait a limited time here for power to go off.
1345 	 * If the power does not go off, then there was a
1346 	 * problem and we should continue to the halt which
1347 	 * prints a message for the user to press a key to
1348 	 * reboot.
1349 	 */
1350 	drv_usecwait(7000000); /* wait seven seconds */
1351 
1352 }
1353 
/*
 * Handle of type ddi_periodic_t.  Nothing in this part of the file sets or
 * reads it; presumably it identifies a periodic callback registered
 * elsewhere -- TODO confirm against its users.
 */
ddi_periodic_t apic_periodic_id;
1355 
1356 /*
1357  * The following functions are in the platform specific file so that they
1358  * can be different functions depending on whether we are running on
1359  * bare metal or a hypervisor.
1360  */
1361 
/*
 * Map an apic for memory-mapped access.
 *
 * addr, len and flags are handed unchanged to psm_map_phys(); whatever
 * that returns is handed back to the caller as a uint32_t pointer.
 */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	void *va;

	va = (void *)psm_map_phys(addr, len, flags);
	return (va);
}
1370 
/*
 * Map an I/O apic for memory-mapped access.  Here this is exactly the
 * same operation as mapin_apic().
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	uint32_t *va;

	va = mapin_apic(addr, len, flags);
	return (va);
}
1376 
/*
 * Unmap an apic.
 *
 * addr and len are passed straight to psm_unmap_phys(); they are
 * presumably the address returned by, and the length given to, the
 * matching mapin_apic() call -- confirm against callers.
 */
void
mapout_apic(caddr_t addr, size_t len)
{
	psm_unmap_phys(addr, len);
}
1385 
/*
 * Unmap an I/O apic.  Here this is exactly the same operation as
 * mapout_apic().
 */
void
mapout_ioapic(caddr_t addr, size_t len)
{
	mapout_apic(addr, len);
}
1391 
1392 uint32_t
1393 ioapic_read(int ioapic_ix, uint32_t reg)
1394 {
1395 	volatile uint32_t *ioapic;
1396 
1397 	ioapic = apicioadr[ioapic_ix];
1398 	ioapic[APIC_IO_REG] = reg;
1399 	return (ioapic[APIC_IO_DATA]);
1400 }
1401 
1402 void
1403 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1404 {
1405 	volatile uint32_t *ioapic;
1406 
1407 	ioapic = apicioadr[ioapic_ix];
1408 	ioapic[APIC_IO_REG] = reg;
1409 	ioapic[APIC_IO_DATA] = value;
1410 }
1411 
1412 void
1413 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1414 {
1415 	volatile uint32_t *ioapic;
1416 
1417 	ioapic = apicioadr[ioapic_ix];
1418 	ioapic[APIC_IO_EOI] = value;
1419 }
1420 
1421 /*
1422  * Round-robin algorithm to find the next CPU with interrupts enabled.
1423  * It can't share the same static variable apic_next_bind_cpu with
1424  * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1425  * bound to CPU1 at boot time.  During boot, only CPU0 is online with
1426  * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1427  * are called.  However, the pcplusmp driver assumes that there will be
1428  * boot_ncpus CPUs configured eventually so it tries to distribute all
1429  * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
1430  * interrupts being targetted at CPU1, we need to use a dedicated static
1431  * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1432  */
1433 
1434 processorid_t
1435 apic_find_cpu(int flag)
1436 {
1437 	int i;
1438 	static processorid_t acid = 0;
1439 
1440 	/* Find the first CPU with the passed-in flag set */
1441 	for (i = 0; i < apic_nproc; i++) {
1442 		if (++acid >= apic_nproc) {
1443 			acid = 0;
1444 		}
1445 		if (apic_cpu_in_range(acid) &&
1446 		    (apic_cpus[acid].aci_status & flag)) {
1447 			break;
1448 		}
1449 	}
1450 
1451 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1452 	return (acid);
1453 }
1454 
1455 /*
1456  * Switch between safe and x2APIC IPI sending method.
1457  * CPU may power on in xapic mode or x2apic mode. If CPU needs to send IPI to
1458  * other CPUs before entering x2APIC mode, it still needs to xAPIC method.
1459  * Before sending StartIPI to target CPU, psm_send_ipi will be changed to
1460  * apic_common_send_ipi, which detects current local APIC mode and use right
1461  * method to send IPI. If some CPUs fail to start up, apic_poweron_cnt
1462  * won't return to zero, so apic_common_send_ipi will always be used.
1463  * psm_send_ipi can't be simply changed back to x2apic_send_ipi if some CPUs
1464  * failed to start up because those failed CPUs may recover itself later at
1465  * unpredictable time.
1466  */
1467 void
1468 apic_switch_ipi_callback(boolean_t enter)
1469 {
1470 	ulong_t iflag;
1471 	struct psm_ops *pops = psmops;
1472 
1473 	iflag = intr_clear();
1474 	lock_set(&apic_mode_switch_lock);
1475 	if (enter) {
1476 		ASSERT(apic_poweron_cnt >= 0);
1477 		if (apic_poweron_cnt == 0) {
1478 			pops->psm_send_ipi = apic_common_send_ipi;
1479 			send_dirintf = pops->psm_send_ipi;
1480 		}
1481 		apic_poweron_cnt++;
1482 	} else {
1483 		ASSERT(apic_poweron_cnt > 0);
1484 		apic_poweron_cnt--;
1485 		if (apic_poweron_cnt == 0) {
1486 			pops->psm_send_ipi = x2apic_send_ipi;
1487 			send_dirintf = pops->psm_send_ipi;
1488 		}
1489 	}
1490 	lock_clear(&apic_mode_switch_lock);
1491 	intr_restore(iflag);
1492 }
1493 
1494 void
1495 apic_intrmap_init(int apic_mode)
1496 {
1497 	int suppress_brdcst_eoi = 0;
1498 
1499 	if (psm_vt_ops != NULL) {
1500 		/*
1501 		 * Since X2APIC requires the use of interrupt remapping
1502 		 * (though this is not documented explicitly in the Intel
1503 		 * documentation (yet)), initialize interrupt remapping
1504 		 * support before initializing the X2APIC unit.
1505 		 */
1506 		if (((apic_intrmap_ops_t *)psm_vt_ops)->
1507 		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1508 
1509 			apic_vt_ops = psm_vt_ops;
1510 
1511 			/*
1512 			 * We leverage the interrupt remapping engine to
1513 			 * suppress broadcast EOI; thus we must send the
1514 			 * directed EOI with the directed-EOI handler.
1515 			 */
1516 			if (apic_directed_EOI_supported() == 0) {
1517 				suppress_brdcst_eoi = 1;
1518 			}
1519 
1520 			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1521 
1522 			if (apic_detect_x2apic()) {
1523 				apic_enable_x2apic();
1524 			}
1525 
1526 			if (apic_directed_EOI_supported() == 0) {
1527 				apic_set_directed_EOI_handler();
1528 			}
1529 		}
1530 	}
1531 }
1532 
1533 /*ARGSUSED*/
1534 static void
1535 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1536 {
1537 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1538 }
1539 
1540 /*ARGSUSED*/
1541 static void
1542 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1543 {
1544 	mregs->mr_addr = MSI_ADDR_HDR |
1545 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1546 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1547 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1548 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1549 	    mregs->mr_data;
1550 }
1551 
1552 /*
1553  * Functions from apic_introp.c
1554  *
1555  * Those functions are used by apic_intr_ops().
1556  */
1557 
/*
 * MSI support flag:
 * reflects whether MSI is supported at the APIC level.
 * It can also be patched through /etc/system.
 *
 *  0 = default value - not yet known; apic_check_msi_support() needs to
 *      be called to find out, and the flag is then set accordingly
 *  1 = supported
 * -1 = not supported
 */
int	apic_support_msi = 0;

/* Multiple vector support for MSI-X; non-zero by default */
int	apic_msix_enable = 1;

/* Multiple vector support for MSI; non-zero by default */
int	apic_multi_msi_enable = 1;
1575 
1576 /*
1577  * check whether the system supports MSI
1578  *
1579  * If PCI-E capability is found, then this must be a PCI-E system.
1580  * Since MSI is required for PCI-E system, it returns PSM_SUCCESS
1581  * to indicate this system supports MSI.
1582  */
1583 int
1584 apic_check_msi_support()
1585 {
1586 	dev_info_t *cdip;
1587 	char dev_type[16];
1588 	int dev_len;
1589 
1590 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1591 
1592 	/*
1593 	 * check whether the first level children of root_node have
1594 	 * PCI-E capability
1595 	 */
1596 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1597 	    cdip = ddi_get_next_sibling(cdip)) {
1598 
1599 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1600 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1601 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
1602 		    ddi_node_name(cdip)));
1603 		dev_len = sizeof (dev_type);
1604 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1605 		    "device_type", (caddr_t)dev_type, &dev_len)
1606 		    != DDI_PROP_SUCCESS)
1607 			continue;
1608 		if (strcmp(dev_type, "pciex") == 0)
1609 			return (PSM_SUCCESS);
1610 	}
1611 
1612 	/* MSI is not supported on this system */
1613 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1614 	    "device_type found\n"));
1615 	return (PSM_FAILURE);
1616 }
1617 
1618 /*
1619  * apic_pci_msi_unconfigure:
1620  *
1621  * This and next two interfaces are copied from pci_intr_lib.c
1622  * Do ensure that these two files stay in sync.
1623  * These needed to be copied over here to avoid a deadlock situation on
1624  * certain mp systems that use MSI interrupts.
1625  *
1626  * IMPORTANT regards next three interfaces:
1627  * i) are called only for MSI/X interrupts.
1628  * ii) called with interrupts disabled, and must not block
1629  */
1630 void
1631 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1632 {
1633 	ushort_t		msi_ctrl;
1634 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1635 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1636 
1637 	ASSERT((handle != NULL) && (cap_ptr != 0));
1638 
1639 	if (type == DDI_INTR_TYPE_MSI) {
1640 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1641 		msi_ctrl &= (~PCI_MSI_MME_MASK);
1642 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1643 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1644 
1645 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
1646 			pci_config_put16(handle,
1647 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
1648 			pci_config_put32(handle,
1649 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1650 		} else {
1651 			pci_config_put16(handle,
1652 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
1653 		}
1654 
1655 	} else if (type == DDI_INTR_TYPE_MSIX) {
1656 		uintptr_t	off;
1657 		uint32_t	mask;
1658 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
1659 
1660 		ASSERT(msix_p != NULL);
1661 
1662 		/* Offset into "inum"th entry in the MSI-X table & mask it */
1663 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1664 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1665 
1666 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1667 
1668 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1669 
1670 		/* Offset into the "inum"th entry in the MSI-X table */
1671 		off = (uintptr_t)msix_p->msix_tbl_addr +
1672 		    (inum * PCI_MSIX_VECTOR_SIZE);
1673 
1674 		/* Reset the "data" and "addr" bits */
1675 		ddi_put32(msix_p->msix_tbl_hdl,
1676 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1677 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1678 	}
1679 }
1680 
1681 /*
1682  * apic_pci_msi_disable_mode:
1683  */
1684 void
1685 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1686 {
1687 	ushort_t		msi_ctrl;
1688 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1689 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1690 
1691 	ASSERT((handle != NULL) && (cap_ptr != 0));
1692 
1693 	if (type == DDI_INTR_TYPE_MSI) {
1694 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1695 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1696 			return;
1697 
1698 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
1699 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1700 
1701 	} else if (type == DDI_INTR_TYPE_MSIX) {
1702 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1703 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1704 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1705 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1706 			    msi_ctrl);
1707 		}
1708 	}
1709 }
1710 
1711 uint32_t
1712 apic_get_localapicid(uint32_t cpuid)
1713 {
1714 	ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1715 
1716 	return (apic_cpus[cpuid].aci_local_id);
1717 }
1718 
1719 uchar_t
1720 apic_get_ioapicid(uchar_t ioapicindex)
1721 {
1722 	ASSERT(ioapicindex < MAX_IO_APIC);
1723 
1724 	return (apic_io_id[ioapicindex]);
1725 }
1726