xref: /titanic_50/usr/src/uts/i86pc/os/cpr_impl.c (revision 0b1b4412cfd6c4ac5467dbe6f4088dcec4f55fe8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Platform specific implementation code
28  * Currently only suspend to RAM is supported (ACPI S3)
29  */
30 
31 #define	SUNDDI_IMPL
32 
33 #include <sys/types.h>
34 #include <sys/promif.h>
35 #include <sys/prom_isa.h>
36 #include <sys/prom_plat.h>
37 #include <sys/cpuvar.h>
38 #include <sys/pte.h>
39 #include <vm/hat.h>
40 #include <vm/page.h>
41 #include <vm/as.h>
42 #include <sys/cpr.h>
43 #include <sys/kmem.h>
44 #include <sys/clock.h>
45 #include <sys/kmem.h>
46 #include <sys/panic.h>
47 #include <vm/seg_kmem.h>
48 #include <sys/cpu_module.h>
49 #include <sys/callb.h>
50 #include <sys/machsystm.h>
51 #include <sys/vmsystm.h>
52 #include <sys/systm.h>
53 #include <sys/archsystm.h>
54 #include <sys/stack.h>
55 #include <sys/fs/ufs_fs.h>
56 #include <sys/memlist.h>
57 #include <sys/bootconf.h>
58 #include <sys/thread.h>
59 #include <sys/x_call.h>
60 #include <sys/smp_impldefs.h>
61 #include <vm/vm_dep.h>
62 #include <sys/psm.h>
63 #include <sys/epm.h>
64 #include <sys/cpr_wakecode.h>
65 #include <sys/x86_archext.h>
66 #include <sys/reboot.h>
67 #include <sys/acpi/acpi.h>
68 #include <sys/acpica.h>
69 
/* printf-style conversion used when printing kgsbase in prt_other_cpus() */
#define	AFMT	"%lx"

/* Defined elsewhere; set to 1 by i_cpr_pre_resume_cpus() */
extern int	flushes_require_xcalls;
/* Defined elsewhere; CPUs add themselves here once they are ready */
extern cpuset_t	cpu_ready_set;

#if defined(__amd64)
/* Label in the wakeup code; used to compute an offset from wc_rm_start */
extern void	*wc_long_mode_64(void);
#endif	/* __amd64 */
/* When non-zero, the TSC sync master/slave routines are invoked on resume */
extern int	tsc_gethrtime_enable;
extern	void	i_cpr_start_cpu(void);

ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
/* Hook pointing at i_cpr_start_cpu() */
void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

/*
 * Per-CPU saved wakeup context; allocated by i_cpr_alloc_cpus() and
 * released by i_cpr_free_cpus().
 */
static wc_cpu_t	*wc_other_cpus = NULL;
/* CPUs that have reached i_cpr_start_cpu() during resume */
static cpuset_t procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);
95 
96 /*
97  * restart paused slave cpus
98  */
99 void
100 i_cpr_machdep_setup(void)
101 {
102 	if (ncpus > 1) {
103 		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
104 		mutex_enter(&cpu_lock);
105 		start_cpus();
106 		mutex_exit(&cpu_lock);
107 	}
108 }
109 
110 
/*
 * Stop interrupt activity in the system by calling spl7().
 * The value returned by spl7() is intentionally ignored.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
119 
/*
 * Make the machine ready to take interrupts again by calling spl0().
 * The value returned by spl0() is intentionally ignored.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
128 
/*
 * Placeholder for saving miscellaneous information that would be written
 * to a state file in order to re-initialize kernel/prom handshaking.
 *
 * This implementation does no work: the ASSERT below always fails when
 * assertions are enabled, flagging any call to this routine.
 */
void
i_cpr_save_machdep_info(void)
{
	/* a variable (not a literal 0) is tested by the ASSERT */
	int notcalled;

	notcalled = 0;
	ASSERT(notcalled);
}
140 
141 
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_set_tbr(void)
{
	/* nothing to do */
}
146 
147 
148 processorid_t
149 i_cpr_bootcpuid(void)
150 {
151 	return (0);
152 }
153 
154 /*
155  * cpu0 should contain bootcpu info
156  */
157 cpu_t *
158 i_cpr_bootcpu(void)
159 {
160 	ASSERT(MUTEX_HELD(&cpu_lock));
161 
162 	return (cpu_get(i_cpr_bootcpuid()));
163 }
164 
/*
 * Save context for the specified CPU.
 *
 * arg is an index (passed as a pointer-sized integer) selecting the
 * wc_other_cpus[] slot used to hold this CPU's saved state.  The APIC
 * state is allocated and saved first, then wc_save_context() captures
 * the CPU context.  Control comes back out of wc_save_context() a second
 * time on resume; on that path the APIC state is restored and freed,
 * interrupts are enabled on this CPU and the CPU is added to
 * cpu_ready_set.
 *
 * Always returns NULL.
 *
 * NOTE(review): the index is only asserted to be below NCPU, while
 * i_cpr_alloc_cpus() sizes wc_other_cpus with ncpus entries - confirm
 * that callers never pass an index >= ncpus.
 */
void *
i_cpr_save_context(void *arg)
{
	long	index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int	ret;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_other_cpus + index)->wc_apic_state;

	/* failures here are only caught by ASSERT */
	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	/*
	 * wc_save_context returns twice, once when suspending and
	 * once when resuming.  wc_save_context() returns non-zero when
	 * suspending and 0 upon resume, hence the comparison below.
	 */
	resuming = (wc_save_context(wc_other_cpus + index) == 0);

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 *
	 * NOTE(review): the resume branch below does make function calls;
	 * presumably the restriction applies to the suspend path only -
	 * confirm.
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	}
	return (NULL);
}
233 
234 static ushort_t *warm_reset_vector = NULL;
235 
236 static ushort_t *
237 map_warm_reset_vector()
238 {
239 	/*LINTED*/
240 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
241 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
242 		return (NULL);
243 
244 	/*
245 	 * setup secondary cpu bios boot up vector
246 	 */
247 	*warm_reset_vector = (ushort_t)((caddr_t)
248 	    /*LINTED*/
249 	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
250 	    + ((ulong_t)rm_platter_va & 0xf));
251 	warm_reset_vector++;
252 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
253 
254 	--warm_reset_vector;
255 	return (warm_reset_vector);
256 }
257 
258 void
259 i_cpr_pre_resume_cpus()
260 {
261 	/*
262 	 * this is a cut down version of start_other_cpus()
263 	 * just do the initialization to wake the other cpus
264 	 */
265 	unsigned who;
266 	int boot_cpuid = i_cpr_bootcpuid();
267 	uint32_t		code_length = 0;
268 	caddr_t			wakevirt = rm_platter_va;
269 	/*LINTED*/
270 	wakecode_t		*wp = (wakecode_t *)wakevirt;
271 	char *str = "i_cpr_pre_resume_cpus";
272 	extern int get_tsc_ready();
273 	int err;
274 
275 	/*LINTED*/
276 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
277 
278 	/*
279 	 * Copy the real mode code at "real_mode_start" to the
280 	 * page at rm_platter_va.
281 	 */
282 	warm_reset_vector = map_warm_reset_vector();
283 	if (warm_reset_vector == NULL) {
284 		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
285 		return;
286 	}
287 
288 	flushes_require_xcalls = 1;
289 
290 	/*
291 	 * We lock our affinity to the master CPU to ensure that all slave CPUs
292 	 * do their TSC syncs with the same CPU.
293 	 */
294 
295 	affinity_set(CPU_CURRENT);
296 
297 	/*
298 	 * Mark the boot cpu as being ready and in the procset, since we are
299 	 * running on that cpu.
300 	 */
301 	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
302 	CPUSET_ONLY(procset, boot_cpuid);
303 
304 	for (who = 0; who < ncpus; who++) {
305 
306 		wc_cpu_t	*cpup = wc_other_cpus + who;
307 		wc_desctbr_t	gdt;
308 
309 		if (who == boot_cpuid)
310 			continue;
311 
312 		if (!CPU_IN_SET(mp_cpus, who))
313 			continue;
314 
315 		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
316 
317 		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
318 
319 		gdt.base = cpup->wc_gdt_base;
320 		gdt.limit = cpup->wc_gdt_limit;
321 
322 #if defined(__amd64)
323 		code_length = (uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start;
324 #else
325 		code_length = 0;
326 #endif
327 
328 		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
329 
330 		if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
331 			cmn_err(CE_WARN, "cpu%d: failed to start during "
332 			    "suspend/resume error %d", who, err);
333 			continue;
334 		}
335 
336 		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
337 
338 		if (!wait_for_set(&procset, who))
339 			continue;
340 
341 		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
342 
343 		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
344 
345 		if (tsc_gethrtime_enable) {
346 			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
347 			tsc_sync_master(who);
348 		}
349 
350 		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
351 		    who))
352 		/*
353 		 * Wait for cpu to declare that it is ready, we want the
354 		 * cpus to start serially instead of in parallel, so that
355 		 * they do not contend with each other in wc_rm_start()
356 		 */
357 		if (!wait_for_set(&cpu_ready_set, who))
358 			continue;
359 
360 		/*
361 		 * do not need to re-initialize dtrace using dtrace_cpu_init
362 		 * function
363 		 */
364 		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
365 	}
366 
367 	affinity_clear();
368 
369 	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
370 
371 }
372 
373 static void
374 unmap_warm_reset_vector(ushort_t *warm_reset_vector)
375 {
376 	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
377 }
378 
379 /*
380  * We need to setup a 1:1 (virtual to physical) mapping for the
381  * page containing the wakeup code.
382  */
383 static struct as *save_as;	/* when switching to kas */
384 
385 static void
386 unmap_wakeaddr_1to1(uint64_t wakephys)
387 {
388 	uintptr_t	wp = (uintptr_t)wakephys;
389 	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
390 	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
391 }
392 
393 void
394 i_cpr_post_resume_cpus()
395 {
396 	uint64_t	wakephys = rm_platter_pa;
397 
398 	if (warm_reset_vector != NULL)
399 		unmap_warm_reset_vector(warm_reset_vector);
400 
401 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
402 	    HAT_UNLOAD);
403 
404 	/*
405 	 * cmi_post_mpstartup() is only required upon boot not upon
406 	 * resume from RAM
407 	 */
408 
409 	PT(PT_UNDO1to1);
410 	/* Tear down 1:1 mapping for wakeup code */
411 	unmap_wakeaddr_1to1(wakephys);
412 }
413 
/*
 * Intentionally empty in this implementation; flag is not used.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
	/* nothing to do */
}
419 
/*
 * Report whether a reusable statefile is supported: always 0 (no) here.
 */
int
i_cpr_reusable_supported(void)
{
	return (0);
}
425 static void
426 map_wakeaddr_1to1(uint64_t wakephys)
427 {
428 	uintptr_t	wp = (uintptr_t)wakephys;
429 	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
430 	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
431 	    HAT_LOAD);
432 	save_as = curthread->t_procp->p_as;
433 	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
434 }
435 
436 
437 void
438 prt_other_cpus()
439 {
440 	int	who;
441 
442 	if (ncpus == 1) {
443 		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
444 		    "uniprocessor machine\n"))
445 		return;
446 	}
447 
448 	for (who = 0; who < ncpus; who++) {
449 
450 		wc_cpu_t	*cpup = wc_other_cpus + who;
451 
452 		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
453 		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
454 		    AFMT ", sp=%lx\n", who,
455 		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
456 		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
457 		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
458 		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
459 	}
460 }
461 
462 /*
463  * Power down the system.
464  */
465 int
466 i_cpr_power_down(int sleeptype)
467 {
468 	caddr_t		wakevirt = rm_platter_va;
469 	uint64_t	wakephys = rm_platter_pa;
470 	ulong_t		saved_intr;
471 	uint32_t	code_length = 0;
472 	wc_desctbr_t	gdt;
473 	/*LINTED*/
474 	wakecode_t	*wp = (wakecode_t *)wakevirt;
475 	/*LINTED*/
476 	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
477 	wc_cpu_t	*cpup = &(wp->wc_cpu);
478 	dev_info_t	*ppm;
479 	int		ret = 0;
480 	power_req_t	power_req;
481 	char *str =	"i_cpr_power_down";
482 #if defined(__amd64)
483 	/*LINTED*/
484 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
485 #endif
486 	extern int	cpr_suspend_succeeded;
487 	extern void	kernel_wc_code();
488 
489 	ASSERT(sleeptype == CPR_TORAM);
490 	ASSERT(CPU->cpu_id == 0);
491 
492 	if ((ppm = PPM(ddi_root_node())) == NULL) {
493 		PMD(PMD_SX, ("%s: root node not claimed\n", str))
494 		return (ENOTTY);
495 	}
496 
497 	PMD(PMD_SX, ("Entering %s()\n", str))
498 
499 	PT(PT_IC);
500 	saved_intr = intr_clear();
501 
502 	PT(PT_1to1);
503 	/* Setup 1:1 mapping for wakeup code */
504 	map_wakeaddr_1to1(wakephys);
505 
506 	PMD(PMD_SX, ("ncpus=%d\n", ncpus))
507 
508 	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
509 	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))
510 
511 	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
512 	    (void *)wakevirt, (uint_t)wakephys))
513 
514 	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
515 	    WC_CODESIZE);
516 
517 	bzero(wakevirt, PAGESIZE);
518 
519 	/* Copy code to rm_platter */
520 	bcopy((caddr_t)wc_rm_start, wakevirt,
521 	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));
522 
523 	prt_other_cpus();
524 
525 #if defined(__amd64)
526 
527 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
528 	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
529 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
530 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
531 
532 	real_mode_platter->rm_cr4 = getcr4();
533 	real_mode_platter->rm_pdbr = getcr3();
534 
535 	rmp_gdt_init(real_mode_platter);
536 
537 	/*
538 	 * Since the CPU needs to jump to protected mode using an identity
539 	 * mapped address, we need to calculate it here.
540 	 */
541 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
542 	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);
543 
544 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
545 	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
546 
547 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
548 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
549 
550 	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
551 	    (ulong_t)real_mode_platter->rm_longmode64_addr))
552 
553 #endif
554 
555 	PT(PT_SC);
556 	if (wc_save_context(cpup)) {
557 
558 		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
559 		if (ret != 0)
560 			return (ret);
561 
562 		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
563 		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
564 		if (ret != 0)
565 			return (ret);
566 
567 		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
568 		    (uint_t)wakephys, (void *)&kernel_wc_code))
569 		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
570 		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
571 		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
572 		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
573 		    cpup->wc_esp))
574 		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
575 		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
576 		    (long)cpup->wc_cr4))
577 		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
578 		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
579 		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
580 		    (long)cpup->wc_eflags))
581 
582 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
583 		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
584 		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
585 		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
586 		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
587 
588 		gdt.base = cpup->wc_gdt_base;
589 		gdt.limit = cpup->wc_gdt_limit;
590 
591 #if defined(__amd64)
592 		code_length = (uint32_t)wc_long_mode_64 -
593 		    (uint32_t)wc_rm_start;
594 #else
595 		code_length = 0;
596 #endif
597 
598 		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
599 
600 #if defined(__amd64)
601 		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
602 		    (ulong_t)wcpp->rm_cr4, getcr4()))
603 
604 		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
605 		    (ulong_t)wcpp->rm_pdbr, getcr3()))
606 
607 		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
608 		    (ulong_t)wcpp->rm_longmode64_addr))
609 
610 		PMD(PMD_SX,
611 		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
612 		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
613 #endif
614 
615 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
616 		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
617 		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
618 		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
619 		    (long)cpup->wc_kgsbase))
620 
621 		power_req.request_type = PMR_PPM_ENTER_SX;
622 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
623 		power_req.req.ppm_power_enter_sx_req.test_point =
624 		    cpr_test_point;
625 		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
626 
627 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
628 		PT(PT_PPMCTLOP);
629 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
630 		    &power_req, &ret);
631 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
632 
633 		/*
634 		 * If it works, we get control back to the else branch below
635 		 * If we get control back here, it didn't work.
636 		 * XXX return EINVAL here?
637 		 */
638 
639 		unmap_wakeaddr_1to1(wakephys);
640 		intr_restore(saved_intr);
641 
642 		return (ret);
643 	} else {
644 		cpr_suspend_succeeded = 1;
645 
646 		power_req.request_type = PMR_PPM_EXIT_SX;
647 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
648 
649 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
650 		PT(PT_PPMCTLOP);
651 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
652 		    &power_req, &ret);
653 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
654 
655 		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
656 		/*
657 		 * the restore should never fail, if the saved suceeded
658 		 */
659 		ASSERT(ret == 0);
660 
661 		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
662 
663 		/*
664 		 * Enable interrupts on boot cpu.
665 		 */
666 		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
667 		mutex_enter(&cpu_lock);
668 		cpu_enable_intr(CPU);
669 		mutex_exit(&cpu_lock);
670 
671 		PT(PT_INTRRESTORE);
672 		intr_restore(saved_intr);
673 		PT(PT_CPU);
674 
675 		return (ret);
676 	}
677 }
678 
679 /*
680  * Stop all other cpu's before halting or rebooting. We pause the cpu's
681  * instead of sending a cross call.
682  * Stolen from sun4/os/mp_states.c
683  */
684 
685 static int cpu_are_paused;	/* sic */
686 
687 void
688 i_cpr_stop_other_cpus(void)
689 {
690 	mutex_enter(&cpu_lock);
691 	if (cpu_are_paused) {
692 		mutex_exit(&cpu_lock);
693 		return;
694 	}
695 	pause_cpus(NULL);
696 	cpu_are_paused = 1;
697 
698 	mutex_exit(&cpu_lock);
699 }
700 
701 int
702 i_cpr_is_supported(int sleeptype)
703 {
704 	extern int cpr_supported_override;
705 	extern int cpr_platform_enable;
706 	extern int pm_S3_enabled;
707 
708 	if (sleeptype != CPR_TORAM)
709 		return (0);
710 
711 	/*
712 	 * The next statement tests if a specific platform has turned off
713 	 * cpr support.
714 	 */
715 	if (cpr_supported_override)
716 		return (0);
717 
718 	/*
719 	 * If a platform has specifically turned on cpr support ...
720 	 */
721 	if (cpr_platform_enable)
722 		return (1);
723 
724 	return (pm_S3_enabled);
725 }
726 
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_bitmap_cleanup(void)
{
	/* nothing to do */
}
731 
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_free_memory_resources(void)
{
	/* nothing to do */
}
736 
737 /*
738  * Needed only for S3 so far
739  */
740 static int
741 i_cpr_platform_alloc(psm_state_request_t *req)
742 {
743 	char	*str = "i_cpr_platform_alloc";
744 
745 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
746 
747 	if (ncpus == 1) {
748 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
749 		return (0);
750 	}
751 
752 	req->psr_cmd = PSM_STATE_ALLOC;
753 	return ((*psm_state)(req));
754 }
755 
756 /*
757  * Needed only for S3 so far
758  */
759 static void
760 i_cpr_platform_free(psm_state_request_t *req)
761 {
762 	char	*str = "i_cpr_platform_free";
763 
764 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
765 
766 	if (ncpus == 1) {
767 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
768 	}
769 
770 	req->psr_cmd = PSM_STATE_FREE;
771 	(void) (*psm_state)(req);
772 }
773 
774 static int
775 i_cpr_save_apic(psm_state_request_t *req)
776 {
777 	char	*str = "i_cpr_save_apic";
778 
779 	if (ncpus == 1) {
780 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
781 		return (0);
782 	}
783 
784 	req->psr_cmd = PSM_STATE_SAVE;
785 	return ((*psm_state)(req));
786 }
787 
788 static int
789 i_cpr_restore_apic(psm_state_request_t *req)
790 {
791 	char	*str = "i_cpr_restore_apic";
792 
793 	if (ncpus == 1) {
794 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
795 		return (0);
796 	}
797 
798 	req->psr_cmd = PSM_STATE_RESTORE;
799 	return ((*psm_state)(req));
800 }
801 
802 
803 /* stop lint complaining about offset not being used in 32bit mode */
804 #if !defined(__amd64)
805 /*ARGSUSED*/
806 #endif
807 static void
808 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
809 {
810 	/*LINTED*/
811 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
812 
813 	/*
814 	 * Fill up the real mode platter to make it easy for real mode code to
815 	 * kick it off. This area should really be one passed by boot to kernel
816 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
817 	 * have identical physical and virtual address in paged mode.
818 	 */
819 
820 	real_mode_platter->rm_pdbr = getcr3();
821 	real_mode_platter->rm_cpu = cpun;
822 	real_mode_platter->rm_cr4 = cr4;
823 
824 	real_mode_platter->rm_gdt_base = gdt.base;
825 	real_mode_platter->rm_gdt_lim = gdt.limit;
826 
827 #if defined(__amd64)
828 	real_mode_platter->rm_x86feature = x86_feature;
829 
830 	if (getcr3() > 0xffffffffUL)
831 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
832 		    "located above 4G in physical memory (@ 0x%llx).",
833 		    (unsigned long long)getcr3());
834 
835 	/*
836 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
837 	 * by code in real_mode_start():
838 	 *
839 	 * GDT[0]:  NULL selector
840 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
841 	 *
842 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
843 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
844 	 * a course of action as any other, though it may cause the entire
845 	 * platform to reset in some cases...
846 	 */
847 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
848 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
849 
850 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
851 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
852 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
853 	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
854 
855 	real_mode_platter->rm_temp_idt_lim = 0;
856 	real_mode_platter->rm_temp_idt_base = 0;
857 
858 	/*
859 	 * Since the CPU needs to jump to protected mode using an identity
860 	 * mapped address, we need to calculate it here.
861 	 */
862 	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
863 #endif	/* __amd64 */
864 
865 	/* return; */
866 }
867 
868 void
869 i_cpr_start_cpu(void)
870 {
871 
872 	struct cpu *cp = CPU;
873 
874 	char *str = "i_cpr_start_cpu";
875 	extern void init_cpu_syscall(struct cpu *cp);
876 
877 	PMD(PMD_SX, ("%s() called\n", str))
878 
879 	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
880 	    cp->cpu_base_spl))
881 
882 	mutex_enter(&cpu_lock);
883 	if (cp == i_cpr_bootcpu()) {
884 		mutex_exit(&cpu_lock);
885 		PMD(PMD_SX,
886 		    ("%s() called on bootcpu nothing to do!\n", str))
887 		return;
888 	}
889 	mutex_exit(&cpu_lock);
890 
891 	/*
892 	 * We need to Sync PAT with cpu0's PAT. We have to do
893 	 * this with interrupts disabled.
894 	 */
895 	if (x86_feature & X86_PAT)
896 		pat_sync();
897 
898 	/*
899 	 * Initialize this CPU's syscall handlers
900 	 */
901 	init_cpu_syscall(cp);
902 
903 	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))
904 
905 	/*
906 	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
907 	 * init_cpu_info(), since the work that they do is only needed to
908 	 * be done once at boot time
909 	 */
910 
911 
912 	mutex_enter(&cpu_lock);
913 	CPUSET_ADD(procset, cp->cpu_id);
914 	mutex_exit(&cpu_lock);
915 
916 	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
917 	    cp->cpu_base_spl))
918 
919 	if (tsc_gethrtime_enable) {
920 		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
921 		tsc_sync_slave();
922 	}
923 
924 	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
925 	    cp->cpu_id, cp->cpu_intr_actv))
926 	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
927 	    cp->cpu_base_spl))
928 
929 	(void) spl0();		/* enable interrupts */
930 
931 	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
932 	    cp->cpu_base_spl))
933 
934 	/*
935 	 * Set up the CPU module for this CPU.  This can't be done before
936 	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
937 	 * need to go load another CPU module.  The act of attempting to load
938 	 * a module may trigger a cross-call, which will ASSERT unless this
939 	 * cpu is CPU_READY.
940 	 */
941 
942 	/*
943 	 * cmi already been init'd (during boot), so do not need to do it again
944 	 */
945 #ifdef PM_REINITMCAONRESUME
946 	if (x86_feature & X86_MCA)
947 		cmi_mca_init();
948 #endif
949 
950 	PMD(PMD_SX, ("%s() returning\n", str))
951 
952 	/* return; */
953 }
954 
955 void
956 i_cpr_alloc_cpus(void)
957 {
958 	char *str = "i_cpr_alloc_cpus";
959 
960 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
961 	/*
962 	 * we allocate this only when we actually need it to save on
963 	 * kernel memory
964 	 */
965 
966 	if (wc_other_cpus == NULL) {
967 		wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
968 		    KM_SLEEP);
969 	}
970 
971 }
972 
973 void
974 i_cpr_free_cpus(void)
975 {
976 	if (wc_other_cpus != NULL) {
977 		kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
978 		wc_other_cpus = NULL;
979 	}
980 }
981 
982 /*
983  * wrapper for acpica_ddi_save_resources()
984  */
985 void
986 i_cpr_save_configuration(dev_info_t *dip)
987 {
988 	acpica_ddi_save_resources(dip);
989 }
990 
991 /*
992  * wrapper for acpica_ddi_restore_resources()
993  */
994 void
995 i_cpr_restore_configuration(dev_info_t *dip)
996 {
997 	acpica_ddi_restore_resources(dip);
998 }
999 
1000 static int
1001 wait_for_set(cpuset_t *set, int who)
1002 {
1003 	int delays;
1004 	char *str = "wait_for_set";
1005 
1006 	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1007 		if (delays == 500) {
1008 			/*
1009 			 * After five seconds, things are probably
1010 			 * looking a bit bleak - explain the hang.
1011 			 */
1012 			cmn_err(CE_NOTE, "cpu%d: started, "
1013 			    "but not running in the kernel yet", who);
1014 			PMD(PMD_SX, ("%s() %d cpu started "
1015 			    "but not running in the kernel yet\n",
1016 			    str, who))
1017 		} else if (delays > 2000) {
1018 			/*
1019 			 * We waited at least 20 seconds, bail ..
1020 			 */
1021 			cmn_err(CE_WARN, "cpu%d: timed out", who);
1022 			PMD(PMD_SX, ("%s() %d cpu timed out\n",
1023 			    str, who))
1024 			return (0);
1025 		}
1026 
1027 		/*
1028 		 * wait at least 10ms, then check again..
1029 		 */
1030 		drv_usecwait(10000);
1031 	}
1032 
1033 	return (1);
1034 }
1035