xref: /titanic_41/usr/src/uts/i86pc/os/cpr_impl.c (revision d69c2551e89e9440043ac6ff5739b58746286f33)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Platform specific implementation code
30  * Currently only suspend to RAM is supported (ACPI S3)
31  */
32 
33 #define	SUNDDI_IMPL
34 
35 #include <sys/types.h>
36 #include <sys/promif.h>
37 #include <sys/prom_isa.h>
38 #include <sys/prom_plat.h>
39 #include <sys/cpuvar.h>
40 #include <sys/pte.h>
41 #include <vm/hat.h>
42 #include <vm/page.h>
43 #include <vm/as.h>
44 #include <sys/cpr.h>
45 #include <sys/kmem.h>
46 #include <sys/clock.h>
47 #include <sys/kmem.h>
48 #include <sys/panic.h>
49 #include <vm/seg_kmem.h>
50 #include <sys/cpu_module.h>
51 #include <sys/callb.h>
52 #include <sys/machsystm.h>
53 #include <sys/vmsystm.h>
54 #include <sys/systm.h>
55 #include <sys/archsystm.h>
56 #include <sys/stack.h>
57 #include <sys/fs/ufs_fs.h>
58 #include <sys/memlist.h>
59 #include <sys/bootconf.h>
60 #include <sys/thread.h>
61 #include <sys/x_call.h>
62 #include <sys/smp_impldefs.h>
63 #include <vm/vm_dep.h>
64 #include <sys/psm.h>
65 #include <sys/epm.h>
66 #include <sys/cpr_wakecode.h>
67 #include <sys/x86_archext.h>
68 #include <sys/reboot.h>
69 #include <sys/acpi/acpi.h>
70 #include <sys/acpica.h>
71 
72 #define	AFMT	"%lx"
73 
74 extern int	flushes_require_xcalls;
75 extern cpuset_t	cpu_ready_set;
76 
77 #if defined(__amd64)
78 extern void	*wc_long_mode_64(void);
79 #endif	/* __amd64 */
80 extern int	tsc_gethrtime_enable;
81 extern	void	i_cpr_start_cpu(void);
82 
83 ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
84 void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;
85 
86 static wc_cpu_t	*wc_other_cpus = NULL;
87 static cpuset_t procset;
88 
89 static void
90 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);
91 
92 static int i_cpr_platform_alloc(psm_state_request_t *req);
93 static void i_cpr_platform_free(psm_state_request_t *req);
94 static int i_cpr_save_apic(psm_state_request_t *req);
95 static int i_cpr_restore_apic(psm_state_request_t *req);
96 static int wait_for_set(cpuset_t *set, int who);
97 
98 #if defined(__amd64)
99 static void restore_stack(wc_cpu_t *cpup);
100 static void save_stack(wc_cpu_t *cpup);
101 void (*save_stack_func)(wc_cpu_t *) = save_stack;
102 #endif	/* __amd64 */
103 
104 /*
105  * restart paused slave cpus
106  */
107 void
108 i_cpr_machdep_setup(void)
109 {
110 	if (ncpus > 1) {
111 		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
112 		mutex_enter(&cpu_lock);
113 		start_cpus();
114 		mutex_exit(&cpu_lock);
115 	}
116 }
117 
118 
/*
 * Stop interrupt activity by calling spl7().  The value spl7() returns
 * is deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
127 
/*
 * Allow the machine to take interrupts again by calling spl0().  The
 * value spl0() returns is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
136 
/*
 * Placeholder for saving the miscellaneous information that would be
 * written to a state file in order to re-initialize kernel/prom
 * handshaking.  Nothing is saved here: the body only asserts on a flag
 * that is always zero, so the assertion trips whenever this routine is
 * reached with assertions compiled in.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled;

	notcalled = 0;
	ASSERT(notcalled);
}
148 
149 
/*
 * Intentionally empty: this implementation has nothing to do here.
 */
void
i_cpr_set_tbr(void)
{
}
154 
155 
156 processorid_t
157 i_cpr_bootcpuid(void)
158 {
159 	return (0);
160 }
161 
162 /*
163  * cpu0 should contain bootcpu info
164  */
165 cpu_t *
166 i_cpr_bootcpu(void)
167 {
168 	ASSERT(MUTEX_HELD(&cpu_lock));
169 
170 	return (cpu_get(i_cpr_bootcpuid()));
171 }
172 
173 /*
174  *	Save context for the specified CPU
175  */
176 void *
177 i_cpr_save_context(void *arg)
178 {
179 	long	index = (long)arg;
180 	psm_state_request_t *papic_state;
181 	int resuming;
182 	int	ret;
183 
184 	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))
185 
186 	ASSERT(index < NCPU);
187 
188 	papic_state = &(wc_other_cpus + index)->wc_apic_state;
189 
190 	ret = i_cpr_platform_alloc(papic_state);
191 	ASSERT(ret == 0);
192 
193 	ret = i_cpr_save_apic(papic_state);
194 	ASSERT(ret == 0);
195 
196 	/*
197 	 * wc_save_context returns twice, once when susending and
198 	 * once when resuming,  wc_save_context() returns 0 when
199 	 * suspending and non-zero upon resume
200 	 */
201 	resuming = (wc_save_context(wc_other_cpus + index) == 0);
202 
203 	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
204 	    resuming))
205 
206 	/*
207 	 * do NOT call any functions after this point, because doing so
208 	 * will modify the stack that we are running on
209 	 */
210 
211 	if (resuming) {
212 
213 		ret = i_cpr_restore_apic(papic_state);
214 		ASSERT(ret == 0);
215 
216 		i_cpr_platform_free(papic_state);
217 
218 		/*
219 		 * Enable interrupts on this cpu.
220 		 * Do not bind interrupts to this CPU's local APIC until
221 		 * the CPU is ready to recieve interrupts.
222 		 */
223 		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
224 		mutex_enter(&cpu_lock);
225 		cpu_enable_intr(CPU);
226 		mutex_exit(&cpu_lock);
227 
228 		/*
229 		 * Setting the bit in cpu_ready_set must be the last operation
230 		 * in processor initialization; the boot CPU will continue to
231 		 * boot once it sees this bit set for all active CPUs.
232 		 */
233 		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);
234 
235 		PMD(PMD_SX,
236 		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
237 		    CPU->cpu_id))
238 	}
239 	return (NULL);
240 }
241 
242 static ushort_t *warm_reset_vector = NULL;
243 
244 static ushort_t *
245 map_warm_reset_vector()
246 {
247 	/*LINTED*/
248 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
249 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
250 		return (NULL);
251 
252 	/*
253 	 * setup secondary cpu bios boot up vector
254 	 */
255 	*warm_reset_vector = (ushort_t)((caddr_t)
256 	    /*LINTED*/
257 	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
258 	    + ((ulong_t)rm_platter_va & 0xf));
259 	warm_reset_vector++;
260 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
261 
262 	--warm_reset_vector;
263 	return (warm_reset_vector);
264 }
265 
266 void
267 i_cpr_pre_resume_cpus()
268 {
269 	/*
270 	 * this is a cut down version of start_other_cpus()
271 	 * just do the initialization to wake the other cpus
272 	 */
273 	unsigned who;
274 	int boot_cpuid = i_cpr_bootcpuid();
275 	uint32_t		code_length = 0;
276 	caddr_t			wakevirt = rm_platter_va;
277 	/*LINTED*/
278 	wakecode_t		*wp = (wakecode_t *)wakevirt;
279 	char *str = "i_cpr_pre_resume_cpus";
280 	extern int get_tsc_ready();
281 	int err;
282 
283 	/*LINTED*/
284 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
285 
286 	/*
287 	 * Copy the real mode code at "real_mode_start" to the
288 	 * page at rm_platter_va.
289 	 */
290 	warm_reset_vector = map_warm_reset_vector();
291 	if (warm_reset_vector == NULL) {
292 		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
293 		return;
294 	}
295 
296 	flushes_require_xcalls = 1;
297 
298 	/*
299 	 * We lock our affinity to the master CPU to ensure that all slave CPUs
300 	 * do their TSC syncs with the same CPU.
301 	 */
302 
303 	affinity_set(CPU_CURRENT);
304 
305 	/*
306 	 * Mark the boot cpu as being ready and in the procset, since we are
307 	 * running on that cpu.
308 	 */
309 	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
310 	CPUSET_ONLY(procset, boot_cpuid);
311 
312 	for (who = 0; who < ncpus; who++) {
313 
314 		wc_cpu_t	*cpup = wc_other_cpus + who;
315 		wc_desctbr_t	gdt;
316 
317 		if (who == boot_cpuid)
318 			continue;
319 
320 		if (!CPU_IN_SET(mp_cpus, who))
321 			continue;
322 
323 		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
324 
325 		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
326 
327 		gdt.base = cpup->wc_gdt_base;
328 		gdt.limit = cpup->wc_gdt_limit;
329 
330 #if defined(__amd64)
331 		code_length = (uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start;
332 #else
333 		code_length = 0;
334 #endif
335 
336 		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
337 
338 		if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
339 			cmn_err(CE_WARN, "cpu%d: failed to start during "
340 			    "suspend/resume error %d", who, err);
341 			continue;
342 		}
343 
344 		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
345 
346 		if (!wait_for_set(&procset, who))
347 			continue;
348 
349 		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
350 
351 		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
352 
353 		if (tsc_gethrtime_enable) {
354 			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
355 			tsc_sync_master(who);
356 		}
357 
358 		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
359 		    who))
360 		/*
361 		 * Wait for cpu to declare that it is ready, we want the
362 		 * cpus to start serially instead of in parallel, so that
363 		 * they do not contend with each other in wc_rm_start()
364 		 */
365 		if (!wait_for_set(&cpu_ready_set, who))
366 			continue;
367 
368 		/*
369 		 * do not need to re-initialize dtrace using dtrace_cpu_init
370 		 * function
371 		 */
372 		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
373 	}
374 
375 	affinity_clear();
376 
377 	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
378 
379 }
380 
381 static void
382 unmap_warm_reset_vector(ushort_t *warm_reset_vector)
383 {
384 	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
385 }
386 
387 /*
388  * We need to setup a 1:1 (virtual to physical) mapping for the
389  * page containing the wakeup code.
390  */
391 static struct as *save_as;	/* when switching to kas */
392 
393 static void
394 unmap_wakeaddr_1to1(uint64_t wakephys)
395 {
396 	uintptr_t	wp = (uintptr_t)wakephys;
397 	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
398 	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
399 }
400 
401 void
402 i_cpr_post_resume_cpus()
403 {
404 	uint64_t	wakephys = rm_platter_pa;
405 
406 	if (warm_reset_vector != NULL)
407 		unmap_warm_reset_vector(warm_reset_vector);
408 
409 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
410 	    HAT_UNLOAD);
411 
412 	/*
413 	 * cmi_post_mpstartup() is only required upon boot not upon
414 	 * resume from RAM
415 	 */
416 
417 	PT(PT_UNDO1to1);
418 	/* Tear down 1:1 mapping for wakeup code */
419 	unmap_wakeaddr_1to1(wakephys);
420 }
421 
/*
 * Intentionally empty: the flag argument is ignored.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}
427 
/*
 * Report whether a reusable statefile is supported.  Always returns 0.
 */
int
i_cpr_reusable_supported(void)
{
	int	supported = 0;

	return (supported);
}
433 static void
434 map_wakeaddr_1to1(uint64_t wakephys)
435 {
436 	uintptr_t	wp = (uintptr_t)wakephys;
437 	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
438 	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
439 	    HAT_LOAD);
440 	save_as = curthread->t_procp->p_as;
441 	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
442 }
443 
444 
445 void
446 prt_other_cpus()
447 {
448 	int	who;
449 
450 	if (ncpus == 1) {
451 		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
452 		    "uniprocessor machine\n"))
453 		return;
454 	}
455 
456 	for (who = 0; who < ncpus; who++) {
457 
458 		wc_cpu_t	*cpup = wc_other_cpus + who;
459 
460 		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
461 		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
462 		    AFMT ", sp=%lx\n", who,
463 		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
464 		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
465 		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
466 		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
467 	}
468 }
469 
470 /*
471  * Power down the system.
472  */
473 int
474 i_cpr_power_down(int sleeptype)
475 {
476 	caddr_t		wakevirt = rm_platter_va;
477 	uint64_t	wakephys = rm_platter_pa;
478 	ulong_t		saved_intr;
479 	uint32_t	code_length = 0;
480 	wc_desctbr_t	gdt;
481 	/*LINTED*/
482 	wakecode_t	*wp = (wakecode_t *)wakevirt;
483 	/*LINTED*/
484 	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
485 	wc_cpu_t	*cpup = &(wp->wc_cpu);
486 	dev_info_t	*ppm;
487 	int		ret = 0;
488 	power_req_t	power_req;
489 	char *str =	"i_cpr_power_down";
490 #if defined(__amd64)
491 	/*LINTED*/
492 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
493 #endif
494 	extern int	cpr_suspend_succeeded;
495 	extern void	kernel_wc_code();
496 
497 	ASSERT(sleeptype == CPR_TORAM);
498 	ASSERT(CPU->cpu_id == 0);
499 
500 	if ((ppm = PPM(ddi_root_node())) == NULL) {
501 		PMD(PMD_SX, ("%s: root node not claimed\n", str))
502 		return (ENOTTY);
503 	}
504 
505 	PMD(PMD_SX, ("Entering %s()\n", str))
506 
507 	PT(PT_IC);
508 	saved_intr = intr_clear();
509 
510 	PT(PT_1to1);
511 	/* Setup 1:1 mapping for wakeup code */
512 	map_wakeaddr_1to1(wakephys);
513 
514 	PMD(PMD_SX, ("ncpus=%d\n", ncpus))
515 
516 	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
517 	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))
518 
519 	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
520 	    (void *)wakevirt, (uint_t)wakephys))
521 
522 	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
523 	    WC_CODESIZE);
524 
525 	bzero(wakevirt, PAGESIZE);
526 
527 	/* Copy code to rm_platter */
528 	bcopy((caddr_t)wc_rm_start, wakevirt,
529 	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));
530 
531 	prt_other_cpus();
532 
533 #if defined(__amd64)
534 
535 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
536 	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
537 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
538 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
539 
540 	real_mode_platter->rm_cr4 = getcr4();
541 	real_mode_platter->rm_pdbr = getcr3();
542 
543 	rmp_gdt_init(real_mode_platter);
544 
545 	/*
546 	 * Since the CPU needs to jump to protected mode using an identity
547 	 * mapped address, we need to calculate it here.
548 	 */
549 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
550 	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);
551 
552 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
553 	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
554 
555 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
556 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
557 
558 	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
559 	    (ulong_t)real_mode_platter->rm_longmode64_addr))
560 
561 #endif
562 
563 	PT(PT_SC);
564 	if (wc_save_context(cpup)) {
565 
566 		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
567 		if (ret != 0)
568 			return (ret);
569 
570 		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
571 		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
572 		if (ret != 0)
573 			return (ret);
574 
575 		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
576 		    (uint_t)wakephys, (void *)&kernel_wc_code))
577 		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
578 		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
579 		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
580 		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
581 		    cpup->wc_esp))
582 		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
583 		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
584 		    (long)cpup->wc_cr4))
585 		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
586 		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
587 		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
588 		    (long)cpup->wc_eflags))
589 
590 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
591 		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
592 		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
593 		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
594 		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
595 
596 		gdt.base = cpup->wc_gdt_base;
597 		gdt.limit = cpup->wc_gdt_limit;
598 
599 #if defined(__amd64)
600 		code_length = (uint32_t)wc_long_mode_64 -
601 		    (uint32_t)wc_rm_start;
602 #else
603 		code_length = 0;
604 #endif
605 
606 		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
607 
608 #if defined(__amd64)
609 		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
610 		    (ulong_t)wcpp->rm_cr4, getcr4()))
611 
612 		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
613 		    (ulong_t)wcpp->rm_pdbr, getcr3()))
614 
615 		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
616 		    (ulong_t)wcpp->rm_longmode64_addr))
617 
618 		PMD(PMD_SX,
619 		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
620 		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
621 #endif
622 
623 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
624 		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
625 		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
626 		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
627 		    (long)cpup->wc_kgsbase))
628 
629 		power_req.request_type = PMR_PPM_ENTER_SX;
630 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
631 		power_req.req.ppm_power_enter_sx_req.test_point =
632 		    cpr_test_point;
633 		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
634 
635 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
636 		PT(PT_PPMCTLOP);
637 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
638 		    &power_req, &ret);
639 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
640 
641 		/*
642 		 * If it works, we get control back to the else branch below
643 		 * If we get control back here, it didn't work.
644 		 * XXX return EINVAL here?
645 		 */
646 
647 		unmap_wakeaddr_1to1(wakephys);
648 		intr_restore(saved_intr);
649 
650 		return (ret);
651 	} else {
652 		cpr_suspend_succeeded = 1;
653 
654 		power_req.request_type = PMR_PPM_EXIT_SX;
655 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
656 
657 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
658 		PT(PT_PPMCTLOP);
659 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
660 		    &power_req, &ret);
661 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
662 
663 		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
664 		/*
665 		 * the restore should never fail, if the saved suceeded
666 		 */
667 		ASSERT(ret == 0);
668 
669 		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
670 
671 		/*
672 		 * Enable interrupts on boot cpu.
673 		 */
674 		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
675 		mutex_enter(&cpu_lock);
676 		cpu_enable_intr(CPU);
677 		mutex_exit(&cpu_lock);
678 
679 		PT(PT_INTRRESTORE);
680 		intr_restore(saved_intr);
681 		PT(PT_CPU);
682 
683 		return (ret);
684 	}
685 }
686 
687 /*
688  * Stop all other cpu's before halting or rebooting. We pause the cpu's
689  * instead of sending a cross call.
690  * Stolen from sun4/os/mp_states.c
691  */
692 
693 static int cpu_are_paused;	/* sic */
694 
695 void
696 i_cpr_stop_other_cpus(void)
697 {
698 	mutex_enter(&cpu_lock);
699 	if (cpu_are_paused) {
700 		mutex_exit(&cpu_lock);
701 		return;
702 	}
703 	pause_cpus(NULL);
704 	cpu_are_paused = 1;
705 
706 	mutex_exit(&cpu_lock);
707 }
708 
709 int
710 i_cpr_is_supported(int sleeptype)
711 {
712 	extern int cpr_supported_override;
713 	extern int cpr_platform_enable;
714 	extern int pm_S3_enabled;
715 
716 	if (sleeptype != CPR_TORAM)
717 		return (0);
718 
719 	/*
720 	 * The next statement tests if a specific platform has turned off
721 	 * cpr support.
722 	 */
723 	if (cpr_supported_override)
724 		return (0);
725 
726 	/*
727 	 * If a platform has specifically turned on cpr support ...
728 	 */
729 	if (cpr_platform_enable)
730 		return (1);
731 
732 	return (pm_S3_enabled);
733 }
734 
/*
 * Intentionally empty: this implementation has no bitmaps to clean up.
 */
void
i_cpr_bitmap_cleanup(void)
{
}
739 
/*
 * Intentionally empty: this implementation has nothing to release here.
 */
void
i_cpr_free_memory_resources(void)
{
}
744 
745 /*
746  * Needed only for S3 so far
747  */
748 static int
749 i_cpr_platform_alloc(psm_state_request_t *req)
750 {
751 	char	*str = "i_cpr_platform_alloc";
752 
753 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
754 
755 	if (ncpus == 1) {
756 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
757 		return (0);
758 	}
759 
760 	req->psr_cmd = PSM_STATE_ALLOC;
761 	return ((*psm_state)(req));
762 }
763 
764 /*
765  * Needed only for S3 so far
766  */
767 static void
768 i_cpr_platform_free(psm_state_request_t *req)
769 {
770 	char	*str = "i_cpr_platform_free";
771 
772 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
773 
774 	if (ncpus == 1) {
775 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
776 	}
777 
778 	req->psr_cmd = PSM_STATE_FREE;
779 	(void) (*psm_state)(req);
780 }
781 
782 static int
783 i_cpr_save_apic(psm_state_request_t *req)
784 {
785 	char	*str = "i_cpr_save_apic";
786 
787 	if (ncpus == 1) {
788 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
789 		return (0);
790 	}
791 
792 	req->psr_cmd = PSM_STATE_SAVE;
793 	return ((*psm_state)(req));
794 }
795 
796 static int
797 i_cpr_restore_apic(psm_state_request_t *req)
798 {
799 	char	*str = "i_cpr_restore_apic";
800 
801 	if (ncpus == 1) {
802 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
803 		return (0);
804 	}
805 
806 	req->psr_cmd = PSM_STATE_RESTORE;
807 	return ((*psm_state)(req));
808 }
809 
810 
811 /* stop lint complaining about offset not being used in 32bit mode */
812 #if !defined(__amd64)
813 /*ARGSUSED*/
814 #endif
815 static void
816 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
817 {
818 	/*LINTED*/
819 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
820 
821 	/*
822 	 * Fill up the real mode platter to make it easy for real mode code to
823 	 * kick it off. This area should really be one passed by boot to kernel
824 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
825 	 * have identical physical and virtual address in paged mode.
826 	 */
827 
828 	real_mode_platter->rm_pdbr = getcr3();
829 	real_mode_platter->rm_cpu = cpun;
830 	real_mode_platter->rm_cr4 = cr4;
831 
832 	real_mode_platter->rm_gdt_base = gdt.base;
833 	real_mode_platter->rm_gdt_lim = gdt.limit;
834 
835 #if defined(__amd64)
836 	real_mode_platter->rm_x86feature = x86_feature;
837 
838 	if (getcr3() > 0xffffffffUL)
839 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
840 		    "located above 4G in physical memory (@ 0x%llx).",
841 		    (unsigned long long)getcr3());
842 
843 	/*
844 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
845 	 * by code in real_mode_start():
846 	 *
847 	 * GDT[0]:  NULL selector
848 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
849 	 *
850 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
851 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
852 	 * a course of action as any other, though it may cause the entire
853 	 * platform to reset in some cases...
854 	 */
855 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
856 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
857 
858 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
859 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
860 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
861 	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
862 
863 	real_mode_platter->rm_temp_idt_lim = 0;
864 	real_mode_platter->rm_temp_idt_base = 0;
865 
866 	/*
867 	 * Since the CPU needs to jump to protected mode using an identity
868 	 * mapped address, we need to calculate it here.
869 	 */
870 	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
871 #endif	/* __amd64 */
872 
873 	/* return; */
874 }
875 
876 void
877 i_cpr_start_cpu(void)
878 {
879 
880 	struct cpu *cp = CPU;
881 
882 	char *str = "i_cpr_start_cpu";
883 	extern void init_cpu_syscall(struct cpu *cp);
884 
885 #if defined(__amd64)
886 	wc_cpu_t	*cpup = wc_other_cpus + cp->cpu_id;
887 #endif	/*	__amd64	*/
888 
889 	PMD(PMD_SX, ("%s() called\n", str))
890 
891 	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
892 	    cp->cpu_base_spl))
893 
894 	mutex_enter(&cpu_lock);
895 	if (cp == i_cpr_bootcpu()) {
896 		mutex_exit(&cpu_lock);
897 		PMD(PMD_SX,
898 		    ("%s() called on bootcpu nothing to do!\n", str))
899 		return;
900 	}
901 	mutex_exit(&cpu_lock);
902 
903 	/*
904 	 * We need to Sync PAT with cpu0's PAT. We have to do
905 	 * this with interrupts disabled.
906 	 */
907 	if (x86_feature & X86_PAT)
908 		pat_sync();
909 
910 	/*
911 	 * Initialize this CPU's syscall handlers
912 	 */
913 	init_cpu_syscall(cp);
914 
915 	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))
916 
917 	/*
918 	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
919 	 * init_cpu_info(), since the work that they do is only needed to
920 	 * be done once at boot time
921 	 */
922 
923 
924 	mutex_enter(&cpu_lock);
925 
926 #if defined(__amd64)
927 	restore_stack(cpup);
928 #endif	/*	__amd64	*/
929 
930 	CPUSET_ADD(procset, cp->cpu_id);
931 	mutex_exit(&cpu_lock);
932 
933 	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
934 	    cp->cpu_base_spl))
935 
936 	if (tsc_gethrtime_enable) {
937 		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
938 		tsc_sync_slave();
939 	}
940 
941 	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
942 	    cp->cpu_id, cp->cpu_intr_actv))
943 	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
944 	    cp->cpu_base_spl))
945 
946 	(void) spl0();		/* enable interrupts */
947 
948 	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
949 	    cp->cpu_base_spl))
950 
951 	/*
952 	 * Set up the CPU module for this CPU.  This can't be done before
953 	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
954 	 * need to go load another CPU module.  The act of attempting to load
955 	 * a module may trigger a cross-call, which will ASSERT unless this
956 	 * cpu is CPU_READY.
957 	 */
958 
959 	/*
960 	 * cmi already been init'd (during boot), so do not need to do it again
961 	 */
962 #ifdef PM_REINITMCAONRESUME
963 	if (x86_feature & X86_MCA)
964 		cmi_mca_init();
965 #endif
966 
967 	PMD(PMD_SX, ("%s() returning\n", str))
968 
969 	/* return; */
970 }
971 
972 #if defined(__amd64)
973 /*
974  * we only need to do this for amd64!
975  */
976 
977 /*
978  * save the stack
979  */
980 void
981 save_stack(wc_cpu_t *cpup)
982 {
983 	char *str = "save_stack";
984 	caddr_t base = curthread->t_stk;
985 	caddr_t sp = (caddr_t)cpup->wc_rsp;
986 
987 
988 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
989 	PMD(PMD_SX, ("save_stack() curthread->t_stk = %p, sp = %p\n",
990 	    (void *)base, (void *)sp))
991 
992 	ASSERT(base > sp);
993 	/*LINTED*/
994 	bcopy(sp, cpup->wc_stack, base - sp);
995 
996 }
997 
998 /*
999  * restore the stack
1000  */
1001 static	void
1002 restore_stack(wc_cpu_t *cpup)
1003 {
1004 	/*
1005 	 * we only need to do this for amd64!
1006 	 */
1007 
1008 	char *str = "restore_stack";
1009 	caddr_t base = curthread->t_stk;
1010 	caddr_t sp = (caddr_t)cpup->wc_rsp;
1011 
1012 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1013 	PMD(PMD_SX, ("%s() curthread->t_stk = %p, sp = %p\n", str,
1014 	    (void *)base, (void *)sp))
1015 
1016 	ASSERT(base > sp);
1017 	/*LINTED*/
1018 	bcopy(cpup->wc_stack, sp, base - sp);
1019 
1020 }
1021 
1022 #endif	/*	__amd64	*/
1023 
1024 
1025 void
1026 i_cpr_alloc_cpus(void)
1027 {
1028 	char *str = "i_cpr_alloc_cpus";
1029 
1030 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1031 	/*
1032 	 * we allocate this only when we actually need it to save on
1033 	 * kernel memory
1034 	 */
1035 
1036 	if (wc_other_cpus == NULL) {
1037 		wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
1038 		    KM_SLEEP);
1039 	}
1040 
1041 }
1042 
1043 void
1044 i_cpr_free_cpus(void)
1045 {
1046 	if (wc_other_cpus != NULL) {
1047 		kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
1048 		wc_other_cpus = NULL;
1049 	}
1050 }
1051 
1052 /*
1053  * wrapper for acpica_ddi_save_resources()
1054  */
1055 void
1056 i_cpr_save_configuration(dev_info_t *dip)
1057 {
1058 	acpica_ddi_save_resources(dip);
1059 }
1060 
1061 /*
1062  * wrapper for acpica_ddi_restore_resources()
1063  */
1064 void
1065 i_cpr_restore_configuration(dev_info_t *dip)
1066 {
1067 	acpica_ddi_restore_resources(dip);
1068 }
1069 
1070 static int
1071 wait_for_set(cpuset_t *set, int who)
1072 {
1073 	int delays;
1074 	char *str = "wait_for_set";
1075 
1076 	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1077 		if (delays == 500) {
1078 			/*
1079 			 * After five seconds, things are probably
1080 			 * looking a bit bleak - explain the hang.
1081 			 */
1082 			cmn_err(CE_NOTE, "cpu%d: started, "
1083 			    "but not running in the kernel yet", who);
1084 			PMD(PMD_SX, ("%s() %d cpu started "
1085 			    "but not running in the kernel yet\n",
1086 			    str, who))
1087 		} else if (delays > 2000) {
1088 			/*
1089 			 * We waited at least 20 seconds, bail ..
1090 			 */
1091 			cmn_err(CE_WARN, "cpu%d: timed out", who);
1092 			PMD(PMD_SX, ("%s() %d cpu timed out\n",
1093 			    str, who))
1094 			return (0);
1095 		}
1096 
1097 		/*
1098 		 * wait at least 10ms, then check again..
1099 		 */
1100 		drv_usecwait(10000);
1101 	}
1102 
1103 	return (1);
1104 }
1105