xref: /titanic_44/usr/src/uts/i86pc/os/cpr_impl.c (revision a194faf8907a6722dcf10ad16c6ca72c9b7bd0ba)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Platform specific implementation code
30  * Currently only suspend to RAM is supported (ACPI S3)
31  */
32 
33 #define	SUNDDI_IMPL
34 
35 #include <sys/types.h>
36 #include <sys/promif.h>
37 #include <sys/prom_isa.h>
38 #include <sys/prom_plat.h>
39 #include <sys/cpuvar.h>
40 #include <sys/pte.h>
41 #include <vm/hat.h>
42 #include <vm/page.h>
43 #include <vm/as.h>
44 #include <sys/cpr.h>
45 #include <sys/kmem.h>
46 #include <sys/clock.h>
47 #include <sys/kmem.h>
48 #include <sys/panic.h>
49 #include <vm/seg_kmem.h>
50 #include <sys/cpu_module.h>
51 #include <sys/callb.h>
52 #include <sys/machsystm.h>
53 #include <sys/vmsystm.h>
54 #include <sys/systm.h>
55 #include <sys/archsystm.h>
56 #include <sys/stack.h>
57 #include <sys/fs/ufs_fs.h>
58 #include <sys/memlist.h>
59 #include <sys/bootconf.h>
60 #include <sys/thread.h>
61 #include <sys/x_call.h>
62 #include <sys/smp_impldefs.h>
63 #include <vm/vm_dep.h>
64 #include <sys/psm.h>
65 #include <sys/epm.h>
66 #include <sys/cpr_wakecode.h>
67 #include <sys/x86_archext.h>
68 #include <sys/reboot.h>
69 #include <sys/acpi/acpi.h>
70 #include <sys/acpica.h>
71 
/* Format used by prt_other_cpus() when printing kgsbase. */
#define	AFMT	"%lx"

/* Defined outside this file; set to 1 by i_cpr_pre_resume_cpus(). */
extern int	flushes_require_xcalls;
/*
 * Defined outside this file; cleared by i_cpr_pre_resume_cpus() and added
 * to by each resumed CPU in i_cpr_save_context().
 */
extern cpuset_t	cpu_ready_set;

#if defined(__amd64)
/*
 * Symbol inside the wakeup code; only its address is used here, to compute
 * an offset from wc_rm_start.
 */
extern void	*wc_long_mode_64(void);
#endif	/* __amd64 */
/* When non-zero, the TSC sync master/slave routines are invoked on resume. */
extern int	tsc_gethrtime_enable;
extern	void	i_cpr_start_cpu(void);

ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
/* Hook pointing at i_cpr_start_cpu(); the consumer is not visible in this file. */
void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

/*
 * Per-CPU saved wakeup context.  Allocated by i_cpr_alloc_cpus() with ncpus
 * entries and released by i_cpr_free_cpus(); indexed by cpu number.
 */
static wc_cpu_t	*wc_other_cpus = NULL;
/*
 * Set of CPUs that have reached i_cpr_start_cpu(); i_cpr_pre_resume_cpus()
 * spins on it.  NOTE(review): the initial value 1 presumably marks cpu 0
 * (the boot CPU) as already present -- confirm.
 */
static cpuset_t procset = 1;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);

#if defined(__amd64)
static void restore_stack(wc_cpu_t *cpup);
static void save_stack(wc_cpu_t *cpup);
/* Hook pointing at save_stack(); the consumer is not visible in this file. */
void (*save_stack_func)(wc_cpu_t *) = save_stack;
#endif	/* __amd64 */
102 
103 /*
104  * restart paused slave cpus
105  */
106 void
107 i_cpr_machdep_setup(void)
108 {
109 	if (ncpus > 1) {
110 		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
111 		mutex_enter(&cpu_lock);
112 		start_cpus();
113 		mutex_exit(&cpu_lock);
114 	}
115 }
116 
117 
/*
 * Quiesce interrupt activity by raising the priority level with spl7().
 * The previous level returned by spl7() is deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
126 
/*
 * Allow the machine to take interrupts again by dropping the priority
 * level with spl0().  The previous level is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
135 
/*
 * Save miscellaneous information which needs to be written to the
 * state file (needed to re-initialize the kernel/prom handshake).
 *
 * This implementation is a placeholder: it is not expected to be called,
 * and the ASSERT below always fails in a DEBUG build if it is.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled;

	notcalled = 0;
	ASSERT(notcalled);
}
147 
148 
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_set_tbr(void)
{
	/* nothing to do */
}
153 
154 
155 processorid_t
156 i_cpr_bootcpuid(void)
157 {
158 	return (0);
159 }
160 
161 /*
162  * cpu0 should contain bootcpu info
163  */
164 cpu_t *
165 i_cpr_bootcpu(void)
166 {
167 	ASSERT(MUTEX_HELD(&cpu_lock));
168 
169 	return (cpu_get(i_cpr_bootcpuid()));
170 }
171 
/*
 *	Save context for the specified CPU
 *
 *	arg is the cpu index, passed as a pointer-sized integer; it selects
 *	the entry of wc_other_cpus that receives the saved state.  The
 *	function always returns NULL.
 */
void *
i_cpr_save_context(void *arg)
{
	long	index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int	ret;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	/* APIC state lives inside this cpu's wc_other_cpus entry */
	papic_state = &(wc_other_cpus + index)->wc_apic_state;

	/* failures here are only caught by ASSERT, i.e. in DEBUG builds */
	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	/*
	 * wc_save_context() returns twice, once when suspending and
	 * once when resuming.
	 *
	 * NOTE(review): the test below treats a return of 0 as the resume
	 * case and non-zero as the suspend case, which matches how
	 * i_cpr_power_down() uses wc_save_context().  This comment
	 * previously stated the opposite polarity; confirm against the
	 * wc_save_context() implementation.
	 */
	resuming = (wc_save_context(wc_other_cpus + index) == 0);

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		/* put back the APIC state saved above, then release it */
		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("cpu_release() cpu_ready_set = %lx, CPU->cpu_id = %d\n",
		    cpu_ready_set, CPU->cpu_id))
	}
	return (NULL);
}
230 
231 static ushort_t *warm_reset_vector = NULL;
232 
233 static ushort_t *
234 map_warm_reset_vector()
235 {
236 	/*LINTED*/
237 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
238 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
239 		return (NULL);
240 
241 	/*
242 	 * setup secondary cpu bios boot up vector
243 	 */
244 	*warm_reset_vector = (ushort_t)((caddr_t)
245 	    /*LINTED*/
246 	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
247 	    + ((ulong_t)rm_platter_va & 0xf));
248 	warm_reset_vector++;
249 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
250 
251 	--warm_reset_vector;
252 	return (warm_reset_vector);
253 }
254 
255 void
256 i_cpr_pre_resume_cpus()
257 {
258 	/*
259 	 * this is a cut down version of start_other_cpus()
260 	 * just do the initialization to wake the other cpus
261 	 */
262 	unsigned who;
263 	int cpuid = i_cpr_bootcpuid();
264 	int started_cpu;
265 	uint32_t		code_length = 0;
266 	caddr_t			wakevirt = rm_platter_va;
267 	/*LINTED*/
268 	wakecode_t		*wp = (wakecode_t *)wakevirt;
269 	char *str = "i_cpr_pre_resume_cpus";
270 	extern int get_tsc_ready();
271 	int err;
272 
273 	/*LINTED*/
274 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
275 
276 	/*
277 	 * Copy the real mode code at "real_mode_start" to the
278 	 * page at rm_platter_va.
279 	 */
280 	warm_reset_vector = map_warm_reset_vector();
281 	if (warm_reset_vector == NULL) {
282 		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
283 		return;
284 	}
285 
286 	flushes_require_xcalls = 1;
287 
288 	/*
289 	 * We lock our affinity to the master CPU to ensure that all slave CPUs
290 	 * do their TSC syncs with the same CPU.
291 	 */
292 
293 	affinity_set(CPU_CURRENT);
294 
295 	cpu_ready_set = 0;
296 
297 	for (who = 0; who < ncpus; who++) {
298 
299 		wc_cpu_t	*cpup = wc_other_cpus + who;
300 		wc_desctbr_t	gdt;
301 
302 		if (who == cpuid)
303 			continue;
304 
305 		if (!CPU_IN_SET(mp_cpus, who))
306 			continue;
307 
308 		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
309 
310 		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
311 
312 		gdt.base = cpup->wc_gdt_base;
313 		gdt.limit = cpup->wc_gdt_limit;
314 
315 #if defined(__amd64)
316 		code_length = (uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start;
317 #else
318 		code_length = 0;
319 #endif
320 
321 		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
322 
323 		started_cpu = 1;
324 
325 		if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
326 			cmn_err(CE_WARN, "cpu%d: failed to start during "
327 			    "suspend/resume error %d", who, err);
328 			continue;
329 		}
330 
331 		PMD(PMD_SX, ("%s() #1 waiting for procset 0x%lx\n", str,
332 		    (ulong_t)procset))
333 
334 /*
335  * This conditional compile only affects the MP case.
336  */
337 #ifdef	MP_PM
338 		for (delays = 0; !CPU_IN_SET(procset, who); delays++) {
339 			if (delays == 500) {
340 				/*
341 				 * After five seconds, things are probably
342 				 * looking a bit bleak - explain the hang.
343 				 */
344 				cmn_err(CE_NOTE, "cpu%d: started, "
345 				    "but not running in the kernel yet", who);
346 				PMD(PMD_SX, ("%s() %d cpu started "
347 				    "but not running in the kernel yet\n",
348 				    str, who))
349 			} else if (delays > 2000) {
350 				/*
351 				 * We waited at least 20 seconds, bail ..
352 				 */
353 				cmn_err(CE_WARN, "cpu%d: timed out", who);
354 				PMD(PMD_SX, ("%s() %d cpu timed out\n",
355 				    str, who))
356 				started_cpu = 0;
357 			}
358 
359 			/*
360 			 * wait at least 10ms, then check again..
361 			 */
362 			delay(USEC_TO_TICK_ROUNDUP(10000));
363 		}
364 #else
365 		while (!CPU_IN_SET(procset, who)) {
366 			;
367 		}
368 
369 #endif	/*	MP_PM	*/
370 
371 		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
372 
373 		if (!started_cpu)
374 			continue;
375 
376 		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str,
377 		    get_tsc_ready()))
378 
379 		if (tsc_gethrtime_enable) {
380 			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
381 			tsc_sync_master(who);
382 		}
383 
384 
385 		PMD(PMD_SX, ("%s() waiting for cpu_ready_set %ld\n", str,
386 		    cpu_ready_set))
387 		/*
388 		 * Wait for cpu to declare that it is ready, we want the
389 		 * cpus to start serially instead of in parallel, so that
390 		 * they do not contend with each other in wc_rm_start()
391 		 */
392 		while (!CPU_IN_SET(cpu_ready_set, who)) {
393 			PMD(PMD_SX, ("%s() waiting for "
394 			    "cpu_ready_set %ld\n", str, cpu_ready_set))
395 			;
396 		}
397 
398 		/*
399 		 * do not need to re-initialize dtrace using dtrace_cpu_init
400 		 * function
401 		 */
402 		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
403 	}
404 
405 	affinity_clear();
406 
407 	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
408 }
409 
410 static void
411 unmap_warm_reset_vector(ushort_t *warm_reset_vector)
412 {
413 	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
414 }
415 
416 /*
417  * We need to setup a 1:1 (virtual to physical) mapping for the
418  * page containing the wakeup code.
419  */
420 static struct as *save_as;	/* when switching to kas */
421 
422 static void
423 unmap_wakeaddr_1to1(uint64_t wakephys)
424 {
425 	uintptr_t	wp = (uintptr_t)wakephys;
426 	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
427 	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
428 }
429 
430 void
431 i_cpr_post_resume_cpus()
432 {
433 	uint64_t	wakephys = rm_platter_pa;
434 
435 	if (warm_reset_vector != NULL)
436 		unmap_warm_reset_vector(warm_reset_vector);
437 
438 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
439 	    HAT_UNLOAD);
440 
441 	/*
442 	 * cmi_post_mpstartup() is only required upon boot not upon
443 	 * resume from RAM
444 	 */
445 
446 	PT(PT_UNDO1to1);
447 	/* Tear down 1:1 mapping for wakeup code */
448 	unmap_wakeaddr_1to1(wakephys);
449 }
450 
/*
 * Intentionally empty in this implementation; flag is ignored.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
	/* nothing to do */
}
456 
/*
 * Report whether reusable statefiles are supported: always 0 (no) here.
 */
int
i_cpr_reusable_supported(void)
{
	int supported = 0;

	return (supported);
}
462 static void
463 map_wakeaddr_1to1(uint64_t wakephys)
464 {
465 	uintptr_t	wp = (uintptr_t)wakephys;
466 	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
467 	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
468 	    HAT_LOAD);
469 	save_as = curthread->t_procp->p_as;
470 	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
471 }
472 
473 
474 void
475 prt_other_cpus()
476 {
477 	int	who;
478 
479 	if (ncpus == 1) {
480 		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
481 		    "uniprocessor machine\n"))
482 		return;
483 	}
484 
485 	for (who = 0; who < ncpus; who++) {
486 
487 		wc_cpu_t	*cpup = wc_other_cpus + who;
488 
489 		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
490 		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
491 		    AFMT ", sp=%lx\n", who,
492 		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
493 		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
494 		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
495 		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
496 	}
497 }
498 
499 /*
500  * Power down the system.
501  */
502 int
503 i_cpr_power_down(int sleeptype)
504 {
505 	caddr_t		wakevirt = rm_platter_va;
506 	uint64_t	wakephys = rm_platter_pa;
507 	uint_t		saved_intr;
508 	uint32_t	code_length = 0;
509 	wc_desctbr_t	gdt;
510 	/*LINTED*/
511 	wakecode_t	*wp = (wakecode_t *)wakevirt;
512 	/*LINTED*/
513 	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
514 	wc_cpu_t	*cpup = &(wp->wc_cpu);
515 	dev_info_t	*ppm;
516 	int		ret = 0;
517 	power_req_t	power_req;
518 	char *str =	"i_cpr_power_down";
519 #if defined(__amd64)
520 	/*LINTED*/
521 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
522 #endif
523 	extern int	cpr_suspend_succeeded;
524 	extern void	kernel_wc_code();
525 	extern ulong_t	intr_clear(void);
526 	extern void	intr_restore(ulong_t);
527 
528 	ASSERT(sleeptype == CPR_TORAM);
529 	ASSERT(CPU->cpu_id == 0);
530 
531 	if ((ppm = PPM(ddi_root_node())) == NULL) {
532 		PMD(PMD_SX, ("%s: root node not claimed\n", str))
533 		return (ENOTTY);
534 	}
535 
536 	PMD(PMD_SX, ("Entering %s()\n", str))
537 
538 	PT(PT_IC);
539 	saved_intr = intr_clear();
540 
541 	PT(PT_1to1);
542 	/* Setup 1:1 mapping for wakeup code */
543 	map_wakeaddr_1to1(wakephys);
544 
545 	PMD(PMD_SX, ("ncpus=%d\n", ncpus))
546 
547 	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
548 	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))
549 
550 	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
551 	    (void *)wakevirt, (uint_t)wakephys))
552 
553 	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
554 	    WC_CODESIZE);
555 
556 	bzero(wakevirt, PAGESIZE);
557 
558 	/* Copy code to rm_platter */
559 	bcopy((caddr_t)wc_rm_start, wakevirt,
560 	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));
561 
562 	prt_other_cpus();
563 
564 #if defined(__amd64)
565 
566 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
567 	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
568 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
569 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
570 
571 	real_mode_platter->rm_cr4 = getcr4();
572 	real_mode_platter->rm_pdbr = getcr3();
573 
574 	rmp_gdt_init(real_mode_platter);
575 
576 	/*
577 	 * Since the CPU needs to jump to protected mode using an identity
578 	 * mapped address, we need to calculate it here.
579 	 */
580 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
581 	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);
582 
583 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
584 	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
585 
586 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
587 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
588 
589 	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
590 	    (ulong_t)real_mode_platter->rm_longmode64_addr))
591 
592 #endif
593 
594 	PMD(PMD_SX, ("mp_cpus=%lx\n", (ulong_t)mp_cpus))
595 
596 	PT(PT_SC);
597 	if (wc_save_context(cpup)) {
598 
599 		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
600 		if (ret != 0)
601 			return (ret);
602 
603 		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
604 		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
605 		if (ret != 0)
606 			return (ret);
607 
608 		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
609 		    (uint_t)wakephys, (void *)&kernel_wc_code))
610 		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
611 		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
612 		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
613 		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
614 		    cpup->wc_esp))
615 		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
616 		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
617 		    (long)cpup->wc_cr4))
618 		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
619 		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
620 		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
621 		    (long)cpup->wc_eflags))
622 
623 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
624 		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
625 		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
626 		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
627 		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
628 
629 		gdt.base = cpup->wc_gdt_base;
630 		gdt.limit = cpup->wc_gdt_limit;
631 
632 #if defined(__amd64)
633 		code_length = (uint32_t)wc_long_mode_64 -
634 		    (uint32_t)wc_rm_start;
635 #else
636 		code_length = 0;
637 #endif
638 
639 		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
640 
641 #if defined(__amd64)
642 		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
643 		    (ulong_t)wcpp->rm_cr4, getcr4()))
644 
645 		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
646 		    (ulong_t)wcpp->rm_pdbr, getcr3()))
647 
648 		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
649 		    (ulong_t)wcpp->rm_longmode64_addr))
650 
651 		PMD(PMD_SX,
652 		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
653 		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
654 #endif
655 
656 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
657 		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
658 		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
659 		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
660 		    (long)cpup->wc_kgsbase))
661 
662 		power_req.request_type = PMR_PPM_ENTER_SX;
663 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
664 		power_req.req.ppm_power_enter_sx_req.test_point =
665 		    cpr_test_point;
666 		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
667 
668 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
669 		PT(PT_PPMCTLOP);
670 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
671 		    &power_req, &ret);
672 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
673 
674 		/*
675 		 * If it works, we get control back to the else branch below
676 		 * If we get control back here, it didn't work.
677 		 * XXX return EINVAL here?
678 		 */
679 
680 		unmap_wakeaddr_1to1(wakephys);
681 		intr_restore(saved_intr);
682 
683 		return (ret);
684 	} else {
685 		cpr_suspend_succeeded = 1;
686 
687 		power_req.request_type = PMR_PPM_EXIT_SX;
688 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
689 
690 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
691 		PT(PT_PPMCTLOP);
692 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
693 		    &power_req, &ret);
694 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
695 
696 		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
697 		/*
698 		 * the restore should never fail, if the saved suceeded
699 		 */
700 		ASSERT(ret == 0);
701 
702 		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
703 
704 		PT(PT_INTRRESTORE);
705 		intr_restore(saved_intr);
706 		PT(PT_CPU);
707 
708 		return (ret);
709 	}
710 }
711 
712 /*
713  * Stop all other cpu's before halting or rebooting. We pause the cpu's
714  * instead of sending a cross call.
715  * Stolen from sun4/os/mp_states.c
716  */
717 
718 static int cpu_are_paused;	/* sic */
719 
720 void
721 i_cpr_stop_other_cpus(void)
722 {
723 	mutex_enter(&cpu_lock);
724 	if (cpu_are_paused) {
725 		mutex_exit(&cpu_lock);
726 		return;
727 	}
728 	pause_cpus(NULL);
729 	cpu_are_paused = 1;
730 
731 	mutex_exit(&cpu_lock);
732 }
733 
734 int
735 i_cpr_is_supported(int sleeptype)
736 {
737 	extern int cpr_supported_override;
738 	extern int cpr_platform_enable;
739 	extern int pm_S3_enabled;
740 
741 	if (sleeptype != CPR_TORAM)
742 		return (0);
743 
744 	/*
745 	 * The next statement tests if a specific platform has turned off
746 	 * cpr support.
747 	 */
748 	if (cpr_supported_override)
749 		return (0);
750 
751 	/*
752 	 * If a platform has specifically turned on cpr support ...
753 	 */
754 	if (cpr_platform_enable)
755 		return (1);
756 
757 	return (pm_S3_enabled);
758 }
759 
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_bitmap_cleanup(void)
{
	/* nothing to do */
}
764 
/*
 * Intentionally empty in this implementation.
 */
void
i_cpr_free_memory_resources(void)
{
	/* nothing to do */
}
769 
770 /*
771  * Needed only for S3 so far
772  */
773 static int
774 i_cpr_platform_alloc(psm_state_request_t *req)
775 {
776 	char	*str = "i_cpr_platform_alloc";
777 
778 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
779 
780 	if (ncpus == 1) {
781 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
782 		return (0);
783 	}
784 
785 	req->psr_cmd = PSM_STATE_ALLOC;
786 	return ((*psm_state)(req));
787 }
788 
789 /*
790  * Needed only for S3 so far
791  */
792 static void
793 i_cpr_platform_free(psm_state_request_t *req)
794 {
795 	char	*str = "i_cpr_platform_free";
796 
797 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
798 
799 	if (ncpus == 1) {
800 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
801 	}
802 
803 	req->psr_cmd = PSM_STATE_FREE;
804 	(void) (*psm_state)(req);
805 }
806 
807 static int
808 i_cpr_save_apic(psm_state_request_t *req)
809 {
810 	char	*str = "i_cpr_save_apic";
811 
812 	if (ncpus == 1) {
813 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
814 		return (0);
815 	}
816 
817 	req->psr_cmd = PSM_STATE_SAVE;
818 	return ((*psm_state)(req));
819 }
820 
821 static int
822 i_cpr_restore_apic(psm_state_request_t *req)
823 {
824 	char	*str = "i_cpr_restore_apic";
825 
826 	if (ncpus == 1) {
827 		PMD(PMD_SX, ("%s() : ncpus == 1\n", str))
828 		return (0);
829 	}
830 
831 	req->psr_cmd = PSM_STATE_RESTORE;
832 	return ((*psm_state)(req));
833 }
834 
835 
836 /* stop lint complaining about offset not being used in 32bit mode */
837 #if !defined(__amd64)
838 /*ARGSUSED*/
839 #endif
840 static void
841 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
842 {
843 	/*LINTED*/
844 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
845 
846 	/*
847 	 * Fill up the real mode platter to make it easy for real mode code to
848 	 * kick it off. This area should really be one passed by boot to kernel
849 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
850 	 * have identical physical and virtual address in paged mode.
851 	 */
852 
853 	real_mode_platter->rm_pdbr = getcr3();
854 	real_mode_platter->rm_cpu = cpun;
855 	real_mode_platter->rm_cr4 = cr4;
856 
857 	real_mode_platter->rm_gdt_base = gdt.base;
858 	real_mode_platter->rm_gdt_lim = gdt.limit;
859 
860 #if defined(__amd64)
861 	real_mode_platter->rm_x86feature = x86_feature;
862 
863 	if (getcr3() > 0xffffffffUL)
864 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
865 		    "located above 4G in physical memory (@ 0x%llx).",
866 		    (unsigned long long)getcr3());
867 
868 	/*
869 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
870 	 * by code in real_mode_start():
871 	 *
872 	 * GDT[0]:  NULL selector
873 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
874 	 *
875 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
876 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
877 	 * a course of action as any other, though it may cause the entire
878 	 * platform to reset in some cases...
879 	 */
880 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
881 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
882 
883 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
884 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
885 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
886 	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
887 
888 	real_mode_platter->rm_temp_idt_lim = 0;
889 	real_mode_platter->rm_temp_idt_base = 0;
890 
891 	/*
892 	 * Since the CPU needs to jump to protected mode using an identity
893 	 * mapped address, we need to calculate it here.
894 	 */
895 	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
896 #endif	/* __amd64 */
897 
898 	/* return; */
899 }
900 
/*
 * Per-cpu start routine used on resume (installed in cpr_start_cpu_func).
 *
 * Does nothing when run on the boot cpu.  On any other cpu it syncs the
 * PAT (if supported), re-initializes the syscall handlers, restores the
 * saved stack (amd64 only), adds the cpu to procset so that
 * i_cpr_pre_resume_cpus() can proceed, runs the slave side of the TSC
 * sync if enabled, and finally enables interrupts.
 */
void
i_cpr_start_cpu(void)
{

	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

#if defined(__amd64)
	/* this cpu's saved context; holds the stack image to restore */
	wc_cpu_t	*cpup = wc_other_cpus + cp->cpu_id;
#endif	/*	__amd64	*/

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/* i_cpr_bootcpu() requires cpu_lock to be held */
	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to Sync PAT with cpu0's PAT. We have to do
	 * this with interrupts disabled.
	 */
	if (x86_feature & X86_PAT)
		pat_sync();

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
	 * init_cpu_info(), since the work that they do is only needed to
	 * be done once at boot time
	 */


	mutex_enter(&cpu_lock);

#if defined(__amd64)
	restore_stack(cpup);
#endif	/*	__amd64	*/

	/* announce this cpu; i_cpr_pre_resume_cpus() is spinning on procset */
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/* XXX remove before integration */
	PMD(PMD_SX, ("%s() procset 0x%lx\n", str, (ulong_t)procset))

	/* slave side of the tsc_sync_master() call in i_cpr_pre_resume_cpus() */
	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
	 * need to go load another CPU module.  The act of attempting to load
	 * a module may trigger a cross-call, which will ASSERT unless this
	 * cpu is CPU_READY.
	 */

	/*
	 * cmi already been init'd (during boot), so do not need to do it again
	 */
#ifdef PM_REINITMCAONRESUME
	if (x86_feature & X86_MCA)
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))

	/* return; */
}
999 
1000 #if defined(__amd64)
1001 /*
1002  * we only need to do this for amd64!
1003  */
1004 
1005 /*
1006  * save the stack
1007  */
1008 void
1009 save_stack(wc_cpu_t *cpup)
1010 {
1011 	char *str = "save_stack";
1012 	caddr_t base = curthread->t_stk;
1013 	caddr_t sp = (caddr_t)cpup->wc_rsp;
1014 
1015 
1016 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1017 	PMD(PMD_SX, ("save_stack() curthread->t_stk = %p, sp = %p\n",
1018 	    (void *)base, (void *)sp))
1019 
1020 	ASSERT(base > sp);
1021 	/*LINTED*/
1022 	bcopy(sp, cpup->wc_stack, base - sp);
1023 
1024 }
1025 
/*
 * restore the stack
 *
 * Copy the stack image saved in cpup->wc_stack by save_stack() back to
 * the region between the saved stack pointer (cpup->wc_rsp) and the base
 * of the current thread's stack (curthread->t_stk).
 */
static	void
restore_stack(wc_cpu_t *cpup)
{
	/*
	 * we only need to do this for amd64!
	 */

	char *str = "restore_stack";
	caddr_t base = curthread->t_stk;
	caddr_t sp = (caddr_t)cpup->wc_rsp;

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	PMD(PMD_SX, ("%s() curthread->t_stk = %p, sp = %p\n", str,
	    (void *)base, (void *)sp))

	/* the saved stack pointer must lie below the stack base */
	ASSERT(base > sp);
	/*LINTED*/
	bcopy(cpup->wc_stack, sp, base - sp);

}
1049 
1050 #endif	/*	__amd64	*/
1051 
1052 
1053 void
1054 i_cpr_alloc_cpus(void)
1055 {
1056 	char *str = "i_cpr_alloc_cpus";
1057 
1058 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1059 	/*
1060 	 * we allocate this only when we actually need it to save on
1061 	 * kernel memory
1062 	 */
1063 
1064 	if (wc_other_cpus == NULL) {
1065 		wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
1066 		    KM_SLEEP);
1067 	}
1068 
1069 }
1070 
1071 void
1072 i_cpr_free_cpus(void)
1073 {
1074 	if (wc_other_cpus != NULL) {
1075 		kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
1076 		wc_other_cpus = NULL;
1077 	}
1078 }
1079 
1080 /*
1081  * wrapper for acpica_ddi_save_resources()
1082  */
1083 void
1084 i_cpr_save_configuration(dev_info_t *dip)
1085 {
1086 	acpica_ddi_save_resources(dip);
1087 }
1088 
1089 /*
1090  * wrapper for acpica_ddi_restore_resources()
1091  */
1092 void
1093 i_cpr_restore_configuration(dev_info_t *dip)
1094 {
1095 	acpica_ddi_restore_resources(dip);
1096 }
1097