xref: /titanic_44/usr/src/uts/i86pc/os/cpr_impl.c (revision 8793b36b40d14ad0a0fecc97738dc118a928f46c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Platform specific implementation code
28  * Currently only suspend to RAM is supported (ACPI S3)
29  */
30 
31 #define	SUNDDI_IMPL
32 
33 #include <sys/types.h>
34 #include <sys/promif.h>
35 #include <sys/prom_isa.h>
36 #include <sys/prom_plat.h>
37 #include <sys/cpuvar.h>
38 #include <sys/pte.h>
39 #include <vm/hat.h>
40 #include <vm/page.h>
41 #include <vm/as.h>
42 #include <sys/cpr.h>
43 #include <sys/kmem.h>
44 #include <sys/clock.h>
45 #include <sys/kmem.h>
46 #include <sys/panic.h>
47 #include <vm/seg_kmem.h>
48 #include <sys/cpu_module.h>
49 #include <sys/callb.h>
50 #include <sys/machsystm.h>
51 #include <sys/vmsystm.h>
52 #include <sys/systm.h>
53 #include <sys/archsystm.h>
54 #include <sys/stack.h>
55 #include <sys/fs/ufs_fs.h>
56 #include <sys/memlist.h>
57 #include <sys/bootconf.h>
58 #include <sys/thread.h>
59 #include <sys/x_call.h>
60 #include <sys/smp_impldefs.h>
61 #include <vm/vm_dep.h>
62 #include <sys/psm.h>
63 #include <sys/epm.h>
64 #include <sys/cpr_wakecode.h>
65 #include <sys/x86_archext.h>
66 #include <sys/reboot.h>
67 #include <sys/acpi/acpi.h>
68 #include <sys/acpica.h>
69 
70 #define	AFMT	"%lx"
71 
72 extern int	flushes_require_xcalls;
73 extern cpuset_t	cpu_ready_set;
74 
75 #if defined(__amd64)
76 extern void	*wc_long_mode_64(void);
77 #endif	/* __amd64 */
78 extern int	tsc_gethrtime_enable;
79 extern	void	i_cpr_start_cpu(void);
80 
81 ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
82 void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;
83 
84 static wc_cpu_t	*wc_other_cpus = NULL;
85 static cpuset_t procset;
86 
87 static void
88 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);
89 
90 static int i_cpr_platform_alloc(psm_state_request_t *req);
91 static void i_cpr_platform_free(psm_state_request_t *req);
92 static int i_cpr_save_apic(psm_state_request_t *req);
93 static int i_cpr_restore_apic(psm_state_request_t *req);
94 static int wait_for_set(cpuset_t *set, int who);
95 
96 /*
97  * restart paused slave cpus
98  */
99 void
100 i_cpr_machdep_setup(void)
101 {
102 	if (ncpus > 1) {
103 		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
104 		mutex_enter(&cpu_lock);
105 		start_cpus();
106 		mutex_exit(&cpu_lock);
107 	}
108 }
109 
110 
/*
 * Stop all interrupt activity in the system by calling spl7().
 * The value returned by spl7() is deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
119 
/*
 * Set the machine up to take interrupts again by calling spl0().
 * The value returned by spl0() is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
128 
/*
 * Placeholder for saving the miscellaneous information that would need
 * to be written to a state file in order to re-initialize kernel/prom
 * handshaking.
 *
 * Nothing is saved here: the body consists solely of an ASSERT on a
 * value that is always zero, so the routine is not expected to be called.
 */
void
i_cpr_save_machdep_info(void)
{
	int	notcalled = 0;

	ASSERT(notcalled);
}
140 
141 
/*
 * Intentionally empty: no work is performed by this hook here.
 */
void
i_cpr_set_tbr(void)
{
	/* nothing to do */
}
146 
147 
148 processorid_t
149 i_cpr_bootcpuid(void)
150 {
151 	return (0);
152 }
153 
154 /*
155  * cpu0 should contain bootcpu info
156  */
157 cpu_t *
158 i_cpr_bootcpu(void)
159 {
160 	ASSERT(MUTEX_HELD(&cpu_lock));
161 
162 	return (cpu_get(i_cpr_bootcpuid()));
163 }
164 
165 /*
166  *	Save context for the specified CPU
167  */
168 void *
169 i_cpr_save_context(void *arg)
170 {
171 	long	index = (long)arg;
172 	psm_state_request_t *papic_state;
173 	int resuming;
174 	int	ret;
175 
176 	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))
177 
178 	ASSERT(index < NCPU);
179 
180 	papic_state = &(wc_other_cpus + index)->wc_apic_state;
181 
182 	ret = i_cpr_platform_alloc(papic_state);
183 	ASSERT(ret == 0);
184 
185 	ret = i_cpr_save_apic(papic_state);
186 	ASSERT(ret == 0);
187 
188 	/*
189 	 * wc_save_context returns twice, once when susending and
190 	 * once when resuming,  wc_save_context() returns 0 when
191 	 * suspending and non-zero upon resume
192 	 */
193 	resuming = (wc_save_context(wc_other_cpus + index) == 0);
194 
195 	/*
196 	 * do NOT call any functions after this point, because doing so
197 	 * will modify the stack that we are running on
198 	 */
199 
200 	if (resuming) {
201 
202 		ret = i_cpr_restore_apic(papic_state);
203 		ASSERT(ret == 0);
204 
205 		i_cpr_platform_free(papic_state);
206 
207 		/*
208 		 * Enable interrupts on this cpu.
209 		 * Do not bind interrupts to this CPU's local APIC until
210 		 * the CPU is ready to receive interrupts.
211 		 */
212 		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
213 		mutex_enter(&cpu_lock);
214 		cpu_enable_intr(CPU);
215 		mutex_exit(&cpu_lock);
216 
217 		/*
218 		 * Setting the bit in cpu_ready_set must be the last operation
219 		 * in processor initialization; the boot CPU will continue to
220 		 * boot once it sees this bit set for all active CPUs.
221 		 */
222 		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);
223 
224 		PMD(PMD_SX,
225 		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
226 		    CPU->cpu_id))
227 	} else {
228 		/*
229 		 * Disable interrupts on this CPU so that PSM knows not to bind
230 		 * interrupts here on resume until the CPU has executed
231 		 * cpu_enable_intr() (above) in the resume path.
232 		 * We explicitly do not grab cpu_lock here because at this point
233 		 * in the suspend process, the boot cpu owns cpu_lock and all
234 		 * other cpus are also executing in the pause thread (only
235 		 * modifying their respective CPU structure).
236 		 */
237 		(void) cpu_disable_intr(CPU);
238 	}
239 
240 	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
241 	    resuming))
242 
243 	return (NULL);
244 }
245 
246 static ushort_t *warm_reset_vector = NULL;
247 
248 static ushort_t *
249 map_warm_reset_vector()
250 {
251 	/*LINTED*/
252 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
253 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
254 		return (NULL);
255 
256 	/*
257 	 * setup secondary cpu bios boot up vector
258 	 */
259 	*warm_reset_vector = (ushort_t)((caddr_t)
260 	    /*LINTED*/
261 	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
262 	    + ((ulong_t)rm_platter_va & 0xf));
263 	warm_reset_vector++;
264 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
265 
266 	--warm_reset_vector;
267 	return (warm_reset_vector);
268 }
269 
270 void
271 i_cpr_pre_resume_cpus()
272 {
273 	/*
274 	 * this is a cut down version of start_other_cpus()
275 	 * just do the initialization to wake the other cpus
276 	 */
277 	unsigned who;
278 	int boot_cpuid = i_cpr_bootcpuid();
279 	uint32_t		code_length = 0;
280 	caddr_t			wakevirt = rm_platter_va;
281 	/*LINTED*/
282 	wakecode_t		*wp = (wakecode_t *)wakevirt;
283 	char *str = "i_cpr_pre_resume_cpus";
284 	extern int get_tsc_ready();
285 	int err;
286 
287 	/*LINTED*/
288 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
289 
290 	/*
291 	 * If startup wasn't able to find a page under 1M, we cannot
292 	 * proceed.
293 	 */
294 	if (rm_platter_va == 0) {
295 		cmn_err(CE_WARN, "Cannot suspend the system because no "
296 		    "memory below 1M could be found for processor startup");
297 		return;
298 	}
299 
300 	/*
301 	 * Copy the real mode code at "real_mode_start" to the
302 	 * page at rm_platter_va.
303 	 */
304 	warm_reset_vector = map_warm_reset_vector();
305 	if (warm_reset_vector == NULL) {
306 		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
307 		return;
308 	}
309 
310 	flushes_require_xcalls = 1;
311 
312 	/*
313 	 * We lock our affinity to the master CPU to ensure that all slave CPUs
314 	 * do their TSC syncs with the same CPU.
315 	 */
316 
317 	affinity_set(CPU_CURRENT);
318 
319 	/*
320 	 * Mark the boot cpu as being ready and in the procset, since we are
321 	 * running on that cpu.
322 	 */
323 	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
324 	CPUSET_ONLY(procset, boot_cpuid);
325 
326 	for (who = 0; who < ncpus; who++) {
327 
328 		wc_cpu_t	*cpup = wc_other_cpus + who;
329 		wc_desctbr_t	gdt;
330 
331 		if (who == boot_cpuid)
332 			continue;
333 
334 		if (!CPU_IN_SET(mp_cpus, who))
335 			continue;
336 
337 		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
338 
339 		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
340 
341 		gdt.base = cpup->wc_gdt_base;
342 		gdt.limit = cpup->wc_gdt_limit;
343 
344 #if defined(__amd64)
345 		code_length = (uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start;
346 #else
347 		code_length = 0;
348 #endif
349 
350 		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
351 
352 		if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
353 			cmn_err(CE_WARN, "cpu%d: failed to start during "
354 			    "suspend/resume error %d", who, err);
355 			continue;
356 		}
357 
358 		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
359 
360 		if (!wait_for_set(&procset, who))
361 			continue;
362 
363 		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
364 
365 		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
366 
367 		if (tsc_gethrtime_enable) {
368 			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
369 			tsc_sync_master(who);
370 		}
371 
372 		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
373 		    who))
374 		/*
375 		 * Wait for cpu to declare that it is ready, we want the
376 		 * cpus to start serially instead of in parallel, so that
377 		 * they do not contend with each other in wc_rm_start()
378 		 */
379 		if (!wait_for_set(&cpu_ready_set, who))
380 			continue;
381 
382 		/*
383 		 * do not need to re-initialize dtrace using dtrace_cpu_init
384 		 * function
385 		 */
386 		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
387 	}
388 
389 	affinity_clear();
390 
391 	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
392 
393 }
394 
395 static void
396 unmap_warm_reset_vector(ushort_t *warm_reset_vector)
397 {
398 	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
399 }
400 
401 /*
402  * We need to setup a 1:1 (virtual to physical) mapping for the
403  * page containing the wakeup code.
404  */
405 static struct as *save_as;	/* when switching to kas */
406 
407 static void
408 unmap_wakeaddr_1to1(uint64_t wakephys)
409 {
410 	uintptr_t	wp = (uintptr_t)wakephys;
411 	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
412 	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
413 }
414 
415 void
416 i_cpr_post_resume_cpus()
417 {
418 	uint64_t	wakephys = rm_platter_pa;
419 
420 	if (warm_reset_vector != NULL)
421 		unmap_warm_reset_vector(warm_reset_vector);
422 
423 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
424 	    HAT_UNLOAD);
425 
426 	/*
427 	 * cmi_post_mpstartup() is only required upon boot not upon
428 	 * resume from RAM
429 	 */
430 
431 	PT(PT_UNDO1to1);
432 	/* Tear down 1:1 mapping for wakeup code */
433 	unmap_wakeaddr_1to1(wakephys);
434 }
435 
/*
 * Intentionally empty: the flag argument is accepted and ignored.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
	/* nothing to do */
}
441 
/*
 * Report whether "reusable" support is available; it never is here,
 * so the answer is always 0.
 */
int
i_cpr_reusable_supported(void)
{
	int	supported = 0;

	return (supported);
}
447 static void
448 map_wakeaddr_1to1(uint64_t wakephys)
449 {
450 	uintptr_t	wp = (uintptr_t)wakephys;
451 	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
452 	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
453 	    HAT_LOAD);
454 	save_as = curthread->t_procp->p_as;
455 	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
456 }
457 
458 
459 void
460 prt_other_cpus()
461 {
462 	int	who;
463 
464 	if (ncpus == 1) {
465 		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
466 		    "uniprocessor machine\n"))
467 		return;
468 	}
469 
470 	for (who = 0; who < ncpus; who++) {
471 
472 		wc_cpu_t	*cpup = wc_other_cpus + who;
473 
474 		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
475 		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
476 		    AFMT ", sp=%lx\n", who,
477 		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
478 		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
479 		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
480 		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
481 	}
482 }
483 
484 /*
485  * Power down the system.
486  */
487 int
488 i_cpr_power_down(int sleeptype)
489 {
490 	caddr_t		wakevirt = rm_platter_va;
491 	uint64_t	wakephys = rm_platter_pa;
492 	ulong_t		saved_intr;
493 	uint32_t	code_length = 0;
494 	wc_desctbr_t	gdt;
495 	/*LINTED*/
496 	wakecode_t	*wp = (wakecode_t *)wakevirt;
497 	/*LINTED*/
498 	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
499 	wc_cpu_t	*cpup = &(wp->wc_cpu);
500 	dev_info_t	*ppm;
501 	int		ret = 0;
502 	power_req_t	power_req;
503 	char *str =	"i_cpr_power_down";
504 #if defined(__amd64)
505 	/*LINTED*/
506 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
507 #endif
508 	extern int	cpr_suspend_succeeded;
509 	extern void	kernel_wc_code();
510 
511 	ASSERT(sleeptype == CPR_TORAM);
512 	ASSERT(CPU->cpu_id == 0);
513 
514 	if ((ppm = PPM(ddi_root_node())) == NULL) {
515 		PMD(PMD_SX, ("%s: root node not claimed\n", str))
516 		return (ENOTTY);
517 	}
518 
519 	PMD(PMD_SX, ("Entering %s()\n", str))
520 
521 	PT(PT_IC);
522 	saved_intr = intr_clear();
523 
524 	PT(PT_1to1);
525 	/* Setup 1:1 mapping for wakeup code */
526 	map_wakeaddr_1to1(wakephys);
527 
528 	PMD(PMD_SX, ("ncpus=%d\n", ncpus))
529 
530 	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
531 	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))
532 
533 	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
534 	    (void *)wakevirt, (uint_t)wakephys))
535 
536 	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
537 	    WC_CODESIZE);
538 
539 	bzero(wakevirt, PAGESIZE);
540 
541 	/* Copy code to rm_platter */
542 	bcopy((caddr_t)wc_rm_start, wakevirt,
543 	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));
544 
545 	prt_other_cpus();
546 
547 #if defined(__amd64)
548 
549 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
550 	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
551 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
552 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
553 
554 	real_mode_platter->rm_cr4 = getcr4();
555 	real_mode_platter->rm_pdbr = getcr3();
556 
557 	rmp_gdt_init(real_mode_platter);
558 
559 	/*
560 	 * Since the CPU needs to jump to protected mode using an identity
561 	 * mapped address, we need to calculate it here.
562 	 */
563 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
564 	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);
565 
566 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
567 	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
568 
569 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
570 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
571 
572 	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
573 	    (ulong_t)real_mode_platter->rm_longmode64_addr))
574 
575 #endif
576 
577 	PT(PT_SC);
578 	if (wc_save_context(cpup)) {
579 
580 		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
581 		if (ret != 0)
582 			return (ret);
583 
584 		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
585 		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
586 		if (ret != 0)
587 			return (ret);
588 
589 		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
590 		    (uint_t)wakephys, (void *)&kernel_wc_code))
591 		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
592 		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
593 		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
594 		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
595 		    cpup->wc_esp))
596 		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
597 		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
598 		    (long)cpup->wc_cr4))
599 		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
600 		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
601 		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
602 		    (long)cpup->wc_eflags))
603 
604 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
605 		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
606 		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
607 		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
608 		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
609 
610 		gdt.base = cpup->wc_gdt_base;
611 		gdt.limit = cpup->wc_gdt_limit;
612 
613 #if defined(__amd64)
614 		code_length = (uint32_t)wc_long_mode_64 -
615 		    (uint32_t)wc_rm_start;
616 #else
617 		code_length = 0;
618 #endif
619 
620 		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
621 
622 #if defined(__amd64)
623 		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
624 		    (ulong_t)wcpp->rm_cr4, getcr4()))
625 
626 		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
627 		    (ulong_t)wcpp->rm_pdbr, getcr3()))
628 
629 		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
630 		    (ulong_t)wcpp->rm_longmode64_addr))
631 
632 		PMD(PMD_SX,
633 		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
634 		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
635 #endif
636 
637 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
638 		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
639 		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
640 		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
641 		    (long)cpup->wc_kgsbase))
642 
643 		power_req.request_type = PMR_PPM_ENTER_SX;
644 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
645 		power_req.req.ppm_power_enter_sx_req.test_point =
646 		    cpr_test_point;
647 		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
648 
649 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
650 		PT(PT_PPMCTLOP);
651 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
652 		    &power_req, &ret);
653 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
654 
655 		/*
656 		 * If it works, we get control back to the else branch below
657 		 * If we get control back here, it didn't work.
658 		 * XXX return EINVAL here?
659 		 */
660 
661 		unmap_wakeaddr_1to1(wakephys);
662 		intr_restore(saved_intr);
663 
664 		return (ret);
665 	} else {
666 		cpr_suspend_succeeded = 1;
667 
668 		power_req.request_type = PMR_PPM_EXIT_SX;
669 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
670 
671 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
672 		PT(PT_PPMCTLOP);
673 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
674 		    &power_req, &ret);
675 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
676 
677 		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
678 		/*
679 		 * the restore should never fail, if the saved suceeded
680 		 */
681 		ASSERT(ret == 0);
682 
683 		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
684 
685 		/*
686 		 * Enable interrupts on boot cpu.
687 		 */
688 		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
689 		mutex_enter(&cpu_lock);
690 		cpu_enable_intr(CPU);
691 		mutex_exit(&cpu_lock);
692 
693 		PT(PT_INTRRESTORE);
694 		intr_restore(saved_intr);
695 		PT(PT_CPU);
696 
697 		return (ret);
698 	}
699 }
700 
701 /*
702  * Stop all other cpu's before halting or rebooting. We pause the cpu's
703  * instead of sending a cross call.
704  * Stolen from sun4/os/mp_states.c
705  */
706 
707 static int cpu_are_paused;	/* sic */
708 
709 void
710 i_cpr_stop_other_cpus(void)
711 {
712 	mutex_enter(&cpu_lock);
713 	if (cpu_are_paused) {
714 		mutex_exit(&cpu_lock);
715 		return;
716 	}
717 	pause_cpus(NULL);
718 	cpu_are_paused = 1;
719 
720 	mutex_exit(&cpu_lock);
721 }
722 
723 int
724 i_cpr_is_supported(int sleeptype)
725 {
726 	extern int cpr_supported_override;
727 	extern int cpr_platform_enable;
728 	extern int pm_S3_enabled;
729 
730 	if (sleeptype != CPR_TORAM)
731 		return (0);
732 
733 	/*
734 	 * The next statement tests if a specific platform has turned off
735 	 * cpr support.
736 	 */
737 	if (cpr_supported_override)
738 		return (0);
739 
740 	/*
741 	 * If a platform has specifically turned on cpr support ...
742 	 */
743 	if (cpr_platform_enable)
744 		return (1);
745 
746 	return (pm_S3_enabled);
747 }
748 
/*
 * Intentionally empty: no work is performed by this hook here.
 */
void
i_cpr_bitmap_cleanup(void)
{
	/* nothing to do */
}
753 
/*
 * Intentionally empty: no work is performed by this hook here.
 */
void
i_cpr_free_memory_resources(void)
{
	/* nothing to do */
}
758 
759 /*
760  * Needed only for S3 so far
761  */
762 static int
763 i_cpr_platform_alloc(psm_state_request_t *req)
764 {
765 #ifdef DEBUG
766 	char	*str = "i_cpr_platform_alloc";
767 #endif
768 
769 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
770 
771 	if (psm_state == NULL) {
772 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
773 		return (0);
774 	}
775 
776 	req->psr_cmd = PSM_STATE_ALLOC;
777 	return ((*psm_state)(req));
778 }
779 
780 /*
781  * Needed only for S3 so far
782  */
783 static void
784 i_cpr_platform_free(psm_state_request_t *req)
785 {
786 #ifdef DEBUG
787 	char	*str = "i_cpr_platform_free";
788 #endif
789 
790 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
791 
792 	if (psm_state == NULL) {
793 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
794 		return;
795 	}
796 
797 	req->psr_cmd = PSM_STATE_FREE;
798 	(void) (*psm_state)(req);
799 }
800 
801 static int
802 i_cpr_save_apic(psm_state_request_t *req)
803 {
804 #ifdef DEBUG
805 	char	*str = "i_cpr_save_apic";
806 #endif
807 
808 	if (psm_state == NULL) {
809 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
810 		return (0);
811 	}
812 
813 	req->psr_cmd = PSM_STATE_SAVE;
814 	return ((*psm_state)(req));
815 }
816 
817 static int
818 i_cpr_restore_apic(psm_state_request_t *req)
819 {
820 #ifdef DEBUG
821 	char	*str = "i_cpr_restore_apic";
822 #endif
823 
824 	if (psm_state == NULL) {
825 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
826 		return (0);
827 	}
828 
829 	req->psr_cmd = PSM_STATE_RESTORE;
830 	return ((*psm_state)(req));
831 }
832 
833 
834 /* stop lint complaining about offset not being used in 32bit mode */
835 #if !defined(__amd64)
836 /*ARGSUSED*/
837 #endif
838 static void
839 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
840 {
841 	/*LINTED*/
842 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
843 
844 	/*
845 	 * Fill up the real mode platter to make it easy for real mode code to
846 	 * kick it off. This area should really be one passed by boot to kernel
847 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
848 	 * have identical physical and virtual address in paged mode.
849 	 */
850 
851 	real_mode_platter->rm_pdbr = getcr3();
852 	real_mode_platter->rm_cpu = cpun;
853 	real_mode_platter->rm_cr4 = cr4;
854 
855 	real_mode_platter->rm_gdt_base = gdt.base;
856 	real_mode_platter->rm_gdt_lim = gdt.limit;
857 
858 #if defined(__amd64)
859 	real_mode_platter->rm_x86feature = x86_feature;
860 
861 	if (getcr3() > 0xffffffffUL)
862 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
863 		    "located above 4G in physical memory (@ 0x%llx).",
864 		    (unsigned long long)getcr3());
865 
866 	/*
867 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
868 	 * by code in real_mode_start():
869 	 *
870 	 * GDT[0]:  NULL selector
871 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
872 	 *
873 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
874 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
875 	 * a course of action as any other, though it may cause the entire
876 	 * platform to reset in some cases...
877 	 */
878 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
879 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
880 
881 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
882 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
883 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
884 	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
885 
886 	real_mode_platter->rm_temp_idt_lim = 0;
887 	real_mode_platter->rm_temp_idt_base = 0;
888 
889 	/*
890 	 * Since the CPU needs to jump to protected mode using an identity
891 	 * mapped address, we need to calculate it here.
892 	 */
893 	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
894 #endif	/* __amd64 */
895 
896 	/* return; */
897 }
898 
899 void
900 i_cpr_start_cpu(void)
901 {
902 
903 	struct cpu *cp = CPU;
904 
905 	char *str = "i_cpr_start_cpu";
906 	extern void init_cpu_syscall(struct cpu *cp);
907 
908 	PMD(PMD_SX, ("%s() called\n", str))
909 
910 	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
911 	    cp->cpu_base_spl))
912 
913 	mutex_enter(&cpu_lock);
914 	if (cp == i_cpr_bootcpu()) {
915 		mutex_exit(&cpu_lock);
916 		PMD(PMD_SX,
917 		    ("%s() called on bootcpu nothing to do!\n", str))
918 		return;
919 	}
920 	mutex_exit(&cpu_lock);
921 
922 	/*
923 	 * We need to Sync PAT with cpu0's PAT. We have to do
924 	 * this with interrupts disabled.
925 	 */
926 	if (x86_feature & X86_PAT)
927 		pat_sync();
928 
929 	/*
930 	 * Initialize this CPU's syscall handlers
931 	 */
932 	init_cpu_syscall(cp);
933 
934 	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))
935 
936 	/*
937 	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
938 	 * init_cpu_info(), since the work that they do is only needed to
939 	 * be done once at boot time
940 	 */
941 
942 
943 	mutex_enter(&cpu_lock);
944 	CPUSET_ADD(procset, cp->cpu_id);
945 	mutex_exit(&cpu_lock);
946 
947 	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
948 	    cp->cpu_base_spl))
949 
950 	if (tsc_gethrtime_enable) {
951 		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
952 		tsc_sync_slave();
953 	}
954 
955 	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
956 	    cp->cpu_id, cp->cpu_intr_actv))
957 	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
958 	    cp->cpu_base_spl))
959 
960 	(void) spl0();		/* enable interrupts */
961 
962 	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
963 	    cp->cpu_base_spl))
964 
965 	/*
966 	 * Set up the CPU module for this CPU.  This can't be done before
967 	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
968 	 * need to go load another CPU module.  The act of attempting to load
969 	 * a module may trigger a cross-call, which will ASSERT unless this
970 	 * cpu is CPU_READY.
971 	 */
972 
973 	/*
974 	 * cmi already been init'd (during boot), so do not need to do it again
975 	 */
976 #ifdef PM_REINITMCAONRESUME
977 	if (x86_feature & X86_MCA)
978 		cmi_mca_init();
979 #endif
980 
981 	PMD(PMD_SX, ("%s() returning\n", str))
982 
983 	/* return; */
984 }
985 
986 void
987 i_cpr_alloc_cpus(void)
988 {
989 	char *str = "i_cpr_alloc_cpus";
990 
991 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
992 	/*
993 	 * we allocate this only when we actually need it to save on
994 	 * kernel memory
995 	 */
996 
997 	if (wc_other_cpus == NULL) {
998 		wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
999 		    KM_SLEEP);
1000 	}
1001 
1002 }
1003 
1004 void
1005 i_cpr_free_cpus(void)
1006 {
1007 	if (wc_other_cpus != NULL) {
1008 		kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
1009 		wc_other_cpus = NULL;
1010 	}
1011 }
1012 
1013 /*
1014  * wrapper for acpica_ddi_save_resources()
1015  */
1016 void
1017 i_cpr_save_configuration(dev_info_t *dip)
1018 {
1019 	acpica_ddi_save_resources(dip);
1020 }
1021 
1022 /*
1023  * wrapper for acpica_ddi_restore_resources()
1024  */
1025 void
1026 i_cpr_restore_configuration(dev_info_t *dip)
1027 {
1028 	acpica_ddi_restore_resources(dip);
1029 }
1030 
1031 static int
1032 wait_for_set(cpuset_t *set, int who)
1033 {
1034 	int delays;
1035 	char *str = "wait_for_set";
1036 
1037 	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1038 		if (delays == 500) {
1039 			/*
1040 			 * After five seconds, things are probably
1041 			 * looking a bit bleak - explain the hang.
1042 			 */
1043 			cmn_err(CE_NOTE, "cpu%d: started, "
1044 			    "but not running in the kernel yet", who);
1045 			PMD(PMD_SX, ("%s() %d cpu started "
1046 			    "but not running in the kernel yet\n",
1047 			    str, who))
1048 		} else if (delays > 2000) {
1049 			/*
1050 			 * We waited at least 20 seconds, bail ..
1051 			 */
1052 			cmn_err(CE_WARN, "cpu%d: timed out", who);
1053 			PMD(PMD_SX, ("%s() %d cpu timed out\n",
1054 			    str, who))
1055 			return (0);
1056 		}
1057 
1058 		/*
1059 		 * wait at least 10ms, then check again..
1060 		 */
1061 		drv_usecwait(10000);
1062 	}
1063 
1064 	return (1);
1065 }
1066