/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Platform-specific CPR implementation code.
 * Currently only suspend to RAM (ACPI S3) is supported.
 */
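
/*
 * Overview of the S3 flow (a rough sketch; the details live in the
 * code below and in the real-mode wakecode):
 *
 * Suspend: i_cpr_power_down() runs on the boot cpu, copies the
 * real-mode wakeup code to the sub-1MB platter page, saves its
 * context with wc_save_context() and asks the platform power
 * manager (PMR_PPM_ENTER_SX) to enter S3.
 *
 * Resume: the wakecode restores the saved context, wc_save_context()
 * "returns" a second time, and the kernel restarts the slave cpus
 * via i_cpr_pre_resume_cpus()/i_cpr_post_resume_cpus().
 */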

#define	SUNDDI_IMPL

#include <sys/types.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/cpuvar.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <sys/cpr.h>
#include <sys/kmem.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <vm/seg_kmem.h>
#include <sys/cpu_module.h>
#include <sys/callb.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/stack.h>
#include <sys/fs/ufs_fs.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/thread.h>
#include <sys/x_call.h>
#include <sys/smp_impldefs.h>
#include <vm/vm_dep.h>
#include <sys/psm.h>
#include <sys/epm.h>
#include <sys/cpr_wakecode.h>
#include <sys/x86_archext.h>
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/fp.h>
#include <sys/sysmacros.h>

#define	AFMT	"%lx"

extern int flushes_require_xcalls;
extern cpuset_t cpu_ready_set;

extern void *wc_long_mode_64(void);
extern int tsc_gethrtime_enable;
extern void i_cpr_start_cpu(void);

ushort_t cpr_mach_type = CPR_MACHTYPE_X86;
void (*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

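/*
 * Saved wakecode context for each possible cpu, allocated lazily by
 * i_cpr_alloc_cpus(), and the set of cpus that have checked in with
 * the kernel during resume.
 */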
static wc_cpu_t *wc_other_cpus = NULL;
static cpuset_t procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);

#ifdef STACK_GROWTH_DOWN
#define	CPR_GET_STACK_START(t) ((t)->t_stkbase)
#define	CPR_GET_STACK_END(t) ((t)->t_stk)
#else
#define	CPR_GET_STACK_START(t) ((t)->t_stk)
#define	CPR_GET_STACK_END(t) ((t)->t_stkbase)
#endif	/* STACK_GROWTH_DOWN */
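
/*
 * On x86 stacks grow down, so STACK_GROWTH_DOWN is defined and
 * CPR_GET_STACK_START yields the low address (t_stkbase) while
 * CPR_GET_STACK_END yields the high address (t_stk); the size of a
 * thread's stack region is then
 * (size_t)CPR_GET_STACK_END(t) - (size_t)CPR_GET_STACK_START(t).
 */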

/*
 * Restart paused slave cpus.
 */
void
i_cpr_machdep_setup(void)
{
	if (ncpus > 1) {
		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
		mutex_enter(&cpu_lock);
		start_cpus();
		mutex_exit(&cpu_lock);
	}
}


/*
 * Stop all interrupt activities in the system.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}

/*
 * Set machine up to take interrupts.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}

/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking, which does not apply to suspend to RAM;
 * this routine should never be called on x86.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;
	ASSERT(notcalled);
}


void
i_cpr_set_tbr(void)
{
}


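/*
 * On x86 the boot cpu is always cpu0.
 */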
processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}

/*
 * cpu0 should contain bootcpu info
 */
cpu_t *
i_cpr_bootcpu(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (cpu_get(i_cpr_bootcpuid()));
}

/*
 * Save context for the specified CPU
 */
void *
i_cpr_save_context(void *arg)
{
	long index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int ret;
	wc_cpu_t *wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context() returns twice: it returns non-zero here,
	 * when the context is first saved (suspending), and 0 when the
	 * saved context is resumed by the wakecode.
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * Do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on.
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to bind
		 * interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because at this point
		 * in the suspend process, the boot cpu owns cpu_lock and all
		 * other cpus are also executing in the pause thread (only
		 * modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}

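/*
 * The warm reset vector is the real-mode far pointer (IP, then CS
 * segment) in the BIOS data area, conventionally at 0x40:0x67,
 * through which the BIOS jumps when a cpu starts up with the shutdown
 * status byte indicating a warm reset; we aim it at the platter code.
 */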
static ushort_t *warm_reset_vector = NULL;

static ushort_t *
map_warm_reset_vector(void)
{
	/*LINTED*/
	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
		return (NULL);

	/*
	 * Set up the secondary cpu BIOS boot-up vector: the first word
	 * is the real-mode offset of the platter code, the second word
	 * is its real-mode segment (physical address >> 4).
	 */
	*warm_reset_vector = (ushort_t)((caddr_t)
	    /*LINTED*/
	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
	    + ((ulong_t)rm_platter_va & 0xf));
	warm_reset_vector++;
	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

	--warm_reset_vector;
	return (warm_reset_vector);
}

void
i_cpr_pre_resume_cpus(void)
{
	/*
	 * This is a cut-down version of start_other_cpus(): do just the
	 * initialization needed to wake the other cpus.
	 */
	unsigned who;
	int boot_cpuid = i_cpr_bootcpuid();
	uint32_t code_length = 0;
	caddr_t wakevirt = rm_platter_va;
	/*LINTED*/
	wakecode_t *wp = (wakecode_t *)wakevirt;
	char *str = "i_cpr_pre_resume_cpus";
	extern int get_tsc_ready();
	int err;

	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * If startup wasn't able to find a page under 1M, we cannot
	 * proceed.
	 */
	if (rm_platter_va == 0) {
		cmn_err(CE_WARN, "Cannot suspend the system because no "
		    "memory below 1M could be found for processor startup");
		return;
	}

	/*
	 * Map the BIOS warm reset vector; the real-mode wakeup code was
	 * already copied to the page at rm_platter_va by
	 * i_cpr_power_down() before the suspend.
	 */
	warm_reset_vector = map_warm_reset_vector();
	if (warm_reset_vector == NULL) {
		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
		return;
	}

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave CPUs
	 * do their TSC syncs with the same CPU.
	 */

	affinity_set(CPU_CURRENT);

	/*
	 * Mark the boot cpu as being ready and in the procset, since we are
	 * running on that cpu.
	 */
	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
	CPUSET_ONLY(procset, boot_cpuid);

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t *cpup = wc_other_cpus + who;
		wc_desctbr_t gdt;

		if (who == boot_cpuid)
			continue;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))

		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);

		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);

		mutex_enter(&cpu_lock);
		err = mach_cpuid_start(who, rm_platter_va);
		mutex_exit(&cpu_lock);
		if (err != 0) {
			cmn_err(CE_WARN, "cpu%d: failed to start during "
			    "suspend/resume error %d", who, err);
			continue;
		}

		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))

		if (!wait_for_set(&procset, who))
			continue;

		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))

		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))

		if (tsc_gethrtime_enable) {
			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
			tsc_sync_master(who);
		}

		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
		    who))
		/*
		 * Wait for the cpu to declare that it is ready; we want
		 * the cpus to start serially instead of in parallel, so
		 * that they do not contend with each other in
		 * wc_rm_start().
		 */
		if (!wait_for_set(&cpu_ready_set, who))
			continue;

		/*
		 * There is no need to re-initialize dtrace using
		 * dtrace_cpu_init(); that work is only needed at boot.
		 */
		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
	}

	affinity_clear();

	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
}

static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}

/*
 * We need to set up a 1:1 (virtual to physical) mapping for the
 * page containing the wakeup code.
 */
static struct as *save_as;	/* when switching to kas */

static void
unmap_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t wp = (uintptr_t)wakephys;
	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
}

void
i_cpr_post_resume_cpus(void)
{
	uint64_t wakephys = rm_platter_pa;

	if (warm_reset_vector != NULL)
		unmap_warm_reset_vector(warm_reset_vector);

	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);

	/*
	 * cmi_post_mpstartup() is only required upon boot, not upon
	 * resume from RAM.
	 */

	PT(PT_UNDO1to1);
	/* Tear down 1:1 mapping for wakeup code */
	unmap_wakeaddr_1to1(wakephys);
}

/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}

int
i_cpr_reusable_supported(void)
{
	return (0);
}
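
/*
 * The wakeup page is identity mapped at its physical address, which
 * lies below 1MB and hence in the user half of the address space;
 * presumably for that reason the mapping is entered into kas and the
 * cpu is switched to the kernel-only hat while it is in use.
 */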

static void
map_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t wp = (uintptr_t)wakephys;
	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
	    HAT_LOAD);
	save_as = curthread->t_procp->p_as;
	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
}


void
prt_other_cpus(void)
{
	int who;

	if (ncpus == 1) {
		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
		    "uniprocessor machine\n"))
		return;
	}

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t *cpup = wc_other_cpus + who;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
		    AFMT ", sp=%lx\n", who,
		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
	}
}

/*
 * Power down the system.
 */
int
i_cpr_power_down(int sleeptype)
{
	caddr_t wakevirt = rm_platter_va;
	uint64_t wakephys = rm_platter_pa;
	ulong_t saved_intr;
	uint32_t code_length = 0;
	wc_desctbr_t gdt;
	/*LINTED*/
	wakecode_t *wp = (wakecode_t *)wakevirt;
	/*LINTED*/
	rm_platter_t *wcpp = (rm_platter_t *)wakevirt;
	wc_cpu_t *cpup = &(wp->wc_cpu);
	dev_info_t *ppm;
	int ret = 0;
	power_req_t power_req;
	char *str = "i_cpr_power_down";
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
	extern int cpr_suspend_succeeded;
	extern void kernel_wc_code();

	ASSERT(sleeptype == CPR_TORAM);
	ASSERT(CPU->cpu_id == 0);

	if ((ppm = PPM(ddi_root_node())) == NULL) {
		PMD(PMD_SX, ("%s: root node not claimed\n", str))
		return (ENOTTY);
	}

	PMD(PMD_SX, ("Entering %s()\n", str))

	PT(PT_IC);
	saved_intr = intr_clear();

	PT(PT_1to1);
	/* Setup 1:1 mapping for wakeup code */
	map_wakeaddr_1to1(wakephys);

	PMD(PMD_SX, ("ncpus=%d\n", ncpus))

	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
	    ((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)),
	    WC_CODESIZE))

	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
	    (void *)wakevirt, (uint_t)wakephys))

	ASSERT(((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)) <
	    WC_CODESIZE);

	bzero(wakevirt, PAGESIZE);

	/* Copy code to rm_platter */
	bcopy((caddr_t)wc_rm_start, wakevirt,
	    (size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start));

	prt_other_cpus();

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))

	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	real_mode_platter->rm_cr4 = getcr4();
	real_mode_platter->rm_pdbr = getcr3();

	rmp_gdt_init(real_mode_platter);

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
	    (uint32_t)((uintptr_t)wc_long_mode_64 - (uintptr_t)wc_rm_start);

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
	    (ulong_t)real_mode_platter->rm_longmode64_addr))

	PT(PT_SC);
	if (wc_save_context(cpup)) {
		/*
		 * Non-zero return: this is the first, suspend-time
		 * return from wc_save_context(); save the APIC state
		 * and enter S3.
		 */
		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
		if (ret != 0)
			return (ret);

		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
		if (ret != 0)
			return (ret);

		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
		    (uint_t)wakephys, (void *)&kernel_wc_code))
		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
		    cpup->wc_esp))
		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
		    (long)cpup->wc_cr4))
		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
		    (long)cpup->wc_eflags))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)cpup->wc_gdt_base,
		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);

		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);

		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
		    (ulong_t)wcpp->rm_cr4, getcr4()))

		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
		    (ulong_t)wcpp->rm_pdbr, getcr3()))

		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
		    (ulong_t)wcpp->rm_longmode64_addr))

		PMD(PMD_SX,
		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase))

		power_req.request_type = PMR_PPM_ENTER_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
		power_req.req.ppm_power_enter_sx_req.test_point =
		    cpr_test_point;
		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		/*
		 * If the suspend worked, we do not return here: on wakeup,
		 * control arrives at the second return of wc_save_context()
		 * and takes the else branch below.  If we get control back
		 * here, the suspend did not work.
		 * XXX return EINVAL here?
		 */

		unmap_wakeaddr_1to1(wakephys);
		intr_restore(saved_intr);

		return (ret);
	} else {
		/*
		 * Zero return: we are resuming after wakeup.
		 */
		cpr_suspend_succeeded = 1;

		power_req.request_type = PMR_PPM_EXIT_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
		/*
		 * The restore should never fail if the save succeeded.
		 */
		ASSERT(ret == 0);

		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));

		/*
		 * Enable interrupts on the boot cpu.
		 */
		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		PT(PT_INTRRESTORE);
		intr_restore(saved_intr);
		PT(PT_CPU);

		return (ret);
	}
}

/*
 * Stop all other cpus before halting or rebooting.  We pause the cpus
 * instead of sending a cross call.
 * Stolen from sun4/os/mp_states.c
 */

static int cpus_are_paused;

void
i_cpr_stop_other_cpus(void)
{
	mutex_enter(&cpu_lock);
	if (cpus_are_paused) {
		mutex_exit(&cpu_lock);
		return;
	}
	pause_cpus(NULL, NULL);
	cpus_are_paused = 1;

	mutex_exit(&cpu_lock);
}

int
i_cpr_is_supported(int sleeptype)
{
	extern int cpr_supported_override;
	extern int cpr_platform_enable;
	extern int pm_S3_enabled;

	if (sleeptype != CPR_TORAM)
		return (0);

	/*
	 * The next statement tests if a specific platform has turned off
	 * cpr support.
	 */
	if (cpr_supported_override)
		return (0);

	/*
	 * If a platform has specifically turned on cpr support ...
	 */
	if (cpr_platform_enable)
		return (1);

	return (pm_S3_enabled);
}
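
/*
 * The two routines below are part of the common CPR interface; for
 * suspend to RAM on x86 there is nothing for them to do, so they are
 * empty stubs.
 */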

void
i_cpr_bitmap_cleanup(void)
{
}

void
i_cpr_free_memory_resources(void)
{
}

/*
 * Needed only for S3 so far.  These routines are thin wrappers around
 * the PSM psm_state() entry point; psr_cmd selects the operation.
 */
static int
i_cpr_platform_alloc(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_platform_alloc";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_ALLOC;
	return ((*psm_state)(req));
}

/*
 * Needed only for S3 so far
 */
static void
i_cpr_platform_free(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_platform_free";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return;
	}

	req->psr_cmd = PSM_STATE_FREE;
	(void) (*psm_state)(req);
}

static int
i_cpr_save_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_save_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_SAVE;
	return ((*psm_state)(req));
}

static int
i_cpr_restore_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_restore_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_RESTORE;
	return ((*psm_state)(req));
}

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * Fill up the real mode platter to make it easy for real mode code to
	 * kick it off.  This area should really be one passed by boot to the
	 * kernel and guaranteed to be below 1MB and aligned to 16 bytes.
	 * It should also have identical physical and virtual addresses in
	 * paged mode.
	 */

	real_mode_platter->rm_pdbr = getcr3();
	real_mode_platter->rm_cpu = cpun;
	real_mode_platter->rm_cr4 = cr4;

	real_mode_platter->rm_gdt_base = gdt.base;
	real_mode_platter->rm_gdt_lim = gdt.limit;

	if (getcr3() > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%llx).",
		    (unsigned long long)getcr3());

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
	real_mode_platter->rm_temp_gdt[0] = 0ULL;
	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
	    offsetof(rm_platter_t, rm_temp_gdt);

	real_mode_platter->rm_temp_idt_lim = 0;
	real_mode_platter->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
}

void
i_cpr_start_cpu(void)
{
	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to sync PAT with cpu0's PAT.  We have to do
	 * this with interrupts disabled.
	 */
	pat_sync();

	/*
	 * If we use XSAVE, we need to restore the XFEATURE_ENABLE_MASK
	 * register.
	 */
	if (fp_save_mech == FP_XSAVE) {
		setup_xfem();
	}

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * There is no need to call cpuid_pass2(), cpuid_pass3(),
	 * cpuid_pass4() or init_cpu_info(); the work that they do only
	 * needs to be done once, at boot time.
	 */

	mutex_enter(&cpu_lock);
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
	 * need to go load another CPU module.  The act of attempting to load
	 * a module may trigger a cross-call, which will ASSERT unless this
	 * cpu is CPU_READY.
	 */

	/*
	 * cmi has already been initialized (during boot), so it does not
	 * need to be done again.
	 */
#ifdef PM_REINITMCAONRESUME
	if (is_x86_feature(x86_featureset, X86FSET_MCA))
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))
}

void
i_cpr_alloc_cpus(void)
{
	char *str = "i_cpr_alloc_cpus";

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	/*
	 * We allocate this only when we actually need it, to save on
	 * kernel memory.
	 */
	if (wc_other_cpus == NULL) {
		wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
		    KM_SLEEP);
	}
}

void
i_cpr_free_cpus(void)
{
	int index;
	wc_cpu_t *wc_cpu;

	if (wc_other_cpus != NULL) {
		for (index = 0; index < max_ncpus; index++) {
			wc_cpu = wc_other_cpus + index;
			if (wc_cpu->wc_saved_stack != NULL) {
				kmem_free(wc_cpu->wc_saved_stack,
				    wc_cpu->wc_saved_stack_size);
			}
		}

		kmem_free((void *) wc_other_cpus,
		    max_ncpus * sizeof (wc_cpu_t));
		wc_other_cpus = NULL;
	}
}

/*
 * Wrapper for acpica_ddi_save_resources().
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}

/*
 * Wrapper for acpica_ddi_restore_resources().
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}

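/*
 * Poll at 10ms intervals until "who" appears in *set; warn after
 * roughly five seconds (500 polls) and give up after roughly twenty
 * seconds (2000 polls).
 */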
static int
wait_for_set(cpuset_t *set, int who)
{
	int delays;
	char *str = "wait_for_set";

	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably
			 * looking a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
			PMD(PMD_SX, ("%s() %d cpu started "
			    "but not running in the kernel yet\n",
			    str, who))
		} else if (delays > 2000) {
			/*
			 * We have waited at least 20 seconds; bail out.
			 */
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			PMD(PMD_SX, ("%s() %d cpu timed out\n",
			    str, who))
			return (0);
		}

		/*
		 * Wait at least 10ms, then check again.
		 */
		drv_usecwait(10000);
	}

	return (1);
}

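/*
 * Snapshot the thread's kernel stack.  The wakecode context saved by
 * wc_save_context() records the stack pointer as of the save, but the
 * suspend path keeps running on (and modifying) that stack afterwards;
 * restoring this copy on resume puts the stack back in step with the
 * saved register context.
 */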
static void
i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
{
	size_t stack_size;	/* size of stack */
	caddr_t start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	if (wc_cpu->wc_saved_stack_size < stack_size) {
		if (wc_cpu->wc_saved_stack != NULL) {
			kmem_free(wc_cpu->wc_saved_stack,
			    wc_cpu->wc_saved_stack_size);
		}
		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
		wc_cpu->wc_saved_stack_size = stack_size;
	}

	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
}

void
i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
{
	size_t stack_size;	/* size of stack */
	caddr_t start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	bcopy(save_stack, start, stack_size);
}