/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Platform-specific implementation code.
 * Currently only suspend to RAM is supported (ACPI S3).
 */

#define	SUNDDI_IMPL

#include <sys/types.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/cpuvar.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <sys/cpr.h>
#include <sys/kmem.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <vm/seg_kmem.h>
#include <sys/cpu_module.h>
#include <sys/callb.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/stack.h>
#include <sys/fs/ufs_fs.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/thread.h>
#include <sys/x_call.h>
#include <sys/smp_impldefs.h>
#include <vm/vm_dep.h>
#include <sys/psm.h>
#include <sys/epm.h>
#include <sys/cpr_wakecode.h>
#include <sys/x86_archext.h>
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/fp.h>
#include <sys/sysmacros.h>

#define	AFMT	"%lx"

extern int	flushes_require_xcalls;
extern cpuset_t	cpu_ready_set;

extern void	*wc_long_mode_64(void);
extern int	tsc_gethrtime_enable;
extern void	i_cpr_start_cpu(void);

ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

static wc_cpu_t	*wc_other_cpus = NULL;
static cpuset_t	procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);

#ifdef STACK_GROWTH_DOWN
#define	CPR_GET_STACK_START(t) ((t)->t_stkbase)
#define	CPR_GET_STACK_END(t) ((t)->t_stk)
#else
#define	CPR_GET_STACK_START(t) ((t)->t_stk)
#define	CPR_GET_STACK_END(t) ((t)->t_stkbase)
#endif	/* STACK_GROWTH_DOWN */
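
/*
 * Stacks grow down on x86 (STACK_GROWTH_DOWN is defined), so on this
 * platform CPR_GET_STACK_START() resolves to the lowest address of a
 * thread's stack (t_stkbase) and CPR_GET_STACK_END() to the highest
 * (t_stk); i_cpr_save_stack() and i_cpr_restore_stack() below copy the
 * range [start, end).
 */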

/*
 * Restart paused slave cpus.
 */
void
i_cpr_machdep_setup(void)
{
	if (ncpus > 1) {
		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
		mutex_enter(&cpu_lock);
		start_cpus();
		mutex_exit(&cpu_lock);
	}
}


/*
 * Stop all interrupt activities in the system.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}

/*
 * Set machine up to take interrupts.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}

/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;
	ASSERT(notcalled);
}


void
i_cpr_set_tbr(void)
{
}


processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}

/*
 * cpu0 should contain bootcpu info
 */
cpu_t *
i_cpr_bootcpu(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (cpu_get(i_cpr_bootcpuid()));
}

/*
 * Save context for the specified CPU
 */
void *
i_cpr_save_context(void *arg)
{
	long	index = (long)arg;
	psm_state_request_t *papic_state;
	int	resuming;
	int	ret;
	wc_cpu_t	*wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context() returns twice, in the manner of setjmp():
	 * once when suspending and once when resuming.  It returns
	 * non-zero when suspending and 0 upon resume.
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to
		 * bind interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because at this
		 * point in the suspend process, the boot cpu owns cpu_lock
		 * and all other cpus are also executing in the pause thread
		 * (only modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}
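
/*
 * The warm reset vector is the real-mode far pointer (an offset word
 * followed by a segment word, conventionally at physical 0x40:0x67 in
 * the BIOS data area) through which the BIOS can transfer control on a
 * warm reset.  map_warm_reset_vector() below points it at the real-mode
 * startup code in the rm_platter page.
 */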
static ushort_t *warm_reset_vector = NULL;

static ushort_t *
map_warm_reset_vector()
{
	/*LINTED*/
	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
		return (NULL);

	/*
	 * Set up the secondary CPU BIOS boot-up vector.
	 */
	*warm_reset_vector = (ushort_t)((caddr_t)
	    /*LINTED*/
	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
	    + ((ulong_t)rm_platter_va & 0xf));
	warm_reset_vector++;
	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

	--warm_reset_vector;
	return (warm_reset_vector);
}

void
i_cpr_pre_resume_cpus()
{
	/*
	 * This is a cut-down version of start_other_cpus(): it does just
	 * the initialization needed to wake the other cpus.
	 */
	unsigned	who;
	int		boot_cpuid = i_cpr_bootcpuid();
	uint32_t	code_length = 0;
	caddr_t		wakevirt = rm_platter_va;
	/*LINTED*/
	wakecode_t	*wp = (wakecode_t *)wakevirt;
	char		*str = "i_cpr_pre_resume_cpus";
	extern int	get_tsc_ready();
	int		err;

	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * If startup wasn't able to find a page under 1M, we cannot
	 * proceed.
	 */
	if (rm_platter_va == 0) {
		cmn_err(CE_WARN, "Cannot suspend the system because no "
		    "memory below 1M could be found for processor startup");
		return;
	}

	/*
	 * Map the BIOS warm reset vector and point it at the real-mode
	 * startup code, which i_cpr_power_down() copied to the page at
	 * rm_platter_va before suspending.
	 */
	warm_reset_vector = map_warm_reset_vector();
	if (warm_reset_vector == NULL) {
		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
		return;
	}

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave
	 * CPUs do their TSC syncs with the same CPU.
	 */

	affinity_set(CPU_CURRENT);

	/*
	 * Mark the boot cpu as being ready and in the procset, since we are
	 * running on that cpu.
	 */
	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
	CPUSET_ONLY(procset, boot_cpuid);

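	/*
	 * Wake the remaining CPUs one at a time: copy each CPU's saved
	 * context into the platter page, start the CPU, wait for it to
	 * join procset, sync its TSC with the boot CPU's if enabled, and
	 * wait for it to appear in cpu_ready_set before moving on to the
	 * next one.
	 */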
	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;
		wc_desctbr_t	gdt;

		if (who == boot_cpuid)
			continue;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))

		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);

		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);

		mutex_enter(&cpu_lock);
		err = mach_cpuid_start(who, rm_platter_va);
		mutex_exit(&cpu_lock);
		if (err != 0) {
			cmn_err(CE_WARN, "cpu%d: failed to start during "
			    "suspend/resume error %d", who, err);
			continue;
		}

		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))

		if (!wait_for_set(&procset, who))
			continue;

		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))

		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))

		if (tsc_gethrtime_enable) {
			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
			tsc_sync_master(who);
		}

		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
		    who))
		/*
		 * Wait for the cpu to declare that it is ready; we want
		 * the cpus to start serially instead of in parallel, so
		 * that they do not contend with each other in wc_rm_start().
		 */
		if (!wait_for_set(&cpu_ready_set, who))
			continue;

		/*
		 * There is no need to re-initialize dtrace via
		 * dtrace_cpu_init() here.
		 */
		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
	}

	affinity_clear();

	PMD(PMD_SX, ("%s() all cpus now ready\n", str))

}

static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}

/*
 * We need to setup a 1:1 (virtual to physical) mapping for the
 * page containing the wakeup code.
 */
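/*
 * The wakeup code begins executing in real mode at the platter page's
 * physical address and then enables paging, so the page must also be
 * reachable at a virtual address identical to its physical address;
 * map_wakeaddr_1to1() below establishes that identity mapping in the
 * kernel hat.
 */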
static struct as *save_as;	/* when switching to kas */

static void
unmap_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
}

void
i_cpr_post_resume_cpus()
{
	uint64_t	wakephys = rm_platter_pa;

	if (warm_reset_vector != NULL)
		unmap_warm_reset_vector(warm_reset_vector);

	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);

	/*
	 * cmi_post_mpstartup() is only required upon boot, not upon
	 * resume from RAM.
	 */

	PT(PT_UNDO1to1);
	/* Tear down 1:1 mapping for wakeup code */
	unmap_wakeaddr_1to1(wakephys);
}

/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}

int
i_cpr_reusable_supported(void)
{
	return (0);
}

static void
map_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
	    HAT_LOAD);
	save_as = curthread->t_procp->p_as;
	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
}


void
prt_other_cpus()
{
	int	who;

	if (ncpus == 1) {
		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
		    "uniprocessor machine\n"))
		return;
	}

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
		    AFMT ", sp=%lx\n", who,
		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
	}
}

/*
 * Power down the system.
 */
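/*
 * i_cpr_power_down() drives the actual S3 entry: it saves the boot
 * CPU's context with wc_save_context(), fills in the real-mode platter
 * with everything the wakeup code needs, and asks the platform power
 * manager (PPM) to enter S3.  On a successful suspend, control comes
 * back by way of the wakeup code as a second return from
 * wc_save_context(), at which point the else branch below restores
 * APIC state and re-enables interrupts.
 */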
int
i_cpr_power_down(int sleeptype)
{
	caddr_t		wakevirt = rm_platter_va;
	uint64_t	wakephys = rm_platter_pa;
	ulong_t		saved_intr;
	uint32_t	code_length = 0;
	wc_desctbr_t	gdt;
	/*LINTED*/
	wakecode_t	*wp = (wakecode_t *)wakevirt;
	/*LINTED*/
	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
	wc_cpu_t	*cpup = &(wp->wc_cpu);
	dev_info_t	*ppm;
	int		ret = 0;
	power_req_t	power_req;
	char		*str = "i_cpr_power_down";
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
	extern int cpr_suspend_succeeded;
	extern void kernel_wc_code();

	ASSERT(sleeptype == CPR_TORAM);
	ASSERT(CPU->cpu_id == 0);

	if ((ppm = PPM(ddi_root_node())) == NULL) {
		PMD(PMD_SX, ("%s: root node not claimed\n", str))
		return (ENOTTY);
	}

	PMD(PMD_SX, ("Entering %s()\n", str))

	PT(PT_IC);
	saved_intr = intr_clear();

	PT(PT_1to1);
	/* Setup 1:1 mapping for wakeup code */
	map_wakeaddr_1to1(wakephys);

	PMD(PMD_SX, ("ncpus=%d\n", ncpus))

	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
	    ((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)),
	    WC_CODESIZE))

	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
	    (void *)wakevirt, (uint_t)wakephys))

	ASSERT(((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)) <
	    WC_CODESIZE);

	bzero(wakevirt, PAGESIZE);

	/* Copy code to rm_platter */
	bcopy((caddr_t)wc_rm_start, wakevirt,
	    (size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start));

	prt_other_cpus();


	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))

	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	real_mode_platter->rm_cr4 = getcr4();
	real_mode_platter->rm_pdbr = getcr3();

	rmp_gdt_init(real_mode_platter);

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
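	/*
	 * rm_longmode64_addr is the physical address of the copy of
	 * wc_long_mode_64 in the platter page: the platter's physical
	 * base plus the entry point's offset within the copied
	 * [wc_rm_start, wc_rm_end) code.
	 */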
	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
	    (uint32_t)((uintptr_t)wc_long_mode_64 - (uintptr_t)wc_rm_start);

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
	    (ulong_t)real_mode_platter->rm_longmode64_addr))


	PT(PT_SC);
	if (wc_save_context(cpup)) {

		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
		if (ret != 0)
			return (ret);

		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
		if (ret != 0)
			return (ret);

		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
		    (uint_t)wakephys, (void *)&kernel_wc_code))
		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
		    cpup->wc_esp))
		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
		    (long)cpup->wc_cr4))
		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
		    (long)cpup->wc_eflags))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);

		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);

		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
		    (ulong_t)wcpp->rm_cr4, getcr4()))

		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
		    (ulong_t)wcpp->rm_pdbr, getcr3()))

		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
		    (ulong_t)wcpp->rm_longmode64_addr))

		PMD(PMD_SX,
		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase))
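
		/*
		 * Hand the final S3 entry off to the platform power
		 * manager; the wakephys field passes along the physical
		 * address of the wakeup code.
		 */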
		power_req.request_type = PMR_PPM_ENTER_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
		power_req.req.ppm_power_enter_sx_req.test_point =
		    cpr_test_point;
		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		/*
		 * If the suspend worked, we never get control back here;
		 * we resume in the else branch below via a second return
		 * from wc_save_context().  If we do get control back here,
		 * the suspend failed.
		 * XXX return EINVAL here?
		 */

		unmap_wakeaddr_1to1(wakephys);
		intr_restore(saved_intr);

		return (ret);
	} else {
		cpr_suspend_succeeded = 1;

		power_req.request_type = PMR_PPM_EXIT_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
		/*
		 * The restore should never fail if the save succeeded.
		 */
		ASSERT(ret == 0);

		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));

		/*
		 * Enable interrupts on boot cpu.
		 */
		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		PT(PT_INTRRESTORE);
		intr_restore(saved_intr);
		PT(PT_CPU);

		return (ret);
	}
}

/*
 * Stop all other CPUs before halting or rebooting.  We pause the CPUs
 * instead of sending a cross call.
 * Stolen from sun4/os/mp_states.c
 */

static int cpu_are_paused;	/* sic */

void
i_cpr_stop_other_cpus(void)
{
	mutex_enter(&cpu_lock);
	if (cpu_are_paused) {
		mutex_exit(&cpu_lock);
		return;
	}
	pause_cpus(NULL, NULL);
	cpu_are_paused = 1;

	mutex_exit(&cpu_lock);
}
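
/*
 * S3 support is decided in precedence order: an explicit platform
 * override disables CPR entirely, an explicit platform enable turns it
 * on unconditionally, and otherwise the pm_S3_enabled setting
 * (typically configured via power.conf) decides.
 */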
int
i_cpr_is_supported(int sleeptype)
{
	extern int cpr_supported_override;
	extern int cpr_platform_enable;
	extern int pm_S3_enabled;

	if (sleeptype != CPR_TORAM)
		return (0);

	/*
	 * The next statement tests if a specific platform has turned off
	 * cpr support.
	 */
	if (cpr_supported_override)
		return (0);

	/*
	 * If a platform has specifically turned on cpr support ...
	 */
	if (cpr_platform_enable)
		return (1);

	return (pm_S3_enabled);
}

void
i_cpr_bitmap_cleanup(void)
{
}

void
i_cpr_free_memory_resources(void)
{
}

/*
 * Needed only for S3 so far
 */
static int
i_cpr_platform_alloc(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_alloc";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_ALLOC;
	return ((*psm_state)(req));
}

/*
 * Needed only for S3 so far
 */
static void
i_cpr_platform_free(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_free";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return;
	}

	req->psr_cmd = PSM_STATE_FREE;
	(void) (*psm_state)(req);
}

static int
i_cpr_save_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_save_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_SAVE;
	return ((*psm_state)(req));
}

static int
i_cpr_restore_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_restore_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_RESTORE;
	return ((*psm_state)(req));
}

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * Fill up the real mode platter to make it easy for real mode code
	 * to kick it off.  This area should really be one passed by the boot
	 * loader to the kernel and guaranteed to be below 1MB and aligned to
	 * 16 bytes.  It should also have identical physical and virtual
	 * addresses in paged mode.
	 */

	real_mode_platter->rm_pdbr = getcr3();
	real_mode_platter->rm_cpu = cpun;
	real_mode_platter->rm_cr4 = cr4;

	real_mode_platter->rm_gdt_base = gdt.base;
	real_mode_platter->rm_gdt_lim = gdt.limit;

	if (getcr3() > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%llx).",
		    (unsigned long long)getcr3());

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
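	/*
	 * 0x20980000000000ULL below is the 8-byte descriptor with access
	 * byte 0x98 (Present, S = 1, execute-only code) and the L (long
	 * mode) flag set; base and limit are ignored for 64-bit code
	 * segments.
	 */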
	real_mode_platter->rm_temp_gdt[0] = 0ULL;
	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
	    offsetof(rm_platter_t, rm_temp_gdt);

	real_mode_platter->rm_temp_idt_lim = 0;
	real_mode_platter->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
}

void
i_cpr_start_cpu(void)
{

	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to sync PAT with cpu0's PAT.  We have to do
	 * this with interrupts disabled.
	 */
	pat_sync();

	/*
	 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
	 */
	if (fp_save_mech == FP_XSAVE) {
		setup_xfem();
	}

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * There is no need to call cpuid_pass2(), cpuid_pass3(),
	 * cpuid_pass4() or init_cpu_info(), since the work they do only
	 * needs to be done once, at boot time.
	 */


	mutex_enter(&cpu_lock);
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))
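
	/*
	 * This rendezvouses with the boot CPU, which calls
	 * tsc_sync_master() for each woken CPU from
	 * i_cpr_pre_resume_cpus().
	 */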
	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous
	 * systems) need to go load another CPU module.  The act of
	 * attempting to load a module may trigger a cross-call, which
	 * will ASSERT unless this cpu is CPU_READY.
	 */

	/*
	 * cmi has already been initialized (during boot), so it does not
	 * need to be done again.
	 */
#ifdef PM_REINITMCAONRESUME
	if (is_x86_feature(x86_featureset, X86FSET_MCA))
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))
}

void
i_cpr_alloc_cpus(void)
{
	char *str = "i_cpr_alloc_cpus";

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	/*
	 * We allocate this only when we actually need it, to save on
	 * kernel memory.
	 */

	if (wc_other_cpus == NULL) {
		wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
		    KM_SLEEP);
	}

}

void
i_cpr_free_cpus(void)
{
	int index;
	wc_cpu_t *wc_cpu;

	if (wc_other_cpus != NULL) {
		for (index = 0; index < max_ncpus; index++) {
			wc_cpu = wc_other_cpus + index;
			if (wc_cpu->wc_saved_stack != NULL) {
				kmem_free(wc_cpu->wc_saved_stack,
				    wc_cpu->wc_saved_stack_size);
			}
		}

		kmem_free((void *) wc_other_cpus,
		    max_ncpus * sizeof (wc_cpu_t));
		wc_other_cpus = NULL;
	}
}

/*
 * Wrapper for acpica_ddi_save_resources().
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}

/*
 * Wrapper for acpica_ddi_restore_resources().
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}

static int
wait_for_set(cpuset_t *set, int who)
{
	int delays;
	char *str = "wait_for_set";

	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably
			 * looking a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
			PMD(PMD_SX, ("%s() %d cpu started "
			    "but not running in the kernel yet\n",
			    str, who))
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds; bail.
			 */
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			PMD(PMD_SX, ("%s() %d cpu timed out\n",
			    str, who))
			return (0);
		}

		/*
		 * Wait at least 10ms, then check again.
		 */
		drv_usecwait(10000);
	}

	return (1);
}

static void
i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	if (wc_cpu->wc_saved_stack_size < stack_size) {
		if (wc_cpu->wc_saved_stack != NULL) {
			kmem_free(wc_cpu->wc_saved_stack,
			    wc_cpu->wc_saved_stack_size);
		}
		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
		wc_cpu->wc_saved_stack_size = stack_size;
	}

	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
}

void
i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	bcopy(save_stack, start, stack_size);
}