/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Platform-specific implementation code.
 * Currently only suspend to RAM is supported (ACPI S3).
 */

#define	SUNDDI_IMPL

#include <sys/types.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/cpuvar.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <sys/cpr.h>
#include <sys/kmem.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <vm/seg_kmem.h>
#include <sys/cpu_module.h>
#include <sys/callb.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/stack.h>
#include <sys/fs/ufs_fs.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/thread.h>
#include <sys/x_call.h>
#include <sys/smp_impldefs.h>
#include <vm/vm_dep.h>
#include <sys/psm.h>
#include <sys/epm.h>
#include <sys/cpr_wakecode.h>
#include <sys/x86_archext.h>
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/fp.h>

#define	AFMT	"%lx"

extern int flushes_require_xcalls;
extern cpuset_t cpu_ready_set;

#if defined(__amd64)
extern void *wc_long_mode_64(void);
#endif	/* __amd64 */
extern int tsc_gethrtime_enable;
extern void i_cpr_start_cpu(void);

ushort_t cpr_mach_type = CPR_MACHTYPE_X86;
void (*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

static wc_cpu_t	*wc_other_cpus = NULL;
static cpuset_t procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);

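/*
 * CPR_GET_STACK_START/END yield the lowest and highest addresses of a
 * thread's kernel stack regardless of the direction in which stacks grow;
 * they are used by i_cpr_save_stack() and i_cpr_restore_stack() below.
 */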
#ifdef STACK_GROWTH_DOWN
#define	CPR_GET_STACK_START(t)	((t)->t_stkbase)
#define	CPR_GET_STACK_END(t)	((t)->t_stk)
#else
#define	CPR_GET_STACK_START(t)	((t)->t_stk)
#define	CPR_GET_STACK_END(t)	((t)->t_stkbase)
#endif	/* STACK_GROWTH_DOWN */

/*
 * restart paused slave cpus
 */
void
i_cpr_machdep_setup(void)
{
	if (ncpus > 1) {
		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
		mutex_enter(&cpu_lock);
		start_cpus();
		mutex_exit(&cpu_lock);
	}
}


/*
 * Stop all interrupt activities in the system
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}

/*
 * Set machine up to take interrupts
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}

/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;
	ASSERT(notcalled);
}


void
i_cpr_set_tbr(void)
{
}


processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}

/*
 * cpu0 should contain bootcpu info
 */
cpu_t *
i_cpr_bootcpu(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (cpu_get(i_cpr_bootcpuid()));
}

/*
 * Save context for the specified CPU
 */
void *
i_cpr_save_context(void *arg)
{
	long index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int ret;
	wc_cpu_t *wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context() returns twice: once when saving the context at
	 * suspend time and once more when that context is restored on
	 * resume.  It returns non-zero at suspend time and 0 upon resume
	 * (see also its use in i_cpr_power_down()).
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to
		 * bind interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because at this
		 * point in the suspend process, the boot cpu owns cpu_lock
		 * and all other cpus are also executing in the pause thread
		 * (only modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}

static ushort_t *warm_reset_vector = NULL;

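/*
 * Map the BIOS warm-reset vector and point it at the real-mode startup
 * code in the rm_platter page, so that CPUs started via the warm-reset
 * path begin executing the wakeup code.  Returns the mapped vector so it
 * can later be handed to unmap_warm_reset_vector(), or NULL if the
 * mapping fails.
 */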
static ushort_t *
map_warm_reset_vector()
{
	/*LINTED*/
	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
		return (NULL);

	/*
	 * setup secondary cpu bios boot up vector
	 */
	*warm_reset_vector = (ushort_t)((caddr_t)
	    /*LINTED*/
	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
	    + ((ulong_t)rm_platter_va & 0xf));
	warm_reset_vector++;
	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

	--warm_reset_vector;
	return (warm_reset_vector);
}

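/*
 * Restart and resynchronize the non-boot CPUs on resume: each one is
 * started through the real-mode platter and waited for, serially, until
 * it appears in procset and then in cpu_ready_set.
 */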
void
i_cpr_pre_resume_cpus()
{
	/*
	 * This is a cut-down version of start_other_cpus(); just do the
	 * initialization needed to wake the other cpus.
	 */
	unsigned who;
	int boot_cpuid = i_cpr_bootcpuid();
	uint32_t code_length = 0;
	caddr_t wakevirt = rm_platter_va;
	/*LINTED*/
	wakecode_t *wp = (wakecode_t *)wakevirt;
	char *str = "i_cpr_pre_resume_cpus";
	extern int get_tsc_ready();
	int err;

	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * If startup wasn't able to find a page under 1M, we cannot
	 * proceed.
	 */
	if (rm_platter_va == 0) {
		cmn_err(CE_WARN, "Cannot suspend the system because no "
		    "memory below 1M could be found for processor startup");
		return;
	}

	/*
	 * Copy the real mode code at "real_mode_start" to the
	 * page at rm_platter_va.
	 */
	warm_reset_vector = map_warm_reset_vector();
	if (warm_reset_vector == NULL) {
		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
		return;
	}

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave
	 * CPUs do their TSC syncs with the same CPU.
	 */

	affinity_set(CPU_CURRENT);

	/*
	 * Mark the boot cpu as being ready and in the procset, since we are
	 * running on that cpu.
	 */
	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
	CPUSET_ONLY(procset, boot_cpuid);

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;
		wc_desctbr_t	gdt;

		if (who == boot_cpuid)
			continue;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))

		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

#if defined(__amd64)
		code_length = (uint32_t)wc_long_mode_64 -
		    (uint32_t)wc_rm_start;
#else
		code_length = 0;
#endif

		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);

		mutex_enter(&cpu_lock);
		err = mach_cpuid_start(who, rm_platter_va);
		mutex_exit(&cpu_lock);
		if (err != 0) {
			cmn_err(CE_WARN, "cpu%d: failed to start during "
			    "suspend/resume error %d", who, err);
			continue;
		}

		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))

		if (!wait_for_set(&procset, who))
			continue;

		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))

		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))

		if (tsc_gethrtime_enable) {
			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
			tsc_sync_master(who);
		}

		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
		    who))
		/*
		 * Wait for cpu to declare that it is ready, we want the
		 * cpus to start serially instead of in parallel, so that
		 * they do not contend with each other in wc_rm_start()
		 */
		if (!wait_for_set(&cpu_ready_set, who))
			continue;

		/*
		 * do not need to re-initialize dtrace using dtrace_cpu_init
		 * function
		 */
		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
	}

	affinity_clear();

	PMD(PMD_SX, ("%s() all cpus now ready\n", str))

}

static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}

/*
 * We need to set up a 1:1 (virtual to physical) mapping for the
 * page containing the wakeup code.
 */
static struct as *save_as;	/* when switching to kas */

static void
unmap_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
}

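/*
 * Undo the MP-startup scaffolding once all CPUs have resumed: unmap the
 * warm-reset vector and tear down the kernel and 1:1 mappings of the
 * wakeup (rm_platter) page.
 */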
void
i_cpr_post_resume_cpus()
{
	uint64_t	wakephys = rm_platter_pa;

	if (warm_reset_vector != NULL)
		unmap_warm_reset_vector(warm_reset_vector);

	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);

	/*
	 * cmi_post_mpstartup() is only required upon boot, not upon
	 * resume from RAM
	 */

	PT(PT_UNDO1to1);
	/* Tear down 1:1 mapping for wakeup code */
	unmap_wakeaddr_1to1(wakephys);
}

/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}

int
i_cpr_reusable_supported(void)
{
	return (0);
}
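
/*
 * Install an identity (VA == PA) mapping for the page holding the wakeup
 * code and switch this thread onto the kernel HAT, remembering the previous
 * address space in save_as so unmap_wakeaddr_1to1() can switch back.
 */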
static void
map_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
	    HAT_LOAD);
	save_as = curthread->t_procp->p_as;
	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
}


void
prt_other_cpus()
{
	int	who;

	if (ncpus == 1) {
		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
		    "uniprocessor machine\n"))
		return;
	}

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
		    AFMT ", sp=%lx\n", who,
		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
	}
}

/*
 * Power down the system.
 */
int
i_cpr_power_down(int sleeptype)
{
	caddr_t		wakevirt = rm_platter_va;
	uint64_t	wakephys = rm_platter_pa;
	ulong_t		saved_intr;
	uint32_t	code_length = 0;
	wc_desctbr_t	gdt;
	/*LINTED*/
	wakecode_t	*wp = (wakecode_t *)wakevirt;
	/*LINTED*/
	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
	wc_cpu_t	*cpup = &(wp->wc_cpu);
	dev_info_t	*ppm;
	int		ret = 0;
	power_req_t	power_req;
	char		*str = "i_cpr_power_down";
#if defined(__amd64)
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
#endif
	extern int cpr_suspend_succeeded;
	extern void kernel_wc_code();

	ASSERT(sleeptype == CPR_TORAM);
	ASSERT(CPU->cpu_id == 0);

	if ((ppm = PPM(ddi_root_node())) == NULL) {
		PMD(PMD_SX, ("%s: root node not claimed\n", str))
		return (ENOTTY);
	}

	PMD(PMD_SX, ("Entering %s()\n", str))

	PT(PT_IC);
	saved_intr = intr_clear();

	PT(PT_1to1);
	/* Setup 1:1 mapping for wakeup code */
	map_wakeaddr_1to1(wakephys);

	PMD(PMD_SX, ("ncpus=%d\n", ncpus))

	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))

	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
	    (void *)wakevirt, (uint_t)wakephys))

	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
	    WC_CODESIZE);

	bzero(wakevirt, PAGESIZE);

	/* Copy code to rm_platter */
	bcopy((caddr_t)wc_rm_start, wakevirt,
	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));

	prt_other_cpus();

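	/*
	 * On amd64, record the current CR4 and CR3 (page table root) in the
	 * real-mode platter and compute the identity-mapped address of the
	 * 64-bit entry point, so the wakeup code can switch back from real
	 * mode into long mode.
	 */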
#if defined(__amd64)

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	real_mode_platter->rm_cr4 = getcr4();
	real_mode_platter->rm_pdbr = getcr3();

	rmp_gdt_init(real_mode_platter);

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))

	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
	    (ulong_t)real_mode_platter->rm_longmode64_addr))

#endif

	PT(PT_SC);
	if (wc_save_context(cpup)) {

		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
		if (ret != 0)
			return (ret);

		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
		if (ret != 0)
			return (ret);

		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
		    (uint_t)wakephys, (void *)&kernel_wc_code))
		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
		    cpup->wc_esp))
		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
		    (long)cpup->wc_cr4))
		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
		    (long)cpup->wc_eflags))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

#if defined(__amd64)
		code_length = (uint32_t)wc_long_mode_64 -
		    (uint32_t)wc_rm_start;
#else
		code_length = 0;
#endif

		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);

#if defined(__amd64)
		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
		    (ulong_t)wcpp->rm_cr4, getcr4()))

		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
		    (ulong_t)wcpp->rm_pdbr, getcr3()))

		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
		    (ulong_t)wcpp->rm_longmode64_addr))

		PMD(PMD_SX,
		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
#endif

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase))

		power_req.request_type = PMR_PPM_ENTER_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
		power_req.req.ppm_power_enter_sx_req.test_point =
		    cpr_test_point;
		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		/*
		 * If it works, we get control back in the else branch below.
		 * If we get control back here, it didn't work.
		 * XXX return EINVAL here?
		 */

		unmap_wakeaddr_1to1(wakephys);
		intr_restore(saved_intr);

		return (ret);
	} else {
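		/*
		 * Resume path: wc_save_context() returned 0, which means
		 * the wakeup code has restored the context saved above and
		 * the suspend/resume cycle has succeeded.
		 */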
		cpr_suspend_succeeded = 1;

		power_req.request_type = PMR_PPM_EXIT_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
		/*
		 * the restore should never fail if the save succeeded
		 */
		ASSERT(ret == 0);

		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));

		/*
		 * Enable interrupts on boot cpu.
		 */
		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		PT(PT_INTRRESTORE);
		intr_restore(saved_intr);
		PT(PT_CPU);

		return (ret);
	}
}

/*
 * Stop all other cpus before halting or rebooting.  We pause the cpus
 * instead of sending a cross call.
 * Stolen from sun4/os/mp_states.c
 */

static int cpu_are_paused;	/* sic */

void
i_cpr_stop_other_cpus(void)
{
	mutex_enter(&cpu_lock);
	if (cpu_are_paused) {
		mutex_exit(&cpu_lock);
		return;
	}
	pause_cpus(NULL);
	cpu_are_paused = 1;

	mutex_exit(&cpu_lock);
}

int
i_cpr_is_supported(int sleeptype)
{
	extern int cpr_supported_override;
	extern int cpr_platform_enable;
	extern int pm_S3_enabled;

	if (sleeptype != CPR_TORAM)
		return (0);

	/*
	 * The next statement tests if a specific platform has turned off
	 * cpr support.
	 */
	if (cpr_supported_override)
		return (0);

	/*
	 * If a platform has specifically turned on cpr support ...
	 */
	if (cpr_platform_enable)
		return (1);

	return (pm_S3_enabled);
}

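/*
 * The following two routines are not needed for suspend to RAM, so they
 * are no-ops here.
 */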
void
i_cpr_bitmap_cleanup(void)
{
}

void
i_cpr_free_memory_resources(void)
{
}

/*
 * Needed only for S3 so far
 */
static int
i_cpr_platform_alloc(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_alloc";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_ALLOC;
	return ((*psm_state)(req));
}

/*
 * Needed only for S3 so far
 */
static void
i_cpr_platform_free(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_free";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return;
	}

	req->psr_cmd = PSM_STATE_FREE;
	(void) (*psm_state)(req);
}

static int
i_cpr_save_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_save_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_SAVE;
	return ((*psm_state)(req));
}

static int
i_cpr_restore_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_restore_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_RESTORE;
	return ((*psm_state)(req));
}


/* stop lint complaining about offset not being used in 32bit mode */
#if !defined(__amd64)
/*ARGSUSED*/
#endif
static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * Fill up the real mode platter to make it easy for real mode code to
	 * kick it off. This area should really be one passed by boot to kernel
	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
	 * have identical physical and virtual address in paged mode.
	 */

	real_mode_platter->rm_pdbr = getcr3();
	real_mode_platter->rm_cpu = cpun;
	real_mode_platter->rm_cr4 = cr4;

	real_mode_platter->rm_gdt_base = gdt.base;
	real_mode_platter->rm_gdt_lim = gdt.limit;

#if defined(__amd64)
	if (getcr3() > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%llx).",
		    (unsigned long long)getcr3());

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
	real_mode_platter->rm_temp_gdt[0] = 0ULL;
	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);

	real_mode_platter->rm_temp_idt_lim = 0;
	real_mode_platter->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
#endif	/* __amd64 */

	/* return; */
}

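/*
 * Per-CPU resume work for the non-boot CPUs, hung off cpr_start_cpu_func.
 * Re-syncs PAT with the boot CPU, restores the xsave feature mask,
 * reinitializes the syscall handlers, joins procset and resynchronizes
 * the TSC before re-enabling interrupts.
 */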
void
i_cpr_start_cpu(void)
{

	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to sync PAT with cpu0's PAT.  We have to do
	 * this with interrupts disabled.
	 */
	if (is_x86_feature(x86_featureset, X86FSET_PAT))
		pat_sync();

	/*
	 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
	 */
	if (fp_save_mech == FP_XSAVE) {
		setup_xfem();
	}

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
	 * init_cpu_info(), since the work they do only needs to be done
	 * once, at boot time
	 */


	mutex_enter(&cpu_lock);
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous
	 * systems) need to go load another CPU module.  The act of
	 * attempting to load a module may trigger a cross-call, which will
	 * ASSERT unless this cpu is CPU_READY.
	 */

	/*
	 * cmi has already been initialized (during boot), so we do not
	 * need to do it again
	 */
#ifdef PM_REINITMCAONRESUME
	if (is_x86_feature(x86_featureset, X86FSET_MCA))
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))

	/* return; */
}

void
i_cpr_alloc_cpus(void)
{
	char *str = "i_cpr_alloc_cpus";

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	/*
	 * we allocate this only when we actually need it to save on
	 * kernel memory
	 */

	if (wc_other_cpus == NULL) {
		wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
		    KM_SLEEP);
	}

}

void
i_cpr_free_cpus(void)
{
	int index;
	wc_cpu_t *wc_cpu;

	if (wc_other_cpus != NULL) {
		for (index = 0; index < max_ncpus; index++) {
			wc_cpu = wc_other_cpus + index;
			if (wc_cpu->wc_saved_stack != NULL) {
				kmem_free(wc_cpu->wc_saved_stack,
				    wc_cpu->wc_saved_stack_size);
			}
		}

		kmem_free((void *) wc_other_cpus,
		    max_ncpus * sizeof (wc_cpu_t));
		wc_other_cpus = NULL;
	}
}

/*
 * wrapper for acpica_ddi_save_resources()
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}

/*
 * wrapper for acpica_ddi_restore_resources()
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}

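/*
 * Wait (in 10ms steps) for the given CPU to show up in *set.  Warn after
 * about five seconds, give up after about twenty; returns 1 on success
 * and 0 on timeout.
 */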
static int
wait_for_set(cpuset_t *set, int who)
{
	int delays;
	char *str = "wait_for_set";

	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably
			 * looking a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
			PMD(PMD_SX, ("%s() %d cpu started "
			    "but not running in the kernel yet\n",
			    str, who))
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds, bail ..
			 */
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			PMD(PMD_SX, ("%s() %d cpu timed out\n",
			    str, who))
			return (0);
		}

		/*
		 * wait at least 10ms, then check again..
		 */
		drv_usecwait(10000);
	}

	return (1);
}

static void
i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	if (wc_cpu->wc_saved_stack_size < stack_size) {
		if (wc_cpu->wc_saved_stack != NULL) {
			kmem_free(wc_cpu->wc_saved_stack,
			    wc_cpu->wc_saved_stack_size);
		}
		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
		wc_cpu->wc_saved_stack_size = stack_size;
	}

	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
}

void
i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	bcopy(save_stack, start, stack_size);
}