/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Platform specific implementation code
 * Currently only suspend to RAM is supported (ACPI S3)
 */

#define	SUNDDI_IMPL

#include <sys/types.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/cpuvar.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <sys/cpr.h>
#include <sys/kmem.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <vm/seg_kmem.h>
#include <sys/cpu_module.h>
#include <sys/callb.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/stack.h>
#include <sys/fs/ufs_fs.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/thread.h>
#include <sys/x_call.h>
#include <sys/smp_impldefs.h>
#include <vm/vm_dep.h>
#include <sys/psm.h>
#include <sys/epm.h>
#include <sys/cpr_wakecode.h>
#include <sys/x86_archext.h>
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/fp.h>
#include <sys/sysmacros.h>

#define	AFMT	"%lx"

extern int flushes_require_xcalls;
extern cpuset_t cpu_ready_set;

#if defined(__amd64)
extern void *wc_long_mode_64(void);
#endif	/* __amd64 */
extern int tsc_gethrtime_enable;
extern void i_cpr_start_cpu(void);

ushort_t cpr_mach_type = CPR_MACHTYPE_X86;
void (*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

static wc_cpu_t *wc_other_cpus = NULL;
static cpuset_t procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);

#ifdef STACK_GROWTH_DOWN
#define	CPR_GET_STACK_START(t)	((t)->t_stkbase)
#define	CPR_GET_STACK_END(t)	((t)->t_stk)
#else
#define	CPR_GET_STACK_START(t)	((t)->t_stk)
#define	CPR_GET_STACK_END(t)	((t)->t_stkbase)
#endif	/* STACK_GROWTH_DOWN */

/*
 * restart paused slave cpus
 */
void
i_cpr_machdep_setup(void)
{
	if (ncpus > 1) {
		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
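		/*
		 * The slave CPUs were paused (not offlined) by
		 * i_cpr_stop_other_cpus() below; start_cpus() simply
		 * releases them from the pause loop, and cpu_lock must be
		 * held while the set of running CPUs changes.
		 */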
		mutex_enter(&cpu_lock);
		start_cpus();
		mutex_exit(&cpu_lock);
	}
}


/*
 * Stop all interrupt activities in the system
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}

/*
 * Set machine up to take interrupts
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}

/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;
	ASSERT(notcalled);
}


void
i_cpr_set_tbr(void)
{
}


processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}

/*
 * cpu0 should contain bootcpu info
 */
cpu_t *
i_cpr_bootcpu(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (cpu_get(i_cpr_bootcpuid()));
}

/*
 * Save context for the specified CPU
 */
void *
i_cpr_save_context(void *arg)
{
	long index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int ret;
	wc_cpu_t *wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);
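
	/*
	 * Snapshot this thread's kernel stack.  i_cpr_restore_stack() (at
	 * the bottom of this file) copies the saved image back so that the
	 * frames that were live at wc_save_context() time are intact when
	 * execution continues from the saved context on resume.
	 */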
	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context() returns twice: once when suspending and once
	 * when resuming.  It returns a non-zero value on the initial
	 * (suspend) call and 0 when control returns here on resume.
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to
		 * bind interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because at this
		 * point in the suspend process, the boot cpu owns cpu_lock
		 * and all other cpus are also executing in the pause thread
		 * (only modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}

static ushort_t *warm_reset_vector = NULL;

static ushort_t *
map_warm_reset_vector()
{
	/*LINTED*/
	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
		return (NULL);

	/*
	 * setup secondary cpu bios boot up vector
	 */
	*warm_reset_vector = (ushort_t)((caddr_t)
	    /*LINTED*/
	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
	    + ((ulong_t)rm_platter_va & 0xf));
	warm_reset_vector++;
	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

	--warm_reset_vector;
	return (warm_reset_vector);
}

void
i_cpr_pre_resume_cpus()
{
	/*
	 * This is a cut-down version of start_other_cpus(): do just the
	 * initialization needed to wake the other cpus.
	 */
	unsigned who;
	int boot_cpuid = i_cpr_bootcpuid();
	uint32_t code_length = 0;
	caddr_t wakevirt = rm_platter_va;
	/*LINTED*/
	wakecode_t *wp = (wakecode_t *)wakevirt;
	char *str = "i_cpr_pre_resume_cpus";
	extern int get_tsc_ready();
	int err;

	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * If startup wasn't able to find a page under 1M, we cannot
	 * proceed.
	 */
	if (rm_platter_va == 0) {
		cmn_err(CE_WARN, "Cannot suspend the system because no "
		    "memory below 1M could be found for processor startup");
		return;
	}

	/*
	 * Copy the real mode code at "real_mode_start" to the
	 * page at rm_platter_va.
	 */
	warm_reset_vector = map_warm_reset_vector();
	if (warm_reset_vector == NULL) {
		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
		return;
	}

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave
	 * CPUs do their TSC syncs with the same CPU.
	 */

	affinity_set(CPU_CURRENT);

	/*
	 * Mark the boot cpu as being ready and in the procset, since we are
	 * running on that cpu.
	 */
	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
	CPUSET_ONLY(procset, boot_cpuid);

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t *cpup = wc_other_cpus + who;
		wc_desctbr_t gdt;

		if (who == boot_cpuid)
			continue;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))

		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

#if defined(__amd64)
		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);
#else
		code_length = 0;
#endif

		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);

		mutex_enter(&cpu_lock);
		err = mach_cpuid_start(who, rm_platter_va);
		mutex_exit(&cpu_lock);
		if (err != 0) {
			cmn_err(CE_WARN, "cpu%d: failed to start during "
			    "suspend/resume error %d", who, err);
			continue;
		}

		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))

		if (!wait_for_set(&procset, who))
			continue;

		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))

		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))

		if (tsc_gethrtime_enable) {
			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
			tsc_sync_master(who);
		}

		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
		    who))
		/*
		 * Wait for the cpu to declare that it is ready.  We want
		 * the cpus to start serially rather than in parallel, so
		 * that they do not contend with each other in wc_rm_start().
		 */
		if (!wait_for_set(&cpu_ready_set, who))
			continue;

		/*
		 * do not need to re-initialize dtrace using dtrace_cpu_init
		 * function
		 */
		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
	}

	affinity_clear();

	PMD(PMD_SX, ("%s() all cpus now ready\n", str))

}

static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}
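
/*
 * Note on the warm reset vector: map_warm_reset_vector() above points it at
 * the wakeup code in the rm_platter page, expressed as a real-mode
 * segment:offset pair, so a slave CPU restarted through the BIOS warm-reset
 * path begins executing that code; unmap_warm_reset_vector() drops the
 * mapping once i_cpr_pre_resume_cpus() has restarted the slave CPUs.
 */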

/*
 * We need to setup a 1:1 (virtual to physical) mapping for the
 * page containing the wakeup code.
 */
static struct as *save_as;	/* when switching to kas */

static void
unmap_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t wp = (uintptr_t)wakephys;
	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
}

void
i_cpr_post_resume_cpus()
{
	uint64_t wakephys = rm_platter_pa;

	if (warm_reset_vector != NULL)
		unmap_warm_reset_vector(warm_reset_vector);

	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);

	/*
	 * cmi_post_mpstartup() is only required upon boot, not upon
	 * resume from RAM
	 */

	PT(PT_UNDO1to1);
	/* Tear down 1:1 mapping for wakeup code */
	unmap_wakeaddr_1to1(wakephys);
}

/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}

int
i_cpr_reusable_supported(void)
{
	return (0);
}

static void
map_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t wp = (uintptr_t)wakephys;
	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
	    HAT_LOAD);
	save_as = curthread->t_procp->p_as;
	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
}


void
prt_other_cpus()
{
	int who;

	if (ncpus == 1) {
		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
		    "uniprocessor machine\n"))
		return;
	}

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t *cpup = wc_other_cpus + who;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
		    AFMT ", sp=%lx\n", who,
		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
	}
}

/*
 * Power down the system.
 */
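/*
 * Rough outline of the S3 entry path below (boot CPU only): the real-mode
 * wakeup code (wc_rm_start..wc_rm_end) is copied to the rm_platter page,
 * this CPU's context is captured with wc_save_context(), and the platform
 * power manager is asked to enter S3 (PMR_PPM_ENTER_SX).  When the machine
 * wakes up, execution resumes from the saved context and takes the else
 * branch, which restores the APIC state and re-enables interrupts.
 */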
int
i_cpr_power_down(int sleeptype)
{
	caddr_t wakevirt = rm_platter_va;
	uint64_t wakephys = rm_platter_pa;
	ulong_t saved_intr;
	uint32_t code_length = 0;
	wc_desctbr_t gdt;
	/*LINTED*/
	wakecode_t *wp = (wakecode_t *)wakevirt;
	/*LINTED*/
	rm_platter_t *wcpp = (rm_platter_t *)wakevirt;
	wc_cpu_t *cpup = &(wp->wc_cpu);
	dev_info_t *ppm;
	int ret = 0;
	power_req_t power_req;
	char *str = "i_cpr_power_down";
#if defined(__amd64)
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
#endif
	extern int cpr_suspend_succeeded;
	extern void kernel_wc_code();

	ASSERT(sleeptype == CPR_TORAM);
	ASSERT(CPU->cpu_id == 0);

	if ((ppm = PPM(ddi_root_node())) == NULL) {
		PMD(PMD_SX, ("%s: root node not claimed\n", str))
		return (ENOTTY);
	}

	PMD(PMD_SX, ("Entering %s()\n", str))

	PT(PT_IC);
	saved_intr = intr_clear();

	PT(PT_1to1);
	/* Setup 1:1 mapping for wakeup code */
	map_wakeaddr_1to1(wakephys);

	PMD(PMD_SX, ("ncpus=%d\n", ncpus))

	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
	    ((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)),
	    WC_CODESIZE))

	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
	    (void *)wakevirt, (uint_t)wakephys))

	ASSERT(((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)) <
	    WC_CODESIZE);

	bzero(wakevirt, PAGESIZE);

	/* Copy code to rm_platter */
	bcopy((caddr_t)wc_rm_start, wakevirt,
	    (size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start));

	prt_other_cpus();

#if defined(__amd64)

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))

	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	real_mode_platter->rm_cr4 = getcr4();
	real_mode_platter->rm_pdbr = getcr3();

	rmp_gdt_init(real_mode_platter);
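
	/*
	 * The cr4 and cr3 (page-table root) values captured above are
	 * expected to be used by the wakeup code to re-enable paging with
	 * the kernel's own page tables before it jumps back into 64-bit
	 * mode at rm_longmode64_addr.
	 */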

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
	    (uint32_t)((uintptr_t)wc_long_mode_64 - (uintptr_t)wc_rm_start);

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
	    (ulong_t)real_mode_platter->rm_longmode64_addr))

#endif

	PT(PT_SC);
	if (wc_save_context(cpup)) {

		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
		if (ret != 0)
			return (ret);

		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
		if (ret != 0)
			return (ret);

		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
		    (uint_t)wakephys, (void *)&kernel_wc_code))
		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
		    cpup->wc_esp))
		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
		    (long)cpup->wc_cr4))
		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
		    (long)cpup->wc_eflags))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

#if defined(__amd64)
		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
		    (uintptr_t)wc_rm_start);
#else
		code_length = 0;
#endif

		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);

#if defined(__amd64)
		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
		    (ulong_t)wcpp->rm_cr4, getcr4()))

		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
		    (ulong_t)wcpp->rm_pdbr, getcr3()))

		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
		    (ulong_t)wcpp->rm_longmode64_addr))

		PMD(PMD_SX,
		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
#endif

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase))

		power_req.request_type = PMR_PPM_ENTER_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
		power_req.req.ppm_power_enter_sx_req.test_point =
		    cpr_test_point;
		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		/*
		 * If it works, we get control back to the else branch below.
		 * If we get control back here, it didn't work.
		 * XXX return EINVAL here?
		 */

		unmap_wakeaddr_1to1(wakephys);
		intr_restore(saved_intr);

		return (ret);
	} else {
		cpr_suspend_succeeded = 1;

		power_req.request_type = PMR_PPM_EXIT_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
		/*
		 * The restore should never fail if the save succeeded.
		 */
		ASSERT(ret == 0);

		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));

		/*
		 * Enable interrupts on boot cpu.
		 */
		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		PT(PT_INTRRESTORE);
		intr_restore(saved_intr);
		PT(PT_CPU);

		return (ret);
	}
}

/*
 * Stop all other cpus before halting or rebooting.  We pause the cpus
 * instead of sending a cross call.
 * Stolen from sun4/os/mp_states.c
 */

static int cpu_are_paused;	/* sic */

void
i_cpr_stop_other_cpus(void)
{
	mutex_enter(&cpu_lock);
	if (cpu_are_paused) {
		mutex_exit(&cpu_lock);
		return;
	}
	pause_cpus(NULL);
	cpu_are_paused = 1;

	mutex_exit(&cpu_lock);
}

int
i_cpr_is_supported(int sleeptype)
{
	extern int cpr_supported_override;
	extern int cpr_platform_enable;
	extern int pm_S3_enabled;

	if (sleeptype != CPR_TORAM)
		return (0);

	/*
	 * The next statement tests if a specific platform has turned off
	 * cpr support.
	 */
	if (cpr_supported_override)
		return (0);

	/*
	 * If a platform has specifically turned on cpr support ...
	 */
	if (cpr_platform_enable)
		return (1);

	return (pm_S3_enabled);
}

void
i_cpr_bitmap_cleanup(void)
{
}

void
i_cpr_free_memory_resources(void)
{
}

/*
 * Needed only for S3 so far
 */
static int
i_cpr_platform_alloc(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_platform_alloc";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_ALLOC;
	return ((*psm_state)(req));
}

/*
 * Needed only for S3 so far
 */
static void
i_cpr_platform_free(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_platform_free";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return;
	}

	req->psr_cmd = PSM_STATE_FREE;
	(void) (*psm_state)(req);
}

static int
i_cpr_save_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_save_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_SAVE;
	return ((*psm_state)(req));
}

static int
i_cpr_restore_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char *str = "i_cpr_restore_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_RESTORE;
	return ((*psm_state)(req));
}


/* stop lint complaining about offset not being used in 32bit mode */
#if !defined(__amd64)
/*ARGSUSED*/
#endif
static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * Fill up the real mode platter to make it easy for real mode code to
	 * kick it off. This area should really be one passed by boot to kernel
	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
	 * have identical physical and virtual address in paged mode.
	 */

	real_mode_platter->rm_pdbr = getcr3();
	real_mode_platter->rm_cpu = cpun;
	real_mode_platter->rm_cr4 = cr4;

	real_mode_platter->rm_gdt_base = gdt.base;
	real_mode_platter->rm_gdt_lim = gdt.limit;

#if defined(__amd64)
	if (getcr3() > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%llx).",
		    (unsigned long long)getcr3());

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
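	/*
	 * For reference, 0x20980000000000 below is the 64-bit code
	 * descriptor described above: P (bit 47) = 1, S (bit 44) = 1, the
	 * executable type bit (bit 43) = 1 and L (bit 53) = 1, with the
	 * base and limit fields left as zero since they are ignored in
	 * long mode.
	 */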
	real_mode_platter->rm_temp_gdt[0] = 0ULL;
	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
	    offsetof(rm_platter_t, rm_temp_gdt);

	real_mode_platter->rm_temp_idt_lim = 0;
	real_mode_platter->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
#endif	/* __amd64 */

	/* return; */
}

void
i_cpr_start_cpu(void)
{

	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to sync PAT with cpu0's PAT.  We have to do
	 * this with interrupts disabled.
	 */
	if (is_x86_feature(x86_featureset, X86FSET_PAT))
		pat_sync();

	/*
	 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
	 */
	if (fp_save_mech == FP_XSAVE) {
		setup_xfem();
	}

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
	 * init_cpu_info(), since the work that they do only needs to be done
	 * once at boot time
	 */


	mutex_enter(&cpu_lock);
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU.  This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous
	 * systems) need to go load another CPU module.  The act of
	 * attempting to load a module may trigger a cross-call, which will
	 * ASSERT unless this cpu is CPU_READY.
	 */

	/*
	 * cmi has already been initialized (during boot), so it does not
	 * need to be done again.
	 */
#ifdef PM_REINITMCAONRESUME
	if (is_x86_feature(x86_featureset, X86FSET_MCA))
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))

	/* return; */
}

void
i_cpr_alloc_cpus(void)
{
	char *str = "i_cpr_alloc_cpus";

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	/*
	 * we allocate this only when we actually need it to save on
	 * kernel memory
	 */

	if (wc_other_cpus == NULL) {
		wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
		    KM_SLEEP);
	}

}

void
i_cpr_free_cpus(void)
{
	int index;
	wc_cpu_t *wc_cpu;

	if (wc_other_cpus != NULL) {
		for (index = 0; index < max_ncpus; index++) {
			wc_cpu = wc_other_cpus + index;
			if (wc_cpu->wc_saved_stack != NULL) {
				kmem_free(wc_cpu->wc_saved_stack,
				    wc_cpu->wc_saved_stack_size);
			}
		}

		kmem_free((void *) wc_other_cpus,
		    max_ncpus * sizeof (wc_cpu_t));
		wc_other_cpus = NULL;
	}
}

/*
 * wrapper for acpica_ddi_save_resources()
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}

/*
 * wrapper for acpica_ddi_restore_resources()
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}

static int
wait_for_set(cpuset_t *set, int who)
{
	int delays;
	char *str = "wait_for_set";

	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably
			 * looking a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
			PMD(PMD_SX, ("%s() %d cpu started "
			    "but not running in the kernel yet\n",
			    str, who))
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds; bail out.
			 */
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			PMD(PMD_SX, ("%s() %d cpu timed out\n",
			    str, who))
			return (0);
		}

		/*
		 * wait at least 10ms, then check again.
		 */
		drv_usecwait(10000);
	}

	return (1);
}

static void
i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
{
	size_t stack_size;	/* size of stack */
	caddr_t start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	if (wc_cpu->wc_saved_stack_size < stack_size) {
		if (wc_cpu->wc_saved_stack != NULL) {
			kmem_free(wc_cpu->wc_saved_stack,
			    wc_cpu->wc_saved_stack_size);
		}
		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
		wc_cpu->wc_saved_stack_size = stack_size;
	}

	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
}

void
i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
{
	size_t stack_size;	/* size of stack */
	caddr_t start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	bcopy(save_stack, start, stack_size);
}