/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Platform specific implementation code.
 * Currently only suspend to RAM is supported (ACPI S3).
 */

#define	SUNDDI_IMPL

#include <sys/types.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/cpuvar.h>
#include <sys/pte.h>
#include <vm/hat.h>
#include <vm/page.h>
#include <vm/as.h>
#include <sys/cpr.h>
#include <sys/kmem.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include <vm/seg_kmem.h>
#include <sys/cpu_module.h>
#include <sys/callb.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/stack.h>
#include <sys/fs/ufs_fs.h>
#include <sys/memlist.h>
#include <sys/bootconf.h>
#include <sys/thread.h>
#include <sys/x_call.h>
#include <sys/smp_impldefs.h>
#include <vm/vm_dep.h>
#include <sys/psm.h>
#include <sys/epm.h>
#include <sys/cpr_wakecode.h>
#include <sys/x86_archext.h>
#include <sys/reboot.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>

#define	AFMT	"%lx"

extern int	flushes_require_xcalls;
extern cpuset_t	cpu_ready_set;

#if defined(__amd64)
extern void	*wc_long_mode_64(void);
#endif	/* __amd64 */
extern int	tsc_gethrtime_enable;
extern void	i_cpr_start_cpu(void);

ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

static wc_cpu_t	*wc_other_cpus = NULL;
static cpuset_t	procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);

#ifdef STACK_GROWTH_DOWN
#define	CPR_GET_STACK_START(t)	((t)->t_stkbase)
#define	CPR_GET_STACK_END(t)	((t)->t_stk)
#else
#define	CPR_GET_STACK_START(t)	((t)->t_stk)
#define	CPR_GET_STACK_END(t)	((t)->t_stkbase)
#endif	/* STACK_GROWTH_DOWN */
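/*
 * Added note (not in the original source): on x86, STACK_GROWTH_DOWN is
 * defined, so a kernel thread's stack runs from t_stkbase (lowest address)
 * up to t_stk.  The macros above therefore give
 *
 *	stack size = CPR_GET_STACK_END(t) - CPR_GET_STACK_START(t)
 *
 * which is how i_cpr_save_stack() and i_cpr_restore_stack() at the end of
 * this file size the region they copy.
 */
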
/*
 * restart paused slave cpus
 */
void
i_cpr_machdep_setup(void)
{
	if (ncpus > 1) {
		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
		mutex_enter(&cpu_lock);
		start_cpus();
		mutex_exit(&cpu_lock);
	}
}


/*
 * Stop all interrupt activities in the system
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}

/*
 * Set machine up to take interrupts
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}

/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;
	ASSERT(notcalled);
}


void
i_cpr_set_tbr(void)
{
}


processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}

/*
 * cpu0 should contain bootcpu info
 */
cpu_t *
i_cpr_bootcpu(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	return (cpu_get(i_cpr_bootcpuid()));
}

/*
 * Save context for the specified CPU
 */
void *
i_cpr_save_context(void *arg)
{
	long	index = (long)arg;
	psm_state_request_t *papic_state;
	int	resuming;
	int	ret;
	wc_cpu_t	*wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context() returns twice, once when suspending and
	 * once when resuming; it returns non-zero when suspending and
	 * zero upon resume.
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to
		 * bind interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because, at this
		 * point in the suspend process, the boot cpu owns cpu_lock
		 * and all other cpus are also executing in the pause thread
		 * (only modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}
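/*
 * Added commentary (not in the original source): wc_save_context() behaves
 * much like setjmp().  The first call records the CPU's register state in
 * the wc_cpu_t and returns non-zero; when the wakeup code later restores
 * that state, control reappears at the same return point with a value of
 * zero.  Both callers in this file therefore follow a setjmp-style pattern:
 *
 *	if (wc_save_context(cpup) != 0) {
 *		... first return: we are suspending ...
 *	} else {
 *		... second return: we are resuming ...
 *	}
 *
 * Because the saved state includes the stack pointer, the stack must not be
 * disturbed between the save and the restore; that is why the stack is
 * copied aside by i_cpr_save_stack() beforehand and why no functions are
 * called after the save on the suspend path.
 */
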
static ushort_t *warm_reset_vector = NULL;

static ushort_t *
map_warm_reset_vector()
{
	/*LINTED*/
	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
		return (NULL);

	/*
	 * setup secondary cpu bios boot up vector
	 */
	*warm_reset_vector = (ushort_t)((caddr_t)
	    /*LINTED*/
	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
	    + ((ulong_t)rm_platter_va & 0xf));
	warm_reset_vector++;
	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);

	--warm_reset_vector;
	return (warm_reset_vector);
}
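/*
 * Added note (hedged): WARM_RESET_VECTOR is the BIOS warm-boot vector in
 * the BIOS data area, conventionally a real-mode far pointer at 0x40:0x67.
 * The function above stores the offset of the wakeup code in the first
 * 16-bit word and the segment (rm_platter_pa >> 4) in the second, so that a
 * processor released from reset through the BIOS warm-boot path jumps
 * straight into the real-mode platter page:
 *
 *	vector[0] = offset of rm_code within the platter page
 *	vector[1] = rm_platter_pa >> 4		(real-mode segment)
 */
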
void
i_cpr_pre_resume_cpus()
{
	/*
	 * this is a cut-down version of start_other_cpus();
	 * just do the initialization needed to wake the other cpus
	 */
	unsigned who;
	int boot_cpuid = i_cpr_bootcpuid();
	uint32_t	code_length = 0;
	caddr_t		wakevirt = rm_platter_va;
	/*LINTED*/
	wakecode_t	*wp = (wakecode_t *)wakevirt;
	char *str = "i_cpr_pre_resume_cpus";
	extern int get_tsc_ready();
	int err;

	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * If startup wasn't able to find a page under 1M, we cannot
	 * proceed.
	 */
	if (rm_platter_va == 0) {
		cmn_err(CE_WARN, "Cannot suspend the system because no "
		    "memory below 1M could be found for processor startup");
		return;
	}

	/*
	 * Map the warm reset vector and point it at the wakeup code
	 * in the page at rm_platter_va.
	 */
	warm_reset_vector = map_warm_reset_vector();
	if (warm_reset_vector == NULL) {
		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
		return;
	}

	flushes_require_xcalls = 1;

	/*
	 * We lock our affinity to the master CPU to ensure that all slave
	 * CPUs do their TSC syncs with the same CPU.
	 */

	affinity_set(CPU_CURRENT);

	/*
	 * Mark the boot cpu as being ready and in the procset, since we are
	 * running on that cpu.
	 */
	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
	CPUSET_ONLY(procset, boot_cpuid);

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;
		wc_desctbr_t	gdt;

		if (who == boot_cpuid)
			continue;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))

		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

#if defined(__amd64)
		code_length = (uint32_t)wc_long_mode_64 -
		    (uint32_t)wc_rm_start;
#else
		code_length = 0;
#endif

		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);

		mutex_enter(&cpu_lock);
		err = mach_cpuid_start(who, rm_platter_va);
		mutex_exit(&cpu_lock);
		if (err != 0) {
			cmn_err(CE_WARN, "cpu%d: failed to start during "
			    "suspend/resume error %d", who, err);
			continue;
		}

		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))

		if (!wait_for_set(&procset, who))
			continue;

		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))

		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))

		if (tsc_gethrtime_enable) {
			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
			tsc_sync_master(who);
		}

		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
		    who))
		/*
		 * Wait for the cpu to declare that it is ready; we want the
		 * cpus to start serially instead of in parallel, so that
		 * they do not contend with each other in wc_rm_start()
		 */
		if (!wait_for_set(&cpu_ready_set, who))
			continue;

		/*
		 * do not need to re-initialize dtrace using dtrace_cpu_init
		 * function
		 */
		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
	}

	affinity_clear();

	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
}
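/*
 * Added summary (not original commentary): for each application processor
 * the loop above
 *
 *   1. copies that CPU's saved wc_cpu_t into the wakeup (platter) page,
 *   2. fills in the real-mode platter via init_real_mode_platter(),
 *   3. starts the CPU with mach_cpuid_start(),
 *   4. waits for it to show up in procset (set in i_cpr_start_cpu()),
 *   5. pairs with the slave's tsc_sync_slave() via tsc_sync_master() when
 *      TSC-based hrtime is enabled, and
 *   6. waits for it to join cpu_ready_set before waking the next CPU,
 *      since all of the CPUs share the single platter page and wc_rm_start.
 */
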
static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}

/*
 * We need to setup a 1:1 (virtual to physical) mapping for the
 * page containing the wakeup code.
 */
static struct as *save_as;	/* when switching to kas */

static void
unmap_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
}

void
i_cpr_post_resume_cpus()
{
	uint64_t	wakephys = rm_platter_pa;

	if (warm_reset_vector != NULL)
		unmap_warm_reset_vector(warm_reset_vector);

	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);

	/*
	 * cmi_post_mpstartup() is only required upon boot not upon
	 * resume from RAM
	 */

	PT(PT_UNDO1to1);
	/* Tear down 1:1 mapping for wakeup code */
	unmap_wakeaddr_1to1(wakephys);
}

/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}

int
i_cpr_reusable_supported(void)
{
	return (0);
}

static void
map_wakeaddr_1to1(uint64_t wakephys)
{
	uintptr_t	wp = (uintptr_t)wakephys;
	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
	    HAT_LOAD);
	save_as = curthread->t_procp->p_as;
	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
}
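/*
 * Added note (not original commentary): the identity mapping matters because
 * a resuming processor begins executing the wakeup code at its physical
 * address, and it must keep fetching instructions from that same address as
 * it re-enables paging; that only works if virtual == physical for the
 * platter page (compare the "identity mapped address" comments in
 * i_cpr_power_down() and init_real_mode_platter()).  map_wakeaddr_1to1()
 * also moves the current thread onto the kernel-only HAT for the duration
 * of the suspend; unmap_wakeaddr_1to1() reverses both steps after resume.
 */
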
void
prt_other_cpus()
{
	int	who;

	if (ncpus == 1) {
		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
		    "uniprocessor machine\n"))
		return;
	}

	for (who = 0; who < max_ncpus; who++) {

		wc_cpu_t	*cpup = wc_other_cpus + who;

		if (!CPU_IN_SET(mp_cpus, who))
			continue;

		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
		    AFMT ", sp=%lx\n", who,
		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
	}
}

/*
 * Power down the system.
 */
int
i_cpr_power_down(int sleeptype)
{
	caddr_t		wakevirt = rm_platter_va;
	uint64_t	wakephys = rm_platter_pa;
	ulong_t		saved_intr;
	uint32_t	code_length = 0;
	wc_desctbr_t	gdt;
	/*LINTED*/
	wakecode_t	*wp = (wakecode_t *)wakevirt;
	/*LINTED*/
	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
	wc_cpu_t	*cpup = &(wp->wc_cpu);
	dev_info_t	*ppm;
	int		ret = 0;
	power_req_t	power_req;
	char		*str = "i_cpr_power_down";
#if defined(__amd64)
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
#endif
	extern int cpr_suspend_succeeded;
	extern void kernel_wc_code();

	ASSERT(sleeptype == CPR_TORAM);
	ASSERT(CPU->cpu_id == 0);

	if ((ppm = PPM(ddi_root_node())) == NULL) {
		PMD(PMD_SX, ("%s: root node not claimed\n", str))
		return (ENOTTY);
	}

	PMD(PMD_SX, ("Entering %s()\n", str))

	PT(PT_IC);
	saved_intr = intr_clear();

	PT(PT_1to1);
	/* Setup 1:1 mapping for wakeup code */
	map_wakeaddr_1to1(wakephys);

	PMD(PMD_SX, ("ncpus=%d\n", ncpus))

	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))

	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
	    (void *)wakevirt, (uint_t)wakephys))

	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
	    WC_CODESIZE);

	bzero(wakevirt, PAGESIZE);

	/* Copy code to rm_platter */
	bcopy((caddr_t)wc_rm_start, wakevirt,
	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));

	prt_other_cpus();

#if defined(__amd64)

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	real_mode_platter->rm_cr4 = getcr4();
	real_mode_platter->rm_pdbr = getcr3();

	rmp_gdt_init(real_mode_platter);

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);

	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))

	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))

	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
	    (ulong_t)real_mode_platter->rm_longmode64_addr))

#endif

	PT(PT_SC);
	if (wc_save_context(cpup)) {

		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
		if (ret != 0)
			return (ret);

		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
		if (ret != 0)
			return (ret);

		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
		    (uint_t)wakephys, (void *)&kernel_wc_code))
		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
		    cpup->wc_esp))
		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
		    (long)cpup->wc_cr4))
		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
		    (long)cpup->wc_eflags))

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)cpup->wc_gdt_base,
		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))

		gdt.base = cpup->wc_gdt_base;
		gdt.limit = cpup->wc_gdt_limit;

#if defined(__amd64)
		code_length = (uint32_t)wc_long_mode_64 -
		    (uint32_t)wc_rm_start;
#else
		code_length = 0;
#endif

		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);

#if defined(__amd64)
		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
		    (ulong_t)wcpp->rm_cr4, getcr4()))

		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
		    (ulong_t)wcpp->rm_pdbr, getcr3()))

		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
		    (ulong_t)wcpp->rm_longmode64_addr))

		PMD(PMD_SX,
		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
#endif

		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
		    (long)cpup->wc_kgsbase))

		power_req.request_type = PMR_PPM_ENTER_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
		power_req.req.ppm_power_enter_sx_req.test_point =
		    cpr_test_point;
		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		/*
		 * If it works, we get control back in the else branch below.
		 * If we get control back here, it didn't work.
		 * XXX return EINVAL here?
		 */
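		/*
		 * Added explanation (hedged): the PMR_PPM_ENTER_SX ctlop asks
		 * the platform power manager to put the machine into ACPI S3,
		 * with wakephys identifying the page the firmware should
		 * resume into.  On a successful resume the wakeup code in the
		 * platter page restores the context captured by
		 * wc_save_context(cpup) above, so execution reappears as that
		 * call's second (zero) return, i.e. in the else branch below.
		 * Falling through to the code after this comment therefore
		 * means the S3 entry itself failed.
		 */
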
		unmap_wakeaddr_1to1(wakephys);
		intr_restore(saved_intr);

		return (ret);
	} else {
		cpr_suspend_succeeded = 1;

		power_req.request_type = PMR_PPM_EXIT_SX;
		power_req.req.ppm_power_enter_sx_req.sx_state = S3;

		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
		PT(PT_PPMCTLOP);
		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
		    &power_req, &ret);
		PMD(PMD_SX, ("%s: returns %d\n", str, ret))

		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
		/*
		 * the restore should never fail if the save succeeded
		 */
		ASSERT(ret == 0);

		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));

		/*
		 * Enable interrupts on boot cpu.
		 */
		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		PT(PT_INTRRESTORE);
		intr_restore(saved_intr);
		PT(PT_CPU);

		return (ret);
	}
}

/*
 * Stop all other CPUs before halting or rebooting. We pause the CPUs
 * instead of sending a cross call.
 * Stolen from sun4/os/mp_states.c
 */

static int cpu_are_paused;	/* sic */

void
i_cpr_stop_other_cpus(void)
{
	mutex_enter(&cpu_lock);
	if (cpu_are_paused) {
		mutex_exit(&cpu_lock);
		return;
	}
	pause_cpus(NULL);
	cpu_are_paused = 1;

	mutex_exit(&cpu_lock);
}

int
i_cpr_is_supported(int sleeptype)
{
	extern int cpr_supported_override;
	extern int cpr_platform_enable;
	extern int pm_S3_enabled;

	if (sleeptype != CPR_TORAM)
		return (0);

	/*
	 * The next statement tests if a specific platform has turned off
	 * cpr support.
	 */
	if (cpr_supported_override)
		return (0);

	/*
	 * If a platform has specifically turned on cpr support ...
	 */
	if (cpr_platform_enable)
		return (1);

	return (pm_S3_enabled);
}
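/*
 * Added note: the checks above give cpr_supported_override the highest
 * precedence (force suspend-to-RAM off), then cpr_platform_enable (force it
 * on for a specific platform), and finally the generic pm_S3_enabled flag.
 * For example, a platform that knows S3 works can set cpr_platform_enable
 * to advertise suspend-to-RAM even when pm_S3_enabled has not been set.
 */
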
void
i_cpr_bitmap_cleanup(void)
{
}

void
i_cpr_free_memory_resources(void)
{
}

/*
 * Needed only for S3 so far
 */
static int
i_cpr_platform_alloc(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_alloc";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_ALLOC;
	return ((*psm_state)(req));
}

/*
 * Needed only for S3 so far
 */
static void
i_cpr_platform_free(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_platform_free";
#endif

	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return;
	}

	req->psr_cmd = PSM_STATE_FREE;
	(void) (*psm_state)(req);
}

static int
i_cpr_save_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_save_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_SAVE;
	return ((*psm_state)(req));
}

static int
i_cpr_restore_apic(psm_state_request_t *req)
{
#ifdef DEBUG
	char	*str = "i_cpr_restore_apic";
#endif

	if (psm_state == NULL) {
		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
		return (0);
	}

	req->psr_cmd = PSM_STATE_RESTORE;
	return ((*psm_state)(req));
}


/* stop lint complaining about offset not being used in 32bit mode */
#if !defined(__amd64)
/*ARGSUSED*/
#endif
static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
{
	/*LINTED*/
	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;

	/*
	 * Fill up the real mode platter to make it easy for real mode code to
	 * kick it off. This area should really be one passed by boot to kernel
	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
	 * have identical physical and virtual address in paged mode.
	 */

	real_mode_platter->rm_pdbr = getcr3();
	real_mode_platter->rm_cpu = cpun;
	real_mode_platter->rm_cr4 = cr4;

	real_mode_platter->rm_gdt_base = gdt.base;
	real_mode_platter->rm_gdt_lim = gdt.limit;

#if defined(__amd64)
	if (getcr3() > 0xffffffffUL)
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%llx).",
		    (unsigned long long)getcr3());

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
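	/*
	 * Added decoding of the descriptor written below (for reference):
	 * the value 0x20980000000000 is a GDT entry with base and limit of
	 * zero, an access byte of 0x98 (Present = 1, DPL = 0, S = 1,
	 * execute-only code) and a flags nibble of 0x2 (L = 1, a 64-bit code
	 * segment), which is exactly the "64-bit CS" described above.
	 */
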
	real_mode_platter->rm_temp_gdt[0] = 0ULL;
	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);

	real_mode_platter->rm_temp_idt_lim = 0;
	real_mode_platter->rm_temp_idt_base = 0;

	/*
	 * Since the CPU needs to jump to protected mode using an identity
	 * mapped address, we need to calculate it here.
	 */
	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
#endif	/* __amd64 */

	/* return; */
}

void
i_cpr_start_cpu(void)
{
	struct cpu *cp = CPU;

	char *str = "i_cpr_start_cpu";
	extern void init_cpu_syscall(struct cpu *cp);

	PMD(PMD_SX, ("%s() called\n", str))

	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	mutex_enter(&cpu_lock);
	if (cp == i_cpr_bootcpu()) {
		mutex_exit(&cpu_lock);
		PMD(PMD_SX,
		    ("%s() called on bootcpu nothing to do!\n", str))
		return;
	}
	mutex_exit(&cpu_lock);

	/*
	 * We need to sync PAT with cpu0's PAT. We have to do
	 * this with interrupts disabled.
	 */
	if (is_x86_feature(x86_featureset, X86FSET_PAT))
		pat_sync();

	/*
	 * Initialize this CPU's syscall handlers
	 */
	init_cpu_syscall(cp);

	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))

	/*
	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
	 * init_cpu_info(), since the work that they do is only needed to
	 * be done once at boot time
	 */

	mutex_enter(&cpu_lock);
	CPUSET_ADD(procset, cp->cpu_id);
	mutex_exit(&cpu_lock);

	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	if (tsc_gethrtime_enable) {
		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
		tsc_sync_slave();
	}

	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
	    cp->cpu_id, cp->cpu_intr_actv))
	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	(void) spl0();		/* enable interrupts */

	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
	    cp->cpu_base_spl))

	/*
	 * Set up the CPU module for this CPU. This can't be done before
	 * this CPU is made CPU_READY, because we may (in heterogeneous
	 * systems) need to go load another CPU module. The act of attempting
	 * to load a module may trigger a cross-call, which will ASSERT
	 * unless this cpu is CPU_READY.
	 */

	/*
	 * cmi has already been init'd (during boot), so we do not need to
	 * do it again
	 */
#ifdef PM_REINITMCAONRESUME
	if (is_x86_feature(x86_featureset, X86FSET_MCA))
		cmi_mca_init();
#endif

	PMD(PMD_SX, ("%s() returning\n", str))

	/* return; */
}

void
i_cpr_alloc_cpus(void)
{
	char *str = "i_cpr_alloc_cpus";

	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
	/*
	 * we allocate this only when we actually need it, to save on
	 * kernel memory
	 */

	if (wc_other_cpus == NULL) {
		wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
		    KM_SLEEP);
	}
}

void
i_cpr_free_cpus(void)
{
	int index;
	wc_cpu_t *wc_cpu;

	if (wc_other_cpus != NULL) {
		for (index = 0; index < max_ncpus; index++) {
			wc_cpu = wc_other_cpus + index;
			if (wc_cpu->wc_saved_stack != NULL) {
				kmem_free(wc_cpu->wc_saved_stack,
				    wc_cpu->wc_saved_stack_size);
			}
		}

		kmem_free((void *)wc_other_cpus,
		    max_ncpus * sizeof (wc_cpu_t));
		wc_other_cpus = NULL;
	}
}

/*
 * wrapper for acpica_ddi_save_resources()
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}

/*
 * wrapper for acpica_ddi_restore_resources()
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}

static int
wait_for_set(cpuset_t *set, int who)
{
	int delays;
	char *str = "wait_for_set";

	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
		if (delays == 500) {
			/*
			 * After five seconds, things are probably
			 * looking a bit bleak - explain the hang.
			 */
			cmn_err(CE_NOTE, "cpu%d: started, "
			    "but not running in the kernel yet", who);
			PMD(PMD_SX, ("%s() %d cpu started "
			    "but not running in the kernel yet\n",
			    str, who))
		} else if (delays > 2000) {
			/*
			 * We waited at least 20 seconds; bail.
			 */
			cmn_err(CE_WARN, "cpu%d: timed out", who);
			PMD(PMD_SX, ("%s() %d cpu timed out\n",
			    str, who))
			return (0);
		}

		/*
		 * wait at least 10ms, then check again.
		 */
		drv_usecwait(10000);
	}

	return (1);
}

static void
i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	if (wc_cpu->wc_saved_stack_size < stack_size) {
		if (wc_cpu->wc_saved_stack != NULL) {
			kmem_free(wc_cpu->wc_saved_stack,
			    wc_cpu->wc_saved_stack_size);
		}
		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
		wc_cpu->wc_saved_stack_size = stack_size;
	}

	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
}

void
i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
{
	size_t	stack_size;	/* size of stack */
	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end */

	stack_size = (size_t)end - (size_t)start;

	bcopy(save_stack, start, stack_size);
}