/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"


/*
 * This module contains the guts of the checkpoint-resume mechanism.
 * All code in this module is platform independent.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/callb.h>
#include <sys/processor.h>
#include <sys/machsystm.h>
#include <sys/clock.h>
#include <sys/vfs.h>
#include <sys/kmem.h>
#include <nfs/lm.h>
#include <sys/systm.h>
#include <sys/cpr.h>
#include <sys/bootconf.h>
#include <sys/cyclic.h>
#include <sys/filio.h>
#include <sys/fs/ufs_filio.h>
#include <sys/epm.h>
#include <sys/modctl.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/promif.h>

extern struct cpr_terminator cpr_term;

extern int cpr_alloc_statefile(int);
extern void cpr_start_kernel_threads(void);
extern void cpr_abbreviate_devpath(char *, char *);
extern void cpr_convert_promtime(cpr_time_t *);
extern void cpr_send_notice(void);
extern void cpr_set_bitmap_size(void);
extern void cpr_stat_init();
extern void cpr_statef_close(void);
extern void flush_windows(void);

extern int pm_powering_down;

static int cpr_suspend(void);
static int cpr_resume(void);
static void cpr_suspend_init(void);

cpr_time_t wholecycle_tv;
int cpr_suspend_succeeded;
pfn_t curthreadpfn;
int curthreadremapped;

/*
 * save or restore abort_enable; this prevents a drop
 * to kadb or the prom during cpr_resume_devices() when
 * there is no keyboard present; see abort_sequence_enter()
 */
static void
cpr_sae(int stash)
{
	static int saved_ae = -1;

	if (stash) {
		saved_ae = abort_enable;
		abort_enable = 0;
	} else if (saved_ae != -1) {
		abort_enable = saved_ae;
		saved_ae = -1;
	}
}


/*
 * The main switching point for cpr; this routine starts the checkpoint
 * and statefile saving routines.  On resume, control is returned here,
 * and this routine then calls the resume routine.
 */
int
cpr_main(void)
{
	label_t saveq = ttolwp(curthread)->lwp_qsav;
	int rc;

	if (rc = cpr_default_setup(1))
		return (rc);

	/*
	 * Remember where we are for resume
	 */
	if (!setjmp(&ttolwp(curthread)->lwp_qsav)) {
		/*
		 * try to checkpoint the system; if that fails, return
		 * to userland, otherwise power off.
		 */
		rc = cpr_suspend();
		if (rc || cpr_reusable_mode) {
			/*
			 * We don't really want to go down, or
			 * something went wrong in suspend; do what we can
			 * to put the system back into an operable state,
			 * then return to userland.
			 */
			(void) cpr_resume();
		}
	} else {
		/*
		 * This is the resumed side of longjmp; restore the previous
		 * longjmp pointer if there is one so this will be transparent
		 * to the world.
		 */
		ttolwp(curthread)->lwp_qsav = saveq;
		CPR->c_flags &= ~C_SUSPENDING;
		CPR->c_flags |= C_RESUMING;

		/*
		 * resume the system back to the original state
		 */
		rc = cpr_resume();
	}

	(void) cpr_default_setup(0);

	return (rc);
}
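
/*
 * Informal control-flow sketch of cpr_main() above, derived from the
 * code (for reference only):
 *
 *	setjmp(lwp_qsav) returns 0
 *	    -> first pass: cpr_suspend() dumps state and the machine
 *	       powers off; on error (or in reusable mode), cpr_resume()
 *	       rolls back and we return to userland
 *	setjmp(lwp_qsav) returns nonzero
 *	    -> we were longjmp'd back here after the statefile was
 *	       restored; cpr_resume() rebuilds the running system
 */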


/*
 * check/disable or re-enable UFS logging
 */
static void
cpr_log_status(int enable, int *svstat, vnode_t *vp)
{
	int cmd, status, error;
	char *str, *able;
	fiolog_t fl;
	refstr_t *mntpt;

	str = "cpr_log_status";
	bzero(&fl, sizeof (fl));
	fl.error = FIOLOG_ENONE;

	/*
	 * when disabling, first get and save the logging status (0 or 1)
	 */
	if (enable == 0) {
		if (error = VOP_IOCTL(vp, _FIOISLOG,
		    (uintptr_t)&status, FKIOCTL, CRED(), NULL)) {
			mntpt = vfs_getmntpoint(vp->v_vfsp);
			errp("%s: \"%s\", can't get logging status, error %d\n",
			    str, refstr_value(mntpt), error);
			refstr_rele(mntpt);
			return;
		}
		*svstat = status;
		DEBUG5(
		{
			mntpt = vfs_getmntpoint(vp->v_vfsp);
			errp("%s: \"%s\", logging status = %d\n",
			    str, refstr_value(mntpt), status);
			refstr_rele(mntpt);
		});

		able = "disable";
		cmd = _FIOLOGDISABLE;
	} else {
		able = "enable";
		cmd = _FIOLOGENABLE;
	}

	/*
	 * disable or re-enable logging when the saved status is 1
	 */
	if (*svstat == 1) {
		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
		    FKIOCTL, CRED(), NULL);
		if (error) {
			mntpt = vfs_getmntpoint(vp->v_vfsp);
			errp("%s: \"%s\", can't %s logging, error %d\n",
			    str, refstr_value(mntpt), able, error);
			refstr_rele(mntpt);
		} else {
			DEBUG5(
			{
				mntpt = vfs_getmntpoint(vp->v_vfsp);
				errp("%s: \"%s\", logging is now %sd\n",
				    str, refstr_value(mntpt), able);
				refstr_rele(mntpt);
			});
		}
	}

	/*
	 * when enabling logging, reset the saved status
	 * to unknown for next time
	 */
	if (enable)
		*svstat = -1;
}
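
/*
 * Typical calling pattern (see cpr_ufs_logging() below): the logging
 * status is captured with cpr_log_status(0, &stat, vp) when suspend
 * disables logging, and the same &stat is passed back through
 * cpr_log_status(1, &stat, vp) on resume to re-enable it.
 */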


/*
 * enable/disable UFS logging on the filesystems containing cpr_default_path
 * and the cpr statefile.  since the statefile can be on any fs, that fs
 * needs to be handled separately.  this routine and cprboot expect that
 * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
 * is loaded from the device with rootfs and uses the same device to open
 * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
 * file outside of rootfs would cause errors during cprboot, plus cpr and
 * fsck problems with the new fs if logging were enabled.
 */
static int
cpr_ufs_logging(int enable)
{
	static int def_status = -1, sf_status = -1;
	struct vfs *vfsp;
	char *fname;
	vnode_t *vp;
	int error;

	if (cpr_reusable_mode)
		return (0);

	if (error = cpr_open_deffile(FREAD, &vp))
		return (error);
	cpr_log_status(enable, &def_status, vp);
	vfsp = vp->v_vfsp;
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
	VN_RELE(vp);

	fname = cpr_build_statefile_path();
	if (fname == NULL)
		return (ENOENT);
	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
	    0600, &vp, CRCREAT, 0)) {
		errp("cpr_ufs_logging: can't open/create \"%s\", error %d\n",
		    fname, error);
		return (error);
	}

	/*
	 * check the logging status for the statefile if it resides
	 * on a different fs and the type is a regular file
	 */
	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
		cpr_log_status(enable, &sf_status, vp);
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED());
	VN_RELE(vp);

	return (0);
}


/*
 * Check if klmmod is loaded and call a lock manager service; if klmmod
 * is not loaded, the services aren't needed and a call would trigger a
 * modload, which would block since another thread would never run.
 */
static void
cpr_lock_mgr(void (*service)(void))
{
	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
		(*service)();
}

/*
 * Take the system down to a checkpointable state and write
 * the state file; the following are executed sequentially:
 *
 *    - Request all user threads to stop themselves
 *    - push out and invalidate user pages
 *    - bring the statefile inode incore to prevent a miss later
 *    - request all daemons to stop
 *    - check and make sure all threads are stopped
 *    - sync the file system
 *    - suspend all devices
 *    - block interrupts
 *    - dump system state and memory to state file
 */
static int
cpr_suspend(void)
{
	int sf_realloc, rc, skt_rc, nverr;

	cpr_set_substate(C_ST_SUSPEND_BEGIN);

	cpr_suspend_init();

	cpr_save_time();

	cpr_tod_get(&wholecycle_tv);
	CPR_STAT_EVENT_START("Suspend Total");

	if (!cpr_reusable_mode) {
		/*
		 * We need to validate the default file before fs
		 * functionality is disabled.
		 */
		if (rc = cpr_validate_definfo(0))
			return (rc);
	}

	i_cpr_save_machdep_info();

	/* Stop PM scans ASAP */
	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);

	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
	    NULL, NULL, PM_DEP_WAIT, NULL, 0);

	cpr_set_substate(C_ST_MP_OFFLINE);
	if (rc = cpr_mp_offline())
		return (rc);

	/*
	 * Ask the user threads to stop by themselves, but
	 * if they don't or can't after 3 retries, we give up on CPR.
	 * 3 retries is not an arbitrary number; 2 is possible if
	 * a thread has been forked before the parent thread is stopped.
	 */
	DEBUG1(errp("\nstopping user threads..."));
	CPR_STAT_EVENT_START(" stop users");
	cpr_set_substate(C_ST_STOP_USER_THREADS);
	if (rc = cpr_stop_user_threads())
		return (rc);
	CPR_STAT_EVENT_END(" stop users");
	DEBUG1(errp("done\n"));

	pm_save_direct_levels();

	/*
	 * User threads are stopped.  We will start communicating with the
	 * user via prom_printf (some debug output may have already happened),
	 * so let anybody who cares know about this (bug 4096122)
	 */
	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);

	cpr_send_notice();
	if (cpr_debug)
		errp("\n");

	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);

	/*
	 * Reattach any drivers which originally exported the
	 * no-involuntary-power-cycles property.  We need to do this before
	 * stopping kernel threads because modload is implemented using
	 * a kernel thread.
	 */
	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
	if (!pm_reattach_noinvol())
		return (ENXIO);

	/*
	 * if ufs logging is enabled, we need to disable it before
	 * stopping kernel threads so that the ufs delete and roll
	 * threads can do the work.
	 */
	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
	if (rc = cpr_ufs_logging(0))
		return (rc);

	/*
	 * Use sync_all to swap out all user pages and find out how much
	 * extra space is needed for user pages that don't have backing
	 * store space left.
	 */
	CPR_STAT_EVENT_START(" swapout upages");
	vfs_sync(SYNC_ALL);
	CPR_STAT_EVENT_END(" swapout upages");

	cpr_set_bitmap_size();
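
	/*
	 * Note on the retry loop below: if cpr_dump() later fails with
	 * ENOSPC, cpr_resume() is called for a partial rollback and
	 * control comes back here (via "goto alloc_statefile") to
	 * reallocate a bigger statefile and retry the suspend.
	 */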
alloc_statefile:
	/*
	 * If our last state was C_ST_DUMP_NOSPC, we're trying to realloc
	 * the statefile; otherwise this is the first attempt.
	 */
	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;

	CPR_STAT_EVENT_START(" alloc statefile");
	cpr_set_substate(C_ST_STATEF_ALLOC);
	if (rc = cpr_alloc_statefile(sf_realloc)) {
		if (sf_realloc)
			errp("realloc failed\n");
		return (rc);
	}
	CPR_STAT_EVENT_END(" alloc statefile");

	/*
	 * Sync the filesystem to preserve its integrity.
	 *
	 * This sync is also used to flush out all B_DELWRI buffers
	 * (fs cache) which are mapped and neither dirty nor referenced
	 * before cpr_invalidate_pages destroys them.  fsflush does a
	 * similar thing.
	 */
	sync();

	/*
	 * destroy all clean file mapped kernel pages
	 */
	CPR_STAT_EVENT_START(" clean pages");
	DEBUG1(errp("cleaning up mapped pages..."));
	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
	DEBUG1(errp("done\n"));
	CPR_STAT_EVENT_END(" clean pages");


	/*
	 * Hooks needed by the lock manager prior to suspending.
	 * Refer to code for more comments.
	 */
	cpr_lock_mgr(lm_cprsuspend);

	/*
	 * Now suspend all the devices
	 */
	CPR_STAT_EVENT_START(" stop drivers");
	DEBUG1(errp("suspending drivers..."));
	cpr_set_substate(C_ST_SUSPEND_DEVICES);
	pm_powering_down = 1;
	rc = cpr_suspend_devices(ddi_root_node());
	pm_powering_down = 0;
	if (rc)
		return (rc);
	DEBUG1(errp("done\n"));
	CPR_STAT_EVENT_END(" stop drivers");

	/*
	 * Stop all daemon activities
	 */
	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
	if (skt_rc = cpr_stop_kernel_threads())
		return (skt_rc);

	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);

	pm_reattach_noinvol_fini();

	cpr_sae(1);

	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	/*
	 * It's safer to do tod_get before we disable all interrupts.
	 */
	CPR_STAT_EVENT_START(" write statefile");

	/*
	 * it's time to ignore the outside world: stop the real time
	 * clock and disable any further interrupt activity.
	 */
	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */

	mutex_enter(&cpu_lock);
	cyclic_suspend();
	mutex_exit(&cpu_lock);

	mon_clock_stop();
	mon_clock_unshare();
	mon_clock_start();

	i_cpr_stop_intr();
	DEBUG1(errp("interrupt is stopped\n"));

	/*
	 * Since we will now disable the mechanism that causes prom_printfs
	 * to power up (if needed) the console fb/monitor, we assert that
	 * it must be up now.
	 */
	ASSERT(pm_cfb_is_up());
	prom_suspend_prepost();

	/*
	 * getting ready to write ourselves out: flush the register
	 * windows to make sure that our stack is good when we
	 * come back on the resume side.
	 */
	flush_windows();

	/*
	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
	 *
	 * The system is quiesced at this point; we are ready to either dump
	 * to the state file for an extended sleep, or do a simple shutdown
	 * for systems with non-volatile memory.
	 */

	/*
	 * special handling for reusable:
	 */
	if (cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_1);
		if (nverr = cpr_set_properties(1))
			return (nverr);
	}

	cpr_set_substate(C_ST_DUMP);
	rc = cpr_dump(C_VP);

	/*
	 * if any error occurred during dump, more
	 * special handling for reusable:
	 */
	if (rc && cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_0);
		if (nverr = cpr_set_properties(0))
			return (nverr);
	}

	if (rc == ENOSPC) {
		cpr_set_substate(C_ST_DUMP_NOSPC);
		(void) cpr_resume();
		goto alloc_statefile;
	} else if (rc == 0) {
		if (cpr_reusable_mode) {
			cpr_set_substate(C_ST_REUSABLE);
			longjmp(&ttolwp(curthread)->lwp_qsav);
		} else
			rc = cpr_set_properties(1);
	}
	return (rc);
}
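
/*
 * Mapping from the substate recorded during cpr_suspend() to the
 * rollback entry point taken in cpr_resume() below (summarized from
 * its switch statement):
 *
 *	C_ST_DUMP			rb_all (full rollback)
 *	C_ST_REUSABLE, C_ST_DUMP_NOSPC,
 *	C_ST_SETPROPS_0/1		rb_dump
 *	C_ST_STOP_KERNEL_THREADS	rb_stop_kernel_threads
 *	C_ST_SUSPEND_DEVICES		rb_suspend_devices
 *	C_ST_STATEF_ALLOC		rb_statef_alloc
 *	C_ST_DISABLE_UFS_LOGGING	rb_disable_ufs_logging
 *	C_ST_PM_REATTACH_NOINVOL	rb_pm_reattach_noinvol
 *	C_ST_STOP_USER_THREADS		rb_stop_user_threads
 *	C_ST_MP_OFFLINE			rb_mp_offline
 *	anything earlier		rb_others
 */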


/*
 * Bring the system back up from a checkpoint; at this point
 * the VM has been minimally restored by boot.  The following
 * are executed sequentially:
 *
 *    - machdep setup and enable interrupts (mp startup if it's mp)
 *    - resume all devices
 *    - restart daemons
 *    - put all threads back on run queue
 */
static int
cpr_resume(void)
{
	cpr_time_t pwron_tv, *ctp;
	char *str;
	int rc = 0;

	/*
	 * The following switch is used to resume the system
	 * that was suspended to a different level.
	 */
	DEBUG1(errp("\nEntering cpr_resume...\n"));

	/*
	 * Note:
	 *
	 * The rollback labels rb_xyz do not represent the cpr resume
	 * state when event 'xyz' has happened.  Instead they represent
	 * the state during cpr suspend when event 'xyz' was being
	 * entered (and where cpr suspend failed).  The actual call that
	 * failed may also need to be partially rolled back, since they
	 * aren't atomic in most cases.  In other words, rb_xyz means
	 * "roll back all cpr suspend events that happened before 'xyz',
	 * and the one that caused the failure, if necessary."
	 */
	switch (CPR->c_substate) {
	case C_ST_DUMP:
		/*
		 * This is most likely a full-fledged cpr_resume after
		 * a complete and successful cpr suspend.  Just roll back
		 * everything.
		 */
		break;

	case C_ST_REUSABLE:
	case C_ST_DUMP_NOSPC:
	case C_ST_SETPROPS_0:
	case C_ST_SETPROPS_1:
		/*
		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
		 * special switch cases here.  The other two do not have
		 * any state change during cpr_suspend() that needs to
		 * be rolled back.  But these are exit points from
		 * cpr_suspend, so theoretically (or in the future) it
		 * is possible that a need to roll back a state
		 * change arises between these exit points.
		 */
		goto rb_dump;

	case C_ST_STOP_KERNEL_THREADS:
		goto rb_stop_kernel_threads;

	case C_ST_SUSPEND_DEVICES:
		goto rb_suspend_devices;

	case C_ST_STATEF_ALLOC:
		goto rb_statef_alloc;

	case C_ST_DISABLE_UFS_LOGGING:
		goto rb_disable_ufs_logging;

	case C_ST_PM_REATTACH_NOINVOL:
		goto rb_pm_reattach_noinvol;

	case C_ST_STOP_USER_THREADS:
		goto rb_stop_user_threads;

	case C_ST_MP_OFFLINE:
		goto rb_mp_offline;

	default:
		goto rb_others;
	}
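
	/*
	 * From here on, execution falls through the rb_* labels in the
	 * reverse order of the corresponding suspend steps, each label
	 * undoing one more piece of the suspend.
	 */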
rb_all:
	/*
	 * setup debugger trapping.
	 */
	if (cpr_suspend_succeeded)
		i_cpr_set_tbr();

	/*
	 * tell the prom to monitor keys before the kernel comes alive
	 */
	mon_clock_start();

	/*
	 * perform platform-dependent initialization
	 */
	if (cpr_suspend_succeeded)
		i_cpr_machdep_setup();

	/*
	 * the system did not really go down if we jump here
	 */
rb_dump:
	/*
	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
	 *
	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
	 */
	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
	if (cpr_suspend_succeeded)
		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);

	prom_resume_prepost();

	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
		kdi_dvec_cpr_restart();

	/*
	 * let the tmp callout catch up.
	 */
	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);

	i_cpr_enable_intr();

	mon_clock_stop();
	mon_clock_share();

	mutex_enter(&cpu_lock);
	cyclic_resume();
	mutex_exit(&cpu_lock);

	mon_clock_start();

	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */

	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);

	/*
	 * statistics gathering
	 */
	if (cpr_suspend_succeeded) {
		/*
		 * Prevent a false alarm in tod_validate() due to the tod
		 * value changing between suspend and resume
		 */
		cpr_tod_fault_reset();

		cpr_convert_promtime(&pwron_tv);

		ctp = &cpr_term.tm_shutdown;
		CPR_STAT_EVENT_END_TMZ(" write statefile", ctp);
		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);

		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);

		str = " prom time";
		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
		ctp = &cpr_term.tm_cprboot_start;
		CPR_STAT_EVENT_END_TMZ(str, ctp);

		str = " read statefile";
		CPR_STAT_EVENT_START_TMZ(str, ctp);
		ctp = &cpr_term.tm_cprboot_end;
		CPR_STAT_EVENT_END_TMZ(str, ctp);
	}

rb_stop_kernel_threads:
	/*
	 * Put all threads back where they belong; get the kernel
	 * daemons straightened up too.  Note that the callback table
	 * locked during cpr_stop_kernel_threads() is released only
	 * in cpr_start_kernel_threads().  Ensure modunloading is
	 * disabled before starting kernel threads; we don't want the
	 * modunload thread to start changing the device tree underneath us.
	 */
	modunload_disable();
	cpr_start_kernel_threads();

rb_suspend_devices:
	DEBUG1(errp("resuming devices..."));
	CPR_STAT_EVENT_START(" start drivers");

	/*
	 * The policy here is to continue resuming everything we can if we
	 * did not successfully finish suspend, and to panic if we are
	 * coming back from a fully suspended system.
	 */
	rc = cpr_resume_devices(ddi_root_node(), 0);

	cpr_sae(0);

	str = "Failed to resume one or more devices.";
	if (rc && CPR->c_substate == C_ST_DUMP)
		cpr_err(CE_PANIC, str);
	else if (rc)
		cpr_err(CE_WARN, str);
	CPR_STAT_EVENT_END(" start drivers");
	DEBUG1(errp("done\n"));

	/*
	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
	 * modunloading now.
	 */
	if (CPR->c_substate != C_ST_SUSPEND_DEVICES)
		modunload_enable();

	/*
	 * Hooks needed by the lock manager prior to resuming.
	 * Refer to code for more comments.
	 */
	cpr_lock_mgr(lm_cprresume);

	/*
	 * This is a partial (half) resume during cpr suspend; we
	 * haven't yet given up on the suspend.  On return from here,
	 * cpr_suspend() will try to reallocate and retry the suspend.
	 */
	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
		mon_clock_stop();
		return (0);
	}

rb_statef_alloc:
	cpr_statef_close();

rb_disable_ufs_logging:
	/*
	 * if ufs logging was disabled, re-enable it
	 */
	(void) cpr_ufs_logging(1);

rb_pm_reattach_noinvol:
	/*
	 * When pm_reattach_noinvol() succeeds, the modunload thread
	 * remains disabled until after cpr suspend passes the
	 * C_ST_STOP_KERNEL_THREADS state.  If any failure happens before
	 * cpr suspend reaches this state, we'll need to re-enable the
	 * modunload thread during rollback.
	 */
	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
	    CPR->c_substate == C_ST_STATEF_ALLOC ||
	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
		pm_reattach_noinvol_fini();
	}

	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);

	pm_restore_direct_levels();

rb_stop_user_threads:
	DEBUG1(errp("starting user threads..."));
	cpr_start_user_threads();
	DEBUG1(errp("done\n"));

rb_mp_offline:
	if (cpr_mp_online())
		cpr_err(CE_WARN, "Failed to online all the processors.");

rb_others:
	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL, PM_DEP_WAIT,
	    NULL, 0);

	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);

	/*
	 * now that all the drivers are going, the kernel kbd driver can
	 * take over; turn off the prom monitor clock
	 */
	mon_clock_stop();

	if (cpr_suspend_succeeded) {
		cpr_restore_time();
		cpr_stat_record_events();
	}

	if (!cpr_reusable_mode)
		cpr_clear_definfo();

	DEBUG1(errp("Sending SIGTHAW..."));
	cpr_signal_user(SIGTHAW);
	DEBUG1(errp("done\n"));

	CPR_STAT_EVENT_END("Resume Total");

	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
	CPR_STAT_EVENT_END("WHOLE CYCLE");

	DEBUG1(cmn_err(CE_CONT, "\nThe system is back where you left!\n"));

	CPR_STAT_EVENT_START("POST CPR DELAY");

#ifdef CPR_STAT
	ctp = &cpr_term.tm_shutdown;
	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);

	CPR_STAT_EVENT_PRINT();
#endif /* CPR_STAT */

	return (rc);
}

static void
cpr_suspend_init(void)
{
	cpr_time_t *ctp;

	cpr_stat_init();

	/*
	 * If cpr_suspend() failed before cpr_dump() got a chance
	 * to reinitialize the terminator of the statefile,
	 * the values of the old terminator will still linger around.
	 * Since the terminator contains information that we need to
	 * decide whether suspend succeeded or not, we need to
	 * reinitialize it as early as possible.
	 */
	cpr_term.real_statef_size = 0;
	ctp = &cpr_term.tm_shutdown;
	bzero(ctp, sizeof (*ctp));
	ctp = &cpr_term.tm_cprboot_start;
	bzero(ctp, sizeof (*ctp));
	ctp = &cpr_term.tm_cprboot_end;
	bzero(ctp, sizeof (*ctp));

	/*
	 * Look up the physical address of our thread structure.  This should
	 * never be invalid, and the entire thread structure is expected
	 * to reside within the same pfn.
	 */
	curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
	ASSERT(curthreadpfn != PFN_INVALID);
	ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
	    (caddr_t)curthread + sizeof (kthread_t) - 1));

	cpr_suspend_succeeded = 0;
}