xref: /titanic_44/usr/src/uts/common/cpr/cpr_main.c (revision 18c2aff776a775d34a4c9893a4c72e0434d68e36)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 
30 /*
31  * This module contains the guts of checkpoint-resume mechanism.
32  * All code in this module is platform independent.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/errno.h>
37 #include <sys/callb.h>
38 #include <sys/processor.h>
39 #include <sys/machsystm.h>
40 #include <sys/clock.h>
41 #include <sys/vfs.h>
42 #include <sys/kmem.h>
43 #include <nfs/lm.h>
44 #include <sys/systm.h>
45 #include <sys/cpr.h>
46 #include <sys/bootconf.h>
47 #include <sys/cyclic.h>
48 #include <sys/filio.h>
49 #include <sys/fs/ufs_filio.h>
50 #include <sys/epm.h>
51 #include <sys/modctl.h>
52 #include <sys/reboot.h>
53 #include <sys/kdi.h>
54 #include <sys/promif.h>
55 
56 extern struct cpr_terminator cpr_term;
57 
58 extern int cpr_alloc_statefile(int);
59 extern void cpr_start_kernel_threads(void);
60 extern void cpr_abbreviate_devpath(char *, char *);
61 extern void cpr_convert_promtime(cpr_time_t *);
62 extern void cpr_send_notice(void);
63 extern void cpr_set_bitmap_size(void);
64 extern void cpr_stat_init();
65 extern void cpr_statef_close(void);
66 extern void flush_windows(void);
67 
68 extern int pm_powering_down;
69 
70 static int cpr_suspend(void);
71 static int cpr_resume(void);
72 static void cpr_suspend_init(void);
73 
74 cpr_time_t wholecycle_tv;
75 int cpr_suspend_succeeded;
76 pfn_t curthreadpfn;
77 int curthreadremapped;
78 
79 /*
80  * save or restore abort_enable;  this prevents a drop
81  * to kadb or prom during cpr_resume_devices() when
82  * there is no kbd present;  see abort_sequence_enter()
83  */
84 static void
85 cpr_sae(int stash)
86 {
87 	static int saved_ae = -1;
88 
89 	if (stash) {
90 		saved_ae = abort_enable;
91 		abort_enable = 0;
92 	} else if (saved_ae != -1) {
93 		abort_enable = saved_ae;
94 		saved_ae = -1;
95 	}
96 }
97 
98 
99 /*
100  * The main switching point for cpr, this routine starts the ckpt
101  * and state file saving routines; on resume the control is
102  * returned back to here and it then calls the resume routine.
103  */
104 int
105 cpr_main(void)
106 {
107 	label_t saveq = ttolwp(curthread)->lwp_qsav;
108 	int rc;
109 
110 	if (rc = cpr_default_setup(1))
111 		return (rc);
112 
113 	/*
114 	 * Remember where we are for resume
115 	 */
116 	if (!setjmp(&ttolwp(curthread)->lwp_qsav)) {
117 		/*
118 		 * try to checkpoint the system, if failed return back
119 		 * to userland, otherwise power off.
120 		 */
121 		rc = cpr_suspend();
122 		if (rc || cpr_reusable_mode) {
123 			/*
124 			 * We don't really want to go down, or
125 			 * something went wrong in suspend, do what we can
126 			 * to put the system back to an operable state then
127 			 * return back to userland.
128 			 */
129 			(void) cpr_resume();
130 		}
131 	} else {
132 		/*
133 		 * This is the resumed side of longjmp, restore the previous
134 		 * longjmp pointer if there is one so this will be transparent
135 		 * to the world.
136 		 */
137 		ttolwp(curthread)->lwp_qsav = saveq;
138 		CPR->c_flags &= ~C_SUSPENDING;
139 		CPR->c_flags |= C_RESUMING;
140 
141 		/*
142 		 * resume the system back to the original state
143 		 */
144 		rc = cpr_resume();
145 	}
146 
147 	(void) cpr_default_setup(0);
148 
149 	return (rc);
150 }
151 
152 
153 /*
154  * check/disable or re-enable UFS logging
155  */
156 static void
157 cpr_log_status(int enable, int *svstat, vnode_t *vp)
158 {
159 	int cmd, status, error;
160 	char *str, *able;
161 	fiolog_t fl;
162 	refstr_t *mntpt;
163 
164 	str = "cpr_log_status";
165 	bzero(&fl, sizeof (fl));
166 	fl.error = FIOLOG_ENONE;
167 
168 	/*
169 	 * when disabling, first get and save logging status (0 or 1)
170 	 */
171 	if (enable == 0) {
172 		if (error = VOP_IOCTL(vp, _FIOISLOG,
173 		    (uintptr_t)&status, FKIOCTL, CRED(), NULL)) {
174 			mntpt = vfs_getmntpoint(vp->v_vfsp);
175 			errp("%s: \"%s\", cant get logging status, error %d\n",
176 			    str, refstr_value(mntpt), error);
177 			refstr_rele(mntpt);
178 			return;
179 		}
180 		*svstat = status;
181 		DEBUG5(
182 		{
183 			mntpt = vfs_getmntpoint(vp->v_vfsp);
184 			errp("%s: \"%s\", logging status = %d\n",
185 			    str, refstr_value(mntpt), status);
186 			refstr_rele(mntpt);
187 		});
188 
189 		able = "disable";
190 		cmd = _FIOLOGDISABLE;
191 	} else {
192 		able = "enable";
193 		cmd = _FIOLOGENABLE;
194 	}
195 
196 	/*
197 	 * disable or re-enable logging when the saved status is 1
198 	 */
199 	if (*svstat == 1) {
200 		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
201 		    FKIOCTL, CRED(), NULL);
202 		if (error) {
203 			mntpt = vfs_getmntpoint(vp->v_vfsp);
204 			errp("%s: \"%s\", cant %s logging, error %d\n",
205 			    str, refstr_value(mntpt), able, error);
206 			refstr_rele(mntpt);
207 		} else {
208 			DEBUG5(
209 			{
210 				mntpt = vfs_getmntpoint(vp->v_vfsp);
211 				errp("%s: \"%s\", logging is now %sd\n",
212 				    str, refstr_value(mntpt), able);
213 				refstr_rele(mntpt);
214 			});
215 		}
216 	}
217 
218 	/*
219 	 * when enabling logging, reset the saved status
220 	 * to unknown for next time
221 	 */
222 	if (enable)
223 		*svstat = -1;
224 }
225 
226 
227 /*
228  * enable/disable UFS logging on filesystems containing cpr_default_path
229  * and cpr statefile.  since the statefile can be on any fs, that fs
230  * needs to be handled separately.  this routine and cprboot expect that
231  * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
232  * is loaded from the device with rootfs and uses the same device to open
233  * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
234  * file outside of rootfs would cause errors during cprboot, plus cpr and
235  * fsck problems with the new fs if logging were enabled.
236  */
237 static int
238 cpr_ufs_logging(int enable)
239 {
240 	static int def_status = -1, sf_status = -1;
241 	struct vfs *vfsp;
242 	char *fname;
243 	vnode_t *vp;
244 	int error;
245 
246 	if (cpr_reusable_mode)
247 		return (0);
248 
249 	if (error = cpr_open_deffile(FREAD, &vp))
250 		return (error);
251 	cpr_log_status(enable, &def_status, vp);
252 	vfsp = vp->v_vfsp;
253 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
254 	VN_RELE(vp);
255 
256 	fname = cpr_build_statefile_path();
257 	if (fname == NULL)
258 		return (ENOENT);
259 	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
260 	    0600, &vp, CRCREAT, 0)) {
261 		errp("cpr_ufs_logging: cant open/create \"%s\", error %d\n",
262 		    fname, error);
263 		return (error);
264 	}
265 
266 	/*
267 	 * check logging status for the statefile if it resides
268 	 * on a different fs and the type is a regular file
269 	 */
270 	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
271 		cpr_log_status(enable, &sf_status, vp);
272 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED());
273 	VN_RELE(vp);
274 
275 	return (0);
276 }
277 
278 
279 /*
280  * Check if klmmod is loaded and call a lock manager service; if klmmod
281  * is not loaded, the services aren't needed and a call would trigger a
282  * modload, which would block since another thread would never run.
283  */
284 static void
285 cpr_lock_mgr(void (*service)(void))
286 {
287 	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
288 		(*service)();
289 }
290 
291 /*
292  * Take the system down to a checkpointable state and write
293  * the state file, the following are sequentially executed:
294  *
295  *    - Request all user threads to stop themselves
296  *    - push out and invalidate user pages
297  *    - bring statefile inode incore to prevent a miss later
298  *    - request all daemons to stop
299  *    - check and make sure all threads are stopped
300  *    - sync the file system
301  *    - suspend all devices
302  *    - block intrpts
303  *    - dump system state and memory to state file
304  */
305 static int
306 cpr_suspend(void)
307 {
308 	int sf_realloc, rc, skt_rc, nverr;
309 
310 	cpr_set_substate(C_ST_SUSPEND_BEGIN);
311 
312 	cpr_suspend_init();
313 
314 	cpr_save_time();
315 
316 	cpr_tod_get(&wholecycle_tv);
317 	CPR_STAT_EVENT_START("Suspend Total");
318 
319 	if (!cpr_reusable_mode) {
320 		/*
321 		 * We need to validate default file before fs functionality
322 		 * is disabled.
323 		 */
324 		if (rc = cpr_validate_definfo(0))
325 			return (rc);
326 	}
327 
328 	i_cpr_save_machdep_info();
329 
330 	/* Stop PM scans ASAP */
331 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);
332 
333 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
334 	    NULL, NULL, PM_DEP_WAIT, NULL, 0);
335 
336 	cpr_set_substate(C_ST_MP_OFFLINE);
337 	if (rc = cpr_mp_offline())
338 		return (rc);
339 
340 	/*
341 	 * Ask the user threads to stop by themselves, but
342 	 * if they don't or can't after 3 retries, we give up on CPR.
343 	 * The 3 retry is not a random number because 2 is possible if
344 	 * a thread has been forked before the parent thread is stopped.
345 	 */
346 	DEBUG1(errp("\nstopping user threads..."));
347 	CPR_STAT_EVENT_START("  stop users");
348 	cpr_set_substate(C_ST_STOP_USER_THREADS);
349 	if (rc = cpr_stop_user_threads())
350 		return (rc);
351 	CPR_STAT_EVENT_END("  stop users");
352 	DEBUG1(errp("done\n"));
353 
354 	pm_save_direct_levels();
355 
356 	/*
357 	 * User threads are stopped.  We will start communicating with the
358 	 * user via prom_printf (some debug output may have already happened)
359 	 * so let anybody who cares know about this (bug 4096122)
360 	 */
361 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);
362 
363 	cpr_send_notice();
364 	if (cpr_debug)
365 		errp("\n");
366 
367 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);
368 
369 	/*
370 	 * Reattach any drivers which originally exported the
371 	 * no-involuntary-power-cycles property.  We need to do this before
372 	 * stopping kernel threads because modload is implemented using
373 	 * a kernel thread.
374 	 */
375 	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
376 	if (!pm_reattach_noinvol())
377 		return (ENXIO);
378 
379 	/*
380 	 * if ufs logging is enabled, we need to disable before
381 	 * stopping kernel threads so that ufs delete and roll
382 	 * threads can do the work.
383 	 */
384 	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
385 	if (rc = cpr_ufs_logging(0))
386 		return (rc);
387 
388 	/*
389 	 * Use sync_all to swap out all user pages and find out how much
390 	 * extra space needed for user pages that don't have back store
391 	 * space left.
392 	 */
393 	CPR_STAT_EVENT_START("  swapout upages");
394 	vfs_sync(SYNC_ALL);
395 	CPR_STAT_EVENT_END("  swapout upages");
396 
397 	cpr_set_bitmap_size();
398 
399 alloc_statefile:
400 	/*
401 	 * If our last state was C_ST_DUMP_NOSPC, we're trying to realloc
402 	 * the statefile, otherwise this is the first attempt.
403 	 */
404 	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;
405 
406 	CPR_STAT_EVENT_START("  alloc statefile");
407 	cpr_set_substate(C_ST_STATEF_ALLOC);
408 	if (rc = cpr_alloc_statefile(sf_realloc)) {
409 		if (sf_realloc)
410 			errp("realloc failed\n");
411 		return (rc);
412 	}
413 	CPR_STAT_EVENT_END("  alloc statefile");
414 
415 	/*
416 	 * Sync the filesystem to preserve its integrity.
417 	 *
418 	 * This sync is also used to flush out all B_DELWRI buffers (fs cache)
419 	 * which are mapped and neither dirty nor referenced before
420 	 * cpr_invalidate_pages destroys them. fsflush does similar thing.
421 	 */
422 	sync();
423 
424 	/*
425 	 * destroy all clean file mapped kernel pages
426 	 */
427 	CPR_STAT_EVENT_START("  clean pages");
428 	DEBUG1(errp("cleaning up mapped pages..."));
429 	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
430 	DEBUG1(errp("done\n"));
431 	CPR_STAT_EVENT_END("  clean pages");
432 
433 
434 	/*
435 	 * Hooks needed by lock manager prior to suspending.
436 	 * Refer to code for more comments.
437 	 */
438 	cpr_lock_mgr(lm_cprsuspend);
439 
440 	/*
441 	 * Now suspend all the devices
442 	 */
443 	CPR_STAT_EVENT_START("  stop drivers");
444 	DEBUG1(errp("suspending drivers..."));
445 	cpr_set_substate(C_ST_SUSPEND_DEVICES);
446 	pm_powering_down = 1;
447 	rc = cpr_suspend_devices(ddi_root_node());
448 	pm_powering_down = 0;
449 	if (rc)
450 		return (rc);
451 	DEBUG1(errp("done\n"));
452 	CPR_STAT_EVENT_END("  stop drivers");
453 
454 	/*
455 	 * Stop all daemon activities
456 	 */
457 	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
458 	if (skt_rc = cpr_stop_kernel_threads())
459 		return (skt_rc);
460 
461 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);
462 
463 	pm_reattach_noinvol_fini();
464 
465 	cpr_sae(1);
466 
467 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);
468 
469 	/*
470 	 * It's safer to do tod_get before we disable all intr.
471 	 */
472 	CPR_STAT_EVENT_START("  write statefile");
473 
474 	/*
475 	 * it's time to ignore the outside world, stop the real time
476 	 * clock and disable any further intrpt activity.
477 	 */
478 	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */
479 
480 	mutex_enter(&cpu_lock);
481 	cyclic_suspend();
482 	mutex_exit(&cpu_lock);
483 
484 	mon_clock_stop();
485 	mon_clock_unshare();
486 	mon_clock_start();
487 
488 	i_cpr_stop_intr();
489 	DEBUG1(errp("interrupt is stopped\n"));
490 
491 	/*
492 	 * Since we will now disable the mechanism that causes prom_printfs
493 	 * to power up (if needed) the console fb/monitor, we assert that
494 	 * it must be up now.
495 	 */
496 	ASSERT(pm_cfb_is_up());
497 	prom_suspend_prepost();
498 
499 	/*
500 	 * getting ready to write ourself out, flush the register
501 	 * windows to make sure that our stack is good when we
502 	 * come back on the resume side.
503 	 */
504 	flush_windows();
505 
506 	/*
507 	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
508 	 *
509 	 * The system is quiesced at this point, we are ready to either dump
510 	 * to the state file for a extended sleep or a simple shutdown for
511 	 * systems with non-volatile memory.
512 	 */
513 
514 	/*
515 	 * special handling for reusable:
516 	 */
517 	if (cpr_reusable_mode) {
518 		cpr_set_substate(C_ST_SETPROPS_1);
519 		if (nverr = cpr_set_properties(1))
520 			return (nverr);
521 	}
522 
523 	cpr_set_substate(C_ST_DUMP);
524 	rc = cpr_dump(C_VP);
525 
526 	/*
527 	 * if any error occured during dump, more
528 	 * special handling for reusable:
529 	 */
530 	if (rc && cpr_reusable_mode) {
531 		cpr_set_substate(C_ST_SETPROPS_0);
532 		if (nverr = cpr_set_properties(0))
533 			return (nverr);
534 	}
535 
536 	if (rc == ENOSPC) {
537 		cpr_set_substate(C_ST_DUMP_NOSPC);
538 		(void) cpr_resume();
539 		goto alloc_statefile;
540 	} else if (rc == 0) {
541 		if (cpr_reusable_mode) {
542 			cpr_set_substate(C_ST_REUSABLE);
543 			longjmp(&ttolwp(curthread)->lwp_qsav);
544 		} else
545 			rc = cpr_set_properties(1);
546 	}
547 	return (rc);
548 }
549 
550 
551 /*
552  * Bring the system back up from a checkpoint, at this point
553  * the VM has been minimally restored by boot, the following
554  * are executed sequentially:
555  *
556  *    - machdep setup and enable interrupts (mp startup if it's mp)
557  *    - resume all devices
558  *    - restart daemons
559  *    - put all threads back on run queue
560  */
561 static int
562 cpr_resume(void)
563 {
564 	cpr_time_t pwron_tv, *ctp;
565 	char *str;
566 	int rc = 0;
567 
568 	/*
569 	 * The following switch is used to resume the system
570 	 * that was suspended to a different level.
571 	 */
572 	DEBUG1(errp("\nEntering cpr_resume...\n"));
573 
574 	/*
575 	 * Note:
576 	 *
577 	 * The rollback labels rb_xyz do not represent the cpr resume
578 	 * state when event 'xyz' has happened. Instead they represent
579 	 * the state during cpr suspend when event 'xyz' was being
580 	 * entered (and where cpr suspend failed). The actual call that
581 	 * failed may also need to be partially rolled back, since they
582 	 * aren't atomic in most cases.  In other words, rb_xyz means
583 	 * "roll back all cpr suspend events that happened before 'xyz',
584 	 * and the one that caused the failure, if necessary."
585 	 */
586 	switch (CPR->c_substate) {
587 	case C_ST_DUMP:
588 		/*
589 		 * This is most likely a full-fledged cpr_resume after
590 		 * a complete and successful cpr suspend. Just roll back
591 		 * everything.
592 		 */
593 		break;
594 
595 	case C_ST_REUSABLE:
596 	case C_ST_DUMP_NOSPC:
597 	case C_ST_SETPROPS_0:
598 	case C_ST_SETPROPS_1:
599 		/*
600 		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
601 		 * special switch cases here. The other two do not have
602 		 * any state change during cpr_suspend() that needs to
603 		 * be rolled back. But these are exit points from
604 		 * cpr_suspend, so theoretically (or in the future), it
605 		 * is possible that a need for roll back of a state
606 		 * change arises between these exit points.
607 		 */
608 		goto rb_dump;
609 
610 	case C_ST_STOP_KERNEL_THREADS:
611 		goto rb_stop_kernel_threads;
612 
613 	case C_ST_SUSPEND_DEVICES:
614 		goto rb_suspend_devices;
615 
616 	case C_ST_STATEF_ALLOC:
617 		goto rb_statef_alloc;
618 
619 	case C_ST_DISABLE_UFS_LOGGING:
620 		goto rb_disable_ufs_logging;
621 
622 	case C_ST_PM_REATTACH_NOINVOL:
623 		goto rb_pm_reattach_noinvol;
624 
625 	case C_ST_STOP_USER_THREADS:
626 		goto rb_stop_user_threads;
627 
628 	case C_ST_MP_OFFLINE:
629 		goto rb_mp_offline;
630 
631 	default:
632 		goto rb_others;
633 	}
634 
635 rb_all:
636 	/*
637 	 * setup debugger trapping.
638 	 */
639 	if (cpr_suspend_succeeded)
640 		i_cpr_set_tbr();
641 
642 	/*
643 	 * tell prom to monitor keys before the kernel comes alive
644 	 */
645 	mon_clock_start();
646 
647 	/*
648 	 * perform platform-dependent initialization
649 	 */
650 	if (cpr_suspend_succeeded)
651 		i_cpr_machdep_setup();
652 
653 	/*
654 	 * system did not really go down if we jump here
655 	 */
656 rb_dump:
657 	/*
658 	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
659 	 *
660 	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
661 	 */
662 	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
663 	if (cpr_suspend_succeeded)
664 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
665 
666 	prom_resume_prepost();
667 
668 	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
669 		kdi_dvec_cpr_restart();
670 
671 	/*
672 	 * let the tmp callout catch up.
673 	 */
674 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
675 
676 	i_cpr_enable_intr();
677 
678 	mon_clock_stop();
679 	mon_clock_share();
680 
681 	mutex_enter(&cpu_lock);
682 	cyclic_resume();
683 	mutex_exit(&cpu_lock);
684 
685 	mon_clock_start();
686 
687 	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
688 
689 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
690 
691 	/*
692 	 * statistics gathering
693 	 */
694 	if (cpr_suspend_succeeded) {
695 		/*
696 		 * Prevent false alarm in tod_validate() due to tod
697 		 * value change between suspend and resume
698 		 */
699 		cpr_tod_fault_reset();
700 
701 		cpr_convert_promtime(&pwron_tv);
702 
703 		ctp = &cpr_term.tm_shutdown;
704 		CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
705 		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
706 
707 		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
708 
709 		str = "  prom time";
710 		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
711 		ctp = &cpr_term.tm_cprboot_start;
712 		CPR_STAT_EVENT_END_TMZ(str, ctp);
713 
714 		str = "  read statefile";
715 		CPR_STAT_EVENT_START_TMZ(str, ctp);
716 		ctp = &cpr_term.tm_cprboot_end;
717 		CPR_STAT_EVENT_END_TMZ(str, ctp);
718 	}
719 
720 rb_stop_kernel_threads:
721 	/*
722 	 * Put all threads back to where they belong; get the kernel
723 	 * daemons straightened up too. Note that the callback table
724 	 * locked during cpr_stop_kernel_threads() is released only
725 	 * in cpr_start_kernel_threads(). Ensure modunloading is
726 	 * disabled before starting kernel threads, we don't want
727 	 * modunload thread to start changing device tree underneath.
728 	 */
729 	modunload_disable();
730 	cpr_start_kernel_threads();
731 
732 rb_suspend_devices:
733 	DEBUG1(errp("resuming devices..."));
734 	CPR_STAT_EVENT_START("  start drivers");
735 
736 	/*
737 	 * The policy here is to continue resume everything we can if we did
738 	 * not successfully finish suspend; and panic if we are coming back
739 	 * from a fully suspended system.
740 	 */
741 	rc = cpr_resume_devices(ddi_root_node(), 0);
742 
743 	cpr_sae(0);
744 
745 	str = "Failed to resume one or more devices.";
746 	if (rc && CPR->c_substate == C_ST_DUMP)
747 		cpr_err(CE_PANIC, str);
748 	else if (rc)
749 		cpr_err(CE_WARN, str);
750 	CPR_STAT_EVENT_END("  start drivers");
751 	DEBUG1(errp("done\n"));
752 
753 	/*
754 	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
755 	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
756 	 * modunloading now.
757 	 */
758 	if (CPR->c_substate != C_ST_SUSPEND_DEVICES)
759 		modunload_enable();
760 
761 	/*
762 	 * Hooks needed by lock manager prior to resuming.
763 	 * Refer to code for more comments.
764 	 */
765 	cpr_lock_mgr(lm_cprresume);
766 
767 	/*
768 	 * This is a partial (half) resume during cpr suspend, we
769 	 * haven't yet given up on the suspend. On return from here,
770 	 * cpr_suspend() will try to reallocate and retry the suspend.
771 	 */
772 	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
773 		mon_clock_stop();
774 		return (0);
775 	}
776 
777 rb_statef_alloc:
778 	cpr_statef_close();
779 
780 rb_disable_ufs_logging:
781 	/*
782 	 * if ufs logging was disabled, re-enable
783 	 */
784 	(void) cpr_ufs_logging(1);
785 
786 rb_pm_reattach_noinvol:
787 	/*
788 	 * When pm_reattach_noinvol() succeeds, modunload_thread will
789 	 * remain disabled until after cpr suspend passes the
790 	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
791 	 * cpr suspend reaches this state, we'll need to enable modunload
792 	 * thread during rollback.
793 	 */
794 	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
795 	    CPR->c_substate == C_ST_STATEF_ALLOC ||
796 	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
797 	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
798 		pm_reattach_noinvol_fini();
799 	}
800 
801 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
802 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
803 
804 	pm_restore_direct_levels();
805 
806 rb_stop_user_threads:
807 	DEBUG1(errp("starting user threads..."));
808 	cpr_start_user_threads();
809 	DEBUG1(errp("done\n"));
810 
811 rb_mp_offline:
812 	if (cpr_mp_online())
813 		cpr_err(CE_WARN, "Failed to online all the processors.");
814 
815 rb_others:
816 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL, PM_DEP_WAIT,
817 		NULL, 0);
818 
819 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
820 
821 	/*
822 	 * now that all the drivers are going, kernel kbd driver can
823 	 * take over, turn off prom monitor clock
824 	 */
825 	mon_clock_stop();
826 
827 	if (cpr_suspend_succeeded) {
828 		cpr_restore_time();
829 		cpr_stat_record_events();
830 	}
831 
832 	if (!cpr_reusable_mode)
833 		cpr_clear_definfo();
834 
835 	DEBUG1(errp("Sending SIGTHAW..."));
836 	cpr_signal_user(SIGTHAW);
837 	DEBUG1(errp("done\n"));
838 
839 	CPR_STAT_EVENT_END("Resume Total");
840 
841 	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
842 	CPR_STAT_EVENT_END("WHOLE CYCLE");
843 
844 	DEBUG1(cmn_err(CE_CONT, "\nThe system is back where you left!\n"));
845 
846 	CPR_STAT_EVENT_START("POST CPR DELAY");
847 
848 #ifdef CPR_STAT
849 	ctp = &cpr_term.tm_shutdown;
850 	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
851 	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
852 
853 	CPR_STAT_EVENT_PRINT();
854 #endif /* CPR_STAT */
855 
856 	return (rc);
857 }
858 
859 static void
860 cpr_suspend_init(void)
861 {
862 	cpr_time_t *ctp;
863 
864 	cpr_stat_init();
865 
866 	/*
867 	 * If cpr_suspend() failed before cpr_dump() gets a chance
868 	 * to reinitialize the terminator of the statefile,
869 	 * the values of the old terminator will still linger around.
870 	 * Since the terminator contains information that we need to
871 	 * decide whether suspend succeeded or not, we need to
872 	 * reinitialize it as early as possible.
873 	 */
874 	cpr_term.real_statef_size = 0;
875 	ctp = &cpr_term.tm_shutdown;
876 	bzero(ctp, sizeof (*ctp));
877 	ctp = &cpr_term.tm_cprboot_start;
878 	bzero(ctp, sizeof (*ctp));
879 	ctp = &cpr_term.tm_cprboot_end;
880 	bzero(ctp, sizeof (*ctp));
881 
882 	/*
883 	 * Lookup the physical address of our thread structure.  This should
884 	 * never be invalid and the entire thread structure is expected
885 	 * to reside within the same pfn.
886 	 */
887 	curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
888 	ASSERT(curthreadpfn != PFN_INVALID);
889 	ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
890 	    (caddr_t)curthread + sizeof (kthread_t) - 1));
891 
892 	cpr_suspend_succeeded = 0;
893 }
894