xref: /illumos-gate/usr/src/uts/common/cpr/cpr_main.c (revision a55b6846f87afedf14b3f9b64fbb8c0d0a3f2fe2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * This module contains the guts of checkpoint-resume mechanism.
30  * All code in this module is platform independent.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/errno.h>
35 #include <sys/callb.h>
36 #include <sys/processor.h>
37 #include <sys/machsystm.h>
38 #include <sys/clock.h>
39 #include <sys/vfs.h>
40 #include <sys/kmem.h>
41 #include <nfs/lm.h>
42 #include <sys/systm.h>
43 #include <sys/cpr.h>
44 #include <sys/bootconf.h>
45 #include <sys/cyclic.h>
46 #include <sys/filio.h>
47 #include <sys/fs/ufs_filio.h>
48 #include <sys/epm.h>
49 #include <sys/modctl.h>
50 #include <sys/reboot.h>
51 #include <sys/kdi.h>
52 #include <sys/promif.h>
53 #include <sys/srn.h>
54 #include <sys/cpr_impl.h>
55 
56 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
57 
58 extern struct cpr_terminator cpr_term;
59 
60 extern int cpr_alloc_statefile(int);
61 extern void cpr_start_kernel_threads(void);
62 extern void cpr_abbreviate_devpath(char *, char *);
63 extern void cpr_convert_promtime(cpr_time_t *);
64 extern void cpr_send_notice(void);
65 extern void cpr_set_bitmap_size(void);
66 extern void cpr_stat_init();
67 extern void cpr_statef_close(void);
68 extern void flush_windows(void);
69 extern void (*srn_signal)(int, int);
70 extern void init_cpu_syscall(struct cpu *);
71 extern void i_cpr_pre_resume_cpus();
72 extern void i_cpr_post_resume_cpus();
73 
74 extern int pm_powering_down;
75 extern kmutex_t srn_clone_lock;
76 extern int srn_inuse;
77 
78 static int cpr_suspend(int);
79 static int cpr_resume(int);
80 static void cpr_suspend_init(int);
81 #if defined(__x86)
82 static int cpr_suspend_cpus(void);
83 static void cpr_resume_cpus(void);
84 #endif
85 static int cpr_all_online(void);
86 static void cpr_restore_offline(void);
87 
88 cpr_time_t wholecycle_tv;
89 int cpr_suspend_succeeded;
90 pfn_t curthreadpfn;
91 int curthreadremapped;
92 
93 extern cpuset_t cpu_ready_set;
94 extern void *(*cpu_pause_func)(void *);
95 
96 extern processorid_t i_cpr_bootcpuid(void);
97 extern cpu_t *i_cpr_bootcpu(void);
98 extern void tsc_adjust_delta(hrtime_t tdelta);
99 extern void tsc_resume(void);
100 extern int tsc_resume_in_cyclic;
101 
/*
 * Set this variable to 1 to have device drivers resume in a
 * uniprocessor environment.  This allows drivers that assume
 * they resume on a UP machine to continue to work.  It should be
 * deprecated once the broken drivers are fixed.
 */
108 int cpr_resume_uniproc = 0;
109 
110 /*
111  * save or restore abort_enable;  this prevents a drop
112  * to kadb or prom during cpr_resume_devices() when
113  * there is no kbd present;  see abort_sequence_enter()
114  */
115 static void
116 cpr_sae(int stash)
117 {
118 	static int saved_ae = -1;
119 
120 	if (stash) {
121 		saved_ae = abort_enable;
122 		abort_enable = 0;
123 	} else if (saved_ae != -1) {
124 		abort_enable = saved_ae;
125 		saved_ae = -1;
126 	}
127 }
128 
129 
130 /*
131  * The main switching point for cpr, this routine starts the ckpt
132  * and state file saving routines; on resume the control is
133  * returned back to here and it then calls the resume routine.
134  */
135 int
136 cpr_main(int sleeptype)
137 {
138 	int rc, rc2;
139 	label_t saveq;
140 	klwp_t *tlwp = ttolwp(curthread);
141 
142 	if (sleeptype == CPR_TODISK) {
143 		if ((rc = cpr_default_setup(1)) != 0)
144 			return (rc);
145 		ASSERT(tlwp);
146 		saveq = tlwp->lwp_qsav;
147 	}
148 
149 	if (sleeptype == CPR_TORAM) {
150 		rc = cpr_suspend(sleeptype);
151 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
152 		if (rc == 0) {
153 			int i_cpr_power_down(int sleeptype);
154 
155 			/*
156 			 * From this point on, we should be at a high
157 			 * spl, interrupts disabled, and all but one
158 			 * cpu's paused (effectively UP/single threaded).
159 			 * So this is were we want to put ASSERTS()
160 			 * to let us know otherwise.
161 			 */
162 			ASSERT(cpus_paused());
163 
164 			/*
165 			 * Now do the work of actually putting this
166 			 * machine to sleep!
167 			 */
168 			rc = i_cpr_power_down(sleeptype);
169 			if (rc == 0) {
170 				PMD(PMD_SX, ("back from succssful suspend\n"))
171 			}
172 			/*
173 			 * We do care about the return value from cpr_resume
174 			 * at this point, as it will tell us if one of the
175 			 * resume functions failed (cpr_resume_devices())
176 			 * However, for this to return and _not_ panic, means
177 			 * that we must be in one of the test functions.  So
178 			 * check for that and return an appropriate message.
179 			 */
180 			rc2 = cpr_resume(sleeptype);
181 			if (rc2 != 0) {
182 				ASSERT(cpr_test_point > 0);
183 				cmn_err(CE_NOTE,
184 				    "cpr_resume returned non-zero: %d\n", rc2);
185 				PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
186 			}
187 			ASSERT(!cpus_paused());
188 		} else {
189 			PMD(PMD_SX, ("failed suspend, resuming\n"))
190 			rc = cpr_resume(sleeptype);
191 		}
192 		return (rc);
193 	}
194 	/*
195 	 * Remember where we are for resume after reboot
196 	 */
197 	if (!setjmp(&tlwp->lwp_qsav)) {
198 		/*
199 		 * try to checkpoint the system, if failed return back
200 		 * to userland, otherwise power off.
201 		 */
202 		rc = cpr_suspend(sleeptype);
203 		if (rc || cpr_reusable_mode) {
204 			/*
205 			 * We don't really want to go down, or
206 			 * something went wrong in suspend, do what we can
207 			 * to put the system back to an operable state then
208 			 * return back to userland.
209 			 */
210 			PMD(PMD_SX, ("failed suspend, resuming\n"))
211 			(void) cpr_resume(sleeptype);
212 			PMD(PMD_SX, ("back from failed suspend resume\n"))
213 		}
214 	} else {
215 		/*
216 		 * This is the resumed side of longjmp, restore the previous
217 		 * longjmp pointer if there is one so this will be transparent
218 		 * to the world.
219 		 * This path is only for CPR_TODISK, where we reboot
220 		 */
221 		ASSERT(sleeptype == CPR_TODISK);
222 		tlwp->lwp_qsav = saveq;
223 		CPR->c_flags &= ~C_SUSPENDING;
224 		CPR->c_flags |= C_RESUMING;
225 
226 		/*
227 		 * resume the system back to the original state
228 		 */
229 		rc = cpr_resume(sleeptype);
230 		PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
231 		    rc))
232 	}
233 
234 	(void) cpr_default_setup(0);
235 
236 	return (rc);
237 }
238 
239 
240 #if defined(__sparc)
241 
242 /*
243  * check/disable or re-enable UFS logging
244  */
245 static void
246 cpr_log_status(int enable, int *svstat, vnode_t *vp)
247 {
248 	int cmd, status, error;
249 	char *str, *able;
250 	fiolog_t fl;
251 	refstr_t *mntpt;
252 
253 	str = "cpr_log_status";
254 	bzero(&fl, sizeof (fl));
255 	fl.error = FIOLOG_ENONE;
256 
257 	/*
258 	 * when disabling, first get and save logging status (0 or 1)
259 	 */
260 	if (enable == 0) {
261 		if (error = VOP_IOCTL(vp, _FIOISLOG,
262 		    (uintptr_t)&status, FKIOCTL, CRED(), NULL, NULL)) {
263 			mntpt = vfs_getmntpoint(vp->v_vfsp);
264 			prom_printf("%s: \"%s\", cant get logging "
265 			    "status, error %d\n", str, refstr_value(mntpt),
266 			    error);
267 			refstr_rele(mntpt);
268 			return;
269 		}
270 		*svstat = status;
271 		if (cpr_debug & CPR_DEBUG5) {
272 			mntpt = vfs_getmntpoint(vp->v_vfsp);
273 			errp("%s: \"%s\", logging status = %d\n",
274 			    str, refstr_value(mntpt), status);
275 			refstr_rele(mntpt);
276 		};
277 
278 		able = "disable";
279 		cmd = _FIOLOGDISABLE;
280 	} else {
281 		able = "enable";
282 		cmd = _FIOLOGENABLE;
283 	}
284 
285 	/*
286 	 * disable or re-enable logging when the saved status is 1
287 	 */
288 	if (*svstat == 1) {
289 		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
290 		    FKIOCTL, CRED(), NULL, NULL);
291 		if (error) {
292 			mntpt = vfs_getmntpoint(vp->v_vfsp);
293 			prom_printf("%s: \"%s\", cant %s logging, error %d\n",
294 			    str, refstr_value(mntpt), able, error);
295 			refstr_rele(mntpt);
296 		} else {
297 			if (cpr_debug & CPR_DEBUG5) {
298 				mntpt = vfs_getmntpoint(vp->v_vfsp);
299 				errp("%s: \"%s\", logging is now %sd\n",
300 				    str, refstr_value(mntpt), able);
301 				refstr_rele(mntpt);
302 			};
303 		}
304 	}
305 
306 	/*
307 	 * when enabling logging, reset the saved status
308 	 * to unknown for next time
309 	 */
310 	if (enable)
311 		*svstat = -1;
312 }
313 
314 /*
315  * enable/disable UFS logging on filesystems containing cpr_default_path
316  * and cpr statefile.  since the statefile can be on any fs, that fs
317  * needs to be handled separately.  this routine and cprboot expect that
318  * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
319  * is loaded from the device with rootfs and uses the same device to open
320  * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
321  * file outside of rootfs would cause errors during cprboot, plus cpr and
322  * fsck problems with the new fs if logging were enabled.
323  */
324 
325 static int
326 cpr_ufs_logging(int enable)
327 {
328 	static int def_status = -1, sf_status = -1;
329 	struct vfs *vfsp;
330 	char *fname;
331 	vnode_t *vp;
332 	int error;
333 
334 	if (cpr_reusable_mode)
335 		return (0);
336 
337 	if (error = cpr_open_deffile(FREAD, &vp))
338 		return (error);
339 	cpr_log_status(enable, &def_status, vp);
340 	vfsp = vp->v_vfsp;
341 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
342 	VN_RELE(vp);
343 
344 	fname = cpr_build_statefile_path();
345 	if (fname == NULL)
346 		return (ENOENT);
347 	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
348 	    0600, &vp, CRCREAT, 0)) {
349 		prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
350 		    "error %d\n", fname, error);
351 		return (error);
352 	}
353 
354 	/*
355 	 * check logging status for the statefile if it resides
356 	 * on a different fs and the type is a regular file
357 	 */
358 	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
359 		cpr_log_status(enable, &sf_status, vp);
360 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
361 	VN_RELE(vp);
362 
363 	return (0);
364 }
365 #endif
366 
367 
368 /*
369  * Check if klmmod is loaded and call a lock manager service; if klmmod
370  * is not loaded, the services aren't needed and a call would trigger a
371  * modload, which would block since another thread would never run.
372  */
373 static void
374 cpr_lock_mgr(void (*service)(void))
375 {
376 	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
377 		(*service)();
378 }
379 
380 int
381 cpr_suspend_cpus(void)
382 {
383 	cpu_t *bootcpu;
384 	int	ret = 0;
385 	extern void *i_cpr_save_context(void *arg);
386 
387 	mutex_enter(&cpu_lock);
388 
389 	/*
390 	 * if bootcpu is offline bring it back online
391 	 */
392 	bootcpu = i_cpr_bootcpu();
393 
394 	/*
395 	 * the machine could not have booted without a bootcpu
396 	 */
397 	ASSERT(bootcpu != NULL);
398 
399 	/*
400 	 * bring all the offline cpus online
401 	 */
402 	if ((ret = cpr_all_online())) {
403 		mutex_exit(&cpu_lock);
404 		return (ret);
405 	}
406 
407 	/*
408 	 * Set the affinity to be the boot processor
409 	 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
410 	 */
411 	affinity_set(i_cpr_bootcpuid());
412 
413 	ASSERT(CPU->cpu_id == 0);
414 
415 	PMD(PMD_SX, ("curthread running on bootcpu\n"))
416 
417 	/*
418 	 * pause all other running CPUs and save the CPU state at the sametime
419 	 */
420 	cpu_pause_func = i_cpr_save_context;
421 	pause_cpus(NULL);
422 
423 	mutex_exit(&cpu_lock);
424 
425 	return (0);
426 }
427 
428 /*
429  * Take the system down to a checkpointable state and write
430  * the state file, the following are sequentially executed:
431  *
432  *    - Request all user threads to stop themselves
433  *    - push out and invalidate user pages
434  *    - bring statefile inode incore to prevent a miss later
435  *    - request all daemons to stop
436  *    - check and make sure all threads are stopped
437  *    - sync the file system
438  *    - suspend all devices
439  *    - block intrpts
440  *    - dump system state and memory to state file
441  *    - SPARC code will not be called with CPR_TORAM, caller filters
442  */
443 static int
444 cpr_suspend(int sleeptype)
445 {
446 #if defined(__sparc)
447 	int sf_realloc, nverr;
448 #endif
449 	int	rc = 0;
450 	int	skt_rc = 0;
451 
452 	PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
453 	cpr_set_substate(C_ST_SUSPEND_BEGIN);
454 
455 	cpr_suspend_init(sleeptype);
456 
457 	cpr_save_time();
458 
459 	cpr_tod_get(&wholecycle_tv);
460 	CPR_STAT_EVENT_START("Suspend Total");
461 
462 	i_cpr_alloc_cpus();
463 
464 #if defined(__sparc)
465 	ASSERT(sleeptype == CPR_TODISK);
466 	if (!cpr_reusable_mode) {
467 		/*
468 		 * We need to validate default file before fs
469 		 * functionality is disabled.
470 		 */
471 		if (rc = cpr_validate_definfo(0))
472 			return (rc);
473 	}
474 	i_cpr_save_machdep_info();
475 #endif
476 
477 	PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
478 	/* Stop PM scans ASAP */
479 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);
480 
481 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
482 	    NULL, NULL, PM_DEP_WAIT, NULL, 0);
483 
484 #if defined(__sparc)
485 	ASSERT(sleeptype == CPR_TODISK);
486 	cpr_set_substate(C_ST_MP_OFFLINE);
487 	if (rc = cpr_mp_offline())
488 		return (rc);
489 #endif
490 	/*
491 	 * Ask Xorg to suspend the frame buffer, and wait for it to happen
492 	 */
493 	mutex_enter(&srn_clone_lock);
494 	if (srn_signal) {
495 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
496 		    "SRN_SUSPEND_REQ)\n"))
497 		srn_inuse = 1;	/* because *(srn_signal) cv_waits */
498 		(*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
499 		srn_inuse = 0;
500 	} else {
501 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
502 	}
503 	mutex_exit(&srn_clone_lock);
504 
505 	/*
506 	 * Ask the user threads to stop by themselves, but
507 	 * if they don't or can't after 3 retries, we give up on CPR.
508 	 * The 3 retry is not a random number because 2 is possible if
509 	 * a thread has been forked before the parent thread is stopped.
510 	 */
511 	CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
512 	CPR_STAT_EVENT_START("  stop users");
513 	cpr_set_substate(C_ST_STOP_USER_THREADS);
514 	PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
515 	if (rc = cpr_stop_user_threads())
516 		return (rc);
517 	CPR_STAT_EVENT_END("  stop users");
518 	CPR_DEBUG(CPR_DEBUG1, "done\n");
519 
520 	PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
521 	pm_save_direct_levels();
522 
523 	/*
524 	 * User threads are stopped.  We will start communicating with the
525 	 * user via prom_printf (some debug output may have already happened)
526 	 * so let anybody who cares know about this (bug 4096122)
527 	 */
528 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);
529 
530 	PMD(PMD_SX, ("cpr_suspend: send notice\n"))
531 #ifndef DEBUG
532 	cpr_send_notice();
533 	if (cpr_debug)
534 		prom_printf("\n");
535 #endif
536 
537 	PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
538 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);
539 
540 	/*
541 	 * Reattach any drivers which originally exported the
542 	 * no-involuntary-power-cycles property.  We need to do this before
543 	 * stopping kernel threads because modload is implemented using
544 	 * a kernel thread.
545 	 */
546 	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
547 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
548 	if (!pm_reattach_noinvol())
549 		return (ENXIO);
550 
551 #if defined(__sparc)
552 	ASSERT(sleeptype == CPR_TODISK);
553 	/*
554 	 * if ufs logging is enabled, we need to disable before
555 	 * stopping kernel threads so that ufs delete and roll
556 	 * threads can do the work.
557 	 */
558 	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
559 	if (rc = cpr_ufs_logging(0))
560 		return (rc);
561 
562 	/*
563 	 * Use sync_all to swap out all user pages and find out how much
564 	 * extra space needed for user pages that don't have back store
565 	 * space left.
566 	 */
567 	CPR_STAT_EVENT_START("  swapout upages");
568 	vfs_sync(SYNC_ALL);
569 	CPR_STAT_EVENT_END("  swapout upages");
570 
571 	cpr_set_bitmap_size();
572 
573 alloc_statefile:
574 	/*
575 	 * If our last state was C_ST_DUMP_NOSPC, we're trying to
576 	 * realloc the statefile, otherwise this is the first attempt.
577 	 */
578 	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;
579 
580 	CPR_STAT_EVENT_START("  alloc statefile");
581 	cpr_set_substate(C_ST_STATEF_ALLOC);
582 	if (rc = cpr_alloc_statefile(sf_realloc)) {
583 		if (sf_realloc)
584 			errp("realloc failed\n");
585 		return (rc);
586 	}
587 	CPR_STAT_EVENT_END("  alloc statefile");
588 
589 	/*
590 	 * Sync the filesystem to preserve its integrity.
591 	 *
592 	 * This sync is also used to flush out all B_DELWRI buffers
593 	 * (fs cache) which are mapped and neither dirty nor referenced
594 	 * before cpr_invalidate_pages destroys them.
595 	 * fsflush does similar thing.
596 	 */
597 	sync();
598 
599 	/*
600 	 * destroy all clean file mapped kernel pages
601 	 */
602 	CPR_STAT_EVENT_START("  clean pages");
603 	CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
604 	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
605 	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
606 	CPR_STAT_EVENT_END("  clean pages");
607 #endif
608 
609 
610 	/*
611 	 * Hooks needed by lock manager prior to suspending.
612 	 * Refer to code for more comments.
613 	 */
614 	PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
615 	cpr_lock_mgr(lm_cprsuspend);
616 
617 	/*
618 	 * Now suspend all the devices
619 	 */
620 	CPR_STAT_EVENT_START("  stop drivers");
621 	CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
622 	cpr_set_substate(C_ST_SUSPEND_DEVICES);
623 	pm_powering_down = 1;
624 	PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
625 	rc = cpr_suspend_devices(ddi_root_node());
626 	pm_powering_down = 0;
627 	if (rc)
628 		return (rc);
629 	CPR_DEBUG(CPR_DEBUG1, "done\n");
630 	CPR_STAT_EVENT_END("  stop drivers");
631 
632 	/*
633 	 * Stop all daemon activities
634 	 */
635 	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
636 	PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
637 	if (skt_rc = cpr_stop_kernel_threads())
638 		return (skt_rc);
639 
640 	PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
641 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);
642 
643 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
644 	pm_reattach_noinvol_fini();
645 
646 	cpr_sae(1);
647 
648 	PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
649 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);
650 
651 	if (sleeptype == CPR_TODISK) {
652 		/*
653 		 * It's safer to do tod_get before we disable all intr.
654 		 */
655 		CPR_STAT_EVENT_START("  write statefile");
656 	}
657 
658 	/*
659 	 * it's time to ignore the outside world, stop the real time
660 	 * clock and disable any further intrpt activity.
661 	 */
662 	PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
663 	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */
664 
665 	mutex_enter(&cpu_lock);
666 	PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
667 	cyclic_suspend();
668 	mutex_exit(&cpu_lock);
669 
670 	/*
671 	 * Due to the different methods of resuming the system between
672 	 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
673 	 * and CPR_TORAM (restart via reset into existing kernel image)
674 	 * cpus are not suspended and restored in the SPARC case, since it
675 	 * is necessary to restart the cpus and pause them before restoring
676 	 * the OBP image
677 	 */
678 
679 #if defined(__x86)
680 
681 	/* pause aux cpus */
682 	PMD(PMD_SX, ("pause aux cpus\n"))
683 
684 	cpr_set_substate(C_ST_MP_PAUSED);
685 
686 	if ((rc = cpr_suspend_cpus()) != 0)
687 		return (rc);
688 #endif
689 
690 	PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
691 	i_cpr_stop_intr();
692 	CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");
693 
694 	/*
695 	 * Since we will now disable the mechanism that causes prom_printfs
696 	 * to power up (if needed) the console fb/monitor, we assert that
697 	 * it must be up now.
698 	 */
699 	ASSERT(pm_cfb_is_up());
700 	PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
701 	prom_suspend_prepost();
702 
703 #if defined(__sparc)
704 	/*
705 	 * getting ready to write ourself out, flush the register
706 	 * windows to make sure that our stack is good when we
707 	 * come back on the resume side.
708 	 */
709 	flush_windows();
710 #endif
711 
712 	/*
713 	 * For S3, we're done
714 	 */
715 	if (sleeptype == CPR_TORAM) {
716 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
717 		cpr_set_substate(C_ST_NODUMP);
718 		return (rc);
719 	}
720 #if defined(__sparc)
721 	/*
722 	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
723 	 *
724 	 * The system is quiesced at this point, we are ready to either dump
725 	 * to the state file for a extended sleep or a simple shutdown for
726 	 * systems with non-volatile memory.
727 	 */
728 
729 	/*
730 	 * special handling for reusable:
731 	 */
732 	if (cpr_reusable_mode) {
733 		cpr_set_substate(C_ST_SETPROPS_1);
734 		if (nverr = cpr_set_properties(1))
735 			return (nverr);
736 	}
737 
738 	cpr_set_substate(C_ST_DUMP);
739 	rc = cpr_dump(C_VP);
740 
741 	/*
742 	 * if any error occurred during dump, more
743 	 * special handling for reusable:
744 	 */
745 	if (rc && cpr_reusable_mode) {
746 		cpr_set_substate(C_ST_SETPROPS_0);
747 		if (nverr = cpr_set_properties(0))
748 			return (nverr);
749 	}
750 
751 	if (rc == ENOSPC) {
752 		cpr_set_substate(C_ST_DUMP_NOSPC);
753 		(void) cpr_resume(sleeptype);
754 		goto alloc_statefile;
755 	} else if (rc == 0) {
756 		if (cpr_reusable_mode) {
757 			cpr_set_substate(C_ST_REUSABLE);
758 			longjmp(&ttolwp(curthread)->lwp_qsav);
759 		} else
760 			rc = cpr_set_properties(1);
761 	}
762 #endif
763 	PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
764 	return (rc);
765 }
766 
767 void
768 cpr_resume_cpus(void)
769 {
770 	/*
771 	 * this is a cut down version of start_other_cpus()
772 	 * just do the initialization to wake the other cpus
773 	 */
774 
775 #if defined(__x86)
776 	/*
777 	 * Initialize our syscall handlers
778 	 */
779 	init_cpu_syscall(CPU);
780 
781 #endif
782 
783 	i_cpr_pre_resume_cpus();
784 
785 	/*
786 	 * Restart the paused cpus
787 	 */
788 	mutex_enter(&cpu_lock);
789 	start_cpus();
790 	mutex_exit(&cpu_lock);
791 
792 	/*
793 	 * clear the affinity set in cpr_suspend_cpus()
794 	 */
795 	affinity_clear();
796 
797 	i_cpr_post_resume_cpus();
798 
799 	mutex_enter(&cpu_lock);
800 	/*
801 	 * Restore this cpu to use the regular cpu_pause(), so that
802 	 * online and offline will work correctly
803 	 */
804 	cpu_pause_func = NULL;
805 
806 	/*
807 	 * offline all the cpus that were brought online during suspend
808 	 */
809 	cpr_restore_offline();
810 
811 	/*
812 	 * clear the affinity set in cpr_suspend_cpus()
813 	 */
814 	affinity_clear();
815 
816 	mutex_exit(&cpu_lock);
817 }
818 
/*
 * Undo cpr_suspend_cpus() without going through the cpu resume hooks
 * used by cpr_resume_cpus().  cpr_resume() calls this instead of
 * cpr_resume_cpus() when cpr_suspend_succeeded is not set.
 *
 * All of the work is done with cpu_lock held: cpu_pause_func is reset,
 * the paused cpus are restarted, the cpus brought online for the
 * suspend are put back offline, and the current thread's affinity is
 * cleared.
 */
void
cpr_unpause_cpus(void)
{
	/*
	 * Now restore the system back to what it was before we suspended
	 */

	PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))

	mutex_enter(&cpu_lock);

	/*
	 * Restore this cpu to use the regular cpu_pause(), so that
	 * online and offline will work correctly
	 */
	cpu_pause_func = NULL;

	/*
	 * Restart the paused cpus
	 */
	start_cpus();

	/*
	 * offline all the cpus that were brought online during suspend
	 */
	cpr_restore_offline();

	/*
	 * clear the affinity set in cpr_suspend_cpus(); this is the
	 * single clear matching the single affinity_set() made there
	 */
	affinity_clear();

	mutex_exit(&cpu_lock);
}
853 
854 /*
855  * Bring the system back up from a checkpoint, at this point
856  * the VM has been minimally restored by boot, the following
857  * are executed sequentially:
858  *
859  *    - machdep setup and enable interrupts (mp startup if it's mp)
860  *    - resume all devices
861  *    - restart daemons
862  *    - put all threads back on run queue
863  */
864 static int
865 cpr_resume(int sleeptype)
866 {
867 	cpr_time_t pwron_tv, *ctp;
868 	char *str;
869 	int rc = 0;
870 
871 	/*
872 	 * The following switch is used to resume the system
873 	 * that was suspended to a different level.
874 	 */
875 	CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
876 	PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
877 
878 	/*
879 	 * Note:
880 	 *
881 	 * The rollback labels rb_xyz do not represent the cpr resume
882 	 * state when event 'xyz' has happened. Instead they represent
883 	 * the state during cpr suspend when event 'xyz' was being
884 	 * entered (and where cpr suspend failed). The actual call that
885 	 * failed may also need to be partially rolled back, since they
886 	 * aren't atomic in most cases.  In other words, rb_xyz means
887 	 * "roll back all cpr suspend events that happened before 'xyz',
888 	 * and the one that caused the failure, if necessary."
889 	 */
890 	switch (CPR->c_substate) {
891 #if defined(__sparc)
892 	case C_ST_DUMP:
893 		/*
894 		 * This is most likely a full-fledged cpr_resume after
895 		 * a complete and successful cpr suspend. Just roll back
896 		 * everything.
897 		 */
898 		ASSERT(sleeptype == CPR_TODISK);
899 		break;
900 
901 	case C_ST_REUSABLE:
902 	case C_ST_DUMP_NOSPC:
903 	case C_ST_SETPROPS_0:
904 	case C_ST_SETPROPS_1:
905 		/*
906 		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
907 		 * special switch cases here. The other two do not have
908 		 * any state change during cpr_suspend() that needs to
909 		 * be rolled back. But these are exit points from
910 		 * cpr_suspend, so theoretically (or in the future), it
911 		 * is possible that a need for roll back of a state
912 		 * change arises between these exit points.
913 		 */
914 		ASSERT(sleeptype == CPR_TODISK);
915 		goto rb_dump;
916 #endif
917 
918 	case C_ST_NODUMP:
919 		PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
920 		goto rb_nodump;
921 
922 	case C_ST_STOP_KERNEL_THREADS:
923 		PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
924 		goto rb_stop_kernel_threads;
925 
926 	case C_ST_SUSPEND_DEVICES:
927 		PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
928 		goto rb_suspend_devices;
929 
930 #if defined(__sparc)
931 	case C_ST_STATEF_ALLOC:
932 		ASSERT(sleeptype == CPR_TODISK);
933 		goto rb_statef_alloc;
934 
935 	case C_ST_DISABLE_UFS_LOGGING:
936 		ASSERT(sleeptype == CPR_TODISK);
937 		goto rb_disable_ufs_logging;
938 #endif
939 
940 	case C_ST_PM_REATTACH_NOINVOL:
941 		PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
942 		goto rb_pm_reattach_noinvol;
943 
944 	case C_ST_STOP_USER_THREADS:
945 		PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
946 		goto rb_stop_user_threads;
947 
948 #if defined(__sparc)
949 	case C_ST_MP_OFFLINE:
950 		PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
951 		goto rb_mp_offline;
952 #endif
953 
954 #if defined(__x86)
955 	case C_ST_MP_PAUSED:
956 		PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
957 		goto rb_mp_paused;
958 #endif
959 
960 
961 	default:
962 		PMD(PMD_SX, ("cpr_resume: others\n"))
963 		goto rb_others;
964 	}
965 
966 rb_all:
967 	/*
968 	 * perform platform-dependent initialization
969 	 */
970 	if (cpr_suspend_succeeded)
971 		i_cpr_machdep_setup();
972 
973 	/*
974 	 * system did not really go down if we jump here
975 	 */
976 rb_dump:
977 	/*
978 	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
979 	 *
980 	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
981 	 */
982 rb_nodump:
983 	/*
984 	 * If we did suspend to RAM, we didn't generate a dump
985 	 */
986 	PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
987 	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
988 	if (cpr_suspend_succeeded) {
989 		PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
990 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
991 	}
992 
993 	prom_resume_prepost();
994 #if !defined(__sparc)
995 	/*
996 	 * Need to sync the software clock with the hardware clock.
997 	 * On Sparc, this occurs in the sparc-specific cbe.  However
998 	 * on x86 this needs to be handled _before_ we bring other cpu's
999 	 * back online.  So we call a resume function in timestamp.c
1000 	 */
1001 	if (tsc_resume_in_cyclic == 0)
1002 		tsc_resume();
1003 
1004 #endif
1005 
1006 #if defined(__sparc)
1007 	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
1008 		kdi_dvec_cpr_restart();
1009 #endif
1010 
1011 
1012 #if defined(__x86)
1013 rb_mp_paused:
1014 	PT(PT_RMPO);
1015 	PMD(PMD_SX, ("resume aux cpus\n"))
1016 
1017 	if (cpr_suspend_succeeded) {
1018 		cpr_resume_cpus();
1019 	} else {
1020 		cpr_unpause_cpus();
1021 	}
1022 #endif
1023 
1024 	/*
1025 	 * let the tmp callout catch up.
1026 	 */
1027 	PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
1028 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
1029 
1030 	i_cpr_enable_intr();
1031 
1032 	mutex_enter(&cpu_lock);
1033 	PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
1034 	cyclic_resume();
1035 	mutex_exit(&cpu_lock);
1036 
1037 	PMD(PMD_SX, ("cpr_resume: handle xc\n"))
1038 	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
1039 
1040 	PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
1041 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
1042 
1043 	/*
1044 	 * statistics gathering
1045 	 */
1046 	if (cpr_suspend_succeeded) {
1047 		/*
1048 		 * Prevent false alarm in tod_validate() due to tod
1049 		 * value change between suspend and resume
1050 		 */
1051 		cpr_tod_fault_reset();
1052 
1053 		cpr_convert_promtime(&pwron_tv);
1054 
1055 		ctp = &cpr_term.tm_shutdown;
1056 		if (sleeptype == CPR_TODISK)
1057 			CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
1058 		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
1059 
1060 		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
1061 
1062 		str = "  prom time";
1063 		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
1064 		ctp = &cpr_term.tm_cprboot_start;
1065 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1066 
1067 		str = "  read statefile";
1068 		CPR_STAT_EVENT_START_TMZ(str, ctp);
1069 		ctp = &cpr_term.tm_cprboot_end;
1070 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1071 	}
1072 
1073 rb_stop_kernel_threads:
1074 	/*
1075 	 * Put all threads back to where they belong; get the kernel
1076 	 * daemons straightened up too. Note that the callback table
1077 	 * locked during cpr_stop_kernel_threads() is released only
1078 	 * in cpr_start_kernel_threads(). Ensure modunloading is
1079 	 * disabled before starting kernel threads, we don't want
1080 	 * modunload thread to start changing device tree underneath.
1081 	 */
1082 	PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
1083 	modunload_disable();
1084 	PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
1085 	cpr_start_kernel_threads();
1086 
1087 rb_suspend_devices:
1088 	CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
1089 	CPR_STAT_EVENT_START("  start drivers");
1090 
1091 	PMD(PMD_SX,
1092 	    ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
1093 	    cpr_resume_uniproc))
1094 
1095 #if defined(__x86)
1096 	/*
1097 	 * If cpr_resume_uniproc is set, then pause all the other cpus
1098 	 * apart from the current cpu, so that broken drivers that think
1099 	 * that they are on a uniprocessor machine will resume
1100 	 */
1101 	if (cpr_resume_uniproc) {
1102 		mutex_enter(&cpu_lock);
1103 		pause_cpus(NULL);
1104 		mutex_exit(&cpu_lock);
1105 	}
1106 #endif
1107 
1108 	/*
1109 	 * The policy here is to continue resume everything we can if we did
1110 	 * not successfully finish suspend; and panic if we are coming back
1111 	 * from a fully suspended system.
1112 	 */
1113 	PMD(PMD_SX, ("cpr_resume: resume devices\n"))
1114 	rc = cpr_resume_devices(ddi_root_node(), 0);
1115 
1116 	cpr_sae(0);
1117 
1118 	str = "Failed to resume one or more devices.";
1119 
1120 	if (rc) {
1121 		if (CPR->c_substate == C_ST_DUMP ||
1122 		    (sleeptype == CPR_TORAM &&
1123 		    CPR->c_substate == C_ST_NODUMP)) {
1124 			if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
1125 				PMD(PMD_SX, ("cpr_resume: resume device "
1126 				    "warn\n"))
1127 				cpr_err(CE_WARN, str);
1128 			} else {
1129 				PMD(PMD_SX, ("cpr_resume: resume device "
1130 				    "panic\n"))
1131 				cpr_err(CE_PANIC, str);
1132 			}
1133 		} else {
1134 			PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
1135 			cpr_err(CE_WARN, str);
1136 		}
1137 	}
1138 
1139 	CPR_STAT_EVENT_END("  start drivers");
1140 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1141 
1142 #if defined(__x86)
1143 	/*
1144 	 * If cpr_resume_uniproc is set, then unpause all the processors
1145 	 * that were paused before resuming the drivers
1146 	 */
1147 	if (cpr_resume_uniproc) {
1148 		mutex_enter(&cpu_lock);
1149 		start_cpus();
1150 		mutex_exit(&cpu_lock);
1151 	}
1152 #endif
1153 
1154 	/*
1155 	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
1156 	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
1157 	 * modunloading now.
1158 	 */
1159 	if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
1160 		PMD(PMD_SX, ("cpr_resume: modload enable\n"))
1161 		modunload_enable();
1162 	}
1163 
1164 	/*
1165 	 * Hooks needed by lock manager prior to resuming.
1166 	 * Refer to code for more comments.
1167 	 */
1168 	PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
1169 	cpr_lock_mgr(lm_cprresume);
1170 
1171 #if defined(__sparc)
1172 	/*
1173 	 * This is a partial (half) resume during cpr suspend, we
1174 	 * haven't yet given up on the suspend. On return from here,
1175 	 * cpr_suspend() will try to reallocate and retry the suspend.
1176 	 */
1177 	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
1178 		return (0);
1179 	}
1180 
1181 	if (sleeptype == CPR_TODISK) {
1182 rb_statef_alloc:
1183 		cpr_statef_close();
1184 
1185 rb_disable_ufs_logging:
1186 		/*
1187 		 * if ufs logging was disabled, re-enable
1188 		 */
1189 		(void) cpr_ufs_logging(1);
1190 	}
1191 #endif
1192 
1193 rb_pm_reattach_noinvol:
1194 	/*
1195 	 * When pm_reattach_noinvol() succeeds, modunload_thread will
1196 	 * remain disabled until after cpr suspend passes the
1197 	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
1198 	 * cpr suspend reaches this state, we'll need to enable modunload
1199 	 * thread during rollback.
1200 	 */
1201 	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
1202 	    CPR->c_substate == C_ST_STATEF_ALLOC ||
1203 	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
1204 	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
1205 		PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
1206 		pm_reattach_noinvol_fini();
1207 	}
1208 
1209 	PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
1210 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
1211 	PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
1212 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
1213 
1214 	PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
1215 	pm_restore_direct_levels();
1216 
1217 rb_stop_user_threads:
1218 	CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
1219 	PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
1220 	cpr_start_user_threads();
1221 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1222 	/*
1223 	 * Ask Xorg to resume the frame buffer, and wait for it to happen
1224 	 */
1225 	mutex_enter(&srn_clone_lock);
1226 	if (srn_signal) {
1227 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
1228 		    "SRN_NORMAL_RESUME)\n"))
1229 		srn_inuse = 1;		/* because (*srn_signal) cv_waits */
1230 		(*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
1231 		srn_inuse = 0;
1232 	} else {
1233 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
1234 	}
1235 	mutex_exit(&srn_clone_lock);
1236 
1237 #if defined(__sparc)
1238 rb_mp_offline:
1239 	if (cpr_mp_online())
1240 		cpr_err(CE_WARN, "Failed to online all the processors.");
1241 #endif
1242 
1243 rb_others:
1244 	PMD(PMD_SX, ("cpr_resume: dep thread\n"))
1245 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
1246 	    PM_DEP_WAIT, NULL, 0);
1247 
1248 	PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
1249 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
1250 
1251 	if (cpr_suspend_succeeded) {
1252 		cpr_stat_record_events();
1253 	}
1254 
1255 #if defined(__sparc)
1256 	if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
1257 		cpr_clear_definfo();
1258 #endif
1259 
1260 	i_cpr_free_cpus();
1261 	CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
1262 	PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
1263 	cpr_signal_user(SIGTHAW);
1264 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1265 
1266 	CPR_STAT_EVENT_END("Resume Total");
1267 
1268 	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
1269 	CPR_STAT_EVENT_END("WHOLE CYCLE");
1270 
1271 	if (cpr_debug & CPR_DEBUG1)
1272 		cmn_err(CE_CONT, "\nThe system is back where you left!\n");
1273 
1274 	CPR_STAT_EVENT_START("POST CPR DELAY");
1275 
1276 #ifdef CPR_STAT
1277 	ctp = &cpr_term.tm_shutdown;
1278 	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
1279 	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
1280 
1281 	CPR_STAT_EVENT_PRINT();
1282 #endif /* CPR_STAT */
1283 
1284 	PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
1285 	return (rc);
1286 }
1287 
1288 static void
1289 cpr_suspend_init(int sleeptype)
1290 {
1291 	cpr_time_t *ctp;
1292 
1293 	cpr_stat_init();
1294 
1295 	/*
1296 	 * If cpr_suspend() failed before cpr_dump() gets a chance
1297 	 * to reinitialize the terminator of the statefile,
1298 	 * the values of the old terminator will still linger around.
1299 	 * Since the terminator contains information that we need to
1300 	 * decide whether suspend succeeded or not, we need to
1301 	 * reinitialize it as early as possible.
1302 	 */
1303 	cpr_term.real_statef_size = 0;
1304 	ctp = &cpr_term.tm_shutdown;
1305 	bzero(ctp, sizeof (*ctp));
1306 	ctp = &cpr_term.tm_cprboot_start;
1307 	bzero(ctp, sizeof (*ctp));
1308 	ctp = &cpr_term.tm_cprboot_end;
1309 	bzero(ctp, sizeof (*ctp));
1310 
1311 	if (sleeptype == CPR_TODISK) {
1312 		/*
1313 		 * Lookup the physical address of our thread structure.
1314 		 * This should never be invalid and the entire thread structure
1315 		 * is expected to reside within the same pfn.
1316 		 */
1317 		curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
1318 		ASSERT(curthreadpfn != PFN_INVALID);
1319 		ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
1320 		    (caddr_t)curthread + sizeof (kthread_t) - 1));
1321 	}
1322 
1323 	cpr_suspend_succeeded = 0;
1324 }
1325 
1326 /*
1327  * bring all the offline cpus online
1328  */
1329 static int
1330 cpr_all_online(void)
1331 {
1332 	int	rc = 0;
1333 
1334 #ifdef	__sparc
1335 	/*
1336 	 * do nothing
1337 	 */
1338 #else
1339 
1340 	cpu_t	*cp;
1341 
1342 	ASSERT(MUTEX_HELD(&cpu_lock));
1343 
1344 	cp = cpu_list;
1345 	do {
1346 		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1347 		if (!CPU_ACTIVE(cp)) {
1348 			if ((rc = cpu_online(cp)) != 0)
1349 				break;
1350 			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
1351 		}
1352 	} while ((cp = cp->cpu_next) != cpu_list);
1353 
1354 	if (rc) {
1355 		/*
1356 		 * an online operation failed so offline the cpus
1357 		 * that were onlined above to restore the system
1358 		 * to its original state
1359 		 */
1360 		cpr_restore_offline();
1361 	}
1362 #endif
1363 	return (rc);
1364 }
1365 
1366 /*
1367  * offline all the cpus that were brought online by cpr_all_online()
1368  */
1369 static void
1370 cpr_restore_offline(void)
1371 {
1372 
1373 #ifdef	__sparc
1374 	/*
1375 	 * do nothing
1376 	 */
1377 #else
1378 
1379 	cpu_t	*cp;
1380 	int	rc = 0;
1381 
1382 	ASSERT(MUTEX_HELD(&cpu_lock));
1383 
1384 	cp = cpu_list;
1385 	do {
1386 		if (CPU_CPR_IS_ONLINE(cp)) {
1387 			rc =  cpu_offline(cp, 0);
1388 			/*
1389 			 * this offline should work, since the cpu was
1390 			 * offline originally and was successfully onlined
1391 			 * by cpr_all_online()
1392 			 */
1393 			ASSERT(rc == 0);
1394 			cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1395 		}
1396 	} while ((cp = cp->cpu_next) != cpu_list);
1397 
1398 #endif
1399 
1400 }
1401