xref: /titanic_52/usr/src/uts/common/cpr/cpr_main.c (revision 6a45aeb4299937971b2d4ebd68553ee5a39fe913)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This module contains the guts of checkpoint-resume mechanism.
28  * All code in this module is platform independent.
29  */
30 
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/callb.h>
34 #include <sys/processor.h>
35 #include <sys/machsystm.h>
36 #include <sys/clock.h>
37 #include <sys/vfs.h>
38 #include <sys/kmem.h>
39 #include <nfs/lm.h>
40 #include <sys/systm.h>
41 #include <sys/cpr.h>
42 #include <sys/bootconf.h>
43 #include <sys/cyclic.h>
44 #include <sys/filio.h>
45 #include <sys/fs/ufs_filio.h>
46 #include <sys/epm.h>
47 #include <sys/modctl.h>
48 #include <sys/reboot.h>
49 #include <sys/kdi.h>
50 #include <sys/promif.h>
51 #include <sys/srn.h>
52 #include <sys/cpr_impl.h>
53 
/*
 * PPM(dip): the devi_pm_ppm field of the devinfo node behind dip,
 * cast to a dev_info_t pointer.
 */
#define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)

/*
 * Data and routines declared extern here, i.e. provided by other files.
 * Note that cpr_stat_init, i_cpr_pre_resume_cpus and
 * i_cpr_post_resume_cpus are declared without a parameter list.
 */
extern struct cpr_terminator cpr_term;

extern int cpr_alloc_statefile(int);
extern void cpr_start_kernel_threads(void);
extern void cpr_abbreviate_devpath(char *, char *);
extern void cpr_convert_promtime(cpr_time_t *);
extern void cpr_send_notice(void);
extern void cpr_set_bitmap_size(void);
extern void cpr_stat_init();
extern void cpr_statef_close(void);
extern void flush_windows(void);
extern void (*srn_signal)(int, int);
extern void init_cpu_syscall(struct cpu *);
extern void i_cpr_pre_resume_cpus();
extern void i_cpr_post_resume_cpus();
extern int cpr_is_ufs(struct vfs *);

extern int pm_powering_down;
extern kmutex_t srn_clone_lock;
extern int srn_inuse;

/*
 * Forward declarations of this file's static routines.  The cpu
 * suspend/resume pair is only declared static when __x86 is defined.
 */
static int cpr_suspend(int);
static int cpr_resume(int);
static void cpr_suspend_init(int);
#if defined(__x86)
static int cpr_suspend_cpus(void);
static void cpr_resume_cpus(void);
#endif
static int cpr_all_online(void);
static void cpr_restore_offline(void);

/*
 * Globals defined by this file.
 *
 * wholecycle_tv is filled in by cpr_tod_get() at the start of
 * cpr_suspend().  cpr_suspend_succeeded is tested by cpr_resume() to
 * choose the steps that only apply after the system really went down.
 */
cpr_time_t wholecycle_tv;
int cpr_suspend_succeeded;
pfn_t curthreadpfn;
int curthreadremapped;

/*
 * More externs.  cpu_pause_func is pointed at i_cpr_save_context by
 * cpr_suspend_cpus() and reset to NULL when the cpus are restarted.
 */
extern cpuset_t cpu_ready_set;
extern void *(*cpu_pause_func)(void *);

extern processorid_t i_cpr_bootcpuid(void);
extern cpu_t *i_cpr_bootcpu(void);
extern void tsc_adjust_delta(hrtime_t tdelta);
extern void tsc_resume(void);
extern int tsc_resume_in_cyclic;
100 
/*
 * Set this variable to 1 to have device drivers resume in a
 * uniprocessor environment.  This allows drivers that assume
 * they resume on a UP machine to continue to work.  It should be
 * deprecated once the broken drivers are fixed.
 *
 * When it is non-zero, cpr_resume() (x86 only) pauses the other cpus
 * before calling cpr_resume_devices() and restarts them afterwards.
 */
int cpr_resume_uniproc = 0;
108 
109 /*
110  * save or restore abort_enable;  this prevents a drop
111  * to kadb or prom during cpr_resume_devices() when
112  * there is no kbd present;  see abort_sequence_enter()
113  */
114 static void
115 cpr_sae(int stash)
116 {
117 	static int saved_ae = -1;
118 
119 	if (stash) {
120 		saved_ae = abort_enable;
121 		abort_enable = 0;
122 	} else if (saved_ae != -1) {
123 		abort_enable = saved_ae;
124 		saved_ae = -1;
125 	}
126 }
127 
128 
129 /*
130  * The main switching point for cpr, this routine starts the ckpt
131  * and state file saving routines; on resume the control is
132  * returned back to here and it then calls the resume routine.
133  */
134 int
135 cpr_main(int sleeptype)
136 {
137 	int rc, rc2;
138 	label_t saveq;
139 	klwp_t *tlwp = ttolwp(curthread);
140 
141 	if (sleeptype == CPR_TODISK) {
142 		if ((rc = cpr_default_setup(1)) != 0)
143 			return (rc);
144 		ASSERT(tlwp);
145 		saveq = tlwp->lwp_qsav;
146 	}
147 
148 	if (sleeptype == CPR_TORAM) {
149 		rc = cpr_suspend(sleeptype);
150 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
151 		if (rc == 0) {
152 			int i_cpr_power_down(int sleeptype);
153 
154 			/*
155 			 * From this point on, we should be at a high
156 			 * spl, interrupts disabled, and all but one
157 			 * cpu's paused (effectively UP/single threaded).
158 			 * So this is were we want to put ASSERTS()
159 			 * to let us know otherwise.
160 			 */
161 			ASSERT(cpus_paused());
162 
163 			/*
164 			 * Now do the work of actually putting this
165 			 * machine to sleep!
166 			 */
167 			rc = i_cpr_power_down(sleeptype);
168 			if (rc == 0) {
169 				PMD(PMD_SX, ("back from succssful suspend\n"))
170 			}
171 			/*
172 			 * We do care about the return value from cpr_resume
173 			 * at this point, as it will tell us if one of the
174 			 * resume functions failed (cpr_resume_devices())
175 			 * However, for this to return and _not_ panic, means
176 			 * that we must be in one of the test functions.  So
177 			 * check for that and return an appropriate message.
178 			 */
179 			rc2 = cpr_resume(sleeptype);
180 			if (rc2 != 0) {
181 				ASSERT(cpr_test_point > 0);
182 				cmn_err(CE_NOTE,
183 				    "cpr_resume returned non-zero: %d\n", rc2);
184 				PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
185 			}
186 			ASSERT(!cpus_paused());
187 		} else {
188 			PMD(PMD_SX, ("failed suspend, resuming\n"))
189 			rc = cpr_resume(sleeptype);
190 		}
191 		return (rc);
192 	}
193 	/*
194 	 * Remember where we are for resume after reboot
195 	 */
196 	if (!setjmp(&tlwp->lwp_qsav)) {
197 		/*
198 		 * try to checkpoint the system, if failed return back
199 		 * to userland, otherwise power off.
200 		 */
201 		rc = cpr_suspend(sleeptype);
202 		if (rc || cpr_reusable_mode) {
203 			/*
204 			 * We don't really want to go down, or
205 			 * something went wrong in suspend, do what we can
206 			 * to put the system back to an operable state then
207 			 * return back to userland.
208 			 */
209 			PMD(PMD_SX, ("failed suspend, resuming\n"))
210 			(void) cpr_resume(sleeptype);
211 			PMD(PMD_SX, ("back from failed suspend resume\n"))
212 		}
213 	} else {
214 		/*
215 		 * This is the resumed side of longjmp, restore the previous
216 		 * longjmp pointer if there is one so this will be transparent
217 		 * to the world.
218 		 * This path is only for CPR_TODISK, where we reboot
219 		 */
220 		ASSERT(sleeptype == CPR_TODISK);
221 		tlwp->lwp_qsav = saveq;
222 		CPR->c_flags &= ~C_SUSPENDING;
223 		CPR->c_flags |= C_RESUMING;
224 
225 		/*
226 		 * resume the system back to the original state
227 		 */
228 		rc = cpr_resume(sleeptype);
229 		PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
230 		    rc))
231 	}
232 
233 	(void) cpr_default_setup(0);
234 
235 	return (rc);
236 }
237 
238 
239 #if defined(__sparc)
240 
241 /*
242  * check/disable or re-enable UFS logging
243  */
244 static void
245 cpr_log_status(int enable, int *svstat, vnode_t *vp)
246 {
247 	int cmd, status, error;
248 	char *str, *able;
249 	fiolog_t fl;
250 	refstr_t *mntpt;
251 
252 	str = "cpr_log_status";
253 	bzero(&fl, sizeof (fl));
254 	fl.error = FIOLOG_ENONE;
255 
256 	/*
257 	 * when disabling, first get and save logging status (0 or 1)
258 	 */
259 	if (enable == 0) {
260 		if (error = VOP_IOCTL(vp, _FIOISLOG,
261 		    (uintptr_t)&status, FKIOCTL, CRED(), NULL, NULL)) {
262 			mntpt = vfs_getmntpoint(vp->v_vfsp);
263 			prom_printf("%s: \"%s\", cant get logging "
264 			    "status, error %d\n", str, refstr_value(mntpt),
265 			    error);
266 			refstr_rele(mntpt);
267 			return;
268 		}
269 		*svstat = status;
270 		if (cpr_debug & CPR_DEBUG5) {
271 			mntpt = vfs_getmntpoint(vp->v_vfsp);
272 			errp("%s: \"%s\", logging status = %d\n",
273 			    str, refstr_value(mntpt), status);
274 			refstr_rele(mntpt);
275 		};
276 
277 		able = "disable";
278 		cmd = _FIOLOGDISABLE;
279 	} else {
280 		able = "enable";
281 		cmd = _FIOLOGENABLE;
282 	}
283 
284 	/*
285 	 * disable or re-enable logging when the saved status is 1
286 	 */
287 	if (*svstat == 1) {
288 		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
289 		    FKIOCTL, CRED(), NULL, NULL);
290 		if (error) {
291 			mntpt = vfs_getmntpoint(vp->v_vfsp);
292 			prom_printf("%s: \"%s\", cant %s logging, error %d\n",
293 			    str, refstr_value(mntpt), able, error);
294 			refstr_rele(mntpt);
295 		} else {
296 			if (cpr_debug & CPR_DEBUG5) {
297 				mntpt = vfs_getmntpoint(vp->v_vfsp);
298 				errp("%s: \"%s\", logging is now %sd\n",
299 				    str, refstr_value(mntpt), able);
300 				refstr_rele(mntpt);
301 			};
302 		}
303 	}
304 
305 	/*
306 	 * when enabling logging, reset the saved status
307 	 * to unknown for next time
308 	 */
309 	if (enable)
310 		*svstat = -1;
311 }
312 
313 /*
314  * enable/disable UFS logging on filesystems containing cpr_default_path
315  * and cpr statefile.  since the statefile can be on any fs, that fs
316  * needs to be handled separately.  this routine and cprboot expect that
317  * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
318  * is loaded from the device with rootfs and uses the same device to open
319  * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
320  * file outside of rootfs would cause errors during cprboot, plus cpr and
321  * fsck problems with the new fs if logging were enabled.
322  */
323 
324 static int
325 cpr_ufs_logging(int enable)
326 {
327 	static int def_status = -1, sf_status = -1;
328 	struct vfs *vfsp;
329 	char *fname;
330 	vnode_t *vp;
331 	int error;
332 
333 	if (cpr_reusable_mode)
334 		return (0);
335 
336 	if (error = cpr_open_deffile(FREAD, &vp))
337 		return (error);
338 	vfsp = vp->v_vfsp;
339 	if (!cpr_is_ufs(vfsp)) {
340 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
341 		VN_RELE(vp);
342 		return (0);
343 	}
344 
345 	cpr_log_status(enable, &def_status, vp);
346 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
347 	VN_RELE(vp);
348 
349 	fname = cpr_build_statefile_path();
350 	if (fname == NULL)
351 		return (ENOENT);
352 	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
353 	    0600, &vp, CRCREAT, 0)) {
354 		prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
355 		    "error %d\n", fname, error);
356 		return (error);
357 	}
358 
359 	/*
360 	 * check logging status for the statefile if it resides
361 	 * on a different fs and the type is a regular file
362 	 */
363 	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
364 		cpr_log_status(enable, &sf_status, vp);
365 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
366 	VN_RELE(vp);
367 
368 	return (0);
369 }
370 #endif
371 
372 
373 /*
374  * Check if klmmod is loaded and call a lock manager service; if klmmod
375  * is not loaded, the services aren't needed and a call would trigger a
376  * modload, which would block since another thread would never run.
377  */
378 static void
379 cpr_lock_mgr(void (*service)(void))
380 {
381 	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
382 		(*service)();
383 }
384 
385 int
386 cpr_suspend_cpus(void)
387 {
388 	int	ret = 0;
389 	extern void *i_cpr_save_context(void *arg);
390 
391 	mutex_enter(&cpu_lock);
392 
393 	/*
394 	 * the machine could not have booted without a bootcpu
395 	 */
396 	ASSERT(i_cpr_bootcpu() != NULL);
397 
398 	/*
399 	 * bring all the offline cpus online
400 	 */
401 	if ((ret = cpr_all_online())) {
402 		mutex_exit(&cpu_lock);
403 		return (ret);
404 	}
405 
406 	/*
407 	 * Set the affinity to be the boot processor
408 	 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
409 	 */
410 	affinity_set(i_cpr_bootcpuid());
411 
412 	ASSERT(CPU->cpu_id == 0);
413 
414 	PMD(PMD_SX, ("curthread running on bootcpu\n"))
415 
416 	/*
417 	 * pause all other running CPUs and save the CPU state at the sametime
418 	 */
419 	cpu_pause_func = i_cpr_save_context;
420 	pause_cpus(NULL);
421 
422 	mutex_exit(&cpu_lock);
423 
424 	return (0);
425 }
426 
/*
 * Take the system down to a checkpointable state and, for CPR_TODISK,
 * write the state file.  The following are sequentially executed:
 *
 *    - Request all user threads to stop themselves
 *    - push out and invalidate user pages
 *    - bring statefile inode incore to prevent a miss later
 *    - request all daemons to stop
 *    - check and make sure all threads are stopped
 *    - sync the file system
 *    - suspend all devices
 *    - block interrupts
 *    - dump system state and memory to state file
 *    - SPARC code will not be called with CPR_TORAM, caller filters
 *
 * sleeptype:	CPR_TODISK or CPR_TORAM.  Every __sparc-only section
 *		asserts CPR_TODISK.
 *
 * Returns 0 on success.  A failing step returns its non-zero error
 * immediately (ENXIO when pm_reattach_noinvol() fails); nothing is
 * undone here.  NOTE(review): rollback presumably relies on the substate
 * recorded by the most recent cpr_set_substate() call, which
 * cpr_resume() switches on -- confirm against cpr_set_substate().
 */
static int
cpr_suspend(int sleeptype)
{
#if defined(__sparc)
	int sf_realloc, nverr;
#endif
	int	rc = 0;
	int	skt_rc = 0;

	PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
	cpr_set_substate(C_ST_SUSPEND_BEGIN);

	cpr_suspend_init(sleeptype);

	cpr_save_time();

	/* time stamp for the whole suspend cycle */
	cpr_tod_get(&wholecycle_tv);
	CPR_STAT_EVENT_START("Suspend Total");

	i_cpr_alloc_cpus();

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	if (!cpr_reusable_mode) {
		/*
		 * We need to validate default file before fs
		 * functionality is disabled.
		 */
		if (rc = cpr_validate_definfo(0))
			return (rc);
	}
	i_cpr_save_machdep_info();
#endif

	PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
	/* Stop PM scans ASAP */
	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);

	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
	    NULL, NULL, PM_DEP_WAIT, NULL, 0);

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	cpr_set_substate(C_ST_MP_OFFLINE);
	if (rc = cpr_mp_offline())
		return (rc);
#endif
	/*
	 * Ask Xorg to suspend the frame buffer, and wait for it to happen.
	 * srn_signal is a function pointer that may be NULL, in which
	 * case there is nobody to notify.
	 */
	mutex_enter(&srn_clone_lock);
	if (srn_signal) {
		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
		    "SRN_SUSPEND_REQ)\n"))
		srn_inuse = 1;	/* because *(srn_signal) cv_waits */
		(*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
		srn_inuse = 0;
	} else {
		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
	}
	mutex_exit(&srn_clone_lock);

	/*
	 * Ask the user threads to stop by themselves, but
	 * if they don't or can't after 3 retries, we give up on CPR.
	 * The 3 retry is not a random number because 2 is possible if
	 * a thread has been forked before the parent thread is stopped.
	 */
	CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
	CPR_STAT_EVENT_START("  stop users");
	cpr_set_substate(C_ST_STOP_USER_THREADS);
	PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
	if (rc = cpr_stop_user_threads())
		return (rc);
	CPR_STAT_EVENT_END("  stop users");
	CPR_DEBUG(CPR_DEBUG1, "done\n");

	PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
	pm_save_direct_levels();

	/*
	 * User threads are stopped.  We will start communicating with the
	 * user via prom_printf (some debug output may have already happened)
	 * so let anybody who cares know about this (bug 4096122)
	 */
	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);

	PMD(PMD_SX, ("cpr_suspend: send notice\n"))
	/* the notice is only sent in non-DEBUG builds */
#ifndef DEBUG
	cpr_send_notice();
	if (cpr_debug)
		prom_printf("\n");
#endif

	PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);

	/*
	 * Reattach any drivers which originally exported the
	 * no-involuntary-power-cycles property.  We need to do this before
	 * stopping kernel threads because modload is implemented using
	 * a kernel thread.
	 */
	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
	PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
	if (!pm_reattach_noinvol())
		return (ENXIO);

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	/*
	 * if ufs logging is enabled, we need to disable before
	 * stopping kernel threads so that ufs delete and roll
	 * threads can do the work.
	 */
	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
	if (rc = cpr_ufs_logging(0))
		return (rc);

	/*
	 * Use sync_all to swap out all user pages and find out how much
	 * extra space needed for user pages that don't have back store
	 * space left.
	 */
	CPR_STAT_EVENT_START("  swapout upages");
	vfs_sync(SYNC_ALL);
	CPR_STAT_EVENT_END("  swapout upages");

	cpr_set_bitmap_size();

	/*
	 * Re-entered by the goto below when cpr_dump() returns ENOSPC.
	 */
alloc_statefile:
	/*
	 * If our last state was C_ST_DUMP_NOSPC, we're trying to
	 * realloc the statefile, otherwise this is the first attempt.
	 */
	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;

	CPR_STAT_EVENT_START("  alloc statefile");
	cpr_set_substate(C_ST_STATEF_ALLOC);
	if (rc = cpr_alloc_statefile(sf_realloc)) {
		if (sf_realloc)
			errp("realloc failed\n");
		return (rc);
	}
	CPR_STAT_EVENT_END("  alloc statefile");

	/*
	 * Sync the filesystem to preserve its integrity.
	 *
	 * This sync is also used to flush out all B_DELWRI buffers
	 * (fs cache) which are mapped and neither dirty nor referenced
	 * before cpr_invalidate_pages destroys them.
	 * fsflush does similar thing.
	 */
	sync();

	/*
	 * destroy all clean file mapped kernel pages
	 */
	CPR_STAT_EVENT_START("  clean pages");
	CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
	CPR_STAT_EVENT_END("  clean pages");
#endif


	/*
	 * Hooks needed by lock manager prior to suspending.
	 * Refer to code for more comments.
	 */
	PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
	cpr_lock_mgr(lm_cprsuspend);

	/*
	 * Now suspend all the devices.  pm_powering_down is raised only
	 * for the duration of cpr_suspend_devices().
	 */
	CPR_STAT_EVENT_START("  stop drivers");
	CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
	cpr_set_substate(C_ST_SUSPEND_DEVICES);
	pm_powering_down = 1;
	PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
	rc = cpr_suspend_devices(ddi_root_node());
	pm_powering_down = 0;
	if (rc)
		return (rc);
	CPR_DEBUG(CPR_DEBUG1, "done\n");
	CPR_STAT_EVENT_END("  stop drivers");

	/*
	 * Stop all daemon activities
	 */
	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
	PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
	if (skt_rc = cpr_stop_kernel_threads())
		return (skt_rc);

	PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);

	PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
	pm_reattach_noinvol_fini();

	/* stash abort_enable and force it to 0; cpr_resume() restores it */
	cpr_sae(1);

	PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	if (sleeptype == CPR_TODISK) {
		/*
		 * It's safer to do tod_get before we disable all
		 * interrupts.
		 */
		CPR_STAT_EVENT_START("  write statefile");
	}

	/*
	 * it's time to ignore the outside world, stop the real time
	 * clock and disable any further interrupt activity.
	 */
	PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */

	mutex_enter(&cpu_lock);
	PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
	cyclic_suspend();
	mutex_exit(&cpu_lock);

	/*
	 * Due to the different methods of resuming the system between
	 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
	 * and CPR_TORAM (restart via reset into existing kernel image)
	 * cpus are not suspended and restored in the SPARC case, since it
	 * is necessary to restart the cpus and pause them before restoring
	 * the OBP image
	 */

#if defined(__x86)

	/* pause aux cpus */
	PMD(PMD_SX, ("pause aux cpus\n"))

	cpr_set_substate(C_ST_MP_PAUSED);

	if ((rc = cpr_suspend_cpus()) != 0)
		return (rc);
#endif

	PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
	i_cpr_stop_intr();
	CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");

	/*
	 * Since we will now disable the mechanism that causes prom_printfs
	 * to power up (if needed) the console fb/monitor, we assert that
	 * it must be up now.
	 */
	ASSERT(pm_cfb_is_up());
	PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
	prom_suspend_prepost();

#if defined(__sparc)
	/*
	 * getting ready to write ourself out, flush the register
	 * windows to make sure that our stack is good when we
	 * come back on the resume side.
	 */
	flush_windows();
#endif

	/*
	 * For S3, we're done.  rc is still 0 here: every earlier failure
	 * has already returned.
	 */
	if (sleeptype == CPR_TORAM) {
		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
		cpr_set_substate(C_ST_NODUMP);
		return (rc);
	}
#if defined(__sparc)
	/*
	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
	 *
	 * The system is quiesced at this point, we are ready to either dump
	 * to the state file for a extended sleep or a simple shutdown for
	 * systems with non-volatile memory.
	 */

	/*
	 * special handling for reusable:
	 */
	if (cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_1);
		if (nverr = cpr_set_properties(1))
			return (nverr);
	}

	cpr_set_substate(C_ST_DUMP);
	rc = cpr_dump(C_VP);

	/*
	 * if any error occurred during dump, more
	 * special handling for reusable:
	 */
	if (rc && cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_0);
		if (nverr = cpr_set_properties(0))
			return (nverr);
	}

	if (rc == ENOSPC) {
		/*
		 * The statefile was too small: resume from the
		 * C_ST_DUMP_NOSPC substate, then go back and try to
		 * reallocate the statefile.
		 */
		cpr_set_substate(C_ST_DUMP_NOSPC);
		(void) cpr_resume(sleeptype);
		goto alloc_statefile;
	} else if (rc == 0) {
		if (cpr_reusable_mode) {
			/* does not return; control resumes in cpr_main() */
			cpr_set_substate(C_ST_REUSABLE);
			longjmp(&ttolwp(curthread)->lwp_qsav);
		} else
			rc = cpr_set_properties(1);
	}
#endif
	PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
	return (rc);
}
765 
766 void
767 cpr_resume_cpus(void)
768 {
769 	/*
770 	 * this is a cut down version of start_other_cpus()
771 	 * just do the initialization to wake the other cpus
772 	 */
773 
774 #if defined(__x86)
775 	/*
776 	 * Initialize our syscall handlers
777 	 */
778 	init_cpu_syscall(CPU);
779 
780 #endif
781 
782 	i_cpr_pre_resume_cpus();
783 
784 	/*
785 	 * Restart the paused cpus
786 	 */
787 	mutex_enter(&cpu_lock);
788 	start_cpus();
789 	mutex_exit(&cpu_lock);
790 
791 	i_cpr_post_resume_cpus();
792 
793 	mutex_enter(&cpu_lock);
794 	/*
795 	 * Restore this cpu to use the regular cpu_pause(), so that
796 	 * online and offline will work correctly
797 	 */
798 	cpu_pause_func = NULL;
799 
800 	/*
801 	 * clear the affinity set in cpr_suspend_cpus()
802 	 */
803 	affinity_clear();
804 
805 	/*
806 	 * offline all the cpus that were brought online during suspend
807 	 */
808 	cpr_restore_offline();
809 
810 	mutex_exit(&cpu_lock);
811 }
812 
813 void
814 cpr_unpause_cpus(void)
815 {
816 	/*
817 	 * Now restore the system back to what it was before we suspended
818 	 */
819 
820 	PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))
821 
822 	mutex_enter(&cpu_lock);
823 
824 	/*
825 	 * Restore this cpu to use the regular cpu_pause(), so that
826 	 * online and offline will work correctly
827 	 */
828 	cpu_pause_func = NULL;
829 
830 	/*
831 	 * Restart the paused cpus
832 	 */
833 	start_cpus();
834 
835 	/*
836 	 * clear the affinity set in cpr_suspend_cpus()
837 	 */
838 	affinity_clear();
839 
840 	/*
841 	 * offline all the cpus that were brought online during suspend
842 	 */
843 	cpr_restore_offline();
844 
845 	mutex_exit(&cpu_lock);
846 }
847 
848 /*
849  * Bring the system back up from a checkpoint, at this point
850  * the VM has been minimally restored by boot, the following
851  * are executed sequentially:
852  *
853  *    - machdep setup and enable interrupts (mp startup if it's mp)
854  *    - resume all devices
855  *    - restart daemons
856  *    - put all threads back on run queue
857  */
858 static int
859 cpr_resume(int sleeptype)
860 {
861 	cpr_time_t pwron_tv, *ctp;
862 	char *str;
863 	int rc = 0;
864 
865 	/*
866 	 * The following switch is used to resume the system
867 	 * that was suspended to a different level.
868 	 */
869 	CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
870 	PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
871 
872 	/*
873 	 * Note:
874 	 *
875 	 * The rollback labels rb_xyz do not represent the cpr resume
876 	 * state when event 'xyz' has happened. Instead they represent
877 	 * the state during cpr suspend when event 'xyz' was being
878 	 * entered (and where cpr suspend failed). The actual call that
879 	 * failed may also need to be partially rolled back, since they
880 	 * aren't atomic in most cases.  In other words, rb_xyz means
881 	 * "roll back all cpr suspend events that happened before 'xyz',
882 	 * and the one that caused the failure, if necessary."
883 	 */
884 	switch (CPR->c_substate) {
885 #if defined(__sparc)
886 	case C_ST_DUMP:
887 		/*
888 		 * This is most likely a full-fledged cpr_resume after
889 		 * a complete and successful cpr suspend. Just roll back
890 		 * everything.
891 		 */
892 		ASSERT(sleeptype == CPR_TODISK);
893 		break;
894 
895 	case C_ST_REUSABLE:
896 	case C_ST_DUMP_NOSPC:
897 	case C_ST_SETPROPS_0:
898 	case C_ST_SETPROPS_1:
899 		/*
900 		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
901 		 * special switch cases here. The other two do not have
902 		 * any state change during cpr_suspend() that needs to
903 		 * be rolled back. But these are exit points from
904 		 * cpr_suspend, so theoretically (or in the future), it
905 		 * is possible that a need for roll back of a state
906 		 * change arises between these exit points.
907 		 */
908 		ASSERT(sleeptype == CPR_TODISK);
909 		goto rb_dump;
910 #endif
911 
912 	case C_ST_NODUMP:
913 		PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
914 		goto rb_nodump;
915 
916 	case C_ST_STOP_KERNEL_THREADS:
917 		PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
918 		goto rb_stop_kernel_threads;
919 
920 	case C_ST_SUSPEND_DEVICES:
921 		PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
922 		goto rb_suspend_devices;
923 
924 #if defined(__sparc)
925 	case C_ST_STATEF_ALLOC:
926 		ASSERT(sleeptype == CPR_TODISK);
927 		goto rb_statef_alloc;
928 
929 	case C_ST_DISABLE_UFS_LOGGING:
930 		ASSERT(sleeptype == CPR_TODISK);
931 		goto rb_disable_ufs_logging;
932 #endif
933 
934 	case C_ST_PM_REATTACH_NOINVOL:
935 		PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
936 		goto rb_pm_reattach_noinvol;
937 
938 	case C_ST_STOP_USER_THREADS:
939 		PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
940 		goto rb_stop_user_threads;
941 
942 #if defined(__sparc)
943 	case C_ST_MP_OFFLINE:
944 		PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
945 		goto rb_mp_offline;
946 #endif
947 
948 #if defined(__x86)
949 	case C_ST_MP_PAUSED:
950 		PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
951 		goto rb_mp_paused;
952 #endif
953 
954 
955 	default:
956 		PMD(PMD_SX, ("cpr_resume: others\n"))
957 		goto rb_others;
958 	}
959 
960 rb_all:
961 	/*
962 	 * perform platform-dependent initialization
963 	 */
964 	if (cpr_suspend_succeeded)
965 		i_cpr_machdep_setup();
966 
967 	/*
968 	 * system did not really go down if we jump here
969 	 */
970 rb_dump:
971 	/*
972 	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
973 	 *
974 	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
975 	 */
976 rb_nodump:
977 	/*
978 	 * If we did suspend to RAM, we didn't generate a dump
979 	 */
980 	PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
981 	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
982 	if (cpr_suspend_succeeded) {
983 		PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
984 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
985 	}
986 
987 	prom_resume_prepost();
988 #if !defined(__sparc)
989 	/*
990 	 * Need to sync the software clock with the hardware clock.
991 	 * On Sparc, this occurs in the sparc-specific cbe.  However
992 	 * on x86 this needs to be handled _before_ we bring other cpu's
993 	 * back online.  So we call a resume function in timestamp.c
994 	 */
995 	if (tsc_resume_in_cyclic == 0)
996 		tsc_resume();
997 
998 #endif
999 
1000 #if defined(__sparc)
1001 	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
1002 		kdi_dvec_cpr_restart();
1003 #endif
1004 
1005 
1006 #if defined(__x86)
1007 rb_mp_paused:
1008 	PT(PT_RMPO);
1009 	PMD(PMD_SX, ("resume aux cpus\n"))
1010 
1011 	if (cpr_suspend_succeeded) {
1012 		cpr_resume_cpus();
1013 	} else {
1014 		cpr_unpause_cpus();
1015 	}
1016 #endif
1017 
1018 	/*
1019 	 * let the tmp callout catch up.
1020 	 */
1021 	PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
1022 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
1023 
1024 	i_cpr_enable_intr();
1025 
1026 	mutex_enter(&cpu_lock);
1027 	PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
1028 	cyclic_resume();
1029 	mutex_exit(&cpu_lock);
1030 
1031 	PMD(PMD_SX, ("cpr_resume: handle xc\n"))
1032 	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
1033 
1034 	PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
1035 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
1036 
1037 	/*
1038 	 * statistics gathering
1039 	 */
1040 	if (cpr_suspend_succeeded) {
1041 		/*
1042 		 * Prevent false alarm in tod_validate() due to tod
1043 		 * value change between suspend and resume
1044 		 */
1045 		cpr_tod_status_set(TOD_CPR_RESUME_DONE);
1046 
1047 		cpr_convert_promtime(&pwron_tv);
1048 
1049 		ctp = &cpr_term.tm_shutdown;
1050 		if (sleeptype == CPR_TODISK)
1051 			CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
1052 		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
1053 
1054 		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
1055 
1056 		str = "  prom time";
1057 		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
1058 		ctp = &cpr_term.tm_cprboot_start;
1059 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1060 
1061 		str = "  read statefile";
1062 		CPR_STAT_EVENT_START_TMZ(str, ctp);
1063 		ctp = &cpr_term.tm_cprboot_end;
1064 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1065 	}
1066 
1067 rb_stop_kernel_threads:
1068 	/*
1069 	 * Put all threads back to where they belong; get the kernel
1070 	 * daemons straightened up too. Note that the callback table
1071 	 * locked during cpr_stop_kernel_threads() is released only
1072 	 * in cpr_start_kernel_threads(). Ensure modunloading is
1073 	 * disabled before starting kernel threads, we don't want
1074 	 * modunload thread to start changing device tree underneath.
1075 	 */
1076 	PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
1077 	modunload_disable();
1078 	PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
1079 	cpr_start_kernel_threads();
1080 
1081 rb_suspend_devices:
1082 	CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
1083 	CPR_STAT_EVENT_START("  start drivers");
1084 
1085 	PMD(PMD_SX,
1086 	    ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
1087 	    cpr_resume_uniproc))
1088 
1089 #if defined(__x86)
1090 	/*
1091 	 * If cpr_resume_uniproc is set, then pause all the other cpus
1092 	 * apart from the current cpu, so that broken drivers that think
1093 	 * that they are on a uniprocessor machine will resume
1094 	 */
1095 	if (cpr_resume_uniproc) {
1096 		mutex_enter(&cpu_lock);
1097 		pause_cpus(NULL);
1098 		mutex_exit(&cpu_lock);
1099 	}
1100 #endif
1101 
1102 	/*
1103 	 * The policy here is to continue resume everything we can if we did
1104 	 * not successfully finish suspend; and panic if we are coming back
1105 	 * from a fully suspended system.
1106 	 */
1107 	PMD(PMD_SX, ("cpr_resume: resume devices\n"))
1108 	rc = cpr_resume_devices(ddi_root_node(), 0);
1109 
1110 	cpr_sae(0);
1111 
1112 	str = "Failed to resume one or more devices.";
1113 
1114 	if (rc) {
1115 		if (CPR->c_substate == C_ST_DUMP ||
1116 		    (sleeptype == CPR_TORAM &&
1117 		    CPR->c_substate == C_ST_NODUMP)) {
1118 			if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
1119 				PMD(PMD_SX, ("cpr_resume: resume device "
1120 				    "warn\n"))
1121 				cpr_err(CE_WARN, str);
1122 			} else {
1123 				PMD(PMD_SX, ("cpr_resume: resume device "
1124 				    "panic\n"))
1125 				cpr_err(CE_PANIC, str);
1126 			}
1127 		} else {
1128 			PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
1129 			cpr_err(CE_WARN, str);
1130 		}
1131 	}
1132 
1133 	CPR_STAT_EVENT_END("  start drivers");
1134 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1135 
1136 #if defined(__x86)
1137 	/*
1138 	 * If cpr_resume_uniproc is set, then unpause all the processors
1139 	 * that were paused before resuming the drivers
1140 	 */
1141 	if (cpr_resume_uniproc) {
1142 		mutex_enter(&cpu_lock);
1143 		start_cpus();
1144 		mutex_exit(&cpu_lock);
1145 	}
1146 #endif
1147 
1148 	/*
1149 	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
1150 	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
1151 	 * modunloading now.
1152 	 */
1153 	if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
1154 		PMD(PMD_SX, ("cpr_resume: modload enable\n"))
1155 		modunload_enable();
1156 	}
1157 
1158 	/*
1159 	 * Hooks needed by lock manager prior to resuming.
1160 	 * Refer to code for more comments.
1161 	 */
1162 	PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
1163 	cpr_lock_mgr(lm_cprresume);
1164 
1165 #if defined(__sparc)
1166 	/*
1167 	 * This is a partial (half) resume during cpr suspend, we
1168 	 * haven't yet given up on the suspend. On return from here,
1169 	 * cpr_suspend() will try to reallocate and retry the suspend.
1170 	 */
1171 	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
1172 		return (0);
1173 	}
1174 
1175 	if (sleeptype == CPR_TODISK) {
1176 rb_statef_alloc:
1177 		cpr_statef_close();
1178 
1179 rb_disable_ufs_logging:
1180 		/*
1181 		 * if ufs logging was disabled, re-enable
1182 		 */
1183 		(void) cpr_ufs_logging(1);
1184 	}
1185 #endif
1186 
1187 rb_pm_reattach_noinvol:
1188 	/*
1189 	 * When pm_reattach_noinvol() succeeds, modunload_thread will
1190 	 * remain disabled until after cpr suspend passes the
1191 	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
1192 	 * cpr suspend reaches this state, we'll need to enable modunload
1193 	 * thread during rollback.
1194 	 */
1195 	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
1196 	    CPR->c_substate == C_ST_STATEF_ALLOC ||
1197 	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
1198 	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
1199 		PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
1200 		pm_reattach_noinvol_fini();
1201 	}
1202 
1203 	PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
1204 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
1205 	PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
1206 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
1207 
1208 	PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
1209 	pm_restore_direct_levels();
1210 
1211 rb_stop_user_threads:
1212 	CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
1213 	PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
1214 	cpr_start_user_threads();
1215 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1216 	/*
1217 	 * Ask Xorg to resume the frame buffer, and wait for it to happen
1218 	 */
1219 	mutex_enter(&srn_clone_lock);
1220 	if (srn_signal) {
1221 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
1222 		    "SRN_NORMAL_RESUME)\n"))
1223 		srn_inuse = 1;		/* because (*srn_signal) cv_waits */
1224 		(*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
1225 		srn_inuse = 0;
1226 	} else {
1227 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
1228 	}
1229 	mutex_exit(&srn_clone_lock);
1230 
1231 #if defined(__sparc)
1232 rb_mp_offline:
1233 	if (cpr_mp_online())
1234 		cpr_err(CE_WARN, "Failed to online all the processors.");
1235 #endif
1236 
1237 rb_others:
1238 	PMD(PMD_SX, ("cpr_resume: dep thread\n"))
1239 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
1240 	    PM_DEP_WAIT, NULL, 0);
1241 
1242 	PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
1243 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
1244 
1245 	if (cpr_suspend_succeeded) {
1246 		cpr_stat_record_events();
1247 	}
1248 
1249 #if defined(__sparc)
1250 	if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
1251 		cpr_clear_definfo();
1252 #endif
1253 
1254 	i_cpr_free_cpus();
1255 	CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
1256 	PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
1257 	cpr_signal_user(SIGTHAW);
1258 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1259 
1260 	CPR_STAT_EVENT_END("Resume Total");
1261 
1262 	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
1263 	CPR_STAT_EVENT_END("WHOLE CYCLE");
1264 
1265 	if (cpr_debug & CPR_DEBUG1)
1266 		cmn_err(CE_CONT, "\nThe system is back where you left!\n");
1267 
1268 	CPR_STAT_EVENT_START("POST CPR DELAY");
1269 
1270 #ifdef CPR_STAT
1271 	ctp = &cpr_term.tm_shutdown;
1272 	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
1273 	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
1274 
1275 	CPR_STAT_EVENT_PRINT();
1276 #endif /* CPR_STAT */
1277 
1278 	PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
1279 	return (rc);
1280 }
1281 
1282 static void
1283 cpr_suspend_init(int sleeptype)
1284 {
1285 	cpr_time_t *ctp;
1286 
1287 	cpr_stat_init();
1288 
1289 	/*
1290 	 * If cpr_suspend() failed before cpr_dump() gets a chance
1291 	 * to reinitialize the terminator of the statefile,
1292 	 * the values of the old terminator will still linger around.
1293 	 * Since the terminator contains information that we need to
1294 	 * decide whether suspend succeeded or not, we need to
1295 	 * reinitialize it as early as possible.
1296 	 */
1297 	cpr_term.real_statef_size = 0;
1298 	ctp = &cpr_term.tm_shutdown;
1299 	bzero(ctp, sizeof (*ctp));
1300 	ctp = &cpr_term.tm_cprboot_start;
1301 	bzero(ctp, sizeof (*ctp));
1302 	ctp = &cpr_term.tm_cprboot_end;
1303 	bzero(ctp, sizeof (*ctp));
1304 
1305 	if (sleeptype == CPR_TODISK) {
1306 		/*
1307 		 * Lookup the physical address of our thread structure.
1308 		 * This should never be invalid and the entire thread structure
1309 		 * is expected to reside within the same pfn.
1310 		 */
1311 		curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
1312 		ASSERT(curthreadpfn != PFN_INVALID);
1313 		ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
1314 		    (caddr_t)curthread + sizeof (kthread_t) - 1));
1315 	}
1316 
1317 	cpr_suspend_succeeded = 0;
1318 }
1319 
1320 /*
1321  * bring all the offline cpus online
1322  */
1323 static int
1324 cpr_all_online(void)
1325 {
1326 	int	rc = 0;
1327 
1328 #ifdef	__sparc
1329 	/*
1330 	 * do nothing
1331 	 */
1332 #else
1333 
1334 	cpu_t	*cp;
1335 
1336 	ASSERT(MUTEX_HELD(&cpu_lock));
1337 
1338 	cp = cpu_list;
1339 	do {
1340 		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1341 		if (!CPU_ACTIVE(cp)) {
1342 			if ((rc = cpu_online(cp)) != 0)
1343 				break;
1344 			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
1345 		}
1346 	} while ((cp = cp->cpu_next) != cpu_list);
1347 
1348 	if (rc) {
1349 		/*
1350 		 * an online operation failed so offline the cpus
1351 		 * that were onlined above to restore the system
1352 		 * to its original state
1353 		 */
1354 		cpr_restore_offline();
1355 	}
1356 #endif
1357 	return (rc);
1358 }
1359 
1360 /*
1361  * offline all the cpus that were brought online by cpr_all_online()
1362  */
1363 static void
1364 cpr_restore_offline(void)
1365 {
1366 
1367 #ifdef	__sparc
1368 	/*
1369 	 * do nothing
1370 	 */
1371 #else
1372 
1373 	cpu_t	*cp;
1374 	int	rc = 0;
1375 
1376 	ASSERT(MUTEX_HELD(&cpu_lock));
1377 
1378 	cp = cpu_list;
1379 	do {
1380 		if (CPU_CPR_IS_ONLINE(cp)) {
1381 			rc =  cpu_offline(cp, 0);
1382 			/*
1383 			 * this offline should work, since the cpu was
1384 			 * offline originally and was successfully onlined
1385 			 * by cpr_all_online()
1386 			 */
1387 			ASSERT(rc == 0);
1388 			cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1389 		}
1390 	} while ((cp = cp->cpu_next) != cpu_list);
1391 
1392 #endif
1393 
1394 }
1395