xref: /titanic_44/usr/src/uts/common/cpr/cpr_main.c (revision 5fd03bc0f2e00e7ba02316c2e08f45d52aab15db)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
/*
 * This module contains the guts of the checkpoint-resume mechanism.
 * All code in this module is platform independent.
 */
30 
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/callb.h>
34 #include <sys/processor.h>
35 #include <sys/machsystm.h>
36 #include <sys/clock.h>
37 #include <sys/vfs.h>
38 #include <sys/kmem.h>
39 #include <nfs/lm.h>
40 #include <sys/systm.h>
41 #include <sys/cpr.h>
42 #include <sys/bootconf.h>
43 #include <sys/cyclic.h>
44 #include <sys/filio.h>
45 #include <sys/fs/ufs_filio.h>
46 #include <sys/epm.h>
47 #include <sys/modctl.h>
48 #include <sys/reboot.h>
49 #include <sys/kdi.h>
50 #include <sys/promif.h>
51 #include <sys/srn.h>
52 #include <sys/cpr_impl.h>
53 
54 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
55 
56 extern struct cpr_terminator cpr_term;
57 
58 extern int cpr_alloc_statefile(int);
59 extern void cpr_start_kernel_threads(void);
60 extern void cpr_abbreviate_devpath(char *, char *);
61 extern void cpr_convert_promtime(cpr_time_t *);
62 extern void cpr_send_notice(void);
63 extern void cpr_set_bitmap_size(void);
64 extern void cpr_stat_init();
65 extern void cpr_statef_close(void);
66 extern void flush_windows(void);
67 extern void (*srn_signal)(int, int);
68 extern void init_cpu_syscall(struct cpu *);
69 extern void i_cpr_pre_resume_cpus();
70 extern void i_cpr_post_resume_cpus();
71 extern int cpr_is_ufs(struct vfs *);
72 
73 extern int pm_powering_down;
74 extern kmutex_t srn_clone_lock;
75 extern int srn_inuse;
76 
77 static int cpr_suspend(int);
78 static int cpr_resume(int);
79 static void cpr_suspend_init(int);
80 #if defined(__x86)
81 static int cpr_suspend_cpus(void);
82 static void cpr_resume_cpus(void);
83 #endif
84 static int cpr_all_online(void);
85 static void cpr_restore_offline(void);
86 
87 cpr_time_t wholecycle_tv;
88 int cpr_suspend_succeeded;
89 pfn_t curthreadpfn;
90 int curthreadremapped;
91 
92 extern cpuset_t cpu_ready_set;
93 
94 extern processorid_t i_cpr_bootcpuid(void);
95 extern cpu_t *i_cpr_bootcpu(void);
96 extern void tsc_adjust_delta(hrtime_t tdelta);
97 extern void tsc_resume(void);
98 extern int tsc_resume_in_cyclic;
99 
100 /*
101  * Set this variable to 1, to have device drivers resume in an
102  * uniprocessor environment. This is to allow drivers that assume
103  * that they resume on a UP machine to continue to work. Should be
104  * deprecated once the broken drivers are fixed
105  */
106 int cpr_resume_uniproc = 0;
107 
108 /*
109  * save or restore abort_enable;  this prevents a drop
110  * to kadb or prom during cpr_resume_devices() when
111  * there is no kbd present;  see abort_sequence_enter()
112  */
113 static void
114 cpr_sae(int stash)
115 {
116 	static int saved_ae = -1;
117 
118 	if (stash) {
119 		saved_ae = abort_enable;
120 		abort_enable = 0;
121 	} else if (saved_ae != -1) {
122 		abort_enable = saved_ae;
123 		saved_ae = -1;
124 	}
125 }
126 
127 
128 /*
129  * The main switching point for cpr, this routine starts the ckpt
130  * and state file saving routines; on resume the control is
131  * returned back to here and it then calls the resume routine.
132  */
133 int
134 cpr_main(int sleeptype)
135 {
136 	int rc, rc2;
137 	label_t saveq;
138 	klwp_t *tlwp = ttolwp(curthread);
139 
140 	if (sleeptype == CPR_TODISK) {
141 		if ((rc = cpr_default_setup(1)) != 0)
142 			return (rc);
143 		ASSERT(tlwp);
144 		saveq = tlwp->lwp_qsav;
145 	}
146 
147 	if (sleeptype == CPR_TORAM) {
148 		rc = cpr_suspend(sleeptype);
149 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
150 		if (rc == 0) {
151 			int i_cpr_power_down(int sleeptype);
152 
153 			/*
154 			 * From this point on, we should be at a high
155 			 * spl, interrupts disabled, and all but one
156 			 * cpu's paused (effectively UP/single threaded).
157 			 * So this is were we want to put ASSERTS()
158 			 * to let us know otherwise.
159 			 */
160 			ASSERT(cpus_paused());
161 
162 			/*
163 			 * Now do the work of actually putting this
164 			 * machine to sleep!
165 			 */
166 			rc = i_cpr_power_down(sleeptype);
167 			if (rc == 0) {
168 				PMD(PMD_SX, ("back from successful suspend\n"))
169 			}
170 			/*
171 			 * We do care about the return value from cpr_resume
172 			 * at this point, as it will tell us if one of the
173 			 * resume functions failed (cpr_resume_devices())
174 			 * However, for this to return and _not_ panic, means
175 			 * that we must be in one of the test functions.  So
176 			 * check for that and return an appropriate message.
177 			 */
178 			rc2 = cpr_resume(sleeptype);
179 			if (rc2 != 0) {
180 				ASSERT(cpr_test_point > 0);
181 				cmn_err(CE_NOTE,
182 				    "cpr_resume returned non-zero: %d\n", rc2);
183 				PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
184 			}
185 			ASSERT(!cpus_paused());
186 		} else {
187 			PMD(PMD_SX, ("failed suspend, resuming\n"))
188 			rc = cpr_resume(sleeptype);
189 		}
190 		return (rc);
191 	}
192 	/*
193 	 * Remember where we are for resume after reboot
194 	 */
195 	if (!setjmp(&tlwp->lwp_qsav)) {
196 		/*
197 		 * try to checkpoint the system, if failed return back
198 		 * to userland, otherwise power off.
199 		 */
200 		rc = cpr_suspend(sleeptype);
201 		if (rc || cpr_reusable_mode) {
202 			/*
203 			 * We don't really want to go down, or
204 			 * something went wrong in suspend, do what we can
205 			 * to put the system back to an operable state then
206 			 * return back to userland.
207 			 */
208 			PMD(PMD_SX, ("failed suspend, resuming\n"))
209 			(void) cpr_resume(sleeptype);
210 			PMD(PMD_SX, ("back from failed suspend resume\n"))
211 		}
212 	} else {
213 		/*
214 		 * This is the resumed side of longjmp, restore the previous
215 		 * longjmp pointer if there is one so this will be transparent
216 		 * to the world.
217 		 * This path is only for CPR_TODISK, where we reboot
218 		 */
219 		ASSERT(sleeptype == CPR_TODISK);
220 		tlwp->lwp_qsav = saveq;
221 		CPR->c_flags &= ~C_SUSPENDING;
222 		CPR->c_flags |= C_RESUMING;
223 
224 		/*
225 		 * resume the system back to the original state
226 		 */
227 		rc = cpr_resume(sleeptype);
228 		PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
229 		    rc))
230 	}
231 
232 	(void) cpr_default_setup(0);
233 
234 	return (rc);
235 }
236 
237 
238 #if defined(__sparc)
239 
240 /*
241  * check/disable or re-enable UFS logging
242  */
243 static void
244 cpr_log_status(int enable, int *svstat, vnode_t *vp)
245 {
246 	int cmd, status, error;
247 	char *str, *able;
248 	fiolog_t fl;
249 	refstr_t *mntpt;
250 
251 	str = "cpr_log_status";
252 	bzero(&fl, sizeof (fl));
253 	fl.error = FIOLOG_ENONE;
254 
255 	/*
256 	 * when disabling, first get and save logging status (0 or 1)
257 	 */
258 	if (enable == 0) {
259 		if (error = VOP_IOCTL(vp, _FIOISLOG,
260 		    (uintptr_t)&status, FKIOCTL, CRED(), NULL, NULL)) {
261 			mntpt = vfs_getmntpoint(vp->v_vfsp);
262 			prom_printf("%s: \"%s\", cant get logging "
263 			    "status, error %d\n", str, refstr_value(mntpt),
264 			    error);
265 			refstr_rele(mntpt);
266 			return;
267 		}
268 		*svstat = status;
269 		if (cpr_debug & CPR_DEBUG5) {
270 			mntpt = vfs_getmntpoint(vp->v_vfsp);
271 			errp("%s: \"%s\", logging status = %d\n",
272 			    str, refstr_value(mntpt), status);
273 			refstr_rele(mntpt);
274 		};
275 
276 		able = "disable";
277 		cmd = _FIOLOGDISABLE;
278 	} else {
279 		able = "enable";
280 		cmd = _FIOLOGENABLE;
281 	}
282 
283 	/*
284 	 * disable or re-enable logging when the saved status is 1
285 	 */
286 	if (*svstat == 1) {
287 		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
288 		    FKIOCTL, CRED(), NULL, NULL);
289 		if (error) {
290 			mntpt = vfs_getmntpoint(vp->v_vfsp);
291 			prom_printf("%s: \"%s\", cant %s logging, error %d\n",
292 			    str, refstr_value(mntpt), able, error);
293 			refstr_rele(mntpt);
294 		} else {
295 			if (cpr_debug & CPR_DEBUG5) {
296 				mntpt = vfs_getmntpoint(vp->v_vfsp);
297 				errp("%s: \"%s\", logging is now %sd\n",
298 				    str, refstr_value(mntpt), able);
299 				refstr_rele(mntpt);
300 			};
301 		}
302 	}
303 
304 	/*
305 	 * when enabling logging, reset the saved status
306 	 * to unknown for next time
307 	 */
308 	if (enable)
309 		*svstat = -1;
310 }
311 
312 /*
313  * enable/disable UFS logging on filesystems containing cpr_default_path
314  * and cpr statefile.  since the statefile can be on any fs, that fs
315  * needs to be handled separately.  this routine and cprboot expect that
316  * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
317  * is loaded from the device with rootfs and uses the same device to open
318  * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
319  * file outside of rootfs would cause errors during cprboot, plus cpr and
320  * fsck problems with the new fs if logging were enabled.
321  */
322 
323 static int
324 cpr_ufs_logging(int enable)
325 {
326 	static int def_status = -1, sf_status = -1;
327 	struct vfs *vfsp;
328 	char *fname;
329 	vnode_t *vp;
330 	int error;
331 
332 	if (cpr_reusable_mode)
333 		return (0);
334 
335 	if (error = cpr_open_deffile(FREAD, &vp))
336 		return (error);
337 	vfsp = vp->v_vfsp;
338 	if (!cpr_is_ufs(vfsp)) {
339 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
340 		VN_RELE(vp);
341 		return (0);
342 	}
343 
344 	cpr_log_status(enable, &def_status, vp);
345 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
346 	VN_RELE(vp);
347 
348 	fname = cpr_build_statefile_path();
349 	if (fname == NULL)
350 		return (ENOENT);
351 	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
352 	    0600, &vp, CRCREAT, 0)) {
353 		prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
354 		    "error %d\n", fname, error);
355 		return (error);
356 	}
357 
358 	/*
359 	 * check logging status for the statefile if it resides
360 	 * on a different fs and the type is a regular file
361 	 */
362 	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
363 		cpr_log_status(enable, &sf_status, vp);
364 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
365 	VN_RELE(vp);
366 
367 	return (0);
368 }
369 #endif
370 
371 
372 /*
373  * Check if klmmod is loaded and call a lock manager service; if klmmod
374  * is not loaded, the services aren't needed and a call would trigger a
375  * modload, which would block since another thread would never run.
376  */
377 static void
378 cpr_lock_mgr(void (*service)(void))
379 {
380 	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
381 		(*service)();
382 }
383 
384 int
385 cpr_suspend_cpus(void)
386 {
387 	int	ret = 0;
388 	extern void *i_cpr_save_context(void *arg);
389 
390 	mutex_enter(&cpu_lock);
391 
392 	/*
393 	 * the machine could not have booted without a bootcpu
394 	 */
395 	ASSERT(i_cpr_bootcpu() != NULL);
396 
397 	/*
398 	 * bring all the offline cpus online
399 	 */
400 	if ((ret = cpr_all_online())) {
401 		mutex_exit(&cpu_lock);
402 		return (ret);
403 	}
404 
405 	/*
406 	 * Set the affinity to be the boot processor
407 	 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
408 	 */
409 	affinity_set(i_cpr_bootcpuid());
410 
411 	ASSERT(CPU->cpu_id == 0);
412 
413 	PMD(PMD_SX, ("curthread running on bootcpu\n"))
414 
415 	/*
416 	 * pause all other running CPUs and save the CPU state at the sametime
417 	 */
418 	pause_cpus(NULL, i_cpr_save_context);
419 
420 	mutex_exit(&cpu_lock);
421 
422 	return (0);
423 }
424 
425 /*
426  * Take the system down to a checkpointable state and write
427  * the state file, the following are sequentially executed:
428  *
429  *    - Request all user threads to stop themselves
430  *    - push out and invalidate user pages
431  *    - bring statefile inode incore to prevent a miss later
432  *    - request all daemons to stop
433  *    - check and make sure all threads are stopped
434  *    - sync the file system
435  *    - suspend all devices
436  *    - block intrpts
437  *    - dump system state and memory to state file
438  *    - SPARC code will not be called with CPR_TORAM, caller filters
439  */
440 static int
441 cpr_suspend(int sleeptype)
442 {
443 #if defined(__sparc)
444 	int sf_realloc, nverr;
445 #endif
446 	int	rc = 0;
447 	int	skt_rc = 0;
448 
449 	PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
450 	cpr_set_substate(C_ST_SUSPEND_BEGIN);
451 
452 	cpr_suspend_init(sleeptype);
453 
454 	cpr_save_time();
455 
456 	cpr_tod_get(&wholecycle_tv);
457 	CPR_STAT_EVENT_START("Suspend Total");
458 
459 	i_cpr_alloc_cpus();
460 
461 #if defined(__sparc)
462 	ASSERT(sleeptype == CPR_TODISK);
463 	if (!cpr_reusable_mode) {
464 		/*
465 		 * We need to validate default file before fs
466 		 * functionality is disabled.
467 		 */
468 		if (rc = cpr_validate_definfo(0))
469 			return (rc);
470 	}
471 	i_cpr_save_machdep_info();
472 #endif
473 
474 	PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
475 	/* Stop PM scans ASAP */
476 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);
477 
478 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
479 	    NULL, NULL, PM_DEP_WAIT, NULL, 0);
480 
481 #if defined(__sparc)
482 	ASSERT(sleeptype == CPR_TODISK);
483 	cpr_set_substate(C_ST_MP_OFFLINE);
484 	if (rc = cpr_mp_offline())
485 		return (rc);
486 #endif
487 	/*
488 	 * Ask Xorg to suspend the frame buffer, and wait for it to happen
489 	 */
490 	mutex_enter(&srn_clone_lock);
491 	if (srn_signal) {
492 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
493 		    "SRN_SUSPEND_REQ)\n"))
494 		srn_inuse = 1;	/* because *(srn_signal) cv_waits */
495 		(*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
496 		srn_inuse = 0;
497 	} else {
498 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
499 	}
500 	mutex_exit(&srn_clone_lock);
501 
502 	/*
503 	 * Ask the user threads to stop by themselves, but
504 	 * if they don't or can't after 3 retries, we give up on CPR.
505 	 * The 3 retry is not a random number because 2 is possible if
506 	 * a thread has been forked before the parent thread is stopped.
507 	 */
508 	CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
509 	CPR_STAT_EVENT_START("  stop users");
510 	cpr_set_substate(C_ST_STOP_USER_THREADS);
511 	PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
512 	if (rc = cpr_stop_user_threads())
513 		return (rc);
514 	CPR_STAT_EVENT_END("  stop users");
515 	CPR_DEBUG(CPR_DEBUG1, "done\n");
516 
517 	PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
518 	pm_save_direct_levels();
519 
520 	/*
521 	 * User threads are stopped.  We will start communicating with the
522 	 * user via prom_printf (some debug output may have already happened)
523 	 * so let anybody who cares know about this (bug 4096122)
524 	 */
525 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);
526 
527 	PMD(PMD_SX, ("cpr_suspend: send notice\n"))
528 #ifndef DEBUG
529 	cpr_send_notice();
530 	if (cpr_debug)
531 		prom_printf("\n");
532 #endif
533 
534 	PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
535 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);
536 
537 	/*
538 	 * Reattach any drivers which originally exported the
539 	 * no-involuntary-power-cycles property.  We need to do this before
540 	 * stopping kernel threads because modload is implemented using
541 	 * a kernel thread.
542 	 */
543 	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
544 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
545 	if (!pm_reattach_noinvol())
546 		return (ENXIO);
547 
548 #if defined(__sparc)
549 	ASSERT(sleeptype == CPR_TODISK);
550 	/*
551 	 * if ufs logging is enabled, we need to disable before
552 	 * stopping kernel threads so that ufs delete and roll
553 	 * threads can do the work.
554 	 */
555 	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
556 	if (rc = cpr_ufs_logging(0))
557 		return (rc);
558 
559 	/*
560 	 * Use sync_all to swap out all user pages and find out how much
561 	 * extra space needed for user pages that don't have back store
562 	 * space left.
563 	 */
564 	CPR_STAT_EVENT_START("  swapout upages");
565 	vfs_sync(SYNC_ALL);
566 	CPR_STAT_EVENT_END("  swapout upages");
567 
568 	cpr_set_bitmap_size();
569 
570 alloc_statefile:
571 	/*
572 	 * If our last state was C_ST_DUMP_NOSPC, we're trying to
573 	 * realloc the statefile, otherwise this is the first attempt.
574 	 */
575 	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;
576 
577 	CPR_STAT_EVENT_START("  alloc statefile");
578 	cpr_set_substate(C_ST_STATEF_ALLOC);
579 	if (rc = cpr_alloc_statefile(sf_realloc)) {
580 		if (sf_realloc)
581 			errp("realloc failed\n");
582 		return (rc);
583 	}
584 	CPR_STAT_EVENT_END("  alloc statefile");
585 
586 	/*
587 	 * Sync the filesystem to preserve its integrity.
588 	 *
589 	 * This sync is also used to flush out all B_DELWRI buffers
590 	 * (fs cache) which are mapped and neither dirty nor referenced
591 	 * before cpr_invalidate_pages destroys them.
592 	 * fsflush does similar thing.
593 	 */
594 	sync();
595 
596 	/*
597 	 * destroy all clean file mapped kernel pages
598 	 */
599 	CPR_STAT_EVENT_START("  clean pages");
600 	CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
601 	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
602 	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
603 	CPR_STAT_EVENT_END("  clean pages");
604 #endif
605 
606 
607 	/*
608 	 * Hooks needed by lock manager prior to suspending.
609 	 * Refer to code for more comments.
610 	 */
611 	PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
612 	cpr_lock_mgr(lm_cprsuspend);
613 
614 	/*
615 	 * Now suspend all the devices
616 	 */
617 	CPR_STAT_EVENT_START("  stop drivers");
618 	CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
619 	cpr_set_substate(C_ST_SUSPEND_DEVICES);
620 	pm_powering_down = 1;
621 	PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
622 	rc = cpr_suspend_devices(ddi_root_node());
623 	pm_powering_down = 0;
624 	if (rc)
625 		return (rc);
626 	CPR_DEBUG(CPR_DEBUG1, "done\n");
627 	CPR_STAT_EVENT_END("  stop drivers");
628 
629 	/*
630 	 * Stop all daemon activities
631 	 */
632 	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
633 	PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
634 	if (skt_rc = cpr_stop_kernel_threads())
635 		return (skt_rc);
636 
637 	PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
638 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);
639 
640 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
641 	pm_reattach_noinvol_fini();
642 
643 	cpr_sae(1);
644 
645 	PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
646 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);
647 
648 	if (sleeptype == CPR_TODISK) {
649 		/*
650 		 * It's safer to do tod_get before we disable all intr.
651 		 */
652 		CPR_STAT_EVENT_START("  write statefile");
653 	}
654 
655 	/*
656 	 * it's time to ignore the outside world, stop the real time
657 	 * clock and disable any further intrpt activity.
658 	 */
659 	PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
660 	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */
661 
662 	mutex_enter(&cpu_lock);
663 	PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
664 	cyclic_suspend();
665 	mutex_exit(&cpu_lock);
666 
667 	/*
668 	 * Due to the different methods of resuming the system between
669 	 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
670 	 * and CPR_TORAM (restart via reset into existing kernel image)
671 	 * cpus are not suspended and restored in the SPARC case, since it
672 	 * is necessary to restart the cpus and pause them before restoring
673 	 * the OBP image
674 	 */
675 
676 #if defined(__x86)
677 
678 	/* pause aux cpus */
679 	PMD(PMD_SX, ("pause aux cpus\n"))
680 
681 	cpr_set_substate(C_ST_MP_PAUSED);
682 
683 	if ((rc = cpr_suspend_cpus()) != 0)
684 		return (rc);
685 #endif
686 
687 	PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
688 	i_cpr_stop_intr();
689 	CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");
690 
691 	/*
692 	 * Since we will now disable the mechanism that causes prom_printfs
693 	 * to power up (if needed) the console fb/monitor, we assert that
694 	 * it must be up now.
695 	 */
696 	ASSERT(pm_cfb_is_up());
697 	PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
698 	prom_suspend_prepost();
699 
700 #if defined(__sparc)
701 	/*
702 	 * getting ready to write ourself out, flush the register
703 	 * windows to make sure that our stack is good when we
704 	 * come back on the resume side.
705 	 */
706 	flush_windows();
707 #endif
708 
709 	/*
710 	 * For S3, we're done
711 	 */
712 	if (sleeptype == CPR_TORAM) {
713 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
714 		cpr_set_substate(C_ST_NODUMP);
715 		return (rc);
716 	}
717 #if defined(__sparc)
718 	/*
719 	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
720 	 *
721 	 * The system is quiesced at this point, we are ready to either dump
722 	 * to the state file for a extended sleep or a simple shutdown for
723 	 * systems with non-volatile memory.
724 	 */
725 
726 	/*
727 	 * special handling for reusable:
728 	 */
729 	if (cpr_reusable_mode) {
730 		cpr_set_substate(C_ST_SETPROPS_1);
731 		if (nverr = cpr_set_properties(1))
732 			return (nverr);
733 	}
734 
735 	cpr_set_substate(C_ST_DUMP);
736 	rc = cpr_dump(C_VP);
737 
738 	/*
739 	 * if any error occurred during dump, more
740 	 * special handling for reusable:
741 	 */
742 	if (rc && cpr_reusable_mode) {
743 		cpr_set_substate(C_ST_SETPROPS_0);
744 		if (nverr = cpr_set_properties(0))
745 			return (nverr);
746 	}
747 
748 	if (rc == ENOSPC) {
749 		cpr_set_substate(C_ST_DUMP_NOSPC);
750 		(void) cpr_resume(sleeptype);
751 		goto alloc_statefile;
752 	} else if (rc == 0) {
753 		if (cpr_reusable_mode) {
754 			cpr_set_substate(C_ST_REUSABLE);
755 			longjmp(&ttolwp(curthread)->lwp_qsav);
756 		} else
757 			rc = cpr_set_properties(1);
758 	}
759 #endif
760 	PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
761 	return (rc);
762 }
763 
764 void
765 cpr_resume_cpus(void)
766 {
767 	/*
768 	 * this is a cut down version of start_other_cpus()
769 	 * just do the initialization to wake the other cpus
770 	 */
771 
772 #if defined(__x86)
773 	/*
774 	 * Initialize our syscall handlers
775 	 */
776 	init_cpu_syscall(CPU);
777 
778 #endif
779 
780 	i_cpr_pre_resume_cpus();
781 
782 	/*
783 	 * Restart the paused cpus
784 	 */
785 	mutex_enter(&cpu_lock);
786 	start_cpus();
787 	mutex_exit(&cpu_lock);
788 
789 	i_cpr_post_resume_cpus();
790 
791 	mutex_enter(&cpu_lock);
792 	/*
793 	 * clear the affinity set in cpr_suspend_cpus()
794 	 */
795 	affinity_clear();
796 
797 	/*
798 	 * offline all the cpus that were brought online during suspend
799 	 */
800 	cpr_restore_offline();
801 
802 	mutex_exit(&cpu_lock);
803 }
804 
805 void
806 cpr_unpause_cpus(void)
807 {
808 	/*
809 	 * Now restore the system back to what it was before we suspended
810 	 */
811 
812 	PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))
813 
814 	mutex_enter(&cpu_lock);
815 	/*
816 	 * Restart the paused cpus
817 	 */
818 	start_cpus();
819 
820 	/*
821 	 * clear the affinity set in cpr_suspend_cpus()
822 	 */
823 	affinity_clear();
824 
825 	/*
826 	 * offline all the cpus that were brought online during suspend
827 	 */
828 	cpr_restore_offline();
829 
830 	mutex_exit(&cpu_lock);
831 }
832 
833 /*
834  * Bring the system back up from a checkpoint, at this point
835  * the VM has been minimally restored by boot, the following
836  * are executed sequentially:
837  *
838  *    - machdep setup and enable interrupts (mp startup if it's mp)
839  *    - resume all devices
840  *    - restart daemons
841  *    - put all threads back on run queue
842  */
843 static int
844 cpr_resume(int sleeptype)
845 {
846 	cpr_time_t pwron_tv, *ctp;
847 	char *str;
848 	int rc = 0;
849 
850 	/*
851 	 * The following switch is used to resume the system
852 	 * that was suspended to a different level.
853 	 */
854 	CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
855 	PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
856 
857 	/*
858 	 * Note:
859 	 *
860 	 * The rollback labels rb_xyz do not represent the cpr resume
861 	 * state when event 'xyz' has happened. Instead they represent
862 	 * the state during cpr suspend when event 'xyz' was being
863 	 * entered (and where cpr suspend failed). The actual call that
864 	 * failed may also need to be partially rolled back, since they
865 	 * aren't atomic in most cases.  In other words, rb_xyz means
866 	 * "roll back all cpr suspend events that happened before 'xyz',
867 	 * and the one that caused the failure, if necessary."
868 	 */
869 	switch (CPR->c_substate) {
870 #if defined(__sparc)
871 	case C_ST_DUMP:
872 		/*
873 		 * This is most likely a full-fledged cpr_resume after
874 		 * a complete and successful cpr suspend. Just roll back
875 		 * everything.
876 		 */
877 		ASSERT(sleeptype == CPR_TODISK);
878 		break;
879 
880 	case C_ST_REUSABLE:
881 	case C_ST_DUMP_NOSPC:
882 	case C_ST_SETPROPS_0:
883 	case C_ST_SETPROPS_1:
884 		/*
885 		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
886 		 * special switch cases here. The other two do not have
887 		 * any state change during cpr_suspend() that needs to
888 		 * be rolled back. But these are exit points from
889 		 * cpr_suspend, so theoretically (or in the future), it
890 		 * is possible that a need for roll back of a state
891 		 * change arises between these exit points.
892 		 */
893 		ASSERT(sleeptype == CPR_TODISK);
894 		goto rb_dump;
895 #endif
896 
897 	case C_ST_NODUMP:
898 		PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
899 		goto rb_nodump;
900 
901 	case C_ST_STOP_KERNEL_THREADS:
902 		PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
903 		goto rb_stop_kernel_threads;
904 
905 	case C_ST_SUSPEND_DEVICES:
906 		PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
907 		goto rb_suspend_devices;
908 
909 #if defined(__sparc)
910 	case C_ST_STATEF_ALLOC:
911 		ASSERT(sleeptype == CPR_TODISK);
912 		goto rb_statef_alloc;
913 
914 	case C_ST_DISABLE_UFS_LOGGING:
915 		ASSERT(sleeptype == CPR_TODISK);
916 		goto rb_disable_ufs_logging;
917 #endif
918 
919 	case C_ST_PM_REATTACH_NOINVOL:
920 		PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
921 		goto rb_pm_reattach_noinvol;
922 
923 	case C_ST_STOP_USER_THREADS:
924 		PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
925 		goto rb_stop_user_threads;
926 
927 #if defined(__sparc)
928 	case C_ST_MP_OFFLINE:
929 		PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
930 		goto rb_mp_offline;
931 #endif
932 
933 #if defined(__x86)
934 	case C_ST_MP_PAUSED:
935 		PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
936 		goto rb_mp_paused;
937 #endif
938 
939 
940 	default:
941 		PMD(PMD_SX, ("cpr_resume: others\n"))
942 		goto rb_others;
943 	}
944 
945 rb_all:
946 	/*
947 	 * perform platform-dependent initialization
948 	 */
949 	if (cpr_suspend_succeeded)
950 		i_cpr_machdep_setup();
951 
952 	/*
953 	 * system did not really go down if we jump here
954 	 */
955 rb_dump:
956 	/*
957 	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
958 	 *
959 	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
960 	 */
961 rb_nodump:
962 	/*
963 	 * If we did suspend to RAM, we didn't generate a dump
964 	 */
965 	PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
966 	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
967 	if (cpr_suspend_succeeded) {
968 		PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
969 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
970 	}
971 
972 	prom_resume_prepost();
973 #if !defined(__sparc)
974 	/*
975 	 * Need to sync the software clock with the hardware clock.
976 	 * On Sparc, this occurs in the sparc-specific cbe.  However
977 	 * on x86 this needs to be handled _before_ we bring other cpu's
978 	 * back online.  So we call a resume function in timestamp.c
979 	 */
980 	if (tsc_resume_in_cyclic == 0)
981 		tsc_resume();
982 
983 #endif
984 
985 #if defined(__sparc)
986 	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
987 		kdi_dvec_cpr_restart();
988 #endif
989 
990 
991 #if defined(__x86)
992 rb_mp_paused:
993 	PT(PT_RMPO);
994 	PMD(PMD_SX, ("resume aux cpus\n"))
995 
996 	if (cpr_suspend_succeeded) {
997 		cpr_resume_cpus();
998 	} else {
999 		cpr_unpause_cpus();
1000 	}
1001 #endif
1002 
1003 	/*
1004 	 * let the tmp callout catch up.
1005 	 */
1006 	PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
1007 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
1008 
1009 	i_cpr_enable_intr();
1010 
1011 	mutex_enter(&cpu_lock);
1012 	PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
1013 	cyclic_resume();
1014 	mutex_exit(&cpu_lock);
1015 
1016 	PMD(PMD_SX, ("cpr_resume: handle xc\n"))
1017 	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
1018 
1019 	PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
1020 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
1021 
1022 	/*
1023 	 * statistics gathering
1024 	 */
1025 	if (cpr_suspend_succeeded) {
1026 		/*
1027 		 * Prevent false alarm in tod_validate() due to tod
1028 		 * value change between suspend and resume
1029 		 */
1030 		cpr_tod_status_set(TOD_CPR_RESUME_DONE);
1031 
1032 		cpr_convert_promtime(&pwron_tv);
1033 
1034 		ctp = &cpr_term.tm_shutdown;
1035 		if (sleeptype == CPR_TODISK)
1036 			CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
1037 		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
1038 
1039 		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
1040 
1041 		str = "  prom time";
1042 		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
1043 		ctp = &cpr_term.tm_cprboot_start;
1044 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1045 
1046 		str = "  read statefile";
1047 		CPR_STAT_EVENT_START_TMZ(str, ctp);
1048 		ctp = &cpr_term.tm_cprboot_end;
1049 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1050 	}
1051 
1052 rb_stop_kernel_threads:
1053 	/*
1054 	 * Put all threads back to where they belong; get the kernel
1055 	 * daemons straightened up too. Note that the callback table
1056 	 * locked during cpr_stop_kernel_threads() is released only
1057 	 * in cpr_start_kernel_threads(). Ensure modunloading is
1058 	 * disabled before starting kernel threads, we don't want
1059 	 * modunload thread to start changing device tree underneath.
1060 	 */
1061 	PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
1062 	modunload_disable();
1063 	PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
1064 	cpr_start_kernel_threads();
1065 
1066 rb_suspend_devices:
1067 	CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
1068 	CPR_STAT_EVENT_START("  start drivers");
1069 
1070 	PMD(PMD_SX,
1071 	    ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
1072 	    cpr_resume_uniproc))
1073 
1074 #if defined(__x86)
1075 	/*
1076 	 * If cpr_resume_uniproc is set, then pause all the other cpus
1077 	 * apart from the current cpu, so that broken drivers that think
1078 	 * that they are on a uniprocessor machine will resume
1079 	 */
1080 	if (cpr_resume_uniproc) {
1081 		mutex_enter(&cpu_lock);
1082 		pause_cpus(NULL, NULL);
1083 		mutex_exit(&cpu_lock);
1084 	}
1085 #endif
1086 
1087 	/*
1088 	 * The policy here is to continue resume everything we can if we did
1089 	 * not successfully finish suspend; and panic if we are coming back
1090 	 * from a fully suspended system.
1091 	 */
1092 	PMD(PMD_SX, ("cpr_resume: resume devices\n"))
1093 	rc = cpr_resume_devices(ddi_root_node(), 0);
1094 
1095 	cpr_sae(0);
1096 
1097 	str = "Failed to resume one or more devices.";
1098 
1099 	if (rc) {
1100 		if (CPR->c_substate == C_ST_DUMP ||
1101 		    (sleeptype == CPR_TORAM &&
1102 		    CPR->c_substate == C_ST_NODUMP)) {
1103 			if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
1104 				PMD(PMD_SX, ("cpr_resume: resume device "
1105 				    "warn\n"))
1106 				cpr_err(CE_WARN, str);
1107 			} else {
1108 				PMD(PMD_SX, ("cpr_resume: resume device "
1109 				    "panic\n"))
1110 				cpr_err(CE_PANIC, str);
1111 			}
1112 		} else {
1113 			PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
1114 			cpr_err(CE_WARN, str);
1115 		}
1116 	}
1117 
1118 	CPR_STAT_EVENT_END("  start drivers");
1119 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1120 
1121 #if defined(__x86)
1122 	/*
1123 	 * If cpr_resume_uniproc is set, then unpause all the processors
1124 	 * that were paused before resuming the drivers
1125 	 */
1126 	if (cpr_resume_uniproc) {
1127 		mutex_enter(&cpu_lock);
1128 		start_cpus();
1129 		mutex_exit(&cpu_lock);
1130 	}
1131 #endif
1132 
1133 	/*
1134 	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
1135 	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
1136 	 * modunloading now.
1137 	 */
1138 	if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
1139 		PMD(PMD_SX, ("cpr_resume: modload enable\n"))
1140 		modunload_enable();
1141 	}
1142 
1143 	/*
1144 	 * Hooks needed by lock manager prior to resuming.
1145 	 * Refer to code for more comments.
1146 	 */
1147 	PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
1148 	cpr_lock_mgr(lm_cprresume);
1149 
1150 #if defined(__sparc)
1151 	/*
1152 	 * This is a partial (half) resume during cpr suspend, we
1153 	 * haven't yet given up on the suspend. On return from here,
1154 	 * cpr_suspend() will try to reallocate and retry the suspend.
1155 	 */
1156 	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
1157 		return (0);
1158 	}
1159 
1160 	if (sleeptype == CPR_TODISK) {
1161 rb_statef_alloc:
1162 		cpr_statef_close();
1163 
1164 rb_disable_ufs_logging:
1165 		/*
1166 		 * if ufs logging was disabled, re-enable
1167 		 */
1168 		(void) cpr_ufs_logging(1);
1169 	}
1170 #endif
1171 
1172 rb_pm_reattach_noinvol:
1173 	/*
1174 	 * When pm_reattach_noinvol() succeeds, modunload_thread will
1175 	 * remain disabled until after cpr suspend passes the
1176 	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
1177 	 * cpr suspend reaches this state, we'll need to enable modunload
1178 	 * thread during rollback.
1179 	 */
1180 	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
1181 	    CPR->c_substate == C_ST_STATEF_ALLOC ||
1182 	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
1183 	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
1184 		PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
1185 		pm_reattach_noinvol_fini();
1186 	}
1187 
1188 	PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
1189 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
1190 	PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
1191 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
1192 
1193 	PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
1194 	pm_restore_direct_levels();
1195 
1196 rb_stop_user_threads:
1197 	CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
1198 	PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
1199 	cpr_start_user_threads();
1200 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1201 	/*
1202 	 * Ask Xorg to resume the frame buffer, and wait for it to happen
1203 	 */
1204 	mutex_enter(&srn_clone_lock);
1205 	if (srn_signal) {
1206 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
1207 		    "SRN_NORMAL_RESUME)\n"))
1208 		srn_inuse = 1;		/* because (*srn_signal) cv_waits */
1209 		(*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
1210 		srn_inuse = 0;
1211 	} else {
1212 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
1213 	}
1214 	mutex_exit(&srn_clone_lock);
1215 
1216 #if defined(__sparc)
1217 rb_mp_offline:
1218 	if (cpr_mp_online())
1219 		cpr_err(CE_WARN, "Failed to online all the processors.");
1220 #endif
1221 
1222 rb_others:
1223 	PMD(PMD_SX, ("cpr_resume: dep thread\n"))
1224 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
1225 	    PM_DEP_WAIT, NULL, 0);
1226 
1227 	PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
1228 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
1229 
1230 	if (cpr_suspend_succeeded) {
1231 		cpr_stat_record_events();
1232 	}
1233 
1234 #if defined(__sparc)
1235 	if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
1236 		cpr_clear_definfo();
1237 #endif
1238 
1239 	i_cpr_free_cpus();
1240 	CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
1241 	PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
1242 	cpr_signal_user(SIGTHAW);
1243 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1244 
1245 	CPR_STAT_EVENT_END("Resume Total");
1246 
1247 	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
1248 	CPR_STAT_EVENT_END("WHOLE CYCLE");
1249 
1250 	if (cpr_debug & CPR_DEBUG1)
1251 		cmn_err(CE_CONT, "\nThe system is back where you left!\n");
1252 
1253 	CPR_STAT_EVENT_START("POST CPR DELAY");
1254 
1255 #ifdef CPR_STAT
1256 	ctp = &cpr_term.tm_shutdown;
1257 	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
1258 	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
1259 
1260 	CPR_STAT_EVENT_PRINT();
1261 #endif /* CPR_STAT */
1262 
1263 	PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
1264 	return (rc);
1265 }
1266 
1267 static void
1268 cpr_suspend_init(int sleeptype)
1269 {
1270 	cpr_time_t *ctp;
1271 
1272 	cpr_stat_init();
1273 
1274 	/*
1275 	 * If cpr_suspend() failed before cpr_dump() gets a chance
1276 	 * to reinitialize the terminator of the statefile,
1277 	 * the values of the old terminator will still linger around.
1278 	 * Since the terminator contains information that we need to
1279 	 * decide whether suspend succeeded or not, we need to
1280 	 * reinitialize it as early as possible.
1281 	 */
1282 	cpr_term.real_statef_size = 0;
1283 	ctp = &cpr_term.tm_shutdown;
1284 	bzero(ctp, sizeof (*ctp));
1285 	ctp = &cpr_term.tm_cprboot_start;
1286 	bzero(ctp, sizeof (*ctp));
1287 	ctp = &cpr_term.tm_cprboot_end;
1288 	bzero(ctp, sizeof (*ctp));
1289 
1290 	if (sleeptype == CPR_TODISK) {
1291 		/*
1292 		 * Lookup the physical address of our thread structure.
1293 		 * This should never be invalid and the entire thread structure
1294 		 * is expected to reside within the same pfn.
1295 		 */
1296 		curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
1297 		ASSERT(curthreadpfn != PFN_INVALID);
1298 		ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
1299 		    (caddr_t)curthread + sizeof (kthread_t) - 1));
1300 	}
1301 
1302 	cpr_suspend_succeeded = 0;
1303 }
1304 
1305 /*
1306  * bring all the offline cpus online
1307  */
1308 static int
1309 cpr_all_online(void)
1310 {
1311 	int	rc = 0;
1312 
1313 #ifdef	__sparc
1314 	/*
1315 	 * do nothing
1316 	 */
1317 #else
1318 
1319 	cpu_t	*cp;
1320 
1321 	ASSERT(MUTEX_HELD(&cpu_lock));
1322 
1323 	cp = cpu_list;
1324 	do {
1325 		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1326 		if (!CPU_ACTIVE(cp)) {
1327 			if ((rc = cpu_online(cp)) != 0)
1328 				break;
1329 			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
1330 		}
1331 	} while ((cp = cp->cpu_next) != cpu_list);
1332 
1333 	if (rc) {
1334 		/*
1335 		 * an online operation failed so offline the cpus
1336 		 * that were onlined above to restore the system
1337 		 * to its original state
1338 		 */
1339 		cpr_restore_offline();
1340 	}
1341 #endif
1342 	return (rc);
1343 }
1344 
1345 /*
1346  * offline all the cpus that were brought online by cpr_all_online()
1347  */
1348 static void
1349 cpr_restore_offline(void)
1350 {
1351 
1352 #ifdef	__sparc
1353 	/*
1354 	 * do nothing
1355 	 */
1356 #else
1357 
1358 	cpu_t	*cp;
1359 	int	rc = 0;
1360 
1361 	ASSERT(MUTEX_HELD(&cpu_lock));
1362 
1363 	cp = cpu_list;
1364 	do {
1365 		if (CPU_CPR_IS_ONLINE(cp)) {
1366 			rc =  cpu_offline(cp, 0);
1367 			/*
1368 			 * this offline should work, since the cpu was
1369 			 * offline originally and was successfully onlined
1370 			 * by cpr_all_online()
1371 			 */
1372 			ASSERT(rc == 0);
1373 			cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1374 		}
1375 	} while ((cp = cp->cpu_next) != cpu_list);
1376 
1377 #endif
1378 
1379 }
1380