xref: /illumos-gate/usr/src/uts/common/cpr/cpr_main.c (revision 7d2d870ed78c1c0b10f15787cf9a400bb0b28fda)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23   * Use is subject to license terms.
24   */
25  
26  /*
27   * This module contains the guts of checkpoint-resume mechanism.
28   * All code in this module is platform independent.
29   */
30  
31  #include <sys/types.h>
32  #include <sys/errno.h>
33  #include <sys/callb.h>
34  #include <sys/processor.h>
35  #include <sys/machsystm.h>
36  #include <sys/clock.h>
37  #include <sys/vfs.h>
38  #include <sys/kmem.h>
39  #include <nfs/lm.h>
40  #include <sys/systm.h>
41  #include <sys/cpr.h>
42  #include <sys/bootconf.h>
43  #include <sys/cyclic.h>
44  #include <sys/filio.h>
45  #include <sys/fs/ufs_filio.h>
46  #include <sys/epm.h>
47  #include <sys/modctl.h>
48  #include <sys/reboot.h>
49  #include <sys/kdi.h>
50  #include <sys/promif.h>
51  #include <sys/srn.h>
52  #include <sys/cpr_impl.h>
53  
/*
 * PPM(dip): the devi_pm_ppm field of dip's devinfo node (via DEVI()),
 * cast to dev_info_t *.
 */
#define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)

/*
 * extern declarations for data and routines used by this module.
 */
extern struct cpr_terminator cpr_term;

extern int cpr_alloc_statefile(int);
extern void cpr_start_kernel_threads(void);
extern void cpr_abbreviate_devpath(char *, char *);
extern void cpr_convert_promtime(cpr_time_t *);
extern void cpr_send_notice(void);
extern void cpr_set_bitmap_size(void);
extern void cpr_stat_init();
extern void cpr_statef_close(void);
extern void flush_windows(void);
extern void (*srn_signal)(int, int);
extern void init_cpu_syscall(struct cpu *);
extern void i_cpr_pre_resume_cpus();
extern void i_cpr_post_resume_cpus();
extern int cpr_is_ufs(struct vfs *);

extern int pm_powering_down;
extern kmutex_t srn_clone_lock;
extern int srn_inuse;

/*
 * Forward declarations of file-local (static) routines.  The cpu
 * suspend/resume pair is only declared here when building for x86.
 */
static int cpr_suspend(int);
static int cpr_resume(int);
static void cpr_suspend_init(int);
#if defined(__x86)
static int cpr_suspend_cpus(void);
static void cpr_resume_cpus(void);
#endif
static int cpr_all_online(void);
static void cpr_restore_offline(void);

/*
 * Module globals.  wholecycle_tv is filled in by cpr_tod_get() at the
 * start of cpr_suspend(); cpr_suspend_succeeded is consulted by the
 * resume path to decide how much needs to be restored.
 */
cpr_time_t wholecycle_tv;
int cpr_suspend_succeeded;
pfn_t curthreadpfn;
int curthreadremapped;

extern cpuset_t cpu_ready_set;
/* pause hook; set in cpr_suspend_cpus() and reset to NULL on resume */
extern void *(*cpu_pause_func)(void *);

extern processorid_t i_cpr_bootcpuid(void);
extern cpu_t *i_cpr_bootcpu(void);
extern void tsc_adjust_delta(hrtime_t tdelta);
extern void tsc_resume(void);
extern int tsc_resume_in_cyclic;
100  
/*
 * Set this variable to 1 to have device drivers resume in a
 * uniprocessor environment.  This allows drivers that assume they
 * resume on a UP machine to continue to work.  When it is set, the
 * resume path pauses the other cpus before resuming devices (x86 only).
 * It should be deprecated once the broken drivers are fixed.
 */
int cpr_resume_uniproc = 0;
108  
109  /*
110   * save or restore abort_enable;  this prevents a drop
111   * to kadb or prom during cpr_resume_devices() when
112   * there is no kbd present;  see abort_sequence_enter()
113   */
114  static void
115  cpr_sae(int stash)
116  {
117  	static int saved_ae = -1;
118  
119  	if (stash) {
120  		saved_ae = abort_enable;
121  		abort_enable = 0;
122  	} else if (saved_ae != -1) {
123  		abort_enable = saved_ae;
124  		saved_ae = -1;
125  	}
126  }
127  
128  
129  /*
130   * The main switching point for cpr, this routine starts the ckpt
131   * and state file saving routines; on resume the control is
132   * returned back to here and it then calls the resume routine.
133   */
134  int
135  cpr_main(int sleeptype)
136  {
137  	int rc, rc2;
138  	label_t saveq;
139  	klwp_t *tlwp = ttolwp(curthread);
140  
141  	if (sleeptype == CPR_TODISK) {
142  		if ((rc = cpr_default_setup(1)) != 0)
143  			return (rc);
144  		ASSERT(tlwp);
145  		saveq = tlwp->lwp_qsav;
146  	}
147  
148  	if (sleeptype == CPR_TORAM) {
149  		rc = cpr_suspend(sleeptype);
150  		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
151  		if (rc == 0) {
152  			int i_cpr_power_down(int sleeptype);
153  
154  			/*
155  			 * From this point on, we should be at a high
156  			 * spl, interrupts disabled, and all but one
157  			 * cpu's paused (effectively UP/single threaded).
158  			 * So this is were we want to put ASSERTS()
159  			 * to let us know otherwise.
160  			 */
161  			ASSERT(cpus_paused());
162  
163  			/*
164  			 * Now do the work of actually putting this
165  			 * machine to sleep!
166  			 */
167  			rc = i_cpr_power_down(sleeptype);
168  			if (rc == 0) {
169  				PMD(PMD_SX, ("back from succssful suspend\n"))
170  			}
171  			/*
172  			 * We do care about the return value from cpr_resume
173  			 * at this point, as it will tell us if one of the
174  			 * resume functions failed (cpr_resume_devices())
175  			 * However, for this to return and _not_ panic, means
176  			 * that we must be in one of the test functions.  So
177  			 * check for that and return an appropriate message.
178  			 */
179  			rc2 = cpr_resume(sleeptype);
180  			if (rc2 != 0) {
181  				ASSERT(cpr_test_point > 0);
182  				cmn_err(CE_NOTE,
183  				    "cpr_resume returned non-zero: %d\n", rc2);
184  				PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
185  			}
186  			ASSERT(!cpus_paused());
187  		} else {
188  			PMD(PMD_SX, ("failed suspend, resuming\n"))
189  			rc = cpr_resume(sleeptype);
190  		}
191  		return (rc);
192  	}
193  	/*
194  	 * Remember where we are for resume after reboot
195  	 */
196  	if (!setjmp(&tlwp->lwp_qsav)) {
197  		/*
198  		 * try to checkpoint the system, if failed return back
199  		 * to userland, otherwise power off.
200  		 */
201  		rc = cpr_suspend(sleeptype);
202  		if (rc || cpr_reusable_mode) {
203  			/*
204  			 * We don't really want to go down, or
205  			 * something went wrong in suspend, do what we can
206  			 * to put the system back to an operable state then
207  			 * return back to userland.
208  			 */
209  			PMD(PMD_SX, ("failed suspend, resuming\n"))
210  			(void) cpr_resume(sleeptype);
211  			PMD(PMD_SX, ("back from failed suspend resume\n"))
212  		}
213  	} else {
214  		/*
215  		 * This is the resumed side of longjmp, restore the previous
216  		 * longjmp pointer if there is one so this will be transparent
217  		 * to the world.
218  		 * This path is only for CPR_TODISK, where we reboot
219  		 */
220  		ASSERT(sleeptype == CPR_TODISK);
221  		tlwp->lwp_qsav = saveq;
222  		CPR->c_flags &= ~C_SUSPENDING;
223  		CPR->c_flags |= C_RESUMING;
224  
225  		/*
226  		 * resume the system back to the original state
227  		 */
228  		rc = cpr_resume(sleeptype);
229  		PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
230  		    rc))
231  	}
232  
233  	(void) cpr_default_setup(0);
234  
235  	return (rc);
236  }
237  
238  
239  #if defined(__sparc)
240  
241  /*
242   * check/disable or re-enable UFS logging
243   */
244  static void
245  cpr_log_status(int enable, int *svstat, vnode_t *vp)
246  {
247  	int cmd, status, error;
248  	char *str, *able;
249  	fiolog_t fl;
250  	refstr_t *mntpt;
251  
252  	str = "cpr_log_status";
253  	bzero(&fl, sizeof (fl));
254  	fl.error = FIOLOG_ENONE;
255  
256  	/*
257  	 * when disabling, first get and save logging status (0 or 1)
258  	 */
259  	if (enable == 0) {
260  		if (error = VOP_IOCTL(vp, _FIOISLOG,
261  		    (uintptr_t)&status, FKIOCTL, CRED(), NULL, NULL)) {
262  			mntpt = vfs_getmntpoint(vp->v_vfsp);
263  			prom_printf("%s: \"%s\", cant get logging "
264  			    "status, error %d\n", str, refstr_value(mntpt),
265  			    error);
266  			refstr_rele(mntpt);
267  			return;
268  		}
269  		*svstat = status;
270  		if (cpr_debug & CPR_DEBUG5) {
271  			mntpt = vfs_getmntpoint(vp->v_vfsp);
272  			errp("%s: \"%s\", logging status = %d\n",
273  			    str, refstr_value(mntpt), status);
274  			refstr_rele(mntpt);
275  		};
276  
277  		able = "disable";
278  		cmd = _FIOLOGDISABLE;
279  	} else {
280  		able = "enable";
281  		cmd = _FIOLOGENABLE;
282  	}
283  
284  	/*
285  	 * disable or re-enable logging when the saved status is 1
286  	 */
287  	if (*svstat == 1) {
288  		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
289  		    FKIOCTL, CRED(), NULL, NULL);
290  		if (error) {
291  			mntpt = vfs_getmntpoint(vp->v_vfsp);
292  			prom_printf("%s: \"%s\", cant %s logging, error %d\n",
293  			    str, refstr_value(mntpt), able, error);
294  			refstr_rele(mntpt);
295  		} else {
296  			if (cpr_debug & CPR_DEBUG5) {
297  				mntpt = vfs_getmntpoint(vp->v_vfsp);
298  				errp("%s: \"%s\", logging is now %sd\n",
299  				    str, refstr_value(mntpt), able);
300  				refstr_rele(mntpt);
301  			};
302  		}
303  	}
304  
305  	/*
306  	 * when enabling logging, reset the saved status
307  	 * to unknown for next time
308  	 */
309  	if (enable)
310  		*svstat = -1;
311  }
312  
313  /*
314   * enable/disable UFS logging on filesystems containing cpr_default_path
315   * and cpr statefile.  since the statefile can be on any fs, that fs
316   * needs to be handled separately.  this routine and cprboot expect that
317   * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
318   * is loaded from the device with rootfs and uses the same device to open
319   * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
320   * file outside of rootfs would cause errors during cprboot, plus cpr and
321   * fsck problems with the new fs if logging were enabled.
322   */
323  
324  static int
325  cpr_ufs_logging(int enable)
326  {
327  	static int def_status = -1, sf_status = -1;
328  	struct vfs *vfsp;
329  	char *fname;
330  	vnode_t *vp;
331  	int error;
332  
333  	if (cpr_reusable_mode)
334  		return (0);
335  
336  	if (error = cpr_open_deffile(FREAD, &vp))
337  		return (error);
338  	vfsp = vp->v_vfsp;
339  	if (!cpr_is_ufs(vfsp)) {
340  		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
341  		VN_RELE(vp);
342  		return (0);
343  	}
344  
345  	cpr_log_status(enable, &def_status, vp);
346  	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
347  	VN_RELE(vp);
348  
349  	fname = cpr_build_statefile_path();
350  	if (fname == NULL)
351  		return (ENOENT);
352  	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
353  	    0600, &vp, CRCREAT, 0)) {
354  		prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
355  		    "error %d\n", fname, error);
356  		return (error);
357  	}
358  
359  	/*
360  	 * check logging status for the statefile if it resides
361  	 * on a different fs and the type is a regular file
362  	 */
363  	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
364  		cpr_log_status(enable, &sf_status, vp);
365  	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
366  	VN_RELE(vp);
367  
368  	return (0);
369  }
370  #endif
371  
372  
373  /*
374   * Check if klmmod is loaded and call a lock manager service; if klmmod
375   * is not loaded, the services aren't needed and a call would trigger a
376   * modload, which would block since another thread would never run.
377   */
378  static void
379  cpr_lock_mgr(void (*service)(void))
380  {
381  	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
382  		(*service)();
383  }
384  
385  int
386  cpr_suspend_cpus(void)
387  {
388  	int	ret = 0;
389  	extern void *i_cpr_save_context(void *arg);
390  
391  	mutex_enter(&cpu_lock);
392  
393  	/*
394  	 * the machine could not have booted without a bootcpu
395  	 */
396  	ASSERT(i_cpr_bootcpu() != NULL);
397  
398  	/*
399  	 * bring all the offline cpus online
400  	 */
401  	if ((ret = cpr_all_online())) {
402  		mutex_exit(&cpu_lock);
403  		return (ret);
404  	}
405  
406  	/*
407  	 * Set the affinity to be the boot processor
408  	 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
409  	 */
410  	affinity_set(i_cpr_bootcpuid());
411  
412  	ASSERT(CPU->cpu_id == 0);
413  
414  	PMD(PMD_SX, ("curthread running on bootcpu\n"))
415  
416  	/*
417  	 * pause all other running CPUs and save the CPU state at the sametime
418  	 */
419  	cpu_pause_func = i_cpr_save_context;
420  	pause_cpus(NULL);
421  
422  	mutex_exit(&cpu_lock);
423  
424  	return (0);
425  }
426  
/*
 * Take the system down to a checkpointable state and write
 * the state file, the following are sequentially executed:
 *
 *    - Request all user threads to stop themselves
 *    - push out and invalidate user pages
 *    - bring statefile inode incore to prevent a miss later
 *    - request all daemons to stop
 *    - check and make sure all threads are stopped
 *    - sync the file system
 *    - suspend all devices
 *    - block interrupts
 *    - dump system state and memory to state file
 *    - SPARC code will not be called with CPR_TORAM, caller filters
 *
 * sleeptype is CPR_TORAM or CPR_TODISK.  For CPR_TORAM the routine
 * returns once the system is quiesced (substate C_ST_NODUMP); the
 * statefile is only dumped in the SPARC-only CPR_TODISK tail.
 *
 * Returns 0 on success, otherwise the error of the step that failed
 * (ENXIO when pm_reattach_noinvol() fails).  Each step records its
 * substate with cpr_set_substate() before it runs, and the early returns
 * do no cleanup of their own: the caller is expected to run cpr_resume(),
 * which switches on CPR->c_substate to pick its rollback point.  In
 * reusable mode on SPARC a successful dump does not return; it longjmps
 * through the lwp's lwp_qsav instead.
 */
static int
cpr_suspend(int sleeptype)
{
#if defined(__sparc)
	int sf_realloc, nverr;
#endif
	int	rc = 0;
	int	skt_rc = 0;

	PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
	cpr_set_substate(C_ST_SUSPEND_BEGIN);

	cpr_suspend_init(sleeptype);

	cpr_save_time();

	/* timestamp for the whole suspend/resume cycle */
	cpr_tod_get(&wholecycle_tv);
	CPR_STAT_EVENT_START("Suspend Total");

	i_cpr_alloc_cpus();

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	if (!cpr_reusable_mode) {
		/*
		 * We need to validate default file before fs
		 * functionality is disabled.
		 */
		if (rc = cpr_validate_definfo(0))
			return (rc);
	}
	i_cpr_save_machdep_info();
#endif

	PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
	/* Stop PM scans ASAP */
	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);

	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
	    NULL, NULL, PM_DEP_WAIT, NULL, 0);

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	cpr_set_substate(C_ST_MP_OFFLINE);
	if (rc = cpr_mp_offline())
		return (rc);
#endif
	/*
	 * Ask Xorg to suspend the frame buffer, and wait for it to happen
	 */
	mutex_enter(&srn_clone_lock);
	if (srn_signal) {
		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
		    "SRN_SUSPEND_REQ)\n"))
		srn_inuse = 1;	/* because *(srn_signal) cv_waits */
		(*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
		srn_inuse = 0;
	} else {
		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
	}
	mutex_exit(&srn_clone_lock);

	/*
	 * Ask the user threads to stop by themselves, but
	 * if they don't or can't after 3 retries, we give up on CPR.
	 * The 3 retry is not a random number because 2 is possible if
	 * a thread has been forked before the parent thread is stopped.
	 */
	CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
	CPR_STAT_EVENT_START("  stop users");
	cpr_set_substate(C_ST_STOP_USER_THREADS);
	PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
	if (rc = cpr_stop_user_threads())
		return (rc);
	CPR_STAT_EVENT_END("  stop users");
	CPR_DEBUG(CPR_DEBUG1, "done\n");

	PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
	pm_save_direct_levels();

	/*
	 * User threads are stopped.  We will start communicating with the
	 * user via prom_printf (some debug output may have already happened)
	 * so let anybody who cares know about this (bug 4096122)
	 */
	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);

	PMD(PMD_SX, ("cpr_suspend: send notice\n"))
	/* the notice is only sent on non-DEBUG builds */
#ifndef DEBUG
	cpr_send_notice();
	if (cpr_debug)
		prom_printf("\n");
#endif

	PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);

	/*
	 * Reattach any drivers which originally exported the
	 * no-involuntary-power-cycles property.  We need to do this before
	 * stopping kernel threads because modload is implemented using
	 * a kernel thread.
	 */
	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
	PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
	if (!pm_reattach_noinvol())
		return (ENXIO);

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	/*
	 * if ufs logging is enabled, we need to disable before
	 * stopping kernel threads so that ufs delete and roll
	 * threads can do the work.
	 */
	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
	if (rc = cpr_ufs_logging(0))
		return (rc);

	/*
	 * Use sync_all to swap out all user pages and find out how much
	 * extra space needed for user pages that don't have back store
	 * space left.
	 */
	CPR_STAT_EVENT_START("  swapout upages");
	vfs_sync(SYNC_ALL);
	CPR_STAT_EVENT_END("  swapout upages");

	cpr_set_bitmap_size();

	/*
	 * Re-entered from the bottom of this function when cpr_dump()
	 * fails with ENOSPC.
	 */
alloc_statefile:
	/*
	 * If our last state was C_ST_DUMP_NOSPC, we're trying to
	 * realloc the statefile, otherwise this is the first attempt.
	 */
	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;

	CPR_STAT_EVENT_START("  alloc statefile");
	cpr_set_substate(C_ST_STATEF_ALLOC);
	if (rc = cpr_alloc_statefile(sf_realloc)) {
		if (sf_realloc)
			errp("realloc failed\n");
		return (rc);
	}
	CPR_STAT_EVENT_END("  alloc statefile");

	/*
	 * Sync the filesystem to preserve its integrity.
	 *
	 * This sync is also used to flush out all B_DELWRI buffers
	 * (fs cache) which are mapped and neither dirty nor referenced
	 * before cpr_invalidate_pages destroys them.
	 * fsflush does similar thing.
	 */
	sync();

	/*
	 * destroy all clean file mapped kernel pages
	 */
	CPR_STAT_EVENT_START("  clean pages");
	CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
	CPR_STAT_EVENT_END("  clean pages");
#endif


	/*
	 * Hooks needed by lock manager prior to suspending.
	 * Refer to code for more comments.
	 */
	PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
	cpr_lock_mgr(lm_cprsuspend);

	/*
	 * Now suspend all the devices
	 */
	CPR_STAT_EVENT_START("  stop drivers");
	CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
	cpr_set_substate(C_ST_SUSPEND_DEVICES);
	/* pm_powering_down is raised only around the device suspend walk */
	pm_powering_down = 1;
	PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
	rc = cpr_suspend_devices(ddi_root_node());
	pm_powering_down = 0;
	if (rc)
		return (rc);
	CPR_DEBUG(CPR_DEBUG1, "done\n");
	CPR_STAT_EVENT_END("  stop drivers");

	/*
	 * Stop all daemon activities
	 */
	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
	PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
	if (skt_rc = cpr_stop_kernel_threads())
		return (skt_rc);

	PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);

	PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
	pm_reattach_noinvol_fini();

	/* stash abort_enable and force it off; cpr_resume() restores it */
	cpr_sae(1);

	PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	if (sleeptype == CPR_TODISK) {
		/*
		 * It's safer to do tod_get before we disable all intr.
		 */
		CPR_STAT_EVENT_START("  write statefile");
	}

	/*
	 * it's time to ignore the outside world, stop the real time
	 * clock and disable any further interrupt activity.
	 */
	PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */

	mutex_enter(&cpu_lock);
	PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
	cyclic_suspend();
	mutex_exit(&cpu_lock);

	/*
	 * Due to the different methods of resuming the system between
	 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
	 * and CPR_TORAM (restart via reset into existing kernel image)
	 * cpus are not suspended and restored in the SPARC case, since it
	 * is necessary to restart the cpus and pause them before restoring
	 * the OBP image
	 */

#if defined(__x86)

	/* pause aux cpus */
	PMD(PMD_SX, ("pause aux cpus\n"))

	cpr_set_substate(C_ST_MP_PAUSED);

	if ((rc = cpr_suspend_cpus()) != 0)
		return (rc);
#endif

	PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
	i_cpr_stop_intr();
	CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");

	/*
	 * Since we will now disable the mechanism that causes prom_printfs
	 * to power up (if needed) the console fb/monitor, we assert that
	 * it must be up now.
	 */
	ASSERT(pm_cfb_is_up());
	PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
	prom_suspend_prepost();

#if defined(__sparc)
	/*
	 * getting ready to write ourself out, flush the register
	 * windows to make sure that our stack is good when we
	 * come back on the resume side.
	 */
	flush_windows();
#endif

	/*
	 * For S3, we're done
	 */
	if (sleeptype == CPR_TORAM) {
		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
		cpr_set_substate(C_ST_NODUMP);
		return (rc);
	}
#if defined(__sparc)
	/*
	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
	 *
	 * The system is quiesced at this point, we are ready to either dump
	 * to the state file for a extended sleep or a simple shutdown for
	 * systems with non-volatile memory.
	 */

	/*
	 * special handling for reusable:
	 */
	if (cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_1);
		if (nverr = cpr_set_properties(1))
			return (nverr);
	}

	cpr_set_substate(C_ST_DUMP);
	rc = cpr_dump(C_VP);

	/*
	 * if any error occurred during dump, more
	 * special handling for reusable:
	 */
	if (rc && cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_0);
		if (nverr = cpr_set_properties(0))
			return (nverr);
	}

	if (rc == ENOSPC) {
		/*
		 * The statefile was too small: roll the system back with
		 * cpr_resume() and retry from the statefile allocation.
		 */
		cpr_set_substate(C_ST_DUMP_NOSPC);
		(void) cpr_resume(sleeptype);
		goto alloc_statefile;
	} else if (rc == 0) {
		if (cpr_reusable_mode) {
			/* does not return: jumps through the lwp's lwp_qsav */
			cpr_set_substate(C_ST_REUSABLE);
			longjmp(&ttolwp(curthread)->lwp_qsav);
		} else
			rc = cpr_set_properties(1);
	}
#endif
	PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
	return (rc);
}
765  
/*
 * Restart the cpus that cpr_suspend_cpus() paused and undo the rest of
 * its work.  cpr_resume() calls this when cpr_suspend_succeeded is set;
 * otherwise it calls cpr_unpause_cpus().
 *
 * cpu_lock is taken twice: once around start_cpus(), and again for the
 * cleanup, with i_cpr_post_resume_cpus() called in between while the
 * lock is not held.
 */
void
cpr_resume_cpus(void)
{
	/*
	 * this is a cut down version of start_other_cpus()
	 * just do the initialization to wake the other cpus
	 */

#if defined(__x86)
	/*
	 * Initialize our syscall handlers
	 */
	init_cpu_syscall(CPU);

#endif

	i_cpr_pre_resume_cpus();

	/*
	 * Restart the paused cpus
	 */
	mutex_enter(&cpu_lock);
	start_cpus();
	mutex_exit(&cpu_lock);

	i_cpr_post_resume_cpus();

	mutex_enter(&cpu_lock);
	/*
	 * Restore this cpu to use the regular cpu_pause(), so that
	 * online and offline will work correctly
	 */
	cpu_pause_func = NULL;

	/*
	 * clear the affinity set in cpr_suspend_cpus()
	 */
	affinity_clear();

	/*
	 * offline all the cpus that were brought online during suspend
	 */
	cpr_restore_offline();

	mutex_exit(&cpu_lock);
}
812  
/*
 * Undo cpr_suspend_cpus() when the suspend did not complete: cpr_resume()
 * calls this instead of cpr_resume_cpus() when cpr_suspend_succeeded is
 * not set.  Unlike cpr_resume_cpus(), no per-cpu re-initialization is
 * done; the pause function is reset before the cpus are restarted, and
 * the whole sequence runs under a single hold of cpu_lock.
 */
void
cpr_unpause_cpus(void)
{
	/*
	 * Now restore the system back to what it was before we suspended
	 */

	PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))

	mutex_enter(&cpu_lock);

	/*
	 * Restore this cpu to use the regular cpu_pause(), so that
	 * online and offline will work correctly
	 */
	cpu_pause_func = NULL;

	/*
	 * Restart the paused cpus
	 */
	start_cpus();

	/*
	 * clear the affinity set in cpr_suspend_cpus()
	 */
	affinity_clear();

	/*
	 * offline all the cpus that were brought online during suspend
	 */
	cpr_restore_offline();

	mutex_exit(&cpu_lock);
}
847  
848  /*
849   * Bring the system back up from a checkpoint, at this point
850   * the VM has been minimally restored by boot, the following
851   * are executed sequentially:
852   *
853   *    - machdep setup and enable interrupts (mp startup if it's mp)
854   *    - resume all devices
855   *    - restart daemons
856   *    - put all threads back on run queue
857   */
858  static int
859  cpr_resume(int sleeptype)
860  {
861  	cpr_time_t pwron_tv, *ctp;
862  	char *str;
863  	int rc = 0;
864  
865  	/*
866  	 * The following switch is used to resume the system
867  	 * that was suspended to a different level.
868  	 */
869  	CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
870  	PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
871  
872  	/*
873  	 * Note:
874  	 *
875  	 * The rollback labels rb_xyz do not represent the cpr resume
876  	 * state when event 'xyz' has happened. Instead they represent
877  	 * the state during cpr suspend when event 'xyz' was being
878  	 * entered (and where cpr suspend failed). The actual call that
879  	 * failed may also need to be partially rolled back, since they
880  	 * aren't atomic in most cases.  In other words, rb_xyz means
881  	 * "roll back all cpr suspend events that happened before 'xyz',
882  	 * and the one that caused the failure, if necessary."
883  	 */
884  	switch (CPR->c_substate) {
885  #if defined(__sparc)
886  	case C_ST_DUMP:
887  		/*
888  		 * This is most likely a full-fledged cpr_resume after
889  		 * a complete and successful cpr suspend. Just roll back
890  		 * everything.
891  		 */
892  		ASSERT(sleeptype == CPR_TODISK);
893  		break;
894  
895  	case C_ST_REUSABLE:
896  	case C_ST_DUMP_NOSPC:
897  	case C_ST_SETPROPS_0:
898  	case C_ST_SETPROPS_1:
899  		/*
900  		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
901  		 * special switch cases here. The other two do not have
902  		 * any state change during cpr_suspend() that needs to
903  		 * be rolled back. But these are exit points from
904  		 * cpr_suspend, so theoretically (or in the future), it
905  		 * is possible that a need for roll back of a state
906  		 * change arises between these exit points.
907  		 */
908  		ASSERT(sleeptype == CPR_TODISK);
909  		goto rb_dump;
910  #endif
911  
912  	case C_ST_NODUMP:
913  		PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
914  		goto rb_nodump;
915  
916  	case C_ST_STOP_KERNEL_THREADS:
917  		PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
918  		goto rb_stop_kernel_threads;
919  
920  	case C_ST_SUSPEND_DEVICES:
921  		PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
922  		goto rb_suspend_devices;
923  
924  #if defined(__sparc)
925  	case C_ST_STATEF_ALLOC:
926  		ASSERT(sleeptype == CPR_TODISK);
927  		goto rb_statef_alloc;
928  
929  	case C_ST_DISABLE_UFS_LOGGING:
930  		ASSERT(sleeptype == CPR_TODISK);
931  		goto rb_disable_ufs_logging;
932  #endif
933  
934  	case C_ST_PM_REATTACH_NOINVOL:
935  		PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
936  		goto rb_pm_reattach_noinvol;
937  
938  	case C_ST_STOP_USER_THREADS:
939  		PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
940  		goto rb_stop_user_threads;
941  
942  #if defined(__sparc)
943  	case C_ST_MP_OFFLINE:
944  		PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
945  		goto rb_mp_offline;
946  #endif
947  
948  #if defined(__x86)
949  	case C_ST_MP_PAUSED:
950  		PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
951  		goto rb_mp_paused;
952  #endif
953  
954  
955  	default:
956  		PMD(PMD_SX, ("cpr_resume: others\n"))
957  		goto rb_others;
958  	}
959  
960  rb_all:
961  	/*
962  	 * perform platform-dependent initialization
963  	 */
964  	if (cpr_suspend_succeeded)
965  		i_cpr_machdep_setup();
966  
967  	/*
968  	 * system did not really go down if we jump here
969  	 */
970  rb_dump:
971  	/*
972  	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
973  	 *
974  	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
975  	 */
976  rb_nodump:
977  	/*
978  	 * If we did suspend to RAM, we didn't generate a dump
979  	 */
980  	PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
981  	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
982  	if (cpr_suspend_succeeded) {
983  		PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
984  		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
985  	}
986  
987  	prom_resume_prepost();
988  #if !defined(__sparc)
989  	/*
990  	 * Need to sync the software clock with the hardware clock.
991  	 * On Sparc, this occurs in the sparc-specific cbe.  However
992  	 * on x86 this needs to be handled _before_ we bring other cpu's
993  	 * back online.  So we call a resume function in timestamp.c
994  	 */
995  	if (tsc_resume_in_cyclic == 0)
996  		tsc_resume();
997  
998  #endif
999  
1000  #if defined(__sparc)
1001  	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
1002  		kdi_dvec_cpr_restart();
1003  #endif
1004  
1005  
1006  #if defined(__x86)
1007  rb_mp_paused:
1008  	PT(PT_RMPO);
1009  	PMD(PMD_SX, ("resume aux cpus\n"))
1010  
1011  	if (cpr_suspend_succeeded) {
1012  		cpr_resume_cpus();
1013  	} else {
1014  		cpr_unpause_cpus();
1015  	}
1016  #endif
1017  
1018  	/*
1019  	 * let the tmp callout catch up.
1020  	 */
1021  	PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
1022  	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
1023  
1024  	i_cpr_enable_intr();
1025  
1026  	mutex_enter(&cpu_lock);
1027  	PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
1028  	cyclic_resume();
1029  	mutex_exit(&cpu_lock);
1030  
1031  	PMD(PMD_SX, ("cpr_resume: handle xc\n"))
1032  	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
1033  
1034  	PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
1035  	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
1036  
1037  	/*
1038  	 * statistics gathering
1039  	 */
1040  	if (cpr_suspend_succeeded) {
1041  		/*
1042  		 * Prevent false alarm in tod_validate() due to tod
1043  		 * value change between suspend and resume
1044  		 */
1045  		cpr_tod_status_set(TOD_CPR_RESUME_DONE);
1046  
1047  		cpr_convert_promtime(&pwron_tv);
1048  
1049  		ctp = &cpr_term.tm_shutdown;
1050  		if (sleeptype == CPR_TODISK)
1051  			CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
1052  		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
1053  
1054  		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
1055  
1056  		str = "  prom time";
1057  		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
1058  		ctp = &cpr_term.tm_cprboot_start;
1059  		CPR_STAT_EVENT_END_TMZ(str, ctp);
1060  
1061  		str = "  read statefile";
1062  		CPR_STAT_EVENT_START_TMZ(str, ctp);
1063  		ctp = &cpr_term.tm_cprboot_end;
1064  		CPR_STAT_EVENT_END_TMZ(str, ctp);
1065  	}
1066  
1067  rb_stop_kernel_threads:
1068  	/*
1069  	 * Put all threads back to where they belong; get the kernel
1070  	 * daemons straightened up too. Note that the callback table
1071  	 * locked during cpr_stop_kernel_threads() is released only
1072  	 * in cpr_start_kernel_threads(). Ensure modunloading is
1073  	 * disabled before starting kernel threads, we don't want
1074  	 * modunload thread to start changing device tree underneath.
1075  	 */
1076  	PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
1077  	modunload_disable();
1078  	PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
1079  	cpr_start_kernel_threads();
1080  
1081  rb_suspend_devices:
1082  	CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
1083  	CPR_STAT_EVENT_START("  start drivers");
1084  
1085  	PMD(PMD_SX,
1086  	    ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
1087  	    cpr_resume_uniproc))
1088  
1089  #if defined(__x86)
1090  	/*
1091  	 * If cpr_resume_uniproc is set, then pause all the other cpus
1092  	 * apart from the current cpu, so that broken drivers that think
1093  	 * that they are on a uniprocessor machine will resume
1094  	 */
1095  	if (cpr_resume_uniproc) {
1096  		mutex_enter(&cpu_lock);
1097  		pause_cpus(NULL);
1098  		mutex_exit(&cpu_lock);
1099  	}
1100  #endif
1101  
1102  	/*
1103  	 * The policy here is to continue resume everything we can if we did
1104  	 * not successfully finish suspend; and panic if we are coming back
1105  	 * from a fully suspended system.
1106  	 */
1107  	PMD(PMD_SX, ("cpr_resume: resume devices\n"))
1108  	rc = cpr_resume_devices(ddi_root_node(), 0);
1109  
1110  	cpr_sae(0);
1111  
1112  	str = "Failed to resume one or more devices.";
1113  
1114  	if (rc) {
1115  		if (CPR->c_substate == C_ST_DUMP ||
1116  		    (sleeptype == CPR_TORAM &&
1117  		    CPR->c_substate == C_ST_NODUMP)) {
1118  			if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
1119  				PMD(PMD_SX, ("cpr_resume: resume device "
1120  				    "warn\n"))
1121  				cpr_err(CE_WARN, str);
1122  			} else {
1123  				PMD(PMD_SX, ("cpr_resume: resume device "
1124  				    "panic\n"))
1125  				cpr_err(CE_PANIC, str);
1126  			}
1127  		} else {
1128  			PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
1129  			cpr_err(CE_WARN, str);
1130  		}
1131  	}
1132  
1133  	CPR_STAT_EVENT_END("  start drivers");
1134  	CPR_DEBUG(CPR_DEBUG1, "done\n");
1135  
1136  #if defined(__x86)
1137  	/*
1138  	 * If cpr_resume_uniproc is set, then unpause all the processors
1139  	 * that were paused before resuming the drivers
1140  	 */
1141  	if (cpr_resume_uniproc) {
1142  		mutex_enter(&cpu_lock);
1143  		start_cpus();
1144  		mutex_exit(&cpu_lock);
1145  	}
1146  #endif
1147  
1148  	/*
1149  	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
1150  	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
1151  	 * modunloading now.
1152  	 */
1153  	if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
1154  		PMD(PMD_SX, ("cpr_resume: modload enable\n"))
1155  		modunload_enable();
1156  	}
1157  
1158  	/*
1159  	 * Hooks needed by lock manager prior to resuming.
1160  	 * Refer to code for more comments.
1161  	 */
1162  	PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
1163  	cpr_lock_mgr(lm_cprresume);
1164  
1165  #if defined(__sparc)
1166  	/*
1167  	 * This is a partial (half) resume during cpr suspend, we
1168  	 * haven't yet given up on the suspend. On return from here,
1169  	 * cpr_suspend() will try to reallocate and retry the suspend.
1170  	 */
1171  	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
1172  		return (0);
1173  	}
1174  
1175  	if (sleeptype == CPR_TODISK) {
1176  rb_statef_alloc:
1177  		cpr_statef_close();
1178  
1179  rb_disable_ufs_logging:
1180  		/*
1181  		 * if ufs logging was disabled, re-enable
1182  		 */
1183  		(void) cpr_ufs_logging(1);
1184  	}
1185  #endif
1186  
1187  rb_pm_reattach_noinvol:
1188  	/*
1189  	 * When pm_reattach_noinvol() succeeds, modunload_thread will
1190  	 * remain disabled until after cpr suspend passes the
1191  	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
1192  	 * cpr suspend reaches this state, we'll need to enable modunload
1193  	 * thread during rollback.
1194  	 */
1195  	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
1196  	    CPR->c_substate == C_ST_STATEF_ALLOC ||
1197  	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
1198  	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
1199  		PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
1200  		pm_reattach_noinvol_fini();
1201  	}
1202  
1203  	PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
1204  	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
1205  	PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
1206  	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
1207  
1208  	PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
1209  	pm_restore_direct_levels();
1210  
1211  rb_stop_user_threads:
1212  	CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
1213  	PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
1214  	cpr_start_user_threads();
1215  	CPR_DEBUG(CPR_DEBUG1, "done\n");
1216  	/*
1217  	 * Ask Xorg to resume the frame buffer, and wait for it to happen
1218  	 */
1219  	mutex_enter(&srn_clone_lock);
1220  	if (srn_signal) {
1221  		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
1222  		    "SRN_NORMAL_RESUME)\n"))
1223  		srn_inuse = 1;		/* because (*srn_signal) cv_waits */
1224  		(*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
1225  		srn_inuse = 0;
1226  	} else {
1227  		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
1228  	}
1229  	mutex_exit(&srn_clone_lock);
1230  
1231  #if defined(__sparc)
1232  rb_mp_offline:
1233  	if (cpr_mp_online())
1234  		cpr_err(CE_WARN, "Failed to online all the processors.");
1235  #endif
1236  
1237  rb_others:
1238  	PMD(PMD_SX, ("cpr_resume: dep thread\n"))
1239  	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
1240  	    PM_DEP_WAIT, NULL, 0);
1241  
1242  	PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
1243  	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
1244  
1245  	if (cpr_suspend_succeeded) {
1246  		cpr_stat_record_events();
1247  	}
1248  
1249  #if defined(__sparc)
1250  	if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
1251  		cpr_clear_definfo();
1252  #endif
1253  
1254  	i_cpr_free_cpus();
1255  	CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
1256  	PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
1257  	cpr_signal_user(SIGTHAW);
1258  	CPR_DEBUG(CPR_DEBUG1, "done\n");
1259  
1260  	CPR_STAT_EVENT_END("Resume Total");
1261  
1262  	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
1263  	CPR_STAT_EVENT_END("WHOLE CYCLE");
1264  
1265  	if (cpr_debug & CPR_DEBUG1)
1266  		cmn_err(CE_CONT, "\nThe system is back where you left!\n");
1267  
1268  	CPR_STAT_EVENT_START("POST CPR DELAY");
1269  
1270  #ifdef CPR_STAT
1271  	ctp = &cpr_term.tm_shutdown;
1272  	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
1273  	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
1274  
1275  	CPR_STAT_EVENT_PRINT();
1276  #endif /* CPR_STAT */
1277  
1278  	PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
1279  	return (rc);
1280  }
1281  
1282  static void
1283  cpr_suspend_init(int sleeptype)
1284  {
1285  	cpr_time_t *ctp;
1286  
1287  	cpr_stat_init();
1288  
1289  	/*
1290  	 * If cpr_suspend() failed before cpr_dump() gets a chance
1291  	 * to reinitialize the terminator of the statefile,
1292  	 * the values of the old terminator will still linger around.
1293  	 * Since the terminator contains information that we need to
1294  	 * decide whether suspend succeeded or not, we need to
1295  	 * reinitialize it as early as possible.
1296  	 */
1297  	cpr_term.real_statef_size = 0;
1298  	ctp = &cpr_term.tm_shutdown;
1299  	bzero(ctp, sizeof (*ctp));
1300  	ctp = &cpr_term.tm_cprboot_start;
1301  	bzero(ctp, sizeof (*ctp));
1302  	ctp = &cpr_term.tm_cprboot_end;
1303  	bzero(ctp, sizeof (*ctp));
1304  
1305  	if (sleeptype == CPR_TODISK) {
1306  		/*
1307  		 * Lookup the physical address of our thread structure.
1308  		 * This should never be invalid and the entire thread structure
1309  		 * is expected to reside within the same pfn.
1310  		 */
1311  		curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
1312  		ASSERT(curthreadpfn != PFN_INVALID);
1313  		ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
1314  		    (caddr_t)curthread + sizeof (kthread_t) - 1));
1315  	}
1316  
1317  	cpr_suspend_succeeded = 0;
1318  }
1319  
1320  /*
1321   * bring all the offline cpus online
1322   */
1323  static int
1324  cpr_all_online(void)
1325  {
1326  	int	rc = 0;
1327  
1328  #ifdef	__sparc
1329  	/*
1330  	 * do nothing
1331  	 */
1332  #else
1333  
1334  	cpu_t	*cp;
1335  
1336  	ASSERT(MUTEX_HELD(&cpu_lock));
1337  
1338  	cp = cpu_list;
1339  	do {
1340  		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1341  		if (!CPU_ACTIVE(cp)) {
1342  			if ((rc = cpu_online(cp)) != 0)
1343  				break;
1344  			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
1345  		}
1346  	} while ((cp = cp->cpu_next) != cpu_list);
1347  
1348  	if (rc) {
1349  		/*
1350  		 * an online operation failed so offline the cpus
1351  		 * that were onlined above to restore the system
1352  		 * to its original state
1353  		 */
1354  		cpr_restore_offline();
1355  	}
1356  #endif
1357  	return (rc);
1358  }
1359  
1360  /*
1361   * offline all the cpus that were brought online by cpr_all_online()
1362   */
1363  static void
1364  cpr_restore_offline(void)
1365  {
1366  
1367  #ifdef	__sparc
1368  	/*
1369  	 * do nothing
1370  	 */
1371  #else
1372  
1373  	cpu_t	*cp;
1374  	int	rc = 0;
1375  
1376  	ASSERT(MUTEX_HELD(&cpu_lock));
1377  
1378  	cp = cpu_list;
1379  	do {
1380  		if (CPU_CPR_IS_ONLINE(cp)) {
1381  			rc =  cpu_offline(cp, 0);
1382  			/*
1383  			 * this offline should work, since the cpu was
1384  			 * offline originally and was successfully onlined
1385  			 * by cpr_all_online()
1386  			 */
1387  			ASSERT(rc == 0);
1388  			cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1389  		}
1390  	} while ((cp = cp->cpu_next) != cpu_list);
1391  
1392  #endif
1393  
1394  }
1395