xref: /illumos-gate/usr/src/uts/common/cpr/cpr_main.c (revision 33efde4275d24731ef87927237b0ffb0630b6b2d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright 2019 Joyent, Inc.
28  */
29 
30 /*
 * This module contains the guts of the checkpoint-resume mechanism.
 * All code in this module is platform independent.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/errno.h>
37 #include <sys/callb.h>
38 #include <sys/processor.h>
39 #include <sys/machsystm.h>
40 #include <sys/clock.h>
41 #include <sys/vfs.h>
42 #include <sys/kmem.h>
43 #include <nfs/lm.h>
44 #include <sys/systm.h>
45 #include <sys/cpr.h>
46 #include <sys/bootconf.h>
47 #include <sys/cyclic.h>
48 #include <sys/filio.h>
49 #include <sys/fs/ufs_filio.h>
50 #include <sys/epm.h>
51 #include <sys/modctl.h>
52 #include <sys/reboot.h>
53 #include <sys/kdi.h>
54 #include <sys/promif.h>
55 #include <sys/srn.h>
56 #include <sys/cpr_impl.h>
57 
/*
 * Fetch the devi_pm_ppm field of a dev_info node, cast to dev_info_t *.
 */
#define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)

/*
 * Data and routines that are defined outside this file.
 */
extern struct cpr_terminator cpr_term;

extern int cpr_alloc_statefile(int);
extern void cpr_start_kernel_threads(void);
extern void cpr_abbreviate_devpath(char *, char *);
extern void cpr_convert_promtime(cpr_time_t *);
extern void cpr_send_notice(void);
extern void cpr_set_bitmap_size(void);
extern void cpr_stat_init();
extern void cpr_statef_close(void);
extern void flush_windows(void);
extern void (*srn_signal)(int, int);
extern void init_cpu_syscall(struct cpu *);
extern void i_cpr_pre_resume_cpus();
extern void i_cpr_post_resume_cpus();
extern int cpr_is_ufs(struct vfs *);

extern int pm_powering_down;
extern kmutex_t srn_clone_lock;
extern int srn_inuse;

/*
 * Forward declarations for routines private to this file.
 */
static int cpr_suspend(int);
static int cpr_resume(int);
static void cpr_suspend_init(int);
#if defined(__x86)
static int cpr_suspend_cpus(void);
static void cpr_resume_cpus(void);
#endif
static int cpr_all_online(void);
static void cpr_restore_offline(void);

/*
 * wholecycle_tv is filled in by cpr_tod_get() near the start of
 * cpr_suspend().  cpr_suspend_succeeded is tested by cpr_resume() to
 * select steps that only apply after a completed suspend (for example
 * cpr_resume_cpus() rather than cpr_unpause_cpus()).
 */
cpr_time_t wholecycle_tv;
int cpr_suspend_succeeded;
pfn_t curthreadpfn;
int curthreadremapped;

extern cpuset_t cpu_ready_set;

extern processorid_t i_cpr_bootcpuid(void);
extern cpu_t *i_cpr_bootcpu(void);
extern void tsc_adjust_delta(hrtime_t tdelta);
extern void tsc_resume(void);
extern int tsc_resume_in_cyclic;

/*
 * Set this variable to 1 to have device drivers resume in a
 * uniprocessor environment.  This allows drivers that assume
 * they resume on a UP machine to continue to work.  It should be
 * deprecated once the broken drivers are fixed.
 */
int cpr_resume_uniproc = 0;
111 
112 /*
113  * save or restore abort_enable;  this prevents a drop
114  * to kadb or prom during cpr_resume_devices() when
115  * there is no kbd present;  see abort_sequence_enter()
116  */
117 static void
cpr_sae(int stash)118 cpr_sae(int stash)
119 {
120 	static int saved_ae = -1;
121 
122 	if (stash) {
123 		saved_ae = abort_enable;
124 		abort_enable = 0;
125 	} else if (saved_ae != -1) {
126 		abort_enable = saved_ae;
127 		saved_ae = -1;
128 	}
129 }
130 
131 
132 /*
133  * The main switching point for cpr, this routine starts the ckpt
134  * and state file saving routines; on resume the control is
135  * returned back to here and it then calls the resume routine.
136  */
137 int
cpr_main(int sleeptype)138 cpr_main(int sleeptype)
139 {
140 	int rc, rc2;
141 	label_t saveq;
142 	klwp_t *tlwp = ttolwp(curthread);
143 
144 	if (sleeptype == CPR_TODISK) {
145 		if ((rc = cpr_default_setup(1)) != 0)
146 			return (rc);
147 		ASSERT(tlwp);
148 		saveq = tlwp->lwp_qsav;
149 	}
150 
151 	if (sleeptype == CPR_TORAM) {
152 		rc = cpr_suspend(sleeptype);
153 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
154 		if (rc == 0) {
155 			int i_cpr_power_down(int sleeptype);
156 
157 			/*
158 			 * From this point on, we should be at a high
159 			 * spl, interrupts disabled, and all but one
160 			 * cpu's paused (effectively UP/single threaded).
161 			 * So this is were we want to put ASSERTS()
162 			 * to let us know otherwise.
163 			 */
164 			ASSERT(cpus_paused());
165 
166 			/*
167 			 * Now do the work of actually putting this
168 			 * machine to sleep!
169 			 */
170 			rc = i_cpr_power_down(sleeptype);
171 			if (rc == 0) {
172 				PMD(PMD_SX, ("back from successful suspend\n"))
173 			}
174 			/*
175 			 * We do care about the return value from cpr_resume
176 			 * at this point, as it will tell us if one of the
177 			 * resume functions failed (cpr_resume_devices())
178 			 * However, for this to return and _not_ panic, means
179 			 * that we must be in one of the test functions.  So
180 			 * check for that and return an appropriate message.
181 			 */
182 			rc2 = cpr_resume(sleeptype);
183 			if (rc2 != 0) {
184 				ASSERT(cpr_test_point > 0);
185 				cmn_err(CE_NOTE,
186 				    "cpr_resume returned non-zero: %d\n", rc2);
187 				PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
188 			}
189 			ASSERT(!cpus_paused());
190 		} else {
191 			PMD(PMD_SX, ("failed suspend, resuming\n"))
192 			rc = cpr_resume(sleeptype);
193 		}
194 		return (rc);
195 	}
196 	/*
197 	 * Remember where we are for resume after reboot
198 	 */
199 	if (!setjmp(&tlwp->lwp_qsav)) {
200 		/*
201 		 * try to checkpoint the system, if failed return back
202 		 * to userland, otherwise power off.
203 		 */
204 		rc = cpr_suspend(sleeptype);
205 		if (rc || cpr_reusable_mode) {
206 			/*
207 			 * We don't really want to go down, or
208 			 * something went wrong in suspend, do what we can
209 			 * to put the system back to an operable state then
210 			 * return back to userland.
211 			 */
212 			PMD(PMD_SX, ("failed suspend, resuming\n"))
213 			(void) cpr_resume(sleeptype);
214 			PMD(PMD_SX, ("back from failed suspend resume\n"))
215 		}
216 	} else {
217 		/*
218 		 * This is the resumed side of longjmp, restore the previous
219 		 * longjmp pointer if there is one so this will be transparent
220 		 * to the world.
221 		 * This path is only for CPR_TODISK, where we reboot
222 		 */
223 		ASSERT(sleeptype == CPR_TODISK);
224 		tlwp->lwp_qsav = saveq;
225 		CPR->c_flags &= ~C_SUSPENDING;
226 		CPR->c_flags |= C_RESUMING;
227 
228 		/*
229 		 * resume the system back to the original state
230 		 */
231 		rc = cpr_resume(sleeptype);
232 		PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
233 		    rc))
234 	}
235 
236 	(void) cpr_default_setup(0);
237 
238 	return (rc);
239 }
240 
241 
242 #if defined(__sparc)
243 
244 /*
245  * check/disable or re-enable UFS logging
246  */
247 static void
cpr_log_status(int enable,int * svstat,vnode_t * vp)248 cpr_log_status(int enable, int *svstat, vnode_t *vp)
249 {
250 	int cmd, status, error;
251 	char *str, *able;
252 	fiolog_t fl;
253 	refstr_t *mntpt;
254 
255 	str = "cpr_log_status";
256 	bzero(&fl, sizeof (fl));
257 	fl.error = FIOLOG_ENONE;
258 
259 	/*
260 	 * when disabling, first get and save logging status (0 or 1)
261 	 */
262 	if (enable == 0) {
263 		if (error = VOP_IOCTL(vp, _FIOISLOG,
264 		    (uintptr_t)&status, FKIOCTL, CRED(), NULL, NULL)) {
265 			mntpt = vfs_getmntpoint(vp->v_vfsp);
266 			prom_printf("%s: \"%s\", cant get logging "
267 			    "status, error %d\n", str, refstr_value(mntpt),
268 			    error);
269 			refstr_rele(mntpt);
270 			return;
271 		}
272 		*svstat = status;
273 		if (cpr_debug & CPR_DEBUG5) {
274 			mntpt = vfs_getmntpoint(vp->v_vfsp);
275 			errp("%s: \"%s\", logging status = %d\n",
276 			    str, refstr_value(mntpt), status);
277 			refstr_rele(mntpt);
278 		};
279 
280 		able = "disable";
281 		cmd = _FIOLOGDISABLE;
282 	} else {
283 		able = "enable";
284 		cmd = _FIOLOGENABLE;
285 	}
286 
287 	/*
288 	 * disable or re-enable logging when the saved status is 1
289 	 */
290 	if (*svstat == 1) {
291 		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
292 		    FKIOCTL, CRED(), NULL, NULL);
293 		if (error) {
294 			mntpt = vfs_getmntpoint(vp->v_vfsp);
295 			prom_printf("%s: \"%s\", cant %s logging, error %d\n",
296 			    str, refstr_value(mntpt), able, error);
297 			refstr_rele(mntpt);
298 		} else {
299 			if (cpr_debug & CPR_DEBUG5) {
300 				mntpt = vfs_getmntpoint(vp->v_vfsp);
301 				errp("%s: \"%s\", logging is now %sd\n",
302 				    str, refstr_value(mntpt), able);
303 				refstr_rele(mntpt);
304 			};
305 		}
306 	}
307 
308 	/*
309 	 * when enabling logging, reset the saved status
310 	 * to unknown for next time
311 	 */
312 	if (enable)
313 		*svstat = -1;
314 }
315 
316 /*
317  * enable/disable UFS logging on filesystems containing cpr_default_path
318  * and cpr statefile.  since the statefile can be on any fs, that fs
319  * needs to be handled separately.  this routine and cprboot expect that
320  * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
321  * is loaded from the device with rootfs and uses the same device to open
322  * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
323  * file outside of rootfs would cause errors during cprboot, plus cpr and
324  * fsck problems with the new fs if logging were enabled.
325  */
326 
327 static int
cpr_ufs_logging(int enable)328 cpr_ufs_logging(int enable)
329 {
330 	static int def_status = -1, sf_status = -1;
331 	struct vfs *vfsp;
332 	char *fname;
333 	vnode_t *vp;
334 	int error;
335 
336 	if (cpr_reusable_mode)
337 		return (0);
338 
339 	if (error = cpr_open_deffile(FREAD, &vp))
340 		return (error);
341 	vfsp = vp->v_vfsp;
342 	if (!cpr_is_ufs(vfsp)) {
343 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
344 		VN_RELE(vp);
345 		return (0);
346 	}
347 
348 	cpr_log_status(enable, &def_status, vp);
349 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
350 	VN_RELE(vp);
351 
352 	fname = cpr_build_statefile_path();
353 	if (fname == NULL)
354 		return (ENOENT);
355 	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
356 	    0600, &vp, CRCREAT, 0)) {
357 		prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
358 		    "error %d\n", fname, error);
359 		return (error);
360 	}
361 
362 	/*
363 	 * check logging status for the statefile if it resides
364 	 * on a different fs and the type is a regular file
365 	 */
366 	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
367 		cpr_log_status(enable, &sf_status, vp);
368 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
369 	VN_RELE(vp);
370 
371 	return (0);
372 }
373 #endif
374 
375 
376 /*
377  * Check if klmmod is loaded and call a lock manager service; if klmmod
378  * is not loaded, the services aren't needed and a call would trigger a
379  * modload, which would block since another thread would never run.
380  */
381 static void
cpr_lock_mgr(void (* service)(void))382 cpr_lock_mgr(void (*service)(void))
383 {
384 	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
385 		(*service)();
386 }
387 
388 int
cpr_suspend_cpus(void)389 cpr_suspend_cpus(void)
390 {
391 	int	ret = 0;
392 	extern void *i_cpr_save_context(void *arg);
393 
394 	mutex_enter(&cpu_lock);
395 
396 	/*
397 	 * the machine could not have booted without a bootcpu
398 	 */
399 	ASSERT(i_cpr_bootcpu() != NULL);
400 
401 	/*
402 	 * bring all the offline cpus online
403 	 */
404 	if ((ret = cpr_all_online())) {
405 		mutex_exit(&cpu_lock);
406 		return (ret);
407 	}
408 
409 	/*
410 	 * Set the affinity to be the boot processor
411 	 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
412 	 */
413 	affinity_set(i_cpr_bootcpuid());
414 
415 	ASSERT(CPU->cpu_id == 0);
416 
417 	PMD(PMD_SX, ("curthread running on bootcpu\n"))
418 
419 	/*
420 	 * pause all other running CPUs and save the CPU state at the sametime
421 	 */
422 	pause_cpus(NULL, i_cpr_save_context);
423 
424 	mutex_exit(&cpu_lock);
425 
426 	return (0);
427 }
428 
/*
 * Take the system down to a checkpointable state and write
 * the state file, the following are sequentially executed:
 *
 *    - Request all user threads to stop themselves
 *    - push out and invalidate user pages
 *    - bring statefile inode incore to prevent a miss later
 *    - request all daemons to stop
 *    - check and make sure all threads are stopped
 *    - sync the file system
 *    - suspend all devices
 *    - block intrpts
 *    - dump system state and memory to state file
 *    - SPARC code will not be called with CPR_TORAM, caller filters
 *
 * Returns 0 on success, otherwise the error from the step that failed.
 * The major steps record themselves with cpr_set_substate() before they
 * run; cpr_resume() switches on CPR->c_substate to pick its rollback
 * entry point, so an early return here leaves that substate behind for
 * the caller's cpr_resume() call.
 *
 * For CPR_TORAM the routine returns (substate C_ST_NODUMP) once the
 * system is quiesced; no state file is written.
 */
static int
cpr_suspend(int sleeptype)
{
#if defined(__sparc)
	int sf_realloc, nverr;
#endif
	int	rc = 0;
	int	skt_rc = 0;

	PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
	cpr_set_substate(C_ST_SUSPEND_BEGIN);

	cpr_suspend_init(sleeptype);

	cpr_save_time();

	/* timestamp taken at the start of the suspend */
	cpr_tod_get(&wholecycle_tv);
	CPR_STAT_EVENT_START("Suspend Total");

	i_cpr_alloc_cpus();

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	if (!cpr_reusable_mode) {
		/*
		 * We need to validate default file before fs
		 * functionality is disabled.
		 */
		if (rc = cpr_validate_definfo(0))
			return (rc);
	}
	i_cpr_save_machdep_info();
#endif

	PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
	/* Stop PM scans ASAP */
	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);

	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
	    NULL, NULL, PM_DEP_WAIT, NULL, 0);

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	cpr_set_substate(C_ST_MP_OFFLINE);
	if (rc = cpr_mp_offline())
		return (rc);
#endif
	/*
	 * Ask Xorg to suspend the frame buffer, and wait for it to happen
	 */
	mutex_enter(&srn_clone_lock);
	if (srn_signal) {
		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
		    "SRN_SUSPEND_REQ)\n"))
		srn_inuse = 1;	/* because *(srn_signal) cv_waits */
		(*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
		srn_inuse = 0;
	} else {
		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
	}
	mutex_exit(&srn_clone_lock);

	/*
	 * Ask the user threads to stop by themselves, but
	 * if they don't or can't after 3 retries, we give up on CPR.
	 * The 3 retry is not a random number because 2 is possible if
	 * a thread has been forked before the parent thread is stopped.
	 */
	CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
	CPR_STAT_EVENT_START("  stop users");
	cpr_set_substate(C_ST_STOP_USER_THREADS);
	PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
	if (rc = cpr_stop_user_threads())
		return (rc);
	CPR_STAT_EVENT_END("  stop users");
	CPR_DEBUG(CPR_DEBUG1, "done\n");

	PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
	pm_save_direct_levels();

	/*
	 * User threads are stopped.  We will start communicating with the
	 * user via prom_printf (some debug output may have already happened)
	 * so let anybody who cares know about this (bug 4096122)
	 */
	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);

	PMD(PMD_SX, ("cpr_suspend: send notice\n"))
	/* the notice is only sent in non-DEBUG builds */
#ifndef DEBUG
	cpr_send_notice();
	if (cpr_debug)
		prom_printf("\n");
#endif

	PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);

	/*
	 * Reattach any drivers which originally exported the
	 * no-involuntary-power-cycles property.  We need to do this before
	 * stopping kernel threads because modload is implemented using
	 * a kernel thread.
	 */
	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
	PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
	if (!pm_reattach_noinvol())
		return (ENXIO);

#if defined(__sparc)
	ASSERT(sleeptype == CPR_TODISK);
	/*
	 * if ufs logging is enabled, we need to disable before
	 * stopping kernel threads so that ufs delete and roll
	 * threads can do the work.
	 */
	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
	if (rc = cpr_ufs_logging(0))
		return (rc);

	/*
	 * Use sync_all to swap out all user pages and find out how much
	 * extra space needed for user pages that don't have back store
	 * space left.
	 */
	CPR_STAT_EVENT_START("  swapout upages");
	vfs_sync(SYNC_ALL);
	CPR_STAT_EVENT_END("  swapout upages");

	cpr_set_bitmap_size();

alloc_statefile:
	/*
	 * If our last state was C_ST_DUMP_NOSPC, we're trying to
	 * realloc the statefile, otherwise this is the first attempt.
	 */
	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;

	CPR_STAT_EVENT_START("  alloc statefile");
	cpr_set_substate(C_ST_STATEF_ALLOC);
	if (rc = cpr_alloc_statefile(sf_realloc)) {
		if (sf_realloc)
			errp("realloc failed\n");
		return (rc);
	}
	CPR_STAT_EVENT_END("  alloc statefile");

	/*
	 * Sync the filesystem to preserve its integrity.
	 *
	 * This sync is also used to flush out all B_DELWRI buffers
	 * (fs cache) which are mapped and neither dirty nor referenced
	 * before cpr_invalidate_pages destroys them.
	 * fsflush does similar thing.
	 */
	sync();

	/*
	 * destroy all clean file mapped kernel pages
	 */
	CPR_STAT_EVENT_START("  clean pages");
	CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
	CPR_STAT_EVENT_END("  clean pages");
#endif


	/*
	 * Hooks needed by lock manager prior to suspending.
	 * Refer to code for more comments.
	 */
	PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
	cpr_lock_mgr(lm_cprsuspend);

	/*
	 * Now suspend all the devices
	 */
	CPR_STAT_EVENT_START("  stop drivers");
	CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
	cpr_set_substate(C_ST_SUSPEND_DEVICES);
	/* pm_powering_down is set only for the duration of the device walk */
	pm_powering_down = 1;
	PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
	rc = cpr_suspend_devices(ddi_root_node());
	pm_powering_down = 0;
	if (rc)
		return (rc);
	CPR_DEBUG(CPR_DEBUG1, "done\n");
	CPR_STAT_EVENT_END("  stop drivers");

	/*
	 * Stop all daemon activities
	 */
	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
	PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
	if (skt_rc = cpr_stop_kernel_threads())
		return (skt_rc);

	PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);

	PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
	pm_reattach_noinvol_fini();

	/* stash and clear abort_enable; cpr_resume() restores it */
	cpr_sae(1);

	PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	if (sleeptype == CPR_TODISK) {
		/*
		 * It's safer to do tod_get before we disable all intr.
		 */
		CPR_STAT_EVENT_START("  write statefile");
	}

	/*
	 * it's time to ignore the outside world, stop the real time
	 * clock and disable any further intrpt activity.
	 */
	PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */

	mutex_enter(&cpu_lock);
	PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
	cyclic_suspend();
	mutex_exit(&cpu_lock);

	/*
	 * Due to the different methods of resuming the system between
	 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
	 * and CPR_TORAM (restart via reset into existing kernel image)
	 * cpus are not suspended and restored in the SPARC case, since it
	 * is necessary to restart the cpus and pause them before restoring
	 * the OBP image
	 */

#if defined(__x86)

	/* pause aux cpus */
	PMD(PMD_SX, ("pause aux cpus\n"))

	cpr_set_substate(C_ST_MP_PAUSED);

	if ((rc = cpr_suspend_cpus()) != 0)
		return (rc);
#endif

	PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
	i_cpr_stop_intr();
	CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");

	/*
	 * Since we will now disable the mechanism that causes prom_printfs
	 * to power up (if needed) the console fb/monitor, we assert that
	 * it must be up now.
	 */
	ASSERT(pm_cfb_is_up());
	PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
	prom_suspend_prepost();

#if defined(__sparc)
	/*
	 * getting ready to write ourself out, flush the register
	 * windows to make sure that our stack is good when we
	 * come back on the resume side.
	 */
	flush_windows();
#endif

	/*
	 * For S3, we're done
	 */
	if (sleeptype == CPR_TORAM) {
		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
		cpr_set_substate(C_ST_NODUMP);
		return (rc);
	}
#if defined(__sparc)
	/*
	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
	 *
	 * The system is quiesced at this point, we are ready to either dump
	 * to the state file for a extended sleep or a simple shutdown for
	 * systems with non-volatile memory.
	 */

	/*
	 * special handling for reusable:
	 */
	if (cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_1);
		if (nverr = cpr_set_properties(1))
			return (nverr);
	}

	cpr_set_substate(C_ST_DUMP);
	rc = cpr_dump(C_VP);

	/*
	 * if any error occurred during dump, more
	 * special handling for reusable:
	 */
	if (rc && cpr_reusable_mode) {
		cpr_set_substate(C_ST_SETPROPS_0);
		if (nverr = cpr_set_properties(0))
			return (nverr);
	}

	if (rc == ENOSPC) {
		/*
		 * The dump ran out of space: roll back with cpr_resume()
		 * and go around again.  The C_ST_DUMP_NOSPC substate set
		 * here is what makes sf_realloc 1 at alloc_statefile.
		 */
		cpr_set_substate(C_ST_DUMP_NOSPC);
		(void) cpr_resume(sleeptype);
		goto alloc_statefile;
	} else if (rc == 0) {
		if (cpr_reusable_mode) {
			/*
			 * Jump to the context saved in this lwp's lwp_qsav
			 * (cpr_main() does the setjmp() on that field).
			 */
			cpr_set_substate(C_ST_REUSABLE);
			longjmp(&ttolwp(curthread)->lwp_qsav);
		} else
			rc = cpr_set_properties(1);
	}
#endif
	PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
	return (rc);
}
767 
/*
 * Restart the cpus that cpr_suspend_cpus() paused.  cpr_resume() calls
 * this when cpr_suspend_succeeded is set.  After the cpus are started,
 * the affinity set during suspend is cleared and cpr_restore_offline()
 * is called.
 */
void
cpr_resume_cpus(void)
{
	/*
	 * this is a cut down version of start_other_cpus()
	 * just do the initialization to wake the other cpus
	 */

#if defined(__x86)
	/*
	 * Initialize our syscall handlers
	 */
	init_cpu_syscall(CPU);

#endif

	i_cpr_pre_resume_cpus();

	/*
	 * Restart the paused cpus
	 */
	mutex_enter(&cpu_lock);
	start_cpus();
	mutex_exit(&cpu_lock);

	/* called with cpu_lock dropped; it is re-taken just below */
	i_cpr_post_resume_cpus();

	mutex_enter(&cpu_lock);
	/*
	 * clear the affinity set in cpr_suspend_cpus()
	 */
	affinity_clear();

	/*
	 * offline all the cpus that were brought online during suspend
	 */
	cpr_restore_offline();

	mutex_exit(&cpu_lock);
}
808 
/*
 * Undo cpr_suspend_cpus().  cpr_resume() calls this instead of
 * cpr_resume_cpus() when cpr_suspend_succeeded is not set.  The paused
 * cpus are restarted, the affinity is cleared and cpr_restore_offline()
 * is called, all under a single hold of cpu_lock.
 */
void
cpr_unpause_cpus(void)
{
	/*
	 * Now restore the system back to what it was before we suspended
	 */

	PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))

	mutex_enter(&cpu_lock);
	/*
	 * Restart the paused cpus
	 */
	start_cpus();

	/*
	 * clear the affinity set in cpr_suspend_cpus()
	 */
	affinity_clear();

	/*
	 * offline all the cpus that were brought online during suspend
	 */
	cpr_restore_offline();

	mutex_exit(&cpu_lock);
}
836 
837 /*
838  * Bring the system back up from a checkpoint, at this point
839  * the VM has been minimally restored by boot, the following
840  * are executed sequentially:
841  *
842  *    - machdep setup and enable interrupts (mp startup if it's mp)
843  *    - resume all devices
844  *    - restart daemons
845  *    - put all threads back on run queue
846  */
847 static int
cpr_resume(int sleeptype)848 cpr_resume(int sleeptype)
849 {
850 	cpr_time_t pwron_tv, *ctp;
851 	char *str;
852 	int rc = 0;
853 
854 	/*
855 	 * The following switch is used to resume the system
856 	 * that was suspended to a different level.
857 	 */
858 	CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
859 	PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
860 
861 	/*
862 	 * Note:
863 	 *
864 	 * The rollback labels rb_xyz do not represent the cpr resume
865 	 * state when event 'xyz' has happened. Instead they represent
866 	 * the state during cpr suspend when event 'xyz' was being
867 	 * entered (and where cpr suspend failed). The actual call that
868 	 * failed may also need to be partially rolled back, since they
869 	 * aren't atomic in most cases.  In other words, rb_xyz means
870 	 * "roll back all cpr suspend events that happened before 'xyz',
871 	 * and the one that caused the failure, if necessary."
872 	 */
873 	switch (CPR->c_substate) {
874 #if defined(__sparc)
875 	case C_ST_DUMP:
876 		/*
877 		 * This is most likely a full-fledged cpr_resume after
878 		 * a complete and successful cpr suspend. Just roll back
879 		 * everything.
880 		 */
881 		ASSERT(sleeptype == CPR_TODISK);
882 		break;
883 
884 	case C_ST_REUSABLE:
885 	case C_ST_DUMP_NOSPC:
886 	case C_ST_SETPROPS_0:
887 	case C_ST_SETPROPS_1:
888 		/*
889 		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
890 		 * special switch cases here. The other two do not have
891 		 * any state change during cpr_suspend() that needs to
892 		 * be rolled back. But these are exit points from
893 		 * cpr_suspend, so theoretically (or in the future), it
894 		 * is possible that a need for roll back of a state
895 		 * change arises between these exit points.
896 		 */
897 		ASSERT(sleeptype == CPR_TODISK);
898 		goto rb_dump;
899 #endif
900 
901 	case C_ST_NODUMP:
902 		PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
903 		goto rb_nodump;
904 
905 	case C_ST_STOP_KERNEL_THREADS:
906 		PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
907 		goto rb_stop_kernel_threads;
908 
909 	case C_ST_SUSPEND_DEVICES:
910 		PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
911 		goto rb_suspend_devices;
912 
913 #if defined(__sparc)
914 	case C_ST_STATEF_ALLOC:
915 		ASSERT(sleeptype == CPR_TODISK);
916 		goto rb_statef_alloc;
917 
918 	case C_ST_DISABLE_UFS_LOGGING:
919 		ASSERT(sleeptype == CPR_TODISK);
920 		goto rb_disable_ufs_logging;
921 #endif
922 
923 	case C_ST_PM_REATTACH_NOINVOL:
924 		PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
925 		goto rb_pm_reattach_noinvol;
926 
927 	case C_ST_STOP_USER_THREADS:
928 		PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
929 		goto rb_stop_user_threads;
930 
931 #if defined(__sparc)
932 	case C_ST_MP_OFFLINE:
933 		PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
934 		goto rb_mp_offline;
935 #endif
936 
937 #if defined(__x86)
938 	case C_ST_MP_PAUSED:
939 		PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
940 		goto rb_mp_paused;
941 #endif
942 
943 
944 	default:
945 		PMD(PMD_SX, ("cpr_resume: others\n"))
946 		goto rb_others;
947 	}
948 
949 #if defined(__sparc)
950 	/*
951 	 * perform platform-dependent initialization
952 	 */
953 	if (cpr_suspend_succeeded)
954 		i_cpr_machdep_setup();
955 
956 	/*
957 	 * system did not really go down if we jump here
958 	 */
959 rb_dump:
960 #endif
961 	/*
962 	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
963 	 *
964 	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
965 	 */
966 rb_nodump:
967 	/*
968 	 * If we did suspend to RAM, we didn't generate a dump
969 	 */
970 	PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
971 	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
972 	if (cpr_suspend_succeeded) {
973 		PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
974 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
975 	}
976 
977 	prom_resume_prepost();
978 #if !defined(__sparc)
979 	/*
980 	 * Need to sync the software clock with the hardware clock.
981 	 * On Sparc, this occurs in the sparc-specific cbe.  However
982 	 * on x86 this needs to be handled _before_ we bring other cpu's
983 	 * back online.  So we call a resume function in timestamp.c
984 	 */
985 	if (tsc_resume_in_cyclic == 0)
986 		tsc_resume();
987 
988 #endif
989 
990 #if defined(__sparc)
991 	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
992 		kdi_dvec_cpr_restart();
993 #endif
994 
995 
996 #if defined(__x86)
997 rb_mp_paused:
998 	PT(PT_RMPO);
999 	PMD(PMD_SX, ("resume aux cpus\n"))
1000 
1001 	if (cpr_suspend_succeeded) {
1002 		cpr_resume_cpus();
1003 	} else {
1004 		cpr_unpause_cpus();
1005 	}
1006 #endif
1007 
1008 	/*
1009 	 * let the tmp callout catch up.
1010 	 */
1011 	PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
1012 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
1013 
1014 	i_cpr_enable_intr();
1015 
1016 	mutex_enter(&cpu_lock);
1017 	PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
1018 	cyclic_resume();
1019 	mutex_exit(&cpu_lock);
1020 
1021 	PMD(PMD_SX, ("cpr_resume: handle xc\n"))
1022 	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
1023 
1024 	PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
1025 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
1026 
1027 	/*
1028 	 * statistics gathering
1029 	 */
1030 	if (cpr_suspend_succeeded) {
1031 		/*
1032 		 * Prevent false alarm in tod_validate() due to tod
1033 		 * value change between suspend and resume
1034 		 */
1035 		cpr_tod_status_set(TOD_CPR_RESUME_DONE);
1036 
1037 		cpr_convert_promtime(&pwron_tv);
1038 
1039 		ctp = &cpr_term.tm_shutdown;
1040 		if (sleeptype == CPR_TODISK)
1041 			CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
1042 		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
1043 
1044 		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
1045 
1046 		str = "  prom time";
1047 		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
1048 		ctp = &cpr_term.tm_cprboot_start;
1049 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1050 
1051 		str = "  read statefile";
1052 		CPR_STAT_EVENT_START_TMZ(str, ctp);
1053 		ctp = &cpr_term.tm_cprboot_end;
1054 		CPR_STAT_EVENT_END_TMZ(str, ctp);
1055 	}
1056 
1057 rb_stop_kernel_threads:
1058 	/*
1059 	 * Put all threads back to where they belong; get the kernel
1060 	 * daemons straightened up too. Note that the callback table
1061 	 * locked during cpr_stop_kernel_threads() is released only
1062 	 * in cpr_start_kernel_threads(). Ensure modunloading is
1063 	 * disabled before starting kernel threads, we don't want
1064 	 * modunload thread to start changing device tree underneath.
1065 	 */
1066 	PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
1067 	modunload_disable();
1068 	PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
1069 	cpr_start_kernel_threads();
1070 
1071 rb_suspend_devices:
1072 	CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
1073 	CPR_STAT_EVENT_START("  start drivers");
1074 
1075 	PMD(PMD_SX,
1076 	    ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
1077 	    cpr_resume_uniproc))
1078 
1079 #if defined(__x86)
1080 	/*
1081 	 * If cpr_resume_uniproc is set, then pause all the other cpus
1082 	 * apart from the current cpu, so that broken drivers that think
1083 	 * that they are on a uniprocessor machine will resume
1084 	 */
1085 	if (cpr_resume_uniproc) {
1086 		mutex_enter(&cpu_lock);
1087 		pause_cpus(NULL, NULL);
1088 		mutex_exit(&cpu_lock);
1089 	}
1090 #endif
1091 
1092 	/*
1093 	 * The policy here is to continue resume everything we can if we did
1094 	 * not successfully finish suspend; and panic if we are coming back
1095 	 * from a fully suspended system.
1096 	 */
1097 	PMD(PMD_SX, ("cpr_resume: resume devices\n"))
1098 	rc = cpr_resume_devices(ddi_root_node(), 0);
1099 
1100 	cpr_sae(0);
1101 
1102 	str = "Failed to resume one or more devices.";
1103 
1104 	if (rc) {
1105 		if (CPR->c_substate == C_ST_DUMP ||
1106 		    (sleeptype == CPR_TORAM &&
1107 		    CPR->c_substate == C_ST_NODUMP)) {
1108 			if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
1109 				PMD(PMD_SX, ("cpr_resume: resume device "
1110 				    "warn\n"))
1111 				cpr_err(CE_WARN, str);
1112 			} else {
1113 				PMD(PMD_SX, ("cpr_resume: resume device "
1114 				    "panic\n"))
1115 				cpr_err(CE_PANIC, str);
1116 			}
1117 		} else {
1118 			PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
1119 			cpr_err(CE_WARN, str);
1120 		}
1121 	}
1122 
1123 	CPR_STAT_EVENT_END("  start drivers");
1124 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1125 
1126 #if defined(__x86)
1127 	/*
1128 	 * If cpr_resume_uniproc is set, then unpause all the processors
1129 	 * that were paused before resuming the drivers
1130 	 */
1131 	if (cpr_resume_uniproc) {
1132 		mutex_enter(&cpu_lock);
1133 		start_cpus();
1134 		mutex_exit(&cpu_lock);
1135 	}
1136 #endif
1137 
1138 	/*
1139 	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
1140 	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
1141 	 * modunloading now.
1142 	 */
1143 	if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
1144 		PMD(PMD_SX, ("cpr_resume: modload enable\n"))
1145 		modunload_enable();
1146 	}
1147 
1148 	/*
1149 	 * Hooks needed by lock manager prior to resuming.
1150 	 * Refer to code for more comments.
1151 	 */
1152 	PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
1153 	cpr_lock_mgr(lm_cprresume);
1154 
1155 #if defined(__sparc)
1156 	/*
1157 	 * This is a partial (half) resume during cpr suspend, we
1158 	 * haven't yet given up on the suspend. On return from here,
1159 	 * cpr_suspend() will try to reallocate and retry the suspend.
1160 	 */
1161 	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
1162 		return (0);
1163 	}
1164 
1165 	if (sleeptype == CPR_TODISK) {
1166 rb_statef_alloc:
1167 		cpr_statef_close();
1168 
1169 rb_disable_ufs_logging:
1170 		/*
1171 		 * if ufs logging was disabled, re-enable
1172 		 */
1173 		(void) cpr_ufs_logging(1);
1174 	}
1175 #endif
1176 
1177 rb_pm_reattach_noinvol:
1178 	/*
1179 	 * When pm_reattach_noinvol() succeeds, modunload_thread will
1180 	 * remain disabled until after cpr suspend passes the
1181 	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
1182 	 * cpr suspend reaches this state, we'll need to enable modunload
1183 	 * thread during rollback.
1184 	 */
1185 	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
1186 	    CPR->c_substate == C_ST_STATEF_ALLOC ||
1187 	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
1188 	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
1189 		PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
1190 		pm_reattach_noinvol_fini();
1191 	}
1192 
1193 	PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
1194 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
1195 	PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
1196 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
1197 
1198 	PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
1199 	pm_restore_direct_levels();
1200 
1201 rb_stop_user_threads:
1202 	CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
1203 	PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
1204 	cpr_start_user_threads();
1205 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1206 	/*
1207 	 * Ask Xorg to resume the frame buffer, and wait for it to happen
1208 	 */
1209 	mutex_enter(&srn_clone_lock);
1210 	if (srn_signal) {
1211 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
1212 		    "SRN_NORMAL_RESUME)\n"))
1213 		srn_inuse = 1;		/* because (*srn_signal) cv_waits */
1214 		(*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
1215 		srn_inuse = 0;
1216 	} else {
1217 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
1218 	}
1219 	mutex_exit(&srn_clone_lock);
1220 
1221 #if defined(__sparc)
1222 rb_mp_offline:
1223 	if (cpr_mp_online())
1224 		cpr_err(CE_WARN, "Failed to online all the processors.");
1225 #endif
1226 
1227 rb_others:
1228 	PMD(PMD_SX, ("cpr_resume: dep thread\n"))
1229 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
1230 	    PM_DEP_WAIT, NULL, 0);
1231 
1232 	PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
1233 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
1234 
1235 	if (cpr_suspend_succeeded) {
1236 		cpr_stat_record_events();
1237 	}
1238 
1239 #if defined(__sparc)
1240 	if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
1241 		cpr_clear_definfo();
1242 #endif
1243 
1244 	i_cpr_free_cpus();
1245 	CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
1246 	PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
1247 	cpr_signal_user(SIGTHAW);
1248 	CPR_DEBUG(CPR_DEBUG1, "done\n");
1249 
1250 	CPR_STAT_EVENT_END("Resume Total");
1251 
1252 	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
1253 	CPR_STAT_EVENT_END("WHOLE CYCLE");
1254 
1255 	if (cpr_debug & CPR_DEBUG1)
1256 		cmn_err(CE_CONT, "\nThe system is back where you left!\n");
1257 
1258 	CPR_STAT_EVENT_START("POST CPR DELAY");
1259 
1260 #ifdef CPR_STAT
1261 	ctp = &cpr_term.tm_shutdown;
1262 	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
1263 	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
1264 
1265 	CPR_STAT_EVENT_PRINT();
1266 #endif /* CPR_STAT */
1267 
1268 	PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
1269 	return (rc);
1270 }
1271 
1272 static void
cpr_suspend_init(int sleeptype)1273 cpr_suspend_init(int sleeptype)
1274 {
1275 	cpr_time_t *ctp;
1276 
1277 	cpr_stat_init();
1278 
1279 	/*
1280 	 * If cpr_suspend() failed before cpr_dump() gets a chance
1281 	 * to reinitialize the terminator of the statefile,
1282 	 * the values of the old terminator will still linger around.
1283 	 * Since the terminator contains information that we need to
1284 	 * decide whether suspend succeeded or not, we need to
1285 	 * reinitialize it as early as possible.
1286 	 */
1287 	cpr_term.real_statef_size = 0;
1288 	ctp = &cpr_term.tm_shutdown;
1289 	bzero(ctp, sizeof (*ctp));
1290 	ctp = &cpr_term.tm_cprboot_start;
1291 	bzero(ctp, sizeof (*ctp));
1292 	ctp = &cpr_term.tm_cprboot_end;
1293 	bzero(ctp, sizeof (*ctp));
1294 
1295 	if (sleeptype == CPR_TODISK) {
1296 		/*
1297 		 * Lookup the physical address of our thread structure.
1298 		 * This should never be invalid and the entire thread structure
1299 		 * is expected to reside within the same pfn.
1300 		 */
1301 		curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
1302 		ASSERT(curthreadpfn != PFN_INVALID);
1303 		ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
1304 		    (caddr_t)curthread + sizeof (kthread_t) - 1));
1305 	}
1306 
1307 	cpr_suspend_succeeded = 0;
1308 }
1309 
1310 /*
1311  * bring all the offline cpus online
1312  */
1313 static int
cpr_all_online(void)1314 cpr_all_online(void)
1315 {
1316 	int	rc = 0;
1317 
1318 #ifdef	__sparc
1319 	/*
1320 	 * do nothing
1321 	 */
1322 #else
1323 
1324 	cpu_t	*cp;
1325 
1326 	ASSERT(MUTEX_HELD(&cpu_lock));
1327 
1328 	cp = cpu_list;
1329 	do {
1330 		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1331 		if (!CPU_ACTIVE(cp)) {
1332 			if ((rc = cpu_online(cp, 0)) != 0)
1333 				break;
1334 			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
1335 		}
1336 	} while ((cp = cp->cpu_next) != cpu_list);
1337 
1338 	if (rc) {
1339 		/*
1340 		 * an online operation failed so offline the cpus
1341 		 * that were onlined above to restore the system
1342 		 * to its original state
1343 		 */
1344 		cpr_restore_offline();
1345 	}
1346 #endif
1347 	return (rc);
1348 }
1349 
1350 /*
1351  * offline all the cpus that were brought online by cpr_all_online()
1352  */
1353 static void
cpr_restore_offline(void)1354 cpr_restore_offline(void)
1355 {
1356 
1357 #ifdef	__sparc
1358 	/*
1359 	 * do nothing
1360 	 */
1361 #else
1362 
1363 	cpu_t	*cp;
1364 	int	rc = 0;
1365 
1366 	ASSERT(MUTEX_HELD(&cpu_lock));
1367 
1368 	cp = cpu_list;
1369 	do {
1370 		if (CPU_CPR_IS_ONLINE(cp)) {
1371 			rc =  cpu_offline(cp, 0);
1372 			/*
1373 			 * this offline should work, since the cpu was
1374 			 * offline originally and was successfully onlined
1375 			 * by cpr_all_online()
1376 			 */
1377 			ASSERT(rc == 0);
1378 			cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
1379 		}
1380 	} while ((cp = cp->cpu_next) != cpu_list);
1381 
1382 #endif
1383 
1384 }
1385