xref: /titanic_50/usr/src/uts/common/cpr/cpr_mod.c (revision 4c06356b0f0fffb4fc1b6eccc8e5d8e2254a84d6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * System call to checkpoint and resume the currently running kernel
30  */
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/modctl.h>
34 #include <sys/syscall.h>
35 #include <sys/cred.h>
36 #include <sys/uadmin.h>
37 #include <sys/cmn_err.h>
38 #include <sys/systm.h>
39 #include <sys/cpr.h>
40 #include <sys/swap.h>
41 #include <sys/vfs.h>
42 #include <sys/autoconf.h>
43 #include <sys/machsystm.h>
44 
/*
 * Routines and data defined outside this file.  Each prototype spells out
 * its parameters so that calls made from this file are type-checked.
 */
extern int i_cpr_is_supported(int sleeptype);
extern int cpr_is_ufs(struct vfs *);
extern int cpr_is_zfs(struct vfs *);
extern int cpr_check_spec_statefile(void);
extern int cpr_reusable_mount_check(void);
extern int i_cpr_reusable_supported(void);
extern int i_cpr_reusefini(void);
extern struct mod_ops mod_miscops;

extern int cpr_init(int);
extern void cpr_done(void);
extern void i_cpr_stop_other_cpus(void);
/* cpr() passes the sleep type as the single argument. */
extern int i_cpr_power_down(int sleeptype);

#if defined(__sparc)
extern void cpr_forget_cprconfig(void);
#endif
62 
63 static struct modlmisc modlmisc = {
64 	&mod_miscops, "checkpoint resume"
65 };
66 
67 static struct modlinkage modlinkage = {
68 	MODREV_1, (void *)&modlmisc, NULL
69 };
70 
71 char _depends_on[] = "misc/bootdev";	/* i_devname_to_promname() */
72 
73 int cpr_reusable_mode;
74 
75 kmutex_t	cpr_slock;	/* cpr serial lock */
76 cpr_t		cpr_state;
77 int		cpr_debug;
78 int		cpr_test_mode; /* true if called via uadmin testmode */
79 int		cpr_test_point = LOOP_BACK_NONE;	/* cpr test point */
80 int		cpr_mp_enable = 0;	/* set to 1 to enable MP suspend */
81 major_t		cpr_device = 0;		/* major number for S3 on one device */
82 
83 /*
84  * All the loadable module related code follows
85  */
86 int
87 _init(void)
88 {
89 	register int e;
90 
91 	if ((e = mod_install(&modlinkage)) == 0) {
92 		mutex_init(&cpr_slock, NULL, MUTEX_DEFAULT, NULL);
93 	}
94 	return (e);
95 }
96 
97 int
98 _fini(void)
99 {
100 	register int e;
101 
102 	if ((e = mod_remove(&modlinkage)) == 0) {
103 		mutex_destroy(&cpr_slock);
104 	}
105 	return (e);
106 }
107 
108 int
109 _info(struct modinfo *modinfop)
110 {
111 	return (mod_info(&modlinkage, modinfop));
112 }
113 
114 static
115 int
116 atoi(char *p)
117 {
118 	int	i;
119 
120 	i = (*p++ - '0');
121 
122 	while (*p != '\0')
123 		i = 10 * i + (*p++ - '0');
124 
125 	return (i);
126 }
127 
128 int
129 cpr(int fcn, void *mdep)
130 {
131 
132 #if defined(__sparc)
133 	static const char noswapstr[] = "reusable statefile requires "
134 	    "that no swap area be configured.\n";
135 	static const char blockstr[] = "reusable statefile must be "
136 	    "a block device.  See power.conf(4) and pmconfig(1M).\n";
137 	static const char normalfmt[] = "cannot run normal "
138 	    "checkpoint/resume when in reusable statefile mode. "
139 	    "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) "
140 	    "to exit reusable statefile mode.\n";
141 	static const char modefmt[] = "%s in reusable mode.\n";
142 #endif
143 	register int rc = 0;
144 	int cpr_sleeptype;
145 
146 	/*
147 	 * First, reject commands that we don't (yet) support on this arch.
148 	 * This is easier to understand broken out like this than grotting
149 	 * through the second switch below.
150 	 */
151 
152 	switch (fcn) {
153 #if defined(__sparc)
154 	case AD_CHECK_SUSPEND_TO_RAM:
155 	case AD_SUSPEND_TO_RAM:
156 		return (ENOTSUP);
157 	case AD_CHECK_SUSPEND_TO_DISK:
158 	case AD_SUSPEND_TO_DISK:
159 	case AD_CPR_REUSEINIT:
160 	case AD_CPR_NOCOMPRESS:
161 	case AD_CPR_FORCE:
162 	case AD_CPR_REUSABLE:
163 	case AD_CPR_REUSEFINI:
164 	case AD_CPR_TESTZ:
165 	case AD_CPR_TESTNOZ:
166 	case AD_CPR_TESTHALT:
167 	case AD_CPR_SUSP_DEVICES:
168 		cpr_sleeptype = CPR_TODISK;
169 		break;
170 #endif
171 #if defined(__x86)
172 	case AD_CHECK_SUSPEND_TO_DISK:
173 	case AD_SUSPEND_TO_DISK:
174 	case AD_CPR_REUSEINIT:
175 	case AD_CPR_NOCOMPRESS:
176 	case AD_CPR_FORCE:
177 	case AD_CPR_REUSABLE:
178 	case AD_CPR_REUSEFINI:
179 	case AD_CPR_TESTZ:
180 	case AD_CPR_TESTNOZ:
181 	case AD_CPR_TESTHALT:
182 	case AD_CPR_PRINT:
183 		return (ENOTSUP);
184 	/* The DEV_* values need to be removed after sys-syspend is fixed */
185 	case DEV_CHECK_SUSPEND_TO_RAM:
186 	case DEV_SUSPEND_TO_RAM:
187 	case AD_CPR_SUSP_DEVICES:
188 	case AD_CHECK_SUSPEND_TO_RAM:
189 	case AD_SUSPEND_TO_RAM:
190 	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
191 	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
192 	case AD_FORCE_SUSPEND_TO_RAM:
193 	case AD_DEVICE_SUSPEND_TO_RAM:
194 		cpr_sleeptype = CPR_TORAM;
195 		break;
196 #endif
197 	}
198 #if defined(__sparc)
199 	/*
200 	 * Need to know if we're in reusable mode, but we will likely have
201 	 * rebooted since REUSEINIT, so we have to get the info from the
202 	 * file system
203 	 */
204 	if (!cpr_reusable_mode)
205 		cpr_reusable_mode = cpr_get_reusable_mode();
206 
207 	cpr_forget_cprconfig();
208 #endif
209 
210 	switch (fcn) {
211 
212 #if defined(__sparc)
213 	case AD_CPR_REUSEINIT:
214 		if (!i_cpr_reusable_supported())
215 			return (ENOTSUP);
216 		if (!cpr_statefile_is_spec()) {
217 			cpr_err(CE_CONT, blockstr);
218 			return (EINVAL);
219 		}
220 		if ((rc = cpr_check_spec_statefile()) != 0)
221 			return (rc);
222 		if (swapinfo) {
223 			cpr_err(CE_CONT, noswapstr);
224 			return (EINVAL);
225 		}
226 		cpr_test_mode = 0;
227 		break;
228 
229 	case AD_CPR_NOCOMPRESS:
230 	case AD_CPR_COMPRESS:
231 	case AD_CPR_FORCE:
232 		if (cpr_reusable_mode) {
233 			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
234 			return (ENOTSUP);
235 		}
236 		cpr_test_mode = 0;
237 		break;
238 
239 	case AD_CPR_REUSABLE:
240 		if (!i_cpr_reusable_supported())
241 			return (ENOTSUP);
242 		if (!cpr_statefile_is_spec()) {
243 			cpr_err(CE_CONT, blockstr);
244 			return (EINVAL);
245 		}
246 		if ((rc = cpr_check_spec_statefile()) != 0)
247 			return (rc);
248 		if (swapinfo) {
249 			cpr_err(CE_CONT, noswapstr);
250 			return (EINVAL);
251 		}
252 		if ((rc = cpr_reusable_mount_check()) != 0)
253 			return (rc);
254 		cpr_test_mode = 0;
255 		break;
256 
257 	case AD_CPR_REUSEFINI:
258 		if (!i_cpr_reusable_supported())
259 			return (ENOTSUP);
260 		cpr_test_mode = 0;
261 		break;
262 
263 	case AD_CPR_TESTZ:
264 	case AD_CPR_TESTNOZ:
265 	case AD_CPR_TESTHALT:
266 		if (cpr_reusable_mode) {
267 			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
268 			return (ENOTSUP);
269 		}
270 		cpr_test_mode = 1;
271 		break;
272 
273 	case AD_CPR_CHECK:
274 		if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode)
275 			return (ENOTSUP);
276 		return (0);
277 
278 	case AD_CPR_PRINT:
279 		CPR_STAT_EVENT_END("POST CPR DELAY");
280 		cpr_stat_event_print();
281 		return (0);
282 #endif
283 
284 	case AD_CPR_DEBUG0:
285 		cpr_debug = 0;
286 		return (0);
287 
288 	case AD_CPR_DEBUG1:
289 	case AD_CPR_DEBUG2:
290 	case AD_CPR_DEBUG3:
291 	case AD_CPR_DEBUG4:
292 	case AD_CPR_DEBUG5:
293 	case AD_CPR_DEBUG7:
294 	case AD_CPR_DEBUG8:
295 		cpr_debug |= CPR_DEBUG_BIT(fcn);
296 		return (0);
297 
298 	case AD_CPR_DEBUG9:
299 		cpr_debug |= CPR_DEBUG6;
300 		return (0);
301 
302 	/* The DEV_* values need to be removed after sys-syspend is fixed */
303 	case DEV_CHECK_SUSPEND_TO_RAM:
304 	case DEV_SUSPEND_TO_RAM:
305 	case AD_CHECK_SUSPEND_TO_RAM:
306 	case AD_SUSPEND_TO_RAM:
307 		cpr_test_point = LOOP_BACK_NONE;
308 		break;
309 
310 	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
311 		cpr_test_point = LOOP_BACK_PASS;
312 		break;
313 
314 	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
315 		cpr_test_point = LOOP_BACK_FAIL;
316 		break;
317 
318 	case AD_FORCE_SUSPEND_TO_RAM:
319 		cpr_test_point = FORCE_SUSPEND_TO_RAM;
320 		break;
321 
322 	case AD_DEVICE_SUSPEND_TO_RAM:
323 		if (mdep == NULL) {
324 			/* Didn't pass enough arguments */
325 			return (EINVAL);
326 		}
327 		cpr_test_point = DEVICE_SUSPEND_TO_RAM;
328 		cpr_device = (major_t)atoi((char *)mdep);
329 		break;
330 
331 	case AD_CPR_SUSP_DEVICES:
332 		cpr_test_point = FORCE_SUSPEND_TO_RAM;
333 		if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS)
334 			cmn_err(CE_WARN,
335 			    "Some devices did not suspend "
336 			    "and may be unusable");
337 		(void) cpr_resume_devices(ddi_root_node(), 0);
338 		return (0);
339 
340 	default:
341 		return (ENOTSUP);
342 	}
343 
344 	if (!i_cpr_is_supported(cpr_sleeptype) ||
345 	    (cpr_sleeptype == CPR_TODISK &&
346 	    !cpr_is_ufs(rootvfs)&& !cpr_is_zfs(rootvfs)))
347 		return (ENOTSUP);
348 
349 	if (fcn == AD_CHECK_SUSPEND_TO_RAM ||
350 	    fcn == DEV_CHECK_SUSPEND_TO_RAM) {
351 		ASSERT(i_cpr_is_supported(cpr_sleeptype));
352 		return (0);
353 	}
354 
355 #if defined(__sparc)
356 	if (fcn == AD_CPR_REUSEINIT) {
357 		if (mutex_tryenter(&cpr_slock) == 0)
358 			return (EBUSY);
359 		if (cpr_reusable_mode) {
360 			cpr_err(CE_CONT, modefmt, "already");
361 			mutex_exit(&cpr_slock);
362 			return (EBUSY);
363 		}
364 		rc = i_cpr_reuseinit();
365 		mutex_exit(&cpr_slock);
366 		return (rc);
367 	}
368 
369 	if (fcn == AD_CPR_REUSEFINI) {
370 		if (mutex_tryenter(&cpr_slock) == 0)
371 			return (EBUSY);
372 		if (!cpr_reusable_mode) {
373 			cpr_err(CE_CONT, modefmt, "not");
374 			mutex_exit(&cpr_slock);
375 			return (EINVAL);
376 		}
377 		rc = i_cpr_reusefini();
378 		mutex_exit(&cpr_slock);
379 		return (rc);
380 	}
381 #endif
382 
383 	/*
384 	 * acquire cpr serial lock and init cpr state structure.
385 	 */
386 	if (rc = cpr_init(fcn))
387 		return (rc);
388 
389 #if defined(__sparc)
390 	if (fcn == AD_CPR_REUSABLE) {
391 		if ((rc = i_cpr_check_cprinfo()) != 0)  {
392 			mutex_exit(&cpr_slock);
393 			return (rc);
394 		}
395 	}
396 #endif
397 
398 	/*
399 	 * Call the main cpr routine. If we are successful, we will be coming
400 	 * down from the resume side, otherwise we are still in suspend.
401 	 */
402 	cpr_err(CE_CONT, "System is being suspended");
403 	if (rc = cpr_main(cpr_sleeptype)) {
404 		CPR->c_flags |= C_ERROR;
405 		PMD(PMD_SX, ("cpr: Suspend operation failed.\n"))
406 		cpr_err(CE_NOTE, "Suspend operation failed.");
407 	} else if (CPR->c_flags & C_SUSPENDING) {
408 
409 		/*
410 		 * In the suspend to RAM case, by the time we get
411 		 * control back we're already resumed
412 		 */
413 		if (cpr_sleeptype == CPR_TORAM) {
414 			PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n"))
415 			cpr_done();
416 			return (rc);
417 		}
418 
419 #if defined(__sparc)
420 
421 		PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n"))
422 		/*
423 		 * Back from a successful checkpoint
424 		 */
425 		if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) {
426 			mdboot(0, AD_BOOT, "", B_FALSE);
427 			/* NOTREACHED */
428 		}
429 
430 		/* make sure there are no more changes to the device tree */
431 		PMD(PMD_SX, ("cpr: dev tree freeze\n"))
432 		devtree_freeze();
433 
434 		/*
435 		 * stop other cpus and raise our priority.  since there is only
436 		 * one active cpu after this, and our priority will be too high
437 		 * for us to be preempted, we're essentially single threaded
438 		 * from here on out.
439 		 */
440 		PMD(PMD_SX, ("cpr: stop other cpus\n"))
441 		i_cpr_stop_other_cpus();
442 		PMD(PMD_SX, ("cpr: spl6\n"))
443 		(void) spl6();
444 
445 		/*
446 		 * try and reset leaf devices.  reset_leaves() should only
447 		 * be called when there are no other threads that could be
448 		 * accessing devices
449 		 */
450 		PMD(PMD_SX, ("cpr: reset leaves\n"))
451 		reset_leaves();
452 
453 		/*
454 		 * If i_cpr_power_down() succeeds, it'll not return
455 		 *
456 		 * Drives with write-cache enabled need to flush
457 		 * their cache.
458 		 */
459 		if (fcn != AD_CPR_TESTHALT) {
460 			PMD(PMD_SX, ("cpr: power down\n"))
461 			(void) i_cpr_power_down(cpr_sleeptype);
462 		}
463 		ASSERT(cpr_sleeptype == CPR_TODISK);
464 		/* currently CPR_TODISK comes back via a boot path */
465 		CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n");
466 		halt(NULL);
467 		/* NOTREACHED */
468 #endif
469 	}
470 	PMD(PMD_SX, ("cpr: cpr done\n"))
471 	cpr_done();
472 	return (rc);
473 }
474