xref: /titanic_44/usr/src/uts/common/cpr/cpr_mod.c (revision 680047a5d0ef56480110f0de516145ba0efd5caa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
25  * Copyright 2012 Milan Jurik. All rights reserved.
26  */
27 
28 /*
29  * System call to checkpoint and resume the currently running kernel
30  */
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/modctl.h>
34 #include <sys/syscall.h>
35 #include <sys/cred.h>
36 #include <sys/uadmin.h>
37 #include <sys/cmn_err.h>
38 #include <sys/systm.h>
39 #include <sys/cpr.h>
40 #include <sys/swap.h>
41 #include <sys/vfs.h>
42 #include <sys/autoconf.h>
43 #include <sys/machsystm.h>
44 
45 extern int i_cpr_is_supported(int sleeptype);
46 extern int cpr_is_ufs(struct vfs *);
47 extern int cpr_is_zfs(struct vfs *);
48 extern int cpr_check_spec_statefile(void);
49 extern int cpr_reusable_mount_check(void);
50 extern int i_cpr_reusable_supported(void);
51 extern int i_cpr_reusefini(void);
52 extern struct mod_ops mod_miscops;
53 
54 extern int cpr_init(int);
55 extern void cpr_done(void);
56 extern void i_cpr_stop_other_cpus(void);
57 extern int i_cpr_power_down(int);
58 
59 #if defined(__sparc)
60 extern void cpr_forget_cprconfig(void);
61 #endif
62 
63 static struct modlmisc modlmisc = {
64 	&mod_miscops, "checkpoint resume"
65 };
66 
67 static struct modlinkage modlinkage = {
68 	MODREV_1, (void *)&modlmisc, NULL
69 };
70 
71 int cpr_reusable_mode;
72 
73 kmutex_t	cpr_slock;	/* cpr serial lock */
74 cpr_t		cpr_state;
75 int		cpr_debug;
76 int		cpr_test_mode; /* true if called via uadmin testmode */
77 int		cpr_test_point = LOOP_BACK_NONE;	/* cpr test point */
78 int		cpr_mp_enable = 0;	/* set to 1 to enable MP suspend */
79 major_t		cpr_device = 0;		/* major number for S3 on one device */
80 
81 /*
82  * All the loadable module related code follows
83  */
84 int
85 _init(void)
86 {
87 	register int e;
88 
89 	if ((e = mod_install(&modlinkage)) == 0) {
90 		mutex_init(&cpr_slock, NULL, MUTEX_DEFAULT, NULL);
91 	}
92 	return (e);
93 }
94 
95 int
96 _fini(void)
97 {
98 	register int e;
99 
100 	if ((e = mod_remove(&modlinkage)) == 0) {
101 		mutex_destroy(&cpr_slock);
102 	}
103 	return (e);
104 }
105 
106 int
107 _info(struct modinfo *modinfop)
108 {
109 	return (mod_info(&modlinkage, modinfop));
110 }
111 
/*
 * Convert the leading run of decimal digits in p to an int.
 *
 * Conversion stops at the first character that is not '0'..'9' (including
 * the terminating NUL), so a NULL pointer, an empty string, or a string that
 * does not start with a digit yields 0 and the scan never runs past the end
 * of the string.  No sign handling and no overflow detection is done.
 */
static
int
atoi(char *p)
{
	int	i = 0;

	if (p == NULL)
		return (0);

	while (*p >= '0' && *p <= '9')
		i = 10 * i + (*p++ - '0');

	return (i);
}
125 
126 int
127 cpr(int fcn, void *mdep)
128 {
129 
130 #if defined(__sparc)
131 	static const char noswapstr[] = "reusable statefile requires "
132 	    "that no swap area be configured.\n";
133 	static const char blockstr[] = "reusable statefile must be "
134 	    "a block device.  See power.conf(4) and pmconfig(1M).\n";
135 	static const char normalfmt[] = "cannot run normal "
136 	    "checkpoint/resume when in reusable statefile mode. "
137 	    "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) "
138 	    "to exit reusable statefile mode.\n";
139 	static const char modefmt[] = "%s in reusable mode.\n";
140 #endif
141 	register int rc = 0;
142 	int cpr_sleeptype;
143 
144 	/*
145 	 * First, reject commands that we don't (yet) support on this arch.
146 	 * This is easier to understand broken out like this than grotting
147 	 * through the second switch below.
148 	 */
149 
150 	switch (fcn) {
151 #if defined(__sparc)
152 	case AD_CHECK_SUSPEND_TO_RAM:
153 	case AD_SUSPEND_TO_RAM:
154 		return (ENOTSUP);
155 	case AD_CHECK_SUSPEND_TO_DISK:
156 	case AD_SUSPEND_TO_DISK:
157 	case AD_CPR_REUSEINIT:
158 	case AD_CPR_NOCOMPRESS:
159 	case AD_CPR_FORCE:
160 	case AD_CPR_REUSABLE:
161 	case AD_CPR_REUSEFINI:
162 	case AD_CPR_TESTZ:
163 	case AD_CPR_TESTNOZ:
164 	case AD_CPR_TESTHALT:
165 	case AD_CPR_SUSP_DEVICES:
166 		cpr_sleeptype = CPR_TODISK;
167 		break;
168 #endif
169 #if defined(__x86)
170 	case AD_CHECK_SUSPEND_TO_DISK:
171 	case AD_SUSPEND_TO_DISK:
172 	case AD_CPR_REUSEINIT:
173 	case AD_CPR_NOCOMPRESS:
174 	case AD_CPR_FORCE:
175 	case AD_CPR_REUSABLE:
176 	case AD_CPR_REUSEFINI:
177 	case AD_CPR_TESTZ:
178 	case AD_CPR_TESTNOZ:
179 	case AD_CPR_TESTHALT:
180 	case AD_CPR_PRINT:
181 		return (ENOTSUP);
182 	/* The DEV_* values need to be removed after sys-syspend is fixed */
183 	case DEV_CHECK_SUSPEND_TO_RAM:
184 	case DEV_SUSPEND_TO_RAM:
185 	case AD_CPR_SUSP_DEVICES:
186 	case AD_CHECK_SUSPEND_TO_RAM:
187 	case AD_SUSPEND_TO_RAM:
188 	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
189 	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
190 	case AD_FORCE_SUSPEND_TO_RAM:
191 	case AD_DEVICE_SUSPEND_TO_RAM:
192 		cpr_sleeptype = CPR_TORAM;
193 		break;
194 #endif
195 	}
196 #if defined(__sparc)
197 	/*
198 	 * Need to know if we're in reusable mode, but we will likely have
199 	 * rebooted since REUSEINIT, so we have to get the info from the
200 	 * file system
201 	 */
202 	if (!cpr_reusable_mode)
203 		cpr_reusable_mode = cpr_get_reusable_mode();
204 
205 	cpr_forget_cprconfig();
206 #endif
207 
208 	switch (fcn) {
209 
210 #if defined(__sparc)
211 	case AD_CPR_REUSEINIT:
212 		if (!i_cpr_reusable_supported())
213 			return (ENOTSUP);
214 		if (!cpr_statefile_is_spec()) {
215 			cpr_err(CE_CONT, blockstr);
216 			return (EINVAL);
217 		}
218 		if ((rc = cpr_check_spec_statefile()) != 0)
219 			return (rc);
220 		if (swapinfo) {
221 			cpr_err(CE_CONT, noswapstr);
222 			return (EINVAL);
223 		}
224 		cpr_test_mode = 0;
225 		break;
226 
227 	case AD_CPR_NOCOMPRESS:
228 	case AD_CPR_COMPRESS:
229 	case AD_CPR_FORCE:
230 		if (cpr_reusable_mode) {
231 			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
232 			return (ENOTSUP);
233 		}
234 		cpr_test_mode = 0;
235 		break;
236 
237 	case AD_CPR_REUSABLE:
238 		if (!i_cpr_reusable_supported())
239 			return (ENOTSUP);
240 		if (!cpr_statefile_is_spec()) {
241 			cpr_err(CE_CONT, blockstr);
242 			return (EINVAL);
243 		}
244 		if ((rc = cpr_check_spec_statefile()) != 0)
245 			return (rc);
246 		if (swapinfo) {
247 			cpr_err(CE_CONT, noswapstr);
248 			return (EINVAL);
249 		}
250 		if ((rc = cpr_reusable_mount_check()) != 0)
251 			return (rc);
252 		cpr_test_mode = 0;
253 		break;
254 
255 	case AD_CPR_REUSEFINI:
256 		if (!i_cpr_reusable_supported())
257 			return (ENOTSUP);
258 		cpr_test_mode = 0;
259 		break;
260 
261 	case AD_CPR_TESTZ:
262 	case AD_CPR_TESTNOZ:
263 	case AD_CPR_TESTHALT:
264 		if (cpr_reusable_mode) {
265 			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
266 			return (ENOTSUP);
267 		}
268 		cpr_test_mode = 1;
269 		break;
270 
271 	case AD_CPR_CHECK:
272 		if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode)
273 			return (ENOTSUP);
274 		return (0);
275 
276 	case AD_CPR_PRINT:
277 		CPR_STAT_EVENT_END("POST CPR DELAY");
278 		cpr_stat_event_print();
279 		return (0);
280 #endif
281 
282 	case AD_CPR_DEBUG0:
283 		cpr_debug = 0;
284 		return (0);
285 
286 	case AD_CPR_DEBUG1:
287 	case AD_CPR_DEBUG2:
288 	case AD_CPR_DEBUG3:
289 	case AD_CPR_DEBUG4:
290 	case AD_CPR_DEBUG5:
291 	case AD_CPR_DEBUG7:
292 	case AD_CPR_DEBUG8:
293 		cpr_debug |= CPR_DEBUG_BIT(fcn);
294 		return (0);
295 
296 	case AD_CPR_DEBUG9:
297 		cpr_debug |= CPR_DEBUG6;
298 		return (0);
299 
300 	/* The DEV_* values need to be removed after sys-syspend is fixed */
301 	case DEV_CHECK_SUSPEND_TO_RAM:
302 	case DEV_SUSPEND_TO_RAM:
303 	case AD_CHECK_SUSPEND_TO_RAM:
304 	case AD_SUSPEND_TO_RAM:
305 		cpr_test_point = LOOP_BACK_NONE;
306 		break;
307 
308 	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
309 		cpr_test_point = LOOP_BACK_PASS;
310 		break;
311 
312 	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
313 		cpr_test_point = LOOP_BACK_FAIL;
314 		break;
315 
316 	case AD_FORCE_SUSPEND_TO_RAM:
317 		cpr_test_point = FORCE_SUSPEND_TO_RAM;
318 		break;
319 
320 	case AD_DEVICE_SUSPEND_TO_RAM:
321 		if (mdep == NULL) {
322 			/* Didn't pass enough arguments */
323 			return (EINVAL);
324 		}
325 		cpr_test_point = DEVICE_SUSPEND_TO_RAM;
326 		cpr_device = (major_t)atoi((char *)mdep);
327 		break;
328 
329 	case AD_CPR_SUSP_DEVICES:
330 		cpr_test_point = FORCE_SUSPEND_TO_RAM;
331 		if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS)
332 			cmn_err(CE_WARN,
333 			    "Some devices did not suspend "
334 			    "and may be unusable");
335 		(void) cpr_resume_devices(ddi_root_node(), 0);
336 		return (0);
337 
338 	default:
339 		return (ENOTSUP);
340 	}
341 
342 	if (!i_cpr_is_supported(cpr_sleeptype))
343 		return (ENOTSUP);
344 
345 #if defined(__sparc)
346 	if ((cpr_sleeptype == CPR_TODISK &&
347 	    !cpr_is_ufs(rootvfs) && !cpr_is_zfs(rootvfs)))
348 		return (ENOTSUP);
349 #endif
350 
351 	if (fcn == AD_CHECK_SUSPEND_TO_RAM ||
352 	    fcn == DEV_CHECK_SUSPEND_TO_RAM) {
353 		ASSERT(i_cpr_is_supported(cpr_sleeptype));
354 		return (0);
355 	}
356 
357 #if defined(__sparc)
358 	if (fcn == AD_CPR_REUSEINIT) {
359 		if (mutex_tryenter(&cpr_slock) == 0)
360 			return (EBUSY);
361 		if (cpr_reusable_mode) {
362 			cpr_err(CE_CONT, modefmt, "already");
363 			mutex_exit(&cpr_slock);
364 			return (EBUSY);
365 		}
366 		rc = i_cpr_reuseinit();
367 		mutex_exit(&cpr_slock);
368 		return (rc);
369 	}
370 
371 	if (fcn == AD_CPR_REUSEFINI) {
372 		if (mutex_tryenter(&cpr_slock) == 0)
373 			return (EBUSY);
374 		if (!cpr_reusable_mode) {
375 			cpr_err(CE_CONT, modefmt, "not");
376 			mutex_exit(&cpr_slock);
377 			return (EINVAL);
378 		}
379 		rc = i_cpr_reusefini();
380 		mutex_exit(&cpr_slock);
381 		return (rc);
382 	}
383 #endif
384 
385 	/*
386 	 * acquire cpr serial lock and init cpr state structure.
387 	 */
388 	if (rc = cpr_init(fcn))
389 		return (rc);
390 
391 #if defined(__sparc)
392 	if (fcn == AD_CPR_REUSABLE) {
393 		if ((rc = i_cpr_check_cprinfo()) != 0)  {
394 			mutex_exit(&cpr_slock);
395 			return (rc);
396 		}
397 	}
398 #endif
399 
400 	/*
401 	 * Call the main cpr routine. If we are successful, we will be coming
402 	 * down from the resume side, otherwise we are still in suspend.
403 	 */
404 	cpr_err(CE_CONT, "System is being suspended");
405 	if (rc = cpr_main(cpr_sleeptype)) {
406 		CPR->c_flags |= C_ERROR;
407 		PMD(PMD_SX, ("cpr: Suspend operation failed.\n"))
408 		cpr_err(CE_NOTE, "Suspend operation failed.");
409 	} else if (CPR->c_flags & C_SUSPENDING) {
410 
411 		/*
412 		 * In the suspend to RAM case, by the time we get
413 		 * control back we're already resumed
414 		 */
415 		if (cpr_sleeptype == CPR_TORAM) {
416 			PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n"))
417 			cpr_done();
418 			return (rc);
419 		}
420 
421 #if defined(__sparc)
422 
423 		PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n"))
424 		/*
425 		 * Back from a successful checkpoint
426 		 */
427 		if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) {
428 			mdboot(0, AD_BOOT, "", B_FALSE);
429 			/* NOTREACHED */
430 		}
431 
432 		/* make sure there are no more changes to the device tree */
433 		PMD(PMD_SX, ("cpr: dev tree freeze\n"))
434 		devtree_freeze();
435 
436 		/*
437 		 * stop other cpus and raise our priority.  since there is only
438 		 * one active cpu after this, and our priority will be too high
439 		 * for us to be preempted, we're essentially single threaded
440 		 * from here on out.
441 		 */
442 		PMD(PMD_SX, ("cpr: stop other cpus\n"))
443 		i_cpr_stop_other_cpus();
444 		PMD(PMD_SX, ("cpr: spl6\n"))
445 		(void) spl6();
446 
447 		/*
448 		 * try and reset leaf devices.  reset_leaves() should only
449 		 * be called when there are no other threads that could be
450 		 * accessing devices
451 		 */
452 		PMD(PMD_SX, ("cpr: reset leaves\n"))
453 		reset_leaves();
454 
455 		/*
456 		 * If i_cpr_power_down() succeeds, it'll not return
457 		 *
458 		 * Drives with write-cache enabled need to flush
459 		 * their cache.
460 		 */
461 		if (fcn != AD_CPR_TESTHALT) {
462 			PMD(PMD_SX, ("cpr: power down\n"))
463 			(void) i_cpr_power_down(cpr_sleeptype);
464 		}
465 		ASSERT(cpr_sleeptype == CPR_TODISK);
466 		/* currently CPR_TODISK comes back via a boot path */
467 		CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n");
468 		halt(NULL);
469 		/* NOTREACHED */
470 #endif
471 	}
472 	PMD(PMD_SX, ("cpr: cpr done\n"))
473 	cpr_done();
474 	return (rc);
475 }
476