xref: /titanic_52/usr/src/uts/common/cpr/cpr_mod.c (revision 4a3b1d5b615ff6e54da1cc17f331e1ac794c5191)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
25  */
26 
27 /*
28  * System call to checkpoint and resume the currently running kernel
29  */
30 #include <sys/types.h>
31 #include <sys/errno.h>
32 #include <sys/modctl.h>
33 #include <sys/syscall.h>
34 #include <sys/cred.h>
35 #include <sys/uadmin.h>
36 #include <sys/cmn_err.h>
37 #include <sys/systm.h>
38 #include <sys/cpr.h>
39 #include <sys/swap.h>
40 #include <sys/vfs.h>
41 #include <sys/autoconf.h>
42 #include <sys/machsystm.h>
43 
44 extern int i_cpr_is_supported(int sleeptype);
45 extern int cpr_is_ufs(struct vfs *);
46 extern int cpr_is_zfs(struct vfs *);
47 extern int cpr_check_spec_statefile(void);
48 extern int cpr_reusable_mount_check(void);
49 extern int i_cpr_reusable_supported(void);
50 extern int i_cpr_reusefini(void);
51 extern struct mod_ops mod_miscops;
52 
53 extern int cpr_init(int);
54 extern void cpr_done(void);
55 extern void i_cpr_stop_other_cpus(void);
56 extern int i_cpr_power_down();
57 
58 #if defined(__sparc)
59 extern void cpr_forget_cprconfig(void);
60 #endif
61 
/*
 * Linkage record for this module: pairs the external mod_miscops
 * operations vector with the string "checkpoint resume".
 */
static struct modlmisc modlmisc = {
	&mod_miscops, "checkpoint resume"
};
65 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() below; its only entry is modlmisc.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};
69 
/*
 * Nonzero when in reusable statefile mode; cpr() refreshes it from
 * cpr_get_reusable_mode() on sparc when it is still zero.
 */
int cpr_reusable_mode;

kmutex_t	cpr_slock;	/* cpr serial lock */
/* NOTE(review): presumably the state set up by cpr_init() -- confirm */
cpr_t		cpr_state;
/* debug flag bits; cleared/set by the AD_CPR_DEBUG* subcommands in cpr() */
int		cpr_debug;
int		cpr_test_mode; /* true if called via uadmin testmode */
int		cpr_test_point = LOOP_BACK_NONE;	/* cpr test point */
int		cpr_mp_enable = 0;	/* set to 1 to enable MP suspend */
major_t		cpr_device = 0;		/* major number for S3 on one device */
79 
80 /*
81  * All the loadable module related code follows
82  */
83 int
84 _init(void)
85 {
86 	register int e;
87 
88 	if ((e = mod_install(&modlinkage)) == 0) {
89 		mutex_init(&cpr_slock, NULL, MUTEX_DEFAULT, NULL);
90 	}
91 	return (e);
92 }
93 
94 int
95 _fini(void)
96 {
97 	register int e;
98 
99 	if ((e = mod_remove(&modlinkage)) == 0) {
100 		mutex_destroy(&cpr_slock);
101 	}
102 	return (e);
103 }
104 
105 int
106 _info(struct modinfo *modinfop)
107 {
108 	return (mod_info(&modlinkage, modinfop));
109 }
110 
/*
 * Convert the leading run of decimal digits in p to an int.
 *
 * Parsing stops at the first character that is not '0'..'9' (including
 * the terminating NUL), so an empty string or one that does not start
 * with a digit yields 0 instead of reading past the end of the buffer.
 * No sign or leading white space is accepted.  A value too large for an
 * int saturates at the largest int rather than overflowing.
 */
static
int
atoi(char *p)
{
	const int	imax = (int)(~0U >> 1);
	int		i = 0;

	while (*p >= '0' && *p <= '9') {
		int	digit = *p++ - '0';

		/* would 10 * i + digit exceed imax? */
		if (i > (imax - digit) / 10)
			return (imax);
		i = 10 * i + digit;
	}

	return (i);
}
124 
125 int
126 cpr(int fcn, void *mdep)
127 {
128 
129 #if defined(__sparc)
130 	static const char noswapstr[] = "reusable statefile requires "
131 	    "that no swap area be configured.\n";
132 	static const char blockstr[] = "reusable statefile must be "
133 	    "a block device.  See power.conf(4) and pmconfig(1M).\n";
134 	static const char normalfmt[] = "cannot run normal "
135 	    "checkpoint/resume when in reusable statefile mode. "
136 	    "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) "
137 	    "to exit reusable statefile mode.\n";
138 	static const char modefmt[] = "%s in reusable mode.\n";
139 #endif
140 	register int rc = 0;
141 	int cpr_sleeptype;
142 
143 	/*
144 	 * First, reject commands that we don't (yet) support on this arch.
145 	 * This is easier to understand broken out like this than grotting
146 	 * through the second switch below.
147 	 */
148 
149 	switch (fcn) {
150 #if defined(__sparc)
151 	case AD_CHECK_SUSPEND_TO_RAM:
152 	case AD_SUSPEND_TO_RAM:
153 		return (ENOTSUP);
154 	case AD_CHECK_SUSPEND_TO_DISK:
155 	case AD_SUSPEND_TO_DISK:
156 	case AD_CPR_REUSEINIT:
157 	case AD_CPR_NOCOMPRESS:
158 	case AD_CPR_FORCE:
159 	case AD_CPR_REUSABLE:
160 	case AD_CPR_REUSEFINI:
161 	case AD_CPR_TESTZ:
162 	case AD_CPR_TESTNOZ:
163 	case AD_CPR_TESTHALT:
164 	case AD_CPR_SUSP_DEVICES:
165 		cpr_sleeptype = CPR_TODISK;
166 		break;
167 #endif
168 #if defined(__x86)
169 	case AD_CHECK_SUSPEND_TO_DISK:
170 	case AD_SUSPEND_TO_DISK:
171 	case AD_CPR_REUSEINIT:
172 	case AD_CPR_NOCOMPRESS:
173 	case AD_CPR_FORCE:
174 	case AD_CPR_REUSABLE:
175 	case AD_CPR_REUSEFINI:
176 	case AD_CPR_TESTZ:
177 	case AD_CPR_TESTNOZ:
178 	case AD_CPR_TESTHALT:
179 	case AD_CPR_PRINT:
180 		return (ENOTSUP);
181 	/* The DEV_* values need to be removed after sys-syspend is fixed */
182 	case DEV_CHECK_SUSPEND_TO_RAM:
183 	case DEV_SUSPEND_TO_RAM:
184 	case AD_CPR_SUSP_DEVICES:
185 	case AD_CHECK_SUSPEND_TO_RAM:
186 	case AD_SUSPEND_TO_RAM:
187 	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
188 	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
189 	case AD_FORCE_SUSPEND_TO_RAM:
190 	case AD_DEVICE_SUSPEND_TO_RAM:
191 		cpr_sleeptype = CPR_TORAM;
192 		break;
193 #endif
194 	}
195 #if defined(__sparc)
196 	/*
197 	 * Need to know if we're in reusable mode, but we will likely have
198 	 * rebooted since REUSEINIT, so we have to get the info from the
199 	 * file system
200 	 */
201 	if (!cpr_reusable_mode)
202 		cpr_reusable_mode = cpr_get_reusable_mode();
203 
204 	cpr_forget_cprconfig();
205 #endif
206 
207 	switch (fcn) {
208 
209 #if defined(__sparc)
210 	case AD_CPR_REUSEINIT:
211 		if (!i_cpr_reusable_supported())
212 			return (ENOTSUP);
213 		if (!cpr_statefile_is_spec()) {
214 			cpr_err(CE_CONT, blockstr);
215 			return (EINVAL);
216 		}
217 		if ((rc = cpr_check_spec_statefile()) != 0)
218 			return (rc);
219 		if (swapinfo) {
220 			cpr_err(CE_CONT, noswapstr);
221 			return (EINVAL);
222 		}
223 		cpr_test_mode = 0;
224 		break;
225 
226 	case AD_CPR_NOCOMPRESS:
227 	case AD_CPR_COMPRESS:
228 	case AD_CPR_FORCE:
229 		if (cpr_reusable_mode) {
230 			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
231 			return (ENOTSUP);
232 		}
233 		cpr_test_mode = 0;
234 		break;
235 
236 	case AD_CPR_REUSABLE:
237 		if (!i_cpr_reusable_supported())
238 			return (ENOTSUP);
239 		if (!cpr_statefile_is_spec()) {
240 			cpr_err(CE_CONT, blockstr);
241 			return (EINVAL);
242 		}
243 		if ((rc = cpr_check_spec_statefile()) != 0)
244 			return (rc);
245 		if (swapinfo) {
246 			cpr_err(CE_CONT, noswapstr);
247 			return (EINVAL);
248 		}
249 		if ((rc = cpr_reusable_mount_check()) != 0)
250 			return (rc);
251 		cpr_test_mode = 0;
252 		break;
253 
254 	case AD_CPR_REUSEFINI:
255 		if (!i_cpr_reusable_supported())
256 			return (ENOTSUP);
257 		cpr_test_mode = 0;
258 		break;
259 
260 	case AD_CPR_TESTZ:
261 	case AD_CPR_TESTNOZ:
262 	case AD_CPR_TESTHALT:
263 		if (cpr_reusable_mode) {
264 			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
265 			return (ENOTSUP);
266 		}
267 		cpr_test_mode = 1;
268 		break;
269 
270 	case AD_CPR_CHECK:
271 		if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode)
272 			return (ENOTSUP);
273 		return (0);
274 
275 	case AD_CPR_PRINT:
276 		CPR_STAT_EVENT_END("POST CPR DELAY");
277 		cpr_stat_event_print();
278 		return (0);
279 #endif
280 
281 	case AD_CPR_DEBUG0:
282 		cpr_debug = 0;
283 		return (0);
284 
285 	case AD_CPR_DEBUG1:
286 	case AD_CPR_DEBUG2:
287 	case AD_CPR_DEBUG3:
288 	case AD_CPR_DEBUG4:
289 	case AD_CPR_DEBUG5:
290 	case AD_CPR_DEBUG7:
291 	case AD_CPR_DEBUG8:
292 		cpr_debug |= CPR_DEBUG_BIT(fcn);
293 		return (0);
294 
295 	case AD_CPR_DEBUG9:
296 		cpr_debug |= CPR_DEBUG6;
297 		return (0);
298 
299 	/* The DEV_* values need to be removed after sys-syspend is fixed */
300 	case DEV_CHECK_SUSPEND_TO_RAM:
301 	case DEV_SUSPEND_TO_RAM:
302 	case AD_CHECK_SUSPEND_TO_RAM:
303 	case AD_SUSPEND_TO_RAM:
304 		cpr_test_point = LOOP_BACK_NONE;
305 		break;
306 
307 	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
308 		cpr_test_point = LOOP_BACK_PASS;
309 		break;
310 
311 	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
312 		cpr_test_point = LOOP_BACK_FAIL;
313 		break;
314 
315 	case AD_FORCE_SUSPEND_TO_RAM:
316 		cpr_test_point = FORCE_SUSPEND_TO_RAM;
317 		break;
318 
319 	case AD_DEVICE_SUSPEND_TO_RAM:
320 		if (mdep == NULL) {
321 			/* Didn't pass enough arguments */
322 			return (EINVAL);
323 		}
324 		cpr_test_point = DEVICE_SUSPEND_TO_RAM;
325 		cpr_device = (major_t)atoi((char *)mdep);
326 		break;
327 
328 	case AD_CPR_SUSP_DEVICES:
329 		cpr_test_point = FORCE_SUSPEND_TO_RAM;
330 		if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS)
331 			cmn_err(CE_WARN,
332 			    "Some devices did not suspend "
333 			    "and may be unusable");
334 		(void) cpr_resume_devices(ddi_root_node(), 0);
335 		return (0);
336 
337 	default:
338 		return (ENOTSUP);
339 	}
340 
341 	if (!i_cpr_is_supported(cpr_sleeptype))
342 		return (ENOTSUP);
343 
344 #if defined(__sparc)
345 	if ((cpr_sleeptype == CPR_TODISK &&
346 	    !cpr_is_ufs(rootvfs) && !cpr_is_zfs(rootvfs)))
347 		return (ENOTSUP);
348 #endif
349 
350 	if (fcn == AD_CHECK_SUSPEND_TO_RAM ||
351 	    fcn == DEV_CHECK_SUSPEND_TO_RAM) {
352 		ASSERT(i_cpr_is_supported(cpr_sleeptype));
353 		return (0);
354 	}
355 
356 #if defined(__sparc)
357 	if (fcn == AD_CPR_REUSEINIT) {
358 		if (mutex_tryenter(&cpr_slock) == 0)
359 			return (EBUSY);
360 		if (cpr_reusable_mode) {
361 			cpr_err(CE_CONT, modefmt, "already");
362 			mutex_exit(&cpr_slock);
363 			return (EBUSY);
364 		}
365 		rc = i_cpr_reuseinit();
366 		mutex_exit(&cpr_slock);
367 		return (rc);
368 	}
369 
370 	if (fcn == AD_CPR_REUSEFINI) {
371 		if (mutex_tryenter(&cpr_slock) == 0)
372 			return (EBUSY);
373 		if (!cpr_reusable_mode) {
374 			cpr_err(CE_CONT, modefmt, "not");
375 			mutex_exit(&cpr_slock);
376 			return (EINVAL);
377 		}
378 		rc = i_cpr_reusefini();
379 		mutex_exit(&cpr_slock);
380 		return (rc);
381 	}
382 #endif
383 
384 	/*
385 	 * acquire cpr serial lock and init cpr state structure.
386 	 */
387 	if (rc = cpr_init(fcn))
388 		return (rc);
389 
390 #if defined(__sparc)
391 	if (fcn == AD_CPR_REUSABLE) {
392 		if ((rc = i_cpr_check_cprinfo()) != 0)  {
393 			mutex_exit(&cpr_slock);
394 			return (rc);
395 		}
396 	}
397 #endif
398 
399 	/*
400 	 * Call the main cpr routine. If we are successful, we will be coming
401 	 * down from the resume side, otherwise we are still in suspend.
402 	 */
403 	cpr_err(CE_CONT, "System is being suspended");
404 	if (rc = cpr_main(cpr_sleeptype)) {
405 		CPR->c_flags |= C_ERROR;
406 		PMD(PMD_SX, ("cpr: Suspend operation failed.\n"))
407 		cpr_err(CE_NOTE, "Suspend operation failed.");
408 	} else if (CPR->c_flags & C_SUSPENDING) {
409 
410 		/*
411 		 * In the suspend to RAM case, by the time we get
412 		 * control back we're already resumed
413 		 */
414 		if (cpr_sleeptype == CPR_TORAM) {
415 			PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n"))
416 			cpr_done();
417 			return (rc);
418 		}
419 
420 #if defined(__sparc)
421 
422 		PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n"))
423 		/*
424 		 * Back from a successful checkpoint
425 		 */
426 		if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) {
427 			mdboot(0, AD_BOOT, "", B_FALSE);
428 			/* NOTREACHED */
429 		}
430 
431 		/* make sure there are no more changes to the device tree */
432 		PMD(PMD_SX, ("cpr: dev tree freeze\n"))
433 		devtree_freeze();
434 
435 		/*
436 		 * stop other cpus and raise our priority.  since there is only
437 		 * one active cpu after this, and our priority will be too high
438 		 * for us to be preempted, we're essentially single threaded
439 		 * from here on out.
440 		 */
441 		PMD(PMD_SX, ("cpr: stop other cpus\n"))
442 		i_cpr_stop_other_cpus();
443 		PMD(PMD_SX, ("cpr: spl6\n"))
444 		(void) spl6();
445 
446 		/*
447 		 * try and reset leaf devices.  reset_leaves() should only
448 		 * be called when there are no other threads that could be
449 		 * accessing devices
450 		 */
451 		PMD(PMD_SX, ("cpr: reset leaves\n"))
452 		reset_leaves();
453 
454 		/*
455 		 * If i_cpr_power_down() succeeds, it'll not return
456 		 *
457 		 * Drives with write-cache enabled need to flush
458 		 * their cache.
459 		 */
460 		if (fcn != AD_CPR_TESTHALT) {
461 			PMD(PMD_SX, ("cpr: power down\n"))
462 			(void) i_cpr_power_down(cpr_sleeptype);
463 		}
464 		ASSERT(cpr_sleeptype == CPR_TODISK);
465 		/* currently CPR_TODISK comes back via a boot path */
466 		CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n");
467 		halt(NULL);
468 		/* NOTREACHED */
469 #endif
470 	}
471 	PMD(PMD_SX, ("cpr: cpr done\n"))
472 	cpr_done();
473 	return (rc);
474 }
475