/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/callb.h>
#include <sys/fs/ufs_inode.h>
#include <vm/anon.h>
#include <sys/fs/swapnode.h>	/* for swapfs_minfree */
#include <sys/kmem.h>
#include <sys/cpr.h>
#include <sys/conf.h>
#include <sys/machclock.h>

/*
 * CPR miscellaneous support routines
 */
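/*
 * cpr_open() opens a kernel pathname with mode 0600 and CRCREAT
 * semantics; cpr_rdwr() transfers an entire buffer at offset 0 using
 * kernel-space addressing and kernel credentials.
 */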
#define	cpr_open(path, mode, vpp)	(vn_open(path, UIO_SYSSPACE, \
    mode, 0600, vpp, CRCREAT, 0))
#define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp, (caddr_t)(basep), \
    cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
    (ssize_t *)NULL))

extern void clkset(time_t);
extern cpu_t *i_cpr_bootcpu(void);
extern caddr_t i_cpr_map_setup(void);
extern void i_cpr_free_memory_resources(void);

extern kmutex_t cpr_slock;
extern size_t cpr_buf_size;
extern char *cpr_buf;
extern size_t cpr_pagedata_size;
extern char *cpr_pagedata;
extern int cpr_bufs_allocated;
extern int cpr_bitmaps_allocated;

#if defined(__sparc)
static struct cprconfig cprconfig;
static int cprconfig_loaded = 0;
static int cpr_statefile_ok(vnode_t *, int);
static int cpr_p_online(cpu_t *, int);
static void cpr_save_mp_state(void);
#endif

int cpr_is_ufs(struct vfs *);
int cpr_is_zfs(struct vfs *);

char cpr_default_path[] = CPR_DEFAULT;

#define	COMPRESS_PERCENT 40	/* approx compression ratio in percent */
#define	SIZE_RATE	115	/* increase size by 15% */
#define	INTEGRAL	100	/* for integer math */


/*
 * cmn_err() followed by a 1/4 second delay; this gives the
 * logging service a chance to flush messages and helps avoid
 * intermixing output from prom_printf().
 */
/*PRINTFLIKE2*/
void
cpr_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
	drv_usecwait(MICROSEC >> 2);
}


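/*
 * Initialize cpr state for the requested operation: take the single cpr
 * lock, reset the global cpr state and record the requested function.
 * Suspend-to-RAM needs nothing more; on sparc, suspend-to-disk also
 * reserves the virtual range used later by cpr_dump().
 */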
int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	if (fcn == AD_SUSPEND_TO_RAM || fcn == DEV_SUSPEND_TO_RAM) {
		return (0);
	}
#if defined(__sparc)
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	if (cpr_debug & CPR_DEBUG3)
		cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
		    "kas\n", (void *)CPR->c_mapping_area);
#endif

	return (0);
}

/*
 * This routine releases any resources used during the checkpoint.
 */
void
cpr_done(void)
{
	cpr_stat_cleanup();
	i_cpr_bitmap_cleanup();

	/*
	 * Free pages used by cpr buffers.
	 */
	if (cpr_buf) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
	}
	if (cpr_pagedata) {
		kmem_free(cpr_pagedata, cpr_pagedata_size);
		cpr_pagedata = NULL;
	}

	i_cpr_free_memory_resources();
	mutex_exit(&cpr_slock);
	cpr_err(CE_CONT, "System has been resumed.\n");
}


#if defined(__sparc)
/*
 * reads config data into cprconfig
 */
static int
cpr_get_config(void)
{
	static char config_path[] = CPR_CONFIG;
	struct cprconfig *cf = &cprconfig;
	struct vnode *vp;
	char *fmt;
	int err;

	if (cprconfig_loaded)
		return (0);

	fmt = "cannot %s config file \"%s\", error %d\n";
	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
		cpr_err(CE_CONT, fmt, "open", config_path, err);
		return (err);
	}

	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (err) {
		cpr_err(CE_CONT, fmt, "read", config_path, err);
		return (err);
	}

	if (cf->cf_magic == CPR_CONFIG_MAGIC)
		cprconfig_loaded = 1;
	else {
		cpr_err(CE_CONT, "invalid config file \"%s\", "
		    "rerun pmconfig(1M)\n", config_path);
		err = EINVAL;
	}

	return (err);
}


/*
 * concat fs and path fields of the cprconfig structure;
 * returns pointer to the base of static data
 */
static char *
cpr_cprconfig_to_path(void)
{
	static char full_path[MAXNAMELEN];
	struct cprconfig *cf = &cprconfig;
	char *ptr;

	/*
	 * build /fs/path without extra '/'
	 */
	(void) strcpy(full_path, cf->cf_fs);
	if (strcmp(cf->cf_fs, "/"))
		(void) strcat(full_path, "/");
	ptr = cf->cf_path;
	if (*ptr == '/')
		ptr++;
	(void) strcat(full_path, ptr);
	return (full_path);
}


/*
 * Verify that the information in the configuration file regarding the
 * location for the statefile is still valid, depending on cf_type.
 * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be
 *	mounted on the same device as when pmconfig was last run,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 * for CFT_SPEC and CFT_ZVOL, cf_path must be the path to a block
 *	special file, it must have no file system mounted on it,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 */
static int
cpr_verify_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;
	static const char long_name[] = "Statefile pathname is too long.\n";
	static const char lookup_fmt[] = "Lookup failed for "
	    "cpr statefile device %s.\n";
	static const char path_chg_fmt[] = "Device path for statefile "
	    "has changed from %s to %s.\t%s\n";
	static const char rerun[] = "Please rerun pmconfig(1m).";
	struct vfs *vfsp = NULL, *vfsp_save = rootvfs;
	ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data;
	ufsvfs_t *ufsvfsp_save = ufsvfsp;
	int error;
	struct vnode *vp;
	char *slash, *tail, *longest;
	char *errstr;
	int found = 0;
	union {
		char un_devpath[OBP_MAXPATHLEN];
		char un_sfpath[MAXNAMELEN];
	} un;
#define	devpath	un.un_devpath
#define	sfpath	un.un_sfpath

	ASSERT(cprconfig_loaded);
	/*
	 * We need not worry about locking or the timing of releasing
	 * the vnode, since we are single-threaded now.
	 */

	switch (cf->cf_type) {
	case CFT_SPEC:
		error = i_devname_to_promname(cf->cf_devfs, devpath,
		    OBP_MAXPATHLEN);
		if (error || strcmp(devpath, cf->cf_dev_prom)) {
			cpr_err(CE_CONT, path_chg_fmt,
			    cf->cf_dev_prom, devpath, rerun);
			return (error);
		}
		/*FALLTHROUGH*/
	case CFT_ZVOL:
		if (strlen(cf->cf_path) > sizeof (sfpath)) {
			cpr_err(CE_CONT, long_name);
			return (ENAMETOOLONG);
		}
		if ((error = lookupname(cf->cf_devfs,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
			return (error);
		}
		if (vp->v_type != VBLK)
			errstr = "statefile must be a block device";
		else if (vfs_devismounted(vp->v_rdev))
			errstr = "statefile device must not "
			    "have a file system mounted on it";
		else if (IS_SWAPVP(vp))
			errstr = "statefile device must not "
			    "be configured as swap file";
		else
			errstr = NULL;

		VN_RELE(vp);
		if (errstr) {
			cpr_err(CE_CONT, "%s.\n", errstr);
			return (ENOTSUP);
		}

		return (error);
	case CFT_UFS:
		break;		/* don't indent all the original code */
	default:
		cpr_err(CE_PANIC, "invalid cf_type");
	}

	/*
	 * The original code for UFS statefile
	 */
	if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) {
		cpr_err(CE_CONT, long_name);
		return (ENAMETOOLONG);
	}

	bzero(sfpath, sizeof (sfpath));
	(void) strcpy(sfpath, cpr_cprconfig_to_path());

	if (*sfpath != '/') {
		cpr_err(CE_CONT, "Statefile pathname %s "
		    "must begin with a /\n", sfpath);
		return (EINVAL);
	}

	/*
	 * Find the longest prefix of the statefile pathname which
	 * is the mountpoint of a filesystem. This string must
	 * match the cf_fs field we read from the config file. Other-
	 * wise the user has changed things without running pmconfig.
	 */
	tail = longest = sfpath + 1;	/* pt beyond the leading "/" */
	while ((slash = strchr(tail, '/')) != NULL) {
		*slash = '\0';	/* temporarily terminate the string */
		if ((error = lookupname(sfpath,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			*slash = '/';
			cpr_err(CE_CONT, "A directory in the "
			    "statefile path %s was not found.\n", sfpath);
			VN_RELE(vp);

			return (error);
		}

		vfs_list_read_lock();
		vfsp = rootvfs;
		do {
			ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
			if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) {
				found = 1;
				break;
			}
			vfsp = vfsp->vfs_next;
		} while (vfsp != rootvfs);
		vfs_list_unlock();

		/*
		 * If we have found a filesystem mounted on the current
		 * path prefix, remember the end of the string in
		 * "longest". If it happens to be the exact fs
		 * saved in the configuration file, save the current
		 * ufsvfsp so we can make additional checks further down.
		 */
		if (found) {
			longest = slash;
			if (strcmp(cf->cf_fs, sfpath) == 0) {
				ufsvfsp_save = ufsvfsp;
				vfsp_save = vfsp;
			}
			found = 0;
		}

		VN_RELE(vp);
		*slash = '/';
		tail = slash + 1;
	}
	*longest = '\0';
	if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "the statefile when pmconfig was run (%s) has "
		    "changed to %s. %s\n", cf->cf_fs, sfpath, rerun);
		return (EINVAL);
	}

	if ((error = lookupname(cf->cf_devfs,
	    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
		cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
		return (error);
	}

	if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "statefile no longer mounted on device %s. "
		    "See power.conf(4).", cf->cf_devfs);
		VN_RELE(vp);
		return (ENXIO);
	}
	VN_RELE(vp);

	error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN);
	if (error || strcmp(devpath, cf->cf_dev_prom)) {
		cpr_err(CE_CONT, path_chg_fmt,
		    cf->cf_dev_prom, devpath, rerun);
		return (error);
	}

	return (0);
}

/*
 * Make sure that the statefile can be used as a block special statefile
 * (meaning that it exists and has nothing mounted on it).
 * Returns errno if not a valid statefile.
 */
int
cpr_check_spec_statefile(void)
{
	int err;

	if (err = cpr_get_config())
		return (err);
	ASSERT(cprconfig.cf_type == CFT_SPEC ||
	    cprconfig.cf_type == CFT_ZVOL);

	if (cprconfig.cf_devfs == NULL)
		return (ENXIO);

	return (cpr_verify_statefile_path());

}

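/*
 * Open the statefile (or reuse the already-open vnode on retry) and make
 * sure enough space is reserved to hold the checkpoint image; a retry on
 * a regular file releases the previous block reservation first.
 */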
int
cpr_alloc_statefile(int alloc_retry)
{
	register int rc = 0;
	char *str;

	/*
	 * Statefile size validation. On the first checkpoint, disk block
	 * allocation is done; otherwise just the file size is checked.
	 * If statefile allocation is being retried, C_VP is already
	 * initialized.
	 */
	if (alloc_retry) {
		str = "\n-->Retrying statefile allocation...";
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf(str);
		if (C_VP->v_type != VBLK)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
	} else {
		/*
		 * Open an existing file for writing; the state file needs
		 * to be pre-allocated since we can't and don't want to do
		 * allocation during checkpoint (too much of the OS is
		 * disabled).
		 * - do a preliminary size check here; if the file is too
		 *   small, allocate more space internally and retry.
		 * - check the vp to make sure it's the right type.
		 */
		char *path = cpr_build_statefile_path();

		if (path == NULL)
			return (ENXIO);
		else if (rc = cpr_verify_statefile_path())
			return (rc);

		if (rc = vn_open(path, UIO_SYSSPACE,
		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
			cpr_err(CE_WARN, "cannot open statefile %s", path);
			return (rc);
		}
	}

	/*
	 * Only ufs and block special statefiles supported
	 */
	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
		cpr_err(CE_CONT,
		    "Statefile must be regular file or block special file.");
		return (EACCES);
	}

	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
		return (rc);

	if (C_VP->v_type != VBLK) {
		/*
		 * sync out the fs change due to the statefile reservation.
		 */
		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());

		/*
		 * Validate disk block allocation for the state file.
		 * Ask the file system to prepare itself for the dump
		 * operation.
		 */
		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL, NULL)) {
			cpr_err(CE_CONT, "Error allocating "
			    "blocks for cpr statefile.");
			return (rc);
		}
	}
	return (0);
}


/*
 * Lookup device size and return available space in bytes.
 * NOTE: Since prop_op(9E) can't tell the difference between a character
 * and a block reference, it is ok to ask for "Size" instead of "Nblocks".
 */
size_t
cpr_get_devsize(dev_t dev)
{
	size_t bytes = 0;

	bytes = cdev_Size(dev);
	if (bytes == 0)
		bytes = cdev_size(dev);

	if (bytes > CPR_SPEC_OFFSET)
		bytes -= CPR_SPEC_OFFSET;
	else
		bytes = 0;

	return (bytes);
}


/*
 * increase statefile size
 */
static int
cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
{
	extern uchar_t cpr_pagecopy[];
	struct inode *ip = VTOI(vp);
	u_longlong_t offset;
	int error, increase;
	ssize_t resid;

	rw_enter(&ip->i_contents, RW_READER);
	increase = (ip->i_size < newsize);
	offset = ip->i_size;
	rw_exit(&ip->i_contents);

	if (increase == 0)
		return (0);

	/*
	 * write to each logical block to reserve disk space
	 */
	error = 0;
	cpr_pagecopy[0] = '1';
	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
			if (error == ENOSPC) {
				cpr_err(CE_WARN, "error %d while reserving "
				    "disk space for statefile %s\n"
				    "wanted %lld bytes, file is %lld short",
				    error, cpr_cprconfig_to_path(),
				    newsize, newsize - offset);
			}
			break;
		}
	}
	return (error);
}


/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk blocks allocation for the state file if no space has
 * been allocated yet. Since the state file will not be removed,
 * allocation should only be done once.
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20;	/* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);

	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}

	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 *	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}


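/*
 * Release the statefile vnode; free any dump reservation first unless
 * we are in reusable mode.
 */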
void
cpr_statef_close(void)
{
	if (C_VP) {
		if (!cpr_reusable_mode)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
		(void) VOP_CLOSE(C_VP, FWRITE, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(C_VP);
		C_VP = 0;
	}
}


/*
 * open cpr default file and display error
 */
int
cpr_open_deffile(int mode, vnode_t **vpp)
{
	int error;

	if (error = cpr_open(cpr_default_path, mode, vpp))
		cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
		    cpr_default_path, error);
	return (error);
}


/*
 * write cdef_t to disk. This contains the original values of prom
 * properties that we modify. We fill in the magic number of the file
 * here as a signal to the booter code that the state file is valid.
 * Be sure the file gets synced, since we may be shutting down the OS.
 */
int
cpr_write_deffile(cdef_t *cdef)
{
	struct vnode *vp;
	char *str;
	int rc;

	if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
		return (rc);

	if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
		str = "write";
	else if (rc = VOP_FSYNC(vp, FSYNC, CRED(), NULL))
		str = "fsync";
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "%s error %d, file \"%s\"",
		    str, rc, cpr_default_path);
	}
	return (rc);
}

/*
 * Clear the magic number in the defaults file. This tells the booter
 * program that the state file is not current and thus prevents
 * any attempt to restore from an obsolete state file.
 */
void
cpr_clear_definfo(void)
{
	struct vnode *vp;
	cmini_t mini;

	if ((CPR->c_cprboot_magic != CPR_DEFAULT_MAGIC) ||
	    cpr_open_deffile(FCREAT|FWRITE, &vp))
		return;
	mini.magic = mini.reusable = 0;
	(void) cpr_rdwr(UIO_WRITE, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
}

/*
 * If the cpr default file is invalid, then we must not be in reusable mode;
 * if it is valid, it tells us our mode.
 */
int
cpr_get_reusable_mode(void)
{
	struct vnode *vp;
	cmini_t mini;
	int rc;

	if (cpr_open(cpr_default_path, FREAD, &vp))
		return (0);

	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (rc == 0 && mini.magic == CPR_DEFAULT_MAGIC)
		return (mini.reusable);

	return (0);
}
#endif

/*
 * clock/time related routines
 */
static time_t cpr_time_stamp;


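/*
 * Read the current time-of-day from the tod driver into a cpr_time_t.
 */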
void
cpr_tod_get(cpr_time_t *ctp)
{
	timestruc_t ts;

	mutex_enter(&tod_lock);
	ts = TODOP_GET(tod_ops);
	mutex_exit(&tod_lock);
	ctp->tv_sec = (time32_t)ts.tv_sec;
	ctp->tv_nsec = (int32_t)ts.tv_nsec;
}

void
cpr_tod_status_set(int tod_flag)
{
	mutex_enter(&tod_lock);
	tod_status_set(tod_flag);
	mutex_exit(&tod_lock);
}

void
cpr_save_time(void)
{
	cpr_time_stamp = gethrestime_sec();
}

/*
 * correct time based on saved time stamp or hardware clock
 */
void
cpr_restore_time(void)
{
	clkset(cpr_time_stamp);
}

#if defined(__sparc)
/*
 * CPU ONLINE/OFFLINE CODE
 */
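/*
 * Quiesce secondary cpus for suspend: record which cpus are online,
 * make sure the boot cpu is active, then offline every other cpu.
 */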
int
cpr_mp_offline(void)
{
	cpu_t *cp, *bootcpu;
	int rc = 0;
	int brought_up_boot = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	mutex_enter(&cpu_lock);

	cpr_save_mp_state();

	bootcpu = i_cpr_bootcpu();
	if (!CPU_ACTIVE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
		brought_up_boot = 1;
	}

	cp = cpu_list;
	do {
		if (cp == bootcpu)
			continue;
		if (cp->cpu_flags & CPU_OFFLINE)
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	} while ((cp = cp->cpu_next) != cpu_list);
	if (brought_up_boot && (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)))
		prom_printf("changed cpu %p to state %d\n",
		    (void *)bootcpu, CPU_CPR_ONLINE);
	mutex_exit(&cpu_lock);

	return (rc);
}

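/*
 * Resume-side counterpart of cpr_mp_offline(): bring back the cpus that
 * were online at suspend time and re-offline the boot cpu if needed.
 */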
int
cpr_mp_online(void)
{
	cpu_t *cp, *bootcpu = CPU;
	int rc = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	/*
	 * cpr_save_mp_state() sets CPU_CPR_ONLINE in cpu_cpr_flags
	 * to indicate a cpu was online at the time of cpr_suspend();
	 * now restart those cpus that were marked as CPU_CPR_ONLINE
	 * and actually are offline.
	 */
	mutex_enter(&cpu_lock);
	for (cp = bootcpu->cpu_next; cp != bootcpu; cp = cp->cpu_next) {
		/*
		 * Clear the CPU_FROZEN flag in all cases.
		 */
		cp->cpu_flags &= ~CPU_FROZEN;

		if (CPU_CPR_IS_OFFLINE(cp))
			continue;
		if (CPU_ACTIVE(cp))
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}

	/*
	 * turn off the boot cpu if it was offlined
	 */
	if (CPU_CPR_IS_OFFLINE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}

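/*
 * remember which cpus are online so cpr_mp_online() can restore them
 */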
static void
cpr_save_mp_state(void)
{
	cpu_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpu_list;
	do {
		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
		if (CPU_ACTIVE(cp))
			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
	} while ((cp = cp->cpu_next) != cpu_list);
}

/*
 * change cpu to online/offline
 */
static int
cpr_p_online(cpu_t *cp, int state)
{
	int rc;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (state) {
	case CPU_CPR_ONLINE:
		rc = cpu_online(cp);
		break;
	case CPU_CPR_OFFLINE:
		rc = cpu_offline(cp, CPU_FORCED);
		break;
	}
	if (rc) {
		cpr_err(CE_WARN, "Failed to change processor %d to "
		    "state %d, (errno %d)", cp->cpu_id, state, rc);
	}
	return (rc);
}

/*
 * Construct the pathname of the state file and return a pointer to
 * caller. Read the config file to get the mount point of the
 * filesystem and the pathname within fs.
 */
char *
cpr_build_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;

	if (cpr_get_config())
		return (NULL);

	switch (cf->cf_type) {
	case CFT_UFS:
		if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) {
			cpr_err(CE_CONT, "Statefile path is too long.\n");
			return (NULL);
		}
		return (cpr_cprconfig_to_path());
	case CFT_ZVOL:
		/*FALLTHROUGH*/
	case CFT_SPEC:
		return (cf->cf_devfs);
	default:
		cpr_err(CE_PANIC, "invalid statefile type");
		/*NOTREACHED*/
		return (NULL);
	}
}

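/*
 * returns nonzero when the configured statefile is a block special file
 */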
int
cpr_statefile_is_spec(void)
{
	if (cpr_get_config())
		return (0);
	return (cprconfig.cf_type == CFT_SPEC);
}

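/*
 * return the prom device path recorded for the statefile device
 */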
char *
cpr_get_statefile_prom_path(void)
{
	struct cprconfig *cf = &cprconfig;

	ASSERT(cprconfig_loaded);
	ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC);
	ASSERT(cf->cf_type == CFT_SPEC || cf->cf_type == CFT_ZVOL);
	return (cf->cf_dev_prom);
}


/*
 * XXX The following routines need to be in the vfs source code.
 */

int
cpr_is_ufs(struct vfs *vfsp)
{
	char *fsname;

	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	return (strcmp(fsname, "ufs") == 0);
}

int
cpr_is_zfs(struct vfs *vfsp)
{
	char *fsname;

	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	return (strcmp(fsname, "zfs") == 0);
}

/*
 * This is a list of file systems that are allowed to be writeable when a
 * reusable statefile checkpoint is taken. They must not have any state that
 * cannot be restored to consistency by simply rebooting using the checkpoint.
 * (In contrast to ufs, cachefs and pcfs which have disk state that could get
 * out of sync with the in-kernel data).
 */
int
cpr_reusable_mount_check(void)
{
	struct vfs *vfsp;
	char *fsname;
	char **cpp;
	static char *cpr_writeok_fss[] = {
		"autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
		"proc", "tmpfs", "ctfs", "objfs", "dev", NULL
	};

	vfs_list_read_lock();
	vfsp = rootvfs;
	do {
		if (vfsp->vfs_flag & VFS_RDONLY) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		fsname = vfssw[vfsp->vfs_fstype].vsw_name;
		for (cpp = cpr_writeok_fss; *cpp; cpp++) {
			if (strcmp(fsname, *cpp) == 0)
				break;
		}
		/*
		 * if the inner loop reached the NULL terminator,
		 * the current fs-type does not match any OK-type
		 */
		if (*cpp == NULL) {
			cpr_err(CE_CONT, "a filesystem of type %s is "
			    "mounted read/write.\nReusable statefile requires "
			    "no writeable filesystem of this type be mounted\n",
			    fsname);
			vfs_list_unlock();
			return (EINVAL);
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
	vfs_list_unlock();
	return (0);
}

/*
 * return statefile offset in DEV_BSIZE units
 */
int
cpr_statefile_offset(void)
{
	return (cprconfig.cf_type != CFT_UFS ? btod(CPR_SPEC_OFFSET) : 0);
}

/*
 * Force a fresh read of the cprinfo per uadmin 3 call
 */
void
cpr_forget_cprconfig(void)
{
	cprconfig_loaded = 0;
}
#endif