1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/errno.h> 30 #include <sys/cpuvar.h> 31 #include <sys/vfs.h> 32 #include <sys/vnode.h> 33 #include <sys/pathname.h> 34 #include <sys/callb.h> 35 #include <sys/fs/ufs_inode.h> 36 #include <vm/anon.h> 37 #include <sys/fs/swapnode.h> /* for swapfs_minfree */ 38 #include <sys/kmem.h> 39 #include <sys/cpr.h> 40 #include <sys/conf.h> 41 42 /* 43 * CPR miscellaneous support routines 44 */ 45 #define cpr_open(path, mode, vpp) (vn_open(path, UIO_SYSSPACE, \ 46 mode, 0600, vpp, CRCREAT, 0)) 47 #define cpr_rdwr(rw, vp, basep, cnt) (vn_rdwr(rw, vp, (caddr_t)(basep), \ 48 cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \ 49 (ssize_t *)NULL)) 50 51 extern void clkset(time_t); 52 extern cpu_t *i_cpr_bootcpu(void); 53 extern caddr_t i_cpr_map_setup(void); 54 extern void i_cpr_free_memory_resources(void); 55 56 extern kmutex_t cpr_slock; 57 extern size_t cpr_buf_size; 58 extern char *cpr_buf; 59 extern size_t cpr_pagedata_size; 60 extern char *cpr_pagedata; 61 extern int cpr_bufs_allocated; 62 extern int cpr_bitmaps_allocated; 63 64 static struct cprconfig cprconfig; 65 static int cprconfig_loaded = 0; 66 static int cpr_statefile_ok(vnode_t *, int); 67 static int cpr_p_online(cpu_t *, int); 68 static void cpr_save_mp_state(void); 69 int cpr_is_ufs(struct vfs *); 70 71 char cpr_default_path[] = CPR_DEFAULT; 72 73 #define COMPRESS_PERCENT 40 /* approx compression ratio in percent */ 74 #define SIZE_RATE 115 /* increase size by 15% */ 75 #define INTEGRAL 100 /* for integer math */ 76 77 78 /* 79 * cmn_err() followed by a 1/4 second delay; this gives the 80 * logging service a chance to flush messages and helps avoid 81 * intermixing output from prom_printf(). 82 */ 83 /*PRINTFLIKE2*/ 84 void 85 cpr_err(int ce, const char *fmt, ...) 86 { 87 va_list adx; 88 89 va_start(adx, fmt); 90 vcmn_err(ce, fmt, adx); 91 va_end(adx); 92 drv_usecwait(MICROSEC >> 2); 93 } 94 95 96 int 97 cpr_init(int fcn) 98 { 99 /* 100 * Allow only one suspend/resume process. 101 */ 102 if (mutex_tryenter(&cpr_slock) == 0) 103 return (EBUSY); 104 105 CPR->c_flags = 0; 106 CPR->c_substate = 0; 107 CPR->c_cprboot_magic = 0; 108 CPR->c_alloc_cnt = 0; 109 110 CPR->c_fcn = fcn; 111 if (fcn == AD_CPR_REUSABLE) 112 CPR->c_flags |= C_REUSABLE; 113 else 114 CPR->c_flags |= C_SUSPENDING; 115 if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ) 116 CPR->c_flags |= C_COMPRESSING; 117 /* 118 * reserve CPR_MAXCONTIG virtual pages for cpr_dump() 119 */ 120 CPR->c_mapping_area = i_cpr_map_setup(); 121 if (CPR->c_mapping_area == 0) { /* no space in kernelmap */ 122 cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n"); 123 mutex_exit(&cpr_slock); 124 return (EAGAIN); 125 } 126 DEBUG3(cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing " 127 "kas\n", (void *)CPR->c_mapping_area)); 128 129 return (0); 130 } 131 132 /* 133 * This routine releases any resources used during the checkpoint. 134 */ 135 void 136 cpr_done(void) 137 { 138 cpr_stat_cleanup(); 139 i_cpr_bitmap_cleanup(); 140 141 /* 142 * Free pages used by cpr buffers. 143 */ 144 if (cpr_buf) { 145 kmem_free(cpr_buf, cpr_buf_size); 146 cpr_buf = NULL; 147 } 148 if (cpr_pagedata) { 149 kmem_free(cpr_pagedata, cpr_pagedata_size); 150 cpr_pagedata = NULL; 151 } 152 153 i_cpr_free_memory_resources(); 154 mutex_exit(&cpr_slock); 155 cpr_err(CE_CONT, "System has been resumed.\n"); 156 } 157 158 159 /* 160 * reads config data into cprconfig 161 */ 162 static int 163 cpr_get_config(void) 164 { 165 static char config_path[] = CPR_CONFIG; 166 struct cprconfig *cf = &cprconfig; 167 struct vnode *vp; 168 char *fmt; 169 int err; 170 171 if (cprconfig_loaded) 172 return (0); 173 174 fmt = "cannot %s config file \"%s\", error %d\n"; 175 if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) { 176 cpr_err(CE_CONT, fmt, "open", config_path, err); 177 return (err); 178 } 179 180 err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf)); 181 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED()); 182 VN_RELE(vp); 183 if (err) { 184 cpr_err(CE_CONT, fmt, "read", config_path, err); 185 return (err); 186 } 187 188 if (cf->cf_magic == CPR_CONFIG_MAGIC) 189 cprconfig_loaded = 1; 190 else { 191 cpr_err(CE_CONT, "invalid config file \"%s\", " 192 "rerun pmconfig(1M)\n", config_path); 193 err = EINVAL; 194 } 195 196 return (err); 197 } 198 199 200 /* 201 * concat fs and path fields of the cprconfig structure; 202 * returns pointer to the base of static data 203 */ 204 static char * 205 cpr_cprconfig_to_path(void) 206 { 207 static char full_path[MAXNAMELEN]; 208 struct cprconfig *cf = &cprconfig; 209 char *ptr; 210 211 /* 212 * build /fs/path without extra '/' 213 */ 214 (void) strcpy(full_path, cf->cf_fs); 215 if (strcmp(cf->cf_fs, "/")) 216 (void) strcat(full_path, "/"); 217 ptr = cf->cf_path; 218 if (*ptr == '/') 219 ptr++; 220 (void) strcat(full_path, ptr); 221 return (full_path); 222 } 223 224 225 /* 226 * Verify that the information in the configuration file regarding the 227 * location for the statefile is still valid, depending on cf_type. 228 * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be 229 * mounted on the same device as when pmconfig was last run, 230 * and the translation of that device to a node in the prom's 231 * device tree must be the same as when pmconfig was last run. 232 * for CFT_SPEC, cf_path must be the path to a block special file, 233 * it must have no file system mounted on it, 234 * and the translation of that device to a node in the prom's 235 * device tree must be the same as when pmconfig was last run. 236 */ 237 static int 238 cpr_verify_statefile_path(void) 239 { 240 struct cprconfig *cf = &cprconfig; 241 static const char long_name[] = "Statefile pathname is too long.\n"; 242 static const char lookup_fmt[] = "Lookup failed for " 243 "cpr statefile device %s.\n"; 244 static const char path_chg_fmt[] = "Device path for statefile " 245 "has changed from %s to %s.\t%s\n"; 246 static const char rerun[] = "Please rerun pmconfig(1m)."; 247 struct vfs *vfsp = NULL, *vfsp_save = rootvfs; 248 ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data; 249 ufsvfs_t *ufsvfsp_save = ufsvfsp; 250 int error; 251 struct vnode *vp; 252 char *slash, *tail, *longest; 253 char *errstr; 254 int found = 0; 255 union { 256 char un_devpath[OBP_MAXPATHLEN]; 257 char un_sfpath[MAXNAMELEN]; 258 } un; 259 #define devpath un.un_devpath 260 #define sfpath un.un_sfpath 261 262 ASSERT(cprconfig_loaded); 263 /* 264 * We need not worry about locking or the timing of releasing 265 * the vnode, since we are single-threaded now. 266 */ 267 268 switch (cf->cf_type) { 269 case CFT_SPEC: 270 if (strlen(cf->cf_path) > sizeof (sfpath)) { 271 cpr_err(CE_CONT, long_name); 272 return (ENAMETOOLONG); 273 } 274 if ((error = lookupname(cf->cf_devfs, 275 UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) { 276 cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs); 277 return (error); 278 } 279 if (vp->v_type != VBLK) 280 errstr = "statefile must be a block device"; 281 else if (vfs_devismounted(vp->v_rdev)) 282 errstr = "statefile device must not " 283 "have a file system mounted on it"; 284 else if (IS_SWAPVP(vp)) 285 errstr = "statefile device must not " 286 "be configured as swap file"; 287 else 288 errstr = NULL; 289 290 VN_RELE(vp); 291 if (errstr) { 292 cpr_err(CE_CONT, "%s.\n", errstr); 293 return (ENOTSUP); 294 } 295 296 error = i_devname_to_promname(cf->cf_devfs, devpath, 297 OBP_MAXPATHLEN); 298 if (error || strcmp(devpath, cf->cf_dev_prom)) { 299 cpr_err(CE_CONT, path_chg_fmt, 300 cf->cf_dev_prom, devpath, rerun); 301 } 302 return (error); 303 case CFT_UFS: 304 break; /* don't indent all the original code */ 305 default: 306 cpr_err(CE_PANIC, "invalid cf_type"); 307 } 308 309 /* 310 * The original code for UFS statefile 311 */ 312 if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) { 313 cpr_err(CE_CONT, long_name); 314 return (ENAMETOOLONG); 315 } 316 317 bzero(sfpath, sizeof (sfpath)); 318 (void) strcpy(sfpath, cpr_cprconfig_to_path()); 319 320 if (*sfpath != '/') { 321 cpr_err(CE_CONT, "Statefile pathname %s " 322 "must begin with a /\n", sfpath); 323 return (EINVAL); 324 } 325 326 /* 327 * Find the longest prefix of the statefile pathname which 328 * is the mountpoint of a filesystem. This string must 329 * match the cf_fs field we read from the config file. Other- 330 * wise the user has changed things without running pmconfig. 331 */ 332 tail = longest = sfpath + 1; /* pt beyond the leading "/" */ 333 while ((slash = strchr(tail, '/')) != NULL) { 334 *slash = '\0'; /* temporarily terminate the string */ 335 if ((error = lookupname(sfpath, 336 UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) { 337 *slash = '/'; 338 cpr_err(CE_CONT, "A directory in the " 339 "statefile path %s was not found.\n", sfpath); 340 VN_RELE(vp); 341 342 return (error); 343 } 344 345 vfs_list_read_lock(); 346 vfsp = rootvfs; 347 do { 348 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 349 if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) { 350 found = 1; 351 break; 352 } 353 vfsp = vfsp->vfs_next; 354 } while (vfsp != rootvfs); 355 vfs_list_unlock(); 356 357 /* 358 * If we have found a filesystem mounted on the current 359 * path prefix, remember the end of the string in 360 * "longest". If it happens to be the the exact fs 361 * saved in the configuration file, save the current 362 * ufsvfsp so we can make additional checks further down. 363 */ 364 if (found) { 365 longest = slash; 366 if (strcmp(cf->cf_fs, sfpath) == 0) { 367 ufsvfsp_save = ufsvfsp; 368 vfsp_save = vfsp; 369 } 370 found = 0; 371 } 372 373 VN_RELE(vp); 374 *slash = '/'; 375 tail = slash + 1; 376 } 377 *longest = '\0'; 378 if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) { 379 cpr_err(CE_CONT, "Filesystem containing " 380 "the statefile when pmconfig was run (%s) has " 381 "changed to %s. %s\n", cf->cf_fs, sfpath, rerun); 382 return (EINVAL); 383 } 384 385 if ((error = lookupname(cf->cf_devfs, 386 UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) { 387 cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs); 388 return (error); 389 } 390 391 if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) { 392 cpr_err(CE_CONT, "Filesystem containing " 393 "statefile no longer mounted on device %s. " 394 "See power.conf(4).", cf->cf_devfs); 395 VN_RELE(vp); 396 return (ENXIO); 397 } 398 VN_RELE(vp); 399 400 error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN); 401 if (error || strcmp(devpath, cf->cf_dev_prom)) { 402 cpr_err(CE_CONT, path_chg_fmt, 403 cf->cf_dev_prom, devpath, rerun); 404 return (error); 405 } 406 407 return (0); 408 } 409 410 /* 411 * Make sure that the statefile can be used as a block special statefile 412 * (meaning that is exists and has nothing mounted on it) 413 * Returns errno if not a valid statefile. 414 */ 415 int 416 cpr_check_spec_statefile(void) 417 { 418 int err; 419 420 if (err = cpr_get_config()) 421 return (err); 422 ASSERT(cprconfig.cf_type == CFT_SPEC); 423 424 if (cprconfig.cf_devfs == NULL) 425 return (ENXIO); 426 427 return (cpr_verify_statefile_path()); 428 429 } 430 431 int 432 cpr_alloc_statefile(int alloc_retry) 433 { 434 register int rc = 0; 435 char *str; 436 437 /* 438 * Statefile size validation. If checkpoint the first time, disk blocks 439 * allocation will be done; otherwise, just do file size check. 440 * if statefile allocation is being retried, C_VP will be inited 441 */ 442 if (alloc_retry) { 443 str = "\n-->Retrying statefile allocation..."; 444 if (cpr_debug & (LEVEL1 | LEVEL7)) 445 errp(str); 446 if (C_VP->v_type != VBLK) 447 (void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL); 448 } else { 449 /* 450 * Open an exiting file for writing, the state file needs to be 451 * pre-allocated since we can't and don't want to do allocation 452 * during checkpoint (too much of the OS is disabled). 453 * - do a preliminary size checking here, if it is too small, 454 * allocate more space internally and retry. 455 * - check the vp to make sure it's the right type. 456 */ 457 char *path = cpr_build_statefile_path(); 458 459 if (path == NULL) 460 return (ENXIO); 461 else if (rc = cpr_verify_statefile_path()) 462 return (rc); 463 464 if (rc = vn_open(path, UIO_SYSSPACE, 465 FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) { 466 cpr_err(CE_WARN, "cannot open statefile %s", path); 467 return (rc); 468 } 469 } 470 471 /* 472 * Only ufs and block special statefiles supported 473 */ 474 if (C_VP->v_type != VREG && C_VP->v_type != VBLK) { 475 cpr_err(CE_CONT, 476 "Statefile must be regular file or block special file."); 477 return (EACCES); 478 } 479 480 if (rc = cpr_statefile_ok(C_VP, alloc_retry)) 481 return (rc); 482 483 if (C_VP->v_type != VBLK) { 484 /* 485 * sync out the fs change due to the statefile reservation. 486 */ 487 (void) VFS_SYNC(C_VP->v_vfsp, 0, CRED()); 488 489 /* 490 * Validate disk blocks allocation for the state file. 491 * Ask the file system prepare itself for the dump operation. 492 */ 493 if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL)) { 494 cpr_err(CE_CONT, "Error allocating " 495 "blocks for cpr statefile."); 496 return (rc); 497 } 498 } 499 return (0); 500 } 501 502 503 /* 504 * lookup device size in blocks, 505 * and return available space in bytes 506 */ 507 size_t 508 cpr_get_devsize(dev_t dev) 509 { 510 size_t bytes = 0; 511 int64_t Nblocks; 512 int nblocks; 513 514 if ((Nblocks = bdev_Size(dev)) != -1) 515 bytes = dbtob(Nblocks); 516 else if ((nblocks = bdev_size(dev)) != -1) 517 bytes = dbtob(nblocks); 518 519 if (bytes > CPR_SPEC_OFFSET) 520 bytes -= CPR_SPEC_OFFSET; 521 else 522 bytes = 0; 523 524 return (bytes); 525 } 526 527 528 /* 529 * increase statefile size 530 */ 531 static int 532 cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize) 533 { 534 extern uchar_t cpr_pagecopy[]; 535 struct inode *ip = VTOI(vp); 536 u_longlong_t offset; 537 int error, increase; 538 ssize_t resid; 539 540 rw_enter(&ip->i_contents, RW_READER); 541 increase = (ip->i_size < newsize); 542 offset = ip->i_size; 543 rw_exit(&ip->i_contents); 544 545 if (increase == 0) 546 return (0); 547 548 /* 549 * write to each logical block to reserve disk space 550 */ 551 error = 0; 552 cpr_pagecopy[0] = '1'; 553 for (; offset < newsize; offset += ip->i_fs->fs_bsize) { 554 if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy, 555 ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0, 556 (rlim64_t)MAXOFF_T, CRED(), &resid)) { 557 if (error == ENOSPC) { 558 cpr_err(CE_WARN, "error %d while reserving " 559 "disk space for statefile %s\n" 560 "wanted %lld bytes, file is %lld short", 561 error, cpr_cprconfig_to_path(), 562 newsize, newsize - offset); 563 } 564 break; 565 } 566 } 567 return (error); 568 } 569 570 571 /* 572 * do a simple estimate of the space needed to hold the statefile 573 * taking compression into account, but be fairly conservative 574 * so we have a better chance of completing; when dump fails, 575 * the retry cost is fairly high. 576 * 577 * Do disk blocks allocation for the state file if no space has 578 * been allocated yet. Since the state file will not be removed, 579 * allocation should only be done once. 580 */ 581 static int 582 cpr_statefile_ok(vnode_t *vp, int alloc_retry) 583 { 584 extern size_t cpr_bitmap_size; 585 struct inode *ip = VTOI(vp); 586 const int UCOMP_RATE = 20; /* comp. ratio*10 for user pages */ 587 u_longlong_t size, isize, ksize, raw_data; 588 char *str, *est_fmt; 589 size_t space; 590 int error; 591 592 /* 593 * number of pages short for swapping. 594 */ 595 STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv; 596 if (STAT->cs_nosw_pages < 0) 597 STAT->cs_nosw_pages = 0; 598 599 str = "cpr_statefile_ok:"; 600 601 DEBUG9(errp("Phys swap: max=%lu resv=%lu\n", 602 k_anoninfo.ani_max, k_anoninfo.ani_phys_resv)); 603 DEBUG9(errp("Mem swap: max=%ld resv=%lu\n", 604 MAX(availrmem - swapfs_minfree, 0), 605 k_anoninfo.ani_mem_resv)); 606 DEBUG9(errp("Total available swap: %ld\n", 607 CURRENT_TOTAL_AVAILABLE_SWAP)); 608 609 /* 610 * try increasing filesize by 15% 611 */ 612 if (alloc_retry) { 613 /* 614 * block device doesn't get any bigger 615 */ 616 if (vp->v_type == VBLK) { 617 if (cpr_debug & (LEVEL1 | LEVEL6)) 618 errp("Retry statefile on special file\n"); 619 return (ENOMEM); 620 } else { 621 rw_enter(&ip->i_contents, RW_READER); 622 size = (ip->i_size * SIZE_RATE) / INTEGRAL; 623 rw_exit(&ip->i_contents); 624 } 625 if (cpr_debug & (LEVEL1 | LEVEL6)) 626 errp("Retry statefile size = %lld\n", size); 627 } else { 628 u_longlong_t cpd_size; 629 pgcnt_t npages, nback; 630 int ndvram; 631 632 ndvram = 0; 633 (void) callb_execute_class(CB_CL_CPR_FB, 634 (int)(uintptr_t)&ndvram); 635 if (cpr_debug & (LEVEL1 | LEVEL6)) 636 errp("ndvram size = %d\n", ndvram); 637 638 /* 639 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages 640 */ 641 npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit); 642 cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2)); 643 raw_data = cpd_size + cpr_bitmap_size; 644 ksize = ndvram + mmu_ptob(npages); 645 646 est_fmt = "%s estimated size with " 647 "%scompression %lld, ksize %lld\n"; 648 nback = mmu_ptob(STAT->cs_nosw_pages); 649 if (CPR->c_flags & C_COMPRESSING) { 650 size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) + 651 raw_data + ((nback * 10) / UCOMP_RATE); 652 DEBUG1(errp(est_fmt, str, "", size, ksize)); 653 } else { 654 size = ksize + raw_data + nback; 655 DEBUG1(errp(est_fmt, str, "no ", size, ksize)); 656 } 657 } 658 659 /* 660 * All this is much simpler for a block device 661 */ 662 if (vp->v_type == VBLK) { 663 space = cpr_get_devsize(vp->v_rdev); 664 if (cpr_debug & (LEVEL1 | LEVEL6)) 665 errp("statefile dev size %lu\n", space); 666 667 /* 668 * Export the estimated filesize info, this value will be 669 * compared before dumping out the statefile in the case of 670 * no compression. 671 */ 672 STAT->cs_est_statefsz = size; 673 if (cpr_debug & (LEVEL1 | LEVEL6)) 674 errp("%s Estimated statefile size %llu, space %lu\n", 675 str, size, space); 676 if (size > space) { 677 cpr_err(CE_CONT, "Statefile partition too small."); 678 return (ENOMEM); 679 } 680 return (0); 681 } else { 682 if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) { 683 cpr_err(CE_CONT, "Statefile allocation retry failed\n"); 684 return (ENOMEM); 685 } 686 687 /* 688 * Estimate space needed for the state file. 689 * 690 * State file size in bytes: 691 * kernel size + non-cache pte seg + 692 * bitmap size + cpr state file headers size 693 * (round up to fs->fs_bsize) 694 */ 695 size = blkroundup(ip->i_fs, size); 696 697 /* 698 * Export the estimated filesize info, this value will be 699 * compared before dumping out the statefile in the case of 700 * no compression. 701 */ 702 STAT->cs_est_statefsz = size; 703 error = cpr_grow_statefile(vp, size); 704 if (cpr_debug & (LEVEL1 | LEVEL6)) { 705 rw_enter(&ip->i_contents, RW_READER); 706 isize = ip->i_size; 707 rw_exit(&ip->i_contents); 708 errp("%s Estimated statefile size %lld, i_size %lld\n", 709 str, size, isize); 710 } 711 712 return (error); 713 } 714 } 715 716 717 void 718 cpr_statef_close(void) 719 { 720 if (C_VP) { 721 if (!cpr_reusable_mode) 722 (void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL); 723 (void) VOP_CLOSE(C_VP, FWRITE, 1, (offset_t)0, CRED()); 724 VN_RELE(C_VP); 725 C_VP = 0; 726 } 727 } 728 729 730 /* 731 * open cpr default file and display error 732 */ 733 int 734 cpr_open_deffile(int mode, vnode_t **vpp) 735 { 736 int error; 737 738 if (error = cpr_open(cpr_default_path, mode, vpp)) 739 cpr_err(CE_CONT, "cannot open \"%s\", error %d\n", 740 cpr_default_path, error); 741 return (error); 742 } 743 744 745 /* 746 * write cdef_t to disk. This contains the original values of prom 747 * properties that we modify. We fill in the magic number of the file 748 * here as a signal to the booter code that the state file is valid. 749 * Be sure the file gets synced, since we may be shutting down the OS. 750 */ 751 int 752 cpr_write_deffile(cdef_t *cdef) 753 { 754 struct vnode *vp; 755 char *str; 756 int rc; 757 758 if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp)) 759 return (rc); 760 761 if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef))) 762 str = "write"; 763 else if (rc = VOP_FSYNC(vp, FSYNC, CRED())) 764 str = "fsync"; 765 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED()); 766 VN_RELE(vp); 767 768 if (rc) { 769 cpr_err(CE_WARN, "%s error %d, file \"%s\"", 770 str, rc, cpr_default_path); 771 } 772 return (rc); 773 } 774 775 /* 776 * Clear the magic number in the defaults file. This tells the booter 777 * program that the state file is not current and thus prevents 778 * any attempt to restore from an obsolete state file. 779 */ 780 void 781 cpr_clear_definfo(void) 782 { 783 struct vnode *vp; 784 cmini_t mini; 785 786 if ((CPR->c_cprboot_magic != CPR_DEFAULT_MAGIC) || 787 cpr_open_deffile(FCREAT|FWRITE, &vp)) 788 return; 789 mini.magic = mini.reusable = 0; 790 (void) cpr_rdwr(UIO_WRITE, vp, &mini, sizeof (mini)); 791 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED()); 792 VN_RELE(vp); 793 } 794 795 /* 796 * If the cpr default file is invalid, then we must not be in reusable mode 797 * if it is valid, it tells us our mode 798 */ 799 int 800 cpr_get_reusable_mode(void) 801 { 802 struct vnode *vp; 803 cmini_t mini; 804 int rc; 805 806 if (cpr_open(cpr_default_path, FREAD, &vp)) 807 return (0); 808 809 rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini)); 810 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED()); 811 VN_RELE(vp); 812 if (rc == 0 && mini.magic == CPR_DEFAULT_MAGIC) 813 return (mini.reusable); 814 815 return (0); 816 } 817 818 /* 819 * clock/time related routines 820 */ 821 static time_t cpr_time_stamp; 822 823 824 void 825 cpr_tod_get(cpr_time_t *ctp) 826 { 827 timestruc_t ts; 828 829 mutex_enter(&tod_lock); 830 ts = tod_get(); 831 mutex_exit(&tod_lock); 832 ctp->tv_sec = (time32_t)ts.tv_sec; 833 ctp->tv_nsec = (int32_t)ts.tv_nsec; 834 } 835 836 void 837 cpr_tod_fault_reset(void) 838 { 839 mutex_enter(&tod_lock); 840 tod_fault_reset(); 841 mutex_exit(&tod_lock); 842 } 843 844 void 845 cpr_save_time(void) 846 { 847 cpr_time_stamp = gethrestime_sec(); 848 } 849 850 /* 851 * correct time based on saved time stamp or hardware clock 852 */ 853 void 854 cpr_restore_time(void) 855 { 856 clkset(cpr_time_stamp); 857 } 858 859 /* 860 * CPU ONLINE/OFFLINE CODE 861 */ 862 int 863 cpr_mp_offline(void) 864 { 865 cpu_t *cp, *bootcpu; 866 int rc = 0; 867 int brought_up_boot = 0; 868 869 /* 870 * Do nothing for UP. 871 */ 872 if (ncpus == 1) 873 return (0); 874 875 mutex_enter(&cpu_lock); 876 877 cpr_save_mp_state(); 878 879 bootcpu = i_cpr_bootcpu(); 880 if (!CPU_ACTIVE(bootcpu)) { 881 if ((rc = cpr_p_online(bootcpu, CPU_CPR_ONLINE))) { 882 mutex_exit(&cpu_lock); 883 return (rc); 884 } 885 brought_up_boot = 1; 886 } 887 888 cp = cpu_list; 889 do { 890 if (cp == bootcpu) 891 continue; 892 if (cp->cpu_flags & CPU_OFFLINE) 893 continue; 894 if ((rc = cpr_p_online(cp, CPU_CPR_OFFLINE))) { 895 mutex_exit(&cpu_lock); 896 return (rc); 897 } 898 } while ((cp = cp->cpu_next) != cpu_list); 899 if (brought_up_boot && (cpr_debug & (LEVEL1 | LEVEL6))) 900 errp("changed cpu %p to state %d\n", bootcpu, CPU_CPR_ONLINE); 901 mutex_exit(&cpu_lock); 902 903 return (rc); 904 } 905 906 int 907 cpr_mp_online(void) 908 { 909 cpu_t *cp, *bootcpu = CPU; 910 int rc = 0; 911 912 /* 913 * Do nothing for UP. 914 */ 915 if (ncpus == 1) 916 return (0); 917 918 /* 919 * cpr_save_mp_state() sets CPU_CPR_ONLINE in cpu_cpr_flags 920 * to indicate a cpu was online at the time of cpr_suspend(); 921 * now restart those cpus that were marked as CPU_CPR_ONLINE 922 * and actually are offline. 923 */ 924 mutex_enter(&cpu_lock); 925 for (cp = bootcpu->cpu_next; cp != bootcpu; cp = cp->cpu_next) { 926 /* 927 * Clear the CPU_FROZEN flag in all cases. 928 */ 929 cp->cpu_flags &= ~CPU_FROZEN; 930 931 if (CPU_CPR_IS_OFFLINE(cp)) 932 continue; 933 if (CPU_ACTIVE(cp)) 934 continue; 935 if ((rc = cpr_p_online(cp, CPU_CPR_ONLINE))) { 936 mutex_exit(&cpu_lock); 937 return (rc); 938 } 939 } 940 941 /* 942 * turn off the boot cpu if it was offlined 943 */ 944 if (CPU_CPR_IS_OFFLINE(bootcpu)) { 945 if ((rc = cpr_p_online(bootcpu, CPU_CPR_OFFLINE))) { 946 mutex_exit(&cpu_lock); 947 return (rc); 948 } 949 } 950 mutex_exit(&cpu_lock); 951 return (0); 952 } 953 954 static void 955 cpr_save_mp_state(void) 956 { 957 cpu_t *cp; 958 959 ASSERT(MUTEX_HELD(&cpu_lock)); 960 961 cp = cpu_list; 962 do { 963 cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE; 964 if (CPU_ACTIVE(cp)) 965 CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE); 966 } while ((cp = cp->cpu_next) != cpu_list); 967 } 968 969 /* 970 * change cpu to online/offline 971 */ 972 static int 973 cpr_p_online(cpu_t *cp, int state) 974 { 975 int rc; 976 977 ASSERT(MUTEX_HELD(&cpu_lock)); 978 979 switch (state) { 980 case CPU_CPR_ONLINE: 981 rc = cpu_online(cp); 982 break; 983 case CPU_CPR_OFFLINE: 984 rc = cpu_offline(cp, CPU_FORCED); 985 break; 986 } 987 if (rc) { 988 cpr_err(CE_WARN, "Failed to change processor %d to " 989 "state %d, (errno %d)", cp->cpu_id, state, rc); 990 } 991 return (rc); 992 } 993 994 /* 995 * Construct the pathname of the state file and return a pointer to 996 * caller. Read the config file to get the mount point of the 997 * filesystem and the pathname within fs. 998 */ 999 char * 1000 cpr_build_statefile_path(void) 1001 { 1002 struct cprconfig *cf = &cprconfig; 1003 1004 if (cpr_get_config()) 1005 return (NULL); 1006 1007 switch (cf->cf_type) { 1008 case CFT_UFS: 1009 if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) { 1010 cpr_err(CE_CONT, "Statefile path is too long.\n"); 1011 return (NULL); 1012 } 1013 return (cpr_cprconfig_to_path()); 1014 case CFT_SPEC: 1015 return (cf->cf_devfs); 1016 default: 1017 cpr_err(CE_PANIC, "invalid statefile type"); 1018 /*NOTREACHED*/ 1019 return (NULL); 1020 } 1021 } 1022 1023 int 1024 cpr_statefile_is_spec(void) 1025 { 1026 if (cpr_get_config()) 1027 return (0); 1028 return (cprconfig.cf_type == CFT_SPEC); 1029 } 1030 1031 char * 1032 cpr_get_statefile_prom_path(void) 1033 { 1034 struct cprconfig *cf = &cprconfig; 1035 1036 ASSERT(cprconfig_loaded); 1037 ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC); 1038 ASSERT(cf->cf_type == CFT_SPEC); 1039 return (cf->cf_dev_prom); 1040 } 1041 1042 1043 /* 1044 * XXX The following routines need to be in the vfs source code. 1045 */ 1046 1047 int 1048 cpr_is_ufs(struct vfs *vfsp) 1049 { 1050 char *fsname; 1051 1052 fsname = vfssw[vfsp->vfs_fstype].vsw_name; 1053 return (strcmp(fsname, "ufs") == 0); 1054 } 1055 1056 /* 1057 * This is a list of file systems that are allowed to be writeable when a 1058 * reusable statefile checkpoint is taken. They must not have any state that 1059 * cannot be restored to consistency by simply rebooting using the checkpoint. 1060 * (In contrast to ufs, cachefs and pcfs which have disk state that could get 1061 * out of sync with the in-kernel data). 1062 */ 1063 int 1064 cpr_reusable_mount_check(void) 1065 { 1066 struct vfs *vfsp; 1067 char *fsname; 1068 char **cpp; 1069 static char *cpr_writeok_fss[] = { 1070 "autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs", 1071 "proc", "tmpfs", "ctfs", "objfs", "dev", NULL 1072 }; 1073 1074 vfs_list_read_lock(); 1075 vfsp = rootvfs; 1076 do { 1077 if (vfsp->vfs_flag & VFS_RDONLY) { 1078 vfsp = vfsp->vfs_next; 1079 continue; 1080 } 1081 fsname = vfssw[vfsp->vfs_fstype].vsw_name; 1082 for (cpp = cpr_writeok_fss; *cpp; cpp++) { 1083 if (strcmp(fsname, *cpp) == 0) 1084 break; 1085 } 1086 /* 1087 * if the inner loop reached the NULL terminator, 1088 * the current fs-type does not match any OK-type 1089 */ 1090 if (*cpp == NULL) { 1091 cpr_err(CE_CONT, "a filesystem of type %s is " 1092 "mounted read/write.\nReusable statefile requires " 1093 "no writeable filesystem of this type be mounted\n", 1094 fsname); 1095 vfs_list_unlock(); 1096 return (EINVAL); 1097 } 1098 vfsp = vfsp->vfs_next; 1099 } while (vfsp != rootvfs); 1100 vfs_list_unlock(); 1101 return (0); 1102 } 1103 1104 /* 1105 * Force a fresh read of the cprinfo per uadmin 3 call 1106 */ 1107 void 1108 cpr_forget_cprconfig(void) 1109 { 1110 cprconfig_loaded = 0; 1111 } 1112 1113 1114 /* 1115 * return statefile offset in DEV_BSIZE units 1116 */ 1117 int 1118 cpr_statefile_offset(void) 1119 { 1120 return (cpr_statefile_is_spec() ? btod(CPR_SPEC_OFFSET) : 0); 1121 } 1122