/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/callb.h>
#include <sys/fs/ufs_inode.h>
#include <vm/anon.h>
#include <sys/fs/swapnode.h>	/* for swapfs_minfree */
#include <sys/kmem.h>
#include <sys/cpr.h>
#include <sys/conf.h>
#include <sys/machclock.h>

/*
 * CPR miscellaneous support routines
 */
#define	cpr_open(path, mode, vpp)	(vn_open(path, UIO_SYSSPACE, \
		mode, 0600, vpp, CRCREAT, 0))
#define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp, (caddr_t)(basep), \
		cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
		(ssize_t *)NULL))

extern void clkset(time_t);
extern cpu_t *i_cpr_bootcpu(void);
extern caddr_t i_cpr_map_setup(void);
extern void i_cpr_free_memory_resources(void);

extern kmutex_t cpr_slock;
extern size_t cpr_buf_size;
extern char *cpr_buf;
extern size_t cpr_pagedata_size;
extern char *cpr_pagedata;
extern int cpr_bufs_allocated;
extern int cpr_bitmaps_allocated;

#if defined(__sparc)
static struct cprconfig cprconfig;
static int cprconfig_loaded = 0;
static int cpr_statefile_ok(vnode_t *, int);
static int cpr_p_online(cpu_t *, int);
static void cpr_save_mp_state(void);
#endif

int cpr_is_ufs(struct vfs *);
int cpr_is_zfs(struct vfs *);

char cpr_default_path[] = CPR_DEFAULT;

#define	COMPRESS_PERCENT 40	/* approx compression ratio in percent */
#define	SIZE_RATE	115	/* increase size by 15% */
#define	INTEGRAL	100	/* for integer math */


/*
 * cmn_err() followed by a 1/4 second delay; this gives the
 * logging service a chance to flush messages and helps avoid
 * intermixing output from prom_printf().
 */
/*PRINTFLIKE2*/
void
cpr_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
	drv_usecwait(MICROSEC >> 2);
}

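/*
 * Prepare for a suspend/resume cycle: serialize against any other
 * suspend attempt, reset the per-cycle CPR state, and record the
 * requested function code.
 */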
int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	if (fcn == AD_SUSPEND_TO_RAM || fcn == DEV_SUSPEND_TO_RAM) {
		return (0);
	}
#if defined(__sparc)
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	if (cpr_debug & CPR_DEBUG3)
		cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
		    "kas\n", (void *)CPR->c_mapping_area);
#endif

	return (0);
}

/*
 * This routine releases any resources used during the checkpoint.
 */
void
cpr_done(void)
{
	cpr_stat_cleanup();
	i_cpr_bitmap_cleanup();

	/*
	 * Free pages used by cpr buffers.
	 */
	if (cpr_buf) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
	}
	if (cpr_pagedata) {
		kmem_free(cpr_pagedata, cpr_pagedata_size);
		cpr_pagedata = NULL;
	}

	i_cpr_free_memory_resources();
	mutex_exit(&cpr_slock);
	cpr_err(CE_CONT, "System has been resumed.\n");
}


#if defined(__sparc)
/*
 * reads config data into cprconfig
 */
static int
cpr_get_config(void)
{
	static char config_path[] = CPR_CONFIG;
	struct cprconfig *cf = &cprconfig;
	struct vnode *vp;
	char *fmt;
	int err;

	if (cprconfig_loaded)
		return (0);

	fmt = "cannot %s config file \"%s\", error %d\n";
	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
		cpr_err(CE_CONT, fmt, "open", config_path, err);
		return (err);
	}

	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (err) {
		cpr_err(CE_CONT, fmt, "read", config_path, err);
		return (err);
	}

	if (cf->cf_magic == CPR_CONFIG_MAGIC)
		cprconfig_loaded = 1;
	else {
		cpr_err(CE_CONT, "invalid config file \"%s\", "
		    "rerun pmconfig(1M)\n", config_path);
		err = EINVAL;
	}

	return (err);
}


/*
 * concat fs and path fields of the cprconfig structure;
 * returns pointer to the base of static data
 */
static char *
cpr_cprconfig_to_path(void)
{
	static char full_path[MAXNAMELEN];
	struct cprconfig *cf = &cprconfig;
	char *ptr;

	/*
	 * build /fs/path without extra '/'
	 */
	(void) strcpy(full_path, cf->cf_fs);
	if (strcmp(cf->cf_fs, "/"))
		(void) strcat(full_path, "/");
	ptr = cf->cf_path;
	if (*ptr == '/')
		ptr++;
	(void) strcat(full_path, ptr);
	return (full_path);
}


/*
 * Verify that the information in the configuration file regarding the
 * location for the statefile is still valid, depending on cf_type.
 * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be
 *	mounted on the same device as when pmconfig was last run,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 * for CFT_SPEC and CFT_ZVOL, cf_path must be the path to a block
 *	special file, it must have no file system mounted on it,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 */
static int
cpr_verify_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;
	static const char long_name[] = "Statefile pathname is too long.\n";
	static const char lookup_fmt[] = "Lookup failed for "
	    "cpr statefile device %s.\n";
	static const char path_chg_fmt[] = "Device path for statefile "
	    "has changed from %s to %s.\t%s\n";
	static const char rerun[] = "Please rerun pmconfig(1m).";
	struct vfs *vfsp = NULL, *vfsp_save = rootvfs;
	ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data;
	ufsvfs_t *ufsvfsp_save = ufsvfsp;
	int error;
	struct vnode *vp;
	char *slash, *tail, *longest;
	char *errstr;
	int found = 0;
	union {
		char un_devpath[OBP_MAXPATHLEN];
		char un_sfpath[MAXNAMELEN];
	} un;
#define	devpath	un.un_devpath
#define	sfpath	un.un_sfpath

	ASSERT(cprconfig_loaded);
	/*
	 * We need not worry about locking or the timing of releasing
	 * the vnode, since we are single-threaded now.
	 */

	switch (cf->cf_type) {
	case CFT_SPEC:
		error = i_devname_to_promname(cf->cf_devfs, devpath,
		    OBP_MAXPATHLEN);
		if (error || strcmp(devpath, cf->cf_dev_prom)) {
			cpr_err(CE_CONT, path_chg_fmt,
			    cf->cf_dev_prom, devpath, rerun);
			return (error);
		}
		/*FALLTHROUGH*/
	case CFT_ZVOL:
		if (strlen(cf->cf_path) > sizeof (sfpath)) {
			cpr_err(CE_CONT, long_name);
			return (ENAMETOOLONG);
		}
		if ((error = lookupname(cf->cf_devfs,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
			return (error);
		}
		if (vp->v_type != VBLK)
			errstr = "statefile must be a block device";
		else if (vfs_devismounted(vp->v_rdev))
			errstr = "statefile device must not "
			    "have a file system mounted on it";
		else if (IS_SWAPVP(vp))
			errstr = "statefile device must not "
			    "be configured as swap file";
		else
			errstr = NULL;

		VN_RELE(vp);
		if (errstr) {
			cpr_err(CE_CONT, "%s.\n", errstr);
			return (ENOTSUP);
		}

		return (error);
	case CFT_UFS:
		break;		/* don't indent all the original code */
	default:
		cpr_err(CE_PANIC, "invalid cf_type");
	}

	/*
	 * The original code for UFS statefile
	 */
	if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) {
		cpr_err(CE_CONT, long_name);
		return (ENAMETOOLONG);
	}

	bzero(sfpath, sizeof (sfpath));
	(void) strcpy(sfpath, cpr_cprconfig_to_path());

	if (*sfpath != '/') {
		cpr_err(CE_CONT, "Statefile pathname %s "
		    "must begin with a /\n", sfpath);
		return (EINVAL);
	}

	/*
	 * Find the longest prefix of the statefile pathname which
	 * is the mountpoint of a filesystem.  This string must
	 * match the cf_fs field we read from the config file.  Otherwise
	 * the user has changed things without running pmconfig.
	 */
	tail = longest = sfpath + 1;	/* pt beyond the leading "/" */
	while ((slash = strchr(tail, '/')) != NULL) {
		*slash = '\0';		/* temporarily terminate the string */
		if ((error = lookupname(sfpath,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			*slash = '/';
			cpr_err(CE_CONT, "A directory in the "
			    "statefile path %s was not found.\n", sfpath);
			VN_RELE(vp);

			return (error);
		}

		vfs_list_read_lock();
		vfsp = rootvfs;
		do {
			ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
			if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) {
				found = 1;
				break;
			}
			vfsp = vfsp->vfs_next;
		} while (vfsp != rootvfs);
		vfs_list_unlock();

		/*
		 * If we have found a filesystem mounted on the current
		 * path prefix, remember the end of the string in
		 * "longest".  If it happens to be the exact fs
		 * saved in the configuration file, save the current
		 * ufsvfsp so we can make additional checks further down.
		 */
		if (found) {
			longest = slash;
			if (strcmp(cf->cf_fs, sfpath) == 0) {
				ufsvfsp_save = ufsvfsp;
				vfsp_save = vfsp;
			}
			found = 0;
		}

		VN_RELE(vp);
		*slash = '/';
		tail = slash + 1;
	}
	*longest = '\0';
	if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "the statefile when pmconfig was run (%s) has "
		    "changed to %s.  %s\n", cf->cf_fs, sfpath, rerun);
		return (EINVAL);
	}

	if ((error = lookupname(cf->cf_devfs,
	    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
		cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
		return (error);
	}

	if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "statefile no longer mounted on device %s. "
		    "See power.conf(4).", cf->cf_devfs);
		VN_RELE(vp);
		return (ENXIO);
	}
	VN_RELE(vp);

	error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN);
	if (error || strcmp(devpath, cf->cf_dev_prom)) {
		cpr_err(CE_CONT, path_chg_fmt,
		    cf->cf_dev_prom, devpath, rerun);
		return (error);
	}

	return (0);
}

/*
 * Make sure that the statefile can be used as a block special statefile
 * (meaning that it exists and has nothing mounted on it).
 * Returns errno if not a valid statefile.
 */
int
cpr_check_spec_statefile(void)
{
	int err;

	if (err = cpr_get_config())
		return (err);
	ASSERT(cprconfig.cf_type == CFT_SPEC ||
	    cprconfig.cf_type == CFT_ZVOL);

	if (cprconfig.cf_devfs == NULL)
		return (ENXIO);

	return (cpr_verify_statefile_path());
}

int
cpr_alloc_statefile(int alloc_retry)
{
	register int rc = 0;
	char *str;

	/*
	 * Statefile size validation.  If this is the first checkpoint,
	 * disk block allocation will be done; otherwise, just do a file
	 * size check.  If statefile allocation is being retried, C_VP
	 * will already be initialized.
	 */
	if (alloc_retry) {
		str = "\n-->Retrying statefile allocation...";
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf(str);
		if (C_VP->v_type != VBLK)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
	} else {
		/*
		 * Open an existing file for writing; the statefile needs to
		 * be pre-allocated since we can't and don't want to do
		 * allocation during checkpoint (too much of the OS is
		 * disabled).
		 *    - do a preliminary size check here; if it is too small,
		 *	allocate more space internally and retry.
		 *    - check the vp to make sure it's the right type.
		 */
		char *path = cpr_build_statefile_path();

		if (path == NULL)
			return (ENXIO);
		else if (rc = cpr_verify_statefile_path())
			return (rc);

		if (rc = vn_open(path, UIO_SYSSPACE,
		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
			cpr_err(CE_WARN, "cannot open statefile %s", path);
			return (rc);
		}
	}

	/*
	 * Only ufs and block special statefiles supported
	 */
	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
		cpr_err(CE_CONT,
		    "Statefile must be regular file or block special file.");
		return (EACCES);
	}

	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
		return (rc);

	if (C_VP->v_type != VBLK) {
		/*
		 * sync out the fs change due to the statefile reservation.
		 */
		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());

		/*
		 * Validate disk block allocation for the state file.
		 * Ask the file system to prepare itself for the dump
		 * operation.
		 */
		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL, NULL)) {
			cpr_err(CE_CONT, "Error allocating "
			    "blocks for cpr statefile.");
			return (rc);
		}
	}
	return (0);
}


/*
 * Lookup device size and return available space in bytes.
 * NOTE: Since prop_op(9E) can't tell the difference between a character
 * and a block reference, it is ok to ask for "Size" instead of "Nblocks".
 */
size_t
cpr_get_devsize(dev_t dev)
{
	size_t bytes = 0;

	bytes = cdev_Size(dev);
	if (bytes == 0)
		bytes = cdev_size(dev);

	if (bytes > CPR_SPEC_OFFSET)
		bytes -= CPR_SPEC_OFFSET;
	else
		bytes = 0;

	return (bytes);
}


/*
 * increase statefile size
 */
static int
cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
{
	extern uchar_t cpr_pagecopy[];
	struct inode *ip = VTOI(vp);
	u_longlong_t offset;
	int error, increase;
	ssize_t resid;

	rw_enter(&ip->i_contents, RW_READER);
	increase = (ip->i_size < newsize);
	offset = ip->i_size;
	rw_exit(&ip->i_contents);

	if (increase == 0)
		return (0);

	/*
	 * write to each logical block to reserve disk space
	 */
	error = 0;
	cpr_pagecopy[0] = '1';
	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
			if (error == ENOSPC) {
				cpr_err(CE_WARN, "error %d while reserving "
				    "disk space for statefile %s\n"
				    "wanted %lld bytes, file is %lld short",
				    error, cpr_cprconfig_to_path(),
				    newsize, newsize - offset);
			}
			break;
		}
	}
	return (error);
}


/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk block allocation for the state file if no space has
 * been allocated yet.  Since the state file will not be removed,
 * allocation should only be done once.
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20;	/* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);

	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}

	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info; this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 *	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info; this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}

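/*
 * Close the statefile vnode; unless we are in reusable mode, first
 * release any dump reservation made on its behalf.
 */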
void
cpr_statef_close(void)
{
	if (C_VP) {
		if (!cpr_reusable_mode)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
		(void) VOP_CLOSE(C_VP, FWRITE, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(C_VP);
		C_VP = 0;
	}
}


/*
 * open cpr default file and display error
 */
int
cpr_open_deffile(int mode, vnode_t **vpp)
{
	int error;

	if (error = cpr_open(cpr_default_path, mode, vpp))
		cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
		    cpr_default_path, error);
	return (error);
}


/*
 * write cdef_t to disk.  This contains the original values of prom
 * properties that we modify.  We fill in the magic number of the file
 * here as a signal to the booter code that the state file is valid.
 * Be sure the file gets synced, since we may be shutting down the OS.
 */
int
cpr_write_deffile(cdef_t *cdef)
{
	struct vnode *vp;
	char *str;
	int rc;

	if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
		return (rc);

	if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
		str = "write";
	else if (rc = VOP_FSYNC(vp, FSYNC, CRED(), NULL))
		str = "fsync";
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "%s error %d, file \"%s\"",
		    str, rc, cpr_default_path);
	}
	return (rc);
}

/*
 * Clear the magic number in the defaults file.  This tells the booter
 * program that the state file is not current and thus prevents
 * any attempt to restore from an obsolete state file.
 */
void
cpr_clear_definfo(void)
{
	struct vnode *vp;
	cmini_t mini;

	if ((CPR->c_cprboot_magic != CPR_DEFAULT_MAGIC) ||
	    cpr_open_deffile(FCREAT|FWRITE, &vp))
		return;
	mini.magic = mini.reusable = 0;
	(void) cpr_rdwr(UIO_WRITE, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
}

/*
 * If the cpr default file is invalid, then we must not be in reusable
 * mode; if it is valid, it tells us our mode.
 */
int
cpr_get_reusable_mode(void)
{
	struct vnode *vp;
	cmini_t mini;
	int rc;

	if (cpr_open(cpr_default_path, FREAD, &vp))
		return (0);

	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	if (rc == 0 && mini.magic == CPR_DEFAULT_MAGIC)
		return (mini.reusable);

	return (0);
}
#endif

/*
 * clock/time related routines
 */
static time_t cpr_time_stamp;


void
cpr_tod_get(cpr_time_t *ctp)
{
	timestruc_t ts;

	mutex_enter(&tod_lock);
	ts = TODOP_GET(tod_ops);
	mutex_exit(&tod_lock);
	ctp->tv_sec = (time32_t)ts.tv_sec;
	ctp->tv_nsec = (int32_t)ts.tv_nsec;
}

void
cpr_tod_status_set(int tod_flag)
{
	mutex_enter(&tod_lock);
	tod_status_set(tod_flag);
	mutex_exit(&tod_lock);
}

void
cpr_save_time(void)
{
	cpr_time_stamp = gethrestime_sec();
}

/*
 * correct time based on saved time stamp or hardware clock
 */
void
cpr_restore_time(void)
{
	clkset(cpr_time_stamp);
}

#if defined(__sparc)
/*
 * CPU ONLINE/OFFLINE CODE
 */
int
cpr_mp_offline(void)
{
	cpu_t *cp, *bootcpu;
	int rc = 0;
	int brought_up_boot = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	mutex_enter(&cpu_lock);

	cpr_save_mp_state();

	bootcpu = i_cpr_bootcpu();
	if (!CPU_ACTIVE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
		brought_up_boot = 1;
	}

	cp = cpu_list;
	do {
		if (cp == bootcpu)
			continue;
		if (cp->cpu_flags & CPU_OFFLINE)
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	} while ((cp = cp->cpu_next) != cpu_list);
	if (brought_up_boot && (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)))
		prom_printf("changed cpu %p to state %d\n",
		    (void *)bootcpu, CPU_CPR_ONLINE);
	mutex_exit(&cpu_lock);

	return (rc);
}

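/*
 * Restart the cpus that were online at the time of cpr_suspend(),
 * then return the boot cpu to its pre-suspend state.
 */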
int
cpr_mp_online(void)
{
	cpu_t *cp, *bootcpu = CPU;
	int rc = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	/*
	 * cpr_save_mp_state() sets CPU_CPR_ONLINE in cpu_cpr_flags
	 * to indicate a cpu was online at the time of cpr_suspend();
	 * now restart those cpus that were marked as CPU_CPR_ONLINE
	 * and actually are offline.
	 */
	mutex_enter(&cpu_lock);
	for (cp = bootcpu->cpu_next; cp != bootcpu; cp = cp->cpu_next) {
		/*
		 * Clear the CPU_FROZEN flag in all cases.
		 */
		cp->cpu_flags &= ~CPU_FROZEN;

		if (CPU_CPR_IS_OFFLINE(cp))
			continue;
		if (CPU_ACTIVE(cp))
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}

	/*
	 * turn off the boot cpu if it was offlined
	 */
	if (CPU_CPR_IS_OFFLINE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}

static void
cpr_save_mp_state(void)
{
	cpu_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpu_list;
	do {
		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
		if (CPU_ACTIVE(cp))
			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
	} while ((cp = cp->cpu_next) != cpu_list);
}

/*
 * change cpu to online/offline
 */
static int
cpr_p_online(cpu_t *cp, int state)
{
	int rc;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (state) {
	case CPU_CPR_ONLINE:
		rc = cpu_online(cp);
		break;
	case CPU_CPR_OFFLINE:
		rc = cpu_offline(cp, CPU_FORCED);
		break;
	}
	if (rc) {
		cpr_err(CE_WARN, "Failed to change processor %d to "
		    "state %d, (errno %d)", cp->cpu_id, state, rc);
	}
	return (rc);
}

/*
 * Construct the pathname of the state file and return a pointer to
 * the caller.  Read the config file to get the mount point of the
 * filesystem and the pathname within fs.
 */
char *
cpr_build_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;

	if (cpr_get_config())
		return (NULL);

	switch (cf->cf_type) {
	case CFT_UFS:
		if (strlen(cf->cf_path) + strlen(cf->cf_fs) >= MAXNAMELEN - 1) {
			cpr_err(CE_CONT, "Statefile path is too long.\n");
			return (NULL);
		}
		return (cpr_cprconfig_to_path());
	case CFT_ZVOL:
		/*FALLTHROUGH*/
	case CFT_SPEC:
		return (cf->cf_devfs);
	default:
		cpr_err(CE_PANIC, "invalid statefile type");
		/*NOTREACHED*/
		return (NULL);
	}
}

int
cpr_statefile_is_spec(void)
{
	if (cpr_get_config())
		return (0);
	return (cprconfig.cf_type == CFT_SPEC);
}

char *
cpr_get_statefile_prom_path(void)
{
	struct cprconfig *cf = &cprconfig;

	ASSERT(cprconfig_loaded);
	ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC);
	ASSERT(cf->cf_type == CFT_SPEC || cf->cf_type == CFT_ZVOL);
	return (cf->cf_dev_prom);
}


/*
 * XXX The following routines need to be in the vfs source code.
 */

int
cpr_is_ufs(struct vfs *vfsp)
{
	char *fsname;

	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	return (strcmp(fsname, "ufs") == 0);
}

int
cpr_is_zfs(struct vfs *vfsp)
{
	char *fsname;

	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	return (strcmp(fsname, "zfs") == 0);
}

/*
 * This is a list of file systems that are allowed to be writeable when a
 * reusable statefile checkpoint is taken.  They must not have any state that
 * cannot be restored to consistency by simply rebooting using the checkpoint.
 * (In contrast to ufs, cachefs and pcfs, which have disk state that could
 * get out of sync with the in-kernel data.)
 */
int
cpr_reusable_mount_check(void)
{
	struct vfs *vfsp;
	char *fsname;
	char **cpp;
	static char *cpr_writeok_fss[] = {
		"autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
		"proc", "tmpfs", "ctfs", "objfs", "dev", NULL
	};

	vfs_list_read_lock();
	vfsp = rootvfs;
	do {
		if (vfsp->vfs_flag & VFS_RDONLY) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		fsname = vfssw[vfsp->vfs_fstype].vsw_name;
		for (cpp = cpr_writeok_fss; *cpp; cpp++) {
			if (strcmp(fsname, *cpp) == 0)
				break;
		}
		/*
		 * if the inner loop reached the NULL terminator,
		 * the current fs-type does not match any OK-type
		 */
		if (*cpp == NULL) {
			cpr_err(CE_CONT, "a filesystem of type %s is "
			    "mounted read/write.\nReusable statefile requires "
			    "no writeable filesystem of this type be mounted\n",
			    fsname);
			vfs_list_unlock();
			return (EINVAL);
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
	vfs_list_unlock();
	return (0);
}

/*
 * return statefile offset in DEV_BSIZE units
 */
int
cpr_statefile_offset(void)
{
	return (cprconfig.cf_type != CFT_UFS ? btod(CPR_SPEC_OFFSET) : 0);
}

/*
 * Force a fresh read of the cprinfo per uadmin 3 call
 */
void
cpr_forget_cprconfig(void)
{
	cprconfig_loaded = 0;
}
#endif