/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/cpuvar.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/callb.h>
#include <sys/fs/ufs_inode.h>
#include <vm/anon.h>
#include <sys/fs/swapnode.h>	/* for swapfs_minfree */
#include <sys/kmem.h>
#include <sys/cpr.h>
#include <sys/conf.h>
#include <sys/machclock.h>

/*
 * CPR miscellaneous support routines
 */
#define	cpr_open(path, mode, vpp)	(vn_open(path, UIO_SYSSPACE, \
		mode, 0600, vpp, CRCREAT, 0))
#define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp, (caddr_t)(basep), \
		cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
		(ssize_t *)NULL))

extern void clkset(time_t);
extern cpu_t *i_cpr_bootcpu(void);
extern caddr_t i_cpr_map_setup(void);
extern void i_cpr_free_memory_resources(void);

extern kmutex_t cpr_slock;
extern size_t cpr_buf_size;
extern char *cpr_buf;
extern size_t cpr_pagedata_size;
extern char *cpr_pagedata;
extern int cpr_bufs_allocated;
extern int cpr_bitmaps_allocated;

#if defined(__sparc)
static struct cprconfig cprconfig;
static int cprconfig_loaded = 0;
static int cpr_statefile_ok(vnode_t *, int);
static int cpr_p_online(cpu_t *, int);
static void cpr_save_mp_state(void);
#endif

int cpr_is_ufs(struct vfs *);

char cpr_default_path[] = CPR_DEFAULT;

#define	COMPRESS_PERCENT	40	/* approx compression ratio in percent */
#define	SIZE_RATE		115	/* increase size by 15% */
#define	INTEGRAL		100	/* for integer math */


/*
 * cmn_err() followed by a 1/4 second delay; this gives the
 * logging service a chance to flush messages and helps avoid
 * intermixing output from prom_printf().
 */
/*PRINTFLIKE2*/
void
cpr_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
	drv_usecwait(MICROSEC >> 2);
}


int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	if (fcn == AD_SUSPEND_TO_RAM || fcn == DEV_SUSPEND_TO_RAM) {
		return (0);
	}
#if defined(__sparc)
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	if (cpr_debug & CPR_DEBUG3)
		cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
		    "kas\n", (void *)CPR->c_mapping_area);
#endif

	return (0);
}

/*
 * This routine releases any resources used during the checkpoint.
 */
void
cpr_done(void)
{
	cpr_stat_cleanup();
	i_cpr_bitmap_cleanup();

	/*
	 * Free pages used by cpr buffers.
	 */
	if (cpr_buf) {
		kmem_free(cpr_buf, cpr_buf_size);
		cpr_buf = NULL;
	}
	if (cpr_pagedata) {
		kmem_free(cpr_pagedata, cpr_pagedata_size);
		cpr_pagedata = NULL;
	}

	i_cpr_free_memory_resources();
	mutex_exit(&cpr_slock);
	cpr_err(CE_CONT, "System has been resumed.\n");
}


#if defined(__sparc)
/*
 * reads config data into cprconfig
 */
static int
cpr_get_config(void)
{
	static char config_path[] = CPR_CONFIG;
	struct cprconfig *cf = &cprconfig;
	struct vnode *vp;
	char *fmt;
	int err;

	if (cprconfig_loaded)
		return (0);

	fmt = "cannot %s config file \"%s\", error %d\n";
	if (err = vn_open(config_path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) {
		cpr_err(CE_CONT, fmt, "open", config_path, err);
		return (err);
	}

	err = cpr_rdwr(UIO_READ, vp, cf, sizeof (*cf));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
	VN_RELE(vp);
	if (err) {
		cpr_err(CE_CONT, fmt, "read", config_path, err);
		return (err);
	}

	if (cf->cf_magic == CPR_CONFIG_MAGIC)
		cprconfig_loaded = 1;
	else {
		cpr_err(CE_CONT, "invalid config file \"%s\", "
		    "rerun pmconfig(1M)\n", config_path);
		err = EINVAL;
	}

	return (err);
}


/*
 * concat fs and path fields of the cprconfig structure;
 * returns pointer to the base of static data
 */
static char *
cpr_cprconfig_to_path(void)
{
	static char full_path[MAXNAMELEN];
	struct cprconfig *cf = &cprconfig;
	char *ptr;

	/*
	 * build /fs/path without extra '/'
	 */
	(void) strcpy(full_path, cf->cf_fs);
	if (strcmp(cf->cf_fs, "/"))
		(void) strcat(full_path, "/");
	ptr = cf->cf_path;
	if (*ptr == '/')
		ptr++;
	(void) strcat(full_path, ptr);
	return (full_path);
}


/*
 * Verify that the information in the configuration file regarding the
 * location for the statefile is still valid, depending on cf_type.
 * for CFT_UFS, cf_fs must still be a mounted filesystem, it must be
 *	mounted on the same device as when pmconfig was last run,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 * for CFT_SPEC, cf_path must be the path to a block special file,
 *	it must have no file system mounted on it,
 *	and the translation of that device to a node in the prom's
 *	device tree must be the same as when pmconfig was last run.
 */
static int
cpr_verify_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;
	static const char long_name[] = "Statefile pathname is too long.\n";
	static const char lookup_fmt[] = "Lookup failed for "
	    "cpr statefile device %s.\n";
	static const char path_chg_fmt[] = "Device path for statefile "
	    "has changed from %s to %s.\t%s\n";
	static const char rerun[] = "Please rerun pmconfig(1m).";
	struct vfs *vfsp = NULL, *vfsp_save = rootvfs;
	ufsvfs_t *ufsvfsp = (ufsvfs_t *)rootvfs->vfs_data;
	ufsvfs_t *ufsvfsp_save = ufsvfsp;
	int error;
	struct vnode *vp;
	char *slash, *tail, *longest;
	char *errstr;
	int found = 0;
	union {
		char un_devpath[OBP_MAXPATHLEN];
		char un_sfpath[MAXNAMELEN];
	} un;
#define	devpath	un.un_devpath
#define	sfpath	un.un_sfpath

	ASSERT(cprconfig_loaded);
	/*
	 * We need not worry about locking or the timing of releasing
	 * the vnode, since we are single-threaded now.
	 */

	switch (cf->cf_type) {
	case CFT_SPEC:
		if (strlen(cf->cf_path) > sizeof (sfpath)) {
			cpr_err(CE_CONT, long_name);
			return (ENAMETOOLONG);
		}
		if ((error = lookupname(cf->cf_devfs,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
			return (error);
		}
		if (vp->v_type != VBLK)
			errstr = "statefile must be a block device";
		else if (vfs_devismounted(vp->v_rdev))
			errstr = "statefile device must not "
			    "have a file system mounted on it";
		else if (IS_SWAPVP(vp))
			errstr = "statefile device must not "
			    "be configured as swap file";
		else
			errstr = NULL;

		VN_RELE(vp);
		if (errstr) {
			cpr_err(CE_CONT, "%s.\n", errstr);
			return (ENOTSUP);
		}

		error = i_devname_to_promname(cf->cf_devfs, devpath,
		    OBP_MAXPATHLEN);
		if (error || strcmp(devpath, cf->cf_dev_prom)) {
			cpr_err(CE_CONT, path_chg_fmt,
			    cf->cf_dev_prom, devpath, rerun);
		}
		return (error);
	case CFT_UFS:
		break;		/* don't indent all the original code */
	default:
		cpr_err(CE_PANIC, "invalid cf_type");
	}

	/*
	 * The original code for UFS statefile
	 */
	if (strlen(cf->cf_fs) + strlen(cf->cf_path) + 2 > sizeof (sfpath)) {
		cpr_err(CE_CONT, long_name);
		return (ENAMETOOLONG);
	}

	bzero(sfpath, sizeof (sfpath));
	(void) strcpy(sfpath, cpr_cprconfig_to_path());

	if (*sfpath != '/') {
		cpr_err(CE_CONT, "Statefile pathname %s "
		    "must begin with a /\n", sfpath);
		return (EINVAL);
	}

	/*
	 * Find the longest prefix of the statefile pathname which
	 * is the mountpoint of a filesystem.  This string must
	 * match the cf_fs field we read from the config file.  Other-
	 * wise the user has changed things without running pmconfig.
	 */
	tail = longest = sfpath + 1;	/* pt beyond the leading "/" */
	while ((slash = strchr(tail, '/')) != NULL) {
		*slash = '\0';	/* temporarily terminate the string */
		if ((error = lookupname(sfpath,
		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
			*slash = '/';
			cpr_err(CE_CONT, "A directory in the "
			    "statefile path %s was not found.\n", sfpath);
			VN_RELE(vp);

			return (error);
		}

		vfs_list_read_lock();
		vfsp = rootvfs;
		do {
			ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
			if (ufsvfsp != NULL && ufsvfsp->vfs_root == vp) {
				found = 1;
				break;
			}
			vfsp = vfsp->vfs_next;
		} while (vfsp != rootvfs);
		vfs_list_unlock();

		/*
		 * If we have found a filesystem mounted on the current
		 * path prefix, remember the end of the string in
		 * "longest".  If it happens to be the exact fs
		 * saved in the configuration file, save the current
		 * ufsvfsp so we can make additional checks further down.
		 */
		if (found) {
			longest = slash;
			if (strcmp(cf->cf_fs, sfpath) == 0) {
				ufsvfsp_save = ufsvfsp;
				vfsp_save = vfsp;
			}
			found = 0;
		}

		VN_RELE(vp);
		*slash = '/';
		tail = slash + 1;
	}
	*longest = '\0';
	if (cpr_is_ufs(vfsp_save) == 0 || strcmp(cf->cf_fs, sfpath)) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "the statefile when pmconfig was run (%s) has "
		    "changed to %s. %s\n", cf->cf_fs, sfpath, rerun);
		return (EINVAL);
	}

	if ((error = lookupname(cf->cf_devfs,
	    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp)) != 0) {
		cpr_err(CE_CONT, lookup_fmt, cf->cf_devfs);
		return (error);
	}

	if (ufsvfsp_save->vfs_devvp->v_rdev != vp->v_rdev) {
		cpr_err(CE_CONT, "Filesystem containing "
		    "statefile no longer mounted on device %s. "
		    "See power.conf(4).", cf->cf_devfs);
		VN_RELE(vp);
		return (ENXIO);
	}
	VN_RELE(vp);

	error = i_devname_to_promname(cf->cf_devfs, devpath, OBP_MAXPATHLEN);
	if (error || strcmp(devpath, cf->cf_dev_prom)) {
		cpr_err(CE_CONT, path_chg_fmt,
		    cf->cf_dev_prom, devpath, rerun);
		return (error);
	}

	return (0);
}

/*
 * Make sure that the statefile can be used as a block special statefile
 * (meaning that it exists and has nothing mounted on it).
 * Returns errno if not a valid statefile.
 */
int
cpr_check_spec_statefile(void)
{
	int err;

	if (err = cpr_get_config())
		return (err);
	ASSERT(cprconfig.cf_type == CFT_SPEC);

	if (cprconfig.cf_devfs == NULL)
		return (ENXIO);

	return (cpr_verify_statefile_path());

}

int
cpr_alloc_statefile(int alloc_retry)
{
	register int rc = 0;
	char *str;

	/*
	 * Statefile size validation.  If this is the first checkpoint, disk
	 * block allocation will be done; otherwise, just do a file size check.
	 * If statefile allocation is being retried, C_VP will be inited.
	 */
	if (alloc_retry) {
		str = "\n-->Retrying statefile allocation...";
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf(str);
		if (C_VP->v_type != VBLK)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL);
	} else {
		/*
		 * Open an existing file for writing; the state file needs to
		 * be pre-allocated since we can't and don't want to do
		 * allocation during checkpoint (too much of the OS is
		 * disabled).
		 *    - do a preliminary size check here; if it is too small,
		 *	allocate more space internally and retry.
		 *    - check the vp to make sure it's the right type.
		 */
		char *path = cpr_build_statefile_path();

		if (path == NULL)
			return (ENXIO);
		else if (rc = cpr_verify_statefile_path())
			return (rc);

		if (rc = vn_open(path, UIO_SYSSPACE,
		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
			cpr_err(CE_WARN, "cannot open statefile %s", path);
			return (rc);
		}
	}

	/*
	 * Only ufs and block special statefiles supported
	 */
	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
		cpr_err(CE_CONT,
		    "Statefile must be regular file or block special file.");
		return (EACCES);
	}

	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
		return (rc);

	if (C_VP->v_type != VBLK) {
		/*
		 * sync out the fs change due to the statefile reservation.
		 */
		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());

		/*
		 * Validate disk blocks allocation for the state file.
		 * Ask the file system to prepare itself for the dump
		 * operation.
		 */
		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL)) {
			cpr_err(CE_CONT, "Error allocating "
			    "blocks for cpr statefile.");
			return (rc);
		}
	}
	return (0);
}


/*
 * Lookup device size and return available space in bytes.
 * NOTE: Since prop_op(9E) can't tell the difference between a character
 * and a block reference, it is ok to ask for "Size" instead of "Nblocks".
 */
size_t
cpr_get_devsize(dev_t dev)
{
	size_t bytes = 0;

	bytes = cdev_Size(dev);
	if (bytes == 0)
		bytes = cdev_size(dev);

	if (bytes > CPR_SPEC_OFFSET)
		bytes -= CPR_SPEC_OFFSET;
	else
		bytes = 0;

	return (bytes);
}


/*
 * increase statefile size
 */
static int
cpr_grow_statefile(vnode_t *vp, u_longlong_t newsize)
{
	extern uchar_t cpr_pagecopy[];
	struct inode *ip = VTOI(vp);
	u_longlong_t offset;
	int error, increase;
	ssize_t resid;

	rw_enter(&ip->i_contents, RW_READER);
	increase = (ip->i_size < newsize);
	offset = ip->i_size;
	rw_exit(&ip->i_contents);

	if (increase == 0)
		return (0);

	/*
	 * write to each logical block to reserve disk space
	 */
	error = 0;
	cpr_pagecopy[0] = '1';
	for (; offset < newsize; offset += ip->i_fs->fs_bsize) {
		if (error = vn_rdwr(UIO_WRITE, vp, (caddr_t)cpr_pagecopy,
		    ip->i_fs->fs_bsize, (offset_t)offset, UIO_SYSSPACE, 0,
		    (rlim64_t)MAXOFF_T, CRED(), &resid)) {
			if (error == ENOSPC) {
				cpr_err(CE_WARN, "error %d while reserving "
				    "disk space for statefile %s\n"
				    "wanted %lld bytes, file is %lld short",
				    error, cpr_cprconfig_to_path(),
				    newsize, newsize - offset);
			}
			break;
		}
	}
	return (error);
}


/*
 * do a simple estimate of the space needed to hold the statefile
 * taking compression into account, but be fairly conservative
 * so we have a better chance of completing; when dump fails,
 * the retry cost is fairly high.
 *
 * Do disk blocks allocation for the state file if no space has
 * been allocated yet.  Since the state file will not be removed,
 * allocation should only be done once.
 */
static int
cpr_statefile_ok(vnode_t *vp, int alloc_retry)
{
	extern size_t cpr_bitmap_size;
	struct inode *ip = VTOI(vp);
	const int UCOMP_RATE = 20;	/* comp. ratio*10 for user pages */
	u_longlong_t size, isize, ksize, raw_data;
	char *str, *est_fmt;
	size_t space;
	int error;

	/*
	 * number of pages short for swapping.
	 */
	STAT->cs_nosw_pages = k_anoninfo.ani_mem_resv;
	if (STAT->cs_nosw_pages < 0)
		STAT->cs_nosw_pages = 0;

	str = "cpr_statefile_ok:";

	CPR_DEBUG(CPR_DEBUG9, "Phys swap: max=%lu resv=%lu\n",
	    k_anoninfo.ani_max, k_anoninfo.ani_phys_resv);
	CPR_DEBUG(CPR_DEBUG9, "Mem swap: max=%ld resv=%lu\n",
	    MAX(availrmem - swapfs_minfree, 0),
	    k_anoninfo.ani_mem_resv);
	CPR_DEBUG(CPR_DEBUG9, "Total available swap: %ld\n",
	    CURRENT_TOTAL_AVAILABLE_SWAP);

	/*
	 * try increasing filesize by 15%
	 */
	if (alloc_retry) {
		/*
		 * block device doesn't get any bigger
		 */
		if (vp->v_type == VBLK) {
			if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
				prom_printf(
				    "Retry statefile on special file\n");
			return (ENOMEM);
		} else {
			rw_enter(&ip->i_contents, RW_READER);
			size = (ip->i_size * SIZE_RATE) / INTEGRAL;
			rw_exit(&ip->i_contents);
		}
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("Retry statefile size = %lld\n", size);
	} else {
		u_longlong_t cpd_size;
		pgcnt_t npages, nback;
		int ndvram;

		ndvram = 0;
		(void) callb_execute_class(CB_CL_CPR_FB,
		    (int)(uintptr_t)&ndvram);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("ndvram size = %d\n", ndvram);

		/*
		 * estimate 1 cpd_t for every (CPR_MAXCONTIG / 2) pages
		 */
		npages = cpr_count_kpages(REGULAR_BITMAP, cpr_nobit);
		cpd_size = sizeof (cpd_t) * (npages / (CPR_MAXCONTIG / 2));
		raw_data = cpd_size + cpr_bitmap_size;
		ksize = ndvram + mmu_ptob(npages);

		est_fmt = "%s estimated size with "
		    "%scompression %lld, ksize %lld\n";
		nback = mmu_ptob(STAT->cs_nosw_pages);
		if (CPR->c_flags & C_COMPRESSING) {
			size = ((ksize * COMPRESS_PERCENT) / INTEGRAL) +
			    raw_data + ((nback * 10) / UCOMP_RATE);
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "", size, ksize);
		} else {
			size = ksize + raw_data + nback;
			CPR_DEBUG(CPR_DEBUG1, est_fmt, str, "no ",
			    size, ksize);
		}
	}

	/*
	 * All this is much simpler for a block device
	 */
	if (vp->v_type == VBLK) {
		space = cpr_get_devsize(vp->v_rdev);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("statefile dev size %lu\n", space);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6))
			prom_printf("%s Estimated statefile size %llu, "
			    "space %lu\n", str, size, space);
		if (size > space) {
			cpr_err(CE_CONT, "Statefile partition too small.");
			return (ENOMEM);
		}
		return (0);
	} else {
		if (CPR->c_alloc_cnt++ > C_MAX_ALLOC_RETRY) {
			cpr_err(CE_CONT, "Statefile allocation retry failed\n");
			return (ENOMEM);
		}

		/*
		 * Estimate space needed for the state file.
		 *
		 * State file size in bytes:
		 *	kernel size + non-cache pte seg +
		 *	bitmap size + cpr state file headers size
		 * (round up to fs->fs_bsize)
		 */
		size = blkroundup(ip->i_fs, size);

		/*
		 * Export the estimated filesize info, this value will be
		 * compared before dumping out the statefile in the case of
		 * no compression.
		 */
		STAT->cs_est_statefsz = size;
		error = cpr_grow_statefile(vp, size);
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)) {
			rw_enter(&ip->i_contents, RW_READER);
			isize = ip->i_size;
			rw_exit(&ip->i_contents);
			prom_printf("%s Estimated statefile size %lld, "
			    "i_size %lld\n", str, size, isize);
		}

		return (error);
	}
}


void
cpr_statef_close(void)
{
	if (C_VP) {
		if (!cpr_reusable_mode)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL);
		(void) VOP_CLOSE(C_VP, FWRITE, 1, (offset_t)0, CRED());
		VN_RELE(C_VP);
		C_VP = 0;
	}
}


/*
 * open cpr default file and display error
 */
int
cpr_open_deffile(int mode, vnode_t **vpp)
{
	int error;

	if (error = cpr_open(cpr_default_path, mode, vpp))
		cpr_err(CE_CONT, "cannot open \"%s\", error %d\n",
		    cpr_default_path, error);
	return (error);
}


/*
 * write cdef_t to disk.  This contains the original values of prom
 * properties that we modify.  We fill in the magic number of the file
 * here as a signal to the booter code that the state file is valid.
 * Be sure the file gets synced, since we may be shutting down the OS.
 */
int
cpr_write_deffile(cdef_t *cdef)
{
	struct vnode *vp;
	char *str;
	int rc;

	if (rc = cpr_open_deffile(FCREAT|FWRITE, &vp))
		return (rc);

	if (rc = cpr_rdwr(UIO_WRITE, vp, cdef, sizeof (*cdef)))
		str = "write";
	else if (rc = VOP_FSYNC(vp, FSYNC, CRED()))
		str = "fsync";
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED());
	VN_RELE(vp);

	if (rc) {
		cpr_err(CE_WARN, "%s error %d, file \"%s\"",
		    str, rc, cpr_default_path);
	}
	return (rc);
}

/*
 * Clear the magic number in the defaults file.  This tells the booter
 * program that the state file is not current and thus prevents
 * any attempt to restore from an obsolete state file.
 */
void
cpr_clear_definfo(void)
{
	struct vnode *vp;
	cmini_t mini;

	if ((CPR->c_cprboot_magic != CPR_DEFAULT_MAGIC) ||
	    cpr_open_deffile(FCREAT|FWRITE, &vp))
		return;
	mini.magic = mini.reusable = 0;
	(void) cpr_rdwr(UIO_WRITE, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED());
	VN_RELE(vp);
}

/*
 * If the cpr default file is invalid, then we must not be in reusable mode;
 * if it is valid, it tells us our mode.
 */
int
cpr_get_reusable_mode(void)
{
	struct vnode *vp;
	cmini_t mini;
	int rc;

	if (cpr_open(cpr_default_path, FREAD, &vp))
		return (0);

	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED());
	VN_RELE(vp);
	if (rc == 0 && mini.magic == CPR_DEFAULT_MAGIC)
		return (mini.reusable);

	return (0);
}
#endif

/*
 * clock/time related routines
 */
static time_t cpr_time_stamp;


void
cpr_tod_get(cpr_time_t *ctp)
{
	timestruc_t ts;

	mutex_enter(&tod_lock);
	ts = TODOP_GET(tod_ops);
	mutex_exit(&tod_lock);
	ctp->tv_sec = (time32_t)ts.tv_sec;
	ctp->tv_nsec = (int32_t)ts.tv_nsec;
}

void
cpr_tod_fault_reset(void)
{
	mutex_enter(&tod_lock);
	tod_fault_reset();
	mutex_exit(&tod_lock);
}

void
cpr_save_time(void)
{
	cpr_time_stamp = gethrestime_sec();
}

/*
 * correct time based on saved time stamp or hardware clock
 */
void
cpr_restore_time(void)
{
	clkset(cpr_time_stamp);
}

#if defined(__sparc)
/*
 * CPU ONLINE/OFFLINE CODE
 */
int
cpr_mp_offline(void)
{
	cpu_t *cp, *bootcpu;
	int rc = 0;
	int brought_up_boot = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	mutex_enter(&cpu_lock);

	cpr_save_mp_state();

	bootcpu = i_cpr_bootcpu();
	if (!CPU_ACTIVE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
		brought_up_boot = 1;
	}

	cp = cpu_list;
	do {
		if (cp == bootcpu)
			continue;
		if (cp->cpu_flags & CPU_OFFLINE)
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	} while ((cp = cp->cpu_next) != cpu_list);
	if (brought_up_boot && (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG6)))
		prom_printf("changed cpu %p to state %d\n",
		    bootcpu, CPU_CPR_ONLINE);
	mutex_exit(&cpu_lock);

	return (rc);
}

int
cpr_mp_online(void)
{
	cpu_t *cp, *bootcpu = CPU;
	int rc = 0;

	/*
	 * Do nothing for UP.
	 */
	if (ncpus == 1)
		return (0);

	/*
	 * cpr_save_mp_state() sets CPU_CPR_ONLINE in cpu_cpr_flags
	 * to indicate a cpu was online at the time of cpr_suspend();
	 * now restart those cpus that were marked as CPU_CPR_ONLINE
	 * and actually are offline.
	 */
	mutex_enter(&cpu_lock);
	for (cp = bootcpu->cpu_next; cp != bootcpu; cp = cp->cpu_next) {
		/*
		 * Clear the CPU_FROZEN flag in all cases.
		 */
		cp->cpu_flags &= ~CPU_FROZEN;

		if (CPU_CPR_IS_OFFLINE(cp))
			continue;
		if (CPU_ACTIVE(cp))
			continue;
		if ((rc = cpr_p_online(cp, CPU_CPR_ONLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}

	/*
	 * turn off the boot cpu if it was offlined
	 */
	if (CPU_CPR_IS_OFFLINE(bootcpu)) {
		if ((rc = cpr_p_online(bootcpu, CPU_CPR_OFFLINE))) {
			mutex_exit(&cpu_lock);
			return (rc);
		}
	}
	mutex_exit(&cpu_lock);
	return (0);
}

static void
cpr_save_mp_state(void)
{
	cpu_t *cp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	cp = cpu_list;
	do {
		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
		if (CPU_ACTIVE(cp))
			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
	} while ((cp = cp->cpu_next) != cpu_list);
}

/*
 * change cpu to online/offline
 */
static int
cpr_p_online(cpu_t *cp, int state)
{
	int rc;

	ASSERT(MUTEX_HELD(&cpu_lock));

	switch (state) {
	case CPU_CPR_ONLINE:
		rc = cpu_online(cp);
		break;
	case CPU_CPR_OFFLINE:
		rc = cpu_offline(cp, CPU_FORCED);
		break;
	}
	if (rc) {
		cpr_err(CE_WARN, "Failed to change processor %d to "
		    "state %d, (errno %d)", cp->cpu_id, state, rc);
	}
	return (rc);
}

/*
 * Construct the pathname of the state file and return a pointer to
 * caller.  Read the config file to get the mount point of the
 * filesystem and the pathname within fs.
 */
char *
cpr_build_statefile_path(void)
{
	struct cprconfig *cf = &cprconfig;

	if (cpr_get_config())
		return (NULL);

	switch (cf->cf_type) {
	case CFT_UFS:
		if (strlen(cf->cf_path) + strlen(cf->cf_fs) >=
		    MAXNAMELEN - 1) {
			cpr_err(CE_CONT, "Statefile path is too long.\n");
			return (NULL);
		}
		return (cpr_cprconfig_to_path());
	case CFT_SPEC:
		return (cf->cf_devfs);
	default:
		cpr_err(CE_PANIC, "invalid statefile type");
		/*NOTREACHED*/
		return (NULL);
	}
}

int
cpr_statefile_is_spec(void)
{
	if (cpr_get_config())
		return (0);
	return (cprconfig.cf_type == CFT_SPEC);
}

char *
cpr_get_statefile_prom_path(void)
{
	struct cprconfig *cf = &cprconfig;

	ASSERT(cprconfig_loaded);
	ASSERT(cf->cf_magic == CPR_CONFIG_MAGIC);
	ASSERT(cf->cf_type == CFT_SPEC);
	return (cf->cf_dev_prom);
}


/*
 * XXX The following routines need to be in the vfs source code.
 */

int
cpr_is_ufs(struct vfs *vfsp)
{
	char *fsname;

	fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	return (strcmp(fsname, "ufs") == 0);
}

/*
 * This is a list of file systems that are allowed to be writeable when a
 * reusable statefile checkpoint is taken.  They must not have any state that
 * cannot be restored to consistency by simply rebooting using the checkpoint.
 * (In contrast to ufs, cachefs and pcfs which have disk state that could get
 * out of sync with the in-kernel data).
 */
int
cpr_reusable_mount_check(void)
{
	struct vfs *vfsp;
	char *fsname;
	char **cpp;
	static char *cpr_writeok_fss[] = {
		"autofs", "devfs", "fd", "lofs", "mntfs", "namefs", "nfs",
		"proc", "tmpfs", "ctfs", "objfs", "dev", NULL
	};

	vfs_list_read_lock();
	vfsp = rootvfs;
	do {
		if (vfsp->vfs_flag & VFS_RDONLY) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		fsname = vfssw[vfsp->vfs_fstype].vsw_name;
		for (cpp = cpr_writeok_fss; *cpp; cpp++) {
			if (strcmp(fsname, *cpp) == 0)
				break;
		}
		/*
		 * if the inner loop reached the NULL terminator,
		 * the current fs-type does not match any OK-type
		 */
		if (*cpp == NULL) {
			cpr_err(CE_CONT, "a filesystem of type %s is "
			    "mounted read/write.\nReusable statefile requires "
			    "no writeable filesystem of this type be mounted\n",
			    fsname);
			vfs_list_unlock();
			return (EINVAL);
		}
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
	vfs_list_unlock();
	return (0);
}

/*
 * return statefile offset in DEV_BSIZE units
 */
int
cpr_statefile_offset(void)
{
	return (cpr_statefile_is_spec() ? btod(CPR_SPEC_OFFSET) : 0);
}

/*
 * Force a fresh read of the cprinfo per uadmin 3 call
 */
void
cpr_forget_cprconfig(void)
{
	cprconfig_loaded = 0;
}
#endif