/* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that: (1) source distributions retain this entire copyright * notice and comment, and (2) distributions including binaries display * the following acknowledgement: ``This product includes software * developed by the University of California, Berkeley and its contributors'' * in the documentation or other materials provided with the distribution * and in all advertising materials mentioning features or use of this * software. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED '`AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * In-core structures: * blockmap[] * A bitmap of block usage very similar to what's on disk, but * for the entire filesystem rather than just a cylinder group. * Zero indicates free, one indicates allocated. Note that this * is opposite the interpretation of a cylinder group's free block * bitmap. * * statemap[] * Tracks what is known about each inode in the filesystem. * The fundamental state value is one of USTATE, FSTATE, DSTATE, * or SSTATE (unallocated, file, directory, shadow/acl). * * There are optional modifying attributes as well: INZLINK, * INFOUND, INCLEAR, INORPHAN, and INDELAYD. The IN prefix * stands for inode. INZLINK declares that no links (di_nlink == * 0) to the inode have been found. 
It is used instead of * examining di_nlink because we've always got the statemap[] in * memory, and on average the odds are against having any given * inode in the cache. INFOUND flags that an inode was * encountered during the descent of the filesystem. In other * words, it's reachable, either by name or by being an acl or * attribute. INCLEAR declares an intent to call clri() on an * inode. The INCLEAR and INZLINK attributes are treated in a * mutually exclusive manner with INCLEAR taking higher precedence * as the intent is to clear the inode. * * INORPHAN indicates that the inode has already been seen once * in pass3 and determined to be an orphan, so any additional * encounters don't need to waste cycles redetermining that status. * It also means we don't ask the user about doing something to the * inode N times. * * INDELAYD marks inodes that pass1 determined needed to be truncated. * They can't be truncated during that pass, because it depends on * having a stable world for building the block and inode tables from. * * The IN flags are rarely used directly, but instead are * pre-combined through the {D,F,S}ZLINK, DFOUND, and * {D,F,S}CLEAR convenience macros. This mainly matters when * trying to use grep on the source. * * Three state-test macros are provided: S_IS_DUNFOUND(), * S_IS_DVALID(), and S_IS_ZLINK(). The first is true when an * inode's state indicates that it is either a simple directory * (DSTATE without the INFOUND or INCLEAR modifiers) or a * directory with the INZLINK modifier set. By definition, if a * directory has zero links, then it can't be found. As for * S_IS_DVALID(), it decides if a directory inode is alive. * Effectively, this translates to whether or not it's been * flagged for clearing. If not, then it's valid for current * purposes. This is true even if INZLINK is set, as we may find * a reference to it later. Finally, S_IS_ZLINK() just picks out * the INZLINK flag from the state. * * The S_*() macros all work on a state value. 
To simplify a * bit, the INO_IS_{DUNFOUND,DVALID}() macros take an inode * number argument. The inode is looked up in the statemap[] and * the result handed off to the corresponding S_*() macro. This * is partly a holdover from working with different data * structures (with the same net intent) in the BSD fsck. * * lncntp * Each entry is initialized to the di_link from the on-disk * inode. Each time we find one of those links, we decrement it. * Once all the traversing is done, we should have a zero. If we * have a positive value, then some reference disappeared * (probably from a directory that got nuked); deal with it by * fixing the count. If we have a negative value, then we found * an extra reference. This is a can't-happen, except in the * special case of when we reconnect a directory to its parent or * to lost+found. An exact match between lncntp[] and the on-disk * inode means it's completely unreferenced. * * aclphead * This is a hash table of the acl inodes in the filesystem. * * aclpsort * The same acls as in aclphead, but as a simple linear array. * It is used to hold the acl pointers for sorting and scanning * in pass3b. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" static void usage(void); static long argtol(int, char *, char *, int); static void checkfilesys(char *); static void check_sanity(char *); static void report_limbo(const void *, VISIT, int); #define QUICK_CHECK 'm' /* are things ok according to superblock? 
*/ #define ALL_no 'n' /* auto-answer interactive questions `no' */ #define ALL_NO 'N' /* auto-answer interactive questions `no' */ #define UFS_OPTS 'o' /* ufs-specific options, see subopts[] */ #define ECHO_CMD 'V' /* echo the command line */ #define ALL_yes 'y' /* auto-answer interactive questions `yes' */ #define ALL_YES 'Y' /* auto-answer interactive questions `yes' */ #define VERBOSE 'v' /* be chatty */ static char *subopts[] = { #define PREEN 0 /* non-interactive mode (parent is parallel) */ "p", #define BLOCK 1 /* alternate superblock */ "b", #define DEBUG 2 /* yammer */ "d", #define ONLY_WRITES 3 /* check all writable filesystems */ "w", #define FORCE 4 /* force checking, even if clean */ "f", NULL }; /* * Filesystems that are `magical' - if they exist in vfstab, * then they have to be mounted for the system to have gotten * far enough to be able to run fsck. Thus, don't get all * bent out of shape if we're asked to check it and it is mounted. */ char *magic_fs[] = { "", /* MAGIC_NONE, for normal filesystems */ "/", /* MAGIC_ROOT */ "/usr", /* MAGIC_USR */ NULL /* MAGIC_LIMIT */ }; int main(int argc, char *argv[]) { int c; int wflag = 0; char *suboptions, *value; struct rlimit rlimit; extern int optind; extern char *optarg; while ((c = getopt(argc, argv, "mnNo:VvyY")) != EOF) { switch (c) { case QUICK_CHECK: mflag++; break; case ALL_no: case ALL_NO: nflag++; yflag = 0; break; case VERBOSE: verbose++; break; case UFS_OPTS: /* * ufs specific options. 
*/ if (optarg == NULL) { usage(); /* * lint does not believe this, nor does it * believe #pragma does_not_return(usage) */ /* NOTREACHED */ } suboptions = optarg; while (*suboptions != '\0') { switch (getsubopt(&suboptions, subopts, &value)) { case PREEN: preen++; break; case BLOCK: bflag = argtol(BLOCK, "block", value, 10); (void) printf("Alternate super block " "location: %ld.\n", (long)bflag); break; case DEBUG: debug++; verbose++; break; case ONLY_WRITES: /* check only writable filesystems */ wflag++; break; case FORCE: fflag++; break; default: usage(); } } break; case ECHO_CMD: { int opt_count; char *opt_text; (void) printf("fsck -F ufs "); for (opt_count = 1; opt_count < argc; opt_count++) { opt_text = argv[opt_count]; if (opt_text) (void) printf("%s ", opt_text); } (void) printf("\n"); } break; case ALL_yes: case ALL_YES: yflag++; nflag = 0; break; default: usage(); } } argc -= optind; argv += optind; if (argc == 0) usage(); rflag++; /* check raw devices where we can */ if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void) signal(SIGINT, catch); if (preen) (void) signal(SIGQUIT, catchquit); /* * Push up our allowed memory limit so we can cope * with huge file systems. */ if (getrlimit(RLIMIT_DATA, &rlimit) == 0) { rlimit.rlim_cur = rlimit.rlim_max; (void) setrlimit(RLIMIT_DATA, &rlimit); } /* * There are a lot of places where we just exit if a problem is * found. This means that we won't necessarily check everything * we were asked to. It would be nice to do everything, and * then provide a summary when we're done. However, the * interface doesn't really allow us to do that in any useful * way. So, we'll just bail on the first unrecoverable * problem encountered. If we've been run by the generic * wrapper, we were only given one filesystem to check, so the * multi-fs case implies being run manually; that means the * user can rerun us on the remaining filesystems when it's * convenient for them. 
*/ while (argc-- > 0) { if (wflag && !writable(*argv)) { (void) fprintf(stderr, "not writeable '%s'\n", *argv); argv++; if (exitstat == 0) exitstat = EXBADPARM; } else { checkfilesys(*argv++); } } if (interrupted) exitstat = EXSIGNAL; exit(exitstat); } /* * A relatively intelligent strtol(). Note that if str is NULL, we'll * exit, so ret does not actually need to be pre-initialized. Lint * doesn't believe this, and it's harmless enough to make lint happy here. */ static long argtol(int flag, char *req, char *str, int base) { char *cp = str; long ret = -1; errno = 0; if (str != NULL) ret = strtol(str, &cp, base); if (cp == str || *cp) { (void) fprintf(stderr, "-%c flag requires a %s\n", flag, req); exit(EXBADPARM); } if (errno != 0) { (void) fprintf(stderr, "-%c %s value out of range\n", flag, req); } return (ret); } /* * Check the specified file system. */ static void checkfilesys(char *filesys) { daddr32_t n_ffree, n_bfree; char *devstr; fsck_ino_t files; daddr32_t blks; fsck_ino_t inumber; int zlinks_printed; fsck_ino_t limbo_victim; double dbl_nffree, dbl_dsize; int quiet_dups; mountfd = -1; hotroot = 0; mountedfs = M_NOMNT; reattached_dir = 0; broke_dir_link = 0; iscorrupt = 1; /* assume failure in setup() */ islog = 0; islogok = 0; overflowed_lf = 0; errorlocked = is_errorlocked(filesys); limbo_dirs = NULL; if ((devstr = setup(filesys)) == NULL) { if (!iscorrupt) { return; } if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); if ((exitstat == 0) && (mflag)) exitstat = EXUMNTCHK; exit(exitstat); } else { devname = devstr; } if (mflag) { check_sanity(filesys); /* NOTREACHED */ } if (debug) printclean(); iscorrupt = 0; /* setup() succeeded, assume good filesystem */ /* * 1: scan inodes tallying blocks used */ if (!preen) { /* hotroot is reported as such in setup() if debug is on */ if (mountedfs != M_NOMNT) (void) printf("** Currently Mounted on %s\n", sblock.fs_fsmnt); else (void) printf("** Last Mounted on %s\n", sblock.fs_fsmnt); (void) printf("** Phase 1 - Check 
Blocks and Sizes\n"); } pass1(); /* * 1b: locate first references to duplicates, if any */ if (have_dups()) { if (preen) pfatal("INTERNAL ERROR: dups with -o p"); (void) printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); } /* * 2: traverse directories from root to mark all connected directories */ if (!preen) (void) printf("** Phase 2 - Check Pathnames\n"); pass2(); /* * 3a: scan inodes looking for disconnected directories. */ if (!preen) (void) printf("** Phase 3a - Check Connectivity\n"); pass3a(); /* * 3b: check acls */ if (!preen) (void) printf("** Phase 3b - Verify Shadows/ACLs\n"); pass3b(); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (!preen) (void) printf("** Phase 4 - Check Reference Counts\n"); pass4(); /* * 5: check and repair resource counts in cylinder groups */ if (!preen) (void) printf("** Phase 5 - Check Cylinder Groups\n"); recount: pass5(); if (overflowed_lf) { iscorrupt = 1; } if (!nflag && mountedfs == M_RW) { (void) printf("FILESYSTEM MAY STILL BE INCONSISTENT.\n"); rerun = 1; } if (have_dups()) { quiet_dups = (reply("LIST REMAINING DUPS") == 0); if (report_dups(quiet_dups) > 0) iscorrupt = 1; (void) printf("WARNING: DATA LOSS MAY HAVE OCCURRED DUE TO " "DUP BLOCKS.\nVERIFY FILE CONTENTS BEFORE USING.\n"); } if (limbo_dirs != NULL) { /* * Don't force iscorrupt, as this is sufficiently * harmless that the filesystem can be mounted and * used. We just leak some inodes and/or blocks. */ pwarn("Orphan directories not cleared or reconnected:\n"); twalk(limbo_dirs, report_limbo); while (limbo_dirs != NULL) { limbo_victim = *(fsck_ino_t *)limbo_dirs; if (limbo_victim != NULL) { (void) tdelete((void *)limbo_victim, &limbo_dirs, ino_t_cmp); } } rerun = 1; } if (iscorrupt) { if (mountedfs == M_RW) (void) printf("FS IS MOUNTED R/W AND" " FSCK DID ITS BEST TO FIX" " INCONSISTENCIES.\n"); else (void) printf("FILESYSTEM MAY STILL BE" " INCONSISTENT.\n"); rerun = 1; } /* * iscorrupt must be stable at this point. 
* updateclean() returns true when it had to discard the log. * This can only happen once, since sblock.fs_logbno gets * cleared as part of that operation. */ if (updateclean()) { if (!preen) (void) printf( "Log was discarded, updating cyl groups\n"); goto recount; } if (debug) printclean(); ckfini(); /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; files = maxino - UFSROOTINO - sblock.fs_cstotal.cs_nifree - n_files; blks = n_blks + sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); blks += howmany(sblock.fs_cssize, sblock.fs_fsize); blks = maxfsblock - (n_ffree + sblock.fs_frag * n_bfree) - blks; if (debug && (files > 0 || blks > 0)) { countdirs = sblock.fs_cstotal.cs_ndir - countdirs; pwarn("Reclaimed: %d directories, %d files, %lld fragments\n", countdirs, files - countdirs, (longlong_t)blks); } dbl_nffree = (double)n_ffree; dbl_dsize = (double)sblock.fs_dsize; if (!verbose) { /* * Done as one big string to try for a single write, * so the output doesn't get interleaved with other * preening fscks. 
*/ pwarn("%ld files, %lld used, %lld free " "(%lld frags, %lld blocks, %.1f%% fragmentation)\n", (long)n_files, (longlong_t)n_blks, (longlong_t)n_ffree + sblock.fs_frag * n_bfree, (longlong_t)n_ffree, (longlong_t)n_bfree, (dbl_nffree * 100.0) / dbl_dsize); } else { pwarn("\nFilesystem summary:\n"); pwarn("Inodes in use: %ld\n", (long)n_files); pwarn("Blocks in use: %lld\n", (longlong_t)n_blks); pwarn("Total free fragments: %lld\n", (longlong_t)n_ffree + sblock.fs_frag * n_bfree); pwarn("Free fragments not in blocks: %lld\n", (longlong_t)n_ffree); pwarn("Total free blocks: %lld\n", (longlong_t)n_bfree); pwarn("Fragment/block fragmentation: %.1f%%\n", (dbl_nffree * 100.0) / dbl_dsize); pwarn(""); if (files < 0) pwarn("%d inodes missing\n", -files); if (blks < 0) pwarn("%lld blocks missing\n", -(longlong_t)blks); zlinks_printed = 0; for (inumber = UFSROOTINO; inumber < maxino; inumber++) { if (S_IS_ZLINK(statemap[inumber])) { if (zlinks_printed == 0) { pwarn("The following zero " "link count inodes remain:"); } if (zlinks_printed) { if ((zlinks_printed % 9) == 0) (void) puts(",\n"); else (void) puts(", "); } (void) printf("%u", inumber); zlinks_printed++; } } if ((zlinks_printed != 0) && ((zlinks_printed % 9) != 0)) (void) putchar('\n'); } /* * Clean up after ourselves, so we can do the next filesystem. 
*/ free_dup_state(); inocleanup(); free(blockmap); free(statemap); free((void *)lncntp); lncntp = NULL; blockmap = NULL; statemap = NULL; if (iscorrupt && exitstat == 0) exitstat = EXFNDERRS; if (fsmodified) (void) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (overflowed_lf) (void) printf("\n***** %s FULL, MUST REMOVE ENTRIES *****\n", lfname); if (reattached_dir) { (void) printf("ORPHANED DIRECTORIES REATTACHED; DIR LINK " "COUNTS MAY NOT BE CORRECT.\n"); rerun = 1; } if (broke_dir_link) { (void) printf( "DIRECTORY HARDLINK BROKEN; LOOPS MAY STILL EXIST.\n"); rerun = 1; } if (iscorrupt) (void) printf("***** FILE SYSTEM IS BAD *****\n"); if (rerun) { if (mountedfs == M_RW) (void) printf("\n***** PLEASE RERUN FSCK ON UNMOUNTED" " FILE SYSTEM *****\n"); else (void) printf("\n***** PLEASE RERUN FSCK *****\n"); } if ((exitstat == 0) && (((mountedfs != M_NOMNT) && !errorlocked) || hotroot)) { exitstat = EXROOTOKAY; } if ((exitstat == 0) && rerun) exitstat = EXFNDERRS; if (mountedfs != M_NOMNT) { if (!fsmodified) return; /* * _FIOFFS is much more effective than a simple sync(). * Note that the original fswritefd was discarded in * ckfini(). */ fswritefd = open(devstr, O_RDWR, 0); if (fswritefd != -1) { (void) ioctl(fswritefd, _FIOFFS, NULL); (void) close(fswritefd); } if (!preen) (void) printf("\n***** REBOOT NOW *****\n"); exitstat = EXREBOOTNOW; } } /* * fsck -m: does the filesystem pass cursory examination * * XXX This is very redundant with setup(). The right thing would be * for setup() to modify its behaviour when mflag is set (less * chatty, exit instead of return, etc). 
*/ void check_sanity(char *filename) { struct stat64 stbd, stbr; char *devname; struct ustat usb; char vfsfilename[MAXPATHLEN]; struct vfstab vfsbuf; FILE *vfstab; struct statvfs vfs_stat; int found_magic[MAGIC_LIMIT]; int magic_cnt; int is_magic = 0; int is_block; (void) memset((void *)found_magic, 0, sizeof (found_magic)); if (stat64(filename, &stbd) < 0) { (void) fprintf(stderr, "ufs fsck: sanity check failed : cannot stat %s\n", filename); exit(EXNOSTAT); } if ((stbd.st_mode & S_IFMT) == S_IFBLK) { is_block = 1; } else if ((stbd.st_mode & S_IFMT) == S_IFCHR) { is_block = 0; } else { /* * In !mflag mode, we allow checking the contents * of a file. Since this is intended primarily for * speeding up boot-time checks and allowing for a * file complicates the ok-input tests, we'll disallow * that option. */ (void) fprintf(stderr, "ufs fsck: sanity check failed: " "%s not block or character device\n", filename); exit(EXNOSTAT); } /* * Determine if this is the root file system via vfstab. Give up * silently on failures. The whole point of this is to be tolerant * of the magic file systems being already mounted. */ if ((vfstab = fopen(VFSTAB, "r")) != 0) { for (magic_cnt = 0; magic_cnt < MAGIC_LIMIT; magic_cnt++) { if (magic_cnt == MAGIC_NONE) continue; if (getvfsfile(vfstab, &vfsbuf, magic_fs[magic_cnt]) == 0) { if (is_block) devname = vfsbuf.vfs_special; else devname = vfsbuf.vfs_fsckdev; if (stat64(devname, &stbr) == 0) { if (stbr.st_rdev == stbd.st_rdev) { found_magic[magic_cnt] = 1; is_magic = magic_cnt; break; } } } } } /* * Only works if filename is a block device or if * character and block device has the same dev_t value. * This is currently true, but nothing really forces it. 
*/ if (!is_magic && (ustat(stbd.st_rdev, &usb) == 0)) { (void) fprintf(stderr, "ufs fsck: sanity check: %s already mounted\n", filename); exit(EXMOUNTED); } if (is_magic) { (void) strcpy(vfsfilename, magic_fs[is_magic]); if (statvfs(vfsfilename, &vfs_stat) != 0) { (void) fprintf(stderr, "ufs fsck: Cannot stat %s\n", vfsfilename); exit(EXNOSTAT); } if (!(vfs_stat.f_flag & ST_RDONLY)) { /* * The file system is mounted read/write * We need to exit saying this. If it's only * mounted readonly, we can continue. */ (void) fprintf(stderr, "ufs fsck: sanity check:" "%s already mounted read/write\n", filename); exit(EXMOUNTED); } } /* * We know that at boot, the ufs root file system is mounted * read-only first. After fsck runs, it is remounted as * read-write. Therefore, we do not need to check for different * values for fs_state between the root file system and the * rest of the file systems. */ if (islog && !islogok) { (void) fprintf(stderr, "ufs fsck: sanity check: %s needs checking\n", filename); exit(EXUMNTCHK); } if ((sblock.fs_state + (long)sblock.fs_time == FSOKAY) && (sblock.fs_clean == FSCLEAN || sblock.fs_clean == FSSTABLE || (sblock.fs_clean == FSLOG && islog))) { (void) fprintf(stderr, "ufs fsck: sanity check: %s okay\n", filename); } else { (void) fprintf(stderr, "ufs fsck: sanity check: %s needs checking\n", filename); exit(EXUMNTCHK); } exit(EXOKAY); } caddr_t hasvfsopt(struct vfstab *vfs, char *opt) { struct mnttab mtab; if (vfs->vfs_mntopts == NULL) return (NULL); mtab.mnt_mntopts = vfs->vfs_mntopts; return (hasmntopt(&mtab, opt)); } void usage(void) { (void) fprintf(stderr, "ufs usage: fsck [-F ufs] [-m] [-n] [-V] [-v] [-y] " "[-o p,b=#,w,f] [special ....]\n"); exit(EXBADPARM); } /*ARGSUSED*/ static void report_limbo(const void *node, VISIT order, int level) { fsck_ino_t ino = *(fsck_ino_t *)node; if ((order == postorder) || (order == leaf)) { (void) printf(" Inode %d\n", ino); } }