/* * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016 by Delphix. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that: (1) source distributions retain this entire copyright * notice and comment, and (2) distributions including binaries display * the following acknowledgement: ``This product includes software * developed by the University of California, Berkeley and its contributors'' * in the documentation or other materials provided with the distribution * and in all advertising materials mentioning features or use of this * software. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _KERNEL #include #undef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" caddr_t mount_point = NULL; static int64_t diskreads, totalreads; /* Disk cache statistics */ static int log_checksum(int32_t *, int32_t *, int); static void vdirerror(fsck_ino_t, caddr_t, va_list); static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t); static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t); static void vpwarn(caddr_t, va_list); static int getaline(FILE *, caddr_t, int); static struct bufarea *alloc_bufarea(void); static void rwerror(caddr_t, diskaddr_t, int rval); static void debugclean(void); static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t); static void freelogblk(daddr32_t); static void verrexit(caddr_t, va_list); static void vpfatal(caddr_t, va_list); static diskaddr_t get_device_size(int, caddr_t); static diskaddr_t brute_force_get_device_size(int); static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *, daddr32_t *, daddr32_t *, daddr32_t *); int ftypeok(struct dinode *dp) { switch (dp->di_mode & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: case IFSHAD: case IFATTRDIR: return (1); default: if (debug) (void) printf("bad file type 0%o\n", dp->di_mode); return (0); } } int acltypeok(struct dinode *dp) { if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT)) return (1); if (debug) (void) printf("bad file type for acl I=%d: 0%o\n", dp->di_shadow, dp->di_mode); return (0); } NOTE(PRINTFLIKE(1)) int reply(caddr_t fmt, ...) 
{ va_list ap; char line[80]; if (preen) pfatal("INTERNAL ERROR: GOT TO reply() in preen mode"); if (mflag) { /* * We don't know what's going on, so don't potentially * make things worse by having errexit() write stuff * out to disk. */ (void) printf( "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", devname); exit(EXERRFATAL); } va_start(ap, fmt); (void) putchar('\n'); (void) vprintf(fmt, ap); (void) putchar('?'); (void) putchar(' '); va_end(ap); if (nflag || fswritefd < 0) { (void) printf(" no\n\n"); return (0); } if (yflag) { (void) printf(" yes\n\n"); return (1); } (void) fflush(stdout); if (getaline(stdin, line, sizeof (line)) == EOF) errexit("\n"); (void) printf("\n"); if (line[0] == 'y' || line[0] == 'Y') { return (1); } else { return (0); } } int getaline(FILE *fp, caddr_t loc, int maxlen) { int n; caddr_t p, lastloc; p = loc; lastloc = &p[maxlen-1]; while ((n = getc(fp)) != '\n') { if (n == EOF) return (EOF); if (!isspace(n) && p < lastloc) *p++ = (char)n; } *p = '\0'; /* LINTED pointer difference won't overflow */ return (p - loc); } /* * Malloc buffers and set up cache. 
*/ void bufinit(void) { struct bufarea *bp; int bufcnt, i; caddr_t bufp; bufp = malloc((size_t)sblock.fs_bsize); if (bufp == NULL) goto nomem; initbarea(&cgblk); cgblk.b_un.b_buf = bufp; bufhead.b_next = bufhead.b_prev = &bufhead; bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof (struct bufarea)); if (bp == NULL) { if (i >= MINBUFS) goto noalloc; goto nomem; } bufp = malloc((size_t)sblock.fs_bsize); if (bufp == NULL) { free((void *)bp); if (i >= MINBUFS) goto noalloc; goto nomem; } initbarea(bp); bp->b_un.b_buf = bufp; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; } noalloc: bufhead.b_size = i; /* save number of buffers */ pbp = pdirbp = NULL; return; nomem: errexit("cannot allocate buffer pool\n"); /* NOTREACHED */ } /* * Undo a bufinit(). */ void unbufinit(void) { int cnt; struct bufarea *bp, *nbp; cnt = 0; for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) { cnt++; flush(fswritefd, bp); nbp = bp->b_prev; /* * We're discarding the entire chain, so this isn't * technically necessary. However, it doesn't hurt * and lint's data flow analysis is much happier * (this prevents it from thinking there's a chance * of our using memory elsewhere after it's been released). */ nbp->b_next = bp->b_next; bp->b_next->b_prev = nbp; free((void *)bp->b_un.b_buf); free((void *)bp); } if (bufhead.b_size != cnt) errexit("Panic: cache lost %d buffers\n", bufhead.b_size - cnt); } /* * Manage a cache of directory blocks. 
*/ struct bufarea * getdatablk(daddr32_t blkno, size_t size) { struct bufarea *bp; for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) if (bp->b_bno == fsbtodb(&sblock, blkno)) { goto foundit; } for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == &bufhead) { bp = alloc_bufarea(); if (bp == NULL) { errexit("deadlocked buffer pool\n"); /* NOTREACHED */ } } /* * We're at the same logical level as getblk(), so if there * are any errors, we'll let our caller handle them. */ diskreads++; (void) getblk(bp, blkno, size); foundit: totalreads++; bp->b_cnt++; /* * Move the buffer to head of linked list if it isn't * already there. */ if (bufhead.b_next != bp) { bp->b_prev->b_next = bp->b_next; bp->b_next->b_prev = bp->b_prev; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; } bp->b_flags |= B_INUSE; return (bp); } void brelse(struct bufarea *bp) { bp->b_cnt--; if (bp->b_cnt == 0) { bp->b_flags &= ~B_INUSE; } } struct bufarea * getblk(struct bufarea *bp, daddr32_t blk, size_t size) { diskaddr_t dblk; dblk = fsbtodb(&sblock, blk); if (bp->b_bno == dblk) return (bp); flush(fswritefd, bp); bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; return (bp); } void flush(int fd, struct bufarea *bp) { int i, j; caddr_t sip; long size; if (!bp->b_dirty) return; /* * It's not our buf, so if there are errors, let whoever * acquired it deal with the actual problem. */ if (bp->b_errs != 0) pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno); bp->b_dirty = 0; bp->b_errs = 0; bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); if (bp != &sblk) { return; } /* * We're flushing the superblock, so make sure all the * ancillary bits go out as well. */ sip = (caddr_t)sblock.fs_u.fs_csp; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? 
sblock.fs_cssize - i : sblock.fs_bsize; bwrite(fswritefd, sip, fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size); sip += size; } } static void rwerror(caddr_t mesg, diskaddr_t blk, int rval) { int olderr = errno; if (!preen) (void) printf("\n"); if (rval == -1) pfatal("CANNOT %s: DISK BLOCK %lld: %s", mesg, blk, strerror(olderr)); else pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk); if (reply("CONTINUE") == 0) { exitstat = EXERRFATAL; errexit("Program terminated\n"); } } void ckfini(void) { int64_t percentage; if (fswritefd < 0) return; flush(fswritefd, &sblk); /* * Were we using a backup superblock? */ if (havesb && sblk.b_bno != SBOFF / dev_bsize) { if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) { sblk.b_bno = SBOFF / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } } flush(fswritefd, &cgblk); if (cgblk.b_un.b_buf != NULL) { free((void *)cgblk.b_un.b_buf); cgblk.b_un.b_buf = NULL; } unbufinit(); pbp = NULL; pdirbp = NULL; if (debug) { /* * Note that we only count cache-related reads. * Anything that called fsck_bread() or getblk() * directly are explicitly not cached, so they're not * included here. */ if (totalreads != 0) percentage = diskreads * 100 / totalreads; else percentage = 0; (void) printf("cache missed %lld of %lld reads (%lld%%)\n", (longlong_t)diskreads, (longlong_t)totalreads, (longlong_t)percentage); } (void) close(fsreadfd); (void) close(fswritefd); fsreadfd = -1; fswritefd = -1; } int fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size) { caddr_t cp; int i; int errs; offset_t offset = ldbtob(blk); offset_t addr; /* * In our universe, nothing exists before the superblock, so * just pretend it's always zeros. This is the complement of * bwrite()'s ignoring write requests into that space. 
*/ if (blk < SBLOCK) { if (debug) (void) printf( "WARNING: fsck_bread() passed blkno < %d (%lld)\n", SBLOCK, (longlong_t)blk); (void) memset(buf, 0, (size_t)size); return (1); } if (llseek(fd, offset, SEEK_SET) < 0) { rwerror("SEEK", blk, -1); } if ((i = read(fd, buf, size)) == size) { return (0); } rwerror("READ", blk, i); if (llseek(fd, offset, SEEK_SET) < 0) { rwerror("SEEK", blk, -1); } errs = 0; (void) memset(buf, 0, (size_t)size); pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { addr = ldbtob(blk + i); if (llseek(fd, addr, SEEK_SET) < 0 || read(fd, cp, (int)secsize) < 0) { iscorrupt = 1; (void) printf(" %llu", blk + (u_longlong_t)i); errs++; } } (void) printf("\n"); return (errs); } void bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size) { int i; int n; caddr_t cp; offset_t offset = ldbtob(blk); offset_t addr; if (fd < 0) return; if (blk < SBLOCK) { if (debug) (void) printf( "WARNING: Attempt to write illegal blkno %lld on %s\n", (longlong_t)blk, devname); return; } if (llseek(fd, offset, SEEK_SET) < 0) { rwerror("SEEK", blk, -1); } if ((i = write(fd, buf, (int)size)) == size) { fsmodified = 1; return; } rwerror("WRITE", blk, i); if (llseek(fd, offset, SEEK_SET) < 0) { rwerror("SEEK", blk, -1); } pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { n = 0; addr = ldbtob(blk + i); if (llseek(fd, addr, SEEK_SET) < 0 || (n = write(fd, cp, DEV_BSIZE)) < 0) { iscorrupt = 1; (void) printf(" %llu", blk + (u_longlong_t)i); } else if (n > 0) { fsmodified = 1; } } (void) printf("\n"); } /* * Allocates the specified number of contiguous fragments. */ daddr32_t allocblk(int wantedfrags) { int block, leadfrag, tailfrag; daddr32_t selected; size_t size; struct bufarea *bp; /* * It's arguable whether we should just fail, or instead * error out here. 
Since we should only ever be asked for * a single fragment or an entire block (i.e., sblock.fs_frag), * we'll fail out because anything else means somebody * changed code without considering all of the ramifications. */ if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) { exitstat = EXERRFATAL; errexit("allocblk() asked for %d frags. " "Legal range is 1 to %d", wantedfrags, sblock.fs_frag); } /* * For each filesystem block, look at every possible starting * offset within the block such that we can get the number of * contiguous fragments that we need. This is a drastically * simplified version of the kernel's mapsearch() and alloc*(). * It's also correspondingly slower. */ for (block = 0; block < maxfsblock - sblock.fs_frag; block += sblock.fs_frag) { for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags; leadfrag++) { /* * Is first fragment of candidate run available? */ if (testbmap(block + leadfrag)) continue; /* * Are the rest of them available? */ for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++) if (testbmap(block + leadfrag + tailfrag)) break; if (tailfrag < wantedfrags) { /* * No, skip the known-unusable run. */ leadfrag += tailfrag; continue; } /* * Found what we need, so claim them. 
*/ for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++) setbmap(block + leadfrag + tailfrag); n_blks += wantedfrags; size = wantedfrags * sblock.fs_fsize; selected = block + leadfrag; bp = getdatablk(selected, size); (void) memset((void *)bp->b_un.b_buf, 0, size); dirty(bp); brelse(bp); if (debug) (void) printf( "allocblk: selected %d (in block %d), frags %d, size %d\n", selected, selected % sblock.fs_bsize, wantedfrags, (int)size); return (selected); } } return (0); } /* * Free a previously allocated block */ void freeblk(fsck_ino_t ino, daddr32_t blkno, int frags) { struct inodesc idesc; if (debug) (void) printf("debug: freeing %d fragments starting at %d\n", frags, blkno); init_inodesc(&idesc); idesc.id_number = ino; idesc.id_blkno = blkno; idesc.id_numfrags = frags; idesc.id_truncto = -1; /* * Nothing in the return status has any relevance to how * we're using pass4check(), so just ignore it. */ (void) pass4check(&idesc); } /* * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes * that the given buffer is at least MAXPATHLEN + 1 characters. */ void getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino) { int len; caddr_t cp; struct dinode *dp; struct inodesc idesc; struct inoinfo *inp; if (debug) (void) printf("debug: getpathname(curdir %d, ino %d)\n", curdir, ino); if ((curdir == 0) || (!INO_IS_DVALID(curdir))) { (void) strcpy(namebuf, "?"); return; } if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) { (void) strcpy(namebuf, "/"); return; } init_inodesc(&idesc); idesc.id_type = DATA; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; /* * In the case of extended attributes, our * parent won't necessarily be a directory, so just * return what we've found with a prefix indicating * that it's an XATTR. Presumably our caller will * know what's going on and do something useful, like * work out the path of the parent and then combine * the two names. 
* * Can't use strcpy(), etc, because we've probably * already got some name information in the buffer and * the usual trailing \0 would lose it. */ dp = ginode(curdir); if ((dp->di_mode & IFMT) == IFATTRDIR) { idesc.id_number = curdir; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; idesc.id_fix = NOFIX; if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) { *cp-- = '?'; } len = sizeof (XATTR_DIR_NAME) - 1; cp -= len; (void) memmove(cp, XATTR_DIR_NAME, len); goto attrname; } /* * If curdir == ino, need to get a handle on .. so we * can search it for ino's name. Otherwise, just search * the given directory for ino. Repeat until out of space * or a full path has been built. */ if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != UFSROOTINO && ino != 0) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = ".."; idesc.id_fix = NOFIX; if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) { inp = getinoinfo(ino); if ((inp == NULL) || (inp->i_parent == 0)) { break; } idesc.id_parent = inp->i_parent; } /* * To get this far, id_parent must have the inode * number for `..' in it. By definition, that's got * to be a directory, so search it for the inode of * interest. */ namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; idesc.id_fix = NOFIX; if ((ckinode(ginode(idesc.id_number), &idesc, CKI_TRAVERSE) & FOUND) == 0) { break; } /* * Prepend to what we've accumulated so far. If * there's not enough room for even one more path element * (of the worst-case length), then bail out. */ len = strlen(namebuf); cp -= len; if (cp < &namebuf[MAXNAMLEN]) break; (void) memmove(cp, namebuf, len); *--cp = '/'; /* * Corner case for a looped-to-itself directory. */ if (ino == idesc.id_number) break; /* * Climb one level of the hierarchy. In other words, * the current .. 
becomes the inode to search for and * its parent becomes the directory to search in. */ ino = idesc.id_number; } /* * If we hit a discontinuity in the hierarchy, indicate it by * prefixing the path so far with `?'. Otherwise, the first * character will be `/' as a side-effect of the *--cp above. * * The special case is to handle the situation where we're * trying to look something up in UFSROOTINO, but didn't find * it. */ if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) { if (cp > namebuf) cp--; *cp = '?'; } /* * The invariants being used for buffer integrity are: * - namebuf[] is terminated with \0 before anything else * - cp is always <= the last element of namebuf[] * - the new path element is always stored at the * beginning of namebuf[], and is no more than MAXNAMLEN-1 * characters * - cp is is decremented by the number of characters in * the new path element * - if, after the above accounting for the new element's * size, there is no longer enough room at the beginning of * namebuf[] for a full-sized path element and a slash, * terminate the loop. cp is in the range * &namebuf[0]..&namebuf[MAXNAMLEN - 1] */ attrname: /* LINTED per the above discussion */ (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp); } /* ARGSUSED */ void catch(int dummy) { ckfini(); exit(EXSIGNAL); } /* * When preening, allow a single quit to signal * a special exit after filesystem checks complete * so that reboot sequence may be interrupted. */ /* ARGSUSED */ void catchquit(int dummy) { (void) printf("returning to single-user after filesystem check\n"); interrupted = 1; (void) signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ NOTE(PRINTFLIKE(2)) int dofix(struct inodesc *idesc, caddr_t msg, ...) 
{ int rval = 0; va_list ap; va_start(ap, msg); switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) vdirerror(idesc->id_number, msg, ap); else vpwarn(msg, ap); if (preen) { idesc->id_fix = FIX; rval = ALTERED; break; } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; break; } idesc->id_fix = FIX; rval = ALTERED; break; case FIX: rval = ALTERED; break; case NOFIX: break; default: errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix); } va_end(ap); return (rval); } NOTE(PRINTFLIKE(1)) void errexit(caddr_t fmt, ...) { va_list ap; va_start(ap, fmt); verrexit(fmt, ap); /* NOTREACHED */ } NOTE(PRINTFLIKE(1)) static void verrexit(caddr_t fmt, va_list ap) { static int recursing = 0; if (!recursing) { recursing = 1; if (errorlocked || iscorrupt) { if (havesb && fswritefd >= 0) { sblock.fs_clean = FSBAD; sblock.fs_state = FSOKAY - (long)sblock.fs_time; sblock.fs_state = -sblock.fs_state; sbdirty(); write_altsb(fswritefd); flush(fswritefd, &sblk); } } ckfini(); recursing = 0; } (void) vprintf(fmt, ap); if (fmt[strlen(fmt) - 1] != '\n') (void) putchar('\n'); exit((exitstat != 0) ? exitstat : EXERRFATAL); } /* * An unexpected inconsistency occured. * Die if preening, otherwise just print message and continue. */ NOTE(PRINTFLIKE(1)) void pfatal(caddr_t fmt, ...) { va_list ap; va_start(ap, fmt); vpfatal(fmt, ap); va_end(ap); } NOTE(PRINTFLIKE(1)) static void vpfatal(caddr_t fmt, va_list ap) { if (preen) { if (*fmt != '\0') { (void) printf("%s: ", devname); (void) vprintf(fmt, ap); (void) printf("\n"); } (void) printf( "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", devname); if (havesb && fswritefd >= 0) { sblock.fs_clean = FSBAD; sblock.fs_state = -(FSOKAY - (long)sblock.fs_time); sbdirty(); flush(fswritefd, &sblk); } /* * We're exiting, it doesn't really matter that our * caller doesn't get to call va_end(). 
*/ if (exitstat == 0) exitstat = EXFNDERRS; exit(exitstat); } if (*fmt != '\0') { (void) vprintf(fmt, ap); } } /* * Pwarn just prints a message when not preening, * or a warning (preceded by filename) when preening. */ NOTE(PRINTFLIKE(1)) void pwarn(caddr_t fmt, ...) { va_list ap; va_start(ap, fmt); vpwarn(fmt, ap); va_end(ap); } NOTE(PRINTFLIKE(1)) static void vpwarn(caddr_t fmt, va_list ap) { if (*fmt != '\0') { if (preen) (void) printf("%s: ", devname); (void) vprintf(fmt, ap); } } /* * Like sprintf(), except the buffer is dynamically allocated * and returned, instead of being passed in. A pointer to the * buffer is stored in *RET, and FMT is the usual format string. * The number of characters in *RET (excluding the trailing \0, * to be consistent with the other *printf() routines) is returned. * * Solaris doesn't have asprintf(3C) yet, unfortunately. */ NOTE(PRINTFLIKE(2)) int fsck_asprintf(caddr_t *ret, caddr_t fmt, ...) { int len; caddr_t buffer; va_list ap; va_start(ap, fmt); len = vsnprintf(NULL, 0, fmt, ap); va_end(ap); buffer = malloc((len + 1) * sizeof (char)); if (buffer == NULL) { errexit("Out of memory in asprintf\n"); /* NOTREACHED */ } va_start(ap, fmt); (void) vsnprintf(buffer, len + 1, fmt, ap); va_end(ap); *ret = buffer; return (len); } /* * So we can take advantage of kernel routines in ufs_subr.c. */ /* PRINTFLIKE2 */ void cmn_err(int level, caddr_t fmt, ...) { va_list ap; va_start(ap, fmt); if (level == CE_PANIC) { (void) printf("INTERNAL INCONSISTENCY:"); verrexit(fmt, ap); } else { (void) vprintf(fmt, ap); } va_end(ap); } /* * Check to see if unraw version of name is already mounted. * Updates devstr with the device name if devstr is not NULL * and str_size is positive. */ int mounted(caddr_t name, caddr_t devstr, size_t str_size) { int found; struct mnttab *mntent; mntent = search_mnttab(NULL, unrawname(name), devstr, str_size); if (mntent == NULL) return (M_NOMNT); /* * It's mounted. With or without write access? 
*/ if (hasmntopt(mntent, MNTOPT_RO) != 0) found = M_RO; /* mounted as RO */ else found = M_RW; /* mounted as R/W */ if (mount_point == NULL) { mount_point = strdup(mntent->mnt_mountp); if (mount_point == NULL) { errexit("fsck: memory allocation failure: %s", strerror(errno)); /* NOTREACHED */ } if (devstr != NULL && str_size > 0) (void) strlcpy(devstr, mntent->mnt_special, str_size); } return (found); } /* * Check to see if name corresponds to an entry in vfstab, and that the entry * does not have option ro. */ int writable(caddr_t name) { int rw = 1; struct vfstab vfsbuf, vfskey; FILE *vfstab; vfstab = fopen(VFSTAB, "r"); if (vfstab == NULL) { (void) printf("can't open %s\n", VFSTAB); return (1); } (void) memset((void *)&vfskey, 0, sizeof (vfskey)); vfsnull(&vfskey); vfskey.vfs_special = unrawname(name); vfskey.vfs_fstype = MNTTYPE_UFS; if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) && (hasvfsopt(&vfsbuf, MNTOPT_RO))) { rw = 0; } (void) fclose(vfstab); return (rw); } /* * debugclean */ static void debugclean(void) { if (!debug) return; if ((iscorrupt == 0) && (isdirty == 0)) return; if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) || (sblock.fs_clean == FSLOG && islog && islogok) || ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked)) return; (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n", sblock.fs_clean == FSSTABLE ? "stable" : sblock.fs_clean == FSLOG ? "logging" : sblock.fs_clean == FSFIX ? "being fixed" : "clean", devname); } /* * updateclean * Carefully and transparently update the clean flag. * * `iscorrupt' has to be in its final state before this is called. 
*/ int updateclean(void) { int freedlog = 0; struct bufarea cleanbuf; size_t size; ssize_t io_res; diskaddr_t bno; char fsclean; int fsreclaim; char fsflags; int flags_ok = 1; daddr32_t fslogbno; offset_t sblkoff; time_t t; /* * debug stuff */ debugclean(); /* * set fsclean to its appropriate value */ fslogbno = sblock.fs_logbno; fsclean = sblock.fs_clean; fsreclaim = sblock.fs_reclaim; fsflags = sblock.fs_flags; if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) { fsclean = FSACTIVE; } /* * If ufs log is not okay, note that we need to clear it. */ examinelog(NULL); if (fslogbno && !(islog && islogok)) { fsclean = FSACTIVE; fslogbno = 0; } /* * if necessary, update fs_clean and fs_state */ switch (fsclean) { case FSACTIVE: if (!iscorrupt) { fsclean = FSSTABLE; fsreclaim = 0; } break; case FSCLEAN: case FSSTABLE: if (iscorrupt) { fsclean = FSACTIVE; } else { fsreclaim = 0; } break; case FSLOG: if (iscorrupt) { fsclean = FSACTIVE; } else if (!islog || fslogbno == 0) { fsclean = FSSTABLE; fsreclaim = 0; } else if (fflag) { fsreclaim = 0; } break; case FSFIX: fsclean = FSBAD; if (errorlocked && !iscorrupt) { fsclean = islog ? FSLOG : FSCLEAN; } break; default: if (iscorrupt) { fsclean = FSACTIVE; } else { fsclean = FSSTABLE; fsreclaim = 0; } } if (largefile_count > 0) fsflags |= FSLARGEFILES; else fsflags &= ~FSLARGEFILES; /* * There can be two discrepencies here. A) The superblock * shows no largefiles but we found some while scanning. * B) The superblock indicates the presence of largefiles, * but none are present. Note that if preening, the superblock * is silently corrected. */ if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) || (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES)) flags_ok = 0; if (debug) (void) printf( "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n", largefile_count, sblock.fs_flags, flags_ok); /* * If fs is unchanged, do nothing. 
*/ if ((!isdirty) && (flags_ok) && (fslogbno == sblock.fs_logbno) && (sblock.fs_clean == fsclean) && (sblock.fs_reclaim == fsreclaim) && (FSOKAY == (sblock.fs_state + sblock.fs_time))) { if (errorlocked) { if (!do_errorlock(LOCKFS_ULOCK)) pwarn( "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n"); } return (freedlog); } /* * if user allows, update superblock state */ if (debug) { (void) printf( "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", sblock.fs_flags, sblock.fs_logbno, sblock.fs_clean, sblock.fs_reclaim, sblock.fs_state + sblock.fs_time); (void) printf( "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", fsflags, fslogbno, fsclean, fsreclaim, FSOKAY); } if (!isdirty && !preen && !rerun && (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0)) return (freedlog); (void) time(&t); sblock.fs_time = (time32_t)t; if (debug) printclean(); if (sblock.fs_logbno != fslogbno) { examinelog(&freelogblk); freedlog++; } sblock.fs_logbno = fslogbno; sblock.fs_clean = fsclean; sblock.fs_state = FSOKAY - (long)sblock.fs_time; sblock.fs_reclaim = fsreclaim; sblock.fs_flags = fsflags; /* * if superblock can't be written, return */ if (fswritefd < 0) return (freedlog); /* * Read private copy of superblock, update clean flag, and write it. 
*/ bno = sblk.b_bno; size = sblk.b_size; sblkoff = ldbtob(bno); if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL) errexit("out of memory"); if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) { (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", (longlong_t)bno, strerror(errno)); goto out; } if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) { report_io_prob("READ FROM", bno, size, io_res); goto out; } cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno; cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean; cleanbuf.b_un.b_fs->fs_state = sblock.fs_state; cleanbuf.b_un.b_fs->fs_time = sblock.fs_time; cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim; cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags; if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) { (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", (longlong_t)bno, strerror(errno)); goto out; } if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) { report_io_prob("WRITE TO", bno, size, io_res); goto out; } /* * 1208040 * If we had to use -b to grab an alternate superblock, then we * likely had to do so because of unacceptable differences between * the main and alternate superblocks. So, we had better update * the alternate superblock as well, or we'll just fail again * the next time we attempt to run fsck! 
*/ if (bflag != 0) { write_altsb(fswritefd); } if (errorlocked) { if (!do_errorlock(LOCKFS_ULOCK)) pwarn( "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n"); } out: if (cleanbuf.b_un.b_buf != NULL) { free((void *)cleanbuf.b_un.b_buf); } return (freedlog); } static void report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure) { if (failure < 0) (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n", what, (int)bno, strerror(errno)); else if (failure == 0) (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n", what, (int)bno); else (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n", what, (int)bno, (unsigned)failure, (unsigned)expected); } /* * print out clean info */ void printclean(void) { caddr_t s; if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) s = "unknown"; else switch (sblock.fs_clean) { case FSACTIVE: s = "active"; break; case FSCLEAN: s = "clean"; break; case FSSTABLE: s = "stable"; break; case FSLOG: s = "logging"; break; case FSBAD: s = "is bad"; break; case FSFIX: s = "being fixed"; break; default: s = "unknown"; } if (preen) pwarn("is %s.\n", s); else (void) printf("** %s is %s.\n", devname, s); } int is_errorlocked(caddr_t fs) { int retval; struct stat64 statb; caddr_t mountp; struct mnttab *mntent; retval = 0; if (!fs) return (0); if (stat64(fs, &statb) < 0) return (0); if (S_ISDIR(statb.st_mode)) { mountp = fs; } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) { mntent = search_mnttab(NULL, fs, NULL, 0); if (mntent == NULL) return (0); mountp = mntent->mnt_mountp; if (mountp == NULL) /* theoretically a can't-happen */ return (0); } else { return (0); } /* * From here on, must `goto out' to avoid memory leakage. 
*/ if (elock_combuf == NULL) elock_combuf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char)); else elock_combuf = (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN); if (elock_combuf == NULL) goto out; (void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN); if (elock_mountp != NULL) { free(elock_mountp); } elock_mountp = strdup(mountp); if (elock_mountp == NULL) goto out; if (mountfd < 0) { if ((mountfd = open64(mountp, O_RDONLY)) == -1) goto out; } if (lfp == NULL) { lfp = (struct lockfs *)malloc(sizeof (struct lockfs)); if (lfp == NULL) goto out; (void) memset((void *)lfp, 0, sizeof (struct lockfs)); } lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; lfp->lf_comment = elock_combuf; if (ioctl(mountfd, _FIOLFSS, lfp) == -1) goto out; /* * lint believes that the ioctl() (or any other function * taking lfp as an arg) could free lfp. This is not the * case, however. */ retval = LOCKFS_IS_ELOCK(lfp); out: return (retval); } /* * Given a name which is known to be a directory, see if it appears * in the vfstab. If so, return the entry's block (special) device * field via devstr. */ int check_vfstab(caddr_t name, caddr_t devstr, size_t str_size) { return (NULL != search_vfstab(name, NULL, devstr, str_size)); } /* * Given a name which is known to be a directory, see if it appears * in the mnttab. If so, return the entry's block (special) device * field via devstr. */ int check_mnttab(caddr_t name, caddr_t devstr, size_t str_size) { return (NULL != search_mnttab(name, NULL, devstr, str_size)); } /* * Search for mount point and/or special device in the given file. * The first matching entry is returned. * * If an entry is found and str_size is greater than zero, then * up to size_str bytes of the special device name from the entry * are copied to devstr. 
*/ #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \ st_nuller, st_init, st_searcher) \ { \ FILE *fp; \ struct st_type *retval = NULL; \ struct st_type key; \ static struct st_type buffer; \ \ /* LINTED ``assigned value never used'' */ \ st_nuller(&key); \ key.st_mount = mountp; \ key.st_special = special; \ st_init; \ \ if ((fp = fopen(st_file, "r")) == NULL) \ return (NULL); \ \ if (st_searcher(fp, &buffer, &key) == 0) { \ retval = &buffer; \ if (devstr != NULL && str_size > 0 && \ buffer.st_special != NULL) { \ (void) strlcpy(devstr, buffer.st_special, \ str_size); \ } \ } \ (void) fclose(fp); \ return (retval); \ } static struct vfstab * search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull, (retval = retval), getvfsany) static struct mnttab * search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull, (key.mnt_fstype = MNTTYPE_UFS), getmntany) int do_errorlock(int lock_type) { caddr_t buf; time_t now; struct tm *local; int rc; if (elock_combuf == NULL) errexit("do_errorlock(%s, %d): unallocated elock_combuf\n", elock_mountp ? elock_mountp : "", lock_type); if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) == NULL) { errexit("Couldn't alloc memory for temp. lock status buffer\n"); } if (lfp == NULL) { errexit("do_errorlock(%s, %d): lockfs status unallocated\n", elock_mountp, lock_type); } (void) memmove((void *)buf, (void *)elock_combuf, LOCKFS_MAXCOMMENTLEN-1); switch (lock_type) { case LOCKFS_ELOCK: /* * Note that if it is error-locked, we won't get an * error back if we try to error-lock it again. 
*/ if (time(&now) != (time_t)-1) { if ((local = localtime(&now)) != NULL) (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d", elock_combuf, (int)pid, local->tm_mon + 1, local->tm_mday, (local->tm_year % 100), local->tm_hour, local->tm_min, local->tm_sec); else (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, "%s [fsck pid %d", elock_combuf, pid); } else { (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, "%s [fsck pid %d", elock_combuf, pid); } break; case LOCKFS_ULOCK: if (time(&now) != (time_t)-1) { if ((local = localtime(&now)) != NULL) { (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, "%s, done:%02d/%02d/%02d %02d:%02d:%02d]", elock_combuf, local->tm_mon + 1, local->tm_mday, (local->tm_year % 100), local->tm_hour, local->tm_min, local->tm_sec); } else { (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, "%s]", elock_combuf); } } else { (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, "%s]", elock_combuf); } if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) { pwarn("do_errorlock: unlock failed: %s\n", strerror(errno)); goto out; } break; default: break; } (void) memmove((void *)elock_combuf, (void *)buf, LOCKFS_MAXCOMMENTLEN - 1); lfp->lf_lock = lock_type; lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; lfp->lf_comment = elock_combuf; lfp->lf_flags = 0; errno = 0; if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) { if (errno == EINVAL) { pwarn("Another fsck active?\n"); iscorrupt = 0; /* don't go away mad, just go away */ } else { pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n", lock_type, elock_combuf, strerror(errno)); } } out: if (buf != NULL) { free((void *)buf); } return (rc != -1); } /* * Shadow inode support. To register a shadow with a client is to note * that an inode (the client) refers to the shadow. 
*/ static struct shadowclients * newshadowclient(struct shadowclients *prev) { struct shadowclients *rc; rc = (struct shadowclients *)malloc(sizeof (*rc)); if (rc == NULL) errexit("newshadowclient: cannot malloc shadow client"); rc->next = prev; rc->nclients = 0; rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) * maxshadowclients); if (rc->client == NULL) errexit("newshadowclient: cannot malloc client array"); return (rc); } void registershadowclient(fsck_ino_t shadow, fsck_ino_t client, struct shadowclientinfo **info) { struct shadowclientinfo *sci; struct shadowclients *scc; /* * Already have a record for this shadow? */ for (sci = *info; sci != NULL; sci = sci->next) if (sci->shadow == shadow) break; if (sci == NULL) { /* * It's a new shadow, add it to the list */ sci = (struct shadowclientinfo *)malloc(sizeof (*sci)); if (sci == NULL) errexit("registershadowclient: cannot malloc"); sci->next = *info; *info = sci; sci->shadow = shadow; sci->totalClients = 0; sci->clients = newshadowclient(NULL); } sci->totalClients++; scc = sci->clients; if (scc->nclients >= maxshadowclients) { scc = newshadowclient(sci->clients); sci->clients = scc; } scc->client[scc->nclients++] = client; } /* * Locate and discard a shadow. */ void clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info) { struct shadowclientinfo *sci, *prev; /* * Do we have a record for this shadow? */ prev = NULL; for (sci = *info; sci != NULL; sci = sci->next) { if (sci->shadow == shadow) break; prev = sci; } if (sci != NULL) { /* * First, pull it off the list, since we know there * shouldn't be any future references to this one. */ if (prev == NULL) *info = sci->next; else prev->next = sci->next; deshadow(sci, clearattrref); } } /* * Discard all memory used to track clients of a shadow. 
*/ void deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t)) { struct shadowclients *clients, *discard; int idx; clients = sci->clients; while (clients != NULL) { discard = clients; clients = clients->next; if (discard->client != NULL) { if (cb != NULL) { for (idx = 0; idx < discard->nclients; idx++) (*cb)(discard->client[idx]); } free((void *)discard->client); } free((void *)discard); } free((void *)sci); } /* * Allocate more buffer as need arises but allocate one at a time. * This is done to make sure that fsck does not exit with error if it * needs more buffer to complete its task. */ static struct bufarea * alloc_bufarea(void) { struct bufarea *newbp; caddr_t bufp; bufp = malloc((unsigned int)sblock.fs_bsize); if (bufp == NULL) return (NULL); newbp = (struct bufarea *)malloc(sizeof (struct bufarea)); if (newbp == NULL) { free((void *)bufp); return (NULL); } initbarea(newbp); newbp->b_un.b_buf = bufp; newbp->b_prev = &bufhead; newbp->b_next = bufhead.b_next; bufhead.b_next->b_prev = newbp; bufhead.b_next = newbp; bufhead.b_size++; return (newbp); } /* * We length-limit in both unrawname() and rawname() to avoid * overflowing our arrays or those of our naive, trusting callers. */ caddr_t unrawname(caddr_t name) { caddr_t dp; static char fullname[MAXPATHLEN + 1]; if ((dp = getfullblkname(name)) == NULL) return (""); (void) strlcpy(fullname, dp, sizeof (fullname)); /* * Not reporting under debug, as the allocation isn't * reported by getfullblkname. The idea is that we * produce balanced alloc/free instances. */ free(dp); return (fullname); } caddr_t rawname(caddr_t name) { caddr_t dp; static char fullname[MAXPATHLEN + 1]; if ((dp = getfullrawname(name)) == NULL) return (""); (void) strlcpy(fullname, dp, sizeof (fullname)); /* * Not reporting under debug, as the allocation isn't * reported by getfullblkname. The idea is that we * produce balanced alloc/free instances. 
*/ free(dp); return (fullname); } /* * Make sure that a cg header looks at least moderately reasonable. * We want to be able to trust the contents enough to be able to use * the standard accessor macros. So, besides looking at the obvious * such as the magic number, we verify that the offset field values * are properly aligned and not too big or small. * * Returns a NULL pointer if the cg is sane enough for our needs, else * a dynamically-allocated string describing all of its faults. */ #define Append_Error(full, full_len, addition, addition_len) \ if (full == NULL) { \ full = addition; \ full_len = addition_len; \ } else { \ /* lint doesn't think realloc() understands NULLs */ \ full = realloc(full, full_len + addition_len + 1); \ if (full == NULL) { \ errexit("Out of memory in cg_sanity"); \ /* NOTREACHED */ \ } \ (void) strcpy(full + full_len, addition); \ full_len += addition_len; \ free(addition); \ } caddr_t cg_sanity(struct cg *cgp, int cgno) { caddr_t full_err; caddr_t this_err = NULL; int full_len, this_len; daddr32_t ndblk; daddr32_t exp_btotoff, exp_boff, exp_iusedoff; daddr32_t exp_freeoff, exp_nextfreeoff; cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, &exp_freeoff, &exp_nextfreeoff, &ndblk); full_err = NULL; full_len = 0; if (!cg_chkmagic(cgp)) { this_len = fsck_asprintf(&this_err, "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n", cgp->cg_magic, CG_MAGIC); Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_cgx != cgno) { this_len = fsck_asprintf(&this_err, "WRONG CG NUMBER (%d should be %d)\n", cgp->cg_cgx, cgno); Append_Error(full_err, full_len, this_err, this_len); } if ((cgp->cg_btotoff & 3) != 0) { this_len = fsck_asprintf(&this_err, "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n", cgp->cg_btotoff); Append_Error(full_err, full_len, this_err, this_len); } if ((cgp->cg_boff & 1) != 0) { this_len = fsck_asprintf(&this_err, "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n", cgp->cg_boff); Append_Error(full_err, 
full_len, this_err, this_len); } if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { if (cgp->cg_ncyl < 1) { this_len = fsck_asprintf(&this_err, "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n", cgp->cg_ncyl); } else { this_len = fsck_asprintf(&this_err, "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n", cgp->cg_ncyl, sblock.fs_cpg); } Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_niblk != sblock.fs_ipg) { this_len = fsck_asprintf(&this_err, "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n", cgp->cg_niblk, sblock.fs_ipg); Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_ndblk != ndblk) { this_len = fsck_asprintf(&this_err, "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n", cgp->cg_ndblk, ndblk); Append_Error(full_err, full_len, this_err, this_len); } if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) { this_len = fsck_asprintf(&this_err, "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION " "(%d should be at least 0 and less than %d)\n", cgp->cg_rotor, ndblk); Append_Error(full_err, full_len, this_err, this_len); } if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) { this_len = fsck_asprintf(&this_err, "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION " "(%d should be at least 0 and less than %d)\n", cgp->cg_frotor, ndblk); Append_Error(full_err, full_len, this_err, this_len); } if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { this_len = fsck_asprintf(&this_err, "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION " "(%d should be at least 0 and less than %d)\n", cgp->cg_irotor, sblock.fs_ipg); Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_btotoff != exp_btotoff) { this_len = fsck_asprintf(&this_err, "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n", cgp->cg_btotoff, exp_btotoff); Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_boff != exp_boff) { this_len = fsck_asprintf(&this_err, "BAD FREE BLOCK POSITIONS TABLE 
OFFSET (%d should %d)\n", cgp->cg_boff, exp_boff); Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_iusedoff != exp_iusedoff) { this_len = fsck_asprintf(&this_err, "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n", cgp->cg_iusedoff, exp_iusedoff); Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_freeoff != exp_freeoff) { this_len = fsck_asprintf(&this_err, "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n", cgp->cg_freeoff, exp_freeoff); Append_Error(full_err, full_len, this_err, this_len); } if (cgp->cg_nextfreeoff != exp_nextfreeoff) { this_len = fsck_asprintf(&this_err, "END OF HEADER POSITION INCORRECT (%d should be %d)\n", cgp->cg_nextfreeoff, exp_nextfreeoff); Append_Error(full_err, full_len, this_err, this_len); } return (full_err); } #undef Append_Error /* * This is taken from mkfs, and is what is used to come up with the * original values for a struct cg. This implies that, since these * are all constants, recalculating them now should give us the same * thing as what's on disk. */ static void cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff, daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff, daddr32_t *ndblk) { daddr32_t cbase, dmax; struct cg *cgp; (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno), (size_t)sblock.fs_cgsize); cgp = cgblk.b_un.b_cg; cbase = cgbase(&sblock, cgno); dmax = cbase + sblock.fs_fpg; if (dmax > sblock.fs_size) dmax = sblock.fs_size; /* LINTED pointer difference won't overflow */ *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link); *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t); *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t); *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY); *nextfreeoff = *freeoff + howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY); *ndblk = dmax - cbase; } /* * Corrects all fields in the cg that can be done with the available * redundant data. 
*/ void fix_cg(struct cg *cgp, int cgno) { daddr32_t exp_btotoff, exp_boff, exp_iusedoff; daddr32_t exp_freeoff, exp_nextfreeoff; daddr32_t ndblk; cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, &exp_freeoff, &exp_nextfreeoff, &ndblk); if (cgp->cg_cgx != cgno) { cgp->cg_cgx = cgno; } if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { if (cgno == (sblock.fs_ncg - 1)) { cgp->cg_ncyl = sblock.fs_ncyl - (sblock.fs_cpg * cgno); } else { cgp->cg_ncyl = sblock.fs_cpg; } } if (cgp->cg_niblk != sblock.fs_ipg) { /* * This is not used by the kernel, so it's pretty * harmless if it's wrong. */ cgp->cg_niblk = sblock.fs_ipg; } if (cgp->cg_ndblk != ndblk) { cgp->cg_ndblk = ndblk; } /* * For the rotors, any position's valid, so pick the one we know * will always exist. */ if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) { cgp->cg_rotor = 0; } if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) { cgp->cg_frotor = 0; } if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { cgp->cg_irotor = 0; } /* * For btotoff and boff, if they're misaligned they won't * match the expected values, so we're catching both cases * here. Of course, if any of these are off, it seems likely * that the tables really won't be where we calculate they * should be anyway. */ if (cgp->cg_btotoff != exp_btotoff) { cgp->cg_btotoff = exp_btotoff; } if (cgp->cg_boff != exp_boff) { cgp->cg_boff = exp_boff; } if (cgp->cg_iusedoff != exp_iusedoff) { cgp->cg_iusedoff = exp_iusedoff; } if (cgp->cg_freeoff != exp_freeoff) { cgp->cg_freeoff = exp_freeoff; } if (cgp->cg_nextfreeoff != exp_nextfreeoff) { cgp->cg_nextfreeoff = exp_nextfreeoff; } /* * Reset the magic, as we've recreated this cg, also * update the cg_time, as we're writing out the cg */ cgp->cg_magic = CG_MAGIC; cgp->cg_time = time(NULL); /* * We know there was at least one correctable problem, * or else we wouldn't have been called. 
So instead of * marking the buffer dirty N times above, just do it * once here. */ cgdirty(); } void examinelog(void (*cb)(daddr32_t)) { struct bufarea *bp; extent_block_t *ebp; extent_t *ep; daddr32_t nfno, fno; int i; int j; /* * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags * we need to translate accordingly using logbtodb() */ if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) { if (debug) { (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \ "Aborting log examination\n", \ logbtodb(&sblock, sblock.fs_logbno), SBLOCK); } return; } /* * Read errors will return zeros, which will cause us * to do nothing harmful, so don't need to handle it. */ bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno), (size_t)sblock.fs_bsize); ebp = (void *)bp->b_un.b_buf; /* * Does it look like a log allocation table? */ /* LINTED pointer cast is aligned */ if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf, sblock.fs_bsize)) return; if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) return; ep = &ebp->extents[0]; for (i = 0; i < ebp->nextents; ++i, ++ep) { fno = logbtofrag(&sblock, ep->pbno); nfno = dbtofsb(&sblock, ep->nbno); for (j = 0; j < nfno; ++j, ++fno) { /* * Invoke the callback first, so that pass1 can * mark the log blocks in-use. Then, if any * subsequent pass over the log shows us that a * block got freed (say, it was also claimed by * an inode that we cleared), we can safely declare * the log bad. 
*/ if (cb != NULL) (*cb)(fno); if (!testbmap(fno)) islogok = 0; } } brelse(bp); if (cb != NULL) { fno = logbtofrag(&sblock, sblock.fs_logbno); for (j = 0; j < sblock.fs_frag; ++j, ++fno) (*cb)(fno); } } static void freelogblk(daddr32_t frag) { freeblk(sblock.fs_logbno, frag, 1); } caddr_t file_id(fsck_ino_t inum, mode_t mode) { static char name[MAXPATHLEN + 1]; if (lfdir == inum) { return (lfname); } if ((mode & IFMT) == IFDIR) { (void) strcpy(name, "DIR"); } else if ((mode & IFMT) == IFATTRDIR) { (void) strcpy(name, "ATTR DIR"); } else if ((mode & IFMT) == IFSHAD) { (void) strcpy(name, "ACL"); } else { (void) strcpy(name, "FILE"); } return (name); } /* * Simple initializer for inodesc structures, so users of only a few * fields don't have to worry about getting the right defaults for * everything out. */ void init_inodesc(struct inodesc *idesc) { /* * Most fields should be zero, just hit the special cases. */ (void) memset((void *)idesc, 0, sizeof (struct inodesc)); idesc->id_fix = DONTKNOW; idesc->id_lbn = -1; idesc->id_truncto = -1; idesc->id_firsthole = -1; } /* * Compare routine for tsearch(C) to use on ino_t instances. */ int ino_t_cmp(const void *left, const void *right) { const fsck_ino_t lino = (const fsck_ino_t)left; const fsck_ino_t rino = (const fsck_ino_t)right; return (lino - rino); } int cgisdirty(void) { return (cgblk.b_dirty); } void cgflush(void) { flush(fswritefd, &cgblk); } void dirty(struct bufarea *bp) { if (fswritefd < 0) { /* * No one should call dirty() in read only mode. * But if one does, it's not fatal issue. Just warn them. */ pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n"); } else { (bp)->b_dirty = 1; isdirty = 1; } } void initbarea(struct bufarea *bp) { (bp)->b_dirty = 0; (bp)->b_bno = (diskaddr_t)-1LL; (bp)->b_flags = 0; (bp)->b_cnt = 0; (bp)->b_errs = 0; } /* * Partition-sizing routines adapted from ../newfs/newfs.c. 
* Needed because calcsb() needs to use mkfs to work out what the * superblock should be, and mkfs insists on being told how many * sectors to use. * * Error handling assumes we're never called while preening. * * XXX This should be extracted into a ../ufslib.{c,h}, * in the same spirit to ../../fslib.{c,h}. Once that is * done, both fsck and newfs should be modified to link * against it. */ static int label_type; #define LABEL_TYPE_VTOC 1 #define LABEL_TYPE_EFI 2 #define LABEL_TYPE_OTHER 3 #define MB (1024 * 1024) #define SECTORS_PER_TERABYTE (1LL << 31) #define FS_SIZE_UPPER_LIMIT 0x100000000000LL diskaddr_t getdisksize(caddr_t disk, int fd) { int rpm; struct dk_geom g; struct dk_cinfo ci; diskaddr_t actual_size; /* * get_device_size() determines the actual size of the * device, and also the disk's attributes, such as geometry. */ actual_size = get_device_size(fd, disk); if (label_type == LABEL_TYPE_VTOC) { if (ioctl(fd, DKIOCGGEOM, &g)) { pwarn("%s: Unable to read Disk geometry", disk); return (0); } if (sblock.fs_nsect == 0) sblock.fs_nsect = g.dkg_nsect; if (sblock.fs_ntrak == 0) sblock.fs_ntrak = g.dkg_nhead; if (sblock.fs_rps == 0) { rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm; sblock.fs_rps = rpm / 60; } } if (sblock.fs_bsize == 0) sblock.fs_bsize = MAXBSIZE; /* * Adjust maxcontig by the device's maxtransfer. If maxtransfer * information is not available, default to the min of a MB and * maxphys. */ if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) { sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE; if (sblock.fs_maxcontig < 0) { int gotit, maxphys; gotit = fsgetmaxphys(&maxphys, NULL); /* * If we cannot get the maxphys value, default * to ufs_maxmaxphys (MB). */ if (gotit) { sblock.fs_maxcontig = MIN(maxphys, MB); } else { sblock.fs_maxcontig = MB; } } sblock.fs_maxcontig /= sblock.fs_bsize; } return (actual_size); } /* * Figure out how big the partition we're dealing with is. 
*/ static diskaddr_t get_device_size(int fd, caddr_t name) { struct extvtoc vtoc; struct dk_gpt *efi_vtoc; diskaddr_t slicesize = 0; int index = read_extvtoc(fd, &vtoc); if (index >= 0) { label_type = LABEL_TYPE_VTOC; } else { if (index == VT_ENOTSUP || index == VT_ERROR) { /* it might be an EFI label */ index = efi_alloc_and_read(fd, &efi_vtoc); if (index >= 0) label_type = LABEL_TYPE_EFI; } } if (index < 0) { /* * Since both attempts to read the label failed, we're * going to fall back to a brute force approach to * determining the device's size: see how far out we can * perform reads on the device. */ slicesize = brute_force_get_device_size(fd); if (slicesize == 0) { switch (index) { case VT_ERROR: pwarn("%s: %s\n", name, strerror(errno)); break; case VT_EIO: pwarn("%s: I/O error accessing VTOC", name); break; case VT_EINVAL: pwarn("%s: Invalid field in VTOC", name); break; default: pwarn("%s: unknown error %d accessing VTOC", name, index); break; } return (0); } else { label_type = LABEL_TYPE_OTHER; } } if (label_type == LABEL_TYPE_EFI) { slicesize = efi_vtoc->efi_parts[index].p_size; efi_free(efi_vtoc); } else if (label_type == LABEL_TYPE_VTOC) { slicesize = vtoc.v_part[index].p_size; } return (slicesize); } /* * brute_force_get_device_size * * Determine the size of the device by seeing how far we can * read. Doing an llseek( , , SEEK_END) would probably work * in most cases, but we've seen at least one third-party driver * which doesn't correctly support the SEEK_END option when the * the device is greater than a terabyte. */ static diskaddr_t brute_force_get_device_size(int fd) { diskaddr_t min_fail = 0; diskaddr_t max_succeed = 0; diskaddr_t cur_db_off; char buf[DEV_BSIZE]; /* * First, see if we can read the device at all, just to * eliminate errors that have nothing to do with the * device's size. 
*/ if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) || ((read(fd, buf, DEV_BSIZE)) == -1)) return (0); /* can't determine size */ /* * Now, go sequentially through the multiples of 4TB * to find the first read that fails (this isn't strictly * the most efficient way to find the actual size if the * size really could be anything between 0 and 2**64 bytes. * We expect the sizes to be less than 16 TB for some time, * so why do a bunch of reads that are larger than that? * However, this algorithm *will* work for sizes of greater * than 16 TB. We're just not optimizing for those sizes.) */ /* * XXX lint uses 32-bit arithmetic for doing flow analysis. * We're using > 32-bit constants here. Therefore, its flow * analysis is wrong. For the time being, ignore complaints * from it about the body of the for() being unreached. */ for (cur_db_off = SECTORS_PER_TERABYTE * 4; (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT); cur_db_off += 4 * SECTORS_PER_TERABYTE) { if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), SEEK_SET) == -1) || (read(fd, buf, DEV_BSIZE) != DEV_BSIZE)) min_fail = cur_db_off; else max_succeed = cur_db_off; } /* * XXX Same lint flow analysis problem as above. */ if (min_fail == 0) return (0); /* * We now know that the size of the device is less than * min_fail and greater than or equal to max_succeed. Now * keep splitting the difference until the actual size in * sectors in known. We also know that the difference * between max_succeed and min_fail at this time is * 4 * SECTORS_PER_TERABYTE, which is a power of two, which * simplifies the math below. 
*/ while (min_fail - max_succeed > 1) { cur_db_off = max_succeed + (min_fail - max_succeed)/2; if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), SEEK_SET)) == -1) || ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)) min_fail = cur_db_off; else max_succeed = cur_db_off; } /* the size is the last successfully read sector offset plus one */ return (max_succeed + 1); } static void vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap) { struct dinode *dp; char pathbuf[MAXPATHLEN + 1]; vpwarn(fmt, ap); (void) putchar(' '); pinode(ino); (void) printf("\n"); getpathname(pathbuf, cwd, ino); if (ino < UFSROOTINO || ino > maxino) { pfatal("NAME=%s\n", pathbuf); return; } dp = ginode(ino); if (ftypeok(dp)) pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf); else pfatal("NAME=%s\n", pathbuf); } void direrror(fsck_ino_t ino, caddr_t fmt, ...) { va_list ap; va_start(ap, fmt); vfileerror(ino, ino, fmt, ap); va_end(ap); } static void vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap) { vfileerror(ino, ino, fmt, ap); } void fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...) { va_list ap; va_start(ap, fmt); vfileerror(cwd, ino, fmt, ap); va_end(ap); } /* * Adds the given inode to the orphaned-directories list, limbo_dirs. * Assumes that the caller has set INCLEAR in the inode's statemap[] * entry. * * With INCLEAR set, the inode will get ignored by passes 2 and 3, * meaning it's effectively an orphan. It needs to be noted now, so * it will be remembered in pass 4. */ void add_orphan_dir(fsck_ino_t ino) { if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL) errexit("add_orphan_dir: out of memory"); } /* * Remove an inode from the orphaned-directories list, presumably * because it's been cleared. */ void remove_orphan_dir(fsck_ino_t ino) { (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp); } /* * log_setsum() and log_checksum() are equivalent to lufs.c:setsum() * and lufs.c:checksum(). 
*/
/*
 * Store in *sp the 32-bit wrap-around sum of the nb bytes at lp, taken
 * as a sequence of int32_t words (a trailing partial word is ignored).
 * *sp is zeroed first because the checksum slot normally lies inside
 * the region being summed.  The sum is accumulated unsigned so that
 * wrap-around is well defined; a non-positive nb sums nothing.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	int nwords;

	*sp = 0;
	for (nwords = nb / (int)sizeof (int32_t); nwords > 0; nwords--)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}

/*
 * Verify the checksum stored at *sp against the nb bytes at lp.
 * Returns 1 when it matches.  On a mismatch the value originally found
 * in *sp is put back and 0 is returned, so the buffer is never left
 * modified.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	int32_t ssum = *sp;

	log_setsum(sp, lp, nb);
	if (ssum != *sp) {
		*sp = ssum;
		return (0);
	}
	return (1);
}