/* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that: (1) source distributions retain this entire copyright * notice and comment, and (2) distributions including binaries display * the following acknowledgement: ``This product includes software * developed by the University of California, Berkeley and its contributors'' * in the documentation or other materials provided with the distribution * and in all advertising materials mentioning features or use of this * software. Neither the name of the University nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include #include #include #define _KERNEL #include #undef _KERNEL #include #include "fsck.h" static int get_indir_offsets(int, daddr_t, int *, int *); static int clearanentry(struct inodesc *); static void pdinode(struct dinode *); static void inoflush(void); static void mark_delayed_inodes(fsck_ino_t, daddr32_t); static int iblock(struct inodesc *, int, u_offset_t, enum cki_action); static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t); static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action); static int lookup_dotdot_ino(fsck_ino_t); /* * ckinode() essentially traverses the blocklist of the provided * inode. For each block either the caller-supplied callback (id_func * in the provided struct inodesc) or dirscan() is invoked. Which is * chosen is controlled by what type of traversal was requested * (id_type) - if it was for an ADDR or ACL, use the callback, * otherwise it is assumed to be DATA (i.e., a directory) whose * contents need to be scanned. * * Note that a directory inode can get passed in with a type of ADDR; * the type field is orthogonal to the IFMT value. This is so that * the file aspects (no duplicate blocks, etc) of a directory can be * verified just like is done for any other file, or the actual * contents can be scanned so that connectivity and such can be * investigated. * * The traversal is controlled by flags in the return value of * dirscan() or the callback. Five flags are defined, STOP, SKIP, * KEEPON, ALTERED, and FOUND. Their semantics are: * * STOP - no further processing of this inode is desired/possible/ * feasible/etc. This can mean that whatever the scan * was searching for was found, or a serious * inconsistency was encountered, or anything else * appropriate. * * SKIP - something that made it impossible to continue was * encountered, and the caller should go on to the next * inode. This is more for i/o failures than for * logical inconsistencies. Nothing actually looks for * this. * * KEEPON - no more blocks of this inode need to be scanned, but * nothing's wrong, so keep on going with the next * inode. It is similar to STOP, except that * ckinode()'s caller will typically advance to the next * inode for KEEPON, whereas it ceases scanning through * the inodes completely for STOP. * * ALTERED - a change was made to the inode. If the caller sees * this set, it should make sure to flush out the * changes. Note that any data blocks read in by the * function need to be marked dirty by it directly; * flushing of those will happen automatically later. * * FOUND - whatever was being searched for was located. * Typically combined with STOP to avoid wasting time * doing additional looking. * * During a traversal, some state needs to be carried around. At the * least, the callback functions need to know what inode they're * working on, which logical block, and whether or not fixing problems * when they're encountered is desired. Rather than try to guess what * else might be needed (and thus end up passing way more arguments * than is reasonable), all the possibilities have been bundled in * struct inodesc. About half of the fields are specific to directory * traversals, and the rest are pretty much generic to any traversal. * * The general fields are: * * id_fix What to do when an error is found. Generally, this * is set to DONTKNOW before a traversal. If a * problem is encountered, it is changed to either FIX * or NOFIX by the dofix() query function. If id_fix * has already been set to FIX when dofix() is called, then * it includes the ALTERED flag (see above) in its return * value; the net effect is that the inode's buffer * will get marked dirty and written to disk at some * point. If id_fix is DONTKNOW, then dofix() will * query the user. If it is NOFIX, then dofix() * essentially does nothing. A few routines set NOFIX * as the initial value, as they are performing a best- * effort informational task, rather than an actual * repair operation. * * id_func This is the function that will be called for every * logical block in the file (assuming id_type is not * DATA). The logical block may represent a hole, so * the callback needs to be prepared to handle that * case. Its return value is a combination of the flags * described above (SKIP, ALTERED, etc). * * id_number The inode number whose block list or data is being * scanned. * * id_parent When id_type is DATA, this is the inode number for * the parent of id_number. Otherwise, it is * available for use as an extra parameter or return * value between the callback and ckinode()'s caller. * Which, if either, of those is left completely up to * the two routines involved, so nothing can generally * be assumed about the id_parent value for non-DATA * traversals. * * id_lbn This is the current logical block (not fragment) * number being visited by the traversal. * * id_blkno This is the physical block corresponding to id_lbn. * * id_numfrags This defines how large a block is being processed in * this particular invocation of the callback. * Usually, it will be the same as sblock.fs_frag. * However, if a direct block is being processed and * it is less than a full filesystem block, * id_numfrags will indicate just how many fragments * (starting from id_lbn) are actually part of the * file. * * id_truncto The pass 4 callback is used in several places to * free the blocks of a file (the `FILE HAS PROBLEM * FOO; CLEAR?' scenario). This has been generalized * to allow truncating a file to a particular length * rather than always completely discarding it. If * id_truncto is -1, then the entire file is released, * otherwise it is logical block number to truncate * to. This generalized interface was motivated by a * desire to be able to discard everything after a * hole in a directory, rather than the entire * directory. * * id_type Selects the type of traversal. DATA for dirscan(), * ADDR or ACL for using the provided callback. * * There are several more fields used just for dirscan() traversals: * * id_filesize The number of bytes in the overall directory left to * process. * * id_loc Byte position within the directory block. Should always * point to the start of a directory entry. * * id_entryno Which logical directory entry is being processed (0 * is `.', 1 is `..', 2 and on are normal entries). * This field is primarily used to enable special * checks when looking at the first two entries. * * The exception (there's always an exception in fsck) * is that in pass 1, it tracks how many fragments are * being used by a particular inode. * * id_firsthole The first logical block number that was found to * be zero. As directories are not supposed to have * holes, this marks where a directory should be * truncated down to. A value of -1 indicates that * no holes were found. * * id_dirp A pointer to the in-memory copy of the current * directory entry (as identified by id_loc). * * id_name This is a directory entry name to either create * (callback is mkentry) or locate (callback is * chgino, findino, or findname). */ int ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action) { struct inodesc cleardesc; mode_t mode; if (idesc->id_filesize == 0) idesc->id_filesize = (offset_t)dp->di_size; /* * Our caller should be filtering out completely-free inodes * (mode == zero), so we'll work on the assumption that what * we're given has some basic validity. * * The kernel is inconsistent about MAXPATHLEN including the * trailing \0, so allow the more-generous length for symlinks. */ mode = dp->di_mode & IFMT; if (mode == IFBLK || mode == IFCHR) return (KEEPON); if (mode == IFLNK && dp->di_size > MAXPATHLEN) { pwarn("I=%d Symlink longer than supported maximum", idesc->id_number); init_inodesc(&cleardesc); cleardesc.id_type = ADDR; cleardesc.id_number = idesc->id_number; cleardesc.id_fix = DONTKNOW; clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT); return (STOP); } return (ckinode_common(dp, idesc, action)); } /* * This was split out from ckinode() to allow it to be used * without having to pass in kludge flags to suppress the * wrong-for-deletion initialization and irrelevant checks. * This feature is no longer needed, but is being kept in case * the need comes back. */ static int ckinode_common(struct dinode *dp, struct inodesc *idesc, enum cki_action action) { offset_t offset; struct dinode dino; daddr_t ndb; int indir_data_blks, last_indir_blk; int ret, i, frags; (void) memmove(&dino, dp, sizeof (struct dinode)); ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize); for (i = 0; i < NDADDR; i++) { idesc->id_lbn++; offset = blkoff(&sblock, dino.di_size); if ((--ndb == 0) && (offset != 0)) { idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); } else { idesc->id_numfrags = sblock.fs_frag; } if (dino.di_db[i] == 0) { if ((ndb > 0) && (idesc->id_firsthole < 0)) { idesc->id_firsthole = i; } continue; } idesc->id_blkno = dino.di_db[i]; if (idesc->id_type == ADDR || idesc->id_type == ACL) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); /* * Need to clear the entry, now that we're done with * it. We depend on freeblk() ignoring a request to * free already-free fragments to handle the problem of * a partial block. */ if ((action == CKI_TRUNCATE) && (idesc->id_truncto >= 0) && (idesc->id_lbn >= idesc->id_truncto)) { dp = ginode(idesc->id_number); /* * The (int) cast is safe, in that if di_size won't * fit, it'll be a multiple of any legal fs_frag, * thus giving a zero result. That value, in turn * means we're doing an entire block. */ frags = howmany((int)dp->di_size, sblock.fs_fsize) % sblock.fs_frag; if (frags == 0) frags = sblock.fs_frag; freeblk(idesc->id_number, dp->di_db[i], frags); dp = ginode(idesc->id_number); dp->di_db[i] = 0; inodirty(); ret |= ALTERED; } if (ret & STOP) return (ret); } #ifdef lint /* * Cure a lint complaint of ``possible use before set''. * Apparently it can't quite figure out the switch statement. */ indir_data_blks = 0; #endif /* * indir_data_blks contains the number of data blocks in all * the previous levels for this iteration. E.g., for the * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth * of blocks have already been covered by the direct blocks * (di_db[]). At the triple indirect level (i = NIADDR - 1), * it is all of the number of data blocks that were covered * by the second indirect, single indirect, and direct block * levels. */ idesc->id_numfrags = sblock.fs_frag; ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize); for (i = 0; i < NIADDR; i++) { (void) get_indir_offsets(i, ndb, &indir_data_blks, &last_indir_blk); if (dino.di_ib[i] != 0) { /* * We'll only clear di_ib[i] if the first entry (and * therefore all of them) is to be cleared, since we * only go through this code on the first entry of * each level of indirection. The +1 is to account * for the fact that we don't modify id_lbn until * we actually start processing on a data block. */ idesc->id_blkno = dino.di_ib[i]; ret = iblock(idesc, i + 1, (u_offset_t)howmany(dino.di_size, (u_offset_t)sblock.fs_bsize) - indir_data_blks, action); if ((action == CKI_TRUNCATE) && (idesc->id_truncto <= indir_data_blks) && ((idesc->id_lbn + 1) >= indir_data_blks) && ((idesc->id_lbn + 1) <= last_indir_blk)) { dp = ginode(idesc->id_number); if (dp->di_ib[i] != 0) { freeblk(idesc->id_number, dp->di_ib[i], sblock.fs_frag); } } if (ret & STOP) return (ret); } else { /* * Need to know which of the file's logical blocks * reside in the missing indirect block. However, the * precise location is only needed for truncating * directories, and level-of-indirection precision is * sufficient for that. */ if ((indir_data_blks < ndb) && (idesc->id_firsthole < 0)) { idesc->id_firsthole = indir_data_blks; } } } return (KEEPON); } static int get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks, int *last_blk) { int ndb_ilevel = -1; int ilevel; int dblks, lblk; for (ilevel = 0; ilevel < NIADDR; ilevel++) { switch (ilevel) { case 0: /* SINGLE */ dblks = NDADDR; lblk = dblks + NINDIR(&sblock) - 1; break; case 1: /* DOUBLE */ dblks = NDADDR + NINDIR(&sblock); lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1; break; case 2: /* TRIPLE */ dblks = NDADDR + NINDIR(&sblock) + (NINDIR(&sblock) * NINDIR(&sblock)); lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) * NINDIR(&sblock)) - 1; break; default: exitstat = EXERRFATAL; /* * Translate from zero-based array to * one-based human-style counting. */ errexit("panic: indirection level %d not 1, 2, or 3", ilevel + 1); /* NOTREACHED */ } if (dblks < ndb && ndb <= lblk) ndb_ilevel = ilevel; if (ilevel == ilevel_wanted) { if (data_blks != NULL) *data_blks = dblks; if (last_blk != NULL) *last_blk = lblk; } } return (ndb_ilevel); } static int iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks, enum cki_action action) { struct bufarea *bp; int i, n; int (*func)(struct inodesc *) = NULL; u_offset_t fsbperindirb; daddr32_t last_lbn; int nif; char buf[BUFSIZ]; n = KEEPON; switch (idesc->id_type) { case ADDR: func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); break; case ACL: func = idesc->id_func; break; case DATA: func = dirscan; break; default: errexit("unknown inodesc type %d in iblock()", idesc->id_type); /* NOTREACHED */ } if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { return ((idesc->id_type == ACL) ? STOP : SKIP); } bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize); if (bp->b_errs != 0) { brelse(bp); return (SKIP); } ilevel--; /* * Trivia note: the BSD fsck has the number of bytes remaining * as the third argument to iblock(), so the equivalent of * fsbperindirb starts at fs_bsize instead of one. We're * working in units of filesystem blocks here, not bytes or * fragments. */ for (fsbperindirb = 1, i = 0; i < ilevel; i++) { fsbperindirb *= (u_offset_t)NINDIR(&sblock); } /* * nif indicates the next "free" pointer (as an array index) in this * indirect block, based on counting the blocks remaining in the * file after subtracting all previously processed blocks. * This figure is based on the size field of the inode. * * Note that in normal operation, nif may initially be calculated * as larger than the number of pointers in this block (as when * there are more indirect blocks following); if that is * the case, nif is limited to the max number of pointers per * indirect block. * * Also note that if an inode is inconsistent (has more blocks * allocated to it than the size field would indicate), the sweep * through any indirect blocks directly pointed at by the inode * continues. Since the block offset of any data blocks referenced * by these indirect blocks is greater than the size of the file, * the index nif may be computed as a negative value. * In this case, we reset nif to indicate that all pointers in * this retrieval block should be zeroed and the resulting * unreferenced data and/or retrieval blocks will be recovered * through garbage collection later. */ nif = (offset_t)howmany(iblks, fsbperindirb); if (nif > NINDIR(&sblock)) nif = NINDIR(&sblock); else if (nif < 0) nif = 0; /* * first pass: all "free" retrieval pointers (from [nif] thru * the end of the indirect block) should be zero. (This * assertion does not hold for directories, which may be * truncated without releasing their allocated space) */ if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check || idesc->id_func == pass3bcheck)) { for (i = nif; i < NINDIR(&sblock); i++) { if (bp->b_un.b_indir[i] == 0) continue; (void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", (ulong_t)idesc->id_number); if (preen) { pfatal(buf); } else if (dofix(idesc, buf)) { freeblk(idesc->id_number, bp->b_un.b_indir[i], sblock.fs_frag); bp->b_un.b_indir[i] = 0; dirty(bp); } } flush(fswritefd, bp); } /* * second pass: all retrieval pointers referring to blocks within * a valid range [0..filesize] (both indirect and data blocks) * are examined in the same manner as ckinode() checks the * direct blocks in the inode. Sweep through from * the first pointer in this retrieval block to [nif-1]. */ last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize); for (i = 0; i < nif; i++) { if (ilevel == 0) idesc->id_lbn++; if (bp->b_un.b_indir[i] != 0) { idesc->id_blkno = bp->b_un.b_indir[i]; if (ilevel > 0) { n = iblock(idesc, ilevel, iblks, action); /* * Each iteration decreases "remaining block * count" by the number of blocks accessible * by a pointer at this indirect block level. */ iblks -= fsbperindirb; } else { /* * If we're truncating, func will discard * the data block for us. */ n = (*func)(idesc); } if ((action == CKI_TRUNCATE) && (idesc->id_truncto >= 0) && (idesc->id_lbn >= idesc->id_truncto)) { freeblk(idesc->id_number, bp->b_un.b_indir[i], sblock.fs_frag); } /* * Note that truncation never gets STOP back * under normal circumstances. Abnormal would * be a bad acl short-circuit in iblock() or * an out-of-range failure in pass4check(). * We still want to keep going when truncating * under those circumstances, since the whole * point of truncating is to get rid of all * that. */ if ((n & STOP) && (action != CKI_TRUNCATE)) { brelse(bp); return (n); } } else { if ((idesc->id_lbn < last_lbn) && (idesc->id_firsthole < 0)) { idesc->id_firsthole = idesc->id_lbn; } if (idesc->id_type == DATA) { /* * No point in continuing in the indirect * blocks of a directory, since they'll just * get freed anyway. */ brelse(bp); return ((n & ~KEEPON) | STOP); } } } brelse(bp); return (KEEPON); } /* * Check that a block is a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(daddr32_t blk, int cnt) { int c; if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) || ((cnt - 1) > (maxfsblock - blk))) { if (debug) (void) printf( "Bad fragment range: should be 1 <= %d..%d < %d\n", blk, blk + cnt, maxfsblock); return (1); } if ((cnt > sblock.fs_frag) || ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) { if (debug) (void) printf("Bad fragment size: size %d\n", cnt); return (1); } c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) { if (debug) (void) printf( "Bad fragment position: %d..%d spans start of cg metadata\n", blk, blk + cnt); return (1); } } else { if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) { if (debug) (void) printf( "Bad frag pos: %d..%d crosses end of cg\n", blk, blk + cnt); return (1); } } return (0); } /* * General purpose interface for reading inodes. */ /* * Note that any call to ginode() can potentially invalidate any * dinode pointers previously acquired from it. To avoid pain, * make sure to always call inodirty() immediately after modifying * an inode, if there's any chance of ginode() being called after * that. Also, always call ginode() right before you need to access * an inode, so that there won't be any surprises from functions * called between the previous ginode() invocation and the dinode * use. * * Despite all that, we aren't doing the amount of i/o that's implied, * as we use the buffer cache that getdatablk() and friends maintain. */ static fsck_ino_t startinum = -1; struct dinode * ginode(fsck_ino_t inum) { daddr32_t iblk; struct dinode *dp; if (inum < UFSROOTINO || inum > maxino) { errexit("bad inode number %d to ginode\n", inum); } if (startinum == -1 || pbp == NULL || inum < startinum || inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) { iblk = itod(&sblock, inum); if (pbp != NULL) { brelse(pbp); } /* * We don't check for errors here, because we can't * tell our caller about it, and the zeros that will * be in the buffer are just as good as anything we * could fake. */ pbp = getdatablk(iblk, (size_t)sblock.fs_bsize); startinum = (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock)); } dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)]; if (dp->di_suid != UID_LONG) dp->di_uid = dp->di_suid; if (dp->di_sgid != GID_LONG) dp->di_gid = dp->di_sgid; return (dp); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. It bypasses the buffer * system used by ginode(), etc in favour of reading the bulk of a * cg's inodes at one time. */ static fsck_ino_t nextino, lastinum; static int64_t readcnt, readpercg, fullcnt, inobufsize; static int64_t partialcnt, partialsize; static size_t lastsize; static struct dinode *inodebuf; static diskaddr_t currentdblk; static struct dinode *currentinode; struct dinode * getnextinode(fsck_ino_t inum) { size_t size; diskaddr_t dblk; static struct dinode *dp; if (inum != nextino++ || inum > maxino) errexit("bad inode number %d to nextinode\n", inum); /* * Will always go into the if() the first time we're called, * so dp will always be valid. */ if (inum >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, itod(&sblock, lastinum)); currentdblk = dblk; if (readcnt % readpercg == 0) { if (partialsize > SIZE_MAX) errexit( "Internal error: partialsize overflow"); size = (size_t)partialsize; lastinum += partialcnt; } else { if (inobufsize > SIZE_MAX) errexit("Internal error: inobufsize overflow"); size = (size_t)inobufsize; lastinum += fullcnt; } /* * If fsck_bread() returns an error, it will already have * zeroed out the buffer, so we do not need to do so here. */ (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size); lastsize = size; dp = inodebuf; } currentinode = dp; return (dp++); } /* * Reread the current getnext() buffer. This allows for changing inodes * other than the current one via ginode()/inodirty()/inoflush(). * * Just reuses all the interesting variables that getnextinode() set up * last time it was called. This shouldn't get called often, so we don't * try to figure out if the caller's actually touched an inode in the * range we have cached. There could have been an arbitrary number of * them, after all. */ struct dinode * getnextrefresh(void) { if (inodebuf == NULL) { return (NULL); } inoflush(); (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize); return (currentinode); } void resetinodebuf(void) { startinum = 0; nextino = 0; lastinum = 0; readcnt = 0; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / sizeof (struct dinode); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * sizeof (struct dinode); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if (inodebuf == NULL && (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL) errexit("Cannot allocate space for inode buffer\n"); while (nextino < UFSROOTINO) (void) getnextinode(nextino); } void freeinodebuf(void) { if (inodebuf != NULL) { free((void *)inodebuf); } inodebuf = NULL; } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ void cacheino(struct dinode *dp, fsck_ino_t inum) { struct inoinfo *inp; struct inoinfo **inpp; uint_t blks; blks = NDADDR + NIADDR; inp = (struct inoinfo *) malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t)); if (inp == NULL) errexit("Cannot increase directory list\n"); init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */ inpp = &inphead[inum % numdirs]; inp->i_nextlist = *inpp; *inpp = inp; inp->i_number = inum; if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((void *)inpsort, (unsigned)listmax * sizeof (struct inoinfo *)); if (inpsort == NULL) errexit("cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(fsck_ino_t inum) { struct inoinfo *inp; inp = search_cache(inphead[inum % numdirs], inum); return (inp); } /* * Determine whether inode is in cache. */ int inocached(fsck_ino_t inum) { return (search_cache(inphead[inum % numdirs], inum) != NULL); } /* * Clean up all the inode cache structure. */ void inocleanup(void) { struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) { free((void *)(*inpp)); } free((void *)inphead); free((void *)inpsort); inphead = inpsort = NULL; } /* * Routines to maintain information about acl inodes. * This is built during the first pass and used during the * second and third passes. * * Enter acl inodes into the cache. */ void cacheacl(struct dinode *dp, fsck_ino_t inum) { struct inoinfo *aclp; struct inoinfo **aclpp; uint_t blks; blks = NDADDR + NIADDR; aclp = (struct inoinfo *) malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t)); if (aclp == NULL) return; aclpp = &aclphead[inum % numacls]; aclp->i_nextlist = *aclpp; *aclpp = aclp; aclp->i_number = inum; aclp->i_isize = (offset_t)dp->di_size; aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t)); (void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize); if (aclplast == aclmax) { aclmax += 100; aclpsort = (struct inoinfo **)realloc((char *)aclpsort, (unsigned)aclmax * sizeof (struct inoinfo *)); if (aclpsort == NULL) errexit("cannot increase acl list"); } aclpsort[aclplast++] = aclp; } /* * Generic cache search function. * ROOT is the first entry in a hash chain (the caller is expected * to have done the initial bucket lookup). KEY is what's being * searched for. * * Returns a pointer to the entry if it is found, NULL otherwise. */ static struct inoinfo * search_cache(struct inoinfo *element, fsck_ino_t key) { while (element != NULL) { if (element->i_number == key) break; element = element->i_nextlist; } return (element); } void inodirty(void) { dirty(pbp); } static void inoflush(void) { if (pbp != NULL) flush(fswritefd, pbp); } /* * Interactive wrapper for freeino(), for those times when we're * not sure if we should throw something away. */ void clri(struct inodesc *idesc, char *type, int verbose, int corrupting) { int need_parent; struct dinode *dp; if (statemap[idesc->id_number] == USTATE) return; dp = ginode(idesc->id_number); if (verbose == CLRI_VERBOSE) { pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode)); pinode(idesc->id_number); } if (preen || (reply("CLEAR") == 1)) { need_parent = (corrupting == CLRI_NOP_OK) ? TI_NOPARENT : TI_PARENT; freeino(idesc->id_number, need_parent); if (preen) (void) printf(" (CLEARED)\n"); remove_orphan_dir(idesc->id_number); } else if (corrupting == CLRI_NOP_CORRUPT) { iscorrupt = 1; } (void) printf("\n"); } /* * Find the directory entry for the inode noted in id_parent (which is * not necessarily the parent of anything, we're just using a convenient * field. */ int findname(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent) return (KEEPON); (void) memmove(idesc->id_name, dirp->d_name, MIN(dirp->d_namlen, MAXNAMLEN) + 1); return (STOP|FOUND); } /* * Find the inode number associated with the given name. */ int findino(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } int cleardirentry(fsck_ino_t parentdir, fsck_ino_t target) { struct inodesc idesc; struct dinode *dp; dp = ginode(parentdir); init_inodesc(&idesc); idesc.id_func = clearanentry; idesc.id_parent = target; idesc.id_type = DATA; idesc.id_fix = NOFIX; return (ckinode(dp, &idesc, CKI_TRAVERSE)); } static int clearanentry(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) { idesc->id_entryno++; return (KEEPON); } dirp->d_ino = 0; return (STOP|FOUND|ALTERED); } void pinode(fsck_ino_t ino) { struct dinode *dp; (void) printf(" I=%lu ", (ulong_t)ino); if (ino < UFSROOTINO || ino > maxino) return; dp = ginode(ino); pdinode(dp); } static void pdinode(struct dinode *dp) { char *p; struct passwd *pw; time_t t; (void) printf(" OWNER="); if ((pw = getpwuid((int)dp->di_uid)) != 0) (void) printf("%s ", pw->pw_name); else (void) printf("%lu ", (ulong_t)dp->di_uid); (void) printf("MODE=%o\n", dp->di_mode); if (preen) (void) printf("%s: ", devname); (void) printf("SIZE=%lld ", (longlong_t)dp->di_size); /* ctime() ignores LOCALE, so this is safe */ t = (time_t)dp->di_mtime; p = ctime(&t); (void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20); } void blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn) { pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn); (void) printf("\n"); switch (statemap[ino] & ~INDELAYD) { case FSTATE: case FZLINK: statemap[ino] = FCLEAR; return; case DFOUND: case DSTATE: case DZLINK: statemap[ino] = DCLEAR; add_orphan_dir(ino); return; case SSTATE: statemap[ino] = SCLEAR; return; case FCLEAR: case DCLEAR: case SCLEAR: return; default: errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]); /* NOTREACHED */ } } /* * allocate an unused inode */ fsck_ino_t allocino(fsck_ino_t request, int type) { fsck_ino_t ino; struct dinode *dp; struct cg *cgp = &cgrp; int cg; time_t t; caddr_t err; if (debug && (request != 0) && (request != UFSROOTINO)) errexit("assertion failed: allocino() asked for " "inode %d instead of 0 or %d", (int)request, (int)UFSROOTINO); /* * We know that we're only going to get requests for UFSROOTINO * or 0. If UFSROOTINO is wanted, then it better be available * because our caller is trying to recreate the root directory. * If we're asked for 0, then which one we return doesn't matter. * We know that inodes 0 and 1 are never valid to return, so we * the start at the lowest-legal inode number. * * If we got a request for UFSROOTINO, then request != 0, and * this pair of conditionals is the only place that treats * UFSROOTINO specially. */ if (request == 0) request = UFSROOTINO; else if (statemap[request] != USTATE) return (0); /* * Doesn't do wrapping, since we know we started at * the smallest inode. */ for (ino = request; ino < maxino; ino++) if (statemap[ino] == USTATE) break; if (ino == maxino) return (0); /* * In pass5, we'll calculate the bitmaps and counts all again from * scratch and do a comparison, but for that to work the cg has * to know what in-memory changes we've made to it. If we have * trouble reading the cg, cg_sanity() should kick it out so * we can skip explicit i/o error checking here. */ cg = itog(&sblock, ino); (void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize); err = cg_sanity(cgp, cg); if (err != NULL) { pfatal("CG %d: %s\n", cg, err); free((void *)err); if (reply("REPAIR") == 0) errexit("Program terminated."); fix_cg(cgp, cg); } setbit(cg_inosused(cgp), ino % sblock.fs_ipg); cgp->cg_cs.cs_nifree--; cgdirty(); if (lastino < ino) lastino = ino; /* * Don't currently support IFATTRDIR or any of the other * types, as they aren't needed. */ switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: statemap[ino] = FSTATE; break; default: /* * Pretend nothing ever happened. This clears the * dirty flag, among other things. */ initbarea(&cgblk); if (debug) (void) printf("allocino: unknown type 0%o\n", type & IFMT); return (0); } /* * We're allocating what should be a completely-unused inode, * so make sure we don't inherit anything from any previous * incarnations. */ dp = ginode(ino); (void) memset((void *)dp, 0, sizeof (struct dinode)); dp->di_db[0] = allocblk(1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } dp->di_mode = (mode_t)type; (void) time(&t); dp->di_atime = (time32_t)t; dp->di_ctime = dp->di_atime; dp->di_mtime = dp->di_ctime; dp->di_size = (u_offset_t)sblock.fs_fsize; dp->di_blocks = btodb(sblock.fs_fsize); n_files++; inodirty(); return (ino); } /* * Release some or all of the blocks of an inode. * Only truncates down. Assumes new_length is appropriately aligned * to a block boundary (or a directory block boundary, if it's a * directory). * * If this is a directory, discard all of its contents first, so * we don't create a bunch of orphans that would need another fsck * run to clean up. * * Even if truncating to zero length, the inode remains allocated. */ void truncino(fsck_ino_t ino, offset_t new_length, int update) { struct inodesc idesc; struct inoinfo *iip; struct dinode *dp; fsck_ino_t parent; mode_t mode; caddr_t message; int isdir; int ilevel, dblk; dp = ginode(ino); mode = (dp->di_mode & IFMT); isdir = (mode == IFDIR) || (mode == IFATTRDIR); if (isdir) { /* * Go with the parent we found by chasing references, * if we've gotten that far. Otherwise, use what the * directory itself claims. If there's no ``..'' entry * in it, give up trying to get the link counts right. */ if (update == TI_NOPARENT) { parent = -1; } else { iip = getinoinfo(ino); if (iip != NULL) { parent = iip->i_parent; } else { parent = lookup_dotdot_ino(ino); if (parent != 0) { /* * Make sure that the claimed * parent actually has a * reference to us. */ dp = ginode(parent); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ino; idesc.id_fix = DONTKNOW; if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) parent = 0; } } } mark_delayed_inodes(ino, numfrags(&sblock, new_length)); if (parent > 0) { dp = ginode(parent); LINK_RANGE(message, dp->di_nlink, -1); if (message != NULL) { LINK_CLEAR(message, parent, dp->di_mode, &idesc); if (statemap[parent] == USTATE) goto no_parent_update; } TRACK_LNCNTP(parent, lncntp[parent]--); } else if ((mode == IFDIR) && (parent == 0)) { /* * Currently don't have a good way to * handle this, so throw up our hands. * However, we know that we can still * do some good if we continue, so * don't actually exit yet. * * We don't do it for attrdirs, * because there aren't link counts * between them and their parents. */ pwarn("Could not determine former parent of " "inode %d, link counts are possibly\n" "incorrect. Please rerun fsck(1M) to " "correct this.\n", ino); iscorrupt = 1; } /* * ...else if it's a directory with parent == -1, then * we've not gotten far enough to know connectivity, * and it'll get handled automatically later. */ } no_parent_update: init_inodesc(&idesc); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; idesc.id_fix = DONTKNOW; idesc.id_truncto = howmany(new_length, sblock.fs_bsize); dp = ginode(ino); if (ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED) inodirty(); /* * This has to be done after ckinode(), so that all of * the fragments get visited. Note that we assume we're * always truncating to a block boundary, rather than a * fragment boundary. */ dp = ginode(ino); dp->di_size = new_length; /* * Clear now-obsolete pointers. */ for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) { dp->di_db[dblk] = 0; } ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL); for (ilevel++; ilevel < NIADDR; ilevel++) { dp->di_ib[ilevel] = 0; } inodirty(); } /* * Release an inode's resources, then release the inode itself. */ void freeino(fsck_ino_t ino, int update_parent) { int cg; struct dinode *dp; struct cg *cgp; n_files--; dp = ginode(ino); if (dp->di_size > (u_offset_t)MAXOFF_T) { largefile_count--; } truncino(ino, 0, update_parent); dp = ginode(ino); if ((dp->di_mode & IFMT) == IFATTRDIR) { clearshadow(ino, &attrclientinfo); dp = ginode(ino); } clearinode(dp); inodirty(); statemap[ino] = USTATE; /* * Keep the disk in sync with us so that pass5 doesn't get * upset about spurious inconsistencies. */ cg = itog(&sblock, ino); (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg), (size_t)sblock.fs_cgsize); cgp = cgblk.b_un.b_cg; clrbit(cg_inosused(cgp), ino % sblock.fs_ipg); cgp->cg_cs.cs_nifree += 1; cgdirty(); sblock.fs_cstotal.cs_nifree += 1; sbdirty(); } void init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum) { inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0); inp->i_dotdot = (fsck_ino_t)0; inp->i_isize = (offset_t)dp->di_size; inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t); inp->i_extattr = dp->di_oeftflag; (void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0], inp->i_blkssize); } /* * Return the inode number in the ".." entry of the provided * directory inode. */ static int lookup_dotdot_ino(fsck_ino_t ino) { struct inodesc idesc; init_inodesc(&idesc); idesc.id_type = DATA; idesc.id_func = findino; idesc.id_name = ".."; idesc.id_number = ino; idesc.id_fix = NOFIX; if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) { return (idesc.id_parent); } return (0); } /* * Convenience wrapper around ckinode(findino()). */ int lookup_named_ino(fsck_ino_t dir, caddr_t name) { struct inodesc idesc; init_inodesc(&idesc); idesc.id_type = DATA; idesc.id_func = findino; idesc.id_name = name; idesc.id_number = dir; idesc.id_fix = NOFIX; if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) { return (idesc.id_parent); } return (0); } /* * Marks inodes that are being orphaned and might need to be reconnected * by pass4(). The inode we're traversing is the directory whose * contents will be reconnected later. id_parent is the lfn at which * to start looking at said contents. */ static int mark_a_delayed_inode(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (idesc->id_lbn < idesc->id_parent) { return (KEEPON); } if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") != 0 && strcmp(dirp->d_name, "..") != 0) { statemap[dirp->d_ino] &= ~INFOUND; statemap[dirp->d_ino] |= INDELAYD; } return (KEEPON); } static void mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn) { struct dinode *dp; struct inodesc idelayed; init_inodesc(&idelayed); idelayed.id_number = ino; idelayed.id_type = DATA; idelayed.id_fix = NOFIX; idelayed.id_func = mark_a_delayed_inode; idelayed.id_parent = first_lfn; idelayed.id_entryno = 2; dp = ginode(ino); (void) ckinode(dp, &idelayed, CKI_TRAVERSE); } /* * Clear the i_oeftflag/extended attribute pointer from INO. */ void clearattrref(fsck_ino_t ino) { struct dinode *dp; dp = ginode(ino); if (debug) { if (dp->di_oeftflag == 0) (void) printf("clearattref: no attr to clear on %d\n", ino); } dp->di_oeftflag = 0; inodirty(); }