xref: /illumos-gate/usr/src/cmd/fs.d/ufs/fsck/inode.c (revision c4140c56306ad2a74081dd949618b4f3162dd06b)
1 /*
2  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
6 /*	  All Rights Reserved	*/
7 
8 /*
9  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms are permitted
13  * provided that: (1) source distributions retain this entire copyright
14  * notice and comment, and (2) distributions including binaries display
15  * the following acknowledgement:  ``This product includes software
16  * developed by the University of California, Berkeley and its contributors''
17  * in the documentation or other materials provided with the distribution
18  * and in all advertising materials mentioning features or use of this
19  * software. Neither the name of the University nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
24  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25  */
26 
27 
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <time.h>
33 #include <limits.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/sysmacros.h>
37 #include <sys/mntent.h>
38 #include <sys/vnode.h>
39 #include <sys/fs/ufs_inode.h>
40 #include <sys/fs/ufs_fs.h>
41 #define	_KERNEL
42 #include <sys/fs/ufs_fsdir.h>
43 #undef _KERNEL
44 #include <pwd.h>
45 #include "fsck.h"
46 
/*
 * Globals defined by this file.  Of these, only aclphead and aclpsort
 * are referenced in the part of the file reviewed here (by cacheacl());
 * the notes on the others are inferred from their names and should be
 * confirmed against their users.
 */
/* presumably a running count of large files encountered -- TODO confirm */
uint_t largefile_count = 0;
/* presumably the highest inode number in use -- TODO confirm */
fsck_ino_t lastino;
/* presumably the buffer holding the current cylinder group -- TODO confirm */
struct bufarea cgblk;
/*
 * ACL inode cache: aclphead is indexed by (inode number % numacls) and
 * holds the head of each chain; aclpsort is a growable array to which
 * every cached entry is appended in insertion order.
 */
struct inoinfo **aclphead, **aclpsort;
/* presumably an all-zero dinode used for comparisons -- TODO confirm */
struct dinode zino;

/* Forward declarations for the helpers private to this file. */
static int get_indir_offsets(int, daddr_t, int *, int *);
static int clearanentry(struct inodesc *);
static void pdinode(struct dinode *);
static void inoflush(void);
static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
static int lookup_dotdot_ino(fsck_ino_t);
62 
63 /*
64  * ckinode() essentially traverses the blocklist of the provided
65  * inode.  For each block either the caller-supplied callback (id_func
66  * in the provided struct inodesc) or dirscan() is invoked.  Which is
67  * chosen is controlled by what type of traversal was requested
68  * (id_type) - if it was for an ADDR or ACL, use the callback,
69  * otherwise it is assumed to be DATA (i.e., a directory) whose
70  * contents need to be scanned.
71  *
72  * Note that a directory inode can get passed in with a type of ADDR;
73  * the type field is orthogonal to the IFMT value.  This is so that
74  * the file aspects (no duplicate blocks, etc) of a directory can be
75  * verified just like is done for any other file, or the actual
76  * contents can be scanned so that connectivity and such can be
77  * investigated.
78  *
79  * The traversal is controlled by flags in the return value of
80  * dirscan() or the callback.  Five flags are defined, STOP, SKIP,
81  * KEEPON, ALTERED, and FOUND.  Their semantics are:
82  *
83  *     STOP -    no further processing of this inode is desired/possible/
84  *               feasible/etc.  This can mean that whatever the scan
85  *               was searching for was found, or a serious
86  *               inconsistency was encountered, or anything else
87  *               appropriate.
88  *
89  *     SKIP -    something that made it impossible to continue was
90  *               encountered, and the caller should go on to the next
91  *               inode.  This is more for i/o failures than for
92  *               logical inconsistencies.  Nothing actually looks for
93  *               this.
94  *
95  *     KEEPON -  no more blocks of this inode need to be scanned, but
96  *               nothing's wrong, so keep on going with the next
97  *               inode.  It is similar to STOP, except that
98  *               ckinode()'s caller will typically advance to the next
99  *               inode for KEEPON, whereas it ceases scanning through
100  *               the inodes completely for STOP.
101  *
102  *     ALTERED - a change was made to the inode.  If the caller sees
103  *               this set, it should make sure to flush out the
104  *               changes.  Note that any data blocks read in by the
105  *               function need to be marked dirty by it directly;
106  *               flushing of those will happen automatically later.
107  *
108  *     FOUND -   whatever was being searched for was located.
109  *               Typically combined with STOP to avoid wasting time
110  *               doing additional looking.
111  *
112  * During a traversal, some state needs to be carried around.  At the
113  * least, the callback functions need to know what inode they're
114  * working on, which logical block, and whether or not fixing problems
115  * when they're encountered is desired.  Rather than try to guess what
116  * else might be needed (and thus end up passing way more arguments
117  * than is reasonable), all the possibilities have been bundled in
118  * struct inodesc.  About half of the fields are specific to directory
119  * traversals, and the rest are pretty much generic to any traversal.
120  *
121  * The general fields are:
122  *
123  *     id_fix        What to do when an error is found.  Generally, this
124  *                   is set to DONTKNOW before a traversal.  If a
125  *                   problem is encountered, it is changed to either FIX
126  *                   or NOFIX by the dofix() query function.  If id_fix
127  *                   has already been set to FIX when dofix() is called, then
128  *                   it includes the ALTERED flag (see above) in its return
129  *                   value; the net effect is that the inode's buffer
130  *                   will get marked dirty and written to disk at some
131  *                   point.  If id_fix is DONTKNOW, then dofix() will
132  *                   query the user.  If it is NOFIX, then dofix()
133  *                   essentially does nothing.  A few routines set NOFIX
134  *                   as the initial value, as they are performing a best-
135  *                   effort informational task, rather than an actual
136  *                   repair operation.
137  *
138  *     id_func       This is the function that will be called for every
139  *                   logical block in the file (assuming id_type is not
140  *                   DATA).  The logical block may represent a hole, so
141  *                   the callback needs to be prepared to handle that
142  *                   case.  Its return value is a combination of the flags
143  *                   described above (SKIP, ALTERED, etc).
144  *
145  *     id_number     The inode number whose block list or data is being
146  *                   scanned.
147  *
148  *     id_parent     When id_type is DATA, this is the inode number for
149  *                   the parent of id_number.  Otherwise, it is
150  *                   available for use as an extra parameter or return
151  *                   value between the callback and ckinode()'s caller.
152  *                   Which, if either, of those is left completely up to
153  *                   the two routines involved, so nothing can generally
154  *                   be assumed about the id_parent value for non-DATA
155  *                   traversals.
156  *
157  *     id_lbn        This is the current logical block (not fragment)
158  *                   number being visited by the traversal.
159  *
160  *     id_blkno      This is the physical block corresponding to id_lbn.
161  *
162  *     id_numfrags   This defines how large a block is being processed in
163  *                   this particular invocation of the callback.
164  *                   Usually, it will be the same as sblock.fs_frag.
165  *                   However, if a direct block is being processed and
166  *                   it is less than a full filesystem block,
167  *                   id_numfrags will indicate just how many fragments
168  *                   (starting from id_lbn) are actually part of the
169  *                   file.
170  *
171  *     id_truncto    The pass 4 callback is used in several places to
172  *                   free the blocks of a file (the `FILE HAS PROBLEM
173  *                   FOO; CLEAR?' scenario).  This has been generalized
174  *                   to allow truncating a file to a particular length
175  *                   rather than always completely discarding it.  If
176  *                   id_truncto is -1, then the entire file is released,
 *                   otherwise it is the logical block number to truncate
178  *                   to.  This generalized interface was motivated by a
179  *                   desire to be able to discard everything after a
180  *                   hole in a directory, rather than the entire
181  *                   directory.
182  *
183  *     id_type       Selects the type of traversal.  DATA for dirscan(),
184  *                   ADDR or ACL for using the provided callback.
185  *
186  * There are several more fields used just for dirscan() traversals:
187  *
188  *     id_filesize   The number of bytes in the overall directory left to
189  *                   process.
190  *
191  *     id_loc        Byte position within the directory block.  Should always
192  *                   point to the start of a directory entry.
193  *
194  *     id_entryno    Which logical directory entry is being processed (0
195  *                   is `.', 1 is `..', 2 and on are normal entries).
196  *                   This field is primarily used to enable special
197  *                   checks when looking at the first two entries.
198  *
199  *                   The exception (there's always an exception in fsck)
200  *                   is that in pass 1, it tracks how many fragments are
201  *                   being used by a particular inode.
202  *
203  *     id_firsthole  The first logical block number that was found to
204  *                   be zero.  As directories are not supposed to have
205  *                   holes, this marks where a directory should be
206  *                   truncated down to.  A value of -1 indicates that
207  *                   no holes were found.
208  *
209  *     id_dirp       A pointer to the in-memory copy of the current
210  *                   directory entry (as identified by id_loc).
211  *
212  *     id_name       This is a directory entry name to either create
213  *                   (callback is mkentry) or locate (callback is
214  *                   chgino, findino, or findname).
215  */
216 int
217 ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
218 {
219 	struct inodesc cleardesc;
220 	mode_t	mode;
221 
222 	if (idesc->id_filesize == 0)
223 		idesc->id_filesize = (offset_t)dp->di_size;
224 
225 	/*
226 	 * Our caller should be filtering out completely-free inodes
227 	 * (mode == zero), so we'll work on the assumption that what
228 	 * we're given has some basic validity.
229 	 *
230 	 * The kernel is inconsistent about MAXPATHLEN including the
231 	 * trailing \0, so allow the more-generous length for symlinks.
232 	 */
233 	mode = dp->di_mode & IFMT;
234 	if (mode == IFBLK || mode == IFCHR)
235 		return (KEEPON);
236 	if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
237 		pwarn("I=%d  Symlink longer than supported maximum\n",
238 		    idesc->id_number);
239 		init_inodesc(&cleardesc);
240 		cleardesc.id_type = ADDR;
241 		cleardesc.id_number = idesc->id_number;
242 		cleardesc.id_fix = DONTKNOW;
243 		clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
244 		return (STOP);
245 	}
246 	return (ckinode_common(dp, idesc, action));
247 }
248 
/*
 * This was split out from ckinode() to allow it to be used
 * without having to pass in kludge flags to suppress the
 * wrong-for-deletion initialization and irrelevant checks.
 * This feature is no longer needed, but is being kept in case
 * the need comes back.
 *
 * Walks the direct blocks (di_db[]) and then each level of indirect
 * block (di_ib[]) of the inode, invoking either idesc->id_func (for
 * ADDR and ACL traversals) or dirscan() (otherwise) on every non-zero
 * direct block, and iblock() on every non-zero indirect block.
 *
 *	dp	the inode to traverse.  A private copy is taken up front
 *		and used to drive the traversal; dp itself is re-fetched
 *		with ginode() whenever the on-disk inode has to be
 *		touched.
 *	idesc	traversal state; id_lbn, id_numfrags, id_blkno and
 *		id_firsthole are updated as the walk proceeds.
 *	action	when CKI_TRUNCATE, blocks at or beyond idesc->id_truncto
 *		are released as they are visited.
 *
 * Returns the callback's/iblock()'s flags as soon as they include STOP;
 * otherwise returns KEEPON once every block has been visited.
 */
static int
ckinode_common(struct dinode *dp, struct inodesc *idesc,
	enum cki_action action)
{
	offset_t offset;
	struct dinode dino;
	daddr_t ndb;
	int indir_data_blks, last_indir_blk;
	int ret, i, frags;

	/* Work from a snapshot of the inode rather than the caller's copy. */
	(void) memmove(&dino, dp, sizeof (struct dinode));
	/* Number of filesystem blocks implied by the inode's size. */
	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);

	for (i = 0; i < NDADDR; i++) {
		idesc->id_lbn++;
		offset = blkoff(&sblock, dino.di_size);
		/*
		 * The final block of the file may be a partial one; only
		 * then is id_numfrags less than a full block's worth.
		 */
		if ((--ndb == 0) && (offset != 0)) {
			idesc->id_numfrags =
			    numfrags(&sblock, fragroundup(&sblock, offset));
		} else {
			idesc->id_numfrags = sblock.fs_frag;
		}
		if (dino.di_db[i] == 0) {
			/*
			 * Remember the first hole that lies before the end
			 * of the file (ndb still counts blocks to come).
			 */
			if ((ndb > 0) && (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = i;
			}
			continue;
		}
		idesc->id_blkno = dino.di_db[i];
		if (idesc->id_type == ADDR || idesc->id_type == ACL)
			ret = (*idesc->id_func)(idesc);
		else
			ret = dirscan(idesc);

		/*
		 * Need to clear the entry, now that we're done with
		 * it.  We depend on freeblk() ignoring a request to
		 * free already-free fragments to handle the problem of
		 * a partial block.
		 */
		if ((action == CKI_TRUNCATE) &&
		    (idesc->id_truncto >= 0) &&
		    (idesc->id_lbn >= idesc->id_truncto)) {
			dp = ginode(idesc->id_number);
			/*
			 * The (int) cast is safe, in that if di_size won't
			 * fit, it'll be a multiple of any legal fs_frag,
			 * thus giving a zero result.  That value, in turn
			 * means we're doing an entire block.
			 */
			frags = howmany((int)dp->di_size, sblock.fs_fsize) %
			    sblock.fs_frag;
			if (frags == 0)
				frags = sblock.fs_frag;
			freeblk(idesc->id_number, dp->di_db[i],
			    frags);
			/*
			 * Re-fetch the inode before modifying it; the
			 * pointer obtained above is not relied upon across
			 * the freeblk() call.
			 */
			dp = ginode(idesc->id_number);
			dp->di_db[i] = 0;
			inodirty();
			ret |= ALTERED;
		}

		if (ret & STOP)
			return (ret);
	}

#ifdef lint
	/*
	 * Cure a lint complaint of ``possible use before set''.
	 * Apparently it can't quite figure out the switch statement.
	 */
	indir_data_blks = 0;
#endif
	/*
	 * indir_data_blks contains the number of data blocks in all
	 * the previous levels for this iteration.  E.g., for the
	 * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
	 * of blocks have already been covered by the direct blocks
	 * (di_db[]).  At the triple indirect level (i = NIADDR - 1),
	 * it is all of the number of data blocks that were covered
	 * by the second indirect, single indirect, and direct block
	 * levels.
	 */
	idesc->id_numfrags = sblock.fs_frag;
	/* ndb was consumed by the loop above, so recompute it. */
	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
	for (i = 0; i < NIADDR; i++) {
		(void) get_indir_offsets(i, ndb, &indir_data_blks,
		    &last_indir_blk);
		if (dino.di_ib[i] != 0) {
			/*
			 * We'll only clear di_ib[i] if the first entry (and
			 * therefore all of them) is to be cleared, since we
			 * only go through this code on the first entry of
			 * each level of indirection.  The +1 is to account
			 * for the fact that we don't modify id_lbn until
			 * we actually start processing on a data block.
			 */
			idesc->id_blkno = dino.di_ib[i];
			/*
			 * The third argument is the number of file blocks
			 * not yet accounted for by the lower levels.
			 */
			ret = iblock(idesc, i + 1,
			    (u_offset_t)howmany(dino.di_size,
			    (u_offset_t)sblock.fs_bsize) - indir_data_blks,
			    action);
			/*
			 * NOTE(review): the indirect block is freed here but
			 * di_ib[i] itself is not zeroed and the inode is not
			 * marked dirty, unlike the direct-block case above.
			 * Presumably the caller takes care of that; confirm.
			 */
			if ((action == CKI_TRUNCATE) &&
			    (idesc->id_truncto <= indir_data_blks) &&
			    ((idesc->id_lbn + 1) >= indir_data_blks) &&
			    ((idesc->id_lbn + 1) <= last_indir_blk)) {
				dp = ginode(idesc->id_number);
				if (dp->di_ib[i] != 0) {
					freeblk(idesc->id_number, dp->di_ib[i],
					    sblock.fs_frag);
				}
			}
			if (ret & STOP)
				return (ret);
		} else {
			/*
			 * Need to know which of the file's logical blocks
			 * reside in the missing indirect block.  However, the
			 * precise location is only needed for truncating
			 * directories, and level-of-indirection precision is
			 * sufficient for that.
			 */
			if ((indir_data_blks < ndb) &&
			    (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = indir_data_blks;
			}
		}
	}
	return (KEEPON);
}
386 
387 static int
388 get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
389 	int *last_blk)
390 {
391 	int ndb_ilevel = -1;
392 	int ilevel;
393 	int dblks, lblk;
394 
395 	for (ilevel = 0; ilevel < NIADDR; ilevel++) {
396 		switch (ilevel) {
397 		case 0:	/* SINGLE */
398 			dblks = NDADDR;
399 			lblk = dblks + NINDIR(&sblock) - 1;
400 			break;
401 		case 1:	/* DOUBLE */
402 			dblks = NDADDR + NINDIR(&sblock);
403 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
404 			break;
405 		case 2:	/* TRIPLE */
406 			dblks = NDADDR + NINDIR(&sblock) +
407 			    (NINDIR(&sblock) * NINDIR(&sblock));
408 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
409 			    NINDIR(&sblock)) - 1;
410 			break;
411 		default:
412 			exitstat = EXERRFATAL;
413 			/*
414 			 * Translate from zero-based array to
415 			 * one-based human-style counting.
416 			 */
417 			errexit("panic: indirection level %d not 1, 2, or 3",
418 			    ilevel + 1);
419 			/* NOTREACHED */
420 		}
421 
422 		if (dblks < ndb && ndb <= lblk)
423 			ndb_ilevel = ilevel;
424 
425 		if (ilevel == ilevel_wanted) {
426 			if (data_blks != NULL)
427 				*data_blks = dblks;
428 			if (last_blk != NULL)
429 				*last_blk = lblk;
430 		}
431 	}
432 
433 	return (ndb_ilevel);
434 }
435 
/*
 * Process one indirect block, recursing through lower levels of
 * indirection as needed.
 *
 *	idesc	traversal state.  On entry id_blkno is the indirect block
 *		to process; id_lbn, id_blkno and id_firsthole are updated
 *		as the entries are visited.
 *	ilevel	one-based level of this indirect block: 1 means its
 *		entries point at data blocks, higher values mean they
 *		point at further indirect blocks.
 *	iblks	number of the file's blocks (per the inode size) still to
 *		be covered starting with this indirect block.
 *	action	when CKI_TRUNCATE, entries at or beyond id_truncto are
 *		released and STOP from a callback does not end the walk.
 *
 * For ADDR traversals the callback is first applied to the indirect
 * block itself; if its result lacks KEEPON that result is returned
 * immediately.
 *
 * Returns SKIP if the block is out of range (STOP for ACL traversals)
 * or could not be read, the callback's/recursive call's flags if they
 * include STOP (when not truncating), STOP (without KEEPON) when a
 * hole is found during a DATA traversal, and KEEPON otherwise.
 */
static int
iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
	enum cki_action action)
{
	struct bufarea *bp;
	int i, n;
	int (*func)(struct inodesc *) = NULL;
	u_offset_t fsbperindirb;
	daddr32_t last_lbn;
	int nif;
	char buf[BUFSIZ];

	n = KEEPON;

	/* Select the per-block callback according to the traversal type. */
	switch (idesc->id_type) {
	case ADDR:
		func = idesc->id_func;
		if (((n = (*func)(idesc)) & KEEPON) == 0)
				return (n);
		break;
	case ACL:
		func = idesc->id_func;
		break;
	case DATA:
		func = dirscan;
		break;
	default:
		errexit("unknown inodesc type %d in iblock()", idesc->id_type);
		/* NOTREACHED */
	}
	if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
		return ((idesc->id_type == ACL) ? STOP : SKIP);
	}

	bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
	if (bp->b_errs != 0) {
		brelse(bp);
		return (SKIP);
	}

	/* From here on ilevel is zero-based: 0 means entries are data blocks. */
	ilevel--;
	/*
	 * Trivia note: the BSD fsck has the number of bytes remaining
	 * as the third argument to iblock(), so the equivalent of
	 * fsbperindirb starts at fs_bsize instead of one.  We're
	 * working in units of filesystem blocks here, not bytes or
	 * fragments.
	 */
	for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
		fsbperindirb *= (u_offset_t)NINDIR(&sblock);
	}
	/*
	 * nif indicates the next "free" pointer (as an array index) in this
	 * indirect block, based on counting the blocks remaining in the
	 * file after subtracting all previously processed blocks.
	 * This figure is based on the size field of the inode.
	 *
	 * Note that in normal operation, nif may initially be calculated
	 * as larger than the number of pointers in this block (as when
	 * there are more indirect blocks following); if that is
	 * the case, nif is limited to the max number of pointers per
	 * indirect block.
	 *
	 * Also note that if an inode is inconsistent (has more blocks
	 * allocated to it than the size field would indicate), the sweep
	 * through any indirect blocks directly pointed at by the inode
	 * continues. Since the block offset of any data blocks referenced
	 * by these indirect blocks is greater than the size of the file,
	 * the index nif may be computed as a negative value.
	 * In this case, we reset nif to indicate that all pointers in
	 * this retrieval block should be zeroed and the resulting
	 * unreferenced data and/or retrieval blocks will be recovered
	 * through garbage collection later.
	 */
	nif = (offset_t)howmany(iblks, fsbperindirb);
	if (nif > NINDIR(&sblock))
		nif = NINDIR(&sblock);
	else if (nif < 0)
		nif = 0;
	/*
	 * first pass: all "free" retrieval pointers (from [nif] thru
	 *	the end of the indirect block) should be zero. (This
	 *	assertion does not hold for directories, which may be
	 *	truncated without releasing their allocated space)
	 */
	if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
	    idesc->id_func == pass3bcheck)) {
		for (i = nif; i < NINDIR(&sblock); i++) {
			if (bp->b_un.b_indir[i] == 0)
				continue;
			(void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
			    (ulong_t)idesc->id_number);
			if (preen) {
				pfatal(buf);
			} else if (dofix(idesc, buf)) {
				freeblk(idesc->id_number,
				    bp->b_un.b_indir[i],
				    sblock.fs_frag);
				bp->b_un.b_indir[i] = 0;
				dirty(bp);
			}
		}
		flush(fswritefd, bp);
	}
	/*
	 * second pass: all retrieval pointers referring to blocks within
	 *	a valid range [0..filesize] (both indirect and data blocks)
	 *	are examined in the same manner as ckinode() checks the
	 *	direct blocks in the inode.  Sweep through from
	 *	the first pointer in this retrieval block to [nif-1].
	 */
	last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
	for (i = 0; i < nif; i++) {
		/* id_lbn only advances when an entry stands for a data block. */
		if (ilevel == 0)
			idesc->id_lbn++;
		if (bp->b_un.b_indir[i] != 0) {
			idesc->id_blkno = bp->b_un.b_indir[i];
			if (ilevel > 0) {
				n = iblock(idesc, ilevel, iblks, action);
				/*
				 * Each iteration decreases "remaining block
				 * count" by the number of blocks accessible
				 * by a pointer at this indirect block level.
				 */
				iblks -= fsbperindirb;
			} else {
				/*
				 * If we're truncating, func will discard
				 * the data block for us.
				 */
				n = (*func)(idesc);
			}

			if ((action == CKI_TRUNCATE) &&
			    (idesc->id_truncto >= 0) &&
			    (idesc->id_lbn >= idesc->id_truncto)) {
				freeblk(idesc->id_number,  bp->b_un.b_indir[i],
				    sblock.fs_frag);
			}

			/*
			 * Note that truncation never gets STOP back
			 * under normal circumstances.  Abnormal would
			 * be a bad acl short-circuit in iblock() or
			 * an out-of-range failure in pass4check().
			 * We still want to keep going when truncating
			 * under those circumstances, since the whole
			 * point of truncating is to get rid of all
			 * that.
			 */
			if ((n & STOP) && (action != CKI_TRUNCATE)) {
				brelse(bp);
				return (n);
			}
		} else {
			/*
			 * NOTE(review): when ilevel > 0, a zero entry neither
			 * reduces iblks nor advances id_lbn by the number of
			 * blocks it would have covered, so id_firsthole is
			 * only approximate here.  Confirm this is intended.
			 */
			if ((idesc->id_lbn < last_lbn) &&
			    (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = idesc->id_lbn;
			}
			if (idesc->id_type == DATA) {
				/*
				 * No point in continuing in the indirect
				 * blocks of a directory, since they'll just
				 * get freed anyway.
				 */
				brelse(bp);
				return ((n & ~KEEPON) | STOP);
			}
		}
	}

	brelse(bp);
	return (KEEPON);
}
610 
611 /*
612  * Check that a block is a legal block number.
613  * Return 0 if in range, 1 if out of range.
614  */
615 int
616 chkrange(daddr32_t blk, int cnt)
617 {
618 	int c;
619 
620 	if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
621 	    ((cnt - 1) > (maxfsblock - blk))) {
622 		if (debug)
623 			(void) printf(
624 			    "Bad fragment range: should be 1 <= %d..%d < %d\n",
625 			    blk, blk + cnt, maxfsblock);
626 		return (1);
627 	}
628 	if ((cnt > sblock.fs_frag) ||
629 	    ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
630 		if (debug)
631 			(void) printf("Bad fragment size: size %d\n", cnt);
632 		return (1);
633 	}
634 	c = dtog(&sblock, blk);
635 	if (blk < cgdmin(&sblock, c)) {
636 		if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
637 			if (debug)
638 				(void) printf(
639 	    "Bad fragment position: %d..%d spans start of cg metadata\n",
640 				    blk, blk + cnt);
641 			return (1);
642 		}
643 	} else {
644 		if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
645 			if (debug)
646 				(void) printf(
647 				    "Bad frag pos: %d..%d crosses end of cg\n",
648 				    blk, blk + cnt);
649 			return (1);
650 		}
651 	}
652 	return (0);
653 }
654 
655 /*
656  * General purpose interface for reading inodes.
657  */
658 
659 /*
660  * Note that any call to ginode() can potentially invalidate any
661  * dinode pointers previously acquired from it.  To avoid pain,
662  * make sure to always call inodirty() immediately after modifying
663  * an inode, if there's any chance of ginode() being called after
664  * that.  Also, always call ginode() right before you need to access
665  * an inode, so that there won't be any surprises from functions
666  * called between the previous ginode() invocation and the dinode
667  * use.
668  *
669  * Despite all that, we aren't doing the amount of i/o that's implied,
670  * as we use the buffer cache that getdatablk() and friends maintain.
671  */
672 static fsck_ino_t startinum = -1;
673 
674 struct dinode *
675 ginode(fsck_ino_t inum)
676 {
677 	daddr32_t iblk;
678 	struct dinode *dp;
679 
680 	if (inum < UFSROOTINO || inum > maxino) {
681 		errexit("bad inode number %d to ginode\n", inum);
682 	}
683 	if (startinum == -1 ||
684 	    pbp == NULL ||
685 	    inum < startinum ||
686 	    inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
687 		iblk = itod(&sblock, inum);
688 		if (pbp != NULL) {
689 			brelse(pbp);
690 		}
691 		/*
692 		 * We don't check for errors here, because we can't
693 		 * tell our caller about it, and the zeros that will
694 		 * be in the buffer are just as good as anything we
695 		 * could fake.
696 		 */
697 		pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
698 		startinum =
699 		    (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
700 	}
701 	dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
702 	if (dp->di_suid != UID_LONG)
703 		dp->di_uid = dp->di_suid;
704 	if (dp->di_sgid != GID_LONG)
705 		dp->di_gid = dp->di_sgid;
706 	return (dp);
707 }
708 
709 /*
710  * Special purpose version of ginode used to optimize first pass
711  * over all the inodes in numerical order.  It bypasses the buffer
712  * system used by ginode(), etc in favour of reading the bulk of a
713  * cg's inodes at one time.
714  */
715 static fsck_ino_t nextino, lastinum;
716 static int64_t readcnt, readpercg, fullcnt, inobufsize;
717 static int64_t partialcnt, partialsize;
718 static size_t lastsize;
719 static struct dinode *inodebuf;
720 static diskaddr_t currentdblk;
721 static struct dinode *currentinode;
722 
723 struct dinode *
724 getnextinode(fsck_ino_t inum)
725 {
726 	size_t size;
727 	diskaddr_t dblk;
728 	static struct dinode *dp;
729 
730 	if (inum != nextino++ || inum > maxino)
731 		errexit("bad inode number %d to nextinode\n", inum);
732 
733 	/*
734 	 * Will always go into the if() the first time we're called,
735 	 * so dp will always be valid.
736 	 */
737 	if (inum >= lastinum) {
738 		readcnt++;
739 		dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
740 		currentdblk = dblk;
741 		if (readcnt % readpercg == 0) {
742 			if (partialsize > SIZE_MAX)
743 				errexit(
744 				    "Internal error: partialsize overflow");
745 			size = (size_t)partialsize;
746 			lastinum += partialcnt;
747 		} else {
748 			if (inobufsize > SIZE_MAX)
749 				errexit("Internal error: inobufsize overflow");
750 			size = (size_t)inobufsize;
751 			lastinum += fullcnt;
752 		}
753 		/*
754 		 * If fsck_bread() returns an error, it will already have
755 		 * zeroed out the buffer, so we do not need to do so here.
756 		 */
757 		(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
758 		lastsize = size;
759 		dp = inodebuf;
760 	}
761 	currentinode = dp;
762 	return (dp++);
763 }
764 
765 /*
766  * Reread the current getnext() buffer.  This allows for changing inodes
767  * other than the current one via ginode()/inodirty()/inoflush().
768  *
769  * Just reuses all the interesting variables that getnextinode() set up
770  * last time it was called.  This shouldn't get called often, so we don't
771  * try to figure out if the caller's actually touched an inode in the
772  * range we have cached.  There could have been an arbitrary number of
773  * them, after all.
774  */
775 struct dinode *
776 getnextrefresh(void)
777 {
778 	if (inodebuf == NULL) {
779 		return (NULL);
780 	}
781 
782 	inoflush();
783 	(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
784 	return (currentinode);
785 }
786 
787 void
788 resetinodebuf(void)
789 {
790 	startinum = 0;
791 	nextino = 0;
792 	lastinum = 0;
793 	readcnt = 0;
794 	inobufsize = blkroundup(&sblock, INOBUFSIZE);
795 	fullcnt = inobufsize / sizeof (struct dinode);
796 	readpercg = sblock.fs_ipg / fullcnt;
797 	partialcnt = sblock.fs_ipg % fullcnt;
798 	partialsize = partialcnt * sizeof (struct dinode);
799 	if (partialcnt != 0) {
800 		readpercg++;
801 	} else {
802 		partialcnt = fullcnt;
803 		partialsize = inobufsize;
804 	}
805 	if (inodebuf == NULL &&
806 	    (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
807 		errexit("Cannot allocate space for inode buffer\n");
808 	while (nextino < UFSROOTINO)
809 		(void) getnextinode(nextino);
810 }
811 
812 void
813 freeinodebuf(void)
814 {
815 	if (inodebuf != NULL) {
816 		free((void *)inodebuf);
817 	}
818 	inodebuf = NULL;
819 }
820 
821 /*
822  * Routines to maintain information about directory inodes.
823  * This is built during the first pass and used during the
824  * second and third passes.
825  *
826  * Enter inodes into the cache.
827  */
828 void
829 cacheino(struct dinode *dp, fsck_ino_t inum)
830 {
831 	struct inoinfo *inp;
832 	struct inoinfo **inpp;
833 	uint_t blks;
834 
835 	blks = NDADDR + NIADDR;
836 	inp = (struct inoinfo *)
837 	    malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
838 	if (inp == NULL)
839 		errexit("Cannot increase directory list\n");
840 	init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
841 	inpp = &inphead[inum % numdirs];
842 	inp->i_nextlist = *inpp;
843 	*inpp = inp;
844 	inp->i_number = inum;
845 	if (inplast == listmax) {
846 		listmax += 100;
847 		inpsort = (struct inoinfo **)realloc((void *)inpsort,
848 		    (unsigned)listmax * sizeof (struct inoinfo *));
849 		if (inpsort == NULL)
850 			errexit("cannot increase directory list");
851 	}
852 	inpsort[inplast++] = inp;
853 }
854 
855 /*
856  * Look up an inode cache structure.
857  */
858 struct inoinfo *
859 getinoinfo(fsck_ino_t inum)
860 {
861 	struct inoinfo *inp;
862 
863 	inp = search_cache(inphead[inum % numdirs], inum);
864 	return (inp);
865 }
866 
867 /*
868  * Determine whether inode is in cache.
869  */
870 int
871 inocached(fsck_ino_t inum)
872 {
873 	return (search_cache(inphead[inum % numdirs], inum) != NULL);
874 }
875 
876 /*
877  * Clean up all the inode cache structure.
878  */
879 void
880 inocleanup(void)
881 {
882 	struct inoinfo **inpp;
883 
884 	if (inphead == NULL)
885 		return;
886 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
887 		free((void *)(*inpp));
888 	}
889 	free((void *)inphead);
890 	free((void *)inpsort);
891 	inphead = inpsort = NULL;
892 }
893 
894 /*
895  * Routines to maintain information about acl inodes.
896  * This is built during the first pass and used during the
897  * second and third passes.
898  *
899  * Enter acl inodes into the cache.
900  */
901 void
902 cacheacl(struct dinode *dp, fsck_ino_t inum)
903 {
904 	struct inoinfo *aclp;
905 	struct inoinfo **aclpp;
906 	uint_t blks;
907 
908 	blks = NDADDR + NIADDR;
909 	aclp = (struct inoinfo *)
910 	    malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
911 	if (aclp == NULL)
912 		return;
913 	aclpp = &aclphead[inum % numacls];
914 	aclp->i_nextlist = *aclpp;
915 	*aclpp = aclp;
916 	aclp->i_number = inum;
917 	aclp->i_isize = (offset_t)dp->di_size;
918 	aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
919 	(void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
920 	if (aclplast == aclmax) {
921 		aclmax += 100;
922 		aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
923 		    (unsigned)aclmax * sizeof (struct inoinfo *));
924 		if (aclpsort == NULL)
925 			errexit("cannot increase acl list");
926 	}
927 	aclpsort[aclplast++] = aclp;
928 }
929 
930 
931 /*
932  * Generic cache search function.
933  * ROOT is the first entry in a hash chain (the caller is expected
934  * to have done the initial bucket lookup).  KEY is what's being
935  * searched for.
936  *
937  * Returns a pointer to the entry if it is found, NULL otherwise.
938  */
939 static struct inoinfo *
940 search_cache(struct inoinfo *element, fsck_ino_t key)
941 {
942 	while (element != NULL) {
943 		if (element->i_number == key)
944 			break;
945 		element = element->i_nextlist;
946 	}
947 
948 	return (element);
949 }
950 
/*
 * Apply dirty() to the buffer referenced by pbp.
 * NOTE(review): pbp is presumably the buffer holding the inode most
 * recently fetched with ginode(), so callers use this after editing
 * that inode in place -- confirm against ginode().
 */
951 void
952 inodirty(void)
953 {
954 	dirty(pbp);
955 }
956 
957 static void
958 inoflush(void)
959 {
960 	if (pbp != NULL)
961 		flush(fswritefd, pbp);
962 }
963 
964 /*
965  * Interactive wrapper for freeino(), for those times when we're
966  * not sure if we should throw something away.
967  */
968 void
969 clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
970 {
971 	int need_parent;
972 	struct dinode *dp;
973 
974 	if (statemap[idesc->id_number] == USTATE)
975 		return;
976 
977 	dp = ginode(idesc->id_number);
978 	if (verbose == CLRI_VERBOSE) {
979 		pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
980 		pinode(idesc->id_number);
981 	}
982 	if (preen || (reply("CLEAR") == 1)) {
983 		need_parent = (corrupting == CLRI_NOP_OK) ?
984 		    TI_NOPARENT : TI_PARENT;
985 		freeino(idesc->id_number, need_parent);
986 		if (preen)
987 			(void) printf(" (CLEARED)\n");
988 		remove_orphan_dir(idesc->id_number);
989 	} else if (corrupting == CLRI_NOP_CORRUPT) {
990 		iscorrupt = 1;
991 	}
992 	(void) printf("\n");
993 }
994 
995 /*
996  * Find the directory entry for the inode noted in id_parent (which is
997  * not necessarily the parent of anything, we're just using a convenient
998  * field.
999  */
1000 int
1001 findname(struct inodesc *idesc)
1002 {
1003 	struct direct *dirp = idesc->id_dirp;
1004 
1005 	if (dirp->d_ino != idesc->id_parent)
1006 		return (KEEPON);
1007 	(void) memmove(idesc->id_name, dirp->d_name,
1008 	    MIN(dirp->d_namlen, MAXNAMLEN) + 1);
1009 	return (STOP|FOUND);
1010 }
1011 
1012 /*
1013  * Find the inode number associated with the given name.
1014  */
1015 int
1016 findino(struct inodesc *idesc)
1017 {
1018 	struct direct *dirp = idesc->id_dirp;
1019 
1020 	if (dirp->d_ino == 0)
1021 		return (KEEPON);
1022 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1023 	    dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
1024 		idesc->id_parent = dirp->d_ino;
1025 		return (STOP|FOUND);
1026 	}
1027 	return (KEEPON);
1028 }
1029 
1030 int
1031 cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
1032 {
1033 	struct inodesc idesc;
1034 	struct dinode *dp;
1035 
1036 	dp = ginode(parentdir);
1037 	init_inodesc(&idesc);
1038 	idesc.id_func = clearanentry;
1039 	idesc.id_parent = target;
1040 	idesc.id_type = DATA;
1041 	idesc.id_fix = NOFIX;
1042 	return (ckinode(dp, &idesc, CKI_TRAVERSE));
1043 }
1044 
1045 static int
1046 clearanentry(struct inodesc *idesc)
1047 {
1048 	struct direct *dirp = idesc->id_dirp;
1049 
1050 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1051 		idesc->id_entryno++;
1052 		return (KEEPON);
1053 	}
1054 	dirp->d_ino = 0;
1055 	return (STOP|FOUND|ALTERED);
1056 }
1057 
1058 void
1059 pinode(fsck_ino_t ino)
1060 {
1061 	struct dinode *dp;
1062 
1063 	(void) printf(" I=%lu ", (ulong_t)ino);
1064 	if (ino < UFSROOTINO || ino > maxino)
1065 		return;
1066 	dp = ginode(ino);
1067 	pdinode(dp);
1068 }
1069 
1070 static void
1071 pdinode(struct dinode *dp)
1072 {
1073 	char *p;
1074 	struct passwd *pw;
1075 	time_t t;
1076 
1077 	(void) printf(" OWNER=");
1078 	if ((pw = getpwuid((int)dp->di_uid)) != 0)
1079 		(void) printf("%s ", pw->pw_name);
1080 	else
1081 		(void) printf("%lu ", (ulong_t)dp->di_uid);
1082 	(void) printf("MODE=%o\n", dp->di_mode);
1083 	if (preen)
1084 		(void) printf("%s: ", devname);
1085 	(void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1086 
1087 	/* ctime() ignores LOCALE, so this is safe */
1088 	t = (time_t)dp->di_mtime;
1089 	p = ctime(&t);
1090 	(void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
1091 }
1092 
1093 void
1094 blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
1095 {
1096 	pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1097 	(void) printf("\n");
1098 
1099 	switch (statemap[ino] & ~INDELAYD) {
1100 
1101 	case FSTATE:
1102 	case FZLINK:
1103 		statemap[ino] = FCLEAR;
1104 		return;
1105 
1106 	case DFOUND:
1107 	case DSTATE:
1108 	case DZLINK:
1109 		statemap[ino] = DCLEAR;
1110 		add_orphan_dir(ino);
1111 		return;
1112 
1113 	case SSTATE:
1114 		statemap[ino] = SCLEAR;
1115 		return;
1116 
1117 	case FCLEAR:
1118 	case DCLEAR:
1119 	case SCLEAR:
1120 		return;
1121 
1122 	default:
1123 		errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
1124 		/* NOTREACHED */
1125 	}
1126 }
1127 
1128 /*
1129  * allocate an unused inode
 *
 * REQUEST is either 0 (any unused inode will do) or UFSROOTINO
 * (exactly that inode is wanted).  TYPE supplies the mode of the
 * new inode; only the IFDIR, IFREG and IFLNK formats are accepted.
 *
 * On success the inode is marked in use in its cylinder group and
 * in statemap[], is given one freshly allocated fragment and the
 * current time as its timestamps, and its number is returned.
 *
 * Returns 0 if the specifically requested inode is not in USTATE,
 * if no USTATE inode exists below maxino, if TYPE is not one of the
 * supported formats, or if allocblk() could not supply a fragment.
1130  */
1131 fsck_ino_t
1132 allocino(fsck_ino_t request, int type)
1133 {
1134 	fsck_ino_t ino;
1135 	struct dinode *dp;
1136 	struct cg *cgp = &cgrp;
1137 	int cg;
1138 	time_t t;
1139 	caddr_t err;
1140 
1141 	if (debug && (request != 0) && (request != UFSROOTINO))
1142 		errexit("assertion failed: allocino() asked for "
1143 		    "inode %d instead of 0 or %d",
1144 		    (int)request, (int)UFSROOTINO);
1145 
1146 	/*
1147 	 * We know that we're only going to get requests for UFSROOTINO
1148 	 * or 0.  If UFSROOTINO is wanted, then it better be available
1149 	 * because our caller is trying to recreate the root directory.
1150 	 * If we're asked for 0, then which one we return doesn't matter.
1151 	 * We know that inodes 0 and 1 are never valid to return, so we
1152 	 * start at the lowest-legal inode number.
1153 	 *
1154 	 * If we got a request for UFSROOTINO, then request != 0, and
1155 	 * this pair of conditionals is the only place that treats
1156 	 * UFSROOTINO specially.
1157 	 */
1158 	if (request == 0)
1159 		request = UFSROOTINO;
1160 	else if (statemap[request] != USTATE)
1161 		return (0);
1162 
1163 	/*
1164 	 * Doesn't do wrapping, since we know we started at
1165 	 * the smallest inode.
1166 	 */
1167 	for (ino = request; ino < maxino; ino++)
1168 		if (statemap[ino] == USTATE)
1169 			break;
1170 	if (ino == maxino)
1171 		return (0);
1172 
1173 	/*
1174 	 * In pass5, we'll calculate the bitmaps and counts all again from
1175 	 * scratch and do a comparison, but for that to work the cg has
1176 	 * to know what in-memory changes we've made to it.  If we have
1177 	 * trouble reading the cg, cg_sanity() should kick it out so
1178 	 * we can skip explicit i/o error checking here.
1179 	 */
1180 	cg = itog(&sblock, ino);
1181 	(void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
1182 	err = cg_sanity(cgp, cg);
1183 	if (err != NULL) {
1184 		pfatal("CG %d: %s\n", cg, err);
1185 		free((void *)err);
1186 		if (reply("REPAIR") == 0)
1187 			errexit("Program terminated.");
1188 		fix_cg(cgp, cg);
1189 	}
	/* Mark the inode used in the cg's bitmap and free-inode count. */
1190 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1191 	cgp->cg_cs.cs_nifree--;
1192 	cgdirty();
1193 
	/* Keep lastino at the highest inode number handed out so far. */
1194 	if (lastino < ino)
1195 		lastino = ino;
1196 
1197 	/*
1198 	 * Don't currently support IFATTRDIR or any of the other
1199 	 * types, as they aren't needed.
1200 	 */
1201 	switch (type & IFMT) {
1202 	case IFDIR:
1203 		statemap[ino] = DSTATE;
1204 		cgp->cg_cs.cs_ndir++;
1205 		break;
1206 	case IFREG:
1207 	case IFLNK:
1208 		statemap[ino] = FSTATE;
1209 		break;
1210 	default:
1211 		/*
1212 		 * Pretend nothing ever happened.  This clears the
1213 		 * dirty flag, among other things.
1214 		 */
1215 		initbarea(&cgblk);
1216 		if (debug)
1217 			(void) printf("allocino: unknown type 0%o\n",
1218 			    type & IFMT);
1219 		return (0);
1220 	}
1221 
1222 	/*
1223 	 * We're allocating what should be a completely-unused inode,
1224 	 * so make sure we don't inherit anything from any previous
1225 	 * incarnations.
1226 	 */
1227 	dp = ginode(ino);
1228 	(void) memset((void *)dp, 0, sizeof (struct dinode));
1229 	dp->di_db[0] = allocblk(1);
1230 	if (dp->di_db[0] == 0) {
		/*
		 * No fragment could be allocated, so give the inode
		 * back in statemap[].
		 * NOTE(review): the cg bitmap and count changes made
		 * above are not reverted on this path -- confirm that
		 * pass5 reconciles them.
		 */
1231 		statemap[ino] = USTATE;
1232 		return (0);
1233 	}
1234 	dp->di_mode = (mode_t)type;
1235 	(void) time(&t);
1236 	dp->di_atime = (time32_t)t;
1237 	dp->di_ctime = dp->di_atime;
1238 	dp->di_mtime = dp->di_ctime;
	/* The new inode owns exactly the one fragment allocated above. */
1239 	dp->di_size = (u_offset_t)sblock.fs_fsize;
1240 	dp->di_blocks = btodb(sblock.fs_fsize);
1241 	n_files++;
1242 	inodirty();
1243 	return (ino);
1244 }
1245 
1246 /*
1247  * Release some or all of the blocks of an inode.
1248  * Only truncates down.  Assumes new_length is appropriately aligned
1249  * to a block boundary (or a directory block boundary, if it's a
1250  * directory).
1251  *
1252  * If this is a directory, discard all of its contents first, so
1253  * we don't create a bunch of orphans that would need another fsck
1254  * run to clean up.
1255  *
1256  * Even if truncating to zero length, the inode remains allocated.
1257  */
1258 void
1259 truncino(fsck_ino_t ino, offset_t new_length, int update)
1260 {
1261 	struct inodesc idesc;
1262 	struct inoinfo *iip;
1263 	struct dinode *dp;
1264 	fsck_ino_t parent;
1265 	mode_t mode;
1266 	caddr_t message;
1267 	int isdir, islink;
1268 	int ilevel, dblk;
1269 
1270 	dp = ginode(ino);
1271 	mode = (dp->di_mode & IFMT);
1272 	isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1273 	islink = (mode == IFLNK);
1274 
1275 	if (isdir) {
1276 		/*
1277 		 * Go with the parent we found by chasing references,
1278 		 * if we've gotten that far.  Otherwise, use what the
1279 		 * directory itself claims.  If there's no ``..'' entry
1280 		 * in it, give up trying to get the link counts right.
1281 		 */
1282 		if (update == TI_NOPARENT) {
1283 			parent = -1;
1284 		} else {
1285 			iip = getinoinfo(ino);
1286 			if (iip != NULL) {
1287 				parent = iip->i_parent;
1288 			} else {
1289 				parent = lookup_dotdot_ino(ino);
1290 				if (parent != 0) {
1291 					/*
1292 					 * Make sure that the claimed
1293 					 * parent actually has a
1294 					 * reference to us.
1295 					 */
1296 					dp = ginode(parent);
1297 					idesc.id_name = lfname;
1298 					idesc.id_type = DATA;
1299 					idesc.id_func = findino;
1300 					idesc.id_number = ino;
1301 					idesc.id_fix = DONTKNOW;
1302 					if ((ckinode(dp, &idesc,
1303 					    CKI_TRAVERSE) & FOUND) == 0)
1304 						parent = 0;
1305 				}
1306 			}
1307 		}
1308 
1309 		mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1310 		if (parent > 0) {
1311 			dp = ginode(parent);
1312 			LINK_RANGE(message, dp->di_nlink, -1);
1313 			if (message != NULL) {
1314 				LINK_CLEAR(message, parent, dp->di_mode,
1315 				    &idesc);
1316 				if (statemap[parent] == USTATE)
1317 					goto no_parent_update;
1318 			}
1319 			TRACK_LNCNTP(parent, lncntp[parent]--);
1320 		} else if ((mode == IFDIR) && (parent == 0)) {
1321 			/*
1322 			 * Currently don't have a good way to
1323 			 * handle this, so throw up our hands.
1324 			 * However, we know that we can still
1325 			 * do some good if we continue, so
1326 			 * don't actually exit yet.
1327 			 *
1328 			 * We don't do it for attrdirs,
1329 			 * because there aren't link counts
1330 			 * between them and their parents.
1331 			 */
1332 			pwarn("Could not determine former parent of "
1333 			    "inode %d, link counts are possibly\n"
1334 			    "incorrect.  Please rerun fsck(8) to "
1335 			    "correct this.\n",
1336 			    ino);
1337 			iscorrupt = 1;
1338 		}
1339 		/*
1340 		 * ...else if it's a directory with parent == -1, then
1341 		 * we've not gotten far enough to know connectivity,
1342 		 * and it'll get handled automatically later.
1343 		 */
1344 	}
1345 
1346 no_parent_update:
1347 	init_inodesc(&idesc);
1348 	idesc.id_type = ADDR;
1349 	idesc.id_func = pass4check;
1350 	idesc.id_number = ino;
1351 	idesc.id_fix = DONTKNOW;
1352 	idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
1353 	dp = ginode(ino);
1354 	if (!islink && ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1355 		inodirty();
1356 
1357 	/*
1358 	 * This has to be done after ckinode(), so that all of
1359 	 * the fragments get visited.  Note that we assume we're
1360 	 * always truncating to a block boundary, rather than a
1361 	 * fragment boundary.
1362 	 */
1363 	dp = ginode(ino);
1364 	dp->di_size = new_length;
1365 
1366 	/*
1367 	 * Clear now-obsolete pointers.
1368 	 */
1369 	for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1370 		dp->di_db[dblk] = 0;
1371 	}
1372 
1373 	ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1374 	for (ilevel++; ilevel < NIADDR; ilevel++) {
1375 		dp->di_ib[ilevel] = 0;
1376 	}
1377 
1378 	inodirty();
1379 }
1380 
1381 /*
1382  * Release an inode's resources, then release the inode itself.
1383  */
1384 void
1385 freeino(fsck_ino_t ino, int update_parent)
1386 {
1387 	int cg;
1388 	struct dinode *dp;
1389 	struct cg *cgp;
1390 
1391 	n_files--;
1392 	dp = ginode(ino);
1393 	/*
1394 	 * We need to make sure that the file is really a large file.
1395 	 * Everything bigger than UFS_MAXOFFSET_T is treated as a file with
1396 	 * negative size, which shall be cleared. (see verify_inode() in
1397 	 * pass1.c)
1398 	 */
1399 	if (dp->di_size > (u_offset_t)MAXOFF_T &&
1400 	    dp->di_size <= (u_offset_t)UFS_MAXOFFSET_T &&
1401 	    ftypeok(dp) &&
1402 	    (dp->di_mode & IFMT) != IFBLK &&
1403 	    (dp->di_mode & IFMT) != IFCHR) {
1404 		largefile_count--;
1405 	}
1406 	truncino(ino, 0, update_parent);
1407 
1408 	dp = ginode(ino);
1409 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
1410 		clearshadow(ino, &attrclientinfo);
1411 		dp = ginode(ino);
1412 	}
1413 
1414 	clearinode(dp);
1415 	inodirty();
1416 	statemap[ino] = USTATE;
1417 
1418 	/*
1419 	 * Keep the disk in sync with us so that pass5 doesn't get
1420 	 * upset about spurious inconsistencies.
1421 	 */
1422 	cg = itog(&sblock, ino);
1423 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1424 	    (size_t)sblock.fs_cgsize);
1425 	cgp = cgblk.b_un.b_cg;
1426 	clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1427 	cgp->cg_cs.cs_nifree += 1;
1428 	cgdirty();
1429 	sblock.fs_cstotal.cs_nifree += 1;
1430 	sbdirty();
1431 }
1432 
1433 void
1434 init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1435 {
1436 	inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1437 	inp->i_dotdot = (fsck_ino_t)0;
1438 	inp->i_isize = (offset_t)dp->di_size;
1439 	inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1440 	inp->i_extattr = dp->di_oeftflag;
1441 	(void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1442 	    inp->i_blkssize);
1443 }
1444 
1445 /*
1446  * Return the inode number in the ".." entry of the provided
1447  * directory inode.
1448  */
1449 static int
1450 lookup_dotdot_ino(fsck_ino_t ino)
1451 {
1452 	struct inodesc idesc;
1453 
1454 	init_inodesc(&idesc);
1455 	idesc.id_type = DATA;
1456 	idesc.id_func = findino;
1457 	idesc.id_name = "..";
1458 	idesc.id_number = ino;
1459 	idesc.id_fix = NOFIX;
1460 
1461 	if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1462 		return (idesc.id_parent);
1463 	}
1464 
1465 	return (0);
1466 }
1467 
1468 /*
1469  * Convenience wrapper around ckinode(findino()).
1470  */
1471 int
1472 lookup_named_ino(fsck_ino_t dir, caddr_t name)
1473 {
1474 	struct inodesc idesc;
1475 
1476 	init_inodesc(&idesc);
1477 	idesc.id_type = DATA;
1478 	idesc.id_func = findino;
1479 	idesc.id_name = name;
1480 	idesc.id_number = dir;
1481 	idesc.id_fix = NOFIX;
1482 
1483 	if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1484 		return (idesc.id_parent);
1485 	}
1486 
1487 	return (0);
1488 }
1489 
1490 /*
1491  * Marks inodes that are being orphaned and might need to be reconnected
1492  * by pass4().  The inode we're traversing is the directory whose
1493  * contents will be reconnected later.  id_parent is the lfn at which
1494  * to start looking at said contents.
1495  */
1496 static int
1497 mark_a_delayed_inode(struct inodesc *idesc)
1498 {
1499 	struct direct *dirp = idesc->id_dirp;
1500 
1501 	if (idesc->id_lbn < idesc->id_parent) {
1502 		return (KEEPON);
1503 	}
1504 
1505 	if (dirp->d_ino != 0 &&
1506 	    strcmp(dirp->d_name, ".") != 0 &&
1507 	    strcmp(dirp->d_name, "..") != 0) {
1508 		statemap[dirp->d_ino] &= ~INFOUND;
1509 		statemap[dirp->d_ino] |= INDELAYD;
1510 	}
1511 
1512 	return (KEEPON);
1513 }
1514 
1515 static void
1516 mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1517 {
1518 	struct dinode *dp;
1519 	struct inodesc idelayed;
1520 
1521 	init_inodesc(&idelayed);
1522 	idelayed.id_number = ino;
1523 	idelayed.id_type = DATA;
1524 	idelayed.id_fix = NOFIX;
1525 	idelayed.id_func = mark_a_delayed_inode;
1526 	idelayed.id_parent = first_lfn;
1527 	idelayed.id_entryno = 2;
1528 
1529 	dp = ginode(ino);
1530 	(void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1531 }
1532 
1533 /*
1534  * Clear the i_oeftflag/extended attribute pointer from INO.
1535  */
1536 void
1537 clearattrref(fsck_ino_t ino)
1538 {
1539 	struct dinode *dp;
1540 
1541 	dp = ginode(ino);
1542 	if (debug) {
1543 		if (dp->di_oeftflag == 0)
1544 			(void) printf("clearattref: no attr to clear on %d\n",
1545 			    ino);
1546 	}
1547 
1548 	dp->di_oeftflag = 0;
1549 	inodirty();
1550 }
1551