xref: /illumos-gate/usr/src/cmd/fs.d/ufs/fsck/inode.c (revision 66582b606a8194f7f3ba5b3a3a6dca5b0d346361)
1 /*
2  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
6 /*	  All Rights Reserved  	*/
7 
8 /*
9  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms are permitted
13  * provided that: (1) source distributions retain this entire copyright
14  * notice and comment, and (2) distributions including binaries display
15  * the following acknowledgement:  ``This product includes software
16  * developed by the University of California, Berkeley and its contributors''
17  * in the documentation or other materials provided with the distribution
18  * and in all advertising materials mentioning features or use of this
19  * software. Neither the name of the University nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
24  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25  */
26 
27 
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <time.h>
33 #include <limits.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/sysmacros.h>
37 #include <sys/mntent.h>
38 #include <sys/vnode.h>
39 #include <sys/fs/ufs_inode.h>
40 #include <sys/fs/ufs_fs.h>
41 #define	_KERNEL
42 #include <sys/fs/ufs_fsdir.h>
43 #undef _KERNEL
44 #include <pwd.h>
45 #include "fsck.h"
46 
/*
 * Forward declarations for the file-local (static) helpers used in
 * this file, so that they can be referenced ahead of their definitions.
 */
static int get_indir_offsets(int, daddr_t, int *, int *);
static int clearanentry(struct inodesc *);
static void pdinode(struct dinode *);
static void inoflush(void);
static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
static int lookup_dotdot_ino(fsck_ino_t);
56 
57 /*
58  * ckinode() essentially traverses the blocklist of the provided
59  * inode.  For each block either the caller-supplied callback (id_func
60  * in the provided struct inodesc) or dirscan() is invoked.  Which is
61  * chosen is controlled by what type of traversal was requested
62  * (id_type) - if it was for an ADDR or ACL, use the callback,
63  * otherwise it is assumed to be DATA (i.e., a directory) whose
64  * contents need to be scanned.
65  *
66  * Note that a directory inode can get passed in with a type of ADDR;
67  * the type field is orthogonal to the IFMT value.  This is so that
68  * the file aspects (no duplicate blocks, etc) of a directory can be
69  * verified just like is done for any other file, or the actual
70  * contents can be scanned so that connectivity and such can be
71  * investigated.
72  *
73  * The traversal is controlled by flags in the return value of
74  * dirscan() or the callback.  Five flags are defined, STOP, SKIP,
75  * KEEPON, ALTERED, and FOUND.  Their semantics are:
76  *
77  *     STOP -    no further processing of this inode is desired/possible/
78  *               feasible/etc.  This can mean that whatever the scan
79  *               was searching for was found, or a serious
80  *               inconsistency was encountered, or anything else
81  *               appropriate.
82  *
83  *     SKIP -    something that made it impossible to continue was
84  *               encountered, and the caller should go on to the next
85  *               inode.  This is more for i/o failures than for
86  *               logical inconsistencies.  Nothing actually looks for
87  *               this.
88  *
89  *     KEEPON -  no more blocks of this inode need to be scanned, but
90  *               nothing's wrong, so keep on going with the next
91  *               inode.  It is similar to STOP, except that
92  *               ckinode()'s caller will typically advance to the next
93  *               inode for KEEPON, whereas it ceases scanning through
94  *               the inodes completely for STOP.
95  *
96  *     ALTERED - a change was made to the inode.  If the caller sees
97  *               this set, it should make sure to flush out the
98  *               changes.  Note that any data blocks read in by the
99  *               function need to be marked dirty by it directly;
100  *               flushing of those will happen automatically later.
101  *
102  *     FOUND -   whatever was being searched for was located.
103  *               Typically combined with STOP to avoid wasting time
104  *               doing additional looking.
105  *
106  * During a traversal, some state needs to be carried around.  At the
107  * least, the callback functions need to know what inode they're
108  * working on, which logical block, and whether or not fixing problems
109  * when they're encountered is desired.  Rather than try to guess what
110  * else might be needed (and thus end up passing way more arguments
111  * than is reasonable), all the possibilities have been bundled in
112  * struct inodesc.  About half of the fields are specific to directory
113  * traversals, and the rest are pretty much generic to any traversal.
114  *
115  * The general fields are:
116  *
117  *     id_fix        What to do when an error is found.  Generally, this
118  *                   is set to DONTKNOW before a traversal.  If a
119  *                   problem is encountered, it is changed to either FIX
120  *                   or NOFIX by the dofix() query function.  If id_fix
121  *                   has already been set to FIX when dofix() is called, then
122  *                   it includes the ALTERED flag (see above) in its return
123  *                   value; the net effect is that the inode's buffer
124  *                   will get marked dirty and written to disk at some
125  *                   point.  If id_fix is DONTKNOW, then dofix() will
126  *                   query the user.  If it is NOFIX, then dofix()
127  *                   essentially does nothing.  A few routines set NOFIX
128  *                   as the initial value, as they are performing a best-
129  *                   effort informational task, rather than an actual
130  *                   repair operation.
131  *
132  *     id_func       This is the function that will be called for every
133  *                   logical block in the file (assuming id_type is not
134  *                   DATA).  The logical block may represent a hole, so
135  *                   the callback needs to be prepared to handle that
136  *                   case.  Its return value is a combination of the flags
137  *                   described above (SKIP, ALTERED, etc).
138  *
139  *     id_number     The inode number whose block list or data is being
140  *                   scanned.
141  *
142  *     id_parent     When id_type is DATA, this is the inode number for
143  *                   the parent of id_number.  Otherwise, it is
144  *                   available for use as an extra parameter or return
145  *                   value between the callback and ckinode()'s caller.
146  *                   Which, if either, of those is left completely up to
147  *                   the two routines involved, so nothing can generally
148  *                   be assumed about the id_parent value for non-DATA
149  *                   traversals.
150  *
151  *     id_lbn        This is the current logical block (not fragment)
152  *                   number being visited by the traversal.
153  *
154  *     id_blkno      This is the physical block corresponding to id_lbn.
155  *
156  *     id_numfrags   This defines how large a block is being processed in
157  *                   this particular invocation of the callback.
158  *                   Usually, it will be the same as sblock.fs_frag.
159  *                   However, if a direct block is being processed and
160  *                   it is less than a full filesystem block,
161  *                   id_numfrags will indicate just how many fragments
162  *                   (starting from id_lbn) are actually part of the
163  *                   file.
164  *
165  *     id_truncto    The pass 4 callback is used in several places to
166  *                   free the blocks of a file (the `FILE HAS PROBLEM
167  *                   FOO; CLEAR?' scenario).  This has been generalized
168  *                   to allow truncating a file to a particular length
169  *                   rather than always completely discarding it.  If
170  *                   id_truncto is -1, then the entire file is released,
 *                   otherwise it is the logical block number to truncate
172  *                   to.  This generalized interface was motivated by a
173  *                   desire to be able to discard everything after a
174  *                   hole in a directory, rather than the entire
175  *                   directory.
176  *
177  *     id_type       Selects the type of traversal.  DATA for dirscan(),
178  *                   ADDR or ACL for using the provided callback.
179  *
180  * There are several more fields used just for dirscan() traversals:
181  *
182  *     id_filesize   The number of bytes in the overall directory left to
183  *                   process.
184  *
185  *     id_loc        Byte position within the directory block.  Should always
186  *                   point to the start of a directory entry.
187  *
188  *     id_entryno    Which logical directory entry is being processed (0
189  *                   is `.', 1 is `..', 2 and on are normal entries).
190  *                   This field is primarily used to enable special
191  *                   checks when looking at the first two entries.
192  *
193  *                   The exception (there's always an exception in fsck)
194  *                   is that in pass 1, it tracks how many fragments are
195  *                   being used by a particular inode.
196  *
197  *     id_firsthole  The first logical block number that was found to
198  *                   be zero.  As directories are not supposed to have
199  *                   holes, this marks where a directory should be
200  *                   truncated down to.  A value of -1 indicates that
201  *                   no holes were found.
202  *
203  *     id_dirp       A pointer to the in-memory copy of the current
204  *                   directory entry (as identified by id_loc).
205  *
206  *     id_name       This is a directory entry name to either create
207  *                   (callback is mkentry) or locate (callback is
208  *                   chgino, findino, or findname).
209  */
210 int
211 ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
212 {
213 	struct inodesc cleardesc;
214 	mode_t	mode;
215 
216 	if (idesc->id_filesize == 0)
217 		idesc->id_filesize = (offset_t)dp->di_size;
218 
219 	/*
220 	 * Our caller should be filtering out completely-free inodes
221 	 * (mode == zero), so we'll work on the assumption that what
222 	 * we're given has some basic validity.
223 	 *
224 	 * The kernel is inconsistent about MAXPATHLEN including the
225 	 * trailing \0, so allow the more-generous length for symlinks.
226 	 */
227 	mode = dp->di_mode & IFMT;
228 	if (mode == IFBLK || mode == IFCHR)
229 		return (KEEPON);
230 	if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
231 		pwarn("I=%d  Symlink longer than supported maximum\n",
232 		    idesc->id_number);
233 		init_inodesc(&cleardesc);
234 		cleardesc.id_type = ADDR;
235 		cleardesc.id_number = idesc->id_number;
236 		cleardesc.id_fix = DONTKNOW;
237 		clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
238 		return (STOP);
239 	}
240 	return (ckinode_common(dp, idesc, action));
241 }
242 
/*
 * This was split out from ckinode() to allow it to be used
 * without having to pass in kludge flags to suppress the
 * wrong-for-deletion initialization and irrelevant checks.
 * This feature is no longer needed, but is being kept in case
 * the need comes back.
 *
 * Walks the NDADDR direct pointers and then the NIADDR indirect
 * pointers of a private copy of *dp.  Each non-zero direct pointer is
 * handed to idesc->id_func (ADDR and ACL traversals) or to dirscan()
 * (anything else); each non-zero indirect pointer is handed to iblock().
 * The first zero pointer that the file size says should have been
 * followed by more data is remembered in idesc->id_firsthole.
 *
 * When action is CKI_TRUNCATE, blocks at or beyond idesc->id_truncto
 * are released with freeblk() as they are passed.
 *
 * Returns the accumulated flags as soon as one of them is STOP,
 * otherwise KEEPON.
 */
static int
ckinode_common(struct dinode *dp, struct inodesc *idesc,
	enum cki_action action)
{
	offset_t offset;
	struct dinode dino;
	daddr_t ndb;
	int indir_data_blks, last_indir_blk;
	int ret, i, frags;

	/*
	 * Traverse a snapshot of the inode.  dp itself gets re-pointed by
	 * the ginode() calls made below when truncating.
	 */
	(void) memmove(&dino, dp, sizeof (struct dinode));
	/* Number of filesystem blocks implied by the inode's size. */
	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);

	for (i = 0; i < NDADDR; i++) {
		/* id_lbn is advanced before the block is examined. */
		idesc->id_lbn++;
		offset = blkoff(&sblock, dino.di_size);
		/*
		 * The block that brings the remaining count to zero is the
		 * last one by size; if the size is not block-aligned, only
		 * the fragments covering the tail belong to the file.
		 */
		if ((--ndb == 0) && (offset != 0)) {
			idesc->id_numfrags =
			    numfrags(&sblock, fragroundup(&sblock, offset));
		} else {
			idesc->id_numfrags = sblock.fs_frag;
		}
		if (dino.di_db[i] == 0) {
			/*
			 * Only a hole if the size says more blocks follow,
			 * and only the first one found is recorded.
			 */
			if ((ndb > 0) && (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = i;
			}
			continue;
		}
		idesc->id_blkno = dino.di_db[i];
		if (idesc->id_type == ADDR || idesc->id_type == ACL)
			ret = (*idesc->id_func)(idesc);
		else
			ret = dirscan(idesc);

		/*
		 * Need to clear the entry, now that we're done with
		 * it.  We depend on freeblk() ignoring a request to
		 * free already-free fragments to handle the problem of
		 * a partial block.
		 */
		if ((action == CKI_TRUNCATE) &&
		    (idesc->id_truncto >= 0) &&
		    (idesc->id_lbn >= idesc->id_truncto)) {
			dp = ginode(idesc->id_number);
			/*
			 * The (int) cast is safe, in that if di_size won't
			 * fit, it'll be a multiple of any legal fs_frag,
			 * thus giving a zero result.  That value, in turn
			 * means we're doing an entire block.
			 *
			 * NOTE(review): assuming di_size is wider than int,
			 * a size whose low 32 bits have the top bit set
			 * would become negative here -- confirm that the
			 * claim above really covers that case.
			 */
			frags = howmany((int)dp->di_size, sblock.fs_fsize) %
			    sblock.fs_frag;
			if (frags == 0)
				frags = sblock.fs_frag;
			freeblk(idesc->id_number, dp->di_db[i],
			    frags);
			/* Re-fetch: dp is not trusted across freeblk(). */
			dp = ginode(idesc->id_number);
			dp->di_db[i] = 0;
			inodirty();
			ret |= ALTERED;
		}

		if (ret & STOP)
			return (ret);
	}

#ifdef lint
	/*
	 * Cure a lint complaint of ``possible use before set''.
	 * Apparently it can't quite figure out the switch statement.
	 */
	indir_data_blks = 0;
#endif
	/*
	 * indir_data_blks contains the number of data blocks in all
	 * the previous levels for this iteration.  E.g., for the
	 * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
	 * of blocks have already been covered by the direct blocks
	 * (di_db[]).  At the triple indirect level (i = NIADDR - 1),
	 * it is all of the number of data blocks that were covered
	 * by the second indirect, single indirect, and direct block
	 * levels.
	 */
	idesc->id_numfrags = sblock.fs_frag;
	/* ndb was counted down by the loop above; start over. */
	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
	for (i = 0; i < NIADDR; i++) {
		/* Only the two out-parameters are of interest here. */
		(void) get_indir_offsets(i, ndb, &indir_data_blks,
		    &last_indir_blk);
		if (dino.di_ib[i] != 0) {
			/*
			 * We'll only clear di_ib[i] if the first entry (and
			 * therefore all of them) is to be cleared, since we
			 * only go through this code on the first entry of
			 * each level of indirection.  The +1 is to account
			 * for the fact that we don't modify id_lbn until
			 * we actually start processing on a data block.
			 *
			 * NOTE(review): the third argument to iblock() is
			 * unsigned; when the file has fewer blocks than
			 * indir_data_blks the subtraction wraps rather than
			 * going negative.  iblock() appears to expect this
			 * (see its comment about a negative nif) -- confirm.
			 */
			idesc->id_blkno = dino.di_ib[i];
			ret = iblock(idesc, i + 1,
			    (u_offset_t)howmany(dino.di_size,
			    (u_offset_t)sblock.fs_bsize) - indir_data_blks,
			    action);
			if ((action == CKI_TRUNCATE) &&
			    (idesc->id_truncto <= indir_data_blks) &&
			    ((idesc->id_lbn + 1) >= indir_data_blks) &&
			    ((idesc->id_lbn + 1) <= last_indir_blk)) {
				dp = ginode(idesc->id_number);
				if (dp->di_ib[i] != 0) {
					freeblk(idesc->id_number, dp->di_ib[i],
					    sblock.fs_frag);
				}
			}
			if (ret & STOP)
				return (ret);
		} else {
			/*
			 * Need to know which of the file's logical blocks
			 * reside in the missing indirect block.  However, the
			 * precise location is only needed for truncating
			 * directories, and level-of-indirection precision is
			 * sufficient for that.
			 */
			if ((indir_data_blks < ndb) &&
			    (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = indir_data_blks;
			}
		}
	}
	return (KEEPON);
}
380 
381 static int
382 get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
383 	int *last_blk)
384 {
385 	int ndb_ilevel = -1;
386 	int ilevel;
387 	int dblks, lblk;
388 
389 	for (ilevel = 0; ilevel < NIADDR; ilevel++) {
390 		switch (ilevel) {
391 		case 0:	/* SINGLE */
392 			dblks = NDADDR;
393 			lblk = dblks + NINDIR(&sblock) - 1;
394 			break;
395 		case 1:	/* DOUBLE */
396 			dblks = NDADDR + NINDIR(&sblock);
397 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
398 			break;
399 		case 2:	/* TRIPLE */
400 			dblks = NDADDR + NINDIR(&sblock) +
401 			    (NINDIR(&sblock) * NINDIR(&sblock));
402 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
403 			    NINDIR(&sblock)) - 1;
404 			break;
405 		default:
406 			exitstat = EXERRFATAL;
407 			/*
408 			 * Translate from zero-based array to
409 			 * one-based human-style counting.
410 			 */
411 			errexit("panic: indirection level %d not 1, 2, or 3",
412 			    ilevel + 1);
413 			/* NOTREACHED */
414 		}
415 
416 		if (dblks < ndb && ndb <= lblk)
417 			ndb_ilevel = ilevel;
418 
419 		if (ilevel == ilevel_wanted) {
420 			if (data_blks != NULL)
421 				*data_blks = dblks;
422 			if (last_blk != NULL)
423 				*last_blk = lblk;
424 		}
425 	}
426 
427 	return (ndb_ilevel);
428 }
429 
430 static int
431 iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
432 	enum cki_action action)
433 {
434 	struct bufarea *bp;
435 	int i, n;
436 	int (*func)(struct inodesc *) = NULL;
437 	u_offset_t fsbperindirb;
438 	daddr32_t last_lbn;
439 	int nif;
440 	char buf[BUFSIZ];
441 
442 	n = KEEPON;
443 
444 	switch (idesc->id_type) {
445 	case ADDR:
446 		func = idesc->id_func;
447 		if (((n = (*func)(idesc)) & KEEPON) == 0)
448 				return (n);
449 		break;
450 	case ACL:
451 		func = idesc->id_func;
452 		break;
453 	case DATA:
454 		func = dirscan;
455 		break;
456 	default:
457 		errexit("unknown inodesc type %d in iblock()", idesc->id_type);
458 		/* NOTREACHED */
459 	}
460 	if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
461 		return ((idesc->id_type == ACL) ? STOP : SKIP);
462 	}
463 
464 	bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
465 	if (bp->b_errs != 0) {
466 		brelse(bp);
467 		return (SKIP);
468 	}
469 
470 	ilevel--;
471 	/*
472 	 * Trivia note: the BSD fsck has the number of bytes remaining
473 	 * as the third argument to iblock(), so the equivalent of
474 	 * fsbperindirb starts at fs_bsize instead of one.  We're
475 	 * working in units of filesystem blocks here, not bytes or
476 	 * fragments.
477 	 */
478 	for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
479 		fsbperindirb *= (u_offset_t)NINDIR(&sblock);
480 	}
481 	/*
482 	 * nif indicates the next "free" pointer (as an array index) in this
483 	 * indirect block, based on counting the blocks remaining in the
484 	 * file after subtracting all previously processed blocks.
485 	 * This figure is based on the size field of the inode.
486 	 *
487 	 * Note that in normal operation, nif may initially be calculated
488 	 * as larger than the number of pointers in this block (as when
489 	 * there are more indirect blocks following); if that is
490 	 * the case, nif is limited to the max number of pointers per
491 	 * indirect block.
492 	 *
493 	 * Also note that if an inode is inconsistent (has more blocks
494 	 * allocated to it than the size field would indicate), the sweep
495 	 * through any indirect blocks directly pointed at by the inode
496 	 * continues. Since the block offset of any data blocks referenced
497 	 * by these indirect blocks is greater than the size of the file,
498 	 * the index nif may be computed as a negative value.
499 	 * In this case, we reset nif to indicate that all pointers in
500 	 * this retrieval block should be zeroed and the resulting
501 	 * unreferenced data and/or retrieval blocks will be recovered
502 	 * through garbage collection later.
503 	 */
504 	nif = (offset_t)howmany(iblks, fsbperindirb);
505 	if (nif > NINDIR(&sblock))
506 		nif = NINDIR(&sblock);
507 	else if (nif < 0)
508 		nif = 0;
509 	/*
510 	 * first pass: all "free" retrieval pointers (from [nif] thru
511 	 * 	the end of the indirect block) should be zero. (This
512 	 *	assertion does not hold for directories, which may be
513 	 *	truncated without releasing their allocated space)
514 	 */
515 	if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
516 	    idesc->id_func == pass3bcheck)) {
517 		for (i = nif; i < NINDIR(&sblock); i++) {
518 			if (bp->b_un.b_indir[i] == 0)
519 				continue;
520 			(void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
521 			    (ulong_t)idesc->id_number);
522 			if (preen) {
523 				pfatal(buf);
524 			} else if (dofix(idesc, buf)) {
525 				freeblk(idesc->id_number,
526 				    bp->b_un.b_indir[i],
527 				    sblock.fs_frag);
528 				bp->b_un.b_indir[i] = 0;
529 				dirty(bp);
530 			}
531 		}
532 		flush(fswritefd, bp);
533 	}
534 	/*
535 	 * second pass: all retrieval pointers referring to blocks within
536 	 *	a valid range [0..filesize] (both indirect and data blocks)
537 	 *	are examined in the same manner as ckinode() checks the
538 	 *	direct blocks in the inode.  Sweep through from
539 	 *	the first pointer in this retrieval block to [nif-1].
540 	 */
541 	last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
542 	for (i = 0; i < nif; i++) {
543 		if (ilevel == 0)
544 			idesc->id_lbn++;
545 		if (bp->b_un.b_indir[i] != 0) {
546 			idesc->id_blkno = bp->b_un.b_indir[i];
547 			if (ilevel > 0) {
548 				n = iblock(idesc, ilevel, iblks, action);
549 				/*
550 				 * Each iteration decreases "remaining block
551 				 * count" by the number of blocks accessible
552 				 * by a pointer at this indirect block level.
553 				 */
554 				iblks -= fsbperindirb;
555 			} else {
556 				/*
557 				 * If we're truncating, func will discard
558 				 * the data block for us.
559 				 */
560 				n = (*func)(idesc);
561 			}
562 
563 			if ((action == CKI_TRUNCATE) &&
564 			    (idesc->id_truncto >= 0) &&
565 			    (idesc->id_lbn >= idesc->id_truncto)) {
566 				freeblk(idesc->id_number,  bp->b_un.b_indir[i],
567 				    sblock.fs_frag);
568 			}
569 
570 			/*
571 			 * Note that truncation never gets STOP back
572 			 * under normal circumstances.  Abnormal would
573 			 * be a bad acl short-circuit in iblock() or
574 			 * an out-of-range failure in pass4check().
575 			 * We still want to keep going when truncating
576 			 * under those circumstances, since the whole
577 			 * point of truncating is to get rid of all
578 			 * that.
579 			 */
580 			if ((n & STOP) && (action != CKI_TRUNCATE)) {
581 				brelse(bp);
582 				return (n);
583 			}
584 		} else {
585 			if ((idesc->id_lbn < last_lbn) &&
586 			    (idesc->id_firsthole < 0)) {
587 				idesc->id_firsthole = idesc->id_lbn;
588 			}
589 			if (idesc->id_type == DATA) {
590 				/*
591 				 * No point in continuing in the indirect
592 				 * blocks of a directory, since they'll just
593 				 * get freed anyway.
594 				 */
595 				brelse(bp);
596 				return ((n & ~KEEPON) | STOP);
597 			}
598 		}
599 	}
600 
601 	brelse(bp);
602 	return (KEEPON);
603 }
604 
605 /*
606  * Check that a block is a legal block number.
607  * Return 0 if in range, 1 if out of range.
608  */
609 int
610 chkrange(daddr32_t blk, int cnt)
611 {
612 	int c;
613 
614 	if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
615 	    ((cnt - 1) > (maxfsblock - blk))) {
616 		if (debug)
617 			(void) printf(
618 			    "Bad fragment range: should be 1 <= %d..%d < %d\n",
619 			    blk, blk + cnt, maxfsblock);
620 		return (1);
621 	}
622 	if ((cnt > sblock.fs_frag) ||
623 	    ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
624 		if (debug)
625 			(void) printf("Bad fragment size: size %d\n", cnt);
626 		return (1);
627 	}
628 	c = dtog(&sblock, blk);
629 	if (blk < cgdmin(&sblock, c)) {
630 		if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
631 			if (debug)
632 				(void) printf(
633 	    "Bad fragment position: %d..%d spans start of cg metadata\n",
634 				    blk, blk + cnt);
635 			return (1);
636 		}
637 	} else {
638 		if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
639 			if (debug)
640 				(void) printf(
641 				    "Bad frag pos: %d..%d crosses end of cg\n",
642 				    blk, blk + cnt);
643 			return (1);
644 		}
645 	}
646 	return (0);
647 }
648 
649 /*
650  * General purpose interface for reading inodes.
651  */
652 
653 /*
654  * Note that any call to ginode() can potentially invalidate any
655  * dinode pointers previously acquired from it.  To avoid pain,
656  * make sure to always call inodirty() immediately after modifying
657  * an inode, if there's any chance of ginode() being called after
658  * that.  Also, always call ginode() right before you need to access
659  * an inode, so that there won't be any surprises from functions
660  * called between the previous ginode() invocation and the dinode
661  * use.
662  *
663  * Despite all that, we aren't doing the amount of i/o that's implied,
664  * as we use the buffer cache that getdatablk() and friends maintain.
665  */
666 static fsck_ino_t startinum = -1;
667 
668 struct dinode *
669 ginode(fsck_ino_t inum)
670 {
671 	daddr32_t iblk;
672 	struct dinode *dp;
673 
674 	if (inum < UFSROOTINO || inum > maxino) {
675 		errexit("bad inode number %d to ginode\n", inum);
676 	}
677 	if (startinum == -1 ||
678 	    pbp == NULL ||
679 	    inum < startinum ||
680 	    inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
681 		iblk = itod(&sblock, inum);
682 		if (pbp != NULL) {
683 			brelse(pbp);
684 		}
685 		/*
686 		 * We don't check for errors here, because we can't
687 		 * tell our caller about it, and the zeros that will
688 		 * be in the buffer are just as good as anything we
689 		 * could fake.
690 		 */
691 		pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
692 		startinum =
693 		    (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
694 	}
695 	dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
696 	if (dp->di_suid != UID_LONG)
697 		dp->di_uid = dp->di_suid;
698 	if (dp->di_sgid != GID_LONG)
699 		dp->di_gid = dp->di_sgid;
700 	return (dp);
701 }
702 
703 /*
704  * Special purpose version of ginode used to optimize first pass
705  * over all the inodes in numerical order.  It bypasses the buffer
706  * system used by ginode(), etc in favour of reading the bulk of a
707  * cg's inodes at one time.
708  */
709 static fsck_ino_t nextino, lastinum;
710 static int64_t readcnt, readpercg, fullcnt, inobufsize;
711 static int64_t partialcnt, partialsize;
712 static size_t lastsize;
713 static struct dinode *inodebuf;
714 static diskaddr_t currentdblk;
715 static struct dinode *currentinode;
716 
717 struct dinode *
718 getnextinode(fsck_ino_t inum)
719 {
720 	size_t size;
721 	diskaddr_t dblk;
722 	static struct dinode *dp;
723 
724 	if (inum != nextino++ || inum > maxino)
725 		errexit("bad inode number %d to nextinode\n", inum);
726 
727 	/*
728 	 * Will always go into the if() the first time we're called,
729 	 * so dp will always be valid.
730 	 */
731 	if (inum >= lastinum) {
732 		readcnt++;
733 		dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
734 		currentdblk = dblk;
735 		if (readcnt % readpercg == 0) {
736 			if (partialsize > SIZE_MAX)
737 				errexit(
738 				    "Internal error: partialsize overflow");
739 			size = (size_t)partialsize;
740 			lastinum += partialcnt;
741 		} else {
742 			if (inobufsize > SIZE_MAX)
743 				errexit("Internal error: inobufsize overflow");
744 			size = (size_t)inobufsize;
745 			lastinum += fullcnt;
746 		}
747 		/*
748 		 * If fsck_bread() returns an error, it will already have
749 		 * zeroed out the buffer, so we do not need to do so here.
750 		 */
751 		(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
752 		lastsize = size;
753 		dp = inodebuf;
754 	}
755 	currentinode = dp;
756 	return (dp++);
757 }
758 
759 /*
760  * Reread the current getnext() buffer.  This allows for changing inodes
761  * other than the current one via ginode()/inodirty()/inoflush().
762  *
763  * Just reuses all the interesting variables that getnextinode() set up
764  * last time it was called.  This shouldn't get called often, so we don't
765  * try to figure out if the caller's actually touched an inode in the
766  * range we have cached.  There could have been an arbitrary number of
767  * them, after all.
768  */
769 struct dinode *
770 getnextrefresh(void)
771 {
772 	if (inodebuf == NULL) {
773 		return (NULL);
774 	}
775 
776 	inoflush();
777 	(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
778 	return (currentinode);
779 }
780 
781 void
782 resetinodebuf(void)
783 {
784 	startinum = 0;
785 	nextino = 0;
786 	lastinum = 0;
787 	readcnt = 0;
788 	inobufsize = blkroundup(&sblock, INOBUFSIZE);
789 	fullcnt = inobufsize / sizeof (struct dinode);
790 	readpercg = sblock.fs_ipg / fullcnt;
791 	partialcnt = sblock.fs_ipg % fullcnt;
792 	partialsize = partialcnt * sizeof (struct dinode);
793 	if (partialcnt != 0) {
794 		readpercg++;
795 	} else {
796 		partialcnt = fullcnt;
797 		partialsize = inobufsize;
798 	}
799 	if (inodebuf == NULL &&
800 	    (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
801 		errexit("Cannot allocate space for inode buffer\n");
802 	while (nextino < UFSROOTINO)
803 		(void) getnextinode(nextino);
804 }
805 
806 void
807 freeinodebuf(void)
808 {
809 	if (inodebuf != NULL) {
810 		free((void *)inodebuf);
811 	}
812 	inodebuf = NULL;
813 }
814 
815 /*
816  * Routines to maintain information about directory inodes.
817  * This is built during the first pass and used during the
818  * second and third passes.
819  *
820  * Enter inodes into the cache.
821  */
822 void
823 cacheino(struct dinode *dp, fsck_ino_t inum)
824 {
825 	struct inoinfo *inp;
826 	struct inoinfo **inpp;
827 	uint_t blks;
828 
829 	blks = NDADDR + NIADDR;
830 	inp = (struct inoinfo *)
831 	    malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
832 	if (inp == NULL)
833 		errexit("Cannot increase directory list\n");
834 	init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
835 	inpp = &inphead[inum % numdirs];
836 	inp->i_nextlist = *inpp;
837 	*inpp = inp;
838 	inp->i_number = inum;
839 	if (inplast == listmax) {
840 		listmax += 100;
841 		inpsort = (struct inoinfo **)realloc((void *)inpsort,
842 		    (unsigned)listmax * sizeof (struct inoinfo *));
843 		if (inpsort == NULL)
844 			errexit("cannot increase directory list");
845 	}
846 	inpsort[inplast++] = inp;
847 }
848 
849 /*
850  * Look up an inode cache structure.
851  */
852 struct inoinfo *
853 getinoinfo(fsck_ino_t inum)
854 {
855 	struct inoinfo *inp;
856 
857 	inp = search_cache(inphead[inum % numdirs], inum);
858 	return (inp);
859 }
860 
861 /*
862  * Determine whether inode is in cache.
863  */
864 int
865 inocached(fsck_ino_t inum)
866 {
867 	return (search_cache(inphead[inum % numdirs], inum) != NULL);
868 }
869 
870 /*
871  * Clean up all the inode cache structure.
872  */
873 void
874 inocleanup(void)
875 {
876 	struct inoinfo **inpp;
877 
878 	if (inphead == NULL)
879 		return;
880 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
881 		free((void *)(*inpp));
882 	}
883 	free((void *)inphead);
884 	free((void *)inpsort);
885 	inphead = inpsort = NULL;
886 }
887 
888 /*
889  * Routines to maintain information about acl inodes.
890  * This is built during the first pass and used during the
891  * second and third passes.
892  *
893  * Enter acl inodes into the cache.
894  */
895 void
896 cacheacl(struct dinode *dp, fsck_ino_t inum)
897 {
898 	struct inoinfo *aclp;
899 	struct inoinfo **aclpp;
900 	uint_t blks;
901 
902 	blks = NDADDR + NIADDR;
903 	aclp = (struct inoinfo *)
904 	    malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
905 	if (aclp == NULL)
906 		return;
907 	aclpp = &aclphead[inum % numacls];
908 	aclp->i_nextlist = *aclpp;
909 	*aclpp = aclp;
910 	aclp->i_number = inum;
911 	aclp->i_isize = (offset_t)dp->di_size;
912 	aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
913 	(void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
914 	if (aclplast == aclmax) {
915 		aclmax += 100;
916 		aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
917 		    (unsigned)aclmax * sizeof (struct inoinfo *));
918 		if (aclpsort == NULL)
919 			errexit("cannot increase acl list");
920 	}
921 	aclpsort[aclplast++] = aclp;
922 }
923 
924 
925 /*
926  * Generic cache search function.
927  * ROOT is the first entry in a hash chain (the caller is expected
928  * to have done the initial bucket lookup).  KEY is what's being
929  * searched for.
930  *
931  * Returns a pointer to the entry if it is found, NULL otherwise.
932  */
933 static struct inoinfo *
934 search_cache(struct inoinfo *element, fsck_ino_t key)
935 {
936 	while (element != NULL) {
937 		if (element->i_number == key)
938 			break;
939 		element = element->i_nextlist;
940 	}
941 
942 	return (element);
943 }
944 
/*
 * Hand pbp to dirty().  Callers in this file invoke this right after
 * modifying an inode obtained from ginode().
 *
 * NOTE(review): pbp is presumably the buffer backing the most recent
 * ginode() result -- confirm.  Unlike inoflush(), pbp is not checked
 * for NULL here.
 */
void
inodirty(void)
{
	dirty(pbp);
}
950 
951 static void
952 inoflush(void)
953 {
954 	if (pbp != NULL)
955 		flush(fswritefd, pbp);
956 }
957 
958 /*
959  * Interactive wrapper for freeino(), for those times when we're
960  * not sure if we should throw something away.
961  */
962 void
963 clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
964 {
965 	int need_parent;
966 	struct dinode *dp;
967 
968 	if (statemap[idesc->id_number] == USTATE)
969 		return;
970 
971 	dp = ginode(idesc->id_number);
972 	if (verbose == CLRI_VERBOSE) {
973 		pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
974 		pinode(idesc->id_number);
975 	}
976 	if (preen || (reply("CLEAR") == 1)) {
977 		need_parent = (corrupting == CLRI_NOP_OK) ?
978 		    TI_NOPARENT : TI_PARENT;
979 		freeino(idesc->id_number, need_parent);
980 		if (preen)
981 			(void) printf(" (CLEARED)\n");
982 		remove_orphan_dir(idesc->id_number);
983 	} else if (corrupting == CLRI_NOP_CORRUPT) {
984 		iscorrupt = 1;
985 	}
986 	(void) printf("\n");
987 }
988 
989 /*
990  * Find the directory entry for the inode noted in id_parent (which is
991  * not necessarily the parent of anything, we're just using a convenient
992  * field.
993  */
994 int
995 findname(struct inodesc *idesc)
996 {
997 	struct direct *dirp = idesc->id_dirp;
998 
999 	if (dirp->d_ino != idesc->id_parent)
1000 		return (KEEPON);
1001 	(void) memmove(idesc->id_name, dirp->d_name,
1002 	    MIN(dirp->d_namlen, MAXNAMLEN) + 1);
1003 	return (STOP|FOUND);
1004 }
1005 
1006 /*
1007  * Find the inode number associated with the given name.
1008  */
1009 int
1010 findino(struct inodesc *idesc)
1011 {
1012 	struct direct *dirp = idesc->id_dirp;
1013 
1014 	if (dirp->d_ino == 0)
1015 		return (KEEPON);
1016 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1017 	    dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
1018 		idesc->id_parent = dirp->d_ino;
1019 		return (STOP|FOUND);
1020 	}
1021 	return (KEEPON);
1022 }
1023 
1024 int
1025 cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
1026 {
1027 	struct inodesc idesc;
1028 	struct dinode *dp;
1029 
1030 	dp = ginode(parentdir);
1031 	init_inodesc(&idesc);
1032 	idesc.id_func = clearanentry;
1033 	idesc.id_parent = target;
1034 	idesc.id_type = DATA;
1035 	idesc.id_fix = NOFIX;
1036 	return (ckinode(dp, &idesc, CKI_TRAVERSE));
1037 }
1038 
1039 static int
1040 clearanentry(struct inodesc *idesc)
1041 {
1042 	struct direct *dirp = idesc->id_dirp;
1043 
1044 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1045 		idesc->id_entryno++;
1046 		return (KEEPON);
1047 	}
1048 	dirp->d_ino = 0;
1049 	return (STOP|FOUND|ALTERED);
1050 }
1051 
1052 void
1053 pinode(fsck_ino_t ino)
1054 {
1055 	struct dinode *dp;
1056 
1057 	(void) printf(" I=%lu ", (ulong_t)ino);
1058 	if (ino < UFSROOTINO || ino > maxino)
1059 		return;
1060 	dp = ginode(ino);
1061 	pdinode(dp);
1062 }
1063 
1064 static void
1065 pdinode(struct dinode *dp)
1066 {
1067 	char *p;
1068 	struct passwd *pw;
1069 	time_t t;
1070 
1071 	(void) printf(" OWNER=");
1072 	if ((pw = getpwuid((int)dp->di_uid)) != 0)
1073 		(void) printf("%s ", pw->pw_name);
1074 	else
1075 		(void) printf("%lu ", (ulong_t)dp->di_uid);
1076 	(void) printf("MODE=%o\n", dp->di_mode);
1077 	if (preen)
1078 		(void) printf("%s: ", devname);
1079 	(void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1080 
1081 	/* ctime() ignores LOCALE, so this is safe */
1082 	t = (time_t)dp->di_mtime;
1083 	p = ctime(&t);
1084 	(void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
1085 }
1086 
1087 void
1088 blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
1089 {
1090 	pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1091 	(void) printf("\n");
1092 
1093 	switch (statemap[ino] & ~INDELAYD) {
1094 
1095 	case FSTATE:
1096 	case FZLINK:
1097 		statemap[ino] = FCLEAR;
1098 		return;
1099 
1100 	case DFOUND:
1101 	case DSTATE:
1102 	case DZLINK:
1103 		statemap[ino] = DCLEAR;
1104 		add_orphan_dir(ino);
1105 		return;
1106 
1107 	case SSTATE:
1108 		statemap[ino] = SCLEAR;
1109 		return;
1110 
1111 	case FCLEAR:
1112 	case DCLEAR:
1113 	case SCLEAR:
1114 		return;
1115 
1116 	default:
1117 		errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
1118 		/* NOTREACHED */
1119 	}
1120 }
1121 
1122 /*
1123  * allocate an unused inode
1124  */
1125 fsck_ino_t
1126 allocino(fsck_ino_t request, int type)
1127 {
1128 	fsck_ino_t ino;
1129 	struct dinode *dp;
1130 	struct cg *cgp = &cgrp;
1131 	int cg;
1132 	time_t t;
1133 	caddr_t err;
1134 
1135 	if (debug && (request != 0) && (request != UFSROOTINO))
1136 		errexit("assertion failed: allocino() asked for "
1137 		    "inode %d instead of 0 or %d",
1138 		    (int)request, (int)UFSROOTINO);
1139 
1140 	/*
1141 	 * We know that we're only going to get requests for UFSROOTINO
1142 	 * or 0.  If UFSROOTINO is wanted, then it better be available
1143 	 * because our caller is trying to recreate the root directory.
1144 	 * If we're asked for 0, then which one we return doesn't matter.
1145 	 * We know that inodes 0 and 1 are never valid to return, so we
1146 	 * the start at the lowest-legal inode number.
1147 	 *
1148 	 * If we got a request for UFSROOTINO, then request != 0, and
1149 	 * this pair of conditionals is the only place that treats
1150 	 * UFSROOTINO specially.
1151 	 */
1152 	if (request == 0)
1153 		request = UFSROOTINO;
1154 	else if (statemap[request] != USTATE)
1155 		return (0);
1156 
1157 	/*
1158 	 * Doesn't do wrapping, since we know we started at
1159 	 * the smallest inode.
1160 	 */
1161 	for (ino = request; ino < maxino; ino++)
1162 		if (statemap[ino] == USTATE)
1163 			break;
1164 	if (ino == maxino)
1165 		return (0);
1166 
1167 	/*
1168 	 * In pass5, we'll calculate the bitmaps and counts all again from
1169 	 * scratch and do a comparison, but for that to work the cg has
1170 	 * to know what in-memory changes we've made to it.  If we have
1171 	 * trouble reading the cg, cg_sanity() should kick it out so
1172 	 * we can skip explicit i/o error checking here.
1173 	 */
1174 	cg = itog(&sblock, ino);
1175 	(void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
1176 	err = cg_sanity(cgp, cg);
1177 	if (err != NULL) {
1178 		pfatal("CG %d: %s\n", cg, err);
1179 		free((void *)err);
1180 		if (reply("REPAIR") == 0)
1181 			errexit("Program terminated.");
1182 		fix_cg(cgp, cg);
1183 	}
1184 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1185 	cgp->cg_cs.cs_nifree--;
1186 	cgdirty();
1187 
1188 	if (lastino < ino)
1189 		lastino = ino;
1190 
1191 	/*
1192 	 * Don't currently support IFATTRDIR or any of the other
1193 	 * types, as they aren't needed.
1194 	 */
1195 	switch (type & IFMT) {
1196 	case IFDIR:
1197 		statemap[ino] = DSTATE;
1198 		cgp->cg_cs.cs_ndir++;
1199 		break;
1200 	case IFREG:
1201 	case IFLNK:
1202 		statemap[ino] = FSTATE;
1203 		break;
1204 	default:
1205 		/*
1206 		 * Pretend nothing ever happened.  This clears the
1207 		 * dirty flag, among other things.
1208 		 */
1209 		initbarea(&cgblk);
1210 		if (debug)
1211 			(void) printf("allocino: unknown type 0%o\n",
1212 			    type & IFMT);
1213 		return (0);
1214 	}
1215 
1216 	/*
1217 	 * We're allocating what should be a completely-unused inode,
1218 	 * so make sure we don't inherit anything from any previous
1219 	 * incarnations.
1220 	 */
1221 	dp = ginode(ino);
1222 	(void) memset((void *)dp, 0, sizeof (struct dinode));
1223 	dp->di_db[0] = allocblk(1);
1224 	if (dp->di_db[0] == 0) {
1225 		statemap[ino] = USTATE;
1226 		return (0);
1227 	}
1228 	dp->di_mode = (mode_t)type;
1229 	(void) time(&t);
1230 	dp->di_atime = (time32_t)t;
1231 	dp->di_ctime = dp->di_atime;
1232 	dp->di_mtime = dp->di_ctime;
1233 	dp->di_size = (u_offset_t)sblock.fs_fsize;
1234 	dp->di_blocks = btodb(sblock.fs_fsize);
1235 	n_files++;
1236 	inodirty();
1237 	return (ino);
1238 }
1239 
1240 /*
1241  * Release some or all of the blocks of an inode.
1242  * Only truncates down.  Assumes new_length is appropriately aligned
1243  * to a block boundary (or a directory block boundary, if it's a
1244  * directory).
1245  *
1246  * If this is a directory, discard all of its contents first, so
1247  * we don't create a bunch of orphans that would need another fsck
1248  * run to clean up.
1249  *
1250  * Even if truncating to zero length, the inode remains allocated.
1251  */
1252 void
1253 truncino(fsck_ino_t ino, offset_t new_length, int update)
1254 {
1255 	struct inodesc idesc;
1256 	struct inoinfo *iip;
1257 	struct dinode *dp;
1258 	fsck_ino_t parent;
1259 	mode_t mode;
1260 	caddr_t message;
1261 	int isdir, islink;
1262 	int ilevel, dblk;
1263 
1264 	dp = ginode(ino);
1265 	mode = (dp->di_mode & IFMT);
1266 	isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1267 	islink = (mode == IFLNK);
1268 
1269 	if (isdir) {
1270 		/*
1271 		 * Go with the parent we found by chasing references,
1272 		 * if we've gotten that far.  Otherwise, use what the
1273 		 * directory itself claims.  If there's no ``..'' entry
1274 		 * in it, give up trying to get the link counts right.
1275 		 */
1276 		if (update == TI_NOPARENT) {
1277 			parent = -1;
1278 		} else {
1279 			iip = getinoinfo(ino);
1280 			if (iip != NULL) {
1281 				parent = iip->i_parent;
1282 			} else {
1283 				parent = lookup_dotdot_ino(ino);
1284 				if (parent != 0) {
1285 					/*
1286 					 * Make sure that the claimed
1287 					 * parent actually has a
1288 					 * reference to us.
1289 					 */
1290 					dp = ginode(parent);
1291 					idesc.id_name = lfname;
1292 					idesc.id_type = DATA;
1293 					idesc.id_func = findino;
1294 					idesc.id_number = ino;
1295 					idesc.id_fix = DONTKNOW;
1296 					if ((ckinode(dp, &idesc,
1297 					    CKI_TRAVERSE) & FOUND) == 0)
1298 						parent = 0;
1299 				}
1300 			}
1301 		}
1302 
1303 		mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1304 		if (parent > 0) {
1305 			dp = ginode(parent);
1306 			LINK_RANGE(message, dp->di_nlink, -1);
1307 			if (message != NULL) {
1308 				LINK_CLEAR(message, parent, dp->di_mode,
1309 				    &idesc);
1310 				if (statemap[parent] == USTATE)
1311 					goto no_parent_update;
1312 			}
1313 			TRACK_LNCNTP(parent, lncntp[parent]--);
1314 		} else if ((mode == IFDIR) && (parent == 0)) {
1315 			/*
1316 			 * Currently don't have a good way to
1317 			 * handle this, so throw up our hands.
1318 			 * However, we know that we can still
1319 			 * do some good if we continue, so
1320 			 * don't actually exit yet.
1321 			 *
1322 			 * We don't do it for attrdirs,
1323 			 * because there aren't link counts
1324 			 * between them and their parents.
1325 			 */
1326 			pwarn("Could not determine former parent of "
1327 			    "inode %d, link counts are possibly\n"
1328 			    "incorrect.  Please rerun fsck(1M) to "
1329 			    "correct this.\n",
1330 			    ino);
1331 			iscorrupt = 1;
1332 		}
1333 		/*
1334 		 * ...else if it's a directory with parent == -1, then
1335 		 * we've not gotten far enough to know connectivity,
1336 		 * and it'll get handled automatically later.
1337 		 */
1338 	}
1339 
1340 no_parent_update:
1341 	init_inodesc(&idesc);
1342 	idesc.id_type = ADDR;
1343 	idesc.id_func = pass4check;
1344 	idesc.id_number = ino;
1345 	idesc.id_fix = DONTKNOW;
1346 	idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
1347 	dp = ginode(ino);
1348 	if (!islink && ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1349 		inodirty();
1350 
1351 	/*
1352 	 * This has to be done after ckinode(), so that all of
1353 	 * the fragments get visited.  Note that we assume we're
1354 	 * always truncating to a block boundary, rather than a
1355 	 * fragment boundary.
1356 	 */
1357 	dp = ginode(ino);
1358 	dp->di_size = new_length;
1359 
1360 	/*
1361 	 * Clear now-obsolete pointers.
1362 	 */
1363 	for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1364 		dp->di_db[dblk] = 0;
1365 	}
1366 
1367 	ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1368 	for (ilevel++; ilevel < NIADDR; ilevel++) {
1369 		dp->di_ib[ilevel] = 0;
1370 	}
1371 
1372 	inodirty();
1373 }
1374 
1375 /*
1376  * Release an inode's resources, then release the inode itself.
1377  */
1378 void
1379 freeino(fsck_ino_t ino, int update_parent)
1380 {
1381 	int cg;
1382 	struct dinode *dp;
1383 	struct cg *cgp;
1384 
1385 	n_files--;
1386 	dp = ginode(ino);
1387 	/*
1388 	 * We need to make sure that the file is really a large file.
1389 	 * Everything bigger than UFS_MAXOFFSET_T is treated as a file with
1390 	 * negative size, which shall be cleared. (see verify_inode() in
1391 	 * pass1.c)
1392 	 */
1393 	if (dp->di_size > (u_offset_t)MAXOFF_T &&
1394 	    dp->di_size <= (u_offset_t)UFS_MAXOFFSET_T &&
1395 	    ftypeok(dp) &&
1396 	    (dp->di_mode & IFMT) != IFBLK &&
1397 	    (dp->di_mode & IFMT) != IFCHR) {
1398 		largefile_count--;
1399 	}
1400 	truncino(ino, 0, update_parent);
1401 
1402 	dp = ginode(ino);
1403 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
1404 		clearshadow(ino, &attrclientinfo);
1405 		dp = ginode(ino);
1406 	}
1407 
1408 	clearinode(dp);
1409 	inodirty();
1410 	statemap[ino] = USTATE;
1411 
1412 	/*
1413 	 * Keep the disk in sync with us so that pass5 doesn't get
1414 	 * upset about spurious inconsistencies.
1415 	 */
1416 	cg = itog(&sblock, ino);
1417 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1418 	    (size_t)sblock.fs_cgsize);
1419 	cgp = cgblk.b_un.b_cg;
1420 	clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1421 	cgp->cg_cs.cs_nifree += 1;
1422 	cgdirty();
1423 	sblock.fs_cstotal.cs_nifree += 1;
1424 	sbdirty();
1425 }
1426 
1427 void
1428 init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1429 {
1430 	inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1431 	inp->i_dotdot = (fsck_ino_t)0;
1432 	inp->i_isize = (offset_t)dp->di_size;
1433 	inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1434 	inp->i_extattr = dp->di_oeftflag;
1435 	(void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1436 	    inp->i_blkssize);
1437 }
1438 
1439 /*
1440  * Return the inode number in the ".." entry of the provided
1441  * directory inode.
1442  */
1443 static int
1444 lookup_dotdot_ino(fsck_ino_t ino)
1445 {
1446 	struct inodesc idesc;
1447 
1448 	init_inodesc(&idesc);
1449 	idesc.id_type = DATA;
1450 	idesc.id_func = findino;
1451 	idesc.id_name = "..";
1452 	idesc.id_number = ino;
1453 	idesc.id_fix = NOFIX;
1454 
1455 	if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1456 		return (idesc.id_parent);
1457 	}
1458 
1459 	return (0);
1460 }
1461 
1462 /*
1463  * Convenience wrapper around ckinode(findino()).
1464  */
1465 int
1466 lookup_named_ino(fsck_ino_t dir, caddr_t name)
1467 {
1468 	struct inodesc idesc;
1469 
1470 	init_inodesc(&idesc);
1471 	idesc.id_type = DATA;
1472 	idesc.id_func = findino;
1473 	idesc.id_name = name;
1474 	idesc.id_number = dir;
1475 	idesc.id_fix = NOFIX;
1476 
1477 	if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1478 		return (idesc.id_parent);
1479 	}
1480 
1481 	return (0);
1482 }
1483 
1484 /*
1485  * Marks inodes that are being orphaned and might need to be reconnected
1486  * by pass4().  The inode we're traversing is the directory whose
1487  * contents will be reconnected later.  id_parent is the lfn at which
1488  * to start looking at said contents.
1489  */
1490 static int
1491 mark_a_delayed_inode(struct inodesc *idesc)
1492 {
1493 	struct direct *dirp = idesc->id_dirp;
1494 
1495 	if (idesc->id_lbn < idesc->id_parent) {
1496 		return (KEEPON);
1497 	}
1498 
1499 	if (dirp->d_ino != 0 &&
1500 	    strcmp(dirp->d_name, ".") != 0 &&
1501 	    strcmp(dirp->d_name, "..") != 0) {
1502 		statemap[dirp->d_ino] &= ~INFOUND;
1503 		statemap[dirp->d_ino] |= INDELAYD;
1504 	}
1505 
1506 	return (KEEPON);
1507 }
1508 
1509 static void
1510 mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1511 {
1512 	struct dinode *dp;
1513 	struct inodesc idelayed;
1514 
1515 	init_inodesc(&idelayed);
1516 	idelayed.id_number = ino;
1517 	idelayed.id_type = DATA;
1518 	idelayed.id_fix = NOFIX;
1519 	idelayed.id_func = mark_a_delayed_inode;
1520 	idelayed.id_parent = first_lfn;
1521 	idelayed.id_entryno = 2;
1522 
1523 	dp = ginode(ino);
1524 	(void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1525 }
1526 
1527 /*
1528  * Clear the i_oeftflag/extended attribute pointer from INO.
1529  */
1530 void
1531 clearattrref(fsck_ino_t ino)
1532 {
1533 	struct dinode *dp;
1534 
1535 	dp = ginode(ino);
1536 	if (debug) {
1537 		if (dp->di_oeftflag == 0)
1538 			(void) printf("clearattref: no attr to clear on %d\n",
1539 			    ino);
1540 	}
1541 
1542 	dp->di_oeftflag = 0;
1543 	inodirty();
1544 }
1545