xref: /freebsd/sbin/fsck_ffs/inode.c (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if 0
33 #ifndef lint
34 static const char sccsid[] = "@(#)inode.c	8.8 (Berkeley) 4/28/95";
35 #endif /* not lint */
36 #endif
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/stat.h>
42 #include <sys/stdint.h>
43 #include <sys/sysctl.h>
44 
45 #include <ufs/ufs/dinode.h>
46 #include <ufs/ufs/dir.h>
47 #include <ufs/ffs/fs.h>
48 
49 #include <err.h>
50 #include <pwd.h>
51 #include <string.h>
52 #include <time.h>
53 #include <libufs.h>
54 
55 #include "fsck.h"
56 
/*
 * Buffer holding the inode block most recently loaded by ginode().
 * ginode() keeps its own reference on it while it is cached.
 */
struct bufarea *icachebp;	/* inode cache buffer */

/* Forward declarations of the helpers private to this file. */
static int iblock(struct inodesc *, off_t isize, int type);
static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
    struct bufarea **);
static int snapclean(struct inodesc *idesc);
static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long));
65 
/*
 * Walk the block pointers of the inode dp and apply the check described
 * by idesc to every allocated block.
 *
 * Direct blocks are handed to idesc->id_func, or to dirscan() when
 * idesc->id_type is DATA; indirect blocks are handed to iblock().
 * Device nodes and short symbolic links have no blocks to walk and
 * return KEEPON immediately.
 *
 * Returns KEEPON after a complete walk, the callback's result as soon as
 * it has STOP set, or STOP on a negative size or a hole in the direct
 * blocks of a directory.
 */
int
ckinode(union dinode *dp, struct inodesc *idesc)
{
	off_t remsize, sizepb;
	int i, offset, ret;
	struct inode ip;
	union dinode dino;
	ufs2_daddr_t ndb;
	mode_t mode;
	char pathbuf[MAXPATHLEN + 1];

	/* Reset the per-walk state carried in the descriptor. */
	if (idesc->id_fix != IGNORE)
		idesc->id_fix = DONTKNOW;
	idesc->id_dp = dp;
	idesc->id_lbn = -1;
	idesc->id_lballoc = -1;
	idesc->id_level = 0;
	idesc->id_entryno = 0;
	idesc->id_filesize = DIP(dp, di_size);
	mode = DIP(dp, di_mode) & IFMT;
	if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
	    DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
		return (KEEPON);
	/* Work from a private copy of the dinode for the rest of the walk. */
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		dino.dp1 = dp->dp1;
	else
		dino.dp2 = dp->dp2;
	if (DIP(&dino, di_size) < 0) {
		pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size));
		return (STOP);
	}
	/* ndb counts down the blocks still covered by the file size. */
	ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
	for (i = 0; i < UFS_NDADDR; i++) {
		idesc->id_lbn++;
		/*
		 * The block that ends the file may be a partial block;
		 * every other block is a full fs_frag fragments.
		 */
		if (--ndb == 0 &&
		    (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
			idesc->id_numfrags =
				numfrags(&sblock, fragroundup(&sblock, offset));
		else
			idesc->id_numfrags = sblock.fs_frag;
		if (DIP(&dino, di_db[i]) == 0) {
			if (idesc->id_type == DATA && ndb >= 0) {
				/* An empty block in a directory XXX */
				getpathname(pathbuf, idesc->id_number,
						idesc->id_number);
				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
					pathbuf);
				if (reply("ADJUST LENGTH") == 1) {
					/* Truncate the directory just before the hole. */
					ginode(idesc->id_number, &ip);
					DIP_SET(ip.i_dp, di_size,
					    i * sblock.fs_bsize);
					printf(
					    "YOU MUST RERUN FSCK AFTERWARDS\n");
					rerun = 1;
					inodirty(&ip);
					irelse(&ip);
				}
				return (STOP);
			}
			continue;
		}
		idesc->id_blkno = DIP(&dino, di_db[i]);
		if (idesc->id_type != DATA)
			ret = (*idesc->id_func)(idesc);
		else
			ret = dirscan(idesc);
		if (ret & STOP)
			return (ret);
	}
	idesc->id_numfrags = sblock.fs_frag;
	/* remsize is the part of the file that lies beyond the direct blocks. */
	remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR;
	sizepb = sblock.fs_bsize;
	for (i = 0; i < UFS_NIADDR; i++) {
		/* sizepb becomes the number of bytes mapped by indirect level i. */
		sizepb *= NINDIR(&sblock);
		idesc->id_level = i + 1;
		if (DIP(&dino, di_ib[i])) {
			idesc->id_blkno = DIP(&dino, di_ib[i]);
			ret = iblock(idesc, remsize, BT_LEVEL1 + i);
			if (ret & STOP)
				return (ret);
		} else if (remsize > 0) {
			/* Skip the logical blocks this missing tree would map. */
			idesc->id_lbn += sizepb / sblock.fs_bsize;
			if (idesc->id_type == DATA) {
				/* An empty block in a directory XXX */
				getpathname(pathbuf, idesc->id_number,
						idesc->id_number);
				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
					pathbuf);
				if (reply("ADJUST LENGTH") == 1) {
					/* Drop the unmapped tail from the size. */
					ginode(idesc->id_number, &ip);
					DIP_SET(ip.i_dp, di_size,
					    DIP(ip.i_dp, di_size) - remsize);
					remsize = 0;
					printf(
					    "YOU MUST RERUN FSCK AFTERWARDS\n");
					rerun = 1;
					inodirty(&ip);
					irelse(&ip);
					break;
				}
			}
		}
		remsize -= sizepb;
	}
	return (KEEPON);
}
172 
173 static int
174 iblock(struct inodesc *idesc, off_t isize, int type)
175 {
176 	struct inode ip;
177 	struct bufarea *bp;
178 	int i, n, (*func)(struct inodesc *), nif;
179 	off_t sizepb;
180 	char buf[BUFSIZ];
181 	char pathbuf[MAXPATHLEN + 1];
182 
183 	if (idesc->id_type != DATA) {
184 		func = idesc->id_func;
185 		if (((n = (*func)(idesc)) & KEEPON) == 0)
186 			return (n);
187 	} else
188 		func = dirscan;
189 	bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
190 	if (bp->b_errs != 0) {
191 		brelse(bp);
192 		return (SKIP);
193 	}
194 	idesc->id_bp = bp;
195 	idesc->id_level--;
196 	for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++)
197 		sizepb *= NINDIR(&sblock);
198 	if (howmany(isize, sizepb) > NINDIR(&sblock))
199 		nif = NINDIR(&sblock);
200 	else
201 		nif = howmany(isize, sizepb);
202 	if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
203 		for (i = nif; i < NINDIR(&sblock); i++) {
204 			if (IBLK(bp, i) == 0)
205 				continue;
206 			(void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
207 			    (u_long)idesc->id_number);
208 			if (preen) {
209 				pfatal("%s", buf);
210 			} else if (dofix(idesc, buf)) {
211 				IBLK_SET(bp, i, 0);
212 				dirty(bp);
213 			}
214 		}
215 		flush(fswritefd, bp);
216 	}
217 	for (i = 0; i < nif; i++) {
218 		if (IBLK(bp, i)) {
219 			idesc->id_blkno = IBLK(bp, i);
220 			bp->b_index = i;
221 			if (idesc->id_level == 0) {
222 				idesc->id_lbn++;
223 				n = (*func)(idesc);
224 			} else {
225 				n = iblock(idesc, isize, type - 1);
226 				idesc->id_level++;
227 			}
228 			if (n & STOP) {
229 				brelse(bp);
230 				return (n);
231 			}
232 		} else {
233 			idesc->id_lbn += sizepb / sblock.fs_bsize;
234 			if (idesc->id_type == DATA && isize > 0) {
235 				/* An empty block in a directory XXX */
236 				getpathname(pathbuf, idesc->id_number,
237 						idesc->id_number);
238 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
239 					pathbuf);
240 				if (reply("ADJUST LENGTH") == 1) {
241 					ginode(idesc->id_number, &ip);
242 					DIP_SET(ip.i_dp, di_size,
243 					    DIP(ip.i_dp, di_size) - isize);
244 					isize = 0;
245 					printf(
246 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
247 					rerun = 1;
248 					inodirty(&ip);
249 					brelse(bp);
250 					return(STOP);
251 				}
252 			}
253 		}
254 		isize -= sizepb;
255 	}
256 	brelse(bp);
257 	return (KEEPON);
258 }
259 
260 /*
261  * Finds the disk block address at the specified lbn within the inode
262  * specified by dp.  This follows the whole tree and honors di_size and
263  * di_extsize so it is a true test of reachability.  The lbn may be
264  * negative if an extattr or indirect block is requested.
265  */
266 ufs2_daddr_t
267 ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags,
268     struct bufarea **bpp)
269 {
270 	ufs_lbn_t tmpval;
271 	ufs_lbn_t cur;
272 	ufs_lbn_t next;
273 	int i;
274 
275 	*frags = 0;
276 	if (bpp != NULL)
277 		*bpp = NULL;
278 	/*
279 	 * Handle extattr blocks first.
280 	 */
281 	if (lbn < 0 && lbn >= -UFS_NXADDR) {
282 		lbn = -1 - lbn;
283 		if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1))
284 			return (0);
285 		*frags = numfrags(&sblock,
286 		    sblksize(&sblock, dp->dp2.di_extsize, lbn));
287 		return (dp->dp2.di_extb[lbn]);
288 	}
289 	/*
290 	 * Now direct and indirect.
291 	 */
292 	if (DIP(dp, di_mode) == IFLNK &&
293 	    DIP(dp, di_size) < sblock.fs_maxsymlinklen)
294 		return (0);
295 	if (lbn >= 0 && lbn < UFS_NDADDR) {
296 		*frags = numfrags(&sblock,
297 		    sblksize(&sblock, DIP(dp, di_size), lbn));
298 		return (DIP(dp, di_db[lbn]));
299 	}
300 	*frags = sblock.fs_frag;
301 
302 	for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR;
303 	    i++, tmpval *= NINDIR(&sblock), cur = next) {
304 		next = cur + tmpval;
305 		if (lbn == -cur - i)
306 			return (DIP(dp, di_ib[i]));
307 		/*
308 		 * Determine whether the lbn in question is within this tree.
309 		 */
310 		if (lbn < 0 && -lbn >= next)
311 			continue;
312 		if (lbn > 0 && lbn >= next)
313 			continue;
314 		if (DIP(dp, di_ib[i]) == 0)
315 			return (0);
316 		return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn,
317 		    bpp));
318 	}
319 	pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino);
320 	return (0);
321 }
322 
323 /*
324  * Fetch an indirect block to find the block at a given lbn.  The lbn
325  * may be negative to fetch a specific indirect block pointer or positive
326  * to fetch a specific block.
327  */
328 static ufs2_daddr_t
329 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn,
330     struct bufarea **bpp)
331 {
332 	struct bufarea *bp;
333 	ufs_lbn_t lbnadd;
334 	ufs_lbn_t base;
335 	int i, level;
336 
337 	level = lbn_level(cur);
338 	if (level == -1)
339 		pfatal("Invalid indir lbn %jd in ino %ju\n",
340 		    lbn, (uintmax_t)ino);
341 	if (level == 0 && lbn < 0)
342 		pfatal("Invalid lbn %jd in ino %ju\n",
343 		    lbn, (uintmax_t)ino);
344 	lbnadd = 1;
345 	base = -(cur + level);
346 	for (i = level; i > 0; i--)
347 		lbnadd *= NINDIR(&sblock);
348 	if (lbn > 0)
349 		i = (lbn - base) / lbnadd;
350 	else
351 		i = (-lbn - base) / lbnadd;
352 	if (i < 0 || i >= NINDIR(&sblock)) {
353 		pfatal("Invalid indirect index %d produced by lbn %jd "
354 		    "in ino %ju\n", i, lbn, (uintmax_t)ino);
355 		return (0);
356 	}
357 	if (level == 0)
358 		cur = base + (i * lbnadd);
359 	else
360 		cur = -(base + (i * lbnadd)) - (level - 1);
361 	bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level);
362 	if (bp->b_errs != 0)
363 		return (0);
364 	blk = IBLK(bp, i);
365 	bp->b_index = i;
366 	if (cur == lbn || blk == 0) {
367 		if (bpp != NULL)
368 			*bpp = bp;
369 		else
370 			brelse(bp);
371 		return (blk);
372 	}
373 	brelse(bp);
374 	if (level == 0)
375 		pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn,
376 		    (uintmax_t)ino);
377 	return (indir_blkatoff(blk, ino, cur, lbn, bpp));
378 }
379 
380 /*
381  * Check that a block in a legal block number.
382  * Return 0 if in range, 1 if out of range.
383  */
384 int
385 chkrange(ufs2_daddr_t blk, int cnt)
386 {
387 	int c;
388 
389 	if (cnt <= 0 || blk <= 0 || blk >= maxfsblock ||
390 	    cnt > maxfsblock - blk) {
391 		if (debug)
392 			printf("out of range: blk %ld, offset %i, size %d\n",
393 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
394 		return (1);
395 	}
396 	if (cnt > sblock.fs_frag ||
397 	    fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
398 		if (debug)
399 			printf("bad size: blk %ld, offset %i, size %d\n",
400 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
401 		return (1);
402 	}
403 	c = dtog(&sblock, blk);
404 	if (blk < cgdmin(&sblock, c)) {
405 		if ((blk + cnt) > cgsblock(&sblock, c)) {
406 			if (debug) {
407 				printf("blk %ld < cgdmin %ld;",
408 				    (long)blk, (long)cgdmin(&sblock, c));
409 				printf(" blk + cnt %ld > cgsbase %ld\n",
410 				    (long)(blk + cnt),
411 				    (long)cgsblock(&sblock, c));
412 			}
413 			return (1);
414 		}
415 	} else {
416 		if ((blk + cnt) > cgbase(&sblock, c+1)) {
417 			if (debug)  {
418 				printf("blk %ld >= cgdmin %ld;",
419 				    (long)blk, (long)cgdmin(&sblock, c));
420 				printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
421 				    (long)(blk + cnt), (long)sblock.fs_fpg);
422 			}
423 			return (1);
424 		}
425 	}
426 	return (0);
427 }
428 
429 /*
430  * General purpose interface for reading inodes.
431  *
432  * firstinum and lastinum track contents of getnextino() cache (below).
433  */
434 static ino_t firstinum, lastinum;
435 static struct bufarea inobuf;
436 
437 void
438 ginode(ino_t inumber, struct inode *ip)
439 {
440 	ufs2_daddr_t iblk;
441 	struct ufs2_dinode *dp;
442 
443 	if (inumber < UFS_ROOTINO || inumber >= maxino)
444 		errx(EEXIT, "bad inode number %ju to ginode",
445 		    (uintmax_t)inumber);
446 	ip->i_number = inumber;
447 	if (inumber >= firstinum && inumber < lastinum) {
448 		/* contents in getnextino() cache */
449 		ip->i_bp = &inobuf;
450 		inobuf.b_refcnt++;
451 		inobuf.b_index = firstinum;
452 	} else if (icachebp != NULL &&
453 	    inumber >= icachebp->b_index &&
454 	    inumber < icachebp->b_index + INOPB(&sblock)) {
455 		/* take an additional reference for the returned inode */
456 		icachebp->b_refcnt++;
457 		ip->i_bp = icachebp;
458 	} else {
459 		iblk = ino_to_fsba(&sblock, inumber);
460 		/* release our cache-hold reference on old icachebp */
461 		if (icachebp != NULL)
462 			brelse(icachebp);
463 		icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
464 		if (icachebp->b_errs != 0) {
465 			icachebp = NULL;
466 			ip->i_bp = NULL;
467 			ip->i_dp = &zino;
468 			return;
469 		}
470 		/* take a cache-hold reference on new icachebp */
471 		icachebp->b_refcnt++;
472 		icachebp->b_index = rounddown(inumber, INOPB(&sblock));
473 		ip->i_bp = icachebp;
474 	}
475 	if (sblock.fs_magic == FS_UFS1_MAGIC) {
476 		ip->i_dp = (union dinode *)
477 		    &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index];
478 		return;
479 	}
480 	ip->i_dp = (union dinode *)
481 	    &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index];
482 	dp = (struct ufs2_dinode *)ip->i_dp;
483 	/* Do not check hash of inodes being created */
484 	if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) {
485 		pwarn("INODE CHECK-HASH FAILED");
486 		prtinode(ip);
487 		if (preen || reply("FIX") != 0) {
488 			if (preen)
489 				printf(" (FIXED)\n");
490 			ffs_update_dinode_ckhash(&sblock, dp);
491 			inodirty(ip);
492 		}
493 	}
494 }
495 
496 /*
497  * Release a held inode.
498  */
499 void
500 irelse(struct inode *ip)
501 {
502 
503 	/* Check for failed inode read */
504 	if (ip->i_bp == NULL)
505 		return;
506 	if (debug && sblock.fs_magic == FS_UFS2_MAGIC &&
507 	    ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
508 		pwarn("irelse: releasing inode with bad check-hash");
509 		prtinode(ip);
510 	}
511 	if (ip->i_bp->b_refcnt <= 0)
512 		pfatal("irelse: releasing unreferenced ino %ju\n",
513 		    (uintmax_t) ip->i_number);
514 	brelse(ip->i_bp);
515 }
516 
/*
 * Special purpose version of ginode used to optimize first pass
 * over all the inodes in numerical order.
 *
 * nextinum is the inode number expected by the next call and
 * lastvalidinum the last one that may be requested; both are set up by
 * setinodebuf().  readcount counts the buffer refills done so far and
 * readpercg the number of refills per cylinder group.  fullcnt and
 * inobufsize are the inode count and byte size of a full refill;
 * partialcnt and partialsize are those of the final refill of a group.
 */
static ino_t nextinum, lastvalidinum;
static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;

/*
 * Return the on-disk inode for inumber, which must be the inode that
 * follows the one returned by the previous call.  Any other number is
 * fatal.
 *
 * When rebuiltcg is set, the first inode of each freshly read buffer is
 * sanity checked and NULL is returned if it does not look like a valid
 * inode.
 */
union dinode *
getnextinode(ino_t inumber, int rebuiltcg)
{
	int j;
	long size;
	mode_t mode;
	ufs2_daddr_t ndb, blk;
	union dinode *dp;
	struct inode ip;
	static caddr_t nextinop;	/* next inode to hand out from inobuf */

	if (inumber != nextinum++ || inumber > lastvalidinum)
		errx(EEXIT, "bad inode number %ju to nextinode",
		    (uintmax_t)inumber);
	if (inumber >= lastinum) {
		/* The buffer is exhausted: read the next run of inodes. */
		readcount++;
		firstinum = lastinum;
		blk = ino_to_fsba(&sblock, lastinum);
		/* The last read of a cylinder group uses the partial size. */
		if (readcount % readpercg == 0) {
			size = partialsize;
			lastinum += partialcnt;
		} else {
			size = inobufsize;
			lastinum += fullcnt;
		}
		/*
		 * Flush old contents in case they have been updated.
		 * If getblk encounters an error, it will already have zeroed
		 * out the buffer, so we do not need to do so here.
		 */
		if (inobuf.b_refcnt != 0)
			pfatal("Non-zero getnextinode() ref count %d\n",
			    inobuf.b_refcnt);
		flush(fswritefd, &inobuf);
		getblk(&inobuf, blk, size);
		nextinop = inobuf.b_un.b_buf;
	}
	dp = (union dinode *)nextinop;
	/* Step past the inode being returned. */
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		nextinop += sizeof(struct ufs1_dinode);
	else
		nextinop += sizeof(struct ufs2_dinode);
	/* Inode check-hashes are being added: compute one for this inode. */
	if ((ckhashadd & CK_INODE) != 0) {
		ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
		dirty(&inobuf);
	}
	if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
		pwarn("INODE CHECK-HASH FAILED");
		/* Build a temporary inode so that prtinode() can describe it. */
		ip.i_bp = NULL;
		ip.i_dp = dp;
		ip.i_number = inumber;
		prtinode(&ip);
		if (preen || reply("FIX") != 0) {
			if (preen)
				printf(" (FIXED)\n");
			ffs_update_dinode_ckhash(&sblock,
			    (struct ufs2_dinode *)dp);
			dirty(&inobuf);
		}
	}
	if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) {
		/*
		 * Try to determine if we have reached the end of the
		 * allocated inodes.
		 */
		mode = DIP(dp, di_mode) & IFMT;
		if (mode == 0) {
			/* An unused inode must not carry blocks, mode or size. */
			if (memcmp(dp->dp2.di_db, zino.dp2.di_db,
				UFS_NDADDR * sizeof(ufs2_daddr_t)) ||
			      memcmp(dp->dp2.di_ib, zino.dp2.di_ib,
				UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
			      dp->dp2.di_mode || dp->dp2.di_size)
				return (NULL);
			return (dp);
		}
		if (!ftypeok(dp))
			return (NULL);
		ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
		if (ndb < 0)
			return (NULL);
		if (mode == IFBLK || mode == IFCHR)
			ndb++;
		if (mode == IFLNK) {
			/*
			 * Fake ndb value so direct/indirect block checks below
			 * will detect any garbage after symlink string.
			 */
			if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
				ndb = howmany(DIP(dp, di_size),
				    sizeof(ufs2_daddr_t));
				if (ndb > UFS_NDADDR) {
					j = ndb - UFS_NDADDR;
					for (ndb = 1; j > 1; j--)
						ndb *= NINDIR(&sblock);
					ndb += UFS_NDADDR;
				}
			}
		}
		/* Direct pointers beyond the file size must be zero. */
		for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++)
			if (DIP(dp, di_db[j]) != 0)
				return (NULL);
		/* Count the indirect levels needed, then check the rest. */
		for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++)
			ndb /= NINDIR(&sblock);
		for (; j < UFS_NIADDR; j++)
			if (DIP(dp, di_ib[j]) != 0)
				return (NULL);
	}
	return (dp);
}
633 
634 void
635 setinodebuf(int cg, ino_t inosused)
636 {
637 	ino_t inum;
638 
639 	inum = cg * sblock.fs_ipg;
640 	lastvalidinum = inum + inosused - 1;
641 	nextinum = inum;
642 	lastinum = inum;
643 	readcount = 0;
644 	/* Flush old contents in case they have been updated */
645 	flush(fswritefd, &inobuf);
646 	inobuf.b_bno = 0;
647 	if (inobuf.b_un.b_buf == NULL) {
648 		inobufsize = blkroundup(&sblock,
649 		    MAX(INOBUFSIZE, sblock.fs_bsize));
650 		initbarea(&inobuf, BT_INODES);
651 		if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL)
652 			errx(EEXIT, "cannot allocate space for inode buffer");
653 	}
654 	fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
655 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
656 	readpercg = inosused / fullcnt;
657 	partialcnt = inosused % fullcnt;
658 	partialsize = fragroundup(&sblock,
659 	    partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
660 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
661 	if (partialcnt != 0) {
662 		readpercg++;
663 	} else {
664 		partialcnt = fullcnt;
665 		partialsize = inobufsize;
666 	}
667 }
668 
669 int
670 freeblock(struct inodesc *idesc)
671 {
672 	struct dups *dlp;
673 	struct bufarea *cgbp;
674 	struct cg *cgp;
675 	ufs2_daddr_t blkno;
676 	long size, nfrags;
677 
678 	blkno = idesc->id_blkno;
679 	if (idesc->id_type == SNAP) {
680 		pfatal("clearing a snapshot dinode\n");
681 		return (STOP);
682 	}
683 	size = lfragtosize(&sblock, idesc->id_numfrags);
684 	if (snapblkfree(&sblock, blkno, size, idesc->id_number,
685 	    std_checkblkavail))
686 		return (KEEPON);
687 	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
688 		if (chkrange(blkno, 1)) {
689 			return (SKIP);
690 		} else if (testbmap(blkno)) {
691 			for (dlp = duplist; dlp; dlp = dlp->next) {
692 				if (dlp->dup != blkno)
693 					continue;
694 				dlp->dup = duplist->dup;
695 				dlp = duplist;
696 				duplist = duplist->next;
697 				free((char *)dlp);
698 				break;
699 			}
700 			if (dlp == NULL) {
701 				clrbmap(blkno);
702 				n_blks--;
703 			}
704 		}
705 	}
706 	/*
707 	 * If all successfully returned, account for them.
708 	 */
709 	if (nfrags == 0) {
710 		cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
711 		cgp = cgbp->b_un.b_cg;
712 		if (idesc->id_numfrags == sblock.fs_frag)
713 			cgp->cg_cs.cs_nbfree++;
714 		else
715 			cgp->cg_cs.cs_nffree += idesc->id_numfrags;
716 		cgdirty(cgbp);
717 	}
718 	return (KEEPON);
719 }
720 
721 /*
722  * Prepare a snapshot file for being removed.
723  */
724 void
725 snapremove(ino_t inum)
726 {
727 	struct inodesc idesc;
728 	struct inode ip;
729 	int i;
730 
731 	for (i = 0; i < snapcnt; i++)
732 		if (snaplist[i].i_number == inum)
733 			break;
734 	if (i == snapcnt)
735 		ginode(inum, &ip);
736 	else
737 		ip = snaplist[i];
738 	if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
739 		printf("snapremove: inode %jd is not a snapshot\n",
740 		    (intmax_t)inum);
741 		if (i == snapcnt)
742 			irelse(&ip);
743 		return;
744 	}
745 	if (debug)
746 		printf("snapremove: remove %sactive snapshot %jd\n",
747 		    i == snapcnt ? "in" : "", (intmax_t)inum);
748 	/*
749 	 * If on active snapshot list, remove it.
750 	 */
751 	if (i < snapcnt) {
752 		for (i++; i < FSMAXSNAP; i++) {
753 			if (sblock.fs_snapinum[i] == 0)
754 				break;
755 			snaplist[i - 1] = snaplist[i];
756 			sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
757 		}
758 		sblock.fs_snapinum[i - 1] = 0;
759 		bzero(&snaplist[i - 1], sizeof(struct inode));
760 		snapcnt--;
761 	}
762 	memset(&idesc, 0, sizeof(struct inodesc));
763 	idesc.id_type = SNAP;
764 	idesc.id_func = snapclean;
765 	idesc.id_number = inum;
766 	(void)ckinode(ip.i_dp, &idesc);
767 	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
768 	inodirty(&ip);
769 	irelse(&ip);
770 }
771 
772 static int
773 snapclean(struct inodesc *idesc)
774 {
775 	ufs2_daddr_t blkno;
776 	struct bufarea *bp;
777 	union dinode *dp;
778 
779 	blkno = idesc->id_blkno;
780 	if (blkno == 0)
781 		return (KEEPON);
782 
783 	dp = idesc->id_dp;
784 	if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
785 		if (idesc->id_lbn < UFS_NDADDR) {
786 			DIP_SET(dp, di_db[idesc->id_lbn], 0);
787 		} else {
788 			bp = idesc->id_bp;
789 			IBLK_SET(bp, bp->b_index, 0);
790 			dirty(bp);
791 		}
792 	}
793 	return (KEEPON);
794 }
795 
796 /*
797  * Notification that a block is being freed. Return zero if the free
798  * should be allowed to proceed. Return non-zero if the snapshot file
799  * wants to claim the block. The block will be claimed if it is an
800  * uncopied part of one of the snapshots. It will be freed if it is
801  * either a BLK_NOCOPY or has already been copied in all of the snapshots.
802  * If a fragment is being freed, then all snapshots that care about
803  * it must make a copy since a snapshot file can only claim full sized
804  * blocks. Note that if more than one snapshot file maps the block,
805  * we can pick one at random to claim it. Since none of the snapshots
806  * can change, we are assurred that they will all see the same unmodified
807  * image. When deleting a snapshot file (see ino_trunc above), we
808  * must push any of these claimed blocks to one of the other snapshots
809  * that maps it. These claimed blocks are easily identified as they will
810  * have a block number equal to their logical block number within the
811  * snapshot. A copied block can never have this property because they
812  * must always have been allocated from a BLK_NOCOPY location.
813  */
814 int
815 snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
816 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
817 {
818 	union dinode *dp;
819 	struct inode ip;
820 	struct bufarea *snapbp;
821 	ufs_lbn_t lbn;
822 	ufs2_daddr_t blkno, relblkno;
823 	int i, frags, claimedblk, copydone;
824 
825 	/* If no snapshots, nothing to do */
826 	if (snapcnt == 0)
827 		return (0);
828 	if (debug)
829 		printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
830 		    (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
831 	relblkno = blknum(fs, bno);
832 	lbn = fragstoblks(fs, relblkno);
833 	/* Direct blocks are always pre-copied */
834 	if (lbn < UFS_NDADDR)
835 		return (0);
836 	copydone = 0;
837 	claimedblk = 0;
838 	for (i = 0; i < snapcnt; i++) {
839 		/*
840 		 * Lookup block being freed.
841 		 */
842 		ip = snaplist[i];
843 		dp = ip.i_dp;
844 		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
845 		    lbn, &frags, &snapbp);
846 		/*
847 		 * Check to see if block needs to be copied.
848 		 */
849 		if (blkno == 0) {
850 			/*
851 			 * A block that we map is being freed. If it has not
852 			 * been claimed yet, we will claim or copy it (below).
853 			 */
854 			claimedblk = 1;
855 		} else if (blkno == BLK_SNAP) {
856 			/*
857 			 * No previous snapshot claimed the block,
858 			 * so it will be freed and become a BLK_NOCOPY
859 			 * (don't care) for us.
860 			 */
861 			if (claimedblk)
862 				pfatal("snapblkfree: inconsistent block type");
863 			IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
864 			dirty(snapbp);
865 			brelse(snapbp);
866 			continue;
867 		} else /* BLK_NOCOPY or default */ {
868 			/*
869 			 * If the snapshot has already copied the block
870 			 * (default), or does not care about the block,
871 			 * it is not needed.
872 			 */
873 			brelse(snapbp);
874 			continue;
875 		}
876 		/*
877 		 * If this is a full size block, we will just grab it
878 		 * and assign it to the snapshot inode. Otherwise we
879 		 * will proceed to copy it. See explanation for this
880 		 * routine as to why only a single snapshot needs to
881 		 * claim this block.
882 		 */
883 		if (size == fs->fs_bsize) {
884 			if (debug)
885 				printf("Grabonremove snapshot %ju lbn %jd "
886 				    "from inum %ju\n", (intmax_t)ip.i_number,
887 				    (intmax_t)lbn, (uintmax_t)inum);
888 			IBLK_SET(snapbp, snapbp->b_index, relblkno);
889 			dirty(snapbp);
890 			brelse(snapbp);
891 			DIP_SET(dp, di_blocks,
892 			    DIP(dp, di_blocks) + btodb(size));
893 			inodirty(&ip);
894 			return (1);
895 		}
896 
897 		/* First time through, read the contents of the old block. */
898 		if (copydone == 0) {
899 			copydone = 1;
900 			if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
901 			    fs->fs_bsize) != 0) {
902 				pfatal("Could not read snapshot %ju block "
903 				    "%jd\n", (intmax_t)ip.i_number,
904 				    (intmax_t)relblkno);
905 				continue;
906 			}
907 		}
908 		/*
909 		 * This allocation will never require any additional
910 		 * allocations for the snapshot inode.
911 		 */
912 		blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
913 		    checkblkavail);
914 		if (blkno == 0) {
915 			pfatal("Could not allocate block for snapshot %ju\n",
916 			    (intmax_t)ip.i_number);
917 			continue;
918 		}
919 		if (debug)
920 			printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
921 			    "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
922 			    (intmax_t)lbn, (uintmax_t)inum, size,
923 			    (intmax_t)blkno);
924 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
925 		IBLK_SET(snapbp, snapbp->b_index, blkno);
926 		dirty(snapbp);
927 		brelse(snapbp);
928 		DIP_SET(dp, di_blocks,
929 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
930 		inodirty(&ip);
931 	}
932 	return (0);
933 }
934 
935 /*
936  * Notification that a block is being written. Return if the block
937  * is part of a snapshot as snapshots never track other snapshots.
938  * The block will be copied in all of the snapshots that are tracking
939  * it and have not yet copied it. Some buffers may hold more than one
940  * block. Here we need to check each block in the buffer.
941  */
942 void
943 copyonwrite(struct fs *fs, struct bufarea *bp,
944 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
945 {
946 	ufs2_daddr_t copyblkno;
947 	long i, numblks;
948 
949 	/* If no snapshots, nothing to do. */
950 	if (snapcnt == 0)
951 		return;
952 	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
953 	if (debug)
954 		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
955 		    (intmax_t)numblks, numblks > 1 ? "s" : "");
956 	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
957 	for (i = 0; i < numblks; i++) {
958 		chkcopyonwrite(fs, copyblkno, checkblkavail);
959 		copyblkno += fs->fs_frag;
960 	}
961 }
962 
963 static void
964 chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
965 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
966 {
967 	struct inode ip;
968 	union dinode *dp;
969 	struct bufarea *snapbp;
970 	ufs2_daddr_t blkno;
971 	int i, frags, copydone;
972 	ufs_lbn_t lbn;
973 
974 	lbn = fragstoblks(fs, copyblkno);
975 	/* Direct blocks are always pre-copied */
976 	if (lbn < UFS_NDADDR)
977 		return;
978 	copydone = 0;
979 	for (i = 0; i < snapcnt; i++) {
980 		/*
981 		 * Lookup block being freed.
982 		 */
983 		ip = snaplist[i];
984 		dp = ip.i_dp;
985 		blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
986 		/*
987 		 * Check to see if block needs to be copied.
988 		 */
989 		if (blkno != 0) {
990 			/*
991 			 * A block that we have already copied or don't track.
992 			 */
993 			brelse(snapbp);
994 			continue;
995 		}
996 		/* First time through, read the contents of the old block. */
997 		if (copydone == 0) {
998 			copydone = 1;
999 			if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
1000 			    fs->fs_bsize) != 0) {
1001 				pfatal("Could not read snapshot %ju block "
1002 				    "%jd\n", (intmax_t)ip.i_number,
1003 				    (intmax_t)copyblkno);
1004 				continue;
1005 			}
1006 		}
1007 		/*
1008 		 * This allocation will never require any additional
1009 		 * allocations for the snapshot inode.
1010 		 */
1011 		if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
1012 		    checkblkavail)) == 0) {
1013 			pfatal("Could not allocate block for snapshot %ju\n",
1014 			    (intmax_t)ip.i_number);
1015 			continue;
1016 		}
1017 		if (debug)
1018 			prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
1019 			    "blkno %ju setting in buffer",
1020 			    (intmax_t)ip.i_number, (intmax_t)lbn,
1021 			    (intmax_t)blkno);
1022 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
1023 		IBLK_SET(snapbp, snapbp->b_index, blkno);
1024 		dirty(snapbp);
1025 		brelse(snapbp);
1026 		DIP_SET(dp, di_blocks,
1027 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
1028 		inodirty(&ip);
1029 	}
1030 	return;
1031 }
1032 
1033 /*
1034  * Traverse an inode and check that its block count is correct
1035  * fixing it if necessary.
1036  */
1037 void
1038 check_blkcnt(struct inode *ip)
1039 {
1040 	struct inodesc idesc;
1041 	union dinode *dp;
1042 	ufs2_daddr_t ndb;
1043 	int j, ret, offset;
1044 
1045 	dp = ip->i_dp;
1046 	memset(&idesc, 0, sizeof(struct inodesc));
1047 	idesc.id_func = pass1check;
1048 	idesc.id_number = ip->i_number;
1049 	idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
1050 	(void)ckinode(dp, &idesc);
1051 	if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
1052 		ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
1053 		for (j = 0; j < UFS_NXADDR; j++) {
1054 			if (--ndb == 0 &&
1055 			    (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
1056 				idesc.id_numfrags = numfrags(&sblock,
1057 				    fragroundup(&sblock, offset));
1058 			else
1059 				idesc.id_numfrags = sblock.fs_frag;
1060 			if (dp->dp2.di_extb[j] == 0)
1061 				continue;
1062 			idesc.id_blkno = dp->dp2.di_extb[j];
1063 			ret = (*idesc.id_func)(&idesc);
1064 			if (ret & STOP)
1065 				break;
1066 		}
1067 	}
1068 	idesc.id_entryno *= btodb(sblock.fs_fsize);
1069 	if (DIP(dp, di_blocks) != idesc.id_entryno) {
1070 		if (!(sujrecovery && preen)) {
1071 			pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
1072 			    (u_long)idesc.id_number,
1073 			    (uintmax_t)DIP(dp, di_blocks),
1074 			    (uintmax_t)idesc.id_entryno);
1075 			if (preen)
1076 				printf(" (CORRECTED)\n");
1077 			else if (reply("CORRECT") == 0)
1078 				return;
1079 		}
1080 		if (bkgrdflag == 0) {
1081 			DIP_SET(dp, di_blocks, idesc.id_entryno);
1082 			inodirty(ip);
1083 		} else {
1084 			cmd.value = idesc.id_number;
1085 			cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
1086 			if (debug)
1087 				printf("adjblkcnt ino %ju amount %lld\n",
1088 				    (uintmax_t)cmd.value, (long long)cmd.size);
1089 			if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
1090 			    &cmd, sizeof cmd) == -1)
1091 				rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
1092 		}
1093 	}
1094 }
1095 
1096 void
1097 freeinodebuf(void)
1098 {
1099 	struct bufarea *bp;
1100 	int i;
1101 
1102 	/*
1103 	 * Flush old contents in case they have been updated.
1104 	 */
1105 	flush(fswritefd, &inobuf);
1106 	if (inobuf.b_un.b_buf != NULL)
1107 		free((char *)inobuf.b_un.b_buf);
1108 	inobuf.b_un.b_buf = NULL;
1109 	firstinum = lastinum = 0;
1110 	/*
1111 	 * Reload the snapshot inodes in case any of them changed.
1112 	 */
1113 	for (i = 0; i < snapcnt; i++) {
1114 		bp = snaplist[i].i_bp;
1115 		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
1116 		    bp->b_size);
1117 	}
1118 }
1119 
1120 /*
1121  * Routines to maintain information about directory inodes.
1122  * This is built during the first pass and used during the
1123  * second and third passes.
1124  *
1125  * Enter inodes into the cache.
1126  */
1127 struct inoinfo *
1128 cacheino(union dinode *dp, ino_t inumber)
1129 {
1130 	struct inoinfo *inp;
1131 	int i, blks;
1132 
1133 	if (getinoinfo(inumber) != NULL)
1134 		pfatal("cacheino: duplicate entry for ino %jd\n",
1135 		    (intmax_t)inumber);
1136 	if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
1137 		blks = UFS_NDADDR + UFS_NIADDR;
1138 	else if (DIP(dp, di_size) > 0)
1139 		blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
1140 	else
1141 		blks = 1;
1142 	inp = (struct inoinfo *)
1143 		Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
1144 	if (inp == NULL)
1145 		errx(EEXIT, "cannot increase directory list");
1146 	SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
1147 	inp->i_flags = 0;
1148 	inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
1149 	inp->i_dotdot = (ino_t)0;
1150 	inp->i_number = inumber;
1151 	inp->i_isize = DIP(dp, di_size);
1152 	inp->i_depth = DIP(dp, di_dirdepth);
1153 	inp->i_numblks = blks;
1154 	for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
1155 		inp->i_blks[i] = DIP(dp, di_db[i]);
1156 	if (blks > UFS_NDADDR)
1157 		for (i = 0; i < UFS_NIADDR; i++)
1158 			inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
1159 	if (inplast == listmax) {
1160 		listmax += 100;
1161 		inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
1162 		    listmax, sizeof(struct inoinfo *));
1163 		if (inpsort == NULL)
1164 			errx(EEXIT, "cannot increase directory list");
1165 	}
1166 	inpsort[inplast++] = inp;
1167 	return (inp);
1168 }
1169 
1170 /*
1171  * Look up an inode cache structure.
1172  */
1173 struct inoinfo *
1174 getinoinfo(ino_t inumber)
1175 {
1176 	struct inoinfo *inp;
1177 
1178 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1179 		if (inp->i_number != inumber)
1180 			continue;
1181 		return (inp);
1182 	}
1183 	return (NULL);
1184 }
1185 
1186 /*
1187  * Remove an entry from the inode cache and disk-order sorted list.
1188  * Return 0 on success and 1 on failure.
1189  */
1190 int
1191 removecachedino(ino_t inumber)
1192 {
1193 	struct inoinfo *inp, **inpp;
1194 	char *listtype;
1195 
1196 	listtype = "hash";
1197 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1198 		if (inp->i_number != inumber)
1199 			continue;
1200 		SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash);
1201 		for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
1202 			if (*inpp != inp)
1203 				continue;
1204 			*inpp = inpsort[inplast - 1];
1205 			inplast--;
1206 			free(inp);
1207 			return (0);
1208 		}
1209 		listtype = "sort";
1210 		break;
1211 	}
1212 	pfatal("removecachedino: entry for ino %jd not found on %s list\n",
1213 	    (intmax_t)inumber, listtype);
1214 	return (1);
1215 }
1216 
1217 /*
1218  * Clean up all the inode cache structure.
1219  */
1220 void
1221 inocleanup(void)
1222 {
1223 	struct inoinfo **inpp;
1224 
1225 	if (inphash == NULL)
1226 		return;
1227 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
1228 		free((char *)(*inpp));
1229 	free((char *)inphash);
1230 	inphash = NULL;
1231 	free((char *)inpsort);
1232 	inpsort = NULL;
1233 }
1234 
1235 void
1236 inodirty(struct inode *ip)
1237 {
1238 
1239 	if (sblock.fs_magic == FS_UFS2_MAGIC)
1240 		ffs_update_dinode_ckhash(&sblock,
1241 		    (struct ufs2_dinode *)ip->i_dp);
1242 	dirty(ip->i_bp);
1243 }
1244 
1245 void
1246 clri(struct inodesc *idesc, const char *type, int flag)
1247 {
1248 	union dinode *dp;
1249 	struct inode ip;
1250 
1251 	ginode(idesc->id_number, &ip);
1252 	dp = ip.i_dp;
1253 	if (flag == 1) {
1254 		pwarn("%s %s", type,
1255 		    (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
1256 		prtinode(&ip);
1257 		printf("\n");
1258 	}
1259 	if (preen || reply("CLEAR") == 1) {
1260 		if (preen)
1261 			printf(" (CLEARED)\n");
1262 		n_files--;
1263 		if (bkgrdflag == 0) {
1264 			if (idesc->id_type == SNAP) {
1265 				snapremove(idesc->id_number);
1266 				idesc->id_type = ADDR;
1267 			}
1268 			(void)ckinode(dp, idesc);
1269 			inoinfo(idesc->id_number)->ino_state = USTATE;
1270 			clearinode(dp);
1271 			inodirty(&ip);
1272 		} else {
1273 			cmd.value = idesc->id_number;
1274 			cmd.size = -DIP(dp, di_nlink);
1275 			if (debug)
1276 				printf("adjrefcnt ino %ld amt %lld\n",
1277 				    (long)cmd.value, (long long)cmd.size);
1278 			if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
1279 			    &cmd, sizeof cmd) == -1)
1280 				rwerror("ADJUST INODE", cmd.value);
1281 		}
1282 	}
1283 	irelse(&ip);
1284 }
1285 
1286 int
1287 findname(struct inodesc *idesc)
1288 {
1289 	struct direct *dirp = idesc->id_dirp;
1290 
1291 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1292 		idesc->id_entryno++;
1293 		return (KEEPON);
1294 	}
1295 	memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
1296 	return (STOP|FOUND);
1297 }
1298 
1299 int
1300 findino(struct inodesc *idesc)
1301 {
1302 	struct direct *dirp = idesc->id_dirp;
1303 
1304 	if (dirp->d_ino == 0)
1305 		return (KEEPON);
1306 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1307 	    dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) {
1308 		idesc->id_parent = dirp->d_ino;
1309 		return (STOP|FOUND);
1310 	}
1311 	return (KEEPON);
1312 }
1313 
1314 int
1315 clearentry(struct inodesc *idesc)
1316 {
1317 	struct direct *dirp = idesc->id_dirp;
1318 
1319 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1320 		idesc->id_entryno++;
1321 		return (KEEPON);
1322 	}
1323 	dirp->d_ino = 0;
1324 	return (STOP|FOUND|ALTERED);
1325 }
1326 
1327 void
1328 prtinode(struct inode *ip)
1329 {
1330 	char *p;
1331 	union dinode *dp;
1332 	struct passwd *pw;
1333 	time_t t;
1334 
1335 	dp = ip->i_dp;
1336 	printf(" I=%lu ", (u_long)ip->i_number);
1337 	if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino)
1338 		return;
1339 	printf(" OWNER=");
1340 	if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
1341 		printf("%s ", pw->pw_name);
1342 	else
1343 		printf("%u ", (unsigned)DIP(dp, di_uid));
1344 	printf("MODE=%o\n", DIP(dp, di_mode));
1345 	if (preen)
1346 		printf("%s: ", cdevname);
1347 	printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
1348 	t = DIP(dp, di_mtime);
1349 	if ((p = ctime(&t)) != NULL)
1350 		printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
1351 }
1352 
1353 void
1354 blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
1355 {
1356 
1357 	pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
1358 	printf("\n");
1359 	switch (inoinfo(ino)->ino_state) {
1360 
1361 	case FSTATE:
1362 	case FZLINK:
1363 		inoinfo(ino)->ino_state = FCLEAR;
1364 		return;
1365 
1366 	case DSTATE:
1367 	case DZLINK:
1368 		inoinfo(ino)->ino_state = DCLEAR;
1369 		return;
1370 
1371 	case FCLEAR:
1372 	case DCLEAR:
1373 		return;
1374 
1375 	default:
1376 		errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state);
1377 		/* NOTREACHED */
1378 	}
1379 }
1380 
1381 /*
1382  * allocate an unused inode
1383  */
1384 ino_t
1385 allocino(ino_t request, int type)
1386 {
1387 	ino_t ino;
1388 	struct inode ip;
1389 	union dinode *dp;
1390 	struct bufarea *cgbp;
1391 	struct cg *cgp;
1392 	int cg, anyino;
1393 
1394 	anyino = 0;
1395 	if (request == 0) {
1396 		request = UFS_ROOTINO;
1397 		anyino = 1;
1398 	} else if (inoinfo(request)->ino_state != USTATE)
1399 		return (0);
1400 retry:
1401 	for (ino = request; ino < maxino; ino++)
1402 		if (inoinfo(ino)->ino_state == USTATE)
1403 			break;
1404 	if (ino >= maxino)
1405 		return (0);
1406 	cg = ino_to_cg(&sblock, ino);
1407 	cgbp = cglookup(cg);
1408 	cgp = cgbp->b_un.b_cg;
1409 	if (!check_cgmagic(cg, cgbp)) {
1410 		if (anyino == 0)
1411 			return (0);
1412 		request = (cg + 1) * sblock.fs_ipg;
1413 		goto retry;
1414 	}
1415 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1416 	cgp->cg_cs.cs_nifree--;
1417 	switch (type & IFMT) {
1418 	case IFDIR:
1419 		inoinfo(ino)->ino_state = DSTATE;
1420 		cgp->cg_cs.cs_ndir++;
1421 		break;
1422 	case IFREG:
1423 	case IFLNK:
1424 		inoinfo(ino)->ino_state = FSTATE;
1425 		break;
1426 	default:
1427 		return (0);
1428 	}
1429 	cgdirty(cgbp);
1430 	ginode(ino, &ip);
1431 	dp = ip.i_dp;
1432 	memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ?
1433 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
1434 	DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
1435 	    std_checkblkavail));
1436 	if (DIP(dp, di_db[0]) == 0) {
1437 		inoinfo(ino)->ino_state = USTATE;
1438 		inodirty(&ip);
1439 		irelse(&ip);
1440 		return (0);
1441 	}
1442 	DIP_SET(dp, di_mode, type);
1443 	DIP_SET(dp, di_atime, time(NULL));
1444 	DIP_SET(dp, di_ctime, DIP(dp, di_atime));
1445 	DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
1446 	DIP_SET(dp, di_size, sblock.fs_fsize);
1447 	DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
1448 	n_files++;
1449 	inodirty(&ip);
1450 	irelse(&ip);
1451 	inoinfo(ino)->ino_type = IFTODT(type);
1452 	return (ino);
1453 }
1454 
1455 /*
1456  * deallocate an inode
1457  */
1458 void
1459 freeino(ino_t ino)
1460 {
1461 	struct inodesc idesc;
1462 	union dinode *dp;
1463 	struct inode ip;
1464 
1465 	memset(&idesc, 0, sizeof(struct inodesc));
1466 	idesc.id_type = ADDR;
1467 	idesc.id_func = freeblock;
1468 	idesc.id_number = ino;
1469 	ginode(ino, &ip);
1470 	dp = ip.i_dp;
1471 	(void)ckinode(dp, &idesc);
1472 	clearinode(dp);
1473 	inodirty(&ip);
1474 	irelse(&ip);
1475 	inoinfo(ino)->ino_state = USTATE;
1476 	n_files--;
1477 }
1478