xref: /freebsd/sbin/fsck_ffs/inode.c (revision 2008043f386721d58158e37e0d7e50df8095942d)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if 0
33 #ifndef lint
34 static const char sccsid[] = "@(#)inode.c	8.8 (Berkeley) 4/28/95";
35 #endif /* not lint */
36 #endif
37 #include <sys/cdefs.h>
38 #include <sys/param.h>
39 #include <sys/stat.h>
40 #include <sys/stdint.h>
41 #include <sys/sysctl.h>
42 
43 #include <ufs/ufs/dinode.h>
44 #include <ufs/ufs/dir.h>
45 #include <ufs/ffs/fs.h>
46 
47 #include <err.h>
48 #include <pwd.h>
49 #include <string.h>
50 #include <time.h>
51 #include <libufs.h>
52 
53 #include "fsck.h"
54 
/*
 * Buffer holding the inode block most recently fetched by ginode().
 * ginode() keeps a reference on it between calls so that requests for
 * inodes in the same block can be served without another lookup.
 */
struct bufarea *icachebp;	/* inode cache buffer */

/* Prototypes for the file-local helpers defined further down. */
static int iblock(struct inodesc *, off_t isize, int type);
static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
    struct bufarea **);
static int snapclean(struct inodesc *idesc);
static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long));
63 
64 int
65 ckinode(union dinode *dp, struct inodesc *idesc)
66 {
67 	off_t remsize, sizepb;
68 	int i, offset, ret;
69 	struct inode ip;
70 	union dinode dino;
71 	ufs2_daddr_t ndb;
72 	mode_t mode;
73 	char pathbuf[MAXPATHLEN + 1];
74 
75 	if (idesc->id_fix != IGNORE)
76 		idesc->id_fix = DONTKNOW;
77 	idesc->id_dp = dp;
78 	idesc->id_lbn = -1;
79 	idesc->id_lballoc = -1;
80 	idesc->id_level = 0;
81 	idesc->id_entryno = 0;
82 	idesc->id_filesize = DIP(dp, di_size);
83 	mode = DIP(dp, di_mode) & IFMT;
84 	if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
85 	    DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
86 		return (KEEPON);
87 	if (sblock.fs_magic == FS_UFS1_MAGIC)
88 		dino.dp1 = dp->dp1;
89 	else
90 		dino.dp2 = dp->dp2;
91 	if (DIP(&dino, di_size) < 0) {
92 		pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size));
93 		return (STOP);
94 	}
95 	ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
96 	for (i = 0; i < UFS_NDADDR; i++) {
97 		idesc->id_lbn++;
98 		if (--ndb == 0 &&
99 		    (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
100 			idesc->id_numfrags =
101 				numfrags(&sblock, fragroundup(&sblock, offset));
102 		else
103 			idesc->id_numfrags = sblock.fs_frag;
104 		if (DIP(&dino, di_db[i]) == 0) {
105 			if (idesc->id_type == DATA && ndb >= 0) {
106 				/* An empty block in a directory XXX */
107 				getpathname(pathbuf, idesc->id_number,
108 						idesc->id_number);
109 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
110 					pathbuf);
111 				if (reply("ADJUST LENGTH") == 1) {
112 					ginode(idesc->id_number, &ip);
113 					DIP_SET(ip.i_dp, di_size,
114 					    i * sblock.fs_bsize);
115 					printf(
116 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
117 					rerun = 1;
118 					inodirty(&ip);
119 					irelse(&ip);
120 				}
121 				return (STOP);
122 			}
123 			continue;
124 		}
125 		idesc->id_blkno = DIP(&dino, di_db[i]);
126 		if (idesc->id_type != DATA)
127 			ret = (*idesc->id_func)(idesc);
128 		else
129 			ret = dirscan(idesc);
130 		if (ret & STOP)
131 			return (ret);
132 	}
133 	idesc->id_numfrags = sblock.fs_frag;
134 	remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR;
135 	sizepb = sblock.fs_bsize;
136 	for (i = 0; i < UFS_NIADDR; i++) {
137 		sizepb *= NINDIR(&sblock);
138 		idesc->id_level = i + 1;
139 		if (DIP(&dino, di_ib[i])) {
140 			idesc->id_blkno = DIP(&dino, di_ib[i]);
141 			ret = iblock(idesc, remsize, BT_LEVEL1 + i);
142 			if (ret & STOP)
143 				return (ret);
144 		} else if (remsize > 0) {
145 			idesc->id_lbn += sizepb / sblock.fs_bsize;
146 			if (idesc->id_type == DATA) {
147 				/* An empty block in a directory XXX */
148 				getpathname(pathbuf, idesc->id_number,
149 						idesc->id_number);
150 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
151 					pathbuf);
152 				if (reply("ADJUST LENGTH") == 1) {
153 					ginode(idesc->id_number, &ip);
154 					DIP_SET(ip.i_dp, di_size,
155 					    DIP(ip.i_dp, di_size) - remsize);
156 					remsize = 0;
157 					printf(
158 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
159 					rerun = 1;
160 					inodirty(&ip);
161 					irelse(&ip);
162 					break;
163 				}
164 			}
165 		}
166 		remsize -= sizepb;
167 	}
168 	return (KEEPON);
169 }
170 
171 static int
172 iblock(struct inodesc *idesc, off_t isize, int type)
173 {
174 	struct inode ip;
175 	struct bufarea *bp;
176 	int i, n, (*func)(struct inodesc *), nif;
177 	off_t sizepb;
178 	char buf[BUFSIZ];
179 	char pathbuf[MAXPATHLEN + 1];
180 
181 	if (idesc->id_type != DATA) {
182 		func = idesc->id_func;
183 		if (((n = (*func)(idesc)) & KEEPON) == 0)
184 			return (n);
185 	} else
186 		func = dirscan;
187 	bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
188 	if (bp->b_errs != 0) {
189 		brelse(bp);
190 		return (SKIP);
191 	}
192 	idesc->id_bp = bp;
193 	idesc->id_level--;
194 	for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++)
195 		sizepb *= NINDIR(&sblock);
196 	if (howmany(isize, sizepb) > NINDIR(&sblock))
197 		nif = NINDIR(&sblock);
198 	else
199 		nif = howmany(isize, sizepb);
200 	if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
201 		for (i = nif; i < NINDIR(&sblock); i++) {
202 			if (IBLK(bp, i) == 0)
203 				continue;
204 			(void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
205 			    (u_long)idesc->id_number);
206 			if (preen) {
207 				pfatal("%s", buf);
208 			} else if (dofix(idesc, buf)) {
209 				IBLK_SET(bp, i, 0);
210 				dirty(bp);
211 			}
212 		}
213 		flush(fswritefd, bp);
214 	}
215 	for (i = 0; i < nif; i++) {
216 		if (IBLK(bp, i)) {
217 			idesc->id_blkno = IBLK(bp, i);
218 			bp->b_index = i;
219 			if (idesc->id_level == 0) {
220 				idesc->id_lbn++;
221 				n = (*func)(idesc);
222 			} else {
223 				n = iblock(idesc, isize, type - 1);
224 				idesc->id_level++;
225 			}
226 			if (n & STOP) {
227 				brelse(bp);
228 				return (n);
229 			}
230 		} else {
231 			idesc->id_lbn += sizepb / sblock.fs_bsize;
232 			if (idesc->id_type == DATA && isize > 0) {
233 				/* An empty block in a directory XXX */
234 				getpathname(pathbuf, idesc->id_number,
235 						idesc->id_number);
236 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
237 					pathbuf);
238 				if (reply("ADJUST LENGTH") == 1) {
239 					ginode(idesc->id_number, &ip);
240 					DIP_SET(ip.i_dp, di_size,
241 					    DIP(ip.i_dp, di_size) - isize);
242 					isize = 0;
243 					printf(
244 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
245 					rerun = 1;
246 					inodirty(&ip);
247 					brelse(bp);
248 					return(STOP);
249 				}
250 			}
251 		}
252 		isize -= sizepb;
253 	}
254 	brelse(bp);
255 	return (KEEPON);
256 }
257 
258 /*
259  * Finds the disk block address at the specified lbn within the inode
260  * specified by dp.  This follows the whole tree and honors di_size and
261  * di_extsize so it is a true test of reachability.  The lbn may be
262  * negative if an extattr or indirect block is requested.
263  */
264 ufs2_daddr_t
265 ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags,
266     struct bufarea **bpp)
267 {
268 	ufs_lbn_t tmpval;
269 	ufs_lbn_t cur;
270 	ufs_lbn_t next;
271 	int i;
272 
273 	*frags = 0;
274 	if (bpp != NULL)
275 		*bpp = NULL;
276 	/*
277 	 * Handle extattr blocks first.
278 	 */
279 	if (lbn < 0 && lbn >= -UFS_NXADDR) {
280 		lbn = -1 - lbn;
281 		if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1))
282 			return (0);
283 		*frags = numfrags(&sblock,
284 		    sblksize(&sblock, dp->dp2.di_extsize, lbn));
285 		return (dp->dp2.di_extb[lbn]);
286 	}
287 	/*
288 	 * Now direct and indirect.
289 	 */
290 	if (DIP(dp, di_mode) == IFLNK &&
291 	    DIP(dp, di_size) < sblock.fs_maxsymlinklen)
292 		return (0);
293 	if (lbn >= 0 && lbn < UFS_NDADDR) {
294 		*frags = numfrags(&sblock,
295 		    sblksize(&sblock, DIP(dp, di_size), lbn));
296 		return (DIP(dp, di_db[lbn]));
297 	}
298 	*frags = sblock.fs_frag;
299 
300 	for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR;
301 	    i++, tmpval *= NINDIR(&sblock), cur = next) {
302 		next = cur + tmpval;
303 		if (lbn == -cur - i)
304 			return (DIP(dp, di_ib[i]));
305 		/*
306 		 * Determine whether the lbn in question is within this tree.
307 		 */
308 		if (lbn < 0 && -lbn >= next)
309 			continue;
310 		if (lbn > 0 && lbn >= next)
311 			continue;
312 		if (DIP(dp, di_ib[i]) == 0)
313 			return (0);
314 		return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn,
315 		    bpp));
316 	}
317 	pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino);
318 	return (0);
319 }
320 
321 /*
322  * Fetch an indirect block to find the block at a given lbn.  The lbn
323  * may be negative to fetch a specific indirect block pointer or positive
324  * to fetch a specific block.
325  */
326 static ufs2_daddr_t
327 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn,
328     struct bufarea **bpp)
329 {
330 	struct bufarea *bp;
331 	ufs_lbn_t lbnadd;
332 	ufs_lbn_t base;
333 	int i, level;
334 
335 	level = lbn_level(cur);
336 	if (level == -1)
337 		pfatal("Invalid indir lbn %jd in ino %ju\n",
338 		    lbn, (uintmax_t)ino);
339 	if (level == 0 && lbn < 0)
340 		pfatal("Invalid lbn %jd in ino %ju\n",
341 		    lbn, (uintmax_t)ino);
342 	lbnadd = 1;
343 	base = -(cur + level);
344 	for (i = level; i > 0; i--)
345 		lbnadd *= NINDIR(&sblock);
346 	if (lbn > 0)
347 		i = (lbn - base) / lbnadd;
348 	else
349 		i = (-lbn - base) / lbnadd;
350 	if (i < 0 || i >= NINDIR(&sblock)) {
351 		pfatal("Invalid indirect index %d produced by lbn %jd "
352 		    "in ino %ju\n", i, lbn, (uintmax_t)ino);
353 		return (0);
354 	}
355 	if (level == 0)
356 		cur = base + (i * lbnadd);
357 	else
358 		cur = -(base + (i * lbnadd)) - (level - 1);
359 	bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level);
360 	if (bp->b_errs != 0)
361 		return (0);
362 	blk = IBLK(bp, i);
363 	bp->b_index = i;
364 	if (cur == lbn || blk == 0) {
365 		if (bpp != NULL)
366 			*bpp = bp;
367 		else
368 			brelse(bp);
369 		return (blk);
370 	}
371 	brelse(bp);
372 	if (level == 0)
373 		pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn,
374 		    (uintmax_t)ino);
375 	return (indir_blkatoff(blk, ino, cur, lbn, bpp));
376 }
377 
378 /*
379  * Check that a block in a legal block number.
380  * Return 0 if in range, 1 if out of range.
381  */
382 int
383 chkrange(ufs2_daddr_t blk, int cnt)
384 {
385 	int c;
386 
387 	if (cnt <= 0 || blk <= 0 || blk >= maxfsblock ||
388 	    cnt > maxfsblock - blk) {
389 		if (debug)
390 			printf("out of range: blk %ld, offset %i, size %d\n",
391 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
392 		return (1);
393 	}
394 	if (cnt > sblock.fs_frag ||
395 	    fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
396 		if (debug)
397 			printf("bad size: blk %ld, offset %i, size %d\n",
398 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
399 		return (1);
400 	}
401 	c = dtog(&sblock, blk);
402 	if (blk < cgdmin(&sblock, c)) {
403 		if ((blk + cnt) > cgsblock(&sblock, c)) {
404 			if (debug) {
405 				printf("blk %ld < cgdmin %ld;",
406 				    (long)blk, (long)cgdmin(&sblock, c));
407 				printf(" blk + cnt %ld > cgsbase %ld\n",
408 				    (long)(blk + cnt),
409 				    (long)cgsblock(&sblock, c));
410 			}
411 			return (1);
412 		}
413 	} else {
414 		if ((blk + cnt) > cgbase(&sblock, c+1)) {
415 			if (debug)  {
416 				printf("blk %ld >= cgdmin %ld;",
417 				    (long)blk, (long)cgdmin(&sblock, c));
418 				printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
419 				    (long)(blk + cnt), (long)sblock.fs_fpg);
420 			}
421 			return (1);
422 		}
423 	}
424 	return (0);
425 }
426 
427 /*
428  * General purpose interface for reading inodes.
429  *
430  * firstinum and lastinum track contents of getnextino() cache (below).
431  */
432 static ino_t firstinum, lastinum;
433 static struct bufarea inobuf;
434 
435 void
436 ginode(ino_t inumber, struct inode *ip)
437 {
438 	ufs2_daddr_t iblk;
439 	struct ufs2_dinode *dp;
440 
441 	if (inumber < UFS_ROOTINO || inumber >= maxino)
442 		errx(EEXIT, "bad inode number %ju to ginode",
443 		    (uintmax_t)inumber);
444 	ip->i_number = inumber;
445 	if (inumber >= firstinum && inumber < lastinum) {
446 		/* contents in getnextino() cache */
447 		ip->i_bp = &inobuf;
448 		inobuf.b_refcnt++;
449 		inobuf.b_index = firstinum;
450 	} else if (icachebp != NULL &&
451 	    inumber >= icachebp->b_index &&
452 	    inumber < icachebp->b_index + INOPB(&sblock)) {
453 		/* take an additional reference for the returned inode */
454 		icachebp->b_refcnt++;
455 		ip->i_bp = icachebp;
456 	} else {
457 		iblk = ino_to_fsba(&sblock, inumber);
458 		/* release our cache-hold reference on old icachebp */
459 		if (icachebp != NULL)
460 			brelse(icachebp);
461 		icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
462 		if (icachebp->b_errs != 0) {
463 			icachebp = NULL;
464 			ip->i_bp = NULL;
465 			ip->i_dp = &zino;
466 			return;
467 		}
468 		/* take a cache-hold reference on new icachebp */
469 		icachebp->b_refcnt++;
470 		icachebp->b_index = rounddown(inumber, INOPB(&sblock));
471 		ip->i_bp = icachebp;
472 	}
473 	if (sblock.fs_magic == FS_UFS1_MAGIC) {
474 		ip->i_dp = (union dinode *)
475 		    &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index];
476 		return;
477 	}
478 	ip->i_dp = (union dinode *)
479 	    &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index];
480 	dp = (struct ufs2_dinode *)ip->i_dp;
481 	/* Do not check hash of inodes being created */
482 	if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) {
483 		pwarn("INODE CHECK-HASH FAILED");
484 		prtinode(ip);
485 		if (preen || reply("FIX") != 0) {
486 			if (preen)
487 				printf(" (FIXED)\n");
488 			ffs_update_dinode_ckhash(&sblock, dp);
489 			inodirty(ip);
490 		}
491 	}
492 }
493 
494 /*
495  * Release a held inode.
496  */
497 void
498 irelse(struct inode *ip)
499 {
500 
501 	/* Check for failed inode read */
502 	if (ip->i_bp == NULL)
503 		return;
504 	if (debug && sblock.fs_magic == FS_UFS2_MAGIC &&
505 	    ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
506 		pwarn("irelse: releasing inode with bad check-hash");
507 		prtinode(ip);
508 	}
509 	if (ip->i_bp->b_refcnt <= 0)
510 		pfatal("irelse: releasing unreferenced ino %ju\n",
511 		    (uintmax_t) ip->i_number);
512 	brelse(ip->i_bp);
513 }
514 
515 /*
516  * Special purpose version of ginode used to optimize first pass
517  * over all the inodes in numerical order.
518  */
519 static ino_t nextinum, lastvalidinum;
520 static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;
521 
522 union dinode *
523 getnextinode(ino_t inumber, int rebuiltcg)
524 {
525 	int j;
526 	long size;
527 	mode_t mode;
528 	ufs2_daddr_t ndb, blk;
529 	union dinode *dp;
530 	struct inode ip;
531 	static caddr_t nextinop;
532 
533 	if (inumber != nextinum++ || inumber > lastvalidinum)
534 		errx(EEXIT, "bad inode number %ju to nextinode",
535 		    (uintmax_t)inumber);
536 	if (inumber >= lastinum) {
537 		readcount++;
538 		firstinum = lastinum;
539 		blk = ino_to_fsba(&sblock, lastinum);
540 		if (readcount % readpercg == 0) {
541 			size = partialsize;
542 			lastinum += partialcnt;
543 		} else {
544 			size = inobufsize;
545 			lastinum += fullcnt;
546 		}
547 		/*
548 		 * Flush old contents in case they have been updated.
549 		 * If getblk encounters an error, it will already have zeroed
550 		 * out the buffer, so we do not need to do so here.
551 		 */
552 		if (inobuf.b_refcnt != 0)
553 			pfatal("Non-zero getnextinode() ref count %d\n",
554 			    inobuf.b_refcnt);
555 		flush(fswritefd, &inobuf);
556 		getblk(&inobuf, blk, size);
557 		nextinop = inobuf.b_un.b_buf;
558 	}
559 	dp = (union dinode *)nextinop;
560 	if (sblock.fs_magic == FS_UFS1_MAGIC)
561 		nextinop += sizeof(struct ufs1_dinode);
562 	else
563 		nextinop += sizeof(struct ufs2_dinode);
564 	if ((ckhashadd & CK_INODE) != 0) {
565 		ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
566 		dirty(&inobuf);
567 	}
568 	if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
569 		pwarn("INODE CHECK-HASH FAILED");
570 		ip.i_bp = NULL;
571 		ip.i_dp = dp;
572 		ip.i_number = inumber;
573 		prtinode(&ip);
574 		if (preen || reply("FIX") != 0) {
575 			if (preen)
576 				printf(" (FIXED)\n");
577 			ffs_update_dinode_ckhash(&sblock,
578 			    (struct ufs2_dinode *)dp);
579 			dirty(&inobuf);
580 		}
581 	}
582 	if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) {
583 		/*
584 		 * Try to determine if we have reached the end of the
585 		 * allocated inodes.
586 		 */
587 		mode = DIP(dp, di_mode) & IFMT;
588 		if (mode == 0) {
589 			if (memcmp(dp->dp2.di_db, zino.dp2.di_db,
590 				UFS_NDADDR * sizeof(ufs2_daddr_t)) ||
591 			      memcmp(dp->dp2.di_ib, zino.dp2.di_ib,
592 				UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
593 			      dp->dp2.di_mode || dp->dp2.di_size)
594 				return (NULL);
595 			return (dp);
596 		}
597 		if (!ftypeok(dp))
598 			return (NULL);
599 		ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
600 		if (ndb < 0)
601 			return (NULL);
602 		if (mode == IFBLK || mode == IFCHR)
603 			ndb++;
604 		if (mode == IFLNK) {
605 			/*
606 			 * Fake ndb value so direct/indirect block checks below
607 			 * will detect any garbage after symlink string.
608 			 */
609 			if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
610 				ndb = howmany(DIP(dp, di_size),
611 				    sizeof(ufs2_daddr_t));
612 				if (ndb > UFS_NDADDR) {
613 					j = ndb - UFS_NDADDR;
614 					for (ndb = 1; j > 1; j--)
615 						ndb *= NINDIR(&sblock);
616 					ndb += UFS_NDADDR;
617 				}
618 			}
619 		}
620 		for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++)
621 			if (DIP(dp, di_db[j]) != 0)
622 				return (NULL);
623 		for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++)
624 			ndb /= NINDIR(&sblock);
625 		for (; j < UFS_NIADDR; j++)
626 			if (DIP(dp, di_ib[j]) != 0)
627 				return (NULL);
628 	}
629 	return (dp);
630 }
631 
632 void
633 setinodebuf(int cg, ino_t inosused)
634 {
635 	ino_t inum;
636 
637 	inum = cg * sblock.fs_ipg;
638 	lastvalidinum = inum + inosused - 1;
639 	nextinum = inum;
640 	lastinum = inum;
641 	readcount = 0;
642 	/* Flush old contents in case they have been updated */
643 	flush(fswritefd, &inobuf);
644 	inobuf.b_bno = 0;
645 	if (inobuf.b_un.b_buf == NULL) {
646 		inobufsize = blkroundup(&sblock,
647 		    MAX(INOBUFSIZE, sblock.fs_bsize));
648 		initbarea(&inobuf, BT_INODES);
649 		if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL)
650 			errx(EEXIT, "cannot allocate space for inode buffer");
651 	}
652 	fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
653 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
654 	readpercg = inosused / fullcnt;
655 	partialcnt = inosused % fullcnt;
656 	partialsize = fragroundup(&sblock,
657 	    partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
658 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
659 	if (partialcnt != 0) {
660 		readpercg++;
661 	} else {
662 		partialcnt = fullcnt;
663 		partialsize = inobufsize;
664 	}
665 }
666 
667 int
668 freeblock(struct inodesc *idesc)
669 {
670 	struct dups *dlp;
671 	struct bufarea *cgbp;
672 	struct cg *cgp;
673 	ufs2_daddr_t blkno;
674 	long size, nfrags;
675 
676 	blkno = idesc->id_blkno;
677 	if (idesc->id_type == SNAP) {
678 		pfatal("clearing a snapshot dinode\n");
679 		return (STOP);
680 	}
681 	size = lfragtosize(&sblock, idesc->id_numfrags);
682 	if (snapblkfree(&sblock, blkno, size, idesc->id_number,
683 	    std_checkblkavail))
684 		return (KEEPON);
685 	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
686 		if (chkrange(blkno, 1)) {
687 			return (SKIP);
688 		} else if (testbmap(blkno)) {
689 			for (dlp = duplist; dlp; dlp = dlp->next) {
690 				if (dlp->dup != blkno)
691 					continue;
692 				dlp->dup = duplist->dup;
693 				dlp = duplist;
694 				duplist = duplist->next;
695 				free((char *)dlp);
696 				break;
697 			}
698 			if (dlp == NULL) {
699 				clrbmap(blkno);
700 				n_blks--;
701 			}
702 		}
703 	}
704 	/*
705 	 * If all successfully returned, account for them.
706 	 */
707 	if (nfrags == 0) {
708 		cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
709 		cgp = cgbp->b_un.b_cg;
710 		if (idesc->id_numfrags == sblock.fs_frag)
711 			cgp->cg_cs.cs_nbfree++;
712 		else
713 			cgp->cg_cs.cs_nffree += idesc->id_numfrags;
714 		cgdirty(cgbp);
715 	}
716 	return (KEEPON);
717 }
718 
719 /*
720  * Prepare a snapshot file for being removed.
721  */
722 void
723 snapremove(ino_t inum)
724 {
725 	struct inodesc idesc;
726 	struct inode ip;
727 	int i;
728 
729 	for (i = 0; i < snapcnt; i++)
730 		if (snaplist[i].i_number == inum)
731 			break;
732 	if (i == snapcnt)
733 		ginode(inum, &ip);
734 	else
735 		ip = snaplist[i];
736 	if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
737 		printf("snapremove: inode %jd is not a snapshot\n",
738 		    (intmax_t)inum);
739 		if (i == snapcnt)
740 			irelse(&ip);
741 		return;
742 	}
743 	if (debug)
744 		printf("snapremove: remove %sactive snapshot %jd\n",
745 		    i == snapcnt ? "in" : "", (intmax_t)inum);
746 	/*
747 	 * If on active snapshot list, remove it.
748 	 */
749 	if (i < snapcnt) {
750 		for (i++; i < FSMAXSNAP; i++) {
751 			if (sblock.fs_snapinum[i] == 0)
752 				break;
753 			snaplist[i - 1] = snaplist[i];
754 			sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
755 		}
756 		sblock.fs_snapinum[i - 1] = 0;
757 		bzero(&snaplist[i - 1], sizeof(struct inode));
758 		snapcnt--;
759 	}
760 	memset(&idesc, 0, sizeof(struct inodesc));
761 	idesc.id_type = SNAP;
762 	idesc.id_func = snapclean;
763 	idesc.id_number = inum;
764 	(void)ckinode(ip.i_dp, &idesc);
765 	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
766 	inodirty(&ip);
767 	irelse(&ip);
768 }
769 
770 static int
771 snapclean(struct inodesc *idesc)
772 {
773 	ufs2_daddr_t blkno;
774 	struct bufarea *bp;
775 	union dinode *dp;
776 
777 	blkno = idesc->id_blkno;
778 	if (blkno == 0)
779 		return (KEEPON);
780 
781 	dp = idesc->id_dp;
782 	if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
783 		if (idesc->id_lbn < UFS_NDADDR) {
784 			DIP_SET(dp, di_db[idesc->id_lbn], 0);
785 		} else {
786 			bp = idesc->id_bp;
787 			IBLK_SET(bp, bp->b_index, 0);
788 			dirty(bp);
789 		}
790 	}
791 	return (KEEPON);
792 }
793 
794 /*
795  * Notification that a block is being freed. Return zero if the free
796  * should be allowed to proceed. Return non-zero if the snapshot file
797  * wants to claim the block. The block will be claimed if it is an
798  * uncopied part of one of the snapshots. It will be freed if it is
799  * either a BLK_NOCOPY or has already been copied in all of the snapshots.
800  * If a fragment is being freed, then all snapshots that care about
801  * it must make a copy since a snapshot file can only claim full sized
802  * blocks. Note that if more than one snapshot file maps the block,
803  * we can pick one at random to claim it. Since none of the snapshots
804  * can change, we are assurred that they will all see the same unmodified
805  * image. When deleting a snapshot file (see ino_trunc above), we
806  * must push any of these claimed blocks to one of the other snapshots
807  * that maps it. These claimed blocks are easily identified as they will
808  * have a block number equal to their logical block number within the
809  * snapshot. A copied block can never have this property because they
810  * must always have been allocated from a BLK_NOCOPY location.
811  */
812 int
813 snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
814 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
815 {
816 	union dinode *dp;
817 	struct inode ip;
818 	struct bufarea *snapbp;
819 	ufs_lbn_t lbn;
820 	ufs2_daddr_t blkno, relblkno;
821 	int i, frags, claimedblk, copydone;
822 
823 	/* If no snapshots, nothing to do */
824 	if (snapcnt == 0)
825 		return (0);
826 	if (debug)
827 		printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
828 		    (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
829 	relblkno = blknum(fs, bno);
830 	lbn = fragstoblks(fs, relblkno);
831 	/* Direct blocks are always pre-copied */
832 	if (lbn < UFS_NDADDR)
833 		return (0);
834 	copydone = 0;
835 	claimedblk = 0;
836 	for (i = 0; i < snapcnt; i++) {
837 		/*
838 		 * Lookup block being freed.
839 		 */
840 		ip = snaplist[i];
841 		dp = ip.i_dp;
842 		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
843 		    lbn, &frags, &snapbp);
844 		/*
845 		 * Check to see if block needs to be copied.
846 		 */
847 		if (blkno == 0) {
848 			/*
849 			 * A block that we map is being freed. If it has not
850 			 * been claimed yet, we will claim or copy it (below).
851 			 */
852 			claimedblk = 1;
853 		} else if (blkno == BLK_SNAP) {
854 			/*
855 			 * No previous snapshot claimed the block,
856 			 * so it will be freed and become a BLK_NOCOPY
857 			 * (don't care) for us.
858 			 */
859 			if (claimedblk)
860 				pfatal("snapblkfree: inconsistent block type");
861 			IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
862 			dirty(snapbp);
863 			brelse(snapbp);
864 			continue;
865 		} else /* BLK_NOCOPY or default */ {
866 			/*
867 			 * If the snapshot has already copied the block
868 			 * (default), or does not care about the block,
869 			 * it is not needed.
870 			 */
871 			brelse(snapbp);
872 			continue;
873 		}
874 		/*
875 		 * If this is a full size block, we will just grab it
876 		 * and assign it to the snapshot inode. Otherwise we
877 		 * will proceed to copy it. See explanation for this
878 		 * routine as to why only a single snapshot needs to
879 		 * claim this block.
880 		 */
881 		if (size == fs->fs_bsize) {
882 			if (debug)
883 				printf("Grabonremove snapshot %ju lbn %jd "
884 				    "from inum %ju\n", (intmax_t)ip.i_number,
885 				    (intmax_t)lbn, (uintmax_t)inum);
886 			IBLK_SET(snapbp, snapbp->b_index, relblkno);
887 			dirty(snapbp);
888 			brelse(snapbp);
889 			DIP_SET(dp, di_blocks,
890 			    DIP(dp, di_blocks) + btodb(size));
891 			inodirty(&ip);
892 			return (1);
893 		}
894 
895 		/* First time through, read the contents of the old block. */
896 		if (copydone == 0) {
897 			copydone = 1;
898 			if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
899 			    fs->fs_bsize) != 0) {
900 				pfatal("Could not read snapshot %ju block "
901 				    "%jd\n", (intmax_t)ip.i_number,
902 				    (intmax_t)relblkno);
903 				continue;
904 			}
905 		}
906 		/*
907 		 * This allocation will never require any additional
908 		 * allocations for the snapshot inode.
909 		 */
910 		blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
911 		    checkblkavail);
912 		if (blkno == 0) {
913 			pfatal("Could not allocate block for snapshot %ju\n",
914 			    (intmax_t)ip.i_number);
915 			continue;
916 		}
917 		if (debug)
918 			printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
919 			    "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
920 			    (intmax_t)lbn, (uintmax_t)inum, size,
921 			    (intmax_t)blkno);
922 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
923 		IBLK_SET(snapbp, snapbp->b_index, blkno);
924 		dirty(snapbp);
925 		brelse(snapbp);
926 		DIP_SET(dp, di_blocks,
927 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
928 		inodirty(&ip);
929 	}
930 	return (0);
931 }
932 
933 /*
934  * Notification that a block is being written. Return if the block
935  * is part of a snapshot as snapshots never track other snapshots.
936  * The block will be copied in all of the snapshots that are tracking
937  * it and have not yet copied it. Some buffers may hold more than one
938  * block. Here we need to check each block in the buffer.
939  */
940 void
941 copyonwrite(struct fs *fs, struct bufarea *bp,
942 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
943 {
944 	ufs2_daddr_t copyblkno;
945 	long i, numblks;
946 
947 	/* If no snapshots, nothing to do. */
948 	if (snapcnt == 0)
949 		return;
950 	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
951 	if (debug)
952 		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
953 		    (intmax_t)numblks, numblks > 1 ? "s" : "");
954 	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
955 	for (i = 0; i < numblks; i++) {
956 		chkcopyonwrite(fs, copyblkno, checkblkavail);
957 		copyblkno += fs->fs_frag;
958 	}
959 }
960 
961 static void
962 chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
963 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
964 {
965 	struct inode ip;
966 	union dinode *dp;
967 	struct bufarea *snapbp;
968 	ufs2_daddr_t blkno;
969 	int i, frags, copydone;
970 	ufs_lbn_t lbn;
971 
972 	lbn = fragstoblks(fs, copyblkno);
973 	/* Direct blocks are always pre-copied */
974 	if (lbn < UFS_NDADDR)
975 		return;
976 	copydone = 0;
977 	for (i = 0; i < snapcnt; i++) {
978 		/*
979 		 * Lookup block being freed.
980 		 */
981 		ip = snaplist[i];
982 		dp = ip.i_dp;
983 		blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
984 		/*
985 		 * Check to see if block needs to be copied.
986 		 */
987 		if (blkno != 0) {
988 			/*
989 			 * A block that we have already copied or don't track.
990 			 */
991 			brelse(snapbp);
992 			continue;
993 		}
994 		/* First time through, read the contents of the old block. */
995 		if (copydone == 0) {
996 			copydone = 1;
997 			if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
998 			    fs->fs_bsize) != 0) {
999 				pfatal("Could not read snapshot %ju block "
1000 				    "%jd\n", (intmax_t)ip.i_number,
1001 				    (intmax_t)copyblkno);
1002 				continue;
1003 			}
1004 		}
1005 		/*
1006 		 * This allocation will never require any additional
1007 		 * allocations for the snapshot inode.
1008 		 */
1009 		if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
1010 		    checkblkavail)) == 0) {
1011 			pfatal("Could not allocate block for snapshot %ju\n",
1012 			    (intmax_t)ip.i_number);
1013 			continue;
1014 		}
1015 		if (debug)
1016 			prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
1017 			    "blkno %ju setting in buffer",
1018 			    (intmax_t)ip.i_number, (intmax_t)lbn,
1019 			    (intmax_t)blkno);
1020 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
1021 		IBLK_SET(snapbp, snapbp->b_index, blkno);
1022 		dirty(snapbp);
1023 		brelse(snapbp);
1024 		DIP_SET(dp, di_blocks,
1025 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
1026 		inodirty(&ip);
1027 	}
1028 	return;
1029 }
1030 
1031 /*
1032  * Traverse an inode and check that its block count is correct
1033  * fixing it if necessary.
1034  */
1035 void
1036 check_blkcnt(struct inode *ip)
1037 {
1038 	struct inodesc idesc;
1039 	union dinode *dp;
1040 	ufs2_daddr_t ndb;
1041 	int j, ret, offset;
1042 
1043 	dp = ip->i_dp;
1044 	memset(&idesc, 0, sizeof(struct inodesc));
1045 	idesc.id_func = pass1check;
1046 	idesc.id_number = ip->i_number;
1047 	idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
1048 	(void)ckinode(dp, &idesc);
1049 	if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
1050 		ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
1051 		for (j = 0; j < UFS_NXADDR; j++) {
1052 			if (--ndb == 0 &&
1053 			    (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
1054 				idesc.id_numfrags = numfrags(&sblock,
1055 				    fragroundup(&sblock, offset));
1056 			else
1057 				idesc.id_numfrags = sblock.fs_frag;
1058 			if (dp->dp2.di_extb[j] == 0)
1059 				continue;
1060 			idesc.id_blkno = dp->dp2.di_extb[j];
1061 			ret = (*idesc.id_func)(&idesc);
1062 			if (ret & STOP)
1063 				break;
1064 		}
1065 	}
1066 	idesc.id_entryno *= btodb(sblock.fs_fsize);
1067 	if (DIP(dp, di_blocks) != idesc.id_entryno) {
1068 		if (!(sujrecovery && preen)) {
1069 			pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
1070 			    (u_long)idesc.id_number,
1071 			    (uintmax_t)DIP(dp, di_blocks),
1072 			    (uintmax_t)idesc.id_entryno);
1073 			if (preen)
1074 				printf(" (CORRECTED)\n");
1075 			else if (reply("CORRECT") == 0)
1076 				return;
1077 		}
1078 		if (bkgrdflag == 0) {
1079 			DIP_SET(dp, di_blocks, idesc.id_entryno);
1080 			inodirty(ip);
1081 		} else {
1082 			cmd.value = idesc.id_number;
1083 			cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
1084 			if (debug)
1085 				printf("adjblkcnt ino %ju amount %lld\n",
1086 				    (uintmax_t)cmd.value, (long long)cmd.size);
1087 			if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
1088 			    &cmd, sizeof cmd) == -1)
1089 				rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
1090 		}
1091 	}
1092 }
1093 
1094 void
1095 freeinodebuf(void)
1096 {
1097 	struct bufarea *bp;
1098 	int i;
1099 
1100 	/*
1101 	 * Flush old contents in case they have been updated.
1102 	 */
1103 	flush(fswritefd, &inobuf);
1104 	if (inobuf.b_un.b_buf != NULL)
1105 		free((char *)inobuf.b_un.b_buf);
1106 	inobuf.b_un.b_buf = NULL;
1107 	firstinum = lastinum = 0;
1108 	/*
1109 	 * Reload the snapshot inodes in case any of them changed.
1110 	 */
1111 	for (i = 0; i < snapcnt; i++) {
1112 		bp = snaplist[i].i_bp;
1113 		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
1114 		    bp->b_size);
1115 	}
1116 }
1117 
1118 /*
1119  * Routines to maintain information about directory inodes.
1120  * This is built during the first pass and used during the
1121  * second and third passes.
1122  *
1123  * Enter inodes into the cache.
1124  */
1125 struct inoinfo *
1126 cacheino(union dinode *dp, ino_t inumber)
1127 {
1128 	struct inoinfo *inp;
1129 	int i, blks;
1130 
1131 	if (getinoinfo(inumber) != NULL)
1132 		pfatal("cacheino: duplicate entry for ino %jd\n",
1133 		    (intmax_t)inumber);
1134 	if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
1135 		blks = UFS_NDADDR + UFS_NIADDR;
1136 	else if (DIP(dp, di_size) > 0)
1137 		blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
1138 	else
1139 		blks = 1;
1140 	inp = (struct inoinfo *)
1141 		Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
1142 	if (inp == NULL)
1143 		errx(EEXIT, "cannot increase directory list");
1144 	SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
1145 	inp->i_flags = 0;
1146 	inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
1147 	inp->i_dotdot = (ino_t)0;
1148 	inp->i_number = inumber;
1149 	inp->i_isize = DIP(dp, di_size);
1150 	inp->i_depth = DIP(dp, di_dirdepth);
1151 	inp->i_numblks = blks;
1152 	for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
1153 		inp->i_blks[i] = DIP(dp, di_db[i]);
1154 	if (blks > UFS_NDADDR)
1155 		for (i = 0; i < UFS_NIADDR; i++)
1156 			inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
1157 	if (inplast == listmax) {
1158 		listmax += 100;
1159 		inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
1160 		    listmax, sizeof(struct inoinfo *));
1161 		if (inpsort == NULL)
1162 			errx(EEXIT, "cannot increase directory list");
1163 	}
1164 	inpsort[inplast++] = inp;
1165 	return (inp);
1166 }
1167 
1168 /*
1169  * Look up an inode cache structure.
1170  */
1171 struct inoinfo *
1172 getinoinfo(ino_t inumber)
1173 {
1174 	struct inoinfo *inp;
1175 
1176 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1177 		if (inp->i_number != inumber)
1178 			continue;
1179 		return (inp);
1180 	}
1181 	return (NULL);
1182 }
1183 
1184 /*
1185  * Remove an entry from the inode cache and disk-order sorted list.
1186  * Return 0 on success and 1 on failure.
1187  */
1188 int
1189 removecachedino(ino_t inumber)
1190 {
1191 	struct inoinfo *inp, **inpp;
1192 	char *listtype;
1193 
1194 	listtype = "hash";
1195 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1196 		if (inp->i_number != inumber)
1197 			continue;
1198 		SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash);
1199 		for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
1200 			if (*inpp != inp)
1201 				continue;
1202 			*inpp = inpsort[inplast - 1];
1203 			inplast--;
1204 			free(inp);
1205 			return (0);
1206 		}
1207 		listtype = "sort";
1208 		break;
1209 	}
1210 	pfatal("removecachedino: entry for ino %jd not found on %s list\n",
1211 	    (intmax_t)inumber, listtype);
1212 	return (1);
1213 }
1214 
1215 /*
1216  * Clean up all the inode cache structure.
1217  */
1218 void
1219 inocleanup(void)
1220 {
1221 	struct inoinfo **inpp;
1222 
1223 	if (inphash == NULL)
1224 		return;
1225 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
1226 		free((char *)(*inpp));
1227 	free((char *)inphash);
1228 	inphash = NULL;
1229 	free((char *)inpsort);
1230 	inpsort = NULL;
1231 }
1232 
1233 void
1234 inodirty(struct inode *ip)
1235 {
1236 
1237 	if (sblock.fs_magic == FS_UFS2_MAGIC)
1238 		ffs_update_dinode_ckhash(&sblock,
1239 		    (struct ufs2_dinode *)ip->i_dp);
1240 	dirty(ip->i_bp);
1241 }
1242 
1243 void
1244 clri(struct inodesc *idesc, const char *type, int flag)
1245 {
1246 	union dinode *dp;
1247 	struct inode ip;
1248 
1249 	ginode(idesc->id_number, &ip);
1250 	dp = ip.i_dp;
1251 	if (flag == 1) {
1252 		pwarn("%s %s", type,
1253 		    (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
1254 		prtinode(&ip);
1255 		printf("\n");
1256 	}
1257 	if (preen || reply("CLEAR") == 1) {
1258 		if (preen)
1259 			printf(" (CLEARED)\n");
1260 		n_files--;
1261 		if (bkgrdflag == 0) {
1262 			if (idesc->id_type == SNAP) {
1263 				snapremove(idesc->id_number);
1264 				idesc->id_type = ADDR;
1265 			}
1266 			(void)ckinode(dp, idesc);
1267 			inoinfo(idesc->id_number)->ino_state = USTATE;
1268 			clearinode(dp);
1269 			inodirty(&ip);
1270 		} else {
1271 			cmd.value = idesc->id_number;
1272 			cmd.size = -DIP(dp, di_nlink);
1273 			if (debug)
1274 				printf("adjrefcnt ino %ld amt %lld\n",
1275 				    (long)cmd.value, (long long)cmd.size);
1276 			if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
1277 			    &cmd, sizeof cmd) == -1)
1278 				rwerror("ADJUST INODE", cmd.value);
1279 		}
1280 	}
1281 	irelse(&ip);
1282 }
1283 
1284 int
1285 findname(struct inodesc *idesc)
1286 {
1287 	struct direct *dirp = idesc->id_dirp;
1288 
1289 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1290 		idesc->id_entryno++;
1291 		return (KEEPON);
1292 	}
1293 	memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
1294 	return (STOP|FOUND);
1295 }
1296 
1297 int
1298 findino(struct inodesc *idesc)
1299 {
1300 	struct direct *dirp = idesc->id_dirp;
1301 
1302 	if (dirp->d_ino == 0)
1303 		return (KEEPON);
1304 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1305 	    dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) {
1306 		idesc->id_parent = dirp->d_ino;
1307 		return (STOP|FOUND);
1308 	}
1309 	return (KEEPON);
1310 }
1311 
1312 int
1313 clearentry(struct inodesc *idesc)
1314 {
1315 	struct direct *dirp = idesc->id_dirp;
1316 
1317 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1318 		idesc->id_entryno++;
1319 		return (KEEPON);
1320 	}
1321 	dirp->d_ino = 0;
1322 	return (STOP|FOUND|ALTERED);
1323 }
1324 
1325 void
1326 prtinode(struct inode *ip)
1327 {
1328 	char *p;
1329 	union dinode *dp;
1330 	struct passwd *pw;
1331 	time_t t;
1332 
1333 	dp = ip->i_dp;
1334 	printf(" I=%lu ", (u_long)ip->i_number);
1335 	if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino)
1336 		return;
1337 	printf(" OWNER=");
1338 	if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
1339 		printf("%s ", pw->pw_name);
1340 	else
1341 		printf("%u ", (unsigned)DIP(dp, di_uid));
1342 	printf("MODE=%o\n", DIP(dp, di_mode));
1343 	if (preen)
1344 		printf("%s: ", cdevname);
1345 	printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
1346 	t = DIP(dp, di_mtime);
1347 	if ((p = ctime(&t)) != NULL)
1348 		printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
1349 }
1350 
1351 void
1352 blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
1353 {
1354 
1355 	pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
1356 	printf("\n");
1357 	switch (inoinfo(ino)->ino_state) {
1358 
1359 	case FSTATE:
1360 	case FZLINK:
1361 		inoinfo(ino)->ino_state = FCLEAR;
1362 		return;
1363 
1364 	case DSTATE:
1365 	case DZLINK:
1366 		inoinfo(ino)->ino_state = DCLEAR;
1367 		return;
1368 
1369 	case FCLEAR:
1370 	case DCLEAR:
1371 		return;
1372 
1373 	default:
1374 		errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state);
1375 		/* NOTREACHED */
1376 	}
1377 }
1378 
1379 /*
1380  * allocate an unused inode
1381  */
1382 ino_t
1383 allocino(ino_t request, int type)
1384 {
1385 	ino_t ino;
1386 	struct inode ip;
1387 	union dinode *dp;
1388 	struct bufarea *cgbp;
1389 	struct cg *cgp;
1390 	int cg, anyino;
1391 
1392 	anyino = 0;
1393 	if (request == 0) {
1394 		request = UFS_ROOTINO;
1395 		anyino = 1;
1396 	} else if (inoinfo(request)->ino_state != USTATE)
1397 		return (0);
1398 retry:
1399 	for (ino = request; ino < maxino; ino++)
1400 		if (inoinfo(ino)->ino_state == USTATE)
1401 			break;
1402 	if (ino >= maxino)
1403 		return (0);
1404 	cg = ino_to_cg(&sblock, ino);
1405 	cgbp = cglookup(cg);
1406 	cgp = cgbp->b_un.b_cg;
1407 	if (!check_cgmagic(cg, cgbp)) {
1408 		if (anyino == 0)
1409 			return (0);
1410 		request = (cg + 1) * sblock.fs_ipg;
1411 		goto retry;
1412 	}
1413 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1414 	cgp->cg_cs.cs_nifree--;
1415 	switch (type & IFMT) {
1416 	case IFDIR:
1417 		inoinfo(ino)->ino_state = DSTATE;
1418 		cgp->cg_cs.cs_ndir++;
1419 		break;
1420 	case IFREG:
1421 	case IFLNK:
1422 		inoinfo(ino)->ino_state = FSTATE;
1423 		break;
1424 	default:
1425 		return (0);
1426 	}
1427 	cgdirty(cgbp);
1428 	ginode(ino, &ip);
1429 	dp = ip.i_dp;
1430 	memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ?
1431 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
1432 	DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
1433 	    std_checkblkavail));
1434 	if (DIP(dp, di_db[0]) == 0) {
1435 		inoinfo(ino)->ino_state = USTATE;
1436 		inodirty(&ip);
1437 		irelse(&ip);
1438 		return (0);
1439 	}
1440 	DIP_SET(dp, di_mode, type);
1441 	DIP_SET(dp, di_atime, time(NULL));
1442 	DIP_SET(dp, di_ctime, DIP(dp, di_atime));
1443 	DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
1444 	DIP_SET(dp, di_size, sblock.fs_fsize);
1445 	DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
1446 	n_files++;
1447 	inodirty(&ip);
1448 	irelse(&ip);
1449 	inoinfo(ino)->ino_type = IFTODT(type);
1450 	return (ino);
1451 }
1452 
1453 /*
1454  * deallocate an inode
1455  */
1456 void
1457 freeino(ino_t ino)
1458 {
1459 	struct inodesc idesc;
1460 	union dinode *dp;
1461 	struct inode ip;
1462 
1463 	memset(&idesc, 0, sizeof(struct inodesc));
1464 	idesc.id_type = ADDR;
1465 	idesc.id_func = freeblock;
1466 	idesc.id_number = ino;
1467 	ginode(ino, &ip);
1468 	dp = ip.i_dp;
1469 	(void)ckinode(dp, &idesc);
1470 	clearinode(dp);
1471 	inodirty(&ip);
1472 	irelse(&ip);
1473 	inoinfo(ino)->ino_state = USTATE;
1474 	n_files--;
1475 }
1476