xref: /freebsd/sbin/fsck_ffs/inode.c (revision 2e3f49888ec8851bafb22011533217487764fdb0)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/stat.h>
34 #include <sys/stdint.h>
35 #include <sys/sysctl.h>
36 
37 #include <ufs/ufs/dinode.h>
38 #include <ufs/ufs/dir.h>
39 #include <ufs/ffs/fs.h>
40 
41 #include <err.h>
42 #include <pwd.h>
43 #include <string.h>
44 #include <time.h>
45 
46 #include "fsck.h"
47 
48 struct bufarea *icachebp;	/* inode cache buffer */
49 
50 static int iblock(struct inodesc *, off_t isize, int type);
51 static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
52     struct bufarea **);
53 static int snapclean(struct inodesc *idesc);
54 static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
55     ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long));
56 
57 int
58 ckinode(union dinode *dp, struct inodesc *idesc)
59 {
60 	off_t remsize, sizepb;
61 	int i, offset, ret;
62 	struct inode ip;
63 	union dinode dino;
64 	ufs2_daddr_t ndb;
65 	mode_t mode;
66 	char pathbuf[MAXPATHLEN + 1];
67 
68 	if (idesc->id_fix != IGNORE)
69 		idesc->id_fix = DONTKNOW;
70 	idesc->id_dp = dp;
71 	idesc->id_lbn = -1;
72 	idesc->id_lballoc = -1;
73 	idesc->id_level = 0;
74 	idesc->id_entryno = 0;
75 	idesc->id_filesize = DIP(dp, di_size);
76 	mode = DIP(dp, di_mode) & IFMT;
77 	if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
78 	    DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
79 		return (KEEPON);
80 	if (sblock.fs_magic == FS_UFS1_MAGIC)
81 		dino.dp1 = dp->dp1;
82 	else
83 		dino.dp2 = dp->dp2;
84 	if (DIP(&dino, di_size) < 0) {
85 		pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size));
86 		return (STOP);
87 	}
88 	ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
89 	for (i = 0; i < UFS_NDADDR; i++) {
90 		idesc->id_lbn++;
91 		if (--ndb == 0 &&
92 		    (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
93 			idesc->id_numfrags =
94 				numfrags(&sblock, fragroundup(&sblock, offset));
95 		else
96 			idesc->id_numfrags = sblock.fs_frag;
97 		if (DIP(&dino, di_db[i]) == 0) {
98 			if (idesc->id_type == DATA && ndb >= 0) {
99 				/* An empty block in a directory XXX */
100 				getpathname(pathbuf, idesc->id_number,
101 						idesc->id_number);
102 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
103 					pathbuf);
104 				if (reply("ADJUST LENGTH") == 1) {
105 					ginode(idesc->id_number, &ip);
106 					DIP_SET(ip.i_dp, di_size,
107 					    i * sblock.fs_bsize);
108 					printf(
109 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
110 					rerun = 1;
111 					inodirty(&ip);
112 					irelse(&ip);
113 				}
114 				return (STOP);
115 			}
116 			continue;
117 		}
118 		idesc->id_blkno = DIP(&dino, di_db[i]);
119 		if (idesc->id_type != DATA)
120 			ret = (*idesc->id_func)(idesc);
121 		else
122 			ret = dirscan(idesc);
123 		if (ret & STOP)
124 			return (ret);
125 	}
126 	idesc->id_numfrags = sblock.fs_frag;
127 	remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR;
128 	sizepb = sblock.fs_bsize;
129 	for (i = 0; i < UFS_NIADDR; i++) {
130 		sizepb *= NINDIR(&sblock);
131 		idesc->id_level = i + 1;
132 		if (DIP(&dino, di_ib[i])) {
133 			idesc->id_blkno = DIP(&dino, di_ib[i]);
134 			ret = iblock(idesc, remsize, BT_LEVEL1 + i);
135 			if (ret & STOP)
136 				return (ret);
137 		} else if (remsize > 0) {
138 			idesc->id_lbn += sizepb / sblock.fs_bsize;
139 			if (idesc->id_type == DATA) {
140 				/* An empty block in a directory XXX */
141 				getpathname(pathbuf, idesc->id_number,
142 						idesc->id_number);
143 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
144 					pathbuf);
145 				if (reply("ADJUST LENGTH") == 1) {
146 					ginode(idesc->id_number, &ip);
147 					DIP_SET(ip.i_dp, di_size,
148 					    DIP(ip.i_dp, di_size) - remsize);
149 					remsize = 0;
150 					printf(
151 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
152 					rerun = 1;
153 					inodirty(&ip);
154 					irelse(&ip);
155 					break;
156 				}
157 			}
158 		}
159 		remsize -= sizepb;
160 	}
161 	return (KEEPON);
162 }
163 
164 static int
165 iblock(struct inodesc *idesc, off_t isize, int type)
166 {
167 	struct inode ip;
168 	struct bufarea *bp;
169 	int i, n, (*func)(struct inodesc *), nif;
170 	off_t sizepb;
171 	char buf[BUFSIZ];
172 	char pathbuf[MAXPATHLEN + 1];
173 
174 	if (idesc->id_type != DATA) {
175 		func = idesc->id_func;
176 		if (((n = (*func)(idesc)) & KEEPON) == 0)
177 			return (n);
178 	} else
179 		func = dirscan;
180 	bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
181 	if (bp->b_errs != 0) {
182 		brelse(bp);
183 		return (SKIP);
184 	}
185 	idesc->id_bp = bp;
186 	idesc->id_level--;
187 	for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++)
188 		sizepb *= NINDIR(&sblock);
189 	if (howmany(isize, sizepb) > NINDIR(&sblock))
190 		nif = NINDIR(&sblock);
191 	else
192 		nif = howmany(isize, sizepb);
193 	if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
194 		for (i = nif; i < NINDIR(&sblock); i++) {
195 			if (IBLK(bp, i) == 0)
196 				continue;
197 			(void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
198 			    (u_long)idesc->id_number);
199 			if (preen) {
200 				pfatal("%s", buf);
201 			} else if (dofix(idesc, buf)) {
202 				IBLK_SET(bp, i, 0);
203 				dirty(bp);
204 			}
205 		}
206 		flush(fswritefd, bp);
207 	}
208 	for (i = 0; i < nif; i++) {
209 		if (IBLK(bp, i)) {
210 			idesc->id_blkno = IBLK(bp, i);
211 			bp->b_index = i;
212 			if (idesc->id_level == 0) {
213 				idesc->id_lbn++;
214 				n = (*func)(idesc);
215 			} else {
216 				n = iblock(idesc, isize, type - 1);
217 				idesc->id_level++;
218 			}
219 			if (n & STOP) {
220 				brelse(bp);
221 				return (n);
222 			}
223 		} else {
224 			idesc->id_lbn += sizepb / sblock.fs_bsize;
225 			if (idesc->id_type == DATA && isize > 0) {
226 				/* An empty block in a directory XXX */
227 				getpathname(pathbuf, idesc->id_number,
228 						idesc->id_number);
229 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
230 					pathbuf);
231 				if (reply("ADJUST LENGTH") == 1) {
232 					ginode(idesc->id_number, &ip);
233 					DIP_SET(ip.i_dp, di_size,
234 					    DIP(ip.i_dp, di_size) - isize);
235 					isize = 0;
236 					printf(
237 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
238 					rerun = 1;
239 					inodirty(&ip);
240 					brelse(bp);
241 					return(STOP);
242 				}
243 			}
244 		}
245 		isize -= sizepb;
246 	}
247 	brelse(bp);
248 	return (KEEPON);
249 }
250 
251 /*
252  * Finds the disk block address at the specified lbn within the inode
253  * specified by dp.  This follows the whole tree and honors di_size and
254  * di_extsize so it is a true test of reachability.  The lbn may be
255  * negative if an extattr or indirect block is requested.
256  */
257 ufs2_daddr_t
258 ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags,
259     struct bufarea **bpp)
260 {
261 	ufs_lbn_t tmpval;
262 	ufs_lbn_t cur;
263 	ufs_lbn_t next;
264 	int i;
265 
266 	*frags = 0;
267 	if (bpp != NULL)
268 		*bpp = NULL;
269 	/*
270 	 * Handle extattr blocks first.
271 	 */
272 	if (lbn < 0 && lbn >= -UFS_NXADDR) {
273 		lbn = -1 - lbn;
274 		if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1))
275 			return (0);
276 		*frags = numfrags(&sblock,
277 		    sblksize(&sblock, dp->dp2.di_extsize, lbn));
278 		return (dp->dp2.di_extb[lbn]);
279 	}
280 	/*
281 	 * Now direct and indirect.
282 	 */
283 	if (DIP(dp, di_mode) == IFLNK &&
284 	    DIP(dp, di_size) < sblock.fs_maxsymlinklen)
285 		return (0);
286 	if (lbn >= 0 && lbn < UFS_NDADDR) {
287 		*frags = numfrags(&sblock,
288 		    sblksize(&sblock, DIP(dp, di_size), lbn));
289 		return (DIP(dp, di_db[lbn]));
290 	}
291 	*frags = sblock.fs_frag;
292 
293 	for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR;
294 	    i++, tmpval *= NINDIR(&sblock), cur = next) {
295 		next = cur + tmpval;
296 		if (lbn == -cur - i)
297 			return (DIP(dp, di_ib[i]));
298 		/*
299 		 * Determine whether the lbn in question is within this tree.
300 		 */
301 		if (lbn < 0 && -lbn >= next)
302 			continue;
303 		if (lbn > 0 && lbn >= next)
304 			continue;
305 		if (DIP(dp, di_ib[i]) == 0)
306 			return (0);
307 		return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn,
308 		    bpp));
309 	}
310 	pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino);
311 	return (0);
312 }
313 
314 /*
315  * Fetch an indirect block to find the block at a given lbn.  The lbn
316  * may be negative to fetch a specific indirect block pointer or positive
317  * to fetch a specific block.
318  */
319 static ufs2_daddr_t
320 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn,
321     struct bufarea **bpp)
322 {
323 	struct bufarea *bp;
324 	ufs_lbn_t lbnadd;
325 	ufs_lbn_t base;
326 	int i, level;
327 
328 	level = lbn_level(cur);
329 	if (level == -1)
330 		pfatal("Invalid indir lbn %jd in ino %ju\n",
331 		    lbn, (uintmax_t)ino);
332 	if (level == 0 && lbn < 0)
333 		pfatal("Invalid lbn %jd in ino %ju\n",
334 		    lbn, (uintmax_t)ino);
335 	lbnadd = 1;
336 	base = -(cur + level);
337 	for (i = level; i > 0; i--)
338 		lbnadd *= NINDIR(&sblock);
339 	if (lbn > 0)
340 		i = (lbn - base) / lbnadd;
341 	else
342 		i = (-lbn - base) / lbnadd;
343 	if (i < 0 || i >= NINDIR(&sblock)) {
344 		pfatal("Invalid indirect index %d produced by lbn %jd "
345 		    "in ino %ju\n", i, lbn, (uintmax_t)ino);
346 		return (0);
347 	}
348 	if (level == 0)
349 		cur = base + (i * lbnadd);
350 	else
351 		cur = -(base + (i * lbnadd)) - (level - 1);
352 	bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level);
353 	if (bp->b_errs != 0)
354 		return (0);
355 	blk = IBLK(bp, i);
356 	bp->b_index = i;
357 	if (cur == lbn || blk == 0) {
358 		if (bpp != NULL)
359 			*bpp = bp;
360 		else
361 			brelse(bp);
362 		return (blk);
363 	}
364 	brelse(bp);
365 	if (level == 0)
366 		pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn,
367 		    (uintmax_t)ino);
368 	return (indir_blkatoff(blk, ino, cur, lbn, bpp));
369 }
370 
371 /*
372  * Check that a block in a legal block number.
373  * Return 0 if in range, 1 if out of range.
374  */
375 int
376 chkrange(ufs2_daddr_t blk, int cnt)
377 {
378 	int c;
379 
380 	if (cnt <= 0 || blk <= 0 || blk >= maxfsblock ||
381 	    cnt > maxfsblock - blk) {
382 		if (debug)
383 			printf("out of range: blk %ld, offset %i, size %d\n",
384 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
385 		return (1);
386 	}
387 	if (cnt > sblock.fs_frag ||
388 	    fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
389 		if (debug)
390 			printf("bad size: blk %ld, offset %i, size %d\n",
391 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
392 		return (1);
393 	}
394 	c = dtog(&sblock, blk);
395 	if (blk < cgdmin(&sblock, c)) {
396 		if ((blk + cnt) > cgsblock(&sblock, c)) {
397 			if (debug) {
398 				printf("blk %ld < cgdmin %ld;",
399 				    (long)blk, (long)cgdmin(&sblock, c));
400 				printf(" blk + cnt %ld > cgsbase %ld\n",
401 				    (long)(blk + cnt),
402 				    (long)cgsblock(&sblock, c));
403 			}
404 			return (1);
405 		}
406 	} else {
407 		if ((blk + cnt) > cgbase(&sblock, c+1)) {
408 			if (debug)  {
409 				printf("blk %ld >= cgdmin %ld;",
410 				    (long)blk, (long)cgdmin(&sblock, c));
411 				printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
412 				    (long)(blk + cnt), (long)sblock.fs_fpg);
413 			}
414 			return (1);
415 		}
416 	}
417 	return (0);
418 }
419 
420 /*
421  * General purpose interface for reading inodes.
422  *
423  * firstinum and lastinum track contents of getnextino() cache (below).
424  */
425 static ino_t firstinum, lastinum;
426 static struct bufarea inobuf;
427 
428 void
429 ginode(ino_t inumber, struct inode *ip)
430 {
431 	ufs2_daddr_t iblk;
432 	struct ufs2_dinode *dp;
433 
434 	if (inumber < UFS_ROOTINO || inumber >= maxino)
435 		errx(EEXIT, "bad inode number %ju to ginode",
436 		    (uintmax_t)inumber);
437 	ip->i_number = inumber;
438 	if (inumber >= firstinum && inumber < lastinum) {
439 		/* contents in getnextino() cache */
440 		ip->i_bp = &inobuf;
441 		inobuf.b_refcnt++;
442 		inobuf.b_index = firstinum;
443 	} else if (icachebp != NULL &&
444 	    inumber >= icachebp->b_index &&
445 	    inumber < icachebp->b_index + INOPB(&sblock)) {
446 		/* take an additional reference for the returned inode */
447 		icachebp->b_refcnt++;
448 		ip->i_bp = icachebp;
449 	} else {
450 		iblk = ino_to_fsba(&sblock, inumber);
451 		/* release our cache-hold reference on old icachebp */
452 		if (icachebp != NULL)
453 			brelse(icachebp);
454 		icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
455 		if (icachebp->b_errs != 0) {
456 			icachebp = NULL;
457 			ip->i_bp = NULL;
458 			ip->i_dp = &zino;
459 			return;
460 		}
461 		/* take a cache-hold reference on new icachebp */
462 		icachebp->b_refcnt++;
463 		icachebp->b_index = rounddown(inumber, INOPB(&sblock));
464 		ip->i_bp = icachebp;
465 	}
466 	if (sblock.fs_magic == FS_UFS1_MAGIC) {
467 		ip->i_dp = (union dinode *)
468 		    &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index];
469 		return;
470 	}
471 	ip->i_dp = (union dinode *)
472 	    &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index];
473 	dp = (struct ufs2_dinode *)ip->i_dp;
474 	/* Do not check hash of inodes being created */
475 	if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) {
476 		pwarn("INODE CHECK-HASH FAILED");
477 		prtinode(ip);
478 		if (preen || reply("FIX") != 0) {
479 			if (preen)
480 				printf(" (FIXED)\n");
481 			ffs_update_dinode_ckhash(&sblock, dp);
482 			inodirty(ip);
483 		}
484 	}
485 }
486 
487 /*
488  * Release a held inode.
489  */
490 void
491 irelse(struct inode *ip)
492 {
493 
494 	/* Check for failed inode read */
495 	if (ip->i_bp == NULL)
496 		return;
497 	if (debug && sblock.fs_magic == FS_UFS2_MAGIC &&
498 	    ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
499 		pwarn("irelse: releasing inode with bad check-hash");
500 		prtinode(ip);
501 	}
502 	if (ip->i_bp->b_refcnt <= 0)
503 		pfatal("irelse: releasing unreferenced ino %ju\n",
504 		    (uintmax_t) ip->i_number);
505 	brelse(ip->i_bp);
506 }
507 
508 /*
509  * Special purpose version of ginode used to optimize first pass
510  * over all the inodes in numerical order.
511  */
512 static ino_t nextinum, lastvalidinum;
513 static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;
514 
515 union dinode *
516 getnextinode(ino_t inumber, int rebuiltcg)
517 {
518 	int j;
519 	long size;
520 	mode_t mode;
521 	ufs2_daddr_t ndb, blk;
522 	union dinode *dp;
523 	struct inode ip;
524 	static caddr_t nextinop;
525 
526 	if (inumber != nextinum++ || inumber > lastvalidinum)
527 		errx(EEXIT, "bad inode number %ju to nextinode",
528 		    (uintmax_t)inumber);
529 	if (inumber >= lastinum) {
530 		readcount++;
531 		firstinum = lastinum;
532 		blk = ino_to_fsba(&sblock, lastinum);
533 		if (readcount % readpercg == 0) {
534 			size = partialsize;
535 			lastinum += partialcnt;
536 		} else {
537 			size = inobufsize;
538 			lastinum += fullcnt;
539 		}
540 		/*
541 		 * Flush old contents in case they have been updated.
542 		 * If getblk encounters an error, it will already have zeroed
543 		 * out the buffer, so we do not need to do so here.
544 		 */
545 		if (inobuf.b_refcnt != 0)
546 			pfatal("Non-zero getnextinode() ref count %d\n",
547 			    inobuf.b_refcnt);
548 		flush(fswritefd, &inobuf);
549 		getblk(&inobuf, blk, size);
550 		nextinop = inobuf.b_un.b_buf;
551 	}
552 	dp = (union dinode *)nextinop;
553 	if (sblock.fs_magic == FS_UFS1_MAGIC)
554 		nextinop += sizeof(struct ufs1_dinode);
555 	else
556 		nextinop += sizeof(struct ufs2_dinode);
557 	if ((ckhashadd & CK_INODE) != 0) {
558 		ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
559 		dirty(&inobuf);
560 	}
561 	if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
562 		pwarn("INODE CHECK-HASH FAILED");
563 		ip.i_bp = NULL;
564 		ip.i_dp = dp;
565 		ip.i_number = inumber;
566 		prtinode(&ip);
567 		if (preen || reply("FIX") != 0) {
568 			if (preen)
569 				printf(" (FIXED)\n");
570 			ffs_update_dinode_ckhash(&sblock,
571 			    (struct ufs2_dinode *)dp);
572 			dirty(&inobuf);
573 		}
574 	}
575 	if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) {
576 		/*
577 		 * Try to determine if we have reached the end of the
578 		 * allocated inodes.
579 		 */
580 		mode = DIP(dp, di_mode) & IFMT;
581 		if (mode == 0) {
582 			if (memcmp(dp->dp2.di_db, zino.dp2.di_db,
583 				UFS_NDADDR * sizeof(ufs2_daddr_t)) ||
584 			      memcmp(dp->dp2.di_ib, zino.dp2.di_ib,
585 				UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
586 			      dp->dp2.di_mode || dp->dp2.di_size)
587 				return (NULL);
588 			return (dp);
589 		}
590 		if (!ftypeok(dp))
591 			return (NULL);
592 		ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
593 		if (ndb < 0)
594 			return (NULL);
595 		if (mode == IFBLK || mode == IFCHR)
596 			ndb++;
597 		if (mode == IFLNK) {
598 			/*
599 			 * Fake ndb value so direct/indirect block checks below
600 			 * will detect any garbage after symlink string.
601 			 */
602 			if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
603 				ndb = howmany(DIP(dp, di_size),
604 				    sizeof(ufs2_daddr_t));
605 				if (ndb > UFS_NDADDR) {
606 					j = ndb - UFS_NDADDR;
607 					for (ndb = 1; j > 1; j--)
608 						ndb *= NINDIR(&sblock);
609 					ndb += UFS_NDADDR;
610 				}
611 			}
612 		}
613 		for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++)
614 			if (DIP(dp, di_db[j]) != 0)
615 				return (NULL);
616 		for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++)
617 			ndb /= NINDIR(&sblock);
618 		for (; j < UFS_NIADDR; j++)
619 			if (DIP(dp, di_ib[j]) != 0)
620 				return (NULL);
621 	}
622 	return (dp);
623 }
624 
625 void
626 setinodebuf(int cg, ino_t inosused)
627 {
628 	ino_t inum;
629 
630 	inum = cg * sblock.fs_ipg;
631 	lastvalidinum = inum + inosused - 1;
632 	nextinum = inum;
633 	lastinum = inum;
634 	readcount = 0;
635 	/* Flush old contents in case they have been updated */
636 	flush(fswritefd, &inobuf);
637 	inobuf.b_bno = 0;
638 	if (inobuf.b_un.b_buf == NULL) {
639 		inobufsize = blkroundup(&sblock,
640 		    MAX(INOBUFSIZE, sblock.fs_bsize));
641 		initbarea(&inobuf, BT_INODES);
642 		if ((inobuf.b_un.b_buf = Balloc((unsigned)inobufsize)) == NULL)
643 			errx(EEXIT, "cannot allocate space for inode buffer");
644 	}
645 	fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
646 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
647 	readpercg = inosused / fullcnt;
648 	partialcnt = inosused % fullcnt;
649 	partialsize = fragroundup(&sblock,
650 	    partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
651 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
652 	if (partialcnt != 0) {
653 		readpercg++;
654 	} else {
655 		partialcnt = fullcnt;
656 		partialsize = inobufsize;
657 	}
658 }
659 
660 int
661 freeblock(struct inodesc *idesc)
662 {
663 	struct dups *dlp;
664 	struct bufarea *cgbp;
665 	struct cg *cgp;
666 	ufs2_daddr_t blkno;
667 	long size, nfrags;
668 
669 	blkno = idesc->id_blkno;
670 	if (idesc->id_type == SNAP) {
671 		pfatal("clearing a snapshot dinode\n");
672 		return (STOP);
673 	}
674 	size = lfragtosize(&sblock, idesc->id_numfrags);
675 	if (snapblkfree(&sblock, blkno, size, idesc->id_number,
676 	    std_checkblkavail))
677 		return (KEEPON);
678 	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
679 		if (chkrange(blkno, 1)) {
680 			return (SKIP);
681 		} else if (testbmap(blkno)) {
682 			for (dlp = duplist; dlp; dlp = dlp->next) {
683 				if (dlp->dup != blkno)
684 					continue;
685 				dlp->dup = duplist->dup;
686 				dlp = duplist;
687 				duplist = duplist->next;
688 				free((char *)dlp);
689 				break;
690 			}
691 			if (dlp == NULL) {
692 				clrbmap(blkno);
693 				n_blks--;
694 			}
695 		}
696 	}
697 	/*
698 	 * If all successfully returned, account for them.
699 	 */
700 	if (nfrags == 0) {
701 		cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
702 		cgp = cgbp->b_un.b_cg;
703 		if (idesc->id_numfrags == sblock.fs_frag)
704 			cgp->cg_cs.cs_nbfree++;
705 		else
706 			cgp->cg_cs.cs_nffree += idesc->id_numfrags;
707 		cgdirty(cgbp);
708 	}
709 	return (KEEPON);
710 }
711 
712 /*
713  * Prepare a snapshot file for being removed.
714  */
715 void
716 snapremove(ino_t inum)
717 {
718 	struct inodesc idesc;
719 	struct inode ip;
720 	int i;
721 
722 	for (i = 0; i < snapcnt; i++)
723 		if (snaplist[i].i_number == inum)
724 			break;
725 	if (i == snapcnt)
726 		ginode(inum, &ip);
727 	else
728 		ip = snaplist[i];
729 	if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
730 		printf("snapremove: inode %jd is not a snapshot\n",
731 		    (intmax_t)inum);
732 		if (i == snapcnt)
733 			irelse(&ip);
734 		return;
735 	}
736 	if (debug)
737 		printf("snapremove: remove %sactive snapshot %jd\n",
738 		    i == snapcnt ? "in" : "", (intmax_t)inum);
739 	/*
740 	 * If on active snapshot list, remove it.
741 	 */
742 	if (i < snapcnt) {
743 		for (i++; i < FSMAXSNAP; i++) {
744 			if (sblock.fs_snapinum[i] == 0)
745 				break;
746 			snaplist[i - 1] = snaplist[i];
747 			sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
748 		}
749 		sblock.fs_snapinum[i - 1] = 0;
750 		bzero(&snaplist[i - 1], sizeof(struct inode));
751 		snapcnt--;
752 	}
753 	memset(&idesc, 0, sizeof(struct inodesc));
754 	idesc.id_type = SNAP;
755 	idesc.id_func = snapclean;
756 	idesc.id_number = inum;
757 	(void)ckinode(ip.i_dp, &idesc);
758 	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
759 	inodirty(&ip);
760 	irelse(&ip);
761 }
762 
763 static int
764 snapclean(struct inodesc *idesc)
765 {
766 	ufs2_daddr_t blkno;
767 	struct bufarea *bp;
768 	union dinode *dp;
769 
770 	blkno = idesc->id_blkno;
771 	if (blkno == 0)
772 		return (KEEPON);
773 
774 	dp = idesc->id_dp;
775 	if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
776 		if (idesc->id_lbn < UFS_NDADDR) {
777 			DIP_SET(dp, di_db[idesc->id_lbn], 0);
778 		} else {
779 			bp = idesc->id_bp;
780 			IBLK_SET(bp, bp->b_index, 0);
781 			dirty(bp);
782 		}
783 	}
784 	return (KEEPON);
785 }
786 
787 /*
788  * Notification that a block is being freed. Return zero if the free
789  * should be allowed to proceed. Return non-zero if the snapshot file
790  * wants to claim the block. The block will be claimed if it is an
791  * uncopied part of one of the snapshots. It will be freed if it is
792  * either a BLK_NOCOPY or has already been copied in all of the snapshots.
793  * If a fragment is being freed, then all snapshots that care about
794  * it must make a copy since a snapshot file can only claim full sized
795  * blocks. Note that if more than one snapshot file maps the block,
796  * we can pick one at random to claim it. Since none of the snapshots
797  * can change, we are assurred that they will all see the same unmodified
798  * image. When deleting a snapshot file (see ino_trunc above), we
799  * must push any of these claimed blocks to one of the other snapshots
800  * that maps it. These claimed blocks are easily identified as they will
801  * have a block number equal to their logical block number within the
802  * snapshot. A copied block can never have this property because they
803  * must always have been allocated from a BLK_NOCOPY location.
804  */
805 int
806 snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
807 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
808 {
809 	union dinode *dp;
810 	struct inode ip;
811 	struct bufarea *snapbp;
812 	ufs_lbn_t lbn;
813 	ufs2_daddr_t blkno, relblkno;
814 	int i, frags, claimedblk, copydone;
815 
816 	/* If no snapshots, nothing to do */
817 	if (snapcnt == 0)
818 		return (0);
819 	if (debug)
820 		printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
821 		    (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
822 	relblkno = blknum(fs, bno);
823 	lbn = fragstoblks(fs, relblkno);
824 	/* Direct blocks are always pre-copied */
825 	if (lbn < UFS_NDADDR)
826 		return (0);
827 	copydone = 0;
828 	claimedblk = 0;
829 	for (i = 0; i < snapcnt; i++) {
830 		/*
831 		 * Lookup block being freed.
832 		 */
833 		ip = snaplist[i];
834 		dp = ip.i_dp;
835 		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
836 		    lbn, &frags, &snapbp);
837 		/*
838 		 * Check to see if block needs to be copied.
839 		 */
840 		if (blkno == 0) {
841 			/*
842 			 * A block that we map is being freed. If it has not
843 			 * been claimed yet, we will claim or copy it (below).
844 			 */
845 			claimedblk = 1;
846 		} else if (blkno == BLK_SNAP) {
847 			/*
848 			 * No previous snapshot claimed the block,
849 			 * so it will be freed and become a BLK_NOCOPY
850 			 * (don't care) for us.
851 			 */
852 			if (claimedblk)
853 				pfatal("snapblkfree: inconsistent block type");
854 			IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
855 			dirty(snapbp);
856 			brelse(snapbp);
857 			continue;
858 		} else /* BLK_NOCOPY or default */ {
859 			/*
860 			 * If the snapshot has already copied the block
861 			 * (default), or does not care about the block,
862 			 * it is not needed.
863 			 */
864 			brelse(snapbp);
865 			continue;
866 		}
867 		/*
868 		 * If this is a full size block, we will just grab it
869 		 * and assign it to the snapshot inode. Otherwise we
870 		 * will proceed to copy it. See explanation for this
871 		 * routine as to why only a single snapshot needs to
872 		 * claim this block.
873 		 */
874 		if (size == fs->fs_bsize) {
875 			if (debug)
876 				printf("Grabonremove snapshot %ju lbn %jd "
877 				    "from inum %ju\n", (intmax_t)ip.i_number,
878 				    (intmax_t)lbn, (uintmax_t)inum);
879 			IBLK_SET(snapbp, snapbp->b_index, relblkno);
880 			dirty(snapbp);
881 			brelse(snapbp);
882 			DIP_SET(dp, di_blocks,
883 			    DIP(dp, di_blocks) + btodb(size));
884 			inodirty(&ip);
885 			return (1);
886 		}
887 
888 		/* First time through, read the contents of the old block. */
889 		if (copydone == 0) {
890 			copydone = 1;
891 			if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
892 			    fs->fs_bsize) != 0) {
893 				pfatal("Could not read snapshot %ju block "
894 				    "%jd\n", (intmax_t)ip.i_number,
895 				    (intmax_t)relblkno);
896 				continue;
897 			}
898 		}
899 		/*
900 		 * This allocation will never require any additional
901 		 * allocations for the snapshot inode.
902 		 */
903 		blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
904 		    checkblkavail);
905 		if (blkno == 0) {
906 			pfatal("Could not allocate block for snapshot %ju\n",
907 			    (intmax_t)ip.i_number);
908 			continue;
909 		}
910 		if (debug)
911 			printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
912 			    "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
913 			    (intmax_t)lbn, (uintmax_t)inum, size,
914 			    (intmax_t)blkno);
915 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
916 		IBLK_SET(snapbp, snapbp->b_index, blkno);
917 		dirty(snapbp);
918 		brelse(snapbp);
919 		DIP_SET(dp, di_blocks,
920 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
921 		inodirty(&ip);
922 	}
923 	return (0);
924 }
925 
926 /*
927  * Notification that a block is being written. Return if the block
928  * is part of a snapshot as snapshots never track other snapshots.
929  * The block will be copied in all of the snapshots that are tracking
930  * it and have not yet copied it. Some buffers may hold more than one
931  * block. Here we need to check each block in the buffer.
932  */
933 void
934 copyonwrite(struct fs *fs, struct bufarea *bp,
935 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
936 {
937 	ufs2_daddr_t copyblkno;
938 	long i, numblks;
939 
940 	/* If no snapshots, nothing to do. */
941 	if (snapcnt == 0)
942 		return;
943 	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
944 	if (debug)
945 		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
946 		    (intmax_t)numblks, numblks > 1 ? "s" : "");
947 	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
948 	for (i = 0; i < numblks; i++) {
949 		chkcopyonwrite(fs, copyblkno, checkblkavail);
950 		copyblkno += fs->fs_frag;
951 	}
952 }
953 
954 static void
955 chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
956 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
957 {
958 	struct inode ip;
959 	union dinode *dp;
960 	struct bufarea *snapbp;
961 	ufs2_daddr_t blkno;
962 	int i, frags, copydone;
963 	ufs_lbn_t lbn;
964 
965 	lbn = fragstoblks(fs, copyblkno);
966 	/* Direct blocks are always pre-copied */
967 	if (lbn < UFS_NDADDR)
968 		return;
969 	copydone = 0;
970 	for (i = 0; i < snapcnt; i++) {
971 		/*
972 		 * Lookup block being freed.
973 		 */
974 		ip = snaplist[i];
975 		dp = ip.i_dp;
976 		blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
977 		/*
978 		 * Check to see if block needs to be copied.
979 		 */
980 		if (blkno != 0) {
981 			/*
982 			 * A block that we have already copied or don't track.
983 			 */
984 			brelse(snapbp);
985 			continue;
986 		}
987 		/* First time through, read the contents of the old block. */
988 		if (copydone == 0) {
989 			copydone = 1;
990 			if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
991 			    fs->fs_bsize) != 0) {
992 				pfatal("Could not read snapshot %ju block "
993 				    "%jd\n", (intmax_t)ip.i_number,
994 				    (intmax_t)copyblkno);
995 				continue;
996 			}
997 		}
998 		/*
999 		 * This allocation will never require any additional
1000 		 * allocations for the snapshot inode.
1001 		 */
1002 		if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
1003 		    checkblkavail)) == 0) {
1004 			pfatal("Could not allocate block for snapshot %ju\n",
1005 			    (intmax_t)ip.i_number);
1006 			continue;
1007 		}
1008 		if (debug)
1009 			prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
1010 			    "blkno %ju setting in buffer",
1011 			    (intmax_t)ip.i_number, (intmax_t)lbn,
1012 			    (intmax_t)blkno);
1013 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
1014 		IBLK_SET(snapbp, snapbp->b_index, blkno);
1015 		dirty(snapbp);
1016 		brelse(snapbp);
1017 		DIP_SET(dp, di_blocks,
1018 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
1019 		inodirty(&ip);
1020 	}
1021 	return;
1022 }
1023 
1024 /*
1025  * Traverse an inode and check that its block count is correct
1026  * fixing it if necessary.
1027  */
1028 void
1029 check_blkcnt(struct inode *ip)
1030 {
1031 	struct inodesc idesc;
1032 	union dinode *dp;
1033 	ufs2_daddr_t ndb;
1034 	int j, ret, offset;
1035 
1036 	dp = ip->i_dp;
1037 	memset(&idesc, 0, sizeof(struct inodesc));
1038 	idesc.id_func = pass1check;
1039 	idesc.id_number = ip->i_number;
1040 	idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
1041 	(void)ckinode(dp, &idesc);
1042 	if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
1043 		ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
1044 		for (j = 0; j < UFS_NXADDR; j++) {
1045 			if (--ndb == 0 &&
1046 			    (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
1047 				idesc.id_numfrags = numfrags(&sblock,
1048 				    fragroundup(&sblock, offset));
1049 			else
1050 				idesc.id_numfrags = sblock.fs_frag;
1051 			if (dp->dp2.di_extb[j] == 0)
1052 				continue;
1053 			idesc.id_blkno = dp->dp2.di_extb[j];
1054 			ret = (*idesc.id_func)(&idesc);
1055 			if (ret & STOP)
1056 				break;
1057 		}
1058 	}
1059 	idesc.id_entryno *= btodb(sblock.fs_fsize);
1060 	if (DIP(dp, di_blocks) != idesc.id_entryno) {
1061 		if (!(sujrecovery && preen)) {
1062 			pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
1063 			    (u_long)idesc.id_number,
1064 			    (uintmax_t)DIP(dp, di_blocks),
1065 			    (uintmax_t)idesc.id_entryno);
1066 			if (preen)
1067 				printf(" (CORRECTED)\n");
1068 			else if (reply("CORRECT") == 0)
1069 				return;
1070 		}
1071 		if (bkgrdflag == 0) {
1072 			DIP_SET(dp, di_blocks, idesc.id_entryno);
1073 			inodirty(ip);
1074 		} else {
1075 			cmd.value = idesc.id_number;
1076 			cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
1077 			if (debug)
1078 				printf("adjblkcnt ino %ju amount %lld\n",
1079 				    (uintmax_t)cmd.value, (long long)cmd.size);
1080 			if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
1081 			    &cmd, sizeof cmd) == -1)
1082 				rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
1083 		}
1084 	}
1085 }
1086 
1087 void
1088 freeinodebuf(void)
1089 {
1090 	struct bufarea *bp;
1091 	int i;
1092 
1093 	/*
1094 	 * Flush old contents in case they have been updated.
1095 	 */
1096 	flush(fswritefd, &inobuf);
1097 	if (inobuf.b_un.b_buf != NULL)
1098 		free((char *)inobuf.b_un.b_buf);
1099 	inobuf.b_un.b_buf = NULL;
1100 	firstinum = lastinum = 0;
1101 	/*
1102 	 * Reload the snapshot inodes in case any of them changed.
1103 	 */
1104 	for (i = 0; i < snapcnt; i++) {
1105 		bp = snaplist[i].i_bp;
1106 		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
1107 		    bp->b_size);
1108 	}
1109 }
1110 
1111 /*
1112  * Routines to maintain information about directory inodes.
1113  * This is built during the first pass and used during the
1114  * second and third passes.
1115  *
1116  * Enter inodes into the cache.
1117  */
1118 struct inoinfo *
1119 cacheino(union dinode *dp, ino_t inumber)
1120 {
1121 	struct inoinfo *inp;
1122 	int i, blks;
1123 
1124 	if (getinoinfo(inumber) != NULL)
1125 		pfatal("cacheino: duplicate entry for ino %jd\n",
1126 		    (intmax_t)inumber);
1127 	if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
1128 		blks = UFS_NDADDR + UFS_NIADDR;
1129 	else if (DIP(dp, di_size) > 0)
1130 		blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
1131 	else
1132 		blks = 1;
1133 	inp = (struct inoinfo *)
1134 		Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
1135 	if (inp == NULL)
1136 		errx(EEXIT, "cannot increase directory list");
1137 	SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
1138 	inp->i_flags = 0;
1139 	inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
1140 	inp->i_dotdot = (ino_t)0;
1141 	inp->i_number = inumber;
1142 	inp->i_isize = DIP(dp, di_size);
1143 	inp->i_depth = DIP(dp, di_dirdepth);
1144 	inp->i_numblks = blks;
1145 	for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
1146 		inp->i_blks[i] = DIP(dp, di_db[i]);
1147 	if (blks > UFS_NDADDR)
1148 		for (i = 0; i < UFS_NIADDR; i++)
1149 			inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
1150 	if (inplast == listmax) {
1151 		listmax += 100;
1152 		inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
1153 		    listmax, sizeof(struct inoinfo *));
1154 		if (inpsort == NULL)
1155 			errx(EEXIT, "cannot increase directory list");
1156 	}
1157 	inpsort[inplast++] = inp;
1158 	return (inp);
1159 }
1160 
1161 /*
1162  * Look up an inode cache structure.
1163  */
1164 struct inoinfo *
1165 getinoinfo(ino_t inumber)
1166 {
1167 	struct inoinfo *inp;
1168 
1169 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1170 		if (inp->i_number != inumber)
1171 			continue;
1172 		return (inp);
1173 	}
1174 	return (NULL);
1175 }
1176 
1177 /*
1178  * Remove an entry from the inode cache and disk-order sorted list.
1179  * Return 0 on success and 1 on failure.
1180  */
1181 int
1182 removecachedino(ino_t inumber)
1183 {
1184 	struct inoinfo *inp, **inpp;
1185 	char *listtype;
1186 
1187 	listtype = "hash";
1188 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1189 		if (inp->i_number != inumber)
1190 			continue;
1191 		SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash);
1192 		for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
1193 			if (*inpp != inp)
1194 				continue;
1195 			*inpp = inpsort[inplast - 1];
1196 			inplast--;
1197 			free(inp);
1198 			return (0);
1199 		}
1200 		listtype = "sort";
1201 		break;
1202 	}
1203 	pfatal("removecachedino: entry for ino %jd not found on %s list\n",
1204 	    (intmax_t)inumber, listtype);
1205 	return (1);
1206 }
1207 
1208 /*
1209  * Clean up all the inode cache structure.
1210  */
1211 void
1212 inocleanup(void)
1213 {
1214 	struct inoinfo **inpp;
1215 
1216 	if (inphash == NULL)
1217 		return;
1218 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
1219 		free((char *)(*inpp));
1220 	free((char *)inphash);
1221 	inphash = NULL;
1222 	free((char *)inpsort);
1223 	inpsort = NULL;
1224 }
1225 
1226 void
1227 inodirty(struct inode *ip)
1228 {
1229 
1230 	if (sblock.fs_magic == FS_UFS2_MAGIC)
1231 		ffs_update_dinode_ckhash(&sblock,
1232 		    (struct ufs2_dinode *)ip->i_dp);
1233 	dirty(ip->i_bp);
1234 }
1235 
1236 void
1237 clri(struct inodesc *idesc, const char *type, int flag)
1238 {
1239 	union dinode *dp;
1240 	struct inode ip;
1241 
1242 	ginode(idesc->id_number, &ip);
1243 	dp = ip.i_dp;
1244 	if (flag == 1) {
1245 		pwarn("%s %s", type,
1246 		    (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
1247 		prtinode(&ip);
1248 		printf("\n");
1249 	}
1250 	if (preen || reply("CLEAR") == 1) {
1251 		if (preen)
1252 			printf(" (CLEARED)\n");
1253 		n_files--;
1254 		if (bkgrdflag == 0) {
1255 			if (idesc->id_type == SNAP) {
1256 				snapremove(idesc->id_number);
1257 				idesc->id_type = ADDR;
1258 			}
1259 			(void)ckinode(dp, idesc);
1260 			inoinfo(idesc->id_number)->ino_state = USTATE;
1261 			clearinode(dp);
1262 			inodirty(&ip);
1263 		} else {
1264 			cmd.value = idesc->id_number;
1265 			cmd.size = -DIP(dp, di_nlink);
1266 			if (debug)
1267 				printf("adjrefcnt ino %ld amt %lld\n",
1268 				    (long)cmd.value, (long long)cmd.size);
1269 			if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
1270 			    &cmd, sizeof cmd) == -1)
1271 				rwerror("ADJUST INODE", cmd.value);
1272 		}
1273 	}
1274 	irelse(&ip);
1275 }
1276 
1277 int
1278 findname(struct inodesc *idesc)
1279 {
1280 	struct direct *dirp = idesc->id_dirp;
1281 
1282 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1283 		idesc->id_entryno++;
1284 		return (KEEPON);
1285 	}
1286 	memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
1287 	return (STOP|FOUND);
1288 }
1289 
1290 int
1291 findino(struct inodesc *idesc)
1292 {
1293 	struct direct *dirp = idesc->id_dirp;
1294 
1295 	if (dirp->d_ino == 0)
1296 		return (KEEPON);
1297 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1298 	    dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) {
1299 		idesc->id_parent = dirp->d_ino;
1300 		return (STOP|FOUND);
1301 	}
1302 	return (KEEPON);
1303 }
1304 
1305 int
1306 clearentry(struct inodesc *idesc)
1307 {
1308 	struct direct *dirp = idesc->id_dirp;
1309 
1310 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1311 		idesc->id_entryno++;
1312 		return (KEEPON);
1313 	}
1314 	dirp->d_ino = 0;
1315 	return (STOP|FOUND|ALTERED);
1316 }
1317 
1318 void
1319 prtinode(struct inode *ip)
1320 {
1321 	char *p;
1322 	union dinode *dp;
1323 	struct passwd *pw;
1324 	time_t t;
1325 
1326 	dp = ip->i_dp;
1327 	printf(" I=%lu ", (u_long)ip->i_number);
1328 	if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino)
1329 		return;
1330 	printf(" OWNER=");
1331 	if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
1332 		printf("%s ", pw->pw_name);
1333 	else
1334 		printf("%u ", (unsigned)DIP(dp, di_uid));
1335 	printf("MODE=%o\n", DIP(dp, di_mode));
1336 	if (preen)
1337 		printf("%s: ", cdevname);
1338 	printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
1339 	t = DIP(dp, di_mtime);
1340 	if ((p = ctime(&t)) != NULL)
1341 		printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
1342 }
1343 
1344 void
1345 blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
1346 {
1347 
1348 	pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
1349 	printf("\n");
1350 	switch (inoinfo(ino)->ino_state) {
1351 
1352 	case FSTATE:
1353 	case FZLINK:
1354 		inoinfo(ino)->ino_state = FCLEAR;
1355 		return;
1356 
1357 	case DSTATE:
1358 	case DZLINK:
1359 		inoinfo(ino)->ino_state = DCLEAR;
1360 		return;
1361 
1362 	case FCLEAR:
1363 	case DCLEAR:
1364 		return;
1365 
1366 	default:
1367 		errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state);
1368 		/* NOTREACHED */
1369 	}
1370 }
1371 
1372 /*
1373  * allocate an unused inode
1374  */
1375 ino_t
1376 allocino(ino_t request, int type)
1377 {
1378 	ino_t ino;
1379 	struct inode ip;
1380 	union dinode *dp;
1381 	struct bufarea *cgbp;
1382 	struct cg *cgp;
1383 	int cg, anyino;
1384 
1385 	anyino = 0;
1386 	if (request == 0) {
1387 		request = UFS_ROOTINO;
1388 		anyino = 1;
1389 	} else if (inoinfo(request)->ino_state != USTATE)
1390 		return (0);
1391 retry:
1392 	for (ino = request; ino < maxino; ino++)
1393 		if (inoinfo(ino)->ino_state == USTATE)
1394 			break;
1395 	if (ino >= maxino)
1396 		return (0);
1397 	cg = ino_to_cg(&sblock, ino);
1398 	cgbp = cglookup(cg);
1399 	cgp = cgbp->b_un.b_cg;
1400 	if (!check_cgmagic(cg, cgbp)) {
1401 		if (anyino == 0)
1402 			return (0);
1403 		request = (cg + 1) * sblock.fs_ipg;
1404 		goto retry;
1405 	}
1406 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1407 	cgp->cg_cs.cs_nifree--;
1408 	switch (type & IFMT) {
1409 	case IFDIR:
1410 		inoinfo(ino)->ino_state = DSTATE;
1411 		cgp->cg_cs.cs_ndir++;
1412 		break;
1413 	case IFREG:
1414 	case IFLNK:
1415 		inoinfo(ino)->ino_state = FSTATE;
1416 		break;
1417 	default:
1418 		return (0);
1419 	}
1420 	cgdirty(cgbp);
1421 	ginode(ino, &ip);
1422 	dp = ip.i_dp;
1423 	memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ?
1424 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
1425 	DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
1426 	    std_checkblkavail));
1427 	if (DIP(dp, di_db[0]) == 0) {
1428 		inoinfo(ino)->ino_state = USTATE;
1429 		inodirty(&ip);
1430 		irelse(&ip);
1431 		return (0);
1432 	}
1433 	DIP_SET(dp, di_mode, type);
1434 	DIP_SET(dp, di_atime, time(NULL));
1435 	DIP_SET(dp, di_ctime, DIP(dp, di_atime));
1436 	DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
1437 	DIP_SET(dp, di_size, sblock.fs_fsize);
1438 	DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
1439 	n_files++;
1440 	inodirty(&ip);
1441 	irelse(&ip);
1442 	inoinfo(ino)->ino_type = IFTODT(type);
1443 	return (ino);
1444 }
1445 
1446 /*
1447  * deallocate an inode
1448  */
1449 void
1450 freeino(ino_t ino)
1451 {
1452 	struct inodesc idesc;
1453 	union dinode *dp;
1454 	struct inode ip;
1455 
1456 	memset(&idesc, 0, sizeof(struct inodesc));
1457 	idesc.id_type = ADDR;
1458 	idesc.id_func = freeblock;
1459 	idesc.id_number = ino;
1460 	ginode(ino, &ip);
1461 	dp = ip.i_dp;
1462 	(void)ckinode(dp, &idesc);
1463 	clearinode(dp);
1464 	inodirty(&ip);
1465 	irelse(&ip);
1466 	inoinfo(ino)->ino_state = USTATE;
1467 	n_files--;
1468 }
1469