xref: /freebsd/sbin/fsck_ffs/inode.c (revision a2f733abcff64628b7771a47089628b7327a88bd)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if 0
33 #endif
34 #include <sys/cdefs.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/stdint.h>
38 #include <sys/sysctl.h>
39 
40 #include <ufs/ufs/dinode.h>
41 #include <ufs/ufs/dir.h>
42 #include <ufs/ffs/fs.h>
43 
44 #include <err.h>
45 #include <pwd.h>
46 #include <string.h>
47 #include <time.h>
48 
49 #include "fsck.h"
50 
51 struct bufarea *icachebp;	/* inode cache buffer */
52 
53 static int iblock(struct inodesc *, off_t isize, int type);
54 static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
55     struct bufarea **);
56 static int snapclean(struct inodesc *idesc);
57 static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
58     ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long));
59 
60 int
61 ckinode(union dinode *dp, struct inodesc *idesc)
62 {
63 	off_t remsize, sizepb;
64 	int i, offset, ret;
65 	struct inode ip;
66 	union dinode dino;
67 	ufs2_daddr_t ndb;
68 	mode_t mode;
69 	char pathbuf[MAXPATHLEN + 1];
70 
71 	if (idesc->id_fix != IGNORE)
72 		idesc->id_fix = DONTKNOW;
73 	idesc->id_dp = dp;
74 	idesc->id_lbn = -1;
75 	idesc->id_lballoc = -1;
76 	idesc->id_level = 0;
77 	idesc->id_entryno = 0;
78 	idesc->id_filesize = DIP(dp, di_size);
79 	mode = DIP(dp, di_mode) & IFMT;
80 	if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
81 	    DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
82 		return (KEEPON);
83 	if (sblock.fs_magic == FS_UFS1_MAGIC)
84 		dino.dp1 = dp->dp1;
85 	else
86 		dino.dp2 = dp->dp2;
87 	if (DIP(&dino, di_size) < 0) {
88 		pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size));
89 		return (STOP);
90 	}
91 	ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
92 	for (i = 0; i < UFS_NDADDR; i++) {
93 		idesc->id_lbn++;
94 		if (--ndb == 0 &&
95 		    (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
96 			idesc->id_numfrags =
97 				numfrags(&sblock, fragroundup(&sblock, offset));
98 		else
99 			idesc->id_numfrags = sblock.fs_frag;
100 		if (DIP(&dino, di_db[i]) == 0) {
101 			if (idesc->id_type == DATA && ndb >= 0) {
102 				/* An empty block in a directory XXX */
103 				getpathname(pathbuf, idesc->id_number,
104 						idesc->id_number);
105 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
106 					pathbuf);
107 				if (reply("ADJUST LENGTH") == 1) {
108 					ginode(idesc->id_number, &ip);
109 					DIP_SET(ip.i_dp, di_size,
110 					    i * sblock.fs_bsize);
111 					printf(
112 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
113 					rerun = 1;
114 					inodirty(&ip);
115 					irelse(&ip);
116 				}
117 				return (STOP);
118 			}
119 			continue;
120 		}
121 		idesc->id_blkno = DIP(&dino, di_db[i]);
122 		if (idesc->id_type != DATA)
123 			ret = (*idesc->id_func)(idesc);
124 		else
125 			ret = dirscan(idesc);
126 		if (ret & STOP)
127 			return (ret);
128 	}
129 	idesc->id_numfrags = sblock.fs_frag;
130 	remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR;
131 	sizepb = sblock.fs_bsize;
132 	for (i = 0; i < UFS_NIADDR; i++) {
133 		sizepb *= NINDIR(&sblock);
134 		idesc->id_level = i + 1;
135 		if (DIP(&dino, di_ib[i])) {
136 			idesc->id_blkno = DIP(&dino, di_ib[i]);
137 			ret = iblock(idesc, remsize, BT_LEVEL1 + i);
138 			if (ret & STOP)
139 				return (ret);
140 		} else if (remsize > 0) {
141 			idesc->id_lbn += sizepb / sblock.fs_bsize;
142 			if (idesc->id_type == DATA) {
143 				/* An empty block in a directory XXX */
144 				getpathname(pathbuf, idesc->id_number,
145 						idesc->id_number);
146 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
147 					pathbuf);
148 				if (reply("ADJUST LENGTH") == 1) {
149 					ginode(idesc->id_number, &ip);
150 					DIP_SET(ip.i_dp, di_size,
151 					    DIP(ip.i_dp, di_size) - remsize);
152 					remsize = 0;
153 					printf(
154 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
155 					rerun = 1;
156 					inodirty(&ip);
157 					irelse(&ip);
158 					break;
159 				}
160 			}
161 		}
162 		remsize -= sizepb;
163 	}
164 	return (KEEPON);
165 }
166 
167 static int
168 iblock(struct inodesc *idesc, off_t isize, int type)
169 {
170 	struct inode ip;
171 	struct bufarea *bp;
172 	int i, n, (*func)(struct inodesc *), nif;
173 	off_t sizepb;
174 	char buf[BUFSIZ];
175 	char pathbuf[MAXPATHLEN + 1];
176 
177 	if (idesc->id_type != DATA) {
178 		func = idesc->id_func;
179 		if (((n = (*func)(idesc)) & KEEPON) == 0)
180 			return (n);
181 	} else
182 		func = dirscan;
183 	bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
184 	if (bp->b_errs != 0) {
185 		brelse(bp);
186 		return (SKIP);
187 	}
188 	idesc->id_bp = bp;
189 	idesc->id_level--;
190 	for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++)
191 		sizepb *= NINDIR(&sblock);
192 	if (howmany(isize, sizepb) > NINDIR(&sblock))
193 		nif = NINDIR(&sblock);
194 	else
195 		nif = howmany(isize, sizepb);
196 	if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
197 		for (i = nif; i < NINDIR(&sblock); i++) {
198 			if (IBLK(bp, i) == 0)
199 				continue;
200 			(void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
201 			    (u_long)idesc->id_number);
202 			if (preen) {
203 				pfatal("%s", buf);
204 			} else if (dofix(idesc, buf)) {
205 				IBLK_SET(bp, i, 0);
206 				dirty(bp);
207 			}
208 		}
209 		flush(fswritefd, bp);
210 	}
211 	for (i = 0; i < nif; i++) {
212 		if (IBLK(bp, i)) {
213 			idesc->id_blkno = IBLK(bp, i);
214 			bp->b_index = i;
215 			if (idesc->id_level == 0) {
216 				idesc->id_lbn++;
217 				n = (*func)(idesc);
218 			} else {
219 				n = iblock(idesc, isize, type - 1);
220 				idesc->id_level++;
221 			}
222 			if (n & STOP) {
223 				brelse(bp);
224 				return (n);
225 			}
226 		} else {
227 			idesc->id_lbn += sizepb / sblock.fs_bsize;
228 			if (idesc->id_type == DATA && isize > 0) {
229 				/* An empty block in a directory XXX */
230 				getpathname(pathbuf, idesc->id_number,
231 						idesc->id_number);
232 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
233 					pathbuf);
234 				if (reply("ADJUST LENGTH") == 1) {
235 					ginode(idesc->id_number, &ip);
236 					DIP_SET(ip.i_dp, di_size,
237 					    DIP(ip.i_dp, di_size) - isize);
238 					isize = 0;
239 					printf(
240 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
241 					rerun = 1;
242 					inodirty(&ip);
243 					brelse(bp);
244 					return(STOP);
245 				}
246 			}
247 		}
248 		isize -= sizepb;
249 	}
250 	brelse(bp);
251 	return (KEEPON);
252 }
253 
254 /*
255  * Finds the disk block address at the specified lbn within the inode
256  * specified by dp.  This follows the whole tree and honors di_size and
257  * di_extsize so it is a true test of reachability.  The lbn may be
258  * negative if an extattr or indirect block is requested.
259  */
260 ufs2_daddr_t
261 ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags,
262     struct bufarea **bpp)
263 {
264 	ufs_lbn_t tmpval;
265 	ufs_lbn_t cur;
266 	ufs_lbn_t next;
267 	int i;
268 
269 	*frags = 0;
270 	if (bpp != NULL)
271 		*bpp = NULL;
272 	/*
273 	 * Handle extattr blocks first.
274 	 */
275 	if (lbn < 0 && lbn >= -UFS_NXADDR) {
276 		lbn = -1 - lbn;
277 		if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1))
278 			return (0);
279 		*frags = numfrags(&sblock,
280 		    sblksize(&sblock, dp->dp2.di_extsize, lbn));
281 		return (dp->dp2.di_extb[lbn]);
282 	}
283 	/*
284 	 * Now direct and indirect.
285 	 */
286 	if (DIP(dp, di_mode) == IFLNK &&
287 	    DIP(dp, di_size) < sblock.fs_maxsymlinklen)
288 		return (0);
289 	if (lbn >= 0 && lbn < UFS_NDADDR) {
290 		*frags = numfrags(&sblock,
291 		    sblksize(&sblock, DIP(dp, di_size), lbn));
292 		return (DIP(dp, di_db[lbn]));
293 	}
294 	*frags = sblock.fs_frag;
295 
296 	for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR;
297 	    i++, tmpval *= NINDIR(&sblock), cur = next) {
298 		next = cur + tmpval;
299 		if (lbn == -cur - i)
300 			return (DIP(dp, di_ib[i]));
301 		/*
302 		 * Determine whether the lbn in question is within this tree.
303 		 */
304 		if (lbn < 0 && -lbn >= next)
305 			continue;
306 		if (lbn > 0 && lbn >= next)
307 			continue;
308 		if (DIP(dp, di_ib[i]) == 0)
309 			return (0);
310 		return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn,
311 		    bpp));
312 	}
313 	pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino);
314 	return (0);
315 }
316 
317 /*
318  * Fetch an indirect block to find the block at a given lbn.  The lbn
319  * may be negative to fetch a specific indirect block pointer or positive
320  * to fetch a specific block.
321  */
322 static ufs2_daddr_t
323 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn,
324     struct bufarea **bpp)
325 {
326 	struct bufarea *bp;
327 	ufs_lbn_t lbnadd;
328 	ufs_lbn_t base;
329 	int i, level;
330 
331 	level = lbn_level(cur);
332 	if (level == -1)
333 		pfatal("Invalid indir lbn %jd in ino %ju\n",
334 		    lbn, (uintmax_t)ino);
335 	if (level == 0 && lbn < 0)
336 		pfatal("Invalid lbn %jd in ino %ju\n",
337 		    lbn, (uintmax_t)ino);
338 	lbnadd = 1;
339 	base = -(cur + level);
340 	for (i = level; i > 0; i--)
341 		lbnadd *= NINDIR(&sblock);
342 	if (lbn > 0)
343 		i = (lbn - base) / lbnadd;
344 	else
345 		i = (-lbn - base) / lbnadd;
346 	if (i < 0 || i >= NINDIR(&sblock)) {
347 		pfatal("Invalid indirect index %d produced by lbn %jd "
348 		    "in ino %ju\n", i, lbn, (uintmax_t)ino);
349 		return (0);
350 	}
351 	if (level == 0)
352 		cur = base + (i * lbnadd);
353 	else
354 		cur = -(base + (i * lbnadd)) - (level - 1);
355 	bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level);
356 	if (bp->b_errs != 0)
357 		return (0);
358 	blk = IBLK(bp, i);
359 	bp->b_index = i;
360 	if (cur == lbn || blk == 0) {
361 		if (bpp != NULL)
362 			*bpp = bp;
363 		else
364 			brelse(bp);
365 		return (blk);
366 	}
367 	brelse(bp);
368 	if (level == 0)
369 		pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn,
370 		    (uintmax_t)ino);
371 	return (indir_blkatoff(blk, ino, cur, lbn, bpp));
372 }
373 
374 /*
375  * Check that a block in a legal block number.
376  * Return 0 if in range, 1 if out of range.
377  */
378 int
379 chkrange(ufs2_daddr_t blk, int cnt)
380 {
381 	int c;
382 
383 	if (cnt <= 0 || blk <= 0 || blk >= maxfsblock ||
384 	    cnt > maxfsblock - blk) {
385 		if (debug)
386 			printf("out of range: blk %ld, offset %i, size %d\n",
387 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
388 		return (1);
389 	}
390 	if (cnt > sblock.fs_frag ||
391 	    fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
392 		if (debug)
393 			printf("bad size: blk %ld, offset %i, size %d\n",
394 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
395 		return (1);
396 	}
397 	c = dtog(&sblock, blk);
398 	if (blk < cgdmin(&sblock, c)) {
399 		if ((blk + cnt) > cgsblock(&sblock, c)) {
400 			if (debug) {
401 				printf("blk %ld < cgdmin %ld;",
402 				    (long)blk, (long)cgdmin(&sblock, c));
403 				printf(" blk + cnt %ld > cgsbase %ld\n",
404 				    (long)(blk + cnt),
405 				    (long)cgsblock(&sblock, c));
406 			}
407 			return (1);
408 		}
409 	} else {
410 		if ((blk + cnt) > cgbase(&sblock, c+1)) {
411 			if (debug)  {
412 				printf("blk %ld >= cgdmin %ld;",
413 				    (long)blk, (long)cgdmin(&sblock, c));
414 				printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
415 				    (long)(blk + cnt), (long)sblock.fs_fpg);
416 			}
417 			return (1);
418 		}
419 	}
420 	return (0);
421 }
422 
423 /*
424  * General purpose interface for reading inodes.
425  *
426  * firstinum and lastinum track contents of getnextino() cache (below).
427  */
428 static ino_t firstinum, lastinum;
429 static struct bufarea inobuf;
430 
431 void
432 ginode(ino_t inumber, struct inode *ip)
433 {
434 	ufs2_daddr_t iblk;
435 	struct ufs2_dinode *dp;
436 
437 	if (inumber < UFS_ROOTINO || inumber >= maxino)
438 		errx(EEXIT, "bad inode number %ju to ginode",
439 		    (uintmax_t)inumber);
440 	ip->i_number = inumber;
441 	if (inumber >= firstinum && inumber < lastinum) {
442 		/* contents in getnextino() cache */
443 		ip->i_bp = &inobuf;
444 		inobuf.b_refcnt++;
445 		inobuf.b_index = firstinum;
446 	} else if (icachebp != NULL &&
447 	    inumber >= icachebp->b_index &&
448 	    inumber < icachebp->b_index + INOPB(&sblock)) {
449 		/* take an additional reference for the returned inode */
450 		icachebp->b_refcnt++;
451 		ip->i_bp = icachebp;
452 	} else {
453 		iblk = ino_to_fsba(&sblock, inumber);
454 		/* release our cache-hold reference on old icachebp */
455 		if (icachebp != NULL)
456 			brelse(icachebp);
457 		icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
458 		if (icachebp->b_errs != 0) {
459 			icachebp = NULL;
460 			ip->i_bp = NULL;
461 			ip->i_dp = &zino;
462 			return;
463 		}
464 		/* take a cache-hold reference on new icachebp */
465 		icachebp->b_refcnt++;
466 		icachebp->b_index = rounddown(inumber, INOPB(&sblock));
467 		ip->i_bp = icachebp;
468 	}
469 	if (sblock.fs_magic == FS_UFS1_MAGIC) {
470 		ip->i_dp = (union dinode *)
471 		    &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index];
472 		return;
473 	}
474 	ip->i_dp = (union dinode *)
475 	    &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index];
476 	dp = (struct ufs2_dinode *)ip->i_dp;
477 	/* Do not check hash of inodes being created */
478 	if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) {
479 		pwarn("INODE CHECK-HASH FAILED");
480 		prtinode(ip);
481 		if (preen || reply("FIX") != 0) {
482 			if (preen)
483 				printf(" (FIXED)\n");
484 			ffs_update_dinode_ckhash(&sblock, dp);
485 			inodirty(ip);
486 		}
487 	}
488 }
489 
490 /*
491  * Release a held inode.
492  */
493 void
494 irelse(struct inode *ip)
495 {
496 
497 	/* Check for failed inode read */
498 	if (ip->i_bp == NULL)
499 		return;
500 	if (debug && sblock.fs_magic == FS_UFS2_MAGIC &&
501 	    ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
502 		pwarn("irelse: releasing inode with bad check-hash");
503 		prtinode(ip);
504 	}
505 	if (ip->i_bp->b_refcnt <= 0)
506 		pfatal("irelse: releasing unreferenced ino %ju\n",
507 		    (uintmax_t) ip->i_number);
508 	brelse(ip->i_bp);
509 }
510 
511 /*
512  * Special purpose version of ginode used to optimize first pass
513  * over all the inodes in numerical order.
514  */
515 static ino_t nextinum, lastvalidinum;
516 static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;
517 
518 union dinode *
519 getnextinode(ino_t inumber, int rebuiltcg)
520 {
521 	int j;
522 	long size;
523 	mode_t mode;
524 	ufs2_daddr_t ndb, blk;
525 	union dinode *dp;
526 	struct inode ip;
527 	static caddr_t nextinop;
528 
529 	if (inumber != nextinum++ || inumber > lastvalidinum)
530 		errx(EEXIT, "bad inode number %ju to nextinode",
531 		    (uintmax_t)inumber);
532 	if (inumber >= lastinum) {
533 		readcount++;
534 		firstinum = lastinum;
535 		blk = ino_to_fsba(&sblock, lastinum);
536 		if (readcount % readpercg == 0) {
537 			size = partialsize;
538 			lastinum += partialcnt;
539 		} else {
540 			size = inobufsize;
541 			lastinum += fullcnt;
542 		}
543 		/*
544 		 * Flush old contents in case they have been updated.
545 		 * If getblk encounters an error, it will already have zeroed
546 		 * out the buffer, so we do not need to do so here.
547 		 */
548 		if (inobuf.b_refcnt != 0)
549 			pfatal("Non-zero getnextinode() ref count %d\n",
550 			    inobuf.b_refcnt);
551 		flush(fswritefd, &inobuf);
552 		getblk(&inobuf, blk, size);
553 		nextinop = inobuf.b_un.b_buf;
554 	}
555 	dp = (union dinode *)nextinop;
556 	if (sblock.fs_magic == FS_UFS1_MAGIC)
557 		nextinop += sizeof(struct ufs1_dinode);
558 	else
559 		nextinop += sizeof(struct ufs2_dinode);
560 	if ((ckhashadd & CK_INODE) != 0) {
561 		ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
562 		dirty(&inobuf);
563 	}
564 	if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
565 		pwarn("INODE CHECK-HASH FAILED");
566 		ip.i_bp = NULL;
567 		ip.i_dp = dp;
568 		ip.i_number = inumber;
569 		prtinode(&ip);
570 		if (preen || reply("FIX") != 0) {
571 			if (preen)
572 				printf(" (FIXED)\n");
573 			ffs_update_dinode_ckhash(&sblock,
574 			    (struct ufs2_dinode *)dp);
575 			dirty(&inobuf);
576 		}
577 	}
578 	if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) {
579 		/*
580 		 * Try to determine if we have reached the end of the
581 		 * allocated inodes.
582 		 */
583 		mode = DIP(dp, di_mode) & IFMT;
584 		if (mode == 0) {
585 			if (memcmp(dp->dp2.di_db, zino.dp2.di_db,
586 				UFS_NDADDR * sizeof(ufs2_daddr_t)) ||
587 			      memcmp(dp->dp2.di_ib, zino.dp2.di_ib,
588 				UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
589 			      dp->dp2.di_mode || dp->dp2.di_size)
590 				return (NULL);
591 			return (dp);
592 		}
593 		if (!ftypeok(dp))
594 			return (NULL);
595 		ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
596 		if (ndb < 0)
597 			return (NULL);
598 		if (mode == IFBLK || mode == IFCHR)
599 			ndb++;
600 		if (mode == IFLNK) {
601 			/*
602 			 * Fake ndb value so direct/indirect block checks below
603 			 * will detect any garbage after symlink string.
604 			 */
605 			if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
606 				ndb = howmany(DIP(dp, di_size),
607 				    sizeof(ufs2_daddr_t));
608 				if (ndb > UFS_NDADDR) {
609 					j = ndb - UFS_NDADDR;
610 					for (ndb = 1; j > 1; j--)
611 						ndb *= NINDIR(&sblock);
612 					ndb += UFS_NDADDR;
613 				}
614 			}
615 		}
616 		for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++)
617 			if (DIP(dp, di_db[j]) != 0)
618 				return (NULL);
619 		for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++)
620 			ndb /= NINDIR(&sblock);
621 		for (; j < UFS_NIADDR; j++)
622 			if (DIP(dp, di_ib[j]) != 0)
623 				return (NULL);
624 	}
625 	return (dp);
626 }
627 
628 void
629 setinodebuf(int cg, ino_t inosused)
630 {
631 	ino_t inum;
632 
633 	inum = cg * sblock.fs_ipg;
634 	lastvalidinum = inum + inosused - 1;
635 	nextinum = inum;
636 	lastinum = inum;
637 	readcount = 0;
638 	/* Flush old contents in case they have been updated */
639 	flush(fswritefd, &inobuf);
640 	inobuf.b_bno = 0;
641 	if (inobuf.b_un.b_buf == NULL) {
642 		inobufsize = blkroundup(&sblock,
643 		    MAX(INOBUFSIZE, sblock.fs_bsize));
644 		initbarea(&inobuf, BT_INODES);
645 		if ((inobuf.b_un.b_buf = Balloc((unsigned)inobufsize)) == NULL)
646 			errx(EEXIT, "cannot allocate space for inode buffer");
647 	}
648 	fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
649 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
650 	readpercg = inosused / fullcnt;
651 	partialcnt = inosused % fullcnt;
652 	partialsize = fragroundup(&sblock,
653 	    partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
654 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
655 	if (partialcnt != 0) {
656 		readpercg++;
657 	} else {
658 		partialcnt = fullcnt;
659 		partialsize = inobufsize;
660 	}
661 }
662 
663 int
664 freeblock(struct inodesc *idesc)
665 {
666 	struct dups *dlp;
667 	struct bufarea *cgbp;
668 	struct cg *cgp;
669 	ufs2_daddr_t blkno;
670 	long size, nfrags;
671 
672 	blkno = idesc->id_blkno;
673 	if (idesc->id_type == SNAP) {
674 		pfatal("clearing a snapshot dinode\n");
675 		return (STOP);
676 	}
677 	size = lfragtosize(&sblock, idesc->id_numfrags);
678 	if (snapblkfree(&sblock, blkno, size, idesc->id_number,
679 	    std_checkblkavail))
680 		return (KEEPON);
681 	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
682 		if (chkrange(blkno, 1)) {
683 			return (SKIP);
684 		} else if (testbmap(blkno)) {
685 			for (dlp = duplist; dlp; dlp = dlp->next) {
686 				if (dlp->dup != blkno)
687 					continue;
688 				dlp->dup = duplist->dup;
689 				dlp = duplist;
690 				duplist = duplist->next;
691 				free((char *)dlp);
692 				break;
693 			}
694 			if (dlp == NULL) {
695 				clrbmap(blkno);
696 				n_blks--;
697 			}
698 		}
699 	}
700 	/*
701 	 * If all successfully returned, account for them.
702 	 */
703 	if (nfrags == 0) {
704 		cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
705 		cgp = cgbp->b_un.b_cg;
706 		if (idesc->id_numfrags == sblock.fs_frag)
707 			cgp->cg_cs.cs_nbfree++;
708 		else
709 			cgp->cg_cs.cs_nffree += idesc->id_numfrags;
710 		cgdirty(cgbp);
711 	}
712 	return (KEEPON);
713 }
714 
715 /*
716  * Prepare a snapshot file for being removed.
717  */
718 void
719 snapremove(ino_t inum)
720 {
721 	struct inodesc idesc;
722 	struct inode ip;
723 	int i;
724 
725 	for (i = 0; i < snapcnt; i++)
726 		if (snaplist[i].i_number == inum)
727 			break;
728 	if (i == snapcnt)
729 		ginode(inum, &ip);
730 	else
731 		ip = snaplist[i];
732 	if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
733 		printf("snapremove: inode %jd is not a snapshot\n",
734 		    (intmax_t)inum);
735 		if (i == snapcnt)
736 			irelse(&ip);
737 		return;
738 	}
739 	if (debug)
740 		printf("snapremove: remove %sactive snapshot %jd\n",
741 		    i == snapcnt ? "in" : "", (intmax_t)inum);
742 	/*
743 	 * If on active snapshot list, remove it.
744 	 */
745 	if (i < snapcnt) {
746 		for (i++; i < FSMAXSNAP; i++) {
747 			if (sblock.fs_snapinum[i] == 0)
748 				break;
749 			snaplist[i - 1] = snaplist[i];
750 			sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
751 		}
752 		sblock.fs_snapinum[i - 1] = 0;
753 		bzero(&snaplist[i - 1], sizeof(struct inode));
754 		snapcnt--;
755 	}
756 	memset(&idesc, 0, sizeof(struct inodesc));
757 	idesc.id_type = SNAP;
758 	idesc.id_func = snapclean;
759 	idesc.id_number = inum;
760 	(void)ckinode(ip.i_dp, &idesc);
761 	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
762 	inodirty(&ip);
763 	irelse(&ip);
764 }
765 
766 static int
767 snapclean(struct inodesc *idesc)
768 {
769 	ufs2_daddr_t blkno;
770 	struct bufarea *bp;
771 	union dinode *dp;
772 
773 	blkno = idesc->id_blkno;
774 	if (blkno == 0)
775 		return (KEEPON);
776 
777 	dp = idesc->id_dp;
778 	if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
779 		if (idesc->id_lbn < UFS_NDADDR) {
780 			DIP_SET(dp, di_db[idesc->id_lbn], 0);
781 		} else {
782 			bp = idesc->id_bp;
783 			IBLK_SET(bp, bp->b_index, 0);
784 			dirty(bp);
785 		}
786 	}
787 	return (KEEPON);
788 }
789 
790 /*
791  * Notification that a block is being freed. Return zero if the free
792  * should be allowed to proceed. Return non-zero if the snapshot file
793  * wants to claim the block. The block will be claimed if it is an
794  * uncopied part of one of the snapshots. It will be freed if it is
795  * either a BLK_NOCOPY or has already been copied in all of the snapshots.
796  * If a fragment is being freed, then all snapshots that care about
797  * it must make a copy since a snapshot file can only claim full sized
798  * blocks. Note that if more than one snapshot file maps the block,
799  * we can pick one at random to claim it. Since none of the snapshots
800  * can change, we are assurred that they will all see the same unmodified
801  * image. When deleting a snapshot file (see ino_trunc above), we
802  * must push any of these claimed blocks to one of the other snapshots
803  * that maps it. These claimed blocks are easily identified as they will
804  * have a block number equal to their logical block number within the
805  * snapshot. A copied block can never have this property because they
806  * must always have been allocated from a BLK_NOCOPY location.
807  */
808 int
809 snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
810 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
811 {
812 	union dinode *dp;
813 	struct inode ip;
814 	struct bufarea *snapbp;
815 	ufs_lbn_t lbn;
816 	ufs2_daddr_t blkno, relblkno;
817 	int i, frags, claimedblk, copydone;
818 
819 	/* If no snapshots, nothing to do */
820 	if (snapcnt == 0)
821 		return (0);
822 	if (debug)
823 		printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
824 		    (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
825 	relblkno = blknum(fs, bno);
826 	lbn = fragstoblks(fs, relblkno);
827 	/* Direct blocks are always pre-copied */
828 	if (lbn < UFS_NDADDR)
829 		return (0);
830 	copydone = 0;
831 	claimedblk = 0;
832 	for (i = 0; i < snapcnt; i++) {
833 		/*
834 		 * Lookup block being freed.
835 		 */
836 		ip = snaplist[i];
837 		dp = ip.i_dp;
838 		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
839 		    lbn, &frags, &snapbp);
840 		/*
841 		 * Check to see if block needs to be copied.
842 		 */
843 		if (blkno == 0) {
844 			/*
845 			 * A block that we map is being freed. If it has not
846 			 * been claimed yet, we will claim or copy it (below).
847 			 */
848 			claimedblk = 1;
849 		} else if (blkno == BLK_SNAP) {
850 			/*
851 			 * No previous snapshot claimed the block,
852 			 * so it will be freed and become a BLK_NOCOPY
853 			 * (don't care) for us.
854 			 */
855 			if (claimedblk)
856 				pfatal("snapblkfree: inconsistent block type");
857 			IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
858 			dirty(snapbp);
859 			brelse(snapbp);
860 			continue;
861 		} else /* BLK_NOCOPY or default */ {
862 			/*
863 			 * If the snapshot has already copied the block
864 			 * (default), or does not care about the block,
865 			 * it is not needed.
866 			 */
867 			brelse(snapbp);
868 			continue;
869 		}
870 		/*
871 		 * If this is a full size block, we will just grab it
872 		 * and assign it to the snapshot inode. Otherwise we
873 		 * will proceed to copy it. See explanation for this
874 		 * routine as to why only a single snapshot needs to
875 		 * claim this block.
876 		 */
877 		if (size == fs->fs_bsize) {
878 			if (debug)
879 				printf("Grabonremove snapshot %ju lbn %jd "
880 				    "from inum %ju\n", (intmax_t)ip.i_number,
881 				    (intmax_t)lbn, (uintmax_t)inum);
882 			IBLK_SET(snapbp, snapbp->b_index, relblkno);
883 			dirty(snapbp);
884 			brelse(snapbp);
885 			DIP_SET(dp, di_blocks,
886 			    DIP(dp, di_blocks) + btodb(size));
887 			inodirty(&ip);
888 			return (1);
889 		}
890 
891 		/* First time through, read the contents of the old block. */
892 		if (copydone == 0) {
893 			copydone = 1;
894 			if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
895 			    fs->fs_bsize) != 0) {
896 				pfatal("Could not read snapshot %ju block "
897 				    "%jd\n", (intmax_t)ip.i_number,
898 				    (intmax_t)relblkno);
899 				continue;
900 			}
901 		}
902 		/*
903 		 * This allocation will never require any additional
904 		 * allocations for the snapshot inode.
905 		 */
906 		blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
907 		    checkblkavail);
908 		if (blkno == 0) {
909 			pfatal("Could not allocate block for snapshot %ju\n",
910 			    (intmax_t)ip.i_number);
911 			continue;
912 		}
913 		if (debug)
914 			printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
915 			    "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
916 			    (intmax_t)lbn, (uintmax_t)inum, size,
917 			    (intmax_t)blkno);
918 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
919 		IBLK_SET(snapbp, snapbp->b_index, blkno);
920 		dirty(snapbp);
921 		brelse(snapbp);
922 		DIP_SET(dp, di_blocks,
923 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
924 		inodirty(&ip);
925 	}
926 	return (0);
927 }
928 
929 /*
930  * Notification that a block is being written. Return if the block
931  * is part of a snapshot as snapshots never track other snapshots.
932  * The block will be copied in all of the snapshots that are tracking
933  * it and have not yet copied it. Some buffers may hold more than one
934  * block. Here we need to check each block in the buffer.
935  */
936 void
937 copyonwrite(struct fs *fs, struct bufarea *bp,
938 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
939 {
940 	ufs2_daddr_t copyblkno;
941 	long i, numblks;
942 
943 	/* If no snapshots, nothing to do. */
944 	if (snapcnt == 0)
945 		return;
946 	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
947 	if (debug)
948 		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
949 		    (intmax_t)numblks, numblks > 1 ? "s" : "");
950 	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
951 	for (i = 0; i < numblks; i++) {
952 		chkcopyonwrite(fs, copyblkno, checkblkavail);
953 		copyblkno += fs->fs_frag;
954 	}
955 }
956 
957 static void
958 chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
959 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
960 {
961 	struct inode ip;
962 	union dinode *dp;
963 	struct bufarea *snapbp;
964 	ufs2_daddr_t blkno;
965 	int i, frags, copydone;
966 	ufs_lbn_t lbn;
967 
968 	lbn = fragstoblks(fs, copyblkno);
969 	/* Direct blocks are always pre-copied */
970 	if (lbn < UFS_NDADDR)
971 		return;
972 	copydone = 0;
973 	for (i = 0; i < snapcnt; i++) {
974 		/*
975 		 * Lookup block being freed.
976 		 */
977 		ip = snaplist[i];
978 		dp = ip.i_dp;
979 		blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
980 		/*
981 		 * Check to see if block needs to be copied.
982 		 */
983 		if (blkno != 0) {
984 			/*
985 			 * A block that we have already copied or don't track.
986 			 */
987 			brelse(snapbp);
988 			continue;
989 		}
990 		/* First time through, read the contents of the old block. */
991 		if (copydone == 0) {
992 			copydone = 1;
993 			if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
994 			    fs->fs_bsize) != 0) {
995 				pfatal("Could not read snapshot %ju block "
996 				    "%jd\n", (intmax_t)ip.i_number,
997 				    (intmax_t)copyblkno);
998 				continue;
999 			}
1000 		}
1001 		/*
1002 		 * This allocation will never require any additional
1003 		 * allocations for the snapshot inode.
1004 		 */
1005 		if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
1006 		    checkblkavail)) == 0) {
1007 			pfatal("Could not allocate block for snapshot %ju\n",
1008 			    (intmax_t)ip.i_number);
1009 			continue;
1010 		}
1011 		if (debug)
1012 			prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
1013 			    "blkno %ju setting in buffer",
1014 			    (intmax_t)ip.i_number, (intmax_t)lbn,
1015 			    (intmax_t)blkno);
1016 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
1017 		IBLK_SET(snapbp, snapbp->b_index, blkno);
1018 		dirty(snapbp);
1019 		brelse(snapbp);
1020 		DIP_SET(dp, di_blocks,
1021 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
1022 		inodirty(&ip);
1023 	}
1024 	return;
1025 }
1026 
1027 /*
1028  * Traverse an inode and check that its block count is correct
1029  * fixing it if necessary.
1030  */
1031 void
1032 check_blkcnt(struct inode *ip)
1033 {
1034 	struct inodesc idesc;
1035 	union dinode *dp;
1036 	ufs2_daddr_t ndb;
1037 	int j, ret, offset;
1038 
1039 	dp = ip->i_dp;
1040 	memset(&idesc, 0, sizeof(struct inodesc));
1041 	idesc.id_func = pass1check;
1042 	idesc.id_number = ip->i_number;
1043 	idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
1044 	(void)ckinode(dp, &idesc);
1045 	if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
1046 		ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
1047 		for (j = 0; j < UFS_NXADDR; j++) {
1048 			if (--ndb == 0 &&
1049 			    (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
1050 				idesc.id_numfrags = numfrags(&sblock,
1051 				    fragroundup(&sblock, offset));
1052 			else
1053 				idesc.id_numfrags = sblock.fs_frag;
1054 			if (dp->dp2.di_extb[j] == 0)
1055 				continue;
1056 			idesc.id_blkno = dp->dp2.di_extb[j];
1057 			ret = (*idesc.id_func)(&idesc);
1058 			if (ret & STOP)
1059 				break;
1060 		}
1061 	}
1062 	idesc.id_entryno *= btodb(sblock.fs_fsize);
1063 	if (DIP(dp, di_blocks) != idesc.id_entryno) {
1064 		if (!(sujrecovery && preen)) {
1065 			pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
1066 			    (u_long)idesc.id_number,
1067 			    (uintmax_t)DIP(dp, di_blocks),
1068 			    (uintmax_t)idesc.id_entryno);
1069 			if (preen)
1070 				printf(" (CORRECTED)\n");
1071 			else if (reply("CORRECT") == 0)
1072 				return;
1073 		}
1074 		if (bkgrdflag == 0) {
1075 			DIP_SET(dp, di_blocks, idesc.id_entryno);
1076 			inodirty(ip);
1077 		} else {
1078 			cmd.value = idesc.id_number;
1079 			cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
1080 			if (debug)
1081 				printf("adjblkcnt ino %ju amount %lld\n",
1082 				    (uintmax_t)cmd.value, (long long)cmd.size);
1083 			if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
1084 			    &cmd, sizeof cmd) == -1)
1085 				rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
1086 		}
1087 	}
1088 }
1089 
1090 void
1091 freeinodebuf(void)
1092 {
1093 	struct bufarea *bp;
1094 	int i;
1095 
1096 	/*
1097 	 * Flush old contents in case they have been updated.
1098 	 */
1099 	flush(fswritefd, &inobuf);
1100 	if (inobuf.b_un.b_buf != NULL)
1101 		free((char *)inobuf.b_un.b_buf);
1102 	inobuf.b_un.b_buf = NULL;
1103 	firstinum = lastinum = 0;
1104 	/*
1105 	 * Reload the snapshot inodes in case any of them changed.
1106 	 */
1107 	for (i = 0; i < snapcnt; i++) {
1108 		bp = snaplist[i].i_bp;
1109 		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
1110 		    bp->b_size);
1111 	}
1112 }
1113 
1114 /*
1115  * Routines to maintain information about directory inodes.
1116  * This is built during the first pass and used during the
1117  * second and third passes.
1118  *
1119  * Enter inodes into the cache.
1120  */
1121 struct inoinfo *
1122 cacheino(union dinode *dp, ino_t inumber)
1123 {
1124 	struct inoinfo *inp;
1125 	int i, blks;
1126 
1127 	if (getinoinfo(inumber) != NULL)
1128 		pfatal("cacheino: duplicate entry for ino %jd\n",
1129 		    (intmax_t)inumber);
1130 	if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
1131 		blks = UFS_NDADDR + UFS_NIADDR;
1132 	else if (DIP(dp, di_size) > 0)
1133 		blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
1134 	else
1135 		blks = 1;
1136 	inp = (struct inoinfo *)
1137 		Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
1138 	if (inp == NULL)
1139 		errx(EEXIT, "cannot increase directory list");
1140 	SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
1141 	inp->i_flags = 0;
1142 	inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
1143 	inp->i_dotdot = (ino_t)0;
1144 	inp->i_number = inumber;
1145 	inp->i_isize = DIP(dp, di_size);
1146 	inp->i_depth = DIP(dp, di_dirdepth);
1147 	inp->i_numblks = blks;
1148 	for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
1149 		inp->i_blks[i] = DIP(dp, di_db[i]);
1150 	if (blks > UFS_NDADDR)
1151 		for (i = 0; i < UFS_NIADDR; i++)
1152 			inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
1153 	if (inplast == listmax) {
1154 		listmax += 100;
1155 		inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
1156 		    listmax, sizeof(struct inoinfo *));
1157 		if (inpsort == NULL)
1158 			errx(EEXIT, "cannot increase directory list");
1159 	}
1160 	inpsort[inplast++] = inp;
1161 	return (inp);
1162 }
1163 
1164 /*
1165  * Look up an inode cache structure.
1166  */
1167 struct inoinfo *
1168 getinoinfo(ino_t inumber)
1169 {
1170 	struct inoinfo *inp;
1171 
1172 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1173 		if (inp->i_number != inumber)
1174 			continue;
1175 		return (inp);
1176 	}
1177 	return (NULL);
1178 }
1179 
1180 /*
1181  * Remove an entry from the inode cache and disk-order sorted list.
1182  * Return 0 on success and 1 on failure.
1183  */
1184 int
1185 removecachedino(ino_t inumber)
1186 {
1187 	struct inoinfo *inp, **inpp;
1188 	char *listtype;
1189 
1190 	listtype = "hash";
1191 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1192 		if (inp->i_number != inumber)
1193 			continue;
1194 		SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash);
1195 		for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
1196 			if (*inpp != inp)
1197 				continue;
1198 			*inpp = inpsort[inplast - 1];
1199 			inplast--;
1200 			free(inp);
1201 			return (0);
1202 		}
1203 		listtype = "sort";
1204 		break;
1205 	}
1206 	pfatal("removecachedino: entry for ino %jd not found on %s list\n",
1207 	    (intmax_t)inumber, listtype);
1208 	return (1);
1209 }
1210 
1211 /*
1212  * Clean up all the inode cache structure.
1213  */
1214 void
1215 inocleanup(void)
1216 {
1217 	struct inoinfo **inpp;
1218 
1219 	if (inphash == NULL)
1220 		return;
1221 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
1222 		free((char *)(*inpp));
1223 	free((char *)inphash);
1224 	inphash = NULL;
1225 	free((char *)inpsort);
1226 	inpsort = NULL;
1227 }
1228 
1229 void
1230 inodirty(struct inode *ip)
1231 {
1232 
1233 	if (sblock.fs_magic == FS_UFS2_MAGIC)
1234 		ffs_update_dinode_ckhash(&sblock,
1235 		    (struct ufs2_dinode *)ip->i_dp);
1236 	dirty(ip->i_bp);
1237 }
1238 
1239 void
1240 clri(struct inodesc *idesc, const char *type, int flag)
1241 {
1242 	union dinode *dp;
1243 	struct inode ip;
1244 
1245 	ginode(idesc->id_number, &ip);
1246 	dp = ip.i_dp;
1247 	if (flag == 1) {
1248 		pwarn("%s %s", type,
1249 		    (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
1250 		prtinode(&ip);
1251 		printf("\n");
1252 	}
1253 	if (preen || reply("CLEAR") == 1) {
1254 		if (preen)
1255 			printf(" (CLEARED)\n");
1256 		n_files--;
1257 		if (bkgrdflag == 0) {
1258 			if (idesc->id_type == SNAP) {
1259 				snapremove(idesc->id_number);
1260 				idesc->id_type = ADDR;
1261 			}
1262 			(void)ckinode(dp, idesc);
1263 			inoinfo(idesc->id_number)->ino_state = USTATE;
1264 			clearinode(dp);
1265 			inodirty(&ip);
1266 		} else {
1267 			cmd.value = idesc->id_number;
1268 			cmd.size = -DIP(dp, di_nlink);
1269 			if (debug)
1270 				printf("adjrefcnt ino %ld amt %lld\n",
1271 				    (long)cmd.value, (long long)cmd.size);
1272 			if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
1273 			    &cmd, sizeof cmd) == -1)
1274 				rwerror("ADJUST INODE", cmd.value);
1275 		}
1276 	}
1277 	irelse(&ip);
1278 }
1279 
1280 int
1281 findname(struct inodesc *idesc)
1282 {
1283 	struct direct *dirp = idesc->id_dirp;
1284 
1285 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1286 		idesc->id_entryno++;
1287 		return (KEEPON);
1288 	}
1289 	memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
1290 	return (STOP|FOUND);
1291 }
1292 
1293 int
1294 findino(struct inodesc *idesc)
1295 {
1296 	struct direct *dirp = idesc->id_dirp;
1297 
1298 	if (dirp->d_ino == 0)
1299 		return (KEEPON);
1300 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1301 	    dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) {
1302 		idesc->id_parent = dirp->d_ino;
1303 		return (STOP|FOUND);
1304 	}
1305 	return (KEEPON);
1306 }
1307 
1308 int
1309 clearentry(struct inodesc *idesc)
1310 {
1311 	struct direct *dirp = idesc->id_dirp;
1312 
1313 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1314 		idesc->id_entryno++;
1315 		return (KEEPON);
1316 	}
1317 	dirp->d_ino = 0;
1318 	return (STOP|FOUND|ALTERED);
1319 }
1320 
1321 void
1322 prtinode(struct inode *ip)
1323 {
1324 	char *p;
1325 	union dinode *dp;
1326 	struct passwd *pw;
1327 	time_t t;
1328 
1329 	dp = ip->i_dp;
1330 	printf(" I=%lu ", (u_long)ip->i_number);
1331 	if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino)
1332 		return;
1333 	printf(" OWNER=");
1334 	if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
1335 		printf("%s ", pw->pw_name);
1336 	else
1337 		printf("%u ", (unsigned)DIP(dp, di_uid));
1338 	printf("MODE=%o\n", DIP(dp, di_mode));
1339 	if (preen)
1340 		printf("%s: ", cdevname);
1341 	printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
1342 	t = DIP(dp, di_mtime);
1343 	if ((p = ctime(&t)) != NULL)
1344 		printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
1345 }
1346 
1347 void
1348 blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
1349 {
1350 
1351 	pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
1352 	printf("\n");
1353 	switch (inoinfo(ino)->ino_state) {
1354 
1355 	case FSTATE:
1356 	case FZLINK:
1357 		inoinfo(ino)->ino_state = FCLEAR;
1358 		return;
1359 
1360 	case DSTATE:
1361 	case DZLINK:
1362 		inoinfo(ino)->ino_state = DCLEAR;
1363 		return;
1364 
1365 	case FCLEAR:
1366 	case DCLEAR:
1367 		return;
1368 
1369 	default:
1370 		errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state);
1371 		/* NOTREACHED */
1372 	}
1373 }
1374 
1375 /*
1376  * allocate an unused inode
1377  */
1378 ino_t
1379 allocino(ino_t request, int type)
1380 {
1381 	ino_t ino;
1382 	struct inode ip;
1383 	union dinode *dp;
1384 	struct bufarea *cgbp;
1385 	struct cg *cgp;
1386 	int cg, anyino;
1387 
1388 	anyino = 0;
1389 	if (request == 0) {
1390 		request = UFS_ROOTINO;
1391 		anyino = 1;
1392 	} else if (inoinfo(request)->ino_state != USTATE)
1393 		return (0);
1394 retry:
1395 	for (ino = request; ino < maxino; ino++)
1396 		if (inoinfo(ino)->ino_state == USTATE)
1397 			break;
1398 	if (ino >= maxino)
1399 		return (0);
1400 	cg = ino_to_cg(&sblock, ino);
1401 	cgbp = cglookup(cg);
1402 	cgp = cgbp->b_un.b_cg;
1403 	if (!check_cgmagic(cg, cgbp)) {
1404 		if (anyino == 0)
1405 			return (0);
1406 		request = (cg + 1) * sblock.fs_ipg;
1407 		goto retry;
1408 	}
1409 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1410 	cgp->cg_cs.cs_nifree--;
1411 	switch (type & IFMT) {
1412 	case IFDIR:
1413 		inoinfo(ino)->ino_state = DSTATE;
1414 		cgp->cg_cs.cs_ndir++;
1415 		break;
1416 	case IFREG:
1417 	case IFLNK:
1418 		inoinfo(ino)->ino_state = FSTATE;
1419 		break;
1420 	default:
1421 		return (0);
1422 	}
1423 	cgdirty(cgbp);
1424 	ginode(ino, &ip);
1425 	dp = ip.i_dp;
1426 	memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ?
1427 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
1428 	DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
1429 	    std_checkblkavail));
1430 	if (DIP(dp, di_db[0]) == 0) {
1431 		inoinfo(ino)->ino_state = USTATE;
1432 		inodirty(&ip);
1433 		irelse(&ip);
1434 		return (0);
1435 	}
1436 	DIP_SET(dp, di_mode, type);
1437 	DIP_SET(dp, di_atime, time(NULL));
1438 	DIP_SET(dp, di_ctime, DIP(dp, di_atime));
1439 	DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
1440 	DIP_SET(dp, di_size, sblock.fs_fsize);
1441 	DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
1442 	n_files++;
1443 	inodirty(&ip);
1444 	irelse(&ip);
1445 	inoinfo(ino)->ino_type = IFTODT(type);
1446 	return (ino);
1447 }
1448 
1449 /*
1450  * deallocate an inode
1451  */
1452 void
1453 freeino(ino_t ino)
1454 {
1455 	struct inodesc idesc;
1456 	union dinode *dp;
1457 	struct inode ip;
1458 
1459 	memset(&idesc, 0, sizeof(struct inodesc));
1460 	idesc.id_type = ADDR;
1461 	idesc.id_func = freeblock;
1462 	idesc.id_number = ino;
1463 	ginode(ino, &ip);
1464 	dp = ip.i_dp;
1465 	(void)ckinode(dp, &idesc);
1466 	clearinode(dp);
1467 	inodirty(&ip);
1468 	irelse(&ip);
1469 	inoinfo(ino)->ino_state = USTATE;
1470 	n_files--;
1471 }
1472