xref: /freebsd/sbin/fsck_ffs/inode.c (revision fd5aaf2ea0178b03aa93c35245053247e5d3840c)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if 0
33 #ifndef lint
34 static const char sccsid[] = "@(#)inode.c	8.8 (Berkeley) 4/28/95";
35 #endif /* not lint */
36 #endif
37 #include <sys/cdefs.h>
38 #include <sys/param.h>
39 #include <sys/stat.h>
40 #include <sys/stdint.h>
41 #include <sys/sysctl.h>
42 
43 #include <ufs/ufs/dinode.h>
44 #include <ufs/ufs/dir.h>
45 #include <ufs/ffs/fs.h>
46 
47 #include <err.h>
48 #include <pwd.h>
49 #include <string.h>
50 #include <time.h>
51 
52 #include "fsck.h"
53 
54 struct bufarea *icachebp;	/* inode cache buffer */
55 
56 static int iblock(struct inodesc *, off_t isize, int type);
57 static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t,
58     struct bufarea **);
59 static int snapclean(struct inodesc *idesc);
60 static void chkcopyonwrite(struct fs *, ufs2_daddr_t,
61     ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long));
62 
63 int
64 ckinode(union dinode *dp, struct inodesc *idesc)
65 {
66 	off_t remsize, sizepb;
67 	int i, offset, ret;
68 	struct inode ip;
69 	union dinode dino;
70 	ufs2_daddr_t ndb;
71 	mode_t mode;
72 	char pathbuf[MAXPATHLEN + 1];
73 
74 	if (idesc->id_fix != IGNORE)
75 		idesc->id_fix = DONTKNOW;
76 	idesc->id_dp = dp;
77 	idesc->id_lbn = -1;
78 	idesc->id_lballoc = -1;
79 	idesc->id_level = 0;
80 	idesc->id_entryno = 0;
81 	idesc->id_filesize = DIP(dp, di_size);
82 	mode = DIP(dp, di_mode) & IFMT;
83 	if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
84 	    DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
85 		return (KEEPON);
86 	if (sblock.fs_magic == FS_UFS1_MAGIC)
87 		dino.dp1 = dp->dp1;
88 	else
89 		dino.dp2 = dp->dp2;
90 	if (DIP(&dino, di_size) < 0) {
91 		pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size));
92 		return (STOP);
93 	}
94 	ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
95 	for (i = 0; i < UFS_NDADDR; i++) {
96 		idesc->id_lbn++;
97 		if (--ndb == 0 &&
98 		    (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
99 			idesc->id_numfrags =
100 				numfrags(&sblock, fragroundup(&sblock, offset));
101 		else
102 			idesc->id_numfrags = sblock.fs_frag;
103 		if (DIP(&dino, di_db[i]) == 0) {
104 			if (idesc->id_type == DATA && ndb >= 0) {
105 				/* An empty block in a directory XXX */
106 				getpathname(pathbuf, idesc->id_number,
107 						idesc->id_number);
108 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
109 					pathbuf);
110 				if (reply("ADJUST LENGTH") == 1) {
111 					ginode(idesc->id_number, &ip);
112 					DIP_SET(ip.i_dp, di_size,
113 					    i * sblock.fs_bsize);
114 					printf(
115 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
116 					rerun = 1;
117 					inodirty(&ip);
118 					irelse(&ip);
119 				}
120 				return (STOP);
121 			}
122 			continue;
123 		}
124 		idesc->id_blkno = DIP(&dino, di_db[i]);
125 		if (idesc->id_type != DATA)
126 			ret = (*idesc->id_func)(idesc);
127 		else
128 			ret = dirscan(idesc);
129 		if (ret & STOP)
130 			return (ret);
131 	}
132 	idesc->id_numfrags = sblock.fs_frag;
133 	remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR;
134 	sizepb = sblock.fs_bsize;
135 	for (i = 0; i < UFS_NIADDR; i++) {
136 		sizepb *= NINDIR(&sblock);
137 		idesc->id_level = i + 1;
138 		if (DIP(&dino, di_ib[i])) {
139 			idesc->id_blkno = DIP(&dino, di_ib[i]);
140 			ret = iblock(idesc, remsize, BT_LEVEL1 + i);
141 			if (ret & STOP)
142 				return (ret);
143 		} else if (remsize > 0) {
144 			idesc->id_lbn += sizepb / sblock.fs_bsize;
145 			if (idesc->id_type == DATA) {
146 				/* An empty block in a directory XXX */
147 				getpathname(pathbuf, idesc->id_number,
148 						idesc->id_number);
149 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
150 					pathbuf);
151 				if (reply("ADJUST LENGTH") == 1) {
152 					ginode(idesc->id_number, &ip);
153 					DIP_SET(ip.i_dp, di_size,
154 					    DIP(ip.i_dp, di_size) - remsize);
155 					remsize = 0;
156 					printf(
157 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
158 					rerun = 1;
159 					inodirty(&ip);
160 					irelse(&ip);
161 					break;
162 				}
163 			}
164 		}
165 		remsize -= sizepb;
166 	}
167 	return (KEEPON);
168 }
169 
170 static int
171 iblock(struct inodesc *idesc, off_t isize, int type)
172 {
173 	struct inode ip;
174 	struct bufarea *bp;
175 	int i, n, (*func)(struct inodesc *), nif;
176 	off_t sizepb;
177 	char buf[BUFSIZ];
178 	char pathbuf[MAXPATHLEN + 1];
179 
180 	if (idesc->id_type != DATA) {
181 		func = idesc->id_func;
182 		if (((n = (*func)(idesc)) & KEEPON) == 0)
183 			return (n);
184 	} else
185 		func = dirscan;
186 	bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
187 	if (bp->b_errs != 0) {
188 		brelse(bp);
189 		return (SKIP);
190 	}
191 	idesc->id_bp = bp;
192 	idesc->id_level--;
193 	for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++)
194 		sizepb *= NINDIR(&sblock);
195 	if (howmany(isize, sizepb) > NINDIR(&sblock))
196 		nif = NINDIR(&sblock);
197 	else
198 		nif = howmany(isize, sizepb);
199 	if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
200 		for (i = nif; i < NINDIR(&sblock); i++) {
201 			if (IBLK(bp, i) == 0)
202 				continue;
203 			(void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
204 			    (u_long)idesc->id_number);
205 			if (preen) {
206 				pfatal("%s", buf);
207 			} else if (dofix(idesc, buf)) {
208 				IBLK_SET(bp, i, 0);
209 				dirty(bp);
210 			}
211 		}
212 		flush(fswritefd, bp);
213 	}
214 	for (i = 0; i < nif; i++) {
215 		if (IBLK(bp, i)) {
216 			idesc->id_blkno = IBLK(bp, i);
217 			bp->b_index = i;
218 			if (idesc->id_level == 0) {
219 				idesc->id_lbn++;
220 				n = (*func)(idesc);
221 			} else {
222 				n = iblock(idesc, isize, type - 1);
223 				idesc->id_level++;
224 			}
225 			if (n & STOP) {
226 				brelse(bp);
227 				return (n);
228 			}
229 		} else {
230 			idesc->id_lbn += sizepb / sblock.fs_bsize;
231 			if (idesc->id_type == DATA && isize > 0) {
232 				/* An empty block in a directory XXX */
233 				getpathname(pathbuf, idesc->id_number,
234 						idesc->id_number);
235 				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
236 					pathbuf);
237 				if (reply("ADJUST LENGTH") == 1) {
238 					ginode(idesc->id_number, &ip);
239 					DIP_SET(ip.i_dp, di_size,
240 					    DIP(ip.i_dp, di_size) - isize);
241 					isize = 0;
242 					printf(
243 					    "YOU MUST RERUN FSCK AFTERWARDS\n");
244 					rerun = 1;
245 					inodirty(&ip);
246 					brelse(bp);
247 					return(STOP);
248 				}
249 			}
250 		}
251 		isize -= sizepb;
252 	}
253 	brelse(bp);
254 	return (KEEPON);
255 }
256 
257 /*
258  * Finds the disk block address at the specified lbn within the inode
259  * specified by dp.  This follows the whole tree and honors di_size and
260  * di_extsize so it is a true test of reachability.  The lbn may be
261  * negative if an extattr or indirect block is requested.
262  */
263 ufs2_daddr_t
264 ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags,
265     struct bufarea **bpp)
266 {
267 	ufs_lbn_t tmpval;
268 	ufs_lbn_t cur;
269 	ufs_lbn_t next;
270 	int i;
271 
272 	*frags = 0;
273 	if (bpp != NULL)
274 		*bpp = NULL;
275 	/*
276 	 * Handle extattr blocks first.
277 	 */
278 	if (lbn < 0 && lbn >= -UFS_NXADDR) {
279 		lbn = -1 - lbn;
280 		if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1))
281 			return (0);
282 		*frags = numfrags(&sblock,
283 		    sblksize(&sblock, dp->dp2.di_extsize, lbn));
284 		return (dp->dp2.di_extb[lbn]);
285 	}
286 	/*
287 	 * Now direct and indirect.
288 	 */
289 	if (DIP(dp, di_mode) == IFLNK &&
290 	    DIP(dp, di_size) < sblock.fs_maxsymlinklen)
291 		return (0);
292 	if (lbn >= 0 && lbn < UFS_NDADDR) {
293 		*frags = numfrags(&sblock,
294 		    sblksize(&sblock, DIP(dp, di_size), lbn));
295 		return (DIP(dp, di_db[lbn]));
296 	}
297 	*frags = sblock.fs_frag;
298 
299 	for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR;
300 	    i++, tmpval *= NINDIR(&sblock), cur = next) {
301 		next = cur + tmpval;
302 		if (lbn == -cur - i)
303 			return (DIP(dp, di_ib[i]));
304 		/*
305 		 * Determine whether the lbn in question is within this tree.
306 		 */
307 		if (lbn < 0 && -lbn >= next)
308 			continue;
309 		if (lbn > 0 && lbn >= next)
310 			continue;
311 		if (DIP(dp, di_ib[i]) == 0)
312 			return (0);
313 		return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn,
314 		    bpp));
315 	}
316 	pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino);
317 	return (0);
318 }
319 
320 /*
321  * Fetch an indirect block to find the block at a given lbn.  The lbn
322  * may be negative to fetch a specific indirect block pointer or positive
323  * to fetch a specific block.
324  */
325 static ufs2_daddr_t
326 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn,
327     struct bufarea **bpp)
328 {
329 	struct bufarea *bp;
330 	ufs_lbn_t lbnadd;
331 	ufs_lbn_t base;
332 	int i, level;
333 
334 	level = lbn_level(cur);
335 	if (level == -1)
336 		pfatal("Invalid indir lbn %jd in ino %ju\n",
337 		    lbn, (uintmax_t)ino);
338 	if (level == 0 && lbn < 0)
339 		pfatal("Invalid lbn %jd in ino %ju\n",
340 		    lbn, (uintmax_t)ino);
341 	lbnadd = 1;
342 	base = -(cur + level);
343 	for (i = level; i > 0; i--)
344 		lbnadd *= NINDIR(&sblock);
345 	if (lbn > 0)
346 		i = (lbn - base) / lbnadd;
347 	else
348 		i = (-lbn - base) / lbnadd;
349 	if (i < 0 || i >= NINDIR(&sblock)) {
350 		pfatal("Invalid indirect index %d produced by lbn %jd "
351 		    "in ino %ju\n", i, lbn, (uintmax_t)ino);
352 		return (0);
353 	}
354 	if (level == 0)
355 		cur = base + (i * lbnadd);
356 	else
357 		cur = -(base + (i * lbnadd)) - (level - 1);
358 	bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level);
359 	if (bp->b_errs != 0)
360 		return (0);
361 	blk = IBLK(bp, i);
362 	bp->b_index = i;
363 	if (cur == lbn || blk == 0) {
364 		if (bpp != NULL)
365 			*bpp = bp;
366 		else
367 			brelse(bp);
368 		return (blk);
369 	}
370 	brelse(bp);
371 	if (level == 0)
372 		pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn,
373 		    (uintmax_t)ino);
374 	return (indir_blkatoff(blk, ino, cur, lbn, bpp));
375 }
376 
377 /*
378  * Check that a block in a legal block number.
379  * Return 0 if in range, 1 if out of range.
380  */
381 int
382 chkrange(ufs2_daddr_t blk, int cnt)
383 {
384 	int c;
385 
386 	if (cnt <= 0 || blk <= 0 || blk >= maxfsblock ||
387 	    cnt > maxfsblock - blk) {
388 		if (debug)
389 			printf("out of range: blk %ld, offset %i, size %d\n",
390 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
391 		return (1);
392 	}
393 	if (cnt > sblock.fs_frag ||
394 	    fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
395 		if (debug)
396 			printf("bad size: blk %ld, offset %i, size %d\n",
397 			    (long)blk, (int)fragnum(&sblock, blk), cnt);
398 		return (1);
399 	}
400 	c = dtog(&sblock, blk);
401 	if (blk < cgdmin(&sblock, c)) {
402 		if ((blk + cnt) > cgsblock(&sblock, c)) {
403 			if (debug) {
404 				printf("blk %ld < cgdmin %ld;",
405 				    (long)blk, (long)cgdmin(&sblock, c));
406 				printf(" blk + cnt %ld > cgsbase %ld\n",
407 				    (long)(blk + cnt),
408 				    (long)cgsblock(&sblock, c));
409 			}
410 			return (1);
411 		}
412 	} else {
413 		if ((blk + cnt) > cgbase(&sblock, c+1)) {
414 			if (debug)  {
415 				printf("blk %ld >= cgdmin %ld;",
416 				    (long)blk, (long)cgdmin(&sblock, c));
417 				printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
418 				    (long)(blk + cnt), (long)sblock.fs_fpg);
419 			}
420 			return (1);
421 		}
422 	}
423 	return (0);
424 }
425 
426 /*
427  * General purpose interface for reading inodes.
428  *
429  * firstinum and lastinum track contents of getnextino() cache (below).
430  */
431 static ino_t firstinum, lastinum;
432 static struct bufarea inobuf;
433 
434 void
435 ginode(ino_t inumber, struct inode *ip)
436 {
437 	ufs2_daddr_t iblk;
438 	struct ufs2_dinode *dp;
439 
440 	if (inumber < UFS_ROOTINO || inumber >= maxino)
441 		errx(EEXIT, "bad inode number %ju to ginode",
442 		    (uintmax_t)inumber);
443 	ip->i_number = inumber;
444 	if (inumber >= firstinum && inumber < lastinum) {
445 		/* contents in getnextino() cache */
446 		ip->i_bp = &inobuf;
447 		inobuf.b_refcnt++;
448 		inobuf.b_index = firstinum;
449 	} else if (icachebp != NULL &&
450 	    inumber >= icachebp->b_index &&
451 	    inumber < icachebp->b_index + INOPB(&sblock)) {
452 		/* take an additional reference for the returned inode */
453 		icachebp->b_refcnt++;
454 		ip->i_bp = icachebp;
455 	} else {
456 		iblk = ino_to_fsba(&sblock, inumber);
457 		/* release our cache-hold reference on old icachebp */
458 		if (icachebp != NULL)
459 			brelse(icachebp);
460 		icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
461 		if (icachebp->b_errs != 0) {
462 			icachebp = NULL;
463 			ip->i_bp = NULL;
464 			ip->i_dp = &zino;
465 			return;
466 		}
467 		/* take a cache-hold reference on new icachebp */
468 		icachebp->b_refcnt++;
469 		icachebp->b_index = rounddown(inumber, INOPB(&sblock));
470 		ip->i_bp = icachebp;
471 	}
472 	if (sblock.fs_magic == FS_UFS1_MAGIC) {
473 		ip->i_dp = (union dinode *)
474 		    &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index];
475 		return;
476 	}
477 	ip->i_dp = (union dinode *)
478 	    &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index];
479 	dp = (struct ufs2_dinode *)ip->i_dp;
480 	/* Do not check hash of inodes being created */
481 	if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) {
482 		pwarn("INODE CHECK-HASH FAILED");
483 		prtinode(ip);
484 		if (preen || reply("FIX") != 0) {
485 			if (preen)
486 				printf(" (FIXED)\n");
487 			ffs_update_dinode_ckhash(&sblock, dp);
488 			inodirty(ip);
489 		}
490 	}
491 }
492 
493 /*
494  * Release a held inode.
495  */
496 void
497 irelse(struct inode *ip)
498 {
499 
500 	/* Check for failed inode read */
501 	if (ip->i_bp == NULL)
502 		return;
503 	if (debug && sblock.fs_magic == FS_UFS2_MAGIC &&
504 	    ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) {
505 		pwarn("irelse: releasing inode with bad check-hash");
506 		prtinode(ip);
507 	}
508 	if (ip->i_bp->b_refcnt <= 0)
509 		pfatal("irelse: releasing unreferenced ino %ju\n",
510 		    (uintmax_t) ip->i_number);
511 	brelse(ip->i_bp);
512 }
513 
514 /*
515  * Special purpose version of ginode used to optimize first pass
516  * over all the inodes in numerical order.
517  */
518 static ino_t nextinum, lastvalidinum;
519 static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;
520 
521 union dinode *
522 getnextinode(ino_t inumber, int rebuiltcg)
523 {
524 	int j;
525 	long size;
526 	mode_t mode;
527 	ufs2_daddr_t ndb, blk;
528 	union dinode *dp;
529 	struct inode ip;
530 	static caddr_t nextinop;
531 
532 	if (inumber != nextinum++ || inumber > lastvalidinum)
533 		errx(EEXIT, "bad inode number %ju to nextinode",
534 		    (uintmax_t)inumber);
535 	if (inumber >= lastinum) {
536 		readcount++;
537 		firstinum = lastinum;
538 		blk = ino_to_fsba(&sblock, lastinum);
539 		if (readcount % readpercg == 0) {
540 			size = partialsize;
541 			lastinum += partialcnt;
542 		} else {
543 			size = inobufsize;
544 			lastinum += fullcnt;
545 		}
546 		/*
547 		 * Flush old contents in case they have been updated.
548 		 * If getblk encounters an error, it will already have zeroed
549 		 * out the buffer, so we do not need to do so here.
550 		 */
551 		if (inobuf.b_refcnt != 0)
552 			pfatal("Non-zero getnextinode() ref count %d\n",
553 			    inobuf.b_refcnt);
554 		flush(fswritefd, &inobuf);
555 		getblk(&inobuf, blk, size);
556 		nextinop = inobuf.b_un.b_buf;
557 	}
558 	dp = (union dinode *)nextinop;
559 	if (sblock.fs_magic == FS_UFS1_MAGIC)
560 		nextinop += sizeof(struct ufs1_dinode);
561 	else
562 		nextinop += sizeof(struct ufs2_dinode);
563 	if ((ckhashadd & CK_INODE) != 0) {
564 		ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
565 		dirty(&inobuf);
566 	}
567 	if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
568 		pwarn("INODE CHECK-HASH FAILED");
569 		ip.i_bp = NULL;
570 		ip.i_dp = dp;
571 		ip.i_number = inumber;
572 		prtinode(&ip);
573 		if (preen || reply("FIX") != 0) {
574 			if (preen)
575 				printf(" (FIXED)\n");
576 			ffs_update_dinode_ckhash(&sblock,
577 			    (struct ufs2_dinode *)dp);
578 			dirty(&inobuf);
579 		}
580 	}
581 	if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) {
582 		/*
583 		 * Try to determine if we have reached the end of the
584 		 * allocated inodes.
585 		 */
586 		mode = DIP(dp, di_mode) & IFMT;
587 		if (mode == 0) {
588 			if (memcmp(dp->dp2.di_db, zino.dp2.di_db,
589 				UFS_NDADDR * sizeof(ufs2_daddr_t)) ||
590 			      memcmp(dp->dp2.di_ib, zino.dp2.di_ib,
591 				UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
592 			      dp->dp2.di_mode || dp->dp2.di_size)
593 				return (NULL);
594 			return (dp);
595 		}
596 		if (!ftypeok(dp))
597 			return (NULL);
598 		ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
599 		if (ndb < 0)
600 			return (NULL);
601 		if (mode == IFBLK || mode == IFCHR)
602 			ndb++;
603 		if (mode == IFLNK) {
604 			/*
605 			 * Fake ndb value so direct/indirect block checks below
606 			 * will detect any garbage after symlink string.
607 			 */
608 			if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
609 				ndb = howmany(DIP(dp, di_size),
610 				    sizeof(ufs2_daddr_t));
611 				if (ndb > UFS_NDADDR) {
612 					j = ndb - UFS_NDADDR;
613 					for (ndb = 1; j > 1; j--)
614 						ndb *= NINDIR(&sblock);
615 					ndb += UFS_NDADDR;
616 				}
617 			}
618 		}
619 		for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++)
620 			if (DIP(dp, di_db[j]) != 0)
621 				return (NULL);
622 		for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++)
623 			ndb /= NINDIR(&sblock);
624 		for (; j < UFS_NIADDR; j++)
625 			if (DIP(dp, di_ib[j]) != 0)
626 				return (NULL);
627 	}
628 	return (dp);
629 }
630 
631 void
632 setinodebuf(int cg, ino_t inosused)
633 {
634 	ino_t inum;
635 
636 	inum = cg * sblock.fs_ipg;
637 	lastvalidinum = inum + inosused - 1;
638 	nextinum = inum;
639 	lastinum = inum;
640 	readcount = 0;
641 	/* Flush old contents in case they have been updated */
642 	flush(fswritefd, &inobuf);
643 	inobuf.b_bno = 0;
644 	if (inobuf.b_un.b_buf == NULL) {
645 		inobufsize = blkroundup(&sblock,
646 		    MAX(INOBUFSIZE, sblock.fs_bsize));
647 		initbarea(&inobuf, BT_INODES);
648 		if ((inobuf.b_un.b_buf = Balloc((unsigned)inobufsize)) == NULL)
649 			errx(EEXIT, "cannot allocate space for inode buffer");
650 	}
651 	fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
652 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
653 	readpercg = inosused / fullcnt;
654 	partialcnt = inosused % fullcnt;
655 	partialsize = fragroundup(&sblock,
656 	    partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
657 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
658 	if (partialcnt != 0) {
659 		readpercg++;
660 	} else {
661 		partialcnt = fullcnt;
662 		partialsize = inobufsize;
663 	}
664 }
665 
666 int
667 freeblock(struct inodesc *idesc)
668 {
669 	struct dups *dlp;
670 	struct bufarea *cgbp;
671 	struct cg *cgp;
672 	ufs2_daddr_t blkno;
673 	long size, nfrags;
674 
675 	blkno = idesc->id_blkno;
676 	if (idesc->id_type == SNAP) {
677 		pfatal("clearing a snapshot dinode\n");
678 		return (STOP);
679 	}
680 	size = lfragtosize(&sblock, idesc->id_numfrags);
681 	if (snapblkfree(&sblock, blkno, size, idesc->id_number,
682 	    std_checkblkavail))
683 		return (KEEPON);
684 	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
685 		if (chkrange(blkno, 1)) {
686 			return (SKIP);
687 		} else if (testbmap(blkno)) {
688 			for (dlp = duplist; dlp; dlp = dlp->next) {
689 				if (dlp->dup != blkno)
690 					continue;
691 				dlp->dup = duplist->dup;
692 				dlp = duplist;
693 				duplist = duplist->next;
694 				free((char *)dlp);
695 				break;
696 			}
697 			if (dlp == NULL) {
698 				clrbmap(blkno);
699 				n_blks--;
700 			}
701 		}
702 	}
703 	/*
704 	 * If all successfully returned, account for them.
705 	 */
706 	if (nfrags == 0) {
707 		cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
708 		cgp = cgbp->b_un.b_cg;
709 		if (idesc->id_numfrags == sblock.fs_frag)
710 			cgp->cg_cs.cs_nbfree++;
711 		else
712 			cgp->cg_cs.cs_nffree += idesc->id_numfrags;
713 		cgdirty(cgbp);
714 	}
715 	return (KEEPON);
716 }
717 
718 /*
719  * Prepare a snapshot file for being removed.
720  */
721 void
722 snapremove(ino_t inum)
723 {
724 	struct inodesc idesc;
725 	struct inode ip;
726 	int i;
727 
728 	for (i = 0; i < snapcnt; i++)
729 		if (snaplist[i].i_number == inum)
730 			break;
731 	if (i == snapcnt)
732 		ginode(inum, &ip);
733 	else
734 		ip = snaplist[i];
735 	if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
736 		printf("snapremove: inode %jd is not a snapshot\n",
737 		    (intmax_t)inum);
738 		if (i == snapcnt)
739 			irelse(&ip);
740 		return;
741 	}
742 	if (debug)
743 		printf("snapremove: remove %sactive snapshot %jd\n",
744 		    i == snapcnt ? "in" : "", (intmax_t)inum);
745 	/*
746 	 * If on active snapshot list, remove it.
747 	 */
748 	if (i < snapcnt) {
749 		for (i++; i < FSMAXSNAP; i++) {
750 			if (sblock.fs_snapinum[i] == 0)
751 				break;
752 			snaplist[i - 1] = snaplist[i];
753 			sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
754 		}
755 		sblock.fs_snapinum[i - 1] = 0;
756 		bzero(&snaplist[i - 1], sizeof(struct inode));
757 		snapcnt--;
758 	}
759 	memset(&idesc, 0, sizeof(struct inodesc));
760 	idesc.id_type = SNAP;
761 	idesc.id_func = snapclean;
762 	idesc.id_number = inum;
763 	(void)ckinode(ip.i_dp, &idesc);
764 	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
765 	inodirty(&ip);
766 	irelse(&ip);
767 }
768 
769 static int
770 snapclean(struct inodesc *idesc)
771 {
772 	ufs2_daddr_t blkno;
773 	struct bufarea *bp;
774 	union dinode *dp;
775 
776 	blkno = idesc->id_blkno;
777 	if (blkno == 0)
778 		return (KEEPON);
779 
780 	dp = idesc->id_dp;
781 	if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
782 		if (idesc->id_lbn < UFS_NDADDR) {
783 			DIP_SET(dp, di_db[idesc->id_lbn], 0);
784 		} else {
785 			bp = idesc->id_bp;
786 			IBLK_SET(bp, bp->b_index, 0);
787 			dirty(bp);
788 		}
789 	}
790 	return (KEEPON);
791 }
792 
793 /*
794  * Notification that a block is being freed. Return zero if the free
795  * should be allowed to proceed. Return non-zero if the snapshot file
796  * wants to claim the block. The block will be claimed if it is an
797  * uncopied part of one of the snapshots. It will be freed if it is
798  * either a BLK_NOCOPY or has already been copied in all of the snapshots.
799  * If a fragment is being freed, then all snapshots that care about
800  * it must make a copy since a snapshot file can only claim full sized
801  * blocks. Note that if more than one snapshot file maps the block,
802  * we can pick one at random to claim it. Since none of the snapshots
803  * can change, we are assurred that they will all see the same unmodified
804  * image. When deleting a snapshot file (see ino_trunc above), we
805  * must push any of these claimed blocks to one of the other snapshots
806  * that maps it. These claimed blocks are easily identified as they will
807  * have a block number equal to their logical block number within the
808  * snapshot. A copied block can never have this property because they
809  * must always have been allocated from a BLK_NOCOPY location.
810  */
811 int
812 snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
813 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
814 {
815 	union dinode *dp;
816 	struct inode ip;
817 	struct bufarea *snapbp;
818 	ufs_lbn_t lbn;
819 	ufs2_daddr_t blkno, relblkno;
820 	int i, frags, claimedblk, copydone;
821 
822 	/* If no snapshots, nothing to do */
823 	if (snapcnt == 0)
824 		return (0);
825 	if (debug)
826 		printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
827 		    (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
828 	relblkno = blknum(fs, bno);
829 	lbn = fragstoblks(fs, relblkno);
830 	/* Direct blocks are always pre-copied */
831 	if (lbn < UFS_NDADDR)
832 		return (0);
833 	copydone = 0;
834 	claimedblk = 0;
835 	for (i = 0; i < snapcnt; i++) {
836 		/*
837 		 * Lookup block being freed.
838 		 */
839 		ip = snaplist[i];
840 		dp = ip.i_dp;
841 		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
842 		    lbn, &frags, &snapbp);
843 		/*
844 		 * Check to see if block needs to be copied.
845 		 */
846 		if (blkno == 0) {
847 			/*
848 			 * A block that we map is being freed. If it has not
849 			 * been claimed yet, we will claim or copy it (below).
850 			 */
851 			claimedblk = 1;
852 		} else if (blkno == BLK_SNAP) {
853 			/*
854 			 * No previous snapshot claimed the block,
855 			 * so it will be freed and become a BLK_NOCOPY
856 			 * (don't care) for us.
857 			 */
858 			if (claimedblk)
859 				pfatal("snapblkfree: inconsistent block type");
860 			IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY);
861 			dirty(snapbp);
862 			brelse(snapbp);
863 			continue;
864 		} else /* BLK_NOCOPY or default */ {
865 			/*
866 			 * If the snapshot has already copied the block
867 			 * (default), or does not care about the block,
868 			 * it is not needed.
869 			 */
870 			brelse(snapbp);
871 			continue;
872 		}
873 		/*
874 		 * If this is a full size block, we will just grab it
875 		 * and assign it to the snapshot inode. Otherwise we
876 		 * will proceed to copy it. See explanation for this
877 		 * routine as to why only a single snapshot needs to
878 		 * claim this block.
879 		 */
880 		if (size == fs->fs_bsize) {
881 			if (debug)
882 				printf("Grabonremove snapshot %ju lbn %jd "
883 				    "from inum %ju\n", (intmax_t)ip.i_number,
884 				    (intmax_t)lbn, (uintmax_t)inum);
885 			IBLK_SET(snapbp, snapbp->b_index, relblkno);
886 			dirty(snapbp);
887 			brelse(snapbp);
888 			DIP_SET(dp, di_blocks,
889 			    DIP(dp, di_blocks) + btodb(size));
890 			inodirty(&ip);
891 			return (1);
892 		}
893 
894 		/* First time through, read the contents of the old block. */
895 		if (copydone == 0) {
896 			copydone = 1;
897 			if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno),
898 			    fs->fs_bsize) != 0) {
899 				pfatal("Could not read snapshot %ju block "
900 				    "%jd\n", (intmax_t)ip.i_number,
901 				    (intmax_t)relblkno);
902 				continue;
903 			}
904 		}
905 		/*
906 		 * This allocation will never require any additional
907 		 * allocations for the snapshot inode.
908 		 */
909 		blkno = allocblk(dtog(fs, relblkno), fs->fs_frag,
910 		    checkblkavail);
911 		if (blkno == 0) {
912 			pfatal("Could not allocate block for snapshot %ju\n",
913 			    (intmax_t)ip.i_number);
914 			continue;
915 		}
916 		if (debug)
917 			printf("Copyonremove: snapino %jd lbn %jd for inum %ju "
918 			    "size %ld new blkno %jd\n", (intmax_t)ip.i_number,
919 			    (intmax_t)lbn, (uintmax_t)inum, size,
920 			    (intmax_t)blkno);
921 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
922 		IBLK_SET(snapbp, snapbp->b_index, blkno);
923 		dirty(snapbp);
924 		brelse(snapbp);
925 		DIP_SET(dp, di_blocks,
926 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
927 		inodirty(&ip);
928 	}
929 	return (0);
930 }
931 
932 /*
933  * Notification that a block is being written. Return if the block
934  * is part of a snapshot as snapshots never track other snapshots.
935  * The block will be copied in all of the snapshots that are tracking
936  * it and have not yet copied it. Some buffers may hold more than one
937  * block. Here we need to check each block in the buffer.
938  */
939 void
940 copyonwrite(struct fs *fs, struct bufarea *bp,
941 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
942 {
943 	ufs2_daddr_t copyblkno;
944 	long i, numblks;
945 
946 	/* If no snapshots, nothing to do. */
947 	if (snapcnt == 0)
948 		return;
949 	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
950 	if (debug)
951 		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
952 		    (intmax_t)numblks, numblks > 1 ? "s" : "");
953 	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
954 	for (i = 0; i < numblks; i++) {
955 		chkcopyonwrite(fs, copyblkno, checkblkavail);
956 		copyblkno += fs->fs_frag;
957 	}
958 }
959 
960 static void
961 chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
962 	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
963 {
964 	struct inode ip;
965 	union dinode *dp;
966 	struct bufarea *snapbp;
967 	ufs2_daddr_t blkno;
968 	int i, frags, copydone;
969 	ufs_lbn_t lbn;
970 
971 	lbn = fragstoblks(fs, copyblkno);
972 	/* Direct blocks are always pre-copied */
973 	if (lbn < UFS_NDADDR)
974 		return;
975 	copydone = 0;
976 	for (i = 0; i < snapcnt; i++) {
977 		/*
978 		 * Lookup block being freed.
979 		 */
980 		ip = snaplist[i];
981 		dp = ip.i_dp;
982 		blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
983 		/*
984 		 * Check to see if block needs to be copied.
985 		 */
986 		if (blkno != 0) {
987 			/*
988 			 * A block that we have already copied or don't track.
989 			 */
990 			brelse(snapbp);
991 			continue;
992 		}
993 		/* First time through, read the contents of the old block. */
994 		if (copydone == 0) {
995 			copydone = 1;
996 			if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
997 			    fs->fs_bsize) != 0) {
998 				pfatal("Could not read snapshot %ju block "
999 				    "%jd\n", (intmax_t)ip.i_number,
1000 				    (intmax_t)copyblkno);
1001 				continue;
1002 			}
1003 		}
1004 		/*
1005 		 * This allocation will never require any additional
1006 		 * allocations for the snapshot inode.
1007 		 */
1008 		if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
1009 		    checkblkavail)) == 0) {
1010 			pfatal("Could not allocate block for snapshot %ju\n",
1011 			    (intmax_t)ip.i_number);
1012 			continue;
1013 		}
1014 		if (debug)
1015 			prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
1016 			    "blkno %ju setting in buffer",
1017 			    (intmax_t)ip.i_number, (intmax_t)lbn,
1018 			    (intmax_t)blkno);
1019 		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
1020 		IBLK_SET(snapbp, snapbp->b_index, blkno);
1021 		dirty(snapbp);
1022 		brelse(snapbp);
1023 		DIP_SET(dp, di_blocks,
1024 		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
1025 		inodirty(&ip);
1026 	}
1027 	return;
1028 }
1029 
1030 /*
1031  * Traverse an inode and check that its block count is correct
1032  * fixing it if necessary.
1033  */
1034 void
1035 check_blkcnt(struct inode *ip)
1036 {
1037 	struct inodesc idesc;
1038 	union dinode *dp;
1039 	ufs2_daddr_t ndb;
1040 	int j, ret, offset;
1041 
1042 	dp = ip->i_dp;
1043 	memset(&idesc, 0, sizeof(struct inodesc));
1044 	idesc.id_func = pass1check;
1045 	idesc.id_number = ip->i_number;
1046 	idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP;
1047 	(void)ckinode(dp, &idesc);
1048 	if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
1049 		ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
1050 		for (j = 0; j < UFS_NXADDR; j++) {
1051 			if (--ndb == 0 &&
1052 			    (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0)
1053 				idesc.id_numfrags = numfrags(&sblock,
1054 				    fragroundup(&sblock, offset));
1055 			else
1056 				idesc.id_numfrags = sblock.fs_frag;
1057 			if (dp->dp2.di_extb[j] == 0)
1058 				continue;
1059 			idesc.id_blkno = dp->dp2.di_extb[j];
1060 			ret = (*idesc.id_func)(&idesc);
1061 			if (ret & STOP)
1062 				break;
1063 		}
1064 	}
1065 	idesc.id_entryno *= btodb(sblock.fs_fsize);
1066 	if (DIP(dp, di_blocks) != idesc.id_entryno) {
1067 		if (!(sujrecovery && preen)) {
1068 			pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
1069 			    (u_long)idesc.id_number,
1070 			    (uintmax_t)DIP(dp, di_blocks),
1071 			    (uintmax_t)idesc.id_entryno);
1072 			if (preen)
1073 				printf(" (CORRECTED)\n");
1074 			else if (reply("CORRECT") == 0)
1075 				return;
1076 		}
1077 		if (bkgrdflag == 0) {
1078 			DIP_SET(dp, di_blocks, idesc.id_entryno);
1079 			inodirty(ip);
1080 		} else {
1081 			cmd.value = idesc.id_number;
1082 			cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
1083 			if (debug)
1084 				printf("adjblkcnt ino %ju amount %lld\n",
1085 				    (uintmax_t)cmd.value, (long long)cmd.size);
1086 			if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
1087 			    &cmd, sizeof cmd) == -1)
1088 				rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
1089 		}
1090 	}
1091 }
1092 
1093 void
1094 freeinodebuf(void)
1095 {
1096 	struct bufarea *bp;
1097 	int i;
1098 
1099 	/*
1100 	 * Flush old contents in case they have been updated.
1101 	 */
1102 	flush(fswritefd, &inobuf);
1103 	if (inobuf.b_un.b_buf != NULL)
1104 		free((char *)inobuf.b_un.b_buf);
1105 	inobuf.b_un.b_buf = NULL;
1106 	firstinum = lastinum = 0;
1107 	/*
1108 	 * Reload the snapshot inodes in case any of them changed.
1109 	 */
1110 	for (i = 0; i < snapcnt; i++) {
1111 		bp = snaplist[i].i_bp;
1112 		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
1113 		    bp->b_size);
1114 	}
1115 }
1116 
1117 /*
1118  * Routines to maintain information about directory inodes.
1119  * This is built during the first pass and used during the
1120  * second and third passes.
1121  *
1122  * Enter inodes into the cache.
1123  */
1124 struct inoinfo *
1125 cacheino(union dinode *dp, ino_t inumber)
1126 {
1127 	struct inoinfo *inp;
1128 	int i, blks;
1129 
1130 	if (getinoinfo(inumber) != NULL)
1131 		pfatal("cacheino: duplicate entry for ino %jd\n",
1132 		    (intmax_t)inumber);
1133 	if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
1134 		blks = UFS_NDADDR + UFS_NIADDR;
1135 	else if (DIP(dp, di_size) > 0)
1136 		blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
1137 	else
1138 		blks = 1;
1139 	inp = (struct inoinfo *)
1140 		Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
1141 	if (inp == NULL)
1142 		errx(EEXIT, "cannot increase directory list");
1143 	SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
1144 	inp->i_flags = 0;
1145 	inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
1146 	inp->i_dotdot = (ino_t)0;
1147 	inp->i_number = inumber;
1148 	inp->i_isize = DIP(dp, di_size);
1149 	inp->i_depth = DIP(dp, di_dirdepth);
1150 	inp->i_numblks = blks;
1151 	for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
1152 		inp->i_blks[i] = DIP(dp, di_db[i]);
1153 	if (blks > UFS_NDADDR)
1154 		for (i = 0; i < UFS_NIADDR; i++)
1155 			inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
1156 	if (inplast == listmax) {
1157 		listmax += 100;
1158 		inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
1159 		    listmax, sizeof(struct inoinfo *));
1160 		if (inpsort == NULL)
1161 			errx(EEXIT, "cannot increase directory list");
1162 	}
1163 	inpsort[inplast++] = inp;
1164 	return (inp);
1165 }
1166 
1167 /*
1168  * Look up an inode cache structure.
1169  */
1170 struct inoinfo *
1171 getinoinfo(ino_t inumber)
1172 {
1173 	struct inoinfo *inp;
1174 
1175 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1176 		if (inp->i_number != inumber)
1177 			continue;
1178 		return (inp);
1179 	}
1180 	return (NULL);
1181 }
1182 
1183 /*
1184  * Remove an entry from the inode cache and disk-order sorted list.
1185  * Return 0 on success and 1 on failure.
1186  */
1187 int
1188 removecachedino(ino_t inumber)
1189 {
1190 	struct inoinfo *inp, **inpp;
1191 	char *listtype;
1192 
1193 	listtype = "hash";
1194 	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
1195 		if (inp->i_number != inumber)
1196 			continue;
1197 		SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash);
1198 		for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
1199 			if (*inpp != inp)
1200 				continue;
1201 			*inpp = inpsort[inplast - 1];
1202 			inplast--;
1203 			free(inp);
1204 			return (0);
1205 		}
1206 		listtype = "sort";
1207 		break;
1208 	}
1209 	pfatal("removecachedino: entry for ino %jd not found on %s list\n",
1210 	    (intmax_t)inumber, listtype);
1211 	return (1);
1212 }
1213 
1214 /*
1215  * Clean up all the inode cache structure.
1216  */
1217 void
1218 inocleanup(void)
1219 {
1220 	struct inoinfo **inpp;
1221 
1222 	if (inphash == NULL)
1223 		return;
1224 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
1225 		free((char *)(*inpp));
1226 	free((char *)inphash);
1227 	inphash = NULL;
1228 	free((char *)inpsort);
1229 	inpsort = NULL;
1230 }
1231 
1232 void
1233 inodirty(struct inode *ip)
1234 {
1235 
1236 	if (sblock.fs_magic == FS_UFS2_MAGIC)
1237 		ffs_update_dinode_ckhash(&sblock,
1238 		    (struct ufs2_dinode *)ip->i_dp);
1239 	dirty(ip->i_bp);
1240 }
1241 
1242 void
1243 clri(struct inodesc *idesc, const char *type, int flag)
1244 {
1245 	union dinode *dp;
1246 	struct inode ip;
1247 
1248 	ginode(idesc->id_number, &ip);
1249 	dp = ip.i_dp;
1250 	if (flag == 1) {
1251 		pwarn("%s %s", type,
1252 		    (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
1253 		prtinode(&ip);
1254 		printf("\n");
1255 	}
1256 	if (preen || reply("CLEAR") == 1) {
1257 		if (preen)
1258 			printf(" (CLEARED)\n");
1259 		n_files--;
1260 		if (bkgrdflag == 0) {
1261 			if (idesc->id_type == SNAP) {
1262 				snapremove(idesc->id_number);
1263 				idesc->id_type = ADDR;
1264 			}
1265 			(void)ckinode(dp, idesc);
1266 			inoinfo(idesc->id_number)->ino_state = USTATE;
1267 			clearinode(dp);
1268 			inodirty(&ip);
1269 		} else {
1270 			cmd.value = idesc->id_number;
1271 			cmd.size = -DIP(dp, di_nlink);
1272 			if (debug)
1273 				printf("adjrefcnt ino %ld amt %lld\n",
1274 				    (long)cmd.value, (long long)cmd.size);
1275 			if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
1276 			    &cmd, sizeof cmd) == -1)
1277 				rwerror("ADJUST INODE", cmd.value);
1278 		}
1279 	}
1280 	irelse(&ip);
1281 }
1282 
1283 int
1284 findname(struct inodesc *idesc)
1285 {
1286 	struct direct *dirp = idesc->id_dirp;
1287 
1288 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1289 		idesc->id_entryno++;
1290 		return (KEEPON);
1291 	}
1292 	memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
1293 	return (STOP|FOUND);
1294 }
1295 
1296 int
1297 findino(struct inodesc *idesc)
1298 {
1299 	struct direct *dirp = idesc->id_dirp;
1300 
1301 	if (dirp->d_ino == 0)
1302 		return (KEEPON);
1303 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1304 	    dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) {
1305 		idesc->id_parent = dirp->d_ino;
1306 		return (STOP|FOUND);
1307 	}
1308 	return (KEEPON);
1309 }
1310 
1311 int
1312 clearentry(struct inodesc *idesc)
1313 {
1314 	struct direct *dirp = idesc->id_dirp;
1315 
1316 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1317 		idesc->id_entryno++;
1318 		return (KEEPON);
1319 	}
1320 	dirp->d_ino = 0;
1321 	return (STOP|FOUND|ALTERED);
1322 }
1323 
1324 void
1325 prtinode(struct inode *ip)
1326 {
1327 	char *p;
1328 	union dinode *dp;
1329 	struct passwd *pw;
1330 	time_t t;
1331 
1332 	dp = ip->i_dp;
1333 	printf(" I=%lu ", (u_long)ip->i_number);
1334 	if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino)
1335 		return;
1336 	printf(" OWNER=");
1337 	if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
1338 		printf("%s ", pw->pw_name);
1339 	else
1340 		printf("%u ", (unsigned)DIP(dp, di_uid));
1341 	printf("MODE=%o\n", DIP(dp, di_mode));
1342 	if (preen)
1343 		printf("%s: ", cdevname);
1344 	printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
1345 	t = DIP(dp, di_mtime);
1346 	if ((p = ctime(&t)) != NULL)
1347 		printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
1348 }
1349 
1350 void
1351 blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
1352 {
1353 
1354 	pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
1355 	printf("\n");
1356 	switch (inoinfo(ino)->ino_state) {
1357 
1358 	case FSTATE:
1359 	case FZLINK:
1360 		inoinfo(ino)->ino_state = FCLEAR;
1361 		return;
1362 
1363 	case DSTATE:
1364 	case DZLINK:
1365 		inoinfo(ino)->ino_state = DCLEAR;
1366 		return;
1367 
1368 	case FCLEAR:
1369 	case DCLEAR:
1370 		return;
1371 
1372 	default:
1373 		errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state);
1374 		/* NOTREACHED */
1375 	}
1376 }
1377 
1378 /*
1379  * allocate an unused inode
1380  */
1381 ino_t
1382 allocino(ino_t request, int type)
1383 {
1384 	ino_t ino;
1385 	struct inode ip;
1386 	union dinode *dp;
1387 	struct bufarea *cgbp;
1388 	struct cg *cgp;
1389 	int cg, anyino;
1390 
1391 	anyino = 0;
1392 	if (request == 0) {
1393 		request = UFS_ROOTINO;
1394 		anyino = 1;
1395 	} else if (inoinfo(request)->ino_state != USTATE)
1396 		return (0);
1397 retry:
1398 	for (ino = request; ino < maxino; ino++)
1399 		if (inoinfo(ino)->ino_state == USTATE)
1400 			break;
1401 	if (ino >= maxino)
1402 		return (0);
1403 	cg = ino_to_cg(&sblock, ino);
1404 	cgbp = cglookup(cg);
1405 	cgp = cgbp->b_un.b_cg;
1406 	if (!check_cgmagic(cg, cgbp)) {
1407 		if (anyino == 0)
1408 			return (0);
1409 		request = (cg + 1) * sblock.fs_ipg;
1410 		goto retry;
1411 	}
1412 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1413 	cgp->cg_cs.cs_nifree--;
1414 	switch (type & IFMT) {
1415 	case IFDIR:
1416 		inoinfo(ino)->ino_state = DSTATE;
1417 		cgp->cg_cs.cs_ndir++;
1418 		break;
1419 	case IFREG:
1420 	case IFLNK:
1421 		inoinfo(ino)->ino_state = FSTATE;
1422 		break;
1423 	default:
1424 		return (0);
1425 	}
1426 	cgdirty(cgbp);
1427 	ginode(ino, &ip);
1428 	dp = ip.i_dp;
1429 	memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ?
1430 	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
1431 	DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
1432 	    std_checkblkavail));
1433 	if (DIP(dp, di_db[0]) == 0) {
1434 		inoinfo(ino)->ino_state = USTATE;
1435 		inodirty(&ip);
1436 		irelse(&ip);
1437 		return (0);
1438 	}
1439 	DIP_SET(dp, di_mode, type);
1440 	DIP_SET(dp, di_atime, time(NULL));
1441 	DIP_SET(dp, di_ctime, DIP(dp, di_atime));
1442 	DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
1443 	DIP_SET(dp, di_size, sblock.fs_fsize);
1444 	DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
1445 	n_files++;
1446 	inodirty(&ip);
1447 	irelse(&ip);
1448 	inoinfo(ino)->ino_type = IFTODT(type);
1449 	return (ino);
1450 }
1451 
1452 /*
1453  * deallocate an inode
1454  */
1455 void
1456 freeino(ino_t ino)
1457 {
1458 	struct inodesc idesc;
1459 	union dinode *dp;
1460 	struct inode ip;
1461 
1462 	memset(&idesc, 0, sizeof(struct inodesc));
1463 	idesc.id_type = ADDR;
1464 	idesc.id_func = freeblock;
1465 	idesc.id_number = ino;
1466 	ginode(ino, &ip);
1467 	dp = ip.i_dp;
1468 	(void)ckinode(dp, &idesc);
1469 	clearinode(dp);
1470 	inodirty(&ip);
1471 	irelse(&ip);
1472 	inoinfo(ino)->ino_state = USTATE;
1473 	n_files--;
1474 }
1475