xref: /freebsd/sbin/fsck_ffs/fsutil.c (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #if 0
33 #ifndef lint
34 static const char sccsid[] = "@(#)utilities.c	8.6 (Berkeley) 5/19/95";
35 #endif /* not lint */
36 #endif
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/time.h>
42 #include <sys/types.h>
43 #include <sys/sysctl.h>
44 #include <sys/disk.h>
45 #include <sys/disklabel.h>
46 #include <sys/ioctl.h>
47 #include <sys/stat.h>
48 
49 #include <ufs/ufs/dinode.h>
50 #include <ufs/ufs/dir.h>
51 #include <ufs/ffs/fs.h>
52 
53 #include <err.h>
54 #include <errno.h>
55 #include <string.h>
56 #include <ctype.h>
57 #include <fstab.h>
58 #include <stdint.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <time.h>
62 #include <unistd.h>
63 #include <libufs.h>
64 
65 #include "fsck.h"
66 
/*
 * Nonzero while journal (SUJ) recovery is in progress.  It is only read
 * in this file: cglookup(), flushentry(), getdatablk() and flush() all
 * change their behavior when it is set.
 * NOTE(review): presumably set by the journal recovery code -- confirm.
 */
int		sujrecovery = 0;

/* Forward declarations of file-local helpers. */
static struct bufarea *allocbuf(const char *);
static void cg_write(struct bufarea *);
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);

static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
/* Start of the current pass and scratch end/elapsed time for printIOstats() */
static struct timespec startpass, finishpass;
/* State of the slow-I/O throttle used around reads when bkgrdflag is set */
struct timeval slowio_starttime;
int slowio_delay_usec = 10000;	/* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk;	/* backup buffer for cylinder group blocks */
static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static struct bufhash freebufs;	/* unused buffers */
static int numbufs;		/* size of buffer cache */
static int cachelookups;	/* number of cache lookups */
static int cachereads;		/* number of cache reads */
static int flushtries;		/* number of tries to reclaim memory */

/* Printable buffer type names, indexed like readcnt[] in printIOstats() */
char *buftype[BT_NUMBUFTYPES] = BT_NAMES;
91 
92 void
93 fsutilinit(void)
94 {
95 	diskreads = totaldiskreads = totalreads = 0;
96 	bzero(&startpass, sizeof(struct timespec));
97 	bzero(&finishpass, sizeof(struct timespec));
98 	bzero(&slowio_starttime, sizeof(struct timeval));
99 	slowio_delay_usec = 10000;
100 	slowio_pollcnt = 0;
101 	flushtries = 0;
102 }
103 
104 int
105 ftypeok(union dinode *dp)
106 {
107 	switch (DIP(dp, di_mode) & IFMT) {
108 
109 	case IFDIR:
110 	case IFREG:
111 	case IFBLK:
112 	case IFCHR:
113 	case IFLNK:
114 	case IFSOCK:
115 	case IFIFO:
116 		return (1);
117 
118 	default:
119 		if (debug)
120 			printf("bad file type 0%o\n", DIP(dp, di_mode));
121 		return (0);
122 	}
123 }
124 
125 int
126 reply(const char *question)
127 {
128 	int persevere;
129 	char c;
130 
131 	if (preen)
132 		pfatal("INTERNAL ERROR: GOT TO reply()");
133 	persevere = strcmp(question, "CONTINUE") == 0 ||
134 		strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0;
135 	printf("\n");
136 	if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) {
137 		printf("%s? no\n\n", question);
138 		resolved = 0;
139 		return (0);
140 	}
141 	if (yflag || (persevere && nflag)) {
142 		printf("%s? yes\n\n", question);
143 		return (1);
144 	}
145 	do	{
146 		printf("%s? [yn] ", question);
147 		(void) fflush(stdout);
148 		c = getc(stdin);
149 		while (c != '\n' && getc(stdin) != '\n') {
150 			if (feof(stdin)) {
151 				resolved = 0;
152 				return (0);
153 			}
154 		}
155 	} while (c != 'y' && c != 'Y' && c != 'n' && c != 'N');
156 	printf("\n");
157 	if (c == 'y' || c == 'Y')
158 		return (1);
159 	resolved = 0;
160 	return (0);
161 }
162 
163 /*
164  * Look up state information for an inode.
165  */
166 struct inostat *
167 inoinfo(ino_t inum)
168 {
169 	static struct inostat unallocated = { USTATE, 0, 0, 0 };
170 	struct inostatlist *ilp;
171 	int iloff;
172 
173 	if (inum >= maxino)
174 		errx(EEXIT, "inoinfo: inumber %ju out of range",
175 		    (uintmax_t)inum);
176 	ilp = &inostathead[inum / sblock.fs_ipg];
177 	iloff = inum % sblock.fs_ipg;
178 	if (iloff >= ilp->il_numalloced)
179 		return (&unallocated);
180 	return (&ilp->il_stat[iloff]);
181 }
182 
183 /*
184  * Malloc buffers and set up cache.
185  */
186 void
187 bufinit(void)
188 {
189 	int i;
190 
191 	initbarea(&failedbuf, BT_UNKNOWN);
192 	failedbuf.b_errs = -1;
193 	failedbuf.b_un.b_buf = NULL;
194 	if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
195 		errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
196 	initbarea(&cgblk, BT_CYLGRP);
197 	numbufs = cachelookups = cachereads = 0;
198 	TAILQ_INIT(&bufqueuehd);
199 	LIST_INIT(&freebufs);
200 	for (i = 0; i < HASHSIZE; i++)
201 		LIST_INIT(&bufhashhd[i]);
202 	for (i = 0; i < BT_NUMBUFTYPES; i++) {
203 		readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
204 		readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
205 		readcnt[i] = totalreadcnt[i] = 0;
206 	}
207 }
208 
209 static struct bufarea *
210 allocbuf(const char *failreason)
211 {
212 	struct bufarea *bp;
213 	char *bufp;
214 
215 	bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
216 	bufp = Malloc((unsigned int)sblock.fs_bsize);
217 	if (bp == NULL || bufp == NULL) {
218 		errx(EEXIT, "%s", failreason);
219 		/* NOTREACHED */
220 	}
221 	numbufs++;
222 	bp->b_un.b_buf = bufp;
223 	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
224 	initbarea(bp, BT_UNKNOWN);
225 	return (bp);
226 }
227 
228 /*
229  * Manage cylinder group buffers.
230  *
231  * Use getblk() here rather than cgget() because the cylinder group
232  * may be corrupted but we want it anyway so we can fix it.
233  */
234 static struct bufarea *cgbufs;	/* header for cylinder group cache */
235 static int flushtries;		/* number of tries to reclaim memory */
236 
237 struct bufarea *
238 cglookup(int cg)
239 {
240 	struct bufarea *cgbp;
241 	struct cg *cgp;
242 
243 	if ((unsigned) cg >= sblock.fs_ncg)
244 		errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
245 	if (cgbufs == NULL) {
246 		cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
247 		if (cgbufs == NULL)
248 			errx(EEXIT, "Cannot allocate cylinder group buffers");
249 	}
250 	cgbp = &cgbufs[cg];
251 	if (cgbp->b_un.b_cg != NULL)
252 		return (cgbp);
253 	cgp = NULL;
254 	if (flushtries == 0)
255 		cgp = Malloc((unsigned int)sblock.fs_cgsize);
256 	if (cgp == NULL) {
257 		if (sujrecovery)
258 			errx(EEXIT,"Ran out of memory during journal recovery");
259 		flush(fswritefd, &cgblk);
260 		getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
261 		return (&cgblk);
262 	}
263 	cgbp->b_un.b_cg = cgp;
264 	initbarea(cgbp, BT_CYLGRP);
265 	getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
266 	return (cgbp);
267 }
268 
269 /*
270  * Mark a cylinder group buffer as dirty.
271  * Update its check-hash if they are enabled.
272  */
273 void
274 cgdirty(struct bufarea *cgbp)
275 {
276 	struct cg *cg;
277 
278 	cg = cgbp->b_un.b_cg;
279 	if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
280 		cg->cg_ckhash = 0;
281 		cg->cg_ckhash =
282 		    calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
283 	}
284 	dirty(cgbp);
285 }
286 
287 /*
288  * Attempt to flush a cylinder group cache entry.
289  * Return whether the flush was successful.
290  */
291 int
292 flushentry(void)
293 {
294 	struct bufarea *cgbp;
295 
296 	if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
297 		return (0);
298 	cgbp = &cgbufs[flushtries++];
299 	if (cgbp->b_un.b_cg == NULL)
300 		return (0);
301 	flush(fswritefd, cgbp);
302 	free(cgbp->b_un.b_buf);
303 	cgbp->b_un.b_buf = NULL;
304 	return (1);
305 }
306 
/*
 * Manage a cache of filesystem disk blocks.
 *
 * getdatablk() returns a cache buffer of the given type for the `size'
 * bytes starting at filesystem block `blkno'.  On a cache hit the
 * existing buffer is reused; on a miss a buffer is taken from the free
 * list, newly allocated, or recycled from the LRU queue, and the block
 * is read with getblk().  Either way the buffer ends up at the head of
 * the LRU queue.
 *
 * The reference count of the returned buffer is incremented unless the
 * read reported errors (b_errs != 0).  For an out-of-range request the
 * shared `failedbuf' is returned with its reference count incremented.
 * Exits through errx() if size exceeds the filesystem block size or if
 * a needed buffer cannot be allocated.
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_errs == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	/*
	 * Look for the block on its hash chain.  Buffers record the disk
	 * block number in b_bno, hence the fsbtodb() conversion.
	 */
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			/* Unlink from the LRU; reinserted at the head below. */
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move a long-term busy buffer back to the front of the LRU so we
	 * do not endlessly inspect it for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Cache buffers hold at most one filesystem block.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	/*
	 * Pick a buffer: prefer one from the free list, then allocate up
	 * to the minimum number of buffers before considering recycling
	 * any of them.
	 */
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	/*
	 * Whichever way the buffer was obtained it is on the LRU queue at
	 * this point; unlink it, write out any old dirty contents, then
	 * hash it under the new block and read that block in.
	 */
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	/* Only successfully read buffers gain a reference. */
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}
402 
403 void
404 getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
405 {
406 	ufs2_daddr_t dblk;
407 	struct timespec start, finish;
408 
409 	dblk = fsbtodb(&sblock, blk);
410 	if (bp->b_bno == dblk) {
411 		totalreads++;
412 	} else {
413 		if (debug) {
414 			readcnt[bp->b_type]++;
415 			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
416 		}
417 		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
418 		if (debug) {
419 			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
420 			timespecsub(&finish, &start, &finish);
421 			timespecadd(&readtime[bp->b_type], &finish,
422 			    &readtime[bp->b_type]);
423 		}
424 		bp->b_bno = dblk;
425 		bp->b_size = size;
426 	}
427 }
428 
429 void
430 brelse(struct bufarea *bp)
431 {
432 
433 	if (bp->b_refcnt <= 0)
434 		prtbuf(bp, "brelse: buffer with negative reference count");
435 	bp->b_refcnt--;
436 }
437 
438 void
439 binval(struct bufarea *bp)
440 {
441 
442 	bp->b_flags &= ~B_DIRTY;
443 	LIST_REMOVE(bp, b_hash);
444 	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
445 }
446 
/*
 * Write a buffer to disk if it is dirty.
 *
 * fd is the descriptor to write to; bp is the buffer.  Clean buffers are
 * left alone.  The dirty flag is cleared before anything is written, and
 * nothing is written when the filesystem is not open for writing
 * (fswritefd < 0).  The write routine depends on the buffer type:
 * superblocks go through sbput(), cylinder groups through cgput(), and
 * everything else through copyonwrite() followed by blwrite().
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	/*
	 * b_errs is the number of sectors blread() could not read (and
	 * left zeroed); warn that such a block is about to be written.
	 */
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		/* During journal recovery the summaries are recomputed first. */
		if (sujrecovery)
			cg_write(bp);
		/* NOTE(review): writes via fswritefd rather than fd -- confirm. */
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		/*
		 * Debug-only sanity pass over a UFS2 inode block: verify each
		 * inode's check-hash and offer to repair it before writing.
		 */
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				/* Minimal inode handle for prtinode()/inodirty(). */
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}
517 
518 /*
519  * If there are any snapshots, ensure that all the blocks that they
520  * care about have been copied, then release the snapshot inodes.
521  * These operations need to be done before we rebuild the cylinder
522  * groups so that any block allocations are properly recorded.
523  * Since all the cylinder group maps have already been copied in
524  * the snapshots, no further snapshot copies will need to be done.
525  */
526 void
527 snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
528 {
529 	struct bufarea *bp;
530 	int cnt;
531 
532 	if (snapcnt > 0) {
533 		if (debug)
534 			printf("Check for snapshot copies\n");
535 		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
536 			if ((bp->b_flags & B_DIRTY) != 0)
537 				copyonwrite(&sblock, bp, checkblkavail);
538 		for (cnt = 0; cnt < snapcnt; cnt++)
539 			irelse(&snaplist[cnt]);
540 		snapcnt = 0;
541 	}
542 }
543 
544 /*
545  * Journaled soft updates does not maintain cylinder group summary
546  * information during cleanup, so this routine recalculates the summary
547  * information and updates the superblock summary in preparation for
548  * writing out the cylinder group.
549  */
550 static void
551 cg_write(struct bufarea *bp)
552 {
553 	ufs1_daddr_t fragno, cgbno, maxbno;
554 	u_int8_t *blksfree;
555 	struct csum *csp;
556 	struct cg *cgp;
557 	int blk;
558 	int i;
559 
560 	/*
561 	 * Fix the frag and cluster summary.
562 	 */
563 	cgp = bp->b_un.b_cg;
564 	cgp->cg_cs.cs_nbfree = 0;
565 	cgp->cg_cs.cs_nffree = 0;
566 	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
567 	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
568 	if (sblock.fs_contigsumsize > 0) {
569 		for (i = 1; i <= sblock.fs_contigsumsize; i++)
570 			cg_clustersum(cgp)[i] = 0;
571 		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
572 	}
573 	blksfree = cg_blksfree(cgp);
574 	for (cgbno = 0; cgbno < maxbno; cgbno++) {
575 		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
576 			continue;
577 		if (ffs_isblock(&sblock, blksfree, cgbno)) {
578 			ffs_clusteracct(&sblock, cgp, cgbno, 1);
579 			cgp->cg_cs.cs_nbfree++;
580 			continue;
581 		}
582 		fragno = blkstofrags(&sblock, cgbno);
583 		blk = blkmap(&sblock, blksfree, fragno);
584 		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
585 		for (i = 0; i < sblock.fs_frag; i++)
586 			if (isset(blksfree, fragno + i))
587 				cgp->cg_cs.cs_nffree++;
588 	}
589 	/*
590 	 * Update the superblock cg summary from our now correct values
591 	 * before writing the block.
592 	 */
593 	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
594 	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
595 	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
596 	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
597 	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
598 	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
599 }
600 
601 void
602 rwerror(const char *mesg, ufs2_daddr_t blk)
603 {
604 
605 	if (bkgrdcheck)
606 		exit(EEXIT);
607 	if (preen == 0)
608 		printf("\n");
609 	pfatal("CANNOT %s: %ld", mesg, (long)blk);
610 	if (reply("CONTINUE") == 0)
611 		exit(EEXIT);
612 }
613 
/*
 * Finish checking a filesystem.
 *
 * markclean is nonzero when the filesystem should end up marked clean.
 * For a background check the snapshot file is unlinked and the
 * unclean flag is adjusted through the vfs.ffs.setflags sysctl.  If the
 * filesystem is not open for writing only the read descriptor is
 * closed.  Otherwise all cached buffers are flushed and freed in a
 * fixed order, the superblock clean state is updated, the tracking
 * structures are freed and both descriptors are closed.  `rerun' is set
 * whenever the filesystem is left dirty and not in preen mode.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		unlink(snapname);
		/* Only touch the flag if its state differs from the request. */
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	/* Read-only run: nothing to flush. */
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 *  1) cylinder groups (bitmaps)
	 *  2) indirect, directory, external attribute, and data blocks
	 *  3) inode blocks
	 *  4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	/* The backup cylinder group buffer is flushed and released too. */
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	/* From here on cnt counts the cache buffers released in steps 2 and 3. */
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	/* Everything still on the queue is flushed and freed here. */
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Every buffer ever allocated must have been released by now. */
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	/*
	 * The superblock in use was not read from the standard location:
	 * offer to write it there as well.
	 */
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		/* Writing the clean flag alone does not count as a modification. */
		ofsmodified = fsmodified;
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}
791 
792 /*
793  * Print out I/O statistics.
794  */
795 void
796 IOstats(char *what)
797 {
798 	int i;
799 
800 	if (debug == 0)
801 		return;
802 	if (diskreads == 0) {
803 		printf("%s: no I/O\n\n", what);
804 		return;
805 	}
806 	if (startpass.tv_sec == 0)
807 		startpass = startprog;
808 	printf("%s: I/O statistics\n", what);
809 	printIOstats();
810 	totaldiskreads += diskreads;
811 	diskreads = 0;
812 	for (i = 0; i < BT_NUMBUFTYPES; i++) {
813 		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
814 		totalreadcnt[i] += readcnt[i];
815 		readtime[i].tv_sec = readtime[i].tv_nsec = 0;
816 		readcnt[i] = 0;
817 	}
818 	clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
819 }
820 
821 void
822 finalIOstats(void)
823 {
824 	int i;
825 
826 	if (debug == 0)
827 		return;
828 	printf("Final I/O statistics\n");
829 	totaldiskreads += diskreads;
830 	diskreads = totaldiskreads;
831 	startpass = startprog;
832 	for (i = 0; i < BT_NUMBUFTYPES; i++) {
833 		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
834 		totalreadcnt[i] += readcnt[i];
835 		readtime[i] = totalreadtime[i];
836 		readcnt[i] = totalreadcnt[i];
837 	}
838 	printIOstats();
839 }
840 
841 static void printIOstats(void)
842 {
843 	long long msec, totalmsec;
844 	int i;
845 
846 	clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
847 	timespecsub(&finishpass, &startpass, &finishpass);
848 	printf("Running time: %jd.%03ld sec\n",
849 		(intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
850 	printf("buffer reads by type:\n");
851 	for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
852 		totalmsec += readtime[i].tv_sec * 1000 +
853 		    readtime[i].tv_nsec / 1000000;
854 	if (totalmsec == 0)
855 		totalmsec = 1;
856 	for (i = 0; i < BT_NUMBUFTYPES; i++) {
857 		if (readcnt[i] == 0)
858 			continue;
859 		msec =
860 		    readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
861 		printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
862 		    buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
863 		    (readcnt[i] * 1000 / diskreads) % 10,
864 		    (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
865 		    msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
866 	}
867 	printf("\n");
868 }
869 
870 int
871 blread(int fd, char *buf, ufs2_daddr_t blk, long size)
872 {
873 	char *cp;
874 	int i, errs;
875 	off_t offset;
876 
877 	offset = blk;
878 	offset *= dev_bsize;
879 	if (bkgrdflag)
880 		slowio_start();
881 	totalreads++;
882 	diskreads++;
883 	if (pread(fd, buf, (int)size, offset) == size) {
884 		if (bkgrdflag)
885 			slowio_end();
886 		return (0);
887 	}
888 
889 	/*
890 	 * This is handled specially here instead of in rwerror because
891 	 * rwerror is used for all sorts of errors, not just true read/write
892 	 * errors.  It should be refactored and fixed.
893 	 */
894 	if (surrender) {
895 		pfatal("CANNOT READ_BLK: %ld", (long)blk);
896 		errx(EEXIT, "ABORTING DUE TO READ ERRORS");
897 	} else
898 		rwerror("READ BLK", blk);
899 
900 	errs = 0;
901 	memset(buf, 0, (size_t)size);
902 	printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
903 	for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
904 		if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
905 			if (secsize != dev_bsize && dev_bsize != 1)
906 				printf(" %jd (%jd),",
907 				    (intmax_t)(blk * dev_bsize + i) / secsize,
908 				    (intmax_t)blk + i / dev_bsize);
909 			else
910 				printf(" %jd,", (intmax_t)blk + i / dev_bsize);
911 			errs++;
912 		}
913 	}
914 	printf("\n");
915 	if (errs)
916 		resolved = 0;
917 	return (errs);
918 }
919 
920 void
921 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
922 {
923 	int i;
924 	char *cp;
925 	off_t offset;
926 
927 	if (fd < 0)
928 		return;
929 	offset = blk;
930 	offset *= dev_bsize;
931 	if (pwrite(fd, buf, size, offset) == size) {
932 		fsmodified = 1;
933 		return;
934 	}
935 	resolved = 0;
936 	rwerror("WRITE BLK", blk);
937 	printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
938 	for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
939 		if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
940 			printf(" %jd,", (intmax_t)blk + i / dev_bsize);
941 	printf("\n");
942 	return;
943 }
944 
945 void
946 blerase(int fd, ufs2_daddr_t blk, long size)
947 {
948 	off_t ioarg[2];
949 
950 	if (fd < 0)
951 		return;
952 	ioarg[0] = blk * dev_bsize;
953 	ioarg[1] = size;
954 	ioctl(fd, DIOCGDELETE, ioarg);
955 	/* we don't really care if we succeed or not */
956 	return;
957 }
958 
959 /*
960  * Fill a contiguous region with all-zeroes.  Note ZEROBUFSIZE is by
961  * definition a multiple of dev_bsize.
962  */
963 void
964 blzero(int fd, ufs2_daddr_t blk, long size)
965 {
966 	static char *zero;
967 	off_t offset, len;
968 
969 	if (fd < 0)
970 		return;
971 	if (zero == NULL) {
972 		zero = calloc(ZEROBUFSIZE, 1);
973 		if (zero == NULL)
974 			errx(EEXIT, "cannot allocate buffer pool");
975 	}
976 	offset = blk * dev_bsize;
977 	if (lseek(fd, offset, 0) < 0)
978 		rwerror("SEEK BLK", blk);
979 	while (size > 0) {
980 		len = MIN(ZEROBUFSIZE, size);
981 		if (write(fd, zero, len) != len)
982 			rwerror("WRITE BLK", blk);
983 		blk += len / dev_bsize;
984 		size -= len;
985 	}
986 }
987 
/*
 * Verify cylinder group's magic number and other parameters.
 *
 * cg is the expected cylinder group number and cgbp the buffer holding
 * the group.  Every failing test is reported through pwarn(); if any
 * test failed, a summary is reported through pfatal() unless the same
 * group was the most recent one to fail.
 *
 * NOTE(review): this function only reports the failure; rebuilding the
 * group is done by rebuild_cg(), presumably at the caller's discretion.
 *
 * Return 1 if the cylinder group is good or return 0 if it is bad.
 */
/*
 * CHK(lhs, op, rhs, fmt): if `lhs op rhs' holds, the check has FAILED;
 * print both expressions with their values and set the local `error'.
 * The macro relies on `cg' and `error' being in scope and expands to a
 * bare if statement, so it must only be used as a full statement inside
 * braces.
 */
#undef CHK
#define CHK(lhs, op, rhs, fmt)						\
	if (lhs op rhs) {						\
		pwarn("UFS%d cylinder group %d failed: "		\
		    "%s (" #fmt ") %s %s (" #fmt ")\n",			\
		    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,	\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);	\
		error = 1;						\
	}
int
check_cgmagic(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	uint32_t cghash, calchash;
	static int prevfailcg = -1;	/* last group reported as failed */
	long start;
	int error;

	/*
	 * Extended cylinder group checks.
	 */
	/*
	 * Recompute the check-hash only when hashes are enabled and are not
	 * being added by this run; otherwise calchash equals the stored
	 * hash and the comparison below cannot fail.
	 */
	calchash = cgp->cg_ckhash;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
	    (ckhashadd & CK_CYLGRP) == 0) {
		/* The hash is computed with the hash field itself zeroed. */
		cghash = cgp->cg_ckhash;
		cgp->cg_ckhash = 0;
		calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
		cgp->cg_ckhash = cghash;
	}
	error = 0;
	CHK(cgp->cg_ckhash, !=, calchash, "%jd");
	CHK(cg_chkmagic(cgp), ==, 0, "%jd");
	CHK(cgp->cg_cgx, !=, cg, "%jd");
	CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd");
	} else if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
	}
	/*
	 * A group that ends before the end of the filesystem must be
	 * full-sized; otherwise it covers what is left of the filesystem.
	 */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
		CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
	} else {
		CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
		    "%jd");
	}
	/*
	 * The remaining checks verify that the offsets of the tables that
	 * follow the fixed header are the expected ones.
	 */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_iusedoff, !=, start, "%jd");
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_niblk, !=, 0, "%jd");
		CHK(cgp->cg_initediblk, !=, 0, "%jd");
		CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd");
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_btotoff, !=, start, "%jd");
		CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t), "%jd");
		CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
	}
	CHK(cgp->cg_freeoff, !=,
	    cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
	if (sblock.fs_contigsumsize == 0) {
		CHK(cgp->cg_nextfreeoff, !=,
		    cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
	} else {
		CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
		    "%jd");
		CHK(cgp->cg_clustersumoff, !=,
		    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
		    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
		    "%jd");
	}
	if (error == 0)
		return (1);
	/* Do not repeat the summary for the group that failed last time. */
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}
1081 
1082 void
1083 rebuild_cg(int cg, struct bufarea *cgbp)
1084 {
1085 	struct cg *cgp = cgbp->b_un.b_cg;
1086 	long start;
1087 
1088 	/*
1089 	 * Zero out the cylinder group and then initialize critical fields.
1090 	 * Bit maps and summaries will be recalculated by later passes.
1091 	 */
1092 	memset(cgp, 0, (size_t)sblock.fs_cgsize);
1093 	cgp->cg_magic = CG_MAGIC;
1094 	cgp->cg_cgx = cg;
1095 	cgp->cg_niblk = sblock.fs_ipg;
1096 	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
1097 	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
1098 		cgp->cg_ndblk = sblock.fs_fpg;
1099 	else
1100 		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
1101 	start = sizeof(*cgp);
1102 	if (sblock.fs_magic == FS_UFS2_MAGIC) {
1103 		cgp->cg_iusedoff = start;
1104 	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
1105 		cgp->cg_niblk = 0;
1106 		cgp->cg_initediblk = 0;
1107 		cgp->cg_old_ncyl = sblock.fs_old_cpg;
1108 		cgp->cg_old_niblk = sblock.fs_ipg;
1109 		cgp->cg_old_btotoff = start;
1110 		cgp->cg_old_boff = cgp->cg_old_btotoff +
1111 		    sblock.fs_old_cpg * sizeof(int32_t);
1112 		cgp->cg_iusedoff = cgp->cg_old_boff +
1113 		    sblock.fs_old_cpg * sizeof(u_int16_t);
1114 	}
1115 	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
1116 	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
1117 	if (sblock.fs_contigsumsize > 0) {
1118 		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
1119 		cgp->cg_clustersumoff =
1120 		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
1121 		cgp->cg_clustersumoff -= sizeof(u_int32_t);
1122 		cgp->cg_clusteroff = cgp->cg_clustersumoff +
1123 		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
1124 		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
1125 		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
1126 	}
1127 	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
1128 	cgdirty(cgbp);
1129 }
1130 
1131 /*
1132  * allocate a data block with the specified number of fragments
1133  */
1134 ufs2_daddr_t
1135 allocblk(long startcg, long frags,
1136     ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
1137 {
1138 	ufs2_daddr_t blkno, newblk;
1139 
1140 	if (sujrecovery && checkblkavail == std_checkblkavail) {
1141 		pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
1142 		return (0);
1143 	}
1144 	if (frags <= 0 || frags > sblock.fs_frag)
1145 		return (0);
1146 	for (blkno = MAX(cgdata(&sblock, startcg), 0);
1147 	     blkno < maxfsblock - sblock.fs_frag;
1148 	     blkno += sblock.fs_frag) {
1149 		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1150 			continue;
1151 		if (newblk > 0)
1152 			return (newblk);
1153 		if (newblk < 0)
1154 			blkno = -newblk;
1155 	}
1156 	for (blkno = MAX(cgdata(&sblock, 0), 0);
1157 	     blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
1158 	     blkno += sblock.fs_frag) {
1159 		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1160 			continue;
1161 		if (newblk > 0)
1162 			return (newblk);
1163 		if (newblk < 0)
1164 			blkno = -newblk;
1165 	}
1166 	return (0);
1167 }
1168 
1169 ufs2_daddr_t
1170 std_checkblkavail(ufs2_daddr_t blkno, long frags)
1171 {
1172 	struct bufarea *cgbp;
1173 	struct cg *cgp;
1174 	ufs2_daddr_t j, k, baseblk;
1175 	long cg;
1176 
1177 	if ((u_int64_t)blkno > sblock.fs_size)
1178 		return (0);
1179 	for (j = 0; j <= sblock.fs_frag - frags; j++) {
1180 		if (testbmap(blkno + j))
1181 			continue;
1182 		for (k = 1; k < frags; k++)
1183 			if (testbmap(blkno + j + k))
1184 				break;
1185 		if (k < frags) {
1186 			j += k;
1187 			continue;
1188 		}
1189 		cg = dtog(&sblock, blkno + j);
1190 		cgbp = cglookup(cg);
1191 		cgp = cgbp->b_un.b_cg;
1192 		if (!check_cgmagic(cg, cgbp))
1193 			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
1194 		baseblk = dtogd(&sblock, blkno + j);
1195 		for (k = 0; k < frags; k++) {
1196 			setbmap(blkno + j + k);
1197 			clrbit(cg_blksfree(cgp), baseblk + k);
1198 		}
1199 		n_blks += frags;
1200 		if (frags == sblock.fs_frag)
1201 			cgp->cg_cs.cs_nbfree--;
1202 		else
1203 			cgp->cg_cs.cs_nffree -= frags;
1204 		cgdirty(cgbp);
1205 		return (blkno + j);
1206 	}
1207 	return (0);
1208 }
1209 
1210 /*
1211  * Check whether a file size is within the limits for the filesystem.
1212  * Return 1 when valid and 0 when too big.
1213  *
1214  * This should match the file size limit in ffs_mountfs().
1215  */
1216 int
1217 chkfilesize(mode_t mode, u_int64_t filesize)
1218 {
1219 	u_int64_t kernmaxfilesize;
1220 
1221 	if (sblock.fs_magic == FS_UFS1_MAGIC)
1222 		kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
1223 	else
1224 		kernmaxfilesize = sblock.fs_maxfilesize;
1225 	if (filesize > kernmaxfilesize ||
1226 	    filesize > sblock.fs_maxfilesize ||
1227 	    (mode == IFDIR && filesize > MAXDIRSIZE)) {
1228 		if (debug)
1229 			printf("bad file size %ju:", (uintmax_t)filesize);
1230 		return (0);
1231 	}
1232 	return (1);
1233 }
1234 
1235 /*
1236  * Slow down IO so as to leave some disk bandwidth for other processes
1237  */
1238 void
1239 slowio_start()
1240 {
1241 
1242 	/* Delay one in every 8 operations */
1243 	slowio_pollcnt = (slowio_pollcnt + 1) & 7;
1244 	if (slowio_pollcnt == 0) {
1245 		gettimeofday(&slowio_starttime, NULL);
1246 	}
1247 }
1248 
1249 void
1250 slowio_end()
1251 {
1252 	struct timeval tv;
1253 	int delay_usec;
1254 
1255 	if (slowio_pollcnt != 0)
1256 		return;
1257 
1258 	/* Update the slowdown interval. */
1259 	gettimeofday(&tv, NULL);
1260 	delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 +
1261 	    (tv.tv_usec - slowio_starttime.tv_usec);
1262 	if (delay_usec < 64)
1263 		delay_usec = 64;
1264 	if (delay_usec > 2500000)
1265 		delay_usec = 2500000;
1266 	slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6;
1267 	/* delay by 8 times the average IO delay */
1268 	if (slowio_delay_usec > 64)
1269 		usleep(slowio_delay_usec * 8);
1270 }
1271 
1272 /*
1273  * Find a pathname
1274  */
1275 void
1276 getpathname(char *namebuf, ino_t curdir, ino_t ino)
1277 {
1278 	int len;
1279 	char *cp;
1280 	struct inode ip;
1281 	struct inodesc idesc;
1282 	static int busy = 0;
1283 
1284 	if (curdir == ino && ino == UFS_ROOTINO) {
1285 		(void)strcpy(namebuf, "/");
1286 		return;
1287 	}
1288 	if (busy || !INO_IS_DVALID(curdir)) {
1289 		(void)strcpy(namebuf, "?");
1290 		return;
1291 	}
1292 	busy = 1;
1293 	memset(&idesc, 0, sizeof(struct inodesc));
1294 	idesc.id_type = DATA;
1295 	idesc.id_fix = IGNORE;
1296 	cp = &namebuf[MAXPATHLEN - 1];
1297 	*cp = '\0';
1298 	if (curdir != ino) {
1299 		idesc.id_parent = curdir;
1300 		goto namelookup;
1301 	}
1302 	while (ino != UFS_ROOTINO) {
1303 		idesc.id_number = ino;
1304 		idesc.id_func = findino;
1305 		idesc.id_name = strdup("..");
1306 		ginode(ino, &ip);
1307 		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1308 			irelse(&ip);
1309 			free(idesc.id_name);
1310 			break;
1311 		}
1312 		irelse(&ip);
1313 		free(idesc.id_name);
1314 	namelookup:
1315 		idesc.id_number = idesc.id_parent;
1316 		idesc.id_parent = ino;
1317 		idesc.id_func = findname;
1318 		idesc.id_name = namebuf;
1319 		ginode(idesc.id_number, &ip);
1320 		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1321 			irelse(&ip);
1322 			break;
1323 		}
1324 		irelse(&ip);
1325 		len = strlen(namebuf);
1326 		cp -= len;
1327 		memmove(cp, namebuf, (size_t)len);
1328 		*--cp = '/';
1329 		if (cp < &namebuf[UFS_MAXNAMLEN])
1330 			break;
1331 		ino = idesc.id_number;
1332 	}
1333 	busy = 0;
1334 	if (ino != UFS_ROOTINO)
1335 		*--cp = '?';
1336 	memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp));
1337 }
1338 
1339 void
1340 catch(int sig __unused)
1341 {
1342 
1343 	ckfini(0);
1344 	exit(12);
1345 }
1346 
1347 /*
1348  * When preening, allow a single quit to signal
1349  * a special exit after file system checks complete
1350  * so that reboot sequence may be interrupted.
1351  */
1352 void
1353 catchquit(int sig __unused)
1354 {
1355 	printf("returning to single-user after file system check\n");
1356 	returntosingle = 1;
1357 	(void)signal(SIGQUIT, SIG_DFL);
1358 }
1359 
1360 /*
1361  * determine whether an inode should be fixed.
1362  */
1363 int
1364 dofix(struct inodesc *idesc, const char *msg)
1365 {
1366 
1367 	switch (idesc->id_fix) {
1368 
1369 	case DONTKNOW:
1370 		if (idesc->id_type == DATA)
1371 			direrror(idesc->id_number, msg);
1372 		else
1373 			pwarn("%s", msg);
1374 		if (preen) {
1375 			printf(" (SALVAGED)\n");
1376 			idesc->id_fix = FIX;
1377 			return (ALTERED);
1378 		}
1379 		if (reply("SALVAGE") == 0) {
1380 			idesc->id_fix = NOFIX;
1381 			return (0);
1382 		}
1383 		idesc->id_fix = FIX;
1384 		return (ALTERED);
1385 
1386 	case FIX:
1387 		return (ALTERED);
1388 
1389 	case NOFIX:
1390 	case IGNORE:
1391 		return (0);
1392 
1393 	default:
1394 		errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix);
1395 	}
1396 	/* NOTREACHED */
1397 	return (0);
1398 }
1399 
1400 #include <stdarg.h>
1401 
1402 /*
1403  * Print details about a buffer.
1404  */
1405 void
1406 prtbuf(struct bufarea *bp, const char *fmt, ...)
1407 {
1408 	va_list ap;
1409 	va_start(ap, fmt);
1410 	if (preen)
1411 		(void)fprintf(stdout, "%s: ", cdevname);
1412 	(void)vfprintf(stdout, fmt, ap);
1413 	va_end(ap);
1414 	printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
1415 	    "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
1416 	    bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
1417 	    (intmax_t) bp->b_index);
1418 }
1419 
1420 /*
1421  * An unexpected inconsistency occurred.
1422  * Die if preening or file system is running with soft dependency protocol,
1423  * otherwise just print message and continue.
1424  */
1425 void
1426 pfatal(const char *fmt, ...)
1427 {
1428 	va_list ap;
1429 	va_start(ap, fmt);
1430 	if (!preen) {
1431 		(void)vfprintf(stdout, fmt, ap);
1432 		va_end(ap);
1433 		if (usedsoftdep)
1434 			(void)fprintf(stdout,
1435 			    "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n");
1436 		/*
1437 		 * Force foreground fsck to clean up inconsistency.
1438 		 */
1439 		if (bkgrdflag) {
1440 			cmd.value = FS_NEEDSFSCK;
1441 			cmd.size = 1;
1442 			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1443 			    &cmd, sizeof cmd) == -1)
1444 				pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1445 			fprintf(stdout, "CANNOT RUN IN BACKGROUND\n");
1446 			ckfini(0);
1447 			exit(EEXIT);
1448 		}
1449 		return;
1450 	}
1451 	if (cdevname == NULL)
1452 		cdevname = strdup("fsck");
1453 	(void)fprintf(stdout, "%s: ", cdevname);
1454 	(void)vfprintf(stdout, fmt, ap);
1455 	(void)fprintf(stdout,
1456 	    "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n",
1457 	    cdevname, usedsoftdep ? " SOFT UPDATE " : " ");
1458 	/*
1459 	 * Force foreground fsck to clean up inconsistency.
1460 	 */
1461 	if (bkgrdflag) {
1462 		cmd.value = FS_NEEDSFSCK;
1463 		cmd.size = 1;
1464 		if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1465 		    &cmd, sizeof cmd) == -1)
1466 			pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1467 	}
1468 	ckfini(0);
1469 	exit(EEXIT);
1470 }
1471 
1472 /*
1473  * Pwarn just prints a message when not preening or running soft dependency
1474  * protocol, or a warning (preceded by filename) when preening.
1475  */
1476 void
1477 pwarn(const char *fmt, ...)
1478 {
1479 	va_list ap;
1480 	va_start(ap, fmt);
1481 	if (preen)
1482 		(void)fprintf(stdout, "%s: ", cdevname);
1483 	(void)vfprintf(stdout, fmt, ap);
1484 	va_end(ap);
1485 }
1486 
1487 /*
1488  * Stub for routines from kernel.
1489  */
1490 void
1491 panic(const char *fmt, ...)
1492 {
1493 	va_list ap;
1494 	va_start(ap, fmt);
1495 	pfatal("INTERNAL INCONSISTENCY:");
1496 	(void)vfprintf(stdout, fmt, ap);
1497 	va_end(ap);
1498 	exit(EEXIT);
1499 }
1500