1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1986, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/types.h>
35 #include <sys/sysctl.h>
36 #include <sys/disk.h>
37 #include <sys/disklabel.h>
38 #include <sys/ioctl.h>
39 #include <sys/stat.h>
40
41 #include <ufs/ufs/dinode.h>
42 #include <ufs/ufs/dir.h>
43 #include <ufs/ffs/fs.h>
44
45 #include <err.h>
46 #include <errno.h>
47 #include <string.h>
48 #include <ctype.h>
49 #include <fstab.h>
50 #include <stdint.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <time.h>
54 #include <unistd.h>
55
56 #include "fsck.h"
57
/* Nonzero while recovering the soft updates journal; writes must be avoided. */
int sujrecovery = 0;

static struct bufarea *allocbuf(const char *);
static void cg_write(struct bufarea *);
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);

static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass;
struct timeval slowio_starttime;
int slowio_delay_usec = 10000;	/* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk;	/* backup buffer for cylinder group blocks */
static struct bufarea failedbuf;	/* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static struct bufhash freebufs;	/* unused buffers */
static int numbufs;	/* size of buffer cache */
static int cachelookups;	/* number of cache lookups */
static int cachereads;	/* number of cache reads */
static int flushtries;	/* number of tries to reclaim memory */

/* Printable names for each buffer type, indexed by the BT_* constants. */
char *buftype[BT_NUMBUFTYPES] = BT_NAMES;
82
83 void
fsutilinit(void)84 fsutilinit(void)
85 {
86 diskreads = totaldiskreads = totalreads = 0;
87 bzero(&startpass, sizeof(struct timespec));
88 bzero(&finishpass, sizeof(struct timespec));
89 bzero(&slowio_starttime, sizeof(struct timeval));
90 slowio_delay_usec = 10000;
91 slowio_pollcnt = 0;
92 flushtries = 0;
93 }
94
95 int
ftypeok(union dinode * dp)96 ftypeok(union dinode *dp)
97 {
98 switch (DIP(dp, di_mode) & IFMT) {
99
100 case IFDIR:
101 case IFREG:
102 case IFBLK:
103 case IFCHR:
104 case IFLNK:
105 case IFSOCK:
106 case IFIFO:
107 return (1);
108
109 default:
110 if (debug)
111 printf("bad file type 0%o\n", DIP(dp, di_mode));
112 return (0);
113 }
114 }
115
116 int
reply(const char * question)117 reply(const char *question)
118 {
119 int persevere;
120 char c;
121
122 if (preen)
123 pfatal("INTERNAL ERROR: GOT TO reply()");
124 persevere = strcmp(question, "CONTINUE") == 0 ||
125 strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0;
126 printf("\n");
127 if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) {
128 printf("%s? no\n\n", question);
129 resolved = 0;
130 return (0);
131 }
132 if (yflag || (persevere && nflag)) {
133 printf("%s? yes\n\n", question);
134 return (1);
135 }
136 do {
137 printf("%s? [yn] ", question);
138 (void) fflush(stdout);
139 c = getc(stdin);
140 while (c != '\n' && getc(stdin) != '\n') {
141 if (feof(stdin)) {
142 resolved = 0;
143 return (0);
144 }
145 }
146 } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N');
147 printf("\n");
148 if (c == 'y' || c == 'Y')
149 return (1);
150 resolved = 0;
151 return (0);
152 }
153
154 /*
155 * Look up state information for an inode.
156 */
157 struct inostat *
inoinfo(ino_t inum)158 inoinfo(ino_t inum)
159 {
160 static struct inostat unallocated = { USTATE, 0, 0, 0 };
161 struct inostatlist *ilp;
162 int iloff;
163
164 if (inum >= maxino)
165 errx(EEXIT, "inoinfo: inumber %ju out of range",
166 (uintmax_t)inum);
167 ilp = &inostathead[inum / sblock.fs_ipg];
168 iloff = inum % sblock.fs_ipg;
169 if (iloff >= ilp->il_numalloced)
170 return (&unallocated);
171 return (&ilp->il_stat[iloff]);
172 }
173
174 /*
175 * Malloc buffers and set up cache.
176 */
177 void
bufinit(void)178 bufinit(void)
179 {
180 int i;
181
182 initbarea(&failedbuf, BT_UNKNOWN);
183 failedbuf.b_errs = -1;
184 failedbuf.b_un.b_buf = NULL;
185 if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL)
186 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
187 initbarea(&cgblk, BT_CYLGRP);
188 numbufs = cachelookups = cachereads = 0;
189 TAILQ_INIT(&bufqueuehd);
190 LIST_INIT(&freebufs);
191 for (i = 0; i < HASHSIZE; i++)
192 LIST_INIT(&bufhashhd[i]);
193 for (i = 0; i < BT_NUMBUFTYPES; i++) {
194 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
195 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
196 readcnt[i] = totalreadcnt[i] = 0;
197 }
198 }
199
200 static struct bufarea *
allocbuf(const char * failreason)201 allocbuf(const char *failreason)
202 {
203 struct bufarea *bp;
204 char *bufp;
205
206 bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
207 bufp = Balloc((unsigned int)sblock.fs_bsize);
208 if (bp == NULL || bufp == NULL) {
209 errx(EEXIT, "%s", failreason);
210 /* NOTREACHED */
211 }
212 numbufs++;
213 bp->b_un.b_buf = bufp;
214 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
215 initbarea(bp, BT_UNKNOWN);
216 return (bp);
217 }
218
219 /*
220 * Manage cylinder group buffers.
221 *
222 * Use getblk() here rather than cgget() because the cylinder group
223 * may be corrupted but we want it anyway so we can fix it.
224 */
static struct bufarea *cgbufs;	/* header for cylinder group cache */
/* flushtries (memory-reclaim attempt counter) is declared earlier in
 * this file; the redundant second tentative definition was removed. */
227
/*
 * Return the buffer holding cylinder group "cg", reading it from disk
 * into the lazily allocated per-group cache on first use.  If memory
 * cannot be obtained, fall back to the single shared backup buffer
 * cgblk — except during journal recovery, which must not reuse
 * buffers this way (it needs to be able to fail back to a full fsck).
 */
struct bufarea *
cglookup(int cg)
{
	struct bufarea *cgbp;
	struct cg *cgp;

	if ((unsigned) cg >= sblock.fs_ncg)
		errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
	if (cgbufs == NULL) {
		/* One bufarea per cylinder group, allocated on demand. */
		cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea));
		if (cgbufs == NULL)
			errx(EEXIT, "Cannot allocate cylinder group buffers");
	}
	cgbp = &cgbufs[cg];
	if (cgbp->b_un.b_cg != NULL)
		return (cgbp);	/* already cached */
	cgp = NULL;
	/* Once flushentry() has begun reclaiming, stop growing the cache. */
	if (flushtries == 0)
		cgp = Balloc((unsigned int)sblock.fs_cgsize);
	if (cgp == NULL) {
		if (sujrecovery)
			errx(EEXIT,"Ran out of memory during journal recovery");
		/* Write out whatever the backup buffer held, then reuse it. */
		flush(fswritefd, &cgblk);
		getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
		return (&cgblk);
	}
	cgbp->b_un.b_cg = cgp;
	initbarea(cgbp, BT_CYLGRP);
	getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
	return (cgbp);
}
259
260 /*
261 * Mark a cylinder group buffer as dirty.
262 * Update its check-hash if they are enabled.
263 */
264 void
cgdirty(struct bufarea * cgbp)265 cgdirty(struct bufarea *cgbp)
266 {
267 struct cg *cg;
268
269 cg = cgbp->b_un.b_cg;
270 if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
271 cg->cg_ckhash = 0;
272 cg->cg_ckhash =
273 calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
274 }
275 dirty(cgbp);
276 }
277
278 /*
279 * Attempt to flush a cylinder group cache entry.
280 * Return whether the flush was successful.
281 */
282 int
flushentry(void)283 flushentry(void)
284 {
285 struct bufarea *cgbp;
286
287 if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
288 return (0);
289 cgbp = &cgbufs[flushtries++];
290 if (cgbp->b_un.b_cg == NULL)
291 return (0);
292 flush(fswritefd, cgbp);
293 free(cgbp->b_un.b_buf);
294 cgbp->b_un.b_buf = NULL;
295 return (1);
296 }
297
298 /*
299 * Manage a cache of filesystem disk blocks.
300 */
/*
 * Look up (or read in) disk block "blkno" of length "size" bytes.
 * Returns a referenced buffer, or the shared "failedbuf" (b_errs < 0)
 * when the block number is out of range.  Buffers live on an LRU
 * queue and are hashed by device block number; when no free buffer
 * exists, the oldest non-busy (and, during journal recovery,
 * non-dirty) buffer is recycled, growing the pool only when
 * recycling is impossible.
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_err == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	bhdp = &bufhashhd[HASH(blkno)];
	/* Check the cache first. */
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffers back to the front of the LRU so we
	 * do not endlessly inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		/* An invalidated buffer is available for immediate reuse. */
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	/* Hand out a reference only when the read succeeded. */
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}
393
394 void
getblk(struct bufarea * bp,ufs2_daddr_t blk,long size)395 getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
396 {
397 ufs2_daddr_t dblk;
398 struct timespec start, finish;
399
400 dblk = fsbtodb(&sblock, blk);
401 if (bp->b_bno == dblk) {
402 totalreads++;
403 } else {
404 if (debug) {
405 readcnt[bp->b_type]++;
406 clock_gettime(CLOCK_REALTIME_PRECISE, &start);
407 }
408 bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
409 if (debug) {
410 clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
411 timespecsub(&finish, &start, &finish);
412 timespecadd(&readtime[bp->b_type], &finish,
413 &readtime[bp->b_type]);
414 }
415 bp->b_bno = dblk;
416 bp->b_size = size;
417 }
418 }
419
/*
 * Release one reference on a buffer.  Complains if the count is
 * about to go negative, which indicates a brelse() without a
 * matching reference from getdatablk().
 */
void
brelse(struct bufarea *bp)
{

	if (bp->b_refcnt <= 0)
		prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
}
428
/*
 * Invalidate a buffer: discard any pending dirty data and move it
 * from its hash chain onto the free list for reuse.
 */
void
binval(struct bufarea *bp)
{

	bp->b_flags &= ~B_DIRTY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
}
437
/*
 * Write buffer "bp" to descriptor "fd" if it is dirty, dispatching
 * on buffer type: superblocks via sbput(), cylinder groups via
 * cgput() (with their summaries recomputed first during journal
 * recovery), and everything else via blwrite() after snapshot
 * copy-on-write.  In debug mode, UFS2 inode blocks also have their
 * per-inode check-hashes verified before being written.
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	/* A buffer that failed to read is written back as (partial) zeroes. */
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			/* Verify the check-hash of every inode in the block. */
			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}
508
509 /*
510 * If there are any snapshots, ensure that all the blocks that they
511 * care about have been copied, then release the snapshot inodes.
512 * These operations need to be done before we rebuild the cylinder
513 * groups so that any block allocations are properly recorded.
514 * Since all the cylinder group maps have already been copied in
515 * the snapshots, no further snapshot copies will need to be done.
516 */
517 void
snapflush(ufs2_daddr_t (* checkblkavail)(ufs2_daddr_t,long))518 snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
519 {
520 struct bufarea *bp;
521 int cnt;
522
523 if (snapcnt > 0) {
524 if (debug)
525 printf("Check for snapshot copies\n");
526 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
527 if ((bp->b_flags & B_DIRTY) != 0)
528 copyonwrite(&sblock, bp, checkblkavail);
529 for (cnt = 0; cnt < snapcnt; cnt++)
530 irelse(&snaplist[cnt]);
531 snapcnt = 0;
532 }
533 }
534
535 /*
536 * Journaled soft updates does not maintain cylinder group summary
537 * information during cleanup, so this routine recalculates the summary
538 * information and updates the superblock summary in preparation for
539 * writing out the cylinder group.
540 */
static void
cg_write(struct bufarea *bp)
{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;

	/*
	 * Fix the frag and cluster summary.
	 */
	cgp = bp->b_un.b_cg;
	cgp->cg_cs.cs_nbfree = 0;
	cgp->cg_cs.cs_nffree = 0;
	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
	if (sblock.fs_contigsumsize > 0) {
		/* Reset the cluster summary counts and cluster free map. */
		for (i = 1; i <= sblock.fs_contigsumsize; i++)
			cg_clustersum(cgp)[i] = 0;
		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
	}
	/* Recount free blocks and free fragments from the free-frag bitmap. */
	blksfree = cg_blksfree(cgp);
	for (cgbno = 0; cgbno < maxbno; cgbno++) {
		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
			continue;
		if (ffs_isblock(&sblock, blksfree, cgbno)) {
			/* Counted as an entirely free block. */
			ffs_clusteracct(&sblock, cgp, cgbno, 1);
			cgp->cg_cs.cs_nbfree++;
			continue;
		}
		/* Otherwise tally the block's individual free fragments. */
		fragno = blkstofrags(&sblock, cgbno);
		blk = blkmap(&sblock, blksfree, fragno);
		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
		for (i = 0; i < sblock.fs_frag; i++)
			if (isset(blksfree, fragno + i))
				cgp->cg_cs.cs_nffree++;
	}
	/*
	 * Update the superblock cg summary from our now correct values
	 * before writing the block.
	 */
	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}
591
/*
 * Report a fatal read/write error on block "blk".  Any I/O error
 * aborts immediately in background-check mode; otherwise the
 * operator is asked whether to continue.
 */
void
rwerror(const char *mesg, ufs2_daddr_t blk)
{

	if (bkgrdcheck)
		exit(EEXIT);
	if (preen == 0)
		printf("\n");
	pfatal("CANNOT %s: %ld", mesg, (long)blk);
	if (reply("CONTINUE") == 0)
		exit(EEXIT);
}
604
/*
 * Flush all cached state and release resources at the end of a check.
 * If "markclean" is set, the superblock's clean flag is set (and, in
 * background mode, the kernel is told via sysctl).  Buffers must be
 * flushed in the fixed order documented below so that a partially
 * completed run remains idempotent.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		/* Keep the kernel's unclean flag in sync with our result. */
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	/* Read-only run: nothing to flush. */
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 *  1) cylinder groups (bitmaps)
	 *  2) indirect, directory, external attribute, and data blocks
	 *  3) inode blocks
	 *  4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	/* If we repaired from an alternate, offer to restore the primary. */
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		/* Writing the clean flag alone does not count as "modified". */
		ofsmodified = fsmodified;
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}
781
782 /*
783 * Print out I/O statistics.
784 */
785 void
IOstats(char * what)786 IOstats(char *what)
787 {
788 int i;
789
790 if (debug == 0)
791 return;
792 if (diskreads == 0) {
793 printf("%s: no I/O\n\n", what);
794 return;
795 }
796 if (startpass.tv_sec == 0)
797 startpass = startprog;
798 printf("%s: I/O statistics\n", what);
799 printIOstats();
800 totaldiskreads += diskreads;
801 diskreads = 0;
802 for (i = 0; i < BT_NUMBUFTYPES; i++) {
803 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
804 totalreadcnt[i] += readcnt[i];
805 readtime[i].tv_sec = readtime[i].tv_nsec = 0;
806 readcnt[i] = 0;
807 }
808 clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
809 }
810
811 void
finalIOstats(void)812 finalIOstats(void)
813 {
814 int i;
815
816 if (debug == 0)
817 return;
818 printf("Final I/O statistics\n");
819 totaldiskreads += diskreads;
820 diskreads = totaldiskreads;
821 startpass = startprog;
822 for (i = 0; i < BT_NUMBUFTYPES; i++) {
823 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
824 totalreadcnt[i] += readcnt[i];
825 readtime[i] = totalreadtime[i];
826 readcnt[i] = totalreadcnt[i];
827 }
828 printIOstats();
829 }
830
printIOstats(void)831 static void printIOstats(void)
832 {
833 long long msec, totalmsec;
834 int i;
835
836 clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
837 timespecsub(&finishpass, &startpass, &finishpass);
838 printf("Running time: %jd.%03ld sec\n",
839 (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
840 printf("buffer reads by type:\n");
841 for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
842 totalmsec += readtime[i].tv_sec * 1000 +
843 readtime[i].tv_nsec / 1000000;
844 if (totalmsec == 0)
845 totalmsec = 1;
846 for (i = 0; i < BT_NUMBUFTYPES; i++) {
847 if (readcnt[i] == 0)
848 continue;
849 msec =
850 readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
851 printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
852 buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
853 (readcnt[i] * 1000 / diskreads) % 10,
854 (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
855 msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
856 }
857 printf("\n");
858 }
859
/*
 * Read "size" bytes of disk block "blk" into "buf".  Returns 0 on
 * success.  On failure, the operator is consulted (fatal when
 * "surrender" is set), then the read is retried sector by sector
 * over a zeroed buffer; each unreadable sector is reported and the
 * count of failed sectors is returned.  Background fsck paces its
 * I/O with slowio_start()/slowio_end().
 */
int
blread(int fd, char *buf, ufs2_daddr_t blk, long size)
{
	char *cp;
	int i, errs;
	off_t offset;

	offset = blk;
	offset *= dev_bsize;
	if (bkgrdflag)
		slowio_start();
	totalreads++;
	diskreads++;
	if (pread(fd, buf, (int)size, offset) == size) {
		if (bkgrdflag)
			slowio_end();
		return (0);
	}

	/*
	 * This is handled specially here instead of in rwerror because
	 * rwerror is used for all sorts of errors, not just true read/write
	 * errors. It should be refactored and fixed.
	 */
	if (surrender) {
		pfatal("CANNOT READ_BLK: %ld", (long)blk);
		errx(EEXIT, "ABORTING DUE TO READ ERRORS");
	} else
		rwerror("READ BLK", blk);

	/* Retry one sector at a time; unreadable sectors stay zeroed. */
	errs = 0;
	memset(buf, 0, (size_t)size);
	printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
	for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
		if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
			/*
			 * Print the sector number; when sector and device
			 * block sizes differ, print both units.
			 */
			if (secsize != dev_bsize && dev_bsize != 1)
				printf(" %jd (%jd),",
				    (intmax_t)(blk * dev_bsize + i) / secsize,
				    (intmax_t)blk + i / dev_bsize);
			else
				printf(" %jd,", (intmax_t)blk + i / dev_bsize);
			errs++;
		}
	}
	printf("\n");
	if (errs)
		resolved = 0;
	return (errs);
}
909
910 void
blwrite(int fd,char * buf,ufs2_daddr_t blk,ssize_t size)911 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
912 {
913 int i;
914 char *cp;
915 off_t offset;
916
917 if (fd < 0)
918 return;
919 offset = blk;
920 offset *= dev_bsize;
921 if (pwrite(fd, buf, size, offset) == size) {
922 fsmodified = 1;
923 return;
924 }
925 resolved = 0;
926 rwerror("WRITE BLK", blk);
927 printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
928 for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
929 if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
930 printf(" %jd,", (intmax_t)blk + i / dev_bsize);
931 printf("\n");
932 return;
933 }
934
/*
 * Ask the underlying device to discard "size" bytes starting at
 * block "blk" via the DIOCGDELETE ioctl.  The operation is purely
 * advisory, so failures are deliberately ignored.  A negative fd is
 * silently ignored.
 */
void
blerase(int fd, ufs2_daddr_t blk, long size)
{
	off_t ioarg[2];

	if (fd < 0)
		return;
	ioarg[0] = blk * dev_bsize;
	ioarg[1] = size;
	ioctl(fd, DIOCGDELETE, ioarg);
	/* we don't really care if we succeed or not */
	return;
}
948
949 /*
950 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by
951 * definition a multiple of dev_bsize.
952 */
953 void
blzero(int fd,ufs2_daddr_t blk,long size)954 blzero(int fd, ufs2_daddr_t blk, long size)
955 {
956 static char *zero;
957 off_t offset, len;
958
959 if (fd < 0)
960 return;
961 if (zero == NULL) {
962 zero = Balloc(ZEROBUFSIZE);
963 if (zero == NULL)
964 errx(EEXIT, "cannot allocate buffer pool");
965 }
966 offset = blk * dev_bsize;
967 if (lseek(fd, offset, 0) < 0)
968 rwerror("SEEK BLK", blk);
969 while (size > 0) {
970 len = MIN(ZEROBUFSIZE, size);
971 if (write(fd, zero, len) != len)
972 rwerror("WRITE BLK", blk);
973 blk += len / dev_bsize;
974 size -= len;
975 }
976 }
977
978 /*
979 * Verify cylinder group's magic number and other parameters. If the
980 * test fails, offer an option to rebuild the whole cylinder group.
981 *
982 * Return 1 if the cylinder group is good or return 0 if it is bad.
983 */
#undef CHK
/*
 * CHK(lhs, op, rhs, fmt): if "lhs op rhs" holds, the cylinder group
 * fails validation; report the offending comparison and set the
 * local "error" flag.  Requires "cg" and "error" in the caller's
 * scope.
 */
#define CHK(lhs, op, rhs, fmt)						\
	if (lhs op rhs) {						\
		pwarn("UFS%d cylinder group %d failed: "		\
		    "%s (" #fmt ") %s %s (" #fmt ")\n",			\
		    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,	\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);	\
		error = 1;						\
	}
int
check_cgmagic(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	uint32_t cghash, calchash;
	static int prevfailcg = -1;	/* last group reported; avoid repeats */
	long start;
	int error;

	/*
	 * Extended cylinder group checks.
	 */
	calchash = cgp->cg_ckhash;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
	    (ckhashadd & CK_CYLGRP) == 0) {
		/* Recompute the check-hash with the stored hash zeroed. */
		cghash = cgp->cg_ckhash;
		cgp->cg_ckhash = 0;
		calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
		cgp->cg_ckhash = cghash;
	}
	error = 0;
	CHK(cgp->cg_ckhash, !=, calchash, "%jd");
	CHK(cg_chkmagic(cgp), ==, 0, "%jd");
	CHK(cgp->cg_cgx, !=, cg, "%jd");
	CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
	} else if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
	}
	/* The last cylinder group may be shorter than fs_fpg fragments. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
		CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
	} else {
		CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
		    "%jd");
	}
	/* Validate the offsets of the maps that follow the cg header. */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_iusedoff, !=, start, "%jd");
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_niblk, !=, 0, "%jd");
		CHK(cgp->cg_initediblk, !=, 0, "%jd");
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_btotoff, !=, start, "%jd");
		CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t), "%jd");
		CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
	}
	CHK(cgp->cg_freeoff, !=,
	    cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
	if (sblock.fs_contigsumsize == 0) {
		CHK(cgp->cg_nextfreeoff, !=,
		    cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
	} else {
		CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
		    "%jd");
		CHK(cgp->cg_clustersumoff, !=,
		    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
		    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
		    "%jd");
	}
	if (error == 0)
		return (1);
	/* Report each failing cylinder group only once. */
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}
1069
/*
 * Rebuild a cylinder group that failed check_cgmagic().  The layout
 * computed here must match the offsets that check_cgmagic() expects.
 */
void
rebuild_cg(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	long start;

	/*
	 * Zero out the cylinder group and then initialize critical fields.
	 * Bit maps and summaries will be recalculated by later passes.
	 */
	memset(cgp, 0, (size_t)sblock.fs_cgsize);
	cgp->cg_magic = CG_MAGIC;
	cgp->cg_cgx = cg;
	cgp->cg_niblk = sblock.fs_ipg;
	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
	/* The last cylinder group may be shorter than fs_fpg fragments. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
		cgp->cg_ndblk = sblock.fs_fpg;
	else
		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
	/* Lay out the maps directly after the cg header. */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		cgp->cg_iusedoff = start;
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		/* UFS1 keeps inode counts in the cg_old_* fields instead. */
		cgp->cg_niblk = 0;
		cgp->cg_initediblk = 0;
		cgp->cg_old_ncyl = sblock.fs_old_cpg;
		cgp->cg_old_niblk = sblock.fs_ipg;
		cgp->cg_old_btotoff = start;
		cgp->cg_old_boff = cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t);
		cgp->cg_iusedoff = cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t);
	}
	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
	if (sblock.fs_contigsumsize > 0) {
		/* Reserve space for the cluster summary and cluster map. */
		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
		cgp->cg_clustersumoff =
		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
		cgp->cg_clustersumoff -= sizeof(u_int32_t);
		cgp->cg_clusteroff = cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
	}
	/* Seal the rebuilt group with a fresh check-hash and mark it dirty. */
	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
	cgdirty(cgbp);
}
1118
1119 /*
1120 * allocate a data block with the specified number of fragments
1121 */
1122 ufs2_daddr_t
allocblk(long startcg,long frags,ufs2_daddr_t (* checkblkavail)(ufs2_daddr_t blkno,long frags))1123 allocblk(long startcg, long frags,
1124 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
1125 {
1126 ufs2_daddr_t blkno, newblk;
1127
1128 if (sujrecovery && checkblkavail == std_checkblkavail) {
1129 pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
1130 return (0);
1131 }
1132 if (frags <= 0 || frags > sblock.fs_frag)
1133 return (0);
1134 for (blkno = MAX(cgdata(&sblock, startcg), 0);
1135 blkno < maxfsblock - sblock.fs_frag;
1136 blkno += sblock.fs_frag) {
1137 if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1138 continue;
1139 if (newblk > 0)
1140 return (newblk);
1141 if (newblk < 0)
1142 blkno = -newblk;
1143 }
1144 for (blkno = MAX(cgdata(&sblock, 0), 0);
1145 blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
1146 blkno += sblock.fs_frag) {
1147 if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
1148 continue;
1149 if (newblk > 0)
1150 return (newblk);
1151 if (newblk < 0)
1152 blkno = -newblk;
1153 }
1154 return (0);
1155 }
1156
/*
 * Default availability checker for allocblk(): test whether a run of
 * "frags" free fragments exists at or shortly after "blkno", and claim
 * it if so.  Returns the first fragment of the claimed run (possibly
 * shifted by up to fs_frag - frags fragments from the request), 0 if no
 * run is available here, or a negative value whose magnitude tells the
 * caller where to resume scanning when this cylinder group's header is
 * unusable.
 */
ufs2_daddr_t
std_checkblkavail(ufs2_daddr_t blkno, long frags)
{
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t j, k, baseblk;
	long cg;

	if ((u_int64_t)blkno > sblock.fs_size)
		return (0);
	/* Try each candidate start offset within one block. */
	for (j = 0; j <= sblock.fs_frag - frags; j++) {
		if (testbmap(blkno + j))
			continue;
		/* First fragment is free; check the rest of the run. */
		for (k = 1; k < frags; k++)
			if (testbmap(blkno + j + k))
				break;
		if (k < frags) {
			/* Run too short; skip past the busy fragment. */
			j += k;
			continue;
		}
		cg = dtog(&sblock, blkno + j);
		cgbp = cglookup(cg);
		cgp = cgbp->b_un.b_cg;
		if (!check_cgmagic(cg, cgbp))
			/* Bad cg: direct the caller past this group. */
			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
		/* Claim the run in both fsck's map and the cg free map. */
		baseblk = dtogd(&sblock, blkno + j);
		for (k = 0; k < frags; k++) {
			setbmap(blkno + j + k);
			clrbit(cg_blksfree(cgp), baseblk + k);
		}
		/* Keep the free-space summary counts consistent. */
		n_blks += frags;
		if (frags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree--;
		else
			cgp->cg_cs.cs_nffree -= frags;
		cgdirty(cgbp);
		return (blkno + j);
	}
	return (0);
}
1197
1198 /*
1199 * Check whether a file size is within the limits for the filesystem.
1200 * Return 1 when valid and 0 when too big.
1201 *
1202 * This should match the file size limit in ffs_mountfs().
1203 */
1204 int
chkfilesize(mode_t mode,u_int64_t filesize)1205 chkfilesize(mode_t mode, u_int64_t filesize)
1206 {
1207 u_int64_t kernmaxfilesize;
1208
1209 if (sblock.fs_magic == FS_UFS1_MAGIC)
1210 kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
1211 else
1212 kernmaxfilesize = sblock.fs_maxfilesize;
1213 if (filesize > kernmaxfilesize ||
1214 filesize > sblock.fs_maxfilesize ||
1215 (mode == IFDIR && filesize > MAXDIRSIZE)) {
1216 if (debug)
1217 printf("bad file size %ju:", (uintmax_t)filesize);
1218 return (0);
1219 }
1220 return (1);
1221 }
1222
1223 /*
1224 * Slow down IO so as to leave some disk bandwidth for other processes
1225 */
1226 void
slowio_start()1227 slowio_start()
1228 {
1229
1230 /* Delay one in every 8 operations */
1231 slowio_pollcnt = (slowio_pollcnt + 1) & 7;
1232 if (slowio_pollcnt == 0) {
1233 gettimeofday(&slowio_starttime, NULL);
1234 }
1235 }
1236
1237 void
slowio_end()1238 slowio_end()
1239 {
1240 struct timeval tv;
1241 int delay_usec;
1242
1243 if (slowio_pollcnt != 0)
1244 return;
1245
1246 /* Update the slowdown interval. */
1247 gettimeofday(&tv, NULL);
1248 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 +
1249 (tv.tv_usec - slowio_starttime.tv_usec);
1250 if (delay_usec < 64)
1251 delay_usec = 64;
1252 if (delay_usec > 2500000)
1253 delay_usec = 2500000;
1254 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6;
1255 /* delay by 8 times the average IO delay */
1256 if (slowio_delay_usec > 64)
1257 usleep(slowio_delay_usec * 8);
1258 }
1259
1260 /*
1261 * Find a pathname
1262 */
1263 void
getpathname(char * namebuf,ino_t curdir,ino_t ino)1264 getpathname(char *namebuf, ino_t curdir, ino_t ino)
1265 {
1266 int len;
1267 char *cp;
1268 struct inode ip;
1269 struct inodesc idesc;
1270 static int busy = 0;
1271
1272 if (curdir == ino && ino == UFS_ROOTINO) {
1273 (void)strcpy(namebuf, "/");
1274 return;
1275 }
1276 if (busy || !INO_IS_DVALID(curdir)) {
1277 (void)strcpy(namebuf, "?");
1278 return;
1279 }
1280 busy = 1;
1281 memset(&idesc, 0, sizeof(struct inodesc));
1282 idesc.id_type = DATA;
1283 idesc.id_fix = IGNORE;
1284 cp = &namebuf[MAXPATHLEN - 1];
1285 *cp = '\0';
1286 if (curdir != ino) {
1287 idesc.id_parent = curdir;
1288 goto namelookup;
1289 }
1290 while (ino != UFS_ROOTINO) {
1291 idesc.id_number = ino;
1292 idesc.id_func = findino;
1293 idesc.id_name = strdup("..");
1294 ginode(ino, &ip);
1295 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1296 irelse(&ip);
1297 free(idesc.id_name);
1298 break;
1299 }
1300 irelse(&ip);
1301 free(idesc.id_name);
1302 namelookup:
1303 idesc.id_number = idesc.id_parent;
1304 idesc.id_parent = ino;
1305 idesc.id_func = findname;
1306 idesc.id_name = namebuf;
1307 ginode(idesc.id_number, &ip);
1308 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
1309 irelse(&ip);
1310 break;
1311 }
1312 irelse(&ip);
1313 len = strlen(namebuf);
1314 cp -= len;
1315 memmove(cp, namebuf, (size_t)len);
1316 *--cp = '/';
1317 if (cp < &namebuf[UFS_MAXNAMLEN])
1318 break;
1319 ino = idesc.id_number;
1320 }
1321 busy = 0;
1322 if (ino != UFS_ROOTINO)
1323 *--cp = '?';
1324 memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp));
1325 }
1326
/*
 * Signal handler: flush and release fsck state, then exit.
 * NOTE(review): exit status 12 appears to report a check terminated by
 * a signal -- confirm against fsck(8)'s documented exit codes.
 */
void
catch(int sig __unused)
{

	ckfini(0);
	exit(12);
}
1334
1335 /*
1336 * When preening, allow a single quit to signal
1337 * a special exit after file system checks complete
1338 * so that reboot sequence may be interrupted.
1339 */
1340 void
catchquit(int sig __unused)1341 catchquit(int sig __unused)
1342 {
1343 printf("returning to single-user after file system check\n");
1344 returntosingle = 1;
1345 (void)signal(SIGQUIT, SIG_DFL);
1346 }
1347
1348 /*
1349 * determine whether an inode should be fixed.
1350 */
1351 int
dofix(struct inodesc * idesc,const char * msg)1352 dofix(struct inodesc *idesc, const char *msg)
1353 {
1354
1355 switch (idesc->id_fix) {
1356
1357 case DONTKNOW:
1358 if (idesc->id_type == DATA)
1359 direrror(idesc->id_number, msg);
1360 else
1361 pwarn("%s", msg);
1362 if (preen) {
1363 printf(" (SALVAGED)\n");
1364 idesc->id_fix = FIX;
1365 return (ALTERED);
1366 }
1367 if (reply("SALVAGE") == 0) {
1368 idesc->id_fix = NOFIX;
1369 return (0);
1370 }
1371 idesc->id_fix = FIX;
1372 return (ALTERED);
1373
1374 case FIX:
1375 return (ALTERED);
1376
1377 case NOFIX:
1378 case IGNORE:
1379 return (0);
1380
1381 default:
1382 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix);
1383 }
1384 /* NOTREACHED */
1385 return (0);
1386 }
1387
1388 #include <stdarg.h>
1389
1390 /*
1391 * Print details about a buffer.
1392 */
1393 void
prtbuf(struct bufarea * bp,const char * fmt,...)1394 prtbuf(struct bufarea *bp, const char *fmt, ...)
1395 {
1396 va_list ap;
1397 va_start(ap, fmt);
1398 if (preen)
1399 (void)fprintf(stdout, "%s: ", cdevname);
1400 (void)vfprintf(stdout, fmt, ap);
1401 va_end(ap);
1402 printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
1403 "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
1404 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
1405 (intmax_t) bp->b_index);
1406 }
1407
1408 /*
1409 * An unexpected inconsistency occurred.
1410 * Die if preening or file system is running with soft dependency protocol,
1411 * otherwise just print message and continue.
1412 */
1413 void
pfatal(const char * fmt,...)1414 pfatal(const char *fmt, ...)
1415 {
1416 va_list ap;
1417 va_start(ap, fmt);
1418 if (!preen) {
1419 (void)vfprintf(stdout, fmt, ap);
1420 va_end(ap);
1421 if (usedsoftdep)
1422 (void)fprintf(stdout,
1423 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n");
1424 /*
1425 * Force foreground fsck to clean up inconsistency.
1426 */
1427 if (bkgrdflag) {
1428 cmd.value = FS_NEEDSFSCK;
1429 cmd.size = 1;
1430 if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1431 &cmd, sizeof cmd) == -1)
1432 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1433 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n");
1434 ckfini(0);
1435 exit(EEXIT);
1436 }
1437 return;
1438 }
1439 if (cdevname == NULL)
1440 cdevname = strdup("fsck");
1441 (void)fprintf(stdout, "%s: ", cdevname);
1442 (void)vfprintf(stdout, fmt, ap);
1443 (void)fprintf(stdout,
1444 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n",
1445 cdevname, usedsoftdep ? " SOFT UPDATE " : " ");
1446 /*
1447 * Force foreground fsck to clean up inconsistency.
1448 */
1449 if (bkgrdflag) {
1450 cmd.value = FS_NEEDSFSCK;
1451 cmd.size = 1;
1452 if (sysctlbyname("vfs.ffs.setflags", 0, 0,
1453 &cmd, sizeof cmd) == -1)
1454 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
1455 }
1456 ckfini(0);
1457 exit(EEXIT);
1458 }
1459
1460 /*
1461 * Pwarn just prints a message when not preening or running soft dependency
1462 * protocol, or a warning (preceded by filename) when preening.
1463 */
1464 void
pwarn(const char * fmt,...)1465 pwarn(const char *fmt, ...)
1466 {
1467 va_list ap;
1468 va_start(ap, fmt);
1469 if (preen)
1470 (void)fprintf(stdout, "%s: ", cdevname);
1471 (void)vfprintf(stdout, fmt, ap);
1472 va_end(ap);
1473 }
1474
1475 /*
1476 * Stub for routines from kernel.
1477 */
1478 void
panic(const char * fmt,...)1479 panic(const char *fmt, ...)
1480 {
1481 va_list ap;
1482 va_start(ap, fmt);
1483 pfatal("INTERNAL INCONSISTENCY:");
1484 (void)vfprintf(stdout, fmt, ap);
1485 va_end(ap);
1486 exit(EEXIT);
1487 }
1488