1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #if 0 33 #ifndef lint 34 static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; 35 #endif /* not lint */ 36 #endif 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/time.h> 42 #include <sys/types.h> 43 #include <sys/sysctl.h> 44 #include <sys/disk.h> 45 #include <sys/disklabel.h> 46 #include <sys/ioctl.h> 47 #include <sys/stat.h> 48 49 #include <ufs/ufs/dinode.h> 50 #include <ufs/ufs/dir.h> 51 #include <ufs/ffs/fs.h> 52 53 #include <err.h> 54 #include <errno.h> 55 #include <string.h> 56 #include <ctype.h> 57 #include <fstab.h> 58 #include <stdint.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <time.h> 62 #include <unistd.h> 63 #include <libufs.h> 64 65 #include "fsck.h" 66 67 int sujrecovery = 0; 68 69 static struct bufarea *allocbuf(const char *); 70 static void cg_write(struct bufarea *); 71 static void slowio_start(void); 72 static void slowio_end(void); 73 static void printIOstats(void); 74 static void prtbuf(const char *, struct bufarea *); 75 76 static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ 77 static struct timespec startpass, finishpass; 78 struct timeval slowio_starttime; 79 int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ 80 int slowio_pollcnt; 81 static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ 82 static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ 83 static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ 84 static int numbufs; /* size of buffer cache */ 85 static int cachelookups; /* number of cache lookups */ 86 static int cachereads; /* number of cache reads */ 87 static struct bufarea *cgbufs; /* header for cylinder group cache */ 88 static int flushtries; /* number of tries to reclaim memory */ 89 90 char *buftype[BT_NUMBUFTYPES] = BT_NAMES; 91 92 void 93 fsutilinit(void) 94 { 95 diskreads = totaldiskreads = totalreads = 0; 96 bzero(&startpass, sizeof(struct timespec)); 97 bzero(&finishpass, sizeof(struct timespec)); 98 bzero(&slowio_starttime, sizeof(struct timeval)); 99 slowio_delay_usec = 10000; 100 slowio_pollcnt = 0; 101 flushtries = 0; 102 } 103 104 int 105 ftypeok(union dinode *dp) 106 { 107 switch (DIP(dp, di_mode) & IFMT) { 108 109 case IFDIR: 110 case IFREG: 111 case IFBLK: 112 case IFCHR: 113 case IFLNK: 114 case IFSOCK: 115 case IFIFO: 116 return (1); 117 118 default: 119 if (debug) 120 printf("bad file type 0%o\n", DIP(dp, di_mode)); 121 return (0); 122 } 123 } 124 125 int 126 reply(const char *question) 127 { 128 int persevere; 129 char c; 130 131 if (preen) 132 pfatal("INTERNAL ERROR: GOT TO reply()"); 133 persevere = !strcmp(question, "CONTINUE"); 134 printf("\n"); 135 if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { 136 printf("%s? no\n\n", question); 137 resolved = 0; 138 return (0); 139 } 140 if (yflag || (persevere && nflag)) { 141 printf("%s? yes\n\n", question); 142 return (1); 143 } 144 do { 145 printf("%s? [yn] ", question); 146 (void) fflush(stdout); 147 c = getc(stdin); 148 while (c != '\n' && getc(stdin) != '\n') { 149 if (feof(stdin)) { 150 resolved = 0; 151 return (0); 152 } 153 } 154 } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); 155 printf("\n"); 156 if (c == 'y' || c == 'Y') 157 return (1); 158 resolved = 0; 159 return (0); 160 } 161 162 /* 163 * Look up state information for an inode. 164 */ 165 struct inostat * 166 inoinfo(ino_t inum) 167 { 168 static struct inostat unallocated = { USTATE, 0, 0 }; 169 struct inostatlist *ilp; 170 int iloff; 171 172 if (inum > maxino) 173 errx(EEXIT, "inoinfo: inumber %ju out of range", 174 (uintmax_t)inum); 175 ilp = &inostathead[inum / sblock.fs_ipg]; 176 iloff = inum % sblock.fs_ipg; 177 if (iloff >= ilp->il_numalloced) 178 return (&unallocated); 179 return (&ilp->il_stat[iloff]); 180 } 181 182 /* 183 * Malloc buffers and set up cache. 184 */ 185 void 186 bufinit(void) 187 { 188 int i; 189 190 pdirbp = (struct bufarea *)0; 191 bzero(&cgblk, sizeof(struct bufarea)); 192 cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize); 193 if (cgblk.b_un.b_buf == NULL) 194 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); 195 initbarea(&cgblk, BT_CYLGRP); 196 cgbufs = NULL; 197 numbufs = cachelookups = cachereads = 0; 198 TAILQ_INIT(&bufqueuehd); 199 for (i = 0; i < HASHSIZE; i++) 200 LIST_INIT(&bufhashhd[i]); 201 for (i = 0; i < BT_NUMBUFTYPES; i++) { 202 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; 203 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; 204 readcnt[i] = totalreadcnt[i] = 0; 205 } 206 } 207 208 static struct bufarea * 209 allocbuf(const char *failreason) 210 { 211 struct bufarea *bp; 212 char *bufp; 213 214 bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); 215 bufp = Malloc((unsigned int)sblock.fs_bsize); 216 if (bp == NULL || bufp == NULL) { 217 errx(EEXIT, "%s", failreason); 218 /* NOTREACHED */ 219 } 220 numbufs++; 221 bp->b_un.b_buf = bufp; 222 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 223 initbarea(bp, BT_UNKNOWN); 224 return (bp); 225 } 226 227 /* 228 * Manage cylinder group buffers. 229 * 230 * Use getblk() here rather than cgget() because the cylinder group 231 * may be corrupted but we want it anyway so we can fix it. 232 */ 233 static struct bufarea *cgbufs; /* header for cylinder group cache */ 234 static int flushtries; /* number of tries to reclaim memory */ 235 236 struct bufarea * 237 cglookup(int cg) 238 { 239 struct bufarea *cgbp; 240 struct cg *cgp; 241 242 if ((unsigned) cg >= sblock.fs_ncg) 243 errx(EEXIT, "cglookup: out of range cylinder group %d", cg); 244 if (cgbufs == NULL) { 245 cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea)); 246 if (cgbufs == NULL) 247 errx(EEXIT, "Cannot allocate cylinder group buffers"); 248 } 249 cgbp = &cgbufs[cg]; 250 if (cgbp->b_un.b_cg != NULL) 251 return (cgbp); 252 cgp = NULL; 253 if (flushtries == 0) 254 cgp = Malloc((unsigned int)sblock.fs_cgsize); 255 if (cgp == NULL) { 256 if (sujrecovery) 257 errx(EEXIT,"Ran out of memory during journal recovery"); 258 getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); 259 return (&cgblk); 260 } 261 cgbp->b_un.b_cg = cgp; 262 initbarea(cgbp, BT_CYLGRP); 263 getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); 264 return (cgbp); 265 } 266 267 /* 268 * Mark a cylinder group buffer as dirty. 269 * Update its check-hash if they are enabled. 270 */ 271 void 272 cgdirty(struct bufarea *cgbp) 273 { 274 struct cg *cg; 275 276 cg = cgbp->b_un.b_cg; 277 if ((sblock.fs_metackhash & CK_CYLGRP) != 0) { 278 cg->cg_ckhash = 0; 279 cg->cg_ckhash = 280 calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize); 281 } 282 dirty(cgbp); 283 } 284 285 /* 286 * Attempt to flush a cylinder group cache entry. 287 * Return whether the flush was successful. 288 */ 289 int 290 flushentry(void) 291 { 292 struct bufarea *cgbp; 293 294 if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL) 295 return (0); 296 cgbp = &cgbufs[flushtries++]; 297 if (cgbp->b_un.b_cg == NULL) 298 return (0); 299 flush(fswritefd, cgbp); 300 free(cgbp->b_un.b_buf); 301 cgbp->b_un.b_buf = NULL; 302 return (1); 303 } 304 305 /* 306 * Manage a cache of directory blocks. 307 */ 308 struct bufarea * 309 getdatablk(ufs2_daddr_t blkno, long size, int type) 310 { 311 struct bufarea *bp; 312 struct bufhash *bhdp; 313 314 cachelookups++; 315 /* If out of range, return empty buffer with b_err == -1 */ 316 if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) { 317 blkno = -1; 318 type = BT_EMPTY; 319 } 320 bhdp = &bufhashhd[HASH(blkno)]; 321 LIST_FOREACH(bp, bhdp, b_hash) 322 if (bp->b_bno == fsbtodb(&sblock, blkno)) { 323 if (debug && bp->b_size != size) { 324 prtbuf("getdatablk: size mismatch", bp); 325 pfatal("getdatablk: b_size %d != size %ld\n", 326 bp->b_size, size); 327 } 328 goto foundit; 329 } 330 /* 331 * Move long-term busy buffer back to the front of the LRU so we 332 * do not endless inspect them for recycling. 333 */ 334 bp = TAILQ_LAST(&bufqueuehd, bufqueue); 335 if (bp != NULL && bp->b_refcnt != 0) { 336 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 337 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 338 } 339 /* 340 * Allocate up to the minimum number of buffers before 341 * considering recycling any of them. 342 */ 343 if (size > sblock.fs_bsize) 344 errx(EEXIT, "Excessive buffer size %ld > %d\n", size, 345 sblock.fs_bsize); 346 if (numbufs < MINBUFS) { 347 bp = allocbuf("cannot create minimal buffer pool"); 348 } else if (sujrecovery) { 349 /* 350 * SUJ recovery does not want anything written until it 351 * has successfully completed (so it can fail back to 352 * full fsck). Thus, we can only recycle clean buffers. 353 */ 354 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 355 if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0) 356 break; 357 if (bp == NULL) 358 bp = allocbuf("Ran out of memory during " 359 "journal recovery"); 360 else 361 LIST_REMOVE(bp, b_hash); 362 } else { 363 /* 364 * Recycle oldest non-busy buffer. 365 */ 366 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 367 if (bp->b_refcnt == 0) 368 break; 369 if (bp == NULL) 370 bp = allocbuf("Ran out of memory for buffers"); 371 else 372 LIST_REMOVE(bp, b_hash); 373 } 374 flush(fswritefd, bp); 375 bp->b_type = type; 376 LIST_INSERT_HEAD(bhdp, bp, b_hash); 377 getblk(bp, blkno, size); 378 cachereads++; 379 /* fall through */ 380 foundit: 381 if (debug && bp->b_type != type) { 382 printf("getdatablk: buffer type changed to %s", 383 BT_BUFTYPE(type)); 384 prtbuf("", bp); 385 } 386 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 387 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 388 if (bp->b_errs == 0) 389 bp->b_refcnt++; 390 return (bp); 391 } 392 393 void 394 getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) 395 { 396 ufs2_daddr_t dblk; 397 struct timespec start, finish; 398 399 dblk = fsbtodb(&sblock, blk); 400 if (bp->b_bno == dblk) { 401 totalreads++; 402 } else { 403 if (debug) { 404 readcnt[bp->b_type]++; 405 clock_gettime(CLOCK_REALTIME_PRECISE, &start); 406 } 407 if (bp->b_type != BT_EMPTY) 408 bp->b_errs = 409 blread(fsreadfd, bp->b_un.b_buf, dblk, size); 410 else 411 bp->b_errs = -1; 412 if (debug) { 413 clock_gettime(CLOCK_REALTIME_PRECISE, &finish); 414 timespecsub(&finish, &start, &finish); 415 timespecadd(&readtime[bp->b_type], &finish, 416 &readtime[bp->b_type]); 417 } 418 bp->b_bno = dblk; 419 bp->b_size = size; 420 } 421 } 422 423 void 424 brelse(struct bufarea *bp) 425 { 426 427 if (bp->b_refcnt <= 0) 428 prtbuf("brelse: buffer with negative reference count", bp); 429 bp->b_refcnt--; 430 } 431 432 void 433 flush(int fd, struct bufarea *bp) 434 { 435 struct inode ip; 436 437 if ((bp->b_flags & B_DIRTY) == 0) 438 return; 439 bp->b_flags &= ~B_DIRTY; 440 if (fswritefd < 0) { 441 pfatal("WRITING IN READ_ONLY MODE.\n"); 442 return; 443 } 444 if (bp->b_errs != 0) 445 pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", 446 (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", 447 (long long)bp->b_bno); 448 bp->b_errs = 0; 449 /* 450 * Write using the appropriate function. 451 */ 452 switch (bp->b_type) { 453 case BT_SUPERBLK: 454 if (bp != &sblk) 455 pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", 456 bp, &sblk); 457 if (sbput(fd, bp->b_un.b_fs, 0) == 0) 458 fsmodified = 1; 459 break; 460 case BT_CYLGRP: 461 if (sujrecovery) 462 cg_write(bp); 463 if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) 464 fsmodified = 1; 465 break; 466 case BT_INODES: 467 if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { 468 struct ufs2_dinode *dp = bp->b_un.b_dinode2; 469 int i; 470 471 for (i = 0; i < INOPB(&sblock); dp++, i++) { 472 if (ffs_verify_dinode_ckhash(&sblock, dp) == 0) 473 continue; 474 pwarn("flush: INODE CHECK-HASH FAILED"); 475 ip.i_bp = bp; 476 ip.i_dp = (union dinode *)dp; 477 ip.i_number = bp->b_index + i; 478 prtinode(&ip); 479 if (preen || reply("FIX") != 0) { 480 if (preen) 481 printf(" (FIXED)\n"); 482 ffs_update_dinode_ckhash(&sblock, dp); 483 inodirty(&ip); 484 } 485 } 486 } 487 /* FALLTHROUGH */ 488 default: 489 blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); 490 break; 491 } 492 } 493 494 /* 495 * Journaled soft updates does not maintain cylinder group summary 496 * information during cleanup, so this routine recalculates the summary 497 * information and updates the superblock summary in preparation for 498 * writing out the cylinder group. 499 */ 500 static void 501 cg_write(struct bufarea *bp) 502 { 503 ufs1_daddr_t fragno, cgbno, maxbno; 504 u_int8_t *blksfree; 505 struct cg *cgp; 506 int blk; 507 int i; 508 509 /* 510 * Fix the frag and cluster summary. 511 */ 512 cgp = bp->b_un.b_cg; 513 cgp->cg_cs.cs_nbfree = 0; 514 cgp->cg_cs.cs_nffree = 0; 515 bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 516 maxbno = fragstoblks(&sblock, sblock.fs_fpg); 517 if (sblock.fs_contigsumsize > 0) { 518 for (i = 1; i <= sblock.fs_contigsumsize; i++) 519 cg_clustersum(cgp)[i] = 0; 520 bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 521 } 522 blksfree = cg_blksfree(cgp); 523 for (cgbno = 0; cgbno < maxbno; cgbno++) { 524 if (ffs_isfreeblock(&sblock, blksfree, cgbno)) 525 continue; 526 if (ffs_isblock(&sblock, blksfree, cgbno)) { 527 ffs_clusteracct(&sblock, cgp, cgbno, 1); 528 cgp->cg_cs.cs_nbfree++; 529 continue; 530 } 531 fragno = blkstofrags(&sblock, cgbno); 532 blk = blkmap(&sblock, blksfree, fragno); 533 ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1); 534 for (i = 0; i < sblock.fs_frag; i++) 535 if (isset(blksfree, fragno + i)) 536 cgp->cg_cs.cs_nffree++; 537 } 538 /* 539 * Update the superblock cg summary from our now correct values 540 * before writing the block. 541 */ 542 sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; 543 } 544 545 void 546 rwerror(const char *mesg, ufs2_daddr_t blk) 547 { 548 549 if (bkgrdcheck) 550 exit(EEXIT); 551 if (preen == 0) 552 printf("\n"); 553 pfatal("CANNOT %s: %ld", mesg, (long)blk); 554 if (reply("CONTINUE") == 0) 555 exit(EEXIT); 556 } 557 558 void 559 ckfini(int markclean) 560 { 561 struct bufarea *bp, *nbp; 562 int ofsmodified, cnt; 563 564 if (bkgrdflag) { 565 unlink(snapname); 566 if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { 567 cmd.value = FS_UNCLEAN; 568 cmd.size = markclean ? -1 : 1; 569 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 570 &cmd, sizeof cmd) == -1) 571 rwerror("SET FILE SYSTEM FLAGS", FS_UNCLEAN); 572 if (!preen) { 573 printf("\n***** FILE SYSTEM MARKED %s *****\n", 574 markclean ? "CLEAN" : "DIRTY"); 575 if (!markclean) 576 rerun = 1; 577 } 578 } else if (!preen && !markclean) { 579 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 580 rerun = 1; 581 } 582 } 583 if (debug && totalreads > 0) 584 printf("cache with %d buffers missed %d of %d (%d%%)\n", 585 numbufs, cachereads, cachelookups, 586 (int)(cachereads * 100 / cachelookups)); 587 if (fswritefd < 0) { 588 (void)close(fsreadfd); 589 return; 590 } 591 /* 592 * To remain idempotent with partial truncations the buffers 593 * must be flushed in this order: 594 * 1) cylinder groups (bitmaps) 595 * 2) indirect, directory, external attribute, and data blocks 596 * 3) inode blocks 597 * 4) superblock 598 * This ordering preserves access to the modified pointers 599 * until they are freed. 600 */ 601 /* Step 1: cylinder groups */ 602 if (debug) 603 printf("Flush Cylinder groups\n"); 604 if (cgbufs != NULL) { 605 for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { 606 if (cgbufs[cnt].b_un.b_cg == NULL) 607 continue; 608 flush(fswritefd, &cgbufs[cnt]); 609 free(cgbufs[cnt].b_un.b_cg); 610 } 611 free(cgbufs); 612 } 613 flush(fswritefd, &cgblk); 614 free(cgblk.b_un.b_buf); 615 cnt = 0; 616 /* Step 2: indirect, directory, external attribute, and data blocks */ 617 if (debug) 618 printf("Flush indirect, directory, external attribute, " 619 "and data blocks\n"); 620 if (pdirbp != NULL) 621 brelse(pdirbp); 622 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 623 switch (bp->b_type) { 624 /* These should not be in the buffer cache list */ 625 case BT_UNKNOWN: 626 case BT_SUPERBLK: 627 case BT_CYLGRP: 628 default: 629 prtbuf("ckfini: improper buffer type on cache list",bp); 630 continue; 631 /* These are the ones to flush in this step */ 632 case BT_EMPTY: 633 if (bp->b_bno >= 0) 634 pfatal("Unused BT_EMPTY buffer for block %jd\n", 635 (intmax_t)bp->b_bno); 636 /* FALLTHROUGH */ 637 case BT_LEVEL1: 638 case BT_LEVEL2: 639 case BT_LEVEL3: 640 case BT_EXTATTR: 641 case BT_DIRDATA: 642 case BT_DATA: 643 break; 644 /* These are the ones to flush in the next step */ 645 case BT_INODES: 646 continue; 647 } 648 if (debug && bp->b_refcnt != 0) { 649 prtbuf("ckfini: clearing in-use buffer", bp); 650 pfatal("ckfini: clearing in-use buffer\n"); 651 } 652 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 653 cnt++; 654 flush(fswritefd, bp); 655 free(bp->b_un.b_buf); 656 free((char *)bp); 657 } 658 /* Step 3: inode blocks */ 659 if (debug) 660 printf("Flush inode blocks\n"); 661 if (icachebp != NULL) 662 brelse(icachebp); 663 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 664 if (debug && bp->b_refcnt != 0) { 665 prtbuf("ckfini: clearing in-use buffer", bp); 666 pfatal("ckfini: clearing in-use buffer\n"); 667 } 668 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 669 cnt++; 670 flush(fswritefd, bp); 671 free(bp->b_un.b_buf); 672 free((char *)bp); 673 } 674 if (numbufs != cnt) 675 errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); 676 /* Step 4: superblock */ 677 if (debug) 678 printf("Flush the superblock\n"); 679 flush(fswritefd, &sblk); 680 if (havesb && cursnapshot == 0 && sblock.fs_magic == FS_UFS2_MAGIC && 681 sblk.b_bno != sblock.fs_sblockloc / dev_bsize && 682 !preen && reply("UPDATE STANDARD SUPERBLOCK")) { 683 /* Change the write destination to standard superblock */ 684 sblock.fs_sblockactualloc = sblock.fs_sblockloc; 685 sblk.b_bno = sblock.fs_sblockloc / dev_bsize; 686 sbdirty(); 687 flush(fswritefd, &sblk); 688 } 689 pdirbp = (struct bufarea *)0; 690 if (cursnapshot == 0 && sblock.fs_clean != markclean) { 691 if ((sblock.fs_clean = markclean) != 0) { 692 sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); 693 sblock.fs_pendingblocks = 0; 694 sblock.fs_pendinginodes = 0; 695 } 696 sbdirty(); 697 ofsmodified = fsmodified; 698 flush(fswritefd, &sblk); 699 fsmodified = ofsmodified; 700 if (!preen) { 701 printf("\n***** FILE SYSTEM MARKED %s *****\n", 702 markclean ? "CLEAN" : "DIRTY"); 703 if (!markclean) 704 rerun = 1; 705 } 706 } else if (!preen) { 707 if (markclean) { 708 printf("\n***** FILE SYSTEM IS CLEAN *****\n"); 709 } else { 710 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 711 rerun = 1; 712 } 713 } 714 finalIOstats(); 715 (void)close(fsreadfd); 716 (void)close(fswritefd); 717 } 718 719 /* 720 * Print out I/O statistics. 721 */ 722 void 723 IOstats(char *what) 724 { 725 int i; 726 727 if (debug == 0) 728 return; 729 if (diskreads == 0) { 730 printf("%s: no I/O\n\n", what); 731 return; 732 } 733 if (startpass.tv_sec == 0) 734 startpass = startprog; 735 printf("%s: I/O statistics\n", what); 736 printIOstats(); 737 totaldiskreads += diskreads; 738 diskreads = 0; 739 for (i = 0; i < BT_NUMBUFTYPES; i++) { 740 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 741 totalreadcnt[i] += readcnt[i]; 742 readtime[i].tv_sec = readtime[i].tv_nsec = 0; 743 readcnt[i] = 0; 744 } 745 clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); 746 } 747 748 void 749 finalIOstats(void) 750 { 751 int i; 752 753 if (debug == 0) 754 return; 755 printf("Final I/O statistics\n"); 756 totaldiskreads += diskreads; 757 diskreads = totaldiskreads; 758 startpass = startprog; 759 for (i = 0; i < BT_NUMBUFTYPES; i++) { 760 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 761 totalreadcnt[i] += readcnt[i]; 762 readtime[i] = totalreadtime[i]; 763 readcnt[i] = totalreadcnt[i]; 764 } 765 printIOstats(); 766 } 767 768 static void printIOstats(void) 769 { 770 long long msec, totalmsec; 771 int i; 772 773 clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); 774 timespecsub(&finishpass, &startpass, &finishpass); 775 printf("Running time: %jd.%03ld sec\n", 776 (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); 777 printf("buffer reads by type:\n"); 778 for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) 779 totalmsec += readtime[i].tv_sec * 1000 + 780 readtime[i].tv_nsec / 1000000; 781 if (totalmsec == 0) 782 totalmsec = 1; 783 for (i = 0; i < BT_NUMBUFTYPES; i++) { 784 if (readcnt[i] == 0) 785 continue; 786 msec = 787 readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; 788 printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", 789 buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, 790 (readcnt[i] * 1000 / diskreads) % 10, 791 (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000, 792 msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); 793 } 794 printf("\n"); 795 } 796 797 int 798 blread(int fd, char *buf, ufs2_daddr_t blk, long size) 799 { 800 char *cp; 801 int i, errs; 802 off_t offset; 803 804 offset = blk; 805 offset *= dev_bsize; 806 if (bkgrdflag) 807 slowio_start(); 808 totalreads++; 809 diskreads++; 810 if (pread(fd, buf, (int)size, offset) == size) { 811 if (bkgrdflag) 812 slowio_end(); 813 return (0); 814 } 815 816 /* 817 * This is handled specially here instead of in rwerror because 818 * rwerror is used for all sorts of errors, not just true read/write 819 * errors. It should be refactored and fixed. 820 */ 821 if (surrender) { 822 pfatal("CANNOT READ_BLK: %ld", (long)blk); 823 errx(EEXIT, "ABORTING DUE TO READ ERRORS"); 824 } else 825 rwerror("READ BLK", blk); 826 827 errs = 0; 828 memset(buf, 0, (size_t)size); 829 printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); 830 for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { 831 if (pread(fd, cp, (int)secsize, offset + i) != secsize) { 832 if (secsize != dev_bsize && dev_bsize != 1) 833 printf(" %jd (%jd),", 834 (intmax_t)(blk * dev_bsize + i) / secsize, 835 (intmax_t)blk + i / dev_bsize); 836 else 837 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 838 errs++; 839 } 840 } 841 printf("\n"); 842 if (errs) 843 resolved = 0; 844 return (errs); 845 } 846 847 void 848 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) 849 { 850 int i; 851 char *cp; 852 off_t offset; 853 854 if (fd < 0) 855 return; 856 offset = blk; 857 offset *= dev_bsize; 858 if (pwrite(fd, buf, size, offset) == size) { 859 fsmodified = 1; 860 return; 861 } 862 resolved = 0; 863 rwerror("WRITE BLK", blk); 864 printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 865 for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) 866 if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize) 867 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 868 printf("\n"); 869 return; 870 } 871 872 void 873 blerase(int fd, ufs2_daddr_t blk, long size) 874 { 875 off_t ioarg[2]; 876 877 if (fd < 0) 878 return; 879 ioarg[0] = blk * dev_bsize; 880 ioarg[1] = size; 881 ioctl(fd, DIOCGDELETE, ioarg); 882 /* we don't really care if we succeed or not */ 883 return; 884 } 885 886 /* 887 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by 888 * definition a multiple of dev_bsize. 889 */ 890 void 891 blzero(int fd, ufs2_daddr_t blk, long size) 892 { 893 static char *zero; 894 off_t offset, len; 895 896 if (fd < 0) 897 return; 898 if (zero == NULL) { 899 zero = calloc(ZEROBUFSIZE, 1); 900 if (zero == NULL) 901 errx(EEXIT, "cannot allocate buffer pool"); 902 } 903 offset = blk * dev_bsize; 904 if (lseek(fd, offset, 0) < 0) 905 rwerror("SEEK BLK", blk); 906 while (size > 0) { 907 len = MIN(ZEROBUFSIZE, size); 908 if (write(fd, zero, len) != len) 909 rwerror("WRITE BLK", blk); 910 blk += len / dev_bsize; 911 size -= len; 912 } 913 } 914 915 /* 916 * Verify cylinder group's magic number and other parameters. If the 917 * test fails, offer an option to rebuild the whole cylinder group. 918 */ 919 int 920 check_cgmagic(int cg, struct bufarea *cgbp, int request_rebuild) 921 { 922 struct cg *cgp = cgbp->b_un.b_cg; 923 uint32_t cghash, calchash; 924 925 /* 926 * Extended cylinder group checks. 927 */ 928 calchash = cgp->cg_ckhash; 929 if ((sblock.fs_metackhash & CK_CYLGRP) != 0) { 930 cghash = cgp->cg_ckhash; 931 cgp->cg_ckhash = 0; 932 calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 933 cgp->cg_ckhash = cghash; 934 } 935 if (cgp->cg_ckhash == calchash && 936 cg_chkmagic(cgp) && 937 cgp->cg_cgx == cg && 938 ((sblock.fs_magic == FS_UFS1_MAGIC && 939 cgp->cg_old_niblk == sblock.fs_ipg && 940 cgp->cg_ndblk <= sblock.fs_fpg && 941 cgp->cg_old_ncyl <= sblock.fs_old_cpg) || 942 (sblock.fs_magic == FS_UFS2_MAGIC && 943 cgp->cg_niblk == sblock.fs_ipg && 944 cgp->cg_ndblk <= sblock.fs_fpg && 945 cgp->cg_initediblk <= sblock.fs_ipg))) { 946 return (1); 947 } 948 pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg); 949 if (!request_rebuild) 950 return (0); 951 if (!reply("REBUILD CYLINDER GROUP")) { 952 printf("YOU WILL NEED TO RERUN FSCK.\n"); 953 rerun = 1; 954 return (1); 955 } 956 /* 957 * Zero out the cylinder group and then initialize critical fields. 958 * Bit maps and summaries will be recalculated by later passes. 959 */ 960 memset(cgp, 0, (size_t)sblock.fs_cgsize); 961 cgp->cg_magic = CG_MAGIC; 962 cgp->cg_cgx = cg; 963 cgp->cg_niblk = sblock.fs_ipg; 964 cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); 965 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) 966 cgp->cg_ndblk = sblock.fs_fpg; 967 else 968 cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); 969 cgp->cg_iusedoff = &cgp->cg_space[0] - (u_char *)(&cgp->cg_firstfield); 970 if (sblock.fs_magic == FS_UFS1_MAGIC) { 971 cgp->cg_niblk = 0; 972 cgp->cg_initediblk = 0; 973 cgp->cg_old_ncyl = sblock.fs_old_cpg; 974 cgp->cg_old_niblk = sblock.fs_ipg; 975 cgp->cg_old_btotoff = cgp->cg_iusedoff; 976 cgp->cg_old_boff = cgp->cg_old_btotoff + 977 sblock.fs_old_cpg * sizeof(int32_t); 978 cgp->cg_iusedoff = cgp->cg_old_boff + 979 sblock.fs_old_cpg * sizeof(u_int16_t); 980 } 981 cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); 982 cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); 983 if (sblock.fs_contigsumsize > 0) { 984 cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag; 985 cgp->cg_clustersumoff = 986 roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); 987 cgp->cg_clustersumoff -= sizeof(u_int32_t); 988 cgp->cg_clusteroff = cgp->cg_clustersumoff + 989 (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); 990 cgp->cg_nextfreeoff = cgp->cg_clusteroff + 991 howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); 992 } 993 cgdirty(cgbp); 994 return (0); 995 } 996 997 /* 998 * allocate a data block with the specified number of fragments 999 */ 1000 ufs2_daddr_t 1001 allocblk(long frags) 1002 { 1003 int i, j, k, cg, baseblk; 1004 struct bufarea *cgbp; 1005 struct cg *cgp; 1006 1007 if (frags <= 0 || frags > sblock.fs_frag) 1008 return (0); 1009 for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { 1010 for (j = 0; j <= sblock.fs_frag - frags; j++) { 1011 if (testbmap(i + j)) 1012 continue; 1013 for (k = 1; k < frags; k++) 1014 if (testbmap(i + j + k)) 1015 break; 1016 if (k < frags) { 1017 j += k; 1018 continue; 1019 } 1020 cg = dtog(&sblock, i + j); 1021 cgbp = cglookup(cg); 1022 cgp = cgbp->b_un.b_cg; 1023 if (!check_cgmagic(cg, cgbp, 0)) 1024 return (0); 1025 baseblk = dtogd(&sblock, i + j); 1026 for (k = 0; k < frags; k++) { 1027 setbmap(i + j + k); 1028 clrbit(cg_blksfree(cgp), baseblk + k); 1029 } 1030 n_blks += frags; 1031 if (frags == sblock.fs_frag) 1032 cgp->cg_cs.cs_nbfree--; 1033 else 1034 cgp->cg_cs.cs_nffree -= frags; 1035 cgdirty(cgbp); 1036 return (i + j); 1037 } 1038 } 1039 return (0); 1040 } 1041 1042 /* 1043 * Slow down IO so as to leave some disk bandwidth for other processes 1044 */ 1045 void 1046 slowio_start() 1047 { 1048 1049 /* Delay one in every 8 operations */ 1050 slowio_pollcnt = (slowio_pollcnt + 1) & 7; 1051 if (slowio_pollcnt == 0) { 1052 gettimeofday(&slowio_starttime, NULL); 1053 } 1054 } 1055 1056 void 1057 slowio_end() 1058 { 1059 struct timeval tv; 1060 int delay_usec; 1061 1062 if (slowio_pollcnt != 0) 1063 return; 1064 1065 /* Update the slowdown interval. */ 1066 gettimeofday(&tv, NULL); 1067 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + 1068 (tv.tv_usec - slowio_starttime.tv_usec); 1069 if (delay_usec < 64) 1070 delay_usec = 64; 1071 if (delay_usec > 2500000) 1072 delay_usec = 2500000; 1073 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; 1074 /* delay by 8 times the average IO delay */ 1075 if (slowio_delay_usec > 64) 1076 usleep(slowio_delay_usec * 8); 1077 } 1078 1079 /* 1080 * Find a pathname 1081 */ 1082 void 1083 getpathname(char *namebuf, ino_t curdir, ino_t ino) 1084 { 1085 int len; 1086 char *cp; 1087 struct inode ip; 1088 struct inodesc idesc; 1089 static int busy = 0; 1090 1091 if (curdir == ino && ino == UFS_ROOTINO) { 1092 (void)strcpy(namebuf, "/"); 1093 return; 1094 } 1095 if (busy || !INO_IS_DVALID(curdir)) { 1096 (void)strcpy(namebuf, "?"); 1097 return; 1098 } 1099 busy = 1; 1100 memset(&idesc, 0, sizeof(struct inodesc)); 1101 idesc.id_type = DATA; 1102 idesc.id_fix = IGNORE; 1103 cp = &namebuf[MAXPATHLEN - 1]; 1104 *cp = '\0'; 1105 if (curdir != ino) { 1106 idesc.id_parent = curdir; 1107 goto namelookup; 1108 } 1109 while (ino != UFS_ROOTINO) { 1110 idesc.id_number = ino; 1111 idesc.id_func = findino; 1112 idesc.id_name = strdup(".."); 1113 ginode(ino, &ip); 1114 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1115 irelse(&ip); 1116 break; 1117 } 1118 irelse(&ip); 1119 namelookup: 1120 idesc.id_number = idesc.id_parent; 1121 idesc.id_parent = ino; 1122 idesc.id_func = findname; 1123 idesc.id_name = namebuf; 1124 ginode(idesc.id_number, &ip); 1125 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1126 irelse(&ip); 1127 break; 1128 } 1129 irelse(&ip); 1130 len = strlen(namebuf); 1131 cp -= len; 1132 memmove(cp, namebuf, (size_t)len); 1133 *--cp = '/'; 1134 if (cp < &namebuf[UFS_MAXNAMLEN]) 1135 break; 1136 ino = idesc.id_number; 1137 } 1138 busy = 0; 1139 if (ino != UFS_ROOTINO) 1140 *--cp = '?'; 1141 memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); 1142 } 1143 1144 void 1145 catch(int sig __unused) 1146 { 1147 1148 ckfini(0); 1149 exit(12); 1150 } 1151 1152 /* 1153 * When preening, allow a single quit to signal 1154 * a special exit after file system checks complete 1155 * so that reboot sequence may be interrupted. 1156 */ 1157 void 1158 catchquit(int sig __unused) 1159 { 1160 printf("returning to single-user after file system check\n"); 1161 returntosingle = 1; 1162 (void)signal(SIGQUIT, SIG_DFL); 1163 } 1164 1165 /* 1166 * determine whether an inode should be fixed. 1167 */ 1168 int 1169 dofix(struct inodesc *idesc, const char *msg) 1170 { 1171 1172 switch (idesc->id_fix) { 1173 1174 case DONTKNOW: 1175 if (idesc->id_type == DATA) 1176 direrror(idesc->id_number, msg); 1177 else 1178 pwarn("%s", msg); 1179 if (preen) { 1180 printf(" (SALVAGED)\n"); 1181 idesc->id_fix = FIX; 1182 return (ALTERED); 1183 } 1184 if (reply("SALVAGE") == 0) { 1185 idesc->id_fix = NOFIX; 1186 return (0); 1187 } 1188 idesc->id_fix = FIX; 1189 return (ALTERED); 1190 1191 case FIX: 1192 return (ALTERED); 1193 1194 case NOFIX: 1195 case IGNORE: 1196 return (0); 1197 1198 default: 1199 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); 1200 } 1201 /* NOTREACHED */ 1202 return (0); 1203 } 1204 1205 #include <stdarg.h> 1206 1207 /* 1208 * Print details about a buffer. 1209 */ 1210 static void 1211 prtbuf(const char *msg, struct bufarea *bp) 1212 { 1213 1214 printf("%s: bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " 1215 "index %jd\n", msg, bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, 1216 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", 1217 (intmax_t) bp->b_index); 1218 } 1219 1220 /* 1221 * An unexpected inconsistency occurred. 1222 * Die if preening or file system is running with soft dependency protocol, 1223 * otherwise just print message and continue. 1224 */ 1225 void 1226 pfatal(const char *fmt, ...) 1227 { 1228 va_list ap; 1229 va_start(ap, fmt); 1230 if (!preen) { 1231 (void)vfprintf(stdout, fmt, ap); 1232 va_end(ap); 1233 if (usedsoftdep) 1234 (void)fprintf(stdout, 1235 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); 1236 /* 1237 * Force foreground fsck to clean up inconsistency. 1238 */ 1239 if (bkgrdflag) { 1240 cmd.value = FS_NEEDSFSCK; 1241 cmd.size = 1; 1242 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1243 &cmd, sizeof cmd) == -1) 1244 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1245 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); 1246 ckfini(0); 1247 exit(EEXIT); 1248 } 1249 return; 1250 } 1251 if (cdevname == NULL) 1252 cdevname = strdup("fsck"); 1253 (void)fprintf(stdout, "%s: ", cdevname); 1254 (void)vfprintf(stdout, fmt, ap); 1255 (void)fprintf(stdout, 1256 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", 1257 cdevname, usedsoftdep ? " SOFT UPDATE " : " "); 1258 /* 1259 * Force foreground fsck to clean up inconsistency. 1260 */ 1261 if (bkgrdflag) { 1262 cmd.value = FS_NEEDSFSCK; 1263 cmd.size = 1; 1264 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1265 &cmd, sizeof cmd) == -1) 1266 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1267 } 1268 ckfini(0); 1269 exit(EEXIT); 1270 } 1271 1272 /* 1273 * Pwarn just prints a message when not preening or running soft dependency 1274 * protocol, or a warning (preceded by filename) when preening. 1275 */ 1276 void 1277 pwarn(const char *fmt, ...) 1278 { 1279 va_list ap; 1280 va_start(ap, fmt); 1281 if (preen) 1282 (void)fprintf(stdout, "%s: ", cdevname); 1283 (void)vfprintf(stdout, fmt, ap); 1284 va_end(ap); 1285 } 1286 1287 /* 1288 * Stub for routines from kernel. 1289 */ 1290 void 1291 panic(const char *fmt, ...) 1292 { 1293 va_list ap; 1294 va_start(ap, fmt); 1295 pfatal("INTERNAL INCONSISTENCY:"); 1296 (void)vfprintf(stdout, fmt, ap); 1297 va_end(ap); 1298 exit(EEXIT); 1299 } 1300