1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #if 0 33 #ifndef lint 34 static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; 35 #endif /* not lint */ 36 #endif 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/time.h> 42 #include <sys/types.h> 43 #include <sys/sysctl.h> 44 #include <sys/disk.h> 45 #include <sys/disklabel.h> 46 #include <sys/ioctl.h> 47 #include <sys/stat.h> 48 49 #include <ufs/ufs/dinode.h> 50 #include <ufs/ufs/dir.h> 51 #include <ufs/ffs/fs.h> 52 53 #include <err.h> 54 #include <errno.h> 55 #include <string.h> 56 #include <ctype.h> 57 #include <fstab.h> 58 #include <stdint.h> 59 #include <stdio.h> 60 #include <stdlib.h> 61 #include <time.h> 62 #include <unistd.h> 63 #include <libufs.h> 64 65 #include "fsck.h" 66 67 int sujrecovery = 0; 68 69 static struct bufarea *allocbuf(const char *); 70 static void cg_write(struct bufarea *); 71 static void slowio_start(void); 72 static void slowio_end(void); 73 static void printIOstats(void); 74 75 static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ 76 static struct timespec startpass, finishpass; 77 struct timeval slowio_starttime; 78 int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ 79 int slowio_pollcnt; 80 static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ 81 static struct bufarea failedbuf; /* returned by failed getdatablk() */ 82 static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ 83 static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ 84 static struct bufhash freebufs; /* unused buffers */ 85 static int numbufs; /* size of buffer cache */ 86 static int cachelookups; /* number of cache lookups */ 87 static int cachereads; /* number of cache reads */ 88 static int flushtries; /* number of tries to reclaim memory */ 89 90 char *buftype[BT_NUMBUFTYPES] = BT_NAMES; 91 92 void 93 fsutilinit(void) 94 { 95 diskreads = totaldiskreads = totalreads = 0; 96 bzero(&startpass, sizeof(struct timespec)); 97 bzero(&finishpass, sizeof(struct timespec)); 98 bzero(&slowio_starttime, sizeof(struct timeval)); 99 slowio_delay_usec = 10000; 100 slowio_pollcnt = 0; 101 flushtries = 0; 102 } 103 104 int 105 ftypeok(union dinode *dp) 106 { 107 switch (DIP(dp, di_mode) & IFMT) { 108 109 case IFDIR: 110 case IFREG: 111 case IFBLK: 112 case IFCHR: 113 case IFLNK: 114 case IFSOCK: 115 case IFIFO: 116 return (1); 117 118 default: 119 if (debug) 120 printf("bad file type 0%o\n", DIP(dp, di_mode)); 121 return (0); 122 } 123 } 124 125 int 126 reply(const char *question) 127 { 128 int persevere; 129 char c; 130 131 if (preen) 132 pfatal("INTERNAL ERROR: GOT TO reply()"); 133 persevere = strcmp(question, "CONTINUE") == 0 || 134 strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0; 135 printf("\n"); 136 if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { 137 printf("%s? no\n\n", question); 138 resolved = 0; 139 return (0); 140 } 141 if (yflag || (persevere && nflag)) { 142 printf("%s? yes\n\n", question); 143 return (1); 144 } 145 do { 146 printf("%s? [yn] ", question); 147 (void) fflush(stdout); 148 c = getc(stdin); 149 while (c != '\n' && getc(stdin) != '\n') { 150 if (feof(stdin)) { 151 resolved = 0; 152 return (0); 153 } 154 } 155 } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); 156 printf("\n"); 157 if (c == 'y' || c == 'Y') 158 return (1); 159 resolved = 0; 160 return (0); 161 } 162 163 /* 164 * Look up state information for an inode. 165 */ 166 struct inostat * 167 inoinfo(ino_t inum) 168 { 169 static struct inostat unallocated = { USTATE, 0, 0, 0 }; 170 struct inostatlist *ilp; 171 int iloff; 172 173 if (inum > maxino) 174 errx(EEXIT, "inoinfo: inumber %ju out of range", 175 (uintmax_t)inum); 176 ilp = &inostathead[inum / sblock.fs_ipg]; 177 iloff = inum % sblock.fs_ipg; 178 if (iloff >= ilp->il_numalloced) 179 return (&unallocated); 180 return (&ilp->il_stat[iloff]); 181 } 182 183 /* 184 * Malloc buffers and set up cache. 185 */ 186 void 187 bufinit(void) 188 { 189 int i; 190 191 initbarea(&failedbuf, BT_UNKNOWN); 192 failedbuf.b_errs = -1; 193 failedbuf.b_un.b_buf = NULL; 194 if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) 195 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); 196 initbarea(&cgblk, BT_CYLGRP); 197 numbufs = cachelookups = cachereads = 0; 198 TAILQ_INIT(&bufqueuehd); 199 LIST_INIT(&freebufs); 200 for (i = 0; i < HASHSIZE; i++) 201 LIST_INIT(&bufhashhd[i]); 202 for (i = 0; i < BT_NUMBUFTYPES; i++) { 203 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; 204 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; 205 readcnt[i] = totalreadcnt[i] = 0; 206 } 207 } 208 209 static struct bufarea * 210 allocbuf(const char *failreason) 211 { 212 struct bufarea *bp; 213 char *bufp; 214 215 bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); 216 bufp = Malloc((unsigned int)sblock.fs_bsize); 217 if (bp == NULL || bufp == NULL) { 218 errx(EEXIT, "%s", failreason); 219 /* NOTREACHED */ 220 } 221 numbufs++; 222 bp->b_un.b_buf = bufp; 223 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 224 initbarea(bp, BT_UNKNOWN); 225 return (bp); 226 } 227 228 /* 229 * Manage cylinder group buffers. 230 * 231 * Use getblk() here rather than cgget() because the cylinder group 232 * may be corrupted but we want it anyway so we can fix it. 233 */ 234 static struct bufarea *cgbufs; /* header for cylinder group cache */ 235 static int flushtries; /* number of tries to reclaim memory */ 236 237 struct bufarea * 238 cglookup(int cg) 239 { 240 struct bufarea *cgbp; 241 struct cg *cgp; 242 243 if ((unsigned) cg >= sblock.fs_ncg) 244 errx(EEXIT, "cglookup: out of range cylinder group %d", cg); 245 if (cgbufs == NULL) { 246 cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea)); 247 if (cgbufs == NULL) 248 errx(EEXIT, "Cannot allocate cylinder group buffers"); 249 } 250 cgbp = &cgbufs[cg]; 251 if (cgbp->b_un.b_cg != NULL) 252 return (cgbp); 253 cgp = NULL; 254 if (flushtries == 0) 255 cgp = Malloc((unsigned int)sblock.fs_cgsize); 256 if (cgp == NULL) { 257 if (sujrecovery) 258 errx(EEXIT,"Ran out of memory during journal recovery"); 259 flush(fswritefd, &cgblk); 260 getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); 261 return (&cgblk); 262 } 263 cgbp->b_un.b_cg = cgp; 264 initbarea(cgbp, BT_CYLGRP); 265 getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); 266 return (cgbp); 267 } 268 269 /* 270 * Mark a cylinder group buffer as dirty. 271 * Update its check-hash if they are enabled. 272 */ 273 void 274 cgdirty(struct bufarea *cgbp) 275 { 276 struct cg *cg; 277 278 cg = cgbp->b_un.b_cg; 279 if ((sblock.fs_metackhash & CK_CYLGRP) != 0) { 280 cg->cg_ckhash = 0; 281 cg->cg_ckhash = 282 calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize); 283 } 284 dirty(cgbp); 285 } 286 287 /* 288 * Attempt to flush a cylinder group cache entry. 289 * Return whether the flush was successful. 290 */ 291 int 292 flushentry(void) 293 { 294 struct bufarea *cgbp; 295 296 if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL) 297 return (0); 298 cgbp = &cgbufs[flushtries++]; 299 if (cgbp->b_un.b_cg == NULL) 300 return (0); 301 flush(fswritefd, cgbp); 302 free(cgbp->b_un.b_buf); 303 cgbp->b_un.b_buf = NULL; 304 return (1); 305 } 306 307 /* 308 * Manage a cache of filesystem disk blocks. 309 */ 310 struct bufarea * 311 getdatablk(ufs2_daddr_t blkno, long size, int type) 312 { 313 struct bufarea *bp; 314 struct bufhash *bhdp; 315 316 cachelookups++; 317 /* 318 * If out of range, return empty buffer with b_err == -1 319 * 320 * Skip check for inodes because chkrange() considers 321 * metadata areas invalid to write data. 322 */ 323 if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) 324 return (&failedbuf); 325 bhdp = &bufhashhd[HASH(blkno)]; 326 LIST_FOREACH(bp, bhdp, b_hash) 327 if (bp->b_bno == fsbtodb(&sblock, blkno)) { 328 if (debug && bp->b_size != size) { 329 prtbuf(bp, "getdatablk: size mismatch"); 330 pfatal("getdatablk: b_size %d != size %ld\n", 331 bp->b_size, size); 332 } 333 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 334 goto foundit; 335 } 336 /* 337 * Move long-term busy buffer back to the front of the LRU so we 338 * do not endless inspect them for recycling. 339 */ 340 bp = TAILQ_LAST(&bufqueuehd, bufqueue); 341 if (bp != NULL && bp->b_refcnt != 0) { 342 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 343 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 344 } 345 /* 346 * Allocate up to the minimum number of buffers before 347 * considering recycling any of them. 348 */ 349 if (size > sblock.fs_bsize) 350 errx(EEXIT, "Excessive buffer size %ld > %d\n", size, 351 sblock.fs_bsize); 352 if ((bp = LIST_FIRST(&freebufs)) != NULL) { 353 LIST_REMOVE(bp, b_hash); 354 } else if (numbufs < MINBUFS) { 355 bp = allocbuf("cannot create minimal buffer pool"); 356 } else if (sujrecovery) { 357 /* 358 * SUJ recovery does not want anything written until it 359 * has successfully completed (so it can fail back to 360 * full fsck). Thus, we can only recycle clean buffers. 361 */ 362 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 363 if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0) 364 break; 365 if (bp == NULL) 366 bp = allocbuf("Ran out of memory during " 367 "journal recovery"); 368 else 369 LIST_REMOVE(bp, b_hash); 370 } else { 371 /* 372 * Recycle oldest non-busy buffer. 373 */ 374 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 375 if (bp->b_refcnt == 0) 376 break; 377 if (bp == NULL) 378 bp = allocbuf("Ran out of memory for buffers"); 379 else 380 LIST_REMOVE(bp, b_hash); 381 } 382 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 383 flush(fswritefd, bp); 384 bp->b_type = type; 385 LIST_INSERT_HEAD(bhdp, bp, b_hash); 386 getblk(bp, blkno, size); 387 cachereads++; 388 /* fall through */ 389 foundit: 390 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 391 if (debug && bp->b_type != type) { 392 printf("getdatablk: buffer type changed to %s", 393 BT_BUFTYPE(type)); 394 prtbuf(bp, ""); 395 } 396 if (bp->b_errs == 0) 397 bp->b_refcnt++; 398 return (bp); 399 } 400 401 void 402 getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) 403 { 404 ufs2_daddr_t dblk; 405 struct timespec start, finish; 406 407 dblk = fsbtodb(&sblock, blk); 408 if (bp->b_bno == dblk) { 409 totalreads++; 410 } else { 411 if (debug) { 412 readcnt[bp->b_type]++; 413 clock_gettime(CLOCK_REALTIME_PRECISE, &start); 414 } 415 bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); 416 if (debug) { 417 clock_gettime(CLOCK_REALTIME_PRECISE, &finish); 418 timespecsub(&finish, &start, &finish); 419 timespecadd(&readtime[bp->b_type], &finish, 420 &readtime[bp->b_type]); 421 } 422 bp->b_bno = dblk; 423 bp->b_size = size; 424 } 425 } 426 427 void 428 brelse(struct bufarea *bp) 429 { 430 431 if (bp->b_refcnt <= 0) 432 prtbuf(bp, "brelse: buffer with negative reference count"); 433 bp->b_refcnt--; 434 } 435 436 void 437 binval(struct bufarea *bp) 438 { 439 440 bp->b_flags &= ~B_DIRTY; 441 LIST_REMOVE(bp, b_hash); 442 LIST_INSERT_HEAD(&freebufs, bp, b_hash); 443 } 444 445 void 446 flush(int fd, struct bufarea *bp) 447 { 448 struct inode ip; 449 450 if ((bp->b_flags & B_DIRTY) == 0) 451 return; 452 bp->b_flags &= ~B_DIRTY; 453 if (fswritefd < 0) { 454 pfatal("WRITING IN READ_ONLY MODE.\n"); 455 return; 456 } 457 if (bp->b_errs != 0) 458 pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", 459 (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", 460 (long long)bp->b_bno); 461 bp->b_errs = 0; 462 /* 463 * Write using the appropriate function. 464 */ 465 switch (bp->b_type) { 466 case BT_SUPERBLK: 467 if (bp != &sblk) 468 pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", 469 bp, &sblk); 470 /* 471 * Superblocks are always pre-copied so we do not need 472 * to check them for copy-on-write. 473 */ 474 if (sbput(fd, bp->b_un.b_fs, 0) == 0) 475 fsmodified = 1; 476 break; 477 case BT_CYLGRP: 478 /* 479 * Cylinder groups are always pre-copied so we do not 480 * need to check them for copy-on-write. 481 */ 482 if (sujrecovery) 483 cg_write(bp); 484 if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) 485 fsmodified = 1; 486 break; 487 case BT_INODES: 488 if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { 489 struct ufs2_dinode *dp = bp->b_un.b_dinode2; 490 int i; 491 492 for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) { 493 if (ffs_verify_dinode_ckhash(&sblock, dp) == 0) 494 continue; 495 pwarn("flush: INODE CHECK-HASH FAILED"); 496 ip.i_bp = bp; 497 ip.i_dp = (union dinode *)dp; 498 ip.i_number = bp->b_index + (i / sizeof(*dp)); 499 prtinode(&ip); 500 if (preen || reply("FIX") != 0) { 501 if (preen) 502 printf(" (FIXED)\n"); 503 ffs_update_dinode_ckhash(&sblock, dp); 504 inodirty(&ip); 505 } 506 } 507 } 508 /* FALLTHROUGH */ 509 default: 510 copyonwrite(&sblock, bp, std_checkblkavail); 511 blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); 512 break; 513 } 514 } 515 516 /* 517 * If there are any snapshots, ensure that all the blocks that they 518 * care about have been copied, then release the snapshot inodes. 519 * These operations need to be done before we rebuild the cylinder 520 * groups so that any block allocations are properly recorded. 521 * Since all the cylinder group maps have already been copied in 522 * the snapshots, no further snapshot copies will need to be done. 523 */ 524 void 525 snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)) 526 { 527 struct bufarea *bp; 528 int cnt; 529 530 if (snapcnt > 0) { 531 if (debug) 532 printf("Check for snapshot copies\n"); 533 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 534 if ((bp->b_flags & B_DIRTY) != 0) 535 copyonwrite(&sblock, bp, checkblkavail); 536 for (cnt = 0; cnt < snapcnt; cnt++) 537 irelse(&snaplist[cnt]); 538 snapcnt = 0; 539 } 540 } 541 542 /* 543 * Journaled soft updates does not maintain cylinder group summary 544 * information during cleanup, so this routine recalculates the summary 545 * information and updates the superblock summary in preparation for 546 * writing out the cylinder group. 547 */ 548 static void 549 cg_write(struct bufarea *bp) 550 { 551 ufs1_daddr_t fragno, cgbno, maxbno; 552 u_int8_t *blksfree; 553 struct csum *csp; 554 struct cg *cgp; 555 int blk; 556 int i; 557 558 /* 559 * Fix the frag and cluster summary. 560 */ 561 cgp = bp->b_un.b_cg; 562 cgp->cg_cs.cs_nbfree = 0; 563 cgp->cg_cs.cs_nffree = 0; 564 bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 565 maxbno = fragstoblks(&sblock, sblock.fs_fpg); 566 if (sblock.fs_contigsumsize > 0) { 567 for (i = 1; i <= sblock.fs_contigsumsize; i++) 568 cg_clustersum(cgp)[i] = 0; 569 bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 570 } 571 blksfree = cg_blksfree(cgp); 572 for (cgbno = 0; cgbno < maxbno; cgbno++) { 573 if (ffs_isfreeblock(&sblock, blksfree, cgbno)) 574 continue; 575 if (ffs_isblock(&sblock, blksfree, cgbno)) { 576 ffs_clusteracct(&sblock, cgp, cgbno, 1); 577 cgp->cg_cs.cs_nbfree++; 578 continue; 579 } 580 fragno = blkstofrags(&sblock, cgbno); 581 blk = blkmap(&sblock, blksfree, fragno); 582 ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1); 583 for (i = 0; i < sblock.fs_frag; i++) 584 if (isset(blksfree, fragno + i)) 585 cgp->cg_cs.cs_nffree++; 586 } 587 /* 588 * Update the superblock cg summary from our now correct values 589 * before writing the block. 590 */ 591 csp = &sblock.fs_cs(&sblock, cgp->cg_cgx); 592 sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir; 593 sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree; 594 sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree; 595 sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree; 596 sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; 597 } 598 599 void 600 rwerror(const char *mesg, ufs2_daddr_t blk) 601 { 602 603 if (bkgrdcheck) 604 exit(EEXIT); 605 if (preen == 0) 606 printf("\n"); 607 pfatal("CANNOT %s: %ld", mesg, (long)blk); 608 if (reply("CONTINUE") == 0) 609 exit(EEXIT); 610 } 611 612 void 613 ckfini(int markclean) 614 { 615 struct bufarea *bp, *nbp; 616 int ofsmodified, cnt, cg; 617 618 if (bkgrdflag) { 619 unlink(snapname); 620 if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { 621 cmd.value = FS_UNCLEAN; 622 cmd.size = markclean ? -1 : 1; 623 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 624 &cmd, sizeof cmd) == -1) 625 pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n"); 626 if (!preen) { 627 printf("\n***** FILE SYSTEM MARKED %s *****\n", 628 markclean ? "CLEAN" : "DIRTY"); 629 if (!markclean) 630 rerun = 1; 631 } 632 } else if (!preen && !markclean) { 633 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 634 rerun = 1; 635 } 636 bkgrdflag = 0; 637 } 638 if (debug && cachelookups > 0) 639 printf("cache with %d buffers missed %d of %d (%d%%)\n", 640 numbufs, cachereads, cachelookups, 641 (int)(cachereads * 100 / cachelookups)); 642 if (fswritefd < 0) { 643 (void)close(fsreadfd); 644 return; 645 } 646 647 /* 648 * To remain idempotent with partial truncations the buffers 649 * must be flushed in this order: 650 * 1) cylinder groups (bitmaps) 651 * 2) indirect, directory, external attribute, and data blocks 652 * 3) inode blocks 653 * 4) superblock 654 * This ordering preserves access to the modified pointers 655 * until they are freed. 656 */ 657 /* Step 1: cylinder groups */ 658 if (debug) 659 printf("Flush Cylinder groups\n"); 660 if (cgbufs != NULL) { 661 for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { 662 if (cgbufs[cnt].b_un.b_cg == NULL) 663 continue; 664 flush(fswritefd, &cgbufs[cnt]); 665 free(cgbufs[cnt].b_un.b_cg); 666 } 667 free(cgbufs); 668 cgbufs = NULL; 669 } 670 flush(fswritefd, &cgblk); 671 free(cgblk.b_un.b_buf); 672 cgblk.b_un.b_buf = NULL; 673 cnt = 0; 674 /* Step 2: indirect, directory, external attribute, and data blocks */ 675 if (debug) 676 printf("Flush indirect, directory, external attribute, " 677 "and data blocks\n"); 678 if (pdirbp != NULL) { 679 brelse(pdirbp); 680 pdirbp = NULL; 681 } 682 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 683 switch (bp->b_type) { 684 /* These should not be in the buffer cache list */ 685 case BT_UNKNOWN: 686 case BT_SUPERBLK: 687 case BT_CYLGRP: 688 default: 689 prtbuf(bp,"ckfini: improper buffer type on cache list"); 690 continue; 691 /* These are the ones to flush in this step */ 692 case BT_LEVEL1: 693 case BT_LEVEL2: 694 case BT_LEVEL3: 695 case BT_EXTATTR: 696 case BT_DIRDATA: 697 case BT_DATA: 698 break; 699 /* These are the ones to flush in the next step */ 700 case BT_INODES: 701 continue; 702 } 703 if (debug && bp->b_refcnt != 0) 704 prtbuf(bp, "ckfini: clearing in-use buffer"); 705 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 706 LIST_REMOVE(bp, b_hash); 707 cnt++; 708 flush(fswritefd, bp); 709 free(bp->b_un.b_buf); 710 free((char *)bp); 711 } 712 /* Step 3: inode blocks */ 713 if (debug) 714 printf("Flush inode blocks\n"); 715 if (icachebp != NULL) { 716 brelse(icachebp); 717 icachebp = NULL; 718 } 719 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 720 if (debug && bp->b_refcnt != 0) 721 prtbuf(bp, "ckfini: clearing in-use buffer"); 722 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 723 LIST_REMOVE(bp, b_hash); 724 cnt++; 725 flush(fswritefd, bp); 726 free(bp->b_un.b_buf); 727 free((char *)bp); 728 } 729 if (numbufs != cnt) 730 errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); 731 /* Step 4: superblock */ 732 if (debug) 733 printf("Flush the superblock\n"); 734 flush(fswritefd, &sblk); 735 if (havesb && cursnapshot == 0 && 736 sblk.b_bno != sblock.fs_sblockloc / dev_bsize) { 737 if (preen || reply("UPDATE STANDARD SUPERBLOCK")) { 738 /* Change write destination to standard superblock */ 739 sblock.fs_sblockactualloc = sblock.fs_sblockloc; 740 sblk.b_bno = sblock.fs_sblockloc / dev_bsize; 741 sbdirty(); 742 flush(fswritefd, &sblk); 743 } else { 744 markclean = 0; 745 } 746 } 747 if (cursnapshot == 0 && sblock.fs_clean != markclean) { 748 if ((sblock.fs_clean = markclean) != 0) { 749 sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); 750 sblock.fs_pendingblocks = 0; 751 sblock.fs_pendinginodes = 0; 752 } 753 sbdirty(); 754 ofsmodified = fsmodified; 755 flush(fswritefd, &sblk); 756 fsmodified = ofsmodified; 757 if (!preen) { 758 printf("\n***** FILE SYSTEM MARKED %s *****\n", 759 markclean ? "CLEAN" : "DIRTY"); 760 if (!markclean) 761 rerun = 1; 762 } 763 } else if (!preen) { 764 if (markclean) { 765 printf("\n***** FILE SYSTEM IS CLEAN *****\n"); 766 } else { 767 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 768 rerun = 1; 769 } 770 } 771 /* 772 * Free allocated tracking structures. 773 */ 774 if (blockmap != NULL) 775 free(blockmap); 776 blockmap = NULL; 777 if (inostathead != NULL) { 778 for (cg = 0; cg < sblock.fs_ncg; cg++) 779 if (inostathead[cg].il_stat != NULL) 780 free((char *)inostathead[cg].il_stat); 781 free(inostathead); 782 } 783 inostathead = NULL; 784 inocleanup(); 785 finalIOstats(); 786 (void)close(fsreadfd); 787 (void)close(fswritefd); 788 } 789 790 /* 791 * Print out I/O statistics. 792 */ 793 void 794 IOstats(char *what) 795 { 796 int i; 797 798 if (debug == 0) 799 return; 800 if (diskreads == 0) { 801 printf("%s: no I/O\n\n", what); 802 return; 803 } 804 if (startpass.tv_sec == 0) 805 startpass = startprog; 806 printf("%s: I/O statistics\n", what); 807 printIOstats(); 808 totaldiskreads += diskreads; 809 diskreads = 0; 810 for (i = 0; i < BT_NUMBUFTYPES; i++) { 811 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 812 totalreadcnt[i] += readcnt[i]; 813 readtime[i].tv_sec = readtime[i].tv_nsec = 0; 814 readcnt[i] = 0; 815 } 816 clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); 817 } 818 819 void 820 finalIOstats(void) 821 { 822 int i; 823 824 if (debug == 0) 825 return; 826 printf("Final I/O statistics\n"); 827 totaldiskreads += diskreads; 828 diskreads = totaldiskreads; 829 startpass = startprog; 830 for (i = 0; i < BT_NUMBUFTYPES; i++) { 831 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 832 totalreadcnt[i] += readcnt[i]; 833 readtime[i] = totalreadtime[i]; 834 readcnt[i] = totalreadcnt[i]; 835 } 836 printIOstats(); 837 } 838 839 static void printIOstats(void) 840 { 841 long long msec, totalmsec; 842 int i; 843 844 clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); 845 timespecsub(&finishpass, &startpass, &finishpass); 846 printf("Running time: %jd.%03ld sec\n", 847 (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); 848 printf("buffer reads by type:\n"); 849 for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) 850 totalmsec += readtime[i].tv_sec * 1000 + 851 readtime[i].tv_nsec / 1000000; 852 if (totalmsec == 0) 853 totalmsec = 1; 854 for (i = 0; i < BT_NUMBUFTYPES; i++) { 855 if (readcnt[i] == 0) 856 continue; 857 msec = 858 readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; 859 printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", 860 buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, 861 (readcnt[i] * 1000 / diskreads) % 10, 862 (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000, 863 msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); 864 } 865 printf("\n"); 866 } 867 868 int 869 blread(int fd, char *buf, ufs2_daddr_t blk, long size) 870 { 871 char *cp; 872 int i, errs; 873 off_t offset; 874 875 offset = blk; 876 offset *= dev_bsize; 877 if (bkgrdflag) 878 slowio_start(); 879 totalreads++; 880 diskreads++; 881 if (pread(fd, buf, (int)size, offset) == size) { 882 if (bkgrdflag) 883 slowio_end(); 884 return (0); 885 } 886 887 /* 888 * This is handled specially here instead of in rwerror because 889 * rwerror is used for all sorts of errors, not just true read/write 890 * errors. It should be refactored and fixed. 891 */ 892 if (surrender) { 893 pfatal("CANNOT READ_BLK: %ld", (long)blk); 894 errx(EEXIT, "ABORTING DUE TO READ ERRORS"); 895 } else 896 rwerror("READ BLK", blk); 897 898 errs = 0; 899 memset(buf, 0, (size_t)size); 900 printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); 901 for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { 902 if (pread(fd, cp, (int)secsize, offset + i) != secsize) { 903 if (secsize != dev_bsize && dev_bsize != 1) 904 printf(" %jd (%jd),", 905 (intmax_t)(blk * dev_bsize + i) / secsize, 906 (intmax_t)blk + i / dev_bsize); 907 else 908 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 909 errs++; 910 } 911 } 912 printf("\n"); 913 if (errs) 914 resolved = 0; 915 return (errs); 916 } 917 918 void 919 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) 920 { 921 int i; 922 char *cp; 923 off_t offset; 924 925 if (fd < 0) 926 return; 927 offset = blk; 928 offset *= dev_bsize; 929 if (pwrite(fd, buf, size, offset) == size) { 930 fsmodified = 1; 931 return; 932 } 933 resolved = 0; 934 rwerror("WRITE BLK", blk); 935 printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 936 for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) 937 if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize) 938 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 939 printf("\n"); 940 return; 941 } 942 943 void 944 blerase(int fd, ufs2_daddr_t blk, long size) 945 { 946 off_t ioarg[2]; 947 948 if (fd < 0) 949 return; 950 ioarg[0] = blk * dev_bsize; 951 ioarg[1] = size; 952 ioctl(fd, DIOCGDELETE, ioarg); 953 /* we don't really care if we succeed or not */ 954 return; 955 } 956 957 /* 958 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by 959 * definition a multiple of dev_bsize. 960 */ 961 void 962 blzero(int fd, ufs2_daddr_t blk, long size) 963 { 964 static char *zero; 965 off_t offset, len; 966 967 if (fd < 0) 968 return; 969 if (zero == NULL) { 970 zero = calloc(ZEROBUFSIZE, 1); 971 if (zero == NULL) 972 errx(EEXIT, "cannot allocate buffer pool"); 973 } 974 offset = blk * dev_bsize; 975 if (lseek(fd, offset, 0) < 0) 976 rwerror("SEEK BLK", blk); 977 while (size > 0) { 978 len = MIN(ZEROBUFSIZE, size); 979 if (write(fd, zero, len) != len) 980 rwerror("WRITE BLK", blk); 981 blk += len / dev_bsize; 982 size -= len; 983 } 984 } 985 986 /* 987 * Verify cylinder group's magic number and other parameters. If the 988 * test fails, offer an option to rebuild the whole cylinder group. 989 */ 990 #undef CHK 991 #define CHK(lhs, op, rhs, fmt) \ 992 if (lhs op rhs) { \ 993 pwarn("UFS%d cylinder group %d failed: " \ 994 "%s (" #fmt ") %s %s (" #fmt ")\n", \ 995 sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg, \ 996 #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ 997 error = 1; \ 998 } 999 int 1000 check_cgmagic(int cg, struct bufarea *cgbp, int request_rebuild) 1001 { 1002 struct cg *cgp = cgbp->b_un.b_cg; 1003 uint32_t cghash, calchash; 1004 static int prevfailcg = -1; 1005 long start; 1006 int error; 1007 1008 /* 1009 * Extended cylinder group checks. 1010 */ 1011 calchash = cgp->cg_ckhash; 1012 if ((sblock.fs_metackhash & CK_CYLGRP) != 0 && 1013 (ckhashadd & CK_CYLGRP) == 0) { 1014 cghash = cgp->cg_ckhash; 1015 cgp->cg_ckhash = 0; 1016 calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 1017 cgp->cg_ckhash = cghash; 1018 } 1019 error = 0; 1020 CHK(cgp->cg_ckhash, !=, calchash, "%jd"); 1021 CHK(cg_chkmagic(cgp), ==, 0, "%jd"); 1022 CHK(cgp->cg_cgx, !=, cg, "%jd"); 1023 CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd"); 1024 if (sblock.fs_magic == FS_UFS1_MAGIC) { 1025 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1026 CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd"); 1027 } else if (sblock.fs_magic == FS_UFS2_MAGIC) { 1028 CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd"); 1029 CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd"); 1030 } 1031 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) { 1032 CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd"); 1033 } else { 1034 CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg), 1035 "%jd"); 1036 } 1037 start = &cgp->cg_space[0] - (u_char *)(&cgp->cg_firstfield); 1038 if (sblock.fs_magic == FS_UFS2_MAGIC) { 1039 CHK(cgp->cg_iusedoff, !=, start, "%jd"); 1040 } else if (sblock.fs_magic == FS_UFS1_MAGIC) { 1041 CHK(cgp->cg_niblk, !=, 0, "%jd"); 1042 CHK(cgp->cg_initediblk, !=, 0, "%jd"); 1043 CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd"); 1044 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1045 CHK(cgp->cg_old_btotoff, !=, start, "%jd"); 1046 CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff + 1047 sblock.fs_old_cpg * sizeof(int32_t), "%jd"); 1048 CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff + 1049 sblock.fs_old_cpg * sizeof(u_int16_t), "%jd"); 1050 } 1051 CHK(cgp->cg_freeoff, !=, 1052 cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd"); 1053 if (sblock.fs_contigsumsize == 0) { 1054 CHK(cgp->cg_nextfreeoff, !=, 1055 cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd"); 1056 } else { 1057 CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag, 1058 "%jd"); 1059 CHK(cgp->cg_clustersumoff, !=, 1060 roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), 1061 sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd"); 1062 CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff + 1063 (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd"); 1064 CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff + 1065 howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT), 1066 "%jd"); 1067 } 1068 if (error == 0) 1069 return (1); 1070 if (prevfailcg == cg) 1071 return (0); 1072 prevfailcg = cg; 1073 pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg); 1074 if (!request_rebuild) { 1075 printf("\n"); 1076 return (0); 1077 } 1078 if (!reply("REBUILD CYLINDER GROUP")) { 1079 printf("YOU WILL NEED TO RERUN FSCK.\n"); 1080 rerun = 1; 1081 return (1); 1082 } 1083 /* 1084 * Zero out the cylinder group and then initialize critical fields. 1085 * Bit maps and summaries will be recalculated by later passes. 1086 */ 1087 memset(cgp, 0, (size_t)sblock.fs_cgsize); 1088 cgp->cg_magic = CG_MAGIC; 1089 cgp->cg_cgx = cg; 1090 cgp->cg_niblk = sblock.fs_ipg; 1091 cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); 1092 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) 1093 cgp->cg_ndblk = sblock.fs_fpg; 1094 else 1095 cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); 1096 start = &cgp->cg_space[0] - (u_char *)(&cgp->cg_firstfield); 1097 if (sblock.fs_magic == FS_UFS2_MAGIC) { 1098 cgp->cg_iusedoff = start; 1099 } else if (sblock.fs_magic == FS_UFS1_MAGIC) { 1100 cgp->cg_niblk = 0; 1101 cgp->cg_initediblk = 0; 1102 cgp->cg_old_ncyl = sblock.fs_old_cpg; 1103 cgp->cg_old_niblk = sblock.fs_ipg; 1104 cgp->cg_old_btotoff = start; 1105 cgp->cg_old_boff = cgp->cg_old_btotoff + 1106 sblock.fs_old_cpg * sizeof(int32_t); 1107 cgp->cg_iusedoff = cgp->cg_old_boff + 1108 sblock.fs_old_cpg * sizeof(u_int16_t); 1109 } 1110 cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); 1111 cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); 1112 if (sblock.fs_contigsumsize > 0) { 1113 cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag; 1114 cgp->cg_clustersumoff = 1115 roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); 1116 cgp->cg_clustersumoff -= sizeof(u_int32_t); 1117 cgp->cg_clusteroff = cgp->cg_clustersumoff + 1118 (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); 1119 cgp->cg_nextfreeoff = cgp->cg_clusteroff + 1120 howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); 1121 } 1122 cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 1123 cgdirty(cgbp); 1124 return (0); 1125 } 1126 1127 /* 1128 * allocate a data block with the specified number of fragments 1129 */ 1130 ufs2_daddr_t 1131 allocblk(long startcg, long frags, 1132 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) 1133 { 1134 ufs2_daddr_t blkno, newblk; 1135 1136 if (sujrecovery && checkblkavail == std_checkblkavail) { 1137 pfatal("allocblk: std_checkblkavail used for SUJ recovery\n"); 1138 return (0); 1139 } 1140 if (frags <= 0 || frags > sblock.fs_frag) 1141 return (0); 1142 for (blkno = MAX(cgdata(&sblock, startcg), 0); 1143 blkno < maxfsblock - sblock.fs_frag; 1144 blkno += sblock.fs_frag) { 1145 if ((newblk = (*checkblkavail)(blkno, frags)) == 0) 1146 continue; 1147 if (newblk > 0) 1148 return (newblk); 1149 if (newblk < 0) 1150 blkno = -newblk; 1151 } 1152 for (blkno = MAX(cgdata(&sblock, 0), 0); 1153 blkno < cgbase(&sblock, startcg) - sblock.fs_frag; 1154 blkno += sblock.fs_frag) { 1155 if ((newblk = (*checkblkavail)(blkno, frags)) == 0) 1156 continue; 1157 if (newblk > 0) 1158 return (newblk); 1159 if (newblk < 0) 1160 blkno = -newblk; 1161 } 1162 return (0); 1163 } 1164 1165 ufs2_daddr_t 1166 std_checkblkavail(blkno, frags) 1167 ufs2_daddr_t blkno; 1168 long frags; 1169 { 1170 struct bufarea *cgbp; 1171 struct cg *cgp; 1172 ufs2_daddr_t j, k, baseblk; 1173 long cg; 1174 1175 if ((u_int64_t)blkno > sblock.fs_size) 1176 return (0); 1177 for (j = 0; j <= sblock.fs_frag - frags; j++) { 1178 if (testbmap(blkno + j)) 1179 continue; 1180 for (k = 1; k < frags; k++) 1181 if (testbmap(blkno + j + k)) 1182 break; 1183 if (k < frags) { 1184 j += k; 1185 continue; 1186 } 1187 cg = dtog(&sblock, blkno + j); 1188 cgbp = cglookup(cg); 1189 cgp = cgbp->b_un.b_cg; 1190 if (!check_cgmagic(cg, cgbp, 0)) 1191 return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); 1192 baseblk = dtogd(&sblock, blkno + j); 1193 for (k = 0; k < frags; k++) { 1194 setbmap(blkno + j + k); 1195 clrbit(cg_blksfree(cgp), baseblk + k); 1196 } 1197 n_blks += frags; 1198 if (frags == sblock.fs_frag) 1199 cgp->cg_cs.cs_nbfree--; 1200 else 1201 cgp->cg_cs.cs_nffree -= frags; 1202 cgdirty(cgbp); 1203 return (blkno + j); 1204 } 1205 return (0); 1206 } 1207 1208 /* 1209 * Slow down IO so as to leave some disk bandwidth for other processes 1210 */ 1211 void 1212 slowio_start() 1213 { 1214 1215 /* Delay one in every 8 operations */ 1216 slowio_pollcnt = (slowio_pollcnt + 1) & 7; 1217 if (slowio_pollcnt == 0) { 1218 gettimeofday(&slowio_starttime, NULL); 1219 } 1220 } 1221 1222 void 1223 slowio_end() 1224 { 1225 struct timeval tv; 1226 int delay_usec; 1227 1228 if (slowio_pollcnt != 0) 1229 return; 1230 1231 /* Update the slowdown interval. */ 1232 gettimeofday(&tv, NULL); 1233 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + 1234 (tv.tv_usec - slowio_starttime.tv_usec); 1235 if (delay_usec < 64) 1236 delay_usec = 64; 1237 if (delay_usec > 2500000) 1238 delay_usec = 2500000; 1239 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; 1240 /* delay by 8 times the average IO delay */ 1241 if (slowio_delay_usec > 64) 1242 usleep(slowio_delay_usec * 8); 1243 } 1244 1245 /* 1246 * Find a pathname 1247 */ 1248 void 1249 getpathname(char *namebuf, ino_t curdir, ino_t ino) 1250 { 1251 int len; 1252 char *cp; 1253 struct inode ip; 1254 struct inodesc idesc; 1255 static int busy = 0; 1256 1257 if (curdir == ino && ino == UFS_ROOTINO) { 1258 (void)strcpy(namebuf, "/"); 1259 return; 1260 } 1261 if (busy || !INO_IS_DVALID(curdir)) { 1262 (void)strcpy(namebuf, "?"); 1263 return; 1264 } 1265 busy = 1; 1266 memset(&idesc, 0, sizeof(struct inodesc)); 1267 idesc.id_type = DATA; 1268 idesc.id_fix = IGNORE; 1269 cp = &namebuf[MAXPATHLEN - 1]; 1270 *cp = '\0'; 1271 if (curdir != ino) { 1272 idesc.id_parent = curdir; 1273 goto namelookup; 1274 } 1275 while (ino != UFS_ROOTINO) { 1276 idesc.id_number = ino; 1277 idesc.id_func = findino; 1278 idesc.id_name = strdup(".."); 1279 ginode(ino, &ip); 1280 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1281 irelse(&ip); 1282 break; 1283 } 1284 irelse(&ip); 1285 namelookup: 1286 idesc.id_number = idesc.id_parent; 1287 idesc.id_parent = ino; 1288 idesc.id_func = findname; 1289 idesc.id_name = namebuf; 1290 ginode(idesc.id_number, &ip); 1291 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1292 irelse(&ip); 1293 break; 1294 } 1295 irelse(&ip); 1296 len = strlen(namebuf); 1297 cp -= len; 1298 memmove(cp, namebuf, (size_t)len); 1299 *--cp = '/'; 1300 if (cp < &namebuf[UFS_MAXNAMLEN]) 1301 break; 1302 ino = idesc.id_number; 1303 } 1304 busy = 0; 1305 if (ino != UFS_ROOTINO) 1306 *--cp = '?'; 1307 memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); 1308 } 1309 1310 void 1311 catch(int sig __unused) 1312 { 1313 1314 ckfini(0); 1315 exit(12); 1316 } 1317 1318 /* 1319 * When preening, allow a single quit to signal 1320 * a special exit after file system checks complete 1321 * so that reboot sequence may be interrupted. 1322 */ 1323 void 1324 catchquit(int sig __unused) 1325 { 1326 printf("returning to single-user after file system check\n"); 1327 returntosingle = 1; 1328 (void)signal(SIGQUIT, SIG_DFL); 1329 } 1330 1331 /* 1332 * determine whether an inode should be fixed. 1333 */ 1334 int 1335 dofix(struct inodesc *idesc, const char *msg) 1336 { 1337 1338 switch (idesc->id_fix) { 1339 1340 case DONTKNOW: 1341 if (idesc->id_type == DATA) 1342 direrror(idesc->id_number, msg); 1343 else 1344 pwarn("%s", msg); 1345 if (preen) { 1346 printf(" (SALVAGED)\n"); 1347 idesc->id_fix = FIX; 1348 return (ALTERED); 1349 } 1350 if (reply("SALVAGE") == 0) { 1351 idesc->id_fix = NOFIX; 1352 return (0); 1353 } 1354 idesc->id_fix = FIX; 1355 return (ALTERED); 1356 1357 case FIX: 1358 return (ALTERED); 1359 1360 case NOFIX: 1361 case IGNORE: 1362 return (0); 1363 1364 default: 1365 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); 1366 } 1367 /* NOTREACHED */ 1368 return (0); 1369 } 1370 1371 #include <stdarg.h> 1372 1373 /* 1374 * Print details about a buffer. 1375 */ 1376 void 1377 prtbuf(struct bufarea *bp, const char *fmt, ...) 1378 { 1379 va_list ap; 1380 va_start(ap, fmt); 1381 if (preen) 1382 (void)fprintf(stdout, "%s: ", cdevname); 1383 (void)vfprintf(stdout, fmt, ap); 1384 va_end(ap); 1385 printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " 1386 "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, 1387 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", 1388 (intmax_t) bp->b_index); 1389 } 1390 1391 /* 1392 * An unexpected inconsistency occurred. 1393 * Die if preening or file system is running with soft dependency protocol, 1394 * otherwise just print message and continue. 1395 */ 1396 void 1397 pfatal(const char *fmt, ...) 1398 { 1399 va_list ap; 1400 va_start(ap, fmt); 1401 if (!preen) { 1402 (void)vfprintf(stdout, fmt, ap); 1403 va_end(ap); 1404 if (usedsoftdep) 1405 (void)fprintf(stdout, 1406 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); 1407 /* 1408 * Force foreground fsck to clean up inconsistency. 1409 */ 1410 if (bkgrdflag) { 1411 cmd.value = FS_NEEDSFSCK; 1412 cmd.size = 1; 1413 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1414 &cmd, sizeof cmd) == -1) 1415 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1416 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); 1417 ckfini(0); 1418 exit(EEXIT); 1419 } 1420 return; 1421 } 1422 if (cdevname == NULL) 1423 cdevname = strdup("fsck"); 1424 (void)fprintf(stdout, "%s: ", cdevname); 1425 (void)vfprintf(stdout, fmt, ap); 1426 (void)fprintf(stdout, 1427 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", 1428 cdevname, usedsoftdep ? " SOFT UPDATE " : " "); 1429 /* 1430 * Force foreground fsck to clean up inconsistency. 1431 */ 1432 if (bkgrdflag) { 1433 cmd.value = FS_NEEDSFSCK; 1434 cmd.size = 1; 1435 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1436 &cmd, sizeof cmd) == -1) 1437 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1438 } 1439 ckfini(0); 1440 exit(EEXIT); 1441 } 1442 1443 /* 1444 * Pwarn just prints a message when not preening or running soft dependency 1445 * protocol, or a warning (preceded by filename) when preening. 1446 */ 1447 void 1448 pwarn(const char *fmt, ...) 1449 { 1450 va_list ap; 1451 va_start(ap, fmt); 1452 if (preen) 1453 (void)fprintf(stdout, "%s: ", cdevname); 1454 (void)vfprintf(stdout, fmt, ap); 1455 va_end(ap); 1456 } 1457 1458 /* 1459 * Stub for routines from kernel. 1460 */ 1461 void 1462 panic(const char *fmt, ...) 1463 { 1464 va_list ap; 1465 va_start(ap, fmt); 1466 pfatal("INTERNAL INCONSISTENCY:"); 1467 (void)vfprintf(stdout, fmt, ap); 1468 va_end(ap); 1469 exit(EEXIT); 1470 } 1471