1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/stat.h>

#include <ufs/ufs/dinode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ffs/fs.h>

#include <err.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <fstab.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#include "fsck.h"

/* Non-zero while running soft-updates journal (SUJ) recovery. */
int sujrecovery = 0;

static struct bufarea *allocbuf(const char *);
static void cg_write(struct bufarea *);
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);

static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass;
struct timeval slowio_starttime;
int slowio_delay_usec = 10000;	/* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk;	/* backup buffer for cylinder group blocks */
static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static struct bufhash freebufs;	/* unused buffers */
static int numbufs;		/* size of buffer cache */
static int cachelookups;	/* number of cache lookups */
static int cachereads;		/* number of cache reads */
static int flushtries;		/* number of tries to reclaim memory */

/* Human-readable names for each buffer type, indexed by BT_* constant. */
char *buftype[BT_NUMBUFTYPES] = BT_NAMES;

/*
 * Reset the I/O statistics and slow-I/O pacing state so that a fresh
 * check can be started with clean counters.
 */
void
fsutilinit(void)
{
	diskreads = totaldiskreads = totalreads = 0;
	bzero(&startpass, sizeof(struct timespec));
	bzero(&finishpass, sizeof(struct timespec));
	bzero(&slowio_starttime, sizeof(struct timeval));
	slowio_delay_usec = 10000;
	slowio_pollcnt = 0;
	flushtries = 0;
}

/*
 * Return 1 if the inode's mode field holds one of the known file types,
 * 0 otherwise (the bad mode is printed in debug mode).
 */
int
ftypeok(union dinode *dp)
{
	switch (DIP(dp, di_mode) & IFMT) {

	case IFDIR:
	case IFREG:
	case IFBLK:
	case IFCHR:
	case IFLNK:
	case IFSOCK:
	case IFIFO:
		return (1);

	default:
		if (debug)
			printf("bad file type 0%o\n", DIP(dp, di_mode));
		return (0);
	}
}

/*
 * Ask the operator a yes/no question and return 1 for yes, 0 for no.
 *
 * Never reached in preen mode (that is an internal error).  The two
 * "persevere" questions default to yes even under -n so that fsck can
 * keep going after an error.  Otherwise -n (or a read-only run outside
 * background mode) forces "no", -y forces "yes", and anything else
 * prompts on stdin until a y/n answer is read.  A "no" answer (or EOF)
 * clears the global `resolved` flag.
 */
int
reply(const char *question)
{
	int persevere;
	char c;

	if (preen)
		pfatal("INTERNAL ERROR: GOT TO reply()");
	persevere = strcmp(question, "CONTINUE") == 0 ||
	    strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0;
	printf("\n");
	if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) {
		printf("%s? no\n\n", question);
		resolved = 0;
		return (0);
	}
	if (yflag || (persevere && nflag)) {
		printf("%s? yes\n\n", question);
		return (1);
	}
	do {
		printf("%s? [yn] ", question);
		(void) fflush(stdout);
		c = getc(stdin);
		/* Consume the rest of the input line; bail out on EOF. */
		while (c != '\n' && getc(stdin) != '\n') {
			if (feof(stdin)) {
				resolved = 0;
				return (0);
			}
		}
	} while (c != 'y' && c != 'Y' && c != 'n' && c != 'N');
	printf("\n");
	if (c == 'y' || c == 'Y')
		return (1);
	resolved = 0;
	return (0);
}

/*
 * Look up state information for an inode.
 *
 * Out-of-range inode numbers are fatal.  Inodes past the allocated
 * portion of their cylinder group's state array are reported via a
 * shared static "unallocated" record, so callers must not modify the
 * returned structure in that case.
 */
struct inostat *
inoinfo(ino_t inum)
{
	static struct inostat unallocated = { USTATE, 0, 0, 0 };
	struct inostatlist *ilp;
	int iloff;

	if (inum >= maxino)
		errx(EEXIT, "inoinfo: inumber %ju out of range",
		    (uintmax_t)inum);
	ilp = &inostathead[inum / sblock.fs_ipg];
	iloff = inum % sblock.fs_ipg;
	if (iloff >= ilp->il_numalloced)
		return (&unallocated);
	return (&ilp->il_stat[iloff]);
}

/*
 * Malloc buffers and set up cache.
 */
/*
 * Initialize the buffer cache: the sentinel buffer returned when
 * getdatablk() fails, the backup cylinder-group buffer, the LRU queue,
 * the hash chains, the free list, and the per-type read statistics.
 */
void
bufinit(void)
{
	int i;

	initbarea(&failedbuf, BT_UNKNOWN);
	failedbuf.b_errs = -1;
	failedbuf.b_un.b_buf = NULL;
	if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL)
		errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
	initbarea(&cgblk, BT_CYLGRP);
	numbufs = cachelookups = cachereads = 0;
	TAILQ_INIT(&bufqueuehd);
	LIST_INIT(&freebufs);
	for (i = 0; i < HASHSIZE; i++)
		LIST_INIT(&bufhashhd[i]);
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
		readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
		readcnt[i] = totalreadcnt[i] = 0;
	}
}

/*
 * Allocate a new cache buffer (header plus one fs_bsize data area) and
 * insert it at the head of the LRU.  On allocation failure fsck exits
 * with the caller-supplied message; this function never returns NULL.
 */
static struct bufarea *
allocbuf(const char *failreason)
{
	struct bufarea *bp;
	char *bufp;

	bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
	bufp = Balloc((unsigned int)sblock.fs_bsize);
	if (bp == NULL || bufp == NULL) {
		errx(EEXIT, "%s", failreason);
		/* NOTREACHED */
	}
	numbufs++;
	bp->b_un.b_buf = bufp;
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	initbarea(bp, BT_UNKNOWN);
	return (bp);
}

/*
 * Manage cylinder group buffers.
 *
 * Use getblk() here rather than cgget() because the cylinder group
 * may be corrupted but we want it anyway so we can fix it.
 */
static struct bufarea *cgbufs;	/* header for cylinder group cache */
/*
 * NOTE(review): flushtries is also tentatively defined earlier in this
 * file; both declarations name the same file-scope object.
 */
static int flushtries;		/* number of tries to reclaim memory */

/*
 * Return the (cached) buffer for cylinder group `cg`, reading it from
 * disk on first use.  If memory for a new cylinder group cannot be
 * obtained, the shared backup buffer `cgblk` is flushed and reused
 * instead (fatal during SUJ recovery, which must not write anything).
 */
struct bufarea *
cglookup(int cg)
{
	struct bufarea *cgbp;
	struct cg *cgp;

	if ((unsigned) cg >= sblock.fs_ncg)
		errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
	if (cgbufs == NULL) {
		cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea));
		if (cgbufs == NULL)
			errx(EEXIT, "Cannot allocate cylinder group buffers");
	}
	cgbp = &cgbufs[cg];
	if (cgbp->b_un.b_cg != NULL)
		return (cgbp);
	cgp = NULL;
	/* Once flushentry() has been used, stop growing the cg cache. */
	if (flushtries == 0)
		cgp = Balloc((unsigned int)sblock.fs_cgsize);
	if (cgp == NULL) {
		if (sujrecovery)
			errx(EEXIT,"Ran out of memory during journal recovery");
		flush(fswritefd, &cgblk);
		getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
		return (&cgblk);
	}
	cgbp->b_un.b_cg = cgp;
	initbarea(cgbp, BT_CYLGRP);
	getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
	return (cgbp);
}

/*
 * Mark a cylinder group buffer as dirty.
 * Update its check-hash if they are enabled.
 */
void
cgdirty(struct bufarea *cgbp)
{
	struct cg *cg;

	cg = cgbp->b_un.b_cg;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
		/* Hash is computed with the stored hash field zeroed. */
		cg->cg_ckhash = 0;
		cg->cg_ckhash =
		    calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
	}
	dirty(cgbp);
}

/*
 * Attempt to flush a cylinder group cache entry.
 * Return whether the flush was successful.
 */
int
flushentry(void)
{
	struct bufarea *cgbp;

	if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
		return (0);
	cgbp = &cgbufs[flushtries++];
	if (cgbp->b_un.b_cg == NULL)
		return (0);
	flush(fswritefd, cgbp);
	free(cgbp->b_un.b_buf);
	cgbp->b_un.b_buf = NULL;
	return (1);
}

/*
 * Manage a cache of filesystem disk blocks.
 */
/*
 * Look up (or read in) the buffer for filesystem block `blkno`.
 *
 * On a cache hit the buffer is moved to the head of the LRU.  On a miss
 * a buffer is obtained from the free list, by allocation (up to
 * MINBUFS), or by recycling the oldest eligible buffer — only clean
 * buffers may be recycled during SUJ recovery.  The caller receives a
 * reference (via b_refcnt) unless the read failed; failed range checks
 * return the shared `failedbuf` sentinel whose b_errs is -1.
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_err == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffer back to the front of the LRU so we
	 * do not endless inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	/* Write back any dirty contents before the buffer is reused. */
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}

/*
 * Fill `bp` with filesystem block `blk` of `size` bytes.  If the buffer
 * already holds that disk block only the read counter is bumped;
 * otherwise the block is read from disk (timed per buffer type when
 * debugging) and the buffer's location/size are updated.
 */
void
getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
{
	ufs2_daddr_t dblk;
	struct timespec start, finish;

	dblk = fsbtodb(&sblock, blk);
	if (bp->b_bno == dblk) {
		totalreads++;
	} else {
		if (debug) {
			readcnt[bp->b_type]++;
			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
		}
		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
		if (debug) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
			timespecsub(&finish, &start, &finish);
			timespecadd(&readtime[bp->b_type], &finish,
			    &readtime[bp->b_type]);
		}
		bp->b_bno = dblk;
		bp->b_size = size;
	}
}

/*
 * Release one reference on a buffer obtained from getdatablk().
 */
void
brelse(struct bufarea *bp)
{

	if (bp->b_refcnt <= 0)
		prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
}

/*
 * Invalidate a buffer: discard its dirty state and move it from the
 * hash chains to the free list so it can be reused without a writeback.
 */
void
binval(struct bufarea *bp)
{

	bp->b_flags &= ~B_DIRTY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
}

/*
 * Write a dirty buffer to disk and clear its dirty flag.
 *
 * Clean buffers are ignored.  Superblocks go through sbput(), cylinder
 * groups through cgput() (with their summaries rebuilt first during SUJ
 * recovery); inode blocks get a debug-only check-hash audit and then,
 * like all remaining types, are copied into any snapshots before being
 * written with blwrite().
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}

/*
 * If there are any snapshots, ensure that all the blocks that they
 * care about have been copied, then release the snapshot inodes.
 * These operations need to be done before we rebuild the cylinder
 * groups so that any block allocations are properly recorded.
 * Since all the cylinder group maps have already been copied in
 * the snapshots, no further snapshot copies will need to be done.
 */
void
snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
{
	struct bufarea *bp;
	int cnt;

	if (snapcnt > 0) {
		if (debug)
			printf("Check for snapshot copies\n");
		/* Copy-on-write every dirty buffer, then drop snapshots. */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) != 0)
				copyonwrite(&sblock, bp, checkblkavail);
		for (cnt = 0; cnt < snapcnt; cnt++)
			irelse(&snaplist[cnt]);
		snapcnt = 0;
	}
}

/*
 * Journaled soft updates does not maintain cylinder group summary
 * information during cleanup, so this routine recalculates the summary
 * information and updates the superblock summary in preparation for
 * writing out the cylinder group.
 */
static void
cg_write(struct bufarea *bp)
{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;

	/*
	 * Fix the frag and cluster summary.
	 */
	cgp = bp->b_un.b_cg;
	cgp->cg_cs.cs_nbfree = 0;
	cgp->cg_cs.cs_nffree = 0;
	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
	if (sblock.fs_contigsumsize > 0) {
		for (i = 1; i <= sblock.fs_contigsumsize; i++)
			cg_clustersum(cgp)[i] = 0;
		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
	}
	blksfree = cg_blksfree(cgp);
	/* Recount free blocks and fragments from the free-block bitmap. */
	for (cgbno = 0; cgbno < maxbno; cgbno++) {
		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
			continue;
		if (ffs_isblock(&sblock, blksfree, cgbno)) {
			ffs_clusteracct(&sblock, cgp, cgbno, 1);
			cgp->cg_cs.cs_nbfree++;
			continue;
		}
		fragno = blkstofrags(&sblock, cgbno);
		blk = blkmap(&sblock, blksfree, fragno);
		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
		for (i = 0; i < sblock.fs_frag; i++)
			if (isset(blksfree, fragno + i))
				cgp->cg_cs.cs_nffree++;
	}
	/*
	 * Update the superblock cg summary from our now correct values
	 * before writing the block.
	 */
	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}

/*
 * Report a fatal read/write error on block `blk` and, unless the
 * operator chooses to continue, exit.  Background checks exit
 * immediately without prompting.
 */
void
rwerror(const char *mesg, ufs2_daddr_t blk)
{

	if (bkgrdcheck)
		exit(EEXIT);
	if (preen == 0)
		printf("\n");
	pfatal("CANNOT %s: %ld", mesg, (long)blk);
	if (reply("CONTINUE") == 0)
		exit(EEXIT);
}

/*
 * Finish up a check: write back all cached state in a safe order, mark
 * the filesystem clean or dirty as requested, free the tracking
 * structures, and close the device descriptors.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		/* Push the clean/dirty state into the mounted fs via sysctl. */
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 * 1) cylinder groups (bitmaps)
	 * 2) indirect, directory, external attribute, and data blocks
	 * 3) inode blocks
	 * 4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		/* The clean-flag write alone must not count as a repair. */
		ofsmodified = fsmodified;
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}

/*
 * Print out I/O statistics.
 */
/*
 * In debug mode, print per-pass I/O statistics labelled with `what`,
 * then fold the per-pass counters into the running totals and restart
 * the pass timer.
 */
void
IOstats(char *what)
{
	int i;

	if (debug == 0)
		return;
	if (diskreads == 0) {
		printf("%s: no I/O\n\n", what);
		return;
	}
	if (startpass.tv_sec == 0)
		startpass = startprog;
	printf("%s: I/O statistics\n", what);
	printIOstats();
	totaldiskreads += diskreads;
	diskreads = 0;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i].tv_sec = readtime[i].tv_nsec = 0;
		readcnt[i] = 0;
	}
	clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
}

/*
 * In debug mode, print the whole-run I/O statistics by loading the
 * accumulated totals back into the per-pass counters and printing them
 * against the program start time.
 */
void
finalIOstats(void)
{
	int i;

	if (debug == 0)
		return;
	printf("Final I/O statistics\n");
	totaldiskreads += diskreads;
	diskreads = totaldiskreads;
	startpass = startprog;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i] = totalreadtime[i];
		readcnt[i] = totalreadcnt[i];
	}
	printIOstats();
}

/*
 * Print elapsed time and a per-buffer-type breakdown of read counts and
 * read times (as percentages of reads and of total read time).
 */
static void printIOstats(void)
{
	long long msec, totalmsec;
	int i;

	clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
	timespecsub(&finishpass, &startpass, &finishpass);
	printf("Running time: %jd.%03ld sec\n",
	    (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
	printf("buffer reads by type:\n");
	for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
		totalmsec += readtime[i].tv_sec * 1000 +
		    readtime[i].tv_nsec / 1000000;
	/* Avoid dividing by zero when no read time was recorded. */
	if (totalmsec == 0)
		totalmsec = 1;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		if (readcnt[i] == 0)
			continue;
		msec =
		    readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
		printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
		    buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
		    (readcnt[i] * 1000 / diskreads) % 10,
		    (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
		    msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
	}
	printf("\n");
}

/*
 * Read `size` bytes at filesystem block `blk` into `buf`.
 *
 * Returns 0 on success.  On failure the buffer is zeroed and re-read
 * one sector at a time; the unreadable sector numbers are printed and
 * the count of bad sectors is returned (surrender mode aborts instead).
 */
int
blread(int fd, char *buf, ufs2_daddr_t blk, long size)
{
	char *cp;
	int i, errs;
	off_t offset;

	offset = blk;
	offset *= dev_bsize;
	if (bkgrdflag)
		slowio_start();
	totalreads++;
	diskreads++;
	if (pread(fd, buf, (int)size, offset) == size) {
		if (bkgrdflag)
			slowio_end();
		return (0);
	}

	/*
	 * This is handled specially here instead of in rwerror because
	 * rwerror is used for all sorts of errors, not just true read/write
	 * errors. It should be refactored and fixed.
	 */
	if (surrender) {
		pfatal("CANNOT READ_BLK: %ld", (long)blk);
		errx(EEXIT, "ABORTING DUE TO READ ERRORS");
	} else
		rwerror("READ BLK", blk);

	errs = 0;
	memset(buf, 0, (size_t)size);
	printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
	for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
		if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
			if (secsize != dev_bsize && dev_bsize != 1)
				printf(" %jd (%jd),",
				    (intmax_t)(blk * dev_bsize + i) / secsize,
				    (intmax_t)blk + i / dev_bsize);
			else
				printf(" %jd,", (intmax_t)blk + i / dev_bsize);
			errs++;
		}
	}
	printf("\n");
	if (errs)
		resolved = 0;
	return (errs);
}

/*
 * Write `size` bytes from `buf` at filesystem block `blk`.
 *
 * On failure the write is retried one device sector at a time and the
 * unwritable sector numbers are printed; `resolved` is cleared.  A
 * negative fd is silently ignored.
 */
void
blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
{
	int i;
	char *cp;
	off_t offset;

	if (fd < 0)
		return;
	offset = blk;
	offset *= dev_bsize;
	if (pwrite(fd, buf, size, offset) == size) {
		fsmodified = 1;
		return;
	}
	resolved = 0;
	rwerror("WRITE BLK", blk);
	printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
	for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
		if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
			printf(" %jd,", (intmax_t)blk + i / dev_bsize);
	printf("\n");
	return;
}
/*
 * Issue a BIO_DELETE (TRIM) for the given block range; failures are
 * intentionally ignored.
 */
void
blerase(int fd, ufs2_daddr_t blk, long size)
{
	off_t ioarg[2];

	if (fd < 0)
		return;
	ioarg[0] = blk * dev_bsize;
	ioarg[1] = size;
	ioctl(fd, DIOCGDELETE, ioarg);
	/* we don't really care if we succeed or not */
	return;
}

/*
 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by
 * definition a multiple of dev_bsize.
 */
void
blzero(int fd, ufs2_daddr_t blk, long size)
{
	static char *zero;
	off_t offset, len;

	if (fd < 0)
		return;
	if (zero == NULL) {
		zero = Balloc(ZEROBUFSIZE);
		if (zero == NULL)
			errx(EEXIT, "cannot allocate buffer pool");
	}
	offset = blk * dev_bsize;
	if (lseek(fd, offset, 0) < 0)
		rwerror("SEEK BLK", blk);
	while (size > 0) {
		len = MIN(ZEROBUFSIZE, size);
		if (write(fd, zero, len) != len)
			rwerror("WRITE BLK", blk);
		blk += len / dev_bsize;
		size -= len;
	}
}

/*
 * Verify cylinder group's magic number and other parameters. If the
 * test fails, offer an option to rebuild the whole cylinder group.
 *
 * Return 1 if the cylinder group is good or return 0 if it is bad.
 */
#undef CHK
/* Report one failed consistency condition and record the error. */
#define CHK(lhs, op, rhs, fmt)						\
	if (lhs op rhs) {						\
		pwarn("UFS%d cylinder group %d failed: "		\
		    "%s (" #fmt ") %s %s (" #fmt ")\n",			\
		    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,	\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);	\
		error = 1;						\
	}
int
check_cgmagic(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	uint32_t cghash, calchash;
	static int prevfailcg = -1;	/* suppress repeated reports */
	long start;
	int error;

	/*
	 * Extended cylinder group checks.
	 */
	calchash = cgp->cg_ckhash;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
	    (ckhashadd & CK_CYLGRP) == 0) {
		/* Recompute the CRC with the stored hash field zeroed. */
		cghash = cgp->cg_ckhash;
		cgp->cg_ckhash = 0;
		calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
		cgp->cg_ckhash = cghash;
	}
	error = 0;
	CHK(cgp->cg_ckhash, !=, calchash, "%jd");
	CHK(cg_chkmagic(cgp), ==, 0, "%jd");
	CHK(cgp->cg_cgx, !=, cg, "%jd");
	CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
	} else if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
	}
	/* The last cylinder group may be shorter than fs_fpg fragments. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
		CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
	} else {
		CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
		    "%jd");
	}
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_iusedoff, !=, start, "%jd");
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_niblk, !=, 0, "%jd");
		CHK(cgp->cg_initediblk, !=, 0, "%jd");
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_btotoff, !=, start, "%jd");
		CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t), "%jd");
		CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
	}
	CHK(cgp->cg_freeoff, !=,
	    cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
	if (sblock.fs_contigsumsize == 0) {
		CHK(cgp->cg_nextfreeoff, !=,
		    cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
	} else {
		CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
		    "%jd");
		CHK(cgp->cg_clustersumoff, !=,
		    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
		    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
		    "%jd");
	}
	if (error == 0)
		return (1);
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}

/*
 * Reinitialize a corrupted cylinder group's header in place and mark it
 * dirty; free-block/inode maps and summaries are left zeroed for later
 * passes to rebuild.
 */
void
rebuild_cg(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	long start;

	/*
	 * Zero out the cylinder group and then initialize critical fields.
	 * Bit maps and summaries will be recalculated by later passes.
	 */
	memset(cgp, 0, (size_t)sblock.fs_cgsize);
	cgp->cg_magic = CG_MAGIC;
	cgp->cg_cgx = cg;
	cgp->cg_niblk = sblock.fs_ipg;
	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
		cgp->cg_ndblk = sblock.fs_fpg;
	else
		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		cgp->cg_iusedoff = start;
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		cgp->cg_niblk = 0;
		cgp->cg_initediblk = 0;
		cgp->cg_old_ncyl = sblock.fs_old_cpg;
		cgp->cg_old_niblk = sblock.fs_ipg;
		cgp->cg_old_btotoff = start;
		cgp->cg_old_boff = cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t);
		cgp->cg_iusedoff = cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t);
	}
	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
	if (sblock.fs_contigsumsize > 0) {
		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
		cgp->cg_clustersumoff =
		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
		cgp->cg_clustersumoff -= sizeof(u_int32_t);
		cgp->cg_clusteroff = cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
	}
	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
	cgdirty(cgbp);
}

/*
 * allocate a data block with the specified number of fragments
 */
ufs2_daddr_t
allocblk(long startcg, long frags,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
	ufs2_daddr_t blkno, newblk;

	if (sujrecovery && checkblkavail == std_checkblkavail) {
		pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
		return (0);
	}
	if (frags <= 0 || frags > sblock.fs_frag)
		return (0);
	/*
	 * Search from the preferred cylinder group to the end of the
	 * filesystem; a negative return from checkblkavail means "skip
	 * ahead to -newblk".
	 */
	for (blkno = MAX(cgdata(&sblock, startcg), 0);
	     blkno < maxfsblock - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	/* Wrap around: search from the first cylinder group to startcg. */
	for (blkno = MAX(cgdata(&sblock, 0), 0);
	     blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	return (0);
}

/*
 * Default block-availability callback for allocblk(): find `frags`
 * consecutive fragments near `blkno` that are free in the in-core block
 * map, claim them (block map, cg free map, counts), and return the
 * first fragment number.  Returns 0 if none found here, or a negative
 * block number telling allocblk() to skip past a bad cylinder group.
 */
ufs2_daddr_t
std_checkblkavail(ufs2_daddr_t blkno, long frags)
{
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t j, k, baseblk;
	long cg;

	if ((u_int64_t)blkno > sblock.fs_size)
		return (0);
	for (j = 0; j <= sblock.fs_frag - frags; j++) {
		if (testbmap(blkno + j))
			continue;
		for (k = 1; k < frags; k++)
			if (testbmap(blkno + j + k))
				break;
		if (k < frags) {
			j += k;
			continue;
		}
		cg = dtog(&sblock, blkno + j);
		cgbp = cglookup(cg);
		cgp = cgbp->b_un.b_cg;
		if (!check_cgmagic(cg, cgbp))
			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
		baseblk = dtogd(&sblock, blkno + j);
		for (k = 0; k < frags; k++) {
			setbmap(blkno + j + k);
			clrbit(cg_blksfree(cgp), baseblk + k);
		}
		n_blks += frags;
		if (frags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree--;
		else
			cgp->cg_cs.cs_nffree -= frags;
		cgdirty(cgbp);
		return (blkno + j);
	}
	return (0);
}

/*
 * Check whether a file size is within the limits for the filesystem.
 * Return 1 when valid and 0 when too big.
 *
 * This should match the file size limit in ffs_mountfs().
 */
int
chkfilesize(mode_t mode, u_int64_t filesize)
{
	u_int64_t kernmaxfilesize;

	if (sblock.fs_magic == FS_UFS1_MAGIC)
		kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
	else
		kernmaxfilesize = sblock.fs_maxfilesize;
	if (filesize > kernmaxfilesize ||
	    filesize > sblock.fs_maxfilesize ||
	    (mode == IFDIR && filesize > MAXDIRSIZE)) {
		if (debug)
			printf("bad file size %ju:", (uintmax_t)filesize);
		return (0);
	}
	return (1);
}

/*
 * Slow down IO so as to leave some disk bandwidth for other processes
 */
void
slowio_start()
{

	/* Delay one in every 8 operations */
	slowio_pollcnt = (slowio_pollcnt + 1) & 7;
	if (slowio_pollcnt == 0) {
		gettimeofday(&slowio_starttime, NULL);
	}
}

void
slowio_end()
{
	struct timeval tv;
	int delay_usec;

	if (slowio_pollcnt != 0)
		return;

	/* Update the slowdown interval.
*/ 1247 gettimeofday(&tv, NULL); 1248 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + 1249 (tv.tv_usec - slowio_starttime.tv_usec); 1250 if (delay_usec < 64) 1251 delay_usec = 64; 1252 if (delay_usec > 2500000) 1253 delay_usec = 2500000; 1254 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; 1255 /* delay by 8 times the average IO delay */ 1256 if (slowio_delay_usec > 64) 1257 usleep(slowio_delay_usec * 8); 1258 } 1259 1260 /* 1261 * Find a pathname 1262 */ 1263 void 1264 getpathname(char *namebuf, ino_t curdir, ino_t ino) 1265 { 1266 int len; 1267 char *cp; 1268 struct inode ip; 1269 struct inodesc idesc; 1270 static int busy = 0; 1271 1272 if (curdir == ino && ino == UFS_ROOTINO) { 1273 (void)strcpy(namebuf, "/"); 1274 return; 1275 } 1276 if (busy || !INO_IS_DVALID(curdir)) { 1277 (void)strcpy(namebuf, "?"); 1278 return; 1279 } 1280 busy = 1; 1281 memset(&idesc, 0, sizeof(struct inodesc)); 1282 idesc.id_type = DATA; 1283 idesc.id_fix = IGNORE; 1284 cp = &namebuf[MAXPATHLEN - 1]; 1285 *cp = '\0'; 1286 if (curdir != ino) { 1287 idesc.id_parent = curdir; 1288 goto namelookup; 1289 } 1290 while (ino != UFS_ROOTINO) { 1291 idesc.id_number = ino; 1292 idesc.id_func = findino; 1293 idesc.id_name = strdup(".."); 1294 ginode(ino, &ip); 1295 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1296 irelse(&ip); 1297 free(idesc.id_name); 1298 break; 1299 } 1300 irelse(&ip); 1301 free(idesc.id_name); 1302 namelookup: 1303 idesc.id_number = idesc.id_parent; 1304 idesc.id_parent = ino; 1305 idesc.id_func = findname; 1306 idesc.id_name = namebuf; 1307 ginode(idesc.id_number, &ip); 1308 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1309 irelse(&ip); 1310 break; 1311 } 1312 irelse(&ip); 1313 len = strlen(namebuf); 1314 cp -= len; 1315 memmove(cp, namebuf, (size_t)len); 1316 *--cp = '/'; 1317 if (cp < &namebuf[UFS_MAXNAMLEN]) 1318 break; 1319 ino = idesc.id_number; 1320 } 1321 busy = 0; 1322 if (ino != UFS_ROOTINO) 1323 *--cp = '?'; 1324 memmove(namebuf, 
cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); 1325 } 1326 1327 void 1328 catch(int sig __unused) 1329 { 1330 1331 ckfini(0); 1332 exit(12); 1333 } 1334 1335 /* 1336 * When preening, allow a single quit to signal 1337 * a special exit after file system checks complete 1338 * so that reboot sequence may be interrupted. 1339 */ 1340 void 1341 catchquit(int sig __unused) 1342 { 1343 printf("returning to single-user after file system check\n"); 1344 returntosingle = 1; 1345 (void)signal(SIGQUIT, SIG_DFL); 1346 } 1347 1348 /* 1349 * determine whether an inode should be fixed. 1350 */ 1351 int 1352 dofix(struct inodesc *idesc, const char *msg) 1353 { 1354 1355 switch (idesc->id_fix) { 1356 1357 case DONTKNOW: 1358 if (idesc->id_type == DATA) 1359 direrror(idesc->id_number, msg); 1360 else 1361 pwarn("%s", msg); 1362 if (preen) { 1363 printf(" (SALVAGED)\n"); 1364 idesc->id_fix = FIX; 1365 return (ALTERED); 1366 } 1367 if (reply("SALVAGE") == 0) { 1368 idesc->id_fix = NOFIX; 1369 return (0); 1370 } 1371 idesc->id_fix = FIX; 1372 return (ALTERED); 1373 1374 case FIX: 1375 return (ALTERED); 1376 1377 case NOFIX: 1378 case IGNORE: 1379 return (0); 1380 1381 default: 1382 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); 1383 } 1384 /* NOTREACHED */ 1385 return (0); 1386 } 1387 1388 #include <stdarg.h> 1389 1390 /* 1391 * Print details about a buffer. 1392 */ 1393 void 1394 prtbuf(struct bufarea *bp, const char *fmt, ...) 1395 { 1396 va_list ap; 1397 va_start(ap, fmt); 1398 if (preen) 1399 (void)fprintf(stdout, "%s: ", cdevname); 1400 (void)vfprintf(stdout, fmt, ap); 1401 va_end(ap); 1402 printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " 1403 "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, 1404 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", 1405 (intmax_t) bp->b_index); 1406 } 1407 1408 /* 1409 * An unexpected inconsistency occurred. 
1410 * Die if preening or file system is running with soft dependency protocol, 1411 * otherwise just print message and continue. 1412 */ 1413 void 1414 pfatal(const char *fmt, ...) 1415 { 1416 va_list ap; 1417 va_start(ap, fmt); 1418 if (!preen) { 1419 (void)vfprintf(stdout, fmt, ap); 1420 va_end(ap); 1421 if (usedsoftdep) 1422 (void)fprintf(stdout, 1423 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); 1424 /* 1425 * Force foreground fsck to clean up inconsistency. 1426 */ 1427 if (bkgrdflag) { 1428 cmd.value = FS_NEEDSFSCK; 1429 cmd.size = 1; 1430 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1431 &cmd, sizeof cmd) == -1) 1432 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1433 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); 1434 ckfini(0); 1435 exit(EEXIT); 1436 } 1437 return; 1438 } 1439 if (cdevname == NULL) 1440 cdevname = strdup("fsck"); 1441 (void)fprintf(stdout, "%s: ", cdevname); 1442 (void)vfprintf(stdout, fmt, ap); 1443 (void)fprintf(stdout, 1444 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", 1445 cdevname, usedsoftdep ? " SOFT UPDATE " : " "); 1446 /* 1447 * Force foreground fsck to clean up inconsistency. 1448 */ 1449 if (bkgrdflag) { 1450 cmd.value = FS_NEEDSFSCK; 1451 cmd.size = 1; 1452 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1453 &cmd, sizeof cmd) == -1) 1454 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1455 } 1456 ckfini(0); 1457 exit(EEXIT); 1458 } 1459 1460 /* 1461 * Pwarn just prints a message when not preening or running soft dependency 1462 * protocol, or a warning (preceded by filename) when preening. 1463 */ 1464 void 1465 pwarn(const char *fmt, ...) 1466 { 1467 va_list ap; 1468 va_start(ap, fmt); 1469 if (preen) 1470 (void)fprintf(stdout, "%s: ", cdevname); 1471 (void)vfprintf(stdout, fmt, ap); 1472 va_end(ap); 1473 } 1474 1475 /* 1476 * Stub for routines from kernel. 1477 */ 1478 void 1479 panic(const char *fmt, ...) 
1480 { 1481 va_list ap; 1482 va_start(ap, fmt); 1483 pfatal("INTERNAL INCONSISTENCY:"); 1484 (void)vfprintf(stdout, fmt, ap); 1485 va_end(ap); 1486 exit(EEXIT); 1487 } 1488