1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/stat.h>

#include <ufs/ufs/dinode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ffs/fs.h>

#include <err.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <fstab.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#include "fsck.h"

int sujrecovery = 0;		/* nonzero while SUJ journal recovery runs */

static struct bufarea *allocbuf(const char *);
static void cg_write(struct bufarea *);
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);

static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass;	/* per-pass timing window */
struct timeval slowio_starttime;
int slowio_delay_usec = 10000;	/* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk;	/* backup buffer for cylinder group blocks */
static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static struct bufhash freebufs;	/* unused buffers */
static int numbufs;		/* size of buffer cache */
static int cachelookups;	/* number of cache lookups */
static int cachereads;		/* number of cache reads */
static int flushtries;		/* number of tries to reclaim memory */

char *buftype[BT_NUMBUFTYPES] = BT_NAMES;

/*
 * Reset the module's statistics and slow-I/O state so a fresh
 * filesystem check starts from a clean slate.
 */
void
fsutilinit(void)
{
	diskreads = totaldiskreads = totalreads = 0;
	bzero(&startpass, sizeof(struct timespec));
	bzero(&finishpass, sizeof(struct timespec));
	bzero(&slowio_starttime, sizeof(struct timeval));
	slowio_delay_usec = 10000;
	slowio_pollcnt = 0;
	flushtries = 0;
}

/*
 * Return 1 if the inode's mode holds a legal file type, 0 otherwise.
 * With debug set, an illegal type is reported in octal.
 */
int
ftypeok(union dinode *dp)
{
	switch (DIP(dp, di_mode) & IFMT) {

	case IFDIR:
	case IFREG:
	case IFBLK:
	case IFCHR:
	case IFLNK:
	case IFSOCK:
	case IFIFO:
		return (1);

	default:
		if (debug)
			printf("bad file type 0%o\n", DIP(dp, di_mode));
		return (0);
	}
}

/*
 * Ask the operator a yes/no question and return 1 for yes, 0 for no.
 * In -y mode (or for the two "persevere" questions under -n) the answer
 * is forced to yes; in -n mode, or when the device is not writable and
 * no background check is in progress, it is forced to no.  A "no"
 * answer clears the global resolved flag.  Must not be reached in
 * preen mode.
 */
int
reply(const char *question)
{
	int persevere;
	char c;		/* NOTE(review): getc() returns int; storing into
			 * char loses the EOF distinction — the feof() test
			 * below covers termination, but confirm intent. */

	if (preen)
		pfatal("INTERNAL ERROR: GOT TO reply()");
	/* These two questions default to "yes" even under -n. */
	persevere = strcmp(question, "CONTINUE") == 0 ||
	    strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0;
	printf("\n");
	if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) {
		printf("%s? no\n\n", question);
		resolved = 0;
		return (0);
	}
	if (yflag || (persevere && nflag)) {
		printf("%s? yes\n\n", question);
		return (1);
	}
	do {
		printf("%s? [yn] ", question);
		(void) fflush(stdout);
		c = getc(stdin);
		/* Consume the rest of the input line; bail out on EOF. */
		while (c != '\n' && getc(stdin) != '\n') {
			if (feof(stdin)) {
				resolved = 0;
				return (0);
			}
		}
	} while (c != 'y' && c != 'Y' && c != 'n' && c != 'N');
	printf("\n");
	if (c == 'y' || c == 'Y')
		return (1);
	resolved = 0;
	return (0);
}

/*
 * Look up state information for an inode.
 * Out-of-range inode numbers are fatal; numbers beyond what has been
 * allocated in the per-cg state array map to a shared "unallocated"
 * record.
 */
struct inostat *
inoinfo(ino_t inum)
{
	static struct inostat unallocated = { USTATE, 0, 0, 0 };
	struct inostatlist *ilp;
	int iloff;

	if (inum >= maxino)
		errx(EEXIT, "inoinfo: inumber %ju out of range",
		    (uintmax_t)inum);
	ilp = &inostathead[inum / sblock.fs_ipg];
	iloff = inum % sblock.fs_ipg;
	if (iloff >= ilp->il_numalloced)
		return (&unallocated);
	return (&ilp->il_stat[iloff]);
}

/*
 * Malloc buffers and set up cache.
 */
void
bufinit(void)
{
	int i;

	/* The sentinel buffer handed out when getdatablk() fails. */
	initbarea(&failedbuf, BT_UNKNOWN);
	failedbuf.b_errs = -1;
	failedbuf.b_un.b_buf = NULL;
	if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL)
		errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
	initbarea(&cgblk, BT_CYLGRP);
	numbufs = cachelookups = cachereads = 0;
	TAILQ_INIT(&bufqueuehd);
	LIST_INIT(&freebufs);
	for (i = 0; i < HASHSIZE; i++)
		LIST_INIT(&bufhashhd[i]);
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
		readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
		readcnt[i] = totalreadcnt[i] = 0;
	}
}

/*
 * Allocate a new cache buffer (header plus fs_bsize data area) and
 * insert it at the head of the LRU.  Allocation failure is fatal,
 * reported with the caller-supplied message.
 */
static struct bufarea *
allocbuf(const char *failreason)
{
	struct bufarea *bp;
	char *bufp;

	bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
	bufp = Balloc((unsigned int)sblock.fs_bsize);
	if (bp == NULL || bufp == NULL) {
		errx(EEXIT, "%s", failreason);
		/* NOTREACHED */
	}
	numbufs++;
	bp->b_un.b_buf = bufp;
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	initbarea(bp, BT_UNKNOWN);
	return (bp);
}

/*
 * Manage cylinder group buffers.
 *
 * Use getblk() here rather than cgget() because the cylinder group
 * may be corrupted but we want it anyway so we can fix it.
 */
static struct bufarea *cgbufs;	/* header for cylinder group cache */
static int flushtries;		/* number of tries to reclaim memory */

/*
 * Return the (possibly cached) buffer for cylinder group cg, reading
 * it from disk on first use.  If memory is exhausted (flushtries != 0
 * or Balloc fails), fall back to the single shared cgblk buffer —
 * except during SUJ recovery, where running out of memory is fatal.
 */
struct bufarea *
cglookup(int cg)
{
	struct bufarea *cgbp;
	struct cg *cgp;

	if ((unsigned) cg >= sblock.fs_ncg)
		errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
	if (cgbufs == NULL) {
		/* Lazily allocate one cache slot per cylinder group. */
		cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea));
		if (cgbufs == NULL)
			errx(EEXIT, "Cannot allocate cylinder group buffers");
	}
	cgbp = &cgbufs[cg];
	if (cgbp->b_un.b_cg != NULL)
		return (cgbp);
	cgp = NULL;
	if (flushtries == 0)
		cgp = Balloc((unsigned int)sblock.fs_cgsize);
	if (cgp == NULL) {
		if (sujrecovery)
			errx(EEXIT,"Ran out of memory during journal recovery");
		/* Reuse the shared backup buffer for this request. */
		flush(fswritefd, &cgblk);
		getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
		return (&cgblk);
	}
	cgbp->b_un.b_cg = cgp;
	initbarea(cgbp, BT_CYLGRP);
	getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
	return (cgbp);
}

/*
 * Mark a cylinder group buffer as dirty.
 * Update its check-hash if they are enabled.
 */
void
cgdirty(struct bufarea *cgbp)
{
	struct cg *cg;

	cg = cgbp->b_un.b_cg;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
		/* Hash is computed with the hash field itself zeroed. */
		cg->cg_ckhash = 0;
		cg->cg_ckhash =
		    calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
	}
	dirty(cgbp);
}

/*
 * Attempt to flush a cylinder group cache entry.
 * Return whether the flush was successful.
 * Never reclaims during SUJ recovery; flushtries walks forward through
 * the cache so repeated calls release successive entries.
 */
int
flushentry(void)
{
	struct bufarea *cgbp;

	if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
		return (0);
	cgbp = &cgbufs[flushtries++];
	if (cgbp->b_un.b_cg == NULL)
		return (0);
	flush(fswritefd, cgbp);
	/* b_buf aliases b_cg in the union; free and clear the slot. */
	free(cgbp->b_un.b_buf);
	cgbp->b_un.b_buf = NULL;
	return (1);
}

/*
 * Manage a cache of filesystem disk blocks.
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_err == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	/* Cache hit: remove from the LRU; re-inserted at foundit. */
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffer back to the front of the LRU so we
	 * do not endless inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		/* Prefer a buffer previously released by binval(). */
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);	/* write back any dirty contents first */
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	/* Only error-free buffers gain a reference for the caller. */
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}

/*
 * Fill buffer bp with the disk block for filesystem block blk,
 * skipping the read when the buffer already holds that block.
 * With debug set, per-buffer-type read timing is accumulated.
 */
void
getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
{
	ufs2_daddr_t dblk;
	struct timespec start, finish;

	dblk = fsbtodb(&sblock, blk);
	if (bp->b_bno == dblk) {
		totalreads++;
	} else {
		if (debug) {
			readcnt[bp->b_type]++;
			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
		}
		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
		if (debug) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
			timespecsub(&finish, &start, &finish);
			timespecadd(&readtime[bp->b_type], &finish,
			    &readtime[bp->b_type]);
		}
		bp->b_bno = dblk;
		bp->b_size = size;
	}
}

/*
 * Release one reference on a buffer obtained from getdatablk().
 */
void
brelse(struct bufarea *bp)
{

	if (bp->b_refcnt <= 0)
		prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
}

/*
 * Invalidate a buffer: discard its dirty state and move it from the
 * hash list to the free list for reuse by getdatablk().
 */
void
binval(struct bufarea *bp)
{

	bp->b_flags &= ~B_DIRTY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
}

/*
 * Write a dirty buffer to disk using the writer appropriate to its
 * type (superblock, cylinder group, or raw block).  Inode buffers are
 * additionally check-hash verified in debug mode on UFS2 before being
 * written via the default path.  No-op when the buffer is clean; a
 * write attempt in read-only mode is reported and dropped.
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}

/*
 * If there are any snapshots, ensure that all the blocks that they
 * care about have been copied, then release the snapshot inodes.
 * These operations need to be done before we rebuild the cylinder
 * groups so that any block allocations are properly recorded.
 * Since all the cylinder group maps have already been copied in
 * the snapshots, no further snapshot copies will need to be done.
 */
void
snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
{
	struct bufarea *bp;
	int cnt;

	if (snapcnt > 0) {
		if (debug)
			printf("Check for snapshot copies\n");
		/* Copy-on-write every dirty buffer, oldest first. */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) != 0)
				copyonwrite(&sblock, bp, checkblkavail);
		for (cnt = 0; cnt < snapcnt; cnt++)
			irelse(&snaplist[cnt]);
		snapcnt = 0;
	}
}

/*
 * Journaled soft updates does not maintain cylinder group summary
 * information during cleanup, so this routine recalculates the summary
 * information and updates the superblock summary in preparation for
 * writing out the cylinder group.
 */
static void
cg_write(struct bufarea *bp)
{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;

	/*
	 * Fix the frag and cluster summary.
	 */
	cgp = bp->b_un.b_cg;
	cgp->cg_cs.cs_nbfree = 0;
	cgp->cg_cs.cs_nffree = 0;
	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
	if (sblock.fs_contigsumsize > 0) {
		for (i = 1; i <= sblock.fs_contigsumsize; i++)
			cg_clustersum(cgp)[i] = 0;
		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
	}
	/* Recount free blocks/frags from the free-block bitmap. */
	blksfree = cg_blksfree(cgp);
	for (cgbno = 0; cgbno < maxbno; cgbno++) {
		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
			continue;
		if (ffs_isblock(&sblock, blksfree, cgbno)) {
			ffs_clusteracct(&sblock, cgp, cgbno, 1);
			cgp->cg_cs.cs_nbfree++;
			continue;
		}
		fragno = blkstofrags(&sblock, cgbno);
		blk = blkmap(&sblock, blksfree, fragno);
		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
		for (i = 0; i < sblock.fs_frag; i++)
			if (isset(blksfree, fragno + i))
				cgp->cg_cs.cs_nffree++;
	}
	/*
	 * Update the superblock cg summary from our now correct values
	 * before writing the block.
	 */
	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}

/*
 * Report an I/O failure; fatal during background check, otherwise
 * exit unless the operator (or -y) agrees to continue.
 */
void
rwerror(const char *mesg, ufs2_daddr_t blk)
{

	if (bkgrdcheck)
		exit(EEXIT);
	if (preen == 0)
		printf("\n");
	pfatal("CANNOT %s: %ld", mesg, (long)blk);
	if (reply("CONTINUE") == 0)
		exit(EEXIT);
}

/*
 * Finish up a check: push the clean/dirty state to the kernel for
 * background checks, flush all cached buffers in a strict order,
 * update the superblock clean flag, free tracking structures, and
 * close the device descriptors.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		/* Tell the kernel whether the mounted fs is now clean. */
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 * 1) cylinder groups (bitmaps)
	 * 2) indirect, directory, external attribute, and data blocks
	 * 3) inode blocks
	 * 4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		/* Writing only the clean flag does not count as a repair. */
		ofsmodified = fsmodified;
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}

/*
 * Print out I/O statistics.
 */
void
IOstats(char *what)
{
	int i;

	if (debug == 0)
		return;
	if (diskreads == 0) {
		printf("%s: no I/O\n\n", what);
		return;
	}
	if (startpass.tv_sec == 0)
		startpass = startprog;
	printf("%s: I/O statistics\n", what);
	printIOstats();
	/* Fold this pass's counters into the running totals and reset. */
	totaldiskreads += diskreads;
	diskreads = 0;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i].tv_sec = readtime[i].tv_nsec = 0;
		readcnt[i] = 0;
	}
	clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
}

/*
 * Print cumulative I/O statistics for the whole run (debug only).
 * Merges the per-pass counters into the totals and reports against
 * the program start time.
 */
void
finalIOstats(void)
{
	int i;

	if (debug == 0)
		return;
	printf("Final I/O statistics\n");
	totaldiskreads += diskreads;
	diskreads = totaldiskreads;
	startpass = startprog;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i] = totalreadtime[i];
		readcnt[i] = totalreadcnt[i];
	}
	printIOstats();
}

/*
 * Print the running time and a per-buffer-type breakdown of read
 * counts and read time for the current measurement window.
 */
static void printIOstats(void)
{
	long long msec, totalmsec;
	int i;

	clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
	timespecsub(&finishpass, &startpass, &finishpass);
	printf("Running time: %jd.%03ld sec\n",
	    (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
	printf("buffer reads by type:\n");
	for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
		totalmsec += readtime[i].tv_sec * 1000 +
		    readtime[i].tv_nsec / 1000000;
	if (totalmsec == 0)
		totalmsec = 1;	/* avoid division by zero below */
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		if (readcnt[i] == 0)
			continue;
		msec =
		    readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
		printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
		    buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
		    (readcnt[i] * 1000 / diskreads) % 10,
		    (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
		    msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
	}
	printf("\n");
}

/*
 * Read a disk block.  On success return 0.  On failure, after the
 * error has been reported (fatal when surrender is set), retry the
 * region sector by sector, zero-filling the sectors that cannot be
 * read, and return the count of unreadable sectors.
 */
int
blread(int fd, char *buf, ufs2_daddr_t blk, long size)
{
	char *cp;
	int i, errs;
	off_t offset;

	offset = blk;
	offset *= dev_bsize;
	if (bkgrdflag)
		slowio_start();
	totalreads++;
	diskreads++;
	if (pread(fd, buf, (int)size, offset) == size) {
		if (bkgrdflag)
			slowio_end();
		return (0);
	}

	/*
	 * This is handled specially here instead of in rwerror because
	 * rwerror is used for all sorts of errors, not just true read/write
	 * errors. It should be refactored and fixed.
	 */
	if (surrender) {
		pfatal("CANNOT READ_BLK: %ld", (long)blk);
		errx(EEXIT, "ABORTING DUE TO READ ERRORS");
	} else
		rwerror("READ BLK", blk);

	/* Zero the buffer, then salvage whatever sectors still read. */
	errs = 0;
	memset(buf, 0, (size_t)size);
	printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
	for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
		if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
			if (secsize != dev_bsize && dev_bsize != 1)
				printf(" %jd (%jd),",
				    (intmax_t)(blk * dev_bsize + i) / secsize,
				    (intmax_t)blk + i / dev_bsize);
			else
				printf(" %jd,", (intmax_t)blk + i / dev_bsize);
			errs++;
		}
	}
	printf("\n");
	if (errs)
		resolved = 0;
	return (errs);
}

/*
 * Write a disk block.  On failure, report the error and retry the
 * region one device block at a time, listing the blocks that could
 * not be written.  A no-op when fd is invalid (< 0).
 */
void
blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
{
	int i;
	char *cp;
	off_t offset;

	if (fd < 0)
		return;
	offset = blk;
	offset *= dev_bsize;
	if (pwrite(fd, buf, size, offset) == size) {
		fsmodified = 1;
		return;
	}
	resolved = 0;
	rwerror("WRITE BLK", blk);
	printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
	for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
		if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
			printf(" %jd,", (intmax_t)blk + i / dev_bsize);
	printf("\n");
	return;
}
/*
 * Ask the device to discard (BIO_DELETE) the given block range.
 * Failure of the ioctl is deliberately ignored.
 */
void
blerase(int fd, ufs2_daddr_t blk, long size)
{
	off_t ioarg[2];

	if (fd < 0)
		return;
	ioarg[0] = blk * dev_bsize;
	ioarg[1] = size;
	ioctl(fd, DIOCGDELETE, ioarg);
	/* we don't really care if we succeed or not */
	return;
}

/*
 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by
 * definition a multiple of dev_bsize.
 */
void
blzero(int fd, ufs2_daddr_t blk, long size)
{
	static char *zero;	/* lazily allocated shared zero buffer */
	off_t offset, len;

	if (fd < 0)
		return;
	if (zero == NULL) {
		zero = Balloc(ZEROBUFSIZE);
		if (zero == NULL)
			errx(EEXIT, "cannot allocate buffer pool");
	}
	offset = blk * dev_bsize;
	if (lseek(fd, offset, 0) < 0)
		rwerror("SEEK BLK", blk);
	while (size > 0) {
		len = MIN(ZEROBUFSIZE, size);
		if (write(fd, zero, len) != len)
			rwerror("WRITE BLK", blk);
		blk += len / dev_bsize;
		size -= len;
	}
}

/*
 * Verify cylinder group's magic number and other parameters. If the
 * test fails, offer an option to rebuild the whole cylinder group.
 *
 * Return 1 if the cylinder group is good or return 0 if it is bad.
 */
/*
 * CHK expands in check_cgmagic(): it relies on the local variables
 * `cg` and `error` and warns when the tested relation holds.
 */
#undef CHK
#define CHK(lhs, op, rhs, fmt)						\
	if (lhs op rhs) {						\
		pwarn("UFS%d cylinder group %d failed: "		\
		    "%s (" #fmt ") %s %s (" #fmt ")\n",			\
		    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,	\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);	\
		error = 1;						\
	}
int
check_cgmagic(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	uint32_t cghash, calchash;
	static int prevfailcg = -1;	/* suppress repeated reports */
	long start;
	int error;

	/*
	 * Extended cylinder group checks.
	 */
	calchash = cgp->cg_ckhash;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
	    (ckhashadd & CK_CYLGRP) == 0) {
		/* Recompute the CRC with the hash field zeroed. */
		cghash = cgp->cg_ckhash;
		cgp->cg_ckhash = 0;
		calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
		cgp->cg_ckhash = cghash;
	}
	error = 0;
	CHK(cgp->cg_ckhash, !=, calchash, "%jd");
	CHK(cg_chkmagic(cgp), ==, 0, "%jd");
	CHK(cgp->cg_cgx, !=, cg, "%jd");
	CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd");
	} else if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
	}
	/* The last cylinder group may be short of a full fs_fpg. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
		CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
	} else {
		CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
		    "%jd");
	}
	/* Validate the internal map offsets against the expected layout. */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_iusedoff, !=, start, "%jd");
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_niblk, !=, 0, "%jd");
		CHK(cgp->cg_initediblk, !=, 0, "%jd");
		CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd");
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_btotoff, !=, start, "%jd");
		CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t), "%jd");
		CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
	}
	CHK(cgp->cg_freeoff, !=,
	    cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
	if (sblock.fs_contigsumsize == 0) {
		CHK(cgp->cg_nextfreeoff, !=,
		    cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
	} else {
		CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
		    "%jd");
		CHK(cgp->cg_clustersumoff, !=,
		    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
		    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
		    "%jd");
	}
	if (error == 0)
		return (1);
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}

/*
 * Rebuild a trashed cylinder group from scratch: zero it and
 * re-derive the critical layout fields from the superblock, then
 * mark it dirty.  Bit maps and summaries are left for later passes.
 */
void
rebuild_cg(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	long start;

	/*
	 * Zero out the cylinder group and then initialize critical fields.
	 * Bit maps and summaries will be recalculated by later passes.
	 */
	memset(cgp, 0, (size_t)sblock.fs_cgsize);
	cgp->cg_magic = CG_MAGIC;
	cgp->cg_cgx = cg;
	cgp->cg_niblk = sblock.fs_ipg;
	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
		cgp->cg_ndblk = sblock.fs_fpg;
	else
		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		cgp->cg_iusedoff = start;
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		/* UFS1 keeps the inode counts in the "old" fields. */
		cgp->cg_niblk = 0;
		cgp->cg_initediblk = 0;
		cgp->cg_old_ncyl = sblock.fs_old_cpg;
		cgp->cg_old_niblk = sblock.fs_ipg;
		cgp->cg_old_btotoff = start;
		cgp->cg_old_boff = cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t);
		cgp->cg_iusedoff = cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t);
	}
	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
	if (sblock.fs_contigsumsize > 0) {
		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
		cgp->cg_clustersumoff =
		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
		cgp->cg_clustersumoff -= sizeof(u_int32_t);
		cgp->cg_clusteroff = cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
	}
	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
	cgdirty(cgbp);
}

/*
 * allocate a data block with the specified number of fragments
 */
ufs2_daddr_t
allocblk(long startcg, long frags,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
	ufs2_daddr_t blkno, newblk;

	/* SUJ recovery must supply its own availability checker. */
	if (sujrecovery && checkblkavail == std_checkblkavail) {
		pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
		return (0);
	}
	if (frags <= 0 || frags > sblock.fs_frag)
		return (0);
	/*
	 * Search from startcg to the end of the filesystem, then wrap
	 * to the beginning.  checkblkavail() returns the allocated
	 * block (> 0), 0 to keep scanning, or a negative block number
	 * from which to resume.
	 */
	for (blkno = MAX(cgdata(&sblock, startcg), 0);
	     blkno < maxfsblock - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	for (blkno = MAX(cgdata(&sblock, 0), 0);
	     blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	return (0);
}

/*
 * Default block-availability checker for allocblk(): find `frags`
 * contiguous free fragments at or after blkno within one block,
 * claim them in the block map and cylinder group, and return the
 * first fragment number (0 if none available here).
 */
ufs2_daddr_t
std_checkblkavail(ufs2_daddr_t blkno, long frags)
{
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t j, k, baseblk;
	long cg;

	if ((u_int64_t)blkno > sblock.fs_size)
		return (0);
	for (j = 0; j <= sblock.fs_frag - frags; j++) {
		if (testbmap(blkno + j))
			continue;
		for (k = 1; k < frags; k++)
			if (testbmap(blkno + j + k))
				/*
				 * NOTE(review): continuation of the inner
				 * scan loop of std_checkblkavail(); the
				 * loop header is in the previous chunk.
				 */
				break;
		if (k < frags) {
			/* Run too short: skip past the busy fragment. */
			j += k;
			continue;
		}
		cg = dtog(&sblock, blkno + j);
		cgbp = cglookup(cg);
		cgp = cgbp->b_un.b_cg;
		/* On a corrupt group, hint allocblk to skip past it. */
		if (!check_cgmagic(cg, cgbp))
			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
		baseblk = dtogd(&sblock, blkno + j);
		/* Claim the run in both the fsck map and the cg bitmap. */
		for (k = 0; k < frags; k++) {
			setbmap(blkno + j + k);
			clrbit(cg_blksfree(cgp), baseblk + k);
		}
		n_blks += frags;
		if (frags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree--;
		else
			cgp->cg_cs.cs_nffree -= frags;
		cgdirty(cgbp);
		return (blkno + j);
	}
	return (0);
}

/*
 * Check whether a file size is within the limits for the filesystem.
 * Return 1 when valid and 0 when too big.
 *
 * This should match the file size limit in ffs_mountfs().
 */
int
chkfilesize(mode_t mode, u_int64_t filesize)
{
	u_int64_t kernmaxfilesize;

	/* UFS1's limit is derived from the block size; UFS2 stores it. */
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
	else
		kernmaxfilesize = sblock.fs_maxfilesize;
	if (filesize > kernmaxfilesize ||
	    filesize > sblock.fs_maxfilesize ||
	    (mode == IFDIR && filesize > MAXDIRSIZE)) {
		if (debug)
			printf("bad file size %ju:", (uintmax_t)filesize);
		return (0);
	}
	return (1);
}

/*
 * Slow down IO so as to leave some disk bandwidth for other processes
 */
void
slowio_start()
{

	/* Delay one in every 8 operations */
	slowio_pollcnt = (slowio_pollcnt + 1) & 7;
	if (slowio_pollcnt == 0) {
		gettimeofday(&slowio_starttime, NULL);
	}
}

/*
 * Companion to slowio_start(): on every eighth operation, measure the
 * elapsed IO time, fold it into a running average, and sleep for eight
 * times that average so background fsck leaves bandwidth for others.
 */
void
slowio_end()
{
	struct timeval tv;
	int delay_usec;

	/* Only act on the operations that slowio_start() timestamped. */
	if (slowio_pollcnt != 0)
		return;

	/* Update the slowdown interval. */
	gettimeofday(&tv, NULL);
	delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 +
	    (tv.tv_usec - slowio_starttime.tv_usec);
	/* Clamp the measured delay to [64, 2500000] microseconds. */
	if (delay_usec < 64)
		delay_usec = 64;
	if (delay_usec > 2500000)
		delay_usec = 2500000;
	/* Exponential moving average, weight 63/64 on history. */
	slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6;
	/* delay by 8 times the average IO delay */
	if (slowio_delay_usec > 64)
		usleep(slowio_delay_usec * 8);
}

/*
 * Find a pathname
 *
 * Build the pathname of inode "ino" into namebuf (assumed to be at
 * least MAXPATHLEN bytes) by repeatedly locating each inode's parent
 * via its ".." entry and then looking up the inode's name in that
 * parent.  "curdir" seeds the lookup when it differs from ino.  Any
 * component that cannot be resolved leaves a "?" marker instead.
 */
void
getpathname(char *namebuf, ino_t curdir, ino_t ino)
{
	int len;
	char *cp;
	struct inode ip;
	struct inodesc idesc;
	static int busy = 0;

	if (curdir == ino && ino == UFS_ROOTINO) {
		(void)strcpy(namebuf, "/");
		return;
	}
	/* Refuse reentrant calls and invalid starting directories. */
	if (busy || !INO_IS_DVALID(curdir)) {
		(void)strcpy(namebuf, "?");
		return;
	}
	busy = 1;
	memset(&idesc, 0, sizeof(struct inodesc));
	idesc.id_type = DATA;
	idesc.id_fix = IGNORE;
	/* The path is assembled backwards from the end of the buffer. */
	cp = &namebuf[MAXPATHLEN - 1];
	*cp = '\0';
	if (curdir != ino) {
		idesc.id_parent = curdir;
		goto namelookup;
	}
	while (ino != UFS_ROOTINO) {
		/* Find the parent of "ino" through its ".." entry. */
		idesc.id_number = ino;
		idesc.id_func = findino;
		idesc.id_name = strdup("..");
		ginode(ino, &ip);
		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
			irelse(&ip);
			free(idesc.id_name);
			break;
		}
		irelse(&ip);
		free(idesc.id_name);
	namelookup:
		/* Look up the name of "ino" within its parent. */
		idesc.id_number = idesc.id_parent;
		idesc.id_parent = ino;
		idesc.id_func = findname;
		idesc.id_name = namebuf;
		ginode(idesc.id_number, &ip);
		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
			irelse(&ip);
			break;
		}
		irelse(&ip);
		/* Prepend the component just found, plus a slash. */
		len = strlen(namebuf);
		cp -= len;
		memmove(cp, namebuf, (size_t)len);
		*--cp = '/';
		if (cp < &namebuf[UFS_MAXNAMLEN])
			break;
		ino = idesc.id_number;
	}
	busy = 0;
	if (ino != UFS_ROOTINO)
		*--cp = '?';
	memmove(namebuf,
	    /*
	     * NOTE(review): tail of getpathname() -- this completes the
	     * memmove() begun in the previous chunk, sliding the assembled
	     * path from the end of namebuf to its front.
	     */
	    cp, (size_t)(&namebuf[MAXPATHLEN] - cp));
}

/*
 * Signal handler: flush fsck state and exit with status 12.
 * NOTE(review): installed elsewhere in this file; presumably handles
 * SIGINT -- confirm against the setup code.
 */
void
catch(int sig __unused)
{

	ckfini(0);
	exit(12);
}

/*
 * When preening, allow a single quit to signal
 * a special exit after file system checks complete
 * so that reboot sequence may be interrupted.
 */
void
catchquit(int sig __unused)
{
	printf("returning to single-user after file system check\n");
	returntosingle = 1;
	/* A second SIGQUIT falls through to the default action. */
	(void)signal(SIGQUIT, SIG_DFL);
}

/*
 * determine whether an inode should be fixed.
 *
 * Reports the problem described by "msg" and decides, based on the
 * descriptor's fix mode, whether the caller should apply the repair.
 * Returns ALTERED when the fix should be made and 0 otherwise; in
 * DONTKNOW mode the decision is made by preen policy or by asking the
 * operator, and is cached in id_fix for subsequent calls.
 */
int
dofix(struct inodesc *idesc, const char *msg)
{

	switch (idesc->id_fix) {

	case DONTKNOW:
		if (idesc->id_type == DATA)
			direrror(idesc->id_number, msg);
		else
			pwarn("%s", msg);
		if (preen) {
			/* Preen mode always salvages without asking. */
			printf(" (SALVAGED)\n");
			idesc->id_fix = FIX;
			return (ALTERED);
		}
		if (reply("SALVAGE") == 0) {
			idesc->id_fix = NOFIX;
			return (0);
		}
		idesc->id_fix = FIX;
		return (ALTERED);

	case FIX:
		return (ALTERED);

	case NOFIX:
	case IGNORE:
		return (0);

	default:
		errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix);
	}
	/* NOTREACHED */
	return (0);
}

#include <stdarg.h>

/*
 * Print details about a buffer.
 */
void
prtbuf(struct bufarea *bp, const char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	/* Preen output is prefixed with the device name. */
	if (preen)
		(void)fprintf(stdout, "%s: ", cdevname);
	(void)vfprintf(stdout, fmt, ap);
	va_end(ap);
	printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
	    "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
	    bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
	    (intmax_t) bp->b_index);
}

/*
 * An unexpected inconsistency occurred.
 * Die if preening or file system is running with soft dependency protocol,
 * otherwise just print message and continue.
 */
void
pfatal(const char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	if (!preen) {
		(void)vfprintf(stdout, fmt, ap);
		va_end(ap);
		if (usedsoftdep)
			(void)fprintf(stdout,
			    "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n");
		/*
		 * Force foreground fsck to clean up inconsistency.
		 */
		if (bkgrdflag) {
			cmd.value = FS_NEEDSFSCK;
			cmd.size = 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
			fprintf(stdout, "CANNOT RUN IN BACKGROUND\n");
			ckfini(0);
			exit(EEXIT);
		}
		return;
	}
	/* Preen mode: identify the device, complain, and die. */
	if (cdevname == NULL)
		cdevname = strdup("fsck");
	(void)fprintf(stdout, "%s: ", cdevname);
	(void)vfprintf(stdout, fmt, ap);
	/*
	 * NOTE(review): "ap" is not va_end()ed on this path; harmless in
	 * practice since exit() follows, but technically required by C99.
	 */
	(void)fprintf(stdout,
	    "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n",
	    cdevname, usedsoftdep ? " SOFT UPDATE " : " ");
	/*
	 * Force foreground fsck to clean up inconsistency.
	 */
	if (bkgrdflag) {
		cmd.value = FS_NEEDSFSCK;
		cmd.size = 1;
		if (sysctlbyname("vfs.ffs.setflags", 0, 0,
		    &cmd, sizeof cmd) == -1)
			pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
	}
	ckfini(0);
	exit(EEXIT);
}

/*
 * Pwarn just prints a message when not preening or running soft dependency
 * protocol, or a warning (preceded by filename) when preening.
 */
void
pwarn(const char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	if (preen)
		(void)fprintf(stdout, "%s: ", cdevname);
	(void)vfprintf(stdout, fmt, ap);
	va_end(ap);
}

/*
 * Stub for routines from kernel.
 * pfatal() exits when preening or in background mode, so the trailing
 * vfprintf() and exit() run only in interactive foreground mode.
 */
void
panic(const char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	pfatal("INTERNAL INCONSISTENCY:");
	(void)vfprintf(stdout, fmt, ap);
	va_end(ap);
	exit(EEXIT);
}