1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 */ 31 32 #if 0 33 #endif 34 #include <sys/cdefs.h> 35 #include <sys/param.h> 36 #include <sys/time.h> 37 #include <sys/types.h> 38 #include <sys/sysctl.h> 39 #include <sys/disk.h> 40 #include <sys/disklabel.h> 41 #include <sys/ioctl.h> 42 #include <sys/stat.h> 43 44 #include <ufs/ufs/dinode.h> 45 #include <ufs/ufs/dir.h> 46 #include <ufs/ffs/fs.h> 47 48 #include <err.h> 49 #include <errno.h> 50 #include <string.h> 51 #include <ctype.h> 52 #include <fstab.h> 53 #include <stdint.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <time.h> 57 #include <unistd.h> 58 59 #include "fsck.h" 60 61 int sujrecovery = 0; 62 63 static struct bufarea *allocbuf(const char *); 64 static void cg_write(struct bufarea *); 65 static void slowio_start(void); 66 static void slowio_end(void); 67 static void printIOstats(void); 68 69 static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ 70 static struct timespec startpass, finishpass; 71 struct timeval slowio_starttime; 72 int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ 73 int slowio_pollcnt; 74 static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ 75 static struct bufarea failedbuf; /* returned by failed getdatablk() */ 76 static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ 77 static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ 78 static struct bufhash freebufs; /* unused buffers */ 79 static int numbufs; /* size of buffer cache */ 80 static int cachelookups; /* number of cache lookups */ 81 static int cachereads; /* number of cache reads */ 82 static int flushtries; /* number of tries to reclaim memory */ 83 84 char *buftype[BT_NUMBUFTYPES] = BT_NAMES; 85 86 void 87 fsutilinit(void) 88 { 89 diskreads = totaldiskreads = totalreads = 0; 90 bzero(&startpass, sizeof(struct timespec)); 91 bzero(&finishpass, sizeof(struct timespec)); 92 bzero(&slowio_starttime, sizeof(struct timeval)); 93 slowio_delay_usec 
= 10000; 94 slowio_pollcnt = 0; 95 flushtries = 0; 96 } 97 98 int 99 ftypeok(union dinode *dp) 100 { 101 switch (DIP(dp, di_mode) & IFMT) { 102 103 case IFDIR: 104 case IFREG: 105 case IFBLK: 106 case IFCHR: 107 case IFLNK: 108 case IFSOCK: 109 case IFIFO: 110 return (1); 111 112 default: 113 if (debug) 114 printf("bad file type 0%o\n", DIP(dp, di_mode)); 115 return (0); 116 } 117 } 118 119 int 120 reply(const char *question) 121 { 122 int persevere; 123 char c; 124 125 if (preen) 126 pfatal("INTERNAL ERROR: GOT TO reply()"); 127 persevere = strcmp(question, "CONTINUE") == 0 || 128 strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0; 129 printf("\n"); 130 if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { 131 printf("%s? no\n\n", question); 132 resolved = 0; 133 return (0); 134 } 135 if (yflag || (persevere && nflag)) { 136 printf("%s? yes\n\n", question); 137 return (1); 138 } 139 do { 140 printf("%s? [yn] ", question); 141 (void) fflush(stdout); 142 c = getc(stdin); 143 while (c != '\n' && getc(stdin) != '\n') { 144 if (feof(stdin)) { 145 resolved = 0; 146 return (0); 147 } 148 } 149 } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); 150 printf("\n"); 151 if (c == 'y' || c == 'Y') 152 return (1); 153 resolved = 0; 154 return (0); 155 } 156 157 /* 158 * Look up state information for an inode. 159 */ 160 struct inostat * 161 inoinfo(ino_t inum) 162 { 163 static struct inostat unallocated = { USTATE, 0, 0, 0 }; 164 struct inostatlist *ilp; 165 int iloff; 166 167 if (inum >= maxino) 168 errx(EEXIT, "inoinfo: inumber %ju out of range", 169 (uintmax_t)inum); 170 ilp = &inostathead[inum / sblock.fs_ipg]; 171 iloff = inum % sblock.fs_ipg; 172 if (iloff >= ilp->il_numalloced) 173 return (&unallocated); 174 return (&ilp->il_stat[iloff]); 175 } 176 177 /* 178 * Malloc buffers and set up cache. 
179 */ 180 void 181 bufinit(void) 182 { 183 int i; 184 185 initbarea(&failedbuf, BT_UNKNOWN); 186 failedbuf.b_errs = -1; 187 failedbuf.b_un.b_buf = NULL; 188 if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL) 189 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); 190 initbarea(&cgblk, BT_CYLGRP); 191 numbufs = cachelookups = cachereads = 0; 192 TAILQ_INIT(&bufqueuehd); 193 LIST_INIT(&freebufs); 194 for (i = 0; i < HASHSIZE; i++) 195 LIST_INIT(&bufhashhd[i]); 196 for (i = 0; i < BT_NUMBUFTYPES; i++) { 197 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; 198 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; 199 readcnt[i] = totalreadcnt[i] = 0; 200 } 201 } 202 203 static struct bufarea * 204 allocbuf(const char *failreason) 205 { 206 struct bufarea *bp; 207 char *bufp; 208 209 bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); 210 bufp = Balloc((unsigned int)sblock.fs_bsize); 211 if (bp == NULL || bufp == NULL) { 212 errx(EEXIT, "%s", failreason); 213 /* NOTREACHED */ 214 } 215 numbufs++; 216 bp->b_un.b_buf = bufp; 217 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 218 initbarea(bp, BT_UNKNOWN); 219 return (bp); 220 } 221 222 /* 223 * Manage cylinder group buffers. 224 * 225 * Use getblk() here rather than cgget() because the cylinder group 226 * may be corrupted but we want it anyway so we can fix it. 
227 */ 228 static struct bufarea *cgbufs; /* header for cylinder group cache */ 229 static int flushtries; /* number of tries to reclaim memory */ 230 231 struct bufarea * 232 cglookup(int cg) 233 { 234 struct bufarea *cgbp; 235 struct cg *cgp; 236 237 if ((unsigned) cg >= sblock.fs_ncg) 238 errx(EEXIT, "cglookup: out of range cylinder group %d", cg); 239 if (cgbufs == NULL) { 240 cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea)); 241 if (cgbufs == NULL) 242 errx(EEXIT, "Cannot allocate cylinder group buffers"); 243 } 244 cgbp = &cgbufs[cg]; 245 if (cgbp->b_un.b_cg != NULL) 246 return (cgbp); 247 cgp = NULL; 248 if (flushtries == 0) 249 cgp = Balloc((unsigned int)sblock.fs_cgsize); 250 if (cgp == NULL) { 251 if (sujrecovery) 252 errx(EEXIT,"Ran out of memory during journal recovery"); 253 flush(fswritefd, &cgblk); 254 getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); 255 return (&cgblk); 256 } 257 cgbp->b_un.b_cg = cgp; 258 initbarea(cgbp, BT_CYLGRP); 259 getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); 260 return (cgbp); 261 } 262 263 /* 264 * Mark a cylinder group buffer as dirty. 265 * Update its check-hash if they are enabled. 266 */ 267 void 268 cgdirty(struct bufarea *cgbp) 269 { 270 struct cg *cg; 271 272 cg = cgbp->b_un.b_cg; 273 if ((sblock.fs_metackhash & CK_CYLGRP) != 0) { 274 cg->cg_ckhash = 0; 275 cg->cg_ckhash = 276 calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize); 277 } 278 dirty(cgbp); 279 } 280 281 /* 282 * Attempt to flush a cylinder group cache entry. 283 * Return whether the flush was successful. 284 */ 285 int 286 flushentry(void) 287 { 288 struct bufarea *cgbp; 289 290 if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL) 291 return (0); 292 cgbp = &cgbufs[flushtries++]; 293 if (cgbp->b_un.b_cg == NULL) 294 return (0); 295 flush(fswritefd, cgbp); 296 free(cgbp->b_un.b_buf); 297 cgbp->b_un.b_buf = NULL; 298 return (1); 299 } 300 301 /* 302 * Manage a cache of filesystem disk blocks. 
 */
/*
 * Return a cache buffer for filesystem block `blkno' of `size' bytes,
 * tagged with buffer type `type'.
 *
 * On a cache hit the buffer is moved to the head of the LRU queue.  On
 * a miss a buffer is taken from the free list, newly allocated, or
 * recycled from the tail of the LRU queue, flushed if dirty, and then
 * filled by getblk().  The reference count is bumped only when the
 * buffer has no read errors.  An out-of-range request returns the
 * shared failedbuf.
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_errs == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	/* Look for the block on its hash chain. */
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			/* Unlink now; foundit re-inserts at the LRU head. */
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffer back to the front of the LRU so we
	 * do not endlessly inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		/* Reuse a buffer previously released by binval(). */
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	/* Write back any old contents, then rebind the buffer to blkno. */
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}

/*
 * Fill buffer `bp' with filesystem block `blk' of `size' bytes.
 *
 * If the buffer already records that disk block, no I/O is done and
 * only totalreads is incremented.  Otherwise the block is read with
 * blread(), whose result is stored in b_errs, and b_bno/b_size are
 * updated.  Per-type read counts and times are collected only when
 * debugging.
 */
void
getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
{
	ufs2_daddr_t dblk;
	struct timespec start, finish;

	dblk = fsbtodb(&sblock, blk);
	if (bp->b_bno == dblk) {
		totalreads++;
	} else {
		if (debug) {
			readcnt[bp->b_type]++;
			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
		}
		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
		if (debug) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
			timespecsub(&finish, &start, &finish);
			timespecadd(&readtime[bp->b_type], &finish,
			    &readtime[bp->b_type]);
		}
		bp->b_bno = dblk;
		bp->b_size = size;
	}
}

/*
 * Drop one reference to a buffer.  A count that is already zero or
 * negative is reported through prtbuf() but still decremented.
 */
void
brelse(struct bufarea *bp)
{

	if (bp->b_refcnt <= 0)
		prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
}

/*
 * Invalidate a buffer: discard its dirty state without writing it and
 * move it from its hash chain to the free list.
 */
void
binval(struct bufarea *bp)
{

	bp->b_flags &= ~B_DIRTY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
}

/*
 * Write a dirty buffer to descriptor `fd' and mark it clean.
 *
 * Clean buffers are ignored.  The dirty flag is cleared before the
 * write is attempted.  With no writable descriptor (fswritefd < 0) the
 * write is refused with a fatal message.  The write routine depends on
 * the buffer type: sbput() for the superblock, cgput() for cylinder
 * groups (after cg_write() during journal recovery), and blwrite()
 * preceded by copyonwrite() for everything else.
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	/* b_errs holds the count of unreadable sectors from blread(). */
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		/* Debug only: verify each UFS2 inode check-hash before writing. */
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}

/*
 * If there are any snapshots, ensure that all the blocks that they
 * care about have been copied, then release the snapshot inodes.
515 * These operations need to be done before we rebuild the cylinder 516 * groups so that any block allocations are properly recorded. 517 * Since all the cylinder group maps have already been copied in 518 * the snapshots, no further snapshot copies will need to be done. 519 */ 520 void 521 snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)) 522 { 523 struct bufarea *bp; 524 int cnt; 525 526 if (snapcnt > 0) { 527 if (debug) 528 printf("Check for snapshot copies\n"); 529 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 530 if ((bp->b_flags & B_DIRTY) != 0) 531 copyonwrite(&sblock, bp, checkblkavail); 532 for (cnt = 0; cnt < snapcnt; cnt++) 533 irelse(&snaplist[cnt]); 534 snapcnt = 0; 535 } 536 } 537 538 /* 539 * Journaled soft updates does not maintain cylinder group summary 540 * information during cleanup, so this routine recalculates the summary 541 * information and updates the superblock summary in preparation for 542 * writing out the cylinder group. 543 */ 544 static void 545 cg_write(struct bufarea *bp) 546 { 547 ufs1_daddr_t fragno, cgbno, maxbno; 548 u_int8_t *blksfree; 549 struct csum *csp; 550 struct cg *cgp; 551 int blk; 552 int i; 553 554 /* 555 * Fix the frag and cluster summary. 
556 */ 557 cgp = bp->b_un.b_cg; 558 cgp->cg_cs.cs_nbfree = 0; 559 cgp->cg_cs.cs_nffree = 0; 560 bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 561 maxbno = fragstoblks(&sblock, sblock.fs_fpg); 562 if (sblock.fs_contigsumsize > 0) { 563 for (i = 1; i <= sblock.fs_contigsumsize; i++) 564 cg_clustersum(cgp)[i] = 0; 565 bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 566 } 567 blksfree = cg_blksfree(cgp); 568 for (cgbno = 0; cgbno < maxbno; cgbno++) { 569 if (ffs_isfreeblock(&sblock, blksfree, cgbno)) 570 continue; 571 if (ffs_isblock(&sblock, blksfree, cgbno)) { 572 ffs_clusteracct(&sblock, cgp, cgbno, 1); 573 cgp->cg_cs.cs_nbfree++; 574 continue; 575 } 576 fragno = blkstofrags(&sblock, cgbno); 577 blk = blkmap(&sblock, blksfree, fragno); 578 ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1); 579 for (i = 0; i < sblock.fs_frag; i++) 580 if (isset(blksfree, fragno + i)) 581 cgp->cg_cs.cs_nffree++; 582 } 583 /* 584 * Update the superblock cg summary from our now correct values 585 * before writing the block. 586 */ 587 csp = &sblock.fs_cs(&sblock, cgp->cg_cgx); 588 sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir; 589 sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree; 590 sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree; 591 sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree; 592 sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; 593 } 594 595 void 596 rwerror(const char *mesg, ufs2_daddr_t blk) 597 { 598 599 if (bkgrdcheck) 600 exit(EEXIT); 601 if (preen == 0) 602 printf("\n"); 603 pfatal("CANNOT %s: %ld", mesg, (long)blk); 604 if (reply("CONTINUE") == 0) 605 exit(EEXIT); 606 } 607 608 void 609 ckfini(int markclean) 610 { 611 struct bufarea *bp, *nbp; 612 int ofsmodified, cnt, cg; 613 614 if (bkgrdflag) { 615 if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { 616 cmd.value = FS_UNCLEAN; 617 cmd.size = markclean ? 
-1 : 1; 618 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 619 &cmd, sizeof cmd) == -1) 620 pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n"); 621 if (!preen) { 622 printf("\n***** FILE SYSTEM MARKED %s *****\n", 623 markclean ? "CLEAN" : "DIRTY"); 624 if (!markclean) 625 rerun = 1; 626 } 627 } else if (!preen && !markclean) { 628 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 629 rerun = 1; 630 } 631 bkgrdflag = 0; 632 } 633 if (debug && cachelookups > 0) 634 printf("cache with %d buffers missed %d of %d (%d%%)\n", 635 numbufs, cachereads, cachelookups, 636 (int)(cachereads * 100 / cachelookups)); 637 if (fswritefd < 0) { 638 (void)close(fsreadfd); 639 return; 640 } 641 642 /* 643 * To remain idempotent with partial truncations the buffers 644 * must be flushed in this order: 645 * 1) cylinder groups (bitmaps) 646 * 2) indirect, directory, external attribute, and data blocks 647 * 3) inode blocks 648 * 4) superblock 649 * This ordering preserves access to the modified pointers 650 * until they are freed. 
651 */ 652 /* Step 1: cylinder groups */ 653 if (debug) 654 printf("Flush Cylinder groups\n"); 655 if (cgbufs != NULL) { 656 for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { 657 if (cgbufs[cnt].b_un.b_cg == NULL) 658 continue; 659 flush(fswritefd, &cgbufs[cnt]); 660 free(cgbufs[cnt].b_un.b_cg); 661 } 662 free(cgbufs); 663 cgbufs = NULL; 664 } 665 flush(fswritefd, &cgblk); 666 free(cgblk.b_un.b_buf); 667 cgblk.b_un.b_buf = NULL; 668 cnt = 0; 669 /* Step 2: indirect, directory, external attribute, and data blocks */ 670 if (debug) 671 printf("Flush indirect, directory, external attribute, " 672 "and data blocks\n"); 673 if (pdirbp != NULL) { 674 brelse(pdirbp); 675 pdirbp = NULL; 676 } 677 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 678 switch (bp->b_type) { 679 /* These should not be in the buffer cache list */ 680 case BT_UNKNOWN: 681 case BT_SUPERBLK: 682 case BT_CYLGRP: 683 default: 684 prtbuf(bp,"ckfini: improper buffer type on cache list"); 685 continue; 686 /* These are the ones to flush in this step */ 687 case BT_LEVEL1: 688 case BT_LEVEL2: 689 case BT_LEVEL3: 690 case BT_EXTATTR: 691 case BT_DIRDATA: 692 case BT_DATA: 693 break; 694 /* These are the ones to flush in the next step */ 695 case BT_INODES: 696 continue; 697 } 698 if (debug && bp->b_refcnt != 0) 699 prtbuf(bp, "ckfini: clearing in-use buffer"); 700 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 701 LIST_REMOVE(bp, b_hash); 702 cnt++; 703 flush(fswritefd, bp); 704 free(bp->b_un.b_buf); 705 free((char *)bp); 706 } 707 /* Step 3: inode blocks */ 708 if (debug) 709 printf("Flush inode blocks\n"); 710 if (icachebp != NULL) { 711 brelse(icachebp); 712 icachebp = NULL; 713 } 714 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 715 if (debug && bp->b_refcnt != 0) 716 prtbuf(bp, "ckfini: clearing in-use buffer"); 717 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 718 LIST_REMOVE(bp, b_hash); 719 cnt++; 720 flush(fswritefd, bp); 721 free(bp->b_un.b_buf); 722 free((char *)bp); 723 } 
724 if (numbufs != cnt) 725 errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); 726 /* Step 4: superblock */ 727 if (debug) 728 printf("Flush the superblock\n"); 729 flush(fswritefd, &sblk); 730 if (havesb && cursnapshot == 0 && 731 sblk.b_bno != sblock.fs_sblockloc / dev_bsize) { 732 if (preen || reply("UPDATE STANDARD SUPERBLOCK")) { 733 /* Change write destination to standard superblock */ 734 sblock.fs_sblockactualloc = sblock.fs_sblockloc; 735 sblk.b_bno = sblock.fs_sblockloc / dev_bsize; 736 sbdirty(); 737 flush(fswritefd, &sblk); 738 } else { 739 markclean = 0; 740 } 741 } 742 if (cursnapshot == 0 && sblock.fs_clean != markclean) { 743 if ((sblock.fs_clean = markclean) != 0) { 744 sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); 745 sblock.fs_pendingblocks = 0; 746 sblock.fs_pendinginodes = 0; 747 } 748 sbdirty(); 749 ofsmodified = fsmodified; 750 flush(fswritefd, &sblk); 751 fsmodified = ofsmodified; 752 if (!preen) { 753 printf("\n***** FILE SYSTEM MARKED %s *****\n", 754 markclean ? "CLEAN" : "DIRTY"); 755 if (!markclean) 756 rerun = 1; 757 } 758 } else if (!preen) { 759 if (markclean) { 760 printf("\n***** FILE SYSTEM IS CLEAN *****\n"); 761 } else { 762 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 763 rerun = 1; 764 } 765 } 766 /* 767 * Free allocated tracking structures. 768 */ 769 if (blockmap != NULL) 770 free(blockmap); 771 blockmap = NULL; 772 if (inostathead != NULL) { 773 for (cg = 0; cg < sblock.fs_ncg; cg++) 774 if (inostathead[cg].il_stat != NULL) 775 free((char *)inostathead[cg].il_stat); 776 free(inostathead); 777 } 778 inostathead = NULL; 779 inocleanup(); 780 finalIOstats(); 781 (void)close(fsreadfd); 782 (void)close(fswritefd); 783 } 784 785 /* 786 * Print out I/O statistics. 
787 */ 788 void 789 IOstats(char *what) 790 { 791 int i; 792 793 if (debug == 0) 794 return; 795 if (diskreads == 0) { 796 printf("%s: no I/O\n\n", what); 797 return; 798 } 799 if (startpass.tv_sec == 0) 800 startpass = startprog; 801 printf("%s: I/O statistics\n", what); 802 printIOstats(); 803 totaldiskreads += diskreads; 804 diskreads = 0; 805 for (i = 0; i < BT_NUMBUFTYPES; i++) { 806 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 807 totalreadcnt[i] += readcnt[i]; 808 readtime[i].tv_sec = readtime[i].tv_nsec = 0; 809 readcnt[i] = 0; 810 } 811 clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); 812 } 813 814 void 815 finalIOstats(void) 816 { 817 int i; 818 819 if (debug == 0) 820 return; 821 printf("Final I/O statistics\n"); 822 totaldiskreads += diskreads; 823 diskreads = totaldiskreads; 824 startpass = startprog; 825 for (i = 0; i < BT_NUMBUFTYPES; i++) { 826 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 827 totalreadcnt[i] += readcnt[i]; 828 readtime[i] = totalreadtime[i]; 829 readcnt[i] = totalreadcnt[i]; 830 } 831 printIOstats(); 832 } 833 834 static void printIOstats(void) 835 { 836 long long msec, totalmsec; 837 int i; 838 839 clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); 840 timespecsub(&finishpass, &startpass, &finishpass); 841 printf("Running time: %jd.%03ld sec\n", 842 (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); 843 printf("buffer reads by type:\n"); 844 for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) 845 totalmsec += readtime[i].tv_sec * 1000 + 846 readtime[i].tv_nsec / 1000000; 847 if (totalmsec == 0) 848 totalmsec = 1; 849 for (i = 0; i < BT_NUMBUFTYPES; i++) { 850 if (readcnt[i] == 0) 851 continue; 852 msec = 853 readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; 854 printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", 855 buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, 856 (readcnt[i] * 1000 / diskreads) % 10, 857 (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec 
/ 1000000, 858 msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); 859 } 860 printf("\n"); 861 } 862 863 int 864 blread(int fd, char *buf, ufs2_daddr_t blk, long size) 865 { 866 char *cp; 867 int i, errs; 868 off_t offset; 869 870 offset = blk; 871 offset *= dev_bsize; 872 if (bkgrdflag) 873 slowio_start(); 874 totalreads++; 875 diskreads++; 876 if (pread(fd, buf, (int)size, offset) == size) { 877 if (bkgrdflag) 878 slowio_end(); 879 return (0); 880 } 881 882 /* 883 * This is handled specially here instead of in rwerror because 884 * rwerror is used for all sorts of errors, not just true read/write 885 * errors. It should be refactored and fixed. 886 */ 887 if (surrender) { 888 pfatal("CANNOT READ_BLK: %ld", (long)blk); 889 errx(EEXIT, "ABORTING DUE TO READ ERRORS"); 890 } else 891 rwerror("READ BLK", blk); 892 893 errs = 0; 894 memset(buf, 0, (size_t)size); 895 printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); 896 for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { 897 if (pread(fd, cp, (int)secsize, offset + i) != secsize) { 898 if (secsize != dev_bsize && dev_bsize != 1) 899 printf(" %jd (%jd),", 900 (intmax_t)(blk * dev_bsize + i) / secsize, 901 (intmax_t)blk + i / dev_bsize); 902 else 903 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 904 errs++; 905 } 906 } 907 printf("\n"); 908 if (errs) 909 resolved = 0; 910 return (errs); 911 } 912 913 void 914 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) 915 { 916 int i; 917 char *cp; 918 off_t offset; 919 920 if (fd < 0) 921 return; 922 offset = blk; 923 offset *= dev_bsize; 924 if (pwrite(fd, buf, size, offset) == size) { 925 fsmodified = 1; 926 return; 927 } 928 resolved = 0; 929 rwerror("WRITE BLK", blk); 930 printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 931 for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) 932 if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize) 933 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 934 printf("\n"); 935 return; 936 } 937 
938 void 939 blerase(int fd, ufs2_daddr_t blk, long size) 940 { 941 off_t ioarg[2]; 942 943 if (fd < 0) 944 return; 945 ioarg[0] = blk * dev_bsize; 946 ioarg[1] = size; 947 ioctl(fd, DIOCGDELETE, ioarg); 948 /* we don't really care if we succeed or not */ 949 return; 950 } 951 952 /* 953 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by 954 * definition a multiple of dev_bsize. 955 */ 956 void 957 blzero(int fd, ufs2_daddr_t blk, long size) 958 { 959 static char *zero; 960 off_t offset, len; 961 962 if (fd < 0) 963 return; 964 if (zero == NULL) { 965 zero = Balloc(ZEROBUFSIZE); 966 if (zero == NULL) 967 errx(EEXIT, "cannot allocate buffer pool"); 968 } 969 offset = blk * dev_bsize; 970 if (lseek(fd, offset, 0) < 0) 971 rwerror("SEEK BLK", blk); 972 while (size > 0) { 973 len = MIN(ZEROBUFSIZE, size); 974 if (write(fd, zero, len) != len) 975 rwerror("WRITE BLK", blk); 976 blk += len / dev_bsize; 977 size -= len; 978 } 979 } 980 981 /* 982 * Verify cylinder group's magic number and other parameters. If the 983 * test fails, offer an option to rebuild the whole cylinder group. 984 * 985 * Return 1 if the cylinder group is good or return 0 if it is bad. 986 */ 987 #undef CHK 988 #define CHK(lhs, op, rhs, fmt) \ 989 if (lhs op rhs) { \ 990 pwarn("UFS%d cylinder group %d failed: " \ 991 "%s (" #fmt ") %s %s (" #fmt ")\n", \ 992 sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg, \ 993 #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ 994 error = 1; \ 995 } 996 int 997 check_cgmagic(int cg, struct bufarea *cgbp) 998 { 999 struct cg *cgp = cgbp->b_un.b_cg; 1000 uint32_t cghash, calchash; 1001 static int prevfailcg = -1; 1002 long start; 1003 int error; 1004 1005 /* 1006 * Extended cylinder group checks. 
1007 */ 1008 calchash = cgp->cg_ckhash; 1009 if ((sblock.fs_metackhash & CK_CYLGRP) != 0 && 1010 (ckhashadd & CK_CYLGRP) == 0) { 1011 cghash = cgp->cg_ckhash; 1012 cgp->cg_ckhash = 0; 1013 calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 1014 cgp->cg_ckhash = cghash; 1015 } 1016 error = 0; 1017 CHK(cgp->cg_ckhash, !=, calchash, "%jd"); 1018 CHK(cg_chkmagic(cgp), ==, 0, "%jd"); 1019 CHK(cgp->cg_cgx, !=, cg, "%jd"); 1020 CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd"); 1021 if (sblock.fs_magic == FS_UFS1_MAGIC) { 1022 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1023 CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd"); 1024 } else if (sblock.fs_magic == FS_UFS2_MAGIC) { 1025 CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd"); 1026 CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd"); 1027 } 1028 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) { 1029 CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd"); 1030 } else { 1031 CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg), 1032 "%jd"); 1033 } 1034 start = sizeof(*cgp); 1035 if (sblock.fs_magic == FS_UFS2_MAGIC) { 1036 CHK(cgp->cg_iusedoff, !=, start, "%jd"); 1037 } else if (sblock.fs_magic == FS_UFS1_MAGIC) { 1038 CHK(cgp->cg_niblk, !=, 0, "%jd"); 1039 CHK(cgp->cg_initediblk, !=, 0, "%jd"); 1040 CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd"); 1041 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1042 CHK(cgp->cg_old_btotoff, !=, start, "%jd"); 1043 CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff + 1044 sblock.fs_old_cpg * sizeof(int32_t), "%jd"); 1045 CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff + 1046 sblock.fs_old_cpg * sizeof(u_int16_t), "%jd"); 1047 } 1048 CHK(cgp->cg_freeoff, !=, 1049 cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd"); 1050 if (sblock.fs_contigsumsize == 0) { 1051 CHK(cgp->cg_nextfreeoff, !=, 1052 cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd"); 1053 } else { 1054 CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag, 1055 "%jd"); 1056 
CHK(cgp->cg_clustersumoff, !=, 1057 roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), 1058 sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd"); 1059 CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff + 1060 (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd"); 1061 CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff + 1062 howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT), 1063 "%jd"); 1064 } 1065 if (error == 0) 1066 return (1); 1067 if (prevfailcg == cg) 1068 return (0); 1069 prevfailcg = cg; 1070 pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg); 1071 printf("\n"); 1072 return (0); 1073 } 1074 1075 void 1076 rebuild_cg(int cg, struct bufarea *cgbp) 1077 { 1078 struct cg *cgp = cgbp->b_un.b_cg; 1079 long start; 1080 1081 /* 1082 * Zero out the cylinder group and then initialize critical fields. 1083 * Bit maps and summaries will be recalculated by later passes. 1084 */ 1085 memset(cgp, 0, (size_t)sblock.fs_cgsize); 1086 cgp->cg_magic = CG_MAGIC; 1087 cgp->cg_cgx = cg; 1088 cgp->cg_niblk = sblock.fs_ipg; 1089 cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); 1090 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) 1091 cgp->cg_ndblk = sblock.fs_fpg; 1092 else 1093 cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); 1094 start = sizeof(*cgp); 1095 if (sblock.fs_magic == FS_UFS2_MAGIC) { 1096 cgp->cg_iusedoff = start; 1097 } else if (sblock.fs_magic == FS_UFS1_MAGIC) { 1098 cgp->cg_niblk = 0; 1099 cgp->cg_initediblk = 0; 1100 cgp->cg_old_ncyl = sblock.fs_old_cpg; 1101 cgp->cg_old_niblk = sblock.fs_ipg; 1102 cgp->cg_old_btotoff = start; 1103 cgp->cg_old_boff = cgp->cg_old_btotoff + 1104 sblock.fs_old_cpg * sizeof(int32_t); 1105 cgp->cg_iusedoff = cgp->cg_old_boff + 1106 sblock.fs_old_cpg * sizeof(u_int16_t); 1107 } 1108 cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); 1109 cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); 1110 if (sblock.fs_contigsumsize > 0) { 1111 cgp->cg_nclusterblks = 
cgp->cg_ndblk / sblock.fs_frag; 1112 cgp->cg_clustersumoff = 1113 roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); 1114 cgp->cg_clustersumoff -= sizeof(u_int32_t); 1115 cgp->cg_clusteroff = cgp->cg_clustersumoff + 1116 (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); 1117 cgp->cg_nextfreeoff = cgp->cg_clusteroff + 1118 howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); 1119 } 1120 cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 1121 cgdirty(cgbp); 1122 } 1123 1124 /* 1125 * allocate a data block with the specified number of fragments 1126 */ 1127 ufs2_daddr_t 1128 allocblk(long startcg, long frags, 1129 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) 1130 { 1131 ufs2_daddr_t blkno, newblk; 1132 1133 if (sujrecovery && checkblkavail == std_checkblkavail) { 1134 pfatal("allocblk: std_checkblkavail used for SUJ recovery\n"); 1135 return (0); 1136 } 1137 if (frags <= 0 || frags > sblock.fs_frag) 1138 return (0); 1139 for (blkno = MAX(cgdata(&sblock, startcg), 0); 1140 blkno < maxfsblock - sblock.fs_frag; 1141 blkno += sblock.fs_frag) { 1142 if ((newblk = (*checkblkavail)(blkno, frags)) == 0) 1143 continue; 1144 if (newblk > 0) 1145 return (newblk); 1146 if (newblk < 0) 1147 blkno = -newblk; 1148 } 1149 for (blkno = MAX(cgdata(&sblock, 0), 0); 1150 blkno < cgbase(&sblock, startcg) - sblock.fs_frag; 1151 blkno += sblock.fs_frag) { 1152 if ((newblk = (*checkblkavail)(blkno, frags)) == 0) 1153 continue; 1154 if (newblk > 0) 1155 return (newblk); 1156 if (newblk < 0) 1157 blkno = -newblk; 1158 } 1159 return (0); 1160 } 1161 1162 ufs2_daddr_t 1163 std_checkblkavail(ufs2_daddr_t blkno, long frags) 1164 { 1165 struct bufarea *cgbp; 1166 struct cg *cgp; 1167 ufs2_daddr_t j, k, baseblk; 1168 long cg; 1169 1170 if ((u_int64_t)blkno > sblock.fs_size) 1171 return (0); 1172 for (j = 0; j <= sblock.fs_frag - frags; j++) { 1173 if (testbmap(blkno + j)) 1174 continue; 1175 for (k = 1; k < frags; k++) 1176 if (testbmap(blkno + j + k)) 1177 
break; 1178 if (k < frags) { 1179 j += k; 1180 continue; 1181 } 1182 cg = dtog(&sblock, blkno + j); 1183 cgbp = cglookup(cg); 1184 cgp = cgbp->b_un.b_cg; 1185 if (!check_cgmagic(cg, cgbp)) 1186 return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); 1187 baseblk = dtogd(&sblock, blkno + j); 1188 for (k = 0; k < frags; k++) { 1189 setbmap(blkno + j + k); 1190 clrbit(cg_blksfree(cgp), baseblk + k); 1191 } 1192 n_blks += frags; 1193 if (frags == sblock.fs_frag) 1194 cgp->cg_cs.cs_nbfree--; 1195 else 1196 cgp->cg_cs.cs_nffree -= frags; 1197 cgdirty(cgbp); 1198 return (blkno + j); 1199 } 1200 return (0); 1201 } 1202 1203 /* 1204 * Check whether a file size is within the limits for the filesystem. 1205 * Return 1 when valid and 0 when too big. 1206 * 1207 * This should match the file size limit in ffs_mountfs(). 1208 */ 1209 int 1210 chkfilesize(mode_t mode, u_int64_t filesize) 1211 { 1212 u_int64_t kernmaxfilesize; 1213 1214 if (sblock.fs_magic == FS_UFS1_MAGIC) 1215 kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1; 1216 else 1217 kernmaxfilesize = sblock.fs_maxfilesize; 1218 if (filesize > kernmaxfilesize || 1219 filesize > sblock.fs_maxfilesize || 1220 (mode == IFDIR && filesize > MAXDIRSIZE)) { 1221 if (debug) 1222 printf("bad file size %ju:", (uintmax_t)filesize); 1223 return (0); 1224 } 1225 return (1); 1226 } 1227 1228 /* 1229 * Slow down IO so as to leave some disk bandwidth for other processes 1230 */ 1231 void 1232 slowio_start() 1233 { 1234 1235 /* Delay one in every 8 operations */ 1236 slowio_pollcnt = (slowio_pollcnt + 1) & 7; 1237 if (slowio_pollcnt == 0) { 1238 gettimeofday(&slowio_starttime, NULL); 1239 } 1240 } 1241 1242 void 1243 slowio_end() 1244 { 1245 struct timeval tv; 1246 int delay_usec; 1247 1248 if (slowio_pollcnt != 0) 1249 return; 1250 1251 /* Update the slowdown interval. 
*/ 1252 gettimeofday(&tv, NULL); 1253 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + 1254 (tv.tv_usec - slowio_starttime.tv_usec); 1255 if (delay_usec < 64) 1256 delay_usec = 64; 1257 if (delay_usec > 2500000) 1258 delay_usec = 2500000; 1259 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; 1260 /* delay by 8 times the average IO delay */ 1261 if (slowio_delay_usec > 64) 1262 usleep(slowio_delay_usec * 8); 1263 } 1264 1265 /* 1266 * Find a pathname 1267 */ 1268 void 1269 getpathname(char *namebuf, ino_t curdir, ino_t ino) 1270 { 1271 int len; 1272 char *cp; 1273 struct inode ip; 1274 struct inodesc idesc; 1275 static int busy = 0; 1276 1277 if (curdir == ino && ino == UFS_ROOTINO) { 1278 (void)strcpy(namebuf, "/"); 1279 return; 1280 } 1281 if (busy || !INO_IS_DVALID(curdir)) { 1282 (void)strcpy(namebuf, "?"); 1283 return; 1284 } 1285 busy = 1; 1286 memset(&idesc, 0, sizeof(struct inodesc)); 1287 idesc.id_type = DATA; 1288 idesc.id_fix = IGNORE; 1289 cp = &namebuf[MAXPATHLEN - 1]; 1290 *cp = '\0'; 1291 if (curdir != ino) { 1292 idesc.id_parent = curdir; 1293 goto namelookup; 1294 } 1295 while (ino != UFS_ROOTINO) { 1296 idesc.id_number = ino; 1297 idesc.id_func = findino; 1298 idesc.id_name = strdup(".."); 1299 ginode(ino, &ip); 1300 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1301 irelse(&ip); 1302 free(idesc.id_name); 1303 break; 1304 } 1305 irelse(&ip); 1306 free(idesc.id_name); 1307 namelookup: 1308 idesc.id_number = idesc.id_parent; 1309 idesc.id_parent = ino; 1310 idesc.id_func = findname; 1311 idesc.id_name = namebuf; 1312 ginode(idesc.id_number, &ip); 1313 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1314 irelse(&ip); 1315 break; 1316 } 1317 irelse(&ip); 1318 len = strlen(namebuf); 1319 cp -= len; 1320 memmove(cp, namebuf, (size_t)len); 1321 *--cp = '/'; 1322 if (cp < &namebuf[UFS_MAXNAMLEN]) 1323 break; 1324 ino = idesc.id_number; 1325 } 1326 busy = 0; 1327 if (ino != UFS_ROOTINO) 1328 *--cp = '?'; 1329 memmove(namebuf, 
cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); 1330 } 1331 1332 void 1333 catch(int sig __unused) 1334 { 1335 1336 ckfini(0); 1337 exit(12); 1338 } 1339 1340 /* 1341 * When preening, allow a single quit to signal 1342 * a special exit after file system checks complete 1343 * so that reboot sequence may be interrupted. 1344 */ 1345 void 1346 catchquit(int sig __unused) 1347 { 1348 printf("returning to single-user after file system check\n"); 1349 returntosingle = 1; 1350 (void)signal(SIGQUIT, SIG_DFL); 1351 } 1352 1353 /* 1354 * determine whether an inode should be fixed. 1355 */ 1356 int 1357 dofix(struct inodesc *idesc, const char *msg) 1358 { 1359 1360 switch (idesc->id_fix) { 1361 1362 case DONTKNOW: 1363 if (idesc->id_type == DATA) 1364 direrror(idesc->id_number, msg); 1365 else 1366 pwarn("%s", msg); 1367 if (preen) { 1368 printf(" (SALVAGED)\n"); 1369 idesc->id_fix = FIX; 1370 return (ALTERED); 1371 } 1372 if (reply("SALVAGE") == 0) { 1373 idesc->id_fix = NOFIX; 1374 return (0); 1375 } 1376 idesc->id_fix = FIX; 1377 return (ALTERED); 1378 1379 case FIX: 1380 return (ALTERED); 1381 1382 case NOFIX: 1383 case IGNORE: 1384 return (0); 1385 1386 default: 1387 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); 1388 } 1389 /* NOTREACHED */ 1390 return (0); 1391 } 1392 1393 #include <stdarg.h> 1394 1395 /* 1396 * Print details about a buffer. 1397 */ 1398 void 1399 prtbuf(struct bufarea *bp, const char *fmt, ...) 1400 { 1401 va_list ap; 1402 va_start(ap, fmt); 1403 if (preen) 1404 (void)fprintf(stdout, "%s: ", cdevname); 1405 (void)vfprintf(stdout, fmt, ap); 1406 va_end(ap); 1407 printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " 1408 "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, 1409 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", 1410 (intmax_t) bp->b_index); 1411 } 1412 1413 /* 1414 * An unexpected inconsistency occurred. 
1415 * Die if preening or file system is running with soft dependency protocol, 1416 * otherwise just print message and continue. 1417 */ 1418 void 1419 pfatal(const char *fmt, ...) 1420 { 1421 va_list ap; 1422 va_start(ap, fmt); 1423 if (!preen) { 1424 (void)vfprintf(stdout, fmt, ap); 1425 va_end(ap); 1426 if (usedsoftdep) 1427 (void)fprintf(stdout, 1428 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); 1429 /* 1430 * Force foreground fsck to clean up inconsistency. 1431 */ 1432 if (bkgrdflag) { 1433 cmd.value = FS_NEEDSFSCK; 1434 cmd.size = 1; 1435 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1436 &cmd, sizeof cmd) == -1) 1437 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1438 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); 1439 ckfini(0); 1440 exit(EEXIT); 1441 } 1442 return; 1443 } 1444 if (cdevname == NULL) 1445 cdevname = strdup("fsck"); 1446 (void)fprintf(stdout, "%s: ", cdevname); 1447 (void)vfprintf(stdout, fmt, ap); 1448 (void)fprintf(stdout, 1449 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", 1450 cdevname, usedsoftdep ? " SOFT UPDATE " : " "); 1451 /* 1452 * Force foreground fsck to clean up inconsistency. 1453 */ 1454 if (bkgrdflag) { 1455 cmd.value = FS_NEEDSFSCK; 1456 cmd.size = 1; 1457 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1458 &cmd, sizeof cmd) == -1) 1459 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1460 } 1461 ckfini(0); 1462 exit(EEXIT); 1463 } 1464 1465 /* 1466 * Pwarn just prints a message when not preening or running soft dependency 1467 * protocol, or a warning (preceded by filename) when preening. 1468 */ 1469 void 1470 pwarn(const char *fmt, ...) 1471 { 1472 va_list ap; 1473 va_start(ap, fmt); 1474 if (preen) 1475 (void)fprintf(stdout, "%s: ", cdevname); 1476 (void)vfprintf(stdout, fmt, ap); 1477 va_end(ap); 1478 } 1479 1480 /* 1481 * Stub for routines from kernel. 1482 */ 1483 void 1484 panic(const char *fmt, ...) 
1485 { 1486 va_list ap; 1487 va_start(ap, fmt); 1488 pfatal("INTERNAL INCONSISTENCY:"); 1489 (void)vfprintf(stdout, fmt, ap); 1490 va_end(ap); 1491 exit(EEXIT); 1492 } 1493