1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 */ 31 32 #if 0 33 #ifndef lint 34 static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; 35 #endif /* not lint */ 36 #endif 37 #include <sys/cdefs.h> 38 #include <sys/param.h> 39 #include <sys/time.h> 40 #include <sys/types.h> 41 #include <sys/sysctl.h> 42 #include <sys/disk.h> 43 #include <sys/disklabel.h> 44 #include <sys/ioctl.h> 45 #include <sys/stat.h> 46 47 #include <ufs/ufs/dinode.h> 48 #include <ufs/ufs/dir.h> 49 #include <ufs/ffs/fs.h> 50 51 #include <err.h> 52 #include <errno.h> 53 #include <string.h> 54 #include <ctype.h> 55 #include <fstab.h> 56 #include <stdint.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <time.h> 60 #include <unistd.h> 61 #include <libufs.h> 62 63 #include "fsck.h" 64 65 int sujrecovery = 0; 66 67 static struct bufarea *allocbuf(const char *); 68 static void cg_write(struct bufarea *); 69 static void slowio_start(void); 70 static void slowio_end(void); 71 static void printIOstats(void); 72 73 static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ 74 static struct timespec startpass, finishpass; 75 struct timeval slowio_starttime; 76 int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ 77 int slowio_pollcnt; 78 static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ 79 static struct bufarea failedbuf; /* returned by failed getdatablk() */ 80 static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ 81 static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ 82 static struct bufhash freebufs; /* unused buffers */ 83 static int numbufs; /* size of buffer cache */ 84 static int cachelookups; /* number of cache lookups */ 85 static int cachereads; /* number of cache reads */ 86 static int flushtries; /* number of tries to reclaim memory */ 87 88 char *buftype[BT_NUMBUFTYPES] = BT_NAMES; 89 90 void 91 fsutilinit(void) 92 { 93 diskreads = totaldiskreads = totalreads = 0; 94 bzero(&startpass, 
sizeof(struct timespec)); 95 bzero(&finishpass, sizeof(struct timespec)); 96 bzero(&slowio_starttime, sizeof(struct timeval)); 97 slowio_delay_usec = 10000; 98 slowio_pollcnt = 0; 99 flushtries = 0; 100 } 101 102 int 103 ftypeok(union dinode *dp) 104 { 105 switch (DIP(dp, di_mode) & IFMT) { 106 107 case IFDIR: 108 case IFREG: 109 case IFBLK: 110 case IFCHR: 111 case IFLNK: 112 case IFSOCK: 113 case IFIFO: 114 return (1); 115 116 default: 117 if (debug) 118 printf("bad file type 0%o\n", DIP(dp, di_mode)); 119 return (0); 120 } 121 } 122 123 int 124 reply(const char *question) 125 { 126 int persevere; 127 char c; 128 129 if (preen) 130 pfatal("INTERNAL ERROR: GOT TO reply()"); 131 persevere = strcmp(question, "CONTINUE") == 0 || 132 strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0; 133 printf("\n"); 134 if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { 135 printf("%s? no\n\n", question); 136 resolved = 0; 137 return (0); 138 } 139 if (yflag || (persevere && nflag)) { 140 printf("%s? yes\n\n", question); 141 return (1); 142 } 143 do { 144 printf("%s? [yn] ", question); 145 (void) fflush(stdout); 146 c = getc(stdin); 147 while (c != '\n' && getc(stdin) != '\n') { 148 if (feof(stdin)) { 149 resolved = 0; 150 return (0); 151 } 152 } 153 } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); 154 printf("\n"); 155 if (c == 'y' || c == 'Y') 156 return (1); 157 resolved = 0; 158 return (0); 159 } 160 161 /* 162 * Look up state information for an inode. 163 */ 164 struct inostat * 165 inoinfo(ino_t inum) 166 { 167 static struct inostat unallocated = { USTATE, 0, 0, 0 }; 168 struct inostatlist *ilp; 169 int iloff; 170 171 if (inum >= maxino) 172 errx(EEXIT, "inoinfo: inumber %ju out of range", 173 (uintmax_t)inum); 174 ilp = &inostathead[inum / sblock.fs_ipg]; 175 iloff = inum % sblock.fs_ipg; 176 if (iloff >= ilp->il_numalloced) 177 return (&unallocated); 178 return (&ilp->il_stat[iloff]); 179 } 180 181 /* 182 * Malloc buffers and set up cache. 
183 */ 184 void 185 bufinit(void) 186 { 187 int i; 188 189 initbarea(&failedbuf, BT_UNKNOWN); 190 failedbuf.b_errs = -1; 191 failedbuf.b_un.b_buf = NULL; 192 if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) 193 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); 194 initbarea(&cgblk, BT_CYLGRP); 195 numbufs = cachelookups = cachereads = 0; 196 TAILQ_INIT(&bufqueuehd); 197 LIST_INIT(&freebufs); 198 for (i = 0; i < HASHSIZE; i++) 199 LIST_INIT(&bufhashhd[i]); 200 for (i = 0; i < BT_NUMBUFTYPES; i++) { 201 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; 202 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; 203 readcnt[i] = totalreadcnt[i] = 0; 204 } 205 } 206 207 static struct bufarea * 208 allocbuf(const char *failreason) 209 { 210 struct bufarea *bp; 211 char *bufp; 212 213 bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); 214 bufp = Malloc((unsigned int)sblock.fs_bsize); 215 if (bp == NULL || bufp == NULL) { 216 errx(EEXIT, "%s", failreason); 217 /* NOTREACHED */ 218 } 219 numbufs++; 220 bp->b_un.b_buf = bufp; 221 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 222 initbarea(bp, BT_UNKNOWN); 223 return (bp); 224 } 225 226 /* 227 * Manage cylinder group buffers. 228 * 229 * Use getblk() here rather than cgget() because the cylinder group 230 * may be corrupted but we want it anyway so we can fix it. 
 */
static struct bufarea *cgbufs;	/* header for cylinder group cache */
static int flushtries;		/* number of tries to reclaim memory */

/*
 * Return a buffer holding cylinder group `cg'.
 *
 * The per-group buffer headers (cgbufs[]) are allocated on first use.
 * If the group already has a data area its header is returned as is.
 * Otherwise a new data area is allocated, but only while flushtries is
 * still zero.  When no memory is obtained the shared backup buffer
 * cgblk is flushed, reloaded with this group and returned instead;
 * during journal recovery that case is fatal.
 *
 * Exits via errx() if cg is out of range or the header array cannot be
 * allocated.
 */
struct bufarea *
cglookup(int cg)
{
	struct bufarea *cgbp;
	struct cg *cgp;

	/* The unsigned cast makes a negative cg fail the range check too. */
	if ((unsigned) cg >= sblock.fs_ncg)
		errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
	if (cgbufs == NULL) {
		cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
		if (cgbufs == NULL)
			errx(EEXIT, "Cannot allocate cylinder group buffers");
	}
	cgbp = &cgbufs[cg];
	if (cgbp->b_un.b_cg != NULL)
		return (cgbp);
	cgp = NULL;
	/* Once flushentry() has been used, stop growing the cache. */
	if (flushtries == 0)
		cgp = Malloc((unsigned int)sblock.fs_cgsize);
	if (cgp == NULL) {
		if (sujrecovery)
			errx(EEXIT,"Ran out of memory during journal recovery");
		/* Fall back to the single shared buffer. */
		flush(fswritefd, &cgblk);
		getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
		return (&cgblk);
	}
	cgbp->b_un.b_cg = cgp;
	initbarea(cgbp, BT_CYLGRP);
	getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
	return (cgbp);
}

/*
 * Mark a cylinder group buffer as dirty.
 * Update its check-hash if they are enabled.
 */
void
cgdirty(struct bufarea *cgbp)
{
	struct cg *cg;

	cg = cgbp->b_un.b_cg;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
		/* The hash is computed with the hash field itself zeroed. */
		cg->cg_ckhash = 0;
		cg->cg_ckhash =
		    calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
	}
	dirty(cgbp);
}

/*
 * Attempt to flush a cylinder group cache entry.
 * Return whether the flush was successful.
 *
 * Each call considers the next entry of cgbufs[] (indexed by
 * flushtries, which is advanced).  Nothing is flushed during journal
 * recovery, when every entry has already been tried, or when no cache
 * exists.  An entry without a data area also yields 0.
 */
int
flushentry(void)
{
	struct bufarea *cgbp;

	if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
		return (0);
	cgbp = &cgbufs[flushtries++];
	if (cgbp->b_un.b_cg == NULL)
		return (0);
	/* Write the entry out if dirty, then release its memory. */
	flush(fswritefd, cgbp);
	free(cgbp->b_un.b_buf);
	cgbp->b_un.b_buf = NULL;
	return (1);
}

/*
 * Manage a cache of filesystem disk blocks.
 */
/*
 * Return a cache buffer holding filesystem block `blkno' of `size'
 * bytes, reading it from disk on a cache miss.
 *
 * `type' is the BT_* buffer type recorded on a newly filled buffer.
 * For an out-of-range request (not checked for BT_INODES) the shared
 * failedbuf is returned with its reference count raised.  Otherwise the
 * buffer is moved to the head of the LRU queue and its reference count
 * is raised only if it has no read errors (b_errs == 0).
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_errs == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	/* Cache hit: unlink from the LRU; it is reinserted at foundit. */
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffer back to the front of the LRU so we
	 * do not endlessly inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		/* Reuse a buffer previously released by binval(). */
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	/*
	 * The chosen buffer is on the LRU queue in every case above;
	 * take it off, write back any dirty contents, then refill it.
	 */
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}

/*
 * Make `bp' hold filesystem block `blk' of `size' bytes.
 *
 * If bp already holds that disk block only totalreads is counted.
 * Otherwise the block is read with blread(), whose result is stored in
 * b_errs, and b_bno/b_size are updated.  When debugging, the read is
 * counted and timed per buffer type.
 */
void
getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
{
	ufs2_daddr_t dblk;
	struct timespec start, finish;

	dblk = fsbtodb(&sblock, blk);
	if (bp->b_bno == dblk) {
		totalreads++;
	} else {
		if (debug) {
			readcnt[bp->b_type]++;
			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
		}
		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
		if (debug) {
			/* Add the elapsed time to this type's total. */
			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
			timespecsub(&finish, &start, &finish);
			timespecadd(&readtime[bp->b_type], &finish,
			    &readtime[bp->b_type]);
		}
		bp->b_bno = dblk;
		bp->b_size = size;
	}
}

/*
 * Drop one reference to a buffer.  A buffer whose count is already
 * zero or negative is reported via prtbuf(), and the count is
 * decremented regardless.
 */
void
brelse(struct bufarea *bp)
{

	if (bp->b_refcnt <= 0)
		prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
}

/*
 * Invalidate a buffer: discard any pending write by clearing B_DIRTY
 * and move it from its hash chain onto the freebufs list.  Its LRU
 * queue linkage is not touched here.
 */
void
binval(struct bufarea *bp)
{

	bp->b_flags &= ~B_DIRTY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
}

/*
 * Write a dirty buffer back to disk through `fd' and mark it clean.
 *
 * Clean buffers are ignored.  The dirty flag is cleared before any
 * write is attempted.  If the filesystem is not open for writing
 * (fswritefd < 0) nothing is written.  A buffer that had read errors
 * is reported before being written and its error count is reset.
 * Superblock and cylinder group buffers go through sbput()/cgput();
 * everything else goes through copyonwrite() and blwrite().
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		/* NOTE(review): this writes via fswritefd, not `fd'. */
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		/*
		 * Debug-only check: verify the check-hash of every UFS2
		 * inode in the block and offer to fix any that fail.
		 */
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				/* Only these fields of ip are filled in. */
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}

/*
 * If there are any snapshots, ensure that all the blocks that they
 * care about have been copied, then release the snapshot inodes.
 * These operations need to be done before we rebuild the cylinder
 * groups so that any block allocations are properly recorded.
 * Since all the cylinder group maps have already been copied in
 * the snapshots, no further snapshot copies will need to be done.
 */
void
snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
{
	struct bufarea *bp;
	int cnt;

	if (snapcnt > 0) {
		if (debug)
			printf("Check for snapshot copies\n");
		/* Visit every dirty buffer in the cache. */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) != 0)
				copyonwrite(&sblock, bp, checkblkavail);
		for (cnt = 0; cnt < snapcnt; cnt++)
			irelse(&snaplist[cnt]);
		snapcnt = 0;
	}
}

/*
 * Journaled soft updates does not maintain cylinder group summary
 * information during cleanup, so this routine recalculates the summary
 * information and updates the superblock summary in preparation for
 * writing out the cylinder group.
 *
 * Despite its name this routine performs no I/O itself; flush() calls
 * it just before cgput() during journal recovery.
 */
static void
cg_write(struct bufarea *bp)
{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;

	/*
	 * Fix the frag and cluster summary.
	 */
	cgp = bp->b_un.b_cg;
	cgp->cg_cs.cs_nbfree = 0;
	cgp->cg_cs.cs_nffree = 0;
	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
	if (sblock.fs_contigsumsize > 0) {
		for (i = 1; i <= sblock.fs_contigsumsize; i++)
			cg_clustersum(cgp)[i] = 0;
		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
	}
	/* Rebuild the counts by scanning the map one block at a time. */
	blksfree = cg_blksfree(cgp);
	for (cgbno = 0; cgbno < maxbno; cgbno++) {
		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
			continue;
		if (ffs_isblock(&sblock, blksfree, cgbno)) {
			ffs_clusteracct(&sblock, cgp, cgbno, 1);
			cgp->cg_cs.cs_nbfree++;
			continue;
		}
		/* Neither case above: account for individual fragments. */
		fragno = blkstofrags(&sblock, cgbno);
		blk = blkmap(&sblock, blksfree, fragno);
		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
		for (i = 0; i < sblock.fs_frag; i++)
			if (isset(blksfree, fragno + i))
				cgp->cg_cs.cs_nffree++;
	}
	/*
	 * Update the superblock cg summary from our now correct values
	 * before writing the block.
	 */
	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}

/*
 * Report a failed operation `mesg' on block `blk' and ask whether to
 * continue.  Exits with EEXIT immediately when bkgrdcheck is set, and
 * also when the operator declines to continue.
 */
void
rwerror(const char *mesg, ufs2_daddr_t blk)
{

	if (bkgrdcheck)
		exit(EEXIT);
	if (preen == 0)
		printf("\n");
	pfatal("CANNOT %s: %ld", mesg, (long)blk);
	if (reply("CONTINUE") == 0)
		exit(EEXIT);
}

/*
 * Finish the check: flush and free all cached buffers, update the
 * clean state of the filesystem, free tracking structures and close
 * the filesystem descriptors.
 *
 * `markclean' is nonzero if the filesystem should be marked clean.
 * When the filesystem is not open for writing only the read descriptor
 * is closed.  `rerun' is set whenever the filesystem is reported as
 * dirty.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		/* Background check: remove the snapshot file. */
		unlink(snapname);
		/* Only update the flag if it disagrees with markclean. */
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 *  1) cylinder groups (bitmaps)
	 *  2) indirect, directory, external attribute, and data blocks
	 *  3) inode blocks
	 *  4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	/* From here on cnt counts the cache buffers that are freed. */
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	/* Everything still on the queue is flushed and freed here. */
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Every buffer counted by allocbuf() must have been freed. */
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		/* The superblock in use is not at the standard location. */
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		/* Writing the clean flag must not change fsmodified. */
		ofsmodified = fsmodified;
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}

/*
 * Print out I/O statistics.
792 */ 793 void 794 IOstats(char *what) 795 { 796 int i; 797 798 if (debug == 0) 799 return; 800 if (diskreads == 0) { 801 printf("%s: no I/O\n\n", what); 802 return; 803 } 804 if (startpass.tv_sec == 0) 805 startpass = startprog; 806 printf("%s: I/O statistics\n", what); 807 printIOstats(); 808 totaldiskreads += diskreads; 809 diskreads = 0; 810 for (i = 0; i < BT_NUMBUFTYPES; i++) { 811 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 812 totalreadcnt[i] += readcnt[i]; 813 readtime[i].tv_sec = readtime[i].tv_nsec = 0; 814 readcnt[i] = 0; 815 } 816 clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); 817 } 818 819 void 820 finalIOstats(void) 821 { 822 int i; 823 824 if (debug == 0) 825 return; 826 printf("Final I/O statistics\n"); 827 totaldiskreads += diskreads; 828 diskreads = totaldiskreads; 829 startpass = startprog; 830 for (i = 0; i < BT_NUMBUFTYPES; i++) { 831 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 832 totalreadcnt[i] += readcnt[i]; 833 readtime[i] = totalreadtime[i]; 834 readcnt[i] = totalreadcnt[i]; 835 } 836 printIOstats(); 837 } 838 839 static void printIOstats(void) 840 { 841 long long msec, totalmsec; 842 int i; 843 844 clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); 845 timespecsub(&finishpass, &startpass, &finishpass); 846 printf("Running time: %jd.%03ld sec\n", 847 (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); 848 printf("buffer reads by type:\n"); 849 for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) 850 totalmsec += readtime[i].tv_sec * 1000 + 851 readtime[i].tv_nsec / 1000000; 852 if (totalmsec == 0) 853 totalmsec = 1; 854 for (i = 0; i < BT_NUMBUFTYPES; i++) { 855 if (readcnt[i] == 0) 856 continue; 857 msec = 858 readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; 859 printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", 860 buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, 861 (readcnt[i] * 1000 / diskreads) % 10, 862 (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec 
/ 1000000, 863 msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); 864 } 865 printf("\n"); 866 } 867 868 int 869 blread(int fd, char *buf, ufs2_daddr_t blk, long size) 870 { 871 char *cp; 872 int i, errs; 873 off_t offset; 874 875 offset = blk; 876 offset *= dev_bsize; 877 if (bkgrdflag) 878 slowio_start(); 879 totalreads++; 880 diskreads++; 881 if (pread(fd, buf, (int)size, offset) == size) { 882 if (bkgrdflag) 883 slowio_end(); 884 return (0); 885 } 886 887 /* 888 * This is handled specially here instead of in rwerror because 889 * rwerror is used for all sorts of errors, not just true read/write 890 * errors. It should be refactored and fixed. 891 */ 892 if (surrender) { 893 pfatal("CANNOT READ_BLK: %ld", (long)blk); 894 errx(EEXIT, "ABORTING DUE TO READ ERRORS"); 895 } else 896 rwerror("READ BLK", blk); 897 898 errs = 0; 899 memset(buf, 0, (size_t)size); 900 printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); 901 for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { 902 if (pread(fd, cp, (int)secsize, offset + i) != secsize) { 903 if (secsize != dev_bsize && dev_bsize != 1) 904 printf(" %jd (%jd),", 905 (intmax_t)(blk * dev_bsize + i) / secsize, 906 (intmax_t)blk + i / dev_bsize); 907 else 908 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 909 errs++; 910 } 911 } 912 printf("\n"); 913 if (errs) 914 resolved = 0; 915 return (errs); 916 } 917 918 void 919 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) 920 { 921 int i; 922 char *cp; 923 off_t offset; 924 925 if (fd < 0) 926 return; 927 offset = blk; 928 offset *= dev_bsize; 929 if (pwrite(fd, buf, size, offset) == size) { 930 fsmodified = 1; 931 return; 932 } 933 resolved = 0; 934 rwerror("WRITE BLK", blk); 935 printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 936 for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) 937 if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize) 938 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 939 printf("\n"); 940 return; 941 } 942 
943 void 944 blerase(int fd, ufs2_daddr_t blk, long size) 945 { 946 off_t ioarg[2]; 947 948 if (fd < 0) 949 return; 950 ioarg[0] = blk * dev_bsize; 951 ioarg[1] = size; 952 ioctl(fd, DIOCGDELETE, ioarg); 953 /* we don't really care if we succeed or not */ 954 return; 955 } 956 957 /* 958 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by 959 * definition a multiple of dev_bsize. 960 */ 961 void 962 blzero(int fd, ufs2_daddr_t blk, long size) 963 { 964 static char *zero; 965 off_t offset, len; 966 967 if (fd < 0) 968 return; 969 if (zero == NULL) { 970 zero = calloc(ZEROBUFSIZE, 1); 971 if (zero == NULL) 972 errx(EEXIT, "cannot allocate buffer pool"); 973 } 974 offset = blk * dev_bsize; 975 if (lseek(fd, offset, 0) < 0) 976 rwerror("SEEK BLK", blk); 977 while (size > 0) { 978 len = MIN(ZEROBUFSIZE, size); 979 if (write(fd, zero, len) != len) 980 rwerror("WRITE BLK", blk); 981 blk += len / dev_bsize; 982 size -= len; 983 } 984 } 985 986 /* 987 * Verify cylinder group's magic number and other parameters. If the 988 * test fails, offer an option to rebuild the whole cylinder group. 989 * 990 * Return 1 if the cylinder group is good or return 0 if it is bad. 991 */ 992 #undef CHK 993 #define CHK(lhs, op, rhs, fmt) \ 994 if (lhs op rhs) { \ 995 pwarn("UFS%d cylinder group %d failed: " \ 996 "%s (" #fmt ") %s %s (" #fmt ")\n", \ 997 sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg, \ 998 #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ 999 error = 1; \ 1000 } 1001 int 1002 check_cgmagic(int cg, struct bufarea *cgbp) 1003 { 1004 struct cg *cgp = cgbp->b_un.b_cg; 1005 uint32_t cghash, calchash; 1006 static int prevfailcg = -1; 1007 long start; 1008 int error; 1009 1010 /* 1011 * Extended cylinder group checks. 
1012 */ 1013 calchash = cgp->cg_ckhash; 1014 if ((sblock.fs_metackhash & CK_CYLGRP) != 0 && 1015 (ckhashadd & CK_CYLGRP) == 0) { 1016 cghash = cgp->cg_ckhash; 1017 cgp->cg_ckhash = 0; 1018 calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 1019 cgp->cg_ckhash = cghash; 1020 } 1021 error = 0; 1022 CHK(cgp->cg_ckhash, !=, calchash, "%jd"); 1023 CHK(cg_chkmagic(cgp), ==, 0, "%jd"); 1024 CHK(cgp->cg_cgx, !=, cg, "%jd"); 1025 CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd"); 1026 if (sblock.fs_magic == FS_UFS1_MAGIC) { 1027 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1028 CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd"); 1029 } else if (sblock.fs_magic == FS_UFS2_MAGIC) { 1030 CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd"); 1031 CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd"); 1032 } 1033 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) { 1034 CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd"); 1035 } else { 1036 CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg), 1037 "%jd"); 1038 } 1039 start = sizeof(*cgp); 1040 if (sblock.fs_magic == FS_UFS2_MAGIC) { 1041 CHK(cgp->cg_iusedoff, !=, start, "%jd"); 1042 } else if (sblock.fs_magic == FS_UFS1_MAGIC) { 1043 CHK(cgp->cg_niblk, !=, 0, "%jd"); 1044 CHK(cgp->cg_initediblk, !=, 0, "%jd"); 1045 CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd"); 1046 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1047 CHK(cgp->cg_old_btotoff, !=, start, "%jd"); 1048 CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff + 1049 sblock.fs_old_cpg * sizeof(int32_t), "%jd"); 1050 CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff + 1051 sblock.fs_old_cpg * sizeof(u_int16_t), "%jd"); 1052 } 1053 CHK(cgp->cg_freeoff, !=, 1054 cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd"); 1055 if (sblock.fs_contigsumsize == 0) { 1056 CHK(cgp->cg_nextfreeoff, !=, 1057 cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd"); 1058 } else { 1059 CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag, 1060 "%jd"); 1061 
CHK(cgp->cg_clustersumoff, !=, 1062 roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), 1063 sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd"); 1064 CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff + 1065 (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd"); 1066 CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff + 1067 howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT), 1068 "%jd"); 1069 } 1070 if (error == 0) 1071 return (1); 1072 if (prevfailcg == cg) 1073 return (0); 1074 prevfailcg = cg; 1075 pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg); 1076 printf("\n"); 1077 return (0); 1078 } 1079 1080 void 1081 rebuild_cg(int cg, struct bufarea *cgbp) 1082 { 1083 struct cg *cgp = cgbp->b_un.b_cg; 1084 long start; 1085 1086 /* 1087 * Zero out the cylinder group and then initialize critical fields. 1088 * Bit maps and summaries will be recalculated by later passes. 1089 */ 1090 memset(cgp, 0, (size_t)sblock.fs_cgsize); 1091 cgp->cg_magic = CG_MAGIC; 1092 cgp->cg_cgx = cg; 1093 cgp->cg_niblk = sblock.fs_ipg; 1094 cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); 1095 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) 1096 cgp->cg_ndblk = sblock.fs_fpg; 1097 else 1098 cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); 1099 start = sizeof(*cgp); 1100 if (sblock.fs_magic == FS_UFS2_MAGIC) { 1101 cgp->cg_iusedoff = start; 1102 } else if (sblock.fs_magic == FS_UFS1_MAGIC) { 1103 cgp->cg_niblk = 0; 1104 cgp->cg_initediblk = 0; 1105 cgp->cg_old_ncyl = sblock.fs_old_cpg; 1106 cgp->cg_old_niblk = sblock.fs_ipg; 1107 cgp->cg_old_btotoff = start; 1108 cgp->cg_old_boff = cgp->cg_old_btotoff + 1109 sblock.fs_old_cpg * sizeof(int32_t); 1110 cgp->cg_iusedoff = cgp->cg_old_boff + 1111 sblock.fs_old_cpg * sizeof(u_int16_t); 1112 } 1113 cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); 1114 cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); 1115 if (sblock.fs_contigsumsize > 0) { 1116 cgp->cg_nclusterblks = 
cgp->cg_ndblk / sblock.fs_frag; 1117 cgp->cg_clustersumoff = 1118 roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); 1119 cgp->cg_clustersumoff -= sizeof(u_int32_t); 1120 cgp->cg_clusteroff = cgp->cg_clustersumoff + 1121 (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); 1122 cgp->cg_nextfreeoff = cgp->cg_clusteroff + 1123 howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); 1124 } 1125 cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 1126 cgdirty(cgbp); 1127 } 1128 1129 /* 1130 * allocate a data block with the specified number of fragments 1131 */ 1132 ufs2_daddr_t 1133 allocblk(long startcg, long frags, 1134 ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) 1135 { 1136 ufs2_daddr_t blkno, newblk; 1137 1138 if (sujrecovery && checkblkavail == std_checkblkavail) { 1139 pfatal("allocblk: std_checkblkavail used for SUJ recovery\n"); 1140 return (0); 1141 } 1142 if (frags <= 0 || frags > sblock.fs_frag) 1143 return (0); 1144 for (blkno = MAX(cgdata(&sblock, startcg), 0); 1145 blkno < maxfsblock - sblock.fs_frag; 1146 blkno += sblock.fs_frag) { 1147 if ((newblk = (*checkblkavail)(blkno, frags)) == 0) 1148 continue; 1149 if (newblk > 0) 1150 return (newblk); 1151 if (newblk < 0) 1152 blkno = -newblk; 1153 } 1154 for (blkno = MAX(cgdata(&sblock, 0), 0); 1155 blkno < cgbase(&sblock, startcg) - sblock.fs_frag; 1156 blkno += sblock.fs_frag) { 1157 if ((newblk = (*checkblkavail)(blkno, frags)) == 0) 1158 continue; 1159 if (newblk > 0) 1160 return (newblk); 1161 if (newblk < 0) 1162 blkno = -newblk; 1163 } 1164 return (0); 1165 } 1166 1167 ufs2_daddr_t 1168 std_checkblkavail(ufs2_daddr_t blkno, long frags) 1169 { 1170 struct bufarea *cgbp; 1171 struct cg *cgp; 1172 ufs2_daddr_t j, k, baseblk; 1173 long cg; 1174 1175 if ((u_int64_t)blkno > sblock.fs_size) 1176 return (0); 1177 for (j = 0; j <= sblock.fs_frag - frags; j++) { 1178 if (testbmap(blkno + j)) 1179 continue; 1180 for (k = 1; k < frags; k++) 1181 if (testbmap(blkno + j + k)) 1182 
break; 1183 if (k < frags) { 1184 j += k; 1185 continue; 1186 } 1187 cg = dtog(&sblock, blkno + j); 1188 cgbp = cglookup(cg); 1189 cgp = cgbp->b_un.b_cg; 1190 if (!check_cgmagic(cg, cgbp)) 1191 return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); 1192 baseblk = dtogd(&sblock, blkno + j); 1193 for (k = 0; k < frags; k++) { 1194 setbmap(blkno + j + k); 1195 clrbit(cg_blksfree(cgp), baseblk + k); 1196 } 1197 n_blks += frags; 1198 if (frags == sblock.fs_frag) 1199 cgp->cg_cs.cs_nbfree--; 1200 else 1201 cgp->cg_cs.cs_nffree -= frags; 1202 cgdirty(cgbp); 1203 return (blkno + j); 1204 } 1205 return (0); 1206 } 1207 1208 /* 1209 * Check whether a file size is within the limits for the filesystem. 1210 * Return 1 when valid and 0 when too big. 1211 * 1212 * This should match the file size limit in ffs_mountfs(). 1213 */ 1214 int 1215 chkfilesize(mode_t mode, u_int64_t filesize) 1216 { 1217 u_int64_t kernmaxfilesize; 1218 1219 if (sblock.fs_magic == FS_UFS1_MAGIC) 1220 kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1; 1221 else 1222 kernmaxfilesize = sblock.fs_maxfilesize; 1223 if (filesize > kernmaxfilesize || 1224 filesize > sblock.fs_maxfilesize || 1225 (mode == IFDIR && filesize > MAXDIRSIZE)) { 1226 if (debug) 1227 printf("bad file size %ju:", (uintmax_t)filesize); 1228 return (0); 1229 } 1230 return (1); 1231 } 1232 1233 /* 1234 * Slow down IO so as to leave some disk bandwidth for other processes 1235 */ 1236 void 1237 slowio_start() 1238 { 1239 1240 /* Delay one in every 8 operations */ 1241 slowio_pollcnt = (slowio_pollcnt + 1) & 7; 1242 if (slowio_pollcnt == 0) { 1243 gettimeofday(&slowio_starttime, NULL); 1244 } 1245 } 1246 1247 void 1248 slowio_end() 1249 { 1250 struct timeval tv; 1251 int delay_usec; 1252 1253 if (slowio_pollcnt != 0) 1254 return; 1255 1256 /* Update the slowdown interval. 
*/ 1257 gettimeofday(&tv, NULL); 1258 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + 1259 (tv.tv_usec - slowio_starttime.tv_usec); 1260 if (delay_usec < 64) 1261 delay_usec = 64; 1262 if (delay_usec > 2500000) 1263 delay_usec = 2500000; 1264 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; 1265 /* delay by 8 times the average IO delay */ 1266 if (slowio_delay_usec > 64) 1267 usleep(slowio_delay_usec * 8); 1268 } 1269 1270 /* 1271 * Find a pathname 1272 */ 1273 void 1274 getpathname(char *namebuf, ino_t curdir, ino_t ino) 1275 { 1276 int len; 1277 char *cp; 1278 struct inode ip; 1279 struct inodesc idesc; 1280 static int busy = 0; 1281 1282 if (curdir == ino && ino == UFS_ROOTINO) { 1283 (void)strcpy(namebuf, "/"); 1284 return; 1285 } 1286 if (busy || !INO_IS_DVALID(curdir)) { 1287 (void)strcpy(namebuf, "?"); 1288 return; 1289 } 1290 busy = 1; 1291 memset(&idesc, 0, sizeof(struct inodesc)); 1292 idesc.id_type = DATA; 1293 idesc.id_fix = IGNORE; 1294 cp = &namebuf[MAXPATHLEN - 1]; 1295 *cp = '\0'; 1296 if (curdir != ino) { 1297 idesc.id_parent = curdir; 1298 goto namelookup; 1299 } 1300 while (ino != UFS_ROOTINO) { 1301 idesc.id_number = ino; 1302 idesc.id_func = findino; 1303 idesc.id_name = strdup(".."); 1304 ginode(ino, &ip); 1305 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1306 irelse(&ip); 1307 free(idesc.id_name); 1308 break; 1309 } 1310 irelse(&ip); 1311 free(idesc.id_name); 1312 namelookup: 1313 idesc.id_number = idesc.id_parent; 1314 idesc.id_parent = ino; 1315 idesc.id_func = findname; 1316 idesc.id_name = namebuf; 1317 ginode(idesc.id_number, &ip); 1318 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1319 irelse(&ip); 1320 break; 1321 } 1322 irelse(&ip); 1323 len = strlen(namebuf); 1324 cp -= len; 1325 memmove(cp, namebuf, (size_t)len); 1326 *--cp = '/'; 1327 if (cp < &namebuf[UFS_MAXNAMLEN]) 1328 break; 1329 ino = idesc.id_number; 1330 } 1331 busy = 0; 1332 if (ino != UFS_ROOTINO) 1333 *--cp = '?'; 1334 memmove(namebuf, 
cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); 1335 } 1336 1337 void 1338 catch(int sig __unused) 1339 { 1340 1341 ckfini(0); 1342 exit(12); 1343 } 1344 1345 /* 1346 * When preening, allow a single quit to signal 1347 * a special exit after file system checks complete 1348 * so that reboot sequence may be interrupted. 1349 */ 1350 void 1351 catchquit(int sig __unused) 1352 { 1353 printf("returning to single-user after file system check\n"); 1354 returntosingle = 1; 1355 (void)signal(SIGQUIT, SIG_DFL); 1356 } 1357 1358 /* 1359 * determine whether an inode should be fixed. 1360 */ 1361 int 1362 dofix(struct inodesc *idesc, const char *msg) 1363 { 1364 1365 switch (idesc->id_fix) { 1366 1367 case DONTKNOW: 1368 if (idesc->id_type == DATA) 1369 direrror(idesc->id_number, msg); 1370 else 1371 pwarn("%s", msg); 1372 if (preen) { 1373 printf(" (SALVAGED)\n"); 1374 idesc->id_fix = FIX; 1375 return (ALTERED); 1376 } 1377 if (reply("SALVAGE") == 0) { 1378 idesc->id_fix = NOFIX; 1379 return (0); 1380 } 1381 idesc->id_fix = FIX; 1382 return (ALTERED); 1383 1384 case FIX: 1385 return (ALTERED); 1386 1387 case NOFIX: 1388 case IGNORE: 1389 return (0); 1390 1391 default: 1392 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); 1393 } 1394 /* NOTREACHED */ 1395 return (0); 1396 } 1397 1398 #include <stdarg.h> 1399 1400 /* 1401 * Print details about a buffer. 1402 */ 1403 void 1404 prtbuf(struct bufarea *bp, const char *fmt, ...) 1405 { 1406 va_list ap; 1407 va_start(ap, fmt); 1408 if (preen) 1409 (void)fprintf(stdout, "%s: ", cdevname); 1410 (void)vfprintf(stdout, fmt, ap); 1411 va_end(ap); 1412 printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " 1413 "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, 1414 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", 1415 (intmax_t) bp->b_index); 1416 } 1417 1418 /* 1419 * An unexpected inconsistency occurred. 
1420 * Die if preening or file system is running with soft dependency protocol, 1421 * otherwise just print message and continue. 1422 */ 1423 void 1424 pfatal(const char *fmt, ...) 1425 { 1426 va_list ap; 1427 va_start(ap, fmt); 1428 if (!preen) { 1429 (void)vfprintf(stdout, fmt, ap); 1430 va_end(ap); 1431 if (usedsoftdep) 1432 (void)fprintf(stdout, 1433 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); 1434 /* 1435 * Force foreground fsck to clean up inconsistency. 1436 */ 1437 if (bkgrdflag) { 1438 cmd.value = FS_NEEDSFSCK; 1439 cmd.size = 1; 1440 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1441 &cmd, sizeof cmd) == -1) 1442 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1443 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); 1444 ckfini(0); 1445 exit(EEXIT); 1446 } 1447 return; 1448 } 1449 if (cdevname == NULL) 1450 cdevname = strdup("fsck"); 1451 (void)fprintf(stdout, "%s: ", cdevname); 1452 (void)vfprintf(stdout, fmt, ap); 1453 (void)fprintf(stdout, 1454 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", 1455 cdevname, usedsoftdep ? " SOFT UPDATE " : " "); 1456 /* 1457 * Force foreground fsck to clean up inconsistency. 1458 */ 1459 if (bkgrdflag) { 1460 cmd.value = FS_NEEDSFSCK; 1461 cmd.size = 1; 1462 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1463 &cmd, sizeof cmd) == -1) 1464 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1465 } 1466 ckfini(0); 1467 exit(EEXIT); 1468 } 1469 1470 /* 1471 * Pwarn just prints a message when not preening or running soft dependency 1472 * protocol, or a warning (preceded by filename) when preening. 1473 */ 1474 void 1475 pwarn(const char *fmt, ...) 1476 { 1477 va_list ap; 1478 va_start(ap, fmt); 1479 if (preen) 1480 (void)fprintf(stdout, "%s: ", cdevname); 1481 (void)vfprintf(stdout, fmt, ap); 1482 va_end(ap); 1483 } 1484 1485 /* 1486 * Stub for routines from kernel. 1487 */ 1488 void 1489 panic(const char *fmt, ...) 
1490 { 1491 va_list ap; 1492 va_start(ap, fmt); 1493 pfatal("INTERNAL INCONSISTENCY:"); 1494 (void)vfprintf(stdout, fmt, ap); 1495 va_end(ap); 1496 exit(EEXIT); 1497 } 1498