1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
 */

#if 0
#ifndef lint
static const char sccsid[] = "@(#)utilities.c	8.6 (Berkeley) 5/19/95";
#endif /* not lint */
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/stat.h>

#include <ufs/ufs/dinode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ffs/fs.h>

#include <err.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <fstab.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <libufs.h>

#include "fsck.h"

int sujrecovery = 0;		/* nonzero while recovering a SU+J journal */

static struct bufarea *allocbuf(const char *);
static void cg_write(struct bufarea *);
static void slowio_start(void);
static void slowio_end(void);
static void printIOstats(void);

static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */
static struct timespec startpass, finishpass; /* timing of the current pass */
struct timeval slowio_starttime;
int slowio_delay_usec = 10000;	/* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk;	/* backup buffer for cylinder group blocks */
static struct bufarea failedbuf; /* returned by failed getdatablk() */
static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */
static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */
static struct bufhash freebufs;	/* unused buffers */
static int numbufs;		/* size of buffer cache */
static int cachelookups;	/* number of cache lookups */
static int cachereads;		/* number of cache reads */
static int flushtries;		/* number of tries to reclaim memory */

char *buftype[BT_NUMBUFTYPES] = BT_NAMES;

/*
 * Reset the disk-statistics counters and the background-fsck I/O
 * pacing state so that a new fsck run starts from a clean slate.
 */
void
fsutilinit(void)
{
	diskreads = totaldiskreads = totalreads = 0;
	bzero(&startpass, sizeof(struct timespec));
	bzero(&finishpass, sizeof(struct timespec));
	bzero(&slowio_starttime, sizeof(struct timeval));
	slowio_delay_usec = 10000;
	slowio_pollcnt = 0;
	flushtries = 0;
}

/*
 * Check that the type encoded in an inode's di_mode is one of the
 * known file types.  Returns 1 if valid, 0 (with a debug message)
 * otherwise.
 */
int
ftypeok(union dinode *dp)
{
	switch (DIP(dp, di_mode) & IFMT) {

	case IFDIR:
	case IFREG:
	case IFBLK:
	case IFCHR:
	case IFLNK:
	case IFSOCK:
	case IFIFO:
		return (1);

	default:
		if (debug)
			printf("bad file type 0%o\n", DIP(dp, di_mode));
		return (0);
	}
}

/*
 * Ask the operator a yes/no question and return 1 for yes, 0 for no.
 * Must not be called in preen mode.  In no-write situations the answer
 * is forced to "no" (and `resolved` cleared) unless the question is one
 * of the "persevere" prompts, which may still be answered "yes" under
 * the -n flag.  The -y flag forces "yes".
 */
int
reply(const char *question)
{
	int persevere;
	char c;

	if (preen)
		pfatal("INTERNAL ERROR: GOT TO reply()");
	persevere = strcmp(question, "CONTINUE") == 0 ||
	    strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0;
	printf("\n");
	if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) {
		printf("%s? no\n\n", question);
		resolved = 0;
		return (0);
	}
	if (yflag || (persevere && nflag)) {
		printf("%s? yes\n\n", question);
		return (1);
	}
	do {
		printf("%s? [yn] ", question);
		(void) fflush(stdout);
		c = getc(stdin);
		/* Consume the rest of the input line; bail out on EOF. */
		while (c != '\n' && getc(stdin) != '\n') {
			if (feof(stdin)) {
				resolved = 0;
				return (0);
			}
		}
	} while (c != 'y' && c != 'Y' && c != 'n' && c != 'N');
	printf("\n");
	if (c == 'y' || c == 'Y')
		return (1);
	resolved = 0;
	return (0);
}

/*
 * Look up state information for an inode.
 */
struct inostat *
inoinfo(ino_t inum)
{
	/* Shared sentinel returned for inodes outside any allocated list. */
	static struct inostat unallocated = { USTATE, 0, 0, 0 };
	struct inostatlist *ilp;
	int iloff;

	if (inum >= maxino)
		errx(EEXIT, "inoinfo: inumber %ju out of range",
		    (uintmax_t)inum);
	ilp = &inostathead[inum / sblock.fs_ipg];
	iloff = inum % sblock.fs_ipg;
	if (iloff >= ilp->il_numalloced)
		return (&unallocated);
	return (&ilp->il_stat[iloff]);
}

/*
 * Malloc buffers and set up cache.
 */
void
bufinit(void)
{
	int i;

	/* failedbuf is the sentinel handed out by getdatablk() on error. */
	initbarea(&failedbuf, BT_UNKNOWN);
	failedbuf.b_errs = -1;
	failedbuf.b_un.b_buf = NULL;
	if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
		errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
	initbarea(&cgblk, BT_CYLGRP);
	numbufs = cachelookups = cachereads = 0;
	TAILQ_INIT(&bufqueuehd);
	LIST_INIT(&freebufs);
	for (i = 0; i < HASHSIZE; i++)
		LIST_INIT(&bufhashhd[i]);
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		readtime[i].tv_sec = totalreadtime[i].tv_sec = 0;
		readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0;
		readcnt[i] = totalreadcnt[i] = 0;
	}
}

/*
 * Allocate a new cache buffer (header plus one fs block of data) and
 * put it on the LRU.  Exits with `failreason` if memory is exhausted.
 */
static struct bufarea *
allocbuf(const char *failreason)
{
	struct bufarea *bp;
	char *bufp;

	bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
	bufp = Malloc((unsigned int)sblock.fs_bsize);
	if (bp == NULL || bufp == NULL) {
		errx(EEXIT, "%s", failreason);
		/* NOTREACHED */
	}
	numbufs++;
	bp->b_un.b_buf = bufp;
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	initbarea(bp, BT_UNKNOWN);
	return (bp);
}

/*
 * Manage cylinder group buffers.
 *
 * Use getblk() here rather than cgget() because the cylinder group
 * may be corrupted but we want it anyway so we can fix it.
 */
static struct bufarea *cgbufs;	/* header for cylinder group cache */
static int flushtries;		/* number of tries to reclaim memory */

/*
 * Return the (possibly cached) buffer for cylinder group `cg`.  The
 * per-cg cache array is allocated lazily; if memory for a new entry
 * cannot be obtained, fall back to the single shared `cgblk` buffer
 * (flushing its previous contents first).
 */
struct bufarea *
cglookup(int cg)
{
	struct bufarea *cgbp;
	struct cg *cgp;

	if ((unsigned) cg >= sblock.fs_ncg)
		errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
	if (cgbufs == NULL) {
		cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
		if (cgbufs == NULL)
			errx(EEXIT, "Cannot allocate cylinder group buffers");
	}
	cgbp = &cgbufs[cg];
	if (cgbp->b_un.b_cg != NULL)
		return (cgbp);
	cgp = NULL;
	/* Once flushentry() has run, stop trying to grow the cache. */
	if (flushtries == 0)
		cgp = Malloc((unsigned int)sblock.fs_cgsize);
	if (cgp == NULL) {
		if (sujrecovery)
			errx(EEXIT,"Ran out of memory during journal recovery");
		flush(fswritefd, &cgblk);
		getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
		return (&cgblk);
	}
	cgbp->b_un.b_cg = cgp;
	initbarea(cgbp, BT_CYLGRP);
	getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize);
	return (cgbp);
}

/*
 * Mark a cylinder group buffer as dirty.
 * Update its check-hash if they are enabled.
 */
void
cgdirty(struct bufarea *cgbp)
{
	struct cg *cg;

	cg = cgbp->b_un.b_cg;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
		/* The hash field must be zero while hashing the group. */
		cg->cg_ckhash = 0;
		cg->cg_ckhash =
		    calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
	}
	dirty(cgbp);
}

/*
 * Attempt to flush a cylinder group cache entry.
 * Return whether the flush was successful.
 */
int
flushentry(void)
{
	struct bufarea *cgbp;

	if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
		return (0);
	cgbp = &cgbufs[flushtries++];
	if (cgbp->b_un.b_cg == NULL)
		return (0);
	flush(fswritefd, cgbp);
	free(cgbp->b_un.b_buf);
	cgbp->b_un.b_buf = NULL;
	return (1);
}

/*
 * Manage a cache of filesystem disk blocks.
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_err == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	/* Cache hit: pull the buffer off the LRU and rejoin at foundit. */
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffer back to the front of the LRU so we
	 * do not endless inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	/* Only hand out a reference when the read succeeded. */
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}

/*
 * Read block `blk` into buffer `bp` unless it already holds that
 * block.  In debug mode the read is timed and charged to the buffer's
 * type in readcnt[]/readtime[].
 */
void
getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
{
	ufs2_daddr_t dblk;
	struct timespec start, finish;

	dblk = fsbtodb(&sblock, blk);
	if (bp->b_bno == dblk) {
		totalreads++;
	} else {
		if (debug) {
			readcnt[bp->b_type]++;
			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
		}
		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
		if (debug) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
			timespecsub(&finish, &start, &finish);
			timespecadd(&readtime[bp->b_type], &finish,
			    &readtime[bp->b_type]);
		}
		bp->b_bno = dblk;
		bp->b_size = size;
	}
}

/*
 * Drop one reference on a buffer, warning if the count is already
 * non-positive.
 */
void
brelse(struct bufarea *bp)
{

	if (bp->b_refcnt <= 0)
		prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
}

/*
 * Invalidate a buffer: discard its dirty state and move it from the
 * hash list to the free list so it can be reused.
 */
void
binval(struct bufarea *bp)
{

	bp->b_flags &= ~B_DIRTY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
}

/*
 * Write a dirty buffer to disk using the writer appropriate to its
 * type.  A no-op for clean buffers; complains (and does nothing) in
 * read-only mode.
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			/* Sanity-check inode check-hashes before writing. */
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}

/*
 * If there are any snapshots, ensure that all the blocks that they
 * care about have been copied, then release the snapshot inodes.
 * These operations need to be done before we rebuild the cylinder
 * groups so that any block allocations are properly recorded.
 * Since all the cylinder group maps have already been copied in
 * the snapshots, no further snapshot copies will need to be done.
 */
void
snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
{
	struct bufarea *bp;
	int cnt;

	if (snapcnt > 0) {
		if (debug)
			printf("Check for snapshot copies\n");
		/* Copy-on-write every dirty buffer into the snapshots. */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) != 0)
				copyonwrite(&sblock, bp, checkblkavail);
		for (cnt = 0; cnt < snapcnt; cnt++)
			irelse(&snaplist[cnt]);
		snapcnt = 0;
	}
}

/*
 * Journaled soft updates does not maintain cylinder group summary
 * information during cleanup, so this routine recalculates the summary
 * information and updates the superblock summary in preparation for
 * writing out the cylinder group.
 */
static void
cg_write(struct bufarea *bp)
{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;

	/*
	 * Fix the frag and cluster summary.
	 */
	cgp = bp->b_un.b_cg;
	cgp->cg_cs.cs_nbfree = 0;
	cgp->cg_cs.cs_nffree = 0;
	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
	if (sblock.fs_contigsumsize > 0) {
		for (i = 1; i <= sblock.fs_contigsumsize; i++)
			cg_clustersum(cgp)[i] = 0;
		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
	}
	blksfree = cg_blksfree(cgp);
	for (cgbno = 0; cgbno < maxbno; cgbno++) {
		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
			continue;
		if (ffs_isblock(&sblock, blksfree, cgbno)) {
			ffs_clusteracct(&sblock, cgp, cgbno, 1);
			cgp->cg_cs.cs_nbfree++;
			continue;
		}
		/* Partially-free block: count its free frags individually. */
		fragno = blkstofrags(&sblock, cgbno);
		blk = blkmap(&sblock, blksfree, fragno);
		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
		for (i = 0; i < sblock.fs_frag; i++)
			if (isset(blksfree, fragno + i))
				cgp->cg_cs.cs_nffree++;
	}
	/*
	 * Update the superblock cg summary from our now correct values
	 * before writing the block.
	 */
	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}

/*
 * Report a fatal read/write error on block `blk` and exit unless the
 * operator elects to continue.
 */
void
rwerror(const char *mesg, ufs2_daddr_t blk)
{

	if (bkgrdcheck)
		exit(EEXIT);
	if (preen == 0)
		printf("\n");
	pfatal("CANNOT %s: %ld", mesg, (long)blk);
	if (reply("CONTINUE") == 0)
		exit(EEXIT);
}

/*
 * Finish up: flush all dirty buffers in the required order, update the
 * clean flag, free fsck's tracking structures, and close the disk.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		unlink(snapname);
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 * 1) cylinder groups (bitmaps)
	 * 2) indirect, directory, external attribute, and data blocks
	 * 3) inode blocks
	 * 4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		/* Writing the clean flag should not count as a repair. */
		ofsmodified = fsmodified;
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}

/*
 * Print out I/O statistics.
 */
void
IOstats(char *what)
{
	int i;

	if (debug == 0)
		return;
	if (diskreads == 0) {
		printf("%s: no I/O\n\n", what);
		return;
	}
	if (startpass.tv_sec == 0)
		startpass = startprog;
	printf("%s: I/O statistics\n", what);
	printIOstats();
	/* Roll this pass's counters into the totals and reset them. */
	totaldiskreads += diskreads;
	diskreads = 0;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i].tv_sec = readtime[i].tv_nsec = 0;
		readcnt[i] = 0;
	}
	clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
}

/*
 * Print the cumulative I/O statistics for the whole run (debug only).
 */
void
finalIOstats(void)
{
	int i;

	if (debug == 0)
		return;
	printf("Final I/O statistics\n");
	totaldiskreads += diskreads;
	diskreads = totaldiskreads;
	startpass = startprog;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i] = totalreadtime[i];
		readcnt[i] = totalreadcnt[i];
	}
	printIOstats();
}

/*
 * Print the per-buffer-type read counts and times accumulated in
 * readcnt[]/readtime[], as percentages of diskreads and total time.
 */
static void printIOstats(void)
{
	long long msec, totalmsec;
	int i;

	clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
	timespecsub(&finishpass, &startpass, &finishpass);
	printf("Running time: %jd.%03ld sec\n",
	    (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
	printf("buffer reads by type:\n");
	for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
		totalmsec += readtime[i].tv_sec * 1000 +
		    readtime[i].tv_nsec / 1000000;
	/* Avoid dividing by zero when no time was recorded. */
	if (totalmsec == 0)
		totalmsec = 1;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		if (readcnt[i] == 0)
			continue;
		msec =
		    readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
		printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
		    buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
		    (readcnt[i] * 1000 / diskreads) % 10,
		    (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec
		    / 1000000, msec * 100 / totalmsec,
		    (msec * 1000 / totalmsec) % 10);
	}
	printf("\n");
}

/*
 * Read `size` bytes at disk block `blk` into `buf`.  Returns 0 on
 * success; on failure, retries sector by sector, zero-filling and
 * reporting the unreadable sectors, and returns the error count.
 */
int
blread(int fd, char *buf, ufs2_daddr_t blk, long size)
{
	char *cp;
	int i, errs;
	off_t offset;

	offset = blk;
	offset *= dev_bsize;
	if (bkgrdflag)
		slowio_start();
	totalreads++;
	diskreads++;
	if (pread(fd, buf, (int)size, offset) == size) {
		if (bkgrdflag)
			slowio_end();
		return (0);
	}

	/*
	 * This is handled specially here instead of in rwerror because
	 * rwerror is used for all sorts of errors, not just true read/write
	 * errors. It should be refactored and fixed.
	 */
	if (surrender) {
		pfatal("CANNOT READ_BLK: %ld", (long)blk);
		errx(EEXIT, "ABORTING DUE TO READ ERRORS");
	} else
		rwerror("READ BLK", blk);

	errs = 0;
	memset(buf, 0, (size_t)size);
	printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
	for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
		if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
			if (secsize != dev_bsize && dev_bsize != 1)
				printf(" %jd (%jd),",
				    (intmax_t)(blk * dev_bsize + i) / secsize,
				    (intmax_t)blk + i / dev_bsize);
			else
				printf(" %jd,", (intmax_t)blk + i / dev_bsize);
			errs++;
		}
	}
	printf("\n");
	if (errs)
		resolved = 0;
	return (errs);
}

/*
 * Write `size` bytes from `buf` at disk block `blk`.  On failure,
 * retries sector by sector and reports the unwritable sectors.  A
 * negative fd is silently ignored.
 */
void
blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
{
	int i;
	char *cp;
	off_t offset;

	if (fd < 0)
		return;
	offset = blk;
	offset *= dev_bsize;
	if (pwrite(fd, buf, size, offset) == size) {
		fsmodified = 1;
		return;
	}
	resolved = 0;
	rwerror("WRITE BLK", blk);
	printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
	for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
		if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
			printf(" %jd,", (intmax_t)blk + i / dev_bsize);
	printf("\n");
	return;
}

/*
 * Ask the device to discard (delete/trim) the given range; the result
 * of the ioctl is intentionally ignored.
 */
void
blerase(int fd, ufs2_daddr_t blk, long size)
{
	off_t ioarg[2];

	if (fd < 0)
		return;
	ioarg[0] = blk * dev_bsize;
	ioarg[1] = size;
	ioctl(fd, DIOCGDELETE, ioarg);
	/* we don't really care if we succeed or not */
	return;
}

/*
 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by
 * definition a multiple of dev_bsize.
 */
void
blzero(int fd, ufs2_daddr_t blk, long size)
{
	static char *zero;
	off_t offset, len;

	if (fd < 0)
		return;
	if (zero == NULL) {
		zero = calloc(ZEROBUFSIZE, 1);
		if (zero == NULL)
			errx(EEXIT, "cannot allocate buffer pool");
	}
	offset = blk * dev_bsize;
	if (lseek(fd, offset, 0) < 0)
		rwerror("SEEK BLK", blk);
	while (size > 0) {
		len = MIN(ZEROBUFSIZE, size);
		if (write(fd, zero, len) != len)
			rwerror("WRITE BLK", blk);
		blk += len / dev_bsize;
		size -= len;
	}
}

/*
 * Verify cylinder group's magic number and other parameters. If the
 * test fails, offer an option to rebuild the whole cylinder group.
 *
 * Return 1 if the cylinder group is good or return 0 if it is bad.
 */
#undef CHK
#define CHK(lhs, op, rhs, fmt)						\
	if (lhs op rhs) {						\
		pwarn("UFS%d cylinder group %d failed: "		\
		    "%s (" #fmt ") %s %s (" #fmt ")\n",			\
		    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,	\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);	\
		error = 1;						\
	}
int
check_cgmagic(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	uint32_t cghash, calchash;
	static int prevfailcg = -1;
	long start;
	int error;

	/*
	 * Extended cylinder group checks.
	 */
	calchash = cgp->cg_ckhash;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
	    (ckhashadd & CK_CYLGRP) == 0) {
		/* Recompute the check-hash with the hash field zeroed. */
		cghash = cgp->cg_ckhash;
		cgp->cg_ckhash = 0;
		calchash = calculate_crc32c(~0L, (void *)cgp,
		    sblock.fs_cgsize);
		cgp->cg_ckhash = cghash;
	}
	error = 0;
	CHK(cgp->cg_ckhash, !=, calchash, "%jd");
	CHK(cg_chkmagic(cgp), ==, 0, "%jd");
	CHK(cgp->cg_cgx, !=, cg, "%jd");
	CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd");
	} else if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
	}
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
		CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
	} else {
		CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
		    "%jd");
	}
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_iusedoff, !=, start, "%jd");
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_niblk, !=, 0, "%jd");
		CHK(cgp->cg_initediblk, !=, 0, "%jd");
		CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd");
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_btotoff, !=, start, "%jd");
		CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t), "%jd");
		CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
	}
	CHK(cgp->cg_freeoff, !=,
	    cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
	if (sblock.fs_contigsumsize == 0) {
		CHK(cgp->cg_nextfreeoff, !=,
		    cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
	} else {
		CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
		    "%jd");
		CHK(cgp->cg_clustersumoff, !=,
		    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
		    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
		    "%jd");
	}
	if (error == 0)
		return (1);
	/* Only report each failing cylinder group once. */
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}

/*
 * Reinitialize cylinder group `cg` in place: zero it and rebuild the
 * critical layout fields.  Bit maps and summaries are recalculated by
 * later passes.
 */
void
rebuild_cg(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	long start;

	/*
	 * Zero out the cylinder group and then initialize critical fields.
	 * Bit maps and summaries will be recalculated by later passes.
	 */
	memset(cgp, 0, (size_t)sblock.fs_cgsize);
	cgp->cg_magic = CG_MAGIC;
	cgp->cg_cgx = cg;
	cgp->cg_niblk = sblock.fs_ipg;
	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
		cgp->cg_ndblk = sblock.fs_fpg;
	else
		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		cgp->cg_iusedoff = start;
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		cgp->cg_niblk = 0;
		cgp->cg_initediblk = 0;
		cgp->cg_old_ncyl = sblock.fs_old_cpg;
		cgp->cg_old_niblk = sblock.fs_ipg;
		cgp->cg_old_btotoff = start;
		cgp->cg_old_boff = cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t);
		cgp->cg_iusedoff = cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t);
	}
	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
	if (sblock.fs_contigsumsize > 0) {
		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
		cgp->cg_clustersumoff =
		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
		cgp->cg_clustersumoff -= sizeof(u_int32_t);
		cgp->cg_clusteroff = cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
	}
	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
	cgdirty(cgbp);
}

/*
 * allocate a data block with the specified number of fragments
 */
ufs2_daddr_t
allocblk(long startcg, long frags,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
	ufs2_daddr_t blkno, newblk;

	if (sujrecovery && checkblkavail == std_checkblkavail) {
		pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
		return (0);
	}
	if (frags <= 0 || frags > sblock.fs_frag)
		return (0);
	/* Search from the preferred cylinder group to the end... */
	for (blkno = MAX(cgdata(&sblock, startcg), 0);
	     blkno < maxfsblock - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		/* Negative return means "skip ahead to -newblk". */
		if (newblk < 0)
			blkno = -newblk;
	}
	/* ...then wrap around and search from the beginning. */
	for (blkno = MAX(cgdata(&sblock, 0), 0);
	     blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	return (0);
}

/*
 * Standard checkblkavail: claim `frags` contiguous fragments at or
 * shortly after `blkno` if the in-memory block map and the cylinder
 * group free map show them free.  Returns the starting block number
 * on success, 0 if nothing is available here, or a negative block
 * number telling the caller where to resume scanning when the
 * cylinder group fails its integrity check.
 */
ufs2_daddr_t
std_checkblkavail(ufs2_daddr_t blkno, long frags)
{
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t j, k, baseblk;
	long cg;

	if ((u_int64_t)blkno > sblock.fs_size)
		return (0);
	for (j = 0; j <= sblock.fs_frag - frags; j++) {
		if (testbmap(blkno + j))
			continue;
		for (k = 1; k < frags; k++)
			if (testbmap(blkno + j + k))
				break;
		if (k < frags) {
			j += k;
			continue;
		}
		cg = dtog(&sblock, blkno + j);
		cgbp = cglookup(cg);
		cgp = cgbp->b_un.b_cg;
		if (!check_cgmagic(cg, cgbp))
			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
		baseblk = dtogd(&sblock, blkno + j);
		/* Mark the fragments allocated in both maps. */
		for (k = 0; k < frags; k++) {
			setbmap(blkno + j + k);
			clrbit(cg_blksfree(cgp), baseblk + k);
		}
		n_blks += frags;
		if (frags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree--;
		else
			cgp->cg_cs.cs_nffree -= frags;
		cgdirty(cgbp);
		return (blkno + j);
	}
	return (0);
}

/*
 * Check whether a file size is within the limits for the filesystem.
 * Return 1 when valid and 0 when too big.
 *
 * This should match the file size limit in ffs_mountfs().
 */
int
chkfilesize(mode_t mode, u_int64_t filesize)
{
	u_int64_t kernmaxfilesize;

	if (sblock.fs_magic == FS_UFS1_MAGIC)
		kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
	else
		kernmaxfilesize = sblock.fs_maxfilesize;
	if (filesize > kernmaxfilesize ||
	    filesize > sblock.fs_maxfilesize ||
	    (mode == IFDIR && filesize > MAXDIRSIZE)) {
		if (debug)
			printf("bad file size %ju:", (uintmax_t)filesize);
		return (0);
	}
	return (1);
}

/*
 * Slow down IO so as to leave some disk bandwidth for other processes
 */
void
slowio_start()
{

	/* Delay one in every 8 operations */
	slowio_pollcnt = (slowio_pollcnt + 1) & 7;
	if (slowio_pollcnt == 0) {
		gettimeofday(&slowio_starttime, NULL);
	}
}

void
slowio_end()
{
	struct timeval tv;
	int delay_usec;

	if (slowio_pollcnt != 0)
		return;

	/* Update the slowdown interval.
*/ 1259 gettimeofday(&tv, NULL); 1260 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + 1261 (tv.tv_usec - slowio_starttime.tv_usec); 1262 if (delay_usec < 64) 1263 delay_usec = 64; 1264 if (delay_usec > 2500000) 1265 delay_usec = 2500000; 1266 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; 1267 /* delay by 8 times the average IO delay */ 1268 if (slowio_delay_usec > 64) 1269 usleep(slowio_delay_usec * 8); 1270 } 1271 1272 /* 1273 * Find a pathname 1274 */ 1275 void 1276 getpathname(char *namebuf, ino_t curdir, ino_t ino) 1277 { 1278 int len; 1279 char *cp; 1280 struct inode ip; 1281 struct inodesc idesc; 1282 static int busy = 0; 1283 1284 if (curdir == ino && ino == UFS_ROOTINO) { 1285 (void)strcpy(namebuf, "/"); 1286 return; 1287 } 1288 if (busy || !INO_IS_DVALID(curdir)) { 1289 (void)strcpy(namebuf, "?"); 1290 return; 1291 } 1292 busy = 1; 1293 memset(&idesc, 0, sizeof(struct inodesc)); 1294 idesc.id_type = DATA; 1295 idesc.id_fix = IGNORE; 1296 cp = &namebuf[MAXPATHLEN - 1]; 1297 *cp = '\0'; 1298 if (curdir != ino) { 1299 idesc.id_parent = curdir; 1300 goto namelookup; 1301 } 1302 while (ino != UFS_ROOTINO) { 1303 idesc.id_number = ino; 1304 idesc.id_func = findino; 1305 idesc.id_name = strdup(".."); 1306 ginode(ino, &ip); 1307 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1308 irelse(&ip); 1309 free(idesc.id_name); 1310 break; 1311 } 1312 irelse(&ip); 1313 free(idesc.id_name); 1314 namelookup: 1315 idesc.id_number = idesc.id_parent; 1316 idesc.id_parent = ino; 1317 idesc.id_func = findname; 1318 idesc.id_name = namebuf; 1319 ginode(idesc.id_number, &ip); 1320 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1321 irelse(&ip); 1322 break; 1323 } 1324 irelse(&ip); 1325 len = strlen(namebuf); 1326 cp -= len; 1327 memmove(cp, namebuf, (size_t)len); 1328 *--cp = '/'; 1329 if (cp < &namebuf[UFS_MAXNAMLEN]) 1330 break; 1331 ino = idesc.id_number; 1332 } 1333 busy = 0; 1334 if (ino != UFS_ROOTINO) 1335 *--cp = '?'; 1336 memmove(namebuf, 
cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); 1337 } 1338 1339 void 1340 catch(int sig __unused) 1341 { 1342 1343 ckfini(0); 1344 exit(12); 1345 } 1346 1347 /* 1348 * When preening, allow a single quit to signal 1349 * a special exit after file system checks complete 1350 * so that reboot sequence may be interrupted. 1351 */ 1352 void 1353 catchquit(int sig __unused) 1354 { 1355 printf("returning to single-user after file system check\n"); 1356 returntosingle = 1; 1357 (void)signal(SIGQUIT, SIG_DFL); 1358 } 1359 1360 /* 1361 * determine whether an inode should be fixed. 1362 */ 1363 int 1364 dofix(struct inodesc *idesc, const char *msg) 1365 { 1366 1367 switch (idesc->id_fix) { 1368 1369 case DONTKNOW: 1370 if (idesc->id_type == DATA) 1371 direrror(idesc->id_number, msg); 1372 else 1373 pwarn("%s", msg); 1374 if (preen) { 1375 printf(" (SALVAGED)\n"); 1376 idesc->id_fix = FIX; 1377 return (ALTERED); 1378 } 1379 if (reply("SALVAGE") == 0) { 1380 idesc->id_fix = NOFIX; 1381 return (0); 1382 } 1383 idesc->id_fix = FIX; 1384 return (ALTERED); 1385 1386 case FIX: 1387 return (ALTERED); 1388 1389 case NOFIX: 1390 case IGNORE: 1391 return (0); 1392 1393 default: 1394 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); 1395 } 1396 /* NOTREACHED */ 1397 return (0); 1398 } 1399 1400 #include <stdarg.h> 1401 1402 /* 1403 * Print details about a buffer. 1404 */ 1405 void 1406 prtbuf(struct bufarea *bp, const char *fmt, ...) 1407 { 1408 va_list ap; 1409 va_start(ap, fmt); 1410 if (preen) 1411 (void)fprintf(stdout, "%s: ", cdevname); 1412 (void)vfprintf(stdout, fmt, ap); 1413 va_end(ap); 1414 printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " 1415 "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, 1416 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", 1417 (intmax_t) bp->b_index); 1418 } 1419 1420 /* 1421 * An unexpected inconsistency occurred. 
1422 * Die if preening or file system is running with soft dependency protocol, 1423 * otherwise just print message and continue. 1424 */ 1425 void 1426 pfatal(const char *fmt, ...) 1427 { 1428 va_list ap; 1429 va_start(ap, fmt); 1430 if (!preen) { 1431 (void)vfprintf(stdout, fmt, ap); 1432 va_end(ap); 1433 if (usedsoftdep) 1434 (void)fprintf(stdout, 1435 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); 1436 /* 1437 * Force foreground fsck to clean up inconsistency. 1438 */ 1439 if (bkgrdflag) { 1440 cmd.value = FS_NEEDSFSCK; 1441 cmd.size = 1; 1442 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1443 &cmd, sizeof cmd) == -1) 1444 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1445 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); 1446 ckfini(0); 1447 exit(EEXIT); 1448 } 1449 return; 1450 } 1451 if (cdevname == NULL) 1452 cdevname = strdup("fsck"); 1453 (void)fprintf(stdout, "%s: ", cdevname); 1454 (void)vfprintf(stdout, fmt, ap); 1455 (void)fprintf(stdout, 1456 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", 1457 cdevname, usedsoftdep ? " SOFT UPDATE " : " "); 1458 /* 1459 * Force foreground fsck to clean up inconsistency. 1460 */ 1461 if (bkgrdflag) { 1462 cmd.value = FS_NEEDSFSCK; 1463 cmd.size = 1; 1464 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1465 &cmd, sizeof cmd) == -1) 1466 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1467 } 1468 ckfini(0); 1469 exit(EEXIT); 1470 } 1471 1472 /* 1473 * Pwarn just prints a message when not preening or running soft dependency 1474 * protocol, or a warning (preceded by filename) when preening. 1475 */ 1476 void 1477 pwarn(const char *fmt, ...) 1478 { 1479 va_list ap; 1480 va_start(ap, fmt); 1481 if (preen) 1482 (void)fprintf(stdout, "%s: ", cdevname); 1483 (void)vfprintf(stdout, fmt, ap); 1484 va_end(ap); 1485 } 1486 1487 /* 1488 * Stub for routines from kernel. 1489 */ 1490 void 1491 panic(const char *fmt, ...) 
1492 { 1493 va_list ap; 1494 va_start(ap, fmt); 1495 pfatal("INTERNAL INCONSISTENCY:"); 1496 (void)vfprintf(stdout, fmt, ap); 1497 va_end(ap); 1498 exit(EEXIT); 1499 } 1500