1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 */ 31 32 #if 0 33 #ifndef lint 34 static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; 35 #endif /* not lint */ 36 #endif 37 #include <sys/cdefs.h> 38 #include <sys/param.h> 39 #include <sys/time.h> 40 #include <sys/types.h> 41 #include <sys/sysctl.h> 42 #include <sys/disk.h> 43 #include <sys/disklabel.h> 44 #include <sys/ioctl.h> 45 #include <sys/stat.h> 46 47 #include <ufs/ufs/dinode.h> 48 #include <ufs/ufs/dir.h> 49 #include <ufs/ffs/fs.h> 50 51 #include <err.h> 52 #include <errno.h> 53 #include <string.h> 54 #include <ctype.h> 55 #include <fstab.h> 56 #include <stdint.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <time.h> 60 #include <unistd.h> 61 #include <libufs.h> 62 63 #include "fsck.h" 64 65 int sujrecovery = 0; 66 67 static struct bufarea *allocbuf(const char *); 68 static void cg_write(struct bufarea *); 69 static void slowio_start(void); 70 static void slowio_end(void); 71 static void printIOstats(void); 72 73 static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ 74 static struct timespec startpass, finishpass; 75 struct timeval slowio_starttime; 76 int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ 77 int slowio_pollcnt; 78 static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ 79 static struct bufarea failedbuf; /* returned by failed getdatablk() */ 80 static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ 81 static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ 82 static struct bufhash freebufs; /* unused buffers */ 83 static int numbufs; /* size of buffer cache */ 84 static int cachelookups; /* number of cache lookups */ 85 static int cachereads; /* number of cache reads */ 86 static int flushtries; /* number of tries to reclaim memory */ 87 88 char *buftype[BT_NUMBUFTYPES] = BT_NAMES; 89 90 void 91 fsutilinit(void) 92 { 93 diskreads = totaldiskreads = totalreads = 0; 94 bzero(&startpass, 
sizeof(struct timespec)); 95 bzero(&finishpass, sizeof(struct timespec)); 96 bzero(&slowio_starttime, sizeof(struct timeval)); 97 slowio_delay_usec = 10000; 98 slowio_pollcnt = 0; 99 flushtries = 0; 100 } 101 102 int 103 ftypeok(union dinode *dp) 104 { 105 switch (DIP(dp, di_mode) & IFMT) { 106 107 case IFDIR: 108 case IFREG: 109 case IFBLK: 110 case IFCHR: 111 case IFLNK: 112 case IFSOCK: 113 case IFIFO: 114 return (1); 115 116 default: 117 if (debug) 118 printf("bad file type 0%o\n", DIP(dp, di_mode)); 119 return (0); 120 } 121 } 122 123 int 124 reply(const char *question) 125 { 126 int persevere; 127 char c; 128 129 if (preen) 130 pfatal("INTERNAL ERROR: GOT TO reply()"); 131 persevere = strcmp(question, "CONTINUE") == 0 || 132 strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0; 133 printf("\n"); 134 if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { 135 printf("%s? no\n\n", question); 136 resolved = 0; 137 return (0); 138 } 139 if (yflag || (persevere && nflag)) { 140 printf("%s? yes\n\n", question); 141 return (1); 142 } 143 do { 144 printf("%s? [yn] ", question); 145 (void) fflush(stdout); 146 c = getc(stdin); 147 while (c != '\n' && getc(stdin) != '\n') { 148 if (feof(stdin)) { 149 resolved = 0; 150 return (0); 151 } 152 } 153 } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); 154 printf("\n"); 155 if (c == 'y' || c == 'Y') 156 return (1); 157 resolved = 0; 158 return (0); 159 } 160 161 /* 162 * Look up state information for an inode. 163 */ 164 struct inostat * 165 inoinfo(ino_t inum) 166 { 167 static struct inostat unallocated = { USTATE, 0, 0, 0 }; 168 struct inostatlist *ilp; 169 int iloff; 170 171 if (inum >= maxino) 172 errx(EEXIT, "inoinfo: inumber %ju out of range", 173 (uintmax_t)inum); 174 ilp = &inostathead[inum / sblock.fs_ipg]; 175 iloff = inum % sblock.fs_ipg; 176 if (iloff >= ilp->il_numalloced) 177 return (&unallocated); 178 return (&ilp->il_stat[iloff]); 179 } 180 181 /* 182 * Malloc buffers and set up cache. 
183 */ 184 void 185 bufinit(void) 186 { 187 int i; 188 189 initbarea(&failedbuf, BT_UNKNOWN); 190 failedbuf.b_errs = -1; 191 failedbuf.b_un.b_buf = NULL; 192 if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) 193 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); 194 initbarea(&cgblk, BT_CYLGRP); 195 numbufs = cachelookups = cachereads = 0; 196 TAILQ_INIT(&bufqueuehd); 197 LIST_INIT(&freebufs); 198 for (i = 0; i < HASHSIZE; i++) 199 LIST_INIT(&bufhashhd[i]); 200 for (i = 0; i < BT_NUMBUFTYPES; i++) { 201 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; 202 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; 203 readcnt[i] = totalreadcnt[i] = 0; 204 } 205 } 206 207 static struct bufarea * 208 allocbuf(const char *failreason) 209 { 210 struct bufarea *bp; 211 char *bufp; 212 213 bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); 214 bufp = Malloc((unsigned int)sblock.fs_bsize); 215 if (bp == NULL || bufp == NULL) { 216 errx(EEXIT, "%s", failreason); 217 /* NOTREACHED */ 218 } 219 numbufs++; 220 bp->b_un.b_buf = bufp; 221 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 222 initbarea(bp, BT_UNKNOWN); 223 return (bp); 224 } 225 226 /* 227 * Manage cylinder group buffers. 228 * 229 * Use getblk() here rather than cgget() because the cylinder group 230 * may be corrupted but we want it anyway so we can fix it. 
231 */ 232 static struct bufarea *cgbufs; /* header for cylinder group cache */ 233 static int flushtries; /* number of tries to reclaim memory */ 234 235 struct bufarea * 236 cglookup(int cg) 237 { 238 struct bufarea *cgbp; 239 struct cg *cgp; 240 241 if ((unsigned) cg >= sblock.fs_ncg) 242 errx(EEXIT, "cglookup: out of range cylinder group %d", cg); 243 if (cgbufs == NULL) { 244 cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea)); 245 if (cgbufs == NULL) 246 errx(EEXIT, "Cannot allocate cylinder group buffers"); 247 } 248 cgbp = &cgbufs[cg]; 249 if (cgbp->b_un.b_cg != NULL) 250 return (cgbp); 251 cgp = NULL; 252 if (flushtries == 0) 253 cgp = Malloc((unsigned int)sblock.fs_cgsize); 254 if (cgp == NULL) { 255 if (sujrecovery) 256 errx(EEXIT,"Ran out of memory during journal recovery"); 257 flush(fswritefd, &cgblk); 258 getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); 259 return (&cgblk); 260 } 261 cgbp->b_un.b_cg = cgp; 262 initbarea(cgbp, BT_CYLGRP); 263 getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); 264 return (cgbp); 265 } 266 267 /* 268 * Mark a cylinder group buffer as dirty. 269 * Update its check-hash if they are enabled. 270 */ 271 void 272 cgdirty(struct bufarea *cgbp) 273 { 274 struct cg *cg; 275 276 cg = cgbp->b_un.b_cg; 277 if ((sblock.fs_metackhash & CK_CYLGRP) != 0) { 278 cg->cg_ckhash = 0; 279 cg->cg_ckhash = 280 calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize); 281 } 282 dirty(cgbp); 283 } 284 285 /* 286 * Attempt to flush a cylinder group cache entry. 287 * Return whether the flush was successful. 288 */ 289 int 290 flushentry(void) 291 { 292 struct bufarea *cgbp; 293 294 if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL) 295 return (0); 296 cgbp = &cgbufs[flushtries++]; 297 if (cgbp->b_un.b_cg == NULL) 298 return (0); 299 flush(fswritefd, cgbp); 300 free(cgbp->b_un.b_buf); 301 cgbp->b_un.b_buf = NULL; 302 return (1); 303 } 304 305 /* 306 * Manage a cache of filesystem disk blocks. 
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_err == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
		failedbuf.b_refcnt++;
		return (&failedbuf);
	}
	/* First look for the block in the hash chain for its address. */
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move a long-term busy buffer back to the front of the LRU so we
	 * do not endlessly inspect it for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		/* Reuse a previously invalidated buffer. */
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if (bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory for buffers");
		else
			LIST_REMOVE(bp, b_hash);
	}
	TAILQ_REMOVE(&bufqueuehd, bp, b_list);
	flush(fswritefd, bp);
	bp->b_type = type;
	LIST_INSERT_HEAD(bhdp, bp, b_hash);
	getblk(bp, blkno, size);
	cachereads++;
	/* fall through */
foundit:
	/* Reinsert at the head of the LRU as most-recently used. */
	TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	if (debug && bp->b_type != type) {
		printf("getdatablk: buffer type changed to %s",
		    BT_BUFTYPE(type));
		prtbuf(bp, "");
	}
	/* Only hand out a reference when the read succeeded. */
	if (bp->b_errs == 0)
		bp->b_refcnt++;
	return (bp);
}

/*
 * Fill bp with the disk block at `blk' unless bp already holds it
 * (b_bno matches); read errors are recorded in bp->b_errs.  When
 * debugging, per-buffer-type read counts and times are accumulated.
 */
void
getblk(struct bufarea *bp, ufs2_daddr_t blk, long size)
{
	ufs2_daddr_t dblk;
	struct timespec start, finish;

	dblk = fsbtodb(&sblock, blk);
	if (bp->b_bno == dblk) {
		totalreads++;
	} else {
		if (debug) {
			readcnt[bp->b_type]++;
			clock_gettime(CLOCK_REALTIME_PRECISE, &start);
		}
		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size);
		if (debug) {
			clock_gettime(CLOCK_REALTIME_PRECISE, &finish);
			timespecsub(&finish, &start, &finish);
			timespecadd(&readtime[bp->b_type], &finish,
			    &readtime[bp->b_type]);
		}
		bp->b_bno = dblk;
		bp->b_size = size;
	}
}

/*
 * Release one reference to a buffer obtained from getdatablk().
 */
void
brelse(struct bufarea *bp)
{

	if (bp->b_refcnt <= 0)
		prtbuf(bp, "brelse: buffer with negative reference count");
	bp->b_refcnt--;
}

/*
 * Invalidate a buffer: discard its dirty state and move it from the
 * hash chains to the free list for reuse.
 */
void
binval(struct bufarea *bp)
{

	bp->b_flags &= ~B_DIRTY;
	LIST_REMOVE(bp, b_hash);
	LIST_INSERT_HEAD(&freebufs, bp, b_hash);
}

/*
 * If the buffer is dirty, write it to disk using the writer
 * appropriate for its type (superblock, cylinder group, or raw
 * block), clearing B_DIRTY first.  Writing is refused in read-only
 * mode, and buffers that had read errors are written zero-filled
 * (with a warning).
 */
void
flush(int fd, struct bufarea *bp)
{
	struct inode ip;

	if ((bp->b_flags & B_DIRTY) == 0)
		return;
	bp->b_flags &= ~B_DIRTY;
	if (fswritefd < 0) {
		pfatal("WRITING IN READ_ONLY MODE.\n");
		return;
	}
	if (bp->b_errs != 0)
		pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n",
		    (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ",
		    (long long)bp->b_bno);
	bp->b_errs = 0;
	/*
	 * Write using the appropriate function.
	 */
	switch (bp->b_type) {
	case BT_SUPERBLK:
		if (bp != &sblk)
			pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n",
			    bp, &sblk);
		/*
		 * Superblocks are always pre-copied so we do not need
		 * to check them for copy-on-write.
		 */
		if (sbput(fd, bp->b_un.b_fs, 0) == 0)
			fsmodified = 1;
		break;
	case BT_CYLGRP:
		/*
		 * Cylinder groups are always pre-copied so we do not
		 * need to check them for copy-on-write.
		 */
		if (sujrecovery)
			cg_write(bp);
		if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0)
			fsmodified = 1;
		break;
	case BT_INODES:
		if (debug && sblock.fs_magic == FS_UFS2_MAGIC) {
			struct ufs2_dinode *dp = bp->b_un.b_dinode2;
			int i;

			/* Verify each inode's check-hash before writing. */
			for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) {
				if (ffs_verify_dinode_ckhash(&sblock, dp) == 0)
					continue;
				pwarn("flush: INODE CHECK-HASH FAILED");
				ip.i_bp = bp;
				ip.i_dp = (union dinode *)dp;
				ip.i_number = bp->b_index + (i / sizeof(*dp));
				prtinode(&ip);
				if (preen || reply("FIX") != 0) {
					if (preen)
						printf(" (FIXED)\n");
					ffs_update_dinode_ckhash(&sblock, dp);
					inodirty(&ip);
				}
			}
		}
		/* FALLTHROUGH */
	default:
		copyonwrite(&sblock, bp, std_checkblkavail);
		blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size);
		break;
	}
}

/*
 * If there are any snapshots, ensure that all the blocks that they
 * care about have been copied, then release the snapshot inodes.
 * These operations need to be done before we rebuild the cylinder
 * groups so that any block allocations are properly recorded.
 * Since all the cylinder group maps have already been copied in
 * the snapshots, no further snapshot copies will need to be done.
 */
void
snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long))
{
	struct bufarea *bp;
	int cnt;

	if (snapcnt > 0) {
		if (debug)
			printf("Check for snapshot copies\n");
		/* Copy-on-write every dirty buffer into the snapshots. */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) != 0)
				copyonwrite(&sblock, bp, checkblkavail);
		for (cnt = 0; cnt < snapcnt; cnt++)
			irelse(&snaplist[cnt]);
		snapcnt = 0;
	}
}

/*
 * Journaled soft updates does not maintain cylinder group summary
 * information during cleanup, so this routine recalculates the summary
 * information and updates the superblock summary in preparation for
 * writing out the cylinder group.
 */
static void
cg_write(struct bufarea *bp)
{
	ufs1_daddr_t fragno, cgbno, maxbno;
	u_int8_t *blksfree;
	struct csum *csp;
	struct cg *cgp;
	int blk;
	int i;

	/*
	 * Fix the frag and cluster summary.
	 */
	cgp = bp->b_un.b_cg;
	cgp->cg_cs.cs_nbfree = 0;
	cgp->cg_cs.cs_nffree = 0;
	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
	maxbno = fragstoblks(&sblock, sblock.fs_fpg);
	if (sblock.fs_contigsumsize > 0) {
		for (i = 1; i <= sblock.fs_contigsumsize; i++)
			cg_clustersum(cgp)[i] = 0;
		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
	}
	blksfree = cg_blksfree(cgp);
	/* Walk every block in the group and rebuild the counts. */
	for (cgbno = 0; cgbno < maxbno; cgbno++) {
		if (ffs_isfreeblock(&sblock, blksfree, cgbno))
			continue;
		if (ffs_isblock(&sblock, blksfree, cgbno)) {
			ffs_clusteracct(&sblock, cgp, cgbno, 1);
			cgp->cg_cs.cs_nbfree++;
			continue;
		}
		fragno = blkstofrags(&sblock, cgbno);
		blk = blkmap(&sblock, blksfree, fragno);
		ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1);
		for (i = 0; i < sblock.fs_frag; i++)
			if (isset(blksfree, fragno + i))
				cgp->cg_cs.cs_nffree++;
	}
	/*
	 * Update the superblock cg summary from our now correct values
	 * before writing the block.
	 */
	csp = &sblock.fs_cs(&sblock, cgp->cg_cgx);
	sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir;
	sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree;
	sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree;
	sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree;
	sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs;
}

/*
 * Report a fatal read/write error on block `blk' and, unless the
 * operator chooses to continue, exit.  A background check exits
 * immediately.
 */
void
rwerror(const char *mesg, ufs2_daddr_t blk)
{

	if (bkgrdcheck)
		exit(EEXIT);
	if (preen == 0)
		printf("\n");
	pfatal("CANNOT %s: %ld", mesg, (long)blk);
	if (reply("CONTINUE") == 0)
		exit(EEXIT);
}

/*
 * Finish up a check: flush all cached buffers in the required order,
 * update the superblock clean flag as requested by `markclean', free
 * the tracking structures, and close the disk descriptors.
 */
void
ckfini(int markclean)
{
	struct bufarea *bp, *nbp;
	int ofsmodified, cnt, cg;

	if (bkgrdflag) {
		/* Tell the kernel the in-use filesystem's new clean state. */
		if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) {
			cmd.value = FS_UNCLEAN;
			cmd.size = markclean ? -1 : 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n");
			if (!preen) {
				printf("\n***** FILE SYSTEM MARKED %s *****\n",
				    markclean ? "CLEAN" : "DIRTY");
				if (!markclean)
					rerun = 1;
			}
		} else if (!preen && !markclean) {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
		bkgrdflag = 0;
	}
	if (debug && cachelookups > 0)
		printf("cache with %d buffers missed %d of %d (%d%%)\n",
		    numbufs, cachereads, cachelookups,
		    (int)(cachereads * 100 / cachelookups));
	if (fswritefd < 0) {
		(void)close(fsreadfd);
		return;
	}

	/*
	 * To remain idempotent with partial truncations the buffers
	 * must be flushed in this order:
	 * 1) cylinder groups (bitmaps)
	 * 2) indirect, directory, external attribute, and data blocks
	 * 3) inode blocks
	 * 4) superblock
	 * This ordering preserves access to the modified pointers
	 * until they are freed.
	 */
	/* Step 1: cylinder groups */
	if (debug)
		printf("Flush Cylinder groups\n");
	if (cgbufs != NULL) {
		for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
			if (cgbufs[cnt].b_un.b_cg == NULL)
				continue;
			flush(fswritefd, &cgbufs[cnt]);
			free(cgbufs[cnt].b_un.b_cg);
		}
		free(cgbufs);
		cgbufs = NULL;
	}
	flush(fswritefd, &cgblk);
	free(cgblk.b_un.b_buf);
	cgblk.b_un.b_buf = NULL;
	cnt = 0;
	/* Step 2: indirect, directory, external attribute, and data blocks */
	if (debug)
		printf("Flush indirect, directory, external attribute, "
		    "and data blocks\n");
	if (pdirbp != NULL) {
		brelse(pdirbp);
		pdirbp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		switch (bp->b_type) {
		/* These should not be in the buffer cache list */
		case BT_UNKNOWN:
		case BT_SUPERBLK:
		case BT_CYLGRP:
		default:
			prtbuf(bp,"ckfini: improper buffer type on cache list");
			continue;
		/* These are the ones to flush in this step */
		case BT_LEVEL1:
		case BT_LEVEL2:
		case BT_LEVEL3:
		case BT_EXTATTR:
		case BT_DIRDATA:
		case BT_DATA:
			break;
		/* These are the ones to flush in the next step */
		case BT_INODES:
			continue;
		}
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	/* Step 3: inode blocks */
	if (debug)
		printf("Flush inode blocks\n");
	if (icachebp != NULL) {
		brelse(icachebp);
		icachebp = NULL;
	}
	TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) {
		if (debug && bp->b_refcnt != 0)
			prtbuf(bp, "ckfini: clearing in-use buffer");
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		LIST_REMOVE(bp, b_hash);
		cnt++;
		flush(fswritefd, bp);
		free(bp->b_un.b_buf);
		free((char *)bp);
	}
	if (numbufs != cnt)
		errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
	/* Step 4: superblock */
	if (debug)
		printf("Flush the superblock\n");
	flush(fswritefd, &sblk);
	if (havesb && cursnapshot == 0 &&
	    sblk.b_bno != sblock.fs_sblockloc / dev_bsize) {
		if (preen || reply("UPDATE STANDARD SUPERBLOCK")) {
			/* Change write destination to standard superblock */
			sblock.fs_sblockactualloc = sblock.fs_sblockloc;
			sblk.b_bno = sblock.fs_sblockloc / dev_bsize;
			sbdirty();
			flush(fswritefd, &sblk);
		} else {
			markclean = 0;
		}
	}
	if (cursnapshot == 0 && sblock.fs_clean != markclean) {
		if ((sblock.fs_clean = markclean) != 0) {
			sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK);
			sblock.fs_pendingblocks = 0;
			sblock.fs_pendinginodes = 0;
		}
		sbdirty();
		ofsmodified = fsmodified;
		/* Don't let the clean-flag write itself count as a repair. */
		flush(fswritefd, &sblk);
		fsmodified = ofsmodified;
		if (!preen) {
			printf("\n***** FILE SYSTEM MARKED %s *****\n",
			    markclean ? "CLEAN" : "DIRTY");
			if (!markclean)
				rerun = 1;
		}
	} else if (!preen) {
		if (markclean) {
			printf("\n***** FILE SYSTEM IS CLEAN *****\n");
		} else {
			printf("\n***** FILE SYSTEM STILL DIRTY *****\n");
			rerun = 1;
		}
	}
	/*
	 * Free allocated tracking structures.
	 */
	if (blockmap != NULL)
		free(blockmap);
	blockmap = NULL;
	if (inostathead != NULL) {
		for (cg = 0; cg < sblock.fs_ncg; cg++)
			if (inostathead[cg].il_stat != NULL)
				free((char *)inostathead[cg].il_stat);
		free(inostathead);
	}
	inostathead = NULL;
	inocleanup();
	finalIOstats();
	(void)close(fsreadfd);
	(void)close(fswritefd);
}

/*
 * Print out I/O statistics.
 */
void
IOstats(char *what)
{
	int i;

	if (debug == 0)
		return;
	if (diskreads == 0) {
		printf("%s: no I/O\n\n", what);
		return;
	}
	if (startpass.tv_sec == 0)
		startpass = startprog;
	printf("%s: I/O statistics\n", what);
	printIOstats();
	/* Roll this pass's counters into the totals and restart them. */
	totaldiskreads += diskreads;
	diskreads = 0;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i].tv_sec = readtime[i].tv_nsec = 0;
		readcnt[i] = 0;
	}
	clock_gettime(CLOCK_REALTIME_PRECISE, &startpass);
}

/*
 * Print the cumulative I/O statistics for the whole run (debug only).
 */
void
finalIOstats(void)
{
	int i;

	if (debug == 0)
		return;
	printf("Final I/O statistics\n");
	totaldiskreads += diskreads;
	diskreads = totaldiskreads;
	startpass = startprog;
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]);
		totalreadcnt[i] += readcnt[i];
		readtime[i] = totalreadtime[i];
		readcnt[i] = totalreadcnt[i];
	}
	printIOstats();
}

/*
 * Print elapsed time since `startpass' and a per-buffer-type breakdown
 * of read counts and read times (as percentages of diskreads and of
 * the total read time).
 */
static void
printIOstats(void)
{
	long long msec, totalmsec;
	int i;

	clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
	timespecsub(&finishpass, &startpass, &finishpass);
	printf("Running time: %jd.%03ld sec\n",
	    (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000);
	printf("buffer reads by type:\n");
	for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
		totalmsec += readtime[i].tv_sec * 1000 +
		    readtime[i].tv_nsec / 1000000;
	if (totalmsec == 0)
		totalmsec = 1;	/* avoid dividing by zero below */
	for (i = 0; i < BT_NUMBUFTYPES; i++) {
		if (readcnt[i] == 0)
			continue;
		msec =
		    readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
		printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n",
		    buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
		    (readcnt[i] * 1000 / diskreads) % 10,
		    (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
		    msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
	}
	printf("\n");
}

/*
 * Read `size' bytes at filesystem block `blk' into buf.  On failure,
 * zero the buffer and retry sector by sector, reporting each sector
 * that could not be read; returns the number of failed sectors (0 on
 * full success).
 */
int
blread(int fd, char *buf, ufs2_daddr_t blk, long size)
{
	char *cp;
	int i, errs;
	off_t offset;

	offset = blk;
	offset *= dev_bsize;
	if (bkgrdflag)
		slowio_start();
	totalreads++;
	diskreads++;
	if (pread(fd, buf, (int)size, offset) == size) {
		if (bkgrdflag)
			slowio_end();
		return (0);
	}

	/*
	 * This is handled specially here instead of in rwerror because
	 * rwerror is used for all sorts of errors, not just true read/write
	 * errors. It should be refactored and fixed.
	 */
	if (surrender) {
		pfatal("CANNOT READ_BLK: %ld", (long)blk);
		errx(EEXIT, "ABORTING DUE TO READ ERRORS");
	} else
		rwerror("READ BLK", blk);

	/* Retry one sector at a time so a single bad sector loses least. */
	errs = 0;
	memset(buf, 0, (size_t)size);
	printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:");
	for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) {
		if (pread(fd, cp, (int)secsize, offset + i) != secsize) {
			if (secsize != dev_bsize && dev_bsize != 1)
				printf(" %jd (%jd),",
				    (intmax_t)(blk * dev_bsize + i) / secsize,
				    (intmax_t)blk + i / dev_bsize);
			else
				printf(" %jd,", (intmax_t)blk + i / dev_bsize);
			errs++;
		}
	}
	printf("\n");
	if (errs)
		resolved = 0;
	return (errs);
}

/*
 * Write `size' bytes from buf at filesystem block `blk'.  On failure,
 * retry one device block at a time and report each block that could
 * not be written.  A negative fd is silently ignored.
 */
void
blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size)
{
	int i;
	char *cp;
	off_t offset;

	if (fd < 0)
		return;
	offset = blk;
	offset *= dev_bsize;
	if (pwrite(fd, buf, size, offset) == size) {
		fsmodified = 1;
		return;
	}
	resolved = 0;
	rwerror("WRITE BLK", blk);
	printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
	for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize)
		if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize)
			printf(" %jd,", (intmax_t)blk + i / dev_bsize);
	printf("\n");
	return;
}
void
blerase(int fd, ufs2_daddr_t blk, long size)
{
	off_t ioarg[2];

	if (fd < 0)
		return;
	/* Ask the device to discard (delete) the byte range. */
	ioarg[0] = blk * dev_bsize;
	ioarg[1] = size;
	ioctl(fd, DIOCGDELETE, ioarg);
	/* we don't really care if we succeed or not */
	return;
}

/*
 * Fill a contiguous region with all-zeroes.  Note ZEROBUFSIZE is by
 * definition a multiple of dev_bsize.
 */
void
blzero(int fd, ufs2_daddr_t blk, long size)
{
	static char *zero;	/* lazily allocated shared zero buffer */
	off_t offset, len;

	if (fd < 0)
		return;
	if (zero == NULL) {
		zero = calloc(ZEROBUFSIZE, 1);
		if (zero == NULL)
			errx(EEXIT, "cannot allocate buffer pool");
	}
	offset = blk * dev_bsize;
	if (lseek(fd, offset, 0) < 0)	/* whence 0 == SEEK_SET */
		rwerror("SEEK BLK", blk);
	while (size > 0) {
		len = MIN(ZEROBUFSIZE, size);
		if (write(fd, zero, len) != len)
			rwerror("WRITE BLK", blk);
		blk += len / dev_bsize;
		size -= len;
	}
}

/*
 * Verify cylinder group's magic number and other parameters.  If the
 * test fails, offer an option to rebuild the whole cylinder group.
 *
 * Return 1 if the cylinder group is good or return 0 if it is bad.
 */
#undef CHK
#define CHK(lhs, op, rhs, fmt)						\
	if (lhs op rhs) {						\
		pwarn("UFS%d cylinder group %d failed: "		\
		    "%s (" #fmt ") %s %s (" #fmt ")\n",			\
		    sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg,	\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs);	\
		error = 1;						\
	}
int
check_cgmagic(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	uint32_t cghash, calchash;
	static int prevfailcg = -1;	/* suppress repeated reports for a cg */
	long start;
	int error;

	/*
	 * Extended cylinder group checks.
	 */
	calchash = cgp->cg_ckhash;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0 &&
	    (ckhashadd & CK_CYLGRP) == 0) {
		/* Recompute the check-hash with the stored hash zeroed. */
		cghash = cgp->cg_ckhash;
		cgp->cg_ckhash = 0;
		calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
		cgp->cg_ckhash = cghash;
	}
	error = 0;
	CHK(cgp->cg_ckhash, !=, calchash, "%jd");
	CHK(cg_chkmagic(cgp), ==, 0, "%jd");
	CHK(cgp->cg_cgx, !=, cg, "%jd");
	CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd");
	if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd");
	} else if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd");
	}
	/* Only the last group may be smaller than fs_fpg. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) {
		CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd");
	} else {
		CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg),
		    "%jd");
	}
	/* Validate the layout of the maps that follow the cg header. */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		CHK(cgp->cg_iusedoff, !=, start, "%jd");
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		CHK(cgp->cg_niblk, !=, 0, "%jd");
		CHK(cgp->cg_initediblk, !=, 0, "%jd");
		CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd");
		CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd");
		CHK(cgp->cg_old_btotoff, !=, start, "%jd");
		CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t), "%jd");
		CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t), "%jd");
	}
	CHK(cgp->cg_freeoff, !=,
	    cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd");
	if (sblock.fs_contigsumsize == 0) {
		CHK(cgp->cg_nextfreeoff, !=,
		    cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd");
	} else {
		CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag,
		    "%jd");
		CHK(cgp->cg_clustersumoff, !=,
		    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
		    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
		CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
		    "%jd");
	}
	if (error == 0)
		return (1);
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}

/*
 * Reinitialize cylinder group `cg' from scratch and mark it dirty.
 */
void
rebuild_cg(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	long start;

	/*
	 * Zero out the cylinder group and then initialize critical fields.
	 * Bit maps and summaries will be recalculated by later passes.
	 */
	memset(cgp, 0, (size_t)sblock.fs_cgsize);
	cgp->cg_magic = CG_MAGIC;
	cgp->cg_cgx = cg;
	cgp->cg_niblk = sblock.fs_ipg;
	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
		cgp->cg_ndblk = sblock.fs_fpg;
	else
		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		cgp->cg_iusedoff = start;
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		cgp->cg_niblk = 0;
		cgp->cg_initediblk = 0;
		cgp->cg_old_ncyl = sblock.fs_old_cpg;
		cgp->cg_old_niblk = sblock.fs_ipg;
		cgp->cg_old_btotoff = start;
		cgp->cg_old_boff = cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t);
		cgp->cg_iusedoff = cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t);
	}
	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
	if (sblock.fs_contigsumsize > 0) {
		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
		cgp->cg_clustersumoff =
		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
		cgp->cg_clustersumoff -= sizeof(u_int32_t);
		cgp->cg_clusteroff = cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
	}
	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
	cgdirty(cgbp);
}

/*
 * allocate a data block with the specified number of fragments
 */
ufs2_daddr_t
allocblk(long startcg, long frags,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
	ufs2_daddr_t blkno, newblk;

	if (sujrecovery && checkblkavail == std_checkblkavail) {
		pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
		return (0);
	}
	if (frags <= 0 || frags > sblock.fs_frag)
		return (0);
	/*
	 * Search from the requested starting group to the end of the
	 * filesystem; a negative return from checkblkavail is a hint to
	 * skip forward to -newblk.
	 */
	for (blkno = MAX(cgdata(&sblock, startcg), 0);
	     blkno < maxfsblock - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	/* Wrap around: search from the first group up to startcg. */
	for (blkno = MAX(cgdata(&sblock, 0), 0);
	     blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
	     blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	return (0);
}

/*
 * Check whether `frags' contiguous fragments starting within the block
 * at `blkno' are free; if so claim them (update the block map, the
 * cylinder group free map and counts) and return the address of the
 * first fragment.  Returns 0 when unavailable, or a negative skip
 * hint when the containing cylinder group fails its integrity check.
 */
ufs2_daddr_t
std_checkblkavail(ufs2_daddr_t blkno, long frags)
{
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t j, k, baseblk;
	long cg;

	if ((u_int64_t)blkno > sblock.fs_size)
		return (0);
	for (j = 0; j <= sblock.fs_frag - frags; j++) {
		if (testbmap(blkno + j))
			continue;
		for (k = 1; k < frags; k++)
			if (testbmap(blkno + j + k))
				break;
		if (k < frags) {
			j += k;
			continue;
		}
		cg = dtog(&sblock, blkno + j);
		cgbp = cglookup(cg);
		cgp = cgbp->b_un.b_cg;
		if (!check_cgmagic(cg, cgbp))
			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
		baseblk = dtogd(&sblock, blkno + j);
		for (k = 0; k < frags; k++) {
			setbmap(blkno + j + k);
			clrbit(cg_blksfree(cgp), baseblk + k);
		}
		n_blks += frags;
		if (frags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree--;
		else
			cgp->cg_cs.cs_nffree -= frags;
		cgdirty(cgbp);
		return (blkno + j);
	}
	return (0);
}

/*
 * Check whether a file size is within the limits for the filesystem.
 * Return 1 when valid and 0 when too big.
 *
 * This should match the file size limit in ffs_mountfs().
 */
int
chkfilesize(mode_t mode, u_int64_t filesize)
{
	u_int64_t kernmaxfilesize;

	if (sblock.fs_magic == FS_UFS1_MAGIC)
		kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
	else
		kernmaxfilesize = sblock.fs_maxfilesize;
	if (filesize > kernmaxfilesize ||
	    filesize > sblock.fs_maxfilesize ||
	    (mode == IFDIR && filesize > MAXDIRSIZE)) {
		if (debug)
			printf("bad file size %ju:", (uintmax_t)filesize);
		return (0);
	}
	return (1);
}

/*
 * Slow down IO so as to leave some disk bandwidth for other processes
 */
void
slowio_start()
{

	/* Delay one in every 8 operations */
	slowio_pollcnt = (slowio_pollcnt + 1) & 7;
	if (slowio_pollcnt == 0) {
		gettimeofday(&slowio_starttime, NULL);
	}
}

void
slowio_end()
{
	struct timeval tv;
	int delay_usec;

	if (slowio_pollcnt != 0)
		return;

	/* Update the slowdown interval.
*/ 1256 gettimeofday(&tv, NULL); 1257 delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + 1258 (tv.tv_usec - slowio_starttime.tv_usec); 1259 if (delay_usec < 64) 1260 delay_usec = 64; 1261 if (delay_usec > 2500000) 1262 delay_usec = 2500000; 1263 slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; 1264 /* delay by 8 times the average IO delay */ 1265 if (slowio_delay_usec > 64) 1266 usleep(slowio_delay_usec * 8); 1267 } 1268 1269 /* 1270 * Find a pathname 1271 */ 1272 void 1273 getpathname(char *namebuf, ino_t curdir, ino_t ino) 1274 { 1275 int len; 1276 char *cp; 1277 struct inode ip; 1278 struct inodesc idesc; 1279 static int busy = 0; 1280 1281 if (curdir == ino && ino == UFS_ROOTINO) { 1282 (void)strcpy(namebuf, "/"); 1283 return; 1284 } 1285 if (busy || !INO_IS_DVALID(curdir)) { 1286 (void)strcpy(namebuf, "?"); 1287 return; 1288 } 1289 busy = 1; 1290 memset(&idesc, 0, sizeof(struct inodesc)); 1291 idesc.id_type = DATA; 1292 idesc.id_fix = IGNORE; 1293 cp = &namebuf[MAXPATHLEN - 1]; 1294 *cp = '\0'; 1295 if (curdir != ino) { 1296 idesc.id_parent = curdir; 1297 goto namelookup; 1298 } 1299 while (ino != UFS_ROOTINO) { 1300 idesc.id_number = ino; 1301 idesc.id_func = findino; 1302 idesc.id_name = strdup(".."); 1303 ginode(ino, &ip); 1304 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1305 irelse(&ip); 1306 free(idesc.id_name); 1307 break; 1308 } 1309 irelse(&ip); 1310 free(idesc.id_name); 1311 namelookup: 1312 idesc.id_number = idesc.id_parent; 1313 idesc.id_parent = ino; 1314 idesc.id_func = findname; 1315 idesc.id_name = namebuf; 1316 ginode(idesc.id_number, &ip); 1317 if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { 1318 irelse(&ip); 1319 break; 1320 } 1321 irelse(&ip); 1322 len = strlen(namebuf); 1323 cp -= len; 1324 memmove(cp, namebuf, (size_t)len); 1325 *--cp = '/'; 1326 if (cp < &namebuf[UFS_MAXNAMLEN]) 1327 break; 1328 ino = idesc.id_number; 1329 } 1330 busy = 0; 1331 if (ino != UFS_ROOTINO) 1332 *--cp = '?'; 1333 memmove(namebuf, 
cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); 1334 } 1335 1336 void 1337 catch(int sig __unused) 1338 { 1339 1340 ckfini(0); 1341 exit(12); 1342 } 1343 1344 /* 1345 * When preening, allow a single quit to signal 1346 * a special exit after file system checks complete 1347 * so that reboot sequence may be interrupted. 1348 */ 1349 void 1350 catchquit(int sig __unused) 1351 { 1352 printf("returning to single-user after file system check\n"); 1353 returntosingle = 1; 1354 (void)signal(SIGQUIT, SIG_DFL); 1355 } 1356 1357 /* 1358 * determine whether an inode should be fixed. 1359 */ 1360 int 1361 dofix(struct inodesc *idesc, const char *msg) 1362 { 1363 1364 switch (idesc->id_fix) { 1365 1366 case DONTKNOW: 1367 if (idesc->id_type == DATA) 1368 direrror(idesc->id_number, msg); 1369 else 1370 pwarn("%s", msg); 1371 if (preen) { 1372 printf(" (SALVAGED)\n"); 1373 idesc->id_fix = FIX; 1374 return (ALTERED); 1375 } 1376 if (reply("SALVAGE") == 0) { 1377 idesc->id_fix = NOFIX; 1378 return (0); 1379 } 1380 idesc->id_fix = FIX; 1381 return (ALTERED); 1382 1383 case FIX: 1384 return (ALTERED); 1385 1386 case NOFIX: 1387 case IGNORE: 1388 return (0); 1389 1390 default: 1391 errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); 1392 } 1393 /* NOTREACHED */ 1394 return (0); 1395 } 1396 1397 #include <stdarg.h> 1398 1399 /* 1400 * Print details about a buffer. 1401 */ 1402 void 1403 prtbuf(struct bufarea *bp, const char *fmt, ...) 1404 { 1405 va_list ap; 1406 va_start(ap, fmt); 1407 if (preen) 1408 (void)fprintf(stdout, "%s: ", cdevname); 1409 (void)vfprintf(stdout, fmt, ap); 1410 va_end(ap); 1411 printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " 1412 "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, 1413 bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", 1414 (intmax_t) bp->b_index); 1415 } 1416 1417 /* 1418 * An unexpected inconsistency occurred. 
1419 * Die if preening or file system is running with soft dependency protocol, 1420 * otherwise just print message and continue. 1421 */ 1422 void 1423 pfatal(const char *fmt, ...) 1424 { 1425 va_list ap; 1426 va_start(ap, fmt); 1427 if (!preen) { 1428 (void)vfprintf(stdout, fmt, ap); 1429 va_end(ap); 1430 if (usedsoftdep) 1431 (void)fprintf(stdout, 1432 "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); 1433 /* 1434 * Force foreground fsck to clean up inconsistency. 1435 */ 1436 if (bkgrdflag) { 1437 cmd.value = FS_NEEDSFSCK; 1438 cmd.size = 1; 1439 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1440 &cmd, sizeof cmd) == -1) 1441 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1442 fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); 1443 ckfini(0); 1444 exit(EEXIT); 1445 } 1446 return; 1447 } 1448 if (cdevname == NULL) 1449 cdevname = strdup("fsck"); 1450 (void)fprintf(stdout, "%s: ", cdevname); 1451 (void)vfprintf(stdout, fmt, ap); 1452 (void)fprintf(stdout, 1453 "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", 1454 cdevname, usedsoftdep ? " SOFT UPDATE " : " "); 1455 /* 1456 * Force foreground fsck to clean up inconsistency. 1457 */ 1458 if (bkgrdflag) { 1459 cmd.value = FS_NEEDSFSCK; 1460 cmd.size = 1; 1461 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 1462 &cmd, sizeof cmd) == -1) 1463 pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); 1464 } 1465 ckfini(0); 1466 exit(EEXIT); 1467 } 1468 1469 /* 1470 * Pwarn just prints a message when not preening or running soft dependency 1471 * protocol, or a warning (preceded by filename) when preening. 1472 */ 1473 void 1474 pwarn(const char *fmt, ...) 1475 { 1476 va_list ap; 1477 va_start(ap, fmt); 1478 if (preen) 1479 (void)fprintf(stdout, "%s: ", cdevname); 1480 (void)vfprintf(stdout, fmt, ap); 1481 va_end(ap); 1482 } 1483 1484 /* 1485 * Stub for routines from kernel. 1486 */ 1487 void 1488 panic(const char *fmt, ...) 
1489 { 1490 va_list ap; 1491 va_start(ap, fmt); 1492 pfatal("INTERNAL INCONSISTENCY:"); 1493 (void)vfprintf(stdout, fmt, ap); 1494 va_end(ap); 1495 exit(EEXIT); 1496 } 1497