/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1980, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
30 */ 31 32 #if 0 33 #ifndef lint 34 static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; 35 #endif /* not lint */ 36 #endif 37 #include <sys/cdefs.h> 38 #include <sys/param.h> 39 #include <sys/time.h> 40 #include <sys/types.h> 41 #include <sys/sysctl.h> 42 #include <sys/disk.h> 43 #include <sys/disklabel.h> 44 #include <sys/ioctl.h> 45 #include <sys/stat.h> 46 47 #include <ufs/ufs/dinode.h> 48 #include <ufs/ufs/dir.h> 49 #include <ufs/ffs/fs.h> 50 51 #include <err.h> 52 #include <errno.h> 53 #include <string.h> 54 #include <ctype.h> 55 #include <fstab.h> 56 #include <stdint.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <time.h> 60 #include <unistd.h> 61 62 #include "fsck.h" 63 64 int sujrecovery = 0; 65 66 static struct bufarea *allocbuf(const char *); 67 static void cg_write(struct bufarea *); 68 static void slowio_start(void); 69 static void slowio_end(void); 70 static void printIOstats(void); 71 72 static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ 73 static struct timespec startpass, finishpass; 74 struct timeval slowio_starttime; 75 int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ 76 int slowio_pollcnt; 77 static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ 78 static struct bufarea failedbuf; /* returned by failed getdatablk() */ 79 static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ 80 static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ 81 static struct bufhash freebufs; /* unused buffers */ 82 static int numbufs; /* size of buffer cache */ 83 static int cachelookups; /* number of cache lookups */ 84 static int cachereads; /* number of cache reads */ 85 static int flushtries; /* number of tries to reclaim memory */ 86 87 char *buftype[BT_NUMBUFTYPES] = BT_NAMES; 88 89 void 90 fsutilinit(void) 91 { 92 diskreads = totaldiskreads = totalreads = 0; 93 bzero(&startpass, sizeof(struct timespec)); 94 
bzero(&finishpass, sizeof(struct timespec)); 95 bzero(&slowio_starttime, sizeof(struct timeval)); 96 slowio_delay_usec = 10000; 97 slowio_pollcnt = 0; 98 flushtries = 0; 99 } 100 101 int 102 ftypeok(union dinode *dp) 103 { 104 switch (DIP(dp, di_mode) & IFMT) { 105 106 case IFDIR: 107 case IFREG: 108 case IFBLK: 109 case IFCHR: 110 case IFLNK: 111 case IFSOCK: 112 case IFIFO: 113 return (1); 114 115 default: 116 if (debug) 117 printf("bad file type 0%o\n", DIP(dp, di_mode)); 118 return (0); 119 } 120 } 121 122 int 123 reply(const char *question) 124 { 125 int persevere; 126 char c; 127 128 if (preen) 129 pfatal("INTERNAL ERROR: GOT TO reply()"); 130 persevere = strcmp(question, "CONTINUE") == 0 || 131 strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0; 132 printf("\n"); 133 if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { 134 printf("%s? no\n\n", question); 135 resolved = 0; 136 return (0); 137 } 138 if (yflag || (persevere && nflag)) { 139 printf("%s? yes\n\n", question); 140 return (1); 141 } 142 do { 143 printf("%s? [yn] ", question); 144 (void) fflush(stdout); 145 c = getc(stdin); 146 while (c != '\n' && getc(stdin) != '\n') { 147 if (feof(stdin)) { 148 resolved = 0; 149 return (0); 150 } 151 } 152 } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); 153 printf("\n"); 154 if (c == 'y' || c == 'Y') 155 return (1); 156 resolved = 0; 157 return (0); 158 } 159 160 /* 161 * Look up state information for an inode. 162 */ 163 struct inostat * 164 inoinfo(ino_t inum) 165 { 166 static struct inostat unallocated = { USTATE, 0, 0, 0 }; 167 struct inostatlist *ilp; 168 int iloff; 169 170 if (inum >= maxino) 171 errx(EEXIT, "inoinfo: inumber %ju out of range", 172 (uintmax_t)inum); 173 ilp = &inostathead[inum / sblock.fs_ipg]; 174 iloff = inum % sblock.fs_ipg; 175 if (iloff >= ilp->il_numalloced) 176 return (&unallocated); 177 return (&ilp->il_stat[iloff]); 178 } 179 180 /* 181 * Malloc buffers and set up cache. 
182 */ 183 void 184 bufinit(void) 185 { 186 int i; 187 188 initbarea(&failedbuf, BT_UNKNOWN); 189 failedbuf.b_errs = -1; 190 failedbuf.b_un.b_buf = NULL; 191 if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL) 192 errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); 193 initbarea(&cgblk, BT_CYLGRP); 194 numbufs = cachelookups = cachereads = 0; 195 TAILQ_INIT(&bufqueuehd); 196 LIST_INIT(&freebufs); 197 for (i = 0; i < HASHSIZE; i++) 198 LIST_INIT(&bufhashhd[i]); 199 for (i = 0; i < BT_NUMBUFTYPES; i++) { 200 readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; 201 readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; 202 readcnt[i] = totalreadcnt[i] = 0; 203 } 204 } 205 206 static struct bufarea * 207 allocbuf(const char *failreason) 208 { 209 struct bufarea *bp; 210 char *bufp; 211 212 bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); 213 bufp = Balloc((unsigned int)sblock.fs_bsize); 214 if (bp == NULL || bufp == NULL) { 215 errx(EEXIT, "%s", failreason); 216 /* NOTREACHED */ 217 } 218 numbufs++; 219 bp->b_un.b_buf = bufp; 220 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 221 initbarea(bp, BT_UNKNOWN); 222 return (bp); 223 } 224 225 /* 226 * Manage cylinder group buffers. 227 * 228 * Use getblk() here rather than cgget() because the cylinder group 229 * may be corrupted but we want it anyway so we can fix it. 
230 */ 231 static struct bufarea *cgbufs; /* header for cylinder group cache */ 232 static int flushtries; /* number of tries to reclaim memory */ 233 234 struct bufarea * 235 cglookup(int cg) 236 { 237 struct bufarea *cgbp; 238 struct cg *cgp; 239 240 if ((unsigned) cg >= sblock.fs_ncg) 241 errx(EEXIT, "cglookup: out of range cylinder group %d", cg); 242 if (cgbufs == NULL) { 243 cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea)); 244 if (cgbufs == NULL) 245 errx(EEXIT, "Cannot allocate cylinder group buffers"); 246 } 247 cgbp = &cgbufs[cg]; 248 if (cgbp->b_un.b_cg != NULL) 249 return (cgbp); 250 cgp = NULL; 251 if (flushtries == 0) 252 cgp = Balloc((unsigned int)sblock.fs_cgsize); 253 if (cgp == NULL) { 254 if (sujrecovery) 255 errx(EEXIT,"Ran out of memory during journal recovery"); 256 flush(fswritefd, &cgblk); 257 getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); 258 return (&cgblk); 259 } 260 cgbp->b_un.b_cg = cgp; 261 initbarea(cgbp, BT_CYLGRP); 262 getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); 263 return (cgbp); 264 } 265 266 /* 267 * Mark a cylinder group buffer as dirty. 268 * Update its check-hash if they are enabled. 269 */ 270 void 271 cgdirty(struct bufarea *cgbp) 272 { 273 struct cg *cg; 274 275 cg = cgbp->b_un.b_cg; 276 if ((sblock.fs_metackhash & CK_CYLGRP) != 0) { 277 cg->cg_ckhash = 0; 278 cg->cg_ckhash = 279 calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize); 280 } 281 dirty(cgbp); 282 } 283 284 /* 285 * Attempt to flush a cylinder group cache entry. 286 * Return whether the flush was successful. 287 */ 288 int 289 flushentry(void) 290 { 291 struct bufarea *cgbp; 292 293 if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL) 294 return (0); 295 cgbp = &cgbufs[flushtries++]; 296 if (cgbp->b_un.b_cg == NULL) 297 return (0); 298 flush(fswritefd, cgbp); 299 free(cgbp->b_un.b_buf); 300 cgbp->b_un.b_buf = NULL; 301 return (1); 302 } 303 304 /* 305 * Manage a cache of filesystem disk blocks. 
306 */ 307 struct bufarea * 308 getdatablk(ufs2_daddr_t blkno, long size, int type) 309 { 310 struct bufarea *bp; 311 struct bufhash *bhdp; 312 313 cachelookups++; 314 /* 315 * If out of range, return empty buffer with b_err == -1 316 * 317 * Skip check for inodes because chkrange() considers 318 * metadata areas invalid to write data. 319 */ 320 if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) { 321 failedbuf.b_refcnt++; 322 return (&failedbuf); 323 } 324 bhdp = &bufhashhd[HASH(blkno)]; 325 LIST_FOREACH(bp, bhdp, b_hash) 326 if (bp->b_bno == fsbtodb(&sblock, blkno)) { 327 if (debug && bp->b_size != size) { 328 prtbuf(bp, "getdatablk: size mismatch"); 329 pfatal("getdatablk: b_size %d != size %ld\n", 330 bp->b_size, size); 331 } 332 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 333 goto foundit; 334 } 335 /* 336 * Move long-term busy buffer back to the front of the LRU so we 337 * do not endless inspect them for recycling. 338 */ 339 bp = TAILQ_LAST(&bufqueuehd, bufqueue); 340 if (bp != NULL && bp->b_refcnt != 0) { 341 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 342 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 343 } 344 /* 345 * Allocate up to the minimum number of buffers before 346 * considering recycling any of them. 347 */ 348 if (size > sblock.fs_bsize) 349 errx(EEXIT, "Excessive buffer size %ld > %d\n", size, 350 sblock.fs_bsize); 351 if ((bp = LIST_FIRST(&freebufs)) != NULL) { 352 LIST_REMOVE(bp, b_hash); 353 } else if (numbufs < MINBUFS) { 354 bp = allocbuf("cannot create minimal buffer pool"); 355 } else if (sujrecovery) { 356 /* 357 * SUJ recovery does not want anything written until it 358 * has successfully completed (so it can fail back to 359 * full fsck). Thus, we can only recycle clean buffers. 
360 */ 361 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 362 if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0) 363 break; 364 if (bp == NULL) 365 bp = allocbuf("Ran out of memory during " 366 "journal recovery"); 367 else 368 LIST_REMOVE(bp, b_hash); 369 } else { 370 /* 371 * Recycle oldest non-busy buffer. 372 */ 373 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 374 if (bp->b_refcnt == 0) 375 break; 376 if (bp == NULL) 377 bp = allocbuf("Ran out of memory for buffers"); 378 else 379 LIST_REMOVE(bp, b_hash); 380 } 381 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 382 flush(fswritefd, bp); 383 bp->b_type = type; 384 LIST_INSERT_HEAD(bhdp, bp, b_hash); 385 getblk(bp, blkno, size); 386 cachereads++; 387 /* fall through */ 388 foundit: 389 TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); 390 if (debug && bp->b_type != type) { 391 printf("getdatablk: buffer type changed to %s", 392 BT_BUFTYPE(type)); 393 prtbuf(bp, ""); 394 } 395 if (bp->b_errs == 0) 396 bp->b_refcnt++; 397 return (bp); 398 } 399 400 void 401 getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) 402 { 403 ufs2_daddr_t dblk; 404 struct timespec start, finish; 405 406 dblk = fsbtodb(&sblock, blk); 407 if (bp->b_bno == dblk) { 408 totalreads++; 409 } else { 410 if (debug) { 411 readcnt[bp->b_type]++; 412 clock_gettime(CLOCK_REALTIME_PRECISE, &start); 413 } 414 bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); 415 if (debug) { 416 clock_gettime(CLOCK_REALTIME_PRECISE, &finish); 417 timespecsub(&finish, &start, &finish); 418 timespecadd(&readtime[bp->b_type], &finish, 419 &readtime[bp->b_type]); 420 } 421 bp->b_bno = dblk; 422 bp->b_size = size; 423 } 424 } 425 426 void 427 brelse(struct bufarea *bp) 428 { 429 430 if (bp->b_refcnt <= 0) 431 prtbuf(bp, "brelse: buffer with negative reference count"); 432 bp->b_refcnt--; 433 } 434 435 void 436 binval(struct bufarea *bp) 437 { 438 439 bp->b_flags &= ~B_DIRTY; 440 LIST_REMOVE(bp, b_hash); 441 LIST_INSERT_HEAD(&freebufs, bp, b_hash); 442 
} 443 444 void 445 flush(int fd, struct bufarea *bp) 446 { 447 struct inode ip; 448 449 if ((bp->b_flags & B_DIRTY) == 0) 450 return; 451 bp->b_flags &= ~B_DIRTY; 452 if (fswritefd < 0) { 453 pfatal("WRITING IN READ_ONLY MODE.\n"); 454 return; 455 } 456 if (bp->b_errs != 0) 457 pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", 458 (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", 459 (long long)bp->b_bno); 460 bp->b_errs = 0; 461 /* 462 * Write using the appropriate function. 463 */ 464 switch (bp->b_type) { 465 case BT_SUPERBLK: 466 if (bp != &sblk) 467 pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", 468 bp, &sblk); 469 /* 470 * Superblocks are always pre-copied so we do not need 471 * to check them for copy-on-write. 472 */ 473 if (sbput(fd, bp->b_un.b_fs, 0) == 0) 474 fsmodified = 1; 475 break; 476 case BT_CYLGRP: 477 /* 478 * Cylinder groups are always pre-copied so we do not 479 * need to check them for copy-on-write. 480 */ 481 if (sujrecovery) 482 cg_write(bp); 483 if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) 484 fsmodified = 1; 485 break; 486 case BT_INODES: 487 if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { 488 struct ufs2_dinode *dp = bp->b_un.b_dinode2; 489 int i; 490 491 for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) { 492 if (ffs_verify_dinode_ckhash(&sblock, dp) == 0) 493 continue; 494 pwarn("flush: INODE CHECK-HASH FAILED"); 495 ip.i_bp = bp; 496 ip.i_dp = (union dinode *)dp; 497 ip.i_number = bp->b_index + (i / sizeof(*dp)); 498 prtinode(&ip); 499 if (preen || reply("FIX") != 0) { 500 if (preen) 501 printf(" (FIXED)\n"); 502 ffs_update_dinode_ckhash(&sblock, dp); 503 inodirty(&ip); 504 } 505 } 506 } 507 /* FALLTHROUGH */ 508 default: 509 copyonwrite(&sblock, bp, std_checkblkavail); 510 blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); 511 break; 512 } 513 } 514 515 /* 516 * If there are any snapshots, ensure that all the blocks that they 517 * care about have been copied, then release the snapshot inodes. 
518 * These operations need to be done before we rebuild the cylinder 519 * groups so that any block allocations are properly recorded. 520 * Since all the cylinder group maps have already been copied in 521 * the snapshots, no further snapshot copies will need to be done. 522 */ 523 void 524 snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)) 525 { 526 struct bufarea *bp; 527 int cnt; 528 529 if (snapcnt > 0) { 530 if (debug) 531 printf("Check for snapshot copies\n"); 532 TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) 533 if ((bp->b_flags & B_DIRTY) != 0) 534 copyonwrite(&sblock, bp, checkblkavail); 535 for (cnt = 0; cnt < snapcnt; cnt++) 536 irelse(&snaplist[cnt]); 537 snapcnt = 0; 538 } 539 } 540 541 /* 542 * Journaled soft updates does not maintain cylinder group summary 543 * information during cleanup, so this routine recalculates the summary 544 * information and updates the superblock summary in preparation for 545 * writing out the cylinder group. 546 */ 547 static void 548 cg_write(struct bufarea *bp) 549 { 550 ufs1_daddr_t fragno, cgbno, maxbno; 551 u_int8_t *blksfree; 552 struct csum *csp; 553 struct cg *cgp; 554 int blk; 555 int i; 556 557 /* 558 * Fix the frag and cluster summary. 
559 */ 560 cgp = bp->b_un.b_cg; 561 cgp->cg_cs.cs_nbfree = 0; 562 cgp->cg_cs.cs_nffree = 0; 563 bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); 564 maxbno = fragstoblks(&sblock, sblock.fs_fpg); 565 if (sblock.fs_contigsumsize > 0) { 566 for (i = 1; i <= sblock.fs_contigsumsize; i++) 567 cg_clustersum(cgp)[i] = 0; 568 bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); 569 } 570 blksfree = cg_blksfree(cgp); 571 for (cgbno = 0; cgbno < maxbno; cgbno++) { 572 if (ffs_isfreeblock(&sblock, blksfree, cgbno)) 573 continue; 574 if (ffs_isblock(&sblock, blksfree, cgbno)) { 575 ffs_clusteracct(&sblock, cgp, cgbno, 1); 576 cgp->cg_cs.cs_nbfree++; 577 continue; 578 } 579 fragno = blkstofrags(&sblock, cgbno); 580 blk = blkmap(&sblock, blksfree, fragno); 581 ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1); 582 for (i = 0; i < sblock.fs_frag; i++) 583 if (isset(blksfree, fragno + i)) 584 cgp->cg_cs.cs_nffree++; 585 } 586 /* 587 * Update the superblock cg summary from our now correct values 588 * before writing the block. 589 */ 590 csp = &sblock.fs_cs(&sblock, cgp->cg_cgx); 591 sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir; 592 sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree; 593 sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree; 594 sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree; 595 sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; 596 } 597 598 void 599 rwerror(const char *mesg, ufs2_daddr_t blk) 600 { 601 602 if (bkgrdcheck) 603 exit(EEXIT); 604 if (preen == 0) 605 printf("\n"); 606 pfatal("CANNOT %s: %ld", mesg, (long)blk); 607 if (reply("CONTINUE") == 0) 608 exit(EEXIT); 609 } 610 611 void 612 ckfini(int markclean) 613 { 614 struct bufarea *bp, *nbp; 615 int ofsmodified, cnt, cg; 616 617 if (bkgrdflag) { 618 if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { 619 cmd.value = FS_UNCLEAN; 620 cmd.size = markclean ? 
-1 : 1; 621 if (sysctlbyname("vfs.ffs.setflags", 0, 0, 622 &cmd, sizeof cmd) == -1) 623 pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n"); 624 if (!preen) { 625 printf("\n***** FILE SYSTEM MARKED %s *****\n", 626 markclean ? "CLEAN" : "DIRTY"); 627 if (!markclean) 628 rerun = 1; 629 } 630 } else if (!preen && !markclean) { 631 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 632 rerun = 1; 633 } 634 bkgrdflag = 0; 635 } 636 if (debug && cachelookups > 0) 637 printf("cache with %d buffers missed %d of %d (%d%%)\n", 638 numbufs, cachereads, cachelookups, 639 (int)(cachereads * 100 / cachelookups)); 640 if (fswritefd < 0) { 641 (void)close(fsreadfd); 642 return; 643 } 644 645 /* 646 * To remain idempotent with partial truncations the buffers 647 * must be flushed in this order: 648 * 1) cylinder groups (bitmaps) 649 * 2) indirect, directory, external attribute, and data blocks 650 * 3) inode blocks 651 * 4) superblock 652 * This ordering preserves access to the modified pointers 653 * until they are freed. 
654 */ 655 /* Step 1: cylinder groups */ 656 if (debug) 657 printf("Flush Cylinder groups\n"); 658 if (cgbufs != NULL) { 659 for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { 660 if (cgbufs[cnt].b_un.b_cg == NULL) 661 continue; 662 flush(fswritefd, &cgbufs[cnt]); 663 free(cgbufs[cnt].b_un.b_cg); 664 } 665 free(cgbufs); 666 cgbufs = NULL; 667 } 668 flush(fswritefd, &cgblk); 669 free(cgblk.b_un.b_buf); 670 cgblk.b_un.b_buf = NULL; 671 cnt = 0; 672 /* Step 2: indirect, directory, external attribute, and data blocks */ 673 if (debug) 674 printf("Flush indirect, directory, external attribute, " 675 "and data blocks\n"); 676 if (pdirbp != NULL) { 677 brelse(pdirbp); 678 pdirbp = NULL; 679 } 680 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 681 switch (bp->b_type) { 682 /* These should not be in the buffer cache list */ 683 case BT_UNKNOWN: 684 case BT_SUPERBLK: 685 case BT_CYLGRP: 686 default: 687 prtbuf(bp,"ckfini: improper buffer type on cache list"); 688 continue; 689 /* These are the ones to flush in this step */ 690 case BT_LEVEL1: 691 case BT_LEVEL2: 692 case BT_LEVEL3: 693 case BT_EXTATTR: 694 case BT_DIRDATA: 695 case BT_DATA: 696 break; 697 /* These are the ones to flush in the next step */ 698 case BT_INODES: 699 continue; 700 } 701 if (debug && bp->b_refcnt != 0) 702 prtbuf(bp, "ckfini: clearing in-use buffer"); 703 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 704 LIST_REMOVE(bp, b_hash); 705 cnt++; 706 flush(fswritefd, bp); 707 free(bp->b_un.b_buf); 708 free((char *)bp); 709 } 710 /* Step 3: inode blocks */ 711 if (debug) 712 printf("Flush inode blocks\n"); 713 if (icachebp != NULL) { 714 brelse(icachebp); 715 icachebp = NULL; 716 } 717 TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { 718 if (debug && bp->b_refcnt != 0) 719 prtbuf(bp, "ckfini: clearing in-use buffer"); 720 TAILQ_REMOVE(&bufqueuehd, bp, b_list); 721 LIST_REMOVE(bp, b_hash); 722 cnt++; 723 flush(fswritefd, bp); 724 free(bp->b_un.b_buf); 725 free((char *)bp); 726 } 
727 if (numbufs != cnt) 728 errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); 729 /* Step 4: superblock */ 730 if (debug) 731 printf("Flush the superblock\n"); 732 flush(fswritefd, &sblk); 733 if (havesb && cursnapshot == 0 && 734 sblk.b_bno != sblock.fs_sblockloc / dev_bsize) { 735 if (preen || reply("UPDATE STANDARD SUPERBLOCK")) { 736 /* Change write destination to standard superblock */ 737 sblock.fs_sblockactualloc = sblock.fs_sblockloc; 738 sblk.b_bno = sblock.fs_sblockloc / dev_bsize; 739 sbdirty(); 740 flush(fswritefd, &sblk); 741 } else { 742 markclean = 0; 743 } 744 } 745 if (cursnapshot == 0 && sblock.fs_clean != markclean) { 746 if ((sblock.fs_clean = markclean) != 0) { 747 sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); 748 sblock.fs_pendingblocks = 0; 749 sblock.fs_pendinginodes = 0; 750 } 751 sbdirty(); 752 ofsmodified = fsmodified; 753 flush(fswritefd, &sblk); 754 fsmodified = ofsmodified; 755 if (!preen) { 756 printf("\n***** FILE SYSTEM MARKED %s *****\n", 757 markclean ? "CLEAN" : "DIRTY"); 758 if (!markclean) 759 rerun = 1; 760 } 761 } else if (!preen) { 762 if (markclean) { 763 printf("\n***** FILE SYSTEM IS CLEAN *****\n"); 764 } else { 765 printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); 766 rerun = 1; 767 } 768 } 769 /* 770 * Free allocated tracking structures. 771 */ 772 if (blockmap != NULL) 773 free(blockmap); 774 blockmap = NULL; 775 if (inostathead != NULL) { 776 for (cg = 0; cg < sblock.fs_ncg; cg++) 777 if (inostathead[cg].il_stat != NULL) 778 free((char *)inostathead[cg].il_stat); 779 free(inostathead); 780 } 781 inostathead = NULL; 782 inocleanup(); 783 finalIOstats(); 784 (void)close(fsreadfd); 785 (void)close(fswritefd); 786 } 787 788 /* 789 * Print out I/O statistics. 
790 */ 791 void 792 IOstats(char *what) 793 { 794 int i; 795 796 if (debug == 0) 797 return; 798 if (diskreads == 0) { 799 printf("%s: no I/O\n\n", what); 800 return; 801 } 802 if (startpass.tv_sec == 0) 803 startpass = startprog; 804 printf("%s: I/O statistics\n", what); 805 printIOstats(); 806 totaldiskreads += diskreads; 807 diskreads = 0; 808 for (i = 0; i < BT_NUMBUFTYPES; i++) { 809 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 810 totalreadcnt[i] += readcnt[i]; 811 readtime[i].tv_sec = readtime[i].tv_nsec = 0; 812 readcnt[i] = 0; 813 } 814 clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); 815 } 816 817 void 818 finalIOstats(void) 819 { 820 int i; 821 822 if (debug == 0) 823 return; 824 printf("Final I/O statistics\n"); 825 totaldiskreads += diskreads; 826 diskreads = totaldiskreads; 827 startpass = startprog; 828 for (i = 0; i < BT_NUMBUFTYPES; i++) { 829 timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); 830 totalreadcnt[i] += readcnt[i]; 831 readtime[i] = totalreadtime[i]; 832 readcnt[i] = totalreadcnt[i]; 833 } 834 printIOstats(); 835 } 836 837 static void printIOstats(void) 838 { 839 long long msec, totalmsec; 840 int i; 841 842 clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); 843 timespecsub(&finishpass, &startpass, &finishpass); 844 printf("Running time: %jd.%03ld sec\n", 845 (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); 846 printf("buffer reads by type:\n"); 847 for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) 848 totalmsec += readtime[i].tv_sec * 1000 + 849 readtime[i].tv_nsec / 1000000; 850 if (totalmsec == 0) 851 totalmsec = 1; 852 for (i = 0; i < BT_NUMBUFTYPES; i++) { 853 if (readcnt[i] == 0) 854 continue; 855 msec = 856 readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; 857 printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", 858 buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, 859 (readcnt[i] * 1000 / diskreads) % 10, 860 (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec 
/ 1000000, 861 msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); 862 } 863 printf("\n"); 864 } 865 866 int 867 blread(int fd, char *buf, ufs2_daddr_t blk, long size) 868 { 869 char *cp; 870 int i, errs; 871 off_t offset; 872 873 offset = blk; 874 offset *= dev_bsize; 875 if (bkgrdflag) 876 slowio_start(); 877 totalreads++; 878 diskreads++; 879 if (pread(fd, buf, (int)size, offset) == size) { 880 if (bkgrdflag) 881 slowio_end(); 882 return (0); 883 } 884 885 /* 886 * This is handled specially here instead of in rwerror because 887 * rwerror is used for all sorts of errors, not just true read/write 888 * errors. It should be refactored and fixed. 889 */ 890 if (surrender) { 891 pfatal("CANNOT READ_BLK: %ld", (long)blk); 892 errx(EEXIT, "ABORTING DUE TO READ ERRORS"); 893 } else 894 rwerror("READ BLK", blk); 895 896 errs = 0; 897 memset(buf, 0, (size_t)size); 898 printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); 899 for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { 900 if (pread(fd, cp, (int)secsize, offset + i) != secsize) { 901 if (secsize != dev_bsize && dev_bsize != 1) 902 printf(" %jd (%jd),", 903 (intmax_t)(blk * dev_bsize + i) / secsize, 904 (intmax_t)blk + i / dev_bsize); 905 else 906 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 907 errs++; 908 } 909 } 910 printf("\n"); 911 if (errs) 912 resolved = 0; 913 return (errs); 914 } 915 916 void 917 blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) 918 { 919 int i; 920 char *cp; 921 off_t offset; 922 923 if (fd < 0) 924 return; 925 offset = blk; 926 offset *= dev_bsize; 927 if (pwrite(fd, buf, size, offset) == size) { 928 fsmodified = 1; 929 return; 930 } 931 resolved = 0; 932 rwerror("WRITE BLK", blk); 933 printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 934 for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) 935 if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize) 936 printf(" %jd,", (intmax_t)blk + i / dev_bsize); 937 printf("\n"); 938 return; 939 } 940 
941 void 942 blerase(int fd, ufs2_daddr_t blk, long size) 943 { 944 off_t ioarg[2]; 945 946 if (fd < 0) 947 return; 948 ioarg[0] = blk * dev_bsize; 949 ioarg[1] = size; 950 ioctl(fd, DIOCGDELETE, ioarg); 951 /* we don't really care if we succeed or not */ 952 return; 953 } 954 955 /* 956 * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by 957 * definition a multiple of dev_bsize. 958 */ 959 void 960 blzero(int fd, ufs2_daddr_t blk, long size) 961 { 962 static char *zero; 963 off_t offset, len; 964 965 if (fd < 0) 966 return; 967 if (zero == NULL) { 968 zero = Balloc(ZEROBUFSIZE); 969 if (zero == NULL) 970 errx(EEXIT, "cannot allocate buffer pool"); 971 } 972 offset = blk * dev_bsize; 973 if (lseek(fd, offset, 0) < 0) 974 rwerror("SEEK BLK", blk); 975 while (size > 0) { 976 len = MIN(ZEROBUFSIZE, size); 977 if (write(fd, zero, len) != len) 978 rwerror("WRITE BLK", blk); 979 blk += len / dev_bsize; 980 size -= len; 981 } 982 } 983 984 /* 985 * Verify cylinder group's magic number and other parameters. If the 986 * test fails, offer an option to rebuild the whole cylinder group. 987 * 988 * Return 1 if the cylinder group is good or return 0 if it is bad. 989 */ 990 #undef CHK 991 #define CHK(lhs, op, rhs, fmt) \ 992 if (lhs op rhs) { \ 993 pwarn("UFS%d cylinder group %d failed: " \ 994 "%s (" #fmt ") %s %s (" #fmt ")\n", \ 995 sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg, \ 996 #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ 997 error = 1; \ 998 } 999 int 1000 check_cgmagic(int cg, struct bufarea *cgbp) 1001 { 1002 struct cg *cgp = cgbp->b_un.b_cg; 1003 uint32_t cghash, calchash; 1004 static int prevfailcg = -1; 1005 long start; 1006 int error; 1007 1008 /* 1009 * Extended cylinder group checks. 
1010 */ 1011 calchash = cgp->cg_ckhash; 1012 if ((sblock.fs_metackhash & CK_CYLGRP) != 0 && 1013 (ckhashadd & CK_CYLGRP) == 0) { 1014 cghash = cgp->cg_ckhash; 1015 cgp->cg_ckhash = 0; 1016 calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); 1017 cgp->cg_ckhash = cghash; 1018 } 1019 error = 0; 1020 CHK(cgp->cg_ckhash, !=, calchash, "%jd"); 1021 CHK(cg_chkmagic(cgp), ==, 0, "%jd"); 1022 CHK(cgp->cg_cgx, !=, cg, "%jd"); 1023 CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd"); 1024 if (sblock.fs_magic == FS_UFS1_MAGIC) { 1025 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1026 CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd"); 1027 } else if (sblock.fs_magic == FS_UFS2_MAGIC) { 1028 CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd"); 1029 CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd"); 1030 } 1031 if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) { 1032 CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd"); 1033 } else { 1034 CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg), 1035 "%jd"); 1036 } 1037 start = sizeof(*cgp); 1038 if (sblock.fs_magic == FS_UFS2_MAGIC) { 1039 CHK(cgp->cg_iusedoff, !=, start, "%jd"); 1040 } else if (sblock.fs_magic == FS_UFS1_MAGIC) { 1041 CHK(cgp->cg_niblk, !=, 0, "%jd"); 1042 CHK(cgp->cg_initediblk, !=, 0, "%jd"); 1043 CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd"); 1044 CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); 1045 CHK(cgp->cg_old_btotoff, !=, start, "%jd"); 1046 CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff + 1047 sblock.fs_old_cpg * sizeof(int32_t), "%jd"); 1048 CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff + 1049 sblock.fs_old_cpg * sizeof(u_int16_t), "%jd"); 1050 } 1051 CHK(cgp->cg_freeoff, !=, 1052 cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd"); 1053 if (sblock.fs_contigsumsize == 0) { 1054 CHK(cgp->cg_nextfreeoff, !=, 1055 cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd"); 1056 } else { 1057 CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag, 1058 "%jd"); 1059 
	CHK(cgp->cg_clustersumoff, !=,
	    roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT),
	    sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd");
	CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff +
	    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd");
	CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff +
	    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT),
	    "%jd");
	}
	if (error == 0)
		return (1);
	/* Complain about any given cylinder group only once. */
	if (prevfailcg == cg)
		return (0);
	prevfailcg = cg;
	pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg);
	printf("\n");
	return (0);
}

/*
 * Reinitialize a cylinder group header from the superblock geometry.
 * Only the critical fields (magic, index, counts, and map offsets) are
 * filled in; the bit maps and summary totals themselves are recalculated
 * by the later fsck passes.
 */
void
rebuild_cg(int cg, struct bufarea *cgbp)
{
	struct cg *cgp = cgbp->b_un.b_cg;
	long start;

	/*
	 * Zero out the cylinder group and then initialize critical fields.
	 * Bit maps and summaries will be recalculated by later passes.
	 */
	memset(cgp, 0, (size_t)sblock.fs_cgsize);
	cgp->cg_magic = CG_MAGIC;
	cgp->cg_cgx = cg;
	cgp->cg_niblk = sblock.fs_ipg;
	cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock));
	/* The last cylinder group may hold fewer than fs_fpg fragments. */
	if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size)
		cgp->cg_ndblk = sblock.fs_fpg;
	else
		cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg);
	/* Lay out the allocation maps immediately after the header. */
	start = sizeof(*cgp);
	if (sblock.fs_magic == FS_UFS2_MAGIC) {
		cgp->cg_iusedoff = start;
	} else if (sblock.fs_magic == FS_UFS1_MAGIC) {
		/*
		 * UFS1 keeps its inode counts in the cg_old_* fields and
		 * carries per-cylinder block totals ahead of the inode map.
		 */
		cgp->cg_niblk = 0;
		cgp->cg_initediblk = 0;
		cgp->cg_old_ncyl = sblock.fs_old_cpg;
		cgp->cg_old_niblk = sblock.fs_ipg;
		cgp->cg_old_btotoff = start;
		cgp->cg_old_boff = cgp->cg_old_btotoff +
		    sblock.fs_old_cpg * sizeof(int32_t);
		cgp->cg_iusedoff = cgp->cg_old_boff +
		    sblock.fs_old_cpg * sizeof(u_int16_t);
	}
	cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT);
	cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT);
	if (sblock.fs_contigsumsize > 0) {
		/* Cluster accounting: summary array, then the cluster map. */
		cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag;
		cgp->cg_clustersumoff =
		    roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t));
		cgp->cg_clustersumoff -= sizeof(u_int32_t);
		cgp->cg_clusteroff = cgp->cg_clustersumoff +
		    (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t);
		cgp->cg_nextfreeoff = cgp->cg_clusteroff +
		    howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
	}
	/* Recompute the check hash over the rebuilt header before writing. */
	cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize);
	cgdirty(cgbp);
}

/*
 * allocate a data block with the specified number of fragments
 *
 * Scan from the data area of startcg to the end of the file system, then
 * wrap around and scan the groups below startcg.  checkblkavail() returns
 * 0 when a candidate run is unavailable, a positive block number on
 * success, and a negative block number telling us where to resume the scan.
 */
ufs2_daddr_t
allocblk(long startcg, long frags,
    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
	ufs2_daddr_t blkno, newblk;

	/* SUJ recovery must supply its own availability check. */
	if (sujrecovery && checkblkavail == std_checkblkavail) {
		pfatal("allocblk: std_checkblkavail used for SUJ recovery\n");
		return (0);
	}
	if (frags <= 0 || frags > sblock.fs_frag)
		return (0);
	/* First pass: from startcg to the end of the file system. */
	for (blkno = MAX(cgdata(&sblock, startcg), 0);
	    blkno < maxfsblock - sblock.fs_frag;
	    blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	/* Second pass: wrap around to the groups before startcg. */
	for (blkno = MAX(cgdata(&sblock, 0), 0);
	    blkno < cgbase(&sblock, startcg) - sblock.fs_frag;
	    blkno += sblock.fs_frag) {
		if ((newblk = (*checkblkavail)(blkno, frags)) == 0)
			continue;
		if (newblk > 0)
			return (newblk);
		if (newblk < 0)
			blkno = -newblk;
	}
	return (0);
}

/*
 * Check whether a run of "frags" fragments starting near blkno is free.
 * On success the fragments are marked busy in the in-core bitmap and the
 * cylinder group free map, the summary counts are updated, and the
 * starting block is returned.  Returns 0 when no run is found, or a
 * negative block number (resume point) when the cylinder group header is
 * unusable so allocblk() can skip past this group.
 */
ufs2_daddr_t
std_checkblkavail(ufs2_daddr_t blkno, long frags)
{
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t j, k, baseblk;
	long cg;

	if ((u_int64_t)blkno > sblock.fs_size)
		return (0);
	/* Slide a window of size "frags" across the block's fragments. */
	for (j = 0; j <= sblock.fs_frag - frags; j++) {
		if (testbmap(blkno + j))
			continue;
		for (k = 1; k < frags; k++)
			if (testbmap(blkno + j + k))
				break;
		if (k < frags) {
			/* Run too short; restart after the busy fragment. */
			j += k;
			continue;
		}
		cg = dtog(&sblock, blkno + j);
		cgbp = cglookup(cg);
		cgp = cgbp->b_un.b_cg;
		if (!check_cgmagic(cg, cgbp))
			return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag));
		/* Claim the fragments in both bitmap and free map. */
		baseblk = dtogd(&sblock, blkno + j);
		for (k = 0; k < frags; k++) {
			setbmap(blkno + j + k);
			clrbit(cg_blksfree(cgp), baseblk + k);
		}
		n_blks += frags;
		if (frags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree--;
		else
			cgp->cg_cs.cs_nffree -= frags;
		cgdirty(cgbp);
		return (blkno + j);
	}
	return (0);
}

/*
 * Check whether a file size is within the limits for the filesystem.
 * Return 1 when valid and 0 when too big.
 *
 * This should match the file size limit in ffs_mountfs().
 */
int
chkfilesize(mode_t mode, u_int64_t filesize)
{
	u_int64_t kernmaxfilesize;

	/* UFS1 is limited by its 32-bit block pointers; UFS2 records it. */
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1;
	else
		kernmaxfilesize = sblock.fs_maxfilesize;
	if (filesize > kernmaxfilesize ||
	    filesize > sblock.fs_maxfilesize ||
	    (mode == IFDIR && filesize > MAXDIRSIZE)) {
		if (debug)
			printf("bad file size %ju:", (uintmax_t)filesize);
		return (0);
	}
	return (1);
}

/*
 * Slow down IO so as to leave some disk bandwidth for other processes
 */
void
slowio_start()
{

	/* Delay one in every 8 operations */
	slowio_pollcnt = (slowio_pollcnt + 1) & 7;
	if (slowio_pollcnt == 0) {
		gettimeofday(&slowio_starttime, NULL);
	}
}

void
slowio_end()
{
	struct timeval tv;
	int delay_usec;

	/* Only the operations timed by slowio_start() are throttled. */
	if (slowio_pollcnt != 0)
		return;

	/* Update the slowdown interval. */
	gettimeofday(&tv, NULL);
	delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 +
	    (tv.tv_usec - slowio_starttime.tv_usec);
	/* Clamp the measured IO time to a sane range. */
	if (delay_usec < 64)
		delay_usec = 64;
	if (delay_usec > 2500000)
		delay_usec = 2500000;
	/* Exponentially weighted moving average (63/64 old, 1/64 new). */
	slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6;
	/* delay by 8 times the average IO delay */
	if (slowio_delay_usec > 64)
		usleep(slowio_delay_usec * 8);
}

/*
 * Find a pathname
 *
 * Reconstruct the path of inode "ino" into namebuf by repeatedly looking
 * up ".." from curdir toward the root, building the name back-to-front
 * from the end of the buffer.  Unresolvable prefixes are rendered as "?".
 */
void
getpathname(char *namebuf, ino_t curdir, ino_t ino)
{
	int len;
	char *cp;
	struct inode ip;
	struct inodesc idesc;
	static int busy = 0;	/* reentrancy guard */

	if (curdir == ino && ino == UFS_ROOTINO) {
		(void)strcpy(namebuf, "/");
		return;
	}
	/* Refuse recursive entry or a starting directory known to be bad. */
	if (busy || !INO_IS_DVALID(curdir)) {
		(void)strcpy(namebuf, "?");
		return;
	}
	busy = 1;
	memset(&idesc, 0, sizeof(struct inodesc));
	idesc.id_type = DATA;
	idesc.id_fix = IGNORE;
	/* Build the path backwards from the end of the buffer. */
	cp = &namebuf[MAXPATHLEN - 1];
	*cp = '\0';
	if (curdir != ino) {
		idesc.id_parent = curdir;
		goto namelookup;
	}
	while (ino != UFS_ROOTINO) {
		/* Step 1: find this inode's parent via its ".." entry. */
		idesc.id_number = ino;
		idesc.id_func = findino;
		idesc.id_name = strdup("..");
		ginode(ino, &ip);
		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
			irelse(&ip);
			free(idesc.id_name);
			break;
		}
		irelse(&ip);
		free(idesc.id_name);
	namelookup:
		/* Step 2: find this inode's name in the parent directory. */
		idesc.id_number = idesc.id_parent;
		idesc.id_parent = ino;
		idesc.id_func = findname;
		idesc.id_name = namebuf;
		ginode(idesc.id_number, &ip);
		if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) {
			irelse(&ip);
			break;
		}
		irelse(&ip);
		/* Prepend "/name" to the path built so far. */
		len = strlen(namebuf);
		cp -= len;
		memmove(cp, namebuf, (size_t)len);
		*--cp = '/';
		/* Stop when there is no room left for another component. */
		if (cp < &namebuf[UFS_MAXNAMLEN])
			break;
		ino = idesc.id_number;
	}
	busy = 0;
	/* Mark an incomplete path with a leading "?". */
	if (ino != UFS_ROOTINO)
		*--cp = '?';
	memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp));
}

/*
 * Fatal-signal handler: flush and release fsck state, then exit 12.
 */
void
catch(int sig __unused)
{

	ckfini(0);
	exit(12);
}

/*
 * When preening, allow a single quit to signal
 * a special exit after file system checks complete
 * so that reboot sequence may be interrupted.
 */
void
catchquit(int sig __unused)
{
	printf("returning to single-user after file system check\n");
	returntosingle = 1;
	/* A second SIGQUIT gets the default (terminating) action. */
	(void)signal(SIGQUIT, SIG_DFL);
}

/*
 * determine whether an inode should be fixed.
 *
 * Returns ALTERED when the caller should apply the repair and 0 when the
 * problem is to be left alone; the decision is cached in idesc->id_fix so
 * subsequent calls for the same descriptor are consistent.
 */
int
dofix(struct inodesc *idesc, const char *msg)
{

	switch (idesc->id_fix) {

	case DONTKNOW:
		/* First time: report the problem, then decide. */
		if (idesc->id_type == DATA)
			direrror(idesc->id_number, msg);
		else
			pwarn("%s", msg);
		/* Preen mode never asks; it always salvages. */
		if (preen) {
			printf(" (SALVAGED)\n");
			idesc->id_fix = FIX;
			return (ALTERED);
		}
		if (reply("SALVAGE") == 0) {
			idesc->id_fix = NOFIX;
			return (0);
		}
		idesc->id_fix = FIX;
		return (ALTERED);

	case FIX:
		return (ALTERED);

	case NOFIX:
	case IGNORE:
		return (0);

	default:
		errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix);
	}
	/* NOTREACHED */
	return (0);
}

#include <stdarg.h>

/*
 * Print details about a buffer.
 */
void
prtbuf(struct bufarea *bp, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	/* In preen mode prefix messages with the device being checked. */
	if (preen)
		(void)fprintf(stdout, "%s: ", cdevname);
	(void)vfprintf(stdout, fmt, ap);
	va_end(ap);
	printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, "
	    "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno,
	    bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean",
	    (intmax_t) bp->b_index);
}

/*
 * An unexpected inconsistency occurred.
 * Die if preening or file system is running with soft dependency protocol,
 * otherwise just print message and continue.
 */
void
pfatal(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if (!preen) {
		/* Interactive: report, possibly demote background fsck. */
		(void)vfprintf(stdout, fmt, ap);
		va_end(ap);
		if (usedsoftdep)
			(void)fprintf(stdout,
			    "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n");
		/*
		 * Force foreground fsck to clean up inconsistency.
		 */
		if (bkgrdflag) {
			cmd.value = FS_NEEDSFSCK;
			cmd.size = 1;
			if (sysctlbyname("vfs.ffs.setflags", 0, 0,
			    &cmd, sizeof cmd) == -1)
				pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
			fprintf(stdout, "CANNOT RUN IN BACKGROUND\n");
			ckfini(0);
			exit(EEXIT);
		}
		return;
	}
	/* Preen mode: prefix with the device name and terminate. */
	if (cdevname == NULL)
		cdevname = strdup("fsck");
	(void)fprintf(stdout, "%s: ", cdevname);
	(void)vfprintf(stdout, fmt, ap);
	(void)fprintf(stdout,
	    "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n",
	    cdevname, usedsoftdep ? " SOFT UPDATE " : " ");
	/*
	 * Force foreground fsck to clean up inconsistency.
	 */
	if (bkgrdflag) {
		cmd.value = FS_NEEDSFSCK;
		cmd.size = 1;
		if (sysctlbyname("vfs.ffs.setflags", 0, 0,
		    &cmd, sizeof cmd) == -1)
			pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n");
	}
	/* No va_end on this path: the process exits here. */
	ckfini(0);
	exit(EEXIT);
}

/*
 * Pwarn just prints a message when not preening or running soft dependency
 * protocol, or a warning (preceded by filename) when preening.
 */
void
pwarn(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if (preen)
		(void)fprintf(stdout, "%s: ", cdevname);
	(void)vfprintf(stdout, fmt, ap);
	va_end(ap);
}

/*
 * Stub for routines from kernel.
 *
 * Note: when preening, pfatal() exits directly and the formatted
 * message below is never printed.
 */
void
panic(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	pfatal("INTERNAL INCONSISTENCY:");
	(void)vfprintf(stdout, fmt, ap);
	va_end(ap);
	exit(EEXIT);
}