1 /* 2 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016 by Delphix. All rights reserved. 4 */ 5 6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 7 /* All Rights Reserved */ 8 9 /* 10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms are permitted 14 * provided that: (1) source distributions retain this entire copyright 15 * notice and comment, and (2) distributions including binaries display 16 * the following acknowledgement: ``This product includes software 17 * developed by the University of California, Berkeley and its contributors'' 18 * in the documentation or other materials provided with the distribution 19 * and in all advertising materials mentioning features or use of this 20 * software. Neither the name of the University nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 */ 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <unistd.h> 31 #include <stdarg.h> 32 #include <libadm.h> 33 #include <note.h> 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/mntent.h> 37 #include <sys/filio.h> 38 #include <sys/fs/ufs_fs.h> 39 #include <sys/vnode.h> 40 #include <sys/fs/ufs_acl.h> 41 #include <sys/fs/ufs_inode.h> 42 #include <sys/fs/ufs_log.h> 43 #define _KERNEL 44 #include <sys/fs/ufs_fsdir.h> 45 #undef _KERNEL 46 #include <sys/mnttab.h> 47 #include <sys/types.h> 48 #include <sys/stat.h> 49 #include <fcntl.h> 50 #include <signal.h> 51 #include <string.h> 52 #include <ctype.h> 53 #include <sys/vfstab.h> 54 #include <sys/lockfs.h> 55 #include <errno.h> 56 #include <sys/cmn_err.h> 57 #include <sys/dkio.h> 58 #include <sys/vtoc.h> 59 #include <sys/efi_partition.h> 60 #include <fslib.h> 61 #include <inttypes.h> 62 #include "fsck.h" 63 64 struct bufarea *pbp; 65 struct bufarea *pdirbp; 66 caddr_t mount_point = NULL; 67 static struct bufarea bufhead; /* head of list of other blks in filesys */ 68 char *elock_combuf; 69 char *elock_mountp; 70 static struct lockfs *lfp; /* current lockfs status */ 71 72 static int64_t diskreads, totalreads; /* Disk cache statistics */ 73 74 static int log_checksum(int32_t *, int32_t *, int); 75 static void vdirerror(fsck_ino_t, caddr_t, va_list); 76 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t); 77 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t); 78 static void vpwarn(caddr_t, va_list); 79 static int getaline(FILE *, caddr_t, int); 80 static struct bufarea *alloc_bufarea(void); 81 static void rwerror(caddr_t, diskaddr_t, int rval); 82 static void debugclean(void); 83 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t); 84 static void freelogblk(daddr32_t); 85 static void verrexit(caddr_t, va_list); 86 static void vpfatal(caddr_t, va_list); 87 static diskaddr_t get_device_size(int, caddr_t); 88 static diskaddr_t brute_force_get_device_size(int); 89 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *, 90 daddr32_t *, daddr32_t *, daddr32_t *); 91 92 int 93 ftypeok(struct dinode *dp) 94 { 95 switch (dp->di_mode & IFMT) { 96 97 case IFDIR: 98 case IFREG: 99 case IFBLK: 100 case IFCHR: 101 case IFLNK: 102 case IFSOCK: 103 case IFIFO: 104 case IFSHAD: 105 case IFATTRDIR: 106 return (1); 107 108 default: 109 if (debug) 110 (void) printf("bad file type 0%o\n", dp->di_mode); 111 return (0); 112 } 113 } 114 115 int 116 acltypeok(struct dinode *dp) 117 { 118 if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT)) 119 return (1); 120 121 if (debug) 122 (void) printf("bad file type for acl I=%d: 0%o\n", 123 dp->di_shadow, dp->di_mode); 124 return (0); 125 } 126 127 NOTE(PRINTFLIKE(1)) 128 int 129 reply(caddr_t fmt, ...) 130 { 131 va_list ap; 132 char line[80]; 133 134 if (preen) 135 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode"); 136 137 if (mflag) { 138 /* 139 * We don't know what's going on, so don't potentially 140 * make things worse by having errexit() write stuff 141 * out to disk. 142 */ 143 (void) printf( 144 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 145 devname); 146 exit(EXERRFATAL); 147 } 148 149 va_start(ap, fmt); 150 (void) putchar('\n'); 151 (void) vprintf(fmt, ap); 152 (void) putchar('?'); 153 (void) putchar(' '); 154 va_end(ap); 155 156 if (nflag || fswritefd < 0) { 157 (void) printf(" no\n\n"); 158 return (0); 159 } 160 if (yflag) { 161 (void) printf(" yes\n\n"); 162 return (1); 163 } 164 (void) fflush(stdout); 165 if (getaline(stdin, line, sizeof (line)) == EOF) 166 errexit("\n"); 167 (void) printf("\n"); 168 if (line[0] == 'y' || line[0] == 'Y') { 169 return (1); 170 } else { 171 return (0); 172 } 173 } 174 175 int 176 getaline(FILE *fp, caddr_t loc, int maxlen) 177 { 178 int n; 179 caddr_t p, lastloc; 180 181 p = loc; 182 lastloc = &p[maxlen-1]; 183 while ((n = getc(fp)) != '\n') { 184 if (n == EOF) 185 return (EOF); 186 if (!isspace(n) && p < lastloc) 187 *p++ = (char)n; 188 } 189 *p = '\0'; 190 /* LINTED pointer difference won't overflow */ 191 return (p - loc); 192 } 193 194 /* 195 * Malloc buffers and set up cache. 196 */ 197 void 198 bufinit(void) 199 { 200 struct bufarea *bp; 201 int bufcnt, i; 202 caddr_t bufp; 203 204 bufp = malloc((size_t)sblock.fs_bsize); 205 if (bufp == NULL) 206 goto nomem; 207 initbarea(&cgblk); 208 cgblk.b_un.b_buf = bufp; 209 bufhead.b_next = bufhead.b_prev = &bufhead; 210 bufcnt = MAXBUFSPACE / sblock.fs_bsize; 211 if (bufcnt < MINBUFS) 212 bufcnt = MINBUFS; 213 for (i = 0; i < bufcnt; i++) { 214 bp = (struct bufarea *)malloc(sizeof (struct bufarea)); 215 if (bp == NULL) { 216 if (i >= MINBUFS) 217 goto noalloc; 218 goto nomem; 219 } 220 221 bufp = malloc((size_t)sblock.fs_bsize); 222 if (bufp == NULL) { 223 free((void *)bp); 224 if (i >= MINBUFS) 225 goto noalloc; 226 goto nomem; 227 } 228 initbarea(bp); 229 bp->b_un.b_buf = bufp; 230 bp->b_prev = &bufhead; 231 bp->b_next = bufhead.b_next; 232 bufhead.b_next->b_prev = bp; 233 bufhead.b_next = bp; 234 } 235 noalloc: 236 bufhead.b_size = i; /* save number of buffers */ 237 pbp = pdirbp = NULL; 238 return; 239 240 nomem: 241 errexit("cannot allocate buffer pool\n"); 242 /* NOTREACHED */ 243 } 244 245 /* 246 * Undo a bufinit(). 247 */ 248 void 249 unbufinit(void) 250 { 251 int cnt; 252 struct bufarea *bp, *nbp; 253 254 cnt = 0; 255 for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) { 256 cnt++; 257 flush(fswritefd, bp); 258 nbp = bp->b_prev; 259 /* 260 * We're discarding the entire chain, so this isn't 261 * technically necessary. However, it doesn't hurt 262 * and lint's data flow analysis is much happier 263 * (this prevents it from thinking there's a chance 264 * of our using memory elsewhere after it's been released). 265 */ 266 nbp->b_next = bp->b_next; 267 bp->b_next->b_prev = nbp; 268 free((void *)bp->b_un.b_buf); 269 free((void *)bp); 270 } 271 272 if (bufhead.b_size != cnt) 273 errexit("Panic: cache lost %d buffers\n", 274 bufhead.b_size - cnt); 275 } 276 277 /* 278 * Manage a cache of directory blocks. 279 */ 280 struct bufarea * 281 getdatablk(daddr32_t blkno, size_t size) 282 { 283 struct bufarea *bp; 284 285 for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) 286 if (bp->b_bno == fsbtodb(&sblock, blkno)) { 287 goto foundit; 288 } 289 for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) 290 if ((bp->b_flags & B_INUSE) == 0) 291 break; 292 if (bp == &bufhead) { 293 bp = alloc_bufarea(); 294 if (bp == NULL) { 295 errexit("deadlocked buffer pool\n"); 296 /* NOTREACHED */ 297 } 298 } 299 /* 300 * We're at the same logical level as getblk(), so if there 301 * are any errors, we'll let our caller handle them. 302 */ 303 diskreads++; 304 (void) getblk(bp, blkno, size); 305 306 foundit: 307 totalreads++; 308 bp->b_cnt++; 309 /* 310 * Move the buffer to head of linked list if it isn't 311 * already there. 312 */ 313 if (bufhead.b_next != bp) { 314 bp->b_prev->b_next = bp->b_next; 315 bp->b_next->b_prev = bp->b_prev; 316 bp->b_prev = &bufhead; 317 bp->b_next = bufhead.b_next; 318 bufhead.b_next->b_prev = bp; 319 bufhead.b_next = bp; 320 } 321 bp->b_flags |= B_INUSE; 322 return (bp); 323 } 324 325 void 326 brelse(struct bufarea *bp) 327 { 328 bp->b_cnt--; 329 if (bp->b_cnt == 0) { 330 bp->b_flags &= ~B_INUSE; 331 } 332 } 333 334 struct bufarea * 335 getblk(struct bufarea *bp, daddr32_t blk, size_t size) 336 { 337 diskaddr_t dblk; 338 339 dblk = fsbtodb(&sblock, blk); 340 if (bp->b_bno == dblk) 341 return (bp); 342 flush(fswritefd, bp); 343 bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size); 344 bp->b_bno = dblk; 345 bp->b_size = size; 346 return (bp); 347 } 348 349 void 350 flush(int fd, struct bufarea *bp) 351 { 352 int i, j; 353 caddr_t sip; 354 long size; 355 356 if (!bp->b_dirty) 357 return; 358 359 /* 360 * It's not our buf, so if there are errors, let whoever 361 * acquired it deal with the actual problem. 362 */ 363 if (bp->b_errs != 0) 364 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno); 365 bp->b_dirty = 0; 366 bp->b_errs = 0; 367 bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); 368 if (bp != &sblk) { 369 return; 370 } 371 372 /* 373 * We're flushing the superblock, so make sure all the 374 * ancillary bits go out as well. 375 */ 376 sip = (caddr_t)sblock.fs_u.fs_csp; 377 for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { 378 size = sblock.fs_cssize - i < sblock.fs_bsize ? 379 sblock.fs_cssize - i : sblock.fs_bsize; 380 bwrite(fswritefd, sip, 381 fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), 382 size); 383 sip += size; 384 } 385 } 386 387 static void 388 rwerror(caddr_t mesg, diskaddr_t blk, int rval) 389 { 390 int olderr = errno; 391 392 if (!preen) 393 (void) printf("\n"); 394 395 if (rval == -1) 396 pfatal("CANNOT %s: DISK BLOCK %lld: %s", 397 mesg, blk, strerror(olderr)); 398 else 399 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk); 400 401 if (reply("CONTINUE") == 0) { 402 exitstat = EXERRFATAL; 403 errexit("Program terminated\n"); 404 } 405 } 406 407 void 408 ckfini(void) 409 { 410 int64_t percentage; 411 412 if (fswritefd < 0) 413 return; 414 415 flush(fswritefd, &sblk); 416 /* 417 * Were we using a backup superblock? 418 */ 419 if (havesb && sblk.b_bno != SBOFF / dev_bsize) { 420 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) { 421 sblk.b_bno = SBOFF / dev_bsize; 422 sbdirty(); 423 flush(fswritefd, &sblk); 424 } 425 } 426 flush(fswritefd, &cgblk); 427 if (cgblk.b_un.b_buf != NULL) { 428 free((void *)cgblk.b_un.b_buf); 429 cgblk.b_un.b_buf = NULL; 430 } 431 unbufinit(); 432 pbp = NULL; 433 pdirbp = NULL; 434 if (debug) { 435 /* 436 * Note that we only count cache-related reads. 437 * Anything that called fsck_bread() or getblk() 438 * directly are explicitly not cached, so they're not 439 * included here. 440 */ 441 if (totalreads != 0) 442 percentage = diskreads * 100 / totalreads; 443 else 444 percentage = 0; 445 446 (void) printf("cache missed %lld of %lld reads (%lld%%)\n", 447 (longlong_t)diskreads, (longlong_t)totalreads, 448 (longlong_t)percentage); 449 } 450 451 (void) close(fsreadfd); 452 (void) close(fswritefd); 453 fsreadfd = -1; 454 fswritefd = -1; 455 } 456 457 int 458 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size) 459 { 460 caddr_t cp; 461 int i; 462 int errs; 463 offset_t offset = ldbtob(blk); 464 offset_t addr; 465 466 /* 467 * In our universe, nothing exists before the superblock, so 468 * just pretend it's always zeros. This is the complement of 469 * bwrite()'s ignoring write requests into that space. 470 */ 471 if (blk < SBLOCK) { 472 if (debug) 473 (void) printf( 474 "WARNING: fsck_bread() passed blkno < %d (%lld)\n", 475 SBLOCK, (longlong_t)blk); 476 (void) memset(buf, 0, (size_t)size); 477 return (1); 478 } 479 480 if (llseek(fd, offset, SEEK_SET) < 0) { 481 rwerror("SEEK", blk, -1); 482 } 483 484 if ((i = read(fd, buf, size)) == size) { 485 return (0); 486 } 487 rwerror("READ", blk, i); 488 if (llseek(fd, offset, SEEK_SET) < 0) { 489 rwerror("SEEK", blk, -1); 490 } 491 errs = 0; 492 (void) memset(buf, 0, (size_t)size); 493 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:"); 494 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 495 addr = ldbtob(blk + i); 496 if (llseek(fd, addr, SEEK_SET) < 0 || 497 read(fd, cp, (int)secsize) < 0) { 498 iscorrupt = 1; 499 (void) printf(" %llu", blk + (u_longlong_t)i); 500 errs++; 501 } 502 } 503 (void) printf("\n"); 504 return (errs); 505 } 506 507 void 508 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size) 509 { 510 int i; 511 int n; 512 caddr_t cp; 513 offset_t offset = ldbtob(blk); 514 offset_t addr; 515 516 if (fd < 0) 517 return; 518 if (blk < SBLOCK) { 519 if (debug) 520 (void) printf( 521 "WARNING: Attempt to write illegal blkno %lld on %s\n", 522 (longlong_t)blk, devname); 523 return; 524 } 525 if (llseek(fd, offset, SEEK_SET) < 0) { 526 rwerror("SEEK", blk, -1); 527 } 528 if ((i = write(fd, buf, (int)size)) == size) { 529 fsmodified = 1; 530 return; 531 } 532 rwerror("WRITE", blk, i); 533 if (llseek(fd, offset, SEEK_SET) < 0) { 534 rwerror("SEEK", blk, -1); 535 } 536 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 537 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 538 n = 0; 539 addr = ldbtob(blk + i); 540 if (llseek(fd, addr, SEEK_SET) < 0 || 541 (n = write(fd, cp, DEV_BSIZE)) < 0) { 542 iscorrupt = 1; 543 (void) printf(" %llu", blk + (u_longlong_t)i); 544 } else if (n > 0) { 545 fsmodified = 1; 546 } 547 548 } 549 (void) printf("\n"); 550 } 551 552 /* 553 * Allocates the specified number of contiguous fragments. 554 */ 555 daddr32_t 556 allocblk(int wantedfrags) 557 { 558 int block, leadfrag, tailfrag; 559 daddr32_t selected; 560 size_t size; 561 struct bufarea *bp; 562 563 /* 564 * It's arguable whether we should just fail, or instead 565 * error out here. Since we should only ever be asked for 566 * a single fragment or an entire block (i.e., sblock.fs_frag), 567 * we'll fail out because anything else means somebody 568 * changed code without considering all of the ramifications. 569 */ 570 if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) { 571 exitstat = EXERRFATAL; 572 errexit("allocblk() asked for %d frags. " 573 "Legal range is 1 to %d", 574 wantedfrags, sblock.fs_frag); 575 } 576 577 /* 578 * For each filesystem block, look at every possible starting 579 * offset within the block such that we can get the number of 580 * contiguous fragments that we need. This is a drastically 581 * simplified version of the kernel's mapsearch() and alloc*(). 582 * It's also correspondingly slower. 583 */ 584 for (block = 0; block < maxfsblock - sblock.fs_frag; 585 block += sblock.fs_frag) { 586 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags; 587 leadfrag++) { 588 /* 589 * Is first fragment of candidate run available? 590 */ 591 if (testbmap(block + leadfrag)) 592 continue; 593 /* 594 * Are the rest of them available? 595 */ 596 for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++) 597 if (testbmap(block + leadfrag + tailfrag)) 598 break; 599 if (tailfrag < wantedfrags) { 600 /* 601 * No, skip the known-unusable run. 602 */ 603 leadfrag += tailfrag; 604 continue; 605 } 606 /* 607 * Found what we need, so claim them. 608 */ 609 for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++) 610 setbmap(block + leadfrag + tailfrag); 611 n_blks += wantedfrags; 612 size = wantedfrags * sblock.fs_fsize; 613 selected = block + leadfrag; 614 bp = getdatablk(selected, size); 615 (void) memset((void *)bp->b_un.b_buf, 0, size); 616 dirty(bp); 617 brelse(bp); 618 if (debug) 619 (void) printf( 620 "allocblk: selected %d (in block %d), frags %d, size %d\n", 621 selected, selected % sblock.fs_bsize, 622 wantedfrags, (int)size); 623 return (selected); 624 } 625 } 626 return (0); 627 } 628 629 /* 630 * Free a previously allocated block 631 */ 632 void 633 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags) 634 { 635 struct inodesc idesc; 636 637 if (debug) 638 (void) printf("debug: freeing %d fragments starting at %d\n", 639 frags, blkno); 640 641 init_inodesc(&idesc); 642 643 idesc.id_number = ino; 644 idesc.id_blkno = blkno; 645 idesc.id_numfrags = frags; 646 idesc.id_truncto = -1; 647 648 /* 649 * Nothing in the return status has any relevance to how 650 * we're using pass4check(), so just ignore it. 651 */ 652 (void) pass4check(&idesc); 653 } 654 655 /* 656 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes 657 * that the given buffer is at least MAXPATHLEN + 1 characters. 658 */ 659 void 660 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino) 661 { 662 int len; 663 caddr_t cp; 664 struct dinode *dp; 665 struct inodesc idesc; 666 struct inoinfo *inp; 667 668 if (debug) 669 (void) printf("debug: getpathname(curdir %d, ino %d)\n", 670 curdir, ino); 671 672 if ((curdir == 0) || (!INO_IS_DVALID(curdir))) { 673 (void) strcpy(namebuf, "?"); 674 return; 675 } 676 677 if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) { 678 (void) strcpy(namebuf, "/"); 679 return; 680 } 681 682 init_inodesc(&idesc); 683 idesc.id_type = DATA; 684 cp = &namebuf[MAXPATHLEN - 1]; 685 *cp = '\0'; 686 687 /* 688 * In the case of extended attributes, our 689 * parent won't necessarily be a directory, so just 690 * return what we've found with a prefix indicating 691 * that it's an XATTR. Presumably our caller will 692 * know what's going on and do something useful, like 693 * work out the path of the parent and then combine 694 * the two names. 695 * 696 * Can't use strcpy(), etc, because we've probably 697 * already got some name information in the buffer and 698 * the usual trailing \0 would lose it. 699 */ 700 dp = ginode(curdir); 701 if ((dp->di_mode & IFMT) == IFATTRDIR) { 702 idesc.id_number = curdir; 703 idesc.id_parent = ino; 704 idesc.id_func = findname; 705 idesc.id_name = namebuf; 706 idesc.id_fix = NOFIX; 707 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) { 708 *cp-- = '?'; 709 } 710 711 len = sizeof (XATTR_DIR_NAME) - 1; 712 cp -= len; 713 (void) memmove(cp, XATTR_DIR_NAME, len); 714 goto attrname; 715 } 716 717 /* 718 * If curdir == ino, need to get a handle on .. so we 719 * can search it for ino's name. Otherwise, just search 720 * the given directory for ino. Repeat until out of space 721 * or a full path has been built. 722 */ 723 if (curdir != ino) { 724 idesc.id_parent = curdir; 725 goto namelookup; 726 } 727 while (ino != UFSROOTINO && ino != 0) { 728 idesc.id_number = ino; 729 idesc.id_func = findino; 730 idesc.id_name = ".."; 731 idesc.id_fix = NOFIX; 732 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) { 733 inp = getinoinfo(ino); 734 if ((inp == NULL) || (inp->i_parent == 0)) { 735 break; 736 } 737 idesc.id_parent = inp->i_parent; 738 } 739 740 /* 741 * To get this far, id_parent must have the inode 742 * number for `..' in it. By definition, that's got 743 * to be a directory, so search it for the inode of 744 * interest. 745 */ 746 namelookup: 747 idesc.id_number = idesc.id_parent; 748 idesc.id_parent = ino; 749 idesc.id_func = findname; 750 idesc.id_name = namebuf; 751 idesc.id_fix = NOFIX; 752 if ((ckinode(ginode(idesc.id_number), 753 &idesc, CKI_TRAVERSE) & FOUND) == 0) { 754 break; 755 } 756 /* 757 * Prepend to what we've accumulated so far. If 758 * there's not enough room for even one more path element 759 * (of the worst-case length), then bail out. 760 */ 761 len = strlen(namebuf); 762 cp -= len; 763 if (cp < &namebuf[MAXNAMLEN]) 764 break; 765 (void) memmove(cp, namebuf, len); 766 *--cp = '/'; 767 768 /* 769 * Corner case for a looped-to-itself directory. 770 */ 771 if (ino == idesc.id_number) 772 break; 773 774 /* 775 * Climb one level of the hierarchy. In other words, 776 * the current .. becomes the inode to search for and 777 * its parent becomes the directory to search in. 778 */ 779 ino = idesc.id_number; 780 } 781 782 /* 783 * If we hit a discontinuity in the hierarchy, indicate it by 784 * prefixing the path so far with `?'. Otherwise, the first 785 * character will be `/' as a side-effect of the *--cp above. 786 * 787 * The special case is to handle the situation where we're 788 * trying to look something up in UFSROOTINO, but didn't find 789 * it. 790 */ 791 if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) { 792 if (cp > namebuf) 793 cp--; 794 *cp = '?'; 795 } 796 797 /* 798 * The invariants being used for buffer integrity are: 799 * - namebuf[] is terminated with \0 before anything else 800 * - cp is always <= the last element of namebuf[] 801 * - the new path element is always stored at the 802 * beginning of namebuf[], and is no more than MAXNAMLEN-1 803 * characters 804 * - cp is is decremented by the number of characters in 805 * the new path element 806 * - if, after the above accounting for the new element's 807 * size, there is no longer enough room at the beginning of 808 * namebuf[] for a full-sized path element and a slash, 809 * terminate the loop. cp is in the range 810 * &namebuf[0]..&namebuf[MAXNAMLEN - 1] 811 */ 812 attrname: 813 /* LINTED per the above discussion */ 814 (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp); 815 } 816 817 /* ARGSUSED */ 818 void 819 catch(int dummy) 820 { 821 ckfini(); 822 exit(EXSIGNAL); 823 } 824 825 /* 826 * When preening, allow a single quit to signal 827 * a special exit after filesystem checks complete 828 * so that reboot sequence may be interrupted. 829 */ 830 /* ARGSUSED */ 831 void 832 catchquit(int dummy) 833 { 834 (void) printf("returning to single-user after filesystem check\n"); 835 interrupted = 1; 836 (void) signal(SIGQUIT, SIG_DFL); 837 } 838 839 840 /* 841 * determine whether an inode should be fixed. 842 */ 843 NOTE(PRINTFLIKE(2)) 844 int 845 dofix(struct inodesc *idesc, caddr_t msg, ...) 846 { 847 int rval = 0; 848 va_list ap; 849 850 va_start(ap, msg); 851 852 switch (idesc->id_fix) { 853 854 case DONTKNOW: 855 if (idesc->id_type == DATA) 856 vdirerror(idesc->id_number, msg, ap); 857 else 858 vpwarn(msg, ap); 859 if (preen) { 860 idesc->id_fix = FIX; 861 rval = ALTERED; 862 break; 863 } 864 if (reply("SALVAGE") == 0) { 865 idesc->id_fix = NOFIX; 866 break; 867 } 868 idesc->id_fix = FIX; 869 rval = ALTERED; 870 break; 871 872 case FIX: 873 rval = ALTERED; 874 break; 875 876 case NOFIX: 877 break; 878 879 default: 880 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix); 881 } 882 883 va_end(ap); 884 return (rval); 885 } 886 887 NOTE(PRINTFLIKE(1)) 888 void 889 errexit(caddr_t fmt, ...) 890 { 891 va_list ap; 892 893 va_start(ap, fmt); 894 verrexit(fmt, ap); 895 /* NOTREACHED */ 896 } 897 898 NOTE(PRINTFLIKE(1)) 899 static void 900 verrexit(caddr_t fmt, va_list ap) 901 { 902 static int recursing = 0; 903 904 if (!recursing) { 905 recursing = 1; 906 if (errorlocked || iscorrupt) { 907 if (havesb && fswritefd >= 0) { 908 sblock.fs_clean = FSBAD; 909 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 910 sblock.fs_state = -sblock.fs_state; 911 sbdirty(); 912 write_altsb(fswritefd); 913 flush(fswritefd, &sblk); 914 } 915 } 916 ckfini(); 917 recursing = 0; 918 } 919 (void) vprintf(fmt, ap); 920 if (fmt[strlen(fmt) - 1] != '\n') 921 (void) putchar('\n'); 922 exit((exitstat != 0) ? exitstat : EXERRFATAL); 923 } 924 925 /* 926 * An unexpected inconsistency occured. 927 * Die if preening, otherwise just print message and continue. 928 */ 929 NOTE(PRINTFLIKE(1)) 930 void 931 pfatal(caddr_t fmt, ...) 932 { 933 va_list ap; 934 935 va_start(ap, fmt); 936 vpfatal(fmt, ap); 937 va_end(ap); 938 } 939 940 NOTE(PRINTFLIKE(1)) 941 static void 942 vpfatal(caddr_t fmt, va_list ap) 943 { 944 if (preen) { 945 if (*fmt != '\0') { 946 (void) printf("%s: ", devname); 947 (void) vprintf(fmt, ap); 948 (void) printf("\n"); 949 } 950 (void) printf( 951 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 952 devname); 953 if (havesb && fswritefd >= 0) { 954 sblock.fs_clean = FSBAD; 955 sblock.fs_state = -(FSOKAY - (long)sblock.fs_time); 956 sbdirty(); 957 flush(fswritefd, &sblk); 958 } 959 /* 960 * We're exiting, it doesn't really matter that our 961 * caller doesn't get to call va_end(). 962 */ 963 if (exitstat == 0) 964 exitstat = EXFNDERRS; 965 exit(exitstat); 966 } 967 if (*fmt != '\0') { 968 (void) vprintf(fmt, ap); 969 } 970 } 971 972 /* 973 * Pwarn just prints a message when not preening, 974 * or a warning (preceded by filename) when preening. 975 */ 976 NOTE(PRINTFLIKE(1)) 977 void 978 pwarn(caddr_t fmt, ...) 979 { 980 va_list ap; 981 982 va_start(ap, fmt); 983 vpwarn(fmt, ap); 984 va_end(ap); 985 } 986 987 NOTE(PRINTFLIKE(1)) 988 static void 989 vpwarn(caddr_t fmt, va_list ap) 990 { 991 if (*fmt != '\0') { 992 if (preen) 993 (void) printf("%s: ", devname); 994 (void) vprintf(fmt, ap); 995 } 996 } 997 998 /* 999 * Like sprintf(), except the buffer is dynamically allocated 1000 * and returned, instead of being passed in. A pointer to the 1001 * buffer is stored in *RET, and FMT is the usual format string. 1002 * The number of characters in *RET (excluding the trailing \0, 1003 * to be consistent with the other *printf() routines) is returned. 1004 * 1005 * Solaris doesn't have asprintf(3C) yet, unfortunately. 1006 */ 1007 NOTE(PRINTFLIKE(2)) 1008 int 1009 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...) 1010 { 1011 int len; 1012 caddr_t buffer; 1013 va_list ap; 1014 1015 va_start(ap, fmt); 1016 len = vsnprintf(NULL, 0, fmt, ap); 1017 va_end(ap); 1018 1019 buffer = malloc((len + 1) * sizeof (char)); 1020 if (buffer == NULL) { 1021 errexit("Out of memory in asprintf\n"); 1022 /* NOTREACHED */ 1023 } 1024 1025 va_start(ap, fmt); 1026 (void) vsnprintf(buffer, len + 1, fmt, ap); 1027 va_end(ap); 1028 1029 *ret = buffer; 1030 return (len); 1031 } 1032 1033 /* 1034 * So we can take advantage of kernel routines in ufs_subr.c. 1035 */ 1036 /* PRINTFLIKE2 */ 1037 void 1038 cmn_err(int level, caddr_t fmt, ...) 1039 { 1040 va_list ap; 1041 1042 va_start(ap, fmt); 1043 if (level == CE_PANIC) { 1044 (void) printf("INTERNAL INCONSISTENCY:"); 1045 verrexit(fmt, ap); 1046 } else { 1047 (void) vprintf(fmt, ap); 1048 } 1049 va_end(ap); 1050 } 1051 1052 /* 1053 * Check to see if unraw version of name is already mounted. 1054 * Updates devstr with the device name if devstr is not NULL 1055 * and str_size is positive. 1056 */ 1057 int 1058 mounted(caddr_t name, caddr_t devstr, size_t str_size) 1059 { 1060 int found; 1061 struct mnttab *mntent; 1062 1063 mntent = search_mnttab(NULL, unrawname(name), devstr, str_size); 1064 if (mntent == NULL) 1065 return (M_NOMNT); 1066 1067 /* 1068 * It's mounted. With or without write access? 1069 */ 1070 if (hasmntopt(mntent, MNTOPT_RO) != 0) 1071 found = M_RO; /* mounted as RO */ 1072 else 1073 found = M_RW; /* mounted as R/W */ 1074 1075 if (mount_point == NULL) { 1076 mount_point = strdup(mntent->mnt_mountp); 1077 if (mount_point == NULL) { 1078 errexit("fsck: memory allocation failure: %s", 1079 strerror(errno)); 1080 /* NOTREACHED */ 1081 } 1082 1083 if (devstr != NULL && str_size > 0) 1084 (void) strlcpy(devstr, mntent->mnt_special, str_size); 1085 } 1086 1087 return (found); 1088 } 1089 1090 /* 1091 * Check to see if name corresponds to an entry in vfstab, and that the entry 1092 * does not have option ro. 1093 */ 1094 int 1095 writable(caddr_t name) 1096 { 1097 int rw = 1; 1098 struct vfstab vfsbuf, vfskey; 1099 FILE *vfstab; 1100 1101 vfstab = fopen(VFSTAB, "r"); 1102 if (vfstab == NULL) { 1103 (void) printf("can't open %s\n", VFSTAB); 1104 return (1); 1105 } 1106 (void) memset((void *)&vfskey, 0, sizeof (vfskey)); 1107 vfsnull(&vfskey); 1108 vfskey.vfs_special = unrawname(name); 1109 vfskey.vfs_fstype = MNTTYPE_UFS; 1110 if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) && 1111 (hasvfsopt(&vfsbuf, MNTOPT_RO))) { 1112 rw = 0; 1113 } 1114 (void) fclose(vfstab); 1115 return (rw); 1116 } 1117 1118 /* 1119 * debugclean 1120 */ 1121 static void 1122 debugclean(void) 1123 { 1124 if (!debug) 1125 return; 1126 1127 if ((iscorrupt == 0) && (isdirty == 0)) 1128 return; 1129 1130 if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) || 1131 (sblock.fs_clean == FSLOG && islog && islogok) || 1132 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked)) 1133 return; 1134 1135 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n", 1136 sblock.fs_clean == FSSTABLE ? "stable" : 1137 sblock.fs_clean == FSLOG ? "logging" : 1138 sblock.fs_clean == FSFIX ? "being fixed" : "clean", 1139 devname); 1140 } 1141 1142 /* 1143 * updateclean 1144 * Carefully and transparently update the clean flag. 1145 * 1146 * `iscorrupt' has to be in its final state before this is called. 1147 */ 1148 int 1149 updateclean(void) 1150 { 1151 int freedlog = 0; 1152 struct bufarea cleanbuf; 1153 size_t size; 1154 ssize_t io_res; 1155 diskaddr_t bno; 1156 char fsclean; 1157 int fsreclaim; 1158 char fsflags; 1159 int flags_ok = 1; 1160 daddr32_t fslogbno; 1161 offset_t sblkoff; 1162 time_t t; 1163 1164 /* 1165 * debug stuff 1166 */ 1167 debugclean(); 1168 1169 /* 1170 * set fsclean to its appropriate value 1171 */ 1172 fslogbno = sblock.fs_logbno; 1173 fsclean = sblock.fs_clean; 1174 fsreclaim = sblock.fs_reclaim; 1175 fsflags = sblock.fs_flags; 1176 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) { 1177 fsclean = FSACTIVE; 1178 } 1179 /* 1180 * If ufs log is not okay, note that we need to clear it. 1181 */ 1182 examinelog(NULL); 1183 if (fslogbno && !(islog && islogok)) { 1184 fsclean = FSACTIVE; 1185 fslogbno = 0; 1186 } 1187 1188 /* 1189 * if necessary, update fs_clean and fs_state 1190 */ 1191 switch (fsclean) { 1192 1193 case FSACTIVE: 1194 if (!iscorrupt) { 1195 fsclean = FSSTABLE; 1196 fsreclaim = 0; 1197 } 1198 break; 1199 1200 case FSCLEAN: 1201 case FSSTABLE: 1202 if (iscorrupt) { 1203 fsclean = FSACTIVE; 1204 } else { 1205 fsreclaim = 0; 1206 } 1207 break; 1208 1209 case FSLOG: 1210 if (iscorrupt) { 1211 fsclean = FSACTIVE; 1212 } else if (!islog || fslogbno == 0) { 1213 fsclean = FSSTABLE; 1214 fsreclaim = 0; 1215 } else if (fflag) { 1216 fsreclaim = 0; 1217 } 1218 break; 1219 1220 case FSFIX: 1221 fsclean = FSBAD; 1222 if (errorlocked && !iscorrupt) { 1223 fsclean = islog ? FSLOG : FSCLEAN; 1224 } 1225 break; 1226 1227 default: 1228 if (iscorrupt) { 1229 fsclean = FSACTIVE; 1230 } else { 1231 fsclean = FSSTABLE; 1232 fsreclaim = 0; 1233 } 1234 } 1235 1236 if (largefile_count > 0) 1237 fsflags |= FSLARGEFILES; 1238 else 1239 fsflags &= ~FSLARGEFILES; 1240 1241 /* 1242 * There can be two discrepencies here. A) The superblock 1243 * shows no largefiles but we found some while scanning. 1244 * B) The superblock indicates the presence of largefiles, 1245 * but none are present. Note that if preening, the superblock 1246 * is silently corrected. 1247 */ 1248 if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) || 1249 (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES)) 1250 flags_ok = 0; 1251 1252 if (debug) 1253 (void) printf( 1254 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n", 1255 largefile_count, sblock.fs_flags, flags_ok); 1256 1257 /* 1258 * If fs is unchanged, do nothing. 1259 */ 1260 if ((!isdirty) && (flags_ok) && 1261 (fslogbno == sblock.fs_logbno) && 1262 (sblock.fs_clean == fsclean) && 1263 (sblock.fs_reclaim == fsreclaim) && 1264 (FSOKAY == (sblock.fs_state + sblock.fs_time))) { 1265 if (errorlocked) { 1266 if (!do_errorlock(LOCKFS_ULOCK)) 1267 pwarn( 1268 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n"); 1269 } 1270 return (freedlog); 1271 } 1272 1273 /* 1274 * if user allows, update superblock state 1275 */ 1276 if (debug) { 1277 (void) printf( 1278 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1279 sblock.fs_flags, sblock.fs_logbno, 1280 sblock.fs_clean, sblock.fs_reclaim, 1281 sblock.fs_state + sblock.fs_time); 1282 (void) printf( 1283 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1284 fsflags, fslogbno, fsclean, fsreclaim, FSOKAY); 1285 } 1286 if (!isdirty && !preen && !rerun && 1287 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0)) 1288 return (freedlog); 1289 1290 (void) time(&t); 1291 sblock.fs_time = (time32_t)t; 1292 if (debug) 1293 printclean(); 1294 1295 if (sblock.fs_logbno != fslogbno) { 1296 examinelog(&freelogblk); 1297 freedlog++; 1298 } 1299 1300 sblock.fs_logbno = fslogbno; 1301 sblock.fs_clean = fsclean; 1302 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 1303 sblock.fs_reclaim = fsreclaim; 1304 sblock.fs_flags = fsflags; 1305 1306 /* 1307 * if superblock can't be written, return 1308 */ 1309 if (fswritefd < 0) 1310 return (freedlog); 1311 1312 /* 1313 * Read private copy of superblock, update clean flag, and write it. 1314 */ 1315 bno = sblk.b_bno; 1316 size = sblk.b_size; 1317 1318 sblkoff = ldbtob(bno); 1319 1320 if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL) 1321 errexit("out of memory"); 1322 if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) { 1323 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1324 (longlong_t)bno, strerror(errno)); 1325 goto out; 1326 } 1327 1328 if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) { 1329 report_io_prob("READ FROM", bno, size, io_res); 1330 goto out; 1331 } 1332 1333 cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno; 1334 cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean; 1335 cleanbuf.b_un.b_fs->fs_state = sblock.fs_state; 1336 cleanbuf.b_un.b_fs->fs_time = sblock.fs_time; 1337 cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim; 1338 cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags; 1339 1340 if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) { 1341 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1342 (longlong_t)bno, strerror(errno)); 1343 goto out; 1344 } 1345 1346 if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) { 1347 report_io_prob("WRITE TO", bno, size, io_res); 1348 goto out; 1349 } 1350 1351 /* 1352 * 1208040 1353 * If we had to use -b to grab an alternate superblock, then we 1354 * likely had to do so because of unacceptable differences between 1355 * the main and alternate superblocks. So, we had better update 1356 * the alternate superblock as well, or we'll just fail again 1357 * the next time we attempt to run fsck! 1358 */ 1359 if (bflag != 0) { 1360 write_altsb(fswritefd); 1361 } 1362 1363 if (errorlocked) { 1364 if (!do_errorlock(LOCKFS_ULOCK)) 1365 pwarn( 1366 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n"); 1367 } 1368 1369 out: 1370 if (cleanbuf.b_un.b_buf != NULL) { 1371 free((void *)cleanbuf.b_un.b_buf); 1372 } 1373 1374 return (freedlog); 1375 } 1376 1377 static void 1378 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure) 1379 { 1380 if (failure < 0) 1381 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n", 1382 what, (int)bno, strerror(errno)); 1383 else if (failure == 0) 1384 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n", 1385 what, (int)bno); 1386 else 1387 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n", 1388 what, (int)bno, (unsigned)failure, (unsigned)expected); 1389 } 1390 1391 /* 1392 * print out clean info 1393 */ 1394 void 1395 printclean(void) 1396 { 1397 caddr_t s; 1398 1399 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) 1400 s = "unknown"; 1401 else 1402 switch (sblock.fs_clean) { 1403 1404 case FSACTIVE: 1405 s = "active"; 1406 break; 1407 1408 case FSCLEAN: 1409 s = "clean"; 1410 break; 1411 1412 case FSSTABLE: 1413 s = "stable"; 1414 break; 1415 1416 case FSLOG: 1417 s = "logging"; 1418 break; 1419 1420 case FSBAD: 1421 s = "is bad"; 1422 break; 1423 1424 case FSFIX: 1425 s = "being fixed"; 1426 break; 1427 1428 default: 1429 s = "unknown"; 1430 } 1431 1432 if (preen) 1433 pwarn("is %s.\n", s); 1434 else 1435 (void) printf("** %s is %s.\n", devname, s); 1436 } 1437 1438 int 1439 is_errorlocked(caddr_t fs) 1440 { 1441 int retval; 1442 struct stat64 statb; 1443 caddr_t mountp; 1444 struct mnttab *mntent; 1445 1446 retval = 0; 1447 1448 if (!fs) 1449 return (0); 1450 1451 if (stat64(fs, &statb) < 0) 1452 return (0); 1453 1454 if (S_ISDIR(statb.st_mode)) { 1455 mountp = fs; 1456 } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) { 1457 mntent = search_mnttab(NULL, fs, NULL, 0); 1458 if (mntent == NULL) 1459 return (0); 1460 mountp = mntent->mnt_mountp; 1461 if (mountp == NULL) /* theoretically a can't-happen */ 1462 return (0); 1463 } else { 1464 return (0); 1465 } 1466 1467 /* 1468 * From here on, must `goto out' to avoid memory leakage. 1469 */ 1470 1471 if (elock_combuf == NULL) 1472 elock_combuf = 1473 (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char)); 1474 else 1475 elock_combuf = 1476 (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN); 1477 1478 if (elock_combuf == NULL) 1479 goto out; 1480 1481 (void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN); 1482 1483 if (elock_mountp != NULL) { 1484 free(elock_mountp); 1485 } 1486 1487 elock_mountp = strdup(mountp); 1488 if (elock_mountp == NULL) 1489 goto out; 1490 1491 if (mountfd < 0) { 1492 if ((mountfd = open64(mountp, O_RDONLY)) == -1) 1493 goto out; 1494 } 1495 1496 if (lfp == NULL) { 1497 lfp = (struct lockfs *)malloc(sizeof (struct lockfs)); 1498 if (lfp == NULL) 1499 goto out; 1500 (void) memset((void *)lfp, 0, sizeof (struct lockfs)); 1501 } 1502 1503 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1504 lfp->lf_comment = elock_combuf; 1505 1506 if (ioctl(mountfd, _FIOLFSS, lfp) == -1) 1507 goto out; 1508 1509 /* 1510 * lint believes that the ioctl() (or any other function 1511 * taking lfp as an arg) could free lfp. This is not the 1512 * case, however. 1513 */ 1514 retval = LOCKFS_IS_ELOCK(lfp); 1515 1516 out: 1517 return (retval); 1518 } 1519 1520 /* 1521 * Given a name which is known to be a directory, see if it appears 1522 * in the vfstab. If so, return the entry's block (special) device 1523 * field via devstr. 1524 */ 1525 int 1526 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size) 1527 { 1528 return (NULL != search_vfstab(name, NULL, devstr, str_size)); 1529 } 1530 1531 /* 1532 * Given a name which is known to be a directory, see if it appears 1533 * in the mnttab. If so, return the entry's block (special) device 1534 * field via devstr. 1535 */ 1536 int 1537 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size) 1538 { 1539 return (NULL != search_mnttab(name, NULL, devstr, str_size)); 1540 } 1541 1542 /* 1543 * Search for mount point and/or special device in the given file. 1544 * The first matching entry is returned. 1545 * 1546 * If an entry is found and str_size is greater than zero, then 1547 * up to size_str bytes of the special device name from the entry 1548 * are copied to devstr. 1549 */ 1550 1551 #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \ 1552 st_nuller, st_init, st_searcher) \ 1553 { \ 1554 FILE *fp; \ 1555 struct st_type *retval = NULL; \ 1556 struct st_type key; \ 1557 static struct st_type buffer; \ 1558 \ 1559 /* LINTED ``assigned value never used'' */ \ 1560 st_nuller(&key); \ 1561 key.st_mount = mountp; \ 1562 key.st_special = special; \ 1563 st_init; \ 1564 \ 1565 if ((fp = fopen(st_file, "r")) == NULL) \ 1566 return (NULL); \ 1567 \ 1568 if (st_searcher(fp, &buffer, &key) == 0) { \ 1569 retval = &buffer; \ 1570 if (devstr != NULL && str_size > 0 && \ 1571 buffer.st_special != NULL) { \ 1572 (void) strlcpy(devstr, buffer.st_special, \ 1573 str_size); \ 1574 } \ 1575 } \ 1576 (void) fclose(fp); \ 1577 return (retval); \ 1578 } 1579 1580 static struct vfstab * 1581 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1582 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull, 1583 (retval = retval), getvfsany) 1584 1585 static struct mnttab * 1586 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1587 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull, 1588 (key.mnt_fstype = MNTTYPE_UFS), getmntany) 1589 1590 int 1591 do_errorlock(int lock_type) 1592 { 1593 caddr_t buf; 1594 time_t now; 1595 struct tm *local; 1596 int rc; 1597 1598 if (elock_combuf == NULL) 1599 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n", 1600 elock_mountp ? elock_mountp : "<null>", 1601 lock_type); 1602 1603 if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) == 1604 NULL) { 1605 errexit("Couldn't alloc memory for temp. lock status buffer\n"); 1606 } 1607 if (lfp == NULL) { 1608 errexit("do_errorlock(%s, %d): lockfs status unallocated\n", 1609 elock_mountp, lock_type); 1610 } 1611 1612 (void) memmove((void *)buf, (void *)elock_combuf, 1613 LOCKFS_MAXCOMMENTLEN-1); 1614 1615 switch (lock_type) { 1616 case LOCKFS_ELOCK: 1617 /* 1618 * Note that if it is error-locked, we won't get an 1619 * error back if we try to error-lock it again. 1620 */ 1621 if (time(&now) != (time_t)-1) { 1622 if ((local = localtime(&now)) != NULL) 1623 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1624 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d", 1625 elock_combuf, (int)pid, 1626 local->tm_mon + 1, local->tm_mday, 1627 (local->tm_year % 100), local->tm_hour, 1628 local->tm_min, local->tm_sec); 1629 else 1630 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1631 "%s [fsck pid %d", elock_combuf, pid); 1632 1633 } else { 1634 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1635 "%s [fsck pid %d", elock_combuf, pid); 1636 } 1637 break; 1638 1639 case LOCKFS_ULOCK: 1640 if (time(&now) != (time_t)-1) { 1641 if ((local = localtime(&now)) != NULL) { 1642 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1643 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]", 1644 elock_combuf, 1645 local->tm_mon + 1, local->tm_mday, 1646 (local->tm_year % 100), local->tm_hour, 1647 local->tm_min, local->tm_sec); 1648 } else { 1649 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1650 "%s]", elock_combuf); 1651 } 1652 } else { 1653 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1654 "%s]", elock_combuf); 1655 } 1656 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) { 1657 pwarn("do_errorlock: unlock failed: %s\n", 1658 strerror(errno)); 1659 goto out; 1660 } 1661 break; 1662 1663 default: 1664 break; 1665 } 1666 1667 (void) memmove((void *)elock_combuf, (void *)buf, 1668 LOCKFS_MAXCOMMENTLEN - 1); 1669 1670 lfp->lf_lock = lock_type; 1671 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1672 lfp->lf_comment = elock_combuf; 1673 lfp->lf_flags = 0; 1674 errno = 0; 1675 1676 if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) { 1677 if (errno == EINVAL) { 1678 pwarn("Another fsck active?\n"); 1679 iscorrupt = 0; /* don't go away mad, just go away */ 1680 } else { 1681 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n", 1682 lock_type, elock_combuf, strerror(errno)); 1683 } 1684 } 1685 out: 1686 if (buf != NULL) { 1687 free((void *)buf); 1688 } 1689 1690 return (rc != -1); 1691 } 1692 1693 /* 1694 * Shadow inode support. To register a shadow with a client is to note 1695 * that an inode (the client) refers to the shadow. 1696 */ 1697 1698 static struct shadowclients * 1699 newshadowclient(struct shadowclients *prev) 1700 { 1701 struct shadowclients *rc; 1702 1703 rc = (struct shadowclients *)malloc(sizeof (*rc)); 1704 if (rc == NULL) 1705 errexit("newshadowclient: cannot malloc shadow client"); 1706 rc->next = prev; 1707 rc->nclients = 0; 1708 1709 rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) * 1710 maxshadowclients); 1711 if (rc->client == NULL) 1712 errexit("newshadowclient: cannot malloc client array"); 1713 return (rc); 1714 } 1715 1716 void 1717 registershadowclient(fsck_ino_t shadow, fsck_ino_t client, 1718 struct shadowclientinfo **info) 1719 { 1720 struct shadowclientinfo *sci; 1721 struct shadowclients *scc; 1722 1723 /* 1724 * Already have a record for this shadow? 1725 */ 1726 for (sci = *info; sci != NULL; sci = sci->next) 1727 if (sci->shadow == shadow) 1728 break; 1729 if (sci == NULL) { 1730 /* 1731 * It's a new shadow, add it to the list 1732 */ 1733 sci = (struct shadowclientinfo *)malloc(sizeof (*sci)); 1734 if (sci == NULL) 1735 errexit("registershadowclient: cannot malloc"); 1736 sci->next = *info; 1737 *info = sci; 1738 sci->shadow = shadow; 1739 sci->totalClients = 0; 1740 sci->clients = newshadowclient(NULL); 1741 } 1742 1743 sci->totalClients++; 1744 scc = sci->clients; 1745 if (scc->nclients >= maxshadowclients) { 1746 scc = newshadowclient(sci->clients); 1747 sci->clients = scc; 1748 } 1749 1750 scc->client[scc->nclients++] = client; 1751 } 1752 1753 /* 1754 * Locate and discard a shadow. 1755 */ 1756 void 1757 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info) 1758 { 1759 struct shadowclientinfo *sci, *prev; 1760 1761 /* 1762 * Do we have a record for this shadow? 1763 */ 1764 prev = NULL; 1765 for (sci = *info; sci != NULL; sci = sci->next) { 1766 if (sci->shadow == shadow) 1767 break; 1768 prev = sci; 1769 } 1770 1771 if (sci != NULL) { 1772 /* 1773 * First, pull it off the list, since we know there 1774 * shouldn't be any future references to this one. 1775 */ 1776 if (prev == NULL) 1777 *info = sci->next; 1778 else 1779 prev->next = sci->next; 1780 deshadow(sci, clearattrref); 1781 } 1782 } 1783 1784 /* 1785 * Discard all memory used to track clients of a shadow. 1786 */ 1787 void 1788 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t)) 1789 { 1790 struct shadowclients *clients, *discard; 1791 int idx; 1792 1793 clients = sci->clients; 1794 while (clients != NULL) { 1795 discard = clients; 1796 clients = clients->next; 1797 if (discard->client != NULL) { 1798 if (cb != NULL) { 1799 for (idx = 0; idx < discard->nclients; idx++) 1800 (*cb)(discard->client[idx]); 1801 } 1802 free((void *)discard->client); 1803 } 1804 free((void *)discard); 1805 } 1806 1807 free((void *)sci); 1808 } 1809 1810 /* 1811 * Allocate more buffer as need arises but allocate one at a time. 1812 * This is done to make sure that fsck does not exit with error if it 1813 * needs more buffer to complete its task. 1814 */ 1815 static struct bufarea * 1816 alloc_bufarea(void) 1817 { 1818 struct bufarea *newbp; 1819 caddr_t bufp; 1820 1821 bufp = malloc((unsigned int)sblock.fs_bsize); 1822 if (bufp == NULL) 1823 return (NULL); 1824 1825 newbp = (struct bufarea *)malloc(sizeof (struct bufarea)); 1826 if (newbp == NULL) { 1827 free((void *)bufp); 1828 return (NULL); 1829 } 1830 1831 initbarea(newbp); 1832 newbp->b_un.b_buf = bufp; 1833 newbp->b_prev = &bufhead; 1834 newbp->b_next = bufhead.b_next; 1835 bufhead.b_next->b_prev = newbp; 1836 bufhead.b_next = newbp; 1837 bufhead.b_size++; 1838 return (newbp); 1839 } 1840 1841 /* 1842 * We length-limit in both unrawname() and rawname() to avoid 1843 * overflowing our arrays or those of our naive, trusting callers. 1844 */ 1845 1846 caddr_t 1847 unrawname(caddr_t name) 1848 { 1849 caddr_t dp; 1850 static char fullname[MAXPATHLEN + 1]; 1851 1852 if ((dp = getfullblkname(name)) == NULL) 1853 return (""); 1854 1855 (void) strlcpy(fullname, dp, sizeof (fullname)); 1856 /* 1857 * Not reporting under debug, as the allocation isn't 1858 * reported by getfullblkname. The idea is that we 1859 * produce balanced alloc/free instances. 1860 */ 1861 free(dp); 1862 1863 return (fullname); 1864 } 1865 1866 caddr_t 1867 rawname(caddr_t name) 1868 { 1869 caddr_t dp; 1870 static char fullname[MAXPATHLEN + 1]; 1871 1872 if ((dp = getfullrawname(name)) == NULL) 1873 return (""); 1874 1875 (void) strlcpy(fullname, dp, sizeof (fullname)); 1876 /* 1877 * Not reporting under debug, as the allocation isn't 1878 * reported by getfullblkname. The idea is that we 1879 * produce balanced alloc/free instances. 1880 */ 1881 free(dp); 1882 1883 return (fullname); 1884 } 1885 1886 /* 1887 * Make sure that a cg header looks at least moderately reasonable. 1888 * We want to be able to trust the contents enough to be able to use 1889 * the standard accessor macros. So, besides looking at the obvious 1890 * such as the magic number, we verify that the offset field values 1891 * are properly aligned and not too big or small. 1892 * 1893 * Returns a NULL pointer if the cg is sane enough for our needs, else 1894 * a dynamically-allocated string describing all of its faults. 1895 */ 1896 #define Append_Error(full, full_len, addition, addition_len) \ 1897 if (full == NULL) { \ 1898 full = addition; \ 1899 full_len = addition_len; \ 1900 } else { \ 1901 /* lint doesn't think realloc() understands NULLs */ \ 1902 full = realloc(full, full_len + addition_len + 1); \ 1903 if (full == NULL) { \ 1904 errexit("Out of memory in cg_sanity"); \ 1905 /* NOTREACHED */ \ 1906 } \ 1907 (void) strcpy(full + full_len, addition); \ 1908 full_len += addition_len; \ 1909 free(addition); \ 1910 } 1911 1912 caddr_t 1913 cg_sanity(struct cg *cgp, int cgno) 1914 { 1915 caddr_t full_err; 1916 caddr_t this_err = NULL; 1917 int full_len, this_len; 1918 daddr32_t ndblk; 1919 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 1920 daddr32_t exp_freeoff, exp_nextfreeoff; 1921 1922 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 1923 &exp_freeoff, &exp_nextfreeoff, &ndblk); 1924 1925 full_err = NULL; 1926 full_len = 0; 1927 1928 if (!cg_chkmagic(cgp)) { 1929 this_len = fsck_asprintf(&this_err, 1930 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n", 1931 cgp->cg_magic, CG_MAGIC); 1932 Append_Error(full_err, full_len, this_err, this_len); 1933 } 1934 1935 if (cgp->cg_cgx != cgno) { 1936 this_len = fsck_asprintf(&this_err, 1937 "WRONG CG NUMBER (%d should be %d)\n", 1938 cgp->cg_cgx, cgno); 1939 Append_Error(full_err, full_len, this_err, this_len); 1940 } 1941 1942 if ((cgp->cg_btotoff & 3) != 0) { 1943 this_len = fsck_asprintf(&this_err, 1944 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n", 1945 cgp->cg_btotoff); 1946 Append_Error(full_err, full_len, this_err, this_len); 1947 } 1948 1949 if ((cgp->cg_boff & 1) != 0) { 1950 this_len = fsck_asprintf(&this_err, 1951 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n", 1952 cgp->cg_boff); 1953 Append_Error(full_err, full_len, this_err, this_len); 1954 } 1955 1956 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 1957 if (cgp->cg_ncyl < 1) { 1958 this_len = fsck_asprintf(&this_err, 1959 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n", 1960 cgp->cg_ncyl); 1961 } else { 1962 this_len = fsck_asprintf(&this_err, 1963 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n", 1964 cgp->cg_ncyl, sblock.fs_cpg); 1965 } 1966 Append_Error(full_err, full_len, this_err, this_len); 1967 } 1968 1969 if (cgp->cg_niblk != sblock.fs_ipg) { 1970 this_len = fsck_asprintf(&this_err, 1971 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n", 1972 cgp->cg_niblk, sblock.fs_ipg); 1973 Append_Error(full_err, full_len, this_err, this_len); 1974 } 1975 1976 if (cgp->cg_ndblk != ndblk) { 1977 this_len = fsck_asprintf(&this_err, 1978 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n", 1979 cgp->cg_ndblk, ndblk); 1980 Append_Error(full_err, full_len, this_err, this_len); 1981 } 1982 1983 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) { 1984 this_len = fsck_asprintf(&this_err, 1985 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION " 1986 "(%d should be at least 0 and less than %d)\n", 1987 cgp->cg_rotor, ndblk); 1988 Append_Error(full_err, full_len, this_err, this_len); 1989 } 1990 1991 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) { 1992 this_len = fsck_asprintf(&this_err, 1993 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION " 1994 "(%d should be at least 0 and less than %d)\n", 1995 cgp->cg_frotor, ndblk); 1996 Append_Error(full_err, full_len, this_err, this_len); 1997 } 1998 1999 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 2000 this_len = fsck_asprintf(&this_err, 2001 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION " 2002 "(%d should be at least 0 and less than %d)\n", 2003 cgp->cg_irotor, sblock.fs_ipg); 2004 Append_Error(full_err, full_len, this_err, this_len); 2005 } 2006 2007 if (cgp->cg_btotoff != exp_btotoff) { 2008 this_len = fsck_asprintf(&this_err, 2009 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n", 2010 cgp->cg_btotoff, exp_btotoff); 2011 Append_Error(full_err, full_len, this_err, this_len); 2012 } 2013 2014 if (cgp->cg_boff != exp_boff) { 2015 this_len = fsck_asprintf(&this_err, 2016 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n", 2017 cgp->cg_boff, exp_boff); 2018 Append_Error(full_err, full_len, this_err, this_len); 2019 } 2020 2021 if (cgp->cg_iusedoff != exp_iusedoff) { 2022 this_len = fsck_asprintf(&this_err, 2023 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n", 2024 cgp->cg_iusedoff, exp_iusedoff); 2025 Append_Error(full_err, full_len, this_err, this_len); 2026 } 2027 2028 if (cgp->cg_freeoff != exp_freeoff) { 2029 this_len = fsck_asprintf(&this_err, 2030 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n", 2031 cgp->cg_freeoff, exp_freeoff); 2032 Append_Error(full_err, full_len, this_err, this_len); 2033 } 2034 2035 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2036 this_len = fsck_asprintf(&this_err, 2037 "END OF HEADER POSITION INCORRECT (%d should be %d)\n", 2038 cgp->cg_nextfreeoff, exp_nextfreeoff); 2039 Append_Error(full_err, full_len, this_err, this_len); 2040 } 2041 2042 return (full_err); 2043 } 2044 2045 #undef Append_Error 2046 2047 /* 2048 * This is taken from mkfs, and is what is used to come up with the 2049 * original values for a struct cg. This implies that, since these 2050 * are all constants, recalculating them now should give us the same 2051 * thing as what's on disk. 2052 */ 2053 static void 2054 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff, 2055 daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff, 2056 daddr32_t *ndblk) 2057 { 2058 daddr32_t cbase, dmax; 2059 struct cg *cgp; 2060 2061 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno), 2062 (size_t)sblock.fs_cgsize); 2063 cgp = cgblk.b_un.b_cg; 2064 2065 cbase = cgbase(&sblock, cgno); 2066 dmax = cbase + sblock.fs_fpg; 2067 if (dmax > sblock.fs_size) 2068 dmax = sblock.fs_size; 2069 2070 /* LINTED pointer difference won't overflow */ 2071 *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link); 2072 *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t); 2073 *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t); 2074 *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY); 2075 *nextfreeoff = *freeoff + 2076 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY); 2077 *ndblk = dmax - cbase; 2078 } 2079 2080 /* 2081 * Corrects all fields in the cg that can be done with the available 2082 * redundant data. 2083 */ 2084 void 2085 fix_cg(struct cg *cgp, int cgno) 2086 { 2087 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 2088 daddr32_t exp_freeoff, exp_nextfreeoff; 2089 daddr32_t ndblk; 2090 2091 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 2092 &exp_freeoff, &exp_nextfreeoff, &ndblk); 2093 2094 if (cgp->cg_cgx != cgno) { 2095 cgp->cg_cgx = cgno; 2096 } 2097 2098 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 2099 if (cgno == (sblock.fs_ncg - 1)) { 2100 cgp->cg_ncyl = sblock.fs_ncyl - 2101 (sblock.fs_cpg * cgno); 2102 } else { 2103 cgp->cg_ncyl = sblock.fs_cpg; 2104 } 2105 } 2106 2107 if (cgp->cg_niblk != sblock.fs_ipg) { 2108 /* 2109 * This is not used by the kernel, so it's pretty 2110 * harmless if it's wrong. 2111 */ 2112 cgp->cg_niblk = sblock.fs_ipg; 2113 } 2114 2115 if (cgp->cg_ndblk != ndblk) { 2116 cgp->cg_ndblk = ndblk; 2117 } 2118 2119 /* 2120 * For the rotors, any position's valid, so pick the one we know 2121 * will always exist. 2122 */ 2123 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) { 2124 cgp->cg_rotor = 0; 2125 } 2126 2127 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) { 2128 cgp->cg_frotor = 0; 2129 } 2130 2131 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 2132 cgp->cg_irotor = 0; 2133 } 2134 2135 /* 2136 * For btotoff and boff, if they're misaligned they won't 2137 * match the expected values, so we're catching both cases 2138 * here. Of course, if any of these are off, it seems likely 2139 * that the tables really won't be where we calculate they 2140 * should be anyway. 2141 */ 2142 if (cgp->cg_btotoff != exp_btotoff) { 2143 cgp->cg_btotoff = exp_btotoff; 2144 } 2145 2146 if (cgp->cg_boff != exp_boff) { 2147 cgp->cg_boff = exp_boff; 2148 } 2149 2150 if (cgp->cg_iusedoff != exp_iusedoff) { 2151 cgp->cg_iusedoff = exp_iusedoff; 2152 } 2153 2154 if (cgp->cg_freeoff != exp_freeoff) { 2155 cgp->cg_freeoff = exp_freeoff; 2156 } 2157 2158 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2159 cgp->cg_nextfreeoff = exp_nextfreeoff; 2160 } 2161 2162 /* 2163 * Reset the magic, as we've recreated this cg, also 2164 * update the cg_time, as we're writing out the cg 2165 */ 2166 cgp->cg_magic = CG_MAGIC; 2167 cgp->cg_time = time(NULL); 2168 2169 /* 2170 * We know there was at least one correctable problem, 2171 * or else we wouldn't have been called. So instead of 2172 * marking the buffer dirty N times above, just do it 2173 * once here. 2174 */ 2175 cgdirty(); 2176 } 2177 2178 void 2179 examinelog(void (*cb)(daddr32_t)) 2180 { 2181 struct bufarea *bp; 2182 extent_block_t *ebp; 2183 extent_t *ep; 2184 daddr32_t nfno, fno; 2185 int i; 2186 int j; 2187 2188 /* 2189 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags 2190 * we need to translate accordingly using logbtodb() 2191 */ 2192 2193 if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) { 2194 if (debug) { 2195 (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \ 2196 "Aborting log examination\n", \ 2197 logbtodb(&sblock, sblock.fs_logbno), SBLOCK); 2198 } 2199 return; 2200 } 2201 2202 /* 2203 * Read errors will return zeros, which will cause us 2204 * to do nothing harmful, so don't need to handle it. 2205 */ 2206 bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno), 2207 (size_t)sblock.fs_bsize); 2208 ebp = (void *)bp->b_un.b_buf; 2209 2210 /* 2211 * Does it look like a log allocation table? 2212 */ 2213 /* LINTED pointer cast is aligned */ 2214 if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf, 2215 sblock.fs_bsize)) 2216 return; 2217 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) 2218 return; 2219 2220 ep = &ebp->extents[0]; 2221 for (i = 0; i < ebp->nextents; ++i, ++ep) { 2222 fno = logbtofrag(&sblock, ep->pbno); 2223 nfno = dbtofsb(&sblock, ep->nbno); 2224 for (j = 0; j < nfno; ++j, ++fno) { 2225 /* 2226 * Invoke the callback first, so that pass1 can 2227 * mark the log blocks in-use. Then, if any 2228 * subsequent pass over the log shows us that a 2229 * block got freed (say, it was also claimed by 2230 * an inode that we cleared), we can safely declare 2231 * the log bad. 2232 */ 2233 if (cb != NULL) 2234 (*cb)(fno); 2235 if (!testbmap(fno)) 2236 islogok = 0; 2237 } 2238 } 2239 brelse(bp); 2240 2241 if (cb != NULL) { 2242 fno = logbtofrag(&sblock, sblock.fs_logbno); 2243 for (j = 0; j < sblock.fs_frag; ++j, ++fno) 2244 (*cb)(fno); 2245 } 2246 } 2247 2248 static void 2249 freelogblk(daddr32_t frag) 2250 { 2251 freeblk(sblock.fs_logbno, frag, 1); 2252 } 2253 2254 caddr_t 2255 file_id(fsck_ino_t inum, mode_t mode) 2256 { 2257 static char name[MAXPATHLEN + 1]; 2258 2259 if (lfdir == inum) { 2260 return (lfname); 2261 } 2262 2263 if ((mode & IFMT) == IFDIR) { 2264 (void) strcpy(name, "DIR"); 2265 } else if ((mode & IFMT) == IFATTRDIR) { 2266 (void) strcpy(name, "ATTR DIR"); 2267 } else if ((mode & IFMT) == IFSHAD) { 2268 (void) strcpy(name, "ACL"); 2269 } else { 2270 (void) strcpy(name, "FILE"); 2271 } 2272 2273 return (name); 2274 } 2275 2276 /* 2277 * Simple initializer for inodesc structures, so users of only a few 2278 * fields don't have to worry about getting the right defaults for 2279 * everything out. 2280 */ 2281 void 2282 init_inodesc(struct inodesc *idesc) 2283 { 2284 /* 2285 * Most fields should be zero, just hit the special cases. 2286 */ 2287 (void) memset((void *)idesc, 0, sizeof (struct inodesc)); 2288 idesc->id_fix = DONTKNOW; 2289 idesc->id_lbn = -1; 2290 idesc->id_truncto = -1; 2291 idesc->id_firsthole = -1; 2292 } 2293 2294 /* 2295 * Compare routine for tsearch(C) to use on ino_t instances. 2296 */ 2297 int 2298 ino_t_cmp(const void *left, const void *right) 2299 { 2300 const fsck_ino_t lino = (const fsck_ino_t)left; 2301 const fsck_ino_t rino = (const fsck_ino_t)right; 2302 2303 return (lino - rino); 2304 } 2305 2306 int 2307 cgisdirty(void) 2308 { 2309 return (cgblk.b_dirty); 2310 } 2311 2312 void 2313 cgflush(void) 2314 { 2315 flush(fswritefd, &cgblk); 2316 } 2317 2318 void 2319 dirty(struct bufarea *bp) 2320 { 2321 if (fswritefd < 0) { 2322 /* 2323 * No one should call dirty() in read only mode. 2324 * But if one does, it's not fatal issue. Just warn them. 2325 */ 2326 pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n"); 2327 } else { 2328 (bp)->b_dirty = 1; 2329 isdirty = 1; 2330 } 2331 } 2332 2333 void 2334 initbarea(struct bufarea *bp) 2335 { 2336 (bp)->b_dirty = 0; 2337 (bp)->b_bno = (diskaddr_t)-1LL; 2338 (bp)->b_flags = 0; 2339 (bp)->b_cnt = 0; 2340 (bp)->b_errs = 0; 2341 } 2342 2343 /* 2344 * Partition-sizing routines adapted from ../newfs/newfs.c. 2345 * Needed because calcsb() needs to use mkfs to work out what the 2346 * superblock should be, and mkfs insists on being told how many 2347 * sectors to use. 2348 * 2349 * Error handling assumes we're never called while preening. 2350 * 2351 * XXX This should be extracted into a ../ufslib.{c,h}, 2352 * in the same spirit to ../../fslib.{c,h}. Once that is 2353 * done, both fsck and newfs should be modified to link 2354 * against it. 2355 */ 2356 2357 static int label_type; 2358 2359 #define LABEL_TYPE_VTOC 1 2360 #define LABEL_TYPE_EFI 2 2361 #define LABEL_TYPE_OTHER 3 2362 2363 #define MB (1024 * 1024) 2364 #define SECTORS_PER_TERABYTE (1LL << 31) 2365 #define FS_SIZE_UPPER_LIMIT 0x100000000000LL 2366 2367 diskaddr_t 2368 getdisksize(caddr_t disk, int fd) 2369 { 2370 int rpm; 2371 struct dk_geom g; 2372 struct dk_cinfo ci; 2373 diskaddr_t actual_size; 2374 2375 /* 2376 * get_device_size() determines the actual size of the 2377 * device, and also the disk's attributes, such as geometry. 2378 */ 2379 actual_size = get_device_size(fd, disk); 2380 2381 if (label_type == LABEL_TYPE_VTOC) { 2382 if (ioctl(fd, DKIOCGGEOM, &g)) { 2383 pwarn("%s: Unable to read Disk geometry", disk); 2384 return (0); 2385 } 2386 if (sblock.fs_nsect == 0) 2387 sblock.fs_nsect = g.dkg_nsect; 2388 if (sblock.fs_ntrak == 0) 2389 sblock.fs_ntrak = g.dkg_nhead; 2390 if (sblock.fs_rps == 0) { 2391 rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm; 2392 sblock.fs_rps = rpm / 60; 2393 } 2394 } 2395 2396 if (sblock.fs_bsize == 0) 2397 sblock.fs_bsize = MAXBSIZE; 2398 2399 /* 2400 * Adjust maxcontig by the device's maxtransfer. If maxtransfer 2401 * information is not available, default to the min of a MB and 2402 * maxphys. 2403 */ 2404 if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) { 2405 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE; 2406 if (sblock.fs_maxcontig < 0) { 2407 int gotit, maxphys; 2408 2409 gotit = fsgetmaxphys(&maxphys, NULL); 2410 2411 /* 2412 * If we cannot get the maxphys value, default 2413 * to ufs_maxmaxphys (MB). 2414 */ 2415 if (gotit) { 2416 sblock.fs_maxcontig = MIN(maxphys, MB); 2417 } else { 2418 sblock.fs_maxcontig = MB; 2419 } 2420 } 2421 sblock.fs_maxcontig /= sblock.fs_bsize; 2422 } 2423 2424 return (actual_size); 2425 } 2426 2427 /* 2428 * Figure out how big the partition we're dealing with is. 2429 */ 2430 static diskaddr_t 2431 get_device_size(int fd, caddr_t name) 2432 { 2433 struct extvtoc vtoc; 2434 struct dk_gpt *efi_vtoc; 2435 diskaddr_t slicesize = 0; 2436 2437 int index = read_extvtoc(fd, &vtoc); 2438 2439 if (index >= 0) { 2440 label_type = LABEL_TYPE_VTOC; 2441 } else { 2442 if (index == VT_ENOTSUP || index == VT_ERROR) { 2443 /* it might be an EFI label */ 2444 index = efi_alloc_and_read(fd, &efi_vtoc); 2445 if (index >= 0) 2446 label_type = LABEL_TYPE_EFI; 2447 } 2448 } 2449 2450 if (index < 0) { 2451 /* 2452 * Since both attempts to read the label failed, we're 2453 * going to fall back to a brute force approach to 2454 * determining the device's size: see how far out we can 2455 * perform reads on the device. 2456 */ 2457 2458 slicesize = brute_force_get_device_size(fd); 2459 if (slicesize == 0) { 2460 switch (index) { 2461 case VT_ERROR: 2462 pwarn("%s: %s\n", name, strerror(errno)); 2463 break; 2464 case VT_EIO: 2465 pwarn("%s: I/O error accessing VTOC", name); 2466 break; 2467 case VT_EINVAL: 2468 pwarn("%s: Invalid field in VTOC", name); 2469 break; 2470 default: 2471 pwarn("%s: unknown error %d accessing VTOC", 2472 name, index); 2473 break; 2474 } 2475 return (0); 2476 } else { 2477 label_type = LABEL_TYPE_OTHER; 2478 } 2479 } 2480 2481 if (label_type == LABEL_TYPE_EFI) { 2482 slicesize = efi_vtoc->efi_parts[index].p_size; 2483 efi_free(efi_vtoc); 2484 } else if (label_type == LABEL_TYPE_VTOC) { 2485 slicesize = vtoc.v_part[index].p_size; 2486 } 2487 2488 return (slicesize); 2489 } 2490 2491 /* 2492 * brute_force_get_device_size 2493 * 2494 * Determine the size of the device by seeing how far we can 2495 * read. Doing an llseek( , , SEEK_END) would probably work 2496 * in most cases, but we've seen at least one third-party driver 2497 * which doesn't correctly support the SEEK_END option when the 2498 * the device is greater than a terabyte. 2499 */ 2500 2501 static diskaddr_t 2502 brute_force_get_device_size(int fd) 2503 { 2504 diskaddr_t min_fail = 0; 2505 diskaddr_t max_succeed = 0; 2506 diskaddr_t cur_db_off; 2507 char buf[DEV_BSIZE]; 2508 2509 /* 2510 * First, see if we can read the device at all, just to 2511 * eliminate errors that have nothing to do with the 2512 * device's size. 2513 */ 2514 2515 if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) || 2516 ((read(fd, buf, DEV_BSIZE)) == -1)) 2517 return (0); /* can't determine size */ 2518 2519 /* 2520 * Now, go sequentially through the multiples of 4TB 2521 * to find the first read that fails (this isn't strictly 2522 * the most efficient way to find the actual size if the 2523 * size really could be anything between 0 and 2**64 bytes. 2524 * We expect the sizes to be less than 16 TB for some time, 2525 * so why do a bunch of reads that are larger than that? 2526 * However, this algorithm *will* work for sizes of greater 2527 * than 16 TB. We're just not optimizing for those sizes.) 2528 */ 2529 2530 /* 2531 * XXX lint uses 32-bit arithmetic for doing flow analysis. 2532 * We're using > 32-bit constants here. Therefore, its flow 2533 * analysis is wrong. For the time being, ignore complaints 2534 * from it about the body of the for() being unreached. 2535 */ 2536 for (cur_db_off = SECTORS_PER_TERABYTE * 4; 2537 (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT); 2538 cur_db_off += 4 * SECTORS_PER_TERABYTE) { 2539 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2540 SEEK_SET) == -1) || 2541 (read(fd, buf, DEV_BSIZE) != DEV_BSIZE)) 2542 min_fail = cur_db_off; 2543 else 2544 max_succeed = cur_db_off; 2545 } 2546 2547 /* 2548 * XXX Same lint flow analysis problem as above. 2549 */ 2550 if (min_fail == 0) 2551 return (0); 2552 2553 /* 2554 * We now know that the size of the device is less than 2555 * min_fail and greater than or equal to max_succeed. Now 2556 * keep splitting the difference until the actual size in 2557 * sectors in known. We also know that the difference 2558 * between max_succeed and min_fail at this time is 2559 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which 2560 * simplifies the math below. 2561 */ 2562 2563 while (min_fail - max_succeed > 1) { 2564 cur_db_off = max_succeed + (min_fail - max_succeed)/2; 2565 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2566 SEEK_SET)) == -1) || 2567 ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)) 2568 min_fail = cur_db_off; 2569 else 2570 max_succeed = cur_db_off; 2571 } 2572 2573 /* the size is the last successfully read sector offset plus one */ 2574 return (max_succeed + 1); 2575 } 2576 2577 static void 2578 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap) 2579 { 2580 struct dinode *dp; 2581 char pathbuf[MAXPATHLEN + 1]; 2582 2583 vpwarn(fmt, ap); 2584 (void) putchar(' '); 2585 pinode(ino); 2586 (void) printf("\n"); 2587 getpathname(pathbuf, cwd, ino); 2588 if (ino < UFSROOTINO || ino > maxino) { 2589 pfatal("NAME=%s\n", pathbuf); 2590 return; 2591 } 2592 dp = ginode(ino); 2593 if (ftypeok(dp)) 2594 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf); 2595 else 2596 pfatal("NAME=%s\n", pathbuf); 2597 } 2598 2599 void 2600 direrror(fsck_ino_t ino, caddr_t fmt, ...) 2601 { 2602 va_list ap; 2603 2604 va_start(ap, fmt); 2605 vfileerror(ino, ino, fmt, ap); 2606 va_end(ap); 2607 } 2608 2609 static void 2610 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap) 2611 { 2612 vfileerror(ino, ino, fmt, ap); 2613 } 2614 2615 void 2616 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...) 2617 { 2618 va_list ap; 2619 2620 va_start(ap, fmt); 2621 vfileerror(cwd, ino, fmt, ap); 2622 va_end(ap); 2623 } 2624 2625 /* 2626 * Adds the given inode to the orphaned-directories list, limbo_dirs. 2627 * Assumes that the caller has set INCLEAR in the inode's statemap[] 2628 * entry. 2629 * 2630 * With INCLEAR set, the inode will get ignored by passes 2 and 3, 2631 * meaning it's effectively an orphan. It needs to be noted now, so 2632 * it will be remembered in pass 4. 2633 */ 2634 2635 void 2636 add_orphan_dir(fsck_ino_t ino) 2637 { 2638 if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL) 2639 errexit("add_orphan_dir: out of memory"); 2640 } 2641 2642 /* 2643 * Remove an inode from the orphaned-directories list, presumably 2644 * because it's been cleared. 2645 */ 2646 void 2647 remove_orphan_dir(fsck_ino_t ino) 2648 { 2649 (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp); 2650 } 2651 2652 /* 2653 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum() 2654 * and lufs.c:checksum(). 2655 */ 2656 static void 2657 log_setsum(int32_t *sp, int32_t *lp, int nb) 2658 { 2659 int32_t csum = 0; 2660 2661 *sp = 0; 2662 nb /= sizeof (int32_t); 2663 while (nb--) 2664 csum += *lp++; 2665 *sp = csum; 2666 } 2667 2668 static int 2669 log_checksum(int32_t *sp, int32_t *lp, int nb) 2670 { 2671 int32_t ssum = *sp; 2672 2673 log_setsum(sp, lp, nb); 2674 if (ssum != *sp) { 2675 *sp = ssum; 2676 return (0); 2677 } 2678 return (1); 2679 } 2680