1 /* 2 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 7 /* All Rights Reserved */ 8 9 /* 10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms are permitted 14 * provided that: (1) source distributions retain this entire copyright 15 * notice and comment, and (2) distributions including binaries display 16 * the following acknowledgement: ``This product includes software 17 * developed by the University of California, Berkeley and its contributors'' 18 * in the documentation or other materials provided with the distribution 19 * and in all advertising materials mentioning features or use of this 20 * software. Neither the name of the University nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <unistd.h> 33 #include <stdarg.h> 34 #include <libadm.h> 35 #include <note.h> 36 #include <sys/param.h> 37 #include <sys/types.h> 38 #include <sys/mntent.h> 39 #include <sys/filio.h> 40 #include <sys/fs/ufs_fs.h> 41 #include <sys/vnode.h> 42 #include <sys/fs/ufs_acl.h> 43 #include <sys/fs/ufs_inode.h> 44 #include <sys/fs/ufs_log.h> 45 #define _KERNEL 46 #include <sys/fs/ufs_fsdir.h> 47 #undef _KERNEL 48 #include <sys/mnttab.h> 49 #include <sys/types.h> 50 #include <sys/stat.h> 51 #include <fcntl.h> 52 #include <signal.h> 53 #include <string.h> 54 #include <ctype.h> 55 #include <sys/vfstab.h> 56 #include <sys/lockfs.h> 57 #include <errno.h> 58 #include <sys/cmn_err.h> 59 #include <sys/dkio.h> 60 #include <sys/vtoc.h> 61 #include <sys/efi_partition.h> 62 #include <fslib.h> 63 #include <inttypes.h> 64 #include "fsck.h" 65 66 caddr_t mount_point = NULL; 67 68 static int64_t diskreads, totalreads; /* Disk cache statistics */ 69 70 static int log_checksum(int32_t *, int32_t *, int); 71 static void vdirerror(fsck_ino_t, caddr_t, va_list); 72 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t); 73 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t); 74 static void vpwarn(caddr_t, va_list); 75 static int getline(FILE *, caddr_t, int); 76 static struct bufarea *alloc_bufarea(void); 77 static void rwerror(caddr_t, diskaddr_t, int rval); 78 static void debugclean(void); 79 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t); 80 static void freelogblk(daddr32_t); 81 static void verrexit(caddr_t, va_list); 82 static void vpfatal(caddr_t, va_list); 83 static diskaddr_t get_device_size(int, caddr_t); 84 static diskaddr_t brute_force_get_device_size(int); 85 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *, 86 daddr32_t *, daddr32_t *, daddr32_t *); 87 88 int 89 ftypeok(struct dinode *dp) 90 { 91 switch (dp->di_mode & IFMT) { 92 93 case IFDIR: 94 case IFREG: 95 case IFBLK: 96 case IFCHR: 97 case IFLNK: 98 case IFSOCK: 99 case IFIFO: 100 case IFSHAD: 101 case IFATTRDIR: 102 return (1); 103 104 default: 105 if (debug) 106 (void) printf("bad file type 0%o\n", dp->di_mode); 107 return (0); 108 } 109 } 110 111 int 112 acltypeok(struct dinode *dp) 113 { 114 if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT)) 115 return (1); 116 117 if (debug) 118 (void) printf("bad file type for acl I=%d: 0%o\n", 119 dp->di_shadow, dp->di_mode); 120 return (0); 121 } 122 123 NOTE(PRINTFLIKE(1)) 124 int 125 reply(caddr_t fmt, ...) 126 { 127 va_list ap; 128 char line[80]; 129 130 if (preen) 131 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode"); 132 133 if (mflag) { 134 /* 135 * We don't know what's going on, so don't potentially 136 * make things worse by having errexit() write stuff 137 * out to disk. 138 */ 139 (void) printf( 140 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 141 devname); 142 exit(EXERRFATAL); 143 } 144 145 va_start(ap, fmt); 146 (void) putchar('\n'); 147 (void) vprintf(fmt, ap); 148 (void) putchar('?'); 149 (void) putchar(' '); 150 va_end(ap); 151 152 if (nflag || fswritefd < 0) { 153 (void) printf(" no\n\n"); 154 return (0); 155 } 156 if (yflag) { 157 (void) printf(" yes\n\n"); 158 return (1); 159 } 160 (void) fflush(stdout); 161 if (getline(stdin, line, sizeof (line)) == EOF) 162 errexit("\n"); 163 (void) printf("\n"); 164 if (line[0] == 'y' || line[0] == 'Y') { 165 return (1); 166 } else { 167 return (0); 168 } 169 } 170 171 int 172 getline(FILE *fp, caddr_t loc, int maxlen) 173 { 174 int n; 175 caddr_t p, lastloc; 176 177 p = loc; 178 lastloc = &p[maxlen-1]; 179 while ((n = getc(fp)) != '\n') { 180 if (n == EOF) 181 return (EOF); 182 if (!isspace(n) && p < lastloc) 183 *p++ = (char)n; 184 } 185 *p = '\0'; 186 /* LINTED pointer difference won't overflow */ 187 return (p - loc); 188 } 189 190 /* 191 * Malloc buffers and set up cache. 192 */ 193 void 194 bufinit(void) 195 { 196 struct bufarea *bp; 197 int bufcnt, i; 198 caddr_t bufp; 199 200 bufp = malloc((size_t)sblock.fs_bsize); 201 if (bufp == NULL) 202 goto nomem; 203 initbarea(&cgblk); 204 cgblk.b_un.b_buf = bufp; 205 bufhead.b_next = bufhead.b_prev = &bufhead; 206 bufcnt = MAXBUFSPACE / sblock.fs_bsize; 207 if (bufcnt < MINBUFS) 208 bufcnt = MINBUFS; 209 for (i = 0; i < bufcnt; i++) { 210 bp = (struct bufarea *)malloc(sizeof (struct bufarea)); 211 if (bp == NULL) { 212 if (i >= MINBUFS) 213 goto noalloc; 214 goto nomem; 215 } 216 217 bufp = malloc((size_t)sblock.fs_bsize); 218 if (bufp == NULL) { 219 free((void *)bp); 220 if (i >= MINBUFS) 221 goto noalloc; 222 goto nomem; 223 } 224 initbarea(bp); 225 bp->b_un.b_buf = bufp; 226 bp->b_prev = &bufhead; 227 bp->b_next = bufhead.b_next; 228 bufhead.b_next->b_prev = bp; 229 bufhead.b_next = bp; 230 } 231 noalloc: 232 bufhead.b_size = i; /* save number of buffers */ 233 pbp = pdirbp = NULL; 234 return; 235 236 nomem: 237 errexit("cannot allocate buffer pool\n"); 238 /* NOTREACHED */ 239 } 240 241 /* 242 * Undo a bufinit(). 243 */ 244 void 245 unbufinit(void) 246 { 247 int cnt; 248 struct bufarea *bp, *nbp; 249 250 cnt = 0; 251 for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) { 252 cnt++; 253 flush(fswritefd, bp); 254 nbp = bp->b_prev; 255 /* 256 * We're discarding the entire chain, so this isn't 257 * technically necessary. However, it doesn't hurt 258 * and lint's data flow analysis is much happier 259 * (this prevents it from thinking there's a chance 260 * of our using memory elsewhere after it's been released). 261 */ 262 nbp->b_next = bp->b_next; 263 bp->b_next->b_prev = nbp; 264 free((void *)bp->b_un.b_buf); 265 free((void *)bp); 266 } 267 268 if (bufhead.b_size != cnt) 269 errexit("Panic: cache lost %d buffers\n", 270 bufhead.b_size - cnt); 271 } 272 273 /* 274 * Manage a cache of directory blocks. 275 */ 276 struct bufarea * 277 getdatablk(daddr32_t blkno, size_t size) 278 { 279 struct bufarea *bp; 280 281 for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) 282 if (bp->b_bno == fsbtodb(&sblock, blkno)) { 283 goto foundit; 284 } 285 for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) 286 if ((bp->b_flags & B_INUSE) == 0) 287 break; 288 if (bp == &bufhead) { 289 bp = alloc_bufarea(); 290 if (bp == NULL) { 291 errexit("deadlocked buffer pool\n"); 292 /* NOTREACHED */ 293 } 294 } 295 /* 296 * We're at the same logical level as getblk(), so if there 297 * are any errors, we'll let our caller handle them. 298 */ 299 diskreads++; 300 (void) getblk(bp, blkno, size); 301 302 foundit: 303 totalreads++; 304 bp->b_cnt++; 305 /* 306 * Move the buffer to head of linked list if it isn't 307 * already there. 308 */ 309 if (bufhead.b_next != bp) { 310 bp->b_prev->b_next = bp->b_next; 311 bp->b_next->b_prev = bp->b_prev; 312 bp->b_prev = &bufhead; 313 bp->b_next = bufhead.b_next; 314 bufhead.b_next->b_prev = bp; 315 bufhead.b_next = bp; 316 } 317 bp->b_flags |= B_INUSE; 318 return (bp); 319 } 320 321 void 322 brelse(struct bufarea *bp) 323 { 324 bp->b_cnt--; 325 if (bp->b_cnt == 0) { 326 bp->b_flags &= ~B_INUSE; 327 } 328 } 329 330 struct bufarea * 331 getblk(struct bufarea *bp, daddr32_t blk, size_t size) 332 { 333 diskaddr_t dblk; 334 335 dblk = fsbtodb(&sblock, blk); 336 if (bp->b_bno == dblk) 337 return (bp); 338 flush(fswritefd, bp); 339 bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size); 340 bp->b_bno = dblk; 341 bp->b_size = size; 342 return (bp); 343 } 344 345 void 346 flush(int fd, struct bufarea *bp) 347 { 348 int i, j; 349 caddr_t sip; 350 long size; 351 352 if (!bp->b_dirty) 353 return; 354 355 /* 356 * It's not our buf, so if there are errors, let whoever 357 * acquired it deal with the actual problem. 358 */ 359 if (bp->b_errs != 0) 360 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno); 361 bp->b_dirty = 0; 362 bp->b_errs = 0; 363 bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); 364 if (bp != &sblk) { 365 return; 366 } 367 368 /* 369 * We're flushing the superblock, so make sure all the 370 * ancillary bits go out as well. 371 */ 372 sip = (caddr_t)sblock.fs_u.fs_csp; 373 for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { 374 size = sblock.fs_cssize - i < sblock.fs_bsize ? 375 sblock.fs_cssize - i : sblock.fs_bsize; 376 bwrite(fswritefd, sip, 377 fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), 378 size); 379 sip += size; 380 } 381 } 382 383 static void 384 rwerror(caddr_t mesg, diskaddr_t blk, int rval) 385 { 386 int olderr = errno; 387 388 if (!preen) 389 (void) printf("\n"); 390 391 if (rval == -1) 392 pfatal("CANNOT %s: DISK BLOCK %lld: %s", 393 mesg, blk, strerror(olderr)); 394 else 395 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk); 396 397 if (reply("CONTINUE") == 0) { 398 exitstat = EXERRFATAL; 399 errexit("Program terminated\n"); 400 } 401 } 402 403 void 404 ckfini(void) 405 { 406 int64_t percentage; 407 408 if (fswritefd < 0) 409 return; 410 411 flush(fswritefd, &sblk); 412 /* 413 * Were we using a backup superblock? 414 */ 415 if (havesb && sblk.b_bno != SBOFF / dev_bsize) { 416 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) { 417 sblk.b_bno = SBOFF / dev_bsize; 418 sbdirty(); 419 flush(fswritefd, &sblk); 420 } 421 } 422 flush(fswritefd, &cgblk); 423 if (cgblk.b_un.b_buf != NULL) { 424 free((void *)cgblk.b_un.b_buf); 425 cgblk.b_un.b_buf = NULL; 426 } 427 unbufinit(); 428 pbp = NULL; 429 pdirbp = NULL; 430 if (debug) { 431 /* 432 * Note that we only count cache-related reads. 433 * Anything that called fsck_bread() or getblk() 434 * directly are explicitly not cached, so they're not 435 * included here. 436 */ 437 if (totalreads != 0) 438 percentage = diskreads * 100 / totalreads; 439 else 440 percentage = 0; 441 442 (void) printf("cache missed %lld of %lld reads (%lld%%)\n", 443 (longlong_t)diskreads, (longlong_t)totalreads, 444 (longlong_t)percentage); 445 } 446 447 (void) close(fsreadfd); 448 (void) close(fswritefd); 449 fsreadfd = -1; 450 fswritefd = -1; 451 } 452 453 int 454 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size) 455 { 456 caddr_t cp; 457 int i; 458 int errs; 459 offset_t offset = ldbtob(blk); 460 offset_t addr; 461 462 /* 463 * In our universe, nothing exists before the superblock, so 464 * just pretend it's always zeros. This is the complement of 465 * bwrite()'s ignoring write requests into that space. 466 */ 467 if (blk < SBLOCK) { 468 if (debug) 469 (void) printf( 470 "WARNING: fsck_bread() passed blkno < %d (%lld)\n", 471 SBLOCK, (longlong_t)blk); 472 (void) memset(buf, 0, (size_t)size); 473 return (1); 474 } 475 476 if (llseek(fd, offset, 0) < 0) { 477 rwerror("SEEK", blk, -1); 478 } 479 480 if ((i = read(fd, buf, size)) == size) { 481 return (0); 482 } 483 rwerror("READ", blk, i); 484 if (llseek(fd, offset, 0) < 0) { 485 rwerror("SEEK", blk, -1); 486 } 487 errs = 0; 488 (void) memset(buf, 0, (size_t)size); 489 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:"); 490 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 491 addr = ldbtob(blk + i); 492 if (llseek(fd, addr, SEEK_CUR) < 0 || 493 read(fd, cp, (int)secsize) < 0) { 494 iscorrupt = 1; 495 (void) printf(" %llu", blk + (u_longlong_t)i); 496 errs++; 497 } 498 } 499 (void) printf("\n"); 500 return (errs); 501 } 502 503 void 504 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size) 505 { 506 int i; 507 int n; 508 caddr_t cp; 509 offset_t offset = ldbtob(blk); 510 offset_t addr; 511 512 if (fd < 0) 513 return; 514 if (blk < SBLOCK) { 515 if (debug) 516 (void) printf( 517 "WARNING: Attempt to write illegal blkno %lld on %s\n", 518 (longlong_t)blk, devname); 519 return; 520 } 521 if (llseek(fd, offset, 0) < 0) { 522 rwerror("SEEK", blk, -1); 523 } 524 if ((i = write(fd, buf, (int)size)) == size) { 525 fsmodified = 1; 526 return; 527 } 528 rwerror("WRITE", blk, i); 529 if (llseek(fd, offset, 0) < 0) { 530 rwerror("SEEK", blk, -1); 531 } 532 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 533 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 534 n = 0; 535 addr = ldbtob(blk + i); 536 if (llseek(fd, addr, SEEK_CUR) < 0 || 537 (n = write(fd, cp, DEV_BSIZE)) < 0) { 538 iscorrupt = 1; 539 (void) printf(" %llu", blk + (u_longlong_t)i); 540 } else if (n > 0) { 541 fsmodified = 1; 542 } 543 544 } 545 (void) printf("\n"); 546 } 547 548 /* 549 * Allocates the specified number of contiguous fragments. 550 */ 551 daddr32_t 552 allocblk(int wantedfrags) 553 { 554 int block, leadfrag, tailfrag; 555 daddr32_t selected; 556 size_t size; 557 struct bufarea *bp; 558 559 /* 560 * It's arguable whether we should just fail, or instead 561 * error out here. Since we should only ever be asked for 562 * a single fragment or an entire block (i.e., sblock.fs_frag), 563 * we'll fail out because anything else means somebody 564 * changed code without considering all of the ramifications. 565 */ 566 if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) { 567 exitstat = EXERRFATAL; 568 errexit("allocblk() asked for %d frags. " 569 "Legal range is 1 to %d", 570 wantedfrags, sblock.fs_frag); 571 } 572 573 /* 574 * For each filesystem block, look at every possible starting 575 * offset within the block such that we can get the number of 576 * contiguous fragments that we need. This is a drastically 577 * simplified version of the kernel's mapsearch() and alloc*(). 578 * It's also correspondingly slower. 579 */ 580 for (block = 0; block < maxfsblock - sblock.fs_frag; 581 block += sblock.fs_frag) { 582 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags; 583 leadfrag++) { 584 /* 585 * Is first fragment of candidate run available? 586 */ 587 if (testbmap(block + leadfrag)) 588 continue; 589 /* 590 * Are the rest of them available? 591 */ 592 for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++) 593 if (testbmap(block + leadfrag + tailfrag)) 594 break; 595 if (tailfrag < wantedfrags) { 596 /* 597 * No, skip the known-unusable run. 598 */ 599 leadfrag += tailfrag; 600 continue; 601 } 602 /* 603 * Found what we need, so claim them. 604 */ 605 for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++) 606 setbmap(block + leadfrag + tailfrag); 607 n_blks += wantedfrags; 608 size = wantedfrags * sblock.fs_fsize; 609 selected = block + leadfrag; 610 bp = getdatablk(selected, size); 611 (void) memset((void *)bp->b_un.b_buf, 0, size); 612 dirty(bp); 613 brelse(bp); 614 if (debug) 615 (void) printf( 616 "allocblk: selected %d (in block %d), frags %d, size %d\n", 617 selected, selected % sblock.fs_bsize, 618 wantedfrags, (int)size); 619 return (selected); 620 } 621 } 622 return (0); 623 } 624 625 /* 626 * Free a previously allocated block 627 */ 628 void 629 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags) 630 { 631 struct inodesc idesc; 632 633 if (debug) 634 (void) printf("debug: freeing %d fragments starting at %d\n", 635 frags, blkno); 636 637 init_inodesc(&idesc); 638 639 idesc.id_number = ino; 640 idesc.id_blkno = blkno; 641 idesc.id_numfrags = frags; 642 idesc.id_truncto = -1; 643 644 /* 645 * Nothing in the return status has any relevance to how 646 * we're using pass4check(), so just ignore it. 647 */ 648 (void) pass4check(&idesc); 649 } 650 651 /* 652 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes 653 * that the given buffer is at least MAXPATHLEN + 1 characters. 654 */ 655 void 656 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino) 657 { 658 int len; 659 caddr_t cp; 660 struct dinode *dp; 661 struct inodesc idesc; 662 struct inoinfo *inp; 663 664 if (debug) 665 (void) printf("debug: getpathname(curdir %d, ino %d)\n", 666 curdir, ino); 667 668 if ((curdir == 0) || (!INO_IS_DVALID(curdir))) { 669 (void) strcpy(namebuf, "?"); 670 return; 671 } 672 673 if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) { 674 (void) strcpy(namebuf, "/"); 675 return; 676 } 677 678 init_inodesc(&idesc); 679 idesc.id_type = DATA; 680 cp = &namebuf[MAXPATHLEN - 1]; 681 *cp = '\0'; 682 683 /* 684 * In the case of extended attributes, our 685 * parent won't necessarily be a directory, so just 686 * return what we've found with a prefix indicating 687 * that it's an XATTR. Presumably our caller will 688 * know what's going on and do something useful, like 689 * work out the path of the parent and then combine 690 * the two names. 691 * 692 * Can't use strcpy(), etc, because we've probably 693 * already got some name information in the buffer and 694 * the usual trailing \0 would lose it. 695 */ 696 dp = ginode(curdir); 697 if ((dp->di_mode & IFMT) == IFATTRDIR) { 698 idesc.id_number = curdir; 699 idesc.id_parent = ino; 700 idesc.id_func = findname; 701 idesc.id_name = namebuf; 702 idesc.id_fix = NOFIX; 703 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) { 704 *cp-- = '?'; 705 } 706 707 len = sizeof (XATTR_DIR_NAME) - 1; 708 cp -= len; 709 (void) memmove(cp, XATTR_DIR_NAME, len); 710 goto attrname; 711 } 712 713 /* 714 * If curdir == ino, need to get a handle on .. so we 715 * can search it for ino's name. Otherwise, just search 716 * the given directory for ino. Repeat until out of space 717 * or a full path has been built. 718 */ 719 if (curdir != ino) { 720 idesc.id_parent = curdir; 721 goto namelookup; 722 } 723 while (ino != UFSROOTINO && ino != 0) { 724 idesc.id_number = ino; 725 idesc.id_func = findino; 726 idesc.id_name = ".."; 727 idesc.id_fix = NOFIX; 728 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) { 729 inp = getinoinfo(ino); 730 if ((inp == NULL) || (inp->i_parent == 0)) { 731 break; 732 } 733 idesc.id_parent = inp->i_parent; 734 } 735 736 /* 737 * To get this far, id_parent must have the inode 738 * number for `..' in it. By definition, that's got 739 * to be a directory, so search it for the inode of 740 * interest. 741 */ 742 namelookup: 743 idesc.id_number = idesc.id_parent; 744 idesc.id_parent = ino; 745 idesc.id_func = findname; 746 idesc.id_name = namebuf; 747 idesc.id_fix = NOFIX; 748 if ((ckinode(ginode(idesc.id_number), 749 &idesc, CKI_TRAVERSE) & FOUND) == 0) { 750 break; 751 } 752 /* 753 * Prepend to what we've accumulated so far. If 754 * there's not enough room for even one more path element 755 * (of the worst-case length), then bail out. 756 */ 757 len = strlen(namebuf); 758 cp -= len; 759 if (cp < &namebuf[MAXNAMLEN]) 760 break; 761 (void) memmove(cp, namebuf, len); 762 *--cp = '/'; 763 764 /* 765 * Corner case for a looped-to-itself directory. 766 */ 767 if (ino == idesc.id_number) 768 break; 769 770 /* 771 * Climb one level of the hierarchy. In other words, 772 * the current .. becomes the inode to search for and 773 * its parent becomes the directory to search in. 774 */ 775 ino = idesc.id_number; 776 } 777 778 /* 779 * If we hit a discontinuity in the hierarchy, indicate it by 780 * prefixing the path so far with `?'. Otherwise, the first 781 * character will be `/' as a side-effect of the *--cp above. 782 * 783 * The special case is to handle the situation where we're 784 * trying to look something up in UFSROOTINO, but didn't find 785 * it. 786 */ 787 if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) { 788 if (cp > namebuf) 789 cp--; 790 *cp = '?'; 791 } 792 793 /* 794 * The invariants being used for buffer integrity are: 795 * - namebuf[] is terminated with \0 before anything else 796 * - cp is always <= the last element of namebuf[] 797 * - the new path element is always stored at the 798 * beginning of namebuf[], and is no more than MAXNAMLEN-1 799 * characters 800 * - cp is is decremented by the number of characters in 801 * the new path element 802 * - if, after the above accounting for the new element's 803 * size, there is no longer enough room at the beginning of 804 * namebuf[] for a full-sized path element and a slash, 805 * terminate the loop. cp is in the range 806 * &namebuf[0]..&namebuf[MAXNAMLEN - 1] 807 */ 808 attrname: 809 /* LINTED per the above discussion */ 810 (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp); 811 } 812 813 /* ARGSUSED */ 814 void 815 catch(int dummy) 816 { 817 ckfini(); 818 exit(EXSIGNAL); 819 } 820 821 /* 822 * When preening, allow a single quit to signal 823 * a special exit after filesystem checks complete 824 * so that reboot sequence may be interrupted. 825 */ 826 /* ARGSUSED */ 827 void 828 catchquit(int dummy) 829 { 830 (void) printf("returning to single-user after filesystem check\n"); 831 interrupted = 1; 832 (void) signal(SIGQUIT, SIG_DFL); 833 } 834 835 836 /* 837 * determine whether an inode should be fixed. 838 */ 839 NOTE(PRINTFLIKE(2)) 840 int 841 dofix(struct inodesc *idesc, caddr_t msg, ...) 842 { 843 int rval = 0; 844 va_list ap; 845 846 va_start(ap, msg); 847 848 switch (idesc->id_fix) { 849 850 case DONTKNOW: 851 if (idesc->id_type == DATA) 852 vdirerror(idesc->id_number, msg, ap); 853 else 854 vpwarn(msg, ap); 855 if (preen) { 856 idesc->id_fix = FIX; 857 rval = ALTERED; 858 break; 859 } 860 if (reply("SALVAGE") == 0) { 861 idesc->id_fix = NOFIX; 862 break; 863 } 864 idesc->id_fix = FIX; 865 rval = ALTERED; 866 break; 867 868 case FIX: 869 rval = ALTERED; 870 break; 871 872 case NOFIX: 873 break; 874 875 default: 876 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix); 877 } 878 879 va_end(ap); 880 return (rval); 881 } 882 883 NOTE(PRINTFLIKE(1)) 884 void 885 errexit(caddr_t fmt, ...) 886 { 887 va_list ap; 888 889 va_start(ap, fmt); 890 verrexit(fmt, ap); 891 /* NOTREACHED */ 892 } 893 894 NOTE(PRINTFLIKE(1)) 895 static void 896 verrexit(caddr_t fmt, va_list ap) 897 { 898 static int recursing = 0; 899 900 if (!recursing) { 901 recursing = 1; 902 if (errorlocked || iscorrupt) { 903 if (havesb) { 904 sblock.fs_clean = FSBAD; 905 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 906 sblock.fs_state = -sblock.fs_state; 907 sbdirty(); 908 write_altsb(fswritefd); 909 flush(fswritefd, &sblk); 910 } 911 } 912 ckfini(); 913 recursing = 0; 914 } 915 (void) vprintf(fmt, ap); 916 if (fmt[strlen(fmt) - 1] != '\n') 917 (void) putchar('\n'); 918 exit((exitstat != 0) ? exitstat : EXERRFATAL); 919 } 920 921 /* 922 * An unexpected inconsistency occured. 923 * Die if preening, otherwise just print message and continue. 924 */ 925 NOTE(PRINTFLIKE(1)) 926 void 927 pfatal(caddr_t fmt, ...) 928 { 929 va_list ap; 930 931 va_start(ap, fmt); 932 vpfatal(fmt, ap); 933 va_end(ap); 934 } 935 936 NOTE(PRINTFLIKE(1)) 937 static void 938 vpfatal(caddr_t fmt, va_list ap) 939 { 940 if (preen) { 941 if (*fmt != '\0') { 942 (void) printf("%s: ", devname); 943 (void) vprintf(fmt, ap); 944 (void) printf("\n"); 945 } 946 (void) printf( 947 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 948 devname); 949 if (havesb) { 950 sblock.fs_clean = FSBAD; 951 sblock.fs_state = -(FSOKAY - (long)sblock.fs_time); 952 sbdirty(); 953 flush(fswritefd, &sblk); 954 } 955 /* 956 * We're exiting, it doesn't really matter that our 957 * caller doesn't get to call va_end(). 958 */ 959 if (exitstat == 0) 960 exitstat = EXFNDERRS; 961 exit(exitstat); 962 } 963 if (*fmt != '\0') { 964 (void) vprintf(fmt, ap); 965 } 966 } 967 968 /* 969 * Pwarn just prints a message when not preening, 970 * or a warning (preceded by filename) when preening. 971 */ 972 NOTE(PRINTFLIKE(1)) 973 void 974 pwarn(caddr_t fmt, ...) 975 { 976 va_list ap; 977 978 va_start(ap, fmt); 979 vpwarn(fmt, ap); 980 va_end(ap); 981 } 982 983 NOTE(PRINTFLIKE(1)) 984 static void 985 vpwarn(caddr_t fmt, va_list ap) 986 { 987 if (*fmt != '\0') { 988 if (preen) 989 (void) printf("%s: ", devname); 990 (void) vprintf(fmt, ap); 991 } 992 } 993 994 /* 995 * Like sprintf(), except the buffer is dynamically allocated 996 * and returned, instead of being passed in. A pointer to the 997 * buffer is stored in *RET, and FMT is the usual format string. 998 * The number of characters in *RET (excluding the trailing \0, 999 * to be consistent with the other *printf() routines) is returned. 1000 * 1001 * Solaris doesn't have asprintf(3C) yet, unfortunately. 1002 */ 1003 NOTE(PRINTFLIKE(2)) 1004 int 1005 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...) 1006 { 1007 int len; 1008 caddr_t buffer; 1009 va_list ap; 1010 1011 va_start(ap, fmt); 1012 len = vsnprintf(NULL, 0, fmt, ap); 1013 va_end(ap); 1014 1015 buffer = malloc((len + 1) * sizeof (char)); 1016 if (buffer == NULL) { 1017 errexit("Out of memory in asprintf\n"); 1018 /* NOTREACHED */ 1019 } 1020 1021 va_start(ap, fmt); 1022 (void) vsnprintf(buffer, len + 1, fmt, ap); 1023 va_end(ap); 1024 1025 *ret = buffer; 1026 return (len); 1027 } 1028 1029 /* 1030 * So we can take advantage of kernel routines in ufs_subr.c. 1031 */ 1032 /* PRINTFLIKE2 */ 1033 void 1034 cmn_err(int level, caddr_t fmt, ...) 1035 { 1036 va_list ap; 1037 1038 va_start(ap, fmt); 1039 if (level == CE_PANIC) { 1040 (void) printf("INTERNAL INCONSISTENCY:"); 1041 verrexit(fmt, ap); 1042 } else { 1043 (void) vprintf(fmt, ap); 1044 } 1045 va_end(ap); 1046 } 1047 1048 /* 1049 * Check to see if unraw version of name is already mounted. 1050 * Updates devstr with the device name if devstr is not NULL 1051 * and str_size is positive. 1052 */ 1053 int 1054 mounted(caddr_t name, caddr_t devstr, size_t str_size) 1055 { 1056 int found; 1057 struct mnttab *mntent; 1058 1059 mntent = search_mnttab(NULL, unrawname(name), devstr, str_size); 1060 if (mntent == NULL) 1061 return (M_NOMNT); 1062 1063 /* 1064 * It's mounted. With or without write access? 1065 */ 1066 if (hasmntopt(mntent, MNTOPT_RO) != 0) 1067 found = M_RO; /* mounted as RO */ 1068 else 1069 found = M_RW; /* mounted as R/W */ 1070 1071 if (mount_point == NULL) { 1072 mount_point = strdup(mntent->mnt_mountp); 1073 if (mount_point == NULL) { 1074 errexit("fsck: memory allocation failure: %s", 1075 strerror(errno)); 1076 /* NOTREACHED */ 1077 } 1078 1079 if (devstr != NULL && str_size > 0) 1080 (void) strlcpy(devstr, mntent->mnt_special, str_size); 1081 } 1082 1083 return (found); 1084 } 1085 1086 /* 1087 * Check to see if name corresponds to an entry in vfstab, and that the entry 1088 * does not have option ro. 1089 */ 1090 int 1091 writable(caddr_t name) 1092 { 1093 int rw = 1; 1094 struct vfstab vfsbuf, vfskey; 1095 FILE *vfstab; 1096 1097 vfstab = fopen(VFSTAB, "r"); 1098 if (vfstab == NULL) { 1099 (void) printf("can't open %s\n", VFSTAB); 1100 return (1); 1101 } 1102 (void) memset((void *)&vfskey, 0, sizeof (vfskey)); 1103 vfsnull(&vfskey); 1104 vfskey.vfs_special = unrawname(name); 1105 vfskey.vfs_fstype = MNTTYPE_UFS; 1106 if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) && 1107 (hasvfsopt(&vfsbuf, MNTOPT_RO))) { 1108 rw = 0; 1109 } 1110 (void) fclose(vfstab); 1111 return (rw); 1112 } 1113 1114 /* 1115 * debugclean 1116 */ 1117 static void 1118 debugclean(void) 1119 { 1120 if (!debug) 1121 return; 1122 1123 if ((iscorrupt == 0) && (isdirty == 0)) 1124 return; 1125 1126 if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) || 1127 (sblock.fs_clean == FSLOG && islog && islogok) || 1128 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked)) 1129 return; 1130 1131 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n", 1132 sblock.fs_clean == FSSTABLE ? "stable" : 1133 sblock.fs_clean == FSLOG ? "logging" : 1134 sblock.fs_clean == FSFIX ? "being fixed" : "clean", 1135 devname); 1136 } 1137 1138 /* 1139 * updateclean 1140 * Carefully and transparently update the clean flag. 1141 * 1142 * `iscorrupt' has to be in its final state before this is called. 1143 */ 1144 int 1145 updateclean(void) 1146 { 1147 int freedlog = 0; 1148 struct bufarea cleanbuf; 1149 size_t size; 1150 ssize_t io_res; 1151 diskaddr_t bno; 1152 char fsclean; 1153 int fsreclaim; 1154 char fsflags; 1155 int flags_ok; 1156 daddr32_t fslogbno; 1157 offset_t sblkoff; 1158 time_t t; 1159 1160 /* 1161 * debug stuff 1162 */ 1163 debugclean(); 1164 1165 /* 1166 * set fsclean to its appropriate value 1167 */ 1168 fslogbno = sblock.fs_logbno; 1169 fsclean = sblock.fs_clean; 1170 fsreclaim = sblock.fs_reclaim; 1171 fsflags = sblock.fs_flags; 1172 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) { 1173 fsclean = FSACTIVE; 1174 } 1175 /* 1176 * If ufs log is not okay, note that we need to clear it. 1177 */ 1178 examinelog(sblock.fs_logbno, NULL); 1179 if (fslogbno && !(islog && islogok)) { 1180 fsclean = FSACTIVE; 1181 fslogbno = 0; 1182 } 1183 1184 /* 1185 * if necessary, update fs_clean and fs_state 1186 */ 1187 switch (fsclean) { 1188 1189 case FSACTIVE: 1190 if (!iscorrupt) { 1191 fsclean = FSSTABLE; 1192 fsreclaim = 0; 1193 } 1194 break; 1195 1196 case FSCLEAN: 1197 case FSSTABLE: 1198 if (iscorrupt) { 1199 fsclean = FSACTIVE; 1200 } else { 1201 fsreclaim = 0; 1202 } 1203 break; 1204 1205 case FSLOG: 1206 if (iscorrupt) { 1207 fsclean = FSACTIVE; 1208 } else if (!islog || fslogbno == 0) { 1209 fsclean = FSSTABLE; 1210 fsreclaim = 0; 1211 } else if (fflag) { 1212 fsreclaim = 0; 1213 } 1214 break; 1215 1216 case FSFIX: 1217 fsclean = FSBAD; 1218 if (errorlocked && !iscorrupt) { 1219 fsclean = islog ? FSLOG : FSCLEAN; 1220 } 1221 break; 1222 1223 default: 1224 if (iscorrupt) { 1225 fsclean = FSACTIVE; 1226 } else { 1227 fsclean = FSSTABLE; 1228 fsreclaim = 0; 1229 } 1230 } 1231 1232 if (largefile_count > 0) 1233 fsflags |= FSLARGEFILES; 1234 else 1235 fsflags &= ~FSLARGEFILES; 1236 1237 /* 1238 * If the only flag difference is that the superblock thinks 1239 * there are largefiles, but we didn't find any, then ignore 1240 * the discrepancy. The kernel never clears the flag, it just 1241 * sets it whenever a largefile is created. Since it is harmless 1242 * to have the flag set when it's not actually true, that by 1243 * itself is not grounds for declaring the superblock to be 1244 * in the wrong state. 1245 * 1246 * This could, in theory, prevent a filesystem from being 1247 * mounted, if the existing superblock claims such files are 1248 * out there and the user uses the nolargefiles option. So, 1249 * if we were forced to scan the filesystem, go ahead and 1250 * take FSLARGEFILES into account as well. 1251 */ 1252 if (fflag) 1253 flags_ok = 0; 1254 else 1255 flags_ok = (sblock.fs_flags & ~FSLARGEFILES) == fsflags; 1256 1257 if (debug) 1258 (void) printf( 1259 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n", 1260 largefile_count, sblock.fs_flags, flags_ok); 1261 1262 /* 1263 * If fs is unchanged, do nothing. 1264 */ 1265 if ((!isdirty) && (flags_ok) && 1266 (fslogbno == sblock.fs_logbno) && 1267 (sblock.fs_clean == fsclean) && 1268 (sblock.fs_reclaim == fsreclaim) && 1269 (FSOKAY == (sblock.fs_state + sblock.fs_time))) { 1270 if (errorlocked) { 1271 if (!do_errorlock(LOCKFS_ULOCK)) 1272 pwarn( 1273 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n"); 1274 } 1275 return (freedlog); 1276 } 1277 1278 /* 1279 * if user allows, update superblock state 1280 */ 1281 if (debug) { 1282 (void) printf( 1283 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1284 sblock.fs_flags, sblock.fs_logbno, 1285 sblock.fs_clean, sblock.fs_reclaim, 1286 sblock.fs_state + sblock.fs_time); 1287 (void) printf( 1288 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1289 fsflags, fslogbno, fsclean, fsreclaim, FSOKAY); 1290 } 1291 if (!isdirty && !preen && !rerun && 1292 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0)) 1293 return (freedlog); 1294 1295 (void) time(&t); 1296 sblock.fs_time = (time32_t)t; 1297 if (debug) 1298 printclean(); 1299 1300 if (sblock.fs_logbno != fslogbno) { 1301 examinelog(sblock.fs_logbno, &freelogblk); 1302 freedlog++; 1303 } 1304 1305 sblock.fs_logbno = fslogbno; 1306 sblock.fs_clean = fsclean; 1307 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 1308 sblock.fs_reclaim = fsreclaim; 1309 sblock.fs_flags = fsflags; 1310 1311 /* 1312 * if superblock can't be written, return 1313 */ 1314 if (fswritefd < 0) 1315 return (freedlog); 1316 1317 /* 1318 * Read private copy of superblock, update clean flag, and write it. 1319 */ 1320 bno = sblk.b_bno; 1321 size = sblk.b_size; 1322 1323 sblkoff = ldbtob(bno); 1324 1325 if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL) 1326 errexit("out of memory"); 1327 if (llseek(fsreadfd, sblkoff, 0) == -1) { 1328 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1329 (longlong_t)bno, strerror(errno)); 1330 goto out; 1331 } 1332 1333 if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) { 1334 report_io_prob("READ FROM", bno, size, io_res); 1335 goto out; 1336 } 1337 1338 cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno; 1339 cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean; 1340 cleanbuf.b_un.b_fs->fs_state = sblock.fs_state; 1341 cleanbuf.b_un.b_fs->fs_time = sblock.fs_time; 1342 cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim; 1343 cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags; 1344 1345 if (llseek(fswritefd, sblkoff, 0) == -1) { 1346 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1347 (longlong_t)bno, strerror(errno)); 1348 goto out; 1349 } 1350 1351 if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) { 1352 report_io_prob("WRITE TO", bno, size, io_res); 1353 goto out; 1354 } 1355 1356 /* 1357 * 1208040 1358 * If we had to use -b to grab an alternate superblock, then we 1359 * likely had to do so because of unacceptable differences between 1360 * the main and alternate superblocks. So, we had better update 1361 * the alternate superblock as well, or we'll just fail again 1362 * the next time we attempt to run fsck! 1363 */ 1364 if (bflag != 0) { 1365 write_altsb(fswritefd); 1366 } 1367 1368 if (errorlocked) { 1369 if (!do_errorlock(LOCKFS_ULOCK)) 1370 pwarn( 1371 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n"); 1372 } 1373 1374 out: 1375 if (cleanbuf.b_un.b_buf != NULL) { 1376 free((void *)cleanbuf.b_un.b_buf); 1377 } 1378 1379 return (freedlog); 1380 } 1381 1382 static void 1383 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure) 1384 { 1385 if (failure < 0) 1386 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n", 1387 what, (int)bno, strerror(errno)); 1388 else if (failure == 0) 1389 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n", 1390 what, (int)bno); 1391 else 1392 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n", 1393 what, (int)bno, (unsigned)failure, (unsigned)expected); 1394 } 1395 1396 /* 1397 * print out clean info 1398 */ 1399 void 1400 printclean(void) 1401 { 1402 caddr_t s; 1403 1404 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) 1405 s = "unknown"; 1406 else 1407 switch (sblock.fs_clean) { 1408 1409 case FSACTIVE: 1410 s = "active"; 1411 break; 1412 1413 case FSCLEAN: 1414 s = "clean"; 1415 break; 1416 1417 case FSSTABLE: 1418 s = "stable"; 1419 break; 1420 1421 case FSLOG: 1422 s = "logging"; 1423 break; 1424 1425 case FSBAD: 1426 s = "is bad"; 1427 break; 1428 1429 case FSFIX: 1430 s = "being fixed"; 1431 break; 1432 1433 default: 1434 s = "unknown"; 1435 } 1436 1437 if (preen) 1438 pwarn("is %s.\n", s); 1439 else 1440 (void) printf("** %s is %s.\n", devname, s); 1441 } 1442 1443 int 1444 is_errorlocked(caddr_t fs) 1445 { 1446 int retval; 1447 struct stat64 statb; 1448 caddr_t mountp; 1449 struct mnttab *mntent; 1450 1451 retval = 0; 1452 1453 if (!fs) 1454 return (0); 1455 1456 if (stat64(fs, &statb) < 0) 1457 return (0); 1458 1459 if (S_ISDIR(statb.st_mode)) { 1460 mountp = fs; 1461 } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) { 1462 mntent = search_mnttab(NULL, fs, NULL, 0); 1463 if (mntent == NULL) 1464 return (0); 1465 mountp = mntent->mnt_mountp; 1466 if (mountp == NULL) /* theoretically a can't-happen */ 1467 return (0); 1468 } else { 1469 return (0); 1470 } 1471 1472 /* 1473 * From here on, must `goto out' to avoid memory leakage. 1474 */ 1475 1476 if (elock_combuf == NULL) 1477 elock_combuf = 1478 (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char)); 1479 else 1480 elock_combuf = 1481 (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN); 1482 1483 if (elock_combuf == NULL) 1484 goto out; 1485 1486 (void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN); 1487 1488 if (elock_mountp != NULL) { 1489 free(elock_mountp); 1490 } 1491 1492 elock_mountp = strdup(mountp); 1493 if (elock_mountp == NULL) 1494 goto out; 1495 1496 if (mountfd < 0) { 1497 if ((mountfd = open64(mountp, O_RDONLY)) == -1) 1498 goto out; 1499 } 1500 1501 if (lfp == NULL) { 1502 lfp = (struct lockfs *)malloc(sizeof (struct lockfs)); 1503 if (lfp == NULL) 1504 goto out; 1505 (void) memset((void *)lfp, 0, sizeof (struct lockfs)); 1506 } 1507 1508 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1509 lfp->lf_comment = elock_combuf; 1510 1511 if (ioctl(mountfd, _FIOLFSS, lfp) == -1) 1512 goto out; 1513 1514 /* 1515 * lint believes that the ioctl() (or any other function 1516 * taking lfp as an arg) could free lfp. This is not the 1517 * case, however. 1518 */ 1519 retval = LOCKFS_IS_ELOCK(lfp); 1520 1521 out: 1522 return (retval); 1523 } 1524 1525 /* 1526 * Given a name which is known to be a directory, see if it appears 1527 * in the vfstab. If so, return the entry's block (special) device 1528 * field via devstr. 1529 */ 1530 int 1531 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size) 1532 { 1533 return (NULL != search_vfstab(name, NULL, devstr, str_size)); 1534 } 1535 1536 /* 1537 * Given a name which is known to be a directory, see if it appears 1538 * in the mnttab. If so, return the entry's block (special) device 1539 * field via devstr. 1540 */ 1541 int 1542 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size) 1543 { 1544 return (NULL != search_mnttab(name, NULL, devstr, str_size)); 1545 } 1546 1547 /* 1548 * Search for mount point and/or special device in the given file. 1549 * The first matching entry is returned. 1550 * 1551 * If an entry is found and str_size is greater than zero, then 1552 * up to size_str bytes of the special device name from the entry 1553 * are copied to devstr. 1554 */ 1555 1556 #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \ 1557 st_nuller, st_init, st_searcher) \ 1558 { \ 1559 FILE *fp; \ 1560 struct st_type *retval = NULL; \ 1561 struct st_type key; \ 1562 static struct st_type buffer; \ 1563 \ 1564 /* LINTED ``assigned value never used'' */ \ 1565 st_nuller(&key); \ 1566 key.st_mount = mountp; \ 1567 key.st_special = special; \ 1568 st_init; \ 1569 \ 1570 if ((fp = fopen(st_file, "r")) == NULL) \ 1571 return (NULL); \ 1572 \ 1573 if (st_searcher(fp, &buffer, &key) == 0) { \ 1574 retval = &buffer; \ 1575 if (devstr != NULL && str_size > 0 && \ 1576 buffer.st_special != NULL) { \ 1577 (void) strlcpy(devstr, buffer.st_special, \ 1578 str_size); \ 1579 } \ 1580 } \ 1581 (void) fclose(fp); \ 1582 return (retval); \ 1583 } 1584 1585 static struct vfstab * 1586 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1587 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull, 1588 (retval = retval), getvfsany) 1589 1590 static struct mnttab * 1591 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1592 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull, 1593 (key.mnt_fstype = MNTTYPE_UFS), getmntany) 1594 1595 int 1596 do_errorlock(int lock_type) 1597 { 1598 caddr_t buf; 1599 time_t now; 1600 struct tm *local; 1601 int rc; 1602 1603 if (elock_combuf == NULL) 1604 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n", 1605 elock_mountp ? elock_mountp : "<null>", 1606 lock_type); 1607 1608 if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) == 1609 NULL) { 1610 errexit("Couldn't alloc memory for temp. lock status buffer\n"); 1611 } 1612 if (lfp == NULL) { 1613 errexit("do_errorlock(%s, %d): lockfs status unallocated\n", 1614 elock_mountp, lock_type); 1615 } 1616 1617 (void) memmove((void *)buf, (void *)elock_combuf, 1618 LOCKFS_MAXCOMMENTLEN-1); 1619 1620 switch (lock_type) { 1621 case LOCKFS_ELOCK: 1622 /* 1623 * Note that if it is error-locked, we won't get an 1624 * error back if we try to error-lock it again. 1625 */ 1626 if (time(&now) != (time_t)-1) { 1627 if ((local = localtime(&now)) != NULL) 1628 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1629 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d", 1630 elock_combuf, (int)pid, 1631 local->tm_mon + 1, local->tm_mday, 1632 (local->tm_year % 100), local->tm_hour, 1633 local->tm_min, local->tm_sec); 1634 else 1635 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1636 "%s [fsck pid %d", elock_combuf, pid); 1637 1638 } else { 1639 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1640 "%s [fsck pid %d", elock_combuf, pid); 1641 } 1642 break; 1643 1644 case LOCKFS_ULOCK: 1645 if (time(&now) != (time_t)-1) { 1646 if ((local = localtime(&now)) != NULL) { 1647 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1648 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]", 1649 elock_combuf, 1650 local->tm_mon + 1, local->tm_mday, 1651 (local->tm_year % 100), local->tm_hour, 1652 local->tm_min, local->tm_sec); 1653 } else { 1654 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1655 "%s]", elock_combuf); 1656 } 1657 } else { 1658 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1659 "%s]", elock_combuf); 1660 } 1661 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) { 1662 pwarn("do_errorlock: unlock failed: %s\n", 1663 strerror(errno)); 1664 goto out; 1665 } 1666 break; 1667 1668 default: 1669 break; 1670 } 1671 1672 (void) memmove((void *)elock_combuf, (void *)buf, 1673 LOCKFS_MAXCOMMENTLEN - 1); 1674 1675 lfp->lf_lock = lock_type; 1676 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1677 lfp->lf_comment = elock_combuf; 1678 lfp->lf_flags = 0; 1679 errno = 0; 1680 1681 if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) { 1682 if (errno == EINVAL) { 1683 pwarn("Another fsck active?\n"); 1684 iscorrupt = 0; /* don't go away mad, just go away */ 1685 } else { 1686 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n", 1687 lock_type, elock_combuf, strerror(errno)); 1688 } 1689 } 1690 out: 1691 if (buf != NULL) { 1692 free((void *)buf); 1693 } 1694 1695 return (rc != -1); 1696 } 1697 1698 /* 1699 * Shadow inode support. To register a shadow with a client is to note 1700 * that an inode (the client) refers to the shadow. 1701 */ 1702 1703 static struct shadowclients * 1704 newshadowclient(struct shadowclients *prev) 1705 { 1706 struct shadowclients *rc; 1707 1708 rc = (struct shadowclients *)malloc(sizeof (*rc)); 1709 if (rc == NULL) 1710 errexit("newshadowclient: cannot malloc shadow client"); 1711 rc->next = prev; 1712 rc->nclients = 0; 1713 1714 rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) * 1715 maxshadowclients); 1716 if (rc->client == NULL) 1717 errexit("newshadowclient: cannot malloc client array"); 1718 return (rc); 1719 } 1720 1721 void 1722 registershadowclient(fsck_ino_t shadow, fsck_ino_t client, 1723 struct shadowclientinfo **info) 1724 { 1725 struct shadowclientinfo *sci; 1726 struct shadowclients *scc; 1727 1728 /* 1729 * Already have a record for this shadow? 1730 */ 1731 for (sci = *info; sci != NULL; sci = sci->next) 1732 if (sci->shadow == shadow) 1733 break; 1734 if (sci == NULL) { 1735 /* 1736 * It's a new shadow, add it to the list 1737 */ 1738 sci = (struct shadowclientinfo *)malloc(sizeof (*sci)); 1739 if (sci == NULL) 1740 errexit("registershadowclient: cannot malloc"); 1741 sci->next = *info; 1742 *info = sci; 1743 sci->shadow = shadow; 1744 sci->totalClients = 0; 1745 sci->clients = newshadowclient(NULL); 1746 } 1747 1748 sci->totalClients++; 1749 scc = sci->clients; 1750 if (scc->nclients >= maxshadowclients) { 1751 scc = newshadowclient(sci->clients); 1752 sci->clients = scc; 1753 } 1754 1755 scc->client[scc->nclients++] = client; 1756 } 1757 1758 /* 1759 * Locate and discard a shadow. 1760 */ 1761 void 1762 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info) 1763 { 1764 struct shadowclientinfo *sci, *prev; 1765 1766 /* 1767 * Do we have a record for this shadow? 1768 */ 1769 prev = NULL; 1770 for (sci = *info; sci != NULL; sci = sci->next) { 1771 if (sci->shadow == shadow) 1772 break; 1773 prev = sci; 1774 } 1775 1776 if (sci != NULL) { 1777 /* 1778 * First, pull it off the list, since we know there 1779 * shouldn't be any future references to this one. 1780 */ 1781 if (prev == NULL) 1782 *info = sci->next; 1783 else 1784 prev->next = sci->next; 1785 deshadow(sci, clearattrref); 1786 } 1787 } 1788 1789 /* 1790 * Discard all memory used to track clients of a shadow. 1791 */ 1792 void 1793 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t)) 1794 { 1795 struct shadowclients *clients, *discard; 1796 int idx; 1797 1798 clients = sci->clients; 1799 while (clients != NULL) { 1800 discard = clients; 1801 clients = clients->next; 1802 if (discard->client != NULL) { 1803 if (cb != NULL) { 1804 for (idx = 0; idx < discard->nclients; idx++) 1805 (*cb)(discard->client[idx]); 1806 } 1807 free((void *)discard->client); 1808 } 1809 free((void *)discard); 1810 } 1811 1812 free((void *)sci); 1813 } 1814 1815 /* 1816 * Allocate more buffer as need arises but allocate one at a time. 1817 * This is done to make sure that fsck does not exit with error if it 1818 * needs more buffer to complete its task. 1819 */ 1820 static struct bufarea * 1821 alloc_bufarea(void) 1822 { 1823 struct bufarea *newbp; 1824 caddr_t bufp; 1825 1826 bufp = malloc((unsigned int)sblock.fs_bsize); 1827 if (bufp == NULL) 1828 return (NULL); 1829 1830 newbp = (struct bufarea *)malloc(sizeof (struct bufarea)); 1831 if (newbp == NULL) { 1832 free((void *)bufp); 1833 return (NULL); 1834 } 1835 1836 initbarea(newbp); 1837 newbp->b_un.b_buf = bufp; 1838 newbp->b_prev = &bufhead; 1839 newbp->b_next = bufhead.b_next; 1840 bufhead.b_next->b_prev = newbp; 1841 bufhead.b_next = newbp; 1842 bufhead.b_size++; 1843 return (newbp); 1844 } 1845 1846 /* 1847 * We length-limit in both unrawname() and rawname() to avoid 1848 * overflowing our arrays or those of our naive, trusting callers. 1849 */ 1850 1851 caddr_t 1852 unrawname(caddr_t name) 1853 { 1854 caddr_t dp; 1855 static char fullname[MAXPATHLEN + 1]; 1856 1857 if ((dp = getfullblkname(name)) == NULL) 1858 return (""); 1859 1860 (void) strlcpy(fullname, dp, sizeof (fullname)); 1861 /* 1862 * Not reporting under debug, as the allocation isn't 1863 * reported by getfullblkname. The idea is that we 1864 * produce balanced alloc/free instances. 1865 */ 1866 free(dp); 1867 1868 return (fullname); 1869 } 1870 1871 caddr_t 1872 rawname(caddr_t name) 1873 { 1874 caddr_t dp; 1875 static char fullname[MAXPATHLEN + 1]; 1876 1877 if ((dp = getfullrawname(name)) == NULL) 1878 return (""); 1879 1880 (void) strlcpy(fullname, dp, sizeof (fullname)); 1881 /* 1882 * Not reporting under debug, as the allocation isn't 1883 * reported by getfullblkname. The idea is that we 1884 * produce balanced alloc/free instances. 1885 */ 1886 free(dp); 1887 1888 return (fullname); 1889 } 1890 1891 /* 1892 * Make sure that a cg header looks at least moderately reasonable. 1893 * We want to be able to trust the contents enough to be able to use 1894 * the standard accessor macros. So, besides looking at the obvious 1895 * such as the magic number, we verify that the offset field values 1896 * are properly aligned and not too big or small. 1897 * 1898 * Returns a NULL pointer if the cg is sane enough for our needs, else 1899 * a dynamically-allocated string describing all of its faults. 1900 */ 1901 #define Append_Error(full, full_len, addition, addition_len) \ 1902 if (full == NULL) { \ 1903 full = addition; \ 1904 full_len = addition_len; \ 1905 } else { \ 1906 /* lint doesn't think realloc() understands NULLs */ \ 1907 full = realloc(full, full_len + addition_len + 1); \ 1908 if (full == NULL) { \ 1909 errexit("Out of memory in cg_sanity"); \ 1910 /* NOTREACHED */ \ 1911 } \ 1912 (void) strcpy(full + full_len, addition); \ 1913 full_len += addition_len; \ 1914 free(addition); \ 1915 } 1916 1917 caddr_t 1918 cg_sanity(struct cg *cgp, int cgno, int *is_fatal) 1919 { 1920 caddr_t full_err; 1921 caddr_t this_err = NULL; 1922 int full_len, this_len; 1923 daddr32_t ndblk; 1924 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 1925 daddr32_t exp_freeoff, exp_nextfreeoff; 1926 1927 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 1928 &exp_freeoff, &exp_nextfreeoff, &ndblk); 1929 1930 full_err = NULL; 1931 full_len = 0; 1932 *is_fatal = 0; 1933 1934 if (!cg_chkmagic(cgp)) { 1935 this_len = fsck_asprintf(&this_err, 1936 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n", 1937 cgp->cg_magic, CG_MAGIC); 1938 Append_Error(full_err, full_len, this_err, this_len); 1939 *is_fatal = 1; 1940 } 1941 1942 if (cgp->cg_cgx != cgno) { 1943 this_len = fsck_asprintf(&this_err, 1944 "WRONG CG NUMBER (%d should be %d)\n", 1945 cgp->cg_cgx, cgno); 1946 Append_Error(full_err, full_len, this_err, this_len); 1947 } 1948 1949 if ((cgp->cg_btotoff & 3) != 0) { 1950 this_len = fsck_asprintf(&this_err, 1951 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n", 1952 cgp->cg_btotoff); 1953 Append_Error(full_err, full_len, this_err, this_len); 1954 } 1955 1956 if ((cgp->cg_boff & 1) != 0) { 1957 this_len = fsck_asprintf(&this_err, 1958 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n", 1959 cgp->cg_boff); 1960 Append_Error(full_err, full_len, this_err, this_len); 1961 } 1962 1963 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 1964 if (cgp->cg_ncyl < 1) { 1965 this_len = fsck_asprintf(&this_err, 1966 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n", 1967 cgp->cg_ncyl); 1968 } else { 1969 this_len = fsck_asprintf(&this_err, 1970 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n", 1971 cgp->cg_ncyl, sblock.fs_cpg); 1972 } 1973 Append_Error(full_err, full_len, this_err, this_len); 1974 } 1975 1976 if (cgp->cg_niblk != sblock.fs_ipg) { 1977 this_len = fsck_asprintf(&this_err, 1978 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n", 1979 cgp->cg_niblk, sblock.fs_ipg); 1980 Append_Error(full_err, full_len, this_err, this_len); 1981 } 1982 1983 if (cgp->cg_ndblk != ndblk) { 1984 this_len = fsck_asprintf(&this_err, 1985 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n", 1986 cgp->cg_ndblk, ndblk); 1987 Append_Error(full_err, full_len, this_err, this_len); 1988 } 1989 1990 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) { 1991 this_len = fsck_asprintf(&this_err, 1992 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION " 1993 "(%d should be at least 0 and less than %d)\n", 1994 cgp->cg_rotor, ndblk); 1995 Append_Error(full_err, full_len, this_err, this_len); 1996 } 1997 1998 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) { 1999 this_len = fsck_asprintf(&this_err, 2000 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION " 2001 "(%d should be at least 0 and less than %d)\n", 2002 cgp->cg_frotor, ndblk); 2003 Append_Error(full_err, full_len, this_err, this_len); 2004 } 2005 2006 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 2007 this_len = fsck_asprintf(&this_err, 2008 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION " 2009 "(%d should be at least 0 and less than %d)\n", 2010 cgp->cg_irotor, sblock.fs_ipg); 2011 Append_Error(full_err, full_len, this_err, this_len); 2012 } 2013 2014 if (cgp->cg_btotoff != exp_btotoff) { 2015 this_len = fsck_asprintf(&this_err, 2016 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n", 2017 cgp->cg_btotoff, exp_btotoff); 2018 Append_Error(full_err, full_len, this_err, this_len); 2019 } 2020 2021 if (cgp->cg_boff != exp_boff) { 2022 this_len = fsck_asprintf(&this_err, 2023 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n", 2024 cgp->cg_boff, exp_boff); 2025 Append_Error(full_err, full_len, this_err, this_len); 2026 } 2027 2028 if (cgp->cg_iusedoff != exp_iusedoff) { 2029 this_len = fsck_asprintf(&this_err, 2030 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n", 2031 cgp->cg_iusedoff, exp_iusedoff); 2032 Append_Error(full_err, full_len, this_err, this_len); 2033 } 2034 2035 if (cgp->cg_freeoff != exp_freeoff) { 2036 this_len = fsck_asprintf(&this_err, 2037 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n", 2038 cgp->cg_freeoff, exp_freeoff); 2039 Append_Error(full_err, full_len, this_err, this_len); 2040 } 2041 2042 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2043 this_len = fsck_asprintf(&this_err, 2044 "END OF HEADER POSITION INCORRECT (%d should be %d)\n", 2045 cgp->cg_nextfreeoff, exp_nextfreeoff); 2046 Append_Error(full_err, full_len, this_err, this_len); 2047 } 2048 2049 return (full_err); 2050 } 2051 2052 #undef Append_Error 2053 2054 /* 2055 * This is taken from mkfs, and is what is used to come up with the 2056 * original values for a struct cg. This implies that, since these 2057 * are all constants, recalculating them now should give us the same 2058 * thing as what's on disk. 2059 */ 2060 static void 2061 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff, 2062 daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff, 2063 daddr32_t *ndblk) 2064 { 2065 daddr32_t cbase, dmax; 2066 struct cg *cgp; 2067 2068 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno), 2069 (size_t)sblock.fs_cgsize); 2070 cgp = cgblk.b_un.b_cg; 2071 2072 cbase = cgbase(&sblock, cgno); 2073 dmax = cbase + sblock.fs_fpg; 2074 if (dmax > sblock.fs_size) 2075 dmax = sblock.fs_size; 2076 2077 /* LINTED pointer difference won't overflow */ 2078 *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link); 2079 *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t); 2080 *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t); 2081 *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY); 2082 *nextfreeoff = *freeoff + 2083 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY); 2084 *ndblk = dmax - cbase; 2085 } 2086 2087 /* 2088 * Corrects all fields in the cg that can be done with the available 2089 * redundant data. 2090 */ 2091 void 2092 fix_cg(struct cg *cgp, int cgno) 2093 { 2094 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 2095 daddr32_t exp_freeoff, exp_nextfreeoff; 2096 daddr32_t ndblk; 2097 2098 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 2099 &exp_freeoff, &exp_nextfreeoff, &ndblk); 2100 2101 if (cgp->cg_cgx != cgno) { 2102 cgp->cg_cgx = cgno; 2103 } 2104 2105 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 2106 if (cgno == sblock.fs_ncg) { 2107 cgp->cg_ncyl = sblock.fs_ncyl - 2108 (sblock.fs_ncg * (cgno - 1)); 2109 } else { 2110 cgp->cg_ncyl = sblock.fs_cpg; 2111 } 2112 } 2113 2114 if (cgp->cg_niblk != sblock.fs_ipg) { 2115 /* 2116 * This is not used by the kernel, so it's pretty 2117 * harmless if it's wrong. 2118 */ 2119 cgp->cg_niblk = sblock.fs_ipg; 2120 } 2121 2122 if (cgp->cg_ndblk != ndblk) { 2123 cgp->cg_ndblk = ndblk; 2124 } 2125 2126 /* 2127 * For the rotors, any position's valid, so pick the one we know 2128 * will always exist. 2129 */ 2130 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) { 2131 cgp->cg_rotor = 0; 2132 } 2133 2134 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) { 2135 cgp->cg_frotor = 0; 2136 } 2137 2138 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 2139 cgp->cg_irotor = 0; 2140 } 2141 2142 /* 2143 * For btotoff and boff, if they're misaligned they won't 2144 * match the expected values, so we're catching both cases 2145 * here. Of course, if any of these are off, it seems likely 2146 * that the tables really won't be where we calculate they 2147 * should be anyway. 2148 */ 2149 if (cgp->cg_btotoff != exp_btotoff) { 2150 cgp->cg_btotoff = exp_btotoff; 2151 } 2152 2153 if (cgp->cg_boff != exp_boff) { 2154 cgp->cg_boff = exp_boff; 2155 } 2156 2157 if (cgp->cg_iusedoff != exp_iusedoff) { 2158 cgp->cg_iusedoff = exp_iusedoff; 2159 } 2160 2161 if (cgp->cg_freeoff != exp_freeoff) { 2162 cgp->cg_freeoff = exp_freeoff; 2163 } 2164 2165 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2166 cgp->cg_nextfreeoff = exp_nextfreeoff; 2167 } 2168 2169 /* 2170 * We know there was at least one correctable problem, 2171 * or else we wouldn't have been called. So instead of 2172 * marking the buffer dirty N times above, just do it 2173 * once here. 2174 */ 2175 cgdirty(); 2176 } 2177 2178 void 2179 examinelog(daddr32_t start, void (*cb)(daddr32_t)) 2180 { 2181 struct bufarea *bp; 2182 extent_block_t *ebp; 2183 extent_t *ep; 2184 daddr32_t nfno, fno; 2185 int i; 2186 int j; 2187 2188 if (start < SBLOCK) 2189 return; 2190 2191 /* 2192 * Read errors will return zeros, which will cause us 2193 * to do nothing harmful, so don't need to handle it. 2194 */ 2195 bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno), 2196 (size_t)sblock.fs_bsize); 2197 ebp = (void *)bp->b_un.b_buf; 2198 2199 /* 2200 * Does it look like a log allocation table? 2201 */ 2202 /* LINTED pointer cast is aligned */ 2203 if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf, 2204 sblock.fs_bsize)) 2205 return; 2206 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) 2207 return; 2208 2209 ep = &ebp->extents[0]; 2210 for (i = 0; i < ebp->nextents; ++i, ++ep) { 2211 fno = logbtofrag(&sblock, ep->pbno); 2212 nfno = dbtofsb(&sblock, ep->nbno); 2213 for (j = 0; j < nfno; ++j, ++fno) { 2214 /* 2215 * Invoke the callback first, so that pass1 can 2216 * mark the log blocks in-use. Then, if any 2217 * subsequent pass over the log shows us that a 2218 * block got freed (say, it was also claimed by 2219 * an inode that we cleared), we can safely declare 2220 * the log bad. 2221 */ 2222 if (cb != NULL) 2223 (*cb)(fno); 2224 if (!testbmap(fno)) 2225 islogok = 0; 2226 } 2227 } 2228 brelse(bp); 2229 2230 if (cb != NULL) { 2231 fno = logbtofrag(&sblock, sblock.fs_logbno); 2232 for (j = 0; j < sblock.fs_frag; ++j, ++fno) 2233 (*cb)(fno); 2234 } 2235 } 2236 2237 static void 2238 freelogblk(daddr32_t frag) 2239 { 2240 freeblk(sblock.fs_logbno, frag, 1); 2241 } 2242 2243 caddr_t 2244 file_id(fsck_ino_t inum, mode_t mode) 2245 { 2246 static char name[MAXPATHLEN + 1]; 2247 2248 if (lfdir == inum) { 2249 return (lfname); 2250 } 2251 2252 if ((mode & IFMT) == IFDIR) { 2253 (void) strcpy(name, "DIR"); 2254 } else if ((mode & IFMT) == IFATTRDIR) { 2255 (void) strcpy(name, "ATTR DIR"); 2256 } else if ((mode & IFMT) == IFSHAD) { 2257 (void) strcpy(name, "ACL"); 2258 } else { 2259 (void) strcpy(name, "FILE"); 2260 } 2261 2262 return (name); 2263 } 2264 2265 /* 2266 * Simple initializer for inodesc structures, so users of only a few 2267 * fields don't have to worry about getting the right defaults for 2268 * everything out. 2269 */ 2270 void 2271 init_inodesc(struct inodesc *idesc) 2272 { 2273 /* 2274 * Most fields should be zero, just hit the special cases. 2275 */ 2276 (void) memset((void *)idesc, 0, sizeof (struct inodesc)); 2277 idesc->id_fix = DONTKNOW; 2278 idesc->id_lbn = -1; 2279 idesc->id_truncto = -1; 2280 idesc->id_firsthole = -1; 2281 } 2282 2283 /* 2284 * Compare routine for tsearch(C) to use on ino_t instances. 2285 */ 2286 int 2287 ino_t_cmp(const void *left, const void *right) 2288 { 2289 const fsck_ino_t lino = (const fsck_ino_t)left; 2290 const fsck_ino_t rino = (const fsck_ino_t)right; 2291 2292 return (lino - rino); 2293 } 2294 2295 int 2296 cgisdirty(void) 2297 { 2298 return (cgblk.b_dirty); 2299 } 2300 2301 void 2302 cgflush(void) 2303 { 2304 flush(fswritefd, &cgblk); 2305 } 2306 2307 void 2308 dirty(struct bufarea *bp) 2309 { 2310 if (fswritefd < 0) { 2311 pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n"); 2312 } else { 2313 (bp)->b_dirty = 1; 2314 isdirty = 1; 2315 } 2316 } 2317 2318 void 2319 initbarea(struct bufarea *bp) 2320 { 2321 (bp)->b_dirty = 0; 2322 (bp)->b_bno = (diskaddr_t)-1LL; 2323 (bp)->b_flags = 0; 2324 (bp)->b_cnt = 0; 2325 (bp)->b_errs = 0; 2326 } 2327 2328 /* 2329 * Partition-sizing routines adapted from ../newfs/newfs.c. 2330 * Needed because calcsb() needs to use mkfs to work out what the 2331 * superblock should be, and mkfs insists on being told how many 2332 * sectors to use. 2333 * 2334 * Error handling assumes we're never called while preening. 2335 * 2336 * XXX This should be extracted into a ../ufslib.{c,h}, 2337 * in the same spirit to ../../fslib.{c,h}. Once that is 2338 * done, both fsck and newfs should be modified to link 2339 * against it. 2340 */ 2341 2342 static int label_type; 2343 2344 #define LABEL_TYPE_VTOC 1 2345 #define LABEL_TYPE_EFI 2 2346 #define LABEL_TYPE_OTHER 3 2347 2348 #define MB (1024 * 1024) 2349 #define SECTORS_PER_TERABYTE (1LL << 31) 2350 #define FS_SIZE_UPPER_LIMIT 0x100000000000LL 2351 2352 diskaddr_t 2353 getdisksize(caddr_t disk, int fd) 2354 { 2355 int rpm; 2356 struct dk_geom g; 2357 struct dk_cinfo ci; 2358 diskaddr_t actual_size; 2359 2360 /* 2361 * get_device_size() determines the actual size of the 2362 * device, and also the disk's attributes, such as geometry. 2363 */ 2364 actual_size = get_device_size(fd, disk); 2365 2366 if (label_type == LABEL_TYPE_VTOC) { 2367 if (ioctl(fd, DKIOCGGEOM, &g)) { 2368 pwarn("%s: Unable to read Disk geometry", disk); 2369 return (0); 2370 } 2371 if (sblock.fs_nsect == 0) 2372 sblock.fs_nsect = g.dkg_nsect; 2373 if (sblock.fs_ntrak == 0) 2374 sblock.fs_ntrak = g.dkg_nhead; 2375 if (sblock.fs_rps == 0) { 2376 rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm; 2377 sblock.fs_rps = rpm / 60; 2378 } 2379 } 2380 2381 if (sblock.fs_bsize == 0) 2382 sblock.fs_bsize = MAXBSIZE; 2383 2384 /* 2385 * Adjust maxcontig by the device's maxtransfer. If maxtransfer 2386 * information is not available, default to the min of a MB and 2387 * maxphys. 2388 */ 2389 if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) { 2390 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE; 2391 if (sblock.fs_maxcontig < 0) { 2392 int gotit, maxphys; 2393 2394 gotit = fsgetmaxphys(&maxphys, NULL); 2395 2396 /* 2397 * If we cannot get the maxphys value, default 2398 * to ufs_maxmaxphys (MB). 2399 */ 2400 if (gotit) { 2401 sblock.fs_maxcontig = MIN(maxphys, MB); 2402 } else { 2403 sblock.fs_maxcontig = MB; 2404 } 2405 } 2406 sblock.fs_maxcontig /= sblock.fs_bsize; 2407 } 2408 2409 return (actual_size); 2410 } 2411 2412 /* 2413 * Figure out how big the partition we're dealing with is. 2414 */ 2415 static diskaddr_t 2416 get_device_size(int fd, caddr_t name) 2417 { 2418 struct vtoc vtoc; 2419 struct dk_gpt *efi_vtoc; 2420 diskaddr_t slicesize = 0; 2421 2422 int index = read_vtoc(fd, &vtoc); 2423 2424 if (index >= 0) { 2425 label_type = LABEL_TYPE_VTOC; 2426 } else { 2427 if (index == VT_ENOTSUP || index == VT_ERROR) { 2428 /* it might be an EFI label */ 2429 index = efi_alloc_and_read(fd, &efi_vtoc); 2430 if (index >= 0) 2431 label_type = LABEL_TYPE_EFI; 2432 } 2433 } 2434 2435 if (index < 0) { 2436 /* 2437 * Since both attempts to read the label failed, we're 2438 * going to fall back to a brute force approach to 2439 * determining the device's size: see how far out we can 2440 * perform reads on the device. 2441 */ 2442 2443 slicesize = brute_force_get_device_size(fd); 2444 if (slicesize == 0) { 2445 switch (index) { 2446 case VT_ERROR: 2447 pwarn("%s: %s\n", name, strerror(errno)); 2448 break; 2449 case VT_EIO: 2450 pwarn("%s: I/O error accessing VTOC", name); 2451 break; 2452 case VT_EINVAL: 2453 pwarn("%s: Invalid field in VTOC", name); 2454 break; 2455 default: 2456 pwarn("%s: unknown error %d accessing VTOC", 2457 name, index); 2458 break; 2459 } 2460 return (0); 2461 } else { 2462 label_type = LABEL_TYPE_OTHER; 2463 } 2464 } 2465 2466 if (label_type == LABEL_TYPE_EFI) { 2467 slicesize = efi_vtoc->efi_parts[index].p_size; 2468 efi_free(efi_vtoc); 2469 } else if (label_type == LABEL_TYPE_VTOC) { 2470 /* 2471 * In the vtoc struct, p_size is a 32-bit signed quantity. 2472 * In the dk_gpt struct (efi's version of the vtoc), p_size 2473 * is an unsigned 64-bit quantity. By casting the vtoc's 2474 * psize to an unsigned 32-bit quantity, it will be copied 2475 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without 2476 * sign extension. 2477 */ 2478 2479 slicesize = (uint32_t)vtoc.v_part[index].p_size; 2480 } 2481 2482 return (slicesize); 2483 } 2484 2485 /* 2486 * brute_force_get_device_size 2487 * 2488 * Determine the size of the device by seeing how far we can 2489 * read. Doing an llseek( , , SEEK_END) would probably work 2490 * in most cases, but we've seen at least one third-party driver 2491 * which doesn't correctly support the SEEK_END option when the 2492 * the device is greater than a terabyte. 2493 */ 2494 2495 static diskaddr_t 2496 brute_force_get_device_size(int fd) 2497 { 2498 diskaddr_t min_fail = 0; 2499 diskaddr_t max_succeed = 0; 2500 diskaddr_t cur_db_off; 2501 char buf[DEV_BSIZE]; 2502 2503 /* 2504 * First, see if we can read the device at all, just to 2505 * eliminate errors that have nothing to do with the 2506 * device's size. 2507 */ 2508 2509 if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) || 2510 ((read(fd, buf, DEV_BSIZE)) == -1)) 2511 return (0); /* can't determine size */ 2512 2513 /* 2514 * Now, go sequentially through the multiples of 4TB 2515 * to find the first read that fails (this isn't strictly 2516 * the most efficient way to find the actual size if the 2517 * size really could be anything between 0 and 2**64 bytes. 2518 * We expect the sizes to be less than 16 TB for some time, 2519 * so why do a bunch of reads that are larger than that? 2520 * However, this algorithm *will* work for sizes of greater 2521 * than 16 TB. We're just not optimizing for those sizes.) 2522 */ 2523 2524 /* 2525 * XXX lint uses 32-bit arithmetic for doing flow analysis. 2526 * We're using > 32-bit constants here. Therefore, its flow 2527 * analysis is wrong. For the time being, ignore complaints 2528 * from it about the body of the for() being unreached. 2529 */ 2530 for (cur_db_off = SECTORS_PER_TERABYTE * 4; 2531 (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT); 2532 cur_db_off += 4 * SECTORS_PER_TERABYTE) { 2533 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2534 SEEK_SET) == -1) || 2535 (read(fd, buf, DEV_BSIZE) != DEV_BSIZE)) 2536 min_fail = cur_db_off; 2537 else 2538 max_succeed = cur_db_off; 2539 } 2540 2541 /* 2542 * XXX Same lint flow analysis problem as above. 2543 */ 2544 if (min_fail == 0) 2545 return (0); 2546 2547 /* 2548 * We now know that the size of the device is less than 2549 * min_fail and greater than or equal to max_succeed. Now 2550 * keep splitting the difference until the actual size in 2551 * sectors in known. We also know that the difference 2552 * between max_succeed and min_fail at this time is 2553 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which 2554 * simplifies the math below. 2555 */ 2556 2557 while (min_fail - max_succeed > 1) { 2558 cur_db_off = max_succeed + (min_fail - max_succeed)/2; 2559 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2560 SEEK_SET)) == -1) || 2561 ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)) 2562 min_fail = cur_db_off; 2563 else 2564 max_succeed = cur_db_off; 2565 } 2566 2567 /* the size is the last successfully read sector offset plus one */ 2568 return (max_succeed + 1); 2569 } 2570 2571 static void 2572 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap) 2573 { 2574 struct dinode *dp; 2575 char pathbuf[MAXPATHLEN + 1]; 2576 2577 vpwarn(fmt, ap); 2578 (void) putchar(' '); 2579 pinode(ino); 2580 (void) printf("\n"); 2581 getpathname(pathbuf, cwd, ino); 2582 if (ino < UFSROOTINO || ino > maxino) { 2583 pfatal("NAME=%s\n", pathbuf); 2584 return; 2585 } 2586 dp = ginode(ino); 2587 if (ftypeok(dp)) 2588 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf); 2589 else 2590 pfatal("NAME=%s\n", pathbuf); 2591 } 2592 2593 void 2594 direrror(fsck_ino_t ino, caddr_t fmt, ...) 2595 { 2596 va_list ap; 2597 2598 va_start(ap, fmt); 2599 vfileerror(ino, ino, fmt, ap); 2600 va_end(ap); 2601 } 2602 2603 static void 2604 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap) 2605 { 2606 vfileerror(ino, ino, fmt, ap); 2607 } 2608 2609 void 2610 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...) 2611 { 2612 va_list ap; 2613 2614 va_start(ap, fmt); 2615 vfileerror(cwd, ino, fmt, ap); 2616 va_end(ap); 2617 } 2618 2619 /* 2620 * Adds the given inode to the orphaned-directories list, limbo_dirs. 2621 * Assumes that the caller has set INCLEAR in the inode's statemap[] 2622 * entry. 2623 * 2624 * With INCLEAR set, the inode will get ignored by passes 2 and 3, 2625 * meaning it's effectively an orphan. It needs to be noted now, so 2626 * it will be remembered in pass 4. 2627 */ 2628 2629 void 2630 add_orphan_dir(fsck_ino_t ino) 2631 { 2632 if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL) 2633 errexit("add_orphan_dir: out of memory"); 2634 } 2635 2636 /* 2637 * Remove an inode from the orphaned-directories list, presumably 2638 * because it's been cleared. 2639 */ 2640 void 2641 remove_orphan_dir(fsck_ino_t ino) 2642 { 2643 (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp); 2644 } 2645 2646 /* 2647 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum() 2648 * and lufs.c:checksum(). 2649 */ 2650 static void 2651 log_setsum(int32_t *sp, int32_t *lp, int nb) 2652 { 2653 int32_t csum = 0; 2654 2655 *sp = 0; 2656 nb /= sizeof (int32_t); 2657 while (nb--) 2658 csum += *lp++; 2659 *sp = csum; 2660 } 2661 2662 static int 2663 log_checksum(int32_t *sp, int32_t *lp, int nb) 2664 { 2665 int32_t ssum = *sp; 2666 2667 log_setsum(sp, lp, nb); 2668 if (ssum != *sp) { 2669 *sp = ssum; 2670 return (0); 2671 } 2672 return (1); 2673 } 2674