1 /* 2 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 7 /* All Rights Reserved */ 8 9 /* 10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms are permitted 14 * provided that: (1) source distributions retain this entire copyright 15 * notice and comment, and (2) distributions including binaries display 16 * the following acknowledgement: ``This product includes software 17 * developed by the University of California, Berkeley and its contributors'' 18 * in the documentation or other materials provided with the distribution 19 * and in all advertising materials mentioning features or use of this 20 * software. Neither the name of the University nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <unistd.h> 33 #include <stdarg.h> 34 #include <libadm.h> 35 #include <note.h> 36 #include <sys/param.h> 37 #include <sys/types.h> 38 #include <sys/mntent.h> 39 #include <sys/filio.h> 40 #include <sys/fs/ufs_fs.h> 41 #include <sys/vnode.h> 42 #include <sys/fs/ufs_acl.h> 43 #include <sys/fs/ufs_inode.h> 44 #include <sys/fs/ufs_log.h> 45 #define _KERNEL 46 #include <sys/fs/ufs_fsdir.h> 47 #undef _KERNEL 48 #include <sys/mnttab.h> 49 #include <sys/types.h> 50 #include <sys/stat.h> 51 #include <fcntl.h> 52 #include <signal.h> 53 #include <string.h> 54 #include <ctype.h> 55 #include <sys/vfstab.h> 56 #include <sys/lockfs.h> 57 #include <errno.h> 58 #include <sys/cmn_err.h> 59 #include <sys/dkio.h> 60 #include <sys/vtoc.h> 61 #include <sys/efi_partition.h> 62 #include <fslib.h> 63 #include <inttypes.h> 64 #include "fsck.h" 65 66 caddr_t mount_point = NULL; 67 68 static int64_t diskreads, totalreads; /* Disk cache statistics */ 69 70 static int log_checksum(int32_t *, int32_t *, int); 71 static void vdirerror(fsck_ino_t, caddr_t, va_list); 72 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t); 73 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t); 74 static void vpwarn(caddr_t, va_list); 75 static int getline(FILE *, caddr_t, int); 76 static struct bufarea *alloc_bufarea(void); 77 static void rwerror(caddr_t, diskaddr_t, int rval); 78 static void debugclean(void); 79 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t); 80 static void freelogblk(daddr32_t); 81 static void verrexit(caddr_t, va_list); 82 static void vpfatal(caddr_t, va_list); 83 static diskaddr_t get_device_size(int, caddr_t); 84 static diskaddr_t brute_force_get_device_size(int); 85 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *, 86 daddr32_t *, daddr32_t *, daddr32_t *); 87 88 int 89 ftypeok(struct dinode *dp) 90 { 91 switch (dp->di_mode & IFMT) { 92 93 case IFDIR: 94 case IFREG: 95 case IFBLK: 96 case IFCHR: 97 case IFLNK: 98 case IFSOCK: 99 case IFIFO: 100 case IFSHAD: 101 case IFATTRDIR: 102 return (1); 103 104 default: 105 if (debug) 106 (void) printf("bad file type 0%o\n", dp->di_mode); 107 return (0); 108 } 109 } 110 111 int 112 acltypeok(struct dinode *dp) 113 { 114 if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT)) 115 return (1); 116 117 if (debug) 118 (void) printf("bad file type for acl I=%d: 0%o\n", 119 dp->di_shadow, dp->di_mode); 120 return (0); 121 } 122 123 NOTE(PRINTFLIKE(1)) 124 int 125 reply(caddr_t fmt, ...) 126 { 127 va_list ap; 128 char line[80]; 129 130 if (preen) 131 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode"); 132 133 if (mflag) { 134 /* 135 * We don't know what's going on, so don't potentially 136 * make things worse by having errexit() write stuff 137 * out to disk. 138 */ 139 (void) printf( 140 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 141 devname); 142 exit(EXERRFATAL); 143 } 144 145 va_start(ap, fmt); 146 (void) putchar('\n'); 147 (void) vprintf(fmt, ap); 148 (void) putchar('?'); 149 (void) putchar(' '); 150 va_end(ap); 151 152 if (nflag || fswritefd < 0) { 153 (void) printf(" no\n\n"); 154 return (0); 155 } 156 if (yflag) { 157 (void) printf(" yes\n\n"); 158 return (1); 159 } 160 (void) fflush(stdout); 161 if (getline(stdin, line, sizeof (line)) == EOF) 162 errexit("\n"); 163 (void) printf("\n"); 164 if (line[0] == 'y' || line[0] == 'Y') { 165 return (1); 166 } else { 167 return (0); 168 } 169 } 170 171 int 172 getline(FILE *fp, caddr_t loc, int maxlen) 173 { 174 int n; 175 caddr_t p, lastloc; 176 177 p = loc; 178 lastloc = &p[maxlen-1]; 179 while ((n = getc(fp)) != '\n') { 180 if (n == EOF) 181 return (EOF); 182 if (!isspace(n) && p < lastloc) 183 *p++ = (char)n; 184 } 185 *p = '\0'; 186 /* LINTED pointer difference won't overflow */ 187 return (p - loc); 188 } 189 190 /* 191 * Malloc buffers and set up cache. 192 */ 193 void 194 bufinit(void) 195 { 196 struct bufarea *bp; 197 int bufcnt, i; 198 caddr_t bufp; 199 200 bufp = malloc((size_t)sblock.fs_bsize); 201 if (bufp == NULL) 202 goto nomem; 203 initbarea(&cgblk); 204 cgblk.b_un.b_buf = bufp; 205 bufhead.b_next = bufhead.b_prev = &bufhead; 206 bufcnt = MAXBUFSPACE / sblock.fs_bsize; 207 if (bufcnt < MINBUFS) 208 bufcnt = MINBUFS; 209 for (i = 0; i < bufcnt; i++) { 210 bp = (struct bufarea *)malloc(sizeof (struct bufarea)); 211 if (bp == NULL) { 212 if (i >= MINBUFS) 213 goto noalloc; 214 goto nomem; 215 } 216 217 bufp = malloc((size_t)sblock.fs_bsize); 218 if (bufp == NULL) { 219 free((void *)bp); 220 if (i >= MINBUFS) 221 goto noalloc; 222 goto nomem; 223 } 224 initbarea(bp); 225 bp->b_un.b_buf = bufp; 226 bp->b_prev = &bufhead; 227 bp->b_next = bufhead.b_next; 228 bufhead.b_next->b_prev = bp; 229 bufhead.b_next = bp; 230 } 231 noalloc: 232 bufhead.b_size = i; /* save number of buffers */ 233 pbp = pdirbp = NULL; 234 return; 235 236 nomem: 237 errexit("cannot allocate buffer pool\n"); 238 /* NOTREACHED */ 239 } 240 241 /* 242 * Undo a bufinit(). 243 */ 244 void 245 unbufinit(void) 246 { 247 int cnt; 248 struct bufarea *bp, *nbp; 249 250 cnt = 0; 251 for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) { 252 cnt++; 253 flush(fswritefd, bp); 254 nbp = bp->b_prev; 255 /* 256 * We're discarding the entire chain, so this isn't 257 * technically necessary. However, it doesn't hurt 258 * and lint's data flow analysis is much happier 259 * (this prevents it from thinking there's a chance 260 * of our using memory elsewhere after it's been released). 261 */ 262 nbp->b_next = bp->b_next; 263 bp->b_next->b_prev = nbp; 264 free((void *)bp->b_un.b_buf); 265 free((void *)bp); 266 } 267 268 if (bufhead.b_size != cnt) 269 errexit("Panic: cache lost %d buffers\n", 270 bufhead.b_size - cnt); 271 } 272 273 /* 274 * Manage a cache of directory blocks. 275 */ 276 struct bufarea * 277 getdatablk(daddr32_t blkno, size_t size) 278 { 279 struct bufarea *bp; 280 281 for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) 282 if (bp->b_bno == fsbtodb(&sblock, blkno)) { 283 goto foundit; 284 } 285 for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) 286 if ((bp->b_flags & B_INUSE) == 0) 287 break; 288 if (bp == &bufhead) { 289 bp = alloc_bufarea(); 290 if (bp == NULL) { 291 errexit("deadlocked buffer pool\n"); 292 /* NOTREACHED */ 293 } 294 } 295 /* 296 * We're at the same logical level as getblk(), so if there 297 * are any errors, we'll let our caller handle them. 298 */ 299 diskreads++; 300 (void) getblk(bp, blkno, size); 301 302 foundit: 303 totalreads++; 304 bp->b_cnt++; 305 /* 306 * Move the buffer to head of linked list if it isn't 307 * already there. 308 */ 309 if (bufhead.b_next != bp) { 310 bp->b_prev->b_next = bp->b_next; 311 bp->b_next->b_prev = bp->b_prev; 312 bp->b_prev = &bufhead; 313 bp->b_next = bufhead.b_next; 314 bufhead.b_next->b_prev = bp; 315 bufhead.b_next = bp; 316 } 317 bp->b_flags |= B_INUSE; 318 return (bp); 319 } 320 321 void 322 brelse(struct bufarea *bp) 323 { 324 bp->b_cnt--; 325 if (bp->b_cnt == 0) { 326 bp->b_flags &= ~B_INUSE; 327 } 328 } 329 330 struct bufarea * 331 getblk(struct bufarea *bp, daddr32_t blk, size_t size) 332 { 333 diskaddr_t dblk; 334 335 dblk = fsbtodb(&sblock, blk); 336 if (bp->b_bno == dblk) 337 return (bp); 338 flush(fswritefd, bp); 339 bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size); 340 bp->b_bno = dblk; 341 bp->b_size = size; 342 return (bp); 343 } 344 345 void 346 flush(int fd, struct bufarea *bp) 347 { 348 int i, j; 349 caddr_t sip; 350 long size; 351 352 if (!bp->b_dirty) 353 return; 354 355 /* 356 * It's not our buf, so if there are errors, let whoever 357 * acquired it deal with the actual problem. 358 */ 359 if (bp->b_errs != 0) 360 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno); 361 bp->b_dirty = 0; 362 bp->b_errs = 0; 363 bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); 364 if (bp != &sblk) { 365 return; 366 } 367 368 /* 369 * We're flushing the superblock, so make sure all the 370 * ancillary bits go out as well. 371 */ 372 sip = (caddr_t)sblock.fs_u.fs_csp; 373 for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { 374 size = sblock.fs_cssize - i < sblock.fs_bsize ? 375 sblock.fs_cssize - i : sblock.fs_bsize; 376 bwrite(fswritefd, sip, 377 fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), 378 size); 379 sip += size; 380 } 381 } 382 383 static void 384 rwerror(caddr_t mesg, diskaddr_t blk, int rval) 385 { 386 int olderr = errno; 387 388 if (!preen) 389 (void) printf("\n"); 390 391 if (rval == -1) 392 pfatal("CANNOT %s: DISK BLOCK %lld: %s", 393 mesg, blk, strerror(olderr)); 394 else 395 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk); 396 397 if (reply("CONTINUE") == 0) { 398 exitstat = EXERRFATAL; 399 errexit("Program terminated\n"); 400 } 401 } 402 403 void 404 ckfini(void) 405 { 406 int64_t percentage; 407 408 if (fswritefd < 0) 409 return; 410 411 flush(fswritefd, &sblk); 412 /* 413 * Were we using a backup superblock? 414 */ 415 if (havesb && sblk.b_bno != SBOFF / dev_bsize) { 416 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) { 417 sblk.b_bno = SBOFF / dev_bsize; 418 sbdirty(); 419 flush(fswritefd, &sblk); 420 } 421 } 422 flush(fswritefd, &cgblk); 423 if (cgblk.b_un.b_buf != NULL) { 424 free((void *)cgblk.b_un.b_buf); 425 cgblk.b_un.b_buf = NULL; 426 } 427 unbufinit(); 428 pbp = NULL; 429 pdirbp = NULL; 430 if (debug) { 431 /* 432 * Note that we only count cache-related reads. 433 * Anything that called fsck_bread() or getblk() 434 * directly are explicitly not cached, so they're not 435 * included here. 436 */ 437 if (totalreads != 0) 438 percentage = diskreads * 100 / totalreads; 439 else 440 percentage = 0; 441 442 (void) printf("cache missed %lld of %lld reads (%lld%%)\n", 443 (longlong_t)diskreads, (longlong_t)totalreads, 444 (longlong_t)percentage); 445 } 446 447 (void) close(fsreadfd); 448 (void) close(fswritefd); 449 fsreadfd = -1; 450 fswritefd = -1; 451 } 452 453 int 454 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size) 455 { 456 caddr_t cp; 457 int i; 458 int errs; 459 offset_t offset = ldbtob(blk); 460 offset_t addr; 461 462 /* 463 * In our universe, nothing exists before the superblock, so 464 * just pretend it's always zeros. This is the complement of 465 * bwrite()'s ignoring write requests into that space. 466 */ 467 if (blk < SBLOCK) { 468 if (debug) 469 (void) printf( 470 "WARNING: fsck_bread() passed blkno < %d (%lld)\n", 471 SBLOCK, (longlong_t)blk); 472 (void) memset(buf, 0, (size_t)size); 473 return (1); 474 } 475 476 if (llseek(fd, offset, 0) < 0) { 477 rwerror("SEEK", blk, -1); 478 } 479 480 if ((i = read(fd, buf, size)) == size) { 481 return (0); 482 } 483 rwerror("READ", blk, i); 484 if (llseek(fd, offset, 0) < 0) { 485 rwerror("SEEK", blk, -1); 486 } 487 errs = 0; 488 (void) memset(buf, 0, (size_t)size); 489 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:"); 490 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 491 addr = ldbtob(blk + i); 492 if (llseek(fd, addr, SEEK_CUR) < 0 || 493 read(fd, cp, (int)secsize) < 0) { 494 iscorrupt = 1; 495 (void) printf(" %llu", blk + (u_longlong_t)i); 496 errs++; 497 } 498 } 499 (void) printf("\n"); 500 return (errs); 501 } 502 503 void 504 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size) 505 { 506 int i; 507 int n; 508 caddr_t cp; 509 offset_t offset = ldbtob(blk); 510 offset_t addr; 511 512 if (fd < 0) 513 return; 514 if (blk < SBLOCK) { 515 if (debug) 516 (void) printf( 517 "WARNING: Attempt to write illegal blkno %lld on %s\n", 518 (longlong_t)blk, devname); 519 return; 520 } 521 if (llseek(fd, offset, 0) < 0) { 522 rwerror("SEEK", blk, -1); 523 } 524 if ((i = write(fd, buf, (int)size)) == size) { 525 fsmodified = 1; 526 return; 527 } 528 rwerror("WRITE", blk, i); 529 if (llseek(fd, offset, 0) < 0) { 530 rwerror("SEEK", blk, -1); 531 } 532 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 533 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 534 n = 0; 535 addr = ldbtob(blk + i); 536 if (llseek(fd, addr, SEEK_CUR) < 0 || 537 (n = write(fd, cp, DEV_BSIZE)) < 0) { 538 iscorrupt = 1; 539 (void) printf(" %llu", blk + (u_longlong_t)i); 540 } else if (n > 0) { 541 fsmodified = 1; 542 } 543 544 } 545 (void) printf("\n"); 546 } 547 548 /* 549 * Allocates the specified number of contiguous fragments. 550 */ 551 daddr32_t 552 allocblk(int wantedfrags) 553 { 554 int block, leadfrag, tailfrag; 555 daddr32_t selected; 556 size_t size; 557 struct bufarea *bp; 558 559 /* 560 * It's arguable whether we should just fail, or instead 561 * error out here. Since we should only ever be asked for 562 * a single fragment or an entire block (i.e., sblock.fs_frag), 563 * we'll fail out because anything else means somebody 564 * changed code without considering all of the ramifications. 565 */ 566 if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) { 567 exitstat = EXERRFATAL; 568 errexit("allocblk() asked for %d frags. " 569 "Legal range is 1 to %d", 570 wantedfrags, sblock.fs_frag); 571 } 572 573 /* 574 * For each filesystem block, look at every possible starting 575 * offset within the block such that we can get the number of 576 * contiguous fragments that we need. This is a drastically 577 * simplified version of the kernel's mapsearch() and alloc*(). 578 * It's also correspondingly slower. 579 */ 580 for (block = 0; block < maxfsblock - sblock.fs_frag; 581 block += sblock.fs_frag) { 582 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags; 583 leadfrag++) { 584 /* 585 * Is first fragment of candidate run available? 586 */ 587 if (testbmap(block + leadfrag)) 588 continue; 589 /* 590 * Are the rest of them available? 591 */ 592 for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++) 593 if (testbmap(block + leadfrag + tailfrag)) 594 break; 595 if (tailfrag < wantedfrags) { 596 /* 597 * No, skip the known-unusable run. 598 */ 599 leadfrag += tailfrag; 600 continue; 601 } 602 /* 603 * Found what we need, so claim them. 604 */ 605 for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++) 606 setbmap(block + leadfrag + tailfrag); 607 n_blks += wantedfrags; 608 size = wantedfrags * sblock.fs_fsize; 609 selected = block + leadfrag; 610 bp = getdatablk(selected, size); 611 (void) memset((void *)bp->b_un.b_buf, 0, size); 612 dirty(bp); 613 brelse(bp); 614 if (debug) 615 (void) printf( 616 "allocblk: selected %d (in block %d), frags %d, size %d\n", 617 selected, selected % sblock.fs_bsize, 618 wantedfrags, (int)size); 619 return (selected); 620 } 621 } 622 return (0); 623 } 624 625 /* 626 * Free a previously allocated block 627 */ 628 void 629 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags) 630 { 631 struct inodesc idesc; 632 633 if (debug) 634 (void) printf("debug: freeing %d fragments starting at %d\n", 635 frags, blkno); 636 637 init_inodesc(&idesc); 638 639 idesc.id_number = ino; 640 idesc.id_blkno = blkno; 641 idesc.id_numfrags = frags; 642 idesc.id_truncto = -1; 643 644 /* 645 * Nothing in the return status has any relevance to how 646 * we're using pass4check(), so just ignore it. 647 */ 648 (void) pass4check(&idesc); 649 } 650 651 /* 652 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes 653 * that the given buffer is at least MAXPATHLEN + 1 characters. 654 */ 655 void 656 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino) 657 { 658 int len; 659 caddr_t cp; 660 struct dinode *dp; 661 struct inodesc idesc; 662 struct inoinfo *inp; 663 664 if (debug) 665 (void) printf("debug: getpathname(curdir %d, ino %d)\n", 666 curdir, ino); 667 668 if ((curdir == 0) || (!INO_IS_DVALID(curdir))) { 669 (void) strcpy(namebuf, "?"); 670 return; 671 } 672 673 if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) { 674 (void) strcpy(namebuf, "/"); 675 return; 676 } 677 678 init_inodesc(&idesc); 679 idesc.id_type = DATA; 680 cp = &namebuf[MAXPATHLEN - 1]; 681 *cp = '\0'; 682 683 /* 684 * In the case of extended attributes, our 685 * parent won't necessarily be a directory, so just 686 * return what we've found with a prefix indicating 687 * that it's an XATTR. Presumably our caller will 688 * know what's going on and do something useful, like 689 * work out the path of the parent and then combine 690 * the two names. 691 * 692 * Can't use strcpy(), etc, because we've probably 693 * already got some name information in the buffer and 694 * the usual trailing \0 would lose it. 695 */ 696 dp = ginode(curdir); 697 if ((dp->di_mode & IFMT) == IFATTRDIR) { 698 idesc.id_number = curdir; 699 idesc.id_parent = ino; 700 idesc.id_func = findname; 701 idesc.id_name = namebuf; 702 idesc.id_fix = NOFIX; 703 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) { 704 *cp-- = '?'; 705 } 706 707 len = sizeof (XATTR_DIR_NAME) - 1; 708 cp -= len; 709 (void) memmove(cp, XATTR_DIR_NAME, len); 710 goto attrname; 711 } 712 713 /* 714 * If curdir == ino, need to get a handle on .. so we 715 * can search it for ino's name. Otherwise, just search 716 * the given directory for ino. Repeat until out of space 717 * or a full path has been built. 718 */ 719 if (curdir != ino) { 720 idesc.id_parent = curdir; 721 goto namelookup; 722 } 723 while (ino != UFSROOTINO && ino != 0) { 724 idesc.id_number = ino; 725 idesc.id_func = findino; 726 idesc.id_name = ".."; 727 idesc.id_fix = NOFIX; 728 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) { 729 inp = getinoinfo(ino); 730 if ((inp == NULL) || (inp->i_parent == 0)) { 731 break; 732 } 733 idesc.id_parent = inp->i_parent; 734 } 735 736 /* 737 * To get this far, id_parent must have the inode 738 * number for `..' in it. By definition, that's got 739 * to be a directory, so search it for the inode of 740 * interest. 741 */ 742 namelookup: 743 idesc.id_number = idesc.id_parent; 744 idesc.id_parent = ino; 745 idesc.id_func = findname; 746 idesc.id_name = namebuf; 747 idesc.id_fix = NOFIX; 748 if ((ckinode(ginode(idesc.id_number), 749 &idesc, CKI_TRAVERSE) & FOUND) == 0) { 750 break; 751 } 752 /* 753 * Prepend to what we've accumulated so far. If 754 * there's not enough room for even one more path element 755 * (of the worst-case length), then bail out. 756 */ 757 len = strlen(namebuf); 758 cp -= len; 759 if (cp < &namebuf[MAXNAMLEN]) 760 break; 761 (void) memmove(cp, namebuf, len); 762 *--cp = '/'; 763 764 /* 765 * Corner case for a looped-to-itself directory. 766 */ 767 if (ino == idesc.id_number) 768 break; 769 770 /* 771 * Climb one level of the hierarchy. In other words, 772 * the current .. becomes the inode to search for and 773 * its parent becomes the directory to search in. 774 */ 775 ino = idesc.id_number; 776 } 777 778 /* 779 * If we hit a discontinuity in the hierarchy, indicate it by 780 * prefixing the path so far with `?'. Otherwise, the first 781 * character will be `/' as a side-effect of the *--cp above. 782 * 783 * The special case is to handle the situation where we're 784 * trying to look something up in UFSROOTINO, but didn't find 785 * it. 786 */ 787 if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) { 788 if (cp > namebuf) 789 cp--; 790 *cp = '?'; 791 } 792 793 /* 794 * The invariants being used for buffer integrity are: 795 * - namebuf[] is terminated with \0 before anything else 796 * - cp is always <= the last element of namebuf[] 797 * - the new path element is always stored at the 798 * beginning of namebuf[], and is no more than MAXNAMLEN-1 799 * characters 800 * - cp is is decremented by the number of characters in 801 * the new path element 802 * - if, after the above accounting for the new element's 803 * size, there is no longer enough room at the beginning of 804 * namebuf[] for a full-sized path element and a slash, 805 * terminate the loop. cp is in the range 806 * &namebuf[0]..&namebuf[MAXNAMLEN - 1] 807 */ 808 attrname: 809 /* LINTED per the above discussion */ 810 (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp); 811 } 812 813 /* ARGSUSED */ 814 void 815 catch(int dummy) 816 { 817 ckfini(); 818 exit(EXSIGNAL); 819 } 820 821 /* 822 * When preening, allow a single quit to signal 823 * a special exit after filesystem checks complete 824 * so that reboot sequence may be interrupted. 825 */ 826 /* ARGSUSED */ 827 void 828 catchquit(int dummy) 829 { 830 (void) printf("returning to single-user after filesystem check\n"); 831 interrupted = 1; 832 (void) signal(SIGQUIT, SIG_DFL); 833 } 834 835 836 /* 837 * determine whether an inode should be fixed. 838 */ 839 NOTE(PRINTFLIKE(2)) 840 int 841 dofix(struct inodesc *idesc, caddr_t msg, ...) 842 { 843 int rval = 0; 844 va_list ap; 845 846 va_start(ap, msg); 847 848 switch (idesc->id_fix) { 849 850 case DONTKNOW: 851 if (idesc->id_type == DATA) 852 vdirerror(idesc->id_number, msg, ap); 853 else 854 vpwarn(msg, ap); 855 if (preen) { 856 idesc->id_fix = FIX; 857 rval = ALTERED; 858 break; 859 } 860 if (reply("SALVAGE") == 0) { 861 idesc->id_fix = NOFIX; 862 break; 863 } 864 idesc->id_fix = FIX; 865 rval = ALTERED; 866 break; 867 868 case FIX: 869 rval = ALTERED; 870 break; 871 872 case NOFIX: 873 break; 874 875 default: 876 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix); 877 } 878 879 va_end(ap); 880 return (rval); 881 } 882 883 NOTE(PRINTFLIKE(1)) 884 void 885 errexit(caddr_t fmt, ...) 886 { 887 va_list ap; 888 889 va_start(ap, fmt); 890 verrexit(fmt, ap); 891 /* NOTREACHED */ 892 } 893 894 NOTE(PRINTFLIKE(1)) 895 static void 896 verrexit(caddr_t fmt, va_list ap) 897 { 898 static int recursing = 0; 899 900 if (!recursing) { 901 recursing = 1; 902 if (errorlocked || iscorrupt) { 903 if (havesb) { 904 sblock.fs_clean = FSBAD; 905 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 906 sblock.fs_state = -sblock.fs_state; 907 sbdirty(); 908 write_altsb(fswritefd); 909 flush(fswritefd, &sblk); 910 } 911 } 912 ckfini(); 913 recursing = 0; 914 } 915 (void) vprintf(fmt, ap); 916 if (fmt[strlen(fmt) - 1] != '\n') 917 (void) putchar('\n'); 918 exit((exitstat != 0) ? exitstat : EXERRFATAL); 919 } 920 921 /* 922 * An unexpected inconsistency occured. 923 * Die if preening, otherwise just print message and continue. 924 */ 925 NOTE(PRINTFLIKE(1)) 926 void 927 pfatal(caddr_t fmt, ...) 928 { 929 va_list ap; 930 931 va_start(ap, fmt); 932 vpfatal(fmt, ap); 933 va_end(ap); 934 } 935 936 NOTE(PRINTFLIKE(1)) 937 static void 938 vpfatal(caddr_t fmt, va_list ap) 939 { 940 if (preen) { 941 if (*fmt != '\0') { 942 (void) printf("%s: ", devname); 943 (void) vprintf(fmt, ap); 944 (void) printf("\n"); 945 } 946 (void) printf( 947 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 948 devname); 949 if (havesb) { 950 sblock.fs_clean = FSBAD; 951 sblock.fs_state = -(FSOKAY - (long)sblock.fs_time); 952 sbdirty(); 953 flush(fswritefd, &sblk); 954 } 955 /* 956 * We're exiting, it doesn't really matter that our 957 * caller doesn't get to call va_end(). 958 */ 959 if (exitstat == 0) 960 exitstat = EXFNDERRS; 961 exit(exitstat); 962 } 963 if (*fmt != '\0') { 964 (void) vprintf(fmt, ap); 965 } 966 } 967 968 /* 969 * Pwarn just prints a message when not preening, 970 * or a warning (preceded by filename) when preening. 971 */ 972 NOTE(PRINTFLIKE(1)) 973 void 974 pwarn(caddr_t fmt, ...) 975 { 976 va_list ap; 977 978 va_start(ap, fmt); 979 vpwarn(fmt, ap); 980 va_end(ap); 981 } 982 983 NOTE(PRINTFLIKE(1)) 984 static void 985 vpwarn(caddr_t fmt, va_list ap) 986 { 987 if (*fmt != '\0') { 988 if (preen) 989 (void) printf("%s: ", devname); 990 (void) vprintf(fmt, ap); 991 } 992 } 993 994 /* 995 * Like sprintf(), except the buffer is dynamically allocated 996 * and returned, instead of being passed in. A pointer to the 997 * buffer is stored in *RET, and FMT is the usual format string. 998 * The number of characters in *RET (excluding the trailing \0, 999 * to be consistent with the other *printf() routines) is returned. 1000 * 1001 * Solaris doesn't have asprintf(3C) yet, unfortunately. 1002 */ 1003 NOTE(PRINTFLIKE(2)) 1004 int 1005 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...) 1006 { 1007 int len; 1008 caddr_t buffer; 1009 va_list ap; 1010 1011 va_start(ap, fmt); 1012 len = vsnprintf(NULL, 0, fmt, ap); 1013 va_end(ap); 1014 1015 buffer = malloc((len + 1) * sizeof (char)); 1016 if (buffer == NULL) { 1017 errexit("Out of memory in asprintf\n"); 1018 /* NOTREACHED */ 1019 } 1020 1021 va_start(ap, fmt); 1022 (void) vsnprintf(buffer, len + 1, fmt, ap); 1023 va_end(ap); 1024 1025 *ret = buffer; 1026 return (len); 1027 } 1028 1029 /* 1030 * So we can take advantage of kernel routines in ufs_subr.c. 1031 */ 1032 /* PRINTFLIKE2 */ 1033 void 1034 cmn_err(int level, caddr_t fmt, ...) 1035 { 1036 va_list ap; 1037 1038 va_start(ap, fmt); 1039 if (level == CE_PANIC) { 1040 (void) printf("INTERNAL INCONSISTENCY:"); 1041 verrexit(fmt, ap); 1042 } else { 1043 (void) vprintf(fmt, ap); 1044 } 1045 va_end(ap); 1046 } 1047 1048 /* 1049 * Check to see if unraw version of name is already mounted. 1050 * Updates devstr with the device name if devstr is not NULL 1051 * and str_size is positive. 1052 */ 1053 int 1054 mounted(caddr_t name, caddr_t devstr, size_t str_size) 1055 { 1056 int found; 1057 struct mnttab *mntent; 1058 1059 mntent = search_mnttab(NULL, unrawname(name), devstr, str_size); 1060 if (mntent == NULL) 1061 return (M_NOMNT); 1062 1063 /* 1064 * It's mounted. With or without write access? 1065 */ 1066 if (hasmntopt(mntent, MNTOPT_RO) != 0) 1067 found = M_RO; /* mounted as RO */ 1068 else 1069 found = M_RW; /* mounted as R/W */ 1070 1071 if (mount_point == NULL) { 1072 mount_point = strdup(mntent->mnt_mountp); 1073 if (mount_point == NULL) { 1074 errexit("fsck: memory allocation failure: %s", 1075 strerror(errno)); 1076 /* NOTREACHED */ 1077 } 1078 1079 if (devstr != NULL && str_size > 0) 1080 (void) strlcpy(devstr, mntent->mnt_special, str_size); 1081 } 1082 1083 return (found); 1084 } 1085 1086 /* 1087 * Check to see if name corresponds to an entry in vfstab, and that the entry 1088 * does not have option ro. 1089 */ 1090 int 1091 writable(caddr_t name) 1092 { 1093 int rw = 1; 1094 struct vfstab vfsbuf, vfskey; 1095 FILE *vfstab; 1096 1097 vfstab = fopen(VFSTAB, "r"); 1098 if (vfstab == NULL) { 1099 (void) printf("can't open %s\n", VFSTAB); 1100 return (1); 1101 } 1102 (void) memset((void *)&vfskey, 0, sizeof (vfskey)); 1103 vfsnull(&vfskey); 1104 vfskey.vfs_special = unrawname(name); 1105 vfskey.vfs_fstype = MNTTYPE_UFS; 1106 if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) && 1107 (hasvfsopt(&vfsbuf, MNTOPT_RO))) { 1108 rw = 0; 1109 } 1110 (void) fclose(vfstab); 1111 return (rw); 1112 } 1113 1114 /* 1115 * debugclean 1116 */ 1117 static void 1118 debugclean(void) 1119 { 1120 if (!debug) 1121 return; 1122 1123 if ((iscorrupt == 0) && (isdirty == 0)) 1124 return; 1125 1126 if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) || 1127 (sblock.fs_clean == FSLOG && islog && islogok) || 1128 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked)) 1129 return; 1130 1131 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n", 1132 sblock.fs_clean == FSSTABLE ? "stable" : 1133 sblock.fs_clean == FSLOG ? "logging" : 1134 sblock.fs_clean == FSFIX ? "being fixed" : "clean", 1135 devname); 1136 } 1137 1138 /* 1139 * updateclean 1140 * Carefully and transparently update the clean flag. 1141 * 1142 * `iscorrupt' has to be in its final state before this is called. 1143 */ 1144 int 1145 updateclean(void) 1146 { 1147 int freedlog = 0; 1148 struct bufarea cleanbuf; 1149 size_t size; 1150 ssize_t io_res; 1151 diskaddr_t bno; 1152 char fsclean; 1153 int fsreclaim; 1154 char fsflags; 1155 int flags_ok = 1; 1156 daddr32_t fslogbno; 1157 offset_t sblkoff; 1158 time_t t; 1159 1160 /* 1161 * debug stuff 1162 */ 1163 debugclean(); 1164 1165 /* 1166 * set fsclean to its appropriate value 1167 */ 1168 fslogbno = sblock.fs_logbno; 1169 fsclean = sblock.fs_clean; 1170 fsreclaim = sblock.fs_reclaim; 1171 fsflags = sblock.fs_flags; 1172 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) { 1173 fsclean = FSACTIVE; 1174 } 1175 /* 1176 * If ufs log is not okay, note that we need to clear it. 1177 */ 1178 examinelog(sblock.fs_logbno, NULL); 1179 if (fslogbno && !(islog && islogok)) { 1180 fsclean = FSACTIVE; 1181 fslogbno = 0; 1182 } 1183 1184 /* 1185 * if necessary, update fs_clean and fs_state 1186 */ 1187 switch (fsclean) { 1188 1189 case FSACTIVE: 1190 if (!iscorrupt) { 1191 fsclean = FSSTABLE; 1192 fsreclaim = 0; 1193 } 1194 break; 1195 1196 case FSCLEAN: 1197 case FSSTABLE: 1198 if (iscorrupt) { 1199 fsclean = FSACTIVE; 1200 } else { 1201 fsreclaim = 0; 1202 } 1203 break; 1204 1205 case FSLOG: 1206 if (iscorrupt) { 1207 fsclean = FSACTIVE; 1208 } else if (!islog || fslogbno == 0) { 1209 fsclean = FSSTABLE; 1210 fsreclaim = 0; 1211 } else if (fflag) { 1212 fsreclaim = 0; 1213 } 1214 break; 1215 1216 case FSFIX: 1217 fsclean = FSBAD; 1218 if (errorlocked && !iscorrupt) { 1219 fsclean = islog ? FSLOG : FSCLEAN; 1220 } 1221 break; 1222 1223 default: 1224 if (iscorrupt) { 1225 fsclean = FSACTIVE; 1226 } else { 1227 fsclean = FSSTABLE; 1228 fsreclaim = 0; 1229 } 1230 } 1231 1232 if (largefile_count > 0) 1233 fsflags |= FSLARGEFILES; 1234 else 1235 fsflags &= ~FSLARGEFILES; 1236 1237 /* 1238 * There can be two discrepencies here. A) The superblock 1239 * shows no largefiles but we found some while scanning. 1240 * B) The superblock indicates the presence of largefiles, 1241 * but none are present. Note that if preening, the superblock 1242 * is silently corrected. 1243 */ 1244 if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) || 1245 (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES)) 1246 flags_ok = 0; 1247 1248 if (debug) 1249 (void) printf( 1250 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n", 1251 largefile_count, sblock.fs_flags, flags_ok); 1252 1253 /* 1254 * If fs is unchanged, do nothing. 1255 */ 1256 if ((!isdirty) && (flags_ok) && 1257 (fslogbno == sblock.fs_logbno) && 1258 (sblock.fs_clean == fsclean) && 1259 (sblock.fs_reclaim == fsreclaim) && 1260 (FSOKAY == (sblock.fs_state + sblock.fs_time))) { 1261 if (errorlocked) { 1262 if (!do_errorlock(LOCKFS_ULOCK)) 1263 pwarn( 1264 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n"); 1265 } 1266 return (freedlog); 1267 } 1268 1269 /* 1270 * if user allows, update superblock state 1271 */ 1272 if (debug) { 1273 (void) printf( 1274 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1275 sblock.fs_flags, sblock.fs_logbno, 1276 sblock.fs_clean, sblock.fs_reclaim, 1277 sblock.fs_state + sblock.fs_time); 1278 (void) printf( 1279 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1280 fsflags, fslogbno, fsclean, fsreclaim, FSOKAY); 1281 } 1282 if (!isdirty && !preen && !rerun && 1283 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0)) 1284 return (freedlog); 1285 1286 (void) time(&t); 1287 sblock.fs_time = (time32_t)t; 1288 if (debug) 1289 printclean(); 1290 1291 if (sblock.fs_logbno != fslogbno) { 1292 examinelog(sblock.fs_logbno, &freelogblk); 1293 freedlog++; 1294 } 1295 1296 sblock.fs_logbno = fslogbno; 1297 sblock.fs_clean = fsclean; 1298 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 1299 sblock.fs_reclaim = fsreclaim; 1300 sblock.fs_flags = fsflags; 1301 1302 /* 1303 * if superblock can't be written, return 1304 */ 1305 if (fswritefd < 0) 1306 return (freedlog); 1307 1308 /* 1309 * Read private copy of superblock, update clean flag, and write it. 1310 */ 1311 bno = sblk.b_bno; 1312 size = sblk.b_size; 1313 1314 sblkoff = ldbtob(bno); 1315 1316 if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL) 1317 errexit("out of memory"); 1318 if (llseek(fsreadfd, sblkoff, 0) == -1) { 1319 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1320 (longlong_t)bno, strerror(errno)); 1321 goto out; 1322 } 1323 1324 if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) { 1325 report_io_prob("READ FROM", bno, size, io_res); 1326 goto out; 1327 } 1328 1329 cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno; 1330 cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean; 1331 cleanbuf.b_un.b_fs->fs_state = sblock.fs_state; 1332 cleanbuf.b_un.b_fs->fs_time = sblock.fs_time; 1333 cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim; 1334 cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags; 1335 1336 if (llseek(fswritefd, sblkoff, 0) == -1) { 1337 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1338 (longlong_t)bno, strerror(errno)); 1339 goto out; 1340 } 1341 1342 if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) { 1343 report_io_prob("WRITE TO", bno, size, io_res); 1344 goto out; 1345 } 1346 1347 /* 1348 * 1208040 1349 * If we had to use -b to grab an alternate superblock, then we 1350 * likely had to do so because of unacceptable differences between 1351 * the main and alternate superblocks. So, we had better update 1352 * the alternate superblock as well, or we'll just fail again 1353 * the next time we attempt to run fsck! 1354 */ 1355 if (bflag != 0) { 1356 write_altsb(fswritefd); 1357 } 1358 1359 if (errorlocked) { 1360 if (!do_errorlock(LOCKFS_ULOCK)) 1361 pwarn( 1362 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n"); 1363 } 1364 1365 out: 1366 if (cleanbuf.b_un.b_buf != NULL) { 1367 free((void *)cleanbuf.b_un.b_buf); 1368 } 1369 1370 return (freedlog); 1371 } 1372 1373 static void 1374 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure) 1375 { 1376 if (failure < 0) 1377 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n", 1378 what, (int)bno, strerror(errno)); 1379 else if (failure == 0) 1380 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n", 1381 what, (int)bno); 1382 else 1383 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n", 1384 what, (int)bno, (unsigned)failure, (unsigned)expected); 1385 } 1386 1387 /* 1388 * print out clean info 1389 */ 1390 void 1391 printclean(void) 1392 { 1393 caddr_t s; 1394 1395 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) 1396 s = "unknown"; 1397 else 1398 switch (sblock.fs_clean) { 1399 1400 case FSACTIVE: 1401 s = "active"; 1402 break; 1403 1404 case FSCLEAN: 1405 s = "clean"; 1406 break; 1407 1408 case FSSTABLE: 1409 s = "stable"; 1410 break; 1411 1412 case FSLOG: 1413 s = "logging"; 1414 break; 1415 1416 case FSBAD: 1417 s = "is bad"; 1418 break; 1419 1420 case FSFIX: 1421 s = "being fixed"; 1422 break; 1423 1424 default: 1425 s = "unknown"; 1426 } 1427 1428 if (preen) 1429 pwarn("is %s.\n", s); 1430 else 1431 (void) printf("** %s is %s.\n", devname, s); 1432 } 1433 1434 int 1435 is_errorlocked(caddr_t fs) 1436 { 1437 int retval; 1438 struct stat64 statb; 1439 caddr_t mountp; 1440 struct mnttab *mntent; 1441 1442 retval = 0; 1443 1444 if (!fs) 1445 return (0); 1446 1447 if (stat64(fs, &statb) < 0) 1448 return (0); 1449 1450 if (S_ISDIR(statb.st_mode)) { 1451 mountp = fs; 1452 } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) { 1453 mntent = search_mnttab(NULL, fs, NULL, 0); 1454 if (mntent == NULL) 1455 return (0); 1456 mountp = mntent->mnt_mountp; 1457 if (mountp == NULL) /* theoretically a can't-happen */ 1458 return (0); 1459 } else { 1460 return (0); 1461 } 1462 1463 /* 1464 * From here on, must `goto out' to avoid memory leakage. 1465 */ 1466 1467 if (elock_combuf == NULL) 1468 elock_combuf = 1469 (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char)); 1470 else 1471 elock_combuf = 1472 (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN); 1473 1474 if (elock_combuf == NULL) 1475 goto out; 1476 1477 (void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN); 1478 1479 if (elock_mountp != NULL) { 1480 free(elock_mountp); 1481 } 1482 1483 elock_mountp = strdup(mountp); 1484 if (elock_mountp == NULL) 1485 goto out; 1486 1487 if (mountfd < 0) { 1488 if ((mountfd = open64(mountp, O_RDONLY)) == -1) 1489 goto out; 1490 } 1491 1492 if (lfp == NULL) { 1493 lfp = (struct lockfs *)malloc(sizeof (struct lockfs)); 1494 if (lfp == NULL) 1495 goto out; 1496 (void) memset((void *)lfp, 0, sizeof (struct lockfs)); 1497 } 1498 1499 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1500 lfp->lf_comment = elock_combuf; 1501 1502 if (ioctl(mountfd, _FIOLFSS, lfp) == -1) 1503 goto out; 1504 1505 /* 1506 * lint believes that the ioctl() (or any other function 1507 * taking lfp as an arg) could free lfp. This is not the 1508 * case, however. 1509 */ 1510 retval = LOCKFS_IS_ELOCK(lfp); 1511 1512 out: 1513 return (retval); 1514 } 1515 1516 /* 1517 * Given a name which is known to be a directory, see if it appears 1518 * in the vfstab. If so, return the entry's block (special) device 1519 * field via devstr. 1520 */ 1521 int 1522 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size) 1523 { 1524 return (NULL != search_vfstab(name, NULL, devstr, str_size)); 1525 } 1526 1527 /* 1528 * Given a name which is known to be a directory, see if it appears 1529 * in the mnttab. If so, return the entry's block (special) device 1530 * field via devstr. 1531 */ 1532 int 1533 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size) 1534 { 1535 return (NULL != search_mnttab(name, NULL, devstr, str_size)); 1536 } 1537 1538 /* 1539 * Search for mount point and/or special device in the given file. 1540 * The first matching entry is returned. 1541 * 1542 * If an entry is found and str_size is greater than zero, then 1543 * up to size_str bytes of the special device name from the entry 1544 * are copied to devstr. 1545 */ 1546 1547 #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \ 1548 st_nuller, st_init, st_searcher) \ 1549 { \ 1550 FILE *fp; \ 1551 struct st_type *retval = NULL; \ 1552 struct st_type key; \ 1553 static struct st_type buffer; \ 1554 \ 1555 /* LINTED ``assigned value never used'' */ \ 1556 st_nuller(&key); \ 1557 key.st_mount = mountp; \ 1558 key.st_special = special; \ 1559 st_init; \ 1560 \ 1561 if ((fp = fopen(st_file, "r")) == NULL) \ 1562 return (NULL); \ 1563 \ 1564 if (st_searcher(fp, &buffer, &key) == 0) { \ 1565 retval = &buffer; \ 1566 if (devstr != NULL && str_size > 0 && \ 1567 buffer.st_special != NULL) { \ 1568 (void) strlcpy(devstr, buffer.st_special, \ 1569 str_size); \ 1570 } \ 1571 } \ 1572 (void) fclose(fp); \ 1573 return (retval); \ 1574 } 1575 1576 static struct vfstab * 1577 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1578 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull, 1579 (retval = retval), getvfsany) 1580 1581 static struct mnttab * 1582 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1583 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull, 1584 (key.mnt_fstype = MNTTYPE_UFS), getmntany) 1585 1586 int 1587 do_errorlock(int lock_type) 1588 { 1589 caddr_t buf; 1590 time_t now; 1591 struct tm *local; 1592 int rc; 1593 1594 if (elock_combuf == NULL) 1595 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n", 1596 elock_mountp ? elock_mountp : "<null>", 1597 lock_type); 1598 1599 if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) == 1600 NULL) { 1601 errexit("Couldn't alloc memory for temp. lock status buffer\n"); 1602 } 1603 if (lfp == NULL) { 1604 errexit("do_errorlock(%s, %d): lockfs status unallocated\n", 1605 elock_mountp, lock_type); 1606 } 1607 1608 (void) memmove((void *)buf, (void *)elock_combuf, 1609 LOCKFS_MAXCOMMENTLEN-1); 1610 1611 switch (lock_type) { 1612 case LOCKFS_ELOCK: 1613 /* 1614 * Note that if it is error-locked, we won't get an 1615 * error back if we try to error-lock it again. 1616 */ 1617 if (time(&now) != (time_t)-1) { 1618 if ((local = localtime(&now)) != NULL) 1619 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1620 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d", 1621 elock_combuf, (int)pid, 1622 local->tm_mon + 1, local->tm_mday, 1623 (local->tm_year % 100), local->tm_hour, 1624 local->tm_min, local->tm_sec); 1625 else 1626 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1627 "%s [fsck pid %d", elock_combuf, pid); 1628 1629 } else { 1630 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1631 "%s [fsck pid %d", elock_combuf, pid); 1632 } 1633 break; 1634 1635 case LOCKFS_ULOCK: 1636 if (time(&now) != (time_t)-1) { 1637 if ((local = localtime(&now)) != NULL) { 1638 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1639 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]", 1640 elock_combuf, 1641 local->tm_mon + 1, local->tm_mday, 1642 (local->tm_year % 100), local->tm_hour, 1643 local->tm_min, local->tm_sec); 1644 } else { 1645 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1646 "%s]", elock_combuf); 1647 } 1648 } else { 1649 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1650 "%s]", elock_combuf); 1651 } 1652 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) { 1653 pwarn("do_errorlock: unlock failed: %s\n", 1654 strerror(errno)); 1655 goto out; 1656 } 1657 break; 1658 1659 default: 1660 break; 1661 } 1662 1663 (void) memmove((void *)elock_combuf, (void *)buf, 1664 LOCKFS_MAXCOMMENTLEN - 1); 1665 1666 lfp->lf_lock = lock_type; 1667 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1668 lfp->lf_comment = elock_combuf; 1669 lfp->lf_flags = 0; 1670 errno = 0; 1671 1672 if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) { 1673 if (errno == EINVAL) { 1674 pwarn("Another fsck active?\n"); 1675 iscorrupt = 0; /* don't go away mad, just go away */ 1676 } else { 1677 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n", 1678 lock_type, elock_combuf, strerror(errno)); 1679 } 1680 } 1681 out: 1682 if (buf != NULL) { 1683 free((void *)buf); 1684 } 1685 1686 return (rc != -1); 1687 } 1688 1689 /* 1690 * Shadow inode support. To register a shadow with a client is to note 1691 * that an inode (the client) refers to the shadow. 1692 */ 1693 1694 static struct shadowclients * 1695 newshadowclient(struct shadowclients *prev) 1696 { 1697 struct shadowclients *rc; 1698 1699 rc = (struct shadowclients *)malloc(sizeof (*rc)); 1700 if (rc == NULL) 1701 errexit("newshadowclient: cannot malloc shadow client"); 1702 rc->next = prev; 1703 rc->nclients = 0; 1704 1705 rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) * 1706 maxshadowclients); 1707 if (rc->client == NULL) 1708 errexit("newshadowclient: cannot malloc client array"); 1709 return (rc); 1710 } 1711 1712 void 1713 registershadowclient(fsck_ino_t shadow, fsck_ino_t client, 1714 struct shadowclientinfo **info) 1715 { 1716 struct shadowclientinfo *sci; 1717 struct shadowclients *scc; 1718 1719 /* 1720 * Already have a record for this shadow? 1721 */ 1722 for (sci = *info; sci != NULL; sci = sci->next) 1723 if (sci->shadow == shadow) 1724 break; 1725 if (sci == NULL) { 1726 /* 1727 * It's a new shadow, add it to the list 1728 */ 1729 sci = (struct shadowclientinfo *)malloc(sizeof (*sci)); 1730 if (sci == NULL) 1731 errexit("registershadowclient: cannot malloc"); 1732 sci->next = *info; 1733 *info = sci; 1734 sci->shadow = shadow; 1735 sci->totalClients = 0; 1736 sci->clients = newshadowclient(NULL); 1737 } 1738 1739 sci->totalClients++; 1740 scc = sci->clients; 1741 if (scc->nclients >= maxshadowclients) { 1742 scc = newshadowclient(sci->clients); 1743 sci->clients = scc; 1744 } 1745 1746 scc->client[scc->nclients++] = client; 1747 } 1748 1749 /* 1750 * Locate and discard a shadow. 1751 */ 1752 void 1753 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info) 1754 { 1755 struct shadowclientinfo *sci, *prev; 1756 1757 /* 1758 * Do we have a record for this shadow? 1759 */ 1760 prev = NULL; 1761 for (sci = *info; sci != NULL; sci = sci->next) { 1762 if (sci->shadow == shadow) 1763 break; 1764 prev = sci; 1765 } 1766 1767 if (sci != NULL) { 1768 /* 1769 * First, pull it off the list, since we know there 1770 * shouldn't be any future references to this one. 1771 */ 1772 if (prev == NULL) 1773 *info = sci->next; 1774 else 1775 prev->next = sci->next; 1776 deshadow(sci, clearattrref); 1777 } 1778 } 1779 1780 /* 1781 * Discard all memory used to track clients of a shadow. 1782 */ 1783 void 1784 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t)) 1785 { 1786 struct shadowclients *clients, *discard; 1787 int idx; 1788 1789 clients = sci->clients; 1790 while (clients != NULL) { 1791 discard = clients; 1792 clients = clients->next; 1793 if (discard->client != NULL) { 1794 if (cb != NULL) { 1795 for (idx = 0; idx < discard->nclients; idx++) 1796 (*cb)(discard->client[idx]); 1797 } 1798 free((void *)discard->client); 1799 } 1800 free((void *)discard); 1801 } 1802 1803 free((void *)sci); 1804 } 1805 1806 /* 1807 * Allocate more buffer as need arises but allocate one at a time. 1808 * This is done to make sure that fsck does not exit with error if it 1809 * needs more buffer to complete its task. 1810 */ 1811 static struct bufarea * 1812 alloc_bufarea(void) 1813 { 1814 struct bufarea *newbp; 1815 caddr_t bufp; 1816 1817 bufp = malloc((unsigned int)sblock.fs_bsize); 1818 if (bufp == NULL) 1819 return (NULL); 1820 1821 newbp = (struct bufarea *)malloc(sizeof (struct bufarea)); 1822 if (newbp == NULL) { 1823 free((void *)bufp); 1824 return (NULL); 1825 } 1826 1827 initbarea(newbp); 1828 newbp->b_un.b_buf = bufp; 1829 newbp->b_prev = &bufhead; 1830 newbp->b_next = bufhead.b_next; 1831 bufhead.b_next->b_prev = newbp; 1832 bufhead.b_next = newbp; 1833 bufhead.b_size++; 1834 return (newbp); 1835 } 1836 1837 /* 1838 * We length-limit in both unrawname() and rawname() to avoid 1839 * overflowing our arrays or those of our naive, trusting callers. 1840 */ 1841 1842 caddr_t 1843 unrawname(caddr_t name) 1844 { 1845 caddr_t dp; 1846 static char fullname[MAXPATHLEN + 1]; 1847 1848 if ((dp = getfullblkname(name)) == NULL) 1849 return (""); 1850 1851 (void) strlcpy(fullname, dp, sizeof (fullname)); 1852 /* 1853 * Not reporting under debug, as the allocation isn't 1854 * reported by getfullblkname. The idea is that we 1855 * produce balanced alloc/free instances. 1856 */ 1857 free(dp); 1858 1859 return (fullname); 1860 } 1861 1862 caddr_t 1863 rawname(caddr_t name) 1864 { 1865 caddr_t dp; 1866 static char fullname[MAXPATHLEN + 1]; 1867 1868 if ((dp = getfullrawname(name)) == NULL) 1869 return (""); 1870 1871 (void) strlcpy(fullname, dp, sizeof (fullname)); 1872 /* 1873 * Not reporting under debug, as the allocation isn't 1874 * reported by getfullblkname. The idea is that we 1875 * produce balanced alloc/free instances. 1876 */ 1877 free(dp); 1878 1879 return (fullname); 1880 } 1881 1882 /* 1883 * Make sure that a cg header looks at least moderately reasonable. 1884 * We want to be able to trust the contents enough to be able to use 1885 * the standard accessor macros. So, besides looking at the obvious 1886 * such as the magic number, we verify that the offset field values 1887 * are properly aligned and not too big or small. 1888 * 1889 * Returns a NULL pointer if the cg is sane enough for our needs, else 1890 * a dynamically-allocated string describing all of its faults. 1891 */ 1892 #define Append_Error(full, full_len, addition, addition_len) \ 1893 if (full == NULL) { \ 1894 full = addition; \ 1895 full_len = addition_len; \ 1896 } else { \ 1897 /* lint doesn't think realloc() understands NULLs */ \ 1898 full = realloc(full, full_len + addition_len + 1); \ 1899 if (full == NULL) { \ 1900 errexit("Out of memory in cg_sanity"); \ 1901 /* NOTREACHED */ \ 1902 } \ 1903 (void) strcpy(full + full_len, addition); \ 1904 full_len += addition_len; \ 1905 free(addition); \ 1906 } 1907 1908 caddr_t 1909 cg_sanity(struct cg *cgp, int cgno) 1910 { 1911 caddr_t full_err; 1912 caddr_t this_err = NULL; 1913 int full_len, this_len; 1914 daddr32_t ndblk; 1915 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 1916 daddr32_t exp_freeoff, exp_nextfreeoff; 1917 1918 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 1919 &exp_freeoff, &exp_nextfreeoff, &ndblk); 1920 1921 full_err = NULL; 1922 full_len = 0; 1923 1924 if (!cg_chkmagic(cgp)) { 1925 this_len = fsck_asprintf(&this_err, 1926 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n", 1927 cgp->cg_magic, CG_MAGIC); 1928 Append_Error(full_err, full_len, this_err, this_len); 1929 } 1930 1931 if (cgp->cg_cgx != cgno) { 1932 this_len = fsck_asprintf(&this_err, 1933 "WRONG CG NUMBER (%d should be %d)\n", 1934 cgp->cg_cgx, cgno); 1935 Append_Error(full_err, full_len, this_err, this_len); 1936 } 1937 1938 if ((cgp->cg_btotoff & 3) != 0) { 1939 this_len = fsck_asprintf(&this_err, 1940 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n", 1941 cgp->cg_btotoff); 1942 Append_Error(full_err, full_len, this_err, this_len); 1943 } 1944 1945 if ((cgp->cg_boff & 1) != 0) { 1946 this_len = fsck_asprintf(&this_err, 1947 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n", 1948 cgp->cg_boff); 1949 Append_Error(full_err, full_len, this_err, this_len); 1950 } 1951 1952 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 1953 if (cgp->cg_ncyl < 1) { 1954 this_len = fsck_asprintf(&this_err, 1955 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n", 1956 cgp->cg_ncyl); 1957 } else { 1958 this_len = fsck_asprintf(&this_err, 1959 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n", 1960 cgp->cg_ncyl, sblock.fs_cpg); 1961 } 1962 Append_Error(full_err, full_len, this_err, this_len); 1963 } 1964 1965 if (cgp->cg_niblk != sblock.fs_ipg) { 1966 this_len = fsck_asprintf(&this_err, 1967 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n", 1968 cgp->cg_niblk, sblock.fs_ipg); 1969 Append_Error(full_err, full_len, this_err, this_len); 1970 } 1971 1972 if (cgp->cg_ndblk != ndblk) { 1973 this_len = fsck_asprintf(&this_err, 1974 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n", 1975 cgp->cg_ndblk, ndblk); 1976 Append_Error(full_err, full_len, this_err, this_len); 1977 } 1978 1979 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) { 1980 this_len = fsck_asprintf(&this_err, 1981 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION " 1982 "(%d should be at least 0 and less than %d)\n", 1983 cgp->cg_rotor, ndblk); 1984 Append_Error(full_err, full_len, this_err, this_len); 1985 } 1986 1987 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) { 1988 this_len = fsck_asprintf(&this_err, 1989 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION " 1990 "(%d should be at least 0 and less than %d)\n", 1991 cgp->cg_frotor, ndblk); 1992 Append_Error(full_err, full_len, this_err, this_len); 1993 } 1994 1995 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 1996 this_len = fsck_asprintf(&this_err, 1997 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION " 1998 "(%d should be at least 0 and less than %d)\n", 1999 cgp->cg_irotor, sblock.fs_ipg); 2000 Append_Error(full_err, full_len, this_err, this_len); 2001 } 2002 2003 if (cgp->cg_btotoff != exp_btotoff) { 2004 this_len = fsck_asprintf(&this_err, 2005 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n", 2006 cgp->cg_btotoff, exp_btotoff); 2007 Append_Error(full_err, full_len, this_err, this_len); 2008 } 2009 2010 if (cgp->cg_boff != exp_boff) { 2011 this_len = fsck_asprintf(&this_err, 2012 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n", 2013 cgp->cg_boff, exp_boff); 2014 Append_Error(full_err, full_len, this_err, this_len); 2015 } 2016 2017 if (cgp->cg_iusedoff != exp_iusedoff) { 2018 this_len = fsck_asprintf(&this_err, 2019 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n", 2020 cgp->cg_iusedoff, exp_iusedoff); 2021 Append_Error(full_err, full_len, this_err, this_len); 2022 } 2023 2024 if (cgp->cg_freeoff != exp_freeoff) { 2025 this_len = fsck_asprintf(&this_err, 2026 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n", 2027 cgp->cg_freeoff, exp_freeoff); 2028 Append_Error(full_err, full_len, this_err, this_len); 2029 } 2030 2031 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2032 this_len = fsck_asprintf(&this_err, 2033 "END OF HEADER POSITION INCORRECT (%d should be %d)\n", 2034 cgp->cg_nextfreeoff, exp_nextfreeoff); 2035 Append_Error(full_err, full_len, this_err, this_len); 2036 } 2037 2038 return (full_err); 2039 } 2040 2041 #undef Append_Error 2042 2043 /* 2044 * This is taken from mkfs, and is what is used to come up with the 2045 * original values for a struct cg. This implies that, since these 2046 * are all constants, recalculating them now should give us the same 2047 * thing as what's on disk. 2048 */ 2049 static void 2050 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff, 2051 daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff, 2052 daddr32_t *ndblk) 2053 { 2054 daddr32_t cbase, dmax; 2055 struct cg *cgp; 2056 2057 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno), 2058 (size_t)sblock.fs_cgsize); 2059 cgp = cgblk.b_un.b_cg; 2060 2061 cbase = cgbase(&sblock, cgno); 2062 dmax = cbase + sblock.fs_fpg; 2063 if (dmax > sblock.fs_size) 2064 dmax = sblock.fs_size; 2065 2066 /* LINTED pointer difference won't overflow */ 2067 *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link); 2068 *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t); 2069 *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t); 2070 *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY); 2071 *nextfreeoff = *freeoff + 2072 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY); 2073 *ndblk = dmax - cbase; 2074 } 2075 2076 /* 2077 * Corrects all fields in the cg that can be done with the available 2078 * redundant data. 2079 */ 2080 void 2081 fix_cg(struct cg *cgp, int cgno) 2082 { 2083 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 2084 daddr32_t exp_freeoff, exp_nextfreeoff; 2085 daddr32_t ndblk; 2086 2087 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 2088 &exp_freeoff, &exp_nextfreeoff, &ndblk); 2089 2090 if (cgp->cg_cgx != cgno) { 2091 cgp->cg_cgx = cgno; 2092 } 2093 2094 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 2095 if (cgno == (sblock.fs_ncg - 1)) { 2096 cgp->cg_ncyl = sblock.fs_ncyl - 2097 (sblock.fs_cpg * cgno); 2098 } else { 2099 cgp->cg_ncyl = sblock.fs_cpg; 2100 } 2101 } 2102 2103 if (cgp->cg_niblk != sblock.fs_ipg) { 2104 /* 2105 * This is not used by the kernel, so it's pretty 2106 * harmless if it's wrong. 2107 */ 2108 cgp->cg_niblk = sblock.fs_ipg; 2109 } 2110 2111 if (cgp->cg_ndblk != ndblk) { 2112 cgp->cg_ndblk = ndblk; 2113 } 2114 2115 /* 2116 * For the rotors, any position's valid, so pick the one we know 2117 * will always exist. 2118 */ 2119 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) { 2120 cgp->cg_rotor = 0; 2121 } 2122 2123 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) { 2124 cgp->cg_frotor = 0; 2125 } 2126 2127 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 2128 cgp->cg_irotor = 0; 2129 } 2130 2131 /* 2132 * For btotoff and boff, if they're misaligned they won't 2133 * match the expected values, so we're catching both cases 2134 * here. Of course, if any of these are off, it seems likely 2135 * that the tables really won't be where we calculate they 2136 * should be anyway. 2137 */ 2138 if (cgp->cg_btotoff != exp_btotoff) { 2139 cgp->cg_btotoff = exp_btotoff; 2140 } 2141 2142 if (cgp->cg_boff != exp_boff) { 2143 cgp->cg_boff = exp_boff; 2144 } 2145 2146 if (cgp->cg_iusedoff != exp_iusedoff) { 2147 cgp->cg_iusedoff = exp_iusedoff; 2148 } 2149 2150 if (cgp->cg_freeoff != exp_freeoff) { 2151 cgp->cg_freeoff = exp_freeoff; 2152 } 2153 2154 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2155 cgp->cg_nextfreeoff = exp_nextfreeoff; 2156 } 2157 2158 /* 2159 * Reset the magic, as we've recreated this cg, also 2160 * update the cg_time, as we're writing out the cg 2161 */ 2162 cgp->cg_magic = CG_MAGIC; 2163 cgp->cg_time = time(NULL); 2164 2165 /* 2166 * We know there was at least one correctable problem, 2167 * or else we wouldn't have been called. So instead of 2168 * marking the buffer dirty N times above, just do it 2169 * once here. 2170 */ 2171 cgdirty(); 2172 } 2173 2174 void 2175 examinelog(daddr32_t start, void (*cb)(daddr32_t)) 2176 { 2177 struct bufarea *bp; 2178 extent_block_t *ebp; 2179 extent_t *ep; 2180 daddr32_t nfno, fno; 2181 int i; 2182 int j; 2183 2184 if (start < SBLOCK) 2185 return; 2186 2187 /* 2188 * Read errors will return zeros, which will cause us 2189 * to do nothing harmful, so don't need to handle it. 2190 */ 2191 bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno), 2192 (size_t)sblock.fs_bsize); 2193 ebp = (void *)bp->b_un.b_buf; 2194 2195 /* 2196 * Does it look like a log allocation table? 2197 */ 2198 /* LINTED pointer cast is aligned */ 2199 if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf, 2200 sblock.fs_bsize)) 2201 return; 2202 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) 2203 return; 2204 2205 ep = &ebp->extents[0]; 2206 for (i = 0; i < ebp->nextents; ++i, ++ep) { 2207 fno = logbtofrag(&sblock, ep->pbno); 2208 nfno = dbtofsb(&sblock, ep->nbno); 2209 for (j = 0; j < nfno; ++j, ++fno) { 2210 /* 2211 * Invoke the callback first, so that pass1 can 2212 * mark the log blocks in-use. Then, if any 2213 * subsequent pass over the log shows us that a 2214 * block got freed (say, it was also claimed by 2215 * an inode that we cleared), we can safely declare 2216 * the log bad. 2217 */ 2218 if (cb != NULL) 2219 (*cb)(fno); 2220 if (!testbmap(fno)) 2221 islogok = 0; 2222 } 2223 } 2224 brelse(bp); 2225 2226 if (cb != NULL) { 2227 fno = logbtofrag(&sblock, sblock.fs_logbno); 2228 for (j = 0; j < sblock.fs_frag; ++j, ++fno) 2229 (*cb)(fno); 2230 } 2231 } 2232 2233 static void 2234 freelogblk(daddr32_t frag) 2235 { 2236 freeblk(sblock.fs_logbno, frag, 1); 2237 } 2238 2239 caddr_t 2240 file_id(fsck_ino_t inum, mode_t mode) 2241 { 2242 static char name[MAXPATHLEN + 1]; 2243 2244 if (lfdir == inum) { 2245 return (lfname); 2246 } 2247 2248 if ((mode & IFMT) == IFDIR) { 2249 (void) strcpy(name, "DIR"); 2250 } else if ((mode & IFMT) == IFATTRDIR) { 2251 (void) strcpy(name, "ATTR DIR"); 2252 } else if ((mode & IFMT) == IFSHAD) { 2253 (void) strcpy(name, "ACL"); 2254 } else { 2255 (void) strcpy(name, "FILE"); 2256 } 2257 2258 return (name); 2259 } 2260 2261 /* 2262 * Simple initializer for inodesc structures, so users of only a few 2263 * fields don't have to worry about getting the right defaults for 2264 * everything out. 2265 */ 2266 void 2267 init_inodesc(struct inodesc *idesc) 2268 { 2269 /* 2270 * Most fields should be zero, just hit the special cases. 2271 */ 2272 (void) memset((void *)idesc, 0, sizeof (struct inodesc)); 2273 idesc->id_fix = DONTKNOW; 2274 idesc->id_lbn = -1; 2275 idesc->id_truncto = -1; 2276 idesc->id_firsthole = -1; 2277 } 2278 2279 /* 2280 * Compare routine for tsearch(C) to use on ino_t instances. 2281 */ 2282 int 2283 ino_t_cmp(const void *left, const void *right) 2284 { 2285 const fsck_ino_t lino = (const fsck_ino_t)left; 2286 const fsck_ino_t rino = (const fsck_ino_t)right; 2287 2288 return (lino - rino); 2289 } 2290 2291 int 2292 cgisdirty(void) 2293 { 2294 return (cgblk.b_dirty); 2295 } 2296 2297 void 2298 cgflush(void) 2299 { 2300 flush(fswritefd, &cgblk); 2301 } 2302 2303 void 2304 dirty(struct bufarea *bp) 2305 { 2306 if (fswritefd < 0) { 2307 pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n"); 2308 } else { 2309 (bp)->b_dirty = 1; 2310 isdirty = 1; 2311 } 2312 } 2313 2314 void 2315 initbarea(struct bufarea *bp) 2316 { 2317 (bp)->b_dirty = 0; 2318 (bp)->b_bno = (diskaddr_t)-1LL; 2319 (bp)->b_flags = 0; 2320 (bp)->b_cnt = 0; 2321 (bp)->b_errs = 0; 2322 } 2323 2324 /* 2325 * Partition-sizing routines adapted from ../newfs/newfs.c. 2326 * Needed because calcsb() needs to use mkfs to work out what the 2327 * superblock should be, and mkfs insists on being told how many 2328 * sectors to use. 2329 * 2330 * Error handling assumes we're never called while preening. 2331 * 2332 * XXX This should be extracted into a ../ufslib.{c,h}, 2333 * in the same spirit to ../../fslib.{c,h}. Once that is 2334 * done, both fsck and newfs should be modified to link 2335 * against it. 2336 */ 2337 2338 static int label_type; 2339 2340 #define LABEL_TYPE_VTOC 1 2341 #define LABEL_TYPE_EFI 2 2342 #define LABEL_TYPE_OTHER 3 2343 2344 #define MB (1024 * 1024) 2345 #define SECTORS_PER_TERABYTE (1LL << 31) 2346 #define FS_SIZE_UPPER_LIMIT 0x100000000000LL 2347 2348 diskaddr_t 2349 getdisksize(caddr_t disk, int fd) 2350 { 2351 int rpm; 2352 struct dk_geom g; 2353 struct dk_cinfo ci; 2354 diskaddr_t actual_size; 2355 2356 /* 2357 * get_device_size() determines the actual size of the 2358 * device, and also the disk's attributes, such as geometry. 2359 */ 2360 actual_size = get_device_size(fd, disk); 2361 2362 if (label_type == LABEL_TYPE_VTOC) { 2363 if (ioctl(fd, DKIOCGGEOM, &g)) { 2364 pwarn("%s: Unable to read Disk geometry", disk); 2365 return (0); 2366 } 2367 if (sblock.fs_nsect == 0) 2368 sblock.fs_nsect = g.dkg_nsect; 2369 if (sblock.fs_ntrak == 0) 2370 sblock.fs_ntrak = g.dkg_nhead; 2371 if (sblock.fs_rps == 0) { 2372 rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm; 2373 sblock.fs_rps = rpm / 60; 2374 } 2375 } 2376 2377 if (sblock.fs_bsize == 0) 2378 sblock.fs_bsize = MAXBSIZE; 2379 2380 /* 2381 * Adjust maxcontig by the device's maxtransfer. If maxtransfer 2382 * information is not available, default to the min of a MB and 2383 * maxphys. 2384 */ 2385 if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) { 2386 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE; 2387 if (sblock.fs_maxcontig < 0) { 2388 int gotit, maxphys; 2389 2390 gotit = fsgetmaxphys(&maxphys, NULL); 2391 2392 /* 2393 * If we cannot get the maxphys value, default 2394 * to ufs_maxmaxphys (MB). 2395 */ 2396 if (gotit) { 2397 sblock.fs_maxcontig = MIN(maxphys, MB); 2398 } else { 2399 sblock.fs_maxcontig = MB; 2400 } 2401 } 2402 sblock.fs_maxcontig /= sblock.fs_bsize; 2403 } 2404 2405 return (actual_size); 2406 } 2407 2408 /* 2409 * Figure out how big the partition we're dealing with is. 2410 */ 2411 static diskaddr_t 2412 get_device_size(int fd, caddr_t name) 2413 { 2414 struct vtoc vtoc; 2415 struct dk_gpt *efi_vtoc; 2416 diskaddr_t slicesize = 0; 2417 2418 int index = read_vtoc(fd, &vtoc); 2419 2420 if (index >= 0) { 2421 label_type = LABEL_TYPE_VTOC; 2422 } else { 2423 if (index == VT_ENOTSUP || index == VT_ERROR) { 2424 /* it might be an EFI label */ 2425 index = efi_alloc_and_read(fd, &efi_vtoc); 2426 if (index >= 0) 2427 label_type = LABEL_TYPE_EFI; 2428 } 2429 } 2430 2431 if (index < 0) { 2432 /* 2433 * Since both attempts to read the label failed, we're 2434 * going to fall back to a brute force approach to 2435 * determining the device's size: see how far out we can 2436 * perform reads on the device. 2437 */ 2438 2439 slicesize = brute_force_get_device_size(fd); 2440 if (slicesize == 0) { 2441 switch (index) { 2442 case VT_ERROR: 2443 pwarn("%s: %s\n", name, strerror(errno)); 2444 break; 2445 case VT_EIO: 2446 pwarn("%s: I/O error accessing VTOC", name); 2447 break; 2448 case VT_EINVAL: 2449 pwarn("%s: Invalid field in VTOC", name); 2450 break; 2451 default: 2452 pwarn("%s: unknown error %d accessing VTOC", 2453 name, index); 2454 break; 2455 } 2456 return (0); 2457 } else { 2458 label_type = LABEL_TYPE_OTHER; 2459 } 2460 } 2461 2462 if (label_type == LABEL_TYPE_EFI) { 2463 slicesize = efi_vtoc->efi_parts[index].p_size; 2464 efi_free(efi_vtoc); 2465 } else if (label_type == LABEL_TYPE_VTOC) { 2466 /* 2467 * In the vtoc struct, p_size is a 32-bit signed quantity. 2468 * In the dk_gpt struct (efi's version of the vtoc), p_size 2469 * is an unsigned 64-bit quantity. By casting the vtoc's 2470 * psize to an unsigned 32-bit quantity, it will be copied 2471 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without 2472 * sign extension. 2473 */ 2474 2475 slicesize = (uint32_t)vtoc.v_part[index].p_size; 2476 } 2477 2478 return (slicesize); 2479 } 2480 2481 /* 2482 * brute_force_get_device_size 2483 * 2484 * Determine the size of the device by seeing how far we can 2485 * read. Doing an llseek( , , SEEK_END) would probably work 2486 * in most cases, but we've seen at least one third-party driver 2487 * which doesn't correctly support the SEEK_END option when the 2488 * the device is greater than a terabyte. 2489 */ 2490 2491 static diskaddr_t 2492 brute_force_get_device_size(int fd) 2493 { 2494 diskaddr_t min_fail = 0; 2495 diskaddr_t max_succeed = 0; 2496 diskaddr_t cur_db_off; 2497 char buf[DEV_BSIZE]; 2498 2499 /* 2500 * First, see if we can read the device at all, just to 2501 * eliminate errors that have nothing to do with the 2502 * device's size. 2503 */ 2504 2505 if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) || 2506 ((read(fd, buf, DEV_BSIZE)) == -1)) 2507 return (0); /* can't determine size */ 2508 2509 /* 2510 * Now, go sequentially through the multiples of 4TB 2511 * to find the first read that fails (this isn't strictly 2512 * the most efficient way to find the actual size if the 2513 * size really could be anything between 0 and 2**64 bytes. 2514 * We expect the sizes to be less than 16 TB for some time, 2515 * so why do a bunch of reads that are larger than that? 2516 * However, this algorithm *will* work for sizes of greater 2517 * than 16 TB. We're just not optimizing for those sizes.) 2518 */ 2519 2520 /* 2521 * XXX lint uses 32-bit arithmetic for doing flow analysis. 2522 * We're using > 32-bit constants here. Therefore, its flow 2523 * analysis is wrong. For the time being, ignore complaints 2524 * from it about the body of the for() being unreached. 2525 */ 2526 for (cur_db_off = SECTORS_PER_TERABYTE * 4; 2527 (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT); 2528 cur_db_off += 4 * SECTORS_PER_TERABYTE) { 2529 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2530 SEEK_SET) == -1) || 2531 (read(fd, buf, DEV_BSIZE) != DEV_BSIZE)) 2532 min_fail = cur_db_off; 2533 else 2534 max_succeed = cur_db_off; 2535 } 2536 2537 /* 2538 * XXX Same lint flow analysis problem as above. 2539 */ 2540 if (min_fail == 0) 2541 return (0); 2542 2543 /* 2544 * We now know that the size of the device is less than 2545 * min_fail and greater than or equal to max_succeed. Now 2546 * keep splitting the difference until the actual size in 2547 * sectors in known. We also know that the difference 2548 * between max_succeed and min_fail at this time is 2549 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which 2550 * simplifies the math below. 2551 */ 2552 2553 while (min_fail - max_succeed > 1) { 2554 cur_db_off = max_succeed + (min_fail - max_succeed)/2; 2555 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2556 SEEK_SET)) == -1) || 2557 ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)) 2558 min_fail = cur_db_off; 2559 else 2560 max_succeed = cur_db_off; 2561 } 2562 2563 /* the size is the last successfully read sector offset plus one */ 2564 return (max_succeed + 1); 2565 } 2566 2567 static void 2568 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap) 2569 { 2570 struct dinode *dp; 2571 char pathbuf[MAXPATHLEN + 1]; 2572 2573 vpwarn(fmt, ap); 2574 (void) putchar(' '); 2575 pinode(ino); 2576 (void) printf("\n"); 2577 getpathname(pathbuf, cwd, ino); 2578 if (ino < UFSROOTINO || ino > maxino) { 2579 pfatal("NAME=%s\n", pathbuf); 2580 return; 2581 } 2582 dp = ginode(ino); 2583 if (ftypeok(dp)) 2584 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf); 2585 else 2586 pfatal("NAME=%s\n", pathbuf); 2587 } 2588 2589 void 2590 direrror(fsck_ino_t ino, caddr_t fmt, ...) 2591 { 2592 va_list ap; 2593 2594 va_start(ap, fmt); 2595 vfileerror(ino, ino, fmt, ap); 2596 va_end(ap); 2597 } 2598 2599 static void 2600 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap) 2601 { 2602 vfileerror(ino, ino, fmt, ap); 2603 } 2604 2605 void 2606 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...) 2607 { 2608 va_list ap; 2609 2610 va_start(ap, fmt); 2611 vfileerror(cwd, ino, fmt, ap); 2612 va_end(ap); 2613 } 2614 2615 /* 2616 * Adds the given inode to the orphaned-directories list, limbo_dirs. 2617 * Assumes that the caller has set INCLEAR in the inode's statemap[] 2618 * entry. 2619 * 2620 * With INCLEAR set, the inode will get ignored by passes 2 and 3, 2621 * meaning it's effectively an orphan. It needs to be noted now, so 2622 * it will be remembered in pass 4. 2623 */ 2624 2625 void 2626 add_orphan_dir(fsck_ino_t ino) 2627 { 2628 if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL) 2629 errexit("add_orphan_dir: out of memory"); 2630 } 2631 2632 /* 2633 * Remove an inode from the orphaned-directories list, presumably 2634 * because it's been cleared. 2635 */ 2636 void 2637 remove_orphan_dir(fsck_ino_t ino) 2638 { 2639 (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp); 2640 } 2641 2642 /* 2643 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum() 2644 * and lufs.c:checksum(). 2645 */ 2646 static void 2647 log_setsum(int32_t *sp, int32_t *lp, int nb) 2648 { 2649 int32_t csum = 0; 2650 2651 *sp = 0; 2652 nb /= sizeof (int32_t); 2653 while (nb--) 2654 csum += *lp++; 2655 *sp = csum; 2656 } 2657 2658 static int 2659 log_checksum(int32_t *sp, int32_t *lp, int nb) 2660 { 2661 int32_t ssum = *sp; 2662 2663 log_setsum(sp, lp, nb); 2664 if (ssum != *sp) { 2665 *sp = ssum; 2666 return (0); 2667 } 2668 return (1); 2669 } 2670