1 /* 2 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016 by Delphix. All rights reserved. 4 */ 5 6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 7 /* All Rights Reserved */ 8 9 /* 10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms are permitted 14 * provided that: (1) source distributions retain this entire copyright 15 * notice and comment, and (2) distributions including binaries display 16 * the following acknowledgement: ``This product includes software 17 * developed by the University of California, Berkeley and its contributors'' 18 * in the documentation or other materials provided with the distribution 19 * and in all advertising materials mentioning features or use of this 20 * software. Neither the name of the University nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 */ 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <unistd.h> 31 #include <stdarg.h> 32 #include <libadm.h> 33 #include <note.h> 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/mntent.h> 37 #include <sys/filio.h> 38 #include <sys/fs/ufs_fs.h> 39 #include <sys/vnode.h> 40 #include <sys/fs/ufs_acl.h> 41 #include <sys/fs/ufs_inode.h> 42 #include <sys/fs/ufs_log.h> 43 #define _KERNEL 44 #include <sys/fs/ufs_fsdir.h> 45 #undef _KERNEL 46 #include <sys/mnttab.h> 47 #include <sys/types.h> 48 #include <sys/stat.h> 49 #include <fcntl.h> 50 #include <signal.h> 51 #include <string.h> 52 #include <ctype.h> 53 #include <sys/vfstab.h> 54 #include <sys/lockfs.h> 55 #include <errno.h> 56 #include <sys/cmn_err.h> 57 #include <sys/dkio.h> 58 #include <sys/vtoc.h> 59 #include <sys/efi_partition.h> 60 #include <fslib.h> 61 #include <inttypes.h> 62 #include "fsck.h" 63 64 caddr_t mount_point = NULL; 65 66 static int64_t diskreads, totalreads; /* Disk cache statistics */ 67 68 static int log_checksum(int32_t *, int32_t *, int); 69 static void vdirerror(fsck_ino_t, caddr_t, va_list); 70 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t); 71 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t); 72 static void vpwarn(caddr_t, va_list); 73 static int getaline(FILE *, caddr_t, int); 74 static struct bufarea *alloc_bufarea(void); 75 static void rwerror(caddr_t, diskaddr_t, int rval); 76 static void debugclean(void); 77 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t); 78 static void freelogblk(daddr32_t); 79 static void verrexit(caddr_t, va_list); 80 static void vpfatal(caddr_t, va_list); 81 static diskaddr_t get_device_size(int, caddr_t); 82 static diskaddr_t brute_force_get_device_size(int); 83 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *, 84 daddr32_t *, daddr32_t *, daddr32_t *); 85 86 int 87 ftypeok(struct dinode *dp) 88 { 89 switch (dp->di_mode & IFMT) { 90 91 case IFDIR: 92 case IFREG: 93 case IFBLK: 94 case IFCHR: 95 case IFLNK: 96 case IFSOCK: 97 case IFIFO: 98 case IFSHAD: 99 case IFATTRDIR: 100 return (1); 101 102 default: 103 if (debug) 104 (void) printf("bad file type 0%o\n", dp->di_mode); 105 return (0); 106 } 107 } 108 109 int 110 acltypeok(struct dinode *dp) 111 { 112 if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT)) 113 return (1); 114 115 if (debug) 116 (void) printf("bad file type for acl I=%d: 0%o\n", 117 dp->di_shadow, dp->di_mode); 118 return (0); 119 } 120 121 NOTE(PRINTFLIKE(1)) 122 int 123 reply(caddr_t fmt, ...) 124 { 125 va_list ap; 126 char line[80]; 127 128 if (preen) 129 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode"); 130 131 if (mflag) { 132 /* 133 * We don't know what's going on, so don't potentially 134 * make things worse by having errexit() write stuff 135 * out to disk. 136 */ 137 (void) printf( 138 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 139 devname); 140 exit(EXERRFATAL); 141 } 142 143 va_start(ap, fmt); 144 (void) putchar('\n'); 145 (void) vprintf(fmt, ap); 146 (void) putchar('?'); 147 (void) putchar(' '); 148 va_end(ap); 149 150 if (nflag || fswritefd < 0) { 151 (void) printf(" no\n\n"); 152 return (0); 153 } 154 if (yflag) { 155 (void) printf(" yes\n\n"); 156 return (1); 157 } 158 (void) fflush(stdout); 159 if (getaline(stdin, line, sizeof (line)) == EOF) 160 errexit("\n"); 161 (void) printf("\n"); 162 if (line[0] == 'y' || line[0] == 'Y') { 163 return (1); 164 } else { 165 return (0); 166 } 167 } 168 169 int 170 getaline(FILE *fp, caddr_t loc, int maxlen) 171 { 172 int n; 173 caddr_t p, lastloc; 174 175 p = loc; 176 lastloc = &p[maxlen-1]; 177 while ((n = getc(fp)) != '\n') { 178 if (n == EOF) 179 return (EOF); 180 if (!isspace(n) && p < lastloc) 181 *p++ = (char)n; 182 } 183 *p = '\0'; 184 /* LINTED pointer difference won't overflow */ 185 return (p - loc); 186 } 187 188 /* 189 * Malloc buffers and set up cache. 190 */ 191 void 192 bufinit(void) 193 { 194 struct bufarea *bp; 195 int bufcnt, i; 196 caddr_t bufp; 197 198 bufp = malloc((size_t)sblock.fs_bsize); 199 if (bufp == NULL) 200 goto nomem; 201 initbarea(&cgblk); 202 cgblk.b_un.b_buf = bufp; 203 bufhead.b_next = bufhead.b_prev = &bufhead; 204 bufcnt = MAXBUFSPACE / sblock.fs_bsize; 205 if (bufcnt < MINBUFS) 206 bufcnt = MINBUFS; 207 for (i = 0; i < bufcnt; i++) { 208 bp = (struct bufarea *)malloc(sizeof (struct bufarea)); 209 if (bp == NULL) { 210 if (i >= MINBUFS) 211 goto noalloc; 212 goto nomem; 213 } 214 215 bufp = malloc((size_t)sblock.fs_bsize); 216 if (bufp == NULL) { 217 free((void *)bp); 218 if (i >= MINBUFS) 219 goto noalloc; 220 goto nomem; 221 } 222 initbarea(bp); 223 bp->b_un.b_buf = bufp; 224 bp->b_prev = &bufhead; 225 bp->b_next = bufhead.b_next; 226 bufhead.b_next->b_prev = bp; 227 bufhead.b_next = bp; 228 } 229 noalloc: 230 bufhead.b_size = i; /* save number of buffers */ 231 pbp = pdirbp = NULL; 232 return; 233 234 nomem: 235 errexit("cannot allocate buffer pool\n"); 236 /* NOTREACHED */ 237 } 238 239 /* 240 * Undo a bufinit(). 241 */ 242 void 243 unbufinit(void) 244 { 245 int cnt; 246 struct bufarea *bp, *nbp; 247 248 cnt = 0; 249 for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) { 250 cnt++; 251 flush(fswritefd, bp); 252 nbp = bp->b_prev; 253 /* 254 * We're discarding the entire chain, so this isn't 255 * technically necessary. However, it doesn't hurt 256 * and lint's data flow analysis is much happier 257 * (this prevents it from thinking there's a chance 258 * of our using memory elsewhere after it's been released). 259 */ 260 nbp->b_next = bp->b_next; 261 bp->b_next->b_prev = nbp; 262 free((void *)bp->b_un.b_buf); 263 free((void *)bp); 264 } 265 266 if (bufhead.b_size != cnt) 267 errexit("Panic: cache lost %d buffers\n", 268 bufhead.b_size - cnt); 269 } 270 271 /* 272 * Manage a cache of directory blocks. 273 */ 274 struct bufarea * 275 getdatablk(daddr32_t blkno, size_t size) 276 { 277 struct bufarea *bp; 278 279 for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) 280 if (bp->b_bno == fsbtodb(&sblock, blkno)) { 281 goto foundit; 282 } 283 for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) 284 if ((bp->b_flags & B_INUSE) == 0) 285 break; 286 if (bp == &bufhead) { 287 bp = alloc_bufarea(); 288 if (bp == NULL) { 289 errexit("deadlocked buffer pool\n"); 290 /* NOTREACHED */ 291 } 292 } 293 /* 294 * We're at the same logical level as getblk(), so if there 295 * are any errors, we'll let our caller handle them. 296 */ 297 diskreads++; 298 (void) getblk(bp, blkno, size); 299 300 foundit: 301 totalreads++; 302 bp->b_cnt++; 303 /* 304 * Move the buffer to head of linked list if it isn't 305 * already there. 306 */ 307 if (bufhead.b_next != bp) { 308 bp->b_prev->b_next = bp->b_next; 309 bp->b_next->b_prev = bp->b_prev; 310 bp->b_prev = &bufhead; 311 bp->b_next = bufhead.b_next; 312 bufhead.b_next->b_prev = bp; 313 bufhead.b_next = bp; 314 } 315 bp->b_flags |= B_INUSE; 316 return (bp); 317 } 318 319 void 320 brelse(struct bufarea *bp) 321 { 322 bp->b_cnt--; 323 if (bp->b_cnt == 0) { 324 bp->b_flags &= ~B_INUSE; 325 } 326 } 327 328 struct bufarea * 329 getblk(struct bufarea *bp, daddr32_t blk, size_t size) 330 { 331 diskaddr_t dblk; 332 333 dblk = fsbtodb(&sblock, blk); 334 if (bp->b_bno == dblk) 335 return (bp); 336 flush(fswritefd, bp); 337 bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size); 338 bp->b_bno = dblk; 339 bp->b_size = size; 340 return (bp); 341 } 342 343 void 344 flush(int fd, struct bufarea *bp) 345 { 346 int i, j; 347 caddr_t sip; 348 long size; 349 350 if (!bp->b_dirty) 351 return; 352 353 /* 354 * It's not our buf, so if there are errors, let whoever 355 * acquired it deal with the actual problem. 356 */ 357 if (bp->b_errs != 0) 358 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno); 359 bp->b_dirty = 0; 360 bp->b_errs = 0; 361 bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); 362 if (bp != &sblk) { 363 return; 364 } 365 366 /* 367 * We're flushing the superblock, so make sure all the 368 * ancillary bits go out as well. 369 */ 370 sip = (caddr_t)sblock.fs_u.fs_csp; 371 for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { 372 size = sblock.fs_cssize - i < sblock.fs_bsize ? 373 sblock.fs_cssize - i : sblock.fs_bsize; 374 bwrite(fswritefd, sip, 375 fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), 376 size); 377 sip += size; 378 } 379 } 380 381 static void 382 rwerror(caddr_t mesg, diskaddr_t blk, int rval) 383 { 384 int olderr = errno; 385 386 if (!preen) 387 (void) printf("\n"); 388 389 if (rval == -1) 390 pfatal("CANNOT %s: DISK BLOCK %lld: %s", 391 mesg, blk, strerror(olderr)); 392 else 393 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk); 394 395 if (reply("CONTINUE") == 0) { 396 exitstat = EXERRFATAL; 397 errexit("Program terminated\n"); 398 } 399 } 400 401 void 402 ckfini(void) 403 { 404 int64_t percentage; 405 406 if (fswritefd < 0) 407 return; 408 409 flush(fswritefd, &sblk); 410 /* 411 * Were we using a backup superblock? 412 */ 413 if (havesb && sblk.b_bno != SBOFF / dev_bsize) { 414 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) { 415 sblk.b_bno = SBOFF / dev_bsize; 416 sbdirty(); 417 flush(fswritefd, &sblk); 418 } 419 } 420 flush(fswritefd, &cgblk); 421 if (cgblk.b_un.b_buf != NULL) { 422 free((void *)cgblk.b_un.b_buf); 423 cgblk.b_un.b_buf = NULL; 424 } 425 unbufinit(); 426 pbp = NULL; 427 pdirbp = NULL; 428 if (debug) { 429 /* 430 * Note that we only count cache-related reads. 431 * Anything that called fsck_bread() or getblk() 432 * directly are explicitly not cached, so they're not 433 * included here. 434 */ 435 if (totalreads != 0) 436 percentage = diskreads * 100 / totalreads; 437 else 438 percentage = 0; 439 440 (void) printf("cache missed %lld of %lld reads (%lld%%)\n", 441 (longlong_t)diskreads, (longlong_t)totalreads, 442 (longlong_t)percentage); 443 } 444 445 (void) close(fsreadfd); 446 (void) close(fswritefd); 447 fsreadfd = -1; 448 fswritefd = -1; 449 } 450 451 int 452 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size) 453 { 454 caddr_t cp; 455 int i; 456 int errs; 457 offset_t offset = ldbtob(blk); 458 offset_t addr; 459 460 /* 461 * In our universe, nothing exists before the superblock, so 462 * just pretend it's always zeros. This is the complement of 463 * bwrite()'s ignoring write requests into that space. 464 */ 465 if (blk < SBLOCK) { 466 if (debug) 467 (void) printf( 468 "WARNING: fsck_bread() passed blkno < %d (%lld)\n", 469 SBLOCK, (longlong_t)blk); 470 (void) memset(buf, 0, (size_t)size); 471 return (1); 472 } 473 474 if (llseek(fd, offset, SEEK_SET) < 0) { 475 rwerror("SEEK", blk, -1); 476 } 477 478 if ((i = read(fd, buf, size)) == size) { 479 return (0); 480 } 481 rwerror("READ", blk, i); 482 if (llseek(fd, offset, SEEK_SET) < 0) { 483 rwerror("SEEK", blk, -1); 484 } 485 errs = 0; 486 (void) memset(buf, 0, (size_t)size); 487 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:"); 488 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 489 addr = ldbtob(blk + i); 490 if (llseek(fd, addr, SEEK_SET) < 0 || 491 read(fd, cp, (int)secsize) < 0) { 492 iscorrupt = 1; 493 (void) printf(" %llu", blk + (u_longlong_t)i); 494 errs++; 495 } 496 } 497 (void) printf("\n"); 498 return (errs); 499 } 500 501 void 502 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size) 503 { 504 int i; 505 int n; 506 caddr_t cp; 507 offset_t offset = ldbtob(blk); 508 offset_t addr; 509 510 if (fd < 0) 511 return; 512 if (blk < SBLOCK) { 513 if (debug) 514 (void) printf( 515 "WARNING: Attempt to write illegal blkno %lld on %s\n", 516 (longlong_t)blk, devname); 517 return; 518 } 519 if (llseek(fd, offset, SEEK_SET) < 0) { 520 rwerror("SEEK", blk, -1); 521 } 522 if ((i = write(fd, buf, (int)size)) == size) { 523 fsmodified = 1; 524 return; 525 } 526 rwerror("WRITE", blk, i); 527 if (llseek(fd, offset, SEEK_SET) < 0) { 528 rwerror("SEEK", blk, -1); 529 } 530 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); 531 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) { 532 n = 0; 533 addr = ldbtob(blk + i); 534 if (llseek(fd, addr, SEEK_SET) < 0 || 535 (n = write(fd, cp, DEV_BSIZE)) < 0) { 536 iscorrupt = 1; 537 (void) printf(" %llu", blk + (u_longlong_t)i); 538 } else if (n > 0) { 539 fsmodified = 1; 540 } 541 542 } 543 (void) printf("\n"); 544 } 545 546 /* 547 * Allocates the specified number of contiguous fragments. 548 */ 549 daddr32_t 550 allocblk(int wantedfrags) 551 { 552 int block, leadfrag, tailfrag; 553 daddr32_t selected; 554 size_t size; 555 struct bufarea *bp; 556 557 /* 558 * It's arguable whether we should just fail, or instead 559 * error out here. Since we should only ever be asked for 560 * a single fragment or an entire block (i.e., sblock.fs_frag), 561 * we'll fail out because anything else means somebody 562 * changed code without considering all of the ramifications. 563 */ 564 if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) { 565 exitstat = EXERRFATAL; 566 errexit("allocblk() asked for %d frags. " 567 "Legal range is 1 to %d", 568 wantedfrags, sblock.fs_frag); 569 } 570 571 /* 572 * For each filesystem block, look at every possible starting 573 * offset within the block such that we can get the number of 574 * contiguous fragments that we need. This is a drastically 575 * simplified version of the kernel's mapsearch() and alloc*(). 576 * It's also correspondingly slower. 577 */ 578 for (block = 0; block < maxfsblock - sblock.fs_frag; 579 block += sblock.fs_frag) { 580 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags; 581 leadfrag++) { 582 /* 583 * Is first fragment of candidate run available? 584 */ 585 if (testbmap(block + leadfrag)) 586 continue; 587 /* 588 * Are the rest of them available? 589 */ 590 for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++) 591 if (testbmap(block + leadfrag + tailfrag)) 592 break; 593 if (tailfrag < wantedfrags) { 594 /* 595 * No, skip the known-unusable run. 596 */ 597 leadfrag += tailfrag; 598 continue; 599 } 600 /* 601 * Found what we need, so claim them. 602 */ 603 for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++) 604 setbmap(block + leadfrag + tailfrag); 605 n_blks += wantedfrags; 606 size = wantedfrags * sblock.fs_fsize; 607 selected = block + leadfrag; 608 bp = getdatablk(selected, size); 609 (void) memset((void *)bp->b_un.b_buf, 0, size); 610 dirty(bp); 611 brelse(bp); 612 if (debug) 613 (void) printf( 614 "allocblk: selected %d (in block %d), frags %d, size %d\n", 615 selected, selected % sblock.fs_bsize, 616 wantedfrags, (int)size); 617 return (selected); 618 } 619 } 620 return (0); 621 } 622 623 /* 624 * Free a previously allocated block 625 */ 626 void 627 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags) 628 { 629 struct inodesc idesc; 630 631 if (debug) 632 (void) printf("debug: freeing %d fragments starting at %d\n", 633 frags, blkno); 634 635 init_inodesc(&idesc); 636 637 idesc.id_number = ino; 638 idesc.id_blkno = blkno; 639 idesc.id_numfrags = frags; 640 idesc.id_truncto = -1; 641 642 /* 643 * Nothing in the return status has any relevance to how 644 * we're using pass4check(), so just ignore it. 645 */ 646 (void) pass4check(&idesc); 647 } 648 649 /* 650 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes 651 * that the given buffer is at least MAXPATHLEN + 1 characters. 652 */ 653 void 654 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino) 655 { 656 int len; 657 caddr_t cp; 658 struct dinode *dp; 659 struct inodesc idesc; 660 struct inoinfo *inp; 661 662 if (debug) 663 (void) printf("debug: getpathname(curdir %d, ino %d)\n", 664 curdir, ino); 665 666 if ((curdir == 0) || (!INO_IS_DVALID(curdir))) { 667 (void) strcpy(namebuf, "?"); 668 return; 669 } 670 671 if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) { 672 (void) strcpy(namebuf, "/"); 673 return; 674 } 675 676 init_inodesc(&idesc); 677 idesc.id_type = DATA; 678 cp = &namebuf[MAXPATHLEN - 1]; 679 *cp = '\0'; 680 681 /* 682 * In the case of extended attributes, our 683 * parent won't necessarily be a directory, so just 684 * return what we've found with a prefix indicating 685 * that it's an XATTR. Presumably our caller will 686 * know what's going on and do something useful, like 687 * work out the path of the parent and then combine 688 * the two names. 689 * 690 * Can't use strcpy(), etc, because we've probably 691 * already got some name information in the buffer and 692 * the usual trailing \0 would lose it. 693 */ 694 dp = ginode(curdir); 695 if ((dp->di_mode & IFMT) == IFATTRDIR) { 696 idesc.id_number = curdir; 697 idesc.id_parent = ino; 698 idesc.id_func = findname; 699 idesc.id_name = namebuf; 700 idesc.id_fix = NOFIX; 701 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) { 702 *cp-- = '?'; 703 } 704 705 len = sizeof (XATTR_DIR_NAME) - 1; 706 cp -= len; 707 (void) memmove(cp, XATTR_DIR_NAME, len); 708 goto attrname; 709 } 710 711 /* 712 * If curdir == ino, need to get a handle on .. so we 713 * can search it for ino's name. Otherwise, just search 714 * the given directory for ino. Repeat until out of space 715 * or a full path has been built. 716 */ 717 if (curdir != ino) { 718 idesc.id_parent = curdir; 719 goto namelookup; 720 } 721 while (ino != UFSROOTINO && ino != 0) { 722 idesc.id_number = ino; 723 idesc.id_func = findino; 724 idesc.id_name = ".."; 725 idesc.id_fix = NOFIX; 726 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) { 727 inp = getinoinfo(ino); 728 if ((inp == NULL) || (inp->i_parent == 0)) { 729 break; 730 } 731 idesc.id_parent = inp->i_parent; 732 } 733 734 /* 735 * To get this far, id_parent must have the inode 736 * number for `..' in it. By definition, that's got 737 * to be a directory, so search it for the inode of 738 * interest. 739 */ 740 namelookup: 741 idesc.id_number = idesc.id_parent; 742 idesc.id_parent = ino; 743 idesc.id_func = findname; 744 idesc.id_name = namebuf; 745 idesc.id_fix = NOFIX; 746 if ((ckinode(ginode(idesc.id_number), 747 &idesc, CKI_TRAVERSE) & FOUND) == 0) { 748 break; 749 } 750 /* 751 * Prepend to what we've accumulated so far. If 752 * there's not enough room for even one more path element 753 * (of the worst-case length), then bail out. 754 */ 755 len = strlen(namebuf); 756 cp -= len; 757 if (cp < &namebuf[MAXNAMLEN]) 758 break; 759 (void) memmove(cp, namebuf, len); 760 *--cp = '/'; 761 762 /* 763 * Corner case for a looped-to-itself directory. 764 */ 765 if (ino == idesc.id_number) 766 break; 767 768 /* 769 * Climb one level of the hierarchy. In other words, 770 * the current .. becomes the inode to search for and 771 * its parent becomes the directory to search in. 772 */ 773 ino = idesc.id_number; 774 } 775 776 /* 777 * If we hit a discontinuity in the hierarchy, indicate it by 778 * prefixing the path so far with `?'. Otherwise, the first 779 * character will be `/' as a side-effect of the *--cp above. 780 * 781 * The special case is to handle the situation where we're 782 * trying to look something up in UFSROOTINO, but didn't find 783 * it. 784 */ 785 if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) { 786 if (cp > namebuf) 787 cp--; 788 *cp = '?'; 789 } 790 791 /* 792 * The invariants being used for buffer integrity are: 793 * - namebuf[] is terminated with \0 before anything else 794 * - cp is always <= the last element of namebuf[] 795 * - the new path element is always stored at the 796 * beginning of namebuf[], and is no more than MAXNAMLEN-1 797 * characters 798 * - cp is is decremented by the number of characters in 799 * the new path element 800 * - if, after the above accounting for the new element's 801 * size, there is no longer enough room at the beginning of 802 * namebuf[] for a full-sized path element and a slash, 803 * terminate the loop. cp is in the range 804 * &namebuf[0]..&namebuf[MAXNAMLEN - 1] 805 */ 806 attrname: 807 /* LINTED per the above discussion */ 808 (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp); 809 } 810 811 /* ARGSUSED */ 812 void 813 catch(int dummy) 814 { 815 ckfini(); 816 exit(EXSIGNAL); 817 } 818 819 /* 820 * When preening, allow a single quit to signal 821 * a special exit after filesystem checks complete 822 * so that reboot sequence may be interrupted. 823 */ 824 /* ARGSUSED */ 825 void 826 catchquit(int dummy) 827 { 828 (void) printf("returning to single-user after filesystem check\n"); 829 interrupted = 1; 830 (void) signal(SIGQUIT, SIG_DFL); 831 } 832 833 834 /* 835 * determine whether an inode should be fixed. 836 */ 837 NOTE(PRINTFLIKE(2)) 838 int 839 dofix(struct inodesc *idesc, caddr_t msg, ...) 840 { 841 int rval = 0; 842 va_list ap; 843 844 va_start(ap, msg); 845 846 switch (idesc->id_fix) { 847 848 case DONTKNOW: 849 if (idesc->id_type == DATA) 850 vdirerror(idesc->id_number, msg, ap); 851 else 852 vpwarn(msg, ap); 853 if (preen) { 854 idesc->id_fix = FIX; 855 rval = ALTERED; 856 break; 857 } 858 if (reply("SALVAGE") == 0) { 859 idesc->id_fix = NOFIX; 860 break; 861 } 862 idesc->id_fix = FIX; 863 rval = ALTERED; 864 break; 865 866 case FIX: 867 rval = ALTERED; 868 break; 869 870 case NOFIX: 871 break; 872 873 default: 874 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix); 875 } 876 877 va_end(ap); 878 return (rval); 879 } 880 881 NOTE(PRINTFLIKE(1)) 882 void 883 errexit(caddr_t fmt, ...) 884 { 885 va_list ap; 886 887 va_start(ap, fmt); 888 verrexit(fmt, ap); 889 /* NOTREACHED */ 890 } 891 892 NOTE(PRINTFLIKE(1)) 893 static void 894 verrexit(caddr_t fmt, va_list ap) 895 { 896 static int recursing = 0; 897 898 if (!recursing) { 899 recursing = 1; 900 if (errorlocked || iscorrupt) { 901 if (havesb && fswritefd >= 0) { 902 sblock.fs_clean = FSBAD; 903 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 904 sblock.fs_state = -sblock.fs_state; 905 sbdirty(); 906 write_altsb(fswritefd); 907 flush(fswritefd, &sblk); 908 } 909 } 910 ckfini(); 911 recursing = 0; 912 } 913 (void) vprintf(fmt, ap); 914 if (fmt[strlen(fmt) - 1] != '\n') 915 (void) putchar('\n'); 916 exit((exitstat != 0) ? exitstat : EXERRFATAL); 917 } 918 919 /* 920 * An unexpected inconsistency occured. 921 * Die if preening, otherwise just print message and continue. 922 */ 923 NOTE(PRINTFLIKE(1)) 924 void 925 pfatal(caddr_t fmt, ...) 926 { 927 va_list ap; 928 929 va_start(ap, fmt); 930 vpfatal(fmt, ap); 931 va_end(ap); 932 } 933 934 NOTE(PRINTFLIKE(1)) 935 static void 936 vpfatal(caddr_t fmt, va_list ap) 937 { 938 if (preen) { 939 if (*fmt != '\0') { 940 (void) printf("%s: ", devname); 941 (void) vprintf(fmt, ap); 942 (void) printf("\n"); 943 } 944 (void) printf( 945 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", 946 devname); 947 if (havesb && fswritefd >= 0) { 948 sblock.fs_clean = FSBAD; 949 sblock.fs_state = -(FSOKAY - (long)sblock.fs_time); 950 sbdirty(); 951 flush(fswritefd, &sblk); 952 } 953 /* 954 * We're exiting, it doesn't really matter that our 955 * caller doesn't get to call va_end(). 956 */ 957 if (exitstat == 0) 958 exitstat = EXFNDERRS; 959 exit(exitstat); 960 } 961 if (*fmt != '\0') { 962 (void) vprintf(fmt, ap); 963 } 964 } 965 966 /* 967 * Pwarn just prints a message when not preening, 968 * or a warning (preceded by filename) when preening. 969 */ 970 NOTE(PRINTFLIKE(1)) 971 void 972 pwarn(caddr_t fmt, ...) 973 { 974 va_list ap; 975 976 va_start(ap, fmt); 977 vpwarn(fmt, ap); 978 va_end(ap); 979 } 980 981 NOTE(PRINTFLIKE(1)) 982 static void 983 vpwarn(caddr_t fmt, va_list ap) 984 { 985 if (*fmt != '\0') { 986 if (preen) 987 (void) printf("%s: ", devname); 988 (void) vprintf(fmt, ap); 989 } 990 } 991 992 /* 993 * Like sprintf(), except the buffer is dynamically allocated 994 * and returned, instead of being passed in. A pointer to the 995 * buffer is stored in *RET, and FMT is the usual format string. 996 * The number of characters in *RET (excluding the trailing \0, 997 * to be consistent with the other *printf() routines) is returned. 998 * 999 * Solaris doesn't have asprintf(3C) yet, unfortunately. 1000 */ 1001 NOTE(PRINTFLIKE(2)) 1002 int 1003 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...) 1004 { 1005 int len; 1006 caddr_t buffer; 1007 va_list ap; 1008 1009 va_start(ap, fmt); 1010 len = vsnprintf(NULL, 0, fmt, ap); 1011 va_end(ap); 1012 1013 buffer = malloc((len + 1) * sizeof (char)); 1014 if (buffer == NULL) { 1015 errexit("Out of memory in asprintf\n"); 1016 /* NOTREACHED */ 1017 } 1018 1019 va_start(ap, fmt); 1020 (void) vsnprintf(buffer, len + 1, fmt, ap); 1021 va_end(ap); 1022 1023 *ret = buffer; 1024 return (len); 1025 } 1026 1027 /* 1028 * So we can take advantage of kernel routines in ufs_subr.c. 1029 */ 1030 /* PRINTFLIKE2 */ 1031 void 1032 cmn_err(int level, caddr_t fmt, ...) 1033 { 1034 va_list ap; 1035 1036 va_start(ap, fmt); 1037 if (level == CE_PANIC) { 1038 (void) printf("INTERNAL INCONSISTENCY:"); 1039 verrexit(fmt, ap); 1040 } else { 1041 (void) vprintf(fmt, ap); 1042 } 1043 va_end(ap); 1044 } 1045 1046 /* 1047 * Check to see if unraw version of name is already mounted. 1048 * Updates devstr with the device name if devstr is not NULL 1049 * and str_size is positive. 1050 */ 1051 int 1052 mounted(caddr_t name, caddr_t devstr, size_t str_size) 1053 { 1054 int found; 1055 struct mnttab *mntent; 1056 1057 mntent = search_mnttab(NULL, unrawname(name), devstr, str_size); 1058 if (mntent == NULL) 1059 return (M_NOMNT); 1060 1061 /* 1062 * It's mounted. With or without write access? 1063 */ 1064 if (hasmntopt(mntent, MNTOPT_RO) != 0) 1065 found = M_RO; /* mounted as RO */ 1066 else 1067 found = M_RW; /* mounted as R/W */ 1068 1069 if (mount_point == NULL) { 1070 mount_point = strdup(mntent->mnt_mountp); 1071 if (mount_point == NULL) { 1072 errexit("fsck: memory allocation failure: %s", 1073 strerror(errno)); 1074 /* NOTREACHED */ 1075 } 1076 1077 if (devstr != NULL && str_size > 0) 1078 (void) strlcpy(devstr, mntent->mnt_special, str_size); 1079 } 1080 1081 return (found); 1082 } 1083 1084 /* 1085 * Check to see if name corresponds to an entry in vfstab, and that the entry 1086 * does not have option ro. 1087 */ 1088 int 1089 writable(caddr_t name) 1090 { 1091 int rw = 1; 1092 struct vfstab vfsbuf, vfskey; 1093 FILE *vfstab; 1094 1095 vfstab = fopen(VFSTAB, "r"); 1096 if (vfstab == NULL) { 1097 (void) printf("can't open %s\n", VFSTAB); 1098 return (1); 1099 } 1100 (void) memset((void *)&vfskey, 0, sizeof (vfskey)); 1101 vfsnull(&vfskey); 1102 vfskey.vfs_special = unrawname(name); 1103 vfskey.vfs_fstype = MNTTYPE_UFS; 1104 if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) && 1105 (hasvfsopt(&vfsbuf, MNTOPT_RO))) { 1106 rw = 0; 1107 } 1108 (void) fclose(vfstab); 1109 return (rw); 1110 } 1111 1112 /* 1113 * debugclean 1114 */ 1115 static void 1116 debugclean(void) 1117 { 1118 if (!debug) 1119 return; 1120 1121 if ((iscorrupt == 0) && (isdirty == 0)) 1122 return; 1123 1124 if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) || 1125 (sblock.fs_clean == FSLOG && islog && islogok) || 1126 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked)) 1127 return; 1128 1129 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n", 1130 sblock.fs_clean == FSSTABLE ? "stable" : 1131 sblock.fs_clean == FSLOG ? "logging" : 1132 sblock.fs_clean == FSFIX ? "being fixed" : "clean", 1133 devname); 1134 } 1135 1136 /* 1137 * updateclean 1138 * Carefully and transparently update the clean flag. 1139 * 1140 * `iscorrupt' has to be in its final state before this is called. 1141 */ 1142 int 1143 updateclean(void) 1144 { 1145 int freedlog = 0; 1146 struct bufarea cleanbuf; 1147 size_t size; 1148 ssize_t io_res; 1149 diskaddr_t bno; 1150 char fsclean; 1151 int fsreclaim; 1152 char fsflags; 1153 int flags_ok = 1; 1154 daddr32_t fslogbno; 1155 offset_t sblkoff; 1156 time_t t; 1157 1158 /* 1159 * debug stuff 1160 */ 1161 debugclean(); 1162 1163 /* 1164 * set fsclean to its appropriate value 1165 */ 1166 fslogbno = sblock.fs_logbno; 1167 fsclean = sblock.fs_clean; 1168 fsreclaim = sblock.fs_reclaim; 1169 fsflags = sblock.fs_flags; 1170 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) { 1171 fsclean = FSACTIVE; 1172 } 1173 /* 1174 * If ufs log is not okay, note that we need to clear it. 1175 */ 1176 examinelog(NULL); 1177 if (fslogbno && !(islog && islogok)) { 1178 fsclean = FSACTIVE; 1179 fslogbno = 0; 1180 } 1181 1182 /* 1183 * if necessary, update fs_clean and fs_state 1184 */ 1185 switch (fsclean) { 1186 1187 case FSACTIVE: 1188 if (!iscorrupt) { 1189 fsclean = FSSTABLE; 1190 fsreclaim = 0; 1191 } 1192 break; 1193 1194 case FSCLEAN: 1195 case FSSTABLE: 1196 if (iscorrupt) { 1197 fsclean = FSACTIVE; 1198 } else { 1199 fsreclaim = 0; 1200 } 1201 break; 1202 1203 case FSLOG: 1204 if (iscorrupt) { 1205 fsclean = FSACTIVE; 1206 } else if (!islog || fslogbno == 0) { 1207 fsclean = FSSTABLE; 1208 fsreclaim = 0; 1209 } else if (fflag) { 1210 fsreclaim = 0; 1211 } 1212 break; 1213 1214 case FSFIX: 1215 fsclean = FSBAD; 1216 if (errorlocked && !iscorrupt) { 1217 fsclean = islog ? FSLOG : FSCLEAN; 1218 } 1219 break; 1220 1221 default: 1222 if (iscorrupt) { 1223 fsclean = FSACTIVE; 1224 } else { 1225 fsclean = FSSTABLE; 1226 fsreclaim = 0; 1227 } 1228 } 1229 1230 if (largefile_count > 0) 1231 fsflags |= FSLARGEFILES; 1232 else 1233 fsflags &= ~FSLARGEFILES; 1234 1235 /* 1236 * There can be two discrepencies here. A) The superblock 1237 * shows no largefiles but we found some while scanning. 1238 * B) The superblock indicates the presence of largefiles, 1239 * but none are present. Note that if preening, the superblock 1240 * is silently corrected. 1241 */ 1242 if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) || 1243 (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES)) 1244 flags_ok = 0; 1245 1246 if (debug) 1247 (void) printf( 1248 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n", 1249 largefile_count, sblock.fs_flags, flags_ok); 1250 1251 /* 1252 * If fs is unchanged, do nothing. 1253 */ 1254 if ((!isdirty) && (flags_ok) && 1255 (fslogbno == sblock.fs_logbno) && 1256 (sblock.fs_clean == fsclean) && 1257 (sblock.fs_reclaim == fsreclaim) && 1258 (FSOKAY == (sblock.fs_state + sblock.fs_time))) { 1259 if (errorlocked) { 1260 if (!do_errorlock(LOCKFS_ULOCK)) 1261 pwarn( 1262 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n"); 1263 } 1264 return (freedlog); 1265 } 1266 1267 /* 1268 * if user allows, update superblock state 1269 */ 1270 if (debug) { 1271 (void) printf( 1272 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1273 sblock.fs_flags, sblock.fs_logbno, 1274 sblock.fs_clean, sblock.fs_reclaim, 1275 sblock.fs_state + sblock.fs_time); 1276 (void) printf( 1277 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n", 1278 fsflags, fslogbno, fsclean, fsreclaim, FSOKAY); 1279 } 1280 if (!isdirty && !preen && !rerun && 1281 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0)) 1282 return (freedlog); 1283 1284 (void) time(&t); 1285 sblock.fs_time = (time32_t)t; 1286 if (debug) 1287 printclean(); 1288 1289 if (sblock.fs_logbno != fslogbno) { 1290 examinelog(&freelogblk); 1291 freedlog++; 1292 } 1293 1294 sblock.fs_logbno = fslogbno; 1295 sblock.fs_clean = fsclean; 1296 sblock.fs_state = FSOKAY - (long)sblock.fs_time; 1297 sblock.fs_reclaim = fsreclaim; 1298 sblock.fs_flags = fsflags; 1299 1300 /* 1301 * if superblock can't be written, return 1302 */ 1303 if (fswritefd < 0) 1304 return (freedlog); 1305 1306 /* 1307 * Read private copy of superblock, update clean flag, and write it. 1308 */ 1309 bno = sblk.b_bno; 1310 size = sblk.b_size; 1311 1312 sblkoff = ldbtob(bno); 1313 1314 if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL) 1315 errexit("out of memory"); 1316 if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) { 1317 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1318 (longlong_t)bno, strerror(errno)); 1319 goto out; 1320 } 1321 1322 if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) { 1323 report_io_prob("READ FROM", bno, size, io_res); 1324 goto out; 1325 } 1326 1327 cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno; 1328 cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean; 1329 cleanbuf.b_un.b_fs->fs_state = sblock.fs_state; 1330 cleanbuf.b_un.b_fs->fs_time = sblock.fs_time; 1331 cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim; 1332 cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags; 1333 1334 if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) { 1335 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n", 1336 (longlong_t)bno, strerror(errno)); 1337 goto out; 1338 } 1339 1340 if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) { 1341 report_io_prob("WRITE TO", bno, size, io_res); 1342 goto out; 1343 } 1344 1345 /* 1346 * 1208040 1347 * If we had to use -b to grab an alternate superblock, then we 1348 * likely had to do so because of unacceptable differences between 1349 * the main and alternate superblocks. So, we had better update 1350 * the alternate superblock as well, or we'll just fail again 1351 * the next time we attempt to run fsck! 1352 */ 1353 if (bflag != 0) { 1354 write_altsb(fswritefd); 1355 } 1356 1357 if (errorlocked) { 1358 if (!do_errorlock(LOCKFS_ULOCK)) 1359 pwarn( 1360 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n"); 1361 } 1362 1363 out: 1364 if (cleanbuf.b_un.b_buf != NULL) { 1365 free((void *)cleanbuf.b_un.b_buf); 1366 } 1367 1368 return (freedlog); 1369 } 1370 1371 static void 1372 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure) 1373 { 1374 if (failure < 0) 1375 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n", 1376 what, (int)bno, strerror(errno)); 1377 else if (failure == 0) 1378 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n", 1379 what, (int)bno); 1380 else 1381 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n", 1382 what, (int)bno, (unsigned)failure, (unsigned)expected); 1383 } 1384 1385 /* 1386 * print out clean info 1387 */ 1388 void 1389 printclean(void) 1390 { 1391 caddr_t s; 1392 1393 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) 1394 s = "unknown"; 1395 else 1396 switch (sblock.fs_clean) { 1397 1398 case FSACTIVE: 1399 s = "active"; 1400 break; 1401 1402 case FSCLEAN: 1403 s = "clean"; 1404 break; 1405 1406 case FSSTABLE: 1407 s = "stable"; 1408 break; 1409 1410 case FSLOG: 1411 s = "logging"; 1412 break; 1413 1414 case FSBAD: 1415 s = "is bad"; 1416 break; 1417 1418 case FSFIX: 1419 s = "being fixed"; 1420 break; 1421 1422 default: 1423 s = "unknown"; 1424 } 1425 1426 if (preen) 1427 pwarn("is %s.\n", s); 1428 else 1429 (void) printf("** %s is %s.\n", devname, s); 1430 } 1431 1432 int 1433 is_errorlocked(caddr_t fs) 1434 { 1435 int retval; 1436 struct stat64 statb; 1437 caddr_t mountp; 1438 struct mnttab *mntent; 1439 1440 retval = 0; 1441 1442 if (!fs) 1443 return (0); 1444 1445 if (stat64(fs, &statb) < 0) 1446 return (0); 1447 1448 if (S_ISDIR(statb.st_mode)) { 1449 mountp = fs; 1450 } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) { 1451 mntent = search_mnttab(NULL, fs, NULL, 0); 1452 if (mntent == NULL) 1453 return (0); 1454 mountp = mntent->mnt_mountp; 1455 if (mountp == NULL) /* theoretically a can't-happen */ 1456 return (0); 1457 } else { 1458 return (0); 1459 } 1460 1461 /* 1462 * From here on, must `goto out' to avoid memory leakage. 1463 */ 1464 1465 if (elock_combuf == NULL) 1466 elock_combuf = 1467 (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char)); 1468 else 1469 elock_combuf = 1470 (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN); 1471 1472 if (elock_combuf == NULL) 1473 goto out; 1474 1475 (void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN); 1476 1477 if (elock_mountp != NULL) { 1478 free(elock_mountp); 1479 } 1480 1481 elock_mountp = strdup(mountp); 1482 if (elock_mountp == NULL) 1483 goto out; 1484 1485 if (mountfd < 0) { 1486 if ((mountfd = open64(mountp, O_RDONLY)) == -1) 1487 goto out; 1488 } 1489 1490 if (lfp == NULL) { 1491 lfp = (struct lockfs *)malloc(sizeof (struct lockfs)); 1492 if (lfp == NULL) 1493 goto out; 1494 (void) memset((void *)lfp, 0, sizeof (struct lockfs)); 1495 } 1496 1497 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1498 lfp->lf_comment = elock_combuf; 1499 1500 if (ioctl(mountfd, _FIOLFSS, lfp) == -1) 1501 goto out; 1502 1503 /* 1504 * lint believes that the ioctl() (or any other function 1505 * taking lfp as an arg) could free lfp. This is not the 1506 * case, however. 1507 */ 1508 retval = LOCKFS_IS_ELOCK(lfp); 1509 1510 out: 1511 return (retval); 1512 } 1513 1514 /* 1515 * Given a name which is known to be a directory, see if it appears 1516 * in the vfstab. If so, return the entry's block (special) device 1517 * field via devstr. 1518 */ 1519 int 1520 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size) 1521 { 1522 return (NULL != search_vfstab(name, NULL, devstr, str_size)); 1523 } 1524 1525 /* 1526 * Given a name which is known to be a directory, see if it appears 1527 * in the mnttab. If so, return the entry's block (special) device 1528 * field via devstr. 1529 */ 1530 int 1531 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size) 1532 { 1533 return (NULL != search_mnttab(name, NULL, devstr, str_size)); 1534 } 1535 1536 /* 1537 * Search for mount point and/or special device in the given file. 1538 * The first matching entry is returned. 1539 * 1540 * If an entry is found and str_size is greater than zero, then 1541 * up to size_str bytes of the special device name from the entry 1542 * are copied to devstr. 1543 */ 1544 1545 #define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \ 1546 st_nuller, st_init, st_searcher) \ 1547 { \ 1548 FILE *fp; \ 1549 struct st_type *retval = NULL; \ 1550 struct st_type key; \ 1551 static struct st_type buffer; \ 1552 \ 1553 /* LINTED ``assigned value never used'' */ \ 1554 st_nuller(&key); \ 1555 key.st_mount = mountp; \ 1556 key.st_special = special; \ 1557 st_init; \ 1558 \ 1559 if ((fp = fopen(st_file, "r")) == NULL) \ 1560 return (NULL); \ 1561 \ 1562 if (st_searcher(fp, &buffer, &key) == 0) { \ 1563 retval = &buffer; \ 1564 if (devstr != NULL && str_size > 0 && \ 1565 buffer.st_special != NULL) { \ 1566 (void) strlcpy(devstr, buffer.st_special, \ 1567 str_size); \ 1568 } \ 1569 } \ 1570 (void) fclose(fp); \ 1571 return (retval); \ 1572 } 1573 1574 static struct vfstab * 1575 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1576 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull, 1577 (retval = retval), getvfsany) 1578 1579 static struct mnttab * 1580 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size) 1581 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull, 1582 (key.mnt_fstype = MNTTYPE_UFS), getmntany) 1583 1584 int 1585 do_errorlock(int lock_type) 1586 { 1587 caddr_t buf; 1588 time_t now; 1589 struct tm *local; 1590 int rc; 1591 1592 if (elock_combuf == NULL) 1593 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n", 1594 elock_mountp ? elock_mountp : "<null>", 1595 lock_type); 1596 1597 if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) == 1598 NULL) { 1599 errexit("Couldn't alloc memory for temp. lock status buffer\n"); 1600 } 1601 if (lfp == NULL) { 1602 errexit("do_errorlock(%s, %d): lockfs status unallocated\n", 1603 elock_mountp, lock_type); 1604 } 1605 1606 (void) memmove((void *)buf, (void *)elock_combuf, 1607 LOCKFS_MAXCOMMENTLEN-1); 1608 1609 switch (lock_type) { 1610 case LOCKFS_ELOCK: 1611 /* 1612 * Note that if it is error-locked, we won't get an 1613 * error back if we try to error-lock it again. 1614 */ 1615 if (time(&now) != (time_t)-1) { 1616 if ((local = localtime(&now)) != NULL) 1617 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1618 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d", 1619 elock_combuf, (int)pid, 1620 local->tm_mon + 1, local->tm_mday, 1621 (local->tm_year % 100), local->tm_hour, 1622 local->tm_min, local->tm_sec); 1623 else 1624 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1625 "%s [fsck pid %d", elock_combuf, pid); 1626 1627 } else { 1628 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1629 "%s [fsck pid %d", elock_combuf, pid); 1630 } 1631 break; 1632 1633 case LOCKFS_ULOCK: 1634 if (time(&now) != (time_t)-1) { 1635 if ((local = localtime(&now)) != NULL) { 1636 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1637 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]", 1638 elock_combuf, 1639 local->tm_mon + 1, local->tm_mday, 1640 (local->tm_year % 100), local->tm_hour, 1641 local->tm_min, local->tm_sec); 1642 } else { 1643 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1644 "%s]", elock_combuf); 1645 } 1646 } else { 1647 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN, 1648 "%s]", elock_combuf); 1649 } 1650 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) { 1651 pwarn("do_errorlock: unlock failed: %s\n", 1652 strerror(errno)); 1653 goto out; 1654 } 1655 break; 1656 1657 default: 1658 break; 1659 } 1660 1661 (void) memmove((void *)elock_combuf, (void *)buf, 1662 LOCKFS_MAXCOMMENTLEN - 1); 1663 1664 lfp->lf_lock = lock_type; 1665 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN; 1666 lfp->lf_comment = elock_combuf; 1667 lfp->lf_flags = 0; 1668 errno = 0; 1669 1670 if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) { 1671 if (errno == EINVAL) { 1672 pwarn("Another fsck active?\n"); 1673 iscorrupt = 0; /* don't go away mad, just go away */ 1674 } else { 1675 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n", 1676 lock_type, elock_combuf, strerror(errno)); 1677 } 1678 } 1679 out: 1680 if (buf != NULL) { 1681 free((void *)buf); 1682 } 1683 1684 return (rc != -1); 1685 } 1686 1687 /* 1688 * Shadow inode support. To register a shadow with a client is to note 1689 * that an inode (the client) refers to the shadow. 1690 */ 1691 1692 static struct shadowclients * 1693 newshadowclient(struct shadowclients *prev) 1694 { 1695 struct shadowclients *rc; 1696 1697 rc = (struct shadowclients *)malloc(sizeof (*rc)); 1698 if (rc == NULL) 1699 errexit("newshadowclient: cannot malloc shadow client"); 1700 rc->next = prev; 1701 rc->nclients = 0; 1702 1703 rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) * 1704 maxshadowclients); 1705 if (rc->client == NULL) 1706 errexit("newshadowclient: cannot malloc client array"); 1707 return (rc); 1708 } 1709 1710 void 1711 registershadowclient(fsck_ino_t shadow, fsck_ino_t client, 1712 struct shadowclientinfo **info) 1713 { 1714 struct shadowclientinfo *sci; 1715 struct shadowclients *scc; 1716 1717 /* 1718 * Already have a record for this shadow? 1719 */ 1720 for (sci = *info; sci != NULL; sci = sci->next) 1721 if (sci->shadow == shadow) 1722 break; 1723 if (sci == NULL) { 1724 /* 1725 * It's a new shadow, add it to the list 1726 */ 1727 sci = (struct shadowclientinfo *)malloc(sizeof (*sci)); 1728 if (sci == NULL) 1729 errexit("registershadowclient: cannot malloc"); 1730 sci->next = *info; 1731 *info = sci; 1732 sci->shadow = shadow; 1733 sci->totalClients = 0; 1734 sci->clients = newshadowclient(NULL); 1735 } 1736 1737 sci->totalClients++; 1738 scc = sci->clients; 1739 if (scc->nclients >= maxshadowclients) { 1740 scc = newshadowclient(sci->clients); 1741 sci->clients = scc; 1742 } 1743 1744 scc->client[scc->nclients++] = client; 1745 } 1746 1747 /* 1748 * Locate and discard a shadow. 1749 */ 1750 void 1751 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info) 1752 { 1753 struct shadowclientinfo *sci, *prev; 1754 1755 /* 1756 * Do we have a record for this shadow? 1757 */ 1758 prev = NULL; 1759 for (sci = *info; sci != NULL; sci = sci->next) { 1760 if (sci->shadow == shadow) 1761 break; 1762 prev = sci; 1763 } 1764 1765 if (sci != NULL) { 1766 /* 1767 * First, pull it off the list, since we know there 1768 * shouldn't be any future references to this one. 1769 */ 1770 if (prev == NULL) 1771 *info = sci->next; 1772 else 1773 prev->next = sci->next; 1774 deshadow(sci, clearattrref); 1775 } 1776 } 1777 1778 /* 1779 * Discard all memory used to track clients of a shadow. 1780 */ 1781 void 1782 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t)) 1783 { 1784 struct shadowclients *clients, *discard; 1785 int idx; 1786 1787 clients = sci->clients; 1788 while (clients != NULL) { 1789 discard = clients; 1790 clients = clients->next; 1791 if (discard->client != NULL) { 1792 if (cb != NULL) { 1793 for (idx = 0; idx < discard->nclients; idx++) 1794 (*cb)(discard->client[idx]); 1795 } 1796 free((void *)discard->client); 1797 } 1798 free((void *)discard); 1799 } 1800 1801 free((void *)sci); 1802 } 1803 1804 /* 1805 * Allocate more buffer as need arises but allocate one at a time. 1806 * This is done to make sure that fsck does not exit with error if it 1807 * needs more buffer to complete its task. 1808 */ 1809 static struct bufarea * 1810 alloc_bufarea(void) 1811 { 1812 struct bufarea *newbp; 1813 caddr_t bufp; 1814 1815 bufp = malloc((unsigned int)sblock.fs_bsize); 1816 if (bufp == NULL) 1817 return (NULL); 1818 1819 newbp = (struct bufarea *)malloc(sizeof (struct bufarea)); 1820 if (newbp == NULL) { 1821 free((void *)bufp); 1822 return (NULL); 1823 } 1824 1825 initbarea(newbp); 1826 newbp->b_un.b_buf = bufp; 1827 newbp->b_prev = &bufhead; 1828 newbp->b_next = bufhead.b_next; 1829 bufhead.b_next->b_prev = newbp; 1830 bufhead.b_next = newbp; 1831 bufhead.b_size++; 1832 return (newbp); 1833 } 1834 1835 /* 1836 * We length-limit in both unrawname() and rawname() to avoid 1837 * overflowing our arrays or those of our naive, trusting callers. 1838 */ 1839 1840 caddr_t 1841 unrawname(caddr_t name) 1842 { 1843 caddr_t dp; 1844 static char fullname[MAXPATHLEN + 1]; 1845 1846 if ((dp = getfullblkname(name)) == NULL) 1847 return (""); 1848 1849 (void) strlcpy(fullname, dp, sizeof (fullname)); 1850 /* 1851 * Not reporting under debug, as the allocation isn't 1852 * reported by getfullblkname. The idea is that we 1853 * produce balanced alloc/free instances. 1854 */ 1855 free(dp); 1856 1857 return (fullname); 1858 } 1859 1860 caddr_t 1861 rawname(caddr_t name) 1862 { 1863 caddr_t dp; 1864 static char fullname[MAXPATHLEN + 1]; 1865 1866 if ((dp = getfullrawname(name)) == NULL) 1867 return (""); 1868 1869 (void) strlcpy(fullname, dp, sizeof (fullname)); 1870 /* 1871 * Not reporting under debug, as the allocation isn't 1872 * reported by getfullblkname. The idea is that we 1873 * produce balanced alloc/free instances. 1874 */ 1875 free(dp); 1876 1877 return (fullname); 1878 } 1879 1880 /* 1881 * Make sure that a cg header looks at least moderately reasonable. 1882 * We want to be able to trust the contents enough to be able to use 1883 * the standard accessor macros. So, besides looking at the obvious 1884 * such as the magic number, we verify that the offset field values 1885 * are properly aligned and not too big or small. 1886 * 1887 * Returns a NULL pointer if the cg is sane enough for our needs, else 1888 * a dynamically-allocated string describing all of its faults. 1889 */ 1890 #define Append_Error(full, full_len, addition, addition_len) \ 1891 if (full == NULL) { \ 1892 full = addition; \ 1893 full_len = addition_len; \ 1894 } else { \ 1895 /* lint doesn't think realloc() understands NULLs */ \ 1896 full = realloc(full, full_len + addition_len + 1); \ 1897 if (full == NULL) { \ 1898 errexit("Out of memory in cg_sanity"); \ 1899 /* NOTREACHED */ \ 1900 } \ 1901 (void) strcpy(full + full_len, addition); \ 1902 full_len += addition_len; \ 1903 free(addition); \ 1904 } 1905 1906 caddr_t 1907 cg_sanity(struct cg *cgp, int cgno) 1908 { 1909 caddr_t full_err; 1910 caddr_t this_err = NULL; 1911 int full_len, this_len; 1912 daddr32_t ndblk; 1913 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 1914 daddr32_t exp_freeoff, exp_nextfreeoff; 1915 1916 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 1917 &exp_freeoff, &exp_nextfreeoff, &ndblk); 1918 1919 full_err = NULL; 1920 full_len = 0; 1921 1922 if (!cg_chkmagic(cgp)) { 1923 this_len = fsck_asprintf(&this_err, 1924 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n", 1925 cgp->cg_magic, CG_MAGIC); 1926 Append_Error(full_err, full_len, this_err, this_len); 1927 } 1928 1929 if (cgp->cg_cgx != cgno) { 1930 this_len = fsck_asprintf(&this_err, 1931 "WRONG CG NUMBER (%d should be %d)\n", 1932 cgp->cg_cgx, cgno); 1933 Append_Error(full_err, full_len, this_err, this_len); 1934 } 1935 1936 if ((cgp->cg_btotoff & 3) != 0) { 1937 this_len = fsck_asprintf(&this_err, 1938 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n", 1939 cgp->cg_btotoff); 1940 Append_Error(full_err, full_len, this_err, this_len); 1941 } 1942 1943 if ((cgp->cg_boff & 1) != 0) { 1944 this_len = fsck_asprintf(&this_err, 1945 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n", 1946 cgp->cg_boff); 1947 Append_Error(full_err, full_len, this_err, this_len); 1948 } 1949 1950 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 1951 if (cgp->cg_ncyl < 1) { 1952 this_len = fsck_asprintf(&this_err, 1953 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n", 1954 cgp->cg_ncyl); 1955 } else { 1956 this_len = fsck_asprintf(&this_err, 1957 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n", 1958 cgp->cg_ncyl, sblock.fs_cpg); 1959 } 1960 Append_Error(full_err, full_len, this_err, this_len); 1961 } 1962 1963 if (cgp->cg_niblk != sblock.fs_ipg) { 1964 this_len = fsck_asprintf(&this_err, 1965 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n", 1966 cgp->cg_niblk, sblock.fs_ipg); 1967 Append_Error(full_err, full_len, this_err, this_len); 1968 } 1969 1970 if (cgp->cg_ndblk != ndblk) { 1971 this_len = fsck_asprintf(&this_err, 1972 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n", 1973 cgp->cg_ndblk, ndblk); 1974 Append_Error(full_err, full_len, this_err, this_len); 1975 } 1976 1977 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) { 1978 this_len = fsck_asprintf(&this_err, 1979 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION " 1980 "(%d should be at least 0 and less than %d)\n", 1981 cgp->cg_rotor, ndblk); 1982 Append_Error(full_err, full_len, this_err, this_len); 1983 } 1984 1985 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) { 1986 this_len = fsck_asprintf(&this_err, 1987 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION " 1988 "(%d should be at least 0 and less than %d)\n", 1989 cgp->cg_frotor, ndblk); 1990 Append_Error(full_err, full_len, this_err, this_len); 1991 } 1992 1993 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 1994 this_len = fsck_asprintf(&this_err, 1995 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION " 1996 "(%d should be at least 0 and less than %d)\n", 1997 cgp->cg_irotor, sblock.fs_ipg); 1998 Append_Error(full_err, full_len, this_err, this_len); 1999 } 2000 2001 if (cgp->cg_btotoff != exp_btotoff) { 2002 this_len = fsck_asprintf(&this_err, 2003 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n", 2004 cgp->cg_btotoff, exp_btotoff); 2005 Append_Error(full_err, full_len, this_err, this_len); 2006 } 2007 2008 if (cgp->cg_boff != exp_boff) { 2009 this_len = fsck_asprintf(&this_err, 2010 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n", 2011 cgp->cg_boff, exp_boff); 2012 Append_Error(full_err, full_len, this_err, this_len); 2013 } 2014 2015 if (cgp->cg_iusedoff != exp_iusedoff) { 2016 this_len = fsck_asprintf(&this_err, 2017 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n", 2018 cgp->cg_iusedoff, exp_iusedoff); 2019 Append_Error(full_err, full_len, this_err, this_len); 2020 } 2021 2022 if (cgp->cg_freeoff != exp_freeoff) { 2023 this_len = fsck_asprintf(&this_err, 2024 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n", 2025 cgp->cg_freeoff, exp_freeoff); 2026 Append_Error(full_err, full_len, this_err, this_len); 2027 } 2028 2029 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2030 this_len = fsck_asprintf(&this_err, 2031 "END OF HEADER POSITION INCORRECT (%d should be %d)\n", 2032 cgp->cg_nextfreeoff, exp_nextfreeoff); 2033 Append_Error(full_err, full_len, this_err, this_len); 2034 } 2035 2036 return (full_err); 2037 } 2038 2039 #undef Append_Error 2040 2041 /* 2042 * This is taken from mkfs, and is what is used to come up with the 2043 * original values for a struct cg. This implies that, since these 2044 * are all constants, recalculating them now should give us the same 2045 * thing as what's on disk. 2046 */ 2047 static void 2048 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff, 2049 daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff, 2050 daddr32_t *ndblk) 2051 { 2052 daddr32_t cbase, dmax; 2053 struct cg *cgp; 2054 2055 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno), 2056 (size_t)sblock.fs_cgsize); 2057 cgp = cgblk.b_un.b_cg; 2058 2059 cbase = cgbase(&sblock, cgno); 2060 dmax = cbase + sblock.fs_fpg; 2061 if (dmax > sblock.fs_size) 2062 dmax = sblock.fs_size; 2063 2064 /* LINTED pointer difference won't overflow */ 2065 *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link); 2066 *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t); 2067 *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t); 2068 *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY); 2069 *nextfreeoff = *freeoff + 2070 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY); 2071 *ndblk = dmax - cbase; 2072 } 2073 2074 /* 2075 * Corrects all fields in the cg that can be done with the available 2076 * redundant data. 2077 */ 2078 void 2079 fix_cg(struct cg *cgp, int cgno) 2080 { 2081 daddr32_t exp_btotoff, exp_boff, exp_iusedoff; 2082 daddr32_t exp_freeoff, exp_nextfreeoff; 2083 daddr32_t ndblk; 2084 2085 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff, 2086 &exp_freeoff, &exp_nextfreeoff, &ndblk); 2087 2088 if (cgp->cg_cgx != cgno) { 2089 cgp->cg_cgx = cgno; 2090 } 2091 2092 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) { 2093 if (cgno == (sblock.fs_ncg - 1)) { 2094 cgp->cg_ncyl = sblock.fs_ncyl - 2095 (sblock.fs_cpg * cgno); 2096 } else { 2097 cgp->cg_ncyl = sblock.fs_cpg; 2098 } 2099 } 2100 2101 if (cgp->cg_niblk != sblock.fs_ipg) { 2102 /* 2103 * This is not used by the kernel, so it's pretty 2104 * harmless if it's wrong. 2105 */ 2106 cgp->cg_niblk = sblock.fs_ipg; 2107 } 2108 2109 if (cgp->cg_ndblk != ndblk) { 2110 cgp->cg_ndblk = ndblk; 2111 } 2112 2113 /* 2114 * For the rotors, any position's valid, so pick the one we know 2115 * will always exist. 2116 */ 2117 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) { 2118 cgp->cg_rotor = 0; 2119 } 2120 2121 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) { 2122 cgp->cg_frotor = 0; 2123 } 2124 2125 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) { 2126 cgp->cg_irotor = 0; 2127 } 2128 2129 /* 2130 * For btotoff and boff, if they're misaligned they won't 2131 * match the expected values, so we're catching both cases 2132 * here. Of course, if any of these are off, it seems likely 2133 * that the tables really won't be where we calculate they 2134 * should be anyway. 2135 */ 2136 if (cgp->cg_btotoff != exp_btotoff) { 2137 cgp->cg_btotoff = exp_btotoff; 2138 } 2139 2140 if (cgp->cg_boff != exp_boff) { 2141 cgp->cg_boff = exp_boff; 2142 } 2143 2144 if (cgp->cg_iusedoff != exp_iusedoff) { 2145 cgp->cg_iusedoff = exp_iusedoff; 2146 } 2147 2148 if (cgp->cg_freeoff != exp_freeoff) { 2149 cgp->cg_freeoff = exp_freeoff; 2150 } 2151 2152 if (cgp->cg_nextfreeoff != exp_nextfreeoff) { 2153 cgp->cg_nextfreeoff = exp_nextfreeoff; 2154 } 2155 2156 /* 2157 * Reset the magic, as we've recreated this cg, also 2158 * update the cg_time, as we're writing out the cg 2159 */ 2160 cgp->cg_magic = CG_MAGIC; 2161 cgp->cg_time = time(NULL); 2162 2163 /* 2164 * We know there was at least one correctable problem, 2165 * or else we wouldn't have been called. So instead of 2166 * marking the buffer dirty N times above, just do it 2167 * once here. 2168 */ 2169 cgdirty(); 2170 } 2171 2172 void 2173 examinelog(void (*cb)(daddr32_t)) 2174 { 2175 struct bufarea *bp; 2176 extent_block_t *ebp; 2177 extent_t *ep; 2178 daddr32_t nfno, fno; 2179 int i; 2180 int j; 2181 2182 /* 2183 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags 2184 * we need to translate accordingly using logbtodb() 2185 */ 2186 2187 if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) { 2188 if (debug) { 2189 (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \ 2190 "Aborting log examination\n", \ 2191 logbtodb(&sblock, sblock.fs_logbno), SBLOCK); 2192 } 2193 return; 2194 } 2195 2196 /* 2197 * Read errors will return zeros, which will cause us 2198 * to do nothing harmful, so don't need to handle it. 2199 */ 2200 bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno), 2201 (size_t)sblock.fs_bsize); 2202 ebp = (void *)bp->b_un.b_buf; 2203 2204 /* 2205 * Does it look like a log allocation table? 2206 */ 2207 /* LINTED pointer cast is aligned */ 2208 if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf, 2209 sblock.fs_bsize)) 2210 return; 2211 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0) 2212 return; 2213 2214 ep = &ebp->extents[0]; 2215 for (i = 0; i < ebp->nextents; ++i, ++ep) { 2216 fno = logbtofrag(&sblock, ep->pbno); 2217 nfno = dbtofsb(&sblock, ep->nbno); 2218 for (j = 0; j < nfno; ++j, ++fno) { 2219 /* 2220 * Invoke the callback first, so that pass1 can 2221 * mark the log blocks in-use. Then, if any 2222 * subsequent pass over the log shows us that a 2223 * block got freed (say, it was also claimed by 2224 * an inode that we cleared), we can safely declare 2225 * the log bad. 2226 */ 2227 if (cb != NULL) 2228 (*cb)(fno); 2229 if (!testbmap(fno)) 2230 islogok = 0; 2231 } 2232 } 2233 brelse(bp); 2234 2235 if (cb != NULL) { 2236 fno = logbtofrag(&sblock, sblock.fs_logbno); 2237 for (j = 0; j < sblock.fs_frag; ++j, ++fno) 2238 (*cb)(fno); 2239 } 2240 } 2241 2242 static void 2243 freelogblk(daddr32_t frag) 2244 { 2245 freeblk(sblock.fs_logbno, frag, 1); 2246 } 2247 2248 caddr_t 2249 file_id(fsck_ino_t inum, mode_t mode) 2250 { 2251 static char name[MAXPATHLEN + 1]; 2252 2253 if (lfdir == inum) { 2254 return (lfname); 2255 } 2256 2257 if ((mode & IFMT) == IFDIR) { 2258 (void) strcpy(name, "DIR"); 2259 } else if ((mode & IFMT) == IFATTRDIR) { 2260 (void) strcpy(name, "ATTR DIR"); 2261 } else if ((mode & IFMT) == IFSHAD) { 2262 (void) strcpy(name, "ACL"); 2263 } else { 2264 (void) strcpy(name, "FILE"); 2265 } 2266 2267 return (name); 2268 } 2269 2270 /* 2271 * Simple initializer for inodesc structures, so users of only a few 2272 * fields don't have to worry about getting the right defaults for 2273 * everything out. 2274 */ 2275 void 2276 init_inodesc(struct inodesc *idesc) 2277 { 2278 /* 2279 * Most fields should be zero, just hit the special cases. 2280 */ 2281 (void) memset((void *)idesc, 0, sizeof (struct inodesc)); 2282 idesc->id_fix = DONTKNOW; 2283 idesc->id_lbn = -1; 2284 idesc->id_truncto = -1; 2285 idesc->id_firsthole = -1; 2286 } 2287 2288 /* 2289 * Compare routine for tsearch(C) to use on ino_t instances. 2290 */ 2291 int 2292 ino_t_cmp(const void *left, const void *right) 2293 { 2294 const fsck_ino_t lino = (const fsck_ino_t)left; 2295 const fsck_ino_t rino = (const fsck_ino_t)right; 2296 2297 return (lino - rino); 2298 } 2299 2300 int 2301 cgisdirty(void) 2302 { 2303 return (cgblk.b_dirty); 2304 } 2305 2306 void 2307 cgflush(void) 2308 { 2309 flush(fswritefd, &cgblk); 2310 } 2311 2312 void 2313 dirty(struct bufarea *bp) 2314 { 2315 if (fswritefd < 0) { 2316 /* 2317 * No one should call dirty() in read only mode. 2318 * But if one does, it's not fatal issue. Just warn them. 2319 */ 2320 pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n"); 2321 } else { 2322 (bp)->b_dirty = 1; 2323 isdirty = 1; 2324 } 2325 } 2326 2327 void 2328 initbarea(struct bufarea *bp) 2329 { 2330 (bp)->b_dirty = 0; 2331 (bp)->b_bno = (diskaddr_t)-1LL; 2332 (bp)->b_flags = 0; 2333 (bp)->b_cnt = 0; 2334 (bp)->b_errs = 0; 2335 } 2336 2337 /* 2338 * Partition-sizing routines adapted from ../newfs/newfs.c. 2339 * Needed because calcsb() needs to use mkfs to work out what the 2340 * superblock should be, and mkfs insists on being told how many 2341 * sectors to use. 2342 * 2343 * Error handling assumes we're never called while preening. 2344 * 2345 * XXX This should be extracted into a ../ufslib.{c,h}, 2346 * in the same spirit to ../../fslib.{c,h}. Once that is 2347 * done, both fsck and newfs should be modified to link 2348 * against it. 2349 */ 2350 2351 static int label_type; 2352 2353 #define LABEL_TYPE_VTOC 1 2354 #define LABEL_TYPE_EFI 2 2355 #define LABEL_TYPE_OTHER 3 2356 2357 #define MB (1024 * 1024) 2358 #define SECTORS_PER_TERABYTE (1LL << 31) 2359 #define FS_SIZE_UPPER_LIMIT 0x100000000000LL 2360 2361 diskaddr_t 2362 getdisksize(caddr_t disk, int fd) 2363 { 2364 int rpm; 2365 struct dk_geom g; 2366 struct dk_cinfo ci; 2367 diskaddr_t actual_size; 2368 2369 /* 2370 * get_device_size() determines the actual size of the 2371 * device, and also the disk's attributes, such as geometry. 2372 */ 2373 actual_size = get_device_size(fd, disk); 2374 2375 if (label_type == LABEL_TYPE_VTOC) { 2376 if (ioctl(fd, DKIOCGGEOM, &g)) { 2377 pwarn("%s: Unable to read Disk geometry", disk); 2378 return (0); 2379 } 2380 if (sblock.fs_nsect == 0) 2381 sblock.fs_nsect = g.dkg_nsect; 2382 if (sblock.fs_ntrak == 0) 2383 sblock.fs_ntrak = g.dkg_nhead; 2384 if (sblock.fs_rps == 0) { 2385 rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm; 2386 sblock.fs_rps = rpm / 60; 2387 } 2388 } 2389 2390 if (sblock.fs_bsize == 0) 2391 sblock.fs_bsize = MAXBSIZE; 2392 2393 /* 2394 * Adjust maxcontig by the device's maxtransfer. If maxtransfer 2395 * information is not available, default to the min of a MB and 2396 * maxphys. 2397 */ 2398 if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) { 2399 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE; 2400 if (sblock.fs_maxcontig < 0) { 2401 int gotit, maxphys; 2402 2403 gotit = fsgetmaxphys(&maxphys, NULL); 2404 2405 /* 2406 * If we cannot get the maxphys value, default 2407 * to ufs_maxmaxphys (MB). 2408 */ 2409 if (gotit) { 2410 sblock.fs_maxcontig = MIN(maxphys, MB); 2411 } else { 2412 sblock.fs_maxcontig = MB; 2413 } 2414 } 2415 sblock.fs_maxcontig /= sblock.fs_bsize; 2416 } 2417 2418 return (actual_size); 2419 } 2420 2421 /* 2422 * Figure out how big the partition we're dealing with is. 2423 */ 2424 static diskaddr_t 2425 get_device_size(int fd, caddr_t name) 2426 { 2427 struct extvtoc vtoc; 2428 struct dk_gpt *efi_vtoc; 2429 diskaddr_t slicesize = 0; 2430 2431 int index = read_extvtoc(fd, &vtoc); 2432 2433 if (index >= 0) { 2434 label_type = LABEL_TYPE_VTOC; 2435 } else { 2436 if (index == VT_ENOTSUP || index == VT_ERROR) { 2437 /* it might be an EFI label */ 2438 index = efi_alloc_and_read(fd, &efi_vtoc); 2439 if (index >= 0) 2440 label_type = LABEL_TYPE_EFI; 2441 } 2442 } 2443 2444 if (index < 0) { 2445 /* 2446 * Since both attempts to read the label failed, we're 2447 * going to fall back to a brute force approach to 2448 * determining the device's size: see how far out we can 2449 * perform reads on the device. 2450 */ 2451 2452 slicesize = brute_force_get_device_size(fd); 2453 if (slicesize == 0) { 2454 switch (index) { 2455 case VT_ERROR: 2456 pwarn("%s: %s\n", name, strerror(errno)); 2457 break; 2458 case VT_EIO: 2459 pwarn("%s: I/O error accessing VTOC", name); 2460 break; 2461 case VT_EINVAL: 2462 pwarn("%s: Invalid field in VTOC", name); 2463 break; 2464 default: 2465 pwarn("%s: unknown error %d accessing VTOC", 2466 name, index); 2467 break; 2468 } 2469 return (0); 2470 } else { 2471 label_type = LABEL_TYPE_OTHER; 2472 } 2473 } 2474 2475 if (label_type == LABEL_TYPE_EFI) { 2476 slicesize = efi_vtoc->efi_parts[index].p_size; 2477 efi_free(efi_vtoc); 2478 } else if (label_type == LABEL_TYPE_VTOC) { 2479 slicesize = vtoc.v_part[index].p_size; 2480 } 2481 2482 return (slicesize); 2483 } 2484 2485 /* 2486 * brute_force_get_device_size 2487 * 2488 * Determine the size of the device by seeing how far we can 2489 * read. Doing an llseek( , , SEEK_END) would probably work 2490 * in most cases, but we've seen at least one third-party driver 2491 * which doesn't correctly support the SEEK_END option when the 2492 * the device is greater than a terabyte. 2493 */ 2494 2495 static diskaddr_t 2496 brute_force_get_device_size(int fd) 2497 { 2498 diskaddr_t min_fail = 0; 2499 diskaddr_t max_succeed = 0; 2500 diskaddr_t cur_db_off; 2501 char buf[DEV_BSIZE]; 2502 2503 /* 2504 * First, see if we can read the device at all, just to 2505 * eliminate errors that have nothing to do with the 2506 * device's size. 2507 */ 2508 2509 if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) || 2510 ((read(fd, buf, DEV_BSIZE)) == -1)) 2511 return (0); /* can't determine size */ 2512 2513 /* 2514 * Now, go sequentially through the multiples of 4TB 2515 * to find the first read that fails (this isn't strictly 2516 * the most efficient way to find the actual size if the 2517 * size really could be anything between 0 and 2**64 bytes. 2518 * We expect the sizes to be less than 16 TB for some time, 2519 * so why do a bunch of reads that are larger than that? 2520 * However, this algorithm *will* work for sizes of greater 2521 * than 16 TB. We're just not optimizing for those sizes.) 2522 */ 2523 2524 /* 2525 * XXX lint uses 32-bit arithmetic for doing flow analysis. 2526 * We're using > 32-bit constants here. Therefore, its flow 2527 * analysis is wrong. For the time being, ignore complaints 2528 * from it about the body of the for() being unreached. 2529 */ 2530 for (cur_db_off = SECTORS_PER_TERABYTE * 4; 2531 (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT); 2532 cur_db_off += 4 * SECTORS_PER_TERABYTE) { 2533 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2534 SEEK_SET) == -1) || 2535 (read(fd, buf, DEV_BSIZE) != DEV_BSIZE)) 2536 min_fail = cur_db_off; 2537 else 2538 max_succeed = cur_db_off; 2539 } 2540 2541 /* 2542 * XXX Same lint flow analysis problem as above. 2543 */ 2544 if (min_fail == 0) 2545 return (0); 2546 2547 /* 2548 * We now know that the size of the device is less than 2549 * min_fail and greater than or equal to max_succeed. Now 2550 * keep splitting the difference until the actual size in 2551 * sectors in known. We also know that the difference 2552 * between max_succeed and min_fail at this time is 2553 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which 2554 * simplifies the math below. 2555 */ 2556 2557 while (min_fail - max_succeed > 1) { 2558 cur_db_off = max_succeed + (min_fail - max_succeed)/2; 2559 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE), 2560 SEEK_SET)) == -1) || 2561 ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)) 2562 min_fail = cur_db_off; 2563 else 2564 max_succeed = cur_db_off; 2565 } 2566 2567 /* the size is the last successfully read sector offset plus one */ 2568 return (max_succeed + 1); 2569 } 2570 2571 static void 2572 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap) 2573 { 2574 struct dinode *dp; 2575 char pathbuf[MAXPATHLEN + 1]; 2576 2577 vpwarn(fmt, ap); 2578 (void) putchar(' '); 2579 pinode(ino); 2580 (void) printf("\n"); 2581 getpathname(pathbuf, cwd, ino); 2582 if (ino < UFSROOTINO || ino > maxino) { 2583 pfatal("NAME=%s\n", pathbuf); 2584 return; 2585 } 2586 dp = ginode(ino); 2587 if (ftypeok(dp)) 2588 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf); 2589 else 2590 pfatal("NAME=%s\n", pathbuf); 2591 } 2592 2593 void 2594 direrror(fsck_ino_t ino, caddr_t fmt, ...) 2595 { 2596 va_list ap; 2597 2598 va_start(ap, fmt); 2599 vfileerror(ino, ino, fmt, ap); 2600 va_end(ap); 2601 } 2602 2603 static void 2604 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap) 2605 { 2606 vfileerror(ino, ino, fmt, ap); 2607 } 2608 2609 void 2610 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...) 2611 { 2612 va_list ap; 2613 2614 va_start(ap, fmt); 2615 vfileerror(cwd, ino, fmt, ap); 2616 va_end(ap); 2617 } 2618 2619 /* 2620 * Adds the given inode to the orphaned-directories list, limbo_dirs. 2621 * Assumes that the caller has set INCLEAR in the inode's statemap[] 2622 * entry. 2623 * 2624 * With INCLEAR set, the inode will get ignored by passes 2 and 3, 2625 * meaning it's effectively an orphan. It needs to be noted now, so 2626 * it will be remembered in pass 4. 2627 */ 2628 2629 void 2630 add_orphan_dir(fsck_ino_t ino) 2631 { 2632 if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL) 2633 errexit("add_orphan_dir: out of memory"); 2634 } 2635 2636 /* 2637 * Remove an inode from the orphaned-directories list, presumably 2638 * because it's been cleared. 2639 */ 2640 void 2641 remove_orphan_dir(fsck_ino_t ino) 2642 { 2643 (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp); 2644 } 2645 2646 /* 2647 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum() 2648 * and lufs.c:checksum(). 2649 */ 2650 static void 2651 log_setsum(int32_t *sp, int32_t *lp, int nb) 2652 { 2653 int32_t csum = 0; 2654 2655 *sp = 0; 2656 nb /= sizeof (int32_t); 2657 while (nb--) 2658 csum += *lp++; 2659 *sp = csum; 2660 } 2661 2662 static int 2663 log_checksum(int32_t *sp, int32_t *lp, int nb) 2664 { 2665 int32_t ssum = *sp; 2666 2667 log_setsum(sp, lp, nb); 2668 if (ssum != *sp) { 2669 *sp = ssum; 2670 return (0); 2671 } 2672 return (1); 2673 } 2674