1 /*
2 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016 by Delphix. All rights reserved.
4 */
5
6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
7 /* All Rights Reserved */
8
9 /*
10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms are permitted
14 * provided that: (1) source distributions retain this entire copyright
15 * notice and comment, and (2) distributions including binaries display
16 * the following acknowledgement: ``This product includes software
17 * developed by the University of California, Berkeley and its contributors''
18 * in the documentation or other materials provided with the distribution
19 * and in all advertising materials mentioning features or use of this
20 * software. Neither the name of the University nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26 */
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <stdarg.h>
32 #include <libadm.h>
33 #include <note.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/mntent.h>
37 #include <sys/filio.h>
38 #include <sys/fs/ufs_fs.h>
39 #include <sys/vnode.h>
40 #include <sys/fs/ufs_acl.h>
41 #include <sys/fs/ufs_inode.h>
42 #include <sys/fs/ufs_log.h>
43 #define _KERNEL
44 #include <sys/fs/ufs_fsdir.h>
45 #undef _KERNEL
46 #include <sys/mnttab.h>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <fcntl.h>
50 #include <signal.h>
51 #include <string.h>
52 #include <ctype.h>
53 #include <sys/vfstab.h>
54 #include <sys/lockfs.h>
55 #include <errno.h>
56 #include <sys/cmn_err.h>
57 #include <sys/dkio.h>
58 #include <sys/vtoc.h>
59 #include <sys/efi_partition.h>
60 #include <fslib.h>
61 #include <inttypes.h>
62 #include "fsck.h"
63
/*
 * Buffer pointers shared with the rest of fsck.  In the code visible
 * here they are only ever reset to NULL (by bufinit() and ckfini()).
 */
struct bufarea *pbp;
struct bufarea *pdirbp;
/* Mount point of the filesystem; set from the mnttab entry by mounted(). */
caddr_t mount_point = NULL;
static struct bufarea bufhead;	/* head of list of other blks in filesys */
/* NOTE(review): presumably error-lock state, going by the names -- confirm. */
char *elock_combuf;
char *elock_mountp;
static struct lockfs *lfp;	/* current lockfs status */

/*
 * Disk cache statistics: getdatablk() bumps totalreads on every request
 * and diskreads whenever it has to go through getblk(); ckfini() reports
 * both when debugging.
 */
static int64_t diskreads, totalreads;

/* Prototypes for the helpers that are private to this file. */
static int log_checksum(int32_t *, int32_t *, int);
static void vdirerror(fsck_ino_t, caddr_t, va_list);
static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
static void vpwarn(caddr_t, va_list);
static int getaline(FILE *, caddr_t, int);
static struct bufarea *alloc_bufarea(void);
static void rwerror(caddr_t, diskaddr_t, int rval);
static void debugclean(void);
static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
static void freelogblk(daddr32_t);
static void verrexit(caddr_t, va_list);
static void vpfatal(caddr_t, va_list);
static diskaddr_t get_device_size(int, caddr_t);
static diskaddr_t brute_force_get_device_size(int);
static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
    daddr32_t *, daddr32_t *, daddr32_t *);
91
92 int
ftypeok(struct dinode * dp)93 ftypeok(struct dinode *dp)
94 {
95 switch (dp->di_mode & IFMT) {
96
97 case IFDIR:
98 case IFREG:
99 case IFBLK:
100 case IFCHR:
101 case IFLNK:
102 case IFSOCK:
103 case IFIFO:
104 case IFSHAD:
105 case IFATTRDIR:
106 return (1);
107
108 default:
109 if (debug)
110 (void) printf("bad file type 0%o\n", dp->di_mode);
111 return (0);
112 }
113 }
114
115 int
acltypeok(struct dinode * dp)116 acltypeok(struct dinode *dp)
117 {
118 if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
119 return (1);
120
121 if (debug)
122 (void) printf("bad file type for acl I=%d: 0%o\n",
123 dp->di_shadow, dp->di_mode);
124 return (0);
125 }
126
127 NOTE(PRINTFLIKE(1))
128 int
reply(caddr_t fmt,...)129 reply(caddr_t fmt, ...)
130 {
131 va_list ap;
132 char line[80];
133
134 if (preen)
135 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
136
137 if (mflag) {
138 /*
139 * We don't know what's going on, so don't potentially
140 * make things worse by having errexit() write stuff
141 * out to disk.
142 */
143 (void) printf(
144 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
145 devname);
146 exit(EXERRFATAL);
147 }
148
149 va_start(ap, fmt);
150 (void) putchar('\n');
151 (void) vprintf(fmt, ap);
152 (void) putchar('?');
153 (void) putchar(' ');
154 va_end(ap);
155
156 if (nflag || fswritefd < 0) {
157 (void) printf(" no\n\n");
158 return (0);
159 }
160 if (yflag) {
161 (void) printf(" yes\n\n");
162 return (1);
163 }
164 (void) fflush(stdout);
165 if (getaline(stdin, line, sizeof (line)) == EOF)
166 errexit("\n");
167 (void) printf("\n");
168 if (line[0] == 'y' || line[0] == 'Y') {
169 return (1);
170 } else {
171 return (0);
172 }
173 }
174
/*
 * Read one line from FP into LOC, dropping every whitespace character
 * along the way.  At most MAXLEN - 1 characters are stored; the rest
 * of an over-long line is consumed and discarded.
 *
 * Returns the number of characters stored (excluding the terminator),
 * or EOF if end-of-file is reached before a newline.  Whenever MAXLEN
 * is positive LOC is left NUL-terminated, including on the EOF path,
 * so the caller never sees a partially-filled, unterminated buffer.
 * With MAXLEN <= 0 nothing at all is written to LOC.
 */
int
getaline(FILE *fp, caddr_t loc, int maxlen)
{
	int n;
	caddr_t p, lastloc;

	p = loc;
	/*
	 * The final slot is reserved for the terminator.  When there is
	 * no room at all, pin the limit to the start so nothing is stored
	 * and no out-of-range pointer is ever formed.
	 */
	lastloc = (maxlen > 0) ? &loc[maxlen - 1] : loc;

	while ((n = getc(fp)) != '\n') {
		if (n == EOF) {
			if (maxlen > 0)
				*p = '\0';
			return (EOF);
		}
		if (!isspace(n) && p < lastloc)
			*p++ = (char)n;
	}
	if (maxlen > 0)
		*p = '\0';
	/* LINTED pointer difference won't overflow */
	return ((int)(p - loc));
}
193
194 /*
195 * Malloc buffers and set up cache.
196 */
197 void
bufinit(void)198 bufinit(void)
199 {
200 struct bufarea *bp;
201 int bufcnt, i;
202 caddr_t bufp;
203
204 bufp = malloc((size_t)sblock.fs_bsize);
205 if (bufp == NULL)
206 goto nomem;
207 initbarea(&cgblk);
208 cgblk.b_un.b_buf = bufp;
209 bufhead.b_next = bufhead.b_prev = &bufhead;
210 bufcnt = MAXBUFSPACE / sblock.fs_bsize;
211 if (bufcnt < MINBUFS)
212 bufcnt = MINBUFS;
213 for (i = 0; i < bufcnt; i++) {
214 bp = (struct bufarea *)malloc(sizeof (struct bufarea));
215 if (bp == NULL) {
216 if (i >= MINBUFS)
217 goto noalloc;
218 goto nomem;
219 }
220
221 bufp = malloc((size_t)sblock.fs_bsize);
222 if (bufp == NULL) {
223 free((void *)bp);
224 if (i >= MINBUFS)
225 goto noalloc;
226 goto nomem;
227 }
228 initbarea(bp);
229 bp->b_un.b_buf = bufp;
230 bp->b_prev = &bufhead;
231 bp->b_next = bufhead.b_next;
232 bufhead.b_next->b_prev = bp;
233 bufhead.b_next = bp;
234 }
235 noalloc:
236 bufhead.b_size = i; /* save number of buffers */
237 pbp = pdirbp = NULL;
238 return;
239
240 nomem:
241 errexit("cannot allocate buffer pool\n");
242 /* NOTREACHED */
243 }
244
245 /*
246 * Undo a bufinit().
247 */
248 void
unbufinit(void)249 unbufinit(void)
250 {
251 int cnt;
252 struct bufarea *bp, *nbp;
253
254 cnt = 0;
255 for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
256 cnt++;
257 flush(fswritefd, bp);
258 nbp = bp->b_prev;
259 /*
260 * We're discarding the entire chain, so this isn't
261 * technically necessary. However, it doesn't hurt
262 * and lint's data flow analysis is much happier
263 * (this prevents it from thinking there's a chance
264 * of our using memory elsewhere after it's been released).
265 */
266 nbp->b_next = bp->b_next;
267 bp->b_next->b_prev = nbp;
268 free((void *)bp->b_un.b_buf);
269 free((void *)bp);
270 }
271
272 if (bufhead.b_size != cnt)
273 errexit("Panic: cache lost %d buffers\n",
274 bufhead.b_size - cnt);
275 }
276
277 /*
278 * Manage a cache of directory blocks.
279 */
280 struct bufarea *
getdatablk(daddr32_t blkno,size_t size)281 getdatablk(daddr32_t blkno, size_t size)
282 {
283 struct bufarea *bp;
284
285 for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
286 if (bp->b_bno == fsbtodb(&sblock, blkno)) {
287 goto foundit;
288 }
289 for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
290 if ((bp->b_flags & B_INUSE) == 0)
291 break;
292 if (bp == &bufhead) {
293 bp = alloc_bufarea();
294 if (bp == NULL) {
295 errexit("deadlocked buffer pool\n");
296 /* NOTREACHED */
297 }
298 }
299 /*
300 * We're at the same logical level as getblk(), so if there
301 * are any errors, we'll let our caller handle them.
302 */
303 diskreads++;
304 (void) getblk(bp, blkno, size);
305
306 foundit:
307 totalreads++;
308 bp->b_cnt++;
309 /*
310 * Move the buffer to head of linked list if it isn't
311 * already there.
312 */
313 if (bufhead.b_next != bp) {
314 bp->b_prev->b_next = bp->b_next;
315 bp->b_next->b_prev = bp->b_prev;
316 bp->b_prev = &bufhead;
317 bp->b_next = bufhead.b_next;
318 bufhead.b_next->b_prev = bp;
319 bufhead.b_next = bp;
320 }
321 bp->b_flags |= B_INUSE;
322 return (bp);
323 }
324
325 void
brelse(struct bufarea * bp)326 brelse(struct bufarea *bp)
327 {
328 bp->b_cnt--;
329 if (bp->b_cnt == 0) {
330 bp->b_flags &= ~B_INUSE;
331 }
332 }
333
334 struct bufarea *
getblk(struct bufarea * bp,daddr32_t blk,size_t size)335 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
336 {
337 diskaddr_t dblk;
338
339 dblk = fsbtodb(&sblock, blk);
340 if (bp->b_bno == dblk)
341 return (bp);
342 flush(fswritefd, bp);
343 bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
344 bp->b_bno = dblk;
345 bp->b_size = size;
346 return (bp);
347 }
348
349 void
flush(int fd,struct bufarea * bp)350 flush(int fd, struct bufarea *bp)
351 {
352 int i, j;
353 caddr_t sip;
354 long size;
355
356 if (!bp->b_dirty)
357 return;
358
359 /*
360 * It's not our buf, so if there are errors, let whoever
361 * acquired it deal with the actual problem.
362 */
363 if (bp->b_errs != 0)
364 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
365 bp->b_dirty = 0;
366 bp->b_errs = 0;
367 bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
368 if (bp != &sblk) {
369 return;
370 }
371
372 /*
373 * We're flushing the superblock, so make sure all the
374 * ancillary bits go out as well.
375 */
376 sip = (caddr_t)sblock.fs_u.fs_csp;
377 for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
378 size = sblock.fs_cssize - i < sblock.fs_bsize ?
379 sblock.fs_cssize - i : sblock.fs_bsize;
380 bwrite(fswritefd, sip,
381 fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
382 size);
383 sip += size;
384 }
385 }
386
387 static void
rwerror(caddr_t mesg,diskaddr_t blk,int rval)388 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
389 {
390 int olderr = errno;
391
392 if (!preen)
393 (void) printf("\n");
394
395 if (rval == -1)
396 pfatal("CANNOT %s: DISK BLOCK %lld: %s",
397 mesg, blk, strerror(olderr));
398 else
399 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
400
401 if (reply("CONTINUE") == 0) {
402 exitstat = EXERRFATAL;
403 errexit("Program terminated\n");
404 }
405 }
406
407 void
ckfini(void)408 ckfini(void)
409 {
410 int64_t percentage;
411
412 if (fswritefd < 0)
413 return;
414
415 flush(fswritefd, &sblk);
416 /*
417 * Were we using a backup superblock?
418 */
419 if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
420 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
421 sblk.b_bno = SBOFF / dev_bsize;
422 sbdirty();
423 flush(fswritefd, &sblk);
424 }
425 }
426 flush(fswritefd, &cgblk);
427 if (cgblk.b_un.b_buf != NULL) {
428 free((void *)cgblk.b_un.b_buf);
429 cgblk.b_un.b_buf = NULL;
430 }
431 unbufinit();
432 pbp = NULL;
433 pdirbp = NULL;
434 if (debug) {
435 /*
436 * Note that we only count cache-related reads.
437 * Anything that called fsck_bread() or getblk()
438 * directly are explicitly not cached, so they're not
439 * included here.
440 */
441 if (totalreads != 0)
442 percentage = diskreads * 100 / totalreads;
443 else
444 percentage = 0;
445
446 (void) printf("cache missed %lld of %lld reads (%lld%%)\n",
447 (longlong_t)diskreads, (longlong_t)totalreads,
448 (longlong_t)percentage);
449 }
450
451 (void) close(fsreadfd);
452 (void) close(fswritefd);
453 fsreadfd = -1;
454 fswritefd = -1;
455 }
456
457 int
fsck_bread(int fd,caddr_t buf,diskaddr_t blk,size_t size)458 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
459 {
460 caddr_t cp;
461 int i;
462 int errs;
463 offset_t offset = ldbtob(blk);
464 offset_t addr;
465
466 /*
467 * In our universe, nothing exists before the superblock, so
468 * just pretend it's always zeros. This is the complement of
469 * bwrite()'s ignoring write requests into that space.
470 */
471 if (blk < SBLOCK) {
472 if (debug)
473 (void) printf(
474 "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
475 SBLOCK, (longlong_t)blk);
476 (void) memset(buf, 0, (size_t)size);
477 return (1);
478 }
479
480 if (llseek(fd, offset, SEEK_SET) < 0) {
481 rwerror("SEEK", blk, -1);
482 }
483
484 if ((i = read(fd, buf, size)) == size) {
485 return (0);
486 }
487 rwerror("READ", blk, i);
488 if (llseek(fd, offset, SEEK_SET) < 0) {
489 rwerror("SEEK", blk, -1);
490 }
491 errs = 0;
492 (void) memset(buf, 0, (size_t)size);
493 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
494 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
495 addr = ldbtob(blk + i);
496 if (llseek(fd, addr, SEEK_SET) < 0 ||
497 read(fd, cp, (int)secsize) < 0) {
498 iscorrupt = 1;
499 (void) printf(" %llu", blk + (u_longlong_t)i);
500 errs++;
501 }
502 }
503 (void) printf("\n");
504 return (errs);
505 }
506
507 void
bwrite(int fd,caddr_t buf,diskaddr_t blk,int64_t size)508 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
509 {
510 int i;
511 int n;
512 caddr_t cp;
513 offset_t offset = ldbtob(blk);
514 offset_t addr;
515
516 if (fd < 0)
517 return;
518 if (blk < SBLOCK) {
519 if (debug)
520 (void) printf(
521 "WARNING: Attempt to write illegal blkno %lld on %s\n",
522 (longlong_t)blk, devname);
523 return;
524 }
525 if (llseek(fd, offset, SEEK_SET) < 0) {
526 rwerror("SEEK", blk, -1);
527 }
528 if ((i = write(fd, buf, (int)size)) == size) {
529 fsmodified = 1;
530 return;
531 }
532 rwerror("WRITE", blk, i);
533 if (llseek(fd, offset, SEEK_SET) < 0) {
534 rwerror("SEEK", blk, -1);
535 }
536 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
537 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
538 n = 0;
539 addr = ldbtob(blk + i);
540 if (llseek(fd, addr, SEEK_SET) < 0 ||
541 (n = write(fd, cp, DEV_BSIZE)) < 0) {
542 iscorrupt = 1;
543 (void) printf(" %llu", blk + (u_longlong_t)i);
544 } else if (n > 0) {
545 fsmodified = 1;
546 }
547
548 }
549 (void) printf("\n");
550 }
551
552 /*
553 * Allocates the specified number of contiguous fragments.
554 */
555 daddr32_t
allocblk(int wantedfrags)556 allocblk(int wantedfrags)
557 {
558 int block, leadfrag, tailfrag;
559 daddr32_t selected;
560 size_t size;
561 struct bufarea *bp;
562
563 /*
564 * It's arguable whether we should just fail, or instead
565 * error out here. Since we should only ever be asked for
566 * a single fragment or an entire block (i.e., sblock.fs_frag),
567 * we'll fail out because anything else means somebody
568 * changed code without considering all of the ramifications.
569 */
570 if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
571 exitstat = EXERRFATAL;
572 errexit("allocblk() asked for %d frags. "
573 "Legal range is 1 to %d",
574 wantedfrags, sblock.fs_frag);
575 }
576
577 /*
578 * For each filesystem block, look at every possible starting
579 * offset within the block such that we can get the number of
580 * contiguous fragments that we need. This is a drastically
581 * simplified version of the kernel's mapsearch() and alloc*().
582 * It's also correspondingly slower.
583 */
584 for (block = 0; block < maxfsblock - sblock.fs_frag;
585 block += sblock.fs_frag) {
586 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
587 leadfrag++) {
588 /*
589 * Is first fragment of candidate run available?
590 */
591 if (testbmap(block + leadfrag))
592 continue;
593 /*
594 * Are the rest of them available?
595 */
596 for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
597 if (testbmap(block + leadfrag + tailfrag))
598 break;
599 if (tailfrag < wantedfrags) {
600 /*
601 * No, skip the known-unusable run.
602 */
603 leadfrag += tailfrag;
604 continue;
605 }
606 /*
607 * Found what we need, so claim them.
608 */
609 for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
610 setbmap(block + leadfrag + tailfrag);
611 n_blks += wantedfrags;
612 size = wantedfrags * sblock.fs_fsize;
613 selected = block + leadfrag;
614 bp = getdatablk(selected, size);
615 (void) memset((void *)bp->b_un.b_buf, 0, size);
616 dirty(bp);
617 brelse(bp);
618 if (debug)
619 (void) printf(
620 "allocblk: selected %d (in block %d), frags %d, size %d\n",
621 selected, selected % sblock.fs_bsize,
622 wantedfrags, (int)size);
623 return (selected);
624 }
625 }
626 return (0);
627 }
628
629 /*
630 * Free a previously allocated block
631 */
632 void
freeblk(fsck_ino_t ino,daddr32_t blkno,int frags)633 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
634 {
635 struct inodesc idesc;
636
637 if (debug)
638 (void) printf("debug: freeing %d fragments starting at %d\n",
639 frags, blkno);
640
641 init_inodesc(&idesc);
642
643 idesc.id_number = ino;
644 idesc.id_blkno = blkno;
645 idesc.id_numfrags = frags;
646 idesc.id_truncto = -1;
647
648 /*
649 * Nothing in the return status has any relevance to how
650 * we're using pass4check(), so just ignore it.
651 */
652 (void) pass4check(&idesc);
653 }
654
655 /*
656 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes
657 * that the given buffer is at least MAXPATHLEN + 1 characters.
658 */
659 void
getpathname(caddr_t namebuf,fsck_ino_t curdir,fsck_ino_t ino)660 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
661 {
662 int len;
663 caddr_t cp;
664 struct dinode *dp;
665 struct inodesc idesc;
666 struct inoinfo *inp;
667
668 if (debug)
669 (void) printf("debug: getpathname(curdir %d, ino %d)\n",
670 curdir, ino);
671
672 if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
673 (void) strcpy(namebuf, "?");
674 return;
675 }
676
677 if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
678 (void) strcpy(namebuf, "/");
679 return;
680 }
681
682 init_inodesc(&idesc);
683 idesc.id_type = DATA;
684 cp = &namebuf[MAXPATHLEN - 1];
685 *cp = '\0';
686
687 /*
688 * In the case of extended attributes, our
689 * parent won't necessarily be a directory, so just
690 * return what we've found with a prefix indicating
691 * that it's an XATTR. Presumably our caller will
692 * know what's going on and do something useful, like
693 * work out the path of the parent and then combine
694 * the two names.
695 *
696 * Can't use strcpy(), etc, because we've probably
697 * already got some name information in the buffer and
698 * the usual trailing \0 would lose it.
699 */
700 dp = ginode(curdir);
701 if ((dp->di_mode & IFMT) == IFATTRDIR) {
702 idesc.id_number = curdir;
703 idesc.id_parent = ino;
704 idesc.id_func = findname;
705 idesc.id_name = namebuf;
706 idesc.id_fix = NOFIX;
707 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
708 *cp-- = '?';
709 }
710
711 len = sizeof (XATTR_DIR_NAME) - 1;
712 cp -= len;
713 (void) memmove(cp, XATTR_DIR_NAME, len);
714 goto attrname;
715 }
716
717 /*
718 * If curdir == ino, need to get a handle on .. so we
719 * can search it for ino's name. Otherwise, just search
720 * the given directory for ino. Repeat until out of space
721 * or a full path has been built.
722 */
723 if (curdir != ino) {
724 idesc.id_parent = curdir;
725 goto namelookup;
726 }
727 while (ino != UFSROOTINO && ino != 0) {
728 idesc.id_number = ino;
729 idesc.id_func = findino;
730 idesc.id_name = "..";
731 idesc.id_fix = NOFIX;
732 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
733 inp = getinoinfo(ino);
734 if ((inp == NULL) || (inp->i_parent == 0)) {
735 break;
736 }
737 idesc.id_parent = inp->i_parent;
738 }
739
740 /*
741 * To get this far, id_parent must have the inode
742 * number for `..' in it. By definition, that's got
743 * to be a directory, so search it for the inode of
744 * interest.
745 */
746 namelookup:
747 idesc.id_number = idesc.id_parent;
748 idesc.id_parent = ino;
749 idesc.id_func = findname;
750 idesc.id_name = namebuf;
751 idesc.id_fix = NOFIX;
752 if ((ckinode(ginode(idesc.id_number),
753 &idesc, CKI_TRAVERSE) & FOUND) == 0) {
754 break;
755 }
756 /*
757 * Prepend to what we've accumulated so far. If
758 * there's not enough room for even one more path element
759 * (of the worst-case length), then bail out.
760 */
761 len = strlen(namebuf);
762 cp -= len;
763 if (cp < &namebuf[MAXNAMLEN])
764 break;
765 (void) memmove(cp, namebuf, len);
766 *--cp = '/';
767
768 /*
769 * Corner case for a looped-to-itself directory.
770 */
771 if (ino == idesc.id_number)
772 break;
773
774 /*
775 * Climb one level of the hierarchy. In other words,
776 * the current .. becomes the inode to search for and
777 * its parent becomes the directory to search in.
778 */
779 ino = idesc.id_number;
780 }
781
782 /*
783 * If we hit a discontinuity in the hierarchy, indicate it by
784 * prefixing the path so far with `?'. Otherwise, the first
785 * character will be `/' as a side-effect of the *--cp above.
786 *
787 * The special case is to handle the situation where we're
788 * trying to look something up in UFSROOTINO, but didn't find
789 * it.
790 */
791 if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
792 if (cp > namebuf)
793 cp--;
794 *cp = '?';
795 }
796
797 /*
798 * The invariants being used for buffer integrity are:
799 * - namebuf[] is terminated with \0 before anything else
800 * - cp is always <= the last element of namebuf[]
801 * - the new path element is always stored at the
802 * beginning of namebuf[], and is no more than MAXNAMLEN-1
803 * characters
804 * - cp is is decremented by the number of characters in
805 * the new path element
806 * - if, after the above accounting for the new element's
807 * size, there is no longer enough room at the beginning of
808 * namebuf[] for a full-sized path element and a slash,
809 * terminate the loop. cp is in the range
810 * &namebuf[0]..&namebuf[MAXNAMLEN - 1]
811 */
812 attrname:
813 /* LINTED per the above discussion */
814 (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
815 }
816
817 /* ARGSUSED */
818 void
catch(int dummy)819 catch(int dummy)
820 {
821 ckfini();
822 exit(EXSIGNAL);
823 }
824
825 /*
826 * When preening, allow a single quit to signal
827 * a special exit after filesystem checks complete
828 * so that reboot sequence may be interrupted.
829 */
830 /* ARGSUSED */
831 void
catchquit(int dummy)832 catchquit(int dummy)
833 {
834 (void) printf("returning to single-user after filesystem check\n");
835 interrupted = 1;
836 (void) signal(SIGQUIT, SIG_DFL);
837 }
838
839
840 /*
841 * determine whether an inode should be fixed.
842 */
843 NOTE(PRINTFLIKE(2))
844 int
dofix(struct inodesc * idesc,caddr_t msg,...)845 dofix(struct inodesc *idesc, caddr_t msg, ...)
846 {
847 int rval = 0;
848 va_list ap;
849
850 va_start(ap, msg);
851
852 switch (idesc->id_fix) {
853
854 case DONTKNOW:
855 if (idesc->id_type == DATA)
856 vdirerror(idesc->id_number, msg, ap);
857 else
858 vpwarn(msg, ap);
859 if (preen) {
860 idesc->id_fix = FIX;
861 rval = ALTERED;
862 break;
863 }
864 if (reply("SALVAGE") == 0) {
865 idesc->id_fix = NOFIX;
866 break;
867 }
868 idesc->id_fix = FIX;
869 rval = ALTERED;
870 break;
871
872 case FIX:
873 rval = ALTERED;
874 break;
875
876 case NOFIX:
877 break;
878
879 default:
880 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
881 }
882
883 va_end(ap);
884 return (rval);
885 }
886
887 NOTE(PRINTFLIKE(1))
888 void
errexit(caddr_t fmt,...)889 errexit(caddr_t fmt, ...)
890 {
891 va_list ap;
892
893 va_start(ap, fmt);
894 verrexit(fmt, ap);
895 /* NOTREACHED */
896 }
897
898 NOTE(PRINTFLIKE(1))
899 static void
verrexit(caddr_t fmt,va_list ap)900 verrexit(caddr_t fmt, va_list ap)
901 {
902 static int recursing = 0;
903
904 if (!recursing) {
905 recursing = 1;
906 if (errorlocked || iscorrupt) {
907 if (havesb && fswritefd >= 0) {
908 sblock.fs_clean = FSBAD;
909 sblock.fs_state = FSOKAY - (long)sblock.fs_time;
910 sblock.fs_state = -sblock.fs_state;
911 sbdirty();
912 write_altsb(fswritefd);
913 flush(fswritefd, &sblk);
914 }
915 }
916 ckfini();
917 recursing = 0;
918 }
919 (void) vprintf(fmt, ap);
920 if (fmt[strlen(fmt) - 1] != '\n')
921 (void) putchar('\n');
922 exit((exitstat != 0) ? exitstat : EXERRFATAL);
923 }
924
925 /*
926 * An unexpected inconsistency occured.
927 * Die if preening, otherwise just print message and continue.
928 */
929 NOTE(PRINTFLIKE(1))
930 void
pfatal(caddr_t fmt,...)931 pfatal(caddr_t fmt, ...)
932 {
933 va_list ap;
934
935 va_start(ap, fmt);
936 vpfatal(fmt, ap);
937 va_end(ap);
938 }
939
940 NOTE(PRINTFLIKE(1))
941 static void
vpfatal(caddr_t fmt,va_list ap)942 vpfatal(caddr_t fmt, va_list ap)
943 {
944 if (preen) {
945 if (*fmt != '\0') {
946 (void) printf("%s: ", devname);
947 (void) vprintf(fmt, ap);
948 (void) printf("\n");
949 }
950 (void) printf(
951 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
952 devname);
953 if (havesb && fswritefd >= 0) {
954 sblock.fs_clean = FSBAD;
955 sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
956 sbdirty();
957 flush(fswritefd, &sblk);
958 }
959 /*
960 * We're exiting, it doesn't really matter that our
961 * caller doesn't get to call va_end().
962 */
963 if (exitstat == 0)
964 exitstat = EXFNDERRS;
965 exit(exitstat);
966 }
967 if (*fmt != '\0') {
968 (void) vprintf(fmt, ap);
969 }
970 }
971
972 /*
973 * Pwarn just prints a message when not preening,
974 * or a warning (preceded by filename) when preening.
975 */
976 NOTE(PRINTFLIKE(1))
977 void
pwarn(caddr_t fmt,...)978 pwarn(caddr_t fmt, ...)
979 {
980 va_list ap;
981
982 va_start(ap, fmt);
983 vpwarn(fmt, ap);
984 va_end(ap);
985 }
986
987 NOTE(PRINTFLIKE(1))
988 static void
vpwarn(caddr_t fmt,va_list ap)989 vpwarn(caddr_t fmt, va_list ap)
990 {
991 if (*fmt != '\0') {
992 if (preen)
993 (void) printf("%s: ", devname);
994 (void) vprintf(fmt, ap);
995 }
996 }
997
998 /*
999 * Like sprintf(), except the buffer is dynamically allocated
1000 * and returned, instead of being passed in. A pointer to the
1001 * buffer is stored in *RET, and FMT is the usual format string.
1002 * The number of characters in *RET (excluding the trailing \0,
1003 * to be consistent with the other *printf() routines) is returned.
1004 *
1005 * Solaris doesn't have asprintf(3C) yet, unfortunately.
1006 */
1007 NOTE(PRINTFLIKE(2))
1008 int
fsck_asprintf(caddr_t * ret,caddr_t fmt,...)1009 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1010 {
1011 int len;
1012 caddr_t buffer;
1013 va_list ap;
1014
1015 va_start(ap, fmt);
1016 len = vsnprintf(NULL, 0, fmt, ap);
1017 va_end(ap);
1018
1019 buffer = malloc((len + 1) * sizeof (char));
1020 if (buffer == NULL) {
1021 errexit("Out of memory in asprintf\n");
1022 /* NOTREACHED */
1023 }
1024
1025 va_start(ap, fmt);
1026 (void) vsnprintf(buffer, len + 1, fmt, ap);
1027 va_end(ap);
1028
1029 *ret = buffer;
1030 return (len);
1031 }
1032
1033 /*
1034 * So we can take advantage of kernel routines in ufs_subr.c.
1035 */
1036 /* PRINTFLIKE2 */
1037 void
cmn_err(int level,caddr_t fmt,...)1038 cmn_err(int level, caddr_t fmt, ...)
1039 {
1040 va_list ap;
1041
1042 va_start(ap, fmt);
1043 if (level == CE_PANIC) {
1044 (void) printf("INTERNAL INCONSISTENCY:");
1045 verrexit(fmt, ap);
1046 } else {
1047 (void) vprintf(fmt, ap);
1048 }
1049 va_end(ap);
1050 }
1051
1052 /*
1053 * Check to see if unraw version of name is already mounted.
1054 * Updates devstr with the device name if devstr is not NULL
1055 * and str_size is positive.
1056 */
1057 int
mounted(caddr_t name,caddr_t devstr,size_t str_size)1058 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1059 {
1060 int found;
1061 struct mnttab *mntent;
1062
1063 mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1064 if (mntent == NULL)
1065 return (M_NOMNT);
1066
1067 /*
1068 * It's mounted. With or without write access?
1069 */
1070 if (hasmntopt(mntent, MNTOPT_RO) != 0)
1071 found = M_RO; /* mounted as RO */
1072 else
1073 found = M_RW; /* mounted as R/W */
1074
1075 if (mount_point == NULL) {
1076 mount_point = strdup(mntent->mnt_mountp);
1077 if (mount_point == NULL) {
1078 errexit("fsck: memory allocation failure: %s",
1079 strerror(errno));
1080 /* NOTREACHED */
1081 }
1082
1083 if (devstr != NULL && str_size > 0)
1084 (void) strlcpy(devstr, mntent->mnt_special, str_size);
1085 }
1086
1087 return (found);
1088 }
1089
1090 /*
1091 * Check to see if name corresponds to an entry in vfstab, and that the entry
1092 * does not have option ro.
1093 */
1094 int
writable(caddr_t name)1095 writable(caddr_t name)
1096 {
1097 int rw = 1;
1098 struct vfstab vfsbuf, vfskey;
1099 FILE *vfstab;
1100
1101 vfstab = fopen(VFSTAB, "r");
1102 if (vfstab == NULL) {
1103 (void) printf("can't open %s\n", VFSTAB);
1104 return (1);
1105 }
1106 (void) memset((void *)&vfskey, 0, sizeof (vfskey));
1107 vfsnull(&vfskey);
1108 vfskey.vfs_special = unrawname(name);
1109 vfskey.vfs_fstype = MNTTYPE_UFS;
1110 if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1111 (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1112 rw = 0;
1113 }
1114 (void) fclose(vfstab);
1115 return (rw);
1116 }
1117
1118 /*
1119 * debugclean
1120 */
1121 static void
debugclean(void)1122 debugclean(void)
1123 {
1124 if (!debug)
1125 return;
1126
1127 if ((iscorrupt == 0) && (isdirty == 0))
1128 return;
1129
1130 if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1131 (sblock.fs_clean == FSLOG && islog && islogok) ||
1132 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1133 return;
1134
1135 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1136 sblock.fs_clean == FSSTABLE ? "stable" :
1137 sblock.fs_clean == FSLOG ? "logging" :
1138 sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1139 devname);
1140 }
1141
1142 /*
1143 * updateclean
1144 * Carefully and transparently update the clean flag.
1145 *
1146 * `iscorrupt' has to be in its final state before this is called.
1147 */
1148 int
updateclean(void)1149 updateclean(void)
1150 {
1151 int freedlog = 0;
1152 struct bufarea cleanbuf;
1153 size_t size;
1154 ssize_t io_res;
1155 diskaddr_t bno;
1156 char fsclean;
1157 int fsreclaim;
1158 char fsflags;
1159 int flags_ok = 1;
1160 daddr32_t fslogbno;
1161 offset_t sblkoff;
1162 time_t t;
1163
1164 /*
1165 * debug stuff
1166 */
1167 debugclean();
1168
1169 /*
1170 * set fsclean to its appropriate value
1171 */
1172 fslogbno = sblock.fs_logbno;
1173 fsclean = sblock.fs_clean;
1174 fsreclaim = sblock.fs_reclaim;
1175 fsflags = sblock.fs_flags;
1176 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1177 fsclean = FSACTIVE;
1178 }
1179 /*
1180 * If ufs log is not okay, note that we need to clear it.
1181 */
1182 examinelog(NULL);
1183 if (fslogbno && !(islog && islogok)) {
1184 fsclean = FSACTIVE;
1185 fslogbno = 0;
1186 }
1187
1188 /*
1189 * if necessary, update fs_clean and fs_state
1190 */
1191 switch (fsclean) {
1192
1193 case FSACTIVE:
1194 if (!iscorrupt) {
1195 fsclean = FSSTABLE;
1196 fsreclaim = 0;
1197 }
1198 break;
1199
1200 case FSCLEAN:
1201 case FSSTABLE:
1202 if (iscorrupt) {
1203 fsclean = FSACTIVE;
1204 } else {
1205 fsreclaim = 0;
1206 }
1207 break;
1208
1209 case FSLOG:
1210 if (iscorrupt) {
1211 fsclean = FSACTIVE;
1212 } else if (!islog || fslogbno == 0) {
1213 fsclean = FSSTABLE;
1214 fsreclaim = 0;
1215 } else if (fflag) {
1216 fsreclaim = 0;
1217 }
1218 break;
1219
1220 case FSFIX:
1221 fsclean = FSBAD;
1222 if (errorlocked && !iscorrupt) {
1223 fsclean = islog ? FSLOG : FSCLEAN;
1224 }
1225 break;
1226
1227 default:
1228 if (iscorrupt) {
1229 fsclean = FSACTIVE;
1230 } else {
1231 fsclean = FSSTABLE;
1232 fsreclaim = 0;
1233 }
1234 }
1235
1236 if (largefile_count > 0)
1237 fsflags |= FSLARGEFILES;
1238 else
1239 fsflags &= ~FSLARGEFILES;
1240
1241 /*
1242 * There can be two discrepencies here. A) The superblock
1243 * shows no largefiles but we found some while scanning.
1244 * B) The superblock indicates the presence of largefiles,
1245 * but none are present. Note that if preening, the superblock
1246 * is silently corrected.
1247 */
1248 if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1249 (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1250 flags_ok = 0;
1251
1252 if (debug)
1253 (void) printf(
1254 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1255 largefile_count, sblock.fs_flags, flags_ok);
1256
1257 /*
1258 * If fs is unchanged, do nothing.
1259 */
1260 if ((!isdirty) && (flags_ok) &&
1261 (fslogbno == sblock.fs_logbno) &&
1262 (sblock.fs_clean == fsclean) &&
1263 (sblock.fs_reclaim == fsreclaim) &&
1264 (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1265 if (errorlocked) {
1266 if (!do_errorlock(LOCKFS_ULOCK))
1267 pwarn(
1268 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1269 }
1270 return (freedlog);
1271 }
1272
1273 /*
1274 * if user allows, update superblock state
1275 */
1276 if (debug) {
1277 (void) printf(
1278 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1279 sblock.fs_flags, sblock.fs_logbno,
1280 sblock.fs_clean, sblock.fs_reclaim,
1281 sblock.fs_state + sblock.fs_time);
1282 (void) printf(
1283 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1284 fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1285 }
1286 if (!isdirty && !preen && !rerun &&
1287 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1288 return (freedlog);
1289
1290 (void) time(&t);
1291 sblock.fs_time = (time32_t)t;
1292 if (debug)
1293 printclean();
1294
1295 if (sblock.fs_logbno != fslogbno) {
1296 examinelog(&freelogblk);
1297 freedlog++;
1298 }
1299
1300 sblock.fs_logbno = fslogbno;
1301 sblock.fs_clean = fsclean;
1302 sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1303 sblock.fs_reclaim = fsreclaim;
1304 sblock.fs_flags = fsflags;
1305
1306 /*
1307 * if superblock can't be written, return
1308 */
1309 if (fswritefd < 0)
1310 return (freedlog);
1311
1312 /*
1313 * Read private copy of superblock, update clean flag, and write it.
1314 */
1315 bno = sblk.b_bno;
1316 size = sblk.b_size;
1317
1318 sblkoff = ldbtob(bno);
1319
1320 if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1321 errexit("out of memory");
1322 if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
1323 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1324 (longlong_t)bno, strerror(errno));
1325 goto out;
1326 }
1327
1328 if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1329 report_io_prob("READ FROM", bno, size, io_res);
1330 goto out;
1331 }
1332
1333 cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno;
1334 cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean;
1335 cleanbuf.b_un.b_fs->fs_state = sblock.fs_state;
1336 cleanbuf.b_un.b_fs->fs_time = sblock.fs_time;
1337 cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1338 cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags;
1339
1340 if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
1341 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1342 (longlong_t)bno, strerror(errno));
1343 goto out;
1344 }
1345
1346 if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1347 report_io_prob("WRITE TO", bno, size, io_res);
1348 goto out;
1349 }
1350
1351 /*
1352 * 1208040
1353 * If we had to use -b to grab an alternate superblock, then we
1354 * likely had to do so because of unacceptable differences between
1355 * the main and alternate superblocks. So, we had better update
1356 * the alternate superblock as well, or we'll just fail again
1357 * the next time we attempt to run fsck!
1358 */
1359 if (bflag != 0) {
1360 write_altsb(fswritefd);
1361 }
1362
1363 if (errorlocked) {
1364 if (!do_errorlock(LOCKFS_ULOCK))
1365 pwarn(
1366 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1367 }
1368
1369 out:
1370 if (cleanbuf.b_un.b_buf != NULL) {
1371 free((void *)cleanbuf.b_un.b_buf);
1372 }
1373
1374 return (freedlog);
1375 }
1376
1377 static void
report_io_prob(caddr_t what,diskaddr_t bno,size_t expected,ssize_t failure)1378 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1379 {
1380 if (failure < 0)
1381 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1382 what, (int)bno, strerror(errno));
1383 else if (failure == 0)
1384 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1385 what, (int)bno);
1386 else
1387 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1388 what, (int)bno, (unsigned)failure, (unsigned)expected);
1389 }
1390
1391 /*
1392 * print out clean info
1393 */
1394 void
printclean(void)1395 printclean(void)
1396 {
1397 caddr_t s;
1398
1399 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1400 s = "unknown";
1401 else
1402 switch (sblock.fs_clean) {
1403
1404 case FSACTIVE:
1405 s = "active";
1406 break;
1407
1408 case FSCLEAN:
1409 s = "clean";
1410 break;
1411
1412 case FSSTABLE:
1413 s = "stable";
1414 break;
1415
1416 case FSLOG:
1417 s = "logging";
1418 break;
1419
1420 case FSBAD:
1421 s = "is bad";
1422 break;
1423
1424 case FSFIX:
1425 s = "being fixed";
1426 break;
1427
1428 default:
1429 s = "unknown";
1430 }
1431
1432 if (preen)
1433 pwarn("is %s.\n", s);
1434 else
1435 (void) printf("** %s is %s.\n", devname, s);
1436 }
1437
1438 int
is_errorlocked(caddr_t fs)1439 is_errorlocked(caddr_t fs)
1440 {
1441 int retval;
1442 struct stat64 statb;
1443 caddr_t mountp;
1444 struct mnttab *mntent;
1445
1446 retval = 0;
1447
1448 if (!fs)
1449 return (0);
1450
1451 if (stat64(fs, &statb) < 0)
1452 return (0);
1453
1454 if (S_ISDIR(statb.st_mode)) {
1455 mountp = fs;
1456 } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1457 mntent = search_mnttab(NULL, fs, NULL, 0);
1458 if (mntent == NULL)
1459 return (0);
1460 mountp = mntent->mnt_mountp;
1461 if (mountp == NULL) /* theoretically a can't-happen */
1462 return (0);
1463 } else {
1464 return (0);
1465 }
1466
1467 /*
1468 * From here on, must `goto out' to avoid memory leakage.
1469 */
1470
1471 if (elock_combuf == NULL)
1472 elock_combuf =
1473 (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1474 else
1475 elock_combuf =
1476 (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1477
1478 if (elock_combuf == NULL)
1479 goto out;
1480
1481 (void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1482
1483 if (elock_mountp != NULL) {
1484 free(elock_mountp);
1485 }
1486
1487 elock_mountp = strdup(mountp);
1488 if (elock_mountp == NULL)
1489 goto out;
1490
1491 if (mountfd < 0) {
1492 if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1493 goto out;
1494 }
1495
1496 if (lfp == NULL) {
1497 lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1498 if (lfp == NULL)
1499 goto out;
1500 (void) memset((void *)lfp, 0, sizeof (struct lockfs));
1501 }
1502
1503 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1504 lfp->lf_comment = elock_combuf;
1505
1506 if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1507 goto out;
1508
1509 /*
1510 * lint believes that the ioctl() (or any other function
1511 * taking lfp as an arg) could free lfp. This is not the
1512 * case, however.
1513 */
1514 retval = LOCKFS_IS_ELOCK(lfp);
1515
1516 out:
1517 return (retval);
1518 }
1519
1520 /*
1521 * Given a name which is known to be a directory, see if it appears
1522 * in the vfstab. If so, return the entry's block (special) device
1523 * field via devstr.
1524 */
1525 int
check_vfstab(caddr_t name,caddr_t devstr,size_t str_size)1526 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1527 {
1528 return (NULL != search_vfstab(name, NULL, devstr, str_size));
1529 }
1530
1531 /*
1532 * Given a name which is known to be a directory, see if it appears
1533 * in the mnttab. If so, return the entry's block (special) device
1534 * field via devstr.
1535 */
1536 int
check_mnttab(caddr_t name,caddr_t devstr,size_t str_size)1537 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1538 {
1539 return (NULL != search_mnttab(name, NULL, devstr, str_size));
1540 }
1541
/*
 * Search for mount point and/or special device in the given file.
 * The first matching entry is returned.
 *
 * If an entry is found and str_size is greater than zero, then
 * up to str_size bytes of the special device name from the entry
 * are copied to devstr.
 *
 * SEARCH_TAB_BODY expands to a complete function body.  The function
 * it is used in must have parameters named mountp, special, devstr
 * and str_size.  The macro arguments are:
 *
 *	st_type		struct tag of a table entry
 *	st_file		path of the table file, opened for reading
 *	st_mount	name of the entry's mount-point member
 *	st_special	name of the entry's special-device member
 *	st_nuller	invoked on the search key before it is filled in
 *	st_init		extra statement run after the key is filled in;
 *			it may refer to the locals `key' and `retval'
 *	st_searcher	lookup routine; a return of 0 is taken as a match
 *
 * The body returns NULL if the file cannot be opened or nothing
 * matched.  Otherwise it returns a pointer to a function-local static
 * entry, so the result is overwritten by the next call of the same
 * function.
 */

#define SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
    st_nuller, st_init, st_searcher) \
{ \
	FILE *fp; \
	struct st_type *retval = NULL; \
	struct st_type key; \
	/* static so that it outlives the call; see header comment */ \
	static struct st_type buffer; \
	\
	/* LINTED ``assigned value never used'' */ \
	st_nuller(&key); \
	key.st_mount = mountp; \
	key.st_special = special; \
	st_init; \
	\
	if ((fp = fopen(st_file, "r")) == NULL) \
		return (NULL); \
	\
	if (st_searcher(fp, &buffer, &key) == 0) { \
		retval = &buffer; \
		/* copy the device name out only if the caller asked */ \
		if (devstr != NULL && str_size > 0 && \
		    buffer.st_special != NULL) { \
			(void) strlcpy(devstr, buffer.st_special, \
			    str_size); \
		} \
	} \
	(void) fclose(fp); \
	return (retval); \
}
1579
/*
 * Find the first VFSTAB entry matching mountp and/or special, using
 * getvfsany() for the lookup.  The body comes from SEARCH_TAB_BODY.
 * The (retval = retval) self-assignment only fills the macro's st_init
 * slot and has no effect.
 */
static struct vfstab *
search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
    (retval = retval), getvfsany)
1584
/*
 * Find the first MNTTAB entry matching mountp and/or special, using
 * getmntany() for the lookup.  The body comes from SEARCH_TAB_BODY.
 * The search key's mnt_fstype is additionally set to MNTTYPE_UFS,
 * presumably so that only UFS entries can match.
 */
static struct mnttab *
search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
    (key.mnt_fstype = MNTTYPE_UFS), getmntany)
1589
1590 int
1591 do_errorlock(int lock_type)
1592 {
1593 caddr_t buf;
1594 time_t now;
1595 struct tm *local;
1596 int rc;
1597
1598 if (elock_combuf == NULL)
1599 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1600 elock_mountp ? elock_mountp : "<null>",
1601 lock_type);
1602
1603 if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1604 NULL) {
1605 errexit("Couldn't alloc memory for temp. lock status buffer\n");
1606 }
1607 if (lfp == NULL) {
1608 errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1609 elock_mountp, lock_type);
1610 }
1611
1612 (void) memmove((void *)buf, (void *)elock_combuf,
1613 LOCKFS_MAXCOMMENTLEN-1);
1614
1615 switch (lock_type) {
1616 case LOCKFS_ELOCK:
1617 /*
1618 * Note that if it is error-locked, we won't get an
1619 * error back if we try to error-lock it again.
1620 */
1621 if (time(&now) != (time_t)-1) {
1622 if ((local = localtime(&now)) != NULL)
1623 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1624 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1625 elock_combuf, (int)pid,
1626 local->tm_mon + 1, local->tm_mday,
1627 (local->tm_year % 100), local->tm_hour,
1628 local->tm_min, local->tm_sec);
1629 else
1630 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1631 "%s [fsck pid %d", elock_combuf, pid);
1632
1633 } else {
1634 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1635 "%s [fsck pid %d", elock_combuf, pid);
1636 }
1637 break;
1638
1639 case LOCKFS_ULOCK:
1640 if (time(&now) != (time_t)-1) {
1641 if ((local = localtime(&now)) != NULL) {
1642 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1643 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1644 elock_combuf,
1645 local->tm_mon + 1, local->tm_mday,
1646 (local->tm_year % 100), local->tm_hour,
1647 local->tm_min, local->tm_sec);
1648 } else {
1649 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1650 "%s]", elock_combuf);
1651 }
1652 } else {
1653 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1654 "%s]", elock_combuf);
1655 }
1656 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1657 pwarn("do_errorlock: unlock failed: %s\n",
1658 strerror(errno));
1659 goto out;
1660 }
1661 break;
1662
1663 default:
1664 break;
1665 }
1666
1667 (void) memmove((void *)elock_combuf, (void *)buf,
1668 LOCKFS_MAXCOMMENTLEN - 1);
1669
1670 lfp->lf_lock = lock_type;
1671 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1672 lfp->lf_comment = elock_combuf;
1673 lfp->lf_flags = 0;
1674 errno = 0;
1675
1676 if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1677 if (errno == EINVAL) {
1678 pwarn("Another fsck active?\n");
1679 iscorrupt = 0; /* don't go away mad, just go away */
1680 } else {
1681 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1682 lock_type, elock_combuf, strerror(errno));
1683 }
1684 }
1685 out:
1686 if (buf != NULL) {
1687 free((void *)buf);
1688 }
1689
1690 return (rc != -1);
1691 }
1692
1693 /*
1694 * Shadow inode support. To register a shadow with a client is to note
1695 * that an inode (the client) refers to the shadow.
1696 */
1697
1698 static struct shadowclients *
newshadowclient(struct shadowclients * prev)1699 newshadowclient(struct shadowclients *prev)
1700 {
1701 struct shadowclients *rc;
1702
1703 rc = (struct shadowclients *)malloc(sizeof (*rc));
1704 if (rc == NULL)
1705 errexit("newshadowclient: cannot malloc shadow client");
1706 rc->next = prev;
1707 rc->nclients = 0;
1708
1709 rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1710 maxshadowclients);
1711 if (rc->client == NULL)
1712 errexit("newshadowclient: cannot malloc client array");
1713 return (rc);
1714 }
1715
1716 void
registershadowclient(fsck_ino_t shadow,fsck_ino_t client,struct shadowclientinfo ** info)1717 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1718 struct shadowclientinfo **info)
1719 {
1720 struct shadowclientinfo *sci;
1721 struct shadowclients *scc;
1722
1723 /*
1724 * Already have a record for this shadow?
1725 */
1726 for (sci = *info; sci != NULL; sci = sci->next)
1727 if (sci->shadow == shadow)
1728 break;
1729 if (sci == NULL) {
1730 /*
1731 * It's a new shadow, add it to the list
1732 */
1733 sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1734 if (sci == NULL)
1735 errexit("registershadowclient: cannot malloc");
1736 sci->next = *info;
1737 *info = sci;
1738 sci->shadow = shadow;
1739 sci->totalClients = 0;
1740 sci->clients = newshadowclient(NULL);
1741 }
1742
1743 sci->totalClients++;
1744 scc = sci->clients;
1745 if (scc->nclients >= maxshadowclients) {
1746 scc = newshadowclient(sci->clients);
1747 sci->clients = scc;
1748 }
1749
1750 scc->client[scc->nclients++] = client;
1751 }
1752
1753 /*
1754 * Locate and discard a shadow.
1755 */
1756 void
clearshadow(fsck_ino_t shadow,struct shadowclientinfo ** info)1757 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1758 {
1759 struct shadowclientinfo *sci, *prev;
1760
1761 /*
1762 * Do we have a record for this shadow?
1763 */
1764 prev = NULL;
1765 for (sci = *info; sci != NULL; sci = sci->next) {
1766 if (sci->shadow == shadow)
1767 break;
1768 prev = sci;
1769 }
1770
1771 if (sci != NULL) {
1772 /*
1773 * First, pull it off the list, since we know there
1774 * shouldn't be any future references to this one.
1775 */
1776 if (prev == NULL)
1777 *info = sci->next;
1778 else
1779 prev->next = sci->next;
1780 deshadow(sci, clearattrref);
1781 }
1782 }
1783
1784 /*
1785 * Discard all memory used to track clients of a shadow.
1786 */
1787 void
deshadow(struct shadowclientinfo * sci,void (* cb)(fsck_ino_t))1788 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1789 {
1790 struct shadowclients *clients, *discard;
1791 int idx;
1792
1793 clients = sci->clients;
1794 while (clients != NULL) {
1795 discard = clients;
1796 clients = clients->next;
1797 if (discard->client != NULL) {
1798 if (cb != NULL) {
1799 for (idx = 0; idx < discard->nclients; idx++)
1800 (*cb)(discard->client[idx]);
1801 }
1802 free((void *)discard->client);
1803 }
1804 free((void *)discard);
1805 }
1806
1807 free((void *)sci);
1808 }
1809
1810 /*
1811 * Allocate more buffer as need arises but allocate one at a time.
1812 * This is done to make sure that fsck does not exit with error if it
1813 * needs more buffer to complete its task.
1814 */
1815 static struct bufarea *
alloc_bufarea(void)1816 alloc_bufarea(void)
1817 {
1818 struct bufarea *newbp;
1819 caddr_t bufp;
1820
1821 bufp = malloc((unsigned int)sblock.fs_bsize);
1822 if (bufp == NULL)
1823 return (NULL);
1824
1825 newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1826 if (newbp == NULL) {
1827 free((void *)bufp);
1828 return (NULL);
1829 }
1830
1831 initbarea(newbp);
1832 newbp->b_un.b_buf = bufp;
1833 newbp->b_prev = &bufhead;
1834 newbp->b_next = bufhead.b_next;
1835 bufhead.b_next->b_prev = newbp;
1836 bufhead.b_next = newbp;
1837 bufhead.b_size++;
1838 return (newbp);
1839 }
1840
1841 /*
1842 * We length-limit in both unrawname() and rawname() to avoid
1843 * overflowing our arrays or those of our naive, trusting callers.
1844 */
1845
1846 caddr_t
unrawname(caddr_t name)1847 unrawname(caddr_t name)
1848 {
1849 caddr_t dp;
1850 static char fullname[MAXPATHLEN + 1];
1851
1852 if ((dp = getfullblkname(name)) == NULL)
1853 return ("");
1854
1855 (void) strlcpy(fullname, dp, sizeof (fullname));
1856 /*
1857 * Not reporting under debug, as the allocation isn't
1858 * reported by getfullblkname. The idea is that we
1859 * produce balanced alloc/free instances.
1860 */
1861 free(dp);
1862
1863 return (fullname);
1864 }
1865
1866 caddr_t
rawname(caddr_t name)1867 rawname(caddr_t name)
1868 {
1869 caddr_t dp;
1870 static char fullname[MAXPATHLEN + 1];
1871
1872 if ((dp = getfullrawname(name)) == NULL)
1873 return ("");
1874
1875 (void) strlcpy(fullname, dp, sizeof (fullname));
1876 /*
1877 * Not reporting under debug, as the allocation isn't
1878 * reported by getfullblkname. The idea is that we
1879 * produce balanced alloc/free instances.
1880 */
1881 free(dp);
1882
1883 return (fullname);
1884 }
1885
1886 /*
1887 * Make sure that a cg header looks at least moderately reasonable.
1888 * We want to be able to trust the contents enough to be able to use
1889 * the standard accessor macros. So, besides looking at the obvious
1890 * such as the magic number, we verify that the offset field values
1891 * are properly aligned and not too big or small.
1892 *
1893 * Returns a NULL pointer if the cg is sane enough for our needs, else
1894 * a dynamically-allocated string describing all of its faults.
1895 */
/*
 * Append the string `addition' (of length addition_len, not counting
 * the terminating NUL) to the accumulated string `full', whose current
 * length is kept in full_len.
 *
 * If nothing has been accumulated yet, `addition' itself becomes the
 * accumulated string.  Otherwise `full' is grown with realloc(), the
 * new text is copied onto its end, and `addition' is freed.  Either
 * way the caller must not use `addition' afterwards.  Running out of
 * memory is fatal.
 *
 * The body is wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is exactly one statement and can safely be
 * used as the body of an if/else.
 */
#define Append_Error(full, full_len, addition, addition_len) \
	do { \
		if (full == NULL) { \
			full = addition; \
			full_len = addition_len; \
		} else { \
			/* lint doesn't think realloc() understands NULLs */ \
			full = realloc(full, full_len + addition_len + 1); \
			if (full == NULL) { \
				errexit("Out of memory in cg_sanity"); \
				/* NOTREACHED */ \
			} \
			(void) strcpy(full + full_len, addition); \
			full_len += addition_len; \
			free(addition); \
		} \
	} while (0)
1911
1912 caddr_t
cg_sanity(struct cg * cgp,int cgno)1913 cg_sanity(struct cg *cgp, int cgno)
1914 {
1915 caddr_t full_err;
1916 caddr_t this_err = NULL;
1917 int full_len, this_len;
1918 daddr32_t ndblk;
1919 daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1920 daddr32_t exp_freeoff, exp_nextfreeoff;
1921
1922 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1923 &exp_freeoff, &exp_nextfreeoff, &ndblk);
1924
1925 full_err = NULL;
1926 full_len = 0;
1927
1928 if (!cg_chkmagic(cgp)) {
1929 this_len = fsck_asprintf(&this_err,
1930 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1931 cgp->cg_magic, CG_MAGIC);
1932 Append_Error(full_err, full_len, this_err, this_len);
1933 }
1934
1935 if (cgp->cg_cgx != cgno) {
1936 this_len = fsck_asprintf(&this_err,
1937 "WRONG CG NUMBER (%d should be %d)\n",
1938 cgp->cg_cgx, cgno);
1939 Append_Error(full_err, full_len, this_err, this_len);
1940 }
1941
1942 if ((cgp->cg_btotoff & 3) != 0) {
1943 this_len = fsck_asprintf(&this_err,
1944 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1945 cgp->cg_btotoff);
1946 Append_Error(full_err, full_len, this_err, this_len);
1947 }
1948
1949 if ((cgp->cg_boff & 1) != 0) {
1950 this_len = fsck_asprintf(&this_err,
1951 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1952 cgp->cg_boff);
1953 Append_Error(full_err, full_len, this_err, this_len);
1954 }
1955
1956 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1957 if (cgp->cg_ncyl < 1) {
1958 this_len = fsck_asprintf(&this_err,
1959 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1960 cgp->cg_ncyl);
1961 } else {
1962 this_len = fsck_asprintf(&this_err,
1963 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1964 cgp->cg_ncyl, sblock.fs_cpg);
1965 }
1966 Append_Error(full_err, full_len, this_err, this_len);
1967 }
1968
1969 if (cgp->cg_niblk != sblock.fs_ipg) {
1970 this_len = fsck_asprintf(&this_err,
1971 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1972 cgp->cg_niblk, sblock.fs_ipg);
1973 Append_Error(full_err, full_len, this_err, this_len);
1974 }
1975
1976 if (cgp->cg_ndblk != ndblk) {
1977 this_len = fsck_asprintf(&this_err,
1978 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1979 cgp->cg_ndblk, ndblk);
1980 Append_Error(full_err, full_len, this_err, this_len);
1981 }
1982
1983 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1984 this_len = fsck_asprintf(&this_err,
1985 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1986 "(%d should be at least 0 and less than %d)\n",
1987 cgp->cg_rotor, ndblk);
1988 Append_Error(full_err, full_len, this_err, this_len);
1989 }
1990
1991 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1992 this_len = fsck_asprintf(&this_err,
1993 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1994 "(%d should be at least 0 and less than %d)\n",
1995 cgp->cg_frotor, ndblk);
1996 Append_Error(full_err, full_len, this_err, this_len);
1997 }
1998
1999 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2000 this_len = fsck_asprintf(&this_err,
2001 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
2002 "(%d should be at least 0 and less than %d)\n",
2003 cgp->cg_irotor, sblock.fs_ipg);
2004 Append_Error(full_err, full_len, this_err, this_len);
2005 }
2006
2007 if (cgp->cg_btotoff != exp_btotoff) {
2008 this_len = fsck_asprintf(&this_err,
2009 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2010 cgp->cg_btotoff, exp_btotoff);
2011 Append_Error(full_err, full_len, this_err, this_len);
2012 }
2013
2014 if (cgp->cg_boff != exp_boff) {
2015 this_len = fsck_asprintf(&this_err,
2016 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2017 cgp->cg_boff, exp_boff);
2018 Append_Error(full_err, full_len, this_err, this_len);
2019 }
2020
2021 if (cgp->cg_iusedoff != exp_iusedoff) {
2022 this_len = fsck_asprintf(&this_err,
2023 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2024 cgp->cg_iusedoff, exp_iusedoff);
2025 Append_Error(full_err, full_len, this_err, this_len);
2026 }
2027
2028 if (cgp->cg_freeoff != exp_freeoff) {
2029 this_len = fsck_asprintf(&this_err,
2030 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2031 cgp->cg_freeoff, exp_freeoff);
2032 Append_Error(full_err, full_len, this_err, this_len);
2033 }
2034
2035 if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2036 this_len = fsck_asprintf(&this_err,
2037 "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2038 cgp->cg_nextfreeoff, exp_nextfreeoff);
2039 Append_Error(full_err, full_len, this_err, this_len);
2040 }
2041
2042 return (full_err);
2043 }
2044
2045 #undef Append_Error
2046
2047 /*
2048 * This is taken from mkfs, and is what is used to come up with the
2049 * original values for a struct cg. This implies that, since these
2050 * are all constants, recalculating them now should give us the same
2051 * thing as what's on disk.
2052 */
2053 static void
cg_constants(int cgno,daddr32_t * btotoff,daddr32_t * boff,daddr32_t * iusedoff,daddr32_t * freeoff,daddr32_t * nextfreeoff,daddr32_t * ndblk)2054 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2055 daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2056 daddr32_t *ndblk)
2057 {
2058 daddr32_t cbase, dmax;
2059 struct cg *cgp;
2060
2061 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2062 (size_t)sblock.fs_cgsize);
2063 cgp = cgblk.b_un.b_cg;
2064
2065 cbase = cgbase(&sblock, cgno);
2066 dmax = cbase + sblock.fs_fpg;
2067 if (dmax > sblock.fs_size)
2068 dmax = sblock.fs_size;
2069
2070 /* LINTED pointer difference won't overflow */
2071 *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2072 *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2073 *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2074 *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2075 *nextfreeoff = *freeoff +
2076 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2077 *ndblk = dmax - cbase;
2078 }
2079
2080 /*
2081 * Corrects all fields in the cg that can be done with the available
2082 * redundant data.
2083 */
2084 void
fix_cg(struct cg * cgp,int cgno)2085 fix_cg(struct cg *cgp, int cgno)
2086 {
2087 daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2088 daddr32_t exp_freeoff, exp_nextfreeoff;
2089 daddr32_t ndblk;
2090
2091 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2092 &exp_freeoff, &exp_nextfreeoff, &ndblk);
2093
2094 if (cgp->cg_cgx != cgno) {
2095 cgp->cg_cgx = cgno;
2096 }
2097
2098 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2099 if (cgno == (sblock.fs_ncg - 1)) {
2100 cgp->cg_ncyl = sblock.fs_ncyl -
2101 (sblock.fs_cpg * cgno);
2102 } else {
2103 cgp->cg_ncyl = sblock.fs_cpg;
2104 }
2105 }
2106
2107 if (cgp->cg_niblk != sblock.fs_ipg) {
2108 /*
2109 * This is not used by the kernel, so it's pretty
2110 * harmless if it's wrong.
2111 */
2112 cgp->cg_niblk = sblock.fs_ipg;
2113 }
2114
2115 if (cgp->cg_ndblk != ndblk) {
2116 cgp->cg_ndblk = ndblk;
2117 }
2118
2119 /*
2120 * For the rotors, any position's valid, so pick the one we know
2121 * will always exist.
2122 */
2123 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2124 cgp->cg_rotor = 0;
2125 }
2126
2127 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2128 cgp->cg_frotor = 0;
2129 }
2130
2131 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2132 cgp->cg_irotor = 0;
2133 }
2134
2135 /*
2136 * For btotoff and boff, if they're misaligned they won't
2137 * match the expected values, so we're catching both cases
2138 * here. Of course, if any of these are off, it seems likely
2139 * that the tables really won't be where we calculate they
2140 * should be anyway.
2141 */
2142 if (cgp->cg_btotoff != exp_btotoff) {
2143 cgp->cg_btotoff = exp_btotoff;
2144 }
2145
2146 if (cgp->cg_boff != exp_boff) {
2147 cgp->cg_boff = exp_boff;
2148 }
2149
2150 if (cgp->cg_iusedoff != exp_iusedoff) {
2151 cgp->cg_iusedoff = exp_iusedoff;
2152 }
2153
2154 if (cgp->cg_freeoff != exp_freeoff) {
2155 cgp->cg_freeoff = exp_freeoff;
2156 }
2157
2158 if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2159 cgp->cg_nextfreeoff = exp_nextfreeoff;
2160 }
2161
2162 /*
2163 * Reset the magic, as we've recreated this cg, also
2164 * update the cg_time, as we're writing out the cg
2165 */
2166 cgp->cg_magic = CG_MAGIC;
2167 cgp->cg_time = time(NULL);
2168
2169 /*
2170 * We know there was at least one correctable problem,
2171 * or else we wouldn't have been called. So instead of
2172 * marking the buffer dirty N times above, just do it
2173 * once here.
2174 */
2175 cgdirty();
2176 }
2177
2178 void
examinelog(void (* cb)(daddr32_t))2179 examinelog(void (*cb)(daddr32_t))
2180 {
2181 struct bufarea *bp;
2182 extent_block_t *ebp;
2183 extent_t *ep;
2184 daddr32_t nfno, fno;
2185 int i;
2186 int j;
2187
2188 /*
2189 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2190 * we need to translate accordingly using logbtodb()
2191 */
2192
2193 if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2194 if (debug) {
2195 (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2196 "Aborting log examination\n", \
2197 logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2198 }
2199 return;
2200 }
2201
2202 /*
2203 * Read errors will return zeros, which will cause us
2204 * to do nothing harmful, so don't need to handle it.
2205 */
2206 bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2207 (size_t)sblock.fs_bsize);
2208 ebp = (void *)bp->b_un.b_buf;
2209
2210 /*
2211 * Does it look like a log allocation table?
2212 */
2213 /* LINTED pointer cast is aligned */
2214 if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2215 sblock.fs_bsize))
2216 return;
2217 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2218 return;
2219
2220 ep = &ebp->extents[0];
2221 for (i = 0; i < ebp->nextents; ++i, ++ep) {
2222 fno = logbtofrag(&sblock, ep->pbno);
2223 nfno = dbtofsb(&sblock, ep->nbno);
2224 for (j = 0; j < nfno; ++j, ++fno) {
2225 /*
2226 * Invoke the callback first, so that pass1 can
2227 * mark the log blocks in-use. Then, if any
2228 * subsequent pass over the log shows us that a
2229 * block got freed (say, it was also claimed by
2230 * an inode that we cleared), we can safely declare
2231 * the log bad.
2232 */
2233 if (cb != NULL)
2234 (*cb)(fno);
2235 if (!testbmap(fno))
2236 islogok = 0;
2237 }
2238 }
2239 brelse(bp);
2240
2241 if (cb != NULL) {
2242 fno = logbtofrag(&sblock, sblock.fs_logbno);
2243 for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2244 (*cb)(fno);
2245 }
2246 }
2247
/*
 * Release a single fragment with freeblk(), passing the superblock's
 * log block number as freeblk()'s first argument and a count of 1.
 * Its signature matches the callback type that examinelog() accepts.
 */
static void
freelogblk(daddr32_t frag)
{
	freeblk(sblock.fs_logbno, frag, 1);
}
2253
2254 caddr_t
file_id(fsck_ino_t inum,mode_t mode)2255 file_id(fsck_ino_t inum, mode_t mode)
2256 {
2257 static char name[MAXPATHLEN + 1];
2258
2259 if (lfdir == inum) {
2260 return (lfname);
2261 }
2262
2263 if ((mode & IFMT) == IFDIR) {
2264 (void) strcpy(name, "DIR");
2265 } else if ((mode & IFMT) == IFATTRDIR) {
2266 (void) strcpy(name, "ATTR DIR");
2267 } else if ((mode & IFMT) == IFSHAD) {
2268 (void) strcpy(name, "ACL");
2269 } else {
2270 (void) strcpy(name, "FILE");
2271 }
2272
2273 return (name);
2274 }
2275
2276 /*
2277 * Simple initializer for inodesc structures, so users of only a few
2278 * fields don't have to worry about getting the right defaults for
2279 * everything out.
2280 */
2281 void
init_inodesc(struct inodesc * idesc)2282 init_inodesc(struct inodesc *idesc)
2283 {
2284 /*
2285 * Most fields should be zero, just hit the special cases.
2286 */
2287 (void) memset((void *)idesc, 0, sizeof (struct inodesc));
2288 idesc->id_fix = DONTKNOW;
2289 idesc->id_lbn = -1;
2290 idesc->id_truncto = -1;
2291 idesc->id_firsthole = -1;
2292 }
2293
/*
 * Compare routine for tsearch(3C) to use on inode numbers.
 *
 * The keys are not pointers to inode numbers; the pointer values
 * themselves are the inode numbers.
 *
 * Returns a negative value, zero, or a positive value according to
 * whether left is less than, equal to, or greater than right.  The
 * result comes from comparisons rather than a subtraction, because
 * the difference of two unsigned values squeezed into an int has the
 * wrong sign whenever the values are more than INT_MAX apart.
 */
int
ino_t_cmp(const void *left, const void *right)
{
	const uintptr_t lino = (uintptr_t)left;
	const uintptr_t rino = (uintptr_t)right;

	return ((lino > rino) - (lino < rino));
}
2305
/*
 * Return the b_dirty flag of the cylinder group buffer, cgblk.
 */
int
cgisdirty(void)
{
	return (cgblk.b_dirty);
}
2311
/*
 * Pass the cylinder group buffer, cgblk, to flush() together with the
 * descriptor used for writing the file system.
 */
void
cgflush(void)
{
	flush(fswritefd, &cgblk);
}
2317
2318 void
dirty(struct bufarea * bp)2319 dirty(struct bufarea *bp)
2320 {
2321 if (fswritefd < 0) {
2322 /*
2323 * No one should call dirty() in read only mode.
2324 * But if one does, it's not fatal issue. Just warn them.
2325 */
2326 pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2327 } else {
2328 (bp)->b_dirty = 1;
2329 isdirty = 1;
2330 }
2331 }
2332
2333 void
initbarea(struct bufarea * bp)2334 initbarea(struct bufarea *bp)
2335 {
2336 (bp)->b_dirty = 0;
2337 (bp)->b_bno = (diskaddr_t)-1LL;
2338 (bp)->b_flags = 0;
2339 (bp)->b_cnt = 0;
2340 (bp)->b_errs = 0;
2341 }
2342
2343 /*
2344 * Partition-sizing routines adapted from ../newfs/newfs.c.
2345 * Needed because calcsb() needs to use mkfs to work out what the
2346 * superblock should be, and mkfs insists on being told how many
2347 * sectors to use.
2348 *
2349 * Error handling assumes we're never called while preening.
2350 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
2355 */
2356
/*
 * Kind of disk label on the device being sized; holds one of the
 * LABEL_TYPE_* values below.  getdisksize() only consults the disk
 * geometry when this is LABEL_TYPE_VTOC.
 * NOTE(review): presumably set by get_device_size() -- confirm.
 */
static int label_type;

#define LABEL_TYPE_VTOC 1
#define LABEL_TYPE_EFI 2
#define LABEL_TYPE_OTHER 3

/* Number of bytes in a megabyte. */
#define MB (1024 * 1024)
/* 2^31.  NOTE(review): one terabyte assuming 512-byte sectors -- confirm. */
#define SECTORS_PER_TERABYTE (1LL << 31)
/* 2^44.  NOTE(review): units are not evident from this file -- confirm. */
#define FS_SIZE_UPPER_LIMIT 0x100000000000LL
2366
2367 diskaddr_t
getdisksize(caddr_t disk,int fd)2368 getdisksize(caddr_t disk, int fd)
2369 {
2370 int rpm;
2371 struct dk_geom g;
2372 struct dk_cinfo ci;
2373 diskaddr_t actual_size;
2374
2375 /*
2376 * get_device_size() determines the actual size of the
2377 * device, and also the disk's attributes, such as geometry.
2378 */
2379 actual_size = get_device_size(fd, disk);
2380
2381 if (label_type == LABEL_TYPE_VTOC) {
2382 if (ioctl(fd, DKIOCGGEOM, &g)) {
2383 pwarn("%s: Unable to read Disk geometry", disk);
2384 return (0);
2385 }
2386 if (sblock.fs_nsect == 0)
2387 sblock.fs_nsect = g.dkg_nsect;
2388 if (sblock.fs_ntrak == 0)
2389 sblock.fs_ntrak = g.dkg_nhead;
2390 if (sblock.fs_rps == 0) {
2391 rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2392 sblock.fs_rps = rpm / 60;
2393 }
2394 }
2395
2396 if (sblock.fs_bsize == 0)
2397 sblock.fs_bsize = MAXBSIZE;
2398
2399 /*
2400 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2401 * information is not available, default to the min of a MB and
2402 * maxphys.
2403 */
2404 if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2405 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2406 if (sblock.fs_maxcontig < 0) {
2407 int gotit, maxphys;
2408
2409 gotit = fsgetmaxphys(&maxphys, NULL);
2410
2411 /*
2412 * If we cannot get the maxphys value, default
2413 * to ufs_maxmaxphys (MB).
2414 */
2415 if (gotit) {
2416 sblock.fs_maxcontig = MIN(maxphys, MB);
2417 } else {
2418 sblock.fs_maxcontig = MB;
2419 }
2420 }
2421 sblock.fs_maxcontig /= sblock.fs_bsize;
2422 }
2423
2424 return (actual_size);
2425 }
2426
2427 /*
2428 * Figure out how big the partition we're dealing with is.
2429 */
2430 static diskaddr_t
get_device_size(int fd,caddr_t name)2431 get_device_size(int fd, caddr_t name)
2432 {
2433 struct extvtoc vtoc;
2434 struct dk_gpt *efi_vtoc;
2435 diskaddr_t slicesize = 0;
2436
2437 int index = read_extvtoc(fd, &vtoc);
2438
2439 if (index >= 0) {
2440 label_type = LABEL_TYPE_VTOC;
2441 } else {
2442 if (index == VT_ENOTSUP || index == VT_ERROR) {
2443 /* it might be an EFI label */
2444 index = efi_alloc_and_read(fd, &efi_vtoc);
2445 if (index >= 0)
2446 label_type = LABEL_TYPE_EFI;
2447 }
2448 }
2449
2450 if (index < 0) {
2451 /*
2452 * Since both attempts to read the label failed, we're
2453 * going to fall back to a brute force approach to
2454 * determining the device's size: see how far out we can
2455 * perform reads on the device.
2456 */
2457
2458 slicesize = brute_force_get_device_size(fd);
2459 if (slicesize == 0) {
2460 switch (index) {
2461 case VT_ERROR:
2462 pwarn("%s: %s\n", name, strerror(errno));
2463 break;
2464 case VT_EIO:
2465 pwarn("%s: I/O error accessing VTOC", name);
2466 break;
2467 case VT_EINVAL:
2468 pwarn("%s: Invalid field in VTOC", name);
2469 break;
2470 default:
2471 pwarn("%s: unknown error %d accessing VTOC",
2472 name, index);
2473 break;
2474 }
2475 return (0);
2476 } else {
2477 label_type = LABEL_TYPE_OTHER;
2478 }
2479 }
2480
2481 if (label_type == LABEL_TYPE_EFI) {
2482 slicesize = efi_vtoc->efi_parts[index].p_size;
2483 efi_free(efi_vtoc);
2484 } else if (label_type == LABEL_TYPE_VTOC) {
2485 slicesize = vtoc.v_part[index].p_size;
2486 }
2487
2488 return (slicesize);
2489 }
2490
2491 /*
2492 * brute_force_get_device_size
2493 *
2494 * Determine the size of the device by seeing how far we can
2495 * read. Doing an llseek( , , SEEK_END) would probably work
2496 * in most cases, but we've seen at least one third-party driver
2497 * which doesn't correctly support the SEEK_END option when the
2498 * the device is greater than a terabyte.
2499 */
2500
2501 static diskaddr_t
brute_force_get_device_size(int fd)2502 brute_force_get_device_size(int fd)
2503 {
2504 diskaddr_t min_fail = 0;
2505 diskaddr_t max_succeed = 0;
2506 diskaddr_t cur_db_off;
2507 char buf[DEV_BSIZE];
2508
2509 /*
2510 * First, see if we can read the device at all, just to
2511 * eliminate errors that have nothing to do with the
2512 * device's size.
2513 */
2514
2515 if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2516 ((read(fd, buf, DEV_BSIZE)) == -1))
2517 return (0); /* can't determine size */
2518
2519 /*
2520 * Now, go sequentially through the multiples of 4TB
2521 * to find the first read that fails (this isn't strictly
2522 * the most efficient way to find the actual size if the
2523 * size really could be anything between 0 and 2**64 bytes.
2524 * We expect the sizes to be less than 16 TB for some time,
2525 * so why do a bunch of reads that are larger than that?
2526 * However, this algorithm *will* work for sizes of greater
2527 * than 16 TB. We're just not optimizing for those sizes.)
2528 */
2529
2530 /*
2531 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2532 * We're using > 32-bit constants here. Therefore, its flow
2533 * analysis is wrong. For the time being, ignore complaints
2534 * from it about the body of the for() being unreached.
2535 */
2536 for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2537 (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2538 cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2539 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2540 SEEK_SET) == -1) ||
2541 (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2542 min_fail = cur_db_off;
2543 else
2544 max_succeed = cur_db_off;
2545 }
2546
2547 /*
2548 * XXX Same lint flow analysis problem as above.
2549 */
2550 if (min_fail == 0)
2551 return (0);
2552
2553 /*
2554 * We now know that the size of the device is less than
2555 * min_fail and greater than or equal to max_succeed. Now
2556 * keep splitting the difference until the actual size in
2557 * sectors in known. We also know that the difference
2558 * between max_succeed and min_fail at this time is
2559 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2560 * simplifies the math below.
2561 */
2562
2563 while (min_fail - max_succeed > 1) {
2564 cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2565 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2566 SEEK_SET)) == -1) ||
2567 ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2568 min_fail = cur_db_off;
2569 else
2570 max_succeed = cur_db_off;
2571 }
2572
2573 /* the size is the last successfully read sector offset plus one */
2574 return (max_succeed + 1);
2575 }
2576
2577 static void
vfileerror(fsck_ino_t cwd,fsck_ino_t ino,caddr_t fmt,va_list ap)2578 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2579 {
2580 struct dinode *dp;
2581 char pathbuf[MAXPATHLEN + 1];
2582
2583 vpwarn(fmt, ap);
2584 (void) putchar(' ');
2585 pinode(ino);
2586 (void) printf("\n");
2587 getpathname(pathbuf, cwd, ino);
2588 if (ino < UFSROOTINO || ino > maxino) {
2589 pfatal("NAME=%s\n", pathbuf);
2590 return;
2591 }
2592 dp = ginode(ino);
2593 if (ftypeok(dp))
2594 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2595 else
2596 pfatal("NAME=%s\n", pathbuf);
2597 }
2598
2599 void
direrror(fsck_ino_t ino,caddr_t fmt,...)2600 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2601 {
2602 va_list ap;
2603
2604 va_start(ap, fmt);
2605 vfileerror(ino, ino, fmt, ap);
2606 va_end(ap);
2607 }
2608
2609 static void
vdirerror(fsck_ino_t ino,caddr_t fmt,va_list ap)2610 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2611 {
2612 vfileerror(ino, ino, fmt, ap);
2613 }
2614
2615 void
fileerror(fsck_ino_t cwd,fsck_ino_t ino,caddr_t fmt,...)2616 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2617 {
2618 va_list ap;
2619
2620 va_start(ap, fmt);
2621 vfileerror(cwd, ino, fmt, ap);
2622 va_end(ap);
2623 }
2624
2625 /*
2626 * Adds the given inode to the orphaned-directories list, limbo_dirs.
2627 * Assumes that the caller has set INCLEAR in the inode's statemap[]
2628 * entry.
2629 *
2630 * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2631 * meaning it's effectively an orphan. It needs to be noted now, so
2632 * it will be remembered in pass 4.
2633 */
2634
2635 void
add_orphan_dir(fsck_ino_t ino)2636 add_orphan_dir(fsck_ino_t ino)
2637 {
2638 if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2639 errexit("add_orphan_dir: out of memory");
2640 }
2641
2642 /*
2643 * Remove an inode from the orphaned-directories list, presumably
2644 * because it's been cleared.
2645 */
2646 void
remove_orphan_dir(fsck_ino_t ino)2647 remove_orphan_dir(fsck_ino_t ino)
2648 {
2649 (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2650 }
2651
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 *
 * log_setsum() stores in *sp the sum of the 32-bit words in the nb
 * bytes starting at lp.  Only whole words are summed; trailing bytes
 * that do not fill a word are ignored, and a non-positive nb is
 * treated as an empty buffer, leaving *sp zero.
 *
 * The sum is accumulated modulo 2^32 in an unsigned variable.  The data
 * is arbitrary, so wrap-around is expected, and signed overflow would
 * be undefined behaviour.  The stored result has the same bit pattern
 * as a two's-complement signed sum.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	size_t nwords;

	/*
	 * Clear the destination before summing: if sp points into the
	 * buffer being summed, it then contributes zero to the total.
	 */
	*sp = 0;

	nwords = (nb > 0) ? (size_t)nb / sizeof (int32_t) : 0;
	while (nwords-- > 0)
		csum += (uint32_t)*lp++;

	*sp = (int32_t)csum;
}
2667
/*
 * Check the checksum stored in *sp against one freshly computed by
 * log_setsum() over the nb bytes at lp.
 *
 * Returns 1 if the two agree and 0 if they differ.  Either way *sp
 * holds its original value on return: log_setsum() overwrites it, so
 * the saved value is put back after a mismatch.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;

	log_setsum(sp, lp, nb);
	if (*sp == stored)
		return (1);

	*sp = stored;
	return (0);
}
2680