1 /*
2 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
6 /* All Rights Reserved */
7
8 /*
9 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms are permitted
13 * provided that: (1) source distributions retain this entire copyright
14 * notice and comment, and (2) distributions including binaries display
15 * the following acknowledgement: ``This product includes software
16 * developed by the University of California, Berkeley and its contributors''
17 * in the documentation or other materials provided with the distribution
18 * and in all advertising materials mentioning features or use of this
19 * software. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
24 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <stdarg.h>
31 #include <libadm.h>
32 #include <note.h>
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/mntent.h>
36 #include <sys/filio.h>
37 #include <sys/fs/ufs_fs.h>
38 #include <sys/vnode.h>
39 #include <sys/fs/ufs_acl.h>
40 #include <sys/fs/ufs_inode.h>
41 #include <sys/fs/ufs_log.h>
42 #define _KERNEL
43 #include <sys/fs/ufs_fsdir.h>
44 #undef _KERNEL
45 #include <sys/mnttab.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <fcntl.h>
49 #include <signal.h>
50 #include <string.h>
51 #include <ctype.h>
52 #include <sys/vfstab.h>
53 #include <sys/lockfs.h>
54 #include <errno.h>
55 #include <sys/cmn_err.h>
56 #include <sys/dkio.h>
57 #include <sys/vtoc.h>
58 #include <sys/efi_partition.h>
59 #include <fslib.h>
60 #include <inttypes.h>
61 #include "fsck.h"
62
63 caddr_t mount_point = NULL;
64
65 static int64_t diskreads, totalreads; /* Disk cache statistics */
66
67 static int log_checksum(int32_t *, int32_t *, int);
68 static void vdirerror(fsck_ino_t, caddr_t, va_list);
69 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
70 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
71 static void vpwarn(caddr_t, va_list);
72 static int getaline(FILE *, caddr_t, int);
73 static struct bufarea *alloc_bufarea(void);
74 static void rwerror(caddr_t, diskaddr_t, int rval);
75 static void debugclean(void);
76 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
77 static void freelogblk(daddr32_t);
78 static void verrexit(caddr_t, va_list);
79 static void vpfatal(caddr_t, va_list);
80 static diskaddr_t get_device_size(int, caddr_t);
81 static diskaddr_t brute_force_get_device_size(int);
82 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
83 daddr32_t *, daddr32_t *, daddr32_t *);
84
85 int
ftypeok(struct dinode * dp)86 ftypeok(struct dinode *dp)
87 {
88 switch (dp->di_mode & IFMT) {
89
90 case IFDIR:
91 case IFREG:
92 case IFBLK:
93 case IFCHR:
94 case IFLNK:
95 case IFSOCK:
96 case IFIFO:
97 case IFSHAD:
98 case IFATTRDIR:
99 return (1);
100
101 default:
102 if (debug)
103 (void) printf("bad file type 0%o\n", dp->di_mode);
104 return (0);
105 }
106 }
107
108 int
acltypeok(struct dinode * dp)109 acltypeok(struct dinode *dp)
110 {
111 if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
112 return (1);
113
114 if (debug)
115 (void) printf("bad file type for acl I=%d: 0%o\n",
116 dp->di_shadow, dp->di_mode);
117 return (0);
118 }
119
120 NOTE(PRINTFLIKE(1))
121 int
reply(caddr_t fmt,...)122 reply(caddr_t fmt, ...)
123 {
124 va_list ap;
125 char line[80];
126
127 if (preen)
128 pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
129
130 if (mflag) {
131 /*
132 * We don't know what's going on, so don't potentially
133 * make things worse by having errexit() write stuff
134 * out to disk.
135 */
136 (void) printf(
137 "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
138 devname);
139 exit(EXERRFATAL);
140 }
141
142 va_start(ap, fmt);
143 (void) putchar('\n');
144 (void) vprintf(fmt, ap);
145 (void) putchar('?');
146 (void) putchar(' ');
147 va_end(ap);
148
149 if (nflag || fswritefd < 0) {
150 (void) printf(" no\n\n");
151 return (0);
152 }
153 if (yflag) {
154 (void) printf(" yes\n\n");
155 return (1);
156 }
157 (void) fflush(stdout);
158 if (getaline(stdin, line, sizeof (line)) == EOF)
159 errexit("\n");
160 (void) printf("\n");
161 if (line[0] == 'y' || line[0] == 'Y') {
162 return (1);
163 } else {
164 return (0);
165 }
166 }
167
/*
 * Read one line from FP into LOC, dropping every whitespace character
 * and storing at most MAXLEN - 1 others; the rest of an over-long line
 * is consumed and discarded.  The result is NUL-terminated.
 *
 * Returns the number of characters stored, or EOF if end-of-file is
 * reached before a newline (in which case LOC is not terminated).
 */
int
getaline(FILE *fp, caddr_t loc, int maxlen)
{
	caddr_t out = loc;
	caddr_t limit = &loc[maxlen - 1];
	int ch;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		if (isspace(ch) || out >= limit)
			continue;
		*out++ = (char)ch;
	}
	*out = '\0';

	/* The difference is bounded by maxlen, so it fits in an int. */
	return ((int)(out - loc));
}
186
187 /*
188 * Malloc buffers and set up cache.
189 */
190 void
bufinit(void)191 bufinit(void)
192 {
193 struct bufarea *bp;
194 int bufcnt, i;
195 caddr_t bufp;
196
197 bufp = malloc((size_t)sblock.fs_bsize);
198 if (bufp == NULL)
199 goto nomem;
200 initbarea(&cgblk);
201 cgblk.b_un.b_buf = bufp;
202 bufhead.b_next = bufhead.b_prev = &bufhead;
203 bufcnt = MAXBUFSPACE / sblock.fs_bsize;
204 if (bufcnt < MINBUFS)
205 bufcnt = MINBUFS;
206 for (i = 0; i < bufcnt; i++) {
207 bp = (struct bufarea *)malloc(sizeof (struct bufarea));
208 if (bp == NULL) {
209 if (i >= MINBUFS)
210 goto noalloc;
211 goto nomem;
212 }
213
214 bufp = malloc((size_t)sblock.fs_bsize);
215 if (bufp == NULL) {
216 free((void *)bp);
217 if (i >= MINBUFS)
218 goto noalloc;
219 goto nomem;
220 }
221 initbarea(bp);
222 bp->b_un.b_buf = bufp;
223 bp->b_prev = &bufhead;
224 bp->b_next = bufhead.b_next;
225 bufhead.b_next->b_prev = bp;
226 bufhead.b_next = bp;
227 }
228 noalloc:
229 bufhead.b_size = i; /* save number of buffers */
230 pbp = pdirbp = NULL;
231 return;
232
233 nomem:
234 errexit("cannot allocate buffer pool\n");
235 /* NOTREACHED */
236 }
237
238 /*
239 * Undo a bufinit().
240 */
241 void
unbufinit(void)242 unbufinit(void)
243 {
244 int cnt;
245 struct bufarea *bp, *nbp;
246
247 cnt = 0;
248 for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
249 cnt++;
250 flush(fswritefd, bp);
251 nbp = bp->b_prev;
252 /*
253 * We're discarding the entire chain, so this isn't
254 * technically necessary. However, it doesn't hurt
255 * and lint's data flow analysis is much happier
256 * (this prevents it from thinking there's a chance
257 * of our using memory elsewhere after it's been released).
258 */
259 nbp->b_next = bp->b_next;
260 bp->b_next->b_prev = nbp;
261 free((void *)bp->b_un.b_buf);
262 free((void *)bp);
263 }
264
265 if (bufhead.b_size != cnt)
266 errexit("Panic: cache lost %d buffers\n",
267 bufhead.b_size - cnt);
268 }
269
270 /*
271 * Manage a cache of directory blocks.
272 */
273 struct bufarea *
getdatablk(daddr32_t blkno,size_t size)274 getdatablk(daddr32_t blkno, size_t size)
275 {
276 struct bufarea *bp;
277
278 for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
279 if (bp->b_bno == fsbtodb(&sblock, blkno)) {
280 goto foundit;
281 }
282 for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
283 if ((bp->b_flags & B_INUSE) == 0)
284 break;
285 if (bp == &bufhead) {
286 bp = alloc_bufarea();
287 if (bp == NULL) {
288 errexit("deadlocked buffer pool\n");
289 /* NOTREACHED */
290 }
291 }
292 /*
293 * We're at the same logical level as getblk(), so if there
294 * are any errors, we'll let our caller handle them.
295 */
296 diskreads++;
297 (void) getblk(bp, blkno, size);
298
299 foundit:
300 totalreads++;
301 bp->b_cnt++;
302 /*
303 * Move the buffer to head of linked list if it isn't
304 * already there.
305 */
306 if (bufhead.b_next != bp) {
307 bp->b_prev->b_next = bp->b_next;
308 bp->b_next->b_prev = bp->b_prev;
309 bp->b_prev = &bufhead;
310 bp->b_next = bufhead.b_next;
311 bufhead.b_next->b_prev = bp;
312 bufhead.b_next = bp;
313 }
314 bp->b_flags |= B_INUSE;
315 return (bp);
316 }
317
318 void
brelse(struct bufarea * bp)319 brelse(struct bufarea *bp)
320 {
321 bp->b_cnt--;
322 if (bp->b_cnt == 0) {
323 bp->b_flags &= ~B_INUSE;
324 }
325 }
326
327 struct bufarea *
getblk(struct bufarea * bp,daddr32_t blk,size_t size)328 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
329 {
330 diskaddr_t dblk;
331
332 dblk = fsbtodb(&sblock, blk);
333 if (bp->b_bno == dblk)
334 return (bp);
335 flush(fswritefd, bp);
336 bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
337 bp->b_bno = dblk;
338 bp->b_size = size;
339 return (bp);
340 }
341
342 void
flush(int fd,struct bufarea * bp)343 flush(int fd, struct bufarea *bp)
344 {
345 int i, j;
346 caddr_t sip;
347 long size;
348
349 if (!bp->b_dirty)
350 return;
351
352 /*
353 * It's not our buf, so if there are errors, let whoever
354 * acquired it deal with the actual problem.
355 */
356 if (bp->b_errs != 0)
357 pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
358 bp->b_dirty = 0;
359 bp->b_errs = 0;
360 bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
361 if (bp != &sblk) {
362 return;
363 }
364
365 /*
366 * We're flushing the superblock, so make sure all the
367 * ancillary bits go out as well.
368 */
369 sip = (caddr_t)sblock.fs_u.fs_csp;
370 for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
371 size = sblock.fs_cssize - i < sblock.fs_bsize ?
372 sblock.fs_cssize - i : sblock.fs_bsize;
373 bwrite(fswritefd, sip,
374 fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
375 size);
376 sip += size;
377 }
378 }
379
380 static void
rwerror(caddr_t mesg,diskaddr_t blk,int rval)381 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
382 {
383 int olderr = errno;
384
385 if (!preen)
386 (void) printf("\n");
387
388 if (rval == -1)
389 pfatal("CANNOT %s: DISK BLOCK %lld: %s",
390 mesg, blk, strerror(olderr));
391 else
392 pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
393
394 if (reply("CONTINUE") == 0) {
395 exitstat = EXERRFATAL;
396 errexit("Program terminated\n");
397 }
398 }
399
400 void
ckfini(void)401 ckfini(void)
402 {
403 int64_t percentage;
404
405 if (fswritefd < 0)
406 return;
407
408 flush(fswritefd, &sblk);
409 /*
410 * Were we using a backup superblock?
411 */
412 if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
413 if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
414 sblk.b_bno = SBOFF / dev_bsize;
415 sbdirty();
416 flush(fswritefd, &sblk);
417 }
418 }
419 flush(fswritefd, &cgblk);
420 if (cgblk.b_un.b_buf != NULL) {
421 free((void *)cgblk.b_un.b_buf);
422 cgblk.b_un.b_buf = NULL;
423 }
424 unbufinit();
425 pbp = NULL;
426 pdirbp = NULL;
427 if (debug) {
428 /*
429 * Note that we only count cache-related reads.
430 * Anything that called fsck_bread() or getblk()
431 * directly are explicitly not cached, so they're not
432 * included here.
433 */
434 if (totalreads != 0)
435 percentage = diskreads * 100 / totalreads;
436 else
437 percentage = 0;
438
439 (void) printf("cache missed %lld of %lld reads (%lld%%)\n",
440 (longlong_t)diskreads, (longlong_t)totalreads,
441 (longlong_t)percentage);
442 }
443
444 (void) close(fsreadfd);
445 (void) close(fswritefd);
446 fsreadfd = -1;
447 fswritefd = -1;
448 }
449
450 int
fsck_bread(int fd,caddr_t buf,diskaddr_t blk,size_t size)451 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
452 {
453 caddr_t cp;
454 int i;
455 int errs;
456 offset_t offset = ldbtob(blk);
457 offset_t addr;
458
459 /*
460 * In our universe, nothing exists before the superblock, so
461 * just pretend it's always zeros. This is the complement of
462 * bwrite()'s ignoring write requests into that space.
463 */
464 if (blk < SBLOCK) {
465 if (debug)
466 (void) printf(
467 "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
468 SBLOCK, (longlong_t)blk);
469 (void) memset(buf, 0, (size_t)size);
470 return (1);
471 }
472
473 if (llseek(fd, offset, SEEK_SET) < 0) {
474 rwerror("SEEK", blk, -1);
475 }
476
477 if ((i = read(fd, buf, size)) == size) {
478 return (0);
479 }
480 rwerror("READ", blk, i);
481 if (llseek(fd, offset, SEEK_SET) < 0) {
482 rwerror("SEEK", blk, -1);
483 }
484 errs = 0;
485 (void) memset(buf, 0, (size_t)size);
486 pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
487 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
488 addr = ldbtob(blk + i);
489 if (llseek(fd, addr, SEEK_SET) < 0 ||
490 read(fd, cp, (int)secsize) < 0) {
491 iscorrupt = 1;
492 (void) printf(" %llu", blk + (u_longlong_t)i);
493 errs++;
494 }
495 }
496 (void) printf("\n");
497 return (errs);
498 }
499
500 void
bwrite(int fd,caddr_t buf,diskaddr_t blk,int64_t size)501 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
502 {
503 int i;
504 int n;
505 caddr_t cp;
506 offset_t offset = ldbtob(blk);
507 offset_t addr;
508
509 if (fd < 0)
510 return;
511 if (blk < SBLOCK) {
512 if (debug)
513 (void) printf(
514 "WARNING: Attempt to write illegal blkno %lld on %s\n",
515 (longlong_t)blk, devname);
516 return;
517 }
518 if (llseek(fd, offset, SEEK_SET) < 0) {
519 rwerror("SEEK", blk, -1);
520 }
521 if ((i = write(fd, buf, (int)size)) == size) {
522 fsmodified = 1;
523 return;
524 }
525 rwerror("WRITE", blk, i);
526 if (llseek(fd, offset, SEEK_SET) < 0) {
527 rwerror("SEEK", blk, -1);
528 }
529 pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
530 for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
531 n = 0;
532 addr = ldbtob(blk + i);
533 if (llseek(fd, addr, SEEK_SET) < 0 ||
534 (n = write(fd, cp, DEV_BSIZE)) < 0) {
535 iscorrupt = 1;
536 (void) printf(" %llu", blk + (u_longlong_t)i);
537 } else if (n > 0) {
538 fsmodified = 1;
539 }
540
541 }
542 (void) printf("\n");
543 }
544
545 /*
546 * Allocates the specified number of contiguous fragments.
547 */
548 daddr32_t
allocblk(int wantedfrags)549 allocblk(int wantedfrags)
550 {
551 int block, leadfrag, tailfrag;
552 daddr32_t selected;
553 size_t size;
554 struct bufarea *bp;
555
556 /*
557 * It's arguable whether we should just fail, or instead
558 * error out here. Since we should only ever be asked for
559 * a single fragment or an entire block (i.e., sblock.fs_frag),
560 * we'll fail out because anything else means somebody
561 * changed code without considering all of the ramifications.
562 */
563 if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
564 exitstat = EXERRFATAL;
565 errexit("allocblk() asked for %d frags. "
566 "Legal range is 1 to %d",
567 wantedfrags, sblock.fs_frag);
568 }
569
570 /*
571 * For each filesystem block, look at every possible starting
572 * offset within the block such that we can get the number of
573 * contiguous fragments that we need. This is a drastically
574 * simplified version of the kernel's mapsearch() and alloc*().
575 * It's also correspondingly slower.
576 */
577 for (block = 0; block < maxfsblock - sblock.fs_frag;
578 block += sblock.fs_frag) {
579 for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
580 leadfrag++) {
581 /*
582 * Is first fragment of candidate run available?
583 */
584 if (testbmap(block + leadfrag))
585 continue;
586 /*
587 * Are the rest of them available?
588 */
589 for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
590 if (testbmap(block + leadfrag + tailfrag))
591 break;
592 if (tailfrag < wantedfrags) {
593 /*
594 * No, skip the known-unusable run.
595 */
596 leadfrag += tailfrag;
597 continue;
598 }
599 /*
600 * Found what we need, so claim them.
601 */
602 for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
603 setbmap(block + leadfrag + tailfrag);
604 n_blks += wantedfrags;
605 size = wantedfrags * sblock.fs_fsize;
606 selected = block + leadfrag;
607 bp = getdatablk(selected, size);
608 (void) memset((void *)bp->b_un.b_buf, 0, size);
609 dirty(bp);
610 brelse(bp);
611 if (debug)
612 (void) printf(
613 "allocblk: selected %d (in block %d), frags %d, size %d\n",
614 selected, selected % sblock.fs_bsize,
615 wantedfrags, (int)size);
616 return (selected);
617 }
618 }
619 return (0);
620 }
621
622 /*
623 * Free a previously allocated block
624 */
625 void
freeblk(fsck_ino_t ino,daddr32_t blkno,int frags)626 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
627 {
628 struct inodesc idesc;
629
630 if (debug)
631 (void) printf("debug: freeing %d fragments starting at %d\n",
632 frags, blkno);
633
634 init_inodesc(&idesc);
635
636 idesc.id_number = ino;
637 idesc.id_blkno = blkno;
638 idesc.id_numfrags = frags;
639 idesc.id_truncto = -1;
640
641 /*
642 * Nothing in the return status has any relevance to how
643 * we're using pass4check(), so just ignore it.
644 */
645 (void) pass4check(&idesc);
646 }
647
648 /*
649 * Fill NAMEBUF with a path starting in CURDIR for INO. Assumes
650 * that the given buffer is at least MAXPATHLEN + 1 characters.
651 */
652 void
getpathname(caddr_t namebuf,fsck_ino_t curdir,fsck_ino_t ino)653 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
654 {
655 int len;
656 caddr_t cp;
657 struct dinode *dp;
658 struct inodesc idesc;
659 struct inoinfo *inp;
660
661 if (debug)
662 (void) printf("debug: getpathname(curdir %d, ino %d)\n",
663 curdir, ino);
664
665 if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
666 (void) strcpy(namebuf, "?");
667 return;
668 }
669
670 if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
671 (void) strcpy(namebuf, "/");
672 return;
673 }
674
675 init_inodesc(&idesc);
676 idesc.id_type = DATA;
677 cp = &namebuf[MAXPATHLEN - 1];
678 *cp = '\0';
679
680 /*
681 * In the case of extended attributes, our
682 * parent won't necessarily be a directory, so just
683 * return what we've found with a prefix indicating
684 * that it's an XATTR. Presumably our caller will
685 * know what's going on and do something useful, like
686 * work out the path of the parent and then combine
687 * the two names.
688 *
689 * Can't use strcpy(), etc, because we've probably
690 * already got some name information in the buffer and
691 * the usual trailing \0 would lose it.
692 */
693 dp = ginode(curdir);
694 if ((dp->di_mode & IFMT) == IFATTRDIR) {
695 idesc.id_number = curdir;
696 idesc.id_parent = ino;
697 idesc.id_func = findname;
698 idesc.id_name = namebuf;
699 idesc.id_fix = NOFIX;
700 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
701 *cp-- = '?';
702 }
703
704 len = sizeof (XATTR_DIR_NAME) - 1;
705 cp -= len;
706 (void) memmove(cp, XATTR_DIR_NAME, len);
707 goto attrname;
708 }
709
710 /*
711 * If curdir == ino, need to get a handle on .. so we
712 * can search it for ino's name. Otherwise, just search
713 * the given directory for ino. Repeat until out of space
714 * or a full path has been built.
715 */
716 if (curdir != ino) {
717 idesc.id_parent = curdir;
718 goto namelookup;
719 }
720 while (ino != UFSROOTINO && ino != 0) {
721 idesc.id_number = ino;
722 idesc.id_func = findino;
723 idesc.id_name = "..";
724 idesc.id_fix = NOFIX;
725 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
726 inp = getinoinfo(ino);
727 if ((inp == NULL) || (inp->i_parent == 0)) {
728 break;
729 }
730 idesc.id_parent = inp->i_parent;
731 }
732
733 /*
734 * To get this far, id_parent must have the inode
735 * number for `..' in it. By definition, that's got
736 * to be a directory, so search it for the inode of
737 * interest.
738 */
739 namelookup:
740 idesc.id_number = idesc.id_parent;
741 idesc.id_parent = ino;
742 idesc.id_func = findname;
743 idesc.id_name = namebuf;
744 idesc.id_fix = NOFIX;
745 if ((ckinode(ginode(idesc.id_number),
746 &idesc, CKI_TRAVERSE) & FOUND) == 0) {
747 break;
748 }
749 /*
750 * Prepend to what we've accumulated so far. If
751 * there's not enough room for even one more path element
752 * (of the worst-case length), then bail out.
753 */
754 len = strlen(namebuf);
755 cp -= len;
756 if (cp < &namebuf[MAXNAMLEN])
757 break;
758 (void) memmove(cp, namebuf, len);
759 *--cp = '/';
760
761 /*
762 * Corner case for a looped-to-itself directory.
763 */
764 if (ino == idesc.id_number)
765 break;
766
767 /*
768 * Climb one level of the hierarchy. In other words,
769 * the current .. becomes the inode to search for and
770 * its parent becomes the directory to search in.
771 */
772 ino = idesc.id_number;
773 }
774
775 /*
776 * If we hit a discontinuity in the hierarchy, indicate it by
777 * prefixing the path so far with `?'. Otherwise, the first
778 * character will be `/' as a side-effect of the *--cp above.
779 *
780 * The special case is to handle the situation where we're
781 * trying to look something up in UFSROOTINO, but didn't find
782 * it.
783 */
784 if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
785 if (cp > namebuf)
786 cp--;
787 *cp = '?';
788 }
789
790 /*
791 * The invariants being used for buffer integrity are:
792 * - namebuf[] is terminated with \0 before anything else
793 * - cp is always <= the last element of namebuf[]
794 * - the new path element is always stored at the
795 * beginning of namebuf[], and is no more than MAXNAMLEN-1
796 * characters
797 * - cp is is decremented by the number of characters in
798 * the new path element
799 * - if, after the above accounting for the new element's
800 * size, there is no longer enough room at the beginning of
801 * namebuf[] for a full-sized path element and a slash,
802 * terminate the loop. cp is in the range
803 * &namebuf[0]..&namebuf[MAXNAMLEN - 1]
804 */
805 attrname:
806 /* LINTED per the above discussion */
807 (void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
808 }
809
810 /* ARGSUSED */
811 void
catch(int dummy)812 catch(int dummy)
813 {
814 ckfini();
815 exit(EXSIGNAL);
816 }
817
818 /*
819 * When preening, allow a single quit to signal
820 * a special exit after filesystem checks complete
821 * so that reboot sequence may be interrupted.
822 */
823 /* ARGSUSED */
824 void
catchquit(int dummy)825 catchquit(int dummy)
826 {
827 (void) printf("returning to single-user after filesystem check\n");
828 interrupted = 1;
829 (void) signal(SIGQUIT, SIG_DFL);
830 }
831
832
833 /*
834 * determine whether an inode should be fixed.
835 */
836 NOTE(PRINTFLIKE(2))
837 int
dofix(struct inodesc * idesc,caddr_t msg,...)838 dofix(struct inodesc *idesc, caddr_t msg, ...)
839 {
840 int rval = 0;
841 va_list ap;
842
843 va_start(ap, msg);
844
845 switch (idesc->id_fix) {
846
847 case DONTKNOW:
848 if (idesc->id_type == DATA)
849 vdirerror(idesc->id_number, msg, ap);
850 else
851 vpwarn(msg, ap);
852 if (preen) {
853 idesc->id_fix = FIX;
854 rval = ALTERED;
855 break;
856 }
857 if (reply("SALVAGE") == 0) {
858 idesc->id_fix = NOFIX;
859 break;
860 }
861 idesc->id_fix = FIX;
862 rval = ALTERED;
863 break;
864
865 case FIX:
866 rval = ALTERED;
867 break;
868
869 case NOFIX:
870 break;
871
872 default:
873 errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
874 }
875
876 va_end(ap);
877 return (rval);
878 }
879
880 NOTE(PRINTFLIKE(1))
881 void
errexit(caddr_t fmt,...)882 errexit(caddr_t fmt, ...)
883 {
884 va_list ap;
885
886 va_start(ap, fmt);
887 verrexit(fmt, ap);
888 /* NOTREACHED */
889 }
890
891 NOTE(PRINTFLIKE(1))
892 static void
verrexit(caddr_t fmt,va_list ap)893 verrexit(caddr_t fmt, va_list ap)
894 {
895 static int recursing = 0;
896
897 if (!recursing) {
898 recursing = 1;
899 if (errorlocked || iscorrupt) {
900 if (havesb && fswritefd >= 0) {
901 sblock.fs_clean = FSBAD;
902 sblock.fs_state = FSOKAY - (long)sblock.fs_time;
903 sblock.fs_state = -sblock.fs_state;
904 sbdirty();
905 write_altsb(fswritefd);
906 flush(fswritefd, &sblk);
907 }
908 }
909 ckfini();
910 recursing = 0;
911 }
912 (void) vprintf(fmt, ap);
913 if (fmt[strlen(fmt) - 1] != '\n')
914 (void) putchar('\n');
915 exit((exitstat != 0) ? exitstat : EXERRFATAL);
916 }
917
918 /*
919 * An unexpected inconsistency occured.
920 * Die if preening, otherwise just print message and continue.
921 */
922 NOTE(PRINTFLIKE(1))
923 void
pfatal(caddr_t fmt,...)924 pfatal(caddr_t fmt, ...)
925 {
926 va_list ap;
927
928 va_start(ap, fmt);
929 vpfatal(fmt, ap);
930 va_end(ap);
931 }
932
933 NOTE(PRINTFLIKE(1))
934 static void
vpfatal(caddr_t fmt,va_list ap)935 vpfatal(caddr_t fmt, va_list ap)
936 {
937 if (preen) {
938 if (*fmt != '\0') {
939 (void) printf("%s: ", devname);
940 (void) vprintf(fmt, ap);
941 (void) printf("\n");
942 }
943 (void) printf(
944 "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
945 devname);
946 if (havesb && fswritefd >= 0) {
947 sblock.fs_clean = FSBAD;
948 sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
949 sbdirty();
950 flush(fswritefd, &sblk);
951 }
952 /*
953 * We're exiting, it doesn't really matter that our
954 * caller doesn't get to call va_end().
955 */
956 if (exitstat == 0)
957 exitstat = EXFNDERRS;
958 exit(exitstat);
959 }
960 if (*fmt != '\0') {
961 (void) vprintf(fmt, ap);
962 }
963 }
964
965 /*
966 * Pwarn just prints a message when not preening,
967 * or a warning (preceded by filename) when preening.
968 */
969 NOTE(PRINTFLIKE(1))
970 void
pwarn(caddr_t fmt,...)971 pwarn(caddr_t fmt, ...)
972 {
973 va_list ap;
974
975 va_start(ap, fmt);
976 vpwarn(fmt, ap);
977 va_end(ap);
978 }
979
980 NOTE(PRINTFLIKE(1))
981 static void
vpwarn(caddr_t fmt,va_list ap)982 vpwarn(caddr_t fmt, va_list ap)
983 {
984 if (*fmt != '\0') {
985 if (preen)
986 (void) printf("%s: ", devname);
987 (void) vprintf(fmt, ap);
988 }
989 }
990
991 /*
992 * Like sprintf(), except the buffer is dynamically allocated
993 * and returned, instead of being passed in. A pointer to the
994 * buffer is stored in *RET, and FMT is the usual format string.
995 * The number of characters in *RET (excluding the trailing \0,
996 * to be consistent with the other *printf() routines) is returned.
997 *
998 * Solaris doesn't have asprintf(3C) yet, unfortunately.
999 */
1000 NOTE(PRINTFLIKE(2))
1001 int
fsck_asprintf(caddr_t * ret,caddr_t fmt,...)1002 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1003 {
1004 int len;
1005 caddr_t buffer;
1006 va_list ap;
1007
1008 va_start(ap, fmt);
1009 len = vsnprintf(NULL, 0, fmt, ap);
1010 va_end(ap);
1011
1012 buffer = malloc((len + 1) * sizeof (char));
1013 if (buffer == NULL) {
1014 errexit("Out of memory in asprintf\n");
1015 /* NOTREACHED */
1016 }
1017
1018 va_start(ap, fmt);
1019 (void) vsnprintf(buffer, len + 1, fmt, ap);
1020 va_end(ap);
1021
1022 *ret = buffer;
1023 return (len);
1024 }
1025
1026 /*
1027 * So we can take advantage of kernel routines in ufs_subr.c.
1028 */
1029 /* PRINTFLIKE2 */
1030 void
cmn_err(int level,caddr_t fmt,...)1031 cmn_err(int level, caddr_t fmt, ...)
1032 {
1033 va_list ap;
1034
1035 va_start(ap, fmt);
1036 if (level == CE_PANIC) {
1037 (void) printf("INTERNAL INCONSISTENCY:");
1038 verrexit(fmt, ap);
1039 } else {
1040 (void) vprintf(fmt, ap);
1041 }
1042 va_end(ap);
1043 }
1044
1045 /*
1046 * Check to see if unraw version of name is already mounted.
1047 * Updates devstr with the device name if devstr is not NULL
1048 * and str_size is positive.
1049 */
1050 int
mounted(caddr_t name,caddr_t devstr,size_t str_size)1051 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1052 {
1053 int found;
1054 struct mnttab *mntent;
1055
1056 mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1057 if (mntent == NULL)
1058 return (M_NOMNT);
1059
1060 /*
1061 * It's mounted. With or without write access?
1062 */
1063 if (hasmntopt(mntent, MNTOPT_RO) != 0)
1064 found = M_RO; /* mounted as RO */
1065 else
1066 found = M_RW; /* mounted as R/W */
1067
1068 if (mount_point == NULL) {
1069 mount_point = strdup(mntent->mnt_mountp);
1070 if (mount_point == NULL) {
1071 errexit("fsck: memory allocation failure: %s",
1072 strerror(errno));
1073 /* NOTREACHED */
1074 }
1075
1076 if (devstr != NULL && str_size > 0)
1077 (void) strlcpy(devstr, mntent->mnt_special, str_size);
1078 }
1079
1080 return (found);
1081 }
1082
1083 /*
1084 * Check to see if name corresponds to an entry in vfstab, and that the entry
1085 * does not have option ro.
1086 */
1087 int
writable(caddr_t name)1088 writable(caddr_t name)
1089 {
1090 int rw = 1;
1091 struct vfstab vfsbuf, vfskey;
1092 FILE *vfstab;
1093
1094 vfstab = fopen(VFSTAB, "r");
1095 if (vfstab == NULL) {
1096 (void) printf("can't open %s\n", VFSTAB);
1097 return (1);
1098 }
1099 (void) memset((void *)&vfskey, 0, sizeof (vfskey));
1100 vfsnull(&vfskey);
1101 vfskey.vfs_special = unrawname(name);
1102 vfskey.vfs_fstype = MNTTYPE_UFS;
1103 if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1104 (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1105 rw = 0;
1106 }
1107 (void) fclose(vfstab);
1108 return (rw);
1109 }
1110
1111 /*
1112 * debugclean
1113 */
1114 static void
debugclean(void)1115 debugclean(void)
1116 {
1117 if (!debug)
1118 return;
1119
1120 if ((iscorrupt == 0) && (isdirty == 0))
1121 return;
1122
1123 if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1124 (sblock.fs_clean == FSLOG && islog && islogok) ||
1125 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1126 return;
1127
1128 (void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1129 sblock.fs_clean == FSSTABLE ? "stable" :
1130 sblock.fs_clean == FSLOG ? "logging" :
1131 sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1132 devname);
1133 }
1134
1135 /*
1136 * updateclean
1137 * Carefully and transparently update the clean flag.
1138 *
1139 * `iscorrupt' has to be in its final state before this is called.
1140 */
1141 int
updateclean(void)1142 updateclean(void)
1143 {
1144 int freedlog = 0;
1145 struct bufarea cleanbuf;
1146 size_t size;
1147 ssize_t io_res;
1148 diskaddr_t bno;
1149 char fsclean;
1150 int fsreclaim;
1151 char fsflags;
1152 int flags_ok = 1;
1153 daddr32_t fslogbno;
1154 offset_t sblkoff;
1155 time_t t;
1156
1157 /*
1158 * debug stuff
1159 */
1160 debugclean();
1161
1162 /*
1163 * set fsclean to its appropriate value
1164 */
1165 fslogbno = sblock.fs_logbno;
1166 fsclean = sblock.fs_clean;
1167 fsreclaim = sblock.fs_reclaim;
1168 fsflags = sblock.fs_flags;
1169 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1170 fsclean = FSACTIVE;
1171 }
1172 /*
1173 * If ufs log is not okay, note that we need to clear it.
1174 */
1175 examinelog(NULL);
1176 if (fslogbno && !(islog && islogok)) {
1177 fsclean = FSACTIVE;
1178 fslogbno = 0;
1179 }
1180
1181 /*
1182 * if necessary, update fs_clean and fs_state
1183 */
1184 switch (fsclean) {
1185
1186 case FSACTIVE:
1187 if (!iscorrupt) {
1188 fsclean = FSSTABLE;
1189 fsreclaim = 0;
1190 }
1191 break;
1192
1193 case FSCLEAN:
1194 case FSSTABLE:
1195 if (iscorrupt) {
1196 fsclean = FSACTIVE;
1197 } else {
1198 fsreclaim = 0;
1199 }
1200 break;
1201
1202 case FSLOG:
1203 if (iscorrupt) {
1204 fsclean = FSACTIVE;
1205 } else if (!islog || fslogbno == 0) {
1206 fsclean = FSSTABLE;
1207 fsreclaim = 0;
1208 } else if (fflag) {
1209 fsreclaim = 0;
1210 }
1211 break;
1212
1213 case FSFIX:
1214 fsclean = FSBAD;
1215 if (errorlocked && !iscorrupt) {
1216 fsclean = islog ? FSLOG : FSCLEAN;
1217 }
1218 break;
1219
1220 default:
1221 if (iscorrupt) {
1222 fsclean = FSACTIVE;
1223 } else {
1224 fsclean = FSSTABLE;
1225 fsreclaim = 0;
1226 }
1227 }
1228
1229 if (largefile_count > 0)
1230 fsflags |= FSLARGEFILES;
1231 else
1232 fsflags &= ~FSLARGEFILES;
1233
1234 /*
1235 * There can be two discrepencies here. A) The superblock
1236 * shows no largefiles but we found some while scanning.
1237 * B) The superblock indicates the presence of largefiles,
1238 * but none are present. Note that if preening, the superblock
1239 * is silently corrected.
1240 */
1241 if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1242 (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1243 flags_ok = 0;
1244
1245 if (debug)
1246 (void) printf(
1247 "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1248 largefile_count, sblock.fs_flags, flags_ok);
1249
1250 /*
1251 * If fs is unchanged, do nothing.
1252 */
1253 if ((!isdirty) && (flags_ok) &&
1254 (fslogbno == sblock.fs_logbno) &&
1255 (sblock.fs_clean == fsclean) &&
1256 (sblock.fs_reclaim == fsreclaim) &&
1257 (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1258 if (errorlocked) {
1259 if (!do_errorlock(LOCKFS_ULOCK))
1260 pwarn(
1261 "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1262 }
1263 return (freedlog);
1264 }
1265
1266 /*
1267 * if user allows, update superblock state
1268 */
1269 if (debug) {
1270 (void) printf(
1271 "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1272 sblock.fs_flags, sblock.fs_logbno,
1273 sblock.fs_clean, sblock.fs_reclaim,
1274 sblock.fs_state + sblock.fs_time);
1275 (void) printf(
1276 "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1277 fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1278 }
1279 if (!isdirty && !preen && !rerun &&
1280 (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1281 return (freedlog);
1282
1283 (void) time(&t);
1284 sblock.fs_time = (time32_t)t;
1285 if (debug)
1286 printclean();
1287
1288 if (sblock.fs_logbno != fslogbno) {
1289 examinelog(&freelogblk);
1290 freedlog++;
1291 }
1292
1293 sblock.fs_logbno = fslogbno;
1294 sblock.fs_clean = fsclean;
1295 sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1296 sblock.fs_reclaim = fsreclaim;
1297 sblock.fs_flags = fsflags;
1298
1299 /*
1300 * if superblock can't be written, return
1301 */
1302 if (fswritefd < 0)
1303 return (freedlog);
1304
1305 /*
1306 * Read private copy of superblock, update clean flag, and write it.
1307 */
1308 bno = sblk.b_bno;
1309 size = sblk.b_size;
1310
1311 sblkoff = ldbtob(bno);
1312
1313 if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1314 errexit("out of memory");
1315 if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
1316 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1317 (longlong_t)bno, strerror(errno));
1318 goto out;
1319 }
1320
1321 if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1322 report_io_prob("READ FROM", bno, size, io_res);
1323 goto out;
1324 }
1325
1326 cleanbuf.b_un.b_fs->fs_logbno = sblock.fs_logbno;
1327 cleanbuf.b_un.b_fs->fs_clean = sblock.fs_clean;
1328 cleanbuf.b_un.b_fs->fs_state = sblock.fs_state;
1329 cleanbuf.b_un.b_fs->fs_time = sblock.fs_time;
1330 cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1331 cleanbuf.b_un.b_fs->fs_flags = sblock.fs_flags;
1332
1333 if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
1334 (void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1335 (longlong_t)bno, strerror(errno));
1336 goto out;
1337 }
1338
1339 if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1340 report_io_prob("WRITE TO", bno, size, io_res);
1341 goto out;
1342 }
1343
1344 /*
1345 * 1208040
1346 * If we had to use -b to grab an alternate superblock, then we
1347 * likely had to do so because of unacceptable differences between
1348 * the main and alternate superblocks. So, we had better update
1349 * the alternate superblock as well, or we'll just fail again
1350 * the next time we attempt to run fsck!
1351 */
1352 if (bflag != 0) {
1353 write_altsb(fswritefd);
1354 }
1355
1356 if (errorlocked) {
1357 if (!do_errorlock(LOCKFS_ULOCK))
1358 pwarn(
1359 "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1360 }
1361
1362 out:
1363 if (cleanbuf.b_un.b_buf != NULL) {
1364 free((void *)cleanbuf.b_un.b_buf);
1365 }
1366
1367 return (freedlog);
1368 }
1369
1370 static void
report_io_prob(caddr_t what,diskaddr_t bno,size_t expected,ssize_t failure)1371 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1372 {
1373 if (failure < 0)
1374 (void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1375 what, (int)bno, strerror(errno));
1376 else if (failure == 0)
1377 (void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1378 what, (int)bno);
1379 else
1380 (void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1381 what, (int)bno, (unsigned)failure, (unsigned)expected);
1382 }
1383
1384 /*
1385 * print out clean info
1386 */
1387 void
printclean(void)1388 printclean(void)
1389 {
1390 caddr_t s;
1391
1392 if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1393 s = "unknown";
1394 else
1395 switch (sblock.fs_clean) {
1396
1397 case FSACTIVE:
1398 s = "active";
1399 break;
1400
1401 case FSCLEAN:
1402 s = "clean";
1403 break;
1404
1405 case FSSTABLE:
1406 s = "stable";
1407 break;
1408
1409 case FSLOG:
1410 s = "logging";
1411 break;
1412
1413 case FSBAD:
1414 s = "is bad";
1415 break;
1416
1417 case FSFIX:
1418 s = "being fixed";
1419 break;
1420
1421 default:
1422 s = "unknown";
1423 }
1424
1425 if (preen)
1426 pwarn("is %s.\n", s);
1427 else
1428 (void) printf("** %s is %s.\n", devname, s);
1429 }
1430
1431 int
is_errorlocked(caddr_t fs)1432 is_errorlocked(caddr_t fs)
1433 {
1434 int retval;
1435 struct stat64 statb;
1436 caddr_t mountp;
1437 struct mnttab *mntent;
1438
1439 retval = 0;
1440
1441 if (!fs)
1442 return (0);
1443
1444 if (stat64(fs, &statb) < 0)
1445 return (0);
1446
1447 if (S_ISDIR(statb.st_mode)) {
1448 mountp = fs;
1449 } else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1450 mntent = search_mnttab(NULL, fs, NULL, 0);
1451 if (mntent == NULL)
1452 return (0);
1453 mountp = mntent->mnt_mountp;
1454 if (mountp == NULL) /* theoretically a can't-happen */
1455 return (0);
1456 } else {
1457 return (0);
1458 }
1459
1460 /*
1461 * From here on, must `goto out' to avoid memory leakage.
1462 */
1463
1464 if (elock_combuf == NULL)
1465 elock_combuf =
1466 (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1467 else
1468 elock_combuf =
1469 (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1470
1471 if (elock_combuf == NULL)
1472 goto out;
1473
1474 (void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1475
1476 if (elock_mountp != NULL) {
1477 free(elock_mountp);
1478 }
1479
1480 elock_mountp = strdup(mountp);
1481 if (elock_mountp == NULL)
1482 goto out;
1483
1484 if (mountfd < 0) {
1485 if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1486 goto out;
1487 }
1488
1489 if (lfp == NULL) {
1490 lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1491 if (lfp == NULL)
1492 goto out;
1493 (void) memset((void *)lfp, 0, sizeof (struct lockfs));
1494 }
1495
1496 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1497 lfp->lf_comment = elock_combuf;
1498
1499 if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1500 goto out;
1501
1502 /*
1503 * lint believes that the ioctl() (or any other function
1504 * taking lfp as an arg) could free lfp. This is not the
1505 * case, however.
1506 */
1507 retval = LOCKFS_IS_ELOCK(lfp);
1508
1509 out:
1510 return (retval);
1511 }
1512
1513 /*
1514 * Given a name which is known to be a directory, see if it appears
1515 * in the vfstab. If so, return the entry's block (special) device
1516 * field via devstr.
1517 */
1518 int
check_vfstab(caddr_t name,caddr_t devstr,size_t str_size)1519 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1520 {
1521 return (NULL != search_vfstab(name, NULL, devstr, str_size));
1522 }
1523
1524 /*
1525 * Given a name which is known to be a directory, see if it appears
1526 * in the mnttab. If so, return the entry's block (special) device
1527 * field via devstr.
1528 */
1529 int
check_mnttab(caddr_t name,caddr_t devstr,size_t str_size)1530 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1531 {
1532 return (NULL != search_mnttab(name, NULL, devstr, str_size));
1533 }
1534
/*
 * Search for mount point and/or special device in the given file.
 * The first matching entry is returned.
 *
 * If an entry is found and str_size is greater than zero, then
 * up to str_size bytes of the special device name from the entry
 * are copied to devstr.
 *
 * This macro supplies the complete body of a function whose parameters
 * are named mountp, special, devstr and str_size.  Its arguments are:
 *
 *	st_type		struct tag of the table entry
 *	st_file		pathname of the table to open
 *	st_mount	name of the entry's mount-point member
 *	st_special	name of the entry's special-device member
 *	st_nuller	routine that blanks a search key
 *	st_init		extra expression evaluated after the key is set up;
 *			it is expanded inside the body, so it may refer to
 *			the locals `key' and `retval'
 *	st_searcher	routine that scans the open table for the key
 *
 * The result points at storage that is static to the generated function,
 * so each call overwrites what the previous call returned.  NULL is
 * returned if the table cannot be opened or no entry matches.
 */

#define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
	st_nuller, st_init, st_searcher) \
{ \
	static struct st_type found; \
	struct st_type key; \
	struct st_type *retval = NULL; \
	FILE *tabfp; \
	\
	/* LINTED ``assigned value never used'' */ \
	st_nuller(&key); \
	key.st_mount = mountp; \
	key.st_special = special; \
	st_init; \
	\
	tabfp = fopen(st_file, "r"); \
	if (tabfp == NULL) \
		return (NULL); \
	\
	if (st_searcher(tabfp, &found, &key) == 0) { \
		retval = &found; \
		if (devstr != NULL && str_size > 0 && \
		    found.st_special != NULL) { \
			(void) strlcpy(devstr, found.st_special, \
			    str_size); \
		} \
	} \
	(void) fclose(tabfp); \
	return (retval); \
}
1572
1573 static struct vfstab *
search_vfstab(caddr_t mountp,caddr_t special,caddr_t devstr,size_t str_size)1574 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1575 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1576 (retval = retval), getvfsany)
1577
1578 static struct mnttab *
1579 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1580 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1581 (key.mnt_fstype = MNTTYPE_UFS), getmntany)
1582
1583 int
1584 do_errorlock(int lock_type)
1585 {
1586 caddr_t buf;
1587 time_t now;
1588 struct tm *local;
1589 int rc;
1590
1591 if (elock_combuf == NULL)
1592 errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1593 elock_mountp ? elock_mountp : "<null>",
1594 lock_type);
1595
1596 if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1597 NULL) {
1598 errexit("Couldn't alloc memory for temp. lock status buffer\n");
1599 }
1600 if (lfp == NULL) {
1601 errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1602 elock_mountp, lock_type);
1603 }
1604
1605 (void) memmove((void *)buf, (void *)elock_combuf,
1606 LOCKFS_MAXCOMMENTLEN-1);
1607
1608 switch (lock_type) {
1609 case LOCKFS_ELOCK:
1610 /*
1611 * Note that if it is error-locked, we won't get an
1612 * error back if we try to error-lock it again.
1613 */
1614 if (time(&now) != (time_t)-1) {
1615 if ((local = localtime(&now)) != NULL)
1616 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1617 "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1618 elock_combuf, (int)pid,
1619 local->tm_mon + 1, local->tm_mday,
1620 (local->tm_year % 100), local->tm_hour,
1621 local->tm_min, local->tm_sec);
1622 else
1623 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1624 "%s [fsck pid %d", elock_combuf, pid);
1625
1626 } else {
1627 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1628 "%s [fsck pid %d", elock_combuf, pid);
1629 }
1630 break;
1631
1632 case LOCKFS_ULOCK:
1633 if (time(&now) != (time_t)-1) {
1634 if ((local = localtime(&now)) != NULL) {
1635 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1636 "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1637 elock_combuf,
1638 local->tm_mon + 1, local->tm_mday,
1639 (local->tm_year % 100), local->tm_hour,
1640 local->tm_min, local->tm_sec);
1641 } else {
1642 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1643 "%s]", elock_combuf);
1644 }
1645 } else {
1646 (void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1647 "%s]", elock_combuf);
1648 }
1649 if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1650 pwarn("do_errorlock: unlock failed: %s\n",
1651 strerror(errno));
1652 goto out;
1653 }
1654 break;
1655
1656 default:
1657 break;
1658 }
1659
1660 (void) memmove((void *)elock_combuf, (void *)buf,
1661 LOCKFS_MAXCOMMENTLEN - 1);
1662
1663 lfp->lf_lock = lock_type;
1664 lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1665 lfp->lf_comment = elock_combuf;
1666 lfp->lf_flags = 0;
1667 errno = 0;
1668
1669 if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1670 if (errno == EINVAL) {
1671 pwarn("Another fsck active?\n");
1672 iscorrupt = 0; /* don't go away mad, just go away */
1673 } else {
1674 pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1675 lock_type, elock_combuf, strerror(errno));
1676 }
1677 }
1678 out:
1679 if (buf != NULL) {
1680 free((void *)buf);
1681 }
1682
1683 return (rc != -1);
1684 }
1685
1686 /*
1687 * Shadow inode support. To register a shadow with a client is to note
1688 * that an inode (the client) refers to the shadow.
1689 */
1690
1691 static struct shadowclients *
newshadowclient(struct shadowclients * prev)1692 newshadowclient(struct shadowclients *prev)
1693 {
1694 struct shadowclients *rc;
1695
1696 rc = (struct shadowclients *)malloc(sizeof (*rc));
1697 if (rc == NULL)
1698 errexit("newshadowclient: cannot malloc shadow client");
1699 rc->next = prev;
1700 rc->nclients = 0;
1701
1702 rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1703 maxshadowclients);
1704 if (rc->client == NULL)
1705 errexit("newshadowclient: cannot malloc client array");
1706 return (rc);
1707 }
1708
1709 void
registershadowclient(fsck_ino_t shadow,fsck_ino_t client,struct shadowclientinfo ** info)1710 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1711 struct shadowclientinfo **info)
1712 {
1713 struct shadowclientinfo *sci;
1714 struct shadowclients *scc;
1715
1716 /*
1717 * Already have a record for this shadow?
1718 */
1719 for (sci = *info; sci != NULL; sci = sci->next)
1720 if (sci->shadow == shadow)
1721 break;
1722 if (sci == NULL) {
1723 /*
1724 * It's a new shadow, add it to the list
1725 */
1726 sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1727 if (sci == NULL)
1728 errexit("registershadowclient: cannot malloc");
1729 sci->next = *info;
1730 *info = sci;
1731 sci->shadow = shadow;
1732 sci->totalClients = 0;
1733 sci->clients = newshadowclient(NULL);
1734 }
1735
1736 sci->totalClients++;
1737 scc = sci->clients;
1738 if (scc->nclients >= maxshadowclients) {
1739 scc = newshadowclient(sci->clients);
1740 sci->clients = scc;
1741 }
1742
1743 scc->client[scc->nclients++] = client;
1744 }
1745
1746 /*
1747 * Locate and discard a shadow.
1748 */
1749 void
clearshadow(fsck_ino_t shadow,struct shadowclientinfo ** info)1750 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1751 {
1752 struct shadowclientinfo *sci, *prev;
1753
1754 /*
1755 * Do we have a record for this shadow?
1756 */
1757 prev = NULL;
1758 for (sci = *info; sci != NULL; sci = sci->next) {
1759 if (sci->shadow == shadow)
1760 break;
1761 prev = sci;
1762 }
1763
1764 if (sci != NULL) {
1765 /*
1766 * First, pull it off the list, since we know there
1767 * shouldn't be any future references to this one.
1768 */
1769 if (prev == NULL)
1770 *info = sci->next;
1771 else
1772 prev->next = sci->next;
1773 deshadow(sci, clearattrref);
1774 }
1775 }
1776
1777 /*
1778 * Discard all memory used to track clients of a shadow.
1779 */
1780 void
deshadow(struct shadowclientinfo * sci,void (* cb)(fsck_ino_t))1781 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1782 {
1783 struct shadowclients *clients, *discard;
1784 int idx;
1785
1786 clients = sci->clients;
1787 while (clients != NULL) {
1788 discard = clients;
1789 clients = clients->next;
1790 if (discard->client != NULL) {
1791 if (cb != NULL) {
1792 for (idx = 0; idx < discard->nclients; idx++)
1793 (*cb)(discard->client[idx]);
1794 }
1795 free((void *)discard->client);
1796 }
1797 free((void *)discard);
1798 }
1799
1800 free((void *)sci);
1801 }
1802
1803 /*
1804 * Allocate more buffer as need arises but allocate one at a time.
1805 * This is done to make sure that fsck does not exit with error if it
1806 * needs more buffer to complete its task.
1807 */
1808 static struct bufarea *
alloc_bufarea(void)1809 alloc_bufarea(void)
1810 {
1811 struct bufarea *newbp;
1812 caddr_t bufp;
1813
1814 bufp = malloc((unsigned int)sblock.fs_bsize);
1815 if (bufp == NULL)
1816 return (NULL);
1817
1818 newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1819 if (newbp == NULL) {
1820 free((void *)bufp);
1821 return (NULL);
1822 }
1823
1824 initbarea(newbp);
1825 newbp->b_un.b_buf = bufp;
1826 newbp->b_prev = &bufhead;
1827 newbp->b_next = bufhead.b_next;
1828 bufhead.b_next->b_prev = newbp;
1829 bufhead.b_next = newbp;
1830 bufhead.b_size++;
1831 return (newbp);
1832 }
1833
1834 /*
1835 * We length-limit in both unrawname() and rawname() to avoid
1836 * overflowing our arrays or those of our naive, trusting callers.
1837 */
1838
1839 caddr_t
unrawname(caddr_t name)1840 unrawname(caddr_t name)
1841 {
1842 caddr_t dp;
1843 static char fullname[MAXPATHLEN + 1];
1844
1845 if ((dp = getfullblkname(name)) == NULL)
1846 return ("");
1847
1848 (void) strlcpy(fullname, dp, sizeof (fullname));
1849 /*
1850 * Not reporting under debug, as the allocation isn't
1851 * reported by getfullblkname. The idea is that we
1852 * produce balanced alloc/free instances.
1853 */
1854 free(dp);
1855
1856 return (fullname);
1857 }
1858
1859 caddr_t
rawname(caddr_t name)1860 rawname(caddr_t name)
1861 {
1862 caddr_t dp;
1863 static char fullname[MAXPATHLEN + 1];
1864
1865 if ((dp = getfullrawname(name)) == NULL)
1866 return ("");
1867
1868 (void) strlcpy(fullname, dp, sizeof (fullname));
1869 /*
1870 * Not reporting under debug, as the allocation isn't
1871 * reported by getfullblkname. The idea is that we
1872 * produce balanced alloc/free instances.
1873 */
1874 free(dp);
1875
1876 return (fullname);
1877 }
1878
1879 /*
1880 * Make sure that a cg header looks at least moderately reasonable.
1881 * We want to be able to trust the contents enough to be able to use
1882 * the standard accessor macros. So, besides looking at the obvious
1883 * such as the magic number, we verify that the offset field values
1884 * are properly aligned and not too big or small.
1885 *
1886 * Returns a NULL pointer if the cg is sane enough for our needs, else
1887 * a dynamically-allocated string describing all of its faults.
1888 */
1889 #define Append_Error(full, full_len, addition, addition_len) \
1890 if (full == NULL) { \
1891 full = addition; \
1892 full_len = addition_len; \
1893 } else { \
1894 /* lint doesn't think realloc() understands NULLs */ \
1895 full = realloc(full, full_len + addition_len + 1); \
1896 if (full == NULL) { \
1897 errexit("Out of memory in cg_sanity"); \
1898 /* NOTREACHED */ \
1899 } \
1900 (void) strcpy(full + full_len, addition); \
1901 full_len += addition_len; \
1902 free(addition); \
1903 }
1904
1905 caddr_t
cg_sanity(struct cg * cgp,int cgno)1906 cg_sanity(struct cg *cgp, int cgno)
1907 {
1908 caddr_t full_err;
1909 caddr_t this_err = NULL;
1910 int full_len, this_len;
1911 daddr32_t ndblk;
1912 daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1913 daddr32_t exp_freeoff, exp_nextfreeoff;
1914
1915 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1916 &exp_freeoff, &exp_nextfreeoff, &ndblk);
1917
1918 full_err = NULL;
1919 full_len = 0;
1920
1921 if (!cg_chkmagic(cgp)) {
1922 this_len = fsck_asprintf(&this_err,
1923 "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1924 cgp->cg_magic, CG_MAGIC);
1925 Append_Error(full_err, full_len, this_err, this_len);
1926 }
1927
1928 if (cgp->cg_cgx != cgno) {
1929 this_len = fsck_asprintf(&this_err,
1930 "WRONG CG NUMBER (%d should be %d)\n",
1931 cgp->cg_cgx, cgno);
1932 Append_Error(full_err, full_len, this_err, this_len);
1933 }
1934
1935 if ((cgp->cg_btotoff & 3) != 0) {
1936 this_len = fsck_asprintf(&this_err,
1937 "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1938 cgp->cg_btotoff);
1939 Append_Error(full_err, full_len, this_err, this_len);
1940 }
1941
1942 if ((cgp->cg_boff & 1) != 0) {
1943 this_len = fsck_asprintf(&this_err,
1944 "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1945 cgp->cg_boff);
1946 Append_Error(full_err, full_len, this_err, this_len);
1947 }
1948
1949 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1950 if (cgp->cg_ncyl < 1) {
1951 this_len = fsck_asprintf(&this_err,
1952 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1953 cgp->cg_ncyl);
1954 } else {
1955 this_len = fsck_asprintf(&this_err,
1956 "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1957 cgp->cg_ncyl, sblock.fs_cpg);
1958 }
1959 Append_Error(full_err, full_len, this_err, this_len);
1960 }
1961
1962 if (cgp->cg_niblk != sblock.fs_ipg) {
1963 this_len = fsck_asprintf(&this_err,
1964 "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1965 cgp->cg_niblk, sblock.fs_ipg);
1966 Append_Error(full_err, full_len, this_err, this_len);
1967 }
1968
1969 if (cgp->cg_ndblk != ndblk) {
1970 this_len = fsck_asprintf(&this_err,
1971 "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1972 cgp->cg_ndblk, ndblk);
1973 Append_Error(full_err, full_len, this_err, this_len);
1974 }
1975
1976 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1977 this_len = fsck_asprintf(&this_err,
1978 "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1979 "(%d should be at least 0 and less than %d)\n",
1980 cgp->cg_rotor, ndblk);
1981 Append_Error(full_err, full_len, this_err, this_len);
1982 }
1983
1984 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1985 this_len = fsck_asprintf(&this_err,
1986 "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1987 "(%d should be at least 0 and less than %d)\n",
1988 cgp->cg_frotor, ndblk);
1989 Append_Error(full_err, full_len, this_err, this_len);
1990 }
1991
1992 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1993 this_len = fsck_asprintf(&this_err,
1994 "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1995 "(%d should be at least 0 and less than %d)\n",
1996 cgp->cg_irotor, sblock.fs_ipg);
1997 Append_Error(full_err, full_len, this_err, this_len);
1998 }
1999
2000 if (cgp->cg_btotoff != exp_btotoff) {
2001 this_len = fsck_asprintf(&this_err,
2002 "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2003 cgp->cg_btotoff, exp_btotoff);
2004 Append_Error(full_err, full_len, this_err, this_len);
2005 }
2006
2007 if (cgp->cg_boff != exp_boff) {
2008 this_len = fsck_asprintf(&this_err,
2009 "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2010 cgp->cg_boff, exp_boff);
2011 Append_Error(full_err, full_len, this_err, this_len);
2012 }
2013
2014 if (cgp->cg_iusedoff != exp_iusedoff) {
2015 this_len = fsck_asprintf(&this_err,
2016 "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2017 cgp->cg_iusedoff, exp_iusedoff);
2018 Append_Error(full_err, full_len, this_err, this_len);
2019 }
2020
2021 if (cgp->cg_freeoff != exp_freeoff) {
2022 this_len = fsck_asprintf(&this_err,
2023 "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2024 cgp->cg_freeoff, exp_freeoff);
2025 Append_Error(full_err, full_len, this_err, this_len);
2026 }
2027
2028 if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2029 this_len = fsck_asprintf(&this_err,
2030 "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2031 cgp->cg_nextfreeoff, exp_nextfreeoff);
2032 Append_Error(full_err, full_len, this_err, this_len);
2033 }
2034
2035 return (full_err);
2036 }
2037
2038 #undef Append_Error
2039
2040 /*
2041 * This is taken from mkfs, and is what is used to come up with the
2042 * original values for a struct cg. This implies that, since these
2043 * are all constants, recalculating them now should give us the same
2044 * thing as what's on disk.
2045 */
2046 static void
cg_constants(int cgno,daddr32_t * btotoff,daddr32_t * boff,daddr32_t * iusedoff,daddr32_t * freeoff,daddr32_t * nextfreeoff,daddr32_t * ndblk)2047 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2048 daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2049 daddr32_t *ndblk)
2050 {
2051 daddr32_t cbase, dmax;
2052 struct cg *cgp;
2053
2054 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2055 (size_t)sblock.fs_cgsize);
2056 cgp = cgblk.b_un.b_cg;
2057
2058 cbase = cgbase(&sblock, cgno);
2059 dmax = cbase + sblock.fs_fpg;
2060 if (dmax > sblock.fs_size)
2061 dmax = sblock.fs_size;
2062
2063 /* LINTED pointer difference won't overflow */
2064 *btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2065 *boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2066 *iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2067 *freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2068 *nextfreeoff = *freeoff +
2069 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2070 *ndblk = dmax - cbase;
2071 }
2072
2073 /*
2074 * Corrects all fields in the cg that can be done with the available
2075 * redundant data.
2076 */
2077 void
fix_cg(struct cg * cgp,int cgno)2078 fix_cg(struct cg *cgp, int cgno)
2079 {
2080 daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2081 daddr32_t exp_freeoff, exp_nextfreeoff;
2082 daddr32_t ndblk;
2083
2084 cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2085 &exp_freeoff, &exp_nextfreeoff, &ndblk);
2086
2087 if (cgp->cg_cgx != cgno) {
2088 cgp->cg_cgx = cgno;
2089 }
2090
2091 if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2092 if (cgno == (sblock.fs_ncg - 1)) {
2093 cgp->cg_ncyl = sblock.fs_ncyl -
2094 (sblock.fs_cpg * cgno);
2095 } else {
2096 cgp->cg_ncyl = sblock.fs_cpg;
2097 }
2098 }
2099
2100 if (cgp->cg_niblk != sblock.fs_ipg) {
2101 /*
2102 * This is not used by the kernel, so it's pretty
2103 * harmless if it's wrong.
2104 */
2105 cgp->cg_niblk = sblock.fs_ipg;
2106 }
2107
2108 if (cgp->cg_ndblk != ndblk) {
2109 cgp->cg_ndblk = ndblk;
2110 }
2111
2112 /*
2113 * For the rotors, any position's valid, so pick the one we know
2114 * will always exist.
2115 */
2116 if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2117 cgp->cg_rotor = 0;
2118 }
2119
2120 if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2121 cgp->cg_frotor = 0;
2122 }
2123
2124 if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2125 cgp->cg_irotor = 0;
2126 }
2127
2128 /*
2129 * For btotoff and boff, if they're misaligned they won't
2130 * match the expected values, so we're catching both cases
2131 * here. Of course, if any of these are off, it seems likely
2132 * that the tables really won't be where we calculate they
2133 * should be anyway.
2134 */
2135 if (cgp->cg_btotoff != exp_btotoff) {
2136 cgp->cg_btotoff = exp_btotoff;
2137 }
2138
2139 if (cgp->cg_boff != exp_boff) {
2140 cgp->cg_boff = exp_boff;
2141 }
2142
2143 if (cgp->cg_iusedoff != exp_iusedoff) {
2144 cgp->cg_iusedoff = exp_iusedoff;
2145 }
2146
2147 if (cgp->cg_freeoff != exp_freeoff) {
2148 cgp->cg_freeoff = exp_freeoff;
2149 }
2150
2151 if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2152 cgp->cg_nextfreeoff = exp_nextfreeoff;
2153 }
2154
2155 /*
2156 * Reset the magic, as we've recreated this cg, also
2157 * update the cg_time, as we're writing out the cg
2158 */
2159 cgp->cg_magic = CG_MAGIC;
2160 cgp->cg_time = time(NULL);
2161
2162 /*
2163 * We know there was at least one correctable problem,
2164 * or else we wouldn't have been called. So instead of
2165 * marking the buffer dirty N times above, just do it
2166 * once here.
2167 */
2168 cgdirty();
2169 }
2170
2171 void
examinelog(void (* cb)(daddr32_t))2172 examinelog(void (*cb)(daddr32_t))
2173 {
2174 struct bufarea *bp;
2175 extent_block_t *ebp;
2176 extent_t *ep;
2177 daddr32_t nfno, fno;
2178 int i;
2179 int j;
2180
2181 /*
2182 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2183 * we need to translate accordingly using logbtodb()
2184 */
2185
2186 if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2187 if (debug) {
2188 (void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2189 "Aborting log examination\n", \
2190 logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2191 }
2192 return;
2193 }
2194
2195 /*
2196 * Read errors will return zeros, which will cause us
2197 * to do nothing harmful, so don't need to handle it.
2198 */
2199 bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2200 (size_t)sblock.fs_bsize);
2201 ebp = (void *)bp->b_un.b_buf;
2202
2203 /*
2204 * Does it look like a log allocation table?
2205 */
2206 /* LINTED pointer cast is aligned */
2207 if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2208 sblock.fs_bsize))
2209 return;
2210 if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2211 return;
2212
2213 ep = &ebp->extents[0];
2214 for (i = 0; i < ebp->nextents; ++i, ++ep) {
2215 fno = logbtofrag(&sblock, ep->pbno);
2216 nfno = dbtofsb(&sblock, ep->nbno);
2217 for (j = 0; j < nfno; ++j, ++fno) {
2218 /*
2219 * Invoke the callback first, so that pass1 can
2220 * mark the log blocks in-use. Then, if any
2221 * subsequent pass over the log shows us that a
2222 * block got freed (say, it was also claimed by
2223 * an inode that we cleared), we can safely declare
2224 * the log bad.
2225 */
2226 if (cb != NULL)
2227 (*cb)(fno);
2228 if (!testbmap(fno))
2229 islogok = 0;
2230 }
2231 }
2232 brelse(bp);
2233
2234 if (cb != NULL) {
2235 fno = logbtofrag(&sblock, sblock.fs_logbno);
2236 for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2237 (*cb)(fno);
2238 }
2239 }
2240
2241 static void
freelogblk(daddr32_t frag)2242 freelogblk(daddr32_t frag)
2243 {
2244 freeblk(sblock.fs_logbno, frag, 1);
2245 }
2246
2247 caddr_t
file_id(fsck_ino_t inum,mode_t mode)2248 file_id(fsck_ino_t inum, mode_t mode)
2249 {
2250 static char name[MAXPATHLEN + 1];
2251
2252 if (lfdir == inum) {
2253 return (lfname);
2254 }
2255
2256 if ((mode & IFMT) == IFDIR) {
2257 (void) strcpy(name, "DIR");
2258 } else if ((mode & IFMT) == IFATTRDIR) {
2259 (void) strcpy(name, "ATTR DIR");
2260 } else if ((mode & IFMT) == IFSHAD) {
2261 (void) strcpy(name, "ACL");
2262 } else {
2263 (void) strcpy(name, "FILE");
2264 }
2265
2266 return (name);
2267 }
2268
2269 /*
2270 * Simple initializer for inodesc structures, so users of only a few
2271 * fields don't have to worry about getting the right defaults for
2272 * everything out.
2273 */
2274 void
init_inodesc(struct inodesc * idesc)2275 init_inodesc(struct inodesc *idesc)
2276 {
2277 /*
2278 * Most fields should be zero, just hit the special cases.
2279 */
2280 (void) memset((void *)idesc, 0, sizeof (struct inodesc));
2281 idesc->id_fix = DONTKNOW;
2282 idesc->id_lbn = -1;
2283 idesc->id_truncto = -1;
2284 idesc->id_firsthole = -1;
2285 }
2286
2287 /*
2288 * Compare routine for tsearch(C) to use on ino_t instances.
2289 */
2290 int
ino_t_cmp(const void * left,const void * right)2291 ino_t_cmp(const void *left, const void *right)
2292 {
2293 const fsck_ino_t lino = (const fsck_ino_t)left;
2294 const fsck_ino_t rino = (const fsck_ino_t)right;
2295
2296 return (lino - rino);
2297 }
2298
2299 int
cgisdirty(void)2300 cgisdirty(void)
2301 {
2302 return (cgblk.b_dirty);
2303 }
2304
2305 void
cgflush(void)2306 cgflush(void)
2307 {
2308 flush(fswritefd, &cgblk);
2309 }
2310
2311 void
dirty(struct bufarea * bp)2312 dirty(struct bufarea *bp)
2313 {
2314 if (fswritefd < 0) {
2315 /*
2316 * No one should call dirty() in read only mode.
2317 * But if one does, it's not fatal issue. Just warn him.
2318 */
2319 pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2320 } else {
2321 (bp)->b_dirty = 1;
2322 isdirty = 1;
2323 }
2324 }
2325
2326 void
initbarea(struct bufarea * bp)2327 initbarea(struct bufarea *bp)
2328 {
2329 (bp)->b_dirty = 0;
2330 (bp)->b_bno = (diskaddr_t)-1LL;
2331 (bp)->b_flags = 0;
2332 (bp)->b_cnt = 0;
2333 (bp)->b_errs = 0;
2334 }
2335
/*
 * Partition-sizing routines adapted from ../newfs/newfs.c.
 * Needed because calcsb() needs to use mkfs to work out what the
 * superblock should be, and mkfs insists on being told how many
 * sectors to use.
 *
 * Error handling assumes we're never called while preening.
 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit to ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
 */

/*
 * Kind of disk label found on the device; one of the LABEL_TYPE_*
 * values below.  Set while sizing the device, consulted by
 * getdisksize().
 */
static int label_type;

#define	LABEL_TYPE_VTOC		1
#define	LABEL_TYPE_EFI		2
#define	LABEL_TYPE_OTHER	3

/* Size-related constants used by the partition-sizing code. */
#define	MB			(1024 * 1024)
#define	SECTORS_PER_TERABYTE	(1LL << 31)
#define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
2359
2360 diskaddr_t
getdisksize(caddr_t disk,int fd)2361 getdisksize(caddr_t disk, int fd)
2362 {
2363 int rpm;
2364 struct dk_geom g;
2365 struct dk_cinfo ci;
2366 diskaddr_t actual_size;
2367
2368 /*
2369 * get_device_size() determines the actual size of the
2370 * device, and also the disk's attributes, such as geometry.
2371 */
2372 actual_size = get_device_size(fd, disk);
2373
2374 if (label_type == LABEL_TYPE_VTOC) {
2375 if (ioctl(fd, DKIOCGGEOM, &g)) {
2376 pwarn("%s: Unable to read Disk geometry", disk);
2377 return (0);
2378 }
2379 if (sblock.fs_nsect == 0)
2380 sblock.fs_nsect = g.dkg_nsect;
2381 if (sblock.fs_ntrak == 0)
2382 sblock.fs_ntrak = g.dkg_nhead;
2383 if (sblock.fs_rps == 0) {
2384 rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2385 sblock.fs_rps = rpm / 60;
2386 }
2387 }
2388
2389 if (sblock.fs_bsize == 0)
2390 sblock.fs_bsize = MAXBSIZE;
2391
2392 /*
2393 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2394 * information is not available, default to the min of a MB and
2395 * maxphys.
2396 */
2397 if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2398 sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2399 if (sblock.fs_maxcontig < 0) {
2400 int gotit, maxphys;
2401
2402 gotit = fsgetmaxphys(&maxphys, NULL);
2403
2404 /*
2405 * If we cannot get the maxphys value, default
2406 * to ufs_maxmaxphys (MB).
2407 */
2408 if (gotit) {
2409 sblock.fs_maxcontig = MIN(maxphys, MB);
2410 } else {
2411 sblock.fs_maxcontig = MB;
2412 }
2413 }
2414 sblock.fs_maxcontig /= sblock.fs_bsize;
2415 }
2416
2417 return (actual_size);
2418 }
2419
2420 /*
2421 * Figure out how big the partition we're dealing with is.
2422 */
2423 static diskaddr_t
get_device_size(int fd,caddr_t name)2424 get_device_size(int fd, caddr_t name)
2425 {
2426 struct extvtoc vtoc;
2427 struct dk_gpt *efi_vtoc;
2428 diskaddr_t slicesize = 0;
2429
2430 int index = read_extvtoc(fd, &vtoc);
2431
2432 if (index >= 0) {
2433 label_type = LABEL_TYPE_VTOC;
2434 } else {
2435 if (index == VT_ENOTSUP || index == VT_ERROR) {
2436 /* it might be an EFI label */
2437 index = efi_alloc_and_read(fd, &efi_vtoc);
2438 if (index >= 0)
2439 label_type = LABEL_TYPE_EFI;
2440 }
2441 }
2442
2443 if (index < 0) {
2444 /*
2445 * Since both attempts to read the label failed, we're
2446 * going to fall back to a brute force approach to
2447 * determining the device's size: see how far out we can
2448 * perform reads on the device.
2449 */
2450
2451 slicesize = brute_force_get_device_size(fd);
2452 if (slicesize == 0) {
2453 switch (index) {
2454 case VT_ERROR:
2455 pwarn("%s: %s\n", name, strerror(errno));
2456 break;
2457 case VT_EIO:
2458 pwarn("%s: I/O error accessing VTOC", name);
2459 break;
2460 case VT_EINVAL:
2461 pwarn("%s: Invalid field in VTOC", name);
2462 break;
2463 default:
2464 pwarn("%s: unknown error %d accessing VTOC",
2465 name, index);
2466 break;
2467 }
2468 return (0);
2469 } else {
2470 label_type = LABEL_TYPE_OTHER;
2471 }
2472 }
2473
2474 if (label_type == LABEL_TYPE_EFI) {
2475 slicesize = efi_vtoc->efi_parts[index].p_size;
2476 efi_free(efi_vtoc);
2477 } else if (label_type == LABEL_TYPE_VTOC) {
2478 slicesize = vtoc.v_part[index].p_size;
2479 }
2480
2481 return (slicesize);
2482 }
2483
2484 /*
2485 * brute_force_get_device_size
2486 *
2487 * Determine the size of the device by seeing how far we can
2488 * read. Doing an llseek( , , SEEK_END) would probably work
2489 * in most cases, but we've seen at least one third-party driver
2490 * which doesn't correctly support the SEEK_END option when the
2491 * the device is greater than a terabyte.
2492 */
2493
2494 static diskaddr_t
brute_force_get_device_size(int fd)2495 brute_force_get_device_size(int fd)
2496 {
2497 diskaddr_t min_fail = 0;
2498 diskaddr_t max_succeed = 0;
2499 diskaddr_t cur_db_off;
2500 char buf[DEV_BSIZE];
2501
2502 /*
2503 * First, see if we can read the device at all, just to
2504 * eliminate errors that have nothing to do with the
2505 * device's size.
2506 */
2507
2508 if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2509 ((read(fd, buf, DEV_BSIZE)) == -1))
2510 return (0); /* can't determine size */
2511
2512 /*
2513 * Now, go sequentially through the multiples of 4TB
2514 * to find the first read that fails (this isn't strictly
2515 * the most efficient way to find the actual size if the
2516 * size really could be anything between 0 and 2**64 bytes.
2517 * We expect the sizes to be less than 16 TB for some time,
2518 * so why do a bunch of reads that are larger than that?
2519 * However, this algorithm *will* work for sizes of greater
2520 * than 16 TB. We're just not optimizing for those sizes.)
2521 */
2522
2523 /*
2524 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2525 * We're using > 32-bit constants here. Therefore, its flow
2526 * analysis is wrong. For the time being, ignore complaints
2527 * from it about the body of the for() being unreached.
2528 */
2529 for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2530 (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2531 cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2532 if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2533 SEEK_SET) == -1) ||
2534 (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2535 min_fail = cur_db_off;
2536 else
2537 max_succeed = cur_db_off;
2538 }
2539
2540 /*
2541 * XXX Same lint flow analysis problem as above.
2542 */
2543 if (min_fail == 0)
2544 return (0);
2545
2546 /*
2547 * We now know that the size of the device is less than
2548 * min_fail and greater than or equal to max_succeed. Now
2549 * keep splitting the difference until the actual size in
2550 * sectors in known. We also know that the difference
2551 * between max_succeed and min_fail at this time is
2552 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2553 * simplifies the math below.
2554 */
2555
2556 while (min_fail - max_succeed > 1) {
2557 cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2558 if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2559 SEEK_SET)) == -1) ||
2560 ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2561 min_fail = cur_db_off;
2562 else
2563 max_succeed = cur_db_off;
2564 }
2565
2566 /* the size is the last successfully read sector offset plus one */
2567 return (max_succeed + 1);
2568 }
2569
2570 static void
vfileerror(fsck_ino_t cwd,fsck_ino_t ino,caddr_t fmt,va_list ap)2571 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2572 {
2573 struct dinode *dp;
2574 char pathbuf[MAXPATHLEN + 1];
2575
2576 vpwarn(fmt, ap);
2577 (void) putchar(' ');
2578 pinode(ino);
2579 (void) printf("\n");
2580 getpathname(pathbuf, cwd, ino);
2581 if (ino < UFSROOTINO || ino > maxino) {
2582 pfatal("NAME=%s\n", pathbuf);
2583 return;
2584 }
2585 dp = ginode(ino);
2586 if (ftypeok(dp))
2587 pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2588 else
2589 pfatal("NAME=%s\n", pathbuf);
2590 }
2591
2592 void
direrror(fsck_ino_t ino,caddr_t fmt,...)2593 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2594 {
2595 va_list ap;
2596
2597 va_start(ap, fmt);
2598 vfileerror(ino, ino, fmt, ap);
2599 va_end(ap);
2600 }
2601
/*
 * va_list flavour of direrror(): forwards to vfileerror() with ino
 * used as both the cwd and the subject inode.
 */
static void
vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
{
	vfileerror(ino, ino, fmt, ap);
}
2607
2608 void
fileerror(fsck_ino_t cwd,fsck_ino_t ino,caddr_t fmt,...)2609 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2610 {
2611 va_list ap;
2612
2613 va_start(ap, fmt);
2614 vfileerror(cwd, ino, fmt, ap);
2615 va_end(ap);
2616 }
2617
2618 /*
2619 * Adds the given inode to the orphaned-directories list, limbo_dirs.
2620 * Assumes that the caller has set INCLEAR in the inode's statemap[]
2621 * entry.
2622 *
2623 * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2624 * meaning it's effectively an orphan. It needs to be noted now, so
2625 * it will be remembered in pass 4.
2626 */
2627
2628 void
add_orphan_dir(fsck_ino_t ino)2629 add_orphan_dir(fsck_ino_t ino)
2630 {
2631 if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2632 errexit("add_orphan_dir: out of memory");
2633 }
2634
2635 /*
2636 * Remove an inode from the orphaned-directories list, presumably
2637 * because it's been cleared.
2638 */
2639 void
remove_orphan_dir(fsck_ino_t ino)2640 remove_orphan_dir(fsck_ino_t ino)
2641 {
2642 (void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2643 }
2644
2645 /*
2646 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
2647 * and lufs.c:checksum().
2648 */
/*
 * Compute a 32-bit additive checksum over a buffer and store it.
 *
 * sp - where the checksum is stored.  It may point into the buffer
 *      being summed; it is zeroed before summing so that the slot
 *      itself contributes nothing to the result.
 * lp - start of the buffer, treated as an array of 32-bit words.
 * nb - size of the buffer in bytes.  Any trailing partial word is
 *      ignored, and a non-positive size is treated as an empty
 *      buffer (checksum 0).
 *
 * The sum wraps modulo 2^32.  It is accumulated in an unsigned
 * variable because overflow of a signed accumulator is undefined.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	int nwords;

	*sp = 0;
	if (nb <= 0)
		return;

	/* Divide as int so nb is never converted through size_t. */
	nwords = nb / (int)sizeof (int32_t);
	for (; nwords > 0; nwords--)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}
2660
/*
 * Verify a stored checksum against the buffer it covers.
 *
 * sp - location of the stored checksum.
 * lp - start of the buffer.
 * nb - size of the buffer in bytes.
 *
 * The checksum is recomputed in place by log_setsum().  Returns 1 if
 * it matches the value that was stored.  Otherwise the stored value
 * is put back and 0 is returned.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;

	log_setsum(sp, lp, nb);
	if (*sp == stored)
		return (1);

	/* Mismatch: restore what was there before recomputing. */
	*sp = stored;
	return (0);
}
2673