xref: /titanic_50/usr/src/cmd/fs.d/ufs/fsck/utilities.c (revision 753a6d457b330b1b29b2d3eefcd0831116ce950d)
1 /*
2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms are permitted
14  * provided that: (1) source distributions retain this entire copyright
15  * notice and comment, and (2) distributions including binaries display
16  * the following acknowledgement:  ``This product includes software
17  * developed by the University of California, Berkeley and its contributors''
18  * in the documentation or other materials provided with the distribution
19  * and in all advertising materials mentioning features or use of this
20  * software. Neither the name of the University nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <stdarg.h>
32 #include <libadm.h>
33 #include <note.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/mntent.h>
37 #include <sys/filio.h>
38 #include <sys/fs/ufs_fs.h>
39 #include <sys/vnode.h>
40 #include <sys/fs/ufs_acl.h>
41 #include <sys/fs/ufs_inode.h>
42 #include <sys/fs/ufs_log.h>
43 #define	_KERNEL
44 #include <sys/fs/ufs_fsdir.h>
45 #undef _KERNEL
46 #include <sys/mnttab.h>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <fcntl.h>
50 #include <signal.h>
51 #include <string.h>
52 #include <ctype.h>
53 #include <sys/vfstab.h>
54 #include <sys/lockfs.h>
55 #include <errno.h>
56 #include <sys/cmn_err.h>
57 #include <sys/dkio.h>
58 #include <sys/vtoc.h>
59 #include <sys/efi_partition.h>
60 #include <fslib.h>
61 #include <inttypes.h>
62 #include "fsck.h"
63 
/*
 * Mount point of the filesystem being checked.  Starts out NULL and is
 * filled in (with a strdup()'d copy) by mounted() the first time a
 * matching mnttab entry is found.
 */
caddr_t mount_point = NULL;

/*
 * diskreads counts the getdatablk() calls that had to go to disk, and
 * totalreads counts all getdatablk() calls; ckfini() reports both when
 * debugging.
 */
static int64_t diskreads, totalreads;	/* Disk cache statistics */

/*
 * Forward declarations for the file-local helpers.
 */
static int log_checksum(int32_t *, int32_t *, int);
static void vdirerror(fsck_ino_t, caddr_t, va_list);
static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
static void vpwarn(caddr_t, va_list);
static int getline(FILE *, caddr_t, int);
static struct bufarea *alloc_bufarea(void);
static void rwerror(caddr_t, diskaddr_t, int rval);
static void debugclean(void);
static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
static void freelogblk(daddr32_t);
static void verrexit(caddr_t, va_list);
static void vpfatal(caddr_t, va_list);
static diskaddr_t get_device_size(int, caddr_t);
static diskaddr_t brute_force_get_device_size(int);
static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
	    daddr32_t *, daddr32_t *, daddr32_t *);
85 
86 int
87 ftypeok(struct dinode *dp)
88 {
89 	switch (dp->di_mode & IFMT) {
90 
91 	case IFDIR:
92 	case IFREG:
93 	case IFBLK:
94 	case IFCHR:
95 	case IFLNK:
96 	case IFSOCK:
97 	case IFIFO:
98 	case IFSHAD:
99 	case IFATTRDIR:
100 		return (1);
101 
102 	default:
103 		if (debug)
104 			(void) printf("bad file type 0%o\n", dp->di_mode);
105 		return (0);
106 	}
107 }
108 
109 int
110 acltypeok(struct dinode *dp)
111 {
112 	if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
113 		return (1);
114 
115 	if (debug)
116 		(void) printf("bad file type for acl I=%d: 0%o\n",
117 		    dp->di_shadow, dp->di_mode);
118 	return (0);
119 }
120 
121 NOTE(PRINTFLIKE(1))
122 int
123 reply(caddr_t fmt, ...)
124 {
125 	va_list ap;
126 	char line[80];
127 
128 	if (preen)
129 		pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
130 
131 	if (mflag) {
132 		/*
133 		 * We don't know what's going on, so don't potentially
134 		 * make things worse by having errexit() write stuff
135 		 * out to disk.
136 		 */
137 		(void) printf(
138 		    "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
139 		    devname);
140 		exit(EXERRFATAL);
141 	}
142 
143 	va_start(ap, fmt);
144 	(void) putchar('\n');
145 	(void) vprintf(fmt, ap);
146 	(void) putchar('?');
147 	(void) putchar(' ');
148 	va_end(ap);
149 
150 	if (nflag || fswritefd < 0) {
151 		(void) printf(" no\n\n");
152 		return (0);
153 	}
154 	if (yflag) {
155 		(void) printf(" yes\n\n");
156 		return (1);
157 	}
158 	(void) fflush(stdout);
159 	if (getline(stdin, line, sizeof (line)) == EOF)
160 		errexit("\n");
161 	(void) printf("\n");
162 	if (line[0] == 'y' || line[0] == 'Y') {
163 		return (1);
164 	} else {
165 		return (0);
166 	}
167 }
168 
/*
 * Read one line from FP into LOC, discarding every whitespace
 * character.  At most MAXLEN - 1 characters are stored; any excess
 * is read and dropped.  The result is NUL-terminated and its length
 * returned.  If end-of-file is seen before a newline, EOF is returned
 * and LOC is left unterminated.
 */
int
getline(FILE *fp, caddr_t loc, int maxlen)
{
	int ch;
	int used = 0;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		if (isspace(ch))
			continue;
		/* Always leave room for the terminator. */
		if (used < maxlen - 1)
			loc[used++] = (char)ch;
	}

	loc[used] = '\0';
	return (used);
}
187 
188 /*
189  * Malloc buffers and set up cache.
190  */
191 void
192 bufinit(void)
193 {
194 	struct bufarea *bp;
195 	int bufcnt, i;
196 	caddr_t bufp;
197 
198 	bufp = malloc((size_t)sblock.fs_bsize);
199 	if (bufp == NULL)
200 		goto nomem;
201 	initbarea(&cgblk);
202 	cgblk.b_un.b_buf = bufp;
203 	bufhead.b_next = bufhead.b_prev = &bufhead;
204 	bufcnt = MAXBUFSPACE / sblock.fs_bsize;
205 	if (bufcnt < MINBUFS)
206 		bufcnt = MINBUFS;
207 	for (i = 0; i < bufcnt; i++) {
208 		bp = (struct bufarea *)malloc(sizeof (struct bufarea));
209 		if (bp == NULL) {
210 			if (i >= MINBUFS)
211 				goto noalloc;
212 			goto nomem;
213 		}
214 
215 		bufp = malloc((size_t)sblock.fs_bsize);
216 		if (bufp == NULL) {
217 			free((void *)bp);
218 			if (i >= MINBUFS)
219 				goto noalloc;
220 			goto nomem;
221 		}
222 		initbarea(bp);
223 		bp->b_un.b_buf = bufp;
224 		bp->b_prev = &bufhead;
225 		bp->b_next = bufhead.b_next;
226 		bufhead.b_next->b_prev = bp;
227 		bufhead.b_next = bp;
228 	}
229 noalloc:
230 	bufhead.b_size = i;	/* save number of buffers */
231 	pbp = pdirbp = NULL;
232 	return;
233 
234 nomem:
235 	errexit("cannot allocate buffer pool\n");
236 	/* NOTREACHED */
237 }
238 
239 /*
240  * Undo a bufinit().
241  */
242 void
243 unbufinit(void)
244 {
245 	int cnt;
246 	struct bufarea *bp, *nbp;
247 
248 	cnt = 0;
249 	for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
250 		cnt++;
251 		flush(fswritefd, bp);
252 		nbp = bp->b_prev;
253 		/*
254 		 * We're discarding the entire chain, so this isn't
255 		 * technically necessary.  However, it doesn't hurt
256 		 * and lint's data flow analysis is much happier
257 		 * (this prevents it from thinking there's a chance
258 		 * of our using memory elsewhere after it's been released).
259 		 */
260 		nbp->b_next = bp->b_next;
261 		bp->b_next->b_prev = nbp;
262 		free((void *)bp->b_un.b_buf);
263 		free((void *)bp);
264 	}
265 
266 	if (bufhead.b_size != cnt)
267 		errexit("Panic: cache lost %d buffers\n",
268 		    bufhead.b_size - cnt);
269 }
270 
271 /*
272  * Manage a cache of directory blocks.
273  */
274 struct bufarea *
275 getdatablk(daddr32_t blkno, size_t size)
276 {
277 	struct bufarea *bp;
278 
279 	for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
280 		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
281 			goto foundit;
282 		}
283 	for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
284 		if ((bp->b_flags & B_INUSE) == 0)
285 			break;
286 	if (bp == &bufhead) {
287 		bp = alloc_bufarea();
288 		if (bp == NULL) {
289 			errexit("deadlocked buffer pool\n");
290 			/* NOTREACHED */
291 		}
292 	}
293 	/*
294 	 * We're at the same logical level as getblk(), so if there
295 	 * are any errors, we'll let our caller handle them.
296 	 */
297 	diskreads++;
298 	(void) getblk(bp, blkno, size);
299 
300 foundit:
301 	totalreads++;
302 	bp->b_cnt++;
303 	/*
304 	 * Move the buffer to head of linked list if it isn't
305 	 * already there.
306 	 */
307 	if (bufhead.b_next != bp) {
308 		bp->b_prev->b_next = bp->b_next;
309 		bp->b_next->b_prev = bp->b_prev;
310 		bp->b_prev = &bufhead;
311 		bp->b_next = bufhead.b_next;
312 		bufhead.b_next->b_prev = bp;
313 		bufhead.b_next = bp;
314 	}
315 	bp->b_flags |= B_INUSE;
316 	return (bp);
317 }
318 
319 void
320 brelse(struct bufarea *bp)
321 {
322 	bp->b_cnt--;
323 	if (bp->b_cnt == 0) {
324 		bp->b_flags &= ~B_INUSE;
325 	}
326 }
327 
328 struct bufarea *
329 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
330 {
331 	diskaddr_t dblk;
332 
333 	dblk = fsbtodb(&sblock, blk);
334 	if (bp->b_bno == dblk)
335 		return (bp);
336 	flush(fswritefd, bp);
337 	bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
338 	bp->b_bno = dblk;
339 	bp->b_size = size;
340 	return (bp);
341 }
342 
343 void
344 flush(int fd, struct bufarea *bp)
345 {
346 	int i, j;
347 	caddr_t sip;
348 	long size;
349 
350 	if (!bp->b_dirty)
351 		return;
352 
353 	/*
354 	 * It's not our buf, so if there are errors, let whoever
355 	 * acquired it deal with the actual problem.
356 	 */
357 	if (bp->b_errs != 0)
358 		pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
359 	bp->b_dirty = 0;
360 	bp->b_errs = 0;
361 	bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
362 	if (bp != &sblk) {
363 		return;
364 	}
365 
366 	/*
367 	 * We're flushing the superblock, so make sure all the
368 	 * ancillary bits go out as well.
369 	 */
370 	sip = (caddr_t)sblock.fs_u.fs_csp;
371 	for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
372 		size = sblock.fs_cssize - i < sblock.fs_bsize ?
373 		    sblock.fs_cssize - i : sblock.fs_bsize;
374 		bwrite(fswritefd, sip,
375 		    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
376 		    size);
377 		sip += size;
378 	}
379 }
380 
381 static void
382 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
383 {
384 	int olderr = errno;
385 
386 	if (!preen)
387 		(void) printf("\n");
388 
389 	if (rval == -1)
390 		pfatal("CANNOT %s: DISK BLOCK %lld: %s",
391 		    mesg, blk, strerror(olderr));
392 	else
393 		pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
394 
395 	if (reply("CONTINUE") == 0) {
396 		exitstat = EXERRFATAL;
397 		errexit("Program terminated\n");
398 	}
399 }
400 
401 void
402 ckfini(void)
403 {
404 	int64_t percentage;
405 
406 	if (fswritefd < 0)
407 		return;
408 
409 	flush(fswritefd, &sblk);
410 	/*
411 	 * Were we using a backup superblock?
412 	 */
413 	if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
414 		if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
415 			sblk.b_bno = SBOFF / dev_bsize;
416 			sbdirty();
417 			flush(fswritefd, &sblk);
418 		}
419 	}
420 	flush(fswritefd, &cgblk);
421 	if (cgblk.b_un.b_buf != NULL) {
422 		free((void *)cgblk.b_un.b_buf);
423 		cgblk.b_un.b_buf = NULL;
424 	}
425 	unbufinit();
426 	pbp = NULL;
427 	pdirbp = NULL;
428 	if (debug) {
429 		/*
430 		 * Note that we only count cache-related reads.
431 		 * Anything that called fsck_bread() or getblk()
432 		 * directly are explicitly not cached, so they're not
433 		 * included here.
434 		 */
435 		if (totalreads != 0)
436 			percentage = diskreads * 100 / totalreads;
437 		else
438 			percentage = 0;
439 
440 		(void) printf("cache missed %lld of %lld reads (%lld%%)\n",
441 		    (longlong_t)diskreads, (longlong_t)totalreads,
442 		    (longlong_t)percentage);
443 	}
444 
445 	(void) close(fsreadfd);
446 	(void) close(fswritefd);
447 	fsreadfd = -1;
448 	fswritefd = -1;
449 }
450 
451 int
452 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
453 {
454 	caddr_t cp;
455 	int i;
456 	int errs;
457 	offset_t offset = ldbtob(blk);
458 	offset_t addr;
459 
460 	/*
461 	 * In our universe, nothing exists before the superblock, so
462 	 * just pretend it's always zeros.  This is the complement of
463 	 * bwrite()'s ignoring write requests into that space.
464 	 */
465 	if (blk < SBLOCK) {
466 		if (debug)
467 			(void) printf(
468 			    "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
469 			    SBLOCK, (longlong_t)blk);
470 		(void) memset(buf, 0, (size_t)size);
471 		return (1);
472 	}
473 
474 	if (llseek(fd, offset, SEEK_SET) < 0) {
475 		rwerror("SEEK", blk, -1);
476 	}
477 
478 	if ((i = read(fd, buf, size)) == size) {
479 		return (0);
480 	}
481 	rwerror("READ", blk, i);
482 	if (llseek(fd, offset, SEEK_SET) < 0) {
483 		rwerror("SEEK", blk, -1);
484 	}
485 	errs = 0;
486 	(void) memset(buf, 0, (size_t)size);
487 	pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
488 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
489 		addr = ldbtob(blk + i);
490 		if (llseek(fd, addr, SEEK_SET) < 0 ||
491 		    read(fd, cp, (int)secsize) < 0) {
492 			iscorrupt = 1;
493 			(void) printf(" %llu", blk + (u_longlong_t)i);
494 			errs++;
495 		}
496 	}
497 	(void) printf("\n");
498 	return (errs);
499 }
500 
501 void
502 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
503 {
504 	int i;
505 	int n;
506 	caddr_t cp;
507 	offset_t offset = ldbtob(blk);
508 	offset_t addr;
509 
510 	if (fd < 0)
511 		return;
512 	if (blk < SBLOCK) {
513 		if (debug)
514 			(void) printf(
515 		    "WARNING: Attempt to write illegal blkno %lld on %s\n",
516 			    (longlong_t)blk, devname);
517 		return;
518 	}
519 	if (llseek(fd, offset, SEEK_SET) < 0) {
520 		rwerror("SEEK", blk, -1);
521 	}
522 	if ((i = write(fd, buf, (int)size)) == size) {
523 		fsmodified = 1;
524 		return;
525 	}
526 	rwerror("WRITE", blk, i);
527 	if (llseek(fd, offset, SEEK_SET) < 0) {
528 		rwerror("SEEK", blk, -1);
529 	}
530 	pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
531 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
532 		n = 0;
533 		addr = ldbtob(blk + i);
534 		if (llseek(fd, addr, SEEK_SET) < 0 ||
535 		    (n = write(fd, cp, DEV_BSIZE)) < 0) {
536 			iscorrupt = 1;
537 			(void) printf(" %llu", blk + (u_longlong_t)i);
538 		} else if (n > 0) {
539 			fsmodified = 1;
540 		}
541 
542 	}
543 	(void) printf("\n");
544 }
545 
546 /*
547  * Allocates the specified number of contiguous fragments.
548  */
549 daddr32_t
550 allocblk(int wantedfrags)
551 {
552 	int block, leadfrag, tailfrag;
553 	daddr32_t selected;
554 	size_t size;
555 	struct bufarea *bp;
556 
557 	/*
558 	 * It's arguable whether we should just fail, or instead
559 	 * error out here.  Since we should only ever be asked for
560 	 * a single fragment or an entire block (i.e., sblock.fs_frag),
561 	 * we'll fail out because anything else means somebody
562 	 * changed code without considering all of the ramifications.
563 	 */
564 	if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
565 		exitstat = EXERRFATAL;
566 		errexit("allocblk() asked for %d frags.  "
567 		    "Legal range is 1 to %d",
568 		    wantedfrags, sblock.fs_frag);
569 	}
570 
571 	/*
572 	 * For each filesystem block, look at every possible starting
573 	 * offset within the block such that we can get the number of
574 	 * contiguous fragments that we need.  This is a drastically
575 	 * simplified version of the kernel's mapsearch() and alloc*().
576 	 * It's also correspondingly slower.
577 	 */
578 	for (block = 0; block < maxfsblock - sblock.fs_frag;
579 	    block += sblock.fs_frag) {
580 		for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
581 		    leadfrag++) {
582 			/*
583 			 * Is first fragment of candidate run available?
584 			 */
585 			if (testbmap(block + leadfrag))
586 				continue;
587 			/*
588 			 * Are the rest of them available?
589 			 */
590 			for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
591 				if (testbmap(block + leadfrag + tailfrag))
592 					break;
593 			if (tailfrag < wantedfrags) {
594 				/*
595 				 * No, skip the known-unusable run.
596 				 */
597 				leadfrag += tailfrag;
598 				continue;
599 			}
600 			/*
601 			 * Found what we need, so claim them.
602 			 */
603 			for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
604 				setbmap(block + leadfrag + tailfrag);
605 			n_blks += wantedfrags;
606 			size = wantedfrags * sblock.fs_fsize;
607 			selected = block + leadfrag;
608 			bp = getdatablk(selected, size);
609 			(void) memset((void *)bp->b_un.b_buf, 0, size);
610 			dirty(bp);
611 			brelse(bp);
612 			if (debug)
613 				(void) printf(
614 		    "allocblk: selected %d (in block %d), frags %d, size %d\n",
615 				    selected, selected % sblock.fs_bsize,
616 				    wantedfrags, (int)size);
617 			return (selected);
618 		}
619 	}
620 	return (0);
621 }
622 
623 /*
624  * Free a previously allocated block
625  */
626 void
627 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
628 {
629 	struct inodesc idesc;
630 
631 	if (debug)
632 		(void) printf("debug: freeing %d fragments starting at %d\n",
633 		    frags, blkno);
634 
635 	init_inodesc(&idesc);
636 
637 	idesc.id_number = ino;
638 	idesc.id_blkno = blkno;
639 	idesc.id_numfrags = frags;
640 	idesc.id_truncto = -1;
641 
642 	/*
643 	 * Nothing in the return status has any relevance to how
644 	 * we're using pass4check(), so just ignore it.
645 	 */
646 	(void) pass4check(&idesc);
647 }
648 
649 /*
650  * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
651  * that the given buffer is at least MAXPATHLEN + 1 characters.
652  */
653 void
654 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
655 {
656 	int len;
657 	caddr_t cp;
658 	struct dinode *dp;
659 	struct inodesc idesc;
660 	struct inoinfo *inp;
661 
662 	if (debug)
663 		(void) printf("debug: getpathname(curdir %d, ino %d)\n",
664 		    curdir, ino);
665 
666 	if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
667 		(void) strcpy(namebuf, "?");
668 		return;
669 	}
670 
671 	if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
672 		(void) strcpy(namebuf, "/");
673 		return;
674 	}
675 
676 	init_inodesc(&idesc);
677 	idesc.id_type = DATA;
678 	cp = &namebuf[MAXPATHLEN - 1];
679 	*cp = '\0';
680 
681 	/*
682 	 * In the case of extended attributes, our
683 	 * parent won't necessarily be a directory, so just
684 	 * return what we've found with a prefix indicating
685 	 * that it's an XATTR.  Presumably our caller will
686 	 * know what's going on and do something useful, like
687 	 * work out the path of the parent and then combine
688 	 * the two names.
689 	 *
690 	 * Can't use strcpy(), etc, because we've probably
691 	 * already got some name information in the buffer and
692 	 * the usual trailing \0 would lose it.
693 	 */
694 	dp = ginode(curdir);
695 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
696 		idesc.id_number = curdir;
697 		idesc.id_parent = ino;
698 		idesc.id_func = findname;
699 		idesc.id_name = namebuf;
700 		idesc.id_fix = NOFIX;
701 		if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
702 			*cp-- = '?';
703 		}
704 
705 		len = sizeof (XATTR_DIR_NAME) - 1;
706 		cp -= len;
707 		(void) memmove(cp, XATTR_DIR_NAME, len);
708 		goto attrname;
709 	}
710 
711 	/*
712 	 * If curdir == ino, need to get a handle on .. so we
713 	 * can search it for ino's name.  Otherwise, just search
714 	 * the given directory for ino.  Repeat until out of space
715 	 * or a full path has been built.
716 	 */
717 	if (curdir != ino) {
718 		idesc.id_parent = curdir;
719 		goto namelookup;
720 	}
721 	while (ino != UFSROOTINO && ino != 0) {
722 		idesc.id_number = ino;
723 		idesc.id_func = findino;
724 		idesc.id_name = "..";
725 		idesc.id_fix = NOFIX;
726 		if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
727 			inp = getinoinfo(ino);
728 			if ((inp == NULL) || (inp->i_parent == 0)) {
729 				break;
730 			}
731 			idesc.id_parent = inp->i_parent;
732 		}
733 
734 		/*
735 		 * To get this far, id_parent must have the inode
736 		 * number for `..' in it.  By definition, that's got
737 		 * to be a directory, so search it for the inode of
738 		 * interest.
739 		 */
740 namelookup:
741 		idesc.id_number = idesc.id_parent;
742 		idesc.id_parent = ino;
743 		idesc.id_func = findname;
744 		idesc.id_name = namebuf;
745 		idesc.id_fix = NOFIX;
746 		if ((ckinode(ginode(idesc.id_number),
747 		    &idesc, CKI_TRAVERSE) & FOUND) == 0) {
748 			break;
749 		}
750 		/*
751 		 * Prepend to what we've accumulated so far.  If
752 		 * there's not enough room for even one more path element
753 		 * (of the worst-case length), then bail out.
754 		 */
755 		len = strlen(namebuf);
756 		cp -= len;
757 		if (cp < &namebuf[MAXNAMLEN])
758 			break;
759 		(void) memmove(cp, namebuf, len);
760 		*--cp = '/';
761 
762 		/*
763 		 * Corner case for a looped-to-itself directory.
764 		 */
765 		if (ino == idesc.id_number)
766 			break;
767 
768 		/*
769 		 * Climb one level of the hierarchy.  In other words,
770 		 * the current .. becomes the inode to search for and
771 		 * its parent becomes the directory to search in.
772 		 */
773 		ino = idesc.id_number;
774 	}
775 
776 	/*
777 	 * If we hit a discontinuity in the hierarchy, indicate it by
778 	 * prefixing the path so far with `?'.  Otherwise, the first
779 	 * character will be `/' as a side-effect of the *--cp above.
780 	 *
781 	 * The special case is to handle the situation where we're
782 	 * trying to look something up in UFSROOTINO, but didn't find
783 	 * it.
784 	 */
785 	if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
786 		if (cp > namebuf)
787 			cp--;
788 		*cp = '?';
789 	}
790 
791 	/*
792 	 * The invariants being used for buffer integrity are:
793 	 * - namebuf[] is terminated with \0 before anything else
794 	 * - cp is always <= the last element of namebuf[]
795 	 * - the new path element is always stored at the
796 	 *   beginning of namebuf[], and is no more than MAXNAMLEN-1
797 	 *   characters
798 	 * - cp is is decremented by the number of characters in
799 	 *   the new path element
800 	 * - if, after the above accounting for the new element's
801 	 *   size, there is no longer enough room at the beginning of
802 	 *   namebuf[] for a full-sized path element and a slash,
803 	 *   terminate the loop.  cp is in the range
804 	 *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
805 	 */
806 attrname:
807 	/* LINTED per the above discussion */
808 	(void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
809 }
810 
811 /* ARGSUSED */
812 void
813 catch(int dummy)
814 {
815 	ckfini();
816 	exit(EXSIGNAL);
817 }
818 
819 /*
820  * When preening, allow a single quit to signal
821  * a special exit after filesystem checks complete
822  * so that reboot sequence may be interrupted.
823  */
824 /* ARGSUSED */
825 void
826 catchquit(int dummy)
827 {
828 	(void) printf("returning to single-user after filesystem check\n");
829 	interrupted = 1;
830 	(void) signal(SIGQUIT, SIG_DFL);
831 }
832 
833 
834 /*
835  * determine whether an inode should be fixed.
836  */
837 NOTE(PRINTFLIKE(2))
838 int
839 dofix(struct inodesc *idesc, caddr_t msg, ...)
840 {
841 	int rval = 0;
842 	va_list ap;
843 
844 	va_start(ap, msg);
845 
846 	switch (idesc->id_fix) {
847 
848 	case DONTKNOW:
849 		if (idesc->id_type == DATA)
850 			vdirerror(idesc->id_number, msg, ap);
851 		else
852 			vpwarn(msg, ap);
853 		if (preen) {
854 			idesc->id_fix = FIX;
855 			rval = ALTERED;
856 			break;
857 		}
858 		if (reply("SALVAGE") == 0) {
859 			idesc->id_fix = NOFIX;
860 			break;
861 		}
862 		idesc->id_fix = FIX;
863 		rval = ALTERED;
864 		break;
865 
866 	case FIX:
867 		rval = ALTERED;
868 		break;
869 
870 	case NOFIX:
871 		break;
872 
873 	default:
874 		errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
875 	}
876 
877 	va_end(ap);
878 	return (rval);
879 }
880 
881 NOTE(PRINTFLIKE(1))
882 void
883 errexit(caddr_t fmt, ...)
884 {
885 	va_list ap;
886 
887 	va_start(ap, fmt);
888 	verrexit(fmt, ap);
889 	/* NOTREACHED */
890 }
891 
892 NOTE(PRINTFLIKE(1))
893 static void
894 verrexit(caddr_t fmt, va_list ap)
895 {
896 	static int recursing = 0;
897 
898 	if (!recursing) {
899 		recursing = 1;
900 		if (errorlocked || iscorrupt) {
901 			if (havesb && fswritefd >= 0) {
902 				sblock.fs_clean = FSBAD;
903 				sblock.fs_state = FSOKAY - (long)sblock.fs_time;
904 				sblock.fs_state = -sblock.fs_state;
905 				sbdirty();
906 				write_altsb(fswritefd);
907 				flush(fswritefd, &sblk);
908 			}
909 		}
910 		ckfini();
911 		recursing = 0;
912 	}
913 	(void) vprintf(fmt, ap);
914 	if (fmt[strlen(fmt) - 1] != '\n')
915 		(void) putchar('\n');
916 	exit((exitstat != 0) ? exitstat : EXERRFATAL);
917 }
918 
919 /*
920  * An unexpected inconsistency occured.
921  * Die if preening, otherwise just print message and continue.
922  */
923 NOTE(PRINTFLIKE(1))
924 void
925 pfatal(caddr_t fmt, ...)
926 {
927 	va_list ap;
928 
929 	va_start(ap, fmt);
930 	vpfatal(fmt, ap);
931 	va_end(ap);
932 }
933 
934 NOTE(PRINTFLIKE(1))
935 static void
936 vpfatal(caddr_t fmt, va_list ap)
937 {
938 	if (preen) {
939 		if (*fmt != '\0') {
940 			(void) printf("%s: ", devname);
941 			(void) vprintf(fmt, ap);
942 			(void) printf("\n");
943 		}
944 		(void) printf(
945 		    "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
946 		    devname);
947 		if (havesb && fswritefd >= 0) {
948 			sblock.fs_clean = FSBAD;
949 			sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
950 			sbdirty();
951 			flush(fswritefd, &sblk);
952 		}
953 		/*
954 		 * We're exiting, it doesn't really matter that our
955 		 * caller doesn't get to call va_end().
956 		 */
957 		if (exitstat == 0)
958 			exitstat = EXFNDERRS;
959 		exit(exitstat);
960 	}
961 	if (*fmt != '\0') {
962 		(void) vprintf(fmt, ap);
963 	}
964 }
965 
966 /*
967  * Pwarn just prints a message when not preening,
968  * or a warning (preceded by filename) when preening.
969  */
970 NOTE(PRINTFLIKE(1))
971 void
972 pwarn(caddr_t fmt, ...)
973 {
974 	va_list ap;
975 
976 	va_start(ap, fmt);
977 	vpwarn(fmt, ap);
978 	va_end(ap);
979 }
980 
981 NOTE(PRINTFLIKE(1))
982 static void
983 vpwarn(caddr_t fmt, va_list ap)
984 {
985 	if (*fmt != '\0') {
986 		if (preen)
987 			(void) printf("%s: ", devname);
988 		(void) vprintf(fmt, ap);
989 	}
990 }
991 
992 /*
993  * Like sprintf(), except the buffer is dynamically allocated
994  * and returned, instead of being passed in.  A pointer to the
995  * buffer is stored in *RET, and FMT is the usual format string.
996  * The number of characters in *RET (excluding the trailing \0,
997  * to be consistent with the other *printf() routines) is returned.
998  *
999  * Solaris doesn't have asprintf(3C) yet, unfortunately.
1000  */
1001 NOTE(PRINTFLIKE(2))
1002 int
1003 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1004 {
1005 	int len;
1006 	caddr_t buffer;
1007 	va_list ap;
1008 
1009 	va_start(ap, fmt);
1010 	len = vsnprintf(NULL, 0, fmt, ap);
1011 	va_end(ap);
1012 
1013 	buffer = malloc((len + 1) * sizeof (char));
1014 	if (buffer == NULL) {
1015 		errexit("Out of memory in asprintf\n");
1016 		/* NOTREACHED */
1017 	}
1018 
1019 	va_start(ap, fmt);
1020 	(void) vsnprintf(buffer, len + 1, fmt, ap);
1021 	va_end(ap);
1022 
1023 	*ret = buffer;
1024 	return (len);
1025 }
1026 
1027 /*
1028  * So we can take advantage of kernel routines in ufs_subr.c.
1029  */
1030 /* PRINTFLIKE2 */
1031 void
1032 cmn_err(int level, caddr_t fmt, ...)
1033 {
1034 	va_list ap;
1035 
1036 	va_start(ap, fmt);
1037 	if (level == CE_PANIC) {
1038 		(void) printf("INTERNAL INCONSISTENCY:");
1039 		verrexit(fmt, ap);
1040 	} else {
1041 		(void) vprintf(fmt, ap);
1042 	}
1043 	va_end(ap);
1044 }
1045 
1046 /*
1047  * Check to see if unraw version of name is already mounted.
1048  * Updates devstr with the device name if devstr is not NULL
1049  * and str_size is positive.
1050  */
1051 int
1052 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1053 {
1054 	int found;
1055 	struct mnttab *mntent;
1056 
1057 	mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1058 	if (mntent == NULL)
1059 		return (M_NOMNT);
1060 
1061 	/*
1062 	 * It's mounted.  With or without write access?
1063 	 */
1064 	if (hasmntopt(mntent, MNTOPT_RO) != 0)
1065 		found = M_RO;	/* mounted as RO */
1066 	else
1067 		found = M_RW; 	/* mounted as R/W */
1068 
1069 	if (mount_point == NULL) {
1070 		mount_point = strdup(mntent->mnt_mountp);
1071 		if (mount_point == NULL) {
1072 			errexit("fsck: memory allocation failure: %s",
1073 			    strerror(errno));
1074 			/* NOTREACHED */
1075 		}
1076 
1077 		if (devstr != NULL && str_size > 0)
1078 			(void) strlcpy(devstr, mntent->mnt_special, str_size);
1079 	}
1080 
1081 	return (found);
1082 }
1083 
1084 /*
1085  * Check to see if name corresponds to an entry in vfstab, and that the entry
1086  * does not have option ro.
1087  */
1088 int
1089 writable(caddr_t name)
1090 {
1091 	int rw = 1;
1092 	struct vfstab vfsbuf, vfskey;
1093 	FILE *vfstab;
1094 
1095 	vfstab = fopen(VFSTAB, "r");
1096 	if (vfstab == NULL) {
1097 		(void) printf("can't open %s\n", VFSTAB);
1098 		return (1);
1099 	}
1100 	(void) memset((void *)&vfskey, 0, sizeof (vfskey));
1101 	vfsnull(&vfskey);
1102 	vfskey.vfs_special = unrawname(name);
1103 	vfskey.vfs_fstype = MNTTYPE_UFS;
1104 	if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1105 	    (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1106 		rw = 0;
1107 	}
1108 	(void) fclose(vfstab);
1109 	return (rw);
1110 }
1111 
1112 /*
1113  * debugclean
1114  */
1115 static void
1116 debugclean(void)
1117 {
1118 	if (!debug)
1119 		return;
1120 
1121 	if ((iscorrupt == 0) && (isdirty == 0))
1122 		return;
1123 
1124 	if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1125 	    (sblock.fs_clean == FSLOG && islog && islogok) ||
1126 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1127 		return;
1128 
1129 	(void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1130 	    sblock.fs_clean == FSSTABLE ? "stable" :
1131 	    sblock.fs_clean == FSLOG ? "logging" :
1132 	    sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1133 	    devname);
1134 }
1135 
1136 /*
1137  * updateclean
1138  *	Carefully and transparently update the clean flag.
1139  *
1140  * `iscorrupt' has to be in its final state before this is called.
1141  */
1142 int
1143 updateclean(void)
1144 {
1145 	int freedlog = 0;
1146 	struct bufarea cleanbuf;
1147 	size_t size;
1148 	ssize_t io_res;
1149 	diskaddr_t bno;
1150 	char fsclean;
1151 	int fsreclaim;
1152 	char fsflags;
1153 	int flags_ok = 1;
1154 	daddr32_t fslogbno;
1155 	offset_t sblkoff;
1156 	time_t t;
1157 
1158 	/*
1159 	 * debug stuff
1160 	 */
1161 	debugclean();
1162 
1163 	/*
1164 	 * set fsclean to its appropriate value
1165 	 */
1166 	fslogbno = sblock.fs_logbno;
1167 	fsclean = sblock.fs_clean;
1168 	fsreclaim = sblock.fs_reclaim;
1169 	fsflags = sblock.fs_flags;
1170 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1171 		fsclean = FSACTIVE;
1172 	}
1173 	/*
1174 	 * If ufs log is not okay, note that we need to clear it.
1175 	 */
1176 	examinelog(NULL);
1177 	if (fslogbno && !(islog && islogok)) {
1178 		fsclean = FSACTIVE;
1179 		fslogbno = 0;
1180 	}
1181 
1182 	/*
1183 	 * if necessary, update fs_clean and fs_state
1184 	 */
1185 	switch (fsclean) {
1186 
1187 	case FSACTIVE:
1188 		if (!iscorrupt) {
1189 			fsclean = FSSTABLE;
1190 			fsreclaim = 0;
1191 		}
1192 		break;
1193 
1194 	case FSCLEAN:
1195 	case FSSTABLE:
1196 		if (iscorrupt) {
1197 			fsclean = FSACTIVE;
1198 		} else {
1199 			fsreclaim = 0;
1200 		}
1201 		break;
1202 
1203 	case FSLOG:
1204 		if (iscorrupt) {
1205 			fsclean = FSACTIVE;
1206 		} else if (!islog || fslogbno == 0) {
1207 			fsclean = FSSTABLE;
1208 			fsreclaim = 0;
1209 		} else if (fflag) {
1210 			fsreclaim = 0;
1211 		}
1212 		break;
1213 
1214 	case FSFIX:
1215 		fsclean = FSBAD;
1216 		if (errorlocked && !iscorrupt) {
1217 			fsclean = islog ? FSLOG : FSCLEAN;
1218 		}
1219 		break;
1220 
1221 	default:
1222 		if (iscorrupt) {
1223 			fsclean = FSACTIVE;
1224 		} else {
1225 			fsclean = FSSTABLE;
1226 			fsreclaim = 0;
1227 		}
1228 	}
1229 
1230 	if (largefile_count > 0)
1231 		fsflags |= FSLARGEFILES;
1232 	else
1233 		fsflags &= ~FSLARGEFILES;
1234 
1235 	/*
1236 	 * There can be two discrepencies here.  A) The superblock
1237 	 * shows no largefiles but we found some while scanning.
1238 	 * B) The superblock indicates the presence of largefiles,
1239 	 * but none are present.  Note that if preening, the superblock
1240 	 * is silently corrected.
1241 	 */
1242 	if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1243 	    (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1244 		flags_ok = 0;
1245 
1246 	if (debug)
1247 		(void) printf(
1248 		    "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1249 		    largefile_count, sblock.fs_flags, flags_ok);
1250 
1251 	/*
1252 	 * If fs is unchanged, do nothing.
1253 	 */
1254 	if ((!isdirty) && (flags_ok) &&
1255 	    (fslogbno == sblock.fs_logbno) &&
1256 	    (sblock.fs_clean == fsclean) &&
1257 	    (sblock.fs_reclaim == fsreclaim) &&
1258 	    (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1259 		if (errorlocked) {
1260 			if (!do_errorlock(LOCKFS_ULOCK))
1261 				pwarn(
1262 		    "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1263 		}
1264 		return (freedlog);
1265 	}
1266 
1267 	/*
1268 	 * if user allows, update superblock state
1269 	 */
1270 	if (debug) {
1271 		(void) printf(
1272 	    "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1273 		    sblock.fs_flags, sblock.fs_logbno,
1274 		    sblock.fs_clean, sblock.fs_reclaim,
1275 		    sblock.fs_state + sblock.fs_time);
1276 		(void) printf(
1277 	    "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1278 		    fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1279 	}
1280 	if (!isdirty && !preen && !rerun &&
1281 	    (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1282 		return (freedlog);
1283 
1284 	(void) time(&t);
1285 	sblock.fs_time = (time32_t)t;
1286 	if (debug)
1287 		printclean();
1288 
1289 	if (sblock.fs_logbno != fslogbno) {
1290 		examinelog(&freelogblk);
1291 		freedlog++;
1292 	}
1293 
1294 	sblock.fs_logbno = fslogbno;
1295 	sblock.fs_clean = fsclean;
1296 	sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1297 	sblock.fs_reclaim = fsreclaim;
1298 	sblock.fs_flags = fsflags;
1299 
1300 	/*
1301 	 * if superblock can't be written, return
1302 	 */
1303 	if (fswritefd < 0)
1304 		return (freedlog);
1305 
1306 	/*
1307 	 * Read private copy of superblock, update clean flag, and write it.
1308 	 */
1309 	bno  = sblk.b_bno;
1310 	size = sblk.b_size;
1311 
1312 	sblkoff = ldbtob(bno);
1313 
1314 	if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1315 		errexit("out of memory");
1316 	if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
1317 		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1318 		    (longlong_t)bno, strerror(errno));
1319 		goto out;
1320 	}
1321 
1322 	if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1323 		report_io_prob("READ FROM", bno, size, io_res);
1324 		goto out;
1325 	}
1326 
1327 	cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
1328 	cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
1329 	cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
1330 	cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
1331 	cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1332 	cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;
1333 
1334 	if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
1335 		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1336 		    (longlong_t)bno, strerror(errno));
1337 		goto out;
1338 	}
1339 
1340 	if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1341 		report_io_prob("WRITE TO", bno, size, io_res);
1342 		goto out;
1343 	}
1344 
1345 	/*
1346 	 * 1208040
1347 	 * If we had to use -b to grab an alternate superblock, then we
1348 	 * likely had to do so because of unacceptable differences between
1349 	 * the main and alternate superblocks.  So, we had better update
1350 	 * the alternate superblock as well, or we'll just fail again
1351 	 * the next time we attempt to run fsck!
1352 	 */
1353 	if (bflag != 0) {
1354 		write_altsb(fswritefd);
1355 	}
1356 
1357 	if (errorlocked) {
1358 		if (!do_errorlock(LOCKFS_ULOCK))
1359 			pwarn(
1360 		    "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1361 	}
1362 
1363 out:
1364 	if (cleanbuf.b_un.b_buf != NULL) {
1365 		free((void *)cleanbuf.b_un.b_buf);
1366 	}
1367 
1368 	return (freedlog);
1369 }
1370 
1371 static void
1372 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1373 {
1374 	if (failure < 0)
1375 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1376 		    what, (int)bno, strerror(errno));
1377 	else if (failure == 0)
1378 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1379 		    what, (int)bno);
1380 	else
1381 		(void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1382 		    what, (int)bno, (unsigned)failure, (unsigned)expected);
1383 }
1384 
1385 /*
1386  * print out clean info
1387  */
1388 void
1389 printclean(void)
1390 {
1391 	caddr_t s;
1392 
1393 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1394 		s = "unknown";
1395 	else
1396 		switch (sblock.fs_clean) {
1397 
1398 		case FSACTIVE:
1399 			s = "active";
1400 			break;
1401 
1402 		case FSCLEAN:
1403 			s = "clean";
1404 			break;
1405 
1406 		case FSSTABLE:
1407 			s = "stable";
1408 			break;
1409 
1410 		case FSLOG:
1411 			s = "logging";
1412 			break;
1413 
1414 		case FSBAD:
1415 			s = "is bad";
1416 			break;
1417 
1418 		case FSFIX:
1419 			s = "being fixed";
1420 			break;
1421 
1422 		default:
1423 			s = "unknown";
1424 		}
1425 
1426 	if (preen)
1427 		pwarn("is %s.\n", s);
1428 	else
1429 		(void) printf("** %s is %s.\n", devname, s);
1430 }
1431 
1432 int
1433 is_errorlocked(caddr_t fs)
1434 {
1435 	int		retval;
1436 	struct stat64	statb;
1437 	caddr_t		mountp;
1438 	struct mnttab	*mntent;
1439 
1440 	retval = 0;
1441 
1442 	if (!fs)
1443 		return (0);
1444 
1445 	if (stat64(fs, &statb) < 0)
1446 		return (0);
1447 
1448 	if (S_ISDIR(statb.st_mode)) {
1449 		mountp = fs;
1450 	} else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1451 		mntent = search_mnttab(NULL, fs, NULL, 0);
1452 		if (mntent == NULL)
1453 			return (0);
1454 		mountp = mntent->mnt_mountp;
1455 		if (mountp == NULL) /* theoretically a can't-happen */
1456 			return (0);
1457 	} else {
1458 		return (0);
1459 	}
1460 
1461 	/*
1462 	 * From here on, must `goto out' to avoid memory leakage.
1463 	 */
1464 
1465 	if (elock_combuf == NULL)
1466 		elock_combuf =
1467 		    (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1468 	else
1469 		elock_combuf =
1470 		    (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1471 
1472 	if (elock_combuf == NULL)
1473 		goto out;
1474 
1475 	(void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1476 
1477 	if (elock_mountp != NULL) {
1478 		free(elock_mountp);
1479 	}
1480 
1481 	elock_mountp = strdup(mountp);
1482 	if (elock_mountp == NULL)
1483 		goto out;
1484 
1485 	if (mountfd < 0) {
1486 		if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1487 			goto out;
1488 	}
1489 
1490 	if (lfp == NULL) {
1491 		lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1492 		if (lfp == NULL)
1493 			goto out;
1494 		(void) memset((void *)lfp, 0, sizeof (struct lockfs));
1495 	}
1496 
1497 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1498 	lfp->lf_comment = elock_combuf;
1499 
1500 	if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1501 		goto out;
1502 
1503 	/*
1504 	 * lint believes that the ioctl() (or any other function
1505 	 * taking lfp as an arg) could free lfp.  This is not the
1506 	 * case, however.
1507 	 */
1508 	retval = LOCKFS_IS_ELOCK(lfp);
1509 
1510 out:
1511 	return (retval);
1512 }
1513 
1514 /*
1515  * Given a name which is known to be a directory, see if it appears
1516  * in the vfstab.  If so, return the entry's block (special) device
1517  * field via devstr.
1518  */
1519 int
1520 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1521 {
1522 	return (NULL != search_vfstab(name, NULL, devstr, str_size));
1523 }
1524 
1525 /*
1526  * Given a name which is known to be a directory, see if it appears
1527  * in the mnttab.  If so, return the entry's block (special) device
1528  * field via devstr.
1529  */
1530 int
1531 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1532 {
1533 	return (NULL != search_mnttab(name, NULL, devstr, str_size));
1534 }
1535 
1536 /*
1537  * Search for mount point and/or special device in the given file.
1538  * The first matching entry is returned.
1539  *
1540  * If an entry is found and str_size is greater than zero, then
1541  * up to size_str bytes of the special device name from the entry
1542  * are copied to devstr.
1543  */
1544 
1545 #define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
1546 			st_nuller, st_init, st_searcher) \
1547 	{ \
1548 		FILE *fp; \
1549 		struct st_type *retval = NULL; \
1550 		struct st_type key; \
1551 		static struct st_type buffer; \
1552 		\
1553 		/* LINTED ``assigned value never used'' */ \
1554 		st_nuller(&key); \
1555 		key.st_mount = mountp; \
1556 		key.st_special = special; \
1557 		st_init; \
1558 		\
1559 		if ((fp = fopen(st_file, "r")) == NULL) \
1560 			return (NULL); \
1561 		\
1562 		if (st_searcher(fp, &buffer, &key) == 0) { \
1563 			retval = &buffer; \
1564 			if (devstr != NULL && str_size > 0 && \
1565 			    buffer.st_special != NULL) { \
1566 				(void) strlcpy(devstr, buffer.st_special, \
1567 				    str_size); \
1568 			} \
1569 		} \
1570 		(void) fclose(fp); \
1571 		return (retval); \
1572 	}
1573 
1574 static struct vfstab *
1575 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1576 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1577 		(retval = retval), getvfsany)
1578 
1579 static struct mnttab *
1580 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1581 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1582 		(key.mnt_fstype = MNTTYPE_UFS), getmntany)
1583 
1584 int
1585 do_errorlock(int lock_type)
1586 {
1587 	caddr_t	   buf;
1588 	time_t	   now;
1589 	struct tm *local;
1590 	int	   rc;
1591 
1592 	if (elock_combuf == NULL)
1593 		errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1594 		    elock_mountp ? elock_mountp : "<null>",
1595 		    lock_type);
1596 
1597 	if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1598 	    NULL) {
1599 		errexit("Couldn't alloc memory for temp. lock status buffer\n");
1600 	}
1601 	if (lfp == NULL) {
1602 		errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1603 		    elock_mountp, lock_type);
1604 	}
1605 
1606 	(void) memmove((void *)buf, (void *)elock_combuf,
1607 	    LOCKFS_MAXCOMMENTLEN-1);
1608 
1609 	switch (lock_type) {
1610 	case LOCKFS_ELOCK:
1611 		/*
1612 		 * Note that if it is error-locked, we won't get an
1613 		 * error back if we try to error-lock it again.
1614 		 */
1615 		if (time(&now) != (time_t)-1) {
1616 			if ((local = localtime(&now)) != NULL)
1617 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1618 		    "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1619 				    elock_combuf, (int)pid,
1620 				    local->tm_mon + 1, local->tm_mday,
1621 				    (local->tm_year % 100), local->tm_hour,
1622 				    local->tm_min, local->tm_sec);
1623 			else
1624 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1625 				    "%s [fsck pid %d", elock_combuf, pid);
1626 
1627 		} else {
1628 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1629 			    "%s [fsck pid %d", elock_combuf, pid);
1630 		}
1631 		break;
1632 
1633 	case LOCKFS_ULOCK:
1634 		if (time(&now) != (time_t)-1) {
1635 			if ((local = localtime(&now)) != NULL) {
1636 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1637 				    "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1638 				    elock_combuf,
1639 				    local->tm_mon + 1, local->tm_mday,
1640 				    (local->tm_year % 100), local->tm_hour,
1641 				    local->tm_min, local->tm_sec);
1642 			} else {
1643 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1644 				    "%s]", elock_combuf);
1645 			}
1646 		} else {
1647 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1648 			    "%s]", elock_combuf);
1649 		}
1650 		if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1651 			pwarn("do_errorlock: unlock failed: %s\n",
1652 			    strerror(errno));
1653 			goto out;
1654 		}
1655 		break;
1656 
1657 	default:
1658 		break;
1659 	}
1660 
1661 	(void) memmove((void *)elock_combuf, (void *)buf,
1662 	    LOCKFS_MAXCOMMENTLEN - 1);
1663 
1664 	lfp->lf_lock = lock_type;
1665 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1666 	lfp->lf_comment = elock_combuf;
1667 	lfp->lf_flags = 0;
1668 	errno = 0;
1669 
1670 	if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1671 		if (errno == EINVAL) {
1672 			pwarn("Another fsck active?\n");
1673 			iscorrupt = 0;	/* don't go away mad, just go away */
1674 		} else {
1675 			pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1676 			    lock_type, elock_combuf, strerror(errno));
1677 		}
1678 	}
1679 out:
1680 	if (buf != NULL) {
1681 		free((void *)buf);
1682 	}
1683 
1684 	return (rc != -1);
1685 }
1686 
1687 /*
1688  * Shadow inode support.  To register a shadow with a client is to note
1689  * that an inode (the client) refers to the shadow.
1690  */
1691 
1692 static struct shadowclients *
1693 newshadowclient(struct shadowclients *prev)
1694 {
1695 	struct shadowclients *rc;
1696 
1697 	rc = (struct shadowclients *)malloc(sizeof (*rc));
1698 	if (rc == NULL)
1699 		errexit("newshadowclient: cannot malloc shadow client");
1700 	rc->next = prev;
1701 	rc->nclients = 0;
1702 
1703 	rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1704 	    maxshadowclients);
1705 	if (rc->client == NULL)
1706 		errexit("newshadowclient: cannot malloc client array");
1707 	return (rc);
1708 }
1709 
1710 void
1711 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1712 	struct shadowclientinfo **info)
1713 {
1714 	struct shadowclientinfo *sci;
1715 	struct shadowclients *scc;
1716 
1717 	/*
1718 	 * Already have a record for this shadow?
1719 	 */
1720 	for (sci = *info; sci != NULL; sci = sci->next)
1721 		if (sci->shadow == shadow)
1722 			break;
1723 	if (sci == NULL) {
1724 		/*
1725 		 * It's a new shadow, add it to the list
1726 		 */
1727 		sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1728 		if (sci == NULL)
1729 			errexit("registershadowclient: cannot malloc");
1730 		sci->next = *info;
1731 		*info = sci;
1732 		sci->shadow = shadow;
1733 		sci->totalClients = 0;
1734 		sci->clients = newshadowclient(NULL);
1735 	}
1736 
1737 	sci->totalClients++;
1738 	scc = sci->clients;
1739 	if (scc->nclients >= maxshadowclients) {
1740 		scc = newshadowclient(sci->clients);
1741 		sci->clients = scc;
1742 	}
1743 
1744 	scc->client[scc->nclients++] = client;
1745 }
1746 
1747 /*
1748  * Locate and discard a shadow.
1749  */
1750 void
1751 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1752 {
1753 	struct shadowclientinfo *sci, *prev;
1754 
1755 	/*
1756 	 * Do we have a record for this shadow?
1757 	 */
1758 	prev = NULL;
1759 	for (sci = *info; sci != NULL; sci = sci->next) {
1760 		if (sci->shadow == shadow)
1761 			break;
1762 		prev = sci;
1763 	}
1764 
1765 	if (sci != NULL) {
1766 		/*
1767 		 * First, pull it off the list, since we know there
1768 		 * shouldn't be any future references to this one.
1769 		 */
1770 		if (prev == NULL)
1771 			*info = sci->next;
1772 		else
1773 			prev->next = sci->next;
1774 		deshadow(sci, clearattrref);
1775 	}
1776 }
1777 
1778 /*
1779  * Discard all memory used to track clients of a shadow.
1780  */
1781 void
1782 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1783 {
1784 	struct shadowclients *clients, *discard;
1785 	int idx;
1786 
1787 	clients = sci->clients;
1788 	while (clients != NULL) {
1789 		discard = clients;
1790 		clients = clients->next;
1791 		if (discard->client != NULL) {
1792 			if (cb != NULL) {
1793 				for (idx = 0; idx < discard->nclients; idx++)
1794 					(*cb)(discard->client[idx]);
1795 			}
1796 			free((void *)discard->client);
1797 		}
1798 		free((void *)discard);
1799 	}
1800 
1801 	free((void *)sci);
1802 }
1803 
1804 /*
1805  * Allocate more buffer as need arises but allocate one at a time.
1806  * This is done to make sure that fsck does not exit with error if it
1807  * needs more buffer to complete its task.
1808  */
1809 static struct bufarea *
1810 alloc_bufarea(void)
1811 {
1812 	struct bufarea *newbp;
1813 	caddr_t bufp;
1814 
1815 	bufp = malloc((unsigned int)sblock.fs_bsize);
1816 	if (bufp == NULL)
1817 		return (NULL);
1818 
1819 	newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1820 	if (newbp == NULL) {
1821 		free((void *)bufp);
1822 		return (NULL);
1823 	}
1824 
1825 	initbarea(newbp);
1826 	newbp->b_un.b_buf = bufp;
1827 	newbp->b_prev = &bufhead;
1828 	newbp->b_next = bufhead.b_next;
1829 	bufhead.b_next->b_prev = newbp;
1830 	bufhead.b_next = newbp;
1831 	bufhead.b_size++;
1832 	return (newbp);
1833 }
1834 
1835 /*
1836  * We length-limit in both unrawname() and rawname() to avoid
1837  * overflowing our arrays or those of our naive, trusting callers.
1838  */
1839 
1840 caddr_t
1841 unrawname(caddr_t name)
1842 {
1843 	caddr_t dp;
1844 	static char fullname[MAXPATHLEN + 1];
1845 
1846 	if ((dp = getfullblkname(name)) == NULL)
1847 		return ("");
1848 
1849 	(void) strlcpy(fullname, dp, sizeof (fullname));
1850 	/*
1851 	 * Not reporting under debug, as the allocation isn't
1852 	 * reported by getfullblkname.  The idea is that we
1853 	 * produce balanced alloc/free instances.
1854 	 */
1855 	free(dp);
1856 
1857 	return (fullname);
1858 }
1859 
1860 caddr_t
1861 rawname(caddr_t name)
1862 {
1863 	caddr_t dp;
1864 	static char fullname[MAXPATHLEN + 1];
1865 
1866 	if ((dp = getfullrawname(name)) == NULL)
1867 		return ("");
1868 
1869 	(void) strlcpy(fullname, dp, sizeof (fullname));
1870 	/*
1871 	 * Not reporting under debug, as the allocation isn't
1872 	 * reported by getfullblkname.  The idea is that we
1873 	 * produce balanced alloc/free instances.
1874 	 */
1875 	free(dp);
1876 
1877 	return (fullname);
1878 }
1879 
1880 /*
1881  * Make sure that a cg header looks at least moderately reasonable.
1882  * We want to be able to trust the contents enough to be able to use
1883  * the standard accessor macros.  So, besides looking at the obvious
1884  * such as the magic number, we verify that the offset field values
1885  * are properly aligned and not too big or small.
1886  *
1887  * Returns a NULL pointer if the cg is sane enough for our needs, else
1888  * a dynamically-allocated string describing all of its faults.
1889  */
1890 #define	Append_Error(full, full_len, addition, addition_len) \
1891 	if (full == NULL) { \
1892 		full = addition; \
1893 		full_len = addition_len; \
1894 	} else { \
1895 		/* lint doesn't think realloc() understands NULLs */ \
1896 		full = realloc(full, full_len + addition_len + 1); \
1897 		if (full == NULL) { \
1898 			errexit("Out of memory in cg_sanity"); \
1899 			/* NOTREACHED */ \
1900 		} \
1901 		(void) strcpy(full + full_len, addition); \
1902 		full_len += addition_len; \
1903 		free(addition); \
1904 	}
1905 
1906 caddr_t
1907 cg_sanity(struct cg *cgp, int cgno)
1908 {
1909 	caddr_t full_err;
1910 	caddr_t this_err = NULL;
1911 	int full_len, this_len;
1912 	daddr32_t ndblk;
1913 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1914 	daddr32_t exp_freeoff, exp_nextfreeoff;
1915 
1916 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1917 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
1918 
1919 	full_err = NULL;
1920 	full_len = 0;
1921 
1922 	if (!cg_chkmagic(cgp)) {
1923 		this_len = fsck_asprintf(&this_err,
1924 		    "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1925 		    cgp->cg_magic, CG_MAGIC);
1926 		Append_Error(full_err, full_len, this_err, this_len);
1927 	}
1928 
1929 	if (cgp->cg_cgx != cgno) {
1930 		this_len = fsck_asprintf(&this_err,
1931 		    "WRONG CG NUMBER (%d should be %d)\n",
1932 		    cgp->cg_cgx, cgno);
1933 		Append_Error(full_err, full_len, this_err, this_len);
1934 	}
1935 
1936 	if ((cgp->cg_btotoff & 3) != 0) {
1937 		this_len = fsck_asprintf(&this_err,
1938 		    "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1939 		    cgp->cg_btotoff);
1940 		Append_Error(full_err, full_len, this_err, this_len);
1941 	}
1942 
1943 	if ((cgp->cg_boff & 1) != 0) {
1944 		this_len = fsck_asprintf(&this_err,
1945 	    "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1946 		    cgp->cg_boff);
1947 		Append_Error(full_err, full_len, this_err, this_len);
1948 	}
1949 
1950 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1951 		if (cgp->cg_ncyl < 1) {
1952 			this_len = fsck_asprintf(&this_err,
1953 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1954 			    cgp->cg_ncyl);
1955 		} else {
1956 			this_len = fsck_asprintf(&this_err,
1957 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1958 			    cgp->cg_ncyl, sblock.fs_cpg);
1959 		}
1960 		Append_Error(full_err, full_len, this_err, this_len);
1961 	}
1962 
1963 	if (cgp->cg_niblk != sblock.fs_ipg) {
1964 		this_len = fsck_asprintf(&this_err,
1965 		    "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1966 		    cgp->cg_niblk, sblock.fs_ipg);
1967 		Append_Error(full_err, full_len, this_err, this_len);
1968 	}
1969 
1970 	if (cgp->cg_ndblk != ndblk) {
1971 		this_len = fsck_asprintf(&this_err,
1972 	    "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1973 		    cgp->cg_ndblk, ndblk);
1974 		Append_Error(full_err, full_len, this_err, this_len);
1975 	}
1976 
1977 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1978 		this_len = fsck_asprintf(&this_err,
1979 		    "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1980 		    "(%d should be at least 0 and less than %d)\n",
1981 		    cgp->cg_rotor, ndblk);
1982 		Append_Error(full_err, full_len, this_err, this_len);
1983 	}
1984 
1985 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1986 		this_len = fsck_asprintf(&this_err,
1987 		    "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1988 		    "(%d should be at least 0 and less than %d)\n",
1989 		    cgp->cg_frotor, ndblk);
1990 		Append_Error(full_err, full_len, this_err, this_len);
1991 	}
1992 
1993 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1994 		this_len = fsck_asprintf(&this_err,
1995 		    "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1996 		    "(%d should be at least 0 and less than %d)\n",
1997 		    cgp->cg_irotor, sblock.fs_ipg);
1998 		Append_Error(full_err, full_len, this_err, this_len);
1999 	}
2000 
2001 	if (cgp->cg_btotoff != exp_btotoff) {
2002 		this_len = fsck_asprintf(&this_err,
2003 		    "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2004 		    cgp->cg_btotoff, exp_btotoff);
2005 		Append_Error(full_err, full_len, this_err, this_len);
2006 	}
2007 
2008 	if (cgp->cg_boff != exp_boff) {
2009 		this_len = fsck_asprintf(&this_err,
2010 		    "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2011 		    cgp->cg_boff, exp_boff);
2012 		Append_Error(full_err, full_len, this_err, this_len);
2013 	}
2014 
2015 	if (cgp->cg_iusedoff != exp_iusedoff) {
2016 		this_len = fsck_asprintf(&this_err,
2017 		    "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2018 		    cgp->cg_iusedoff, exp_iusedoff);
2019 		Append_Error(full_err, full_len, this_err, this_len);
2020 	}
2021 
2022 	if (cgp->cg_freeoff != exp_freeoff) {
2023 		this_len = fsck_asprintf(&this_err,
2024 		    "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2025 		    cgp->cg_freeoff, exp_freeoff);
2026 		Append_Error(full_err, full_len, this_err, this_len);
2027 	}
2028 
2029 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2030 		this_len = fsck_asprintf(&this_err,
2031 		    "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2032 		    cgp->cg_nextfreeoff, exp_nextfreeoff);
2033 		Append_Error(full_err, full_len, this_err, this_len);
2034 	}
2035 
2036 	return (full_err);
2037 }
2038 
2039 #undef	Append_Error
2040 
2041 /*
2042  * This is taken from mkfs, and is what is used to come up with the
2043  * original values for a struct cg.  This implies that, since these
2044  * are all constants, recalculating them now should give us the same
2045  * thing as what's on disk.
2046  */
2047 static void
2048 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2049 	daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2050 	daddr32_t *ndblk)
2051 {
2052 	daddr32_t cbase, dmax;
2053 	struct cg *cgp;
2054 
2055 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2056 	    (size_t)sblock.fs_cgsize);
2057 	cgp = cgblk.b_un.b_cg;
2058 
2059 	cbase = cgbase(&sblock, cgno);
2060 	dmax = cbase + sblock.fs_fpg;
2061 	if (dmax > sblock.fs_size)
2062 		dmax = sblock.fs_size;
2063 
2064 	/* LINTED pointer difference won't overflow */
2065 	*btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2066 	*boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2067 	*iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2068 	*freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2069 	*nextfreeoff = *freeoff +
2070 	    howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2071 	*ndblk = dmax - cbase;
2072 }
2073 
2074 /*
2075  * Corrects all fields in the cg that can be done with the available
2076  * redundant data.
2077  */
2078 void
2079 fix_cg(struct cg *cgp, int cgno)
2080 {
2081 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2082 	daddr32_t exp_freeoff, exp_nextfreeoff;
2083 	daddr32_t ndblk;
2084 
2085 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2086 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
2087 
2088 	if (cgp->cg_cgx != cgno) {
2089 		cgp->cg_cgx = cgno;
2090 	}
2091 
2092 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2093 		if (cgno == (sblock.fs_ncg - 1)) {
2094 			cgp->cg_ncyl = sblock.fs_ncyl -
2095 			    (sblock.fs_cpg * cgno);
2096 		} else {
2097 			cgp->cg_ncyl = sblock.fs_cpg;
2098 		}
2099 	}
2100 
2101 	if (cgp->cg_niblk != sblock.fs_ipg) {
2102 		/*
2103 		 * This is not used by the kernel, so it's pretty
2104 		 * harmless if it's wrong.
2105 		 */
2106 		cgp->cg_niblk = sblock.fs_ipg;
2107 	}
2108 
2109 	if (cgp->cg_ndblk != ndblk) {
2110 		cgp->cg_ndblk = ndblk;
2111 	}
2112 
2113 	/*
2114 	 * For the rotors, any position's valid, so pick the one we know
2115 	 * will always exist.
2116 	 */
2117 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2118 		cgp->cg_rotor = 0;
2119 	}
2120 
2121 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2122 		cgp->cg_frotor = 0;
2123 	}
2124 
2125 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2126 		cgp->cg_irotor = 0;
2127 	}
2128 
2129 	/*
2130 	 * For btotoff and boff, if they're misaligned they won't
2131 	 * match the expected values, so we're catching both cases
2132 	 * here.  Of course, if any of these are off, it seems likely
2133 	 * that the tables really won't be where we calculate they
2134 	 * should be anyway.
2135 	 */
2136 	if (cgp->cg_btotoff != exp_btotoff) {
2137 		cgp->cg_btotoff = exp_btotoff;
2138 	}
2139 
2140 	if (cgp->cg_boff != exp_boff) {
2141 		cgp->cg_boff = exp_boff;
2142 	}
2143 
2144 	if (cgp->cg_iusedoff != exp_iusedoff) {
2145 		cgp->cg_iusedoff = exp_iusedoff;
2146 	}
2147 
2148 	if (cgp->cg_freeoff != exp_freeoff) {
2149 		cgp->cg_freeoff = exp_freeoff;
2150 	}
2151 
2152 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2153 		cgp->cg_nextfreeoff = exp_nextfreeoff;
2154 	}
2155 
2156 	/*
2157 	 * Reset the magic, as we've recreated this cg, also
2158 	 * update the cg_time, as we're writing out the cg
2159 	 */
2160 	cgp->cg_magic = CG_MAGIC;
2161 	cgp->cg_time = time(NULL);
2162 
2163 	/*
2164 	 * We know there was at least one correctable problem,
2165 	 * or else we wouldn't have been called.  So instead of
2166 	 * marking the buffer dirty N times above, just do it
2167 	 * once here.
2168 	 */
2169 	cgdirty();
2170 }
2171 
2172 void
2173 examinelog(void (*cb)(daddr32_t))
2174 {
2175 	struct bufarea *bp;
2176 	extent_block_t *ebp;
2177 	extent_t *ep;
2178 	daddr32_t nfno, fno;
2179 	int i;
2180 	int j;
2181 
2182 	/*
2183 	 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2184 	 * we need to translate accordingly using logbtodb()
2185 	 */
2186 
2187 	if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2188 		if (debug) {
2189 			(void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2190 			    "Aborting log examination\n", \
2191 			    logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2192 		}
2193 		return;
2194 	}
2195 
2196 	/*
2197 	 * Read errors will return zeros, which will cause us
2198 	 * to do nothing harmful, so don't need to handle it.
2199 	 */
2200 	bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2201 	    (size_t)sblock.fs_bsize);
2202 	ebp = (void *)bp->b_un.b_buf;
2203 
2204 	/*
2205 	 * Does it look like a log allocation table?
2206 	 */
2207 	/* LINTED pointer cast is aligned */
2208 	if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2209 	    sblock.fs_bsize))
2210 		return;
2211 	if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2212 		return;
2213 
2214 	ep = &ebp->extents[0];
2215 	for (i = 0; i < ebp->nextents; ++i, ++ep) {
2216 		fno = logbtofrag(&sblock, ep->pbno);
2217 		nfno = dbtofsb(&sblock, ep->nbno);
2218 		for (j = 0; j < nfno; ++j, ++fno) {
2219 			/*
2220 			 * Invoke the callback first, so that pass1 can
2221 			 * mark the log blocks in-use.  Then, if any
2222 			 * subsequent pass over the log shows us that a
2223 			 * block got freed (say, it was also claimed by
2224 			 * an inode that we cleared), we can safely declare
2225 			 * the log bad.
2226 			 */
2227 			if (cb != NULL)
2228 				(*cb)(fno);
2229 			if (!testbmap(fno))
2230 				islogok = 0;
2231 		}
2232 	}
2233 	brelse(bp);
2234 
2235 	if (cb != NULL) {
2236 		fno = logbtofrag(&sblock, sblock.fs_logbno);
2237 		for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2238 			(*cb)(fno);
2239 	}
2240 }
2241 
2242 static void
2243 freelogblk(daddr32_t frag)
2244 {
2245 	freeblk(sblock.fs_logbno, frag, 1);
2246 }
2247 
2248 caddr_t
2249 file_id(fsck_ino_t inum, mode_t mode)
2250 {
2251 	static char name[MAXPATHLEN + 1];
2252 
2253 	if (lfdir == inum) {
2254 		return (lfname);
2255 	}
2256 
2257 	if ((mode & IFMT) == IFDIR) {
2258 		(void) strcpy(name, "DIR");
2259 	} else if ((mode & IFMT) == IFATTRDIR) {
2260 		(void) strcpy(name, "ATTR DIR");
2261 	} else if ((mode & IFMT) == IFSHAD) {
2262 		(void) strcpy(name, "ACL");
2263 	} else {
2264 		(void) strcpy(name, "FILE");
2265 	}
2266 
2267 	return (name);
2268 }
2269 
2270 /*
2271  * Simple initializer for inodesc structures, so users of only a few
2272  * fields don't have to worry about getting the right defaults for
2273  * everything out.
2274  */
2275 void
2276 init_inodesc(struct inodesc *idesc)
2277 {
2278 	/*
2279 	 * Most fields should be zero, just hit the special cases.
2280 	 */
2281 	(void) memset((void *)idesc, 0, sizeof (struct inodesc));
2282 	idesc->id_fix = DONTKNOW;
2283 	idesc->id_lbn = -1;
2284 	idesc->id_truncto = -1;
2285 	idesc->id_firsthole = -1;
2286 }
2287 
2288 /*
2289  * Compare routine for tsearch(C) to use on ino_t instances.
2290  */
2291 int
2292 ino_t_cmp(const void *left, const void *right)
2293 {
2294 	const fsck_ino_t lino = (const fsck_ino_t)left;
2295 	const fsck_ino_t rino = (const fsck_ino_t)right;
2296 
2297 	return (lino - rino);
2298 }
2299 
2300 int
2301 cgisdirty(void)
2302 {
2303 	return (cgblk.b_dirty);
2304 }
2305 
2306 void
2307 cgflush(void)
2308 {
2309 	flush(fswritefd, &cgblk);
2310 }
2311 
2312 void
2313 dirty(struct bufarea *bp)
2314 {
2315 	if (fswritefd < 0) {
2316 		/*
2317 		 * No one should call dirty() in read only mode.
2318 		 * But if one does, it's not fatal issue. Just warn him.
2319 		 */
2320 		pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2321 	} else {
2322 		(bp)->b_dirty = 1;
2323 		isdirty = 1;
2324 	}
2325 }
2326 
2327 void
2328 initbarea(struct bufarea *bp)
2329 {
2330 	(bp)->b_dirty = 0;
2331 	(bp)->b_bno = (diskaddr_t)-1LL;
2332 	(bp)->b_flags = 0;
2333 	(bp)->b_cnt = 0;
2334 	(bp)->b_errs = 0;
2335 }
2336 
/*
 * Partition-sizing routines adapted from ../newfs/newfs.c.
 * They are needed because calcsb() uses mkfs to work out what the
 * superblock should be, and mkfs insists on being told how many
 * sectors to use.
 *
 * Error handling assumes we're never called while preening.
 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
 */

/* Kind of disk label found by get_device_size(); a LABEL_TYPE_* value. */
static int label_type;

#define	LABEL_TYPE_VTOC		1	/* read_extvtoc() succeeded */
#define	LABEL_TYPE_EFI		2	/* efi_alloc_and_read() succeeded */
#define	LABEL_TYPE_OTHER	3	/* no label; size found by probing */

/* One megabyte, in bytes. */
#define	MB			(1024 * 1024)
/* 2^31 sectors, i.e. one terabyte's worth of 512-byte sectors. */
#define	SECTORS_PER_TERABYTE	(1LL << 31)
/* Bound, in sectors, on how far the brute-force size probe will look. */
#define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
2360 
2361 diskaddr_t
2362 getdisksize(caddr_t disk, int fd)
2363 {
2364 	int rpm;
2365 	struct dk_geom g;
2366 	struct dk_cinfo ci;
2367 	diskaddr_t actual_size;
2368 
2369 	/*
2370 	 * get_device_size() determines the actual size of the
2371 	 * device, and also the disk's attributes, such as geometry.
2372 	 */
2373 	actual_size = get_device_size(fd, disk);
2374 
2375 	if (label_type == LABEL_TYPE_VTOC) {
2376 		if (ioctl(fd, DKIOCGGEOM, &g)) {
2377 			pwarn("%s: Unable to read Disk geometry", disk);
2378 			return (0);
2379 		}
2380 		if (sblock.fs_nsect == 0)
2381 			sblock.fs_nsect = g.dkg_nsect;
2382 		if (sblock.fs_ntrak == 0)
2383 			sblock.fs_ntrak = g.dkg_nhead;
2384 		if (sblock.fs_rps == 0) {
2385 			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2386 			sblock.fs_rps = rpm / 60;
2387 		}
2388 	}
2389 
2390 	if (sblock.fs_bsize == 0)
2391 		sblock.fs_bsize = MAXBSIZE;
2392 
2393 	/*
2394 	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2395 	 * information is not available, default to the min of a MB and
2396 	 * maxphys.
2397 	 */
2398 	if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2399 		sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2400 		if (sblock.fs_maxcontig < 0) {
2401 			int gotit, maxphys;
2402 
2403 			gotit = fsgetmaxphys(&maxphys, NULL);
2404 
2405 			/*
2406 			 * If we cannot get the maxphys value, default
2407 			 * to ufs_maxmaxphys (MB).
2408 			 */
2409 			if (gotit) {
2410 				sblock.fs_maxcontig = MIN(maxphys, MB);
2411 			} else {
2412 				sblock.fs_maxcontig = MB;
2413 			}
2414 		}
2415 		sblock.fs_maxcontig /= sblock.fs_bsize;
2416 	}
2417 
2418 	return (actual_size);
2419 }
2420 
2421 /*
2422  * Figure out how big the partition we're dealing with is.
2423  */
2424 static diskaddr_t
2425 get_device_size(int fd, caddr_t name)
2426 {
2427 	struct extvtoc vtoc;
2428 	struct dk_gpt *efi_vtoc;
2429 	diskaddr_t slicesize = 0;
2430 
2431 	int index = read_extvtoc(fd, &vtoc);
2432 
2433 	if (index >= 0) {
2434 		label_type = LABEL_TYPE_VTOC;
2435 	} else {
2436 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2437 			/* it might be an EFI label */
2438 			index = efi_alloc_and_read(fd, &efi_vtoc);
2439 			if (index >= 0)
2440 				label_type = LABEL_TYPE_EFI;
2441 		}
2442 	}
2443 
2444 	if (index < 0) {
2445 		/*
2446 		 * Since both attempts to read the label failed, we're
2447 		 * going to fall back to a brute force approach to
2448 		 * determining the device's size:  see how far out we can
2449 		 * perform reads on the device.
2450 		 */
2451 
2452 		slicesize = brute_force_get_device_size(fd);
2453 		if (slicesize == 0) {
2454 			switch (index) {
2455 			case VT_ERROR:
2456 				pwarn("%s: %s\n", name, strerror(errno));
2457 				break;
2458 			case VT_EIO:
2459 				pwarn("%s: I/O error accessing VTOC", name);
2460 				break;
2461 			case VT_EINVAL:
2462 				pwarn("%s: Invalid field in VTOC", name);
2463 				break;
2464 			default:
2465 				pwarn("%s: unknown error %d accessing VTOC",
2466 				    name, index);
2467 				break;
2468 			}
2469 			return (0);
2470 		} else {
2471 			label_type = LABEL_TYPE_OTHER;
2472 		}
2473 	}
2474 
2475 	if (label_type == LABEL_TYPE_EFI) {
2476 		slicesize = efi_vtoc->efi_parts[index].p_size;
2477 		efi_free(efi_vtoc);
2478 	} else if (label_type == LABEL_TYPE_VTOC) {
2479 		slicesize = vtoc.v_part[index].p_size;
2480 	}
2481 
2482 	return (slicesize);
2483 }
2484 
2485 /*
2486  * brute_force_get_device_size
2487  *
2488  * Determine the size of the device by seeing how far we can
2489  * read.  Doing an llseek( , , SEEK_END) would probably work
2490  * in most cases, but we've seen at least one third-party driver
2491  * which doesn't correctly support the SEEK_END option when the
2492  * the device is greater than a terabyte.
2493  */
2494 
2495 static diskaddr_t
2496 brute_force_get_device_size(int fd)
2497 {
2498 	diskaddr_t	min_fail = 0;
2499 	diskaddr_t	max_succeed = 0;
2500 	diskaddr_t	cur_db_off;
2501 	char 		buf[DEV_BSIZE];
2502 
2503 	/*
2504 	 * First, see if we can read the device at all, just to
2505 	 * eliminate errors that have nothing to do with the
2506 	 * device's size.
2507 	 */
2508 
2509 	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2510 	    ((read(fd, buf, DEV_BSIZE)) == -1))
2511 		return (0);  /* can't determine size */
2512 
2513 	/*
2514 	 * Now, go sequentially through the multiples of 4TB
2515 	 * to find the first read that fails (this isn't strictly
2516 	 * the most efficient way to find the actual size if the
2517 	 * size really could be anything between 0 and 2**64 bytes.
2518 	 * We expect the sizes to be less than 16 TB for some time,
2519 	 * so why do a bunch of reads that are larger than that?
2520 	 * However, this algorithm *will* work for sizes of greater
2521 	 * than 16 TB.  We're just not optimizing for those sizes.)
2522 	 */
2523 
2524 	/*
2525 	 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2526 	 * We're using > 32-bit constants here.  Therefore, its flow
2527 	 * analysis is wrong.  For the time being, ignore complaints
2528 	 * from it about the body of the for() being unreached.
2529 	 */
2530 	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2531 	    (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2532 	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2533 		if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2534 		    SEEK_SET) == -1) ||
2535 		    (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2536 			min_fail = cur_db_off;
2537 		else
2538 			max_succeed = cur_db_off;
2539 	}
2540 
2541 	/*
2542 	 * XXX Same lint flow analysis problem as above.
2543 	 */
2544 	if (min_fail == 0)
2545 		return (0);
2546 
2547 	/*
2548 	 * We now know that the size of the device is less than
2549 	 * min_fail and greater than or equal to max_succeed.  Now
2550 	 * keep splitting the difference until the actual size in
2551 	 * sectors in known.  We also know that the difference
2552 	 * between max_succeed and min_fail at this time is
2553 	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2554 	 * simplifies the math below.
2555 	 */
2556 
2557 	while (min_fail - max_succeed > 1) {
2558 		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2559 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2560 		    SEEK_SET)) == -1) ||
2561 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2562 			min_fail = cur_db_off;
2563 		else
2564 			max_succeed = cur_db_off;
2565 	}
2566 
2567 	/* the size is the last successfully read sector offset plus one */
2568 	return (max_succeed + 1);
2569 }
2570 
2571 static void
2572 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2573 {
2574 	struct dinode *dp;
2575 	char pathbuf[MAXPATHLEN + 1];
2576 
2577 	vpwarn(fmt, ap);
2578 	(void) putchar(' ');
2579 	pinode(ino);
2580 	(void) printf("\n");
2581 	getpathname(pathbuf, cwd, ino);
2582 	if (ino < UFSROOTINO || ino > maxino) {
2583 		pfatal("NAME=%s\n", pathbuf);
2584 		return;
2585 	}
2586 	dp = ginode(ino);
2587 	if (ftypeok(dp))
2588 		pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2589 	else
2590 		pfatal("NAME=%s\n", pathbuf);
2591 }
2592 
2593 void
2594 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2595 {
2596 	va_list ap;
2597 
2598 	va_start(ap, fmt);
2599 	vfileerror(ino, ino, fmt, ap);
2600 	va_end(ap);
2601 }
2602 
2603 static void
2604 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2605 {
2606 	vfileerror(ino, ino, fmt, ap);
2607 }
2608 
2609 void
2610 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2611 {
2612 	va_list ap;
2613 
2614 	va_start(ap, fmt);
2615 	vfileerror(cwd, ino, fmt, ap);
2616 	va_end(ap);
2617 }
2618 
2619 /*
2620  * Adds the given inode to the orphaned-directories list, limbo_dirs.
2621  * Assumes that the caller has set INCLEAR in the inode's statemap[]
2622  * entry.
2623  *
2624  * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2625  * meaning it's effectively an orphan.  It needs to be noted now, so
2626  * it will be remembered in pass 4.
2627  */
2628 
2629 void
2630 add_orphan_dir(fsck_ino_t ino)
2631 {
2632 	if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2633 		errexit("add_orphan_dir: out of memory");
2634 }
2635 
2636 /*
2637  * Remove an inode from the orphaned-directories list, presumably
2638  * because it's been cleared.
2639  */
2640 void
2641 remove_orphan_dir(fsck_ino_t ino)
2642 {
2643 	(void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2644 }
2645 
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 */

/*
 * Compute a 32-bit additive checksum and store it in *sp.
 *
 *	sp	where the checksum is stored
 *	lp	start of the data to be summed
 *	nb	length of the data in bytes; only whole 32-bit words are
 *		summed, and a count of zero or less sums nothing
 *
 * *sp is cleared before the data is summed, so if it lies within the
 * summed region it contributes zero to the result.
 *
 * The sum is accumulated as an unsigned quantity.  Checksums are
 * expected to wrap around, and wraparound is only well-defined for
 * unsigned arithmetic; the bit pattern stored is the same one a
 * wrapping signed addition would produce.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	int nwords;

	*sp = 0;
	for (nwords = nb / (int)sizeof (int32_t); nwords > 0; nwords--)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}
2661 
/*
 * Verify a checksum of the kind log_setsum() produces.
 *
 *	sp	the stored checksum
 *	lp	start of the data it covers
 *	nb	length of that data in bytes
 *
 * Returns 1 if recomputing the checksum gives the stored value and
 * 0 if not.  On a mismatch the stored value is put back, so *sp is
 * left as it was found in either case.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;

	/* Recompute in place; this overwrites *sp with the fresh sum. */
	log_setsum(sp, lp, nb);
	if (*sp == stored)
		return (1);

	*sp = stored;
	return (0);
}
2674