xref: /titanic_44/usr/src/cmd/fs.d/ufs/fsck/utilities.c (revision 5e01956f3000408c2a2c5a08c8d0acf2c2a9d8ee)
1 /*
2  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
6 /*	  All Rights Reserved  	*/
7 
8 /*
9  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms are permitted
13  * provided that: (1) source distributions retain this entire copyright
14  * notice and comment, and (2) distributions including binaries display
15  * the following acknowledgement:  ``This product includes software
16  * developed by the University of California, Berkeley and its contributors''
17  * in the documentation or other materials provided with the distribution
18  * and in all advertising materials mentioning features or use of this
19  * software. Neither the name of the University nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
24  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #include <stdarg.h>
31 #include <libadm.h>
32 #include <note.h>
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/mntent.h>
36 #include <sys/filio.h>
37 #include <sys/fs/ufs_fs.h>
38 #include <sys/vnode.h>
39 #include <sys/fs/ufs_acl.h>
40 #include <sys/fs/ufs_inode.h>
41 #include <sys/fs/ufs_log.h>
42 #define	_KERNEL
43 #include <sys/fs/ufs_fsdir.h>
44 #undef _KERNEL
45 #include <sys/mnttab.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <fcntl.h>
49 #include <signal.h>
50 #include <string.h>
51 #include <ctype.h>
52 #include <sys/vfstab.h>
53 #include <sys/lockfs.h>
54 #include <errno.h>
55 #include <sys/cmn_err.h>
56 #include <sys/dkio.h>
57 #include <sys/vtoc.h>
58 #include <sys/efi_partition.h>
59 #include <fslib.h>
60 #include <inttypes.h>
61 #include "fsck.h"
62 
63 caddr_t mount_point = NULL;
64 
65 static int64_t diskreads, totalreads;	/* Disk cache statistics */
66 
67 static int log_checksum(int32_t *, int32_t *, int);
68 static void vdirerror(fsck_ino_t, caddr_t, va_list);
69 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
70 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
71 static void vpwarn(caddr_t, va_list);
72 static int getaline(FILE *, caddr_t, int);
73 static struct bufarea *alloc_bufarea(void);
74 static void rwerror(caddr_t, diskaddr_t, int rval);
75 static void debugclean(void);
76 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
77 static void freelogblk(daddr32_t);
78 static void verrexit(caddr_t, va_list);
79 static void vpfatal(caddr_t, va_list);
80 static diskaddr_t get_device_size(int, caddr_t);
81 static diskaddr_t brute_force_get_device_size(int);
82 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
83 	    daddr32_t *, daddr32_t *, daddr32_t *);
84 
85 int
86 ftypeok(struct dinode *dp)
87 {
88 	switch (dp->di_mode & IFMT) {
89 
90 	case IFDIR:
91 	case IFREG:
92 	case IFBLK:
93 	case IFCHR:
94 	case IFLNK:
95 	case IFSOCK:
96 	case IFIFO:
97 	case IFSHAD:
98 	case IFATTRDIR:
99 		return (1);
100 
101 	default:
102 		if (debug)
103 			(void) printf("bad file type 0%o\n", dp->di_mode);
104 		return (0);
105 	}
106 }
107 
108 int
109 acltypeok(struct dinode *dp)
110 {
111 	if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
112 		return (1);
113 
114 	if (debug)
115 		(void) printf("bad file type for acl I=%d: 0%o\n",
116 		    dp->di_shadow, dp->di_mode);
117 	return (0);
118 }
119 
120 NOTE(PRINTFLIKE(1))
121 int
122 reply(caddr_t fmt, ...)
123 {
124 	va_list ap;
125 	char line[80];
126 
127 	if (preen)
128 		pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
129 
130 	if (mflag) {
131 		/*
132 		 * We don't know what's going on, so don't potentially
133 		 * make things worse by having errexit() write stuff
134 		 * out to disk.
135 		 */
136 		(void) printf(
137 		    "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
138 		    devname);
139 		exit(EXERRFATAL);
140 	}
141 
142 	va_start(ap, fmt);
143 	(void) putchar('\n');
144 	(void) vprintf(fmt, ap);
145 	(void) putchar('?');
146 	(void) putchar(' ');
147 	va_end(ap);
148 
149 	if (nflag || fswritefd < 0) {
150 		(void) printf(" no\n\n");
151 		return (0);
152 	}
153 	if (yflag) {
154 		(void) printf(" yes\n\n");
155 		return (1);
156 	}
157 	(void) fflush(stdout);
158 	if (getaline(stdin, line, sizeof (line)) == EOF)
159 		errexit("\n");
160 	(void) printf("\n");
161 	if (line[0] == 'y' || line[0] == 'Y') {
162 		return (1);
163 	} else {
164 		return (0);
165 	}
166 }
167 
/*
 * Read one line from FP into LOC, discarding every whitespace character.
 * At most MAXLEN - 1 characters are stored; the remainder of an
 * over-long line is consumed and dropped.
 *
 * Whenever MAXLEN is positive, LOC is NUL-terminated on return, even
 * when EOF cuts the line short.  With a non-positive MAXLEN nothing is
 * stored at all.
 *
 * Returns the number of characters stored, or EOF if end-of-file (or a
 * read error) was met before a newline.
 */
int
getaline(FILE *fp, caddr_t loc, int maxlen)
{
	int n;
	caddr_t p, lastloc;

	/*
	 * No room even for the terminator: swallow the line but leave
	 * the buffer untouched.
	 */
	if (maxlen <= 0) {
		while ((n = getc(fp)) != '\n') {
			if (n == EOF)
				return (EOF);
		}
		return (0);
	}

	p = loc;
	lastloc = &loc[maxlen - 1];
	while ((n = getc(fp)) != '\n') {
		if (n == EOF) {
			/* don't hand back an unterminated buffer */
			*p = '\0';
			return (EOF);
		}
		if (!isspace(n) && p < lastloc)
			*p++ = (char)n;
	}
	*p = '\0';
	/* p never moves past lastloc, so the difference fits in an int */
	return ((int)(p - loc));
}
186 
187 /*
188  * Malloc buffers and set up cache.
189  */
190 void
191 bufinit(void)
192 {
193 	struct bufarea *bp;
194 	int bufcnt, i;
195 	caddr_t bufp;
196 
197 	bufp = malloc((size_t)sblock.fs_bsize);
198 	if (bufp == NULL)
199 		goto nomem;
200 	initbarea(&cgblk);
201 	cgblk.b_un.b_buf = bufp;
202 	bufhead.b_next = bufhead.b_prev = &bufhead;
203 	bufcnt = MAXBUFSPACE / sblock.fs_bsize;
204 	if (bufcnt < MINBUFS)
205 		bufcnt = MINBUFS;
206 	for (i = 0; i < bufcnt; i++) {
207 		bp = (struct bufarea *)malloc(sizeof (struct bufarea));
208 		if (bp == NULL) {
209 			if (i >= MINBUFS)
210 				goto noalloc;
211 			goto nomem;
212 		}
213 
214 		bufp = malloc((size_t)sblock.fs_bsize);
215 		if (bufp == NULL) {
216 			free((void *)bp);
217 			if (i >= MINBUFS)
218 				goto noalloc;
219 			goto nomem;
220 		}
221 		initbarea(bp);
222 		bp->b_un.b_buf = bufp;
223 		bp->b_prev = &bufhead;
224 		bp->b_next = bufhead.b_next;
225 		bufhead.b_next->b_prev = bp;
226 		bufhead.b_next = bp;
227 	}
228 noalloc:
229 	bufhead.b_size = i;	/* save number of buffers */
230 	pbp = pdirbp = NULL;
231 	return;
232 
233 nomem:
234 	errexit("cannot allocate buffer pool\n");
235 	/* NOTREACHED */
236 }
237 
238 /*
239  * Undo a bufinit().
240  */
241 void
242 unbufinit(void)
243 {
244 	int cnt;
245 	struct bufarea *bp, *nbp;
246 
247 	cnt = 0;
248 	for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
249 		cnt++;
250 		flush(fswritefd, bp);
251 		nbp = bp->b_prev;
252 		/*
253 		 * We're discarding the entire chain, so this isn't
254 		 * technically necessary.  However, it doesn't hurt
255 		 * and lint's data flow analysis is much happier
256 		 * (this prevents it from thinking there's a chance
257 		 * of our using memory elsewhere after it's been released).
258 		 */
259 		nbp->b_next = bp->b_next;
260 		bp->b_next->b_prev = nbp;
261 		free((void *)bp->b_un.b_buf);
262 		free((void *)bp);
263 	}
264 
265 	if (bufhead.b_size != cnt)
266 		errexit("Panic: cache lost %d buffers\n",
267 		    bufhead.b_size - cnt);
268 }
269 
270 /*
271  * Manage a cache of directory blocks.
272  */
273 struct bufarea *
274 getdatablk(daddr32_t blkno, size_t size)
275 {
276 	struct bufarea *bp;
277 
278 	for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
279 		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
280 			goto foundit;
281 		}
282 	for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
283 		if ((bp->b_flags & B_INUSE) == 0)
284 			break;
285 	if (bp == &bufhead) {
286 		bp = alloc_bufarea();
287 		if (bp == NULL) {
288 			errexit("deadlocked buffer pool\n");
289 			/* NOTREACHED */
290 		}
291 	}
292 	/*
293 	 * We're at the same logical level as getblk(), so if there
294 	 * are any errors, we'll let our caller handle them.
295 	 */
296 	diskreads++;
297 	(void) getblk(bp, blkno, size);
298 
299 foundit:
300 	totalreads++;
301 	bp->b_cnt++;
302 	/*
303 	 * Move the buffer to head of linked list if it isn't
304 	 * already there.
305 	 */
306 	if (bufhead.b_next != bp) {
307 		bp->b_prev->b_next = bp->b_next;
308 		bp->b_next->b_prev = bp->b_prev;
309 		bp->b_prev = &bufhead;
310 		bp->b_next = bufhead.b_next;
311 		bufhead.b_next->b_prev = bp;
312 		bufhead.b_next = bp;
313 	}
314 	bp->b_flags |= B_INUSE;
315 	return (bp);
316 }
317 
318 void
319 brelse(struct bufarea *bp)
320 {
321 	bp->b_cnt--;
322 	if (bp->b_cnt == 0) {
323 		bp->b_flags &= ~B_INUSE;
324 	}
325 }
326 
327 struct bufarea *
328 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
329 {
330 	diskaddr_t dblk;
331 
332 	dblk = fsbtodb(&sblock, blk);
333 	if (bp->b_bno == dblk)
334 		return (bp);
335 	flush(fswritefd, bp);
336 	bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
337 	bp->b_bno = dblk;
338 	bp->b_size = size;
339 	return (bp);
340 }
341 
342 void
343 flush(int fd, struct bufarea *bp)
344 {
345 	int i, j;
346 	caddr_t sip;
347 	long size;
348 
349 	if (!bp->b_dirty)
350 		return;
351 
352 	/*
353 	 * It's not our buf, so if there are errors, let whoever
354 	 * acquired it deal with the actual problem.
355 	 */
356 	if (bp->b_errs != 0)
357 		pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
358 	bp->b_dirty = 0;
359 	bp->b_errs = 0;
360 	bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
361 	if (bp != &sblk) {
362 		return;
363 	}
364 
365 	/*
366 	 * We're flushing the superblock, so make sure all the
367 	 * ancillary bits go out as well.
368 	 */
369 	sip = (caddr_t)sblock.fs_u.fs_csp;
370 	for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
371 		size = sblock.fs_cssize - i < sblock.fs_bsize ?
372 		    sblock.fs_cssize - i : sblock.fs_bsize;
373 		bwrite(fswritefd, sip,
374 		    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
375 		    size);
376 		sip += size;
377 	}
378 }
379 
380 static void
381 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
382 {
383 	int olderr = errno;
384 
385 	if (!preen)
386 		(void) printf("\n");
387 
388 	if (rval == -1)
389 		pfatal("CANNOT %s: DISK BLOCK %lld: %s",
390 		    mesg, blk, strerror(olderr));
391 	else
392 		pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
393 
394 	if (reply("CONTINUE") == 0) {
395 		exitstat = EXERRFATAL;
396 		errexit("Program terminated\n");
397 	}
398 }
399 
400 void
401 ckfini(void)
402 {
403 	int64_t percentage;
404 
405 	if (fswritefd < 0)
406 		return;
407 
408 	flush(fswritefd, &sblk);
409 	/*
410 	 * Were we using a backup superblock?
411 	 */
412 	if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
413 		if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
414 			sblk.b_bno = SBOFF / dev_bsize;
415 			sbdirty();
416 			flush(fswritefd, &sblk);
417 		}
418 	}
419 	flush(fswritefd, &cgblk);
420 	if (cgblk.b_un.b_buf != NULL) {
421 		free((void *)cgblk.b_un.b_buf);
422 		cgblk.b_un.b_buf = NULL;
423 	}
424 	unbufinit();
425 	pbp = NULL;
426 	pdirbp = NULL;
427 	if (debug) {
428 		/*
429 		 * Note that we only count cache-related reads.
430 		 * Anything that called fsck_bread() or getblk()
431 		 * directly are explicitly not cached, so they're not
432 		 * included here.
433 		 */
434 		if (totalreads != 0)
435 			percentage = diskreads * 100 / totalreads;
436 		else
437 			percentage = 0;
438 
439 		(void) printf("cache missed %lld of %lld reads (%lld%%)\n",
440 		    (longlong_t)diskreads, (longlong_t)totalreads,
441 		    (longlong_t)percentage);
442 	}
443 
444 	(void) close(fsreadfd);
445 	(void) close(fswritefd);
446 	fsreadfd = -1;
447 	fswritefd = -1;
448 }
449 
450 int
451 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
452 {
453 	caddr_t cp;
454 	int i;
455 	int errs;
456 	offset_t offset = ldbtob(blk);
457 	offset_t addr;
458 
459 	/*
460 	 * In our universe, nothing exists before the superblock, so
461 	 * just pretend it's always zeros.  This is the complement of
462 	 * bwrite()'s ignoring write requests into that space.
463 	 */
464 	if (blk < SBLOCK) {
465 		if (debug)
466 			(void) printf(
467 			    "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
468 			    SBLOCK, (longlong_t)blk);
469 		(void) memset(buf, 0, (size_t)size);
470 		return (1);
471 	}
472 
473 	if (llseek(fd, offset, SEEK_SET) < 0) {
474 		rwerror("SEEK", blk, -1);
475 	}
476 
477 	if ((i = read(fd, buf, size)) == size) {
478 		return (0);
479 	}
480 	rwerror("READ", blk, i);
481 	if (llseek(fd, offset, SEEK_SET) < 0) {
482 		rwerror("SEEK", blk, -1);
483 	}
484 	errs = 0;
485 	(void) memset(buf, 0, (size_t)size);
486 	pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
487 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
488 		addr = ldbtob(blk + i);
489 		if (llseek(fd, addr, SEEK_SET) < 0 ||
490 		    read(fd, cp, (int)secsize) < 0) {
491 			iscorrupt = 1;
492 			(void) printf(" %llu", blk + (u_longlong_t)i);
493 			errs++;
494 		}
495 	}
496 	(void) printf("\n");
497 	return (errs);
498 }
499 
500 void
501 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
502 {
503 	int i;
504 	int n;
505 	caddr_t cp;
506 	offset_t offset = ldbtob(blk);
507 	offset_t addr;
508 
509 	if (fd < 0)
510 		return;
511 	if (blk < SBLOCK) {
512 		if (debug)
513 			(void) printf(
514 		    "WARNING: Attempt to write illegal blkno %lld on %s\n",
515 			    (longlong_t)blk, devname);
516 		return;
517 	}
518 	if (llseek(fd, offset, SEEK_SET) < 0) {
519 		rwerror("SEEK", blk, -1);
520 	}
521 	if ((i = write(fd, buf, (int)size)) == size) {
522 		fsmodified = 1;
523 		return;
524 	}
525 	rwerror("WRITE", blk, i);
526 	if (llseek(fd, offset, SEEK_SET) < 0) {
527 		rwerror("SEEK", blk, -1);
528 	}
529 	pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
530 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
531 		n = 0;
532 		addr = ldbtob(blk + i);
533 		if (llseek(fd, addr, SEEK_SET) < 0 ||
534 		    (n = write(fd, cp, DEV_BSIZE)) < 0) {
535 			iscorrupt = 1;
536 			(void) printf(" %llu", blk + (u_longlong_t)i);
537 		} else if (n > 0) {
538 			fsmodified = 1;
539 		}
540 
541 	}
542 	(void) printf("\n");
543 }
544 
545 /*
546  * Allocates the specified number of contiguous fragments.
547  */
548 daddr32_t
549 allocblk(int wantedfrags)
550 {
551 	int block, leadfrag, tailfrag;
552 	daddr32_t selected;
553 	size_t size;
554 	struct bufarea *bp;
555 
556 	/*
557 	 * It's arguable whether we should just fail, or instead
558 	 * error out here.  Since we should only ever be asked for
559 	 * a single fragment or an entire block (i.e., sblock.fs_frag),
560 	 * we'll fail out because anything else means somebody
561 	 * changed code without considering all of the ramifications.
562 	 */
563 	if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
564 		exitstat = EXERRFATAL;
565 		errexit("allocblk() asked for %d frags.  "
566 		    "Legal range is 1 to %d",
567 		    wantedfrags, sblock.fs_frag);
568 	}
569 
570 	/*
571 	 * For each filesystem block, look at every possible starting
572 	 * offset within the block such that we can get the number of
573 	 * contiguous fragments that we need.  This is a drastically
574 	 * simplified version of the kernel's mapsearch() and alloc*().
575 	 * It's also correspondingly slower.
576 	 */
577 	for (block = 0; block < maxfsblock - sblock.fs_frag;
578 	    block += sblock.fs_frag) {
579 		for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
580 		    leadfrag++) {
581 			/*
582 			 * Is first fragment of candidate run available?
583 			 */
584 			if (testbmap(block + leadfrag))
585 				continue;
586 			/*
587 			 * Are the rest of them available?
588 			 */
589 			for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
590 				if (testbmap(block + leadfrag + tailfrag))
591 					break;
592 			if (tailfrag < wantedfrags) {
593 				/*
594 				 * No, skip the known-unusable run.
595 				 */
596 				leadfrag += tailfrag;
597 				continue;
598 			}
599 			/*
600 			 * Found what we need, so claim them.
601 			 */
602 			for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
603 				setbmap(block + leadfrag + tailfrag);
604 			n_blks += wantedfrags;
605 			size = wantedfrags * sblock.fs_fsize;
606 			selected = block + leadfrag;
607 			bp = getdatablk(selected, size);
608 			(void) memset((void *)bp->b_un.b_buf, 0, size);
609 			dirty(bp);
610 			brelse(bp);
611 			if (debug)
612 				(void) printf(
613 		    "allocblk: selected %d (in block %d), frags %d, size %d\n",
614 				    selected, selected % sblock.fs_bsize,
615 				    wantedfrags, (int)size);
616 			return (selected);
617 		}
618 	}
619 	return (0);
620 }
621 
622 /*
623  * Free a previously allocated block
624  */
625 void
626 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
627 {
628 	struct inodesc idesc;
629 
630 	if (debug)
631 		(void) printf("debug: freeing %d fragments starting at %d\n",
632 		    frags, blkno);
633 
634 	init_inodesc(&idesc);
635 
636 	idesc.id_number = ino;
637 	idesc.id_blkno = blkno;
638 	idesc.id_numfrags = frags;
639 	idesc.id_truncto = -1;
640 
641 	/*
642 	 * Nothing in the return status has any relevance to how
643 	 * we're using pass4check(), so just ignore it.
644 	 */
645 	(void) pass4check(&idesc);
646 }
647 
648 /*
649  * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
650  * that the given buffer is at least MAXPATHLEN + 1 characters.
651  */
652 void
653 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
654 {
655 	int len;
656 	caddr_t cp;
657 	struct dinode *dp;
658 	struct inodesc idesc;
659 	struct inoinfo *inp;
660 
661 	if (debug)
662 		(void) printf("debug: getpathname(curdir %d, ino %d)\n",
663 		    curdir, ino);
664 
665 	if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
666 		(void) strcpy(namebuf, "?");
667 		return;
668 	}
669 
670 	if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
671 		(void) strcpy(namebuf, "/");
672 		return;
673 	}
674 
675 	init_inodesc(&idesc);
676 	idesc.id_type = DATA;
677 	cp = &namebuf[MAXPATHLEN - 1];
678 	*cp = '\0';
679 
680 	/*
681 	 * In the case of extended attributes, our
682 	 * parent won't necessarily be a directory, so just
683 	 * return what we've found with a prefix indicating
684 	 * that it's an XATTR.  Presumably our caller will
685 	 * know what's going on and do something useful, like
686 	 * work out the path of the parent and then combine
687 	 * the two names.
688 	 *
689 	 * Can't use strcpy(), etc, because we've probably
690 	 * already got some name information in the buffer and
691 	 * the usual trailing \0 would lose it.
692 	 */
693 	dp = ginode(curdir);
694 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
695 		idesc.id_number = curdir;
696 		idesc.id_parent = ino;
697 		idesc.id_func = findname;
698 		idesc.id_name = namebuf;
699 		idesc.id_fix = NOFIX;
700 		if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
701 			*cp-- = '?';
702 		}
703 
704 		len = sizeof (XATTR_DIR_NAME) - 1;
705 		cp -= len;
706 		(void) memmove(cp, XATTR_DIR_NAME, len);
707 		goto attrname;
708 	}
709 
710 	/*
711 	 * If curdir == ino, need to get a handle on .. so we
712 	 * can search it for ino's name.  Otherwise, just search
713 	 * the given directory for ino.  Repeat until out of space
714 	 * or a full path has been built.
715 	 */
716 	if (curdir != ino) {
717 		idesc.id_parent = curdir;
718 		goto namelookup;
719 	}
720 	while (ino != UFSROOTINO && ino != 0) {
721 		idesc.id_number = ino;
722 		idesc.id_func = findino;
723 		idesc.id_name = "..";
724 		idesc.id_fix = NOFIX;
725 		if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
726 			inp = getinoinfo(ino);
727 			if ((inp == NULL) || (inp->i_parent == 0)) {
728 				break;
729 			}
730 			idesc.id_parent = inp->i_parent;
731 		}
732 
733 		/*
734 		 * To get this far, id_parent must have the inode
735 		 * number for `..' in it.  By definition, that's got
736 		 * to be a directory, so search it for the inode of
737 		 * interest.
738 		 */
739 namelookup:
740 		idesc.id_number = idesc.id_parent;
741 		idesc.id_parent = ino;
742 		idesc.id_func = findname;
743 		idesc.id_name = namebuf;
744 		idesc.id_fix = NOFIX;
745 		if ((ckinode(ginode(idesc.id_number),
746 		    &idesc, CKI_TRAVERSE) & FOUND) == 0) {
747 			break;
748 		}
749 		/*
750 		 * Prepend to what we've accumulated so far.  If
751 		 * there's not enough room for even one more path element
752 		 * (of the worst-case length), then bail out.
753 		 */
754 		len = strlen(namebuf);
755 		cp -= len;
756 		if (cp < &namebuf[MAXNAMLEN])
757 			break;
758 		(void) memmove(cp, namebuf, len);
759 		*--cp = '/';
760 
761 		/*
762 		 * Corner case for a looped-to-itself directory.
763 		 */
764 		if (ino == idesc.id_number)
765 			break;
766 
767 		/*
768 		 * Climb one level of the hierarchy.  In other words,
769 		 * the current .. becomes the inode to search for and
770 		 * its parent becomes the directory to search in.
771 		 */
772 		ino = idesc.id_number;
773 	}
774 
775 	/*
776 	 * If we hit a discontinuity in the hierarchy, indicate it by
777 	 * prefixing the path so far with `?'.  Otherwise, the first
778 	 * character will be `/' as a side-effect of the *--cp above.
779 	 *
780 	 * The special case is to handle the situation where we're
781 	 * trying to look something up in UFSROOTINO, but didn't find
782 	 * it.
783 	 */
784 	if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
785 		if (cp > namebuf)
786 			cp--;
787 		*cp = '?';
788 	}
789 
790 	/*
791 	 * The invariants being used for buffer integrity are:
792 	 * - namebuf[] is terminated with \0 before anything else
793 	 * - cp is always <= the last element of namebuf[]
794 	 * - the new path element is always stored at the
795 	 *   beginning of namebuf[], and is no more than MAXNAMLEN-1
796 	 *   characters
797 	 * - cp is is decremented by the number of characters in
798 	 *   the new path element
799 	 * - if, after the above accounting for the new element's
800 	 *   size, there is no longer enough room at the beginning of
801 	 *   namebuf[] for a full-sized path element and a slash,
802 	 *   terminate the loop.  cp is in the range
803 	 *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
804 	 */
805 attrname:
806 	/* LINTED per the above discussion */
807 	(void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
808 }
809 
810 /* ARGSUSED */
811 void
812 catch(int dummy)
813 {
814 	ckfini();
815 	exit(EXSIGNAL);
816 }
817 
818 /*
819  * When preening, allow a single quit to signal
820  * a special exit after filesystem checks complete
821  * so that reboot sequence may be interrupted.
822  */
823 /* ARGSUSED */
824 void
825 catchquit(int dummy)
826 {
827 	(void) printf("returning to single-user after filesystem check\n");
828 	interrupted = 1;
829 	(void) signal(SIGQUIT, SIG_DFL);
830 }
831 
832 
833 /*
834  * determine whether an inode should be fixed.
835  */
836 NOTE(PRINTFLIKE(2))
837 int
838 dofix(struct inodesc *idesc, caddr_t msg, ...)
839 {
840 	int rval = 0;
841 	va_list ap;
842 
843 	va_start(ap, msg);
844 
845 	switch (idesc->id_fix) {
846 
847 	case DONTKNOW:
848 		if (idesc->id_type == DATA)
849 			vdirerror(idesc->id_number, msg, ap);
850 		else
851 			vpwarn(msg, ap);
852 		if (preen) {
853 			idesc->id_fix = FIX;
854 			rval = ALTERED;
855 			break;
856 		}
857 		if (reply("SALVAGE") == 0) {
858 			idesc->id_fix = NOFIX;
859 			break;
860 		}
861 		idesc->id_fix = FIX;
862 		rval = ALTERED;
863 		break;
864 
865 	case FIX:
866 		rval = ALTERED;
867 		break;
868 
869 	case NOFIX:
870 		break;
871 
872 	default:
873 		errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
874 	}
875 
876 	va_end(ap);
877 	return (rval);
878 }
879 
880 NOTE(PRINTFLIKE(1))
881 void
882 errexit(caddr_t fmt, ...)
883 {
884 	va_list ap;
885 
886 	va_start(ap, fmt);
887 	verrexit(fmt, ap);
888 	/* NOTREACHED */
889 }
890 
891 NOTE(PRINTFLIKE(1))
892 static void
893 verrexit(caddr_t fmt, va_list ap)
894 {
895 	static int recursing = 0;
896 
897 	if (!recursing) {
898 		recursing = 1;
899 		if (errorlocked || iscorrupt) {
900 			if (havesb && fswritefd >= 0) {
901 				sblock.fs_clean = FSBAD;
902 				sblock.fs_state = FSOKAY - (long)sblock.fs_time;
903 				sblock.fs_state = -sblock.fs_state;
904 				sbdirty();
905 				write_altsb(fswritefd);
906 				flush(fswritefd, &sblk);
907 			}
908 		}
909 		ckfini();
910 		recursing = 0;
911 	}
912 	(void) vprintf(fmt, ap);
913 	if (fmt[strlen(fmt) - 1] != '\n')
914 		(void) putchar('\n');
915 	exit((exitstat != 0) ? exitstat : EXERRFATAL);
916 }
917 
918 /*
919  * An unexpected inconsistency occured.
920  * Die if preening, otherwise just print message and continue.
921  */
922 NOTE(PRINTFLIKE(1))
923 void
924 pfatal(caddr_t fmt, ...)
925 {
926 	va_list ap;
927 
928 	va_start(ap, fmt);
929 	vpfatal(fmt, ap);
930 	va_end(ap);
931 }
932 
933 NOTE(PRINTFLIKE(1))
934 static void
935 vpfatal(caddr_t fmt, va_list ap)
936 {
937 	if (preen) {
938 		if (*fmt != '\0') {
939 			(void) printf("%s: ", devname);
940 			(void) vprintf(fmt, ap);
941 			(void) printf("\n");
942 		}
943 		(void) printf(
944 		    "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
945 		    devname);
946 		if (havesb && fswritefd >= 0) {
947 			sblock.fs_clean = FSBAD;
948 			sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
949 			sbdirty();
950 			flush(fswritefd, &sblk);
951 		}
952 		/*
953 		 * We're exiting, it doesn't really matter that our
954 		 * caller doesn't get to call va_end().
955 		 */
956 		if (exitstat == 0)
957 			exitstat = EXFNDERRS;
958 		exit(exitstat);
959 	}
960 	if (*fmt != '\0') {
961 		(void) vprintf(fmt, ap);
962 	}
963 }
964 
965 /*
966  * Pwarn just prints a message when not preening,
967  * or a warning (preceded by filename) when preening.
968  */
969 NOTE(PRINTFLIKE(1))
970 void
971 pwarn(caddr_t fmt, ...)
972 {
973 	va_list ap;
974 
975 	va_start(ap, fmt);
976 	vpwarn(fmt, ap);
977 	va_end(ap);
978 }
979 
980 NOTE(PRINTFLIKE(1))
981 static void
982 vpwarn(caddr_t fmt, va_list ap)
983 {
984 	if (*fmt != '\0') {
985 		if (preen)
986 			(void) printf("%s: ", devname);
987 		(void) vprintf(fmt, ap);
988 	}
989 }
990 
991 /*
992  * Like sprintf(), except the buffer is dynamically allocated
993  * and returned, instead of being passed in.  A pointer to the
994  * buffer is stored in *RET, and FMT is the usual format string.
995  * The number of characters in *RET (excluding the trailing \0,
996  * to be consistent with the other *printf() routines) is returned.
997  *
998  * Solaris doesn't have asprintf(3C) yet, unfortunately.
999  */
1000 NOTE(PRINTFLIKE(2))
1001 int
1002 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1003 {
1004 	int len;
1005 	caddr_t buffer;
1006 	va_list ap;
1007 
1008 	va_start(ap, fmt);
1009 	len = vsnprintf(NULL, 0, fmt, ap);
1010 	va_end(ap);
1011 
1012 	buffer = malloc((len + 1) * sizeof (char));
1013 	if (buffer == NULL) {
1014 		errexit("Out of memory in asprintf\n");
1015 		/* NOTREACHED */
1016 	}
1017 
1018 	va_start(ap, fmt);
1019 	(void) vsnprintf(buffer, len + 1, fmt, ap);
1020 	va_end(ap);
1021 
1022 	*ret = buffer;
1023 	return (len);
1024 }
1025 
1026 /*
1027  * So we can take advantage of kernel routines in ufs_subr.c.
1028  */
1029 /* PRINTFLIKE2 */
1030 void
1031 cmn_err(int level, caddr_t fmt, ...)
1032 {
1033 	va_list ap;
1034 
1035 	va_start(ap, fmt);
1036 	if (level == CE_PANIC) {
1037 		(void) printf("INTERNAL INCONSISTENCY:");
1038 		verrexit(fmt, ap);
1039 	} else {
1040 		(void) vprintf(fmt, ap);
1041 	}
1042 	va_end(ap);
1043 }
1044 
1045 /*
1046  * Check to see if unraw version of name is already mounted.
1047  * Updates devstr with the device name if devstr is not NULL
1048  * and str_size is positive.
1049  */
1050 int
1051 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1052 {
1053 	int found;
1054 	struct mnttab *mntent;
1055 
1056 	mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1057 	if (mntent == NULL)
1058 		return (M_NOMNT);
1059 
1060 	/*
1061 	 * It's mounted.  With or without write access?
1062 	 */
1063 	if (hasmntopt(mntent, MNTOPT_RO) != 0)
1064 		found = M_RO;	/* mounted as RO */
1065 	else
1066 		found = M_RW; 	/* mounted as R/W */
1067 
1068 	if (mount_point == NULL) {
1069 		mount_point = strdup(mntent->mnt_mountp);
1070 		if (mount_point == NULL) {
1071 			errexit("fsck: memory allocation failure: %s",
1072 			    strerror(errno));
1073 			/* NOTREACHED */
1074 		}
1075 
1076 		if (devstr != NULL && str_size > 0)
1077 			(void) strlcpy(devstr, mntent->mnt_special, str_size);
1078 	}
1079 
1080 	return (found);
1081 }
1082 
1083 /*
1084  * Check to see if name corresponds to an entry in vfstab, and that the entry
1085  * does not have option ro.
1086  */
1087 int
1088 writable(caddr_t name)
1089 {
1090 	int rw = 1;
1091 	struct vfstab vfsbuf, vfskey;
1092 	FILE *vfstab;
1093 
1094 	vfstab = fopen(VFSTAB, "r");
1095 	if (vfstab == NULL) {
1096 		(void) printf("can't open %s\n", VFSTAB);
1097 		return (1);
1098 	}
1099 	(void) memset((void *)&vfskey, 0, sizeof (vfskey));
1100 	vfsnull(&vfskey);
1101 	vfskey.vfs_special = unrawname(name);
1102 	vfskey.vfs_fstype = MNTTYPE_UFS;
1103 	if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1104 	    (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1105 		rw = 0;
1106 	}
1107 	(void) fclose(vfstab);
1108 	return (rw);
1109 }
1110 
1111 /*
1112  * debugclean
1113  */
1114 static void
1115 debugclean(void)
1116 {
1117 	if (!debug)
1118 		return;
1119 
1120 	if ((iscorrupt == 0) && (isdirty == 0))
1121 		return;
1122 
1123 	if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1124 	    (sblock.fs_clean == FSLOG && islog && islogok) ||
1125 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1126 		return;
1127 
1128 	(void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1129 	    sblock.fs_clean == FSSTABLE ? "stable" :
1130 	    sblock.fs_clean == FSLOG ? "logging" :
1131 	    sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1132 	    devname);
1133 }
1134 
/*
 * updateclean
 *	Carefully and transparently update the clean flag.
 *
 * Works out what fs_clean, fs_reclaim, fs_logbno and fs_flags should
 * be, based on the current in-core superblock and on what this run
 * found (iscorrupt, islog, islogok, largefile_count).  If nothing
 * differs and nothing is dirty, the superblock is left alone.
 * Otherwise the in-core superblock is updated and, when a writable
 * descriptor is available, those fields (plus fs_state and fs_time)
 * are patched into a freshly-read copy of the on-disk superblock,
 * which is then written back.
 *
 * `iscorrupt' has to be in its final state before this is called.
 *
 * Returns non-zero if the log's blocks were handed to freelogblk()
 * as part of the update, zero otherwise.
 */
int
updateclean(void)
{
	int freedlog = 0;
	struct bufarea cleanbuf;
	size_t size;
	ssize_t io_res;
	diskaddr_t bno;
	char fsclean;
	int fsreclaim;
	char fsflags;
	int flags_ok = 1;
	daddr32_t fslogbno;
	offset_t sblkoff;
	time_t t;

	/*
	 * debug stuff
	 */
	debugclean();

	/*
	 * set fsclean to its appropriate value
	 *
	 * Start from what the superblock currently claims.  If the
	 * fs_state/fs_time pair does not add up to FSOKAY (and we are
	 * not dealing with an error-locked fs), treat the file system
	 * as active regardless of what fs_clean says.
	 */
	fslogbno = sblock.fs_logbno;
	fsclean = sblock.fs_clean;
	fsreclaim = sblock.fs_reclaim;
	fsflags = sblock.fs_flags;
	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
		fsclean = FSACTIVE;
	}
	/*
	 * If ufs log is not okay, note that we need to clear it.
	 * The NULL callback means examinelog() only inspects the log
	 * here (it may clear islogok); nothing is freed yet.
	 */
	examinelog(NULL);
	if (fslogbno && !(islog && islogok)) {
		fsclean = FSACTIVE;
		fslogbno = 0;
	}

	/*
	 * if necessary, update fs_clean and fs_state
	 */
	switch (fsclean) {

	case FSACTIVE:
		/* Nothing wrong was found, so promote to stable. */
		if (!iscorrupt) {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		}
		break;

	case FSCLEAN:
	case FSSTABLE:
		/* Claimed to be consistent; demote if that was untrue. */
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else {
			fsreclaim = 0;
		}
		break;

	case FSLOG:
		/*
		 * Stays FSLOG only if there is still a usable log;
		 * the reclaim flag is cleared in that case only when
		 * fflag is set.
		 */
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else if (!islog || fslogbno == 0) {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		} else if (fflag) {
			fsreclaim = 0;
		}
		break;

	case FSFIX:
		/*
		 * Marked bad unless this is an error-locked fs that
		 * came out of the check without corruption.
		 */
		fsclean = FSBAD;
		if (errorlocked && !iscorrupt) {
			fsclean = islog ? FSLOG : FSCLEAN;
		}
		break;

	default:
		/* Unrecognized state value; pick one based on iscorrupt. */
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		}
	}

	/* Make the large-files flag reflect what the scan actually saw. */
	if (largefile_count > 0)
		fsflags |= FSLARGEFILES;
	else
		fsflags &= ~FSLARGEFILES;

	/*
	 * There can be two discrepancies here.  A) The superblock
	 * shows no largefiles but we found some while scanning.
	 * B) The superblock indicates the presence of largefiles,
	 * but none are present.  Note that if preening, the superblock
	 * is silently corrected.
	 *
	 * NOTE(review): the comparison below is on the whole flags
	 * byte, not just the FSLARGEFILES bit -- confirm that is
	 * intended if other bits can ever be set in fs_flags.
	 */
	if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
	    (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
		flags_ok = 0;

	if (debug)
		(void) printf(
		    "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
		    largefile_count, sblock.fs_flags, flags_ok);

	/*
	 * If fs is unchanged, do nothing.
	 * (Other than releasing the error lock, if we hold one.)
	 */
	if ((!isdirty) && (flags_ok) &&
	    (fslogbno == sblock.fs_logbno) &&
	    (sblock.fs_clean == fsclean) &&
	    (sblock.fs_reclaim == fsreclaim) &&
	    (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
		if (errorlocked) {
			if (!do_errorlock(LOCKFS_ULOCK))
				pwarn(
		    "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
		}
		return (freedlog);
	}

	/*
	 * if user allows, update superblock state
	 */
	if (debug) {
		(void) printf(
	    "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
		    sblock.fs_flags, sblock.fs_logbno,
		    sblock.fs_clean, sblock.fs_reclaim,
		    sblock.fs_state + sblock.fs_time);
		(void) printf(
	    "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
		    fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
	}
	/*
	 * The question is only put to the user when nothing is dirty
	 * and we are neither preening nor in a rerun.
	 */
	if (!isdirty && !preen && !rerun &&
	    (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
		return (freedlog);

	(void) time(&t);
	sblock.fs_time = (time32_t)t;
	if (debug)
		printclean();

	/*
	 * The log location is about to change (the only change made
	 * above is to zero it), so walk the log once more, this time
	 * passing every fragment to freelogblk().
	 */
	if (sblock.fs_logbno != fslogbno) {
		examinelog(&freelogblk);
		freedlog++;
	}

	/*
	 * fs_state is chosen so that fs_state + fs_time == FSOKAY,
	 * which is the check applied at the top of this function.
	 */
	sblock.fs_logbno = fslogbno;
	sblock.fs_clean = fsclean;
	sblock.fs_state = FSOKAY - (long)sblock.fs_time;
	sblock.fs_reclaim = fsreclaim;
	sblock.fs_flags = fsflags;

	/*
	 * if superblock can't be written, return
	 */
	if (fswritefd < 0)
		return (freedlog);

	/*
	 * Read private copy of superblock, update clean flag, and write it.
	 * Only the six fields assigned below are altered in the copy;
	 * everything else goes back to disk exactly as it was read.
	 */
	bno  = sblk.b_bno;
	size = sblk.b_size;

	sblkoff = ldbtob(bno);

	if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
		errexit("out of memory");
	if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
		    (longlong_t)bno, strerror(errno));
		goto out;
	}

	if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
		report_io_prob("READ FROM", bno, size, io_res);
		goto out;
	}

	cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
	cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
	cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
	cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
	cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
	cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;

	if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
		    (longlong_t)bno, strerror(errno));
		goto out;
	}

	if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
		report_io_prob("WRITE TO", bno, size, io_res);
		goto out;
	}

	/*
	 * 1208040
	 * If we had to use -b to grab an alternate superblock, then we
	 * likely had to do so because of unacceptable differences between
	 * the main and alternate superblocks.  So, we had better update
	 * the alternate superblock as well, or we'll just fail again
	 * the next time we attempt to run fsck!
	 */
	if (bflag != 0) {
		write_altsb(fswritefd);
	}

	/*
	 * Note that the seek/read/write failure paths above jump past
	 * this unlock.
	 */
	if (errorlocked) {
		if (!do_errorlock(LOCKFS_ULOCK))
			pwarn(
		    "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
	}

out:
	if (cleanbuf.b_un.b_buf != NULL) {
		free((void *)cleanbuf.b_un.b_buf);
	}

	return (freedlog);
}
1369 
1370 static void
1371 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1372 {
1373 	if (failure < 0)
1374 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1375 		    what, (int)bno, strerror(errno));
1376 	else if (failure == 0)
1377 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1378 		    what, (int)bno);
1379 	else
1380 		(void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1381 		    what, (int)bno, (unsigned)failure, (unsigned)expected);
1382 }
1383 
1384 /*
1385  * print out clean info
1386  */
1387 void
1388 printclean(void)
1389 {
1390 	caddr_t s;
1391 
1392 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1393 		s = "unknown";
1394 	else
1395 		switch (sblock.fs_clean) {
1396 
1397 		case FSACTIVE:
1398 			s = "active";
1399 			break;
1400 
1401 		case FSCLEAN:
1402 			s = "clean";
1403 			break;
1404 
1405 		case FSSTABLE:
1406 			s = "stable";
1407 			break;
1408 
1409 		case FSLOG:
1410 			s = "logging";
1411 			break;
1412 
1413 		case FSBAD:
1414 			s = "is bad";
1415 			break;
1416 
1417 		case FSFIX:
1418 			s = "being fixed";
1419 			break;
1420 
1421 		default:
1422 			s = "unknown";
1423 		}
1424 
1425 	if (preen)
1426 		pwarn("is %s.\n", s);
1427 	else
1428 		(void) printf("** %s is %s.\n", devname, s);
1429 }
1430 
1431 int
1432 is_errorlocked(caddr_t fs)
1433 {
1434 	int		retval;
1435 	struct stat64	statb;
1436 	caddr_t		mountp;
1437 	struct mnttab	*mntent;
1438 
1439 	retval = 0;
1440 
1441 	if (!fs)
1442 		return (0);
1443 
1444 	if (stat64(fs, &statb) < 0)
1445 		return (0);
1446 
1447 	if (S_ISDIR(statb.st_mode)) {
1448 		mountp = fs;
1449 	} else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1450 		mntent = search_mnttab(NULL, fs, NULL, 0);
1451 		if (mntent == NULL)
1452 			return (0);
1453 		mountp = mntent->mnt_mountp;
1454 		if (mountp == NULL) /* theoretically a can't-happen */
1455 			return (0);
1456 	} else {
1457 		return (0);
1458 	}
1459 
1460 	/*
1461 	 * From here on, must `goto out' to avoid memory leakage.
1462 	 */
1463 
1464 	if (elock_combuf == NULL)
1465 		elock_combuf =
1466 		    (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1467 	else
1468 		elock_combuf =
1469 		    (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1470 
1471 	if (elock_combuf == NULL)
1472 		goto out;
1473 
1474 	(void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1475 
1476 	if (elock_mountp != NULL) {
1477 		free(elock_mountp);
1478 	}
1479 
1480 	elock_mountp = strdup(mountp);
1481 	if (elock_mountp == NULL)
1482 		goto out;
1483 
1484 	if (mountfd < 0) {
1485 		if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1486 			goto out;
1487 	}
1488 
1489 	if (lfp == NULL) {
1490 		lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1491 		if (lfp == NULL)
1492 			goto out;
1493 		(void) memset((void *)lfp, 0, sizeof (struct lockfs));
1494 	}
1495 
1496 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1497 	lfp->lf_comment = elock_combuf;
1498 
1499 	if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1500 		goto out;
1501 
1502 	/*
1503 	 * lint believes that the ioctl() (or any other function
1504 	 * taking lfp as an arg) could free lfp.  This is not the
1505 	 * case, however.
1506 	 */
1507 	retval = LOCKFS_IS_ELOCK(lfp);
1508 
1509 out:
1510 	return (retval);
1511 }
1512 
1513 /*
1514  * Given a name which is known to be a directory, see if it appears
1515  * in the vfstab.  If so, return the entry's block (special) device
1516  * field via devstr.
1517  */
1518 int
1519 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1520 {
1521 	return (NULL != search_vfstab(name, NULL, devstr, str_size));
1522 }
1523 
1524 /*
1525  * Given a name which is known to be a directory, see if it appears
1526  * in the mnttab.  If so, return the entry's block (special) device
1527  * field via devstr.
1528  */
1529 int
1530 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1531 {
1532 	return (NULL != search_mnttab(name, NULL, devstr, str_size));
1533 }
1534 
1535 /*
1536  * Search for mount point and/or special device in the given file.
1537  * The first matching entry is returned.
1538  *
1539  * If an entry is found and str_size is greater than zero, then
1540  * up to size_str bytes of the special device name from the entry
1541  * are copied to devstr.
1542  */
1543 
1544 #define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
1545 			st_nuller, st_init, st_searcher) \
1546 	{ \
1547 		FILE *fp; \
1548 		struct st_type *retval = NULL; \
1549 		struct st_type key; \
1550 		static struct st_type buffer; \
1551 		\
1552 		/* LINTED ``assigned value never used'' */ \
1553 		st_nuller(&key); \
1554 		key.st_mount = mountp; \
1555 		key.st_special = special; \
1556 		st_init; \
1557 		\
1558 		if ((fp = fopen(st_file, "r")) == NULL) \
1559 			return (NULL); \
1560 		\
1561 		if (st_searcher(fp, &buffer, &key) == 0) { \
1562 			retval = &buffer; \
1563 			if (devstr != NULL && str_size > 0 && \
1564 			    buffer.st_special != NULL) { \
1565 				(void) strlcpy(devstr, buffer.st_special, \
1566 				    str_size); \
1567 			} \
1568 		} \
1569 		(void) fclose(fp); \
1570 		return (retval); \
1571 	}
1572 
1573 static struct vfstab *
1574 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1575 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1576 		(retval = retval), getvfsany)
1577 
1578 static struct mnttab *
1579 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1580 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1581 		(key.mnt_fstype = MNTTYPE_UFS), getmntany)
1582 
1583 int
1584 do_errorlock(int lock_type)
1585 {
1586 	caddr_t	   buf;
1587 	time_t	   now;
1588 	struct tm *local;
1589 	int	   rc;
1590 
1591 	if (elock_combuf == NULL)
1592 		errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1593 		    elock_mountp ? elock_mountp : "<null>",
1594 		    lock_type);
1595 
1596 	if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1597 	    NULL) {
1598 		errexit("Couldn't alloc memory for temp. lock status buffer\n");
1599 	}
1600 	if (lfp == NULL) {
1601 		errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1602 		    elock_mountp, lock_type);
1603 	}
1604 
1605 	(void) memmove((void *)buf, (void *)elock_combuf,
1606 	    LOCKFS_MAXCOMMENTLEN-1);
1607 
1608 	switch (lock_type) {
1609 	case LOCKFS_ELOCK:
1610 		/*
1611 		 * Note that if it is error-locked, we won't get an
1612 		 * error back if we try to error-lock it again.
1613 		 */
1614 		if (time(&now) != (time_t)-1) {
1615 			if ((local = localtime(&now)) != NULL)
1616 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1617 		    "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1618 				    elock_combuf, (int)pid,
1619 				    local->tm_mon + 1, local->tm_mday,
1620 				    (local->tm_year % 100), local->tm_hour,
1621 				    local->tm_min, local->tm_sec);
1622 			else
1623 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1624 				    "%s [fsck pid %d", elock_combuf, pid);
1625 
1626 		} else {
1627 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1628 			    "%s [fsck pid %d", elock_combuf, pid);
1629 		}
1630 		break;
1631 
1632 	case LOCKFS_ULOCK:
1633 		if (time(&now) != (time_t)-1) {
1634 			if ((local = localtime(&now)) != NULL) {
1635 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1636 				    "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1637 				    elock_combuf,
1638 				    local->tm_mon + 1, local->tm_mday,
1639 				    (local->tm_year % 100), local->tm_hour,
1640 				    local->tm_min, local->tm_sec);
1641 			} else {
1642 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1643 				    "%s]", elock_combuf);
1644 			}
1645 		} else {
1646 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1647 			    "%s]", elock_combuf);
1648 		}
1649 		if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1650 			pwarn("do_errorlock: unlock failed: %s\n",
1651 			    strerror(errno));
1652 			goto out;
1653 		}
1654 		break;
1655 
1656 	default:
1657 		break;
1658 	}
1659 
1660 	(void) memmove((void *)elock_combuf, (void *)buf,
1661 	    LOCKFS_MAXCOMMENTLEN - 1);
1662 
1663 	lfp->lf_lock = lock_type;
1664 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1665 	lfp->lf_comment = elock_combuf;
1666 	lfp->lf_flags = 0;
1667 	errno = 0;
1668 
1669 	if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1670 		if (errno == EINVAL) {
1671 			pwarn("Another fsck active?\n");
1672 			iscorrupt = 0;	/* don't go away mad, just go away */
1673 		} else {
1674 			pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1675 			    lock_type, elock_combuf, strerror(errno));
1676 		}
1677 	}
1678 out:
1679 	if (buf != NULL) {
1680 		free((void *)buf);
1681 	}
1682 
1683 	return (rc != -1);
1684 }
1685 
1686 /*
1687  * Shadow inode support.  To register a shadow with a client is to note
1688  * that an inode (the client) refers to the shadow.
1689  */
1690 
1691 static struct shadowclients *
1692 newshadowclient(struct shadowclients *prev)
1693 {
1694 	struct shadowclients *rc;
1695 
1696 	rc = (struct shadowclients *)malloc(sizeof (*rc));
1697 	if (rc == NULL)
1698 		errexit("newshadowclient: cannot malloc shadow client");
1699 	rc->next = prev;
1700 	rc->nclients = 0;
1701 
1702 	rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1703 	    maxshadowclients);
1704 	if (rc->client == NULL)
1705 		errexit("newshadowclient: cannot malloc client array");
1706 	return (rc);
1707 }
1708 
1709 void
1710 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1711 	struct shadowclientinfo **info)
1712 {
1713 	struct shadowclientinfo *sci;
1714 	struct shadowclients *scc;
1715 
1716 	/*
1717 	 * Already have a record for this shadow?
1718 	 */
1719 	for (sci = *info; sci != NULL; sci = sci->next)
1720 		if (sci->shadow == shadow)
1721 			break;
1722 	if (sci == NULL) {
1723 		/*
1724 		 * It's a new shadow, add it to the list
1725 		 */
1726 		sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1727 		if (sci == NULL)
1728 			errexit("registershadowclient: cannot malloc");
1729 		sci->next = *info;
1730 		*info = sci;
1731 		sci->shadow = shadow;
1732 		sci->totalClients = 0;
1733 		sci->clients = newshadowclient(NULL);
1734 	}
1735 
1736 	sci->totalClients++;
1737 	scc = sci->clients;
1738 	if (scc->nclients >= maxshadowclients) {
1739 		scc = newshadowclient(sci->clients);
1740 		sci->clients = scc;
1741 	}
1742 
1743 	scc->client[scc->nclients++] = client;
1744 }
1745 
1746 /*
1747  * Locate and discard a shadow.
1748  */
1749 void
1750 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1751 {
1752 	struct shadowclientinfo *sci, *prev;
1753 
1754 	/*
1755 	 * Do we have a record for this shadow?
1756 	 */
1757 	prev = NULL;
1758 	for (sci = *info; sci != NULL; sci = sci->next) {
1759 		if (sci->shadow == shadow)
1760 			break;
1761 		prev = sci;
1762 	}
1763 
1764 	if (sci != NULL) {
1765 		/*
1766 		 * First, pull it off the list, since we know there
1767 		 * shouldn't be any future references to this one.
1768 		 */
1769 		if (prev == NULL)
1770 			*info = sci->next;
1771 		else
1772 			prev->next = sci->next;
1773 		deshadow(sci, clearattrref);
1774 	}
1775 }
1776 
1777 /*
1778  * Discard all memory used to track clients of a shadow.
1779  */
1780 void
1781 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1782 {
1783 	struct shadowclients *clients, *discard;
1784 	int idx;
1785 
1786 	clients = sci->clients;
1787 	while (clients != NULL) {
1788 		discard = clients;
1789 		clients = clients->next;
1790 		if (discard->client != NULL) {
1791 			if (cb != NULL) {
1792 				for (idx = 0; idx < discard->nclients; idx++)
1793 					(*cb)(discard->client[idx]);
1794 			}
1795 			free((void *)discard->client);
1796 		}
1797 		free((void *)discard);
1798 	}
1799 
1800 	free((void *)sci);
1801 }
1802 
1803 /*
1804  * Allocate more buffer as need arises but allocate one at a time.
1805  * This is done to make sure that fsck does not exit with error if it
1806  * needs more buffer to complete its task.
1807  */
1808 static struct bufarea *
1809 alloc_bufarea(void)
1810 {
1811 	struct bufarea *newbp;
1812 	caddr_t bufp;
1813 
1814 	bufp = malloc((unsigned int)sblock.fs_bsize);
1815 	if (bufp == NULL)
1816 		return (NULL);
1817 
1818 	newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1819 	if (newbp == NULL) {
1820 		free((void *)bufp);
1821 		return (NULL);
1822 	}
1823 
1824 	initbarea(newbp);
1825 	newbp->b_un.b_buf = bufp;
1826 	newbp->b_prev = &bufhead;
1827 	newbp->b_next = bufhead.b_next;
1828 	bufhead.b_next->b_prev = newbp;
1829 	bufhead.b_next = newbp;
1830 	bufhead.b_size++;
1831 	return (newbp);
1832 }
1833 
1834 /*
1835  * We length-limit in both unrawname() and rawname() to avoid
1836  * overflowing our arrays or those of our naive, trusting callers.
1837  */
1838 
1839 caddr_t
1840 unrawname(caddr_t name)
1841 {
1842 	caddr_t dp;
1843 	static char fullname[MAXPATHLEN + 1];
1844 
1845 	if ((dp = getfullblkname(name)) == NULL)
1846 		return ("");
1847 
1848 	(void) strlcpy(fullname, dp, sizeof (fullname));
1849 	/*
1850 	 * Not reporting under debug, as the allocation isn't
1851 	 * reported by getfullblkname.  The idea is that we
1852 	 * produce balanced alloc/free instances.
1853 	 */
1854 	free(dp);
1855 
1856 	return (fullname);
1857 }
1858 
1859 caddr_t
1860 rawname(caddr_t name)
1861 {
1862 	caddr_t dp;
1863 	static char fullname[MAXPATHLEN + 1];
1864 
1865 	if ((dp = getfullrawname(name)) == NULL)
1866 		return ("");
1867 
1868 	(void) strlcpy(fullname, dp, sizeof (fullname));
1869 	/*
1870 	 * Not reporting under debug, as the allocation isn't
1871 	 * reported by getfullblkname.  The idea is that we
1872 	 * produce balanced alloc/free instances.
1873 	 */
1874 	free(dp);
1875 
1876 	return (fullname);
1877 }
1878 
1879 /*
1880  * Make sure that a cg header looks at least moderately reasonable.
1881  * We want to be able to trust the contents enough to be able to use
1882  * the standard accessor macros.  So, besides looking at the obvious
1883  * such as the magic number, we verify that the offset field values
1884  * are properly aligned and not too big or small.
1885  *
1886  * Returns a NULL pointer if the cg is sane enough for our needs, else
1887  * a dynamically-allocated string describing all of its faults.
1888  */
/*
 * Append_Error(full, full_len, addition, addition_len)
 *
 * Tack the malloc'd string `addition' (addition_len characters, not
 * counting the terminator) onto the accumulated string `full', whose
 * current length is full_len.  Both `full' and `full_len' are
 * updated in place, so they must be plain lvalues.
 *
 * Ownership of `addition' always passes to the macro: the first
 * message simply becomes `full', and later ones are copied onto the
 * end of `full' and then freed.
 *
 * The body is wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is exactly one statement, wherever it is
 * used.
 */
#define	Append_Error(full, full_len, addition, addition_len) \
	do { \
		if (full == NULL) { \
			full = addition; \
			full_len = addition_len; \
		} else { \
			/* lint doesn't think realloc() understands NULLs */ \
			full = realloc(full, full_len + addition_len + 1); \
			if (full == NULL) { \
				errexit("Out of memory in cg_sanity"); \
				/* NOTREACHED */ \
			} \
			(void) strcpy(full + full_len, addition); \
			full_len += addition_len; \
			free(addition); \
		} \
		/* CONSTCOND */ \
	} while (0)
1904 
1905 caddr_t
1906 cg_sanity(struct cg *cgp, int cgno)
1907 {
1908 	caddr_t full_err;
1909 	caddr_t this_err = NULL;
1910 	int full_len, this_len;
1911 	daddr32_t ndblk;
1912 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1913 	daddr32_t exp_freeoff, exp_nextfreeoff;
1914 
1915 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1916 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
1917 
1918 	full_err = NULL;
1919 	full_len = 0;
1920 
1921 	if (!cg_chkmagic(cgp)) {
1922 		this_len = fsck_asprintf(&this_err,
1923 		    "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1924 		    cgp->cg_magic, CG_MAGIC);
1925 		Append_Error(full_err, full_len, this_err, this_len);
1926 	}
1927 
1928 	if (cgp->cg_cgx != cgno) {
1929 		this_len = fsck_asprintf(&this_err,
1930 		    "WRONG CG NUMBER (%d should be %d)\n",
1931 		    cgp->cg_cgx, cgno);
1932 		Append_Error(full_err, full_len, this_err, this_len);
1933 	}
1934 
1935 	if ((cgp->cg_btotoff & 3) != 0) {
1936 		this_len = fsck_asprintf(&this_err,
1937 		    "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1938 		    cgp->cg_btotoff);
1939 		Append_Error(full_err, full_len, this_err, this_len);
1940 	}
1941 
1942 	if ((cgp->cg_boff & 1) != 0) {
1943 		this_len = fsck_asprintf(&this_err,
1944 	    "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1945 		    cgp->cg_boff);
1946 		Append_Error(full_err, full_len, this_err, this_len);
1947 	}
1948 
1949 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1950 		if (cgp->cg_ncyl < 1) {
1951 			this_len = fsck_asprintf(&this_err,
1952 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1953 			    cgp->cg_ncyl);
1954 		} else {
1955 			this_len = fsck_asprintf(&this_err,
1956 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1957 			    cgp->cg_ncyl, sblock.fs_cpg);
1958 		}
1959 		Append_Error(full_err, full_len, this_err, this_len);
1960 	}
1961 
1962 	if (cgp->cg_niblk != sblock.fs_ipg) {
1963 		this_len = fsck_asprintf(&this_err,
1964 		    "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1965 		    cgp->cg_niblk, sblock.fs_ipg);
1966 		Append_Error(full_err, full_len, this_err, this_len);
1967 	}
1968 
1969 	if (cgp->cg_ndblk != ndblk) {
1970 		this_len = fsck_asprintf(&this_err,
1971 	    "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1972 		    cgp->cg_ndblk, ndblk);
1973 		Append_Error(full_err, full_len, this_err, this_len);
1974 	}
1975 
1976 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1977 		this_len = fsck_asprintf(&this_err,
1978 		    "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1979 		    "(%d should be at least 0 and less than %d)\n",
1980 		    cgp->cg_rotor, ndblk);
1981 		Append_Error(full_err, full_len, this_err, this_len);
1982 	}
1983 
1984 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1985 		this_len = fsck_asprintf(&this_err,
1986 		    "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1987 		    "(%d should be at least 0 and less than %d)\n",
1988 		    cgp->cg_frotor, ndblk);
1989 		Append_Error(full_err, full_len, this_err, this_len);
1990 	}
1991 
1992 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1993 		this_len = fsck_asprintf(&this_err,
1994 		    "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1995 		    "(%d should be at least 0 and less than %d)\n",
1996 		    cgp->cg_irotor, sblock.fs_ipg);
1997 		Append_Error(full_err, full_len, this_err, this_len);
1998 	}
1999 
2000 	if (cgp->cg_btotoff != exp_btotoff) {
2001 		this_len = fsck_asprintf(&this_err,
2002 		    "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2003 		    cgp->cg_btotoff, exp_btotoff);
2004 		Append_Error(full_err, full_len, this_err, this_len);
2005 	}
2006 
2007 	if (cgp->cg_boff != exp_boff) {
2008 		this_len = fsck_asprintf(&this_err,
2009 		    "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2010 		    cgp->cg_boff, exp_boff);
2011 		Append_Error(full_err, full_len, this_err, this_len);
2012 	}
2013 
2014 	if (cgp->cg_iusedoff != exp_iusedoff) {
2015 		this_len = fsck_asprintf(&this_err,
2016 		    "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2017 		    cgp->cg_iusedoff, exp_iusedoff);
2018 		Append_Error(full_err, full_len, this_err, this_len);
2019 	}
2020 
2021 	if (cgp->cg_freeoff != exp_freeoff) {
2022 		this_len = fsck_asprintf(&this_err,
2023 		    "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2024 		    cgp->cg_freeoff, exp_freeoff);
2025 		Append_Error(full_err, full_len, this_err, this_len);
2026 	}
2027 
2028 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2029 		this_len = fsck_asprintf(&this_err,
2030 		    "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2031 		    cgp->cg_nextfreeoff, exp_nextfreeoff);
2032 		Append_Error(full_err, full_len, this_err, this_len);
2033 	}
2034 
2035 	return (full_err);
2036 }
2037 
2038 #undef	Append_Error
2039 
2040 /*
2041  * This is taken from mkfs, and is what is used to come up with the
2042  * original values for a struct cg.  This implies that, since these
2043  * are all constants, recalculating them now should give us the same
2044  * thing as what's on disk.
2045  */
2046 static void
2047 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2048 	daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2049 	daddr32_t *ndblk)
2050 {
2051 	daddr32_t cbase, dmax;
2052 	struct cg *cgp;
2053 
2054 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2055 	    (size_t)sblock.fs_cgsize);
2056 	cgp = cgblk.b_un.b_cg;
2057 
2058 	cbase = cgbase(&sblock, cgno);
2059 	dmax = cbase + sblock.fs_fpg;
2060 	if (dmax > sblock.fs_size)
2061 		dmax = sblock.fs_size;
2062 
2063 	/* LINTED pointer difference won't overflow */
2064 	*btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2065 	*boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2066 	*iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2067 	*freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2068 	*nextfreeoff = *freeoff +
2069 	    howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2070 	*ndblk = dmax - cbase;
2071 }
2072 
2073 /*
2074  * Corrects all fields in the cg that can be done with the available
2075  * redundant data.
2076  */
2077 void
2078 fix_cg(struct cg *cgp, int cgno)
2079 {
2080 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2081 	daddr32_t exp_freeoff, exp_nextfreeoff;
2082 	daddr32_t ndblk;
2083 
2084 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2085 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
2086 
2087 	if (cgp->cg_cgx != cgno) {
2088 		cgp->cg_cgx = cgno;
2089 	}
2090 
2091 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2092 		if (cgno == (sblock.fs_ncg - 1)) {
2093 			cgp->cg_ncyl = sblock.fs_ncyl -
2094 			    (sblock.fs_cpg * cgno);
2095 		} else {
2096 			cgp->cg_ncyl = sblock.fs_cpg;
2097 		}
2098 	}
2099 
2100 	if (cgp->cg_niblk != sblock.fs_ipg) {
2101 		/*
2102 		 * This is not used by the kernel, so it's pretty
2103 		 * harmless if it's wrong.
2104 		 */
2105 		cgp->cg_niblk = sblock.fs_ipg;
2106 	}
2107 
2108 	if (cgp->cg_ndblk != ndblk) {
2109 		cgp->cg_ndblk = ndblk;
2110 	}
2111 
2112 	/*
2113 	 * For the rotors, any position's valid, so pick the one we know
2114 	 * will always exist.
2115 	 */
2116 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2117 		cgp->cg_rotor = 0;
2118 	}
2119 
2120 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2121 		cgp->cg_frotor = 0;
2122 	}
2123 
2124 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2125 		cgp->cg_irotor = 0;
2126 	}
2127 
2128 	/*
2129 	 * For btotoff and boff, if they're misaligned they won't
2130 	 * match the expected values, so we're catching both cases
2131 	 * here.  Of course, if any of these are off, it seems likely
2132 	 * that the tables really won't be where we calculate they
2133 	 * should be anyway.
2134 	 */
2135 	if (cgp->cg_btotoff != exp_btotoff) {
2136 		cgp->cg_btotoff = exp_btotoff;
2137 	}
2138 
2139 	if (cgp->cg_boff != exp_boff) {
2140 		cgp->cg_boff = exp_boff;
2141 	}
2142 
2143 	if (cgp->cg_iusedoff != exp_iusedoff) {
2144 		cgp->cg_iusedoff = exp_iusedoff;
2145 	}
2146 
2147 	if (cgp->cg_freeoff != exp_freeoff) {
2148 		cgp->cg_freeoff = exp_freeoff;
2149 	}
2150 
2151 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2152 		cgp->cg_nextfreeoff = exp_nextfreeoff;
2153 	}
2154 
2155 	/*
2156 	 * Reset the magic, as we've recreated this cg, also
2157 	 * update the cg_time, as we're writing out the cg
2158 	 */
2159 	cgp->cg_magic = CG_MAGIC;
2160 	cgp->cg_time = time(NULL);
2161 
2162 	/*
2163 	 * We know there was at least one correctable problem,
2164 	 * or else we wouldn't have been called.  So instead of
2165 	 * marking the buffer dirty N times above, just do it
2166 	 * once here.
2167 	 */
2168 	cgdirty();
2169 }
2170 
2171 void
2172 examinelog(void (*cb)(daddr32_t))
2173 {
2174 	struct bufarea *bp;
2175 	extent_block_t *ebp;
2176 	extent_t *ep;
2177 	daddr32_t nfno, fno;
2178 	int i;
2179 	int j;
2180 
2181 	/*
2182 	 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2183 	 * we need to translate accordingly using logbtodb()
2184 	 */
2185 
2186 	if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2187 		if (debug) {
2188 			(void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2189 			    "Aborting log examination\n", \
2190 			    logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2191 		}
2192 		return;
2193 	}
2194 
2195 	/*
2196 	 * Read errors will return zeros, which will cause us
2197 	 * to do nothing harmful, so don't need to handle it.
2198 	 */
2199 	bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2200 	    (size_t)sblock.fs_bsize);
2201 	ebp = (void *)bp->b_un.b_buf;
2202 
2203 	/*
2204 	 * Does it look like a log allocation table?
2205 	 */
2206 	/* LINTED pointer cast is aligned */
2207 	if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2208 	    sblock.fs_bsize))
2209 		return;
2210 	if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2211 		return;
2212 
2213 	ep = &ebp->extents[0];
2214 	for (i = 0; i < ebp->nextents; ++i, ++ep) {
2215 		fno = logbtofrag(&sblock, ep->pbno);
2216 		nfno = dbtofsb(&sblock, ep->nbno);
2217 		for (j = 0; j < nfno; ++j, ++fno) {
2218 			/*
2219 			 * Invoke the callback first, so that pass1 can
2220 			 * mark the log blocks in-use.  Then, if any
2221 			 * subsequent pass over the log shows us that a
2222 			 * block got freed (say, it was also claimed by
2223 			 * an inode that we cleared), we can safely declare
2224 			 * the log bad.
2225 			 */
2226 			if (cb != NULL)
2227 				(*cb)(fno);
2228 			if (!testbmap(fno))
2229 				islogok = 0;
2230 		}
2231 	}
2232 	brelse(bp);
2233 
2234 	if (cb != NULL) {
2235 		fno = logbtofrag(&sblock, sblock.fs_logbno);
2236 		for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2237 			(*cb)(fno);
2238 	}
2239 }
2240 
2241 static void
2242 freelogblk(daddr32_t frag)
2243 {
2244 	freeblk(sblock.fs_logbno, frag, 1);
2245 }
2246 
2247 caddr_t
2248 file_id(fsck_ino_t inum, mode_t mode)
2249 {
2250 	static char name[MAXPATHLEN + 1];
2251 
2252 	if (lfdir == inum) {
2253 		return (lfname);
2254 	}
2255 
2256 	if ((mode & IFMT) == IFDIR) {
2257 		(void) strcpy(name, "DIR");
2258 	} else if ((mode & IFMT) == IFATTRDIR) {
2259 		(void) strcpy(name, "ATTR DIR");
2260 	} else if ((mode & IFMT) == IFSHAD) {
2261 		(void) strcpy(name, "ACL");
2262 	} else {
2263 		(void) strcpy(name, "FILE");
2264 	}
2265 
2266 	return (name);
2267 }
2268 
2269 /*
2270  * Simple initializer for inodesc structures, so users of only a few
2271  * fields don't have to worry about getting the right defaults for
2272  * everything out.
2273  */
2274 void
2275 init_inodesc(struct inodesc *idesc)
2276 {
2277 	/*
2278 	 * Most fields should be zero, just hit the special cases.
2279 	 */
2280 	(void) memset((void *)idesc, 0, sizeof (struct inodesc));
2281 	idesc->id_fix = DONTKNOW;
2282 	idesc->id_lbn = -1;
2283 	idesc->id_truncto = -1;
2284 	idesc->id_firsthole = -1;
2285 }
2286 
2287 /*
2288  * Compare routine for tsearch(C) to use on ino_t instances.
2289  */
2290 int
2291 ino_t_cmp(const void *left, const void *right)
2292 {
2293 	const fsck_ino_t lino = (const fsck_ino_t)left;
2294 	const fsck_ino_t rino = (const fsck_ino_t)right;
2295 
2296 	return (lino - rino);
2297 }
2298 
/*
 * Report the current value of cgblk's b_dirty flag.
 */
int
cgisdirty(void)
{
	return (cgblk.b_dirty);
}
2304 
/*
 * Hand cgblk to flush(), using fswritefd as the descriptor.
 */
void
cgflush(void)
{
	flush(fswritefd, &cgblk);
}
2310 
2311 void
2312 dirty(struct bufarea *bp)
2313 {
2314 	if (fswritefd < 0) {
2315 		/*
2316 		 * No one should call dirty() in read only mode.
2317 		 * But if one does, it's not fatal issue. Just warn him.
2318 		 */
2319 		pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2320 	} else {
2321 		(bp)->b_dirty = 1;
2322 		isdirty = 1;
2323 	}
2324 }
2325 
2326 void
2327 initbarea(struct bufarea *bp)
2328 {
2329 	(bp)->b_dirty = 0;
2330 	(bp)->b_bno = (diskaddr_t)-1LL;
2331 	(bp)->b_flags = 0;
2332 	(bp)->b_cnt = 0;
2333 	(bp)->b_errs = 0;
2334 }
2335 
2336 /*
2337  * Partition-sizing routines adapted from ../newfs/newfs.c.
2338  * Needed because calcsb() needs to use mkfs to work out what the
2339  * superblock should be, and mkfs insists on being told how many
2340  * sectors to use.
2341  *
2342  * Error handling assumes we're never called while preening.
2343  *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
2348  */
2349 
/*
 * Kind of label found on the device; set by get_device_size() to
 * one of the LABEL_TYPE_* values below and consulted by
 * getdisksize().
 */
static int label_type;

/* read_extvtoc() succeeded */
#define	LABEL_TYPE_VTOC		1
/* efi_alloc_and_read() succeeded */
#define	LABEL_TYPE_EFI		2
/* no label could be read; size came from brute-force probing */
#define	LABEL_TYPE_OTHER	3

/* Bytes in a megabyte; fallback/cap for fs_maxcontig in getdisksize(). */
#define	MB			(1024 * 1024)
/* 2^31 sectors; one terabyte assuming 512-byte sectors (TODO confirm). */
#define	SECTORS_PER_TERABYTE	(1LL << 31)
/*
 * Exclusive upper bound on the sector offsets probed by
 * brute_force_get_device_size().
 */
#define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
2359 
2360 diskaddr_t
2361 getdisksize(caddr_t disk, int fd)
2362 {
2363 	int rpm;
2364 	struct dk_geom g;
2365 	struct dk_cinfo ci;
2366 	diskaddr_t actual_size;
2367 
2368 	/*
2369 	 * get_device_size() determines the actual size of the
2370 	 * device, and also the disk's attributes, such as geometry.
2371 	 */
2372 	actual_size = get_device_size(fd, disk);
2373 
2374 	if (label_type == LABEL_TYPE_VTOC) {
2375 		if (ioctl(fd, DKIOCGGEOM, &g)) {
2376 			pwarn("%s: Unable to read Disk geometry", disk);
2377 			return (0);
2378 		}
2379 		if (sblock.fs_nsect == 0)
2380 			sblock.fs_nsect = g.dkg_nsect;
2381 		if (sblock.fs_ntrak == 0)
2382 			sblock.fs_ntrak = g.dkg_nhead;
2383 		if (sblock.fs_rps == 0) {
2384 			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2385 			sblock.fs_rps = rpm / 60;
2386 		}
2387 	}
2388 
2389 	if (sblock.fs_bsize == 0)
2390 		sblock.fs_bsize = MAXBSIZE;
2391 
2392 	/*
2393 	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2394 	 * information is not available, default to the min of a MB and
2395 	 * maxphys.
2396 	 */
2397 	if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2398 		sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2399 		if (sblock.fs_maxcontig < 0) {
2400 			int gotit, maxphys;
2401 
2402 			gotit = fsgetmaxphys(&maxphys, NULL);
2403 
2404 			/*
2405 			 * If we cannot get the maxphys value, default
2406 			 * to ufs_maxmaxphys (MB).
2407 			 */
2408 			if (gotit) {
2409 				sblock.fs_maxcontig = MIN(maxphys, MB);
2410 			} else {
2411 				sblock.fs_maxcontig = MB;
2412 			}
2413 		}
2414 		sblock.fs_maxcontig /= sblock.fs_bsize;
2415 	}
2416 
2417 	return (actual_size);
2418 }
2419 
2420 /*
2421  * Figure out how big the partition we're dealing with is.
2422  */
2423 static diskaddr_t
2424 get_device_size(int fd, caddr_t name)
2425 {
2426 	struct extvtoc vtoc;
2427 	struct dk_gpt *efi_vtoc;
2428 	diskaddr_t slicesize = 0;
2429 
2430 	int index = read_extvtoc(fd, &vtoc);
2431 
2432 	if (index >= 0) {
2433 		label_type = LABEL_TYPE_VTOC;
2434 	} else {
2435 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2436 			/* it might be an EFI label */
2437 			index = efi_alloc_and_read(fd, &efi_vtoc);
2438 			if (index >= 0)
2439 				label_type = LABEL_TYPE_EFI;
2440 		}
2441 	}
2442 
2443 	if (index < 0) {
2444 		/*
2445 		 * Since both attempts to read the label failed, we're
2446 		 * going to fall back to a brute force approach to
2447 		 * determining the device's size:  see how far out we can
2448 		 * perform reads on the device.
2449 		 */
2450 
2451 		slicesize = brute_force_get_device_size(fd);
2452 		if (slicesize == 0) {
2453 			switch (index) {
2454 			case VT_ERROR:
2455 				pwarn("%s: %s\n", name, strerror(errno));
2456 				break;
2457 			case VT_EIO:
2458 				pwarn("%s: I/O error accessing VTOC", name);
2459 				break;
2460 			case VT_EINVAL:
2461 				pwarn("%s: Invalid field in VTOC", name);
2462 				break;
2463 			default:
2464 				pwarn("%s: unknown error %d accessing VTOC",
2465 				    name, index);
2466 				break;
2467 			}
2468 			return (0);
2469 		} else {
2470 			label_type = LABEL_TYPE_OTHER;
2471 		}
2472 	}
2473 
2474 	if (label_type == LABEL_TYPE_EFI) {
2475 		slicesize = efi_vtoc->efi_parts[index].p_size;
2476 		efi_free(efi_vtoc);
2477 	} else if (label_type == LABEL_TYPE_VTOC) {
2478 		slicesize = vtoc.v_part[index].p_size;
2479 	}
2480 
2481 	return (slicesize);
2482 }
2483 
2484 /*
2485  * brute_force_get_device_size
2486  *
2487  * Determine the size of the device by seeing how far we can
2488  * read.  Doing an llseek( , , SEEK_END) would probably work
2489  * in most cases, but we've seen at least one third-party driver
2490  * which doesn't correctly support the SEEK_END option when the
2491  * the device is greater than a terabyte.
2492  */
2493 
2494 static diskaddr_t
2495 brute_force_get_device_size(int fd)
2496 {
2497 	diskaddr_t	min_fail = 0;
2498 	diskaddr_t	max_succeed = 0;
2499 	diskaddr_t	cur_db_off;
2500 	char 		buf[DEV_BSIZE];
2501 
2502 	/*
2503 	 * First, see if we can read the device at all, just to
2504 	 * eliminate errors that have nothing to do with the
2505 	 * device's size.
2506 	 */
2507 
2508 	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2509 	    ((read(fd, buf, DEV_BSIZE)) == -1))
2510 		return (0);  /* can't determine size */
2511 
2512 	/*
2513 	 * Now, go sequentially through the multiples of 4TB
2514 	 * to find the first read that fails (this isn't strictly
2515 	 * the most efficient way to find the actual size if the
2516 	 * size really could be anything between 0 and 2**64 bytes.
2517 	 * We expect the sizes to be less than 16 TB for some time,
2518 	 * so why do a bunch of reads that are larger than that?
2519 	 * However, this algorithm *will* work for sizes of greater
2520 	 * than 16 TB.  We're just not optimizing for those sizes.)
2521 	 */
2522 
2523 	/*
2524 	 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2525 	 * We're using > 32-bit constants here.  Therefore, its flow
2526 	 * analysis is wrong.  For the time being, ignore complaints
2527 	 * from it about the body of the for() being unreached.
2528 	 */
2529 	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2530 	    (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2531 	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2532 		if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2533 		    SEEK_SET) == -1) ||
2534 		    (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2535 			min_fail = cur_db_off;
2536 		else
2537 			max_succeed = cur_db_off;
2538 	}
2539 
2540 	/*
2541 	 * XXX Same lint flow analysis problem as above.
2542 	 */
2543 	if (min_fail == 0)
2544 		return (0);
2545 
2546 	/*
2547 	 * We now know that the size of the device is less than
2548 	 * min_fail and greater than or equal to max_succeed.  Now
2549 	 * keep splitting the difference until the actual size in
2550 	 * sectors in known.  We also know that the difference
2551 	 * between max_succeed and min_fail at this time is
2552 	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2553 	 * simplifies the math below.
2554 	 */
2555 
2556 	while (min_fail - max_succeed > 1) {
2557 		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2558 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2559 		    SEEK_SET)) == -1) ||
2560 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2561 			min_fail = cur_db_off;
2562 		else
2563 			max_succeed = cur_db_off;
2564 	}
2565 
2566 	/* the size is the last successfully read sector offset plus one */
2567 	return (max_succeed + 1);
2568 }
2569 
2570 static void
2571 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2572 {
2573 	struct dinode *dp;
2574 	char pathbuf[MAXPATHLEN + 1];
2575 
2576 	vpwarn(fmt, ap);
2577 	(void) putchar(' ');
2578 	pinode(ino);
2579 	(void) printf("\n");
2580 	getpathname(pathbuf, cwd, ino);
2581 	if (ino < UFSROOTINO || ino > maxino) {
2582 		pfatal("NAME=%s\n", pathbuf);
2583 		return;
2584 	}
2585 	dp = ginode(ino);
2586 	if (ftypeok(dp))
2587 		pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2588 	else
2589 		pfatal("NAME=%s\n", pathbuf);
2590 }
2591 
/*
 * Report an error against a directory inode.  Collects the variable
 * arguments and forwards them, with fmt, to vfileerror(), passing
 * ino as both the cwd and the inode being reported.
 */
void
direrror(fsck_ino_t ino, caddr_t fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfileerror(ino, ino, fmt, ap);
	va_end(ap);
}
2601 
/*
 * va_list flavor of direrror(): forwards to vfileerror() with ino
 * used as both the cwd and the inode being reported.
 */
static void
vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
{
	vfileerror(ino, ino, fmt, ap);
}
2607 
/*
 * Report an error against inode ino, with cwd supplied separately
 * for vfileerror()'s path lookup.  Collects the variable arguments
 * and forwards them, with fmt, to vfileerror().
 */
void
fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfileerror(cwd, ino, fmt, ap);
	va_end(ap);
}
2617 
2618 /*
2619  * Adds the given inode to the orphaned-directories list, limbo_dirs.
2620  * Assumes that the caller has set INCLEAR in the inode's statemap[]
2621  * entry.
2622  *
2623  * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2624  * meaning it's effectively an orphan.  It needs to be noted now, so
2625  * it will be remembered in pass 4.
2626  */
2627 
2628 void
2629 add_orphan_dir(fsck_ino_t ino)
2630 {
2631 	if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2632 		errexit("add_orphan_dir: out of memory");
2633 }
2634 
/*
 * Remove an inode from the orphaned-directories list, limbo_dirs,
 * presumably because it's been cleared.  The result of tdelete() is
 * deliberately ignored, so asking to remove an inode that is not on
 * the list is not reported.
 */
void
remove_orphan_dir(fsck_ino_t ino)
{
	(void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
}
2644 
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 *
 * log_setsum() stores in *sp the sum, modulo 2^32, of the nb bytes
 * at lp taken as consecutive int32_t words.  Any trailing bytes that
 * do not make up a whole word are ignored, and a non-positive nb is
 * treated as an empty region (leaving *sp zero).
 *
 * *sp is cleared before the words are added up, so sp may point at
 * a word inside the region being summed; that word then contributes
 * zero to the total.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	/*
	 * Accumulate unsigned: the sum is expected to wrap, and
	 * wrapping is only well defined for unsigned arithmetic.
	 */
	uint32_t csum = 0;
	int nwords;

	*sp = 0;
	if (nb <= 0)
		return;

	nwords = nb / (int)sizeof (int32_t);
	while (nwords-- > 0)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}
2660 
/*
 * Verify the checksum stored at *sp against one freshly computed
 * by log_setsum() over the nb bytes at lp.
 *
 * Returns 1 if they agree and 0 if they do not.  On a mismatch the
 * value originally found at *sp is put back, so the caller's data is
 * left as it was.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;

	log_setsum(sp, lp, nb);
	if (*sp == stored)
		return (1);

	/* Mismatch: restore what was there before recomputing. */
	*sp = stored;
	return (0);
}
2673