xref: /illumos-gate/usr/src/cmd/fs.d/ufs/fsck/utilities.c (revision 5fbc1fe0da7f34cf8155bf7624c94583cc98e47c)
1 /*
2  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
3  * Copyright (c) 2016 by Delphix. All rights reserved.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved	*/
8 
9 /*
10  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms are permitted
14  * provided that: (1) source distributions retain this entire copyright
15  * notice and comment, and (2) distributions including binaries display
16  * the following acknowledgement:  ``This product includes software
17  * developed by the University of California, Berkeley and its contributors''
18  * in the documentation or other materials provided with the distribution
19  * and in all advertising materials mentioning features or use of this
20  * software. Neither the name of the University nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <stdarg.h>
32 #include <libadm.h>
33 #include <note.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/mntent.h>
37 #include <sys/filio.h>
38 #include <sys/fs/ufs_fs.h>
39 #include <sys/vnode.h>
40 #include <sys/fs/ufs_acl.h>
41 #include <sys/fs/ufs_inode.h>
42 #include <sys/fs/ufs_log.h>
43 #define	_KERNEL
44 #include <sys/fs/ufs_fsdir.h>
45 #undef _KERNEL
46 #include <sys/mnttab.h>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <fcntl.h>
50 #include <signal.h>
51 #include <string.h>
52 #include <ctype.h>
53 #include <sys/vfstab.h>
54 #include <sys/lockfs.h>
55 #include <errno.h>
56 #include <sys/cmn_err.h>
57 #include <sys/dkio.h>
58 #include <sys/vtoc.h>
59 #include <sys/efi_partition.h>
60 #include <fslib.h>
61 #include <inttypes.h>
62 #include "fsck.h"
63 
/*
 * Buffer pointers that bufinit() and ckfini() reset to NULL.
 * NOTE(review): their consumers are not in this part of the file.
 */
struct bufarea *pbp;
struct bufarea *pdirbp;
/* Private copy of the mnttab mount point; filled in once by mounted(). */
caddr_t mount_point = NULL;
static struct bufarea bufhead;	/* head of list of other blks in filesys */
/*
 * NOTE(review): presumably error-lock bookkeeping; neither pointer is
 * referenced in the portion of the file reviewed here -- confirm.
 */
char *elock_combuf;
char *elock_mountp;
static struct lockfs *lfp;		/* current lockfs status */

/*
 * getdatablk() bumps totalreads on every request and diskreads on each
 * request that had to go to getblk(); ckfini() reports both under debug.
 */
static int64_t diskreads, totalreads;	/* Disk cache statistics */

/* Forward declarations of the helpers that are private to this file. */
static int log_checksum(int32_t *, int32_t *, int);
static void vdirerror(fsck_ino_t, caddr_t, va_list);
static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
static void vpwarn(caddr_t, va_list);
static int getaline(FILE *, caddr_t, int);
static struct bufarea *alloc_bufarea(void);
static void rwerror(caddr_t, diskaddr_t, int rval);
static void debugclean(void);
static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
static void freelogblk(daddr32_t);
static void verrexit(caddr_t, va_list);
static void vpfatal(caddr_t, va_list);
static diskaddr_t get_device_size(int, caddr_t);
static diskaddr_t brute_force_get_device_size(int);
static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
	    daddr32_t *, daddr32_t *, daddr32_t *);
91 
92 int
93 ftypeok(struct dinode *dp)
94 {
95 	switch (dp->di_mode & IFMT) {
96 
97 	case IFDIR:
98 	case IFREG:
99 	case IFBLK:
100 	case IFCHR:
101 	case IFLNK:
102 	case IFSOCK:
103 	case IFIFO:
104 	case IFSHAD:
105 	case IFATTRDIR:
106 		return (1);
107 
108 	default:
109 		if (debug)
110 			(void) printf("bad file type 0%o\n", dp->di_mode);
111 		return (0);
112 	}
113 }
114 
115 int
116 acltypeok(struct dinode *dp)
117 {
118 	if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
119 		return (1);
120 
121 	if (debug)
122 		(void) printf("bad file type for acl I=%d: 0%o\n",
123 		    dp->di_shadow, dp->di_mode);
124 	return (0);
125 }
126 
127 NOTE(PRINTFLIKE(1))
128 int
129 reply(caddr_t fmt, ...)
130 {
131 	va_list ap;
132 	char line[80];
133 
134 	if (preen)
135 		pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
136 
137 	if (mflag) {
138 		/*
139 		 * We don't know what's going on, so don't potentially
140 		 * make things worse by having errexit() write stuff
141 		 * out to disk.
142 		 */
143 		(void) printf(
144 		    "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
145 		    devname);
146 		exit(EXERRFATAL);
147 	}
148 
149 	va_start(ap, fmt);
150 	(void) putchar('\n');
151 	(void) vprintf(fmt, ap);
152 	(void) putchar('?');
153 	(void) putchar(' ');
154 	va_end(ap);
155 
156 	if (nflag || fswritefd < 0) {
157 		(void) printf(" no\n\n");
158 		return (0);
159 	}
160 	if (yflag) {
161 		(void) printf(" yes\n\n");
162 		return (1);
163 	}
164 	(void) fflush(stdout);
165 	if (getaline(stdin, line, sizeof (line)) == EOF)
166 		errexit("\n");
167 	(void) printf("\n");
168 	if (line[0] == 'y' || line[0] == 'Y') {
169 		return (1);
170 	} else {
171 		return (0);
172 	}
173 }
174 
/*
 * Read one line from FP into LOC, dropping every whitespace character
 * and silently discarding whatever does not fit in MAXLEN - 1 bytes.
 * The newline is consumed but not stored.
 *
 * Returns the number of characters stored (LOC is NUL-terminated), or
 * EOF if end-of-file is reached before a newline, in which case LOC
 * is not terminated.
 */
int
getaline(FILE *fp, caddr_t loc, int maxlen)
{
	caddr_t out = loc;
	caddr_t limit = loc + (maxlen - 1);
	int ch;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		if (isspace(ch) || out >= limit)
			continue;
		*out++ = (char)ch;
	}
	*out = '\0';
	/* LINTED pointer difference won't overflow */
	return (out - loc);
}
193 
194 /*
195  * Malloc buffers and set up cache.
196  */
197 void
198 bufinit(void)
199 {
200 	struct bufarea *bp;
201 	int bufcnt, i;
202 	caddr_t bufp;
203 
204 	bufp = malloc((size_t)sblock.fs_bsize);
205 	if (bufp == NULL)
206 		goto nomem;
207 	initbarea(&cgblk);
208 	cgblk.b_un.b_buf = bufp;
209 	bufhead.b_next = bufhead.b_prev = &bufhead;
210 	bufcnt = MAXBUFSPACE / sblock.fs_bsize;
211 	if (bufcnt < MINBUFS)
212 		bufcnt = MINBUFS;
213 	for (i = 0; i < bufcnt; i++) {
214 		bp = (struct bufarea *)malloc(sizeof (struct bufarea));
215 		if (bp == NULL) {
216 			if (i >= MINBUFS)
217 				goto noalloc;
218 			goto nomem;
219 		}
220 
221 		bufp = malloc((size_t)sblock.fs_bsize);
222 		if (bufp == NULL) {
223 			free((void *)bp);
224 			if (i >= MINBUFS)
225 				goto noalloc;
226 			goto nomem;
227 		}
228 		initbarea(bp);
229 		bp->b_un.b_buf = bufp;
230 		bp->b_prev = &bufhead;
231 		bp->b_next = bufhead.b_next;
232 		bufhead.b_next->b_prev = bp;
233 		bufhead.b_next = bp;
234 	}
235 noalloc:
236 	bufhead.b_size = i;	/* save number of buffers */
237 	pbp = pdirbp = NULL;
238 	return;
239 
240 nomem:
241 	errexit("cannot allocate buffer pool\n");
242 	/* NOTREACHED */
243 }
244 
245 /*
246  * Undo a bufinit().
247  */
248 void
249 unbufinit(void)
250 {
251 	int cnt;
252 	struct bufarea *bp, *nbp;
253 
254 	cnt = 0;
255 	for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
256 		cnt++;
257 		flush(fswritefd, bp);
258 		nbp = bp->b_prev;
259 		/*
260 		 * We're discarding the entire chain, so this isn't
261 		 * technically necessary.  However, it doesn't hurt
262 		 * and lint's data flow analysis is much happier
263 		 * (this prevents it from thinking there's a chance
264 		 * of our using memory elsewhere after it's been released).
265 		 */
266 		nbp->b_next = bp->b_next;
267 		bp->b_next->b_prev = nbp;
268 		free((void *)bp->b_un.b_buf);
269 		free((void *)bp);
270 	}
271 
272 	if (bufhead.b_size != cnt)
273 		errexit("Panic: cache lost %d buffers\n",
274 		    bufhead.b_size - cnt);
275 }
276 
277 /*
278  * Manage a cache of directory blocks.
279  */
280 struct bufarea *
281 getdatablk(daddr32_t blkno, size_t size)
282 {
283 	struct bufarea *bp;
284 
285 	for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
286 		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
287 			goto foundit;
288 		}
289 	for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
290 		if ((bp->b_flags & B_INUSE) == 0)
291 			break;
292 	if (bp == &bufhead) {
293 		bp = alloc_bufarea();
294 		if (bp == NULL) {
295 			errexit("deadlocked buffer pool\n");
296 			/* NOTREACHED */
297 		}
298 	}
299 	/*
300 	 * We're at the same logical level as getblk(), so if there
301 	 * are any errors, we'll let our caller handle them.
302 	 */
303 	diskreads++;
304 	(void) getblk(bp, blkno, size);
305 
306 foundit:
307 	totalreads++;
308 	bp->b_cnt++;
309 	/*
310 	 * Move the buffer to head of linked list if it isn't
311 	 * already there.
312 	 */
313 	if (bufhead.b_next != bp) {
314 		bp->b_prev->b_next = bp->b_next;
315 		bp->b_next->b_prev = bp->b_prev;
316 		bp->b_prev = &bufhead;
317 		bp->b_next = bufhead.b_next;
318 		bufhead.b_next->b_prev = bp;
319 		bufhead.b_next = bp;
320 	}
321 	bp->b_flags |= B_INUSE;
322 	return (bp);
323 }
324 
325 void
326 brelse(struct bufarea *bp)
327 {
328 	bp->b_cnt--;
329 	if (bp->b_cnt == 0) {
330 		bp->b_flags &= ~B_INUSE;
331 	}
332 }
333 
334 struct bufarea *
335 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
336 {
337 	diskaddr_t dblk;
338 
339 	dblk = fsbtodb(&sblock, blk);
340 	if (bp->b_bno == dblk)
341 		return (bp);
342 	flush(fswritefd, bp);
343 	bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
344 	bp->b_bno = dblk;
345 	bp->b_size = size;
346 	return (bp);
347 }
348 
349 void
350 flush(int fd, struct bufarea *bp)
351 {
352 	int i, j;
353 	caddr_t sip;
354 	long size;
355 
356 	if (!bp->b_dirty)
357 		return;
358 
359 	/*
360 	 * It's not our buf, so if there are errors, let whoever
361 	 * acquired it deal with the actual problem.
362 	 */
363 	if (bp->b_errs != 0)
364 		pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
365 	bp->b_dirty = 0;
366 	bp->b_errs = 0;
367 	bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
368 	if (bp != &sblk) {
369 		return;
370 	}
371 
372 	/*
373 	 * We're flushing the superblock, so make sure all the
374 	 * ancillary bits go out as well.
375 	 */
376 	sip = (caddr_t)sblock.fs_u.fs_csp;
377 	for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
378 		size = sblock.fs_cssize - i < sblock.fs_bsize ?
379 		    sblock.fs_cssize - i : sblock.fs_bsize;
380 		bwrite(fswritefd, sip,
381 		    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
382 		    size);
383 		sip += size;
384 	}
385 }
386 
387 static void
388 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
389 {
390 	int olderr = errno;
391 
392 	if (!preen)
393 		(void) printf("\n");
394 
395 	if (rval == -1)
396 		pfatal("CANNOT %s: DISK BLOCK %lld: %s",
397 		    mesg, blk, strerror(olderr));
398 	else
399 		pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
400 
401 	if (reply("CONTINUE") == 0) {
402 		exitstat = EXERRFATAL;
403 		errexit("Program terminated\n");
404 	}
405 }
406 
407 void
408 ckfini(void)
409 {
410 	int64_t percentage;
411 
412 	if (fswritefd < 0)
413 		return;
414 
415 	flush(fswritefd, &sblk);
416 	/*
417 	 * Were we using a backup superblock?
418 	 */
419 	if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
420 		if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
421 			sblk.b_bno = SBOFF / dev_bsize;
422 			sbdirty();
423 			flush(fswritefd, &sblk);
424 		}
425 	}
426 	flush(fswritefd, &cgblk);
427 	if (cgblk.b_un.b_buf != NULL) {
428 		free((void *)cgblk.b_un.b_buf);
429 		cgblk.b_un.b_buf = NULL;
430 	}
431 	unbufinit();
432 	pbp = NULL;
433 	pdirbp = NULL;
434 	if (debug) {
435 		/*
436 		 * Note that we only count cache-related reads.
437 		 * Anything that called fsck_bread() or getblk()
438 		 * directly are explicitly not cached, so they're not
439 		 * included here.
440 		 */
441 		if (totalreads != 0)
442 			percentage = diskreads * 100 / totalreads;
443 		else
444 			percentage = 0;
445 
446 		(void) printf("cache missed %lld of %lld reads (%lld%%)\n",
447 		    (longlong_t)diskreads, (longlong_t)totalreads,
448 		    (longlong_t)percentage);
449 	}
450 
451 	(void) close(fsreadfd);
452 	(void) close(fswritefd);
453 	fsreadfd = -1;
454 	fswritefd = -1;
455 }
456 
457 int
458 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
459 {
460 	caddr_t cp;
461 	int i;
462 	int errs;
463 	offset_t offset = ldbtob(blk);
464 	offset_t addr;
465 
466 	/*
467 	 * In our universe, nothing exists before the superblock, so
468 	 * just pretend it's always zeros.  This is the complement of
469 	 * bwrite()'s ignoring write requests into that space.
470 	 */
471 	if (blk < SBLOCK) {
472 		if (debug)
473 			(void) printf(
474 			    "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
475 			    SBLOCK, (longlong_t)blk);
476 		(void) memset(buf, 0, (size_t)size);
477 		return (1);
478 	}
479 
480 	if (llseek(fd, offset, SEEK_SET) < 0) {
481 		rwerror("SEEK", blk, -1);
482 	}
483 
484 	if ((i = read(fd, buf, size)) == size) {
485 		return (0);
486 	}
487 	rwerror("READ", blk, i);
488 	if (llseek(fd, offset, SEEK_SET) < 0) {
489 		rwerror("SEEK", blk, -1);
490 	}
491 	errs = 0;
492 	(void) memset(buf, 0, (size_t)size);
493 	pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
494 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
495 		addr = ldbtob(blk + i);
496 		if (llseek(fd, addr, SEEK_SET) < 0 ||
497 		    read(fd, cp, (int)secsize) < 0) {
498 			iscorrupt = 1;
499 			(void) printf(" %llu", blk + (u_longlong_t)i);
500 			errs++;
501 		}
502 	}
503 	(void) printf("\n");
504 	return (errs);
505 }
506 
507 void
508 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
509 {
510 	int i;
511 	int n;
512 	caddr_t cp;
513 	offset_t offset = ldbtob(blk);
514 	offset_t addr;
515 
516 	if (fd < 0)
517 		return;
518 	if (blk < SBLOCK) {
519 		if (debug)
520 			(void) printf(
521 		    "WARNING: Attempt to write illegal blkno %lld on %s\n",
522 			    (longlong_t)blk, devname);
523 		return;
524 	}
525 	if (llseek(fd, offset, SEEK_SET) < 0) {
526 		rwerror("SEEK", blk, -1);
527 	}
528 	if ((i = write(fd, buf, (int)size)) == size) {
529 		fsmodified = 1;
530 		return;
531 	}
532 	rwerror("WRITE", blk, i);
533 	if (llseek(fd, offset, SEEK_SET) < 0) {
534 		rwerror("SEEK", blk, -1);
535 	}
536 	pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
537 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
538 		n = 0;
539 		addr = ldbtob(blk + i);
540 		if (llseek(fd, addr, SEEK_SET) < 0 ||
541 		    (n = write(fd, cp, DEV_BSIZE)) < 0) {
542 			iscorrupt = 1;
543 			(void) printf(" %llu", blk + (u_longlong_t)i);
544 		} else if (n > 0) {
545 			fsmodified = 1;
546 		}
547 
548 	}
549 	(void) printf("\n");
550 }
551 
552 /*
553  * Allocates the specified number of contiguous fragments.
554  */
555 daddr32_t
556 allocblk(int wantedfrags)
557 {
558 	int block, leadfrag, tailfrag;
559 	daddr32_t selected;
560 	size_t size;
561 	struct bufarea *bp;
562 
563 	/*
564 	 * It's arguable whether we should just fail, or instead
565 	 * error out here.  Since we should only ever be asked for
566 	 * a single fragment or an entire block (i.e., sblock.fs_frag),
567 	 * we'll fail out because anything else means somebody
568 	 * changed code without considering all of the ramifications.
569 	 */
570 	if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
571 		exitstat = EXERRFATAL;
572 		errexit("allocblk() asked for %d frags.  "
573 		    "Legal range is 1 to %d",
574 		    wantedfrags, sblock.fs_frag);
575 	}
576 
577 	/*
578 	 * For each filesystem block, look at every possible starting
579 	 * offset within the block such that we can get the number of
580 	 * contiguous fragments that we need.  This is a drastically
581 	 * simplified version of the kernel's mapsearch() and alloc*().
582 	 * It's also correspondingly slower.
583 	 */
584 	for (block = 0; block < maxfsblock - sblock.fs_frag;
585 	    block += sblock.fs_frag) {
586 		for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
587 		    leadfrag++) {
588 			/*
589 			 * Is first fragment of candidate run available?
590 			 */
591 			if (testbmap(block + leadfrag))
592 				continue;
593 			/*
594 			 * Are the rest of them available?
595 			 */
596 			for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
597 				if (testbmap(block + leadfrag + tailfrag))
598 					break;
599 			if (tailfrag < wantedfrags) {
600 				/*
601 				 * No, skip the known-unusable run.
602 				 */
603 				leadfrag += tailfrag;
604 				continue;
605 			}
606 			/*
607 			 * Found what we need, so claim them.
608 			 */
609 			for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
610 				setbmap(block + leadfrag + tailfrag);
611 			n_blks += wantedfrags;
612 			size = wantedfrags * sblock.fs_fsize;
613 			selected = block + leadfrag;
614 			bp = getdatablk(selected, size);
615 			(void) memset((void *)bp->b_un.b_buf, 0, size);
616 			dirty(bp);
617 			brelse(bp);
618 			if (debug)
619 				(void) printf(
620 		    "allocblk: selected %d (in block %d), frags %d, size %d\n",
621 				    selected, selected % sblock.fs_bsize,
622 				    wantedfrags, (int)size);
623 			return (selected);
624 		}
625 	}
626 	return (0);
627 }
628 
629 /*
630  * Free a previously allocated block
631  */
632 void
633 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
634 {
635 	struct inodesc idesc;
636 
637 	if (debug)
638 		(void) printf("debug: freeing %d fragments starting at %d\n",
639 		    frags, blkno);
640 
641 	init_inodesc(&idesc);
642 
643 	idesc.id_number = ino;
644 	idesc.id_blkno = blkno;
645 	idesc.id_numfrags = frags;
646 	idesc.id_truncto = -1;
647 
648 	/*
649 	 * Nothing in the return status has any relevance to how
650 	 * we're using pass4check(), so just ignore it.
651 	 */
652 	(void) pass4check(&idesc);
653 }
654 
655 /*
656  * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
657  * that the given buffer is at least MAXPATHLEN + 1 characters.
658  */
659 void
660 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
661 {
662 	int len;
663 	caddr_t cp;
664 	struct dinode *dp;
665 	struct inodesc idesc;
666 	struct inoinfo *inp;
667 
668 	if (debug)
669 		(void) printf("debug: getpathname(curdir %d, ino %d)\n",
670 		    curdir, ino);
671 
672 	if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
673 		(void) strcpy(namebuf, "?");
674 		return;
675 	}
676 
677 	if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
678 		(void) strcpy(namebuf, "/");
679 		return;
680 	}
681 
682 	init_inodesc(&idesc);
683 	idesc.id_type = DATA;
684 	cp = &namebuf[MAXPATHLEN - 1];
685 	*cp = '\0';
686 
687 	/*
688 	 * In the case of extended attributes, our
689 	 * parent won't necessarily be a directory, so just
690 	 * return what we've found with a prefix indicating
691 	 * that it's an XATTR.  Presumably our caller will
692 	 * know what's going on and do something useful, like
693 	 * work out the path of the parent and then combine
694 	 * the two names.
695 	 *
696 	 * Can't use strcpy(), etc, because we've probably
697 	 * already got some name information in the buffer and
698 	 * the usual trailing \0 would lose it.
699 	 */
700 	dp = ginode(curdir);
701 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
702 		idesc.id_number = curdir;
703 		idesc.id_parent = ino;
704 		idesc.id_func = findname;
705 		idesc.id_name = namebuf;
706 		idesc.id_fix = NOFIX;
707 		if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
708 			*cp-- = '?';
709 		}
710 
711 		len = sizeof (XATTR_DIR_NAME) - 1;
712 		cp -= len;
713 		(void) memmove(cp, XATTR_DIR_NAME, len);
714 		goto attrname;
715 	}
716 
717 	/*
718 	 * If curdir == ino, need to get a handle on .. so we
719 	 * can search it for ino's name.  Otherwise, just search
720 	 * the given directory for ino.  Repeat until out of space
721 	 * or a full path has been built.
722 	 */
723 	if (curdir != ino) {
724 		idesc.id_parent = curdir;
725 		goto namelookup;
726 	}
727 	while (ino != UFSROOTINO && ino != 0) {
728 		idesc.id_number = ino;
729 		idesc.id_func = findino;
730 		idesc.id_name = "..";
731 		idesc.id_fix = NOFIX;
732 		if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
733 			inp = getinoinfo(ino);
734 			if ((inp == NULL) || (inp->i_parent == 0)) {
735 				break;
736 			}
737 			idesc.id_parent = inp->i_parent;
738 		}
739 
740 		/*
741 		 * To get this far, id_parent must have the inode
742 		 * number for `..' in it.  By definition, that's got
743 		 * to be a directory, so search it for the inode of
744 		 * interest.
745 		 */
746 namelookup:
747 		idesc.id_number = idesc.id_parent;
748 		idesc.id_parent = ino;
749 		idesc.id_func = findname;
750 		idesc.id_name = namebuf;
751 		idesc.id_fix = NOFIX;
752 		if ((ckinode(ginode(idesc.id_number),
753 		    &idesc, CKI_TRAVERSE) & FOUND) == 0) {
754 			break;
755 		}
756 		/*
757 		 * Prepend to what we've accumulated so far.  If
758 		 * there's not enough room for even one more path element
759 		 * (of the worst-case length), then bail out.
760 		 */
761 		len = strlen(namebuf);
762 		cp -= len;
763 		if (cp < &namebuf[MAXNAMLEN])
764 			break;
765 		(void) memmove(cp, namebuf, len);
766 		*--cp = '/';
767 
768 		/*
769 		 * Corner case for a looped-to-itself directory.
770 		 */
771 		if (ino == idesc.id_number)
772 			break;
773 
774 		/*
775 		 * Climb one level of the hierarchy.  In other words,
776 		 * the current .. becomes the inode to search for and
777 		 * its parent becomes the directory to search in.
778 		 */
779 		ino = idesc.id_number;
780 	}
781 
782 	/*
783 	 * If we hit a discontinuity in the hierarchy, indicate it by
784 	 * prefixing the path so far with `?'.  Otherwise, the first
785 	 * character will be `/' as a side-effect of the *--cp above.
786 	 *
787 	 * The special case is to handle the situation where we're
788 	 * trying to look something up in UFSROOTINO, but didn't find
789 	 * it.
790 	 */
791 	if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
792 		if (cp > namebuf)
793 			cp--;
794 		*cp = '?';
795 	}
796 
797 	/*
798 	 * The invariants being used for buffer integrity are:
799 	 * - namebuf[] is terminated with \0 before anything else
800 	 * - cp is always <= the last element of namebuf[]
801 	 * - the new path element is always stored at the
802 	 *   beginning of namebuf[], and is no more than MAXNAMLEN-1
803 	 *   characters
804 	 * - cp is is decremented by the number of characters in
805 	 *   the new path element
806 	 * - if, after the above accounting for the new element's
807 	 *   size, there is no longer enough room at the beginning of
808 	 *   namebuf[] for a full-sized path element and a slash,
809 	 *   terminate the loop.  cp is in the range
810 	 *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
811 	 */
812 attrname:
813 	/* LINTED per the above discussion */
814 	(void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
815 }
816 
817 /* ARGSUSED */
818 void
819 catch(int dummy)
820 {
821 	ckfini();
822 	exit(EXSIGNAL);
823 }
824 
825 /*
826  * When preening, allow a single quit to signal
827  * a special exit after filesystem checks complete
828  * so that reboot sequence may be interrupted.
829  */
830 /* ARGSUSED */
831 void
832 catchquit(int dummy)
833 {
834 	(void) printf("returning to single-user after filesystem check\n");
835 	interrupted = 1;
836 	(void) signal(SIGQUIT, SIG_DFL);
837 }
838 
839 
840 /*
841  * determine whether an inode should be fixed.
842  */
843 NOTE(PRINTFLIKE(2))
844 int
845 dofix(struct inodesc *idesc, caddr_t msg, ...)
846 {
847 	int rval = 0;
848 	va_list ap;
849 
850 	va_start(ap, msg);
851 
852 	switch (idesc->id_fix) {
853 
854 	case DONTKNOW:
855 		if (idesc->id_type == DATA)
856 			vdirerror(idesc->id_number, msg, ap);
857 		else
858 			vpwarn(msg, ap);
859 		if (preen) {
860 			idesc->id_fix = FIX;
861 			rval = ALTERED;
862 			break;
863 		}
864 		if (reply("SALVAGE") == 0) {
865 			idesc->id_fix = NOFIX;
866 			break;
867 		}
868 		idesc->id_fix = FIX;
869 		rval = ALTERED;
870 		break;
871 
872 	case FIX:
873 		rval = ALTERED;
874 		break;
875 
876 	case NOFIX:
877 		break;
878 
879 	default:
880 		errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
881 	}
882 
883 	va_end(ap);
884 	return (rval);
885 }
886 
887 NOTE(PRINTFLIKE(1))
888 void
889 errexit(caddr_t fmt, ...)
890 {
891 	va_list ap;
892 
893 	va_start(ap, fmt);
894 	verrexit(fmt, ap);
895 	/* NOTREACHED */
896 }
897 
898 NOTE(PRINTFLIKE(1))
899 static void
900 verrexit(caddr_t fmt, va_list ap)
901 {
902 	static int recursing = 0;
903 
904 	if (!recursing) {
905 		recursing = 1;
906 		if (errorlocked || iscorrupt) {
907 			if (havesb && fswritefd >= 0) {
908 				sblock.fs_clean = FSBAD;
909 				sblock.fs_state = FSOKAY - (long)sblock.fs_time;
910 				sblock.fs_state = -sblock.fs_state;
911 				sbdirty();
912 				write_altsb(fswritefd);
913 				flush(fswritefd, &sblk);
914 			}
915 		}
916 		ckfini();
917 		recursing = 0;
918 	}
919 	(void) vprintf(fmt, ap);
920 	if (fmt[strlen(fmt) - 1] != '\n')
921 		(void) putchar('\n');
922 	exit((exitstat != 0) ? exitstat : EXERRFATAL);
923 }
924 
925 /*
926  * An unexpected inconsistency occured.
927  * Die if preening, otherwise just print message and continue.
928  */
929 NOTE(PRINTFLIKE(1))
930 void
931 pfatal(caddr_t fmt, ...)
932 {
933 	va_list ap;
934 
935 	va_start(ap, fmt);
936 	vpfatal(fmt, ap);
937 	va_end(ap);
938 }
939 
940 NOTE(PRINTFLIKE(1))
941 static void
942 vpfatal(caddr_t fmt, va_list ap)
943 {
944 	if (preen) {
945 		if (*fmt != '\0') {
946 			(void) printf("%s: ", devname);
947 			(void) vprintf(fmt, ap);
948 			(void) printf("\n");
949 		}
950 		(void) printf(
951 		    "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
952 		    devname);
953 		if (havesb && fswritefd >= 0) {
954 			sblock.fs_clean = FSBAD;
955 			sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
956 			sbdirty();
957 			flush(fswritefd, &sblk);
958 		}
959 		/*
960 		 * We're exiting, it doesn't really matter that our
961 		 * caller doesn't get to call va_end().
962 		 */
963 		if (exitstat == 0)
964 			exitstat = EXFNDERRS;
965 		exit(exitstat);
966 	}
967 	if (*fmt != '\0') {
968 		(void) vprintf(fmt, ap);
969 	}
970 }
971 
972 /*
973  * Pwarn just prints a message when not preening,
974  * or a warning (preceded by filename) when preening.
975  */
976 NOTE(PRINTFLIKE(1))
977 void
978 pwarn(caddr_t fmt, ...)
979 {
980 	va_list ap;
981 
982 	va_start(ap, fmt);
983 	vpwarn(fmt, ap);
984 	va_end(ap);
985 }
986 
987 NOTE(PRINTFLIKE(1))
988 static void
989 vpwarn(caddr_t fmt, va_list ap)
990 {
991 	if (*fmt != '\0') {
992 		if (preen)
993 			(void) printf("%s: ", devname);
994 		(void) vprintf(fmt, ap);
995 	}
996 }
997 
998 /*
999  * Like sprintf(), except the buffer is dynamically allocated
1000  * and returned, instead of being passed in.  A pointer to the
1001  * buffer is stored in *RET, and FMT is the usual format string.
1002  * The number of characters in *RET (excluding the trailing \0,
1003  * to be consistent with the other *printf() routines) is returned.
1004  *
1005  * Solaris doesn't have asprintf(3C) yet, unfortunately.
1006  */
1007 NOTE(PRINTFLIKE(2))
1008 int
1009 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1010 {
1011 	int len;
1012 	caddr_t buffer;
1013 	va_list ap;
1014 
1015 	va_start(ap, fmt);
1016 	len = vsnprintf(NULL, 0, fmt, ap);
1017 	va_end(ap);
1018 
1019 	buffer = malloc((len + 1) * sizeof (char));
1020 	if (buffer == NULL) {
1021 		errexit("Out of memory in asprintf\n");
1022 		/* NOTREACHED */
1023 	}
1024 
1025 	va_start(ap, fmt);
1026 	(void) vsnprintf(buffer, len + 1, fmt, ap);
1027 	va_end(ap);
1028 
1029 	*ret = buffer;
1030 	return (len);
1031 }
1032 
1033 /*
1034  * So we can take advantage of kernel routines in ufs_subr.c.
1035  */
1036 /* PRINTFLIKE2 */
1037 void
1038 cmn_err(int level, caddr_t fmt, ...)
1039 {
1040 	va_list ap;
1041 
1042 	va_start(ap, fmt);
1043 	if (level == CE_PANIC) {
1044 		(void) printf("INTERNAL INCONSISTENCY:");
1045 		verrexit(fmt, ap);
1046 	} else {
1047 		(void) vprintf(fmt, ap);
1048 	}
1049 	va_end(ap);
1050 }
1051 
1052 /*
1053  * Check to see if unraw version of name is already mounted.
1054  * Updates devstr with the device name if devstr is not NULL
1055  * and str_size is positive.
1056  */
1057 int
1058 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1059 {
1060 	int found;
1061 	struct mnttab *mntent;
1062 
1063 	mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1064 	if (mntent == NULL)
1065 		return (M_NOMNT);
1066 
1067 	/*
1068 	 * It's mounted.  With or without write access?
1069 	 */
1070 	if (hasmntopt(mntent, MNTOPT_RO) != 0)
1071 		found = M_RO;	/* mounted as RO */
1072 	else
1073 		found = M_RW;	/* mounted as R/W */
1074 
1075 	if (mount_point == NULL) {
1076 		mount_point = strdup(mntent->mnt_mountp);
1077 		if (mount_point == NULL) {
1078 			errexit("fsck: memory allocation failure: %s",
1079 			    strerror(errno));
1080 			/* NOTREACHED */
1081 		}
1082 
1083 		if (devstr != NULL && str_size > 0)
1084 			(void) strlcpy(devstr, mntent->mnt_special, str_size);
1085 	}
1086 
1087 	return (found);
1088 }
1089 
1090 /*
1091  * Check to see if name corresponds to an entry in vfstab, and that the entry
1092  * does not have option ro.
1093  */
1094 int
1095 writable(caddr_t name)
1096 {
1097 	int rw = 1;
1098 	struct vfstab vfsbuf, vfskey;
1099 	FILE *vfstab;
1100 
1101 	vfstab = fopen(VFSTAB, "r");
1102 	if (vfstab == NULL) {
1103 		(void) printf("can't open %s\n", VFSTAB);
1104 		return (1);
1105 	}
1106 	(void) memset((void *)&vfskey, 0, sizeof (vfskey));
1107 	vfsnull(&vfskey);
1108 	vfskey.vfs_special = unrawname(name);
1109 	vfskey.vfs_fstype = MNTTYPE_UFS;
1110 	if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1111 	    (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1112 		rw = 0;
1113 	}
1114 	(void) fclose(vfstab);
1115 	return (rw);
1116 }
1117 
1118 /*
1119  * debugclean
1120  */
1121 static void
1122 debugclean(void)
1123 {
1124 	if (!debug)
1125 		return;
1126 
1127 	if ((iscorrupt == 0) && (isdirty == 0))
1128 		return;
1129 
1130 	if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1131 	    (sblock.fs_clean == FSLOG && islog && islogok) ||
1132 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1133 		return;
1134 
1135 	(void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1136 	    sblock.fs_clean == FSSTABLE ? "stable" :
1137 	    sblock.fs_clean == FSLOG ? "logging" :
1138 	    sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1139 	    devname);
1140 }
1141 
/*
 * updateclean
 *	Carefully and transparently update the clean flag.
 *
 * Works out what fs_clean, fs_reclaim, fs_logbno and fs_flags ought to
 * be given the outcome of the check, stores them in the in-core
 * superblock (sblock) and, if the file system is open for writing,
 * patches just those fields (plus fs_state and fs_time) into a freshly
 * read copy of the on-disk superblock and writes that copy back.
 *
 * `iscorrupt' has to be in its final state before this is called.
 *
 * Returns non-zero if the log pointer was changed and the log's blocks
 * were passed to freelogblk(), zero otherwise.
 */
int
updateclean(void)
{
	int freedlog = 0;
	struct bufarea cleanbuf;
	size_t size;
	ssize_t io_res;
	diskaddr_t bno;
	char fsclean;
	int fsreclaim;
	char fsflags;
	int flags_ok = 1;
	daddr32_t fslogbno;
	offset_t sblkoff;
	time_t t;

	/*
	 * debug stuff
	 */
	debugclean();

	/*
	 * set fsclean to its appropriate value
	 *
	 * Start from what the superblock currently claims; a bad
	 * fs_state/fs_time checksum (unless we are error-locked) means
	 * the recorded state cannot be trusted, so treat it as active.
	 */
	fslogbno = sblock.fs_logbno;
	fsclean = sblock.fs_clean;
	fsreclaim = sblock.fs_reclaim;
	fsflags = sblock.fs_flags;
	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
		fsclean = FSACTIVE;
	}
	/*
	 * If ufs log is not okay, note that we need to clear it.
	 * (examinelog() with no callback only re-evaluates islogok.)
	 */
	examinelog(NULL);
	if (fslogbno && !(islog && islogok)) {
		fsclean = FSACTIVE;
		fslogbno = 0;
	}

	/*
	 * if necessary, update fs_clean and fs_state
	 */
	switch (fsclean) {

	case FSACTIVE:
		/* active but consistent: it can be declared stable */
		if (!iscorrupt) {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		}
		break;

	case FSCLEAN:
	case FSSTABLE:
		/* claimed good: demote if we found problems */
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else {
			fsreclaim = 0;
		}
		break;

	case FSLOG:
		/*
		 * Logging state is only kept when the fs is consistent
		 * and still has a log; fs_reclaim is left alone in that
		 * case unless fflag is set.
		 */
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else if (!islog || fslogbno == 0) {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		} else if (fflag) {
			fsreclaim = 0;
		}
		break;

	case FSFIX:
		/* bad, unless this was an error-locked fs we fully repaired */
		fsclean = FSBAD;
		if (errorlocked && !iscorrupt) {
			fsclean = islog ? FSLOG : FSCLEAN;
		}
		break;

	default:
		/* unrecognized value: decide purely on what we found */
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		}
	}

	/* make the largefiles flag reflect what the scan actually saw */
	if (largefile_count > 0)
		fsflags |= FSLARGEFILES;
	else
		fsflags &= ~FSLARGEFILES;

	/*
	 * There can be two discrepancies here.  A) The superblock
	 * shows no largefiles but we found some while scanning.
	 * B) The superblock indicates the presence of largefiles,
	 * but none are present.  Note that if preening, the superblock
	 * is silently corrected.
	 *
	 * NOTE(review): these tests compare the whole flags byte with
	 * FSLARGEFILES rather than testing the bit; presumably no other
	 * flag bits are expected to be set -- confirm.
	 */
	if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
	    (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
		flags_ok = 0;

	if (debug)
		(void) printf(
		    "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
		    largefile_count, sblock.fs_flags, flags_ok);

	/*
	 * If fs is unchanged, do nothing.
	 * (An error lock we hold is still released on this path.)
	 */
	if ((!isdirty) && (flags_ok) &&
	    (fslogbno == sblock.fs_logbno) &&
	    (sblock.fs_clean == fsclean) &&
	    (sblock.fs_reclaim == fsreclaim) &&
	    (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
		if (errorlocked) {
			if (!do_errorlock(LOCKFS_ULOCK))
				pwarn(
		    "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
		}
		return (freedlog);
	}

	/*
	 * if user allows, update superblock state
	 */
	if (debug) {
		(void) printf(
	    "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
		    sblock.fs_flags, sblock.fs_logbno,
		    sblock.fs_clean, sblock.fs_reclaim,
		    sblock.fs_state + sblock.fs_time);
		(void) printf(
	    "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
		    fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
	}
	/* only ask when nothing else was modified and we're interactive */
	if (!isdirty && !preen && !rerun &&
	    (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
		return (freedlog);

	(void) time(&t);
	sblock.fs_time = (time32_t)t;
	if (debug)
		printclean();

	/*
	 * The log pointer is changing, so walk the old log (still
	 * referenced by sblock.fs_logbno at this point) and hand each
	 * of its fragments to freelogblk().
	 */
	if (sblock.fs_logbno != fslogbno) {
		examinelog(&freelogblk);
		freedlog++;
	}

	/*
	 * Commit the calculated values to the in-core superblock.
	 * fs_state is chosen so that fs_state + fs_time == FSOKAY.
	 */
	sblock.fs_logbno = fslogbno;
	sblock.fs_clean = fsclean;
	sblock.fs_state = FSOKAY - (long)sblock.fs_time;
	sblock.fs_reclaim = fsreclaim;
	sblock.fs_flags = fsflags;

	/*
	 * if superblock can't be written, return
	 */
	if (fswritefd < 0)
		return (freedlog);

	/*
	 * Read private copy of superblock, update clean flag, and write it.
	 */
	bno  = sblk.b_bno;
	size = sblk.b_size;

	sblkoff = ldbtob(bno);

	if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
		errexit("out of memory");
	if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
		    (longlong_t)bno, strerror(errno));
		goto out;
	}

	if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
		report_io_prob("READ FROM", bno, size, io_res);
		goto out;
	}

	/* transfer only the fields this routine is responsible for */
	cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
	cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
	cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
	cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
	cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
	cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;

	if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
		    (longlong_t)bno, strerror(errno));
		goto out;
	}

	if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
		report_io_prob("WRITE TO", bno, size, io_res);
		goto out;
	}

	/*
	 * 1208040
	 * If we had to use -b to grab an alternate superblock, then we
	 * likely had to do so because of unacceptable differences between
	 * the main and alternate superblocks.  So, we had better update
	 * the alternate superblock as well, or we'll just fail again
	 * the next time we attempt to run fsck!
	 */
	if (bflag != 0) {
		write_altsb(fswritefd);
	}

	/*
	 * Note that the error lock is only released once the superblock
	 * has been written; the I/O failure paths above skip this.
	 */
	if (errorlocked) {
		if (!do_errorlock(LOCKFS_ULOCK))
			pwarn(
		    "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
	}

out:
	if (cleanbuf.b_un.b_buf != NULL) {
		free((void *)cleanbuf.b_un.b_buf);
	}

	return (freedlog);
}
1376 
1377 static void
1378 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1379 {
1380 	if (failure < 0)
1381 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1382 		    what, (int)bno, strerror(errno));
1383 	else if (failure == 0)
1384 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1385 		    what, (int)bno);
1386 	else
1387 		(void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1388 		    what, (int)bno, (unsigned)failure, (unsigned)expected);
1389 }
1390 
1391 /*
1392  * print out clean info
1393  */
1394 void
1395 printclean(void)
1396 {
1397 	caddr_t s;
1398 
1399 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1400 		s = "unknown";
1401 	else
1402 		switch (sblock.fs_clean) {
1403 
1404 		case FSACTIVE:
1405 			s = "active";
1406 			break;
1407 
1408 		case FSCLEAN:
1409 			s = "clean";
1410 			break;
1411 
1412 		case FSSTABLE:
1413 			s = "stable";
1414 			break;
1415 
1416 		case FSLOG:
1417 			s = "logging";
1418 			break;
1419 
1420 		case FSBAD:
1421 			s = "is bad";
1422 			break;
1423 
1424 		case FSFIX:
1425 			s = "being fixed";
1426 			break;
1427 
1428 		default:
1429 			s = "unknown";
1430 		}
1431 
1432 	if (preen)
1433 		pwarn("is %s.\n", s);
1434 	else
1435 		(void) printf("** %s is %s.\n", devname, s);
1436 }
1437 
1438 int
1439 is_errorlocked(caddr_t fs)
1440 {
1441 	int		retval;
1442 	struct stat64	statb;
1443 	caddr_t		mountp;
1444 	struct mnttab	*mntent;
1445 
1446 	retval = 0;
1447 
1448 	if (!fs)
1449 		return (0);
1450 
1451 	if (stat64(fs, &statb) < 0)
1452 		return (0);
1453 
1454 	if (S_ISDIR(statb.st_mode)) {
1455 		mountp = fs;
1456 	} else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1457 		mntent = search_mnttab(NULL, fs, NULL, 0);
1458 		if (mntent == NULL)
1459 			return (0);
1460 		mountp = mntent->mnt_mountp;
1461 		if (mountp == NULL) /* theoretically a can't-happen */
1462 			return (0);
1463 	} else {
1464 		return (0);
1465 	}
1466 
1467 	/*
1468 	 * From here on, must `goto out' to avoid memory leakage.
1469 	 */
1470 
1471 	if (elock_combuf == NULL)
1472 		elock_combuf =
1473 		    (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1474 	else
1475 		elock_combuf =
1476 		    (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1477 
1478 	if (elock_combuf == NULL)
1479 		goto out;
1480 
1481 	(void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1482 
1483 	if (elock_mountp != NULL) {
1484 		free(elock_mountp);
1485 	}
1486 
1487 	elock_mountp = strdup(mountp);
1488 	if (elock_mountp == NULL)
1489 		goto out;
1490 
1491 	if (mountfd < 0) {
1492 		if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1493 			goto out;
1494 	}
1495 
1496 	if (lfp == NULL) {
1497 		lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1498 		if (lfp == NULL)
1499 			goto out;
1500 		(void) memset((void *)lfp, 0, sizeof (struct lockfs));
1501 	}
1502 
1503 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1504 	lfp->lf_comment = elock_combuf;
1505 
1506 	if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1507 		goto out;
1508 
1509 	/*
1510 	 * lint believes that the ioctl() (or any other function
1511 	 * taking lfp as an arg) could free lfp.  This is not the
1512 	 * case, however.
1513 	 */
1514 	retval = LOCKFS_IS_ELOCK(lfp);
1515 
1516 out:
1517 	return (retval);
1518 }
1519 
1520 /*
1521  * Given a name which is known to be a directory, see if it appears
1522  * in the vfstab.  If so, return the entry's block (special) device
1523  * field via devstr.
1524  */
1525 int
1526 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1527 {
1528 	return (NULL != search_vfstab(name, NULL, devstr, str_size));
1529 }
1530 
1531 /*
1532  * Given a name which is known to be a directory, see if it appears
1533  * in the mnttab.  If so, return the entry's block (special) device
1534  * field via devstr.
1535  */
1536 int
1537 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1538 {
1539 	return (NULL != search_mnttab(name, NULL, devstr, str_size));
1540 }
1541 
/*
 * Search for mount point and/or special device in the given file.
 * The first matching entry is returned.
 *
 * If an entry is found and str_size is greater than zero, then
 * up to str_size bytes of the special device name from the entry
 * are copied to devstr (always NUL-terminated, courtesy of strlcpy()).
 *
 * SEARCH_TAB_BODY expands to the complete, brace-enclosed body of such
 * a search function.  The enclosing function must have parameters
 * named mountp, special, devstr and str_size.  Macro arguments:
 *
 *	st_type		struct tag of a table entry (e.g. vfstab)
 *	st_file		path of the table file to open
 *	st_mount	name of the entry's mount point member
 *	st_special	name of the entry's special device member
 *	st_nuller	routine that clears a search key
 *	st_init		extra statement run after the key is filled in
 *	st_searcher	routine that scans the file for the key; a return
 *			of zero is treated as a match
 *
 * The generated body returns NULL if the file cannot be opened or no
 * entry matches.  Otherwise it returns a pointer to a function-local
 * static entry, so the result is overwritten by the next call of the
 * same function.
 */

#define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
			st_nuller, st_init, st_searcher) \
	{ \
		FILE *fp; \
		struct st_type *retval = NULL; \
		struct st_type key; \
		static struct st_type buffer; \
		\
		/* LINTED ``assigned value never used'' */ \
		st_nuller(&key); \
		key.st_mount = mountp; \
		key.st_special = special; \
		st_init; \
		\
		if ((fp = fopen(st_file, "r")) == NULL) \
			return (NULL); \
		\
		if (st_searcher(fp, &buffer, &key) == 0) { \
			retval = &buffer; \
			if (devstr != NULL && str_size > 0 && \
			    buffer.st_special != NULL) { \
				(void) strlcpy(devstr, buffer.st_special, \
				    str_size); \
			} \
		} \
		(void) fclose(fp); \
		return (retval); \
	}
1579 
/*
 * Find the first VFSTAB entry matching mountp and/or special (either
 * may be NULL).  The body is generated by SEARCH_TAB_BODY; the
 * `(retval = retval)' argument is a do-nothing filler for the macro's
 * st_init slot, as no extra key setup is needed here.
 *
 * Returns a pointer to a static entry, or NULL if nothing matched or
 * the file could not be opened.
 */
static struct vfstab *
search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
		(retval = retval), getvfsany)
1584 
/*
 * Find the first MNTTAB entry matching mountp and/or special (either
 * may be NULL).  The body is generated by SEARCH_TAB_BODY; the search
 * key additionally has its fstype set to MNTTYPE_UFS, so only entries
 * of that type are considered.
 *
 * Returns a pointer to a static entry, or NULL if nothing matched or
 * the file could not be opened.
 */
static struct mnttab *
search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
		(key.mnt_fstype = MNTTYPE_UFS), getmntany)
1589 
1590 int
1591 do_errorlock(int lock_type)
1592 {
1593 	caddr_t	   buf;
1594 	time_t	   now;
1595 	struct tm *local;
1596 	int	   rc;
1597 
1598 	if (elock_combuf == NULL)
1599 		errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1600 		    elock_mountp ? elock_mountp : "<null>",
1601 		    lock_type);
1602 
1603 	if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1604 	    NULL) {
1605 		errexit("Couldn't alloc memory for temp. lock status buffer\n");
1606 	}
1607 	if (lfp == NULL) {
1608 		errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1609 		    elock_mountp, lock_type);
1610 	}
1611 
1612 	(void) memmove((void *)buf, (void *)elock_combuf,
1613 	    LOCKFS_MAXCOMMENTLEN-1);
1614 
1615 	switch (lock_type) {
1616 	case LOCKFS_ELOCK:
1617 		/*
1618 		 * Note that if it is error-locked, we won't get an
1619 		 * error back if we try to error-lock it again.
1620 		 */
1621 		if (time(&now) != (time_t)-1) {
1622 			if ((local = localtime(&now)) != NULL)
1623 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1624 		    "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1625 				    elock_combuf, (int)pid,
1626 				    local->tm_mon + 1, local->tm_mday,
1627 				    (local->tm_year % 100), local->tm_hour,
1628 				    local->tm_min, local->tm_sec);
1629 			else
1630 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1631 				    "%s [fsck pid %d", elock_combuf, pid);
1632 
1633 		} else {
1634 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1635 			    "%s [fsck pid %d", elock_combuf, pid);
1636 		}
1637 		break;
1638 
1639 	case LOCKFS_ULOCK:
1640 		if (time(&now) != (time_t)-1) {
1641 			if ((local = localtime(&now)) != NULL) {
1642 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1643 				    "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1644 				    elock_combuf,
1645 				    local->tm_mon + 1, local->tm_mday,
1646 				    (local->tm_year % 100), local->tm_hour,
1647 				    local->tm_min, local->tm_sec);
1648 			} else {
1649 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1650 				    "%s]", elock_combuf);
1651 			}
1652 		} else {
1653 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1654 			    "%s]", elock_combuf);
1655 		}
1656 		if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1657 			pwarn("do_errorlock: unlock failed: %s\n",
1658 			    strerror(errno));
1659 			goto out;
1660 		}
1661 		break;
1662 
1663 	default:
1664 		break;
1665 	}
1666 
1667 	(void) memmove((void *)elock_combuf, (void *)buf,
1668 	    LOCKFS_MAXCOMMENTLEN - 1);
1669 
1670 	lfp->lf_lock = lock_type;
1671 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1672 	lfp->lf_comment = elock_combuf;
1673 	lfp->lf_flags = 0;
1674 	errno = 0;
1675 
1676 	if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1677 		if (errno == EINVAL) {
1678 			pwarn("Another fsck active?\n");
1679 			iscorrupt = 0;	/* don't go away mad, just go away */
1680 		} else {
1681 			pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1682 			    lock_type, elock_combuf, strerror(errno));
1683 		}
1684 	}
1685 out:
1686 	if (buf != NULL) {
1687 		free((void *)buf);
1688 	}
1689 
1690 	return (rc != -1);
1691 }
1692 
1693 /*
1694  * Shadow inode support.  To register a shadow with a client is to note
1695  * that an inode (the client) refers to the shadow.
1696  */
1697 
1698 static struct shadowclients *
1699 newshadowclient(struct shadowclients *prev)
1700 {
1701 	struct shadowclients *rc;
1702 
1703 	rc = (struct shadowclients *)malloc(sizeof (*rc));
1704 	if (rc == NULL)
1705 		errexit("newshadowclient: cannot malloc shadow client");
1706 	rc->next = prev;
1707 	rc->nclients = 0;
1708 
1709 	rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1710 	    maxshadowclients);
1711 	if (rc->client == NULL)
1712 		errexit("newshadowclient: cannot malloc client array");
1713 	return (rc);
1714 }
1715 
1716 void
1717 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1718 	struct shadowclientinfo **info)
1719 {
1720 	struct shadowclientinfo *sci;
1721 	struct shadowclients *scc;
1722 
1723 	/*
1724 	 * Already have a record for this shadow?
1725 	 */
1726 	for (sci = *info; sci != NULL; sci = sci->next)
1727 		if (sci->shadow == shadow)
1728 			break;
1729 	if (sci == NULL) {
1730 		/*
1731 		 * It's a new shadow, add it to the list
1732 		 */
1733 		sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1734 		if (sci == NULL)
1735 			errexit("registershadowclient: cannot malloc");
1736 		sci->next = *info;
1737 		*info = sci;
1738 		sci->shadow = shadow;
1739 		sci->totalClients = 0;
1740 		sci->clients = newshadowclient(NULL);
1741 	}
1742 
1743 	sci->totalClients++;
1744 	scc = sci->clients;
1745 	if (scc->nclients >= maxshadowclients) {
1746 		scc = newshadowclient(sci->clients);
1747 		sci->clients = scc;
1748 	}
1749 
1750 	scc->client[scc->nclients++] = client;
1751 }
1752 
1753 /*
1754  * Locate and discard a shadow.
1755  */
1756 void
1757 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1758 {
1759 	struct shadowclientinfo *sci, *prev;
1760 
1761 	/*
1762 	 * Do we have a record for this shadow?
1763 	 */
1764 	prev = NULL;
1765 	for (sci = *info; sci != NULL; sci = sci->next) {
1766 		if (sci->shadow == shadow)
1767 			break;
1768 		prev = sci;
1769 	}
1770 
1771 	if (sci != NULL) {
1772 		/*
1773 		 * First, pull it off the list, since we know there
1774 		 * shouldn't be any future references to this one.
1775 		 */
1776 		if (prev == NULL)
1777 			*info = sci->next;
1778 		else
1779 			prev->next = sci->next;
1780 		deshadow(sci, clearattrref);
1781 	}
1782 }
1783 
1784 /*
1785  * Discard all memory used to track clients of a shadow.
1786  */
1787 void
1788 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1789 {
1790 	struct shadowclients *clients, *discard;
1791 	int idx;
1792 
1793 	clients = sci->clients;
1794 	while (clients != NULL) {
1795 		discard = clients;
1796 		clients = clients->next;
1797 		if (discard->client != NULL) {
1798 			if (cb != NULL) {
1799 				for (idx = 0; idx < discard->nclients; idx++)
1800 					(*cb)(discard->client[idx]);
1801 			}
1802 			free((void *)discard->client);
1803 		}
1804 		free((void *)discard);
1805 	}
1806 
1807 	free((void *)sci);
1808 }
1809 
1810 /*
1811  * Allocate more buffer as need arises but allocate one at a time.
1812  * This is done to make sure that fsck does not exit with error if it
1813  * needs more buffer to complete its task.
1814  */
1815 static struct bufarea *
1816 alloc_bufarea(void)
1817 {
1818 	struct bufarea *newbp;
1819 	caddr_t bufp;
1820 
1821 	bufp = malloc((unsigned int)sblock.fs_bsize);
1822 	if (bufp == NULL)
1823 		return (NULL);
1824 
1825 	newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1826 	if (newbp == NULL) {
1827 		free((void *)bufp);
1828 		return (NULL);
1829 	}
1830 
1831 	initbarea(newbp);
1832 	newbp->b_un.b_buf = bufp;
1833 	newbp->b_prev = &bufhead;
1834 	newbp->b_next = bufhead.b_next;
1835 	bufhead.b_next->b_prev = newbp;
1836 	bufhead.b_next = newbp;
1837 	bufhead.b_size++;
1838 	return (newbp);
1839 }
1840 
1841 /*
1842  * We length-limit in both unrawname() and rawname() to avoid
1843  * overflowing our arrays or those of our naive, trusting callers.
1844  */
1845 
1846 caddr_t
1847 unrawname(caddr_t name)
1848 {
1849 	caddr_t dp;
1850 	static char fullname[MAXPATHLEN + 1];
1851 
1852 	if ((dp = getfullblkname(name)) == NULL)
1853 		return ("");
1854 
1855 	(void) strlcpy(fullname, dp, sizeof (fullname));
1856 	/*
1857 	 * Not reporting under debug, as the allocation isn't
1858 	 * reported by getfullblkname.  The idea is that we
1859 	 * produce balanced alloc/free instances.
1860 	 */
1861 	free(dp);
1862 
1863 	return (fullname);
1864 }
1865 
1866 caddr_t
1867 rawname(caddr_t name)
1868 {
1869 	caddr_t dp;
1870 	static char fullname[MAXPATHLEN + 1];
1871 
1872 	if ((dp = getfullrawname(name)) == NULL)
1873 		return ("");
1874 
1875 	(void) strlcpy(fullname, dp, sizeof (fullname));
1876 	/*
1877 	 * Not reporting under debug, as the allocation isn't
1878 	 * reported by getfullblkname.  The idea is that we
1879 	 * produce balanced alloc/free instances.
1880 	 */
1881 	free(dp);
1882 
1883 	return (fullname);
1884 }
1885 
1886 /*
1887  * Make sure that a cg header looks at least moderately reasonable.
1888  * We want to be able to trust the contents enough to be able to use
1889  * the standard accessor macros.  So, besides looking at the obvious
1890  * such as the magic number, we verify that the offset field values
1891  * are properly aligned and not too big or small.
1892  *
1893  * Returns a NULL pointer if the cg is sane enough for our needs, else
1894  * a dynamically-allocated string describing all of its faults.
1895  */
/*
 * Append_Error: add one freshly-allocated message to the accumulated
 * error string.  The first message is adopted as-is; later ones are
 * copied onto the end of the (reallocated) accumulated string and
 * then freed.  full and full_len are updated in place.
 */
#define	Append_Error(full, full_len, addition, addition_len) \
	do { \
		if ((full) != NULL) { \
			/* lint doesn't think realloc() understands NULLs */ \
			(full) = realloc((full), \
			    (full_len) + (addition_len) + 1); \
			if ((full) == NULL) { \
				errexit("Out of memory in cg_sanity"); \
				/* NOTREACHED */ \
			} \
			(void) strcpy((full) + (full_len), (addition)); \
			(full_len) += (addition_len); \
			free(addition); \
		} else { \
			(full) = (addition); \
			(full_len) = (addition_len); \
		} \
	} while (0)
1911 
1912 caddr_t
1913 cg_sanity(struct cg *cgp, int cgno)
1914 {
1915 	caddr_t full_err;
1916 	caddr_t this_err = NULL;
1917 	int full_len, this_len;
1918 	daddr32_t ndblk;
1919 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1920 	daddr32_t exp_freeoff, exp_nextfreeoff;
1921 
1922 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1923 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
1924 
1925 	full_err = NULL;
1926 	full_len = 0;
1927 
1928 	if (!cg_chkmagic(cgp)) {
1929 		this_len = fsck_asprintf(&this_err,
1930 		    "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1931 		    cgp->cg_magic, CG_MAGIC);
1932 		Append_Error(full_err, full_len, this_err, this_len);
1933 	}
1934 
1935 	if (cgp->cg_cgx != cgno) {
1936 		this_len = fsck_asprintf(&this_err,
1937 		    "WRONG CG NUMBER (%d should be %d)\n",
1938 		    cgp->cg_cgx, cgno);
1939 		Append_Error(full_err, full_len, this_err, this_len);
1940 	}
1941 
1942 	if ((cgp->cg_btotoff & 3) != 0) {
1943 		this_len = fsck_asprintf(&this_err,
1944 		    "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1945 		    cgp->cg_btotoff);
1946 		Append_Error(full_err, full_len, this_err, this_len);
1947 	}
1948 
1949 	if ((cgp->cg_boff & 1) != 0) {
1950 		this_len = fsck_asprintf(&this_err,
1951 	    "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1952 		    cgp->cg_boff);
1953 		Append_Error(full_err, full_len, this_err, this_len);
1954 	}
1955 
1956 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1957 		if (cgp->cg_ncyl < 1) {
1958 			this_len = fsck_asprintf(&this_err,
1959 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1960 			    cgp->cg_ncyl);
1961 		} else {
1962 			this_len = fsck_asprintf(&this_err,
1963 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1964 			    cgp->cg_ncyl, sblock.fs_cpg);
1965 		}
1966 		Append_Error(full_err, full_len, this_err, this_len);
1967 	}
1968 
1969 	if (cgp->cg_niblk != sblock.fs_ipg) {
1970 		this_len = fsck_asprintf(&this_err,
1971 		    "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1972 		    cgp->cg_niblk, sblock.fs_ipg);
1973 		Append_Error(full_err, full_len, this_err, this_len);
1974 	}
1975 
1976 	if (cgp->cg_ndblk != ndblk) {
1977 		this_len = fsck_asprintf(&this_err,
1978 	    "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1979 		    cgp->cg_ndblk, ndblk);
1980 		Append_Error(full_err, full_len, this_err, this_len);
1981 	}
1982 
1983 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1984 		this_len = fsck_asprintf(&this_err,
1985 		    "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1986 		    "(%d should be at least 0 and less than %d)\n",
1987 		    cgp->cg_rotor, ndblk);
1988 		Append_Error(full_err, full_len, this_err, this_len);
1989 	}
1990 
1991 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1992 		this_len = fsck_asprintf(&this_err,
1993 		    "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1994 		    "(%d should be at least 0 and less than %d)\n",
1995 		    cgp->cg_frotor, ndblk);
1996 		Append_Error(full_err, full_len, this_err, this_len);
1997 	}
1998 
1999 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2000 		this_len = fsck_asprintf(&this_err,
2001 		    "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
2002 		    "(%d should be at least 0 and less than %d)\n",
2003 		    cgp->cg_irotor, sblock.fs_ipg);
2004 		Append_Error(full_err, full_len, this_err, this_len);
2005 	}
2006 
2007 	if (cgp->cg_btotoff != exp_btotoff) {
2008 		this_len = fsck_asprintf(&this_err,
2009 		    "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2010 		    cgp->cg_btotoff, exp_btotoff);
2011 		Append_Error(full_err, full_len, this_err, this_len);
2012 	}
2013 
2014 	if (cgp->cg_boff != exp_boff) {
2015 		this_len = fsck_asprintf(&this_err,
2016 		    "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2017 		    cgp->cg_boff, exp_boff);
2018 		Append_Error(full_err, full_len, this_err, this_len);
2019 	}
2020 
2021 	if (cgp->cg_iusedoff != exp_iusedoff) {
2022 		this_len = fsck_asprintf(&this_err,
2023 		    "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2024 		    cgp->cg_iusedoff, exp_iusedoff);
2025 		Append_Error(full_err, full_len, this_err, this_len);
2026 	}
2027 
2028 	if (cgp->cg_freeoff != exp_freeoff) {
2029 		this_len = fsck_asprintf(&this_err,
2030 		    "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2031 		    cgp->cg_freeoff, exp_freeoff);
2032 		Append_Error(full_err, full_len, this_err, this_len);
2033 	}
2034 
2035 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2036 		this_len = fsck_asprintf(&this_err,
2037 		    "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2038 		    cgp->cg_nextfreeoff, exp_nextfreeoff);
2039 		Append_Error(full_err, full_len, this_err, this_len);
2040 	}
2041 
2042 	return (full_err);
2043 }
2044 
2045 #undef	Append_Error
2046 
2047 /*
2048  * This is taken from mkfs, and is what is used to come up with the
2049  * original values for a struct cg.  This implies that, since these
2050  * are all constants, recalculating them now should give us the same
2051  * thing as what's on disk.
2052  */
2053 static void
2054 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2055 	daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2056 	daddr32_t *ndblk)
2057 {
2058 	daddr32_t cbase, dmax;
2059 	struct cg *cgp;
2060 
2061 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2062 	    (size_t)sblock.fs_cgsize);
2063 	cgp = cgblk.b_un.b_cg;
2064 
2065 	cbase = cgbase(&sblock, cgno);
2066 	dmax = cbase + sblock.fs_fpg;
2067 	if (dmax > sblock.fs_size)
2068 		dmax = sblock.fs_size;
2069 
2070 	/* LINTED pointer difference won't overflow */
2071 	*btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2072 	*boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2073 	*iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2074 	*freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2075 	*nextfreeoff = *freeoff +
2076 	    howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2077 	*ndblk = dmax - cbase;
2078 }
2079 
2080 /*
2081  * Corrects all fields in the cg that can be done with the available
2082  * redundant data.
2083  */
2084 void
2085 fix_cg(struct cg *cgp, int cgno)
2086 {
2087 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2088 	daddr32_t exp_freeoff, exp_nextfreeoff;
2089 	daddr32_t ndblk;
2090 
2091 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2092 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
2093 
2094 	if (cgp->cg_cgx != cgno) {
2095 		cgp->cg_cgx = cgno;
2096 	}
2097 
2098 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2099 		if (cgno == (sblock.fs_ncg - 1)) {
2100 			cgp->cg_ncyl = sblock.fs_ncyl -
2101 			    (sblock.fs_cpg * cgno);
2102 		} else {
2103 			cgp->cg_ncyl = sblock.fs_cpg;
2104 		}
2105 	}
2106 
2107 	if (cgp->cg_niblk != sblock.fs_ipg) {
2108 		/*
2109 		 * This is not used by the kernel, so it's pretty
2110 		 * harmless if it's wrong.
2111 		 */
2112 		cgp->cg_niblk = sblock.fs_ipg;
2113 	}
2114 
2115 	if (cgp->cg_ndblk != ndblk) {
2116 		cgp->cg_ndblk = ndblk;
2117 	}
2118 
2119 	/*
2120 	 * For the rotors, any position's valid, so pick the one we know
2121 	 * will always exist.
2122 	 */
2123 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2124 		cgp->cg_rotor = 0;
2125 	}
2126 
2127 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2128 		cgp->cg_frotor = 0;
2129 	}
2130 
2131 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2132 		cgp->cg_irotor = 0;
2133 	}
2134 
2135 	/*
2136 	 * For btotoff and boff, if they're misaligned they won't
2137 	 * match the expected values, so we're catching both cases
2138 	 * here.  Of course, if any of these are off, it seems likely
2139 	 * that the tables really won't be where we calculate they
2140 	 * should be anyway.
2141 	 */
2142 	if (cgp->cg_btotoff != exp_btotoff) {
2143 		cgp->cg_btotoff = exp_btotoff;
2144 	}
2145 
2146 	if (cgp->cg_boff != exp_boff) {
2147 		cgp->cg_boff = exp_boff;
2148 	}
2149 
2150 	if (cgp->cg_iusedoff != exp_iusedoff) {
2151 		cgp->cg_iusedoff = exp_iusedoff;
2152 	}
2153 
2154 	if (cgp->cg_freeoff != exp_freeoff) {
2155 		cgp->cg_freeoff = exp_freeoff;
2156 	}
2157 
2158 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2159 		cgp->cg_nextfreeoff = exp_nextfreeoff;
2160 	}
2161 
2162 	/*
2163 	 * Reset the magic, as we've recreated this cg, also
2164 	 * update the cg_time, as we're writing out the cg
2165 	 */
2166 	cgp->cg_magic = CG_MAGIC;
2167 	cgp->cg_time = time(NULL);
2168 
2169 	/*
2170 	 * We know there was at least one correctable problem,
2171 	 * or else we wouldn't have been called.  So instead of
2172 	 * marking the buffer dirty N times above, just do it
2173 	 * once here.
2174 	 */
2175 	cgdirty();
2176 }
2177 
2178 void
2179 examinelog(void (*cb)(daddr32_t))
2180 {
2181 	struct bufarea *bp;
2182 	extent_block_t *ebp;
2183 	extent_t *ep;
2184 	daddr32_t nfno, fno;
2185 	int i;
2186 	int j;
2187 
2188 	/*
2189 	 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2190 	 * we need to translate accordingly using logbtodb()
2191 	 */
2192 
2193 	if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2194 		if (debug) {
2195 			(void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2196 			    "Aborting log examination\n", \
2197 			    logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2198 		}
2199 		return;
2200 	}
2201 
2202 	/*
2203 	 * Read errors will return zeros, which will cause us
2204 	 * to do nothing harmful, so don't need to handle it.
2205 	 */
2206 	bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2207 	    (size_t)sblock.fs_bsize);
2208 	ebp = (void *)bp->b_un.b_buf;
2209 
2210 	/*
2211 	 * Does it look like a log allocation table?
2212 	 */
2213 	/* LINTED pointer cast is aligned */
2214 	if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2215 	    sblock.fs_bsize))
2216 		return;
2217 	if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2218 		return;
2219 
2220 	ep = &ebp->extents[0];
2221 	for (i = 0; i < ebp->nextents; ++i, ++ep) {
2222 		fno = logbtofrag(&sblock, ep->pbno);
2223 		nfno = dbtofsb(&sblock, ep->nbno);
2224 		for (j = 0; j < nfno; ++j, ++fno) {
2225 			/*
2226 			 * Invoke the callback first, so that pass1 can
2227 			 * mark the log blocks in-use.  Then, if any
2228 			 * subsequent pass over the log shows us that a
2229 			 * block got freed (say, it was also claimed by
2230 			 * an inode that we cleared), we can safely declare
2231 			 * the log bad.
2232 			 */
2233 			if (cb != NULL)
2234 				(*cb)(fno);
2235 			if (!testbmap(fno))
2236 				islogok = 0;
2237 		}
2238 	}
2239 	brelse(bp);
2240 
2241 	if (cb != NULL) {
2242 		fno = logbtofrag(&sblock, sblock.fs_logbno);
2243 		for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2244 			(*cb)(fno);
2245 	}
2246 }
2247 
2248 static void
2249 freelogblk(daddr32_t frag)
2250 {
2251 	freeblk(sblock.fs_logbno, frag, 1);
2252 }
2253 
2254 caddr_t
2255 file_id(fsck_ino_t inum, mode_t mode)
2256 {
2257 	static char name[MAXPATHLEN + 1];
2258 
2259 	if (lfdir == inum) {
2260 		return (lfname);
2261 	}
2262 
2263 	if ((mode & IFMT) == IFDIR) {
2264 		(void) strcpy(name, "DIR");
2265 	} else if ((mode & IFMT) == IFATTRDIR) {
2266 		(void) strcpy(name, "ATTR DIR");
2267 	} else if ((mode & IFMT) == IFSHAD) {
2268 		(void) strcpy(name, "ACL");
2269 	} else {
2270 		(void) strcpy(name, "FILE");
2271 	}
2272 
2273 	return (name);
2274 }
2275 
2276 /*
2277  * Simple initializer for inodesc structures, so users of only a few
2278  * fields don't have to worry about getting the right defaults for
2279  * everything out.
2280  */
2281 void
2282 init_inodesc(struct inodesc *idesc)
2283 {
2284 	/*
2285 	 * Most fields should be zero, just hit the special cases.
2286 	 */
2287 	(void) memset((void *)idesc, 0, sizeof (struct inodesc));
2288 	idesc->id_fix = DONTKNOW;
2289 	idesc->id_lbn = -1;
2290 	idesc->id_truncto = -1;
2291 	idesc->id_firsthole = -1;
2292 }
2293 
2294 /*
2295  * Compare routine for tsearch(C) to use on ino_t instances.
2296  */
2297 int
2298 ino_t_cmp(const void *left, const void *right)
2299 {
2300 	const fsck_ino_t lino = (const fsck_ino_t)left;
2301 	const fsck_ino_t rino = (const fsck_ino_t)right;
2302 
2303 	return (lino - rino);
2304 }
2305 
2306 int
2307 cgisdirty(void)
2308 {
2309 	return (cgblk.b_dirty);
2310 }
2311 
2312 void
2313 cgflush(void)
2314 {
2315 	flush(fswritefd, &cgblk);
2316 }
2317 
2318 void
2319 dirty(struct bufarea *bp)
2320 {
2321 	if (fswritefd < 0) {
2322 		/*
2323 		 * No one should call dirty() in read only mode.
2324 		 * But if one does, it's not fatal issue. Just warn them.
2325 		 */
2326 		pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2327 	} else {
2328 		(bp)->b_dirty = 1;
2329 		isdirty = 1;
2330 	}
2331 }
2332 
2333 void
2334 initbarea(struct bufarea *bp)
2335 {
2336 	(bp)->b_dirty = 0;
2337 	(bp)->b_bno = (diskaddr_t)-1LL;
2338 	(bp)->b_flags = 0;
2339 	(bp)->b_cnt = 0;
2340 	(bp)->b_errs = 0;
2341 }
2342 
/*
 * Partition-sizing routines adapted from ../newfs/newfs.c.
 * They are needed because calcsb() uses mkfs to work out what the
 * superblock should be, and mkfs insists on being told how many
 * sectors to use.
 *
 * Error handling assumes we're never called while preening.
 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
 */

/* Kind of label found by get_device_size(); one of LABEL_TYPE_*. */
static int label_type;

#define	LABEL_TYPE_VTOC		1
#define	LABEL_TYPE_EFI		2
#define	LABEL_TYPE_OTHER	3

/* One megabyte, in bytes. */
#define	MB			(1 << 20)
/* 2^31 sectors: one terabyte's worth when sectors are 512 bytes. */
#define	SECTORS_PER_TERABYTE	(1LL << 31)
/* Largest sector offset the brute-force size probe will try (2^44). */
#define	FS_SIZE_UPPER_LIMIT	(1LL << 44)
2366 
2367 diskaddr_t
2368 getdisksize(caddr_t disk, int fd)
2369 {
2370 	int rpm;
2371 	struct dk_geom g;
2372 	struct dk_cinfo ci;
2373 	diskaddr_t actual_size;
2374 
2375 	/*
2376 	 * get_device_size() determines the actual size of the
2377 	 * device, and also the disk's attributes, such as geometry.
2378 	 */
2379 	actual_size = get_device_size(fd, disk);
2380 
2381 	if (label_type == LABEL_TYPE_VTOC) {
2382 		if (ioctl(fd, DKIOCGGEOM, &g)) {
2383 			pwarn("%s: Unable to read Disk geometry", disk);
2384 			return (0);
2385 		}
2386 		if (sblock.fs_nsect == 0)
2387 			sblock.fs_nsect = g.dkg_nsect;
2388 		if (sblock.fs_ntrak == 0)
2389 			sblock.fs_ntrak = g.dkg_nhead;
2390 		if (sblock.fs_rps == 0) {
2391 			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2392 			sblock.fs_rps = rpm / 60;
2393 		}
2394 	}
2395 
2396 	if (sblock.fs_bsize == 0)
2397 		sblock.fs_bsize = MAXBSIZE;
2398 
2399 	/*
2400 	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2401 	 * information is not available, default to the min of a MB and
2402 	 * maxphys.
2403 	 */
2404 	if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2405 		sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2406 		if (sblock.fs_maxcontig < 0) {
2407 			int gotit, maxphys;
2408 
2409 			gotit = fsgetmaxphys(&maxphys, NULL);
2410 
2411 			/*
2412 			 * If we cannot get the maxphys value, default
2413 			 * to ufs_maxmaxphys (MB).
2414 			 */
2415 			if (gotit) {
2416 				sblock.fs_maxcontig = MIN(maxphys, MB);
2417 			} else {
2418 				sblock.fs_maxcontig = MB;
2419 			}
2420 		}
2421 		sblock.fs_maxcontig /= sblock.fs_bsize;
2422 	}
2423 
2424 	return (actual_size);
2425 }
2426 
2427 /*
2428  * Figure out how big the partition we're dealing with is.
2429  */
2430 static diskaddr_t
2431 get_device_size(int fd, caddr_t name)
2432 {
2433 	struct extvtoc vtoc;
2434 	struct dk_gpt *efi_vtoc;
2435 	diskaddr_t slicesize = 0;
2436 
2437 	int index = read_extvtoc(fd, &vtoc);
2438 
2439 	if (index >= 0) {
2440 		label_type = LABEL_TYPE_VTOC;
2441 	} else {
2442 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2443 			/* it might be an EFI label */
2444 			index = efi_alloc_and_read(fd, &efi_vtoc);
2445 			if (index >= 0)
2446 				label_type = LABEL_TYPE_EFI;
2447 		}
2448 	}
2449 
2450 	if (index < 0) {
2451 		/*
2452 		 * Since both attempts to read the label failed, we're
2453 		 * going to fall back to a brute force approach to
2454 		 * determining the device's size:  see how far out we can
2455 		 * perform reads on the device.
2456 		 */
2457 
2458 		slicesize = brute_force_get_device_size(fd);
2459 		if (slicesize == 0) {
2460 			switch (index) {
2461 			case VT_ERROR:
2462 				pwarn("%s: %s\n", name, strerror(errno));
2463 				break;
2464 			case VT_EIO:
2465 				pwarn("%s: I/O error accessing VTOC", name);
2466 				break;
2467 			case VT_EINVAL:
2468 				pwarn("%s: Invalid field in VTOC", name);
2469 				break;
2470 			default:
2471 				pwarn("%s: unknown error %d accessing VTOC",
2472 				    name, index);
2473 				break;
2474 			}
2475 			return (0);
2476 		} else {
2477 			label_type = LABEL_TYPE_OTHER;
2478 		}
2479 	}
2480 
2481 	if (label_type == LABEL_TYPE_EFI) {
2482 		slicesize = efi_vtoc->efi_parts[index].p_size;
2483 		efi_free(efi_vtoc);
2484 	} else if (label_type == LABEL_TYPE_VTOC) {
2485 		slicesize = vtoc.v_part[index].p_size;
2486 	}
2487 
2488 	return (slicesize);
2489 }
2490 
2491 /*
2492  * brute_force_get_device_size
2493  *
2494  * Determine the size of the device by seeing how far we can
2495  * read.  Doing an llseek( , , SEEK_END) would probably work
2496  * in most cases, but we've seen at least one third-party driver
2497  * which doesn't correctly support the SEEK_END option when the
2498  * the device is greater than a terabyte.
2499  */
2500 
2501 static diskaddr_t
2502 brute_force_get_device_size(int fd)
2503 {
2504 	diskaddr_t	min_fail = 0;
2505 	diskaddr_t	max_succeed = 0;
2506 	diskaddr_t	cur_db_off;
2507 	char		buf[DEV_BSIZE];
2508 
2509 	/*
2510 	 * First, see if we can read the device at all, just to
2511 	 * eliminate errors that have nothing to do with the
2512 	 * device's size.
2513 	 */
2514 
2515 	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2516 	    ((read(fd, buf, DEV_BSIZE)) == -1))
2517 		return (0);  /* can't determine size */
2518 
2519 	/*
2520 	 * Now, go sequentially through the multiples of 4TB
2521 	 * to find the first read that fails (this isn't strictly
2522 	 * the most efficient way to find the actual size if the
2523 	 * size really could be anything between 0 and 2**64 bytes.
2524 	 * We expect the sizes to be less than 16 TB for some time,
2525 	 * so why do a bunch of reads that are larger than that?
2526 	 * However, this algorithm *will* work for sizes of greater
2527 	 * than 16 TB.  We're just not optimizing for those sizes.)
2528 	 */
2529 
2530 	/*
2531 	 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2532 	 * We're using > 32-bit constants here.  Therefore, its flow
2533 	 * analysis is wrong.  For the time being, ignore complaints
2534 	 * from it about the body of the for() being unreached.
2535 	 */
2536 	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2537 	    (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2538 	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2539 		if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2540 		    SEEK_SET) == -1) ||
2541 		    (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2542 			min_fail = cur_db_off;
2543 		else
2544 			max_succeed = cur_db_off;
2545 	}
2546 
2547 	/*
2548 	 * XXX Same lint flow analysis problem as above.
2549 	 */
2550 	if (min_fail == 0)
2551 		return (0);
2552 
2553 	/*
2554 	 * We now know that the size of the device is less than
2555 	 * min_fail and greater than or equal to max_succeed.  Now
2556 	 * keep splitting the difference until the actual size in
2557 	 * sectors in known.  We also know that the difference
2558 	 * between max_succeed and min_fail at this time is
2559 	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2560 	 * simplifies the math below.
2561 	 */
2562 
2563 	while (min_fail - max_succeed > 1) {
2564 		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2565 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2566 		    SEEK_SET)) == -1) ||
2567 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2568 			min_fail = cur_db_off;
2569 		else
2570 			max_succeed = cur_db_off;
2571 	}
2572 
2573 	/* the size is the last successfully read sector offset plus one */
2574 	return (max_succeed + 1);
2575 }
2576 
2577 static void
2578 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2579 {
2580 	struct dinode *dp;
2581 	char pathbuf[MAXPATHLEN + 1];
2582 
2583 	vpwarn(fmt, ap);
2584 	(void) putchar(' ');
2585 	pinode(ino);
2586 	(void) printf("\n");
2587 	getpathname(pathbuf, cwd, ino);
2588 	if (ino < UFSROOTINO || ino > maxino) {
2589 		pfatal("NAME=%s\n", pathbuf);
2590 		return;
2591 	}
2592 	dp = ginode(ino);
2593 	if (ftypeok(dp))
2594 		pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2595 	else
2596 		pfatal("NAME=%s\n", pathbuf);
2597 }
2598 
2599 void
2600 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2601 {
2602 	va_list ap;
2603 
2604 	va_start(ap, fmt);
2605 	vfileerror(ino, ino, fmt, ap);
2606 	va_end(ap);
2607 }
2608 
2609 static void
2610 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2611 {
2612 	vfileerror(ino, ino, fmt, ap);
2613 }
2614 
2615 void
2616 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2617 {
2618 	va_list ap;
2619 
2620 	va_start(ap, fmt);
2621 	vfileerror(cwd, ino, fmt, ap);
2622 	va_end(ap);
2623 }
2624 
2625 /*
2626  * Adds the given inode to the orphaned-directories list, limbo_dirs.
2627  * Assumes that the caller has set INCLEAR in the inode's statemap[]
2628  * entry.
2629  *
2630  * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2631  * meaning it's effectively an orphan.  It needs to be noted now, so
2632  * it will be remembered in pass 4.
2633  */
2634 
2635 void
2636 add_orphan_dir(fsck_ino_t ino)
2637 {
2638 	if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2639 		errexit("add_orphan_dir: out of memory");
2640 }
2641 
2642 /*
2643  * Remove an inode from the orphaned-directories list, presumably
2644  * because it's been cleared.
2645  */
2646 void
2647 remove_orphan_dir(fsck_ino_t ino)
2648 {
2649 	(void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2650 }
2651 
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 *
 * log_setsum() stores in *sp the 32-bit sum of the nb bytes starting
 * at lp, read as int32_t words; a trailing partial word is ignored.
 * *sp is cleared before summing, so that if sp points into the region
 * being summed, the previous checksum value does not contribute.
 *
 * The sum is accumulated as an unsigned value so that wraparound is
 * well defined (signed overflow is not).  A zero or negative nb sums
 * nothing and leaves *sp set to zero.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	int nwords;

	*sp = 0;
	if (nb <= 0)
		return;

	nwords = nb / (int)sizeof (int32_t);
	while (nwords-- > 0)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}
2667 
/*
 * Verify a stored checksum.  The sum of the nb bytes at lp is
 * recomputed with log_setsum() and compared against the value that
 * was in *sp on entry.  Returns 1 on a match and 0 on a mismatch;
 * either way *sp holds its original value on return.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;

	log_setsum(sp, lp, nb);
	if (*sp == stored)
		return (1);

	/* Put back the on-disk value that log_setsum() overwrote. */
	*sp = stored;
	return (0);
}
2680