xref: /titanic_51/usr/src/cmd/fs.d/ufs/fsck/utilities.c (revision fe0e7ec4d916b05b52d8c7cc8a3e6a1b28e77b6f)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms are permitted
14  * provided that: (1) source distributions retain this entire copyright
15  * notice and comment, and (2) distributions including binaries display
16  * the following acknowledgement:  ``This product includes software
17  * developed by the University of California, Berkeley and its contributors''
18  * in the documentation or other materials provided with the distribution
19  * and in all advertising materials mentioning features or use of this
20  * software. Neither the name of the University nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <stdarg.h>
34 #include <libadm.h>
35 #include <note.h>
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/mntent.h>
39 #include <sys/filio.h>
40 #include <sys/fs/ufs_fs.h>
41 #include <sys/vnode.h>
42 #include <sys/fs/ufs_acl.h>
43 #include <sys/fs/ufs_inode.h>
44 #include <sys/fs/ufs_log.h>
45 #define	_KERNEL
46 #include <sys/fs/ufs_fsdir.h>
47 #undef _KERNEL
48 #include <sys/mnttab.h>
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <fcntl.h>
52 #include <signal.h>
53 #include <string.h>
54 #include <ctype.h>
55 #include <sys/vfstab.h>
56 #include <sys/lockfs.h>
57 #include <errno.h>
58 #include <sys/cmn_err.h>
59 #include <sys/dkio.h>
60 #include <sys/vtoc.h>
61 #include <sys/efi_partition.h>
62 #include <fslib.h>
63 #include <inttypes.h>
64 #include "fsck.h"
65 
/*
 * Mount point of the filesystem being examined.  Remains NULL until
 * mounted() finds the device in the mount table and stores a
 * strdup()'d copy of the entry's mount point here.
 */
caddr_t mount_point = NULL;

/*
 * Disk cache statistics, maintained by getdatablk() and reported by
 * ckfini() when debugging: diskreads counts the requests that had to
 * go through getblk(), totalreads counts every request.
 */
static int64_t diskreads, totalreads;	/* Disk cache statistics */

/*
 * Forward declarations for the helpers that are private to this file.
 */
static int log_checksum(int32_t *, int32_t *, int);
static void vdirerror(fsck_ino_t, caddr_t, va_list);
static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
static void vpwarn(caddr_t, va_list);
static int getline(FILE *, caddr_t, int);
static struct bufarea *alloc_bufarea(void);
static void rwerror(caddr_t, diskaddr_t, int rval);
static void debugclean(void);
static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
static void freelogblk(daddr32_t);
static void verrexit(caddr_t, va_list);
static void vpfatal(caddr_t, va_list);
static diskaddr_t get_device_size(int, caddr_t);
static diskaddr_t brute_force_get_device_size(int);
static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
	    daddr32_t *, daddr32_t *, daddr32_t *);
87 
88 int
89 ftypeok(struct dinode *dp)
90 {
91 	switch (dp->di_mode & IFMT) {
92 
93 	case IFDIR:
94 	case IFREG:
95 	case IFBLK:
96 	case IFCHR:
97 	case IFLNK:
98 	case IFSOCK:
99 	case IFIFO:
100 	case IFSHAD:
101 	case IFATTRDIR:
102 		return (1);
103 
104 	default:
105 		if (debug)
106 			(void) printf("bad file type 0%o\n", dp->di_mode);
107 		return (0);
108 	}
109 }
110 
111 int
112 acltypeok(struct dinode *dp)
113 {
114 	if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
115 		return (1);
116 
117 	if (debug)
118 		(void) printf("bad file type for acl I=%d: 0%o\n",
119 		    dp->di_shadow, dp->di_mode);
120 	return (0);
121 }
122 
123 NOTE(PRINTFLIKE(1))
124 int
125 reply(caddr_t fmt, ...)
126 {
127 	va_list ap;
128 	char line[80];
129 
130 	if (preen)
131 		pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
132 
133 	if (mflag) {
134 		/*
135 		 * We don't know what's going on, so don't potentially
136 		 * make things worse by having errexit() write stuff
137 		 * out to disk.
138 		 */
139 		(void) printf(
140 		    "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
141 		    devname);
142 		exit(EXERRFATAL);
143 	}
144 
145 	va_start(ap, fmt);
146 	(void) putchar('\n');
147 	(void) vprintf(fmt, ap);
148 	(void) putchar('?');
149 	(void) putchar(' ');
150 	va_end(ap);
151 
152 	if (nflag || fswritefd < 0) {
153 		(void) printf(" no\n\n");
154 		return (0);
155 	}
156 	if (yflag) {
157 		(void) printf(" yes\n\n");
158 		return (1);
159 	}
160 	(void) fflush(stdout);
161 	if (getline(stdin, line, sizeof (line)) == EOF)
162 		errexit("\n");
163 	(void) printf("\n");
164 	if (line[0] == 'y' || line[0] == 'Y') {
165 		return (1);
166 	} else {
167 		return (0);
168 	}
169 }
170 
/*
 * Read one line from FP into LOC, discarding every whitespace
 * character along the way.  At most MAXLEN - 1 characters are stored;
 * any excess up to the newline is consumed and dropped.  The result is
 * always NUL-terminated when a newline is seen.
 *
 * Returns the number of characters stored, or EOF if end-of-file is
 * hit before a newline (in which case LOC is not terminated).
 */
int
getline(FILE *fp, caddr_t loc, int maxlen)
{
	caddr_t out = loc;
	caddr_t limit = loc + (maxlen - 1);
	int ch;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		/* Skip whitespace, and anything that no longer fits. */
		if (isspace(ch) || out >= limit)
			continue;
		*out++ = (char)ch;
	}
	*out = '\0';
	/* LINTED pointer difference won't overflow */
	return (out - loc);
}
189 
190 /*
191  * Malloc buffers and set up cache.
192  */
193 void
194 bufinit(void)
195 {
196 	struct bufarea *bp;
197 	int bufcnt, i;
198 	caddr_t bufp;
199 
200 	bufp = malloc((size_t)sblock.fs_bsize);
201 	if (bufp == NULL)
202 		goto nomem;
203 	initbarea(&cgblk);
204 	cgblk.b_un.b_buf = bufp;
205 	bufhead.b_next = bufhead.b_prev = &bufhead;
206 	bufcnt = MAXBUFSPACE / sblock.fs_bsize;
207 	if (bufcnt < MINBUFS)
208 		bufcnt = MINBUFS;
209 	for (i = 0; i < bufcnt; i++) {
210 		bp = (struct bufarea *)malloc(sizeof (struct bufarea));
211 		if (bp == NULL) {
212 			if (i >= MINBUFS)
213 				goto noalloc;
214 			goto nomem;
215 		}
216 
217 		bufp = malloc((size_t)sblock.fs_bsize);
218 		if (bufp == NULL) {
219 			free((void *)bp);
220 			if (i >= MINBUFS)
221 				goto noalloc;
222 			goto nomem;
223 		}
224 		initbarea(bp);
225 		bp->b_un.b_buf = bufp;
226 		bp->b_prev = &bufhead;
227 		bp->b_next = bufhead.b_next;
228 		bufhead.b_next->b_prev = bp;
229 		bufhead.b_next = bp;
230 	}
231 noalloc:
232 	bufhead.b_size = i;	/* save number of buffers */
233 	pbp = pdirbp = NULL;
234 	return;
235 
236 nomem:
237 	errexit("cannot allocate buffer pool\n");
238 	/* NOTREACHED */
239 }
240 
241 /*
242  * Undo a bufinit().
243  */
244 void
245 unbufinit(void)
246 {
247 	int cnt;
248 	struct bufarea *bp, *nbp;
249 
250 	cnt = 0;
251 	for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
252 		cnt++;
253 		flush(fswritefd, bp);
254 		nbp = bp->b_prev;
255 		/*
256 		 * We're discarding the entire chain, so this isn't
257 		 * technically necessary.  However, it doesn't hurt
258 		 * and lint's data flow analysis is much happier
259 		 * (this prevents it from thinking there's a chance
260 		 * of our using memory elsewhere after it's been released).
261 		 */
262 		nbp->b_next = bp->b_next;
263 		bp->b_next->b_prev = nbp;
264 		free((void *)bp->b_un.b_buf);
265 		free((void *)bp);
266 	}
267 
268 	if (bufhead.b_size != cnt)
269 		errexit("Panic: cache lost %d buffers\n",
270 			bufhead.b_size - cnt);
271 }
272 
273 /*
274  * Manage a cache of directory blocks.
275  */
276 struct bufarea *
277 getdatablk(daddr32_t blkno, size_t size)
278 {
279 	struct bufarea *bp;
280 
281 	for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
282 		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
283 			goto foundit;
284 		}
285 	for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
286 		if ((bp->b_flags & B_INUSE) == 0)
287 			break;
288 	if (bp == &bufhead) {
289 		bp = alloc_bufarea();
290 		if (bp == NULL) {
291 			errexit("deadlocked buffer pool\n");
292 			/* NOTREACHED */
293 		}
294 	}
295 	/*
296 	 * We're at the same logical level as getblk(), so if there
297 	 * are any errors, we'll let our caller handle them.
298 	 */
299 	diskreads++;
300 	(void) getblk(bp, blkno, size);
301 
302 foundit:
303 	totalreads++;
304 	bp->b_cnt++;
305 	/*
306 	 * Move the buffer to head of linked list if it isn't
307 	 * already there.
308 	 */
309 	if (bufhead.b_next != bp) {
310 		bp->b_prev->b_next = bp->b_next;
311 		bp->b_next->b_prev = bp->b_prev;
312 		bp->b_prev = &bufhead;
313 		bp->b_next = bufhead.b_next;
314 		bufhead.b_next->b_prev = bp;
315 		bufhead.b_next = bp;
316 	}
317 	bp->b_flags |= B_INUSE;
318 	return (bp);
319 }
320 
321 void
322 brelse(struct bufarea *bp)
323 {
324 	bp->b_cnt--;
325 	if (bp->b_cnt == 0) {
326 		bp->b_flags &= ~B_INUSE;
327 	}
328 }
329 
330 struct bufarea *
331 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
332 {
333 	diskaddr_t dblk;
334 
335 	dblk = fsbtodb(&sblock, blk);
336 	if (bp->b_bno == dblk)
337 		return (bp);
338 	flush(fswritefd, bp);
339 	bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
340 	bp->b_bno = dblk;
341 	bp->b_size = size;
342 	return (bp);
343 }
344 
345 void
346 flush(int fd, struct bufarea *bp)
347 {
348 	int i, j;
349 	caddr_t sip;
350 	long size;
351 
352 	if (!bp->b_dirty)
353 		return;
354 
355 	/*
356 	 * It's not our buf, so if there are errors, let whoever
357 	 * acquired it deal with the actual problem.
358 	 */
359 	if (bp->b_errs != 0)
360 		pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
361 	bp->b_dirty = 0;
362 	bp->b_errs = 0;
363 	bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
364 	if (bp != &sblk) {
365 		return;
366 	}
367 
368 	/*
369 	 * We're flushing the superblock, so make sure all the
370 	 * ancillary bits go out as well.
371 	 */
372 	sip = (caddr_t)sblock.fs_u.fs_csp;
373 	for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
374 		size = sblock.fs_cssize - i < sblock.fs_bsize ?
375 		    sblock.fs_cssize - i : sblock.fs_bsize;
376 		bwrite(fswritefd, sip,
377 		    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
378 		    size);
379 		sip += size;
380 	}
381 }
382 
383 static void
384 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
385 {
386 	int olderr = errno;
387 
388 	if (!preen)
389 		(void) printf("\n");
390 
391 	if (rval == -1)
392 		pfatal("CANNOT %s: DISK BLOCK %lld: %s",
393 		    mesg, blk, strerror(olderr));
394 	else
395 		pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
396 
397 	if (reply("CONTINUE") == 0) {
398 		exitstat = EXERRFATAL;
399 		errexit("Program terminated\n");
400 	}
401 }
402 
403 void
404 ckfini(void)
405 {
406 	int64_t percentage;
407 
408 	if (fswritefd < 0)
409 		return;
410 
411 	flush(fswritefd, &sblk);
412 	/*
413 	 * Were we using a backup superblock?
414 	 */
415 	if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
416 		if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
417 			sblk.b_bno = SBOFF / dev_bsize;
418 			sbdirty();
419 			flush(fswritefd, &sblk);
420 		}
421 	}
422 	flush(fswritefd, &cgblk);
423 	if (cgblk.b_un.b_buf != NULL) {
424 		free((void *)cgblk.b_un.b_buf);
425 		cgblk.b_un.b_buf = NULL;
426 	}
427 	unbufinit();
428 	pbp = NULL;
429 	pdirbp = NULL;
430 	if (debug) {
431 		/*
432 		 * Note that we only count cache-related reads.
433 		 * Anything that called fsck_bread() or getblk()
434 		 * directly are explicitly not cached, so they're not
435 		 * included here.
436 		 */
437 		if (totalreads != 0)
438 			percentage = diskreads * 100 / totalreads;
439 		else
440 			percentage = 0;
441 
442 		(void) printf("cache missed %lld of %lld reads (%lld%%)\n",
443 		    (longlong_t)diskreads, (longlong_t)totalreads,
444 		    (longlong_t)percentage);
445 	}
446 
447 	(void) close(fsreadfd);
448 	(void) close(fswritefd);
449 	fsreadfd = -1;
450 	fswritefd = -1;
451 }
452 
453 int
454 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
455 {
456 	caddr_t cp;
457 	int i;
458 	int errs;
459 	offset_t offset = ldbtob(blk);
460 	offset_t addr;
461 
462 	/*
463 	 * In our universe, nothing exists before the superblock, so
464 	 * just pretend it's always zeros.  This is the complement of
465 	 * bwrite()'s ignoring write requests into that space.
466 	 */
467 	if (blk < SBLOCK) {
468 		if (debug)
469 			(void) printf(
470 			    "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
471 			    SBLOCK, (longlong_t)blk);
472 		(void) memset(buf, 0, (size_t)size);
473 		return (1);
474 	}
475 
476 	if (llseek(fd, offset, 0) < 0) {
477 		rwerror("SEEK", blk, -1);
478 	}
479 
480 	if ((i = read(fd, buf, size)) == size) {
481 		return (0);
482 	}
483 	rwerror("READ", blk, i);
484 	if (llseek(fd, offset, 0) < 0) {
485 		rwerror("SEEK", blk, -1);
486 	}
487 	errs = 0;
488 	(void) memset(buf, 0, (size_t)size);
489 	pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
490 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
491 		addr = ldbtob(blk + i);
492 		if (llseek(fd, addr, SEEK_CUR) < 0 ||
493 		    read(fd, cp, (int)secsize) < 0) {
494 			iscorrupt = 1;
495 			(void) printf(" %llu", blk + (u_longlong_t)i);
496 			errs++;
497 		}
498 	}
499 	(void) printf("\n");
500 	return (errs);
501 }
502 
503 void
504 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
505 {
506 	int i;
507 	int n;
508 	caddr_t cp;
509 	offset_t offset = ldbtob(blk);
510 	offset_t addr;
511 
512 	if (fd < 0)
513 		return;
514 	if (blk < SBLOCK) {
515 		if (debug)
516 			(void) printf(
517 		    "WARNING: Attempt to write illegal blkno %lld on %s\n",
518 			    (longlong_t)blk, devname);
519 		return;
520 	}
521 	if (llseek(fd, offset, 0) < 0) {
522 		rwerror("SEEK", blk, -1);
523 	}
524 	if ((i = write(fd, buf, (int)size)) == size) {
525 		fsmodified = 1;
526 		return;
527 	}
528 	rwerror("WRITE", blk, i);
529 	if (llseek(fd, offset, 0) < 0) {
530 		rwerror("SEEK", blk, -1);
531 	}
532 	pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
533 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
534 		n = 0;
535 		addr = ldbtob(blk + i);
536 		if (llseek(fd, addr, SEEK_CUR) < 0 ||
537 		    (n = write(fd, cp, DEV_BSIZE)) < 0) {
538 			iscorrupt = 1;
539 			(void) printf(" %llu", blk + (u_longlong_t)i);
540 		} else if (n > 0) {
541 			fsmodified = 1;
542 		}
543 
544 	}
545 	(void) printf("\n");
546 }
547 
548 /*
549  * Allocates the specified number of contiguous fragments.
550  */
551 daddr32_t
552 allocblk(int wantedfrags)
553 {
554 	int block, leadfrag, tailfrag;
555 	daddr32_t selected;
556 	size_t size;
557 	struct bufarea *bp;
558 
559 	/*
560 	 * It's arguable whether we should just fail, or instead
561 	 * error out here.  Since we should only ever be asked for
562 	 * a single fragment or an entire block (i.e., sblock.fs_frag),
563 	 * we'll fail out because anything else means somebody
564 	 * changed code without considering all of the ramifications.
565 	 */
566 	if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
567 		exitstat = EXERRFATAL;
568 		errexit("allocblk() asked for %d frags.  "
569 			"Legal range is 1 to %d",
570 			wantedfrags, sblock.fs_frag);
571 	}
572 
573 	/*
574 	 * For each filesystem block, look at every possible starting
575 	 * offset within the block such that we can get the number of
576 	 * contiguous fragments that we need.  This is a drastically
577 	 * simplified version of the kernel's mapsearch() and alloc*().
578 	 * It's also correspondingly slower.
579 	 */
580 	for (block = 0; block < maxfsblock - sblock.fs_frag;
581 	    block += sblock.fs_frag) {
582 		for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
583 		    leadfrag++) {
584 			/*
585 			 * Is first fragment of candidate run available?
586 			 */
587 			if (testbmap(block + leadfrag))
588 				continue;
589 			/*
590 			 * Are the rest of them available?
591 			 */
592 			for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
593 				if (testbmap(block + leadfrag + tailfrag))
594 					break;
595 			if (tailfrag < wantedfrags) {
596 				/*
597 				 * No, skip the known-unusable run.
598 				 */
599 				leadfrag += tailfrag;
600 				continue;
601 			}
602 			/*
603 			 * Found what we need, so claim them.
604 			 */
605 			for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
606 				setbmap(block + leadfrag + tailfrag);
607 			n_blks += wantedfrags;
608 			size = wantedfrags * sblock.fs_fsize;
609 			selected = block + leadfrag;
610 			bp = getdatablk(selected, size);
611 			(void) memset((void *)bp->b_un.b_buf, 0, size);
612 			dirty(bp);
613 			brelse(bp);
614 			if (debug)
615 				(void) printf(
616 		    "allocblk: selected %d (in block %d), frags %d, size %d\n",
617 				    selected, selected % sblock.fs_bsize,
618 				    wantedfrags, (int)size);
619 			return (selected);
620 		}
621 	}
622 	return (0);
623 }
624 
625 /*
626  * Free a previously allocated block
627  */
628 void
629 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
630 {
631 	struct inodesc idesc;
632 
633 	if (debug)
634 		(void) printf("debug: freeing %d fragments starting at %d\n",
635 		    frags, blkno);
636 
637 	init_inodesc(&idesc);
638 
639 	idesc.id_number = ino;
640 	idesc.id_blkno = blkno;
641 	idesc.id_numfrags = frags;
642 	idesc.id_truncto = -1;
643 
644 	/*
645 	 * Nothing in the return status has any relevance to how
646 	 * we're using pass4check(), so just ignore it.
647 	 */
648 	(void) pass4check(&idesc);
649 }
650 
/*
 * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
 * that the given buffer is at least MAXPATHLEN + 1 characters.
 *
 * The path is assembled right-to-left at the end of NAMEBUF (each
 * component found is prepended to what has been gathered so far) and
 * is moved to the front of the buffer just before returning.
 *
 * Special results:
 * - "?" if CURDIR is 0 or fails INO_IS_DVALID()
 * - "/" if both CURDIR and INO are UFSROOTINO
 * - a path prefixed with `?' if the walk towards the root stopped
 *   before reaching UFSROOTINO
 * - a name starting with XATTR_DIR_NAME if CURDIR is an extended
 *   attribute directory
 */
void
getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
{
	int len;
	caddr_t cp;		/* start of the path assembled so far */
	struct dinode *dp;
	struct inodesc idesc;
	struct inoinfo *inp;

	if (debug)
		(void) printf("debug: getpathname(curdir %d, ino %d)\n",
		    curdir, ino);

	if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
		(void) strcpy(namebuf, "?");
		return;
	}

	if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
		(void) strcpy(namebuf, "/");
		return;
	}

	/*
	 * Start with an empty string parked at the end of the buffer;
	 * components are prepended by moving cp towards namebuf[0].
	 */
	init_inodesc(&idesc);
	idesc.id_type = DATA;
	cp = &namebuf[MAXPATHLEN - 1];
	*cp = '\0';

	/*
	 * In the case of extended attributes, our
	 * parent won't necessarily be a directory, so just
	 * return what we've found with a prefix indicating
	 * that it's an XATTR.  Presumably our caller will
	 * know what's going on and do something useful, like
	 * work out the path of the parent and then combine
	 * the two names.
	 *
	 * Can't use strcpy(), etc, because we've probably
	 * already got some name information in the buffer and
	 * the usual trailing \0 would lose it.
	 *
	 * NOTE(review): when the lookup below fails, `*cp-- = '?''
	 * replaces the terminating \0 stored above, and the byte just
	 * before the `?' is never written, so the string produced for
	 * that case looks unterminated.  Also, whatever findname left
	 * at the start of namebuf is overwritten by the final
	 * memmove().  Confirm whether either is intended.
	 */
	dp = ginode(curdir);
	if ((dp->di_mode & IFMT) == IFATTRDIR) {
		idesc.id_number = curdir;
		idesc.id_parent = ino;
		idesc.id_func = findname;
		idesc.id_name = namebuf;
		idesc.id_fix = NOFIX;
		if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
			*cp-- = '?';
		}

		/* Prepend the XATTR marker (without its trailing \0). */
		len = sizeof (XATTR_DIR_NAME) - 1;
		cp -= len;
		(void) memmove(cp, XATTR_DIR_NAME, len);
		goto attrname;
	}

	/*
	 * If curdir == ino, need to get a handle on .. so we
	 * can search it for ino's name.  Otherwise, just search
	 * the given directory for ino.  Repeat until out of space
	 * or a full path has been built.
	 */
	if (curdir != ino) {
		/* Jumps into the loop body, skipping the `..' lookup. */
		idesc.id_parent = curdir;
		goto namelookup;
	}
	while (ino != UFSROOTINO && ino != 0) {
		/*
		 * Look up `..' in ino.  If the directory scan does not
		 * report FOUND, fall back on the parent recorded in the
		 * inoinfo entry; give up if there is none.
		 */
		idesc.id_number = ino;
		idesc.id_func = findino;
		idesc.id_name = "..";
		idesc.id_fix = NOFIX;
		if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
			inp = getinoinfo(ino);
			if ((inp == NULL) || (inp->i_parent == 0)) {
				break;
			}
			idesc.id_parent = inp->i_parent;
		}

		/*
		 * To get this far, id_parent must have the inode
		 * number for `..' in it.  By definition, that's got
		 * to be a directory, so search it for the inode of
		 * interest.
		 */
namelookup:
		/*
		 * Search directory id_parent for an entry referring to
		 * ino; the name is expected at the start of namebuf
		 * (see the strlen() below).
		 */
		idesc.id_number = idesc.id_parent;
		idesc.id_parent = ino;
		idesc.id_func = findname;
		idesc.id_name = namebuf;
		idesc.id_fix = NOFIX;
		if ((ckinode(ginode(idesc.id_number),
		    &idesc, CKI_TRAVERSE) & FOUND) == 0) {
			break;
		}
		/*
		 * Prepend to what we've accumulated so far.  If
		 * there's not enough room for even one more path element
		 * (of the worst-case length), then bail out.
		 */
		len = strlen(namebuf);
		cp -= len;
		if (cp < &namebuf[MAXNAMLEN])
			break;
		(void) memmove(cp, namebuf, len);
		*--cp = '/';

		/*
		 * Corner case for a looped-to-itself directory.
		 */
		if (ino == idesc.id_number)
			break;

		/*
		 * Climb one level of the hierarchy.  In other words,
		 * the current .. becomes the inode to search for and
		 * its parent becomes the directory to search in.
		 */
		ino = idesc.id_number;
	}

	/*
	 * If we hit a discontinuity in the hierarchy, indicate it by
	 * prefixing the path so far with `?'.  Otherwise, the first
	 * character will be `/' as a side-effect of the *--cp above.
	 *
	 * The special case is to handle the situation where we're
	 * trying to look something up in UFSROOTINO, but didn't find
	 * it.
	 */
	if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
		if (cp > namebuf)
			cp--;
		*cp = '?';
	}

	/*
	 * The invariants being used for buffer integrity are:
	 * - namebuf[] is terminated with \0 before anything else
	 * - cp is always <= the last element of namebuf[]
	 * - the new path element is always stored at the
	 *   beginning of namebuf[], and is no more than MAXNAMLEN-1
	 *   characters
	 * - cp is decremented by the number of characters in
	 *   the new path element
	 * - if, after the above accounting for the new element's
	 *   size, there is no longer enough room at the beginning of
	 *   namebuf[] for a full-sized path element and a slash,
	 *   terminate the loop.  cp is in the range
	 *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
	 */
attrname:
	/* Slide the assembled path down to the start of the buffer. */
	/* LINTED per the above discussion */
	(void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
}
812 
/*
 * Signal handler: run the normal cleanup in ckfini() and then exit
 * with status EXSIGNAL.  The signal number argument is not used.
 */
/* ARGSUSED */
void
catch(int dummy)
{
	ckfini();
	exit(EXSIGNAL);
}
820 
/*
 * When preening, allow a single quit to signal
 * a special exit after filesystem checks complete
 * so that reboot sequence may be interrupted.
 *
 * The handler announces what will happen, sets `interrupted', and
 * restores the default SIGQUIT disposition.  The signal number
 * argument is not used.
 */
/* ARGSUSED */
void
catchquit(int dummy)
{
	(void) printf("returning to single-user after filesystem check\n");
	interrupted = 1;
	/* Back to the default action for any further SIGQUIT. */
	(void) signal(SIGQUIT, SIG_DFL);
}
834 
835 
836 /*
837  * determine whether an inode should be fixed.
838  */
839 NOTE(PRINTFLIKE(2))
840 int
841 dofix(struct inodesc *idesc, caddr_t msg, ...)
842 {
843 	int rval = 0;
844 	va_list ap;
845 
846 	va_start(ap, msg);
847 
848 	switch (idesc->id_fix) {
849 
850 	case DONTKNOW:
851 		if (idesc->id_type == DATA)
852 			vdirerror(idesc->id_number, msg, ap);
853 		else
854 			vpwarn(msg, ap);
855 		if (preen) {
856 			idesc->id_fix = FIX;
857 			rval = ALTERED;
858 			break;
859 		}
860 		if (reply("SALVAGE") == 0) {
861 			idesc->id_fix = NOFIX;
862 			break;
863 		}
864 		idesc->id_fix = FIX;
865 		rval = ALTERED;
866 		break;
867 
868 	case FIX:
869 		rval = ALTERED;
870 		break;
871 
872 	case NOFIX:
873 		break;
874 
875 	default:
876 		errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
877 	}
878 
879 	va_end(ap);
880 	return (rval);
881 }
882 
883 NOTE(PRINTFLIKE(1))
884 void
885 errexit(caddr_t fmt, ...)
886 {
887 	va_list ap;
888 
889 	va_start(ap, fmt);
890 	verrexit(fmt, ap);
891 	/* NOTREACHED */
892 }
893 
894 NOTE(PRINTFLIKE(1))
895 static void
896 verrexit(caddr_t fmt, va_list ap)
897 {
898 	static int recursing = 0;
899 
900 	if (!recursing) {
901 		recursing = 1;
902 		if (errorlocked || iscorrupt) {
903 			if (havesb) {
904 				sblock.fs_clean = FSBAD;
905 				sblock.fs_state = FSOKAY - (long)sblock.fs_time;
906 				sblock.fs_state = -sblock.fs_state;
907 				sbdirty();
908 				write_altsb(fswritefd);
909 				flush(fswritefd, &sblk);
910 			}
911 		}
912 		ckfini();
913 		recursing = 0;
914 	}
915 	(void) vprintf(fmt, ap);
916 	if (fmt[strlen(fmt) - 1] != '\n')
917 		(void) putchar('\n');
918 	exit((exitstat != 0) ? exitstat : EXERRFATAL);
919 }
920 
921 /*
922  * An unexpected inconsistency occured.
923  * Die if preening, otherwise just print message and continue.
924  */
925 NOTE(PRINTFLIKE(1))
926 void
927 pfatal(caddr_t fmt, ...)
928 {
929 	va_list ap;
930 
931 	va_start(ap, fmt);
932 	vpfatal(fmt, ap);
933 	va_end(ap);
934 }
935 
936 NOTE(PRINTFLIKE(1))
937 static void
938 vpfatal(caddr_t fmt, va_list ap)
939 {
940 	if (preen) {
941 		if (*fmt != '\0') {
942 			(void) printf("%s: ", devname);
943 			(void) vprintf(fmt, ap);
944 			(void) printf("\n");
945 		}
946 		(void) printf(
947 		    "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
948 		    devname);
949 		if (havesb) {
950 			sblock.fs_clean = FSBAD;
951 			sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
952 			sbdirty();
953 			flush(fswritefd, &sblk);
954 		}
955 		/*
956 		 * We're exiting, it doesn't really matter that our
957 		 * caller doesn't get to call va_end().
958 		 */
959 		if (exitstat == 0)
960 			exitstat = EXFNDERRS;
961 		exit(exitstat);
962 	}
963 	if (*fmt != '\0') {
964 		(void) vprintf(fmt, ap);
965 	}
966 }
967 
968 /*
969  * Pwarn just prints a message when not preening,
970  * or a warning (preceded by filename) when preening.
971  */
972 NOTE(PRINTFLIKE(1))
973 void
974 pwarn(caddr_t fmt, ...)
975 {
976 	va_list ap;
977 
978 	va_start(ap, fmt);
979 	vpwarn(fmt, ap);
980 	va_end(ap);
981 }
982 
983 NOTE(PRINTFLIKE(1))
984 static void
985 vpwarn(caddr_t fmt, va_list ap)
986 {
987 	if (*fmt != '\0') {
988 		if (preen)
989 			(void) printf("%s: ", devname);
990 		(void) vprintf(fmt, ap);
991 	}
992 }
993 
994 /*
995  * Like sprintf(), except the buffer is dynamically allocated
996  * and returned, instead of being passed in.  A pointer to the
997  * buffer is stored in *RET, and FMT is the usual format string.
998  * The number of characters in *RET (excluding the trailing \0,
999  * to be consistent with the other *printf() routines) is returned.
1000  *
1001  * Solaris doesn't have asprintf(3C) yet, unfortunately.
1002  */
1003 NOTE(PRINTFLIKE(2))
1004 int
1005 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1006 {
1007 	int len;
1008 	caddr_t buffer;
1009 	va_list ap;
1010 
1011 	va_start(ap, fmt);
1012 	len = vsnprintf(NULL, 0, fmt, ap);
1013 	va_end(ap);
1014 
1015 	buffer = malloc((len + 1) * sizeof (char));
1016 	if (buffer == NULL) {
1017 		errexit("Out of memory in asprintf\n");
1018 		/* NOTREACHED */
1019 	}
1020 
1021 	va_start(ap, fmt);
1022 	(void) vsnprintf(buffer, len + 1, fmt, ap);
1023 	va_end(ap);
1024 
1025 	*ret = buffer;
1026 	return (len);
1027 }
1028 
1029 /*
1030  * So we can take advantage of kernel routines in ufs_subr.c.
1031  */
1032 /* PRINTFLIKE2 */
1033 void
1034 cmn_err(int level, caddr_t fmt, ...)
1035 {
1036 	va_list ap;
1037 
1038 	va_start(ap, fmt);
1039 	if (level == CE_PANIC) {
1040 		(void) printf("INTERNAL INCONSISTENCY:");
1041 		verrexit(fmt, ap);
1042 	} else {
1043 		(void) vprintf(fmt, ap);
1044 	}
1045 	va_end(ap);
1046 }
1047 
1048 /*
1049  * Check to see if unraw version of name is already mounted.
1050  * Updates devstr with the device name if devstr is not NULL
1051  * and str_size is positive.
1052  */
1053 int
1054 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1055 {
1056 	int found;
1057 	struct mnttab *mntent;
1058 
1059 	mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1060 	if (mntent == NULL)
1061 		return (M_NOMNT);
1062 
1063 	/*
1064 	 * It's mounted.  With or without write access?
1065 	 */
1066 	if (hasmntopt(mntent, MNTOPT_RO) != 0)
1067 		found = M_RO;	/* mounted as RO */
1068 	else
1069 		found = M_RW; 	/* mounted as R/W */
1070 
1071 	if (mount_point == NULL) {
1072 		mount_point = strdup(mntent->mnt_mountp);
1073 		if (mount_point == NULL) {
1074 			errexit("fsck: memory allocation failure: %s",
1075 				strerror(errno));
1076 			/* NOTREACHED */
1077 		}
1078 
1079 		if (devstr != NULL && str_size > 0)
1080 			(void) strlcpy(devstr, mntent->mnt_special, str_size);
1081 	}
1082 
1083 	return (found);
1084 }
1085 
1086 /*
1087  * Check to see if name corresponds to an entry in vfstab, and that the entry
1088  * does not have option ro.
1089  */
1090 int
1091 writable(caddr_t name)
1092 {
1093 	int rw = 1;
1094 	struct vfstab vfsbuf, vfskey;
1095 	FILE *vfstab;
1096 
1097 	vfstab = fopen(VFSTAB, "r");
1098 	if (vfstab == NULL) {
1099 		(void) printf("can't open %s\n", VFSTAB);
1100 		return (1);
1101 	}
1102 	(void) memset((void *)&vfskey, 0, sizeof (vfskey));
1103 	vfsnull(&vfskey);
1104 	vfskey.vfs_special = unrawname(name);
1105 	vfskey.vfs_fstype = MNTTYPE_UFS;
1106 	if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1107 	    (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1108 		rw = 0;
1109 	}
1110 	(void) fclose(vfstab);
1111 	return (rw);
1112 }
1113 
1114 /*
1115  * debugclean
1116  */
1117 static void
1118 debugclean(void)
1119 {
1120 	if (!debug)
1121 		return;
1122 
1123 	if ((iscorrupt == 0) && (isdirty == 0))
1124 		return;
1125 
1126 	if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1127 	    (sblock.fs_clean == FSLOG && islog && islogok) ||
1128 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1129 		return;
1130 
1131 	(void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1132 	    sblock.fs_clean == FSSTABLE ? "stable" :
1133 	    sblock.fs_clean == FSLOG ? "logging" :
1134 	    sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1135 	    devname);
1136 }
1137 
/*
 * updateclean
 *	Carefully and transparently update the clean flag.
 *
 * Works out what fs_clean, fs_reclaim, fs_flags and fs_logbno should
 * be given the outcome of the check.  If nothing differs from the
 * in-core superblock (and nothing else was modified), the only action
 * is to release the error lock, if one is held.  Otherwise the fields
 * are updated in-core and, when a writable descriptor is available,
 * patched into a freshly-read copy of the on-disk superblock which is
 * then written back.
 *
 * `iscorrupt' has to be in its final state before this is called.
 *
 * Returns non-zero if the log's blocks were handed to freelogblk()
 * because the log is being dropped, zero otherwise.
 */
int
updateclean(void)
{
	int freedlog = 0;
	struct bufarea cleanbuf;
	size_t size;
	ssize_t io_res;
	diskaddr_t bno;
	char fsclean;
	int fsreclaim;
	char fsflags;
	int flags_ok;
	daddr32_t fslogbno;
	offset_t sblkoff;
	time_t t;

	/*
	 * debug stuff
	 */
	debugclean();

	/*
	 * set fsclean to its appropriate value
	 *
	 * Start from what the superblock currently claims.  If the
	 * fs_state/fs_time pair does not add up to FSOKAY, the clean
	 * flag is not believed (unless the filesystem is error-locked)
	 * and the filesystem is treated as active.
	 */
	fslogbno = sblock.fs_logbno;
	fsclean = sblock.fs_clean;
	fsreclaim = sblock.fs_reclaim;
	fsflags = sblock.fs_flags;
	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
		fsclean = FSACTIVE;
	}
	/*
	 * If ufs log is not okay, note that we need to clear it.
	 * (examinelog() with a NULL callback only re-evaluates islogok.)
	 */
	examinelog(sblock.fs_logbno, NULL);
	if (fslogbno && !(islog && islogok)) {
		fsclean = FSACTIVE;
		fslogbno = 0;
	}

	/*
	 * if necessary, update fs_clean and fs_state
	 *
	 * In every case a corrupt filesystem ends up FSACTIVE (or FSBAD
	 * when it was FSFIX); fs_reclaim is only cleared on the paths
	 * below that explicitly do so.
	 */
	switch (fsclean) {

	case FSACTIVE:
		if (!iscorrupt) {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		}
		break;

	case FSCLEAN:
	case FSSTABLE:
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else {
			fsreclaim = 0;
		}
		break;

	case FSLOG:
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else if (!islog || fslogbno == 0) {
			/* no usable log any more, so it cannot stay FSLOG */
			fsclean = FSSTABLE;
			fsreclaim = 0;
		} else if (fflag) {
			fsreclaim = 0;
		}
		break;

	case FSFIX:
		/* assume the worst unless an error-locked fs came out clean */
		fsclean = FSBAD;
		if (errorlocked && !iscorrupt) {
			fsclean = islog ? FSLOG : FSCLEAN;
		}
		break;

	default:
		if (iscorrupt) {
			fsclean = FSACTIVE;
		} else {
			fsclean = FSSTABLE;
			fsreclaim = 0;
		}
	}

	if (largefile_count > 0)
		fsflags |= FSLARGEFILES;
	else
		fsflags &= ~FSLARGEFILES;

	/*
	 * If the only flag difference is that the superblock thinks
	 * there are largefiles, but we didn't find any, then ignore
	 * the discrepancy.  The kernel never clears the flag, it just
	 * sets it whenever a largefile is created.  Since it is harmless
	 * to have the flag set when it's not actually true, that by
	 * itself is not grounds for declaring the superblock to be
	 * in the wrong state.
	 *
	 * This could, in theory, prevent a filesystem from being
	 * mounted, if the existing superblock claims such files are
	 * out there and the user uses the nolargefiles option.  So,
	 * if we were forced to scan the filesystem, go ahead and
	 * take FSLARGEFILES into account as well.
	 */
	if (fflag)
		flags_ok = 0;
	else
		flags_ok = (sblock.fs_flags & ~FSLARGEFILES) == fsflags;

	if (debug)
		(void) printf(
		    "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
		    largefile_count, sblock.fs_flags, flags_ok);

	/*
	 * If fs is unchanged, do nothing.
	 * (Apart from dropping the error lock, if we are holding one.)
	 */
	if ((!isdirty) && (flags_ok) &&
	    (fslogbno == sblock.fs_logbno) &&
	    (sblock.fs_clean == fsclean) &&
	    (sblock.fs_reclaim == fsreclaim) &&
	    (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
		if (errorlocked) {
			if (!do_errorlock(LOCKFS_ULOCK))
				pwarn(
		    "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
		}
		return (freedlog);
	}

	/*
	 * if user allows, update superblock state
	 */
	if (debug) {
		(void) printf(
	    "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
		    sblock.fs_flags, sblock.fs_logbno,
		    sblock.fs_clean, sblock.fs_reclaim,
		    sblock.fs_state + sblock.fs_time);
		(void) printf(
	    "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
		    fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
	}
	/*
	 * Only ask when the state flags are the sole thing that would
	 * change; a dirty filesystem, preen mode or a rerun proceed
	 * without prompting.
	 */
	if (!isdirty && !preen && !rerun &&
	    (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
		return (freedlog);

	(void) time(&t);
	sblock.fs_time = (time32_t)t;
	if (debug)
		printclean();

	/*
	 * The log is going away: walk it once more, this time handing
	 * each of its fragments to freelogblk().
	 */
	if (sblock.fs_logbno != fslogbno) {
		examinelog(sblock.fs_logbno, &freelogblk);
		freedlog++;
	}

	/*
	 * fs_state is stored such that fs_state + fs_time == FSOKAY,
	 * which is the validity test applied at the top of this function.
	 */
	sblock.fs_logbno = fslogbno;
	sblock.fs_clean = fsclean;
	sblock.fs_state = FSOKAY - (long)sblock.fs_time;
	sblock.fs_reclaim = fsreclaim;
	sblock.fs_flags = fsflags;

	/*
	 * if superblock can't be written, return
	 */
	if (fswritefd < 0)
		return (freedlog);

	/*
	 * Read private copy of superblock, update clean flag, and write it.
	 * Only the six fields assigned below are changed in that copy;
	 * everything else is written back exactly as it was read.
	 */
	bno  = sblk.b_bno;
	size = sblk.b_size;

	sblkoff = ldbtob(bno);

	if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
		errexit("out of memory");
	if (llseek(fsreadfd, sblkoff, 0) == -1) {
		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
		    (longlong_t)bno, strerror(errno));
		goto out;
	}

	if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
		report_io_prob("READ FROM", bno, size, io_res);
		goto out;
	}

	cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
	cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
	cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
	cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
	cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
	cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;

	if (llseek(fswritefd, sblkoff, 0) == -1) {
		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
		    (longlong_t)bno, strerror(errno));
		goto out;
	}

	if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
		report_io_prob("WRITE TO", bno, size, io_res);
		goto out;
	}

	/*
	 * 1208040
	 * If we had to use -b to grab an alternate superblock, then we
	 * likely had to do so because of unacceptable differences between
	 * the main and alternate superblocks.  So, we had better update
	 * the alternate superblock as well, or we'll just fail again
	 * the next time we attempt to run fsck!
	 */
	if (bflag != 0) {
		write_altsb(fswritefd);
	}

	/*
	 * Note that the seek/read/write failure paths above jump straight
	 * to `out' and therefore never reach this unlock.
	 */
	if (errorlocked) {
		if (!do_errorlock(LOCKFS_ULOCK))
			pwarn(
		    "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
	}

out:
	if (cleanbuf.b_un.b_buf != NULL) {
		free((void *)cleanbuf.b_un.b_buf);
	}

	return (freedlog);
}
1381 
1382 static void
1383 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1384 {
1385 	if (failure < 0)
1386 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1387 		    what, (int)bno, strerror(errno));
1388 	else if (failure == 0)
1389 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1390 		    what, (int)bno);
1391 	else
1392 		(void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1393 		    what, (int)bno, (unsigned)failure, (unsigned)expected);
1394 }
1395 
1396 /*
1397  * print out clean info
1398  */
1399 void
1400 printclean(void)
1401 {
1402 	caddr_t s;
1403 
1404 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1405 		s = "unknown";
1406 	else
1407 		switch (sblock.fs_clean) {
1408 
1409 		case FSACTIVE:
1410 			s = "active";
1411 			break;
1412 
1413 		case FSCLEAN:
1414 			s = "clean";
1415 			break;
1416 
1417 		case FSSTABLE:
1418 			s = "stable";
1419 			break;
1420 
1421 		case FSLOG:
1422 			s = "logging";
1423 			break;
1424 
1425 		case FSBAD:
1426 			s = "is bad";
1427 			break;
1428 
1429 		case FSFIX:
1430 			s = "being fixed";
1431 			break;
1432 
1433 		default:
1434 			s = "unknown";
1435 		}
1436 
1437 	if (preen)
1438 		pwarn("is %s.\n", s);
1439 	else
1440 		(void) printf("** %s is %s.\n", devname, s);
1441 }
1442 
1443 int
1444 is_errorlocked(caddr_t fs)
1445 {
1446 	int		retval;
1447 	struct stat64	statb;
1448 	caddr_t		mountp;
1449 	struct mnttab	*mntent;
1450 
1451 	retval = 0;
1452 
1453 	if (!fs)
1454 		return (0);
1455 
1456 	if (stat64(fs, &statb) < 0)
1457 		return (0);
1458 
1459 	if (S_ISDIR(statb.st_mode)) {
1460 		mountp = fs;
1461 	} else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1462 		mntent = search_mnttab(NULL, fs, NULL, 0);
1463 		if (mntent == NULL)
1464 			return (0);
1465 		mountp = mntent->mnt_mountp;
1466 		if (mountp == NULL) /* theoretically a can't-happen */
1467 			return (0);
1468 	} else {
1469 		return (0);
1470 	}
1471 
1472 	/*
1473 	 * From here on, must `goto out' to avoid memory leakage.
1474 	 */
1475 
1476 	if (elock_combuf == NULL)
1477 		elock_combuf =
1478 			(caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1479 	else
1480 		elock_combuf =
1481 			(caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1482 
1483 	if (elock_combuf == NULL)
1484 		goto out;
1485 
1486 	(void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1487 
1488 	if (elock_mountp != NULL) {
1489 		free(elock_mountp);
1490 	}
1491 
1492 	elock_mountp = strdup(mountp);
1493 	if (elock_mountp == NULL)
1494 		goto out;
1495 
1496 	if (mountfd < 0) {
1497 		if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1498 			goto out;
1499 	}
1500 
1501 	if (lfp == NULL) {
1502 		lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1503 		if (lfp == NULL)
1504 			goto out;
1505 		(void) memset((void *)lfp, 0, sizeof (struct lockfs));
1506 	}
1507 
1508 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1509 	lfp->lf_comment = elock_combuf;
1510 
1511 	if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1512 		goto out;
1513 
1514 	/*
1515 	 * lint believes that the ioctl() (or any other function
1516 	 * taking lfp as an arg) could free lfp.  This is not the
1517 	 * case, however.
1518 	 */
1519 	retval = LOCKFS_IS_ELOCK(lfp);
1520 
1521 out:
1522 	return (retval);
1523 }
1524 
1525 /*
1526  * Given a name which is known to be a directory, see if it appears
1527  * in the vfstab.  If so, return the entry's block (special) device
1528  * field via devstr.
1529  */
1530 int
1531 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1532 {
1533 	return (NULL != search_vfstab(name, NULL, devstr, str_size));
1534 }
1535 
1536 /*
1537  * Given a name which is known to be a directory, see if it appears
1538  * in the mnttab.  If so, return the entry's block (special) device
1539  * field via devstr.
1540  */
1541 int
1542 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1543 {
1544 	return (NULL != search_mnttab(name, NULL, devstr, str_size));
1545 }
1546 
/*
 * Search for mount point and/or special device in the given file.
 * The first matching entry is returned.
 *
 * If an entry is found and str_size is greater than zero, then
 * up to str_size bytes (including the terminating NUL, as this is
 * done with strlcpy()) of the special device name from the entry
 * are copied to devstr.
 *
 * SEARCH_TAB_BODY expands to the complete body of such a search
 * function.  The enclosing function must have parameters named
 * mountp, special, devstr and str_size.  Macro arguments:
 *
 *	st_type		struct tag of the table entry
 *	st_file		pathname of the table to open
 *	st_mount	name of the entry's mount point member
 *	st_special	name of the entry's special device member
 *	st_nuller	routine that clears a search key
 *	st_init		extra expression evaluated after the key's
 *			mount point and special members are set
 *	st_searcher	routine that finds an entry matching the key
 *
 * The returned pointer refers to a static buffer inside the generated
 * function, so it is overwritten by the next call to that function.
 * NULL is returned if the table can't be opened or nothing matches.
 */

#define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
			st_nuller, st_init, st_searcher) \
	{ \
		FILE *fp; \
		struct st_type *retval = NULL; \
		struct st_type key; \
		static struct st_type buffer; \
		\
		/* LINTED ``assigned value never used'' */ \
		st_nuller(&key); \
		key.st_mount = mountp; \
		key.st_special = special; \
		st_init; \
		\
		if ((fp = fopen(st_file, "r")) == NULL) \
			return (NULL); \
		\
		if (st_searcher(fp, &buffer, &key) == 0) { \
			retval = &buffer; \
			if (devstr != NULL && str_size > 0 && \
			    buffer.st_special != NULL) { \
				(void) strlcpy(devstr, buffer.st_special, \
				    str_size); \
			} \
		} \
		(void) fclose(fp); \
		return (retval); \
	}
1584 
/*
 * Find the first vfstab entry matching the given mount point and/or
 * special device (either may be NULL).  The function body is generated
 * by SEARCH_TAB_BODY; no extra key setup is needed here, so the
 * st_init argument is just a self-assignment that has no effect.
 */
static struct vfstab *
search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
		(retval = retval), getvfsany)
1589 
/*
 * Find the first mnttab entry matching the given mount point and/or
 * special device (either may be NULL).  The function body is generated
 * by SEARCH_TAB_BODY; the st_init argument additionally sets the key's
 * mnt_fstype to MNTTYPE_UFS before the search.
 */
static struct mnttab *
search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
		(key.mnt_fstype = MNTTYPE_UFS), getmntany)
1594 
1595 int
1596 do_errorlock(int lock_type)
1597 {
1598 	caddr_t	   buf;
1599 	time_t	   now;
1600 	struct tm *local;
1601 	int	   rc;
1602 
1603 	if (elock_combuf == NULL)
1604 		errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1605 			elock_mountp ? elock_mountp : "<null>",
1606 			lock_type);
1607 
1608 	if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1609 	    NULL) {
1610 		errexit("Couldn't alloc memory for temp. lock status buffer\n");
1611 	}
1612 	if (lfp == NULL) {
1613 		errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1614 					elock_mountp, lock_type);
1615 	}
1616 
1617 	(void) memmove((void *)buf, (void *)elock_combuf,
1618 	    LOCKFS_MAXCOMMENTLEN-1);
1619 
1620 	switch (lock_type) {
1621 	case LOCKFS_ELOCK:
1622 		/*
1623 		 * Note that if it is error-locked, we won't get an
1624 		 * error back if we try to error-lock it again.
1625 		 */
1626 		if (time(&now) != (time_t)-1) {
1627 			if ((local = localtime(&now)) != NULL)
1628 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1629 		    "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1630 				    elock_combuf, (int)pid,
1631 				    local->tm_mon + 1, local->tm_mday,
1632 				    (local->tm_year % 100), local->tm_hour,
1633 				    local->tm_min, local->tm_sec);
1634 			else
1635 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1636 				    "%s [fsck pid %d", elock_combuf, pid);
1637 
1638 		} else {
1639 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1640 			    "%s [fsck pid %d", elock_combuf, pid);
1641 		}
1642 		break;
1643 
1644 	case LOCKFS_ULOCK:
1645 		if (time(&now) != (time_t)-1) {
1646 			if ((local = localtime(&now)) != NULL) {
1647 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1648 				    "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1649 				    elock_combuf,
1650 				    local->tm_mon + 1, local->tm_mday,
1651 				    (local->tm_year % 100), local->tm_hour,
1652 				    local->tm_min, local->tm_sec);
1653 			} else {
1654 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1655 				    "%s]", elock_combuf);
1656 			}
1657 		} else {
1658 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1659 			    "%s]", elock_combuf);
1660 		}
1661 		if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1662 			pwarn("do_errorlock: unlock failed: %s\n",
1663 			    strerror(errno));
1664 			goto out;
1665 		}
1666 		break;
1667 
1668 	default:
1669 		break;
1670 	}
1671 
1672 	(void) memmove((void *)elock_combuf, (void *)buf,
1673 	    LOCKFS_MAXCOMMENTLEN - 1);
1674 
1675 	lfp->lf_lock = lock_type;
1676 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1677 	lfp->lf_comment = elock_combuf;
1678 	lfp->lf_flags = 0;
1679 	errno = 0;
1680 
1681 	if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1682 		if (errno == EINVAL) {
1683 			pwarn("Another fsck active?\n");
1684 			iscorrupt = 0;	/* don't go away mad, just go away */
1685 		} else {
1686 			pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1687 			    lock_type, elock_combuf, strerror(errno));
1688 		}
1689 	}
1690 out:
1691 	if (buf != NULL) {
1692 		free((void *)buf);
1693 	}
1694 
1695 	return (rc != -1);
1696 }
1697 
1698 /*
1699  * Shadow inode support.  To register a shadow with a client is to note
1700  * that an inode (the client) refers to the shadow.
1701  */
1702 
1703 static struct shadowclients *
1704 newshadowclient(struct shadowclients *prev)
1705 {
1706 	struct shadowclients *rc;
1707 
1708 	rc = (struct shadowclients *)malloc(sizeof (*rc));
1709 	if (rc == NULL)
1710 		errexit("newshadowclient: cannot malloc shadow client");
1711 	rc->next = prev;
1712 	rc->nclients = 0;
1713 
1714 	rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1715 	    maxshadowclients);
1716 	if (rc->client == NULL)
1717 		errexit("newshadowclient: cannot malloc client array");
1718 	return (rc);
1719 }
1720 
1721 void
1722 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1723 	struct shadowclientinfo **info)
1724 {
1725 	struct shadowclientinfo *sci;
1726 	struct shadowclients *scc;
1727 
1728 	/*
1729 	 * Already have a record for this shadow?
1730 	 */
1731 	for (sci = *info; sci != NULL; sci = sci->next)
1732 		if (sci->shadow == shadow)
1733 			break;
1734 	if (sci == NULL) {
1735 		/*
1736 		 * It's a new shadow, add it to the list
1737 		 */
1738 		sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1739 		if (sci == NULL)
1740 			errexit("registershadowclient: cannot malloc");
1741 		sci->next = *info;
1742 		*info = sci;
1743 		sci->shadow = shadow;
1744 		sci->totalClients = 0;
1745 		sci->clients = newshadowclient(NULL);
1746 	}
1747 
1748 	sci->totalClients++;
1749 	scc = sci->clients;
1750 	if (scc->nclients >= maxshadowclients) {
1751 		scc = newshadowclient(sci->clients);
1752 		sci->clients = scc;
1753 	}
1754 
1755 	scc->client[scc->nclients++] = client;
1756 }
1757 
1758 /*
1759  * Locate and discard a shadow.
1760  */
1761 void
1762 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1763 {
1764 	struct shadowclientinfo *sci, *prev;
1765 
1766 	/*
1767 	 * Do we have a record for this shadow?
1768 	 */
1769 	prev = NULL;
1770 	for (sci = *info; sci != NULL; sci = sci->next) {
1771 		if (sci->shadow == shadow)
1772 			break;
1773 		prev = sci;
1774 	}
1775 
1776 	if (sci != NULL) {
1777 		/*
1778 		 * First, pull it off the list, since we know there
1779 		 * shouldn't be any future references to this one.
1780 		 */
1781 		if (prev == NULL)
1782 			*info = sci->next;
1783 		else
1784 			prev->next = sci->next;
1785 		deshadow(sci, clearattrref);
1786 	}
1787 }
1788 
1789 /*
1790  * Discard all memory used to track clients of a shadow.
1791  */
1792 void
1793 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1794 {
1795 	struct shadowclients *clients, *discard;
1796 	int idx;
1797 
1798 	clients = sci->clients;
1799 	while (clients != NULL) {
1800 		discard = clients;
1801 		clients = clients->next;
1802 		if (discard->client != NULL) {
1803 			if (cb != NULL) {
1804 				for (idx = 0; idx < discard->nclients; idx++)
1805 					(*cb)(discard->client[idx]);
1806 			}
1807 			free((void *)discard->client);
1808 		}
1809 		free((void *)discard);
1810 	}
1811 
1812 	free((void *)sci);
1813 }
1814 
1815 /*
1816  * Allocate more buffer as need arises but allocate one at a time.
1817  * This is done to make sure that fsck does not exit with error if it
1818  * needs more buffer to complete its task.
1819  */
1820 static struct bufarea *
1821 alloc_bufarea(void)
1822 {
1823 	struct bufarea *newbp;
1824 	caddr_t bufp;
1825 
1826 	bufp = malloc((unsigned int)sblock.fs_bsize);
1827 	if (bufp == NULL)
1828 		return (NULL);
1829 
1830 	newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1831 	if (newbp == NULL) {
1832 		free((void *)bufp);
1833 		return (NULL);
1834 	}
1835 
1836 	initbarea(newbp);
1837 	newbp->b_un.b_buf = bufp;
1838 	newbp->b_prev = &bufhead;
1839 	newbp->b_next = bufhead.b_next;
1840 	bufhead.b_next->b_prev = newbp;
1841 	bufhead.b_next = newbp;
1842 	bufhead.b_size++;
1843 	return (newbp);
1844 }
1845 
1846 /*
1847  * We length-limit in both unrawname() and rawname() to avoid
1848  * overflowing our arrays or those of our naive, trusting callers.
1849  */
1850 
1851 caddr_t
1852 unrawname(caddr_t name)
1853 {
1854 	caddr_t dp;
1855 	static char fullname[MAXPATHLEN + 1];
1856 
1857 	if ((dp = getfullblkname(name)) == NULL)
1858 		return ("");
1859 
1860 	(void) strlcpy(fullname, dp, sizeof (fullname));
1861 	/*
1862 	 * Not reporting under debug, as the allocation isn't
1863 	 * reported by getfullblkname.  The idea is that we
1864 	 * produce balanced alloc/free instances.
1865 	 */
1866 	free(dp);
1867 
1868 	return (fullname);
1869 }
1870 
1871 caddr_t
1872 rawname(caddr_t name)
1873 {
1874 	caddr_t dp;
1875 	static char fullname[MAXPATHLEN + 1];
1876 
1877 	if ((dp = getfullrawname(name)) == NULL)
1878 		return ("");
1879 
1880 	(void) strlcpy(fullname, dp, sizeof (fullname));
1881 	/*
1882 	 * Not reporting under debug, as the allocation isn't
1883 	 * reported by getfullblkname.  The idea is that we
1884 	 * produce balanced alloc/free instances.
1885 	 */
1886 	free(dp);
1887 
1888 	return (fullname);
1889 }
1890 
1891 /*
1892  * Make sure that a cg header looks at least moderately reasonable.
1893  * We want to be able to trust the contents enough to be able to use
1894  * the standard accessor macros.  So, besides looking at the obvious
1895  * such as the magic number, we verify that the offset field values
1896  * are properly aligned and not too big or small.
1897  *
1898  * Returns a NULL pointer if the cg is sane enough for our needs, else
1899  * a dynamically-allocated string describing all of its faults.
1900  */
/*
 * Append the heap-allocated string `addition' (addition_len characters,
 * not counting the NUL) to the heap-allocated string `full' and keep
 * full_len up to date.  Ownership of `addition' is taken over: it either
 * becomes `full' (when `full' was NULL) or is freed after being copied.
 *
 * Wrapped in do { } while (0) so that an invocation followed by a
 * semicolon behaves as exactly one statement, even as the body of an
 * unbraced if/else.
 */
#define	Append_Error(full, full_len, addition, addition_len) \
	do { \
		if ((full) == NULL) { \
			(full) = (addition); \
			(full_len) = (addition_len); \
		} else { \
			/* lint doesn't think realloc() understands NULLs */ \
			(full) = realloc((full), \
			    (full_len) + (addition_len) + 1); \
			if ((full) == NULL) { \
				errexit("Out of memory in cg_sanity"); \
				/* NOTREACHED */ \
			} \
			(void) strcpy((full) + (full_len), (addition)); \
			(full_len) += (addition_len); \
			free(addition); \
		} \
	/* CONSTCOND */ \
	} while (0)
1916 
1917 caddr_t
1918 cg_sanity(struct cg *cgp, int cgno, int *is_fatal)
1919 {
1920 	caddr_t full_err;
1921 	caddr_t this_err = NULL;
1922 	int full_len, this_len;
1923 	daddr32_t ndblk;
1924 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1925 	daddr32_t exp_freeoff, exp_nextfreeoff;
1926 
1927 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1928 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
1929 
1930 	full_err = NULL;
1931 	full_len = 0;
1932 	*is_fatal = 0;
1933 
1934 	if (!cg_chkmagic(cgp)) {
1935 		this_len = fsck_asprintf(&this_err,
1936 		    "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1937 		    cgp->cg_magic, CG_MAGIC);
1938 		Append_Error(full_err, full_len, this_err, this_len);
1939 		*is_fatal = 1;
1940 	}
1941 
1942 	if (cgp->cg_cgx != cgno) {
1943 		this_len = fsck_asprintf(&this_err,
1944 		    "WRONG CG NUMBER (%d should be %d)\n",
1945 		    cgp->cg_cgx, cgno);
1946 		Append_Error(full_err, full_len, this_err, this_len);
1947 	}
1948 
1949 	if ((cgp->cg_btotoff & 3) != 0) {
1950 		this_len = fsck_asprintf(&this_err,
1951 		    "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1952 		    cgp->cg_btotoff);
1953 		Append_Error(full_err, full_len, this_err, this_len);
1954 	}
1955 
1956 	if ((cgp->cg_boff & 1) != 0) {
1957 		this_len = fsck_asprintf(&this_err,
1958 	    "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1959 		    cgp->cg_boff);
1960 		Append_Error(full_err, full_len, this_err, this_len);
1961 	}
1962 
1963 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1964 		if (cgp->cg_ncyl < 1) {
1965 			this_len = fsck_asprintf(&this_err,
1966 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1967 			    cgp->cg_ncyl);
1968 		} else {
1969 			this_len = fsck_asprintf(&this_err,
1970 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1971 			    cgp->cg_ncyl, sblock.fs_cpg);
1972 		}
1973 		Append_Error(full_err, full_len, this_err, this_len);
1974 	}
1975 
1976 	if (cgp->cg_niblk != sblock.fs_ipg) {
1977 		this_len = fsck_asprintf(&this_err,
1978 		    "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1979 		    cgp->cg_niblk, sblock.fs_ipg);
1980 		Append_Error(full_err, full_len, this_err, this_len);
1981 	}
1982 
1983 	if (cgp->cg_ndblk != ndblk) {
1984 		this_len = fsck_asprintf(&this_err,
1985 	    "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1986 		    cgp->cg_ndblk, ndblk);
1987 		Append_Error(full_err, full_len, this_err, this_len);
1988 	}
1989 
1990 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1991 		this_len = fsck_asprintf(&this_err,
1992 		    "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1993 		    "(%d should be at least 0 and less than %d)\n",
1994 		    cgp->cg_rotor, ndblk);
1995 		Append_Error(full_err, full_len, this_err, this_len);
1996 	}
1997 
1998 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1999 		this_len = fsck_asprintf(&this_err,
2000 		    "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
2001 		    "(%d should be at least 0 and less than %d)\n",
2002 		    cgp->cg_frotor, ndblk);
2003 		Append_Error(full_err, full_len, this_err, this_len);
2004 	}
2005 
2006 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2007 		this_len = fsck_asprintf(&this_err,
2008 		    "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
2009 		    "(%d should be at least 0 and less than %d)\n",
2010 		    cgp->cg_irotor, sblock.fs_ipg);
2011 		Append_Error(full_err, full_len, this_err, this_len);
2012 	}
2013 
2014 	if (cgp->cg_btotoff != exp_btotoff) {
2015 		this_len = fsck_asprintf(&this_err,
2016 		    "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2017 		    cgp->cg_btotoff, exp_btotoff);
2018 		Append_Error(full_err, full_len, this_err, this_len);
2019 	}
2020 
2021 	if (cgp->cg_boff != exp_boff) {
2022 		this_len = fsck_asprintf(&this_err,
2023 		    "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2024 		    cgp->cg_boff, exp_boff);
2025 		Append_Error(full_err, full_len, this_err, this_len);
2026 	}
2027 
2028 	if (cgp->cg_iusedoff != exp_iusedoff) {
2029 		this_len = fsck_asprintf(&this_err,
2030 		    "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2031 		    cgp->cg_iusedoff, exp_iusedoff);
2032 		Append_Error(full_err, full_len, this_err, this_len);
2033 	}
2034 
2035 	if (cgp->cg_freeoff != exp_freeoff) {
2036 		this_len = fsck_asprintf(&this_err,
2037 		    "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2038 		    cgp->cg_freeoff, exp_freeoff);
2039 		Append_Error(full_err, full_len, this_err, this_len);
2040 	}
2041 
2042 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2043 		this_len = fsck_asprintf(&this_err,
2044 		    "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2045 		    cgp->cg_nextfreeoff, exp_nextfreeoff);
2046 		Append_Error(full_err, full_len, this_err, this_len);
2047 	}
2048 
2049 	return (full_err);
2050 }
2051 
2052 #undef	Append_Error
2053 
2054 /*
2055  * This is taken from mkfs, and is what is used to come up with the
2056  * original values for a struct cg.  This implies that, since these
2057  * are all constants, recalculating them now should give us the same
2058  * thing as what's on disk.
2059  */
2060 static void
2061 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2062 	daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2063 	daddr32_t *ndblk)
2064 {
2065 	daddr32_t cbase, dmax;
2066 	struct cg *cgp;
2067 
2068 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2069 	    (size_t)sblock.fs_cgsize);
2070 	cgp = cgblk.b_un.b_cg;
2071 
2072 	cbase = cgbase(&sblock, cgno);
2073 	dmax = cbase + sblock.fs_fpg;
2074 	if (dmax > sblock.fs_size)
2075 		dmax = sblock.fs_size;
2076 
2077 	/* LINTED pointer difference won't overflow */
2078 	*btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2079 	*boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2080 	*iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2081 	*freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2082 	*nextfreeoff = *freeoff +
2083 		howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2084 	*ndblk = dmax - cbase;
2085 }
2086 
2087 /*
2088  * Corrects all fields in the cg that can be done with the available
2089  * redundant data.
2090  */
2091 void
2092 fix_cg(struct cg *cgp, int cgno)
2093 {
2094 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2095 	daddr32_t exp_freeoff, exp_nextfreeoff;
2096 	daddr32_t ndblk;
2097 
2098 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2099 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
2100 
2101 	if (cgp->cg_cgx != cgno) {
2102 		cgp->cg_cgx = cgno;
2103 	}
2104 
2105 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2106 		if (cgno == sblock.fs_ncg) {
2107 			cgp->cg_ncyl = sblock.fs_ncyl -
2108 				(sblock.fs_ncg * (cgno - 1));
2109 		} else {
2110 			cgp->cg_ncyl = sblock.fs_cpg;
2111 		}
2112 	}
2113 
2114 	if (cgp->cg_niblk != sblock.fs_ipg) {
2115 		/*
2116 		 * This is not used by the kernel, so it's pretty
2117 		 * harmless if it's wrong.
2118 		 */
2119 		cgp->cg_niblk = sblock.fs_ipg;
2120 	}
2121 
2122 	if (cgp->cg_ndblk != ndblk) {
2123 		cgp->cg_ndblk = ndblk;
2124 	}
2125 
2126 	/*
2127 	 * For the rotors, any position's valid, so pick the one we know
2128 	 * will always exist.
2129 	 */
2130 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2131 		cgp->cg_rotor = 0;
2132 	}
2133 
2134 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2135 		cgp->cg_frotor = 0;
2136 	}
2137 
2138 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2139 		cgp->cg_irotor = 0;
2140 	}
2141 
2142 	/*
2143 	 * For btotoff and boff, if they're misaligned they won't
2144 	 * match the expected values, so we're catching both cases
2145 	 * here.  Of course, if any of these are off, it seems likely
2146 	 * that the tables really won't be where we calculate they
2147 	 * should be anyway.
2148 	 */
2149 	if (cgp->cg_btotoff != exp_btotoff) {
2150 		cgp->cg_btotoff = exp_btotoff;
2151 	}
2152 
2153 	if (cgp->cg_boff != exp_boff) {
2154 		cgp->cg_boff = exp_boff;
2155 	}
2156 
2157 	if (cgp->cg_iusedoff != exp_iusedoff) {
2158 		cgp->cg_iusedoff = exp_iusedoff;
2159 	}
2160 
2161 	if (cgp->cg_freeoff != exp_freeoff) {
2162 		cgp->cg_freeoff = exp_freeoff;
2163 	}
2164 
2165 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2166 		cgp->cg_nextfreeoff = exp_nextfreeoff;
2167 	}
2168 
2169 	/*
2170 	 * We know there was at least one correctable problem,
2171 	 * or else we wouldn't have been called.  So instead of
2172 	 * marking the buffer dirty N times above, just do it
2173 	 * once here.
2174 	 */
2175 	cgdirty();
2176 }
2177 
2178 void
2179 examinelog(daddr32_t start, void (*cb)(daddr32_t))
2180 {
2181 	struct bufarea *bp;
2182 	extent_block_t *ebp;
2183 	extent_t *ep;
2184 	daddr32_t nfno, fno;
2185 	int i;
2186 	int j;
2187 
2188 	if (start < SBLOCK)
2189 		return;
2190 
2191 	/*
2192 	 * Read errors will return zeros, which will cause us
2193 	 * to do nothing harmful, so don't need to handle it.
2194 	 */
2195 	bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2196 			(size_t)sblock.fs_bsize);
2197 	ebp = (void *)bp->b_un.b_buf;
2198 
2199 	/*
2200 	 * Does it look like a log allocation table?
2201 	 */
2202 	/* LINTED pointer cast is aligned */
2203 	if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2204 	    sblock.fs_bsize))
2205 		return;
2206 	if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2207 		return;
2208 
2209 	ep = &ebp->extents[0];
2210 	for (i = 0; i < ebp->nextents; ++i, ++ep) {
2211 		fno = logbtofrag(&sblock, ep->pbno);
2212 		nfno = dbtofsb(&sblock, ep->nbno);
2213 		for (j = 0; j < nfno; ++j, ++fno) {
2214 			/*
2215 			 * Invoke the callback first, so that pass1 can
2216 			 * mark the log blocks in-use.  Then, if any
2217 			 * subsequent pass over the log shows us that a
2218 			 * block got freed (say, it was also claimed by
2219 			 * an inode that we cleared), we can safely declare
2220 			 * the log bad.
2221 			 */
2222 			if (cb != NULL)
2223 				(*cb)(fno);
2224 			if (!testbmap(fno))
2225 				islogok = 0;
2226 		}
2227 	}
2228 	brelse(bp);
2229 
2230 	if (cb != NULL) {
2231 		fno = logbtofrag(&sblock, sblock.fs_logbno);
2232 		for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2233 			(*cb)(fno);
2234 	}
2235 }
2236 
/*
 * Callback handed to examinelog() by updateclean() when the log is
 * being dropped: passes the single fragment `frag' to freeblk(), with
 * the superblock's fs_logbno as freeblk()'s first argument.
 */
static void
freelogblk(daddr32_t frag)
{
	freeblk(sblock.fs_logbno, frag, 1);
}
2242 
2243 caddr_t
2244 file_id(fsck_ino_t inum, mode_t mode)
2245 {
2246 	static char name[MAXPATHLEN + 1];
2247 
2248 	if (lfdir == inum) {
2249 		return (lfname);
2250 	}
2251 
2252 	if ((mode & IFMT) == IFDIR) {
2253 		(void) strcpy(name, "DIR");
2254 	} else if ((mode & IFMT) == IFATTRDIR) {
2255 		(void) strcpy(name, "ATTR DIR");
2256 	} else if ((mode & IFMT) == IFSHAD) {
2257 		(void) strcpy(name, "ACL");
2258 	} else {
2259 		(void) strcpy(name, "FILE");
2260 	}
2261 
2262 	return (name);
2263 }
2264 
2265 /*
2266  * Simple initializer for inodesc structures, so users of only a few
2267  * fields don't have to worry about getting the right defaults for
2268  * everything out.
2269  */
2270 void
2271 init_inodesc(struct inodesc *idesc)
2272 {
2273 	/*
2274 	 * Most fields should be zero, just hit the special cases.
2275 	 */
2276 	(void) memset((void *)idesc, 0, sizeof (struct inodesc));
2277 	idesc->id_fix = DONTKNOW;
2278 	idesc->id_lbn = -1;
2279 	idesc->id_truncto = -1;
2280 	idesc->id_firsthole = -1;
2281 }
2282 
2283 /*
2284  * Compare routine for tsearch(C) to use on ino_t instances.
2285  */
2286 int
2287 ino_t_cmp(const void *left, const void *right)
2288 {
2289 	const fsck_ino_t lino = (const fsck_ino_t)left;
2290 	const fsck_ino_t rino = (const fsck_ino_t)right;
2291 
2292 	return (lino - rino);
2293 }
2294 
2295 int
2296 cgisdirty(void)
2297 {
2298 	return (cgblk.b_dirty);
2299 }
2300 
2301 void
2302 cgflush(void)
2303 {
2304 	flush(fswritefd, &cgblk);
2305 }
2306 
2307 void
2308 dirty(struct bufarea *bp)
2309 {
2310 	if (fswritefd < 0) {
2311 		pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n");
2312 	} else {
2313 		(bp)->b_dirty = 1;
2314 		isdirty = 1;
2315 	}
2316 }
2317 
2318 void
2319 initbarea(struct bufarea *bp)
2320 {
2321 	(bp)->b_dirty = 0;
2322 	(bp)->b_bno = (diskaddr_t)-1LL;
2323 	(bp)->b_flags = 0;
2324 	(bp)->b_cnt = 0;
2325 	(bp)->b_errs = 0;
2326 }
2327 
/*
 * Partition-sizing routines adapted from ../newfs/newfs.c.
 * Needed because calcsb() needs to use mkfs to work out what the
 * superblock should be, and mkfs insists on being told how many
 * sectors to use.
 *
 * Error handling assumes we're never called while preening.
 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
 */
2341 
/*
 * Kind of label found on the device by get_device_size();
 * one of the LABEL_TYPE_* values below.
 */
static int label_type;

#define	LABEL_TYPE_VTOC		1	/* read_vtoc() succeeded */
#define	LABEL_TYPE_EFI		2	/* efi_alloc_and_read() succeeded */
#define	LABEL_TYPE_OTHER	3	/* no label; size found by probing */

#define	MB			(1 << 20)	/* bytes */
#define	SECTORS_PER_TERABYTE	(1LL << 31)	/* 2^31 sectors */
#define	FS_SIZE_UPPER_LIMIT	(1LL << 44)	/* probe limit, in sectors */
2351 
2352 diskaddr_t
2353 getdisksize(caddr_t disk, int fd)
2354 {
2355 	int rpm;
2356 	struct dk_geom g;
2357 	struct dk_cinfo ci;
2358 	diskaddr_t actual_size;
2359 
2360 	/*
2361 	 * get_device_size() determines the actual size of the
2362 	 * device, and also the disk's attributes, such as geometry.
2363 	 */
2364 	actual_size = get_device_size(fd, disk);
2365 
2366 	if (label_type == LABEL_TYPE_VTOC) {
2367 		if (ioctl(fd, DKIOCGGEOM, &g)) {
2368 			pwarn("%s: Unable to read Disk geometry", disk);
2369 			return (0);
2370 		}
2371 		if (sblock.fs_nsect == 0)
2372 			sblock.fs_nsect = g.dkg_nsect;
2373 		if (sblock.fs_ntrak == 0)
2374 			sblock.fs_ntrak = g.dkg_nhead;
2375 		if (sblock.fs_rps == 0) {
2376 			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2377 			sblock.fs_rps = rpm / 60;
2378 		}
2379 	}
2380 
2381 	if (sblock.fs_bsize == 0)
2382 		sblock.fs_bsize = MAXBSIZE;
2383 
2384 	/*
2385 	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2386 	 * information is not available, default to the min of a MB and
2387 	 * maxphys.
2388 	 */
2389 	if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2390 		sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2391 		if (sblock.fs_maxcontig < 0) {
2392 			int gotit, maxphys;
2393 
2394 			gotit = fsgetmaxphys(&maxphys, NULL);
2395 
2396 			/*
2397 			 * If we cannot get the maxphys value, default
2398 			 * to ufs_maxmaxphys (MB).
2399 			 */
2400 			if (gotit) {
2401 				sblock.fs_maxcontig = MIN(maxphys, MB);
2402 			} else {
2403 				sblock.fs_maxcontig = MB;
2404 			}
2405 		}
2406 		sblock.fs_maxcontig /= sblock.fs_bsize;
2407 	}
2408 
2409 	return (actual_size);
2410 }
2411 
2412 /*
2413  * Figure out how big the partition we're dealing with is.
2414  */
2415 static diskaddr_t
2416 get_device_size(int fd, caddr_t name)
2417 {
2418 	struct vtoc vtoc;
2419 	struct dk_gpt *efi_vtoc;
2420 	diskaddr_t slicesize = 0;
2421 
2422 	int index = read_vtoc(fd, &vtoc);
2423 
2424 	if (index >= 0) {
2425 		label_type = LABEL_TYPE_VTOC;
2426 	} else {
2427 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2428 			/* it might be an EFI label */
2429 			index = efi_alloc_and_read(fd, &efi_vtoc);
2430 			if (index >= 0)
2431 				label_type = LABEL_TYPE_EFI;
2432 		}
2433 	}
2434 
2435 	if (index < 0) {
2436 		/*
2437 		 * Since both attempts to read the label failed, we're
2438 		 * going to fall back to a brute force approach to
2439 		 * determining the device's size:  see how far out we can
2440 		 * perform reads on the device.
2441 		 */
2442 
2443 		slicesize = brute_force_get_device_size(fd);
2444 		if (slicesize == 0) {
2445 			switch (index) {
2446 			case VT_ERROR:
2447 				pwarn("%s: %s\n", name, strerror(errno));
2448 				break;
2449 			case VT_EIO:
2450 				pwarn("%s: I/O error accessing VTOC", name);
2451 				break;
2452 			case VT_EINVAL:
2453 				pwarn("%s: Invalid field in VTOC", name);
2454 				break;
2455 			default:
2456 				pwarn("%s: unknown error %d accessing VTOC",
2457 				    name, index);
2458 				break;
2459 			}
2460 			return (0);
2461 		} else {
2462 			label_type = LABEL_TYPE_OTHER;
2463 		}
2464 	}
2465 
2466 	if (label_type == LABEL_TYPE_EFI) {
2467 		slicesize = efi_vtoc->efi_parts[index].p_size;
2468 		efi_free(efi_vtoc);
2469 	} else if (label_type == LABEL_TYPE_VTOC) {
2470 		/*
2471 		 * In the vtoc struct, p_size is a 32-bit signed quantity.
2472 		 * In the dk_gpt struct (efi's version of the vtoc), p_size
2473 		 * is an unsigned 64-bit quantity.  By casting the vtoc's
2474 		 * psize to an unsigned 32-bit quantity, it will be copied
2475 		 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without
2476 		 * sign extension.
2477 		 */
2478 
2479 		slicesize = (uint32_t)vtoc.v_part[index].p_size;
2480 	}
2481 
2482 	return (slicesize);
2483 }
2484 
2485 /*
2486  * brute_force_get_device_size
2487  *
2488  * Determine the size of the device by seeing how far we can
2489  * read.  Doing an llseek( , , SEEK_END) would probably work
2490  * in most cases, but we've seen at least one third-party driver
2491  * which doesn't correctly support the SEEK_END option when the
2492  * the device is greater than a terabyte.
2493  */
2494 
2495 static diskaddr_t
2496 brute_force_get_device_size(int fd)
2497 {
2498 	diskaddr_t	min_fail = 0;
2499 	diskaddr_t	max_succeed = 0;
2500 	diskaddr_t	cur_db_off;
2501 	char 		buf[DEV_BSIZE];
2502 
2503 	/*
2504 	 * First, see if we can read the device at all, just to
2505 	 * eliminate errors that have nothing to do with the
2506 	 * device's size.
2507 	 */
2508 
2509 	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2510 	    ((read(fd, buf, DEV_BSIZE)) == -1))
2511 		return (0);  /* can't determine size */
2512 
2513 	/*
2514 	 * Now, go sequentially through the multiples of 4TB
2515 	 * to find the first read that fails (this isn't strictly
2516 	 * the most efficient way to find the actual size if the
2517 	 * size really could be anything between 0 and 2**64 bytes.
2518 	 * We expect the sizes to be less than 16 TB for some time,
2519 	 * so why do a bunch of reads that are larger than that?
2520 	 * However, this algorithm *will* work for sizes of greater
2521 	 * than 16 TB.  We're just not optimizing for those sizes.)
2522 	 */
2523 
2524 	/*
2525 	 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2526 	 * We're using > 32-bit constants here.  Therefore, its flow
2527 	 * analysis is wrong.  For the time being, ignore complaints
2528 	 * from it about the body of the for() being unreached.
2529 	 */
2530 	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2531 	    (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2532 	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2533 		if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2534 		    SEEK_SET) == -1) ||
2535 		    (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2536 			min_fail = cur_db_off;
2537 		else
2538 			max_succeed = cur_db_off;
2539 	}
2540 
2541 	/*
2542 	 * XXX Same lint flow analysis problem as above.
2543 	 */
2544 	if (min_fail == 0)
2545 		return (0);
2546 
2547 	/*
2548 	 * We now know that the size of the device is less than
2549 	 * min_fail and greater than or equal to max_succeed.  Now
2550 	 * keep splitting the difference until the actual size in
2551 	 * sectors in known.  We also know that the difference
2552 	 * between max_succeed and min_fail at this time is
2553 	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2554 	 * simplifies the math below.
2555 	 */
2556 
2557 	while (min_fail - max_succeed > 1) {
2558 		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2559 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2560 		    SEEK_SET)) == -1) ||
2561 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2562 			min_fail = cur_db_off;
2563 		else
2564 			max_succeed = cur_db_off;
2565 	}
2566 
2567 	/* the size is the last successfully read sector offset plus one */
2568 	return (max_succeed + 1);
2569 }
2570 
2571 static void
2572 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2573 {
2574 	struct dinode *dp;
2575 	char pathbuf[MAXPATHLEN + 1];
2576 
2577 	vpwarn(fmt, ap);
2578 	(void) putchar(' ');
2579 	pinode(ino);
2580 	(void) printf("\n");
2581 	getpathname(pathbuf, cwd, ino);
2582 	if (ino < UFSROOTINO || ino > maxino) {
2583 		pfatal("NAME=%s\n", pathbuf);
2584 		return;
2585 	}
2586 	dp = ginode(ino);
2587 	if (ftypeok(dp))
2588 		pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2589 	else
2590 		pfatal("NAME=%s\n", pathbuf);
2591 }
2592 
2593 void
2594 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2595 {
2596 	va_list ap;
2597 
2598 	va_start(ap, fmt);
2599 	vfileerror(ino, ino, fmt, ap);
2600 	va_end(ap);
2601 }
2602 
2603 static void
2604 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2605 {
2606 	vfileerror(ino, ino, fmt, ap);
2607 }
2608 
2609 void
2610 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2611 {
2612 	va_list ap;
2613 
2614 	va_start(ap, fmt);
2615 	vfileerror(cwd, ino, fmt, ap);
2616 	va_end(ap);
2617 }
2618 
2619 /*
2620  * Adds the given inode to the orphaned-directories list, limbo_dirs.
2621  * Assumes that the caller has set INCLEAR in the inode's statemap[]
2622  * entry.
2623  *
2624  * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2625  * meaning it's effectively an orphan.  It needs to be noted now, so
2626  * it will be remembered in pass 4.
2627  */
2628 
2629 void
2630 add_orphan_dir(fsck_ino_t ino)
2631 {
2632 	if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2633 		errexit("add_orphan_dir: out of memory");
2634 }
2635 
2636 /*
2637  * Remove an inode from the orphaned-directories list, presumably
2638  * because it's been cleared.
2639  */
2640 void
2641 remove_orphan_dir(fsck_ino_t ino)
2642 {
2643 	(void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2644 }
2645 
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 */

/*
 * Compute the checksum of a buffer and store it.
 *
 * sp is where the checksum is stored, lp is the start of the buffer,
 * and nb is the buffer's length in bytes; only whole 32-bit words are
 * summed.  *sp is zeroed before summing, so when sp points into the
 * buffer the old checksum does not contribute to the new one.
 *
 * The sum is accumulated as an unsigned value so that it wraps
 * modulo 2^32 instead of overflowing a signed integer, and a
 * non-positive nb sums no words at all (leaving *sp zero).
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	int nwords = nb / (int)sizeof (int32_t);

	*sp = 0;
	while (nwords-- > 0)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}
2661 
/*
 * Verify a stored checksum.
 *
 * sp points at the stored checksum, lp at the start of the buffer it
 * covers, and nb is the buffer's length in bytes.  The checksum is
 * recomputed in place with log_setsum() and compared with the stored
 * value; the stored value is left in *sp whatever the outcome.
 *
 * Returns 1 if the checksum matches and 0 if it does not.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;
	int matches;

	log_setsum(sp, lp, nb);
	matches = (*sp == stored);

	/* Put the original back; a no-op when the sums agree. */
	*sp = stored;

	return (matches);
}
2674