xref: /titanic_51/usr/src/cmd/fs.d/ufs/fsck/utilities.c (revision 19d61fc7991644175873937566d932d8cf52912a)
1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms are permitted
14  * provided that: (1) source distributions retain this entire copyright
15  * notice and comment, and (2) distributions including binaries display
16  * the following acknowledgement:  ``This product includes software
17  * developed by the University of California, Berkeley and its contributors''
18  * in the documentation or other materials provided with the distribution
19  * and in all advertising materials mentioning features or use of this
20  * software. Neither the name of the University nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <stdarg.h>
34 #include <libadm.h>
35 #include <note.h>
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/mntent.h>
39 #include <sys/filio.h>
40 #include <sys/fs/ufs_fs.h>
41 #include <sys/vnode.h>
42 #include <sys/fs/ufs_acl.h>
43 #include <sys/fs/ufs_inode.h>
44 #include <sys/fs/ufs_log.h>
45 #define	_KERNEL
46 #include <sys/fs/ufs_fsdir.h>
47 #undef _KERNEL
48 #include <sys/mnttab.h>
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <fcntl.h>
52 #include <signal.h>
53 #include <string.h>
54 #include <ctype.h>
55 #include <sys/vfstab.h>
56 #include <sys/lockfs.h>
57 #include <errno.h>
58 #include <sys/cmn_err.h>
59 #include <sys/dkio.h>
60 #include <sys/vtoc.h>
61 #include <sys/efi_partition.h>
62 #include <fslib.h>
63 #include <inttypes.h>
64 #include "fsck.h"
65 
66 caddr_t mount_point = NULL;
67 
68 static int64_t diskreads, totalreads;	/* Disk cache statistics */
69 
70 static int log_checksum(int32_t *, int32_t *, int);
71 static void vdirerror(fsck_ino_t, caddr_t, va_list);
72 static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
73 static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
74 static void vpwarn(caddr_t, va_list);
75 static int getline(FILE *, caddr_t, int);
76 static struct bufarea *alloc_bufarea(void);
77 static void rwerror(caddr_t, diskaddr_t, int rval);
78 static void debugclean(void);
79 static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
80 static void freelogblk(daddr32_t);
81 static void verrexit(caddr_t, va_list);
82 static void vpfatal(caddr_t, va_list);
83 static diskaddr_t get_device_size(int, caddr_t);
84 static diskaddr_t brute_force_get_device_size(int);
85 static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
86 	    daddr32_t *, daddr32_t *, daddr32_t *);
87 
88 int
89 ftypeok(struct dinode *dp)
90 {
91 	switch (dp->di_mode & IFMT) {
92 
93 	case IFDIR:
94 	case IFREG:
95 	case IFBLK:
96 	case IFCHR:
97 	case IFLNK:
98 	case IFSOCK:
99 	case IFIFO:
100 	case IFSHAD:
101 	case IFATTRDIR:
102 		return (1);
103 
104 	default:
105 		if (debug)
106 			(void) printf("bad file type 0%o\n", dp->di_mode);
107 		return (0);
108 	}
109 }
110 
111 int
112 acltypeok(struct dinode *dp)
113 {
114 	if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
115 		return (1);
116 
117 	if (debug)
118 		(void) printf("bad file type for acl I=%d: 0%o\n",
119 		    dp->di_shadow, dp->di_mode);
120 	return (0);
121 }
122 
123 NOTE(PRINTFLIKE(1))
124 int
125 reply(caddr_t fmt, ...)
126 {
127 	va_list ap;
128 	char line[80];
129 
130 	if (preen)
131 		pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
132 
133 	if (mflag) {
134 		/*
135 		 * We don't know what's going on, so don't potentially
136 		 * make things worse by having errexit() write stuff
137 		 * out to disk.
138 		 */
139 		(void) printf(
140 		    "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
141 		    devname);
142 		exit(EXERRFATAL);
143 	}
144 
145 	va_start(ap, fmt);
146 	(void) putchar('\n');
147 	(void) vprintf(fmt, ap);
148 	(void) putchar('?');
149 	(void) putchar(' ');
150 	va_end(ap);
151 
152 	if (nflag || fswritefd < 0) {
153 		(void) printf(" no\n\n");
154 		return (0);
155 	}
156 	if (yflag) {
157 		(void) printf(" yes\n\n");
158 		return (1);
159 	}
160 	(void) fflush(stdout);
161 	if (getline(stdin, line, sizeof (line)) == EOF)
162 		errexit("\n");
163 	(void) printf("\n");
164 	if (line[0] == 'y' || line[0] == 'Y') {
165 		return (1);
166 	} else {
167 		return (0);
168 	}
169 }
170 
171 int
172 getline(FILE *fp, caddr_t loc, int maxlen)
173 {
174 	int n;
175 	caddr_t p, lastloc;
176 
177 	p = loc;
178 	lastloc = &p[maxlen-1];
179 	while ((n = getc(fp)) != '\n') {
180 		if (n == EOF)
181 			return (EOF);
182 		if (!isspace(n) && p < lastloc)
183 			*p++ = (char)n;
184 	}
185 	*p = '\0';
186 	/* LINTED pointer difference won't overflow */
187 	return (p - loc);
188 }
189 
190 /*
191  * Malloc buffers and set up cache.
192  */
193 void
194 bufinit(void)
195 {
196 	struct bufarea *bp;
197 	int bufcnt, i;
198 	caddr_t bufp;
199 
200 	bufp = malloc((size_t)sblock.fs_bsize);
201 	if (bufp == NULL)
202 		goto nomem;
203 	initbarea(&cgblk);
204 	cgblk.b_un.b_buf = bufp;
205 	bufhead.b_next = bufhead.b_prev = &bufhead;
206 	bufcnt = MAXBUFSPACE / sblock.fs_bsize;
207 	if (bufcnt < MINBUFS)
208 		bufcnt = MINBUFS;
209 	for (i = 0; i < bufcnt; i++) {
210 		bp = (struct bufarea *)malloc(sizeof (struct bufarea));
211 		if (bp == NULL) {
212 			if (i >= MINBUFS)
213 				goto noalloc;
214 			goto nomem;
215 		}
216 
217 		bufp = malloc((size_t)sblock.fs_bsize);
218 		if (bufp == NULL) {
219 			free((void *)bp);
220 			if (i >= MINBUFS)
221 				goto noalloc;
222 			goto nomem;
223 		}
224 		initbarea(bp);
225 		bp->b_un.b_buf = bufp;
226 		bp->b_prev = &bufhead;
227 		bp->b_next = bufhead.b_next;
228 		bufhead.b_next->b_prev = bp;
229 		bufhead.b_next = bp;
230 	}
231 noalloc:
232 	bufhead.b_size = i;	/* save number of buffers */
233 	pbp = pdirbp = NULL;
234 	return;
235 
236 nomem:
237 	errexit("cannot allocate buffer pool\n");
238 	/* NOTREACHED */
239 }
240 
241 /*
242  * Undo a bufinit().
243  */
244 void
245 unbufinit(void)
246 {
247 	int cnt;
248 	struct bufarea *bp, *nbp;
249 
250 	cnt = 0;
251 	for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
252 		cnt++;
253 		flush(fswritefd, bp);
254 		nbp = bp->b_prev;
255 		/*
256 		 * We're discarding the entire chain, so this isn't
257 		 * technically necessary.  However, it doesn't hurt
258 		 * and lint's data flow analysis is much happier
259 		 * (this prevents it from thinking there's a chance
260 		 * of our using memory elsewhere after it's been released).
261 		 */
262 		nbp->b_next = bp->b_next;
263 		bp->b_next->b_prev = nbp;
264 		free((void *)bp->b_un.b_buf);
265 		free((void *)bp);
266 	}
267 
268 	if (bufhead.b_size != cnt)
269 		errexit("Panic: cache lost %d buffers\n",
270 			bufhead.b_size - cnt);
271 }
272 
273 /*
274  * Manage a cache of directory blocks.
275  */
276 struct bufarea *
277 getdatablk(daddr32_t blkno, size_t size)
278 {
279 	struct bufarea *bp;
280 
281 	for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
282 		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
283 			goto foundit;
284 		}
285 	for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
286 		if ((bp->b_flags & B_INUSE) == 0)
287 			break;
288 	if (bp == &bufhead) {
289 		bp = alloc_bufarea();
290 		if (bp == NULL) {
291 			errexit("deadlocked buffer pool\n");
292 			/* NOTREACHED */
293 		}
294 	}
295 	/*
296 	 * We're at the same logical level as getblk(), so if there
297 	 * are any errors, we'll let our caller handle them.
298 	 */
299 	diskreads++;
300 	(void) getblk(bp, blkno, size);
301 
302 foundit:
303 	totalreads++;
304 	bp->b_cnt++;
305 	/*
306 	 * Move the buffer to head of linked list if it isn't
307 	 * already there.
308 	 */
309 	if (bufhead.b_next != bp) {
310 		bp->b_prev->b_next = bp->b_next;
311 		bp->b_next->b_prev = bp->b_prev;
312 		bp->b_prev = &bufhead;
313 		bp->b_next = bufhead.b_next;
314 		bufhead.b_next->b_prev = bp;
315 		bufhead.b_next = bp;
316 	}
317 	bp->b_flags |= B_INUSE;
318 	return (bp);
319 }
320 
321 void
322 brelse(struct bufarea *bp)
323 {
324 	bp->b_cnt--;
325 	if (bp->b_cnt == 0) {
326 		bp->b_flags &= ~B_INUSE;
327 	}
328 }
329 
330 struct bufarea *
331 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
332 {
333 	diskaddr_t dblk;
334 
335 	dblk = fsbtodb(&sblock, blk);
336 	if (bp->b_bno == dblk)
337 		return (bp);
338 	flush(fswritefd, bp);
339 	bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
340 	bp->b_bno = dblk;
341 	bp->b_size = size;
342 	return (bp);
343 }
344 
345 void
346 flush(int fd, struct bufarea *bp)
347 {
348 	int i, j;
349 	caddr_t sip;
350 	long size;
351 
352 	if (!bp->b_dirty)
353 		return;
354 
355 	/*
356 	 * It's not our buf, so if there are errors, let whoever
357 	 * acquired it deal with the actual problem.
358 	 */
359 	if (bp->b_errs != 0)
360 		pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
361 	bp->b_dirty = 0;
362 	bp->b_errs = 0;
363 	bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
364 	if (bp != &sblk) {
365 		return;
366 	}
367 
368 	/*
369 	 * We're flushing the superblock, so make sure all the
370 	 * ancillary bits go out as well.
371 	 */
372 	sip = (caddr_t)sblock.fs_u.fs_csp;
373 	for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
374 		size = sblock.fs_cssize - i < sblock.fs_bsize ?
375 		    sblock.fs_cssize - i : sblock.fs_bsize;
376 		bwrite(fswritefd, sip,
377 		    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
378 		    size);
379 		sip += size;
380 	}
381 }
382 
383 static void
384 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
385 {
386 	int olderr = errno;
387 
388 	if (!preen)
389 		(void) printf("\n");
390 
391 	if (rval == -1)
392 		pfatal("CANNOT %s: DISK BLOCK %lld: %s",
393 		    mesg, blk, strerror(olderr));
394 	else
395 		pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
396 
397 	if (reply("CONTINUE") == 0) {
398 		exitstat = EXERRFATAL;
399 		errexit("Program terminated\n");
400 	}
401 }
402 
403 void
404 ckfini(void)
405 {
406 	int64_t percentage;
407 
408 	if (fswritefd < 0)
409 		return;
410 
411 	flush(fswritefd, &sblk);
412 	/*
413 	 * Were we using a backup superblock?
414 	 */
415 	if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
416 		if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
417 			sblk.b_bno = SBOFF / dev_bsize;
418 			sbdirty();
419 			flush(fswritefd, &sblk);
420 		}
421 	}
422 	flush(fswritefd, &cgblk);
423 	if (cgblk.b_un.b_buf != NULL) {
424 		free((void *)cgblk.b_un.b_buf);
425 		cgblk.b_un.b_buf = NULL;
426 	}
427 	unbufinit();
428 	pbp = NULL;
429 	pdirbp = NULL;
430 	if (debug) {
431 		/*
432 		 * Note that we only count cache-related reads.
433 		 * Anything that called fsck_bread() or getblk()
434 		 * directly are explicitly not cached, so they're not
435 		 * included here.
436 		 */
437 		if (totalreads != 0)
438 			percentage = diskreads * 100 / totalreads;
439 		else
440 			percentage = 0;
441 
442 		(void) printf("cache missed %lld of %lld reads (%lld%%)\n",
443 		    (longlong_t)diskreads, (longlong_t)totalreads,
444 		    (longlong_t)percentage);
445 	}
446 
447 	(void) close(fsreadfd);
448 	(void) close(fswritefd);
449 	fsreadfd = -1;
450 	fswritefd = -1;
451 }
452 
453 int
454 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
455 {
456 	caddr_t cp;
457 	int i;
458 	int errs;
459 	offset_t offset = ldbtob(blk);
460 	offset_t addr;
461 
462 	/*
463 	 * In our universe, nothing exists before the superblock, so
464 	 * just pretend it's always zeros.  This is the complement of
465 	 * bwrite()'s ignoring write requests into that space.
466 	 */
467 	if (blk < SBLOCK) {
468 		if (debug)
469 			(void) printf(
470 			    "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
471 			    SBLOCK, (longlong_t)blk);
472 		(void) memset(buf, 0, (size_t)size);
473 		return (1);
474 	}
475 
476 	if (llseek(fd, offset, SEEK_SET) < 0) {
477 		rwerror("SEEK", blk, -1);
478 	}
479 
480 	if ((i = read(fd, buf, size)) == size) {
481 		return (0);
482 	}
483 	rwerror("READ", blk, i);
484 	if (llseek(fd, offset, SEEK_SET) < 0) {
485 		rwerror("SEEK", blk, -1);
486 	}
487 	errs = 0;
488 	(void) memset(buf, 0, (size_t)size);
489 	pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
490 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
491 		addr = ldbtob(blk + i);
492 		if (llseek(fd, addr, SEEK_SET) < 0 ||
493 		    read(fd, cp, (int)secsize) < 0) {
494 			iscorrupt = 1;
495 			(void) printf(" %llu", blk + (u_longlong_t)i);
496 			errs++;
497 		}
498 	}
499 	(void) printf("\n");
500 	return (errs);
501 }
502 
503 void
504 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
505 {
506 	int i;
507 	int n;
508 	caddr_t cp;
509 	offset_t offset = ldbtob(blk);
510 	offset_t addr;
511 
512 	if (fd < 0)
513 		return;
514 	if (blk < SBLOCK) {
515 		if (debug)
516 			(void) printf(
517 		    "WARNING: Attempt to write illegal blkno %lld on %s\n",
518 			    (longlong_t)blk, devname);
519 		return;
520 	}
521 	if (llseek(fd, offset, SEEK_SET) < 0) {
522 		rwerror("SEEK", blk, -1);
523 	}
524 	if ((i = write(fd, buf, (int)size)) == size) {
525 		fsmodified = 1;
526 		return;
527 	}
528 	rwerror("WRITE", blk, i);
529 	if (llseek(fd, offset, SEEK_SET) < 0) {
530 		rwerror("SEEK", blk, -1);
531 	}
532 	pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
533 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
534 		n = 0;
535 		addr = ldbtob(blk + i);
536 		if (llseek(fd, addr, SEEK_SET) < 0 ||
537 		    (n = write(fd, cp, DEV_BSIZE)) < 0) {
538 			iscorrupt = 1;
539 			(void) printf(" %llu", blk + (u_longlong_t)i);
540 		} else if (n > 0) {
541 			fsmodified = 1;
542 		}
543 
544 	}
545 	(void) printf("\n");
546 }
547 
548 /*
549  * Allocates the specified number of contiguous fragments.
550  */
551 daddr32_t
552 allocblk(int wantedfrags)
553 {
554 	int block, leadfrag, tailfrag;
555 	daddr32_t selected;
556 	size_t size;
557 	struct bufarea *bp;
558 
559 	/*
560 	 * It's arguable whether we should just fail, or instead
561 	 * error out here.  Since we should only ever be asked for
562 	 * a single fragment or an entire block (i.e., sblock.fs_frag),
563 	 * we'll fail out because anything else means somebody
564 	 * changed code without considering all of the ramifications.
565 	 */
566 	if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
567 		exitstat = EXERRFATAL;
568 		errexit("allocblk() asked for %d frags.  "
569 			"Legal range is 1 to %d",
570 			wantedfrags, sblock.fs_frag);
571 	}
572 
573 	/*
574 	 * For each filesystem block, look at every possible starting
575 	 * offset within the block such that we can get the number of
576 	 * contiguous fragments that we need.  This is a drastically
577 	 * simplified version of the kernel's mapsearch() and alloc*().
578 	 * It's also correspondingly slower.
579 	 */
580 	for (block = 0; block < maxfsblock - sblock.fs_frag;
581 	    block += sblock.fs_frag) {
582 		for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
583 		    leadfrag++) {
584 			/*
585 			 * Is first fragment of candidate run available?
586 			 */
587 			if (testbmap(block + leadfrag))
588 				continue;
589 			/*
590 			 * Are the rest of them available?
591 			 */
592 			for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
593 				if (testbmap(block + leadfrag + tailfrag))
594 					break;
595 			if (tailfrag < wantedfrags) {
596 				/*
597 				 * No, skip the known-unusable run.
598 				 */
599 				leadfrag += tailfrag;
600 				continue;
601 			}
602 			/*
603 			 * Found what we need, so claim them.
604 			 */
605 			for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
606 				setbmap(block + leadfrag + tailfrag);
607 			n_blks += wantedfrags;
608 			size = wantedfrags * sblock.fs_fsize;
609 			selected = block + leadfrag;
610 			bp = getdatablk(selected, size);
611 			(void) memset((void *)bp->b_un.b_buf, 0, size);
612 			dirty(bp);
613 			brelse(bp);
614 			if (debug)
615 				(void) printf(
616 		    "allocblk: selected %d (in block %d), frags %d, size %d\n",
617 				    selected, selected % sblock.fs_bsize,
618 				    wantedfrags, (int)size);
619 			return (selected);
620 		}
621 	}
622 	return (0);
623 }
624 
625 /*
626  * Free a previously allocated block
627  */
628 void
629 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
630 {
631 	struct inodesc idesc;
632 
633 	if (debug)
634 		(void) printf("debug: freeing %d fragments starting at %d\n",
635 		    frags, blkno);
636 
637 	init_inodesc(&idesc);
638 
639 	idesc.id_number = ino;
640 	idesc.id_blkno = blkno;
641 	idesc.id_numfrags = frags;
642 	idesc.id_truncto = -1;
643 
644 	/*
645 	 * Nothing in the return status has any relevance to how
646 	 * we're using pass4check(), so just ignore it.
647 	 */
648 	(void) pass4check(&idesc);
649 }
650 
651 /*
652  * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
653  * that the given buffer is at least MAXPATHLEN + 1 characters.
654  */
655 void
656 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
657 {
658 	int len;
659 	caddr_t cp;
660 	struct dinode *dp;
661 	struct inodesc idesc;
662 	struct inoinfo *inp;
663 
664 	if (debug)
665 		(void) printf("debug: getpathname(curdir %d, ino %d)\n",
666 		    curdir, ino);
667 
668 	if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
669 		(void) strcpy(namebuf, "?");
670 		return;
671 	}
672 
673 	if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
674 		(void) strcpy(namebuf, "/");
675 		return;
676 	}
677 
678 	init_inodesc(&idesc);
679 	idesc.id_type = DATA;
680 	cp = &namebuf[MAXPATHLEN - 1];
681 	*cp = '\0';
682 
683 	/*
684 	 * In the case of extended attributes, our
685 	 * parent won't necessarily be a directory, so just
686 	 * return what we've found with a prefix indicating
687 	 * that it's an XATTR.  Presumably our caller will
688 	 * know what's going on and do something useful, like
689 	 * work out the path of the parent and then combine
690 	 * the two names.
691 	 *
692 	 * Can't use strcpy(), etc, because we've probably
693 	 * already got some name information in the buffer and
694 	 * the usual trailing \0 would lose it.
695 	 */
696 	dp = ginode(curdir);
697 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
698 		idesc.id_number = curdir;
699 		idesc.id_parent = ino;
700 		idesc.id_func = findname;
701 		idesc.id_name = namebuf;
702 		idesc.id_fix = NOFIX;
703 		if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
704 			*cp-- = '?';
705 		}
706 
707 		len = sizeof (XATTR_DIR_NAME) - 1;
708 		cp -= len;
709 		(void) memmove(cp, XATTR_DIR_NAME, len);
710 		goto attrname;
711 	}
712 
713 	/*
714 	 * If curdir == ino, need to get a handle on .. so we
715 	 * can search it for ino's name.  Otherwise, just search
716 	 * the given directory for ino.  Repeat until out of space
717 	 * or a full path has been built.
718 	 */
719 	if (curdir != ino) {
720 		idesc.id_parent = curdir;
721 		goto namelookup;
722 	}
723 	while (ino != UFSROOTINO && ino != 0) {
724 		idesc.id_number = ino;
725 		idesc.id_func = findino;
726 		idesc.id_name = "..";
727 		idesc.id_fix = NOFIX;
728 		if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
729 			inp = getinoinfo(ino);
730 			if ((inp == NULL) || (inp->i_parent == 0)) {
731 				break;
732 			}
733 			idesc.id_parent = inp->i_parent;
734 		}
735 
736 		/*
737 		 * To get this far, id_parent must have the inode
738 		 * number for `..' in it.  By definition, that's got
739 		 * to be a directory, so search it for the inode of
740 		 * interest.
741 		 */
742 namelookup:
743 		idesc.id_number = idesc.id_parent;
744 		idesc.id_parent = ino;
745 		idesc.id_func = findname;
746 		idesc.id_name = namebuf;
747 		idesc.id_fix = NOFIX;
748 		if ((ckinode(ginode(idesc.id_number),
749 		    &idesc, CKI_TRAVERSE) & FOUND) == 0) {
750 			break;
751 		}
752 		/*
753 		 * Prepend to what we've accumulated so far.  If
754 		 * there's not enough room for even one more path element
755 		 * (of the worst-case length), then bail out.
756 		 */
757 		len = strlen(namebuf);
758 		cp -= len;
759 		if (cp < &namebuf[MAXNAMLEN])
760 			break;
761 		(void) memmove(cp, namebuf, len);
762 		*--cp = '/';
763 
764 		/*
765 		 * Corner case for a looped-to-itself directory.
766 		 */
767 		if (ino == idesc.id_number)
768 			break;
769 
770 		/*
771 		 * Climb one level of the hierarchy.  In other words,
772 		 * the current .. becomes the inode to search for and
773 		 * its parent becomes the directory to search in.
774 		 */
775 		ino = idesc.id_number;
776 	}
777 
778 	/*
779 	 * If we hit a discontinuity in the hierarchy, indicate it by
780 	 * prefixing the path so far with `?'.  Otherwise, the first
781 	 * character will be `/' as a side-effect of the *--cp above.
782 	 *
783 	 * The special case is to handle the situation where we're
784 	 * trying to look something up in UFSROOTINO, but didn't find
785 	 * it.
786 	 */
787 	if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
788 		if (cp > namebuf)
789 			cp--;
790 		*cp = '?';
791 	}
792 
793 	/*
794 	 * The invariants being used for buffer integrity are:
795 	 * - namebuf[] is terminated with \0 before anything else
796 	 * - cp is always <= the last element of namebuf[]
797 	 * - the new path element is always stored at the
798 	 *   beginning of namebuf[], and is no more than MAXNAMLEN-1
799 	 *   characters
800 	 * - cp is is decremented by the number of characters in
801 	 *   the new path element
802 	 * - if, after the above accounting for the new element's
803 	 *   size, there is no longer enough room at the beginning of
804 	 *   namebuf[] for a full-sized path element and a slash,
805 	 *   terminate the loop.  cp is in the range
806 	 *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
807 	 */
808 attrname:
809 	/* LINTED per the above discussion */
810 	(void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
811 }
812 
813 /* ARGSUSED */
814 void
815 catch(int dummy)
816 {
817 	ckfini();
818 	exit(EXSIGNAL);
819 }
820 
821 /*
822  * When preening, allow a single quit to signal
823  * a special exit after filesystem checks complete
824  * so that reboot sequence may be interrupted.
825  */
826 /* ARGSUSED */
827 void
828 catchquit(int dummy)
829 {
830 	(void) printf("returning to single-user after filesystem check\n");
831 	interrupted = 1;
832 	(void) signal(SIGQUIT, SIG_DFL);
833 }
834 
835 
836 /*
837  * determine whether an inode should be fixed.
838  */
839 NOTE(PRINTFLIKE(2))
840 int
841 dofix(struct inodesc *idesc, caddr_t msg, ...)
842 {
843 	int rval = 0;
844 	va_list ap;
845 
846 	va_start(ap, msg);
847 
848 	switch (idesc->id_fix) {
849 
850 	case DONTKNOW:
851 		if (idesc->id_type == DATA)
852 			vdirerror(idesc->id_number, msg, ap);
853 		else
854 			vpwarn(msg, ap);
855 		if (preen) {
856 			idesc->id_fix = FIX;
857 			rval = ALTERED;
858 			break;
859 		}
860 		if (reply("SALVAGE") == 0) {
861 			idesc->id_fix = NOFIX;
862 			break;
863 		}
864 		idesc->id_fix = FIX;
865 		rval = ALTERED;
866 		break;
867 
868 	case FIX:
869 		rval = ALTERED;
870 		break;
871 
872 	case NOFIX:
873 		break;
874 
875 	default:
876 		errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
877 	}
878 
879 	va_end(ap);
880 	return (rval);
881 }
882 
883 NOTE(PRINTFLIKE(1))
884 void
885 errexit(caddr_t fmt, ...)
886 {
887 	va_list ap;
888 
889 	va_start(ap, fmt);
890 	verrexit(fmt, ap);
891 	/* NOTREACHED */
892 }
893 
894 NOTE(PRINTFLIKE(1))
895 static void
896 verrexit(caddr_t fmt, va_list ap)
897 {
898 	static int recursing = 0;
899 
900 	if (!recursing) {
901 		recursing = 1;
902 		if (errorlocked || iscorrupt) {
903 			if (havesb) {
904 				sblock.fs_clean = FSBAD;
905 				sblock.fs_state = FSOKAY - (long)sblock.fs_time;
906 				sblock.fs_state = -sblock.fs_state;
907 				sbdirty();
908 				write_altsb(fswritefd);
909 				flush(fswritefd, &sblk);
910 			}
911 		}
912 		ckfini();
913 		recursing = 0;
914 	}
915 	(void) vprintf(fmt, ap);
916 	if (fmt[strlen(fmt) - 1] != '\n')
917 		(void) putchar('\n');
918 	exit((exitstat != 0) ? exitstat : EXERRFATAL);
919 }
920 
921 /*
922  * An unexpected inconsistency occured.
923  * Die if preening, otherwise just print message and continue.
924  */
925 NOTE(PRINTFLIKE(1))
926 void
927 pfatal(caddr_t fmt, ...)
928 {
929 	va_list ap;
930 
931 	va_start(ap, fmt);
932 	vpfatal(fmt, ap);
933 	va_end(ap);
934 }
935 
936 NOTE(PRINTFLIKE(1))
937 static void
938 vpfatal(caddr_t fmt, va_list ap)
939 {
940 	if (preen) {
941 		if (*fmt != '\0') {
942 			(void) printf("%s: ", devname);
943 			(void) vprintf(fmt, ap);
944 			(void) printf("\n");
945 		}
946 		(void) printf(
947 		    "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
948 		    devname);
949 		if (havesb) {
950 			sblock.fs_clean = FSBAD;
951 			sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
952 			sbdirty();
953 			flush(fswritefd, &sblk);
954 		}
955 		/*
956 		 * We're exiting, it doesn't really matter that our
957 		 * caller doesn't get to call va_end().
958 		 */
959 		if (exitstat == 0)
960 			exitstat = EXFNDERRS;
961 		exit(exitstat);
962 	}
963 	if (*fmt != '\0') {
964 		(void) vprintf(fmt, ap);
965 	}
966 }
967 
968 /*
969  * Pwarn just prints a message when not preening,
970  * or a warning (preceded by filename) when preening.
971  */
972 NOTE(PRINTFLIKE(1))
973 void
974 pwarn(caddr_t fmt, ...)
975 {
976 	va_list ap;
977 
978 	va_start(ap, fmt);
979 	vpwarn(fmt, ap);
980 	va_end(ap);
981 }
982 
983 NOTE(PRINTFLIKE(1))
984 static void
985 vpwarn(caddr_t fmt, va_list ap)
986 {
987 	if (*fmt != '\0') {
988 		if (preen)
989 			(void) printf("%s: ", devname);
990 		(void) vprintf(fmt, ap);
991 	}
992 }
993 
994 /*
995  * Like sprintf(), except the buffer is dynamically allocated
996  * and returned, instead of being passed in.  A pointer to the
997  * buffer is stored in *RET, and FMT is the usual format string.
998  * The number of characters in *RET (excluding the trailing \0,
999  * to be consistent with the other *printf() routines) is returned.
1000  *
1001  * Solaris doesn't have asprintf(3C) yet, unfortunately.
1002  */
1003 NOTE(PRINTFLIKE(2))
1004 int
1005 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1006 {
1007 	int len;
1008 	caddr_t buffer;
1009 	va_list ap;
1010 
1011 	va_start(ap, fmt);
1012 	len = vsnprintf(NULL, 0, fmt, ap);
1013 	va_end(ap);
1014 
1015 	buffer = malloc((len + 1) * sizeof (char));
1016 	if (buffer == NULL) {
1017 		errexit("Out of memory in asprintf\n");
1018 		/* NOTREACHED */
1019 	}
1020 
1021 	va_start(ap, fmt);
1022 	(void) vsnprintf(buffer, len + 1, fmt, ap);
1023 	va_end(ap);
1024 
1025 	*ret = buffer;
1026 	return (len);
1027 }
1028 
1029 /*
1030  * So we can take advantage of kernel routines in ufs_subr.c.
1031  */
1032 /* PRINTFLIKE2 */
1033 void
1034 cmn_err(int level, caddr_t fmt, ...)
1035 {
1036 	va_list ap;
1037 
1038 	va_start(ap, fmt);
1039 	if (level == CE_PANIC) {
1040 		(void) printf("INTERNAL INCONSISTENCY:");
1041 		verrexit(fmt, ap);
1042 	} else {
1043 		(void) vprintf(fmt, ap);
1044 	}
1045 	va_end(ap);
1046 }
1047 
1048 /*
1049  * Check to see if unraw version of name is already mounted.
1050  * Updates devstr with the device name if devstr is not NULL
1051  * and str_size is positive.
1052  */
1053 int
1054 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1055 {
1056 	int found;
1057 	struct mnttab *mntent;
1058 
1059 	mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1060 	if (mntent == NULL)
1061 		return (M_NOMNT);
1062 
1063 	/*
1064 	 * It's mounted.  With or without write access?
1065 	 */
1066 	if (hasmntopt(mntent, MNTOPT_RO) != 0)
1067 		found = M_RO;	/* mounted as RO */
1068 	else
1069 		found = M_RW; 	/* mounted as R/W */
1070 
1071 	if (mount_point == NULL) {
1072 		mount_point = strdup(mntent->mnt_mountp);
1073 		if (mount_point == NULL) {
1074 			errexit("fsck: memory allocation failure: %s",
1075 				strerror(errno));
1076 			/* NOTREACHED */
1077 		}
1078 
1079 		if (devstr != NULL && str_size > 0)
1080 			(void) strlcpy(devstr, mntent->mnt_special, str_size);
1081 	}
1082 
1083 	return (found);
1084 }
1085 
1086 /*
1087  * Check to see if name corresponds to an entry in vfstab, and that the entry
1088  * does not have option ro.
1089  */
1090 int
1091 writable(caddr_t name)
1092 {
1093 	int rw = 1;
1094 	struct vfstab vfsbuf, vfskey;
1095 	FILE *vfstab;
1096 
1097 	vfstab = fopen(VFSTAB, "r");
1098 	if (vfstab == NULL) {
1099 		(void) printf("can't open %s\n", VFSTAB);
1100 		return (1);
1101 	}
1102 	(void) memset((void *)&vfskey, 0, sizeof (vfskey));
1103 	vfsnull(&vfskey);
1104 	vfskey.vfs_special = unrawname(name);
1105 	vfskey.vfs_fstype = MNTTYPE_UFS;
1106 	if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1107 	    (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1108 		rw = 0;
1109 	}
1110 	(void) fclose(vfstab);
1111 	return (rw);
1112 }
1113 
1114 /*
1115  * debugclean
1116  */
1117 static void
1118 debugclean(void)
1119 {
1120 	if (!debug)
1121 		return;
1122 
1123 	if ((iscorrupt == 0) && (isdirty == 0))
1124 		return;
1125 
1126 	if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1127 	    (sblock.fs_clean == FSLOG && islog && islogok) ||
1128 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1129 		return;
1130 
1131 	(void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1132 	    sblock.fs_clean == FSSTABLE ? "stable" :
1133 	    sblock.fs_clean == FSLOG ? "logging" :
1134 	    sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1135 	    devname);
1136 }
1137 
1138 /*
1139  * updateclean
1140  *	Carefully and transparently update the clean flag.
1141  *
1142  * `iscorrupt' has to be in its final state before this is called.
1143  */
1144 int
1145 updateclean(void)
1146 {
1147 	int freedlog = 0;
1148 	struct bufarea cleanbuf;
1149 	size_t size;
1150 	ssize_t io_res;
1151 	diskaddr_t bno;
1152 	char fsclean;
1153 	int fsreclaim;
1154 	char fsflags;
1155 	int flags_ok = 1;
1156 	daddr32_t fslogbno;
1157 	offset_t sblkoff;
1158 	time_t t;
1159 
1160 	/*
1161 	 * debug stuff
1162 	 */
1163 	debugclean();
1164 
1165 	/*
1166 	 * set fsclean to its appropriate value
1167 	 */
1168 	fslogbno = sblock.fs_logbno;
1169 	fsclean = sblock.fs_clean;
1170 	fsreclaim = sblock.fs_reclaim;
1171 	fsflags = sblock.fs_flags;
1172 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1173 		fsclean = FSACTIVE;
1174 	}
1175 	/*
1176 	 * If ufs log is not okay, note that we need to clear it.
1177 	 */
1178 	examinelog(NULL);
1179 	if (fslogbno && !(islog && islogok)) {
1180 		fsclean = FSACTIVE;
1181 		fslogbno = 0;
1182 	}
1183 
1184 	/*
1185 	 * if necessary, update fs_clean and fs_state
1186 	 */
1187 	switch (fsclean) {
1188 
1189 	case FSACTIVE:
1190 		if (!iscorrupt) {
1191 			fsclean = FSSTABLE;
1192 			fsreclaim = 0;
1193 		}
1194 		break;
1195 
1196 	case FSCLEAN:
1197 	case FSSTABLE:
1198 		if (iscorrupt) {
1199 			fsclean = FSACTIVE;
1200 		} else {
1201 			fsreclaim = 0;
1202 		}
1203 		break;
1204 
1205 	case FSLOG:
1206 		if (iscorrupt) {
1207 			fsclean = FSACTIVE;
1208 		} else if (!islog || fslogbno == 0) {
1209 			fsclean = FSSTABLE;
1210 			fsreclaim = 0;
1211 		} else if (fflag) {
1212 			fsreclaim = 0;
1213 		}
1214 		break;
1215 
1216 	case FSFIX:
1217 		fsclean = FSBAD;
1218 		if (errorlocked && !iscorrupt) {
1219 			fsclean = islog ? FSLOG : FSCLEAN;
1220 		}
1221 		break;
1222 
1223 	default:
1224 		if (iscorrupt) {
1225 			fsclean = FSACTIVE;
1226 		} else {
1227 			fsclean = FSSTABLE;
1228 			fsreclaim = 0;
1229 		}
1230 	}
1231 
1232 	if (largefile_count > 0)
1233 		fsflags |= FSLARGEFILES;
1234 	else
1235 		fsflags &= ~FSLARGEFILES;
1236 
1237 	/*
1238 	 * There can be two discrepencies here.  A) The superblock
1239 	 * shows no largefiles but we found some while scanning.
1240 	 * B) The superblock indicates the presence of largefiles,
1241 	 * but none are present.  Note that if preening, the superblock
1242 	 * is silently corrected.
1243 	 */
1244 	if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1245 	    (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1246 		flags_ok = 0;
1247 
1248 	if (debug)
1249 		(void) printf(
1250 		    "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1251 		    largefile_count, sblock.fs_flags, flags_ok);
1252 
1253 	/*
1254 	 * If fs is unchanged, do nothing.
1255 	 */
1256 	if ((!isdirty) && (flags_ok) &&
1257 	    (fslogbno == sblock.fs_logbno) &&
1258 	    (sblock.fs_clean == fsclean) &&
1259 	    (sblock.fs_reclaim == fsreclaim) &&
1260 	    (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1261 		if (errorlocked) {
1262 			if (!do_errorlock(LOCKFS_ULOCK))
1263 				pwarn(
1264 		    "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1265 		}
1266 		return (freedlog);
1267 	}
1268 
1269 	/*
1270 	 * if user allows, update superblock state
1271 	 */
1272 	if (debug) {
1273 		(void) printf(
1274 	    "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1275 		    sblock.fs_flags, sblock.fs_logbno,
1276 		    sblock.fs_clean, sblock.fs_reclaim,
1277 		    sblock.fs_state + sblock.fs_time);
1278 		(void) printf(
1279 	    "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1280 		    fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1281 	}
1282 	if (!isdirty && !preen && !rerun &&
1283 	    (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1284 		return (freedlog);
1285 
1286 	(void) time(&t);
1287 	sblock.fs_time = (time32_t)t;
1288 	if (debug)
1289 		printclean();
1290 
1291 	if (sblock.fs_logbno != fslogbno) {
1292 		examinelog(&freelogblk);
1293 		freedlog++;
1294 	}
1295 
1296 	sblock.fs_logbno = fslogbno;
1297 	sblock.fs_clean = fsclean;
1298 	sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1299 	sblock.fs_reclaim = fsreclaim;
1300 	sblock.fs_flags = fsflags;
1301 
1302 	/*
1303 	 * if superblock can't be written, return
1304 	 */
1305 	if (fswritefd < 0)
1306 		return (freedlog);
1307 
1308 	/*
1309 	 * Read private copy of superblock, update clean flag, and write it.
1310 	 */
1311 	bno  = sblk.b_bno;
1312 	size = sblk.b_size;
1313 
1314 	sblkoff = ldbtob(bno);
1315 
1316 	if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1317 		errexit("out of memory");
1318 	if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
1319 		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1320 		    (longlong_t)bno, strerror(errno));
1321 		goto out;
1322 	}
1323 
1324 	if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1325 		report_io_prob("READ FROM", bno, size, io_res);
1326 		goto out;
1327 	}
1328 
1329 	cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
1330 	cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
1331 	cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
1332 	cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
1333 	cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1334 	cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;
1335 
1336 	if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
1337 		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1338 		    (longlong_t)bno, strerror(errno));
1339 		goto out;
1340 	}
1341 
1342 	if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1343 		report_io_prob("WRITE TO", bno, size, io_res);
1344 		goto out;
1345 	}
1346 
1347 	/*
1348 	 * 1208040
1349 	 * If we had to use -b to grab an alternate superblock, then we
1350 	 * likely had to do so because of unacceptable differences between
1351 	 * the main and alternate superblocks.  So, we had better update
1352 	 * the alternate superblock as well, or we'll just fail again
1353 	 * the next time we attempt to run fsck!
1354 	 */
1355 	if (bflag != 0) {
1356 		write_altsb(fswritefd);
1357 	}
1358 
1359 	if (errorlocked) {
1360 		if (!do_errorlock(LOCKFS_ULOCK))
1361 			pwarn(
1362 		    "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1363 	}
1364 
1365 out:
1366 	if (cleanbuf.b_un.b_buf != NULL) {
1367 		free((void *)cleanbuf.b_un.b_buf);
1368 	}
1369 
1370 	return (freedlog);
1371 }
1372 
1373 static void
1374 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1375 {
1376 	if (failure < 0)
1377 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1378 		    what, (int)bno, strerror(errno));
1379 	else if (failure == 0)
1380 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1381 		    what, (int)bno);
1382 	else
1383 		(void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1384 		    what, (int)bno, (unsigned)failure, (unsigned)expected);
1385 }
1386 
1387 /*
1388  * print out clean info
1389  */
1390 void
1391 printclean(void)
1392 {
1393 	caddr_t s;
1394 
1395 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1396 		s = "unknown";
1397 	else
1398 		switch (sblock.fs_clean) {
1399 
1400 		case FSACTIVE:
1401 			s = "active";
1402 			break;
1403 
1404 		case FSCLEAN:
1405 			s = "clean";
1406 			break;
1407 
1408 		case FSSTABLE:
1409 			s = "stable";
1410 			break;
1411 
1412 		case FSLOG:
1413 			s = "logging";
1414 			break;
1415 
1416 		case FSBAD:
1417 			s = "is bad";
1418 			break;
1419 
1420 		case FSFIX:
1421 			s = "being fixed";
1422 			break;
1423 
1424 		default:
1425 			s = "unknown";
1426 		}
1427 
1428 	if (preen)
1429 		pwarn("is %s.\n", s);
1430 	else
1431 		(void) printf("** %s is %s.\n", devname, s);
1432 }
1433 
1434 int
1435 is_errorlocked(caddr_t fs)
1436 {
1437 	int		retval;
1438 	struct stat64	statb;
1439 	caddr_t		mountp;
1440 	struct mnttab	*mntent;
1441 
1442 	retval = 0;
1443 
1444 	if (!fs)
1445 		return (0);
1446 
1447 	if (stat64(fs, &statb) < 0)
1448 		return (0);
1449 
1450 	if (S_ISDIR(statb.st_mode)) {
1451 		mountp = fs;
1452 	} else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1453 		mntent = search_mnttab(NULL, fs, NULL, 0);
1454 		if (mntent == NULL)
1455 			return (0);
1456 		mountp = mntent->mnt_mountp;
1457 		if (mountp == NULL) /* theoretically a can't-happen */
1458 			return (0);
1459 	} else {
1460 		return (0);
1461 	}
1462 
1463 	/*
1464 	 * From here on, must `goto out' to avoid memory leakage.
1465 	 */
1466 
1467 	if (elock_combuf == NULL)
1468 		elock_combuf =
1469 			(caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1470 	else
1471 		elock_combuf =
1472 			(caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1473 
1474 	if (elock_combuf == NULL)
1475 		goto out;
1476 
1477 	(void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1478 
1479 	if (elock_mountp != NULL) {
1480 		free(elock_mountp);
1481 	}
1482 
1483 	elock_mountp = strdup(mountp);
1484 	if (elock_mountp == NULL)
1485 		goto out;
1486 
1487 	if (mountfd < 0) {
1488 		if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1489 			goto out;
1490 	}
1491 
1492 	if (lfp == NULL) {
1493 		lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1494 		if (lfp == NULL)
1495 			goto out;
1496 		(void) memset((void *)lfp, 0, sizeof (struct lockfs));
1497 	}
1498 
1499 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1500 	lfp->lf_comment = elock_combuf;
1501 
1502 	if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1503 		goto out;
1504 
1505 	/*
1506 	 * lint believes that the ioctl() (or any other function
1507 	 * taking lfp as an arg) could free lfp.  This is not the
1508 	 * case, however.
1509 	 */
1510 	retval = LOCKFS_IS_ELOCK(lfp);
1511 
1512 out:
1513 	return (retval);
1514 }
1515 
1516 /*
1517  * Given a name which is known to be a directory, see if it appears
1518  * in the vfstab.  If so, return the entry's block (special) device
1519  * field via devstr.
1520  */
1521 int
1522 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1523 {
1524 	return (NULL != search_vfstab(name, NULL, devstr, str_size));
1525 }
1526 
1527 /*
1528  * Given a name which is known to be a directory, see if it appears
1529  * in the mnttab.  If so, return the entry's block (special) device
1530  * field via devstr.
1531  */
1532 int
1533 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1534 {
1535 	return (NULL != search_mnttab(name, NULL, devstr, str_size));
1536 }
1537 
1538 /*
1539  * Search for mount point and/or special device in the given file.
1540  * The first matching entry is returned.
1541  *
1542  * If an entry is found and str_size is greater than zero, then
1543  * up to size_str bytes of the special device name from the entry
1544  * are copied to devstr.
1545  */
1546 
1547 #define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
1548 			st_nuller, st_init, st_searcher) \
1549 	{ \
1550 		FILE *fp; \
1551 		struct st_type *retval = NULL; \
1552 		struct st_type key; \
1553 		static struct st_type buffer; \
1554 		\
1555 		/* LINTED ``assigned value never used'' */ \
1556 		st_nuller(&key); \
1557 		key.st_mount = mountp; \
1558 		key.st_special = special; \
1559 		st_init; \
1560 		\
1561 		if ((fp = fopen(st_file, "r")) == NULL) \
1562 			return (NULL); \
1563 		\
1564 		if (st_searcher(fp, &buffer, &key) == 0) { \
1565 			retval = &buffer; \
1566 			if (devstr != NULL && str_size > 0 && \
1567 			    buffer.st_special != NULL) { \
1568 				(void) strlcpy(devstr, buffer.st_special, \
1569 				    str_size); \
1570 			} \
1571 		} \
1572 		(void) fclose(fp); \
1573 		return (retval); \
1574 	}
1575 
1576 static struct vfstab *
1577 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1578 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1579 		(retval = retval), getvfsany)
1580 
1581 static struct mnttab *
1582 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1583 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1584 		(key.mnt_fstype = MNTTYPE_UFS), getmntany)
1585 
1586 int
1587 do_errorlock(int lock_type)
1588 {
1589 	caddr_t	   buf;
1590 	time_t	   now;
1591 	struct tm *local;
1592 	int	   rc;
1593 
1594 	if (elock_combuf == NULL)
1595 		errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1596 			elock_mountp ? elock_mountp : "<null>",
1597 			lock_type);
1598 
1599 	if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1600 	    NULL) {
1601 		errexit("Couldn't alloc memory for temp. lock status buffer\n");
1602 	}
1603 	if (lfp == NULL) {
1604 		errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1605 					elock_mountp, lock_type);
1606 	}
1607 
1608 	(void) memmove((void *)buf, (void *)elock_combuf,
1609 	    LOCKFS_MAXCOMMENTLEN-1);
1610 
1611 	switch (lock_type) {
1612 	case LOCKFS_ELOCK:
1613 		/*
1614 		 * Note that if it is error-locked, we won't get an
1615 		 * error back if we try to error-lock it again.
1616 		 */
1617 		if (time(&now) != (time_t)-1) {
1618 			if ((local = localtime(&now)) != NULL)
1619 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1620 		    "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1621 				    elock_combuf, (int)pid,
1622 				    local->tm_mon + 1, local->tm_mday,
1623 				    (local->tm_year % 100), local->tm_hour,
1624 				    local->tm_min, local->tm_sec);
1625 			else
1626 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1627 				    "%s [fsck pid %d", elock_combuf, pid);
1628 
1629 		} else {
1630 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1631 			    "%s [fsck pid %d", elock_combuf, pid);
1632 		}
1633 		break;
1634 
1635 	case LOCKFS_ULOCK:
1636 		if (time(&now) != (time_t)-1) {
1637 			if ((local = localtime(&now)) != NULL) {
1638 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1639 				    "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1640 				    elock_combuf,
1641 				    local->tm_mon + 1, local->tm_mday,
1642 				    (local->tm_year % 100), local->tm_hour,
1643 				    local->tm_min, local->tm_sec);
1644 			} else {
1645 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1646 				    "%s]", elock_combuf);
1647 			}
1648 		} else {
1649 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1650 			    "%s]", elock_combuf);
1651 		}
1652 		if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1653 			pwarn("do_errorlock: unlock failed: %s\n",
1654 			    strerror(errno));
1655 			goto out;
1656 		}
1657 		break;
1658 
1659 	default:
1660 		break;
1661 	}
1662 
1663 	(void) memmove((void *)elock_combuf, (void *)buf,
1664 	    LOCKFS_MAXCOMMENTLEN - 1);
1665 
1666 	lfp->lf_lock = lock_type;
1667 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1668 	lfp->lf_comment = elock_combuf;
1669 	lfp->lf_flags = 0;
1670 	errno = 0;
1671 
1672 	if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1673 		if (errno == EINVAL) {
1674 			pwarn("Another fsck active?\n");
1675 			iscorrupt = 0;	/* don't go away mad, just go away */
1676 		} else {
1677 			pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1678 			    lock_type, elock_combuf, strerror(errno));
1679 		}
1680 	}
1681 out:
1682 	if (buf != NULL) {
1683 		free((void *)buf);
1684 	}
1685 
1686 	return (rc != -1);
1687 }
1688 
1689 /*
1690  * Shadow inode support.  To register a shadow with a client is to note
1691  * that an inode (the client) refers to the shadow.
1692  */
1693 
1694 static struct shadowclients *
1695 newshadowclient(struct shadowclients *prev)
1696 {
1697 	struct shadowclients *rc;
1698 
1699 	rc = (struct shadowclients *)malloc(sizeof (*rc));
1700 	if (rc == NULL)
1701 		errexit("newshadowclient: cannot malloc shadow client");
1702 	rc->next = prev;
1703 	rc->nclients = 0;
1704 
1705 	rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1706 	    maxshadowclients);
1707 	if (rc->client == NULL)
1708 		errexit("newshadowclient: cannot malloc client array");
1709 	return (rc);
1710 }
1711 
1712 void
1713 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1714 	struct shadowclientinfo **info)
1715 {
1716 	struct shadowclientinfo *sci;
1717 	struct shadowclients *scc;
1718 
1719 	/*
1720 	 * Already have a record for this shadow?
1721 	 */
1722 	for (sci = *info; sci != NULL; sci = sci->next)
1723 		if (sci->shadow == shadow)
1724 			break;
1725 	if (sci == NULL) {
1726 		/*
1727 		 * It's a new shadow, add it to the list
1728 		 */
1729 		sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1730 		if (sci == NULL)
1731 			errexit("registershadowclient: cannot malloc");
1732 		sci->next = *info;
1733 		*info = sci;
1734 		sci->shadow = shadow;
1735 		sci->totalClients = 0;
1736 		sci->clients = newshadowclient(NULL);
1737 	}
1738 
1739 	sci->totalClients++;
1740 	scc = sci->clients;
1741 	if (scc->nclients >= maxshadowclients) {
1742 		scc = newshadowclient(sci->clients);
1743 		sci->clients = scc;
1744 	}
1745 
1746 	scc->client[scc->nclients++] = client;
1747 }
1748 
1749 /*
1750  * Locate and discard a shadow.
1751  */
1752 void
1753 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1754 {
1755 	struct shadowclientinfo *sci, *prev;
1756 
1757 	/*
1758 	 * Do we have a record for this shadow?
1759 	 */
1760 	prev = NULL;
1761 	for (sci = *info; sci != NULL; sci = sci->next) {
1762 		if (sci->shadow == shadow)
1763 			break;
1764 		prev = sci;
1765 	}
1766 
1767 	if (sci != NULL) {
1768 		/*
1769 		 * First, pull it off the list, since we know there
1770 		 * shouldn't be any future references to this one.
1771 		 */
1772 		if (prev == NULL)
1773 			*info = sci->next;
1774 		else
1775 			prev->next = sci->next;
1776 		deshadow(sci, clearattrref);
1777 	}
1778 }
1779 
1780 /*
1781  * Discard all memory used to track clients of a shadow.
1782  */
1783 void
1784 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1785 {
1786 	struct shadowclients *clients, *discard;
1787 	int idx;
1788 
1789 	clients = sci->clients;
1790 	while (clients != NULL) {
1791 		discard = clients;
1792 		clients = clients->next;
1793 		if (discard->client != NULL) {
1794 			if (cb != NULL) {
1795 				for (idx = 0; idx < discard->nclients; idx++)
1796 					(*cb)(discard->client[idx]);
1797 			}
1798 			free((void *)discard->client);
1799 		}
1800 		free((void *)discard);
1801 	}
1802 
1803 	free((void *)sci);
1804 }
1805 
1806 /*
1807  * Allocate more buffer as need arises but allocate one at a time.
1808  * This is done to make sure that fsck does not exit with error if it
1809  * needs more buffer to complete its task.
1810  */
1811 static struct bufarea *
1812 alloc_bufarea(void)
1813 {
1814 	struct bufarea *newbp;
1815 	caddr_t bufp;
1816 
1817 	bufp = malloc((unsigned int)sblock.fs_bsize);
1818 	if (bufp == NULL)
1819 		return (NULL);
1820 
1821 	newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1822 	if (newbp == NULL) {
1823 		free((void *)bufp);
1824 		return (NULL);
1825 	}
1826 
1827 	initbarea(newbp);
1828 	newbp->b_un.b_buf = bufp;
1829 	newbp->b_prev = &bufhead;
1830 	newbp->b_next = bufhead.b_next;
1831 	bufhead.b_next->b_prev = newbp;
1832 	bufhead.b_next = newbp;
1833 	bufhead.b_size++;
1834 	return (newbp);
1835 }
1836 
1837 /*
1838  * We length-limit in both unrawname() and rawname() to avoid
1839  * overflowing our arrays or those of our naive, trusting callers.
1840  */
1841 
1842 caddr_t
1843 unrawname(caddr_t name)
1844 {
1845 	caddr_t dp;
1846 	static char fullname[MAXPATHLEN + 1];
1847 
1848 	if ((dp = getfullblkname(name)) == NULL)
1849 		return ("");
1850 
1851 	(void) strlcpy(fullname, dp, sizeof (fullname));
1852 	/*
1853 	 * Not reporting under debug, as the allocation isn't
1854 	 * reported by getfullblkname.  The idea is that we
1855 	 * produce balanced alloc/free instances.
1856 	 */
1857 	free(dp);
1858 
1859 	return (fullname);
1860 }
1861 
1862 caddr_t
1863 rawname(caddr_t name)
1864 {
1865 	caddr_t dp;
1866 	static char fullname[MAXPATHLEN + 1];
1867 
1868 	if ((dp = getfullrawname(name)) == NULL)
1869 		return ("");
1870 
1871 	(void) strlcpy(fullname, dp, sizeof (fullname));
1872 	/*
1873 	 * Not reporting under debug, as the allocation isn't
1874 	 * reported by getfullblkname.  The idea is that we
1875 	 * produce balanced alloc/free instances.
1876 	 */
1877 	free(dp);
1878 
1879 	return (fullname);
1880 }
1881 
1882 /*
1883  * Make sure that a cg header looks at least moderately reasonable.
1884  * We want to be able to trust the contents enough to be able to use
1885  * the standard accessor macros.  So, besides looking at the obvious
1886  * such as the magic number, we verify that the offset field values
1887  * are properly aligned and not too big or small.
1888  *
1889  * Returns a NULL pointer if the cg is sane enough for our needs, else
1890  * a dynamically-allocated string describing all of its faults.
1891  */
1892 #define	Append_Error(full, full_len, addition, addition_len) \
1893 	if (full == NULL) { \
1894 		full = addition; \
1895 		full_len = addition_len; \
1896 	} else { \
1897 		/* lint doesn't think realloc() understands NULLs */ \
1898 		full = realloc(full, full_len + addition_len + 1); \
1899 		if (full == NULL) { \
1900 			errexit("Out of memory in cg_sanity"); \
1901 			/* NOTREACHED */ \
1902 		} \
1903 		(void) strcpy(full + full_len, addition); \
1904 		full_len += addition_len; \
1905 		free(addition); \
1906 	}
1907 
1908 caddr_t
1909 cg_sanity(struct cg *cgp, int cgno)
1910 {
1911 	caddr_t full_err;
1912 	caddr_t this_err = NULL;
1913 	int full_len, this_len;
1914 	daddr32_t ndblk;
1915 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1916 	daddr32_t exp_freeoff, exp_nextfreeoff;
1917 
1918 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1919 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
1920 
1921 	full_err = NULL;
1922 	full_len = 0;
1923 
1924 	if (!cg_chkmagic(cgp)) {
1925 		this_len = fsck_asprintf(&this_err,
1926 		    "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1927 		    cgp->cg_magic, CG_MAGIC);
1928 		Append_Error(full_err, full_len, this_err, this_len);
1929 	}
1930 
1931 	if (cgp->cg_cgx != cgno) {
1932 		this_len = fsck_asprintf(&this_err,
1933 		    "WRONG CG NUMBER (%d should be %d)\n",
1934 		    cgp->cg_cgx, cgno);
1935 		Append_Error(full_err, full_len, this_err, this_len);
1936 	}
1937 
1938 	if ((cgp->cg_btotoff & 3) != 0) {
1939 		this_len = fsck_asprintf(&this_err,
1940 		    "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1941 		    cgp->cg_btotoff);
1942 		Append_Error(full_err, full_len, this_err, this_len);
1943 	}
1944 
1945 	if ((cgp->cg_boff & 1) != 0) {
1946 		this_len = fsck_asprintf(&this_err,
1947 	    "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1948 		    cgp->cg_boff);
1949 		Append_Error(full_err, full_len, this_err, this_len);
1950 	}
1951 
1952 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1953 		if (cgp->cg_ncyl < 1) {
1954 			this_len = fsck_asprintf(&this_err,
1955 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1956 			    cgp->cg_ncyl);
1957 		} else {
1958 			this_len = fsck_asprintf(&this_err,
1959 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1960 			    cgp->cg_ncyl, sblock.fs_cpg);
1961 		}
1962 		Append_Error(full_err, full_len, this_err, this_len);
1963 	}
1964 
1965 	if (cgp->cg_niblk != sblock.fs_ipg) {
1966 		this_len = fsck_asprintf(&this_err,
1967 		    "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1968 		    cgp->cg_niblk, sblock.fs_ipg);
1969 		Append_Error(full_err, full_len, this_err, this_len);
1970 	}
1971 
1972 	if (cgp->cg_ndblk != ndblk) {
1973 		this_len = fsck_asprintf(&this_err,
1974 	    "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1975 		    cgp->cg_ndblk, ndblk);
1976 		Append_Error(full_err, full_len, this_err, this_len);
1977 	}
1978 
1979 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1980 		this_len = fsck_asprintf(&this_err,
1981 		    "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1982 		    "(%d should be at least 0 and less than %d)\n",
1983 		    cgp->cg_rotor, ndblk);
1984 		Append_Error(full_err, full_len, this_err, this_len);
1985 	}
1986 
1987 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1988 		this_len = fsck_asprintf(&this_err,
1989 		    "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1990 		    "(%d should be at least 0 and less than %d)\n",
1991 		    cgp->cg_frotor, ndblk);
1992 		Append_Error(full_err, full_len, this_err, this_len);
1993 	}
1994 
1995 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1996 		this_len = fsck_asprintf(&this_err,
1997 		    "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1998 		    "(%d should be at least 0 and less than %d)\n",
1999 		    cgp->cg_irotor, sblock.fs_ipg);
2000 		Append_Error(full_err, full_len, this_err, this_len);
2001 	}
2002 
2003 	if (cgp->cg_btotoff != exp_btotoff) {
2004 		this_len = fsck_asprintf(&this_err,
2005 		    "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2006 		    cgp->cg_btotoff, exp_btotoff);
2007 		Append_Error(full_err, full_len, this_err, this_len);
2008 	}
2009 
2010 	if (cgp->cg_boff != exp_boff) {
2011 		this_len = fsck_asprintf(&this_err,
2012 		    "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2013 		    cgp->cg_boff, exp_boff);
2014 		Append_Error(full_err, full_len, this_err, this_len);
2015 	}
2016 
2017 	if (cgp->cg_iusedoff != exp_iusedoff) {
2018 		this_len = fsck_asprintf(&this_err,
2019 		    "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2020 		    cgp->cg_iusedoff, exp_iusedoff);
2021 		Append_Error(full_err, full_len, this_err, this_len);
2022 	}
2023 
2024 	if (cgp->cg_freeoff != exp_freeoff) {
2025 		this_len = fsck_asprintf(&this_err,
2026 		    "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2027 		    cgp->cg_freeoff, exp_freeoff);
2028 		Append_Error(full_err, full_len, this_err, this_len);
2029 	}
2030 
2031 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2032 		this_len = fsck_asprintf(&this_err,
2033 		    "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2034 		    cgp->cg_nextfreeoff, exp_nextfreeoff);
2035 		Append_Error(full_err, full_len, this_err, this_len);
2036 	}
2037 
2038 	return (full_err);
2039 }
2040 
2041 #undef	Append_Error
2042 
2043 /*
2044  * This is taken from mkfs, and is what is used to come up with the
2045  * original values for a struct cg.  This implies that, since these
2046  * are all constants, recalculating them now should give us the same
2047  * thing as what's on disk.
2048  */
2049 static void
2050 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2051 	daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2052 	daddr32_t *ndblk)
2053 {
2054 	daddr32_t cbase, dmax;
2055 	struct cg *cgp;
2056 
2057 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2058 	    (size_t)sblock.fs_cgsize);
2059 	cgp = cgblk.b_un.b_cg;
2060 
2061 	cbase = cgbase(&sblock, cgno);
2062 	dmax = cbase + sblock.fs_fpg;
2063 	if (dmax > sblock.fs_size)
2064 		dmax = sblock.fs_size;
2065 
2066 	/* LINTED pointer difference won't overflow */
2067 	*btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2068 	*boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2069 	*iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2070 	*freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2071 	*nextfreeoff = *freeoff +
2072 		howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2073 	*ndblk = dmax - cbase;
2074 }
2075 
2076 /*
2077  * Corrects all fields in the cg that can be done with the available
2078  * redundant data.
2079  */
2080 void
2081 fix_cg(struct cg *cgp, int cgno)
2082 {
2083 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2084 	daddr32_t exp_freeoff, exp_nextfreeoff;
2085 	daddr32_t ndblk;
2086 
2087 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2088 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
2089 
2090 	if (cgp->cg_cgx != cgno) {
2091 		cgp->cg_cgx = cgno;
2092 	}
2093 
2094 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2095 		if (cgno == (sblock.fs_ncg - 1)) {
2096 			cgp->cg_ncyl = sblock.fs_ncyl -
2097 				(sblock.fs_cpg * cgno);
2098 		} else {
2099 			cgp->cg_ncyl = sblock.fs_cpg;
2100 		}
2101 	}
2102 
2103 	if (cgp->cg_niblk != sblock.fs_ipg) {
2104 		/*
2105 		 * This is not used by the kernel, so it's pretty
2106 		 * harmless if it's wrong.
2107 		 */
2108 		cgp->cg_niblk = sblock.fs_ipg;
2109 	}
2110 
2111 	if (cgp->cg_ndblk != ndblk) {
2112 		cgp->cg_ndblk = ndblk;
2113 	}
2114 
2115 	/*
2116 	 * For the rotors, any position's valid, so pick the one we know
2117 	 * will always exist.
2118 	 */
2119 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2120 		cgp->cg_rotor = 0;
2121 	}
2122 
2123 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2124 		cgp->cg_frotor = 0;
2125 	}
2126 
2127 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2128 		cgp->cg_irotor = 0;
2129 	}
2130 
2131 	/*
2132 	 * For btotoff and boff, if they're misaligned they won't
2133 	 * match the expected values, so we're catching both cases
2134 	 * here.  Of course, if any of these are off, it seems likely
2135 	 * that the tables really won't be where we calculate they
2136 	 * should be anyway.
2137 	 */
2138 	if (cgp->cg_btotoff != exp_btotoff) {
2139 		cgp->cg_btotoff = exp_btotoff;
2140 	}
2141 
2142 	if (cgp->cg_boff != exp_boff) {
2143 		cgp->cg_boff = exp_boff;
2144 	}
2145 
2146 	if (cgp->cg_iusedoff != exp_iusedoff) {
2147 		cgp->cg_iusedoff = exp_iusedoff;
2148 	}
2149 
2150 	if (cgp->cg_freeoff != exp_freeoff) {
2151 		cgp->cg_freeoff = exp_freeoff;
2152 	}
2153 
2154 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2155 		cgp->cg_nextfreeoff = exp_nextfreeoff;
2156 	}
2157 
2158 	/*
2159 	 * Reset the magic, as we've recreated this cg, also
2160 	 * update the cg_time, as we're writing out the cg
2161 	 */
2162 	cgp->cg_magic = CG_MAGIC;
2163 	cgp->cg_time = time(NULL);
2164 
2165 	/*
2166 	 * We know there was at least one correctable problem,
2167 	 * or else we wouldn't have been called.  So instead of
2168 	 * marking the buffer dirty N times above, just do it
2169 	 * once here.
2170 	 */
2171 	cgdirty();
2172 }
2173 
2174 void
2175 examinelog(void (*cb)(daddr32_t))
2176 {
2177 	struct bufarea *bp;
2178 	extent_block_t *ebp;
2179 	extent_t *ep;
2180 	daddr32_t nfno, fno;
2181 	int i;
2182 	int j;
2183 
2184 	/*
2185 	 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2186 	 * we need to translate accordingly using logbtodb()
2187 	 */
2188 
2189 	if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2190 		if (debug) {
2191 			(void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2192 			    "Aborting log examination\n", \
2193 			    logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2194 		}
2195 		return;
2196 	}
2197 
2198 	/*
2199 	 * Read errors will return zeros, which will cause us
2200 	 * to do nothing harmful, so don't need to handle it.
2201 	 */
2202 	bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2203 			(size_t)sblock.fs_bsize);
2204 	ebp = (void *)bp->b_un.b_buf;
2205 
2206 	/*
2207 	 * Does it look like a log allocation table?
2208 	 */
2209 	/* LINTED pointer cast is aligned */
2210 	if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2211 	    sblock.fs_bsize))
2212 		return;
2213 	if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2214 		return;
2215 
2216 	ep = &ebp->extents[0];
2217 	for (i = 0; i < ebp->nextents; ++i, ++ep) {
2218 		fno = logbtofrag(&sblock, ep->pbno);
2219 		nfno = dbtofsb(&sblock, ep->nbno);
2220 		for (j = 0; j < nfno; ++j, ++fno) {
2221 			/*
2222 			 * Invoke the callback first, so that pass1 can
2223 			 * mark the log blocks in-use.  Then, if any
2224 			 * subsequent pass over the log shows us that a
2225 			 * block got freed (say, it was also claimed by
2226 			 * an inode that we cleared), we can safely declare
2227 			 * the log bad.
2228 			 */
2229 			if (cb != NULL)
2230 				(*cb)(fno);
2231 			if (!testbmap(fno))
2232 				islogok = 0;
2233 		}
2234 	}
2235 	brelse(bp);
2236 
2237 	if (cb != NULL) {
2238 		fno = logbtofrag(&sblock, sblock.fs_logbno);
2239 		for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2240 			(*cb)(fno);
2241 	}
2242 }
2243 
2244 static void
2245 freelogblk(daddr32_t frag)
2246 {
2247 	freeblk(sblock.fs_logbno, frag, 1);
2248 }
2249 
2250 caddr_t
2251 file_id(fsck_ino_t inum, mode_t mode)
2252 {
2253 	static char name[MAXPATHLEN + 1];
2254 
2255 	if (lfdir == inum) {
2256 		return (lfname);
2257 	}
2258 
2259 	if ((mode & IFMT) == IFDIR) {
2260 		(void) strcpy(name, "DIR");
2261 	} else if ((mode & IFMT) == IFATTRDIR) {
2262 		(void) strcpy(name, "ATTR DIR");
2263 	} else if ((mode & IFMT) == IFSHAD) {
2264 		(void) strcpy(name, "ACL");
2265 	} else {
2266 		(void) strcpy(name, "FILE");
2267 	}
2268 
2269 	return (name);
2270 }
2271 
2272 /*
2273  * Simple initializer for inodesc structures, so users of only a few
2274  * fields don't have to worry about getting the right defaults for
2275  * everything out.
2276  */
2277 void
2278 init_inodesc(struct inodesc *idesc)
2279 {
2280 	/*
2281 	 * Most fields should be zero, just hit the special cases.
2282 	 */
2283 	(void) memset((void *)idesc, 0, sizeof (struct inodesc));
2284 	idesc->id_fix = DONTKNOW;
2285 	idesc->id_lbn = -1;
2286 	idesc->id_truncto = -1;
2287 	idesc->id_firsthole = -1;
2288 }
2289 
2290 /*
2291  * Compare routine for tsearch(C) to use on ino_t instances.
2292  */
2293 int
2294 ino_t_cmp(const void *left, const void *right)
2295 {
2296 	const fsck_ino_t lino = (const fsck_ino_t)left;
2297 	const fsck_ino_t rino = (const fsck_ino_t)right;
2298 
2299 	return (lino - rino);
2300 }
2301 
2302 int
2303 cgisdirty(void)
2304 {
2305 	return (cgblk.b_dirty);
2306 }
2307 
2308 void
2309 cgflush(void)
2310 {
2311 	flush(fswritefd, &cgblk);
2312 }
2313 
2314 void
2315 dirty(struct bufarea *bp)
2316 {
2317 	if (fswritefd < 0) {
2318 		pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n");
2319 	} else {
2320 		(bp)->b_dirty = 1;
2321 		isdirty = 1;
2322 	}
2323 }
2324 
2325 void
2326 initbarea(struct bufarea *bp)
2327 {
2328 	(bp)->b_dirty = 0;
2329 	(bp)->b_bno = (diskaddr_t)-1LL;
2330 	(bp)->b_flags = 0;
2331 	(bp)->b_cnt = 0;
2332 	(bp)->b_errs = 0;
2333 }
2334 
2335 /*
2336  * Partition-sizing routines adapted from ../newfs/newfs.c.
2337  * Needed because calcsb() needs to use mkfs to work out what the
2338  * superblock should be, and mkfs insists on being told how many
2339  * sectors to use.
2340  *
2341  * Error handling assumes we're never called while preening.
2342  *
2343  * XXX This should be extracted into a ../ufslib.{c,h},
2344  *     in the same spirit to ../../fslib.{c,h}.  Once that is
2345  *     done, both fsck and newfs should be modified to link
2346  *     against it.
2347  */
2348 
2349 static int label_type;
2350 
2351 #define	LABEL_TYPE_VTOC		1
2352 #define	LABEL_TYPE_EFI		2
2353 #define	LABEL_TYPE_OTHER	3
2354 
2355 #define	MB			(1024 * 1024)
2356 #define	SECTORS_PER_TERABYTE	(1LL << 31)
2357 #define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
2358 
2359 diskaddr_t
2360 getdisksize(caddr_t disk, int fd)
2361 {
2362 	int rpm;
2363 	struct dk_geom g;
2364 	struct dk_cinfo ci;
2365 	diskaddr_t actual_size;
2366 
2367 	/*
2368 	 * get_device_size() determines the actual size of the
2369 	 * device, and also the disk's attributes, such as geometry.
2370 	 */
2371 	actual_size = get_device_size(fd, disk);
2372 
2373 	if (label_type == LABEL_TYPE_VTOC) {
2374 		if (ioctl(fd, DKIOCGGEOM, &g)) {
2375 			pwarn("%s: Unable to read Disk geometry", disk);
2376 			return (0);
2377 		}
2378 		if (sblock.fs_nsect == 0)
2379 			sblock.fs_nsect = g.dkg_nsect;
2380 		if (sblock.fs_ntrak == 0)
2381 			sblock.fs_ntrak = g.dkg_nhead;
2382 		if (sblock.fs_rps == 0) {
2383 			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2384 			sblock.fs_rps = rpm / 60;
2385 		}
2386 	}
2387 
2388 	if (sblock.fs_bsize == 0)
2389 		sblock.fs_bsize = MAXBSIZE;
2390 
2391 	/*
2392 	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2393 	 * information is not available, default to the min of a MB and
2394 	 * maxphys.
2395 	 */
2396 	if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2397 		sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2398 		if (sblock.fs_maxcontig < 0) {
2399 			int gotit, maxphys;
2400 
2401 			gotit = fsgetmaxphys(&maxphys, NULL);
2402 
2403 			/*
2404 			 * If we cannot get the maxphys value, default
2405 			 * to ufs_maxmaxphys (MB).
2406 			 */
2407 			if (gotit) {
2408 				sblock.fs_maxcontig = MIN(maxphys, MB);
2409 			} else {
2410 				sblock.fs_maxcontig = MB;
2411 			}
2412 		}
2413 		sblock.fs_maxcontig /= sblock.fs_bsize;
2414 	}
2415 
2416 	return (actual_size);
2417 }
2418 
2419 /*
2420  * Figure out how big the partition we're dealing with is.
2421  */
2422 static diskaddr_t
2423 get_device_size(int fd, caddr_t name)
2424 {
2425 	struct vtoc vtoc;
2426 	struct dk_gpt *efi_vtoc;
2427 	diskaddr_t slicesize = 0;
2428 
2429 	int index = read_vtoc(fd, &vtoc);
2430 
2431 	if (index >= 0) {
2432 		label_type = LABEL_TYPE_VTOC;
2433 	} else {
2434 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2435 			/* it might be an EFI label */
2436 			index = efi_alloc_and_read(fd, &efi_vtoc);
2437 			if (index >= 0)
2438 				label_type = LABEL_TYPE_EFI;
2439 		}
2440 	}
2441 
2442 	if (index < 0) {
2443 		/*
2444 		 * Since both attempts to read the label failed, we're
2445 		 * going to fall back to a brute force approach to
2446 		 * determining the device's size:  see how far out we can
2447 		 * perform reads on the device.
2448 		 */
2449 
2450 		slicesize = brute_force_get_device_size(fd);
2451 		if (slicesize == 0) {
2452 			switch (index) {
2453 			case VT_ERROR:
2454 				pwarn("%s: %s\n", name, strerror(errno));
2455 				break;
2456 			case VT_EIO:
2457 				pwarn("%s: I/O error accessing VTOC", name);
2458 				break;
2459 			case VT_EINVAL:
2460 				pwarn("%s: Invalid field in VTOC", name);
2461 				break;
2462 			default:
2463 				pwarn("%s: unknown error %d accessing VTOC",
2464 				    name, index);
2465 				break;
2466 			}
2467 			return (0);
2468 		} else {
2469 			label_type = LABEL_TYPE_OTHER;
2470 		}
2471 	}
2472 
2473 	if (label_type == LABEL_TYPE_EFI) {
2474 		slicesize = efi_vtoc->efi_parts[index].p_size;
2475 		efi_free(efi_vtoc);
2476 	} else if (label_type == LABEL_TYPE_VTOC) {
2477 		/*
2478 		 * In the vtoc struct, p_size is a 32-bit signed quantity.
2479 		 * In the dk_gpt struct (efi's version of the vtoc), p_size
2480 		 * is an unsigned 64-bit quantity.  By casting the vtoc's
2481 		 * psize to an unsigned 32-bit quantity, it will be copied
2482 		 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without
2483 		 * sign extension.
2484 		 */
2485 
2486 		slicesize = (uint32_t)vtoc.v_part[index].p_size;
2487 	}
2488 
2489 	return (slicesize);
2490 }
2491 
2492 /*
2493  * brute_force_get_device_size
2494  *
2495  * Determine the size of the device by seeing how far we can
2496  * read.  Doing an llseek( , , SEEK_END) would probably work
2497  * in most cases, but we've seen at least one third-party driver
2498  * which doesn't correctly support the SEEK_END option when the
2499  * the device is greater than a terabyte.
2500  */
2501 
2502 static diskaddr_t
2503 brute_force_get_device_size(int fd)
2504 {
2505 	diskaddr_t	min_fail = 0;
2506 	diskaddr_t	max_succeed = 0;
2507 	diskaddr_t	cur_db_off;
2508 	char 		buf[DEV_BSIZE];
2509 
2510 	/*
2511 	 * First, see if we can read the device at all, just to
2512 	 * eliminate errors that have nothing to do with the
2513 	 * device's size.
2514 	 */
2515 
2516 	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2517 	    ((read(fd, buf, DEV_BSIZE)) == -1))
2518 		return (0);  /* can't determine size */
2519 
2520 	/*
2521 	 * Now, go sequentially through the multiples of 4TB
2522 	 * to find the first read that fails (this isn't strictly
2523 	 * the most efficient way to find the actual size if the
2524 	 * size really could be anything between 0 and 2**64 bytes.
2525 	 * We expect the sizes to be less than 16 TB for some time,
2526 	 * so why do a bunch of reads that are larger than that?
2527 	 * However, this algorithm *will* work for sizes of greater
2528 	 * than 16 TB.  We're just not optimizing for those sizes.)
2529 	 */
2530 
2531 	/*
2532 	 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2533 	 * We're using > 32-bit constants here.  Therefore, its flow
2534 	 * analysis is wrong.  For the time being, ignore complaints
2535 	 * from it about the body of the for() being unreached.
2536 	 */
2537 	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2538 	    (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2539 	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2540 		if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2541 		    SEEK_SET) == -1) ||
2542 		    (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2543 			min_fail = cur_db_off;
2544 		else
2545 			max_succeed = cur_db_off;
2546 	}
2547 
2548 	/*
2549 	 * XXX Same lint flow analysis problem as above.
2550 	 */
2551 	if (min_fail == 0)
2552 		return (0);
2553 
2554 	/*
2555 	 * We now know that the size of the device is less than
2556 	 * min_fail and greater than or equal to max_succeed.  Now
2557 	 * keep splitting the difference until the actual size in
2558 	 * sectors in known.  We also know that the difference
2559 	 * between max_succeed and min_fail at this time is
2560 	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2561 	 * simplifies the math below.
2562 	 */
2563 
2564 	while (min_fail - max_succeed > 1) {
2565 		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2566 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2567 		    SEEK_SET)) == -1) ||
2568 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2569 			min_fail = cur_db_off;
2570 		else
2571 			max_succeed = cur_db_off;
2572 	}
2573 
2574 	/* the size is the last successfully read sector offset plus one */
2575 	return (max_succeed + 1);
2576 }
2577 
2578 static void
2579 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2580 {
2581 	struct dinode *dp;
2582 	char pathbuf[MAXPATHLEN + 1];
2583 
2584 	vpwarn(fmt, ap);
2585 	(void) putchar(' ');
2586 	pinode(ino);
2587 	(void) printf("\n");
2588 	getpathname(pathbuf, cwd, ino);
2589 	if (ino < UFSROOTINO || ino > maxino) {
2590 		pfatal("NAME=%s\n", pathbuf);
2591 		return;
2592 	}
2593 	dp = ginode(ino);
2594 	if (ftypeok(dp))
2595 		pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2596 	else
2597 		pfatal("NAME=%s\n", pathbuf);
2598 }
2599 
2600 void
2601 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2602 {
2603 	va_list ap;
2604 
2605 	va_start(ap, fmt);
2606 	vfileerror(ino, ino, fmt, ap);
2607 	va_end(ap);
2608 }
2609 
2610 static void
2611 vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2612 {
2613 	vfileerror(ino, ino, fmt, ap);
2614 }
2615 
2616 void
2617 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2618 {
2619 	va_list ap;
2620 
2621 	va_start(ap, fmt);
2622 	vfileerror(cwd, ino, fmt, ap);
2623 	va_end(ap);
2624 }
2625 
2626 /*
2627  * Adds the given inode to the orphaned-directories list, limbo_dirs.
2628  * Assumes that the caller has set INCLEAR in the inode's statemap[]
2629  * entry.
2630  *
2631  * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2632  * meaning it's effectively an orphan.  It needs to be noted now, so
2633  * it will be remembered in pass 4.
2634  */
2635 
2636 void
2637 add_orphan_dir(fsck_ino_t ino)
2638 {
2639 	if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2640 		errexit("add_orphan_dir: out of memory");
2641 }
2642 
2643 /*
2644  * Remove an inode from the orphaned-directories list, presumably
2645  * because it's been cleared.
2646  */
2647 void
2648 remove_orphan_dir(fsck_ino_t ino)
2649 {
2650 	(void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2651 }
2652 
2653 /*
2654  * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
2655  * and lufs.c:checksum().
2656  */
2657 static void
2658 log_setsum(int32_t *sp, int32_t *lp, int nb)
2659 {
2660 	int32_t csum = 0;
2661 
2662 	*sp = 0;
2663 	nb /= sizeof (int32_t);
2664 	while (nb--)
2665 		csum += *lp++;
2666 	*sp = csum;
2667 }
2668 
2669 static int
2670 log_checksum(int32_t *sp, int32_t *lp, int nb)
2671 {
2672 	int32_t ssum = *sp;
2673 
2674 	log_setsum(sp, lp, nb);
2675 	if (ssum != *sp) {
2676 		*sp = ssum;
2677 		return (0);
2678 	}
2679 	return (1);
2680 }
2681