xref: /titanic_50/usr/src/cmd/fs.d/ufs/fsck/utilities.c (revision 921e7e07108d1e3f09fecb1805fa2c79bb584fed)
1 /*
2  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms are permitted
14  * provided that: (1) source distributions retain this entire copyright
15  * notice and comment, and (2) distributions including binaries display
16  * the following acknowledgement:  ``This product includes software
17  * developed by the University of California, Berkeley and its contributors''
18  * in the documentation or other materials provided with the distribution
19  * and in all advertising materials mentioning features or use of this
20  * software. Neither the name of the University nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <stdarg.h>
34 #include <libadm.h>
35 #include <note.h>
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/mntent.h>
39 #include <sys/filio.h>
40 #include <sys/fs/ufs_fs.h>
41 #include <sys/vnode.h>
42 #include <sys/fs/ufs_acl.h>
43 #include <sys/fs/ufs_inode.h>
44 #include <sys/fs/ufs_log.h>
45 #define	_KERNEL
46 #include <sys/fs/ufs_fsdir.h>
47 #undef _KERNEL
48 #include <sys/mnttab.h>
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <fcntl.h>
52 #include <signal.h>
53 #include <string.h>
54 #include <ctype.h>
55 #include <sys/vfstab.h>
56 #include <sys/lockfs.h>
57 #include <errno.h>
58 #include <sys/cmn_err.h>
59 #include <sys/dkio.h>
60 #include <sys/vtoc.h>
61 #include <sys/efi_partition.h>
62 #include <fslib.h>
63 #include <inttypes.h>
64 #include "fsck.h"
65 
/*
 * Mount point of the filesystem being checked.  Remains NULL until
 * mounted() finds a matching mnttab entry and saves a strdup()'d copy
 * of that entry's mnt_mountp here.
 */
caddr_t mount_point = NULL;

/*
 * getdatablk() counts every lookup in totalreads and every lookup that
 * had to go through getblk() in diskreads; ckfini() prints both when
 * debugging.
 */
static int64_t diskreads, totalreads;	/* Disk cache statistics */

/* Prototypes for the routines that are private to this file. */
static int log_checksum(int32_t *, int32_t *, int);
static void vdirerror(fsck_ino_t, caddr_t, va_list);
static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
static void vpwarn(caddr_t, va_list);
static int getline(FILE *, caddr_t, int);
static struct bufarea *alloc_bufarea(void);
static void rwerror(caddr_t, diskaddr_t, int rval);
static void debugclean(void);
static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
static void freelogblk(daddr32_t);
static void verrexit(caddr_t, va_list);
static void vpfatal(caddr_t, va_list);
static diskaddr_t get_device_size(int, caddr_t);
static diskaddr_t brute_force_get_device_size(int);
static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
	    daddr32_t *, daddr32_t *, daddr32_t *);
87 
88 int
89 ftypeok(struct dinode *dp)
90 {
91 	switch (dp->di_mode & IFMT) {
92 
93 	case IFDIR:
94 	case IFREG:
95 	case IFBLK:
96 	case IFCHR:
97 	case IFLNK:
98 	case IFSOCK:
99 	case IFIFO:
100 	case IFSHAD:
101 	case IFATTRDIR:
102 		return (1);
103 
104 	default:
105 		if (debug)
106 			(void) printf("bad file type 0%o\n", dp->di_mode);
107 		return (0);
108 	}
109 }
110 
111 int
112 acltypeok(struct dinode *dp)
113 {
114 	if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
115 		return (1);
116 
117 	if (debug)
118 		(void) printf("bad file type for acl I=%d: 0%o\n",
119 		    dp->di_shadow, dp->di_mode);
120 	return (0);
121 }
122 
123 NOTE(PRINTFLIKE(1))
124 int
125 reply(caddr_t fmt, ...)
126 {
127 	va_list ap;
128 	char line[80];
129 
130 	if (preen)
131 		pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
132 
133 	if (mflag) {
134 		/*
135 		 * We don't know what's going on, so don't potentially
136 		 * make things worse by having errexit() write stuff
137 		 * out to disk.
138 		 */
139 		(void) printf(
140 		    "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
141 		    devname);
142 		exit(EXERRFATAL);
143 	}
144 
145 	va_start(ap, fmt);
146 	(void) putchar('\n');
147 	(void) vprintf(fmt, ap);
148 	(void) putchar('?');
149 	(void) putchar(' ');
150 	va_end(ap);
151 
152 	if (nflag || fswritefd < 0) {
153 		(void) printf(" no\n\n");
154 		return (0);
155 	}
156 	if (yflag) {
157 		(void) printf(" yes\n\n");
158 		return (1);
159 	}
160 	(void) fflush(stdout);
161 	if (getline(stdin, line, sizeof (line)) == EOF)
162 		errexit("\n");
163 	(void) printf("\n");
164 	if (line[0] == 'y' || line[0] == 'Y') {
165 		return (1);
166 	} else {
167 		return (0);
168 	}
169 }
170 
/*
 * Read one line from FP into LOC, dropping every whitespace character
 * along the way.  At most MAXLEN - 1 characters are stored, followed by
 * a terminating \0; anything beyond that is read and discarded so that
 * the whole line is always consumed.
 *
 * Returns the number of characters stored (excluding the \0), or EOF
 * if end-of-file is hit before a newline.  In the EOF case LOC is not
 * terminated.
 *
 * If MAXLEN is not positive there is no room for even the terminator,
 * so nothing at all is written to LOC; the line is still consumed and
 * 0 (or EOF) is returned.
 */
int
getline(FILE *fp, caddr_t loc, int maxlen)
{
	int n;
	caddr_t p, lastloc;

	p = loc;
	/*
	 * lastloc is the slot reserved for the terminator.  Don't form
	 * &loc[-1] when the caller's buffer has no space whatsoever.
	 */
	lastloc = (maxlen > 0) ? &loc[maxlen - 1] : NULL;

	while ((n = getc(fp)) != '\n') {
		if (n == EOF)
			return (EOF);
		if (lastloc != NULL && !isspace(n) && p < lastloc)
			*p++ = (char)n;
	}

	if (lastloc != NULL)
		*p = '\0';

	/* LINTED pointer difference won't overflow */
	return ((int)(p - loc));
}
189 
190 /*
191  * Malloc buffers and set up cache.
192  */
193 void
194 bufinit(void)
195 {
196 	struct bufarea *bp;
197 	int bufcnt, i;
198 	caddr_t bufp;
199 
200 	bufp = malloc((size_t)sblock.fs_bsize);
201 	if (bufp == NULL)
202 		goto nomem;
203 	initbarea(&cgblk);
204 	cgblk.b_un.b_buf = bufp;
205 	bufhead.b_next = bufhead.b_prev = &bufhead;
206 	bufcnt = MAXBUFSPACE / sblock.fs_bsize;
207 	if (bufcnt < MINBUFS)
208 		bufcnt = MINBUFS;
209 	for (i = 0; i < bufcnt; i++) {
210 		bp = (struct bufarea *)malloc(sizeof (struct bufarea));
211 		if (bp == NULL) {
212 			if (i >= MINBUFS)
213 				goto noalloc;
214 			goto nomem;
215 		}
216 
217 		bufp = malloc((size_t)sblock.fs_bsize);
218 		if (bufp == NULL) {
219 			free((void *)bp);
220 			if (i >= MINBUFS)
221 				goto noalloc;
222 			goto nomem;
223 		}
224 		initbarea(bp);
225 		bp->b_un.b_buf = bufp;
226 		bp->b_prev = &bufhead;
227 		bp->b_next = bufhead.b_next;
228 		bufhead.b_next->b_prev = bp;
229 		bufhead.b_next = bp;
230 	}
231 noalloc:
232 	bufhead.b_size = i;	/* save number of buffers */
233 	pbp = pdirbp = NULL;
234 	return;
235 
236 nomem:
237 	errexit("cannot allocate buffer pool\n");
238 	/* NOTREACHED */
239 }
240 
241 /*
242  * Undo a bufinit().
243  */
244 void
245 unbufinit(void)
246 {
247 	int cnt;
248 	struct bufarea *bp, *nbp;
249 
250 	cnt = 0;
251 	for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
252 		cnt++;
253 		flush(fswritefd, bp);
254 		nbp = bp->b_prev;
255 		/*
256 		 * We're discarding the entire chain, so this isn't
257 		 * technically necessary.  However, it doesn't hurt
258 		 * and lint's data flow analysis is much happier
259 		 * (this prevents it from thinking there's a chance
260 		 * of our using memory elsewhere after it's been released).
261 		 */
262 		nbp->b_next = bp->b_next;
263 		bp->b_next->b_prev = nbp;
264 		free((void *)bp->b_un.b_buf);
265 		free((void *)bp);
266 	}
267 
268 	if (bufhead.b_size != cnt)
269 		errexit("Panic: cache lost %d buffers\n",
270 			bufhead.b_size - cnt);
271 }
272 
273 /*
274  * Manage a cache of directory blocks.
275  */
276 struct bufarea *
277 getdatablk(daddr32_t blkno, size_t size)
278 {
279 	struct bufarea *bp;
280 
281 	for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
282 		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
283 			goto foundit;
284 		}
285 	for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
286 		if ((bp->b_flags & B_INUSE) == 0)
287 			break;
288 	if (bp == &bufhead) {
289 		bp = alloc_bufarea();
290 		if (bp == NULL) {
291 			errexit("deadlocked buffer pool\n");
292 			/* NOTREACHED */
293 		}
294 	}
295 	/*
296 	 * We're at the same logical level as getblk(), so if there
297 	 * are any errors, we'll let our caller handle them.
298 	 */
299 	diskreads++;
300 	(void) getblk(bp, blkno, size);
301 
302 foundit:
303 	totalreads++;
304 	bp->b_cnt++;
305 	/*
306 	 * Move the buffer to head of linked list if it isn't
307 	 * already there.
308 	 */
309 	if (bufhead.b_next != bp) {
310 		bp->b_prev->b_next = bp->b_next;
311 		bp->b_next->b_prev = bp->b_prev;
312 		bp->b_prev = &bufhead;
313 		bp->b_next = bufhead.b_next;
314 		bufhead.b_next->b_prev = bp;
315 		bufhead.b_next = bp;
316 	}
317 	bp->b_flags |= B_INUSE;
318 	return (bp);
319 }
320 
321 void
322 brelse(struct bufarea *bp)
323 {
324 	bp->b_cnt--;
325 	if (bp->b_cnt == 0) {
326 		bp->b_flags &= ~B_INUSE;
327 	}
328 }
329 
330 struct bufarea *
331 getblk(struct bufarea *bp, daddr32_t blk, size_t size)
332 {
333 	diskaddr_t dblk;
334 
335 	dblk = fsbtodb(&sblock, blk);
336 	if (bp->b_bno == dblk)
337 		return (bp);
338 	flush(fswritefd, bp);
339 	bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
340 	bp->b_bno = dblk;
341 	bp->b_size = size;
342 	return (bp);
343 }
344 
345 void
346 flush(int fd, struct bufarea *bp)
347 {
348 	int i, j;
349 	caddr_t sip;
350 	long size;
351 
352 	if (!bp->b_dirty)
353 		return;
354 
355 	/*
356 	 * It's not our buf, so if there are errors, let whoever
357 	 * acquired it deal with the actual problem.
358 	 */
359 	if (bp->b_errs != 0)
360 		pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
361 	bp->b_dirty = 0;
362 	bp->b_errs = 0;
363 	bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
364 	if (bp != &sblk) {
365 		return;
366 	}
367 
368 	/*
369 	 * We're flushing the superblock, so make sure all the
370 	 * ancillary bits go out as well.
371 	 */
372 	sip = (caddr_t)sblock.fs_u.fs_csp;
373 	for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
374 		size = sblock.fs_cssize - i < sblock.fs_bsize ?
375 		    sblock.fs_cssize - i : sblock.fs_bsize;
376 		bwrite(fswritefd, sip,
377 		    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
378 		    size);
379 		sip += size;
380 	}
381 }
382 
383 static void
384 rwerror(caddr_t mesg, diskaddr_t blk, int rval)
385 {
386 	int olderr = errno;
387 
388 	if (!preen)
389 		(void) printf("\n");
390 
391 	if (rval == -1)
392 		pfatal("CANNOT %s: DISK BLOCK %lld: %s",
393 		    mesg, blk, strerror(olderr));
394 	else
395 		pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
396 
397 	if (reply("CONTINUE") == 0) {
398 		exitstat = EXERRFATAL;
399 		errexit("Program terminated\n");
400 	}
401 }
402 
403 void
404 ckfini(void)
405 {
406 	int64_t percentage;
407 
408 	if (fswritefd < 0)
409 		return;
410 
411 	flush(fswritefd, &sblk);
412 	/*
413 	 * Were we using a backup superblock?
414 	 */
415 	if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
416 		if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
417 			sblk.b_bno = SBOFF / dev_bsize;
418 			sbdirty();
419 			flush(fswritefd, &sblk);
420 		}
421 	}
422 	flush(fswritefd, &cgblk);
423 	if (cgblk.b_un.b_buf != NULL) {
424 		free((void *)cgblk.b_un.b_buf);
425 		cgblk.b_un.b_buf = NULL;
426 	}
427 	unbufinit();
428 	pbp = NULL;
429 	pdirbp = NULL;
430 	if (debug) {
431 		/*
432 		 * Note that we only count cache-related reads.
433 		 * Anything that called fsck_bread() or getblk()
434 		 * directly are explicitly not cached, so they're not
435 		 * included here.
436 		 */
437 		if (totalreads != 0)
438 			percentage = diskreads * 100 / totalreads;
439 		else
440 			percentage = 0;
441 
442 		(void) printf("cache missed %lld of %lld reads (%lld%%)\n",
443 		    (longlong_t)diskreads, (longlong_t)totalreads,
444 		    (longlong_t)percentage);
445 	}
446 
447 	(void) close(fsreadfd);
448 	(void) close(fswritefd);
449 	fsreadfd = -1;
450 	fswritefd = -1;
451 }
452 
453 int
454 fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
455 {
456 	caddr_t cp;
457 	int i;
458 	int errs;
459 	offset_t offset = ldbtob(blk);
460 	offset_t addr;
461 
462 	/*
463 	 * In our universe, nothing exists before the superblock, so
464 	 * just pretend it's always zeros.  This is the complement of
465 	 * bwrite()'s ignoring write requests into that space.
466 	 */
467 	if (blk < SBLOCK) {
468 		if (debug)
469 			(void) printf(
470 			    "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
471 			    SBLOCK, (longlong_t)blk);
472 		(void) memset(buf, 0, (size_t)size);
473 		return (1);
474 	}
475 
476 	if (llseek(fd, offset, 0) < 0) {
477 		rwerror("SEEK", blk, -1);
478 	}
479 
480 	if ((i = read(fd, buf, size)) == size) {
481 		return (0);
482 	}
483 	rwerror("READ", blk, i);
484 	if (llseek(fd, offset, 0) < 0) {
485 		rwerror("SEEK", blk, -1);
486 	}
487 	errs = 0;
488 	(void) memset(buf, 0, (size_t)size);
489 	pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
490 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
491 		addr = ldbtob(blk + i);
492 		if (llseek(fd, addr, SEEK_CUR) < 0 ||
493 		    read(fd, cp, (int)secsize) < 0) {
494 			iscorrupt = 1;
495 			(void) printf(" %llu", blk + (u_longlong_t)i);
496 			errs++;
497 		}
498 	}
499 	(void) printf("\n");
500 	return (errs);
501 }
502 
503 void
504 bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
505 {
506 	int i;
507 	int n;
508 	caddr_t cp;
509 	offset_t offset = ldbtob(blk);
510 	offset_t addr;
511 
512 	if (fd < 0)
513 		return;
514 	if (blk < SBLOCK) {
515 		if (debug)
516 			(void) printf(
517 		    "WARNING: Attempt to write illegal blkno %lld on %s\n",
518 			    (longlong_t)blk, devname);
519 		return;
520 	}
521 	if (llseek(fd, offset, 0) < 0) {
522 		rwerror("SEEK", blk, -1);
523 	}
524 	if ((i = write(fd, buf, (int)size)) == size) {
525 		fsmodified = 1;
526 		return;
527 	}
528 	rwerror("WRITE", blk, i);
529 	if (llseek(fd, offset, 0) < 0) {
530 		rwerror("SEEK", blk, -1);
531 	}
532 	pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
533 	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
534 		n = 0;
535 		addr = ldbtob(blk + i);
536 		if (llseek(fd, addr, SEEK_CUR) < 0 ||
537 		    (n = write(fd, cp, DEV_BSIZE)) < 0) {
538 			iscorrupt = 1;
539 			(void) printf(" %llu", blk + (u_longlong_t)i);
540 		} else if (n > 0) {
541 			fsmodified = 1;
542 		}
543 
544 	}
545 	(void) printf("\n");
546 }
547 
548 /*
549  * Allocates the specified number of contiguous fragments.
550  */
551 daddr32_t
552 allocblk(int wantedfrags)
553 {
554 	int block, leadfrag, tailfrag;
555 	daddr32_t selected;
556 	size_t size;
557 	struct bufarea *bp;
558 
559 	/*
560 	 * It's arguable whether we should just fail, or instead
561 	 * error out here.  Since we should only ever be asked for
562 	 * a single fragment or an entire block (i.e., sblock.fs_frag),
563 	 * we'll fail out because anything else means somebody
564 	 * changed code without considering all of the ramifications.
565 	 */
566 	if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
567 		exitstat = EXERRFATAL;
568 		errexit("allocblk() asked for %d frags.  "
569 			"Legal range is 1 to %d",
570 			wantedfrags, sblock.fs_frag);
571 	}
572 
573 	/*
574 	 * For each filesystem block, look at every possible starting
575 	 * offset within the block such that we can get the number of
576 	 * contiguous fragments that we need.  This is a drastically
577 	 * simplified version of the kernel's mapsearch() and alloc*().
578 	 * It's also correspondingly slower.
579 	 */
580 	for (block = 0; block < maxfsblock - sblock.fs_frag;
581 	    block += sblock.fs_frag) {
582 		for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
583 		    leadfrag++) {
584 			/*
585 			 * Is first fragment of candidate run available?
586 			 */
587 			if (testbmap(block + leadfrag))
588 				continue;
589 			/*
590 			 * Are the rest of them available?
591 			 */
592 			for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
593 				if (testbmap(block + leadfrag + tailfrag))
594 					break;
595 			if (tailfrag < wantedfrags) {
596 				/*
597 				 * No, skip the known-unusable run.
598 				 */
599 				leadfrag += tailfrag;
600 				continue;
601 			}
602 			/*
603 			 * Found what we need, so claim them.
604 			 */
605 			for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
606 				setbmap(block + leadfrag + tailfrag);
607 			n_blks += wantedfrags;
608 			size = wantedfrags * sblock.fs_fsize;
609 			selected = block + leadfrag;
610 			bp = getdatablk(selected, size);
611 			(void) memset((void *)bp->b_un.b_buf, 0, size);
612 			dirty(bp);
613 			brelse(bp);
614 			if (debug)
615 				(void) printf(
616 		    "allocblk: selected %d (in block %d), frags %d, size %d\n",
617 				    selected, selected % sblock.fs_bsize,
618 				    wantedfrags, (int)size);
619 			return (selected);
620 		}
621 	}
622 	return (0);
623 }
624 
625 /*
626  * Free a previously allocated block
627  */
628 void
629 freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
630 {
631 	struct inodesc idesc;
632 
633 	if (debug)
634 		(void) printf("debug: freeing %d fragments starting at %d\n",
635 		    frags, blkno);
636 
637 	init_inodesc(&idesc);
638 
639 	idesc.id_number = ino;
640 	idesc.id_blkno = blkno;
641 	idesc.id_numfrags = frags;
642 	idesc.id_truncto = -1;
643 
644 	/*
645 	 * Nothing in the return status has any relevance to how
646 	 * we're using pass4check(), so just ignore it.
647 	 */
648 	(void) pass4check(&idesc);
649 }
650 
651 /*
652  * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
653  * that the given buffer is at least MAXPATHLEN + 1 characters.
654  */
655 void
656 getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
657 {
658 	int len;
659 	caddr_t cp;
660 	struct dinode *dp;
661 	struct inodesc idesc;
662 	struct inoinfo *inp;
663 
664 	if (debug)
665 		(void) printf("debug: getpathname(curdir %d, ino %d)\n",
666 		    curdir, ino);
667 
668 	if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
669 		(void) strcpy(namebuf, "?");
670 		return;
671 	}
672 
673 	if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
674 		(void) strcpy(namebuf, "/");
675 		return;
676 	}
677 
678 	init_inodesc(&idesc);
679 	idesc.id_type = DATA;
680 	cp = &namebuf[MAXPATHLEN - 1];
681 	*cp = '\0';
682 
683 	/*
684 	 * In the case of extended attributes, our
685 	 * parent won't necessarily be a directory, so just
686 	 * return what we've found with a prefix indicating
687 	 * that it's an XATTR.  Presumably our caller will
688 	 * know what's going on and do something useful, like
689 	 * work out the path of the parent and then combine
690 	 * the two names.
691 	 *
692 	 * Can't use strcpy(), etc, because we've probably
693 	 * already got some name information in the buffer and
694 	 * the usual trailing \0 would lose it.
695 	 */
696 	dp = ginode(curdir);
697 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
698 		idesc.id_number = curdir;
699 		idesc.id_parent = ino;
700 		idesc.id_func = findname;
701 		idesc.id_name = namebuf;
702 		idesc.id_fix = NOFIX;
703 		if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
704 			*cp-- = '?';
705 		}
706 
707 		len = sizeof (XATTR_DIR_NAME) - 1;
708 		cp -= len;
709 		(void) memmove(cp, XATTR_DIR_NAME, len);
710 		goto attrname;
711 	}
712 
713 	/*
714 	 * If curdir == ino, need to get a handle on .. so we
715 	 * can search it for ino's name.  Otherwise, just search
716 	 * the given directory for ino.  Repeat until out of space
717 	 * or a full path has been built.
718 	 */
719 	if (curdir != ino) {
720 		idesc.id_parent = curdir;
721 		goto namelookup;
722 	}
723 	while (ino != UFSROOTINO && ino != 0) {
724 		idesc.id_number = ino;
725 		idesc.id_func = findino;
726 		idesc.id_name = "..";
727 		idesc.id_fix = NOFIX;
728 		if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
729 			inp = getinoinfo(ino);
730 			if ((inp == NULL) || (inp->i_parent == 0)) {
731 				break;
732 			}
733 			idesc.id_parent = inp->i_parent;
734 		}
735 
736 		/*
737 		 * To get this far, id_parent must have the inode
738 		 * number for `..' in it.  By definition, that's got
739 		 * to be a directory, so search it for the inode of
740 		 * interest.
741 		 */
742 namelookup:
743 		idesc.id_number = idesc.id_parent;
744 		idesc.id_parent = ino;
745 		idesc.id_func = findname;
746 		idesc.id_name = namebuf;
747 		idesc.id_fix = NOFIX;
748 		if ((ckinode(ginode(idesc.id_number),
749 		    &idesc, CKI_TRAVERSE) & FOUND) == 0) {
750 			break;
751 		}
752 		/*
753 		 * Prepend to what we've accumulated so far.  If
754 		 * there's not enough room for even one more path element
755 		 * (of the worst-case length), then bail out.
756 		 */
757 		len = strlen(namebuf);
758 		cp -= len;
759 		if (cp < &namebuf[MAXNAMLEN])
760 			break;
761 		(void) memmove(cp, namebuf, len);
762 		*--cp = '/';
763 
764 		/*
765 		 * Corner case for a looped-to-itself directory.
766 		 */
767 		if (ino == idesc.id_number)
768 			break;
769 
770 		/*
771 		 * Climb one level of the hierarchy.  In other words,
772 		 * the current .. becomes the inode to search for and
773 		 * its parent becomes the directory to search in.
774 		 */
775 		ino = idesc.id_number;
776 	}
777 
778 	/*
779 	 * If we hit a discontinuity in the hierarchy, indicate it by
780 	 * prefixing the path so far with `?'.  Otherwise, the first
781 	 * character will be `/' as a side-effect of the *--cp above.
782 	 *
783 	 * The special case is to handle the situation where we're
784 	 * trying to look something up in UFSROOTINO, but didn't find
785 	 * it.
786 	 */
787 	if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
788 		if (cp > namebuf)
789 			cp--;
790 		*cp = '?';
791 	}
792 
793 	/*
794 	 * The invariants being used for buffer integrity are:
795 	 * - namebuf[] is terminated with \0 before anything else
796 	 * - cp is always <= the last element of namebuf[]
797 	 * - the new path element is always stored at the
798 	 *   beginning of namebuf[], and is no more than MAXNAMLEN-1
799 	 *   characters
800 	 * - cp is is decremented by the number of characters in
801 	 *   the new path element
802 	 * - if, after the above accounting for the new element's
803 	 *   size, there is no longer enough room at the beginning of
804 	 *   namebuf[] for a full-sized path element and a slash,
805 	 *   terminate the loop.  cp is in the range
806 	 *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
807 	 */
808 attrname:
809 	/* LINTED per the above discussion */
810 	(void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
811 }
812 
/*
 * Signal handler: run the normal ckfini() shutdown and then exit with
 * status EXSIGNAL.  The signal number argument is not used.
 */
/* ARGSUSED */
void
catch(int dummy)
{
	ckfini();
	exit(EXSIGNAL);
}
820 
/*
 * When preening, allow a single quit to signal
 * a special exit after filesystem checks complete
 * so that reboot sequence may be interrupted.
 *
 * The handler announces what will happen, sets the `interrupted' flag,
 * and puts SIGQUIT back to its default action, so only the first
 * SIGQUIT is handled here.  The signal number argument is not used.
 */
/* ARGSUSED */
void
catchquit(int dummy)
{
	(void) printf("returning to single-user after filesystem check\n");
	interrupted = 1;
	(void) signal(SIGQUIT, SIG_DFL);
}
834 
835 
836 /*
837  * determine whether an inode should be fixed.
838  */
839 NOTE(PRINTFLIKE(2))
840 int
841 dofix(struct inodesc *idesc, caddr_t msg, ...)
842 {
843 	int rval = 0;
844 	va_list ap;
845 
846 	va_start(ap, msg);
847 
848 	switch (idesc->id_fix) {
849 
850 	case DONTKNOW:
851 		if (idesc->id_type == DATA)
852 			vdirerror(idesc->id_number, msg, ap);
853 		else
854 			vpwarn(msg, ap);
855 		if (preen) {
856 			idesc->id_fix = FIX;
857 			rval = ALTERED;
858 			break;
859 		}
860 		if (reply("SALVAGE") == 0) {
861 			idesc->id_fix = NOFIX;
862 			break;
863 		}
864 		idesc->id_fix = FIX;
865 		rval = ALTERED;
866 		break;
867 
868 	case FIX:
869 		rval = ALTERED;
870 		break;
871 
872 	case NOFIX:
873 		break;
874 
875 	default:
876 		errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
877 	}
878 
879 	va_end(ap);
880 	return (rval);
881 }
882 
883 NOTE(PRINTFLIKE(1))
884 void
885 errexit(caddr_t fmt, ...)
886 {
887 	va_list ap;
888 
889 	va_start(ap, fmt);
890 	verrexit(fmt, ap);
891 	/* NOTREACHED */
892 }
893 
894 NOTE(PRINTFLIKE(1))
895 static void
896 verrexit(caddr_t fmt, va_list ap)
897 {
898 	static int recursing = 0;
899 
900 	if (!recursing) {
901 		recursing = 1;
902 		if (errorlocked || iscorrupt) {
903 			if (havesb) {
904 				sblock.fs_clean = FSBAD;
905 				sblock.fs_state = FSOKAY - (long)sblock.fs_time;
906 				sblock.fs_state = -sblock.fs_state;
907 				sbdirty();
908 				write_altsb(fswritefd);
909 				flush(fswritefd, &sblk);
910 			}
911 		}
912 		ckfini();
913 		recursing = 0;
914 	}
915 	(void) vprintf(fmt, ap);
916 	if (fmt[strlen(fmt) - 1] != '\n')
917 		(void) putchar('\n');
918 	exit((exitstat != 0) ? exitstat : EXERRFATAL);
919 }
920 
921 /*
922  * An unexpected inconsistency occured.
923  * Die if preening, otherwise just print message and continue.
924  */
925 NOTE(PRINTFLIKE(1))
926 void
927 pfatal(caddr_t fmt, ...)
928 {
929 	va_list ap;
930 
931 	va_start(ap, fmt);
932 	vpfatal(fmt, ap);
933 	va_end(ap);
934 }
935 
936 NOTE(PRINTFLIKE(1))
937 static void
938 vpfatal(caddr_t fmt, va_list ap)
939 {
940 	if (preen) {
941 		if (*fmt != '\0') {
942 			(void) printf("%s: ", devname);
943 			(void) vprintf(fmt, ap);
944 			(void) printf("\n");
945 		}
946 		(void) printf(
947 		    "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
948 		    devname);
949 		if (havesb) {
950 			sblock.fs_clean = FSBAD;
951 			sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
952 			sbdirty();
953 			flush(fswritefd, &sblk);
954 		}
955 		/*
956 		 * We're exiting, it doesn't really matter that our
957 		 * caller doesn't get to call va_end().
958 		 */
959 		if (exitstat == 0)
960 			exitstat = EXFNDERRS;
961 		exit(exitstat);
962 	}
963 	if (*fmt != '\0') {
964 		(void) vprintf(fmt, ap);
965 	}
966 }
967 
968 /*
969  * Pwarn just prints a message when not preening,
970  * or a warning (preceded by filename) when preening.
971  */
972 NOTE(PRINTFLIKE(1))
973 void
974 pwarn(caddr_t fmt, ...)
975 {
976 	va_list ap;
977 
978 	va_start(ap, fmt);
979 	vpwarn(fmt, ap);
980 	va_end(ap);
981 }
982 
983 NOTE(PRINTFLIKE(1))
984 static void
985 vpwarn(caddr_t fmt, va_list ap)
986 {
987 	if (*fmt != '\0') {
988 		if (preen)
989 			(void) printf("%s: ", devname);
990 		(void) vprintf(fmt, ap);
991 	}
992 }
993 
994 /*
995  * Like sprintf(), except the buffer is dynamically allocated
996  * and returned, instead of being passed in.  A pointer to the
997  * buffer is stored in *RET, and FMT is the usual format string.
998  * The number of characters in *RET (excluding the trailing \0,
999  * to be consistent with the other *printf() routines) is returned.
1000  *
1001  * Solaris doesn't have asprintf(3C) yet, unfortunately.
1002  */
1003 NOTE(PRINTFLIKE(2))
1004 int
1005 fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1006 {
1007 	int len;
1008 	caddr_t buffer;
1009 	va_list ap;
1010 
1011 	va_start(ap, fmt);
1012 	len = vsnprintf(NULL, 0, fmt, ap);
1013 	va_end(ap);
1014 
1015 	buffer = malloc((len + 1) * sizeof (char));
1016 	if (buffer == NULL) {
1017 		errexit("Out of memory in asprintf\n");
1018 		/* NOTREACHED */
1019 	}
1020 
1021 	va_start(ap, fmt);
1022 	(void) vsnprintf(buffer, len + 1, fmt, ap);
1023 	va_end(ap);
1024 
1025 	*ret = buffer;
1026 	return (len);
1027 }
1028 
1029 /*
1030  * So we can take advantage of kernel routines in ufs_subr.c.
1031  */
1032 /* PRINTFLIKE2 */
1033 void
1034 cmn_err(int level, caddr_t fmt, ...)
1035 {
1036 	va_list ap;
1037 
1038 	va_start(ap, fmt);
1039 	if (level == CE_PANIC) {
1040 		(void) printf("INTERNAL INCONSISTENCY:");
1041 		verrexit(fmt, ap);
1042 	} else {
1043 		(void) vprintf(fmt, ap);
1044 	}
1045 	va_end(ap);
1046 }
1047 
1048 /*
1049  * Check to see if unraw version of name is already mounted.
1050  * Updates devstr with the device name if devstr is not NULL
1051  * and str_size is positive.
1052  */
1053 int
1054 mounted(caddr_t name, caddr_t devstr, size_t str_size)
1055 {
1056 	int found;
1057 	struct mnttab *mntent;
1058 
1059 	mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1060 	if (mntent == NULL)
1061 		return (M_NOMNT);
1062 
1063 	/*
1064 	 * It's mounted.  With or without write access?
1065 	 */
1066 	if (hasmntopt(mntent, MNTOPT_RO) != 0)
1067 		found = M_RO;	/* mounted as RO */
1068 	else
1069 		found = M_RW; 	/* mounted as R/W */
1070 
1071 	if (mount_point == NULL) {
1072 		mount_point = strdup(mntent->mnt_mountp);
1073 		if (mount_point == NULL) {
1074 			errexit("fsck: memory allocation failure: %s",
1075 				strerror(errno));
1076 			/* NOTREACHED */
1077 		}
1078 
1079 		if (devstr != NULL && str_size > 0)
1080 			(void) strlcpy(devstr, mntent->mnt_special, str_size);
1081 	}
1082 
1083 	return (found);
1084 }
1085 
1086 /*
1087  * Check to see if name corresponds to an entry in vfstab, and that the entry
1088  * does not have option ro.
1089  */
1090 int
1091 writable(caddr_t name)
1092 {
1093 	int rw = 1;
1094 	struct vfstab vfsbuf, vfskey;
1095 	FILE *vfstab;
1096 
1097 	vfstab = fopen(VFSTAB, "r");
1098 	if (vfstab == NULL) {
1099 		(void) printf("can't open %s\n", VFSTAB);
1100 		return (1);
1101 	}
1102 	(void) memset((void *)&vfskey, 0, sizeof (vfskey));
1103 	vfsnull(&vfskey);
1104 	vfskey.vfs_special = unrawname(name);
1105 	vfskey.vfs_fstype = MNTTYPE_UFS;
1106 	if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1107 	    (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1108 		rw = 0;
1109 	}
1110 	(void) fclose(vfstab);
1111 	return (rw);
1112 }
1113 
1114 /*
1115  * debugclean
1116  */
1117 static void
1118 debugclean(void)
1119 {
1120 	if (!debug)
1121 		return;
1122 
1123 	if ((iscorrupt == 0) && (isdirty == 0))
1124 		return;
1125 
1126 	if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1127 	    (sblock.fs_clean == FSLOG && islog && islogok) ||
1128 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1129 		return;
1130 
1131 	(void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1132 	    sblock.fs_clean == FSSTABLE ? "stable" :
1133 	    sblock.fs_clean == FSLOG ? "logging" :
1134 	    sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1135 	    devname);
1136 }
1137 
1138 /*
1139  * updateclean
1140  *	Carefully and transparently update the clean flag.
1141  *
1142  * `iscorrupt' has to be in its final state before this is called.
1143  */
1144 int
1145 updateclean(void)
1146 {
1147 	int freedlog = 0;
1148 	struct bufarea cleanbuf;
1149 	size_t size;
1150 	ssize_t io_res;
1151 	diskaddr_t bno;
1152 	char fsclean;
1153 	int fsreclaim;
1154 	char fsflags;
1155 	int flags_ok = 1;
1156 	daddr32_t fslogbno;
1157 	offset_t sblkoff;
1158 	time_t t;
1159 
1160 	/*
1161 	 * debug stuff
1162 	 */
1163 	debugclean();
1164 
1165 	/*
1166 	 * set fsclean to its appropriate value
1167 	 */
1168 	fslogbno = sblock.fs_logbno;
1169 	fsclean = sblock.fs_clean;
1170 	fsreclaim = sblock.fs_reclaim;
1171 	fsflags = sblock.fs_flags;
1172 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1173 		fsclean = FSACTIVE;
1174 	}
1175 	/*
1176 	 * If ufs log is not okay, note that we need to clear it.
1177 	 */
1178 	examinelog(sblock.fs_logbno, NULL);
1179 	if (fslogbno && !(islog && islogok)) {
1180 		fsclean = FSACTIVE;
1181 		fslogbno = 0;
1182 	}
1183 
1184 	/*
1185 	 * if necessary, update fs_clean and fs_state
1186 	 */
1187 	switch (fsclean) {
1188 
1189 	case FSACTIVE:
1190 		if (!iscorrupt) {
1191 			fsclean = FSSTABLE;
1192 			fsreclaim = 0;
1193 		}
1194 		break;
1195 
1196 	case FSCLEAN:
1197 	case FSSTABLE:
1198 		if (iscorrupt) {
1199 			fsclean = FSACTIVE;
1200 		} else {
1201 			fsreclaim = 0;
1202 		}
1203 		break;
1204 
1205 	case FSLOG:
1206 		if (iscorrupt) {
1207 			fsclean = FSACTIVE;
1208 		} else if (!islog || fslogbno == 0) {
1209 			fsclean = FSSTABLE;
1210 			fsreclaim = 0;
1211 		} else if (fflag) {
1212 			fsreclaim = 0;
1213 		}
1214 		break;
1215 
1216 	case FSFIX:
1217 		fsclean = FSBAD;
1218 		if (errorlocked && !iscorrupt) {
1219 			fsclean = islog ? FSLOG : FSCLEAN;
1220 		}
1221 		break;
1222 
1223 	default:
1224 		if (iscorrupt) {
1225 			fsclean = FSACTIVE;
1226 		} else {
1227 			fsclean = FSSTABLE;
1228 			fsreclaim = 0;
1229 		}
1230 	}
1231 
1232 	if (largefile_count > 0)
1233 		fsflags |= FSLARGEFILES;
1234 	else
1235 		fsflags &= ~FSLARGEFILES;
1236 
1237 	/*
1238 	 * There can be two discrepencies here.  A) The superblock
1239 	 * shows no largefiles but we found some while scanning.
1240 	 * B) The superblock indicates the presence of largefiles,
1241 	 * but none are present.  Note that if preening, the superblock
1242 	 * is silently corrected.
1243 	 */
1244 	if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1245 	    (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1246 		flags_ok = 0;
1247 
1248 	if (debug)
1249 		(void) printf(
1250 		    "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1251 		    largefile_count, sblock.fs_flags, flags_ok);
1252 
1253 	/*
1254 	 * If fs is unchanged, do nothing.
1255 	 */
1256 	if ((!isdirty) && (flags_ok) &&
1257 	    (fslogbno == sblock.fs_logbno) &&
1258 	    (sblock.fs_clean == fsclean) &&
1259 	    (sblock.fs_reclaim == fsreclaim) &&
1260 	    (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1261 		if (errorlocked) {
1262 			if (!do_errorlock(LOCKFS_ULOCK))
1263 				pwarn(
1264 		    "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1265 		}
1266 		return (freedlog);
1267 	}
1268 
1269 	/*
1270 	 * if user allows, update superblock state
1271 	 */
1272 	if (debug) {
1273 		(void) printf(
1274 	    "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1275 		    sblock.fs_flags, sblock.fs_logbno,
1276 		    sblock.fs_clean, sblock.fs_reclaim,
1277 		    sblock.fs_state + sblock.fs_time);
1278 		(void) printf(
1279 	    "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1280 		    fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1281 	}
1282 	if (!isdirty && !preen && !rerun &&
1283 	    (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1284 		return (freedlog);
1285 
1286 	(void) time(&t);
1287 	sblock.fs_time = (time32_t)t;
1288 	if (debug)
1289 		printclean();
1290 
1291 	if (sblock.fs_logbno != fslogbno) {
1292 		examinelog(sblock.fs_logbno, &freelogblk);
1293 		freedlog++;
1294 	}
1295 
1296 	sblock.fs_logbno = fslogbno;
1297 	sblock.fs_clean = fsclean;
1298 	sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1299 	sblock.fs_reclaim = fsreclaim;
1300 	sblock.fs_flags = fsflags;
1301 
1302 	/*
1303 	 * if superblock can't be written, return
1304 	 */
1305 	if (fswritefd < 0)
1306 		return (freedlog);
1307 
1308 	/*
1309 	 * Read private copy of superblock, update clean flag, and write it.
1310 	 */
1311 	bno  = sblk.b_bno;
1312 	size = sblk.b_size;
1313 
1314 	sblkoff = ldbtob(bno);
1315 
1316 	if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1317 		errexit("out of memory");
1318 	if (llseek(fsreadfd, sblkoff, 0) == -1) {
1319 		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1320 		    (longlong_t)bno, strerror(errno));
1321 		goto out;
1322 	}
1323 
1324 	if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1325 		report_io_prob("READ FROM", bno, size, io_res);
1326 		goto out;
1327 	}
1328 
1329 	cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
1330 	cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
1331 	cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
1332 	cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
1333 	cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1334 	cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;
1335 
1336 	if (llseek(fswritefd, sblkoff, 0) == -1) {
1337 		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1338 		    (longlong_t)bno, strerror(errno));
1339 		goto out;
1340 	}
1341 
1342 	if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1343 		report_io_prob("WRITE TO", bno, size, io_res);
1344 		goto out;
1345 	}
1346 
1347 	/*
1348 	 * 1208040
1349 	 * If we had to use -b to grab an alternate superblock, then we
1350 	 * likely had to do so because of unacceptable differences between
1351 	 * the main and alternate superblocks.  So, we had better update
1352 	 * the alternate superblock as well, or we'll just fail again
1353 	 * the next time we attempt to run fsck!
1354 	 */
1355 	if (bflag != 0) {
1356 		write_altsb(fswritefd);
1357 	}
1358 
1359 	if (errorlocked) {
1360 		if (!do_errorlock(LOCKFS_ULOCK))
1361 			pwarn(
1362 		    "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1363 	}
1364 
1365 out:
1366 	if (cleanbuf.b_un.b_buf != NULL) {
1367 		free((void *)cleanbuf.b_un.b_buf);
1368 	}
1369 
1370 	return (freedlog);
1371 }
1372 
1373 static void
1374 report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1375 {
1376 	if (failure < 0)
1377 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1378 		    what, (int)bno, strerror(errno));
1379 	else if (failure == 0)
1380 		(void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1381 		    what, (int)bno);
1382 	else
1383 		(void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1384 		    what, (int)bno, (unsigned)failure, (unsigned)expected);
1385 }
1386 
1387 /*
1388  * print out clean info
1389  */
1390 void
1391 printclean(void)
1392 {
1393 	caddr_t s;
1394 
1395 	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1396 		s = "unknown";
1397 	else
1398 		switch (sblock.fs_clean) {
1399 
1400 		case FSACTIVE:
1401 			s = "active";
1402 			break;
1403 
1404 		case FSCLEAN:
1405 			s = "clean";
1406 			break;
1407 
1408 		case FSSTABLE:
1409 			s = "stable";
1410 			break;
1411 
1412 		case FSLOG:
1413 			s = "logging";
1414 			break;
1415 
1416 		case FSBAD:
1417 			s = "is bad";
1418 			break;
1419 
1420 		case FSFIX:
1421 			s = "being fixed";
1422 			break;
1423 
1424 		default:
1425 			s = "unknown";
1426 		}
1427 
1428 	if (preen)
1429 		pwarn("is %s.\n", s);
1430 	else
1431 		(void) printf("** %s is %s.\n", devname, s);
1432 }
1433 
1434 int
1435 is_errorlocked(caddr_t fs)
1436 {
1437 	int		retval;
1438 	struct stat64	statb;
1439 	caddr_t		mountp;
1440 	struct mnttab	*mntent;
1441 
1442 	retval = 0;
1443 
1444 	if (!fs)
1445 		return (0);
1446 
1447 	if (stat64(fs, &statb) < 0)
1448 		return (0);
1449 
1450 	if (S_ISDIR(statb.st_mode)) {
1451 		mountp = fs;
1452 	} else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1453 		mntent = search_mnttab(NULL, fs, NULL, 0);
1454 		if (mntent == NULL)
1455 			return (0);
1456 		mountp = mntent->mnt_mountp;
1457 		if (mountp == NULL) /* theoretically a can't-happen */
1458 			return (0);
1459 	} else {
1460 		return (0);
1461 	}
1462 
1463 	/*
1464 	 * From here on, must `goto out' to avoid memory leakage.
1465 	 */
1466 
1467 	if (elock_combuf == NULL)
1468 		elock_combuf =
1469 			(caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1470 	else
1471 		elock_combuf =
1472 			(caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1473 
1474 	if (elock_combuf == NULL)
1475 		goto out;
1476 
1477 	(void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1478 
1479 	if (elock_mountp != NULL) {
1480 		free(elock_mountp);
1481 	}
1482 
1483 	elock_mountp = strdup(mountp);
1484 	if (elock_mountp == NULL)
1485 		goto out;
1486 
1487 	if (mountfd < 0) {
1488 		if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1489 			goto out;
1490 	}
1491 
1492 	if (lfp == NULL) {
1493 		lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1494 		if (lfp == NULL)
1495 			goto out;
1496 		(void) memset((void *)lfp, 0, sizeof (struct lockfs));
1497 	}
1498 
1499 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1500 	lfp->lf_comment = elock_combuf;
1501 
1502 	if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1503 		goto out;
1504 
1505 	/*
1506 	 * lint believes that the ioctl() (or any other function
1507 	 * taking lfp as an arg) could free lfp.  This is not the
1508 	 * case, however.
1509 	 */
1510 	retval = LOCKFS_IS_ELOCK(lfp);
1511 
1512 out:
1513 	return (retval);
1514 }
1515 
1516 /*
1517  * Given a name which is known to be a directory, see if it appears
1518  * in the vfstab.  If so, return the entry's block (special) device
1519  * field via devstr.
1520  */
1521 int
1522 check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1523 {
1524 	return (NULL != search_vfstab(name, NULL, devstr, str_size));
1525 }
1526 
1527 /*
1528  * Given a name which is known to be a directory, see if it appears
1529  * in the mnttab.  If so, return the entry's block (special) device
1530  * field via devstr.
1531  */
1532 int
1533 check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1534 {
1535 	return (NULL != search_mnttab(name, NULL, devstr, str_size));
1536 }
1537 
1538 /*
1539  * Search for mount point and/or special device in the given file.
1540  * The first matching entry is returned.
1541  *
1542  * If an entry is found and str_size is greater than zero, then
1543  * up to size_str bytes of the special device name from the entry
1544  * are copied to devstr.
1545  */
1546 
1547 #define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
1548 			st_nuller, st_init, st_searcher) \
1549 	{ \
1550 		FILE *fp; \
1551 		struct st_type *retval = NULL; \
1552 		struct st_type key; \
1553 		static struct st_type buffer; \
1554 		\
1555 		/* LINTED ``assigned value never used'' */ \
1556 		st_nuller(&key); \
1557 		key.st_mount = mountp; \
1558 		key.st_special = special; \
1559 		st_init; \
1560 		\
1561 		if ((fp = fopen(st_file, "r")) == NULL) \
1562 			return (NULL); \
1563 		\
1564 		if (st_searcher(fp, &buffer, &key) == 0) { \
1565 			retval = &buffer; \
1566 			if (devstr != NULL && str_size > 0 && \
1567 			    buffer.st_special != NULL) { \
1568 				(void) strlcpy(devstr, buffer.st_special, \
1569 				    str_size); \
1570 			} \
1571 		} \
1572 		(void) fclose(fp); \
1573 		return (retval); \
1574 	}
1575 
1576 static struct vfstab *
1577 search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1578 SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1579 		(retval = retval), getvfsany)
1580 
1581 static struct mnttab *
1582 search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1583 SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1584 		(key.mnt_fstype = MNTTYPE_UFS), getmntany)
1585 
1586 int
1587 do_errorlock(int lock_type)
1588 {
1589 	caddr_t	   buf;
1590 	time_t	   now;
1591 	struct tm *local;
1592 	int	   rc;
1593 
1594 	if (elock_combuf == NULL)
1595 		errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1596 			elock_mountp ? elock_mountp : "<null>",
1597 			lock_type);
1598 
1599 	if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1600 	    NULL) {
1601 		errexit("Couldn't alloc memory for temp. lock status buffer\n");
1602 	}
1603 	if (lfp == NULL) {
1604 		errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1605 					elock_mountp, lock_type);
1606 	}
1607 
1608 	(void) memmove((void *)buf, (void *)elock_combuf,
1609 	    LOCKFS_MAXCOMMENTLEN-1);
1610 
1611 	switch (lock_type) {
1612 	case LOCKFS_ELOCK:
1613 		/*
1614 		 * Note that if it is error-locked, we won't get an
1615 		 * error back if we try to error-lock it again.
1616 		 */
1617 		if (time(&now) != (time_t)-1) {
1618 			if ((local = localtime(&now)) != NULL)
1619 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1620 		    "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1621 				    elock_combuf, (int)pid,
1622 				    local->tm_mon + 1, local->tm_mday,
1623 				    (local->tm_year % 100), local->tm_hour,
1624 				    local->tm_min, local->tm_sec);
1625 			else
1626 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1627 				    "%s [fsck pid %d", elock_combuf, pid);
1628 
1629 		} else {
1630 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1631 			    "%s [fsck pid %d", elock_combuf, pid);
1632 		}
1633 		break;
1634 
1635 	case LOCKFS_ULOCK:
1636 		if (time(&now) != (time_t)-1) {
1637 			if ((local = localtime(&now)) != NULL) {
1638 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1639 				    "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1640 				    elock_combuf,
1641 				    local->tm_mon + 1, local->tm_mday,
1642 				    (local->tm_year % 100), local->tm_hour,
1643 				    local->tm_min, local->tm_sec);
1644 			} else {
1645 				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1646 				    "%s]", elock_combuf);
1647 			}
1648 		} else {
1649 			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1650 			    "%s]", elock_combuf);
1651 		}
1652 		if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1653 			pwarn("do_errorlock: unlock failed: %s\n",
1654 			    strerror(errno));
1655 			goto out;
1656 		}
1657 		break;
1658 
1659 	default:
1660 		break;
1661 	}
1662 
1663 	(void) memmove((void *)elock_combuf, (void *)buf,
1664 	    LOCKFS_MAXCOMMENTLEN - 1);
1665 
1666 	lfp->lf_lock = lock_type;
1667 	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1668 	lfp->lf_comment = elock_combuf;
1669 	lfp->lf_flags = 0;
1670 	errno = 0;
1671 
1672 	if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1673 		if (errno == EINVAL) {
1674 			pwarn("Another fsck active?\n");
1675 			iscorrupt = 0;	/* don't go away mad, just go away */
1676 		} else {
1677 			pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1678 			    lock_type, elock_combuf, strerror(errno));
1679 		}
1680 	}
1681 out:
1682 	if (buf != NULL) {
1683 		free((void *)buf);
1684 	}
1685 
1686 	return (rc != -1);
1687 }
1688 
1689 /*
1690  * Shadow inode support.  To register a shadow with a client is to note
1691  * that an inode (the client) refers to the shadow.
1692  */
1693 
1694 static struct shadowclients *
1695 newshadowclient(struct shadowclients *prev)
1696 {
1697 	struct shadowclients *rc;
1698 
1699 	rc = (struct shadowclients *)malloc(sizeof (*rc));
1700 	if (rc == NULL)
1701 		errexit("newshadowclient: cannot malloc shadow client");
1702 	rc->next = prev;
1703 	rc->nclients = 0;
1704 
1705 	rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1706 	    maxshadowclients);
1707 	if (rc->client == NULL)
1708 		errexit("newshadowclient: cannot malloc client array");
1709 	return (rc);
1710 }
1711 
1712 void
1713 registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1714 	struct shadowclientinfo **info)
1715 {
1716 	struct shadowclientinfo *sci;
1717 	struct shadowclients *scc;
1718 
1719 	/*
1720 	 * Already have a record for this shadow?
1721 	 */
1722 	for (sci = *info; sci != NULL; sci = sci->next)
1723 		if (sci->shadow == shadow)
1724 			break;
1725 	if (sci == NULL) {
1726 		/*
1727 		 * It's a new shadow, add it to the list
1728 		 */
1729 		sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1730 		if (sci == NULL)
1731 			errexit("registershadowclient: cannot malloc");
1732 		sci->next = *info;
1733 		*info = sci;
1734 		sci->shadow = shadow;
1735 		sci->totalClients = 0;
1736 		sci->clients = newshadowclient(NULL);
1737 	}
1738 
1739 	sci->totalClients++;
1740 	scc = sci->clients;
1741 	if (scc->nclients >= maxshadowclients) {
1742 		scc = newshadowclient(sci->clients);
1743 		sci->clients = scc;
1744 	}
1745 
1746 	scc->client[scc->nclients++] = client;
1747 }
1748 
1749 /*
1750  * Locate and discard a shadow.
1751  */
1752 void
1753 clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1754 {
1755 	struct shadowclientinfo *sci, *prev;
1756 
1757 	/*
1758 	 * Do we have a record for this shadow?
1759 	 */
1760 	prev = NULL;
1761 	for (sci = *info; sci != NULL; sci = sci->next) {
1762 		if (sci->shadow == shadow)
1763 			break;
1764 		prev = sci;
1765 	}
1766 
1767 	if (sci != NULL) {
1768 		/*
1769 		 * First, pull it off the list, since we know there
1770 		 * shouldn't be any future references to this one.
1771 		 */
1772 		if (prev == NULL)
1773 			*info = sci->next;
1774 		else
1775 			prev->next = sci->next;
1776 		deshadow(sci, clearattrref);
1777 	}
1778 }
1779 
1780 /*
1781  * Discard all memory used to track clients of a shadow.
1782  */
1783 void
1784 deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1785 {
1786 	struct shadowclients *clients, *discard;
1787 	int idx;
1788 
1789 	clients = sci->clients;
1790 	while (clients != NULL) {
1791 		discard = clients;
1792 		clients = clients->next;
1793 		if (discard->client != NULL) {
1794 			if (cb != NULL) {
1795 				for (idx = 0; idx < discard->nclients; idx++)
1796 					(*cb)(discard->client[idx]);
1797 			}
1798 			free((void *)discard->client);
1799 		}
1800 		free((void *)discard);
1801 	}
1802 
1803 	free((void *)sci);
1804 }
1805 
1806 /*
1807  * Allocate more buffer as need arises but allocate one at a time.
1808  * This is done to make sure that fsck does not exit with error if it
1809  * needs more buffer to complete its task.
1810  */
1811 static struct bufarea *
1812 alloc_bufarea(void)
1813 {
1814 	struct bufarea *newbp;
1815 	caddr_t bufp;
1816 
1817 	bufp = malloc((unsigned int)sblock.fs_bsize);
1818 	if (bufp == NULL)
1819 		return (NULL);
1820 
1821 	newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1822 	if (newbp == NULL) {
1823 		free((void *)bufp);
1824 		return (NULL);
1825 	}
1826 
1827 	initbarea(newbp);
1828 	newbp->b_un.b_buf = bufp;
1829 	newbp->b_prev = &bufhead;
1830 	newbp->b_next = bufhead.b_next;
1831 	bufhead.b_next->b_prev = newbp;
1832 	bufhead.b_next = newbp;
1833 	bufhead.b_size++;
1834 	return (newbp);
1835 }
1836 
1837 /*
1838  * We length-limit in both unrawname() and rawname() to avoid
1839  * overflowing our arrays or those of our naive, trusting callers.
1840  */
1841 
1842 caddr_t
1843 unrawname(caddr_t name)
1844 {
1845 	caddr_t dp;
1846 	static char fullname[MAXPATHLEN + 1];
1847 
1848 	if ((dp = getfullblkname(name)) == NULL)
1849 		return ("");
1850 
1851 	(void) strlcpy(fullname, dp, sizeof (fullname));
1852 	/*
1853 	 * Not reporting under debug, as the allocation isn't
1854 	 * reported by getfullblkname.  The idea is that we
1855 	 * produce balanced alloc/free instances.
1856 	 */
1857 	free(dp);
1858 
1859 	return (fullname);
1860 }
1861 
1862 caddr_t
1863 rawname(caddr_t name)
1864 {
1865 	caddr_t dp;
1866 	static char fullname[MAXPATHLEN + 1];
1867 
1868 	if ((dp = getfullrawname(name)) == NULL)
1869 		return ("");
1870 
1871 	(void) strlcpy(fullname, dp, sizeof (fullname));
1872 	/*
1873 	 * Not reporting under debug, as the allocation isn't
1874 	 * reported by getfullblkname.  The idea is that we
1875 	 * produce balanced alloc/free instances.
1876 	 */
1877 	free(dp);
1878 
1879 	return (fullname);
1880 }
1881 
1882 /*
1883  * Make sure that a cg header looks at least moderately reasonable.
1884  * We want to be able to trust the contents enough to be able to use
1885  * the standard accessor macros.  So, besides looking at the obvious
1886  * such as the magic number, we verify that the offset field values
1887  * are properly aligned and not too big or small.
1888  *
1889  * Returns a NULL pointer if the cg is sane enough for our needs, else
1890  * a dynamically-allocated string describing all of its faults.
1891  */
1892 #define	Append_Error(full, full_len, addition, addition_len) \
1893 	if (full == NULL) { \
1894 		full = addition; \
1895 		full_len = addition_len; \
1896 	} else { \
1897 		/* lint doesn't think realloc() understands NULLs */ \
1898 		full = realloc(full, full_len + addition_len + 1); \
1899 		if (full == NULL) { \
1900 			errexit("Out of memory in cg_sanity"); \
1901 			/* NOTREACHED */ \
1902 		} \
1903 		(void) strcpy(full + full_len, addition); \
1904 		full_len += addition_len; \
1905 		free(addition); \
1906 	}
1907 
1908 caddr_t
1909 cg_sanity(struct cg *cgp, int cgno, int *is_fatal)
1910 {
1911 	caddr_t full_err;
1912 	caddr_t this_err = NULL;
1913 	int full_len, this_len;
1914 	daddr32_t ndblk;
1915 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1916 	daddr32_t exp_freeoff, exp_nextfreeoff;
1917 
1918 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1919 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
1920 
1921 	full_err = NULL;
1922 	full_len = 0;
1923 	*is_fatal = 0;
1924 
1925 	if (!cg_chkmagic(cgp)) {
1926 		this_len = fsck_asprintf(&this_err,
1927 		    "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1928 		    cgp->cg_magic, CG_MAGIC);
1929 		Append_Error(full_err, full_len, this_err, this_len);
1930 		*is_fatal = 1;
1931 	}
1932 
1933 	if (cgp->cg_cgx != cgno) {
1934 		this_len = fsck_asprintf(&this_err,
1935 		    "WRONG CG NUMBER (%d should be %d)\n",
1936 		    cgp->cg_cgx, cgno);
1937 		Append_Error(full_err, full_len, this_err, this_len);
1938 	}
1939 
1940 	if ((cgp->cg_btotoff & 3) != 0) {
1941 		this_len = fsck_asprintf(&this_err,
1942 		    "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1943 		    cgp->cg_btotoff);
1944 		Append_Error(full_err, full_len, this_err, this_len);
1945 	}
1946 
1947 	if ((cgp->cg_boff & 1) != 0) {
1948 		this_len = fsck_asprintf(&this_err,
1949 	    "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1950 		    cgp->cg_boff);
1951 		Append_Error(full_err, full_len, this_err, this_len);
1952 	}
1953 
1954 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1955 		if (cgp->cg_ncyl < 1) {
1956 			this_len = fsck_asprintf(&this_err,
1957 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1958 			    cgp->cg_ncyl);
1959 		} else {
1960 			this_len = fsck_asprintf(&this_err,
1961 	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1962 			    cgp->cg_ncyl, sblock.fs_cpg);
1963 		}
1964 		Append_Error(full_err, full_len, this_err, this_len);
1965 	}
1966 
1967 	if (cgp->cg_niblk != sblock.fs_ipg) {
1968 		this_len = fsck_asprintf(&this_err,
1969 		    "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1970 		    cgp->cg_niblk, sblock.fs_ipg);
1971 		Append_Error(full_err, full_len, this_err, this_len);
1972 	}
1973 
1974 	if (cgp->cg_ndblk != ndblk) {
1975 		this_len = fsck_asprintf(&this_err,
1976 	    "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1977 		    cgp->cg_ndblk, ndblk);
1978 		Append_Error(full_err, full_len, this_err, this_len);
1979 	}
1980 
1981 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1982 		this_len = fsck_asprintf(&this_err,
1983 		    "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1984 		    "(%d should be at least 0 and less than %d)\n",
1985 		    cgp->cg_rotor, ndblk);
1986 		Append_Error(full_err, full_len, this_err, this_len);
1987 	}
1988 
1989 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1990 		this_len = fsck_asprintf(&this_err,
1991 		    "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1992 		    "(%d should be at least 0 and less than %d)\n",
1993 		    cgp->cg_frotor, ndblk);
1994 		Append_Error(full_err, full_len, this_err, this_len);
1995 	}
1996 
1997 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1998 		this_len = fsck_asprintf(&this_err,
1999 		    "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
2000 		    "(%d should be at least 0 and less than %d)\n",
2001 		    cgp->cg_irotor, sblock.fs_ipg);
2002 		Append_Error(full_err, full_len, this_err, this_len);
2003 	}
2004 
2005 	if (cgp->cg_btotoff != exp_btotoff) {
2006 		this_len = fsck_asprintf(&this_err,
2007 		    "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2008 		    cgp->cg_btotoff, exp_btotoff);
2009 		Append_Error(full_err, full_len, this_err, this_len);
2010 	}
2011 
2012 	if (cgp->cg_boff != exp_boff) {
2013 		this_len = fsck_asprintf(&this_err,
2014 		    "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2015 		    cgp->cg_boff, exp_boff);
2016 		Append_Error(full_err, full_len, this_err, this_len);
2017 	}
2018 
2019 	if (cgp->cg_iusedoff != exp_iusedoff) {
2020 		this_len = fsck_asprintf(&this_err,
2021 		    "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2022 		    cgp->cg_iusedoff, exp_iusedoff);
2023 		Append_Error(full_err, full_len, this_err, this_len);
2024 	}
2025 
2026 	if (cgp->cg_freeoff != exp_freeoff) {
2027 		this_len = fsck_asprintf(&this_err,
2028 		    "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2029 		    cgp->cg_freeoff, exp_freeoff);
2030 		Append_Error(full_err, full_len, this_err, this_len);
2031 	}
2032 
2033 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2034 		this_len = fsck_asprintf(&this_err,
2035 		    "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2036 		    cgp->cg_nextfreeoff, exp_nextfreeoff);
2037 		Append_Error(full_err, full_len, this_err, this_len);
2038 	}
2039 
2040 	return (full_err);
2041 }
2042 
2043 #undef	Append_Error
2044 
2045 /*
2046  * This is taken from mkfs, and is what is used to come up with the
2047  * original values for a struct cg.  This implies that, since these
2048  * are all constants, recalculating them now should give us the same
2049  * thing as what's on disk.
2050  */
2051 static void
2052 cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2053 	daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2054 	daddr32_t *ndblk)
2055 {
2056 	daddr32_t cbase, dmax;
2057 	struct cg *cgp;
2058 
2059 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2060 	    (size_t)sblock.fs_cgsize);
2061 	cgp = cgblk.b_un.b_cg;
2062 
2063 	cbase = cgbase(&sblock, cgno);
2064 	dmax = cbase + sblock.fs_fpg;
2065 	if (dmax > sblock.fs_size)
2066 		dmax = sblock.fs_size;
2067 
2068 	/* LINTED pointer difference won't overflow */
2069 	*btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2070 	*boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2071 	*iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2072 	*freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2073 	*nextfreeoff = *freeoff +
2074 		howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2075 	*ndblk = dmax - cbase;
2076 }
2077 
2078 /*
2079  * Corrects all fields in the cg that can be done with the available
2080  * redundant data.
2081  */
2082 void
2083 fix_cg(struct cg *cgp, int cgno)
2084 {
2085 	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2086 	daddr32_t exp_freeoff, exp_nextfreeoff;
2087 	daddr32_t ndblk;
2088 
2089 	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2090 	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
2091 
2092 	if (cgp->cg_cgx != cgno) {
2093 		cgp->cg_cgx = cgno;
2094 	}
2095 
2096 	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2097 		if (cgno == sblock.fs_ncg) {
2098 			cgp->cg_ncyl = sblock.fs_ncyl -
2099 				(sblock.fs_ncg * (cgno - 1));
2100 		} else {
2101 			cgp->cg_ncyl = sblock.fs_cpg;
2102 		}
2103 	}
2104 
2105 	if (cgp->cg_niblk != sblock.fs_ipg) {
2106 		/*
2107 		 * This is not used by the kernel, so it's pretty
2108 		 * harmless if it's wrong.
2109 		 */
2110 		cgp->cg_niblk = sblock.fs_ipg;
2111 	}
2112 
2113 	if (cgp->cg_ndblk != ndblk) {
2114 		cgp->cg_ndblk = ndblk;
2115 	}
2116 
2117 	/*
2118 	 * For the rotors, any position's valid, so pick the one we know
2119 	 * will always exist.
2120 	 */
2121 	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2122 		cgp->cg_rotor = 0;
2123 	}
2124 
2125 	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2126 		cgp->cg_frotor = 0;
2127 	}
2128 
2129 	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2130 		cgp->cg_irotor = 0;
2131 	}
2132 
2133 	/*
2134 	 * For btotoff and boff, if they're misaligned they won't
2135 	 * match the expected values, so we're catching both cases
2136 	 * here.  Of course, if any of these are off, it seems likely
2137 	 * that the tables really won't be where we calculate they
2138 	 * should be anyway.
2139 	 */
2140 	if (cgp->cg_btotoff != exp_btotoff) {
2141 		cgp->cg_btotoff = exp_btotoff;
2142 	}
2143 
2144 	if (cgp->cg_boff != exp_boff) {
2145 		cgp->cg_boff = exp_boff;
2146 	}
2147 
2148 	if (cgp->cg_iusedoff != exp_iusedoff) {
2149 		cgp->cg_iusedoff = exp_iusedoff;
2150 	}
2151 
2152 	if (cgp->cg_freeoff != exp_freeoff) {
2153 		cgp->cg_freeoff = exp_freeoff;
2154 	}
2155 
2156 	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2157 		cgp->cg_nextfreeoff = exp_nextfreeoff;
2158 	}
2159 
2160 	/*
2161 	 * We know there was at least one correctable problem,
2162 	 * or else we wouldn't have been called.  So instead of
2163 	 * marking the buffer dirty N times above, just do it
2164 	 * once here.
2165 	 */
2166 	cgdirty();
2167 }
2168 
2169 void
2170 examinelog(daddr32_t start, void (*cb)(daddr32_t))
2171 {
2172 	struct bufarea *bp;
2173 	extent_block_t *ebp;
2174 	extent_t *ep;
2175 	daddr32_t nfno, fno;
2176 	int i;
2177 	int j;
2178 
2179 	if (start < SBLOCK)
2180 		return;
2181 
2182 	/*
2183 	 * Read errors will return zeros, which will cause us
2184 	 * to do nothing harmful, so don't need to handle it.
2185 	 */
2186 	bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2187 			(size_t)sblock.fs_bsize);
2188 	ebp = (void *)bp->b_un.b_buf;
2189 
2190 	/*
2191 	 * Does it look like a log allocation table?
2192 	 */
2193 	/* LINTED pointer cast is aligned */
2194 	if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2195 	    sblock.fs_bsize))
2196 		return;
2197 	if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2198 		return;
2199 
2200 	ep = &ebp->extents[0];
2201 	for (i = 0; i < ebp->nextents; ++i, ++ep) {
2202 		fno = logbtofrag(&sblock, ep->pbno);
2203 		nfno = dbtofsb(&sblock, ep->nbno);
2204 		for (j = 0; j < nfno; ++j, ++fno) {
2205 			/*
2206 			 * Invoke the callback first, so that pass1 can
2207 			 * mark the log blocks in-use.  Then, if any
2208 			 * subsequent pass over the log shows us that a
2209 			 * block got freed (say, it was also claimed by
2210 			 * an inode that we cleared), we can safely declare
2211 			 * the log bad.
2212 			 */
2213 			if (cb != NULL)
2214 				(*cb)(fno);
2215 			if (!testbmap(fno))
2216 				islogok = 0;
2217 		}
2218 	}
2219 	brelse(bp);
2220 
2221 	if (cb != NULL) {
2222 		fno = logbtofrag(&sblock, sblock.fs_logbno);
2223 		for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2224 			(*cb)(fno);
2225 	}
2226 }
2227 
2228 static void
2229 freelogblk(daddr32_t frag)
2230 {
2231 	freeblk(sblock.fs_logbno, frag, 1);
2232 }
2233 
2234 caddr_t
2235 file_id(fsck_ino_t inum, mode_t mode)
2236 {
2237 	static char name[MAXPATHLEN + 1];
2238 
2239 	if (lfdir == inum) {
2240 		return (lfname);
2241 	}
2242 
2243 	if ((mode & IFMT) == IFDIR) {
2244 		(void) strcpy(name, "DIR");
2245 	} else if ((mode & IFMT) == IFATTRDIR) {
2246 		(void) strcpy(name, "ATTR DIR");
2247 	} else if ((mode & IFMT) == IFSHAD) {
2248 		(void) strcpy(name, "ACL");
2249 	} else {
2250 		(void) strcpy(name, "FILE");
2251 	}
2252 
2253 	return (name);
2254 }
2255 
2256 /*
2257  * Simple initializer for inodesc structures, so users of only a few
2258  * fields don't have to worry about getting the right defaults for
2259  * everything out.
2260  */
2261 void
2262 init_inodesc(struct inodesc *idesc)
2263 {
2264 	/*
2265 	 * Most fields should be zero, just hit the special cases.
2266 	 */
2267 	(void) memset((void *)idesc, 0, sizeof (struct inodesc));
2268 	idesc->id_fix = DONTKNOW;
2269 	idesc->id_lbn = -1;
2270 	idesc->id_truncto = -1;
2271 	idesc->id_firsthole = -1;
2272 }
2273 
2274 /*
2275  * Compare routine for tsearch(C) to use on ino_t instances.
2276  */
2277 int
2278 ino_t_cmp(const void *left, const void *right)
2279 {
2280 	const fsck_ino_t lino = (const fsck_ino_t)left;
2281 	const fsck_ino_t rino = (const fsck_ino_t)right;
2282 
2283 	return (lino - rino);
2284 }
2285 
2286 int
2287 cgisdirty(void)
2288 {
2289 	return (cgblk.b_dirty);
2290 }
2291 
2292 void
2293 cgflush(void)
2294 {
2295 	flush(fswritefd, &cgblk);
2296 }
2297 
2298 void
2299 dirty(struct bufarea *bp)
2300 {
2301 	if (fswritefd < 0) {
2302 		pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n");
2303 	} else {
2304 		(bp)->b_dirty = 1;
2305 		isdirty = 1;
2306 	}
2307 }
2308 
2309 void
2310 initbarea(struct bufarea *bp)
2311 {
2312 	(bp)->b_dirty = 0;
2313 	(bp)->b_bno = (diskaddr_t)-1LL;
2314 	(bp)->b_flags = 0;
2315 	(bp)->b_cnt = 0;
2316 	(bp)->b_errs = 0;
2317 }
2318 
2319 /*
2320  * Partition-sizing routines adapted from ../newfs/newfs.c.
2321  * Needed because calcsb() needs to use mkfs to work out what the
2322  * superblock should be, and mkfs insists on being told how many
2323  * sectors to use.
2324  *
2325  * Error handling assumes we're never called while preening.
2326  *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
2331  */
2332 
/*
 * How the size of the device was determined.  Set by
 * get_device_size() to one of the LABEL_TYPE_* values below each time
 * it succeeds, and consulted by getdisksize().
 */
static int label_type;

#define	LABEL_TYPE_VTOC		1	/* read_vtoc() succeeded */
#define	LABEL_TYPE_EFI		2	/* efi_alloc_and_read() succeeded */
#define	LABEL_TYPE_OTHER	3	/* no label read; size was probed */

#define	MB			(1024 * 1024)
/* 2^31 sectors; one terabyte if sectors are 512 bytes -- TODO confirm */
#define	SECTORS_PER_TERABYTE	(1LL << 31)
/* Bound on the sector offsets probed by brute_force_get_device_size() */
#define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
2342 
2343 diskaddr_t
2344 getdisksize(caddr_t disk, int fd)
2345 {
2346 	int rpm;
2347 	struct dk_geom g;
2348 	struct dk_cinfo ci;
2349 	diskaddr_t actual_size;
2350 
2351 	/*
2352 	 * get_device_size() determines the actual size of the
2353 	 * device, and also the disk's attributes, such as geometry.
2354 	 */
2355 	actual_size = get_device_size(fd, disk);
2356 
2357 	if (label_type == LABEL_TYPE_VTOC) {
2358 		if (ioctl(fd, DKIOCGGEOM, &g)) {
2359 			pwarn("%s: Unable to read Disk geometry", disk);
2360 			return (0);
2361 		}
2362 		if (sblock.fs_nsect == 0)
2363 			sblock.fs_nsect = g.dkg_nsect;
2364 		if (sblock.fs_ntrak == 0)
2365 			sblock.fs_ntrak = g.dkg_nhead;
2366 		if (sblock.fs_rps == 0) {
2367 			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2368 			sblock.fs_rps = rpm / 60;
2369 		}
2370 	}
2371 
2372 	if (sblock.fs_bsize == 0)
2373 		sblock.fs_bsize = MAXBSIZE;
2374 
2375 	/*
2376 	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2377 	 * information is not available, default to the min of a MB and
2378 	 * maxphys.
2379 	 */
2380 	if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2381 		sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2382 		if (sblock.fs_maxcontig < 0) {
2383 			int gotit, maxphys;
2384 
2385 			gotit = fsgetmaxphys(&maxphys, NULL);
2386 
2387 			/*
2388 			 * If we cannot get the maxphys value, default
2389 			 * to ufs_maxmaxphys (MB).
2390 			 */
2391 			if (gotit) {
2392 				sblock.fs_maxcontig = MIN(maxphys, MB);
2393 			} else {
2394 				sblock.fs_maxcontig = MB;
2395 			}
2396 		}
2397 		sblock.fs_maxcontig /= sblock.fs_bsize;
2398 	}
2399 
2400 	return (actual_size);
2401 }
2402 
2403 /*
2404  * Figure out how big the partition we're dealing with is.
2405  */
2406 static diskaddr_t
2407 get_device_size(int fd, caddr_t name)
2408 {
2409 	struct vtoc vtoc;
2410 	struct dk_gpt *efi_vtoc;
2411 	diskaddr_t slicesize = 0;
2412 
2413 	int index = read_vtoc(fd, &vtoc);
2414 
2415 	if (index >= 0) {
2416 		label_type = LABEL_TYPE_VTOC;
2417 	} else {
2418 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2419 			/* it might be an EFI label */
2420 			index = efi_alloc_and_read(fd, &efi_vtoc);
2421 			if (index >= 0)
2422 				label_type = LABEL_TYPE_EFI;
2423 		}
2424 	}
2425 
2426 	if (index < 0) {
2427 		/*
2428 		 * Since both attempts to read the label failed, we're
2429 		 * going to fall back to a brute force approach to
2430 		 * determining the device's size:  see how far out we can
2431 		 * perform reads on the device.
2432 		 */
2433 
2434 		slicesize = brute_force_get_device_size(fd);
2435 		if (slicesize == 0) {
2436 			switch (index) {
2437 			case VT_ERROR:
2438 				pwarn("%s: %s\n", name, strerror(errno));
2439 				break;
2440 			case VT_EIO:
2441 				pwarn("%s: I/O error accessing VTOC", name);
2442 				break;
2443 			case VT_EINVAL:
2444 				pwarn("%s: Invalid field in VTOC", name);
2445 				break;
2446 			default:
2447 				pwarn("%s: unknown error %d accessing VTOC",
2448 				    name, index);
2449 				break;
2450 			}
2451 			return (0);
2452 		} else {
2453 			label_type = LABEL_TYPE_OTHER;
2454 		}
2455 	}
2456 
2457 	if (label_type == LABEL_TYPE_EFI) {
2458 		slicesize = efi_vtoc->efi_parts[index].p_size;
2459 		efi_free(efi_vtoc);
2460 	} else if (label_type == LABEL_TYPE_VTOC) {
2461 		/*
2462 		 * In the vtoc struct, p_size is a 32-bit signed quantity.
2463 		 * In the dk_gpt struct (efi's version of the vtoc), p_size
2464 		 * is an unsigned 64-bit quantity.  By casting the vtoc's
2465 		 * psize to an unsigned 32-bit quantity, it will be copied
2466 		 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without
2467 		 * sign extension.
2468 		 */
2469 
2470 		slicesize = (uint32_t)vtoc.v_part[index].p_size;
2471 	}
2472 
2473 	return (slicesize);
2474 }
2475 
2476 /*
2477  * brute_force_get_device_size
2478  *
2479  * Determine the size of the device by seeing how far we can
2480  * read.  Doing an llseek( , , SEEK_END) would probably work
2481  * in most cases, but we've seen at least one third-party driver
2482  * which doesn't correctly support the SEEK_END option when the
2483  * the device is greater than a terabyte.
2484  */
2485 
2486 static diskaddr_t
2487 brute_force_get_device_size(int fd)
2488 {
2489 	diskaddr_t	min_fail = 0;
2490 	diskaddr_t	max_succeed = 0;
2491 	diskaddr_t	cur_db_off;
2492 	char 		buf[DEV_BSIZE];
2493 
2494 	/*
2495 	 * First, see if we can read the device at all, just to
2496 	 * eliminate errors that have nothing to do with the
2497 	 * device's size.
2498 	 */
2499 
2500 	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2501 	    ((read(fd, buf, DEV_BSIZE)) == -1))
2502 		return (0);  /* can't determine size */
2503 
2504 	/*
2505 	 * Now, go sequentially through the multiples of 4TB
2506 	 * to find the first read that fails (this isn't strictly
2507 	 * the most efficient way to find the actual size if the
2508 	 * size really could be anything between 0 and 2**64 bytes.
2509 	 * We expect the sizes to be less than 16 TB for some time,
2510 	 * so why do a bunch of reads that are larger than that?
2511 	 * However, this algorithm *will* work for sizes of greater
2512 	 * than 16 TB.  We're just not optimizing for those sizes.)
2513 	 */
2514 
2515 	/*
2516 	 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2517 	 * We're using > 32-bit constants here.  Therefore, its flow
2518 	 * analysis is wrong.  For the time being, ignore complaints
2519 	 * from it about the body of the for() being unreached.
2520 	 */
2521 	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2522 	    (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2523 	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2524 		if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2525 		    SEEK_SET) == -1) ||
2526 		    (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2527 			min_fail = cur_db_off;
2528 		else
2529 			max_succeed = cur_db_off;
2530 	}
2531 
2532 	/*
2533 	 * XXX Same lint flow analysis problem as above.
2534 	 */
2535 	if (min_fail == 0)
2536 		return (0);
2537 
2538 	/*
2539 	 * We now know that the size of the device is less than
2540 	 * min_fail and greater than or equal to max_succeed.  Now
2541 	 * keep splitting the difference until the actual size in
2542 	 * sectors in known.  We also know that the difference
2543 	 * between max_succeed and min_fail at this time is
2544 	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2545 	 * simplifies the math below.
2546 	 */
2547 
2548 	while (min_fail - max_succeed > 1) {
2549 		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2550 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2551 		    SEEK_SET)) == -1) ||
2552 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2553 			min_fail = cur_db_off;
2554 		else
2555 			max_succeed = cur_db_off;
2556 	}
2557 
2558 	/* the size is the last successfully read sector offset plus one */
2559 	return (max_succeed + 1);
2560 }
2561 
2562 static void
2563 vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2564 {
2565 	struct dinode *dp;
2566 	char pathbuf[MAXPATHLEN + 1];
2567 
2568 	vpwarn(fmt, ap);
2569 	(void) putchar(' ');
2570 	pinode(ino);
2571 	(void) printf("\n");
2572 	getpathname(pathbuf, cwd, ino);
2573 	if (ino < UFSROOTINO || ino > maxino) {
2574 		pfatal("NAME=%s\n", pathbuf);
2575 		return;
2576 	}
2577 	dp = ginode(ino);
2578 	if (ftypeok(dp))
2579 		pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2580 	else
2581 		pfatal("NAME=%s\n", pathbuf);
2582 }
2583 
2584 void
2585 direrror(fsck_ino_t ino, caddr_t fmt, ...)
2586 {
2587 	va_list ap;
2588 
2589 	va_start(ap, fmt);
2590 	vfileerror(ino, ino, fmt, ap);
2591 	va_end(ap);
2592 }
2593 
/*
 * va_list counterpart of direrror(): calls vfileerror() with ino as
 * both its cwd and ino arguments.
 */
static void
vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
{
	vfileerror(ino, ino, fmt, ap);
}
2599 
2600 void
2601 fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2602 {
2603 	va_list ap;
2604 
2605 	va_start(ap, fmt);
2606 	vfileerror(cwd, ino, fmt, ap);
2607 	va_end(ap);
2608 }
2609 
2610 /*
2611  * Adds the given inode to the orphaned-directories list, limbo_dirs.
2612  * Assumes that the caller has set INCLEAR in the inode's statemap[]
2613  * entry.
2614  *
2615  * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2616  * meaning it's effectively an orphan.  It needs to be noted now, so
2617  * it will be remembered in pass 4.
2618  */
2619 
2620 void
2621 add_orphan_dir(fsck_ino_t ino)
2622 {
2623 	if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2624 		errexit("add_orphan_dir: out of memory");
2625 }
2626 
/*
 * Remove an inode from the orphaned-directories list, limbo_dirs,
 * presumably because it's been cleared.
 *
 * The result of tdelete() is discarded, so nothing is reported if
 * the inode was not on the list.
 */
void
remove_orphan_dir(fsck_ino_t ino)
{
	(void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
}
2636 
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 *
 * log_setsum() stores in *sp the sum of the 32-bit words in the nb
 * bytes starting at lp.  Any bytes beyond the last whole word are
 * ignored, and a non-positive nb gives a sum of zero.
 *
 * *sp is cleared before the words are added up, so sp may point at
 * a word inside the area being summed; that word then contributes
 * nothing to the result.
 *
 * The sum wraps around modulo 2^32.  It is accumulated as an
 * unsigned value because overflow of a signed sum is undefined.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	int nwords = nb / (int)sizeof (int32_t);
	int i;

	*sp = 0;
	for (i = 0; i < nwords; i++)
		csum += (uint32_t)lp[i];
	*sp = (int32_t)csum;
}
2652 
/*
 * Check the checksum stored in *sp against one freshly computed by
 * log_setsum() over the nb bytes at lp.
 *
 * Returns 1 if the two agree and 0 if they do not.  *sp is used as
 * scratch space for the computation; on a mismatch the stored value
 * is put back, so *sp holds its original value on return either way.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;
	int matches;

	log_setsum(sp, lp, nb);
	matches = (*sp == stored);
	if (!matches)
		*sp = stored;
	return (matches);
}
2665