xref: /titanic_44/usr/src/cmd/backup/dump/dumpmain.c (revision ff22156cd2908738696ecaa37cd21cd17ae909e9)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved.  The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #pragma ident	"%Z%%M%	%I%	%E% SMI"
16 
17 #include "dump.h"
18 #include <rmt.h>
19 #include <sys/mtio.h>
20 #include <limits.h>
21 #include <priv_utils.h>
22 #include "roll_log.h"
23 
24 int	notify = 0;		/* notify operator flag */
25 int	blockswritten = 0;	/* number of blocks written on current tape */
26 uint_t	tapeno = 0;		/* current tape number */
27 daddr32_t filenum = 0;		/* current file number on tape */
28 int	density = 0;		/* density in bytes/0.1" */
29 int	tenthsperirg;		/* inter-record-gap in 0.1"'s */
30 uint_t	ntrec = 0;		/* # tape blocks in each tape record */
31 uint_t	saved_ntrec = 0;	/* saved value of ntrec */
32 uint_t	forceflag = 0;		/* forced to change tp_bsize */
33 int	cartridge = 0;		/* assume non-cartridge tape */
34 uint_t	tracks;			/* # tracks on a cartridge tape */
35 int	diskette = 0;		/* assume not dumping to a diskette */
36 int	printsize = 0;		/* just print estimated size and exit */
37 int	mapfd = -1;		/* if >= 0, file descriptor for mmap */
38 int32_t	tp_bsize = TP_BSIZE_MIN; /* tape block record size (frag size) */
39 #ifdef DEBUG
40 int	xflag;			/* debugging switch */
41 #endif
42 
43 char	*myname;
44 
45 /*
46  * This should be struct fs, but there are trailing bits on disk
47  * that we also need to read in as part of it.  It's an array of
48  * longs instead of char to force proper alignment.
49  */
50 static long sblock_buf[SBSIZE/sizeof (long)];
51 
52 #ifdef __STDC__
53 static char *mb(u_offset_t);
54 static void nextstate(int);
55 #else
56 static char *mb();
57 static void nextstate();
58 #endif
59 
60 extern	jmp_buf checkpoint_buf;	/* context for return from checkpoint */
61 #define	FUDGE_FACTOR	0x2000000
62 
63 int
64 main(int argc, char *argv[])
65 {
66 	char		*arg;
67 	int		bflag = 0, i, error = 0, saverr;
68 	double		fetapes = 0.0;
69 	struct	mnttab	*dt;
70 	char		msgbuf[3000], *msgp;
71 	char		kbsbuf[BUFSIZ];
72 	u_offset_t	esize_shift = 0;
73 	int32_t	new_mult = 0;
74 	time32_t	snapdate;
75 
76 	host = NULL;
77 
78 	if (myname = strrchr(argv[0], '/'))
79 		myname++;
80 	else
81 		myname = argv[0];
82 
83 	if (strcmp("hsmdump", myname) == 0) {
84 		msg(gettext("hsmdump emulation is no longer supported.\n"));
85 		Exit(X_ABORT);
86 	}
87 
88 	tape = DEFTAPE;
89 	autoload_period = 12;
90 	autoload_tries = 12;	/* traditional default of ~2.5 minutes */
91 
92 	(void) setlocale(LC_ALL, "");
93 #if !defined(TEXT_DOMAIN)
94 #define	TEXT_DOMAIN "SYS_TEST"
95 #endif  /* TEXT_DOMAIN */
96 	(void) textdomain(TEXT_DOMAIN);
97 
98 	/*
99 	 * If someone strips the set-uid bit, dump will still work for local
100 	 * tapes.  Fail when we try to access a remote tape.
101 	 */
102 	(void) __init_suid_priv(0, PRIV_NET_PRIVADDR, (char *)NULL);
103 
104 	if (sysinfo(SI_HOSTNAME, spcl.c_host, sizeof (spcl.c_host)) < 0) {
105 		saverr = errno;
106 		msg(gettext("Could not get host name: %s\n"),
107 		    strerror(saverr));
108 		bzero(spcl.c_host, sizeof (spcl.c_host));
109 	}
110 
111 	dumppid = getpid();
112 	tsize = 0;	/* no default size, detect EOT dynamically */
113 
114 	disk = NULL;
115 	dname = NULL;
116 	disk_dynamic = 0;
117 	increm = NINCREM;
118 	incno = '9';
119 	uflag = 0;
120 	arg = "u";
121 	tlabel = "none";
122 	if (argc > 1) {
123 		argv++;
124 		argc--;
125 		arg = *argv;
126 		if (*arg == '-')
127 			arg++;
128 	}
129 	while (*arg)
130 	switch (*arg++) {		/* BE CAUTIOUS OF FALLTHROUGHS */
131 	case 'M':
132 		/*
133 		 * This undocumented option causes each process to
134 		 * mkdir debug_chdir/getpid(), and chdir to it.  This is
135 		 * to ease the collection of profiling information and
136 		 * core dumps.
137 		 */
138 		if (argc > 1) {
139 			argv++;
140 			argc--;
141 			debug_chdir = *argv;
142 			msg(gettext(
143 			    "Each process shall try to chdir to %s/<pid>\n"),
144 			    debug_chdir);
145 			child_chdir();
146 		} else {
147 			msg(gettext("Missing move-to-dir (M) name\n"));
148 			dumpabort();
149 			/*NOTREACHED*/
150 		}
151 		break;
152 
153 	case 'w':
154 		lastdump('w');		/* tell us only what has to be done */
155 		exit(0);
156 		break;
157 
158 	case 'W':			/* what to do */
159 		lastdump('W');		/* tell state of what has been done */
160 		exit(0);		/* do nothing else */
161 		break;
162 
163 	case 'T':
164 		if (argc > 1) {
165 			int count;
166 			int multiplier;
167 			char units;
168 
169 			argv++;
170 			argc--;
171 			count = atoi(*argv);
172 			if (count < 1) {
173 				msg(gettext(
174 				    "Unreasonable autoload timeout period\n"));
175 				dumpabort();
176 				/*NOTREACHED*/
177 			}
178 			units = *(*argv + strlen(*argv) - 1);
179 			switch (units) {
180 			case 's':
181 				multiplier = 1;
182 				break;
183 			case 'h':
184 				multiplier = 3600;
185 				break;
186 			case '0': case '1': case '2': case '3': case '4':
187 			case '5': case '6': case '7': case '8': case '9':
188 			case 'm':
189 				multiplier = 60;
190 				break;
191 			default:
192 				msg(gettext(
193 				    "Unknown timeout units indicator `%c'\n"),
194 				    units);
195 				dumpabort();
196 				/*NOTREACHED*/
197 			}
198 			autoload_tries = 1 +
199 			    ((count * multiplier) / autoload_period);
200 		} else {
201 			msg(gettext("Missing autoload timeout period\n"));
202 			dumpabort();
203 			/*NOTREACHED*/
204 		}
205 		break;
206 
207 	case 'f':			/* output file */
208 		if (argc > 1) {
209 			argv++;
210 			argc--;
211 			tape = *argv;
212 			if (*tape == '\0') {
213 				msg(gettext("Bad output device name\n"));
214 				dumpabort();
215 				/*NOTREACHED*/
216 			}
217 		} else {
218 			msg(gettext("Missing output device name\n"));
219 			dumpabort();
220 			/*NOTREACHED*/
221 		}
222 		if (strcmp(tape, "-") == 0 && verify) {
223 			msg(gettext(
224 			"Cannot verify when dumping to standard out.\n"));
225 			dumpabort();
226 			/*NOTREACHED*/
227 		}
228 		break;
229 
230 	case 'd':			/* density, in bits per inch */
231 		if (argc > 1) {
232 			argv++;
233 			argc--;
234 			density = atoi(*argv) / 10;
235 			if (density <= 0) {
236 				msg(gettext(
237 				    "Density must be a positive integer\n"));
238 				dumpabort();
239 				/*NOTREACHED*/
240 			}
241 		} else {
242 			msg(gettext("Missing density\n"));
243 			dumpabort();
244 			/*NOTREACHED*/
245 		}
246 		break;
247 
248 	case 's':			/* tape size, feet */
249 		if (argc > 1) {
250 			argv++;
251 			argc--;
252 			tsize = atol(*argv);
253 			if ((*argv[0] == '-') || (tsize == 0)) {
254 				msg(gettext(
255 			    "Tape size must be a positive integer\n"));
256 				dumpabort();
257 				/*NOTREACHED*/
258 			}
259 		} else {
260 			msg(gettext("Missing tape size\n"));
261 			dumpabort();
262 			/*NOTREACHED*/
263 		}
264 		break;
265 
266 	case 't':			/* tracks */
267 		if (argc > 1) {
268 			argv++;
269 			argc--;
270 			tracks = atoi(*argv);
271 		} else {
272 			msg(gettext("Missing track count\n"));
273 			dumpabort();
274 			/*NOTREACHED*/
275 		}
276 		break;
277 
278 	case 'b':			/* blocks per tape write */
279 		if (argc > 1) {
280 			argv++;
281 			argc--;
282 			bflag++;
283 			/*
284 			 * We save the ntrec in case we need to change
285 			 * tp_bsize later, we will have to recalculate
286 			 * it.
287 			 */
288 			saved_ntrec = ntrec = atoi(*argv);
289 			if (ntrec == 0 || (ntrec&1) || ntrec > (MAXNTREC*2)) {
290 				msg(gettext(
291 		    "Block size must be a positive, even integer <= %d\n"),
292 				    MAXNTREC*2);
293 				dumpabort();
294 				/*NOTREACHED*/
295 			}
296 			ntrec /= (tp_bsize/DEV_BSIZE);
297 		} else {
298 			msg(gettext("Missing blocking factor\n"));
299 			dumpabort();
300 			/*NOTREACHED*/
301 		}
302 		break;
303 
304 	case 'c':			/* Tape is cart. not 9-track */
305 	case 'C':			/* 'C' to be consistent with 'D' */
306 		cartridge++;
307 		break;
308 
309 	case '0':			/* dump level */
310 	case '1':
311 	case '2':
312 	case '3':
313 	case '4':
314 	case '5':
315 	case '6':
316 	case '7':
317 	case '8':
318 	case '9':
319 		incno = arg[-1];
320 		break;
321 
322 	case 'u':			/* update /etc/dumpdates */
323 		uflag++;
324 		break;
325 
326 	case 'n':			/* notify operators */
327 		notify++;
328 		break;
329 
330 	case 'a':			/* create archive file */
331 		archive = 1;
332 		if (argc > 1) {
333 			argv++;
334 			argc--;
335 			if (**argv == '\0') {
336 				msg(gettext("Bad archive file name\n"));
337 				dumpabort();
338 				/*NOTREACHED*/
339 			}
340 			archivefile = strdup(*argv);
341 			if (archivefile == NULL) {
342 				saverr = errno;
343 				msg(gettext("Cannot allocate memory: %s\n"),
344 				    strerror(saverr));
345 				dumpabort();
346 				/*NOTREACHED*/
347 			}
348 		} else {
349 			msg(gettext("Missing archive file name\n"));
350 			dumpabort();
351 			/*NOTREACHED*/
352 		}
353 		break;
354 
355 	case 'v':
356 		verify++;
357 		doingverify++;
358 		if (strcmp(tape, "-") == 0) {
359 			msg(gettext(
360 			"Cannot verify when dumping to standard out.\n"));
361 			dumpabort();
362 			/*NOTREACHED*/
363 		}
364 		break;
365 
366 	case 'D':
367 		diskette++;
368 		break;
369 
370 	case 'N':
371 		if (argc > 1) {
372 			argv++;
373 			argc--;
374 			if (**argv == '\0') {
375 				msg(gettext("Missing name for dumpdates "
376 				    "entry.\n"));
377 				dumpabort();
378 				/*NOTREACHED*/
379 			}
380 			dname = *argv;
381 			if (strlen(dname) > MAXNAMLEN + 2) {
382 				msg(gettext("Dumpdates entry name too "
383 				    "long.\n"));
384 				dumpabort();
385 				/*NOTREACHED*/
386 			}
387 			for (i = 0; i < strlen(dname); i++) {
388 				if (isspace(*(dname+i))) {
389 					msg(gettext("Dumpdates entry name may "
390 					    "not contain white space.\n"));
391 					dumpabort();
392 					/*NOTREACHED*/
393 				}
394 			}
395 		} else {
396 			msg(gettext("Missing name for dumpdates entry.\n"));
397 			dumpabort();
398 			/*NOTREACHED*/
399 		}
400 		break;
401 	case 'L':
402 		if (argc > 1) {
403 			argv++;
404 			argc--;
405 			if (**argv == '\0') {
406 				msg(gettext("Missing tape label name\n"));
407 				dumpabort();
408 				/*NOTREACHED*/
409 			}
410 			tlabel = *argv;
411 			if (strlen(tlabel) > (sizeof (spcl.c_label) - 1)) {
412 				tlabel[sizeof (spcl.c_label) - 1] = '\0';
413 				msg(gettext(
414 		    "Truncating label to maximum supported length: `%s'\n"),
415 				    tlabel);
416 			}
417 		} else {
418 			msg(gettext("Missing tape label name\n"));
419 			dumpabort();
420 			/*NOTREACHED*/
421 		}
422 		break;
423 
424 	case 'l':
425 		autoload++;
426 		break;
427 
428 	case 'o':
429 		offline++;
430 		break;
431 
432 	case 'S':
433 		printsize++;
434 		break;
435 
436 #ifdef DEBUG
437 	case 'z':
438 		xflag++;
439 		break;
440 #endif
441 
442 	default:
443 		msg(gettext("Bad option `%c'\n"), arg[-1]);
444 		dumpabort();
445 		/*NOTREACHED*/
446 	}
447 	if (argc > 1) {
448 		argv++;
449 		argc--;
450 		if (**argv == '\0') {
451 			msg(gettext("Bad disk name\n"));
452 			dumpabort();
453 			/*NOTREACHED*/
454 		}
455 		disk = *argv;
456 		disk_dynamic = 0;
457 	}
458 	if (disk == NULL) {
459 		(void) fprintf(stderr, gettext(
460 	"Usage: %s [0123456789fustdWwnNDCcbavloS [argument]] filesystem\n"),
461 		    myname);
462 		Exit(X_ABORT);
463 	}
464 	if (!filenum)
465 		filenum = 1;
466 
467 	if (signal(SIGINT, interrupt) == SIG_IGN)
468 		(void) signal(SIGINT, SIG_IGN);
469 
470 	if (strcmp(tape, "-") == 0) {
471 		pipeout++;
472 		tape = gettext("standard output");
473 		dumpdev = sdumpdev = strdup(tape);
474 		if (dumpdev == NULL) {
475 			saverr = errno;
476 			msg(gettext("Cannot allocate memory: %s\n"),
477 			    strerror(saverr));
478 			dumpabort();
479 			/*NOTREACHED*/
480 		}
481 		/*CONSTANTCONDITION*/
482 		assert(sizeof (spcl.c_label) > 5);
483 		(void) strcpy(spcl.c_label, "none");
484 	} else if (*tape == '+') {
485 		nextdevice();
486 		(void) strcpy(spcl.c_label, tlabel);
487 	} else {
488 		/* if not already set, set diskette to default */
489 		if (diskette && strcmp(tape, DEFTAPE) == 0)
490 			tape = DISKETTE;
491 		nextdevice();
492 		(void) strcpy(spcl.c_label, tlabel);
493 	}
494 	if (cartridge && diskette) {
495 		error = 1;
496 		msg(gettext("Cannot select both cartridge and diskette\n"));
497 	}
498 	if (density && diskette) {
499 		error = 1;
500 		msg(gettext("Cannot select density of diskette\n"));
501 	}
502 	if (tracks && diskette) {
503 		error = 1;
504 		msg(gettext("Cannot select number of tracks of diskette\n"));
505 	}
506 	if (error) {
507 		dumpabort();
508 		/*NOTREACHED*/
509 	}
510 
511 	/*
512 	 * Determine how to default tape size and density
513 	 *
514 	 *		density				tape size
515 	 * 9-track	1600 bpi (160 bytes/.1")	2300 ft.
516 	 * 9-track	6250 bpi (625 bytes/.1")	2300 ft.
517 	 *
518 	 * Most Sun-2's came with 4 track (20MB) cartridge tape drives,
519 	 * while most other machines (Sun-3's and non-Sun's) come with
520 	 * 9 track (45MB) cartridge tape drives.  Some Sun-2's came with
521 	 * 9 track drives, but there is no way for the software to detect
522 	 * which drive type is installed.  Sigh...  We make the gross
523 	 * assumption that #ifdef mc68010 will test for a Sun-2.
524 	 *
525 	 * cartridge	8000 bpi (100 bytes/.1")	425 * tracks ft.
526 	 */
527 	if (density == 0)
528 		density = cartridge ? 100 : 625;
529 	if (tracks == 0)
530 		tracks = 9;
531 	if (!bflag) {
532 		if (cartridge)
533 			ntrec = CARTRIDGETREC;
534 		else if (diskette)
535 			ntrec = NTREC;
536 		else if (density >= 625)
537 			ntrec = HIGHDENSITYTREC;
538 		else
539 			ntrec = NTREC;
540 		/*
541 		 * save ntrec in case we have to change tp_bsize later.
542 		 */
543 		saved_ntrec = (ntrec * (tp_bsize/DEV_BSIZE));
544 	}
545 	if (!diskette) {
546 		tsize *= 12L*10L;
547 		if (cartridge)
548 			tsize *= tracks;
549 	}
550 	rmtinit(msg, Exit);
551 	if (host) {
552 		char	*cp = strchr(host, '@');
553 		if (cp == (char *)0)
554 			cp = host;
555 		else
556 			cp++;
557 
558 		if (rmthost(host, ntrec) == 0) {
559 			msg(gettext("Cannot connect to tape host `%s'\n"), cp);
560 			dumpabort();
561 			/*NOTREACHED*/
562 		}
563 	}
564 	if (signal(SIGHUP, sigAbort) == SIG_IGN)
565 		(void) signal(SIGHUP, SIG_IGN);
566 	if (signal(SIGTRAP, sigAbort) == SIG_IGN)
567 		(void) signal(SIGTRAP, SIG_IGN);
568 	if (signal(SIGFPE, sigAbort) == SIG_IGN)
569 		(void) signal(SIGFPE, SIG_IGN);
570 	if (signal(SIGBUS, sigAbort) == SIG_IGN)
571 		(void) signal(SIGBUS, SIG_IGN);
572 	if (signal(SIGSEGV, sigAbort) == SIG_IGN)
573 		(void) signal(SIGSEGV, SIG_IGN);
574 	if (signal(SIGTERM, sigAbort) == SIG_IGN)
575 		(void) signal(SIGTERM, SIG_IGN);
576 	if (signal(SIGUSR1, sigAbort) == SIG_IGN)
577 		(void) signal(SIGUSR1, SIG_IGN);
578 	if (signal(SIGPIPE, sigAbort) == SIG_IGN)
579 		(void) signal(SIGPIPE, SIG_IGN);
580 
581 	mnttabread();		/* /etc/fstab, /etc/mtab snarfed */
582 
583 	/*
584 	 *	disk can be either the full special file name,
585 	 *	the suffix of the special file name,
586 	 *	the special name missing the leading '/',
587 	 *	the file system name with or without the leading '/'.
588 	 *	NB:  we attempt to avoid dumping the block device
589 	 *	(using rawname) because specfs and the vm system
590 	 *	are not necessarily in sync.
591 	 */
592 
593 	/*
594 	 * Attempt to roll the log before doing the dump.  There's nothing
595 	 * the user can do if we are unable to roll the log, so we'll silently
596 	 * ignore failures.
597 	 */
598 	if ((rl_roll_log(disk) != RL_SUCCESS) && (disk[0] != '/')) {
599 		/* Try it again with leading '/'. */
600 		char	*slashed;
601 
602 		slashed = (char *)malloc(strlen(disk) + 2);
603 		if (slashed != (char *)NULL) {
604 			(void) sprintf(slashed, "%c%s", '/', disk);
605 			(void) rl_roll_log(slashed);
606 			free(slashed);
607 		}
608 	}
609 	dt = mnttabsearch(disk, 0);
610 	if (dt != 0) {
611 		filesystem = dt->mnt_mountp;
612 		if (disk_dynamic) {
613 			/* LINTED: disk is not NULL */
614 			free(disk);
615 		}
616 		disk = rawname(dt->mnt_special);
617 		disk_dynamic = (disk != dt->mnt_special);
618 
619 		(void) strncpy(spcl.c_dev, dt->mnt_special,
620 		    sizeof (spcl.c_dev));
621 		spcl.c_dev[sizeof (spcl.c_dev) - 1] = '\0';
622 		(void) strncpy(spcl.c_filesys, dt->mnt_mountp,
623 		    sizeof (spcl.c_filesys));
624 		spcl.c_filesys[sizeof (spcl.c_filesys) - 1] = '\0';
625 	} else {
626 		(void) strncpy(spcl.c_dev, disk, sizeof (spcl.c_dev));
627 		spcl.c_dev[sizeof (spcl.c_dev) - 1] = '\0';
628 #ifdef PARTIAL
629 		/* check for partial filesystem dump */
630 		partial_check();
631 		dt = mnttabsearch(disk, 1);
632 		if (dt != 0) {
633 			filesystem = dt->mnt_mountp;
634 			if (disk_dynamic)
635 				free(disk);
636 			disk = rawname(dt->mnt_special);
637 			disk_dynamic = (disk != dt->mnt_special);
638 
639 			(void) strncpy(spcl.c_filesys,
640 			    "a partial file system", sizeof (spcl.c_filesys));
641 			spcl.c_filesys[sizeof (spcl.c_filesys) - 1] = '\0';
642 		}
643 		else
644 #endif /* PARTIAL */
645 		{
646 			char *old_disk = disk;
647 
648 			(void) strncpy(spcl.c_filesys,
649 			    "an unlisted file system",
650 			    sizeof (spcl.c_filesys));
651 			spcl.c_filesys[sizeof (spcl.c_filesys) - 1] = '\0';
652 
653 			disk = rawname(old_disk);
654 			if (disk != old_disk) {
655 				if (disk_dynamic)
656 					free(old_disk);
657 				disk_dynamic = 1;
658 			}
659 			/*
660 			 * If disk == old_disk, then disk_dynamic's state
661 			 * does not change.
662 			 */
663 		}
664 	}
665 
666 	fi = open64(disk, O_RDONLY);
667 
668 	if (fi < 0) {
669 		saverr = errno;
670 		msg(gettext("Cannot open dump device `%s': %s\n"),
671 			disk, strerror(saverr));
672 		Exit(X_ABORT);
673 	}
674 
675 	if (sscanf(&incno, "%1d", &spcl.c_level) != 1) {
676 		msg(gettext("Bad dump level `%c' specified\n"), incno);
677 		dumpabort();
678 		/*NOTREACHED*/
679 	}
680 	getitime();		/* /etc/dumpdates snarfed */
681 
682 	sblock = (struct fs *)&sblock_buf;
683 	sync();
684 
685 	bread((diskaddr_t)SBLOCK, (uchar_t *)sblock, (long)SBSIZE);
686 	if ((sblock->fs_magic != FS_MAGIC) &&
687 	    (sblock->fs_magic != MTB_UFS_MAGIC)) {
688 		msg(gettext(
689 	    "Warning - super-block on device `%s' is corrupt - run fsck\n"),
690 		    disk);
691 		dumpabort();
692 		/*NOTREACHED*/
693 	}
694 
695 	if (sblock->fs_magic == FS_MAGIC &&
696 	    (sblock->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
697 	    sblock->fs_version != UFS_VERSION_MIN)) {
698 		msg(gettext("Unrecognized UFS version: %d\n"),
699 		    sblock->fs_version);
700 		dumpabort();
701 		/*NOTREACHED*/
702 	}
703 
704 	if (sblock->fs_magic == MTB_UFS_MAGIC &&
705 	    (sblock->fs_version < MTB_UFS_VERSION_MIN ||
706 	    sblock->fs_version > MTB_UFS_VERSION_1)) {
707 		msg(gettext("Unrecognized UFS version: %d\n"),
708 		    sblock->fs_version);
709 		dumpabort();
710 		/*NOTREACHED*/
711 	}
712 
713 	/*
714 	 * Try to set up for using mmap(2).  It only works on the block
715 	 * device, but if we can use it, things go somewhat faster.  If
716 	 * we can't open it, we'll silently fall back to the old method
717 	 * (read/memcpy). We also only try this if it's been cleanly
718 	 * unmounted. Dumping a live filesystem this way runs into
719 	 * buffer consistency problems. Of course, we don't support
720 	 * running dump on a mounted filesystem, but some people do it
721 	 * anyway.
722 	 */
723 	if (sblock->fs_clean == FSCLEAN) {
724 		char *block = unrawname(disk);
725 
726 		if (block != NULL) {
727 			mapfd = open(block, O_RDONLY, 0);
728 			free(block);
729 		}
730 	}
731 
732 restart:
733 	bread((diskaddr_t)SBLOCK, (uchar_t *)sblock, (long)SBSIZE);
734 	if ((sblock->fs_magic != FS_MAGIC) &&
735 	    (sblock->fs_magic != MTB_UFS_MAGIC)) {	/* paranoia */
736 		msg(gettext("bad super-block magic number, run fsck\n"));
737 		dumpabort();
738 		/*NOTREACHED*/
739 	}
740 
741 	if (sblock->fs_magic == FS_MAGIC &&
742 	    (sblock->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
743 	    sblock->fs_version != UFS_VERSION_MIN)) {
744 		msg(gettext("Unrecognized UFS version: %d\n"),
745 		    sblock->fs_version);
746 		dumpabort();
747 		/*NOTREACHED*/
748 	}
749 
750 	if (sblock->fs_magic == MTB_UFS_MAGIC &&
751 	    (sblock->fs_version < MTB_UFS_VERSION_MIN ||
752 	    sblock->fs_version > MTB_UFS_VERSION_1)) {
753 		msg(gettext("Unrecognized UFS version: %d\n"),
754 		    sblock->fs_version);
755 		dumpabort();
756 		/*NOTREACHED*/
757 	}
758 
759 	if (!doingactive)
760 		allocino();
761 
762 	/* XXX should sanity-check the super block before trusting/using it */
763 
764 	/* LINTED XXX time truncated - tolerate until tape format changes */
765 	spcl.c_date = (time32_t)time((time_t *)NULL);
766 	bcopy(&(spcl.c_shadow), c_shadow_save, sizeof (c_shadow_save));
767 
768 	snapdate = is_fssnap_dump(disk);
769 	if (snapdate)
770 		spcl.c_date = snapdate;
771 
772 	if (!printsize) {
773 		msg(gettext("Date of this level %c dump: %s\n"),
774 		    incno, prdate(spcl.c_date));
775 		msg(gettext("Date of last level %c dump: %s\n"),
776 			(uchar_t)lastincno, prdate(spcl.c_ddate));
777 		msg(gettext("Dumping %s "), disk);
778 		if (filesystem != 0)
779 			msgtail("(%.*s:%s) ",
780 			    /* LINTED unsigned -> signed cast ok */
781 			    (int)sizeof (spcl.c_host), spcl.c_host, filesystem);
782 		msgtail(gettext("to %s.\n"), sdumpdev);
783 	}
784 
785 	esize = f_esize = o_esize = 0;
786 	msiz = roundup(d_howmany(sblock->fs_ipg * sblock->fs_ncg, NBBY),
787 		TP_BSIZE_MAX);
788 	if (!doingactive) {
789 		clrmap = (uchar_t *)xcalloc(msiz, sizeof (*clrmap));
790 		filmap = (uchar_t *)xcalloc(msiz, sizeof (*filmap));
791 		dirmap = (uchar_t *)xcalloc(msiz, sizeof (*dirmap));
792 		nodmap = (uchar_t *)xcalloc(msiz, sizeof (*nodmap));
793 		shamap = (uchar_t *)xcalloc(msiz, sizeof (*shamap));
794 		activemap = (uchar_t *)xcalloc(msiz, sizeof (*activemap));
795 	} else {
796 		if (clrmap == NULL || filmap == NULL || dirmap == NULL ||
797 		    nodmap == NULL || shamap == NULL || activemap == NULL) {
798 			msg(gettext(
799 	    "Internal error: NULL map pointer while re-dumping active files"));
800 			dumpabort();
801 			/*NOTREACHED*/
802 		}
803 		bzero(clrmap, msiz);
804 		bzero(filmap, msiz);
805 		bzero(dirmap, msiz);
806 		bzero(nodmap, msiz);
807 		bzero(shamap, msiz);
808 		/* retain active map */
809 	}
810 
811 	dumpstate = DS_INIT;
812 	dumptoarchive = 1;
813 
814 	/*
815 	 * Read cylinder group inode-used bitmaps to avoid reading clear inodes.
816 	 */
817 	{
818 		uchar_t *clrp = clrmap;
819 		struct cg *cgp =
820 		    (struct cg *)xcalloc((uint_t)sblock->fs_cgsize, 1);
821 
822 		for (i = 0; i < sblock->fs_ncg; i++) {
823 			bread(fsbtodb(sblock, cgtod(sblock, i)),
824 			    (uchar_t *)cgp, sblock->fs_cgsize);
825 			bcopy(cg_inosused(cgp), clrp,
826 			    (int)sblock->fs_ipg / NBBY);
827 			clrp += sblock->fs_ipg / NBBY;
828 		}
829 		free((char *)cgp);
830 		/* XXX right-shift clrmap one bit.  why? */
831 		for (i = 0; clrp > clrmap; i <<= NBBY) {
832 			i |= *--clrp & ((1<<NBBY) - 1);
833 			*clrp = i >> 1;
834 		}
835 	}
836 
837 	if (!printsize) {
838 		msgp = gettext("Mapping (Pass I) [regular files]\n");
839 		msg(msgp);
840 	}
841 
842 	ino = 0;
843 #ifdef PARTIAL
844 	if (partial_mark(argc, argv)) {
845 #endif /* PARTIAL */
846 		if (!doingactive)
847 			pass(mark, clrmap);	/* mark updates 'x'_esize */
848 		else
849 			pass(active_mark, clrmap);	/* updates 'x'_esize */
850 #ifdef PARTIAL
851 	}
852 #endif /* PARTIAL */
853 	do {
854 		if (!printsize) {
855 			msgp = gettext("Mapping (Pass II) [directories]\n");
856 			msg(msgp);
857 		}
858 		nadded = 0;
859 		ino = 0;
860 		pass(add, dirmap);
861 	} while (nadded);
862 
863 	ino = 0; /* adjust estimated size for shadow inodes */
864 	pass(markshad, nodmap);
865 	ino = 0;
866 	pass(estshad, shamap);
867 	freeshad();
868 
869 	bmapest(clrmap);
870 	bmapest(nodmap);
871 	esize = o_esize + f_esize;
872 	if (diskette) {
873 		/* estimate number of floppies */
874 		if (tsize != 0)
875 			fetapes = (double)(esize + ntrec) / (double)tsize;
876 	} else if (cartridge) {
877 		/*
878 		 * Estimate number of tapes, assuming streaming stops at
879 		 * the end of each block written, and not in mid-block.
880 		 * Assume no erroneous blocks; this can be compensated for
881 		 * with an artificially low tape size.
882 		 */
883 		tenthsperirg = 16;	/* actually 15.48, says Archive */
884 		if (tsize != 0)
885 			fetapes = ((double)esize /* blocks */
886 			    * (tp_bsize		/* bytes/block */
887 			    * (1.0/density))	/* 0.1" / byte */
888 			    +
889 			    (double)esize	/* blocks */
890 			    * (1.0/ntrec)	/* streaming-stops per block */
891 			    * tenthsperirg)	/* 0.1" / streaming-stop */
892 			    * (1.0 / tsize);	/* tape / 0.1" */
893 	} else {
894 		/* Estimate number of tapes, for old fashioned 9-track tape */
895 #ifdef sun
896 		/* sun has long irg's */
897 		tenthsperirg = (density == 625) ? 6 : 12;
898 #else
899 		tenthsperirg = (density == 625) ? 5 : 8;
900 #endif
901 		if (tsize != 0)
902 			fetapes = ((double)esize /* blocks */
903 			    * (tp_bsize		/* bytes / block */
904 			    * (1.0/density))	/* 0.1" / byte */
905 			    +
906 			    (double)esize	/* blocks */
907 			    * (1.0/ntrec)	/* IRG's / block */
908 			    * tenthsperirg)	/* 0.1" / IRG */
909 			    * (1.0 / tsize);	/* tape / 0.1" */
910 	}
911 
912 	etapes = fetapes;	/* truncating assignment */
913 	etapes++;
914 	/* count the nodemap on each additional tape */
915 	for (i = 1; i < etapes; i++)
916 		bmapest(nodmap);
917 	/*
918 	 * If the above bmapest is called, it changes o_esize and f_esize.
919 	 * So we will recalculate esize here anyway to make sure.
920 	 * Also, add tape headers and trailer records.
921 	 */
922 	esize = o_esize + f_esize + etapes + ntrec;
923 
924 	/*
925 	 * If the estimated number of tp_bsize tape blocks is greater than
926 	 * INT_MAX we have to adjust tp_bsize and ntrec to handle
927 	 * the larger dump.  esize is an estimate, so we 'fudge'
928 	 * INT_MAX a little.  If tp_bsize is adjusted, it will be adjusted
929 	 * to the size needed for this dump (2048, 4096, 8192, ...)
930 	 */
931 	if (esize > (INT_MAX - FUDGE_FACTOR)) { /* esize is too big */
932 		forceflag++;
933 		esize_shift =
934 		    ((esize + (INT_MAX - FUDGE_FACTOR) - 1)/
935 		    ((u_offset_t)(INT_MAX - FUDGE_FACTOR))) - 1;
936 		if ((esize_shift > ESIZE_SHIFT_MAX) || (ntrec == 0)) {
937 			msgp = gettext(
938 	"Block factor %d ('b' flag) is too small for this size dump.");
939 			msg(msgp, saved_ntrec);
940 			dumpabort();
941 			/*NOTREACHED*/
942 		}
943 		/*
944 		 * recalculate esize from:
945 		 * o_esize - header tape records
946 		 * (f_esize + (num_mult -1)) >> esize_shift - new non-header
947 		 *	tape records for files/maps
948 		 * etapes - TS_TAPE records
949 		 * ntrec - TS_END records
950 		 *
951 		 * ntrec is adjusted so a tape record is still 'b' flag
952 		 * number of DEV_BSIZE (512) in size
953 		 */
954 		new_mult = (tp_bsize << esize_shift)/tp_bsize;
955 		tp_bsize = (tp_bsize << esize_shift);
956 		esize = o_esize + ((f_esize +
957 		    (new_mult - 1)) >> esize_shift) + etapes + ntrec;
958 		ntrec = (saved_ntrec/(tp_bsize/DEV_BSIZE));
959 	}
960 	if (forceflag != 0) {
961 		msgp = gettext(
962 		    "Forcing larger tape block size (%d).\n");
963 		msg(msgp, tp_bsize);
964 	}
965 	alloctape();			/* allocate tape buffers */
966 
967 	assert((tp_bsize / DEV_BSIZE != 0) && (tp_bsize % DEV_BSIZE == 0));
968 	/*
969 	 * If all we wanted was the size estimate,
970 	 * just print it out and exit.
971 	 */
972 	if (printsize) {
973 		(void) printf("%llu\n", esize * tp_bsize);
974 		Exit(0);
975 	}
976 
977 	if (tsize != 0) {
978 		if (diskette)
979 			msgp = gettext(
980 			    "Estimated %lld blocks (%s) on %3.2f diskettes.\n");
981 		else
982 			msgp = gettext(
983 			    "Estimated %lld blocks (%s) on %3.2f tapes.\n");
984 
985 		msg(msgp,
986 		    (esize*(tp_bsize/DEV_BSIZE)), mb(esize), fetapes);
987 	} else {
988 		msgp = gettext("Estimated %lld blocks (%s).\n");
989 		msg(msgp, (esize*(tp_bsize/DEV_BSIZE)), mb(esize));
990 	}
991 
992 	dumpstate = DS_CLRI;
993 
994 	otape(1);			/* bitmap is the first to tape write */
995 	*telapsed = 0;
996 	(void) time(tstart_writing);
997 
998 	/* filmap indicates all non-directory inodes */
999 	{
1000 		uchar_t *np, *fp, *dp;
1001 		np = nodmap;
1002 		dp = dirmap;
1003 		fp = filmap;
1004 		for (i = 0; i < msiz; i++)
1005 			*fp++ = *np++ ^ *dp++;
1006 	}
1007 
1008 	while (dumpstate != DS_DONE) {
1009 		/*
1010 		 * When we receive EOT notification from
1011 		 * the writer, the signal handler calls
1012 		 * rollforward and then jumps here.
1013 		 */
1014 		(void) setjmp(checkpoint_buf);
1015 		switch (dumpstate) {
1016 		case DS_INIT:
1017 			/*
1018 			 * We get here if a tape error occurred
1019 			 * after releasing the name lock but before
1020 			 * the volume containing the last of the
1021 			 * dir info was completed.  We have to start
1022 			 * all over in this case.
1023 			 */
1024 			{
1025 				char *rmsg = gettext(
1026 		"Warning - output error occurred after releasing name lock\n\
1027 \tThe dump will restart\n");
1028 				msg(rmsg);
1029 				goto restart;
1030 			}
1031 			/* NOTREACHED */
1032 		case DS_START:
1033 		case DS_CLRI:
1034 			ino = UFSROOTINO;
1035 			dumptoarchive = 1;
1036 			bitmap(clrmap, TS_CLRI);
1037 			nextstate(DS_BITS);
1038 			/* FALLTHROUGH */
1039 		case DS_BITS:
1040 			ino = UFSROOTINO;
1041 			dumptoarchive = 1;
1042 			if (BIT(UFSROOTINO, nodmap))	/* empty dump check */
1043 				bitmap(nodmap, TS_BITS);
1044 			nextstate(DS_DIRS);
1045 			if (!doingverify) {
1046 				msgp = gettext(
1047 					"Dumping (Pass III) [directories]\n");
1048 				msg(msgp);
1049 			}
1050 			/* FALLTHROUGH */
1051 		case DS_DIRS:
1052 			dumptoarchive = 1;
1053 			pass(dirdump, dirmap);
1054 			nextstate(DS_FILES);
1055 			if (!doingverify) {
1056 				msgp = gettext(
1057 					"Dumping (Pass IV) [regular files]\n");
1058 				msg(msgp);
1059 			}
1060 			/* FALLTHROUGH */
1061 		case DS_FILES:
1062 			dumptoarchive = 0;
1063 
1064 			pass(lf_dump, filmap);
1065 
1066 			flushcmds();
1067 			dumpstate = DS_END;	/* don't reset ino */
1068 			/* FALLTHROUGH */
1069 		case DS_END:
1070 			dumptoarchive = 1;
1071 			spcl.c_type = TS_END;
1072 			for (i = 0; i < ntrec; i++) {
1073 				spclrec();
1074 			}
1075 			flusht();
1076 			break;
1077 		case DS_DONE:
1078 			break;
1079 		default:
1080 			msg(gettext("Internal state error\n"));
1081 			dumpabort();
1082 			/*NOTREACHED*/
1083 		}
1084 	}
1085 
1086 	if ((! doingactive) && (! active))
1087 		trewind();
1088 	if (verify && !doingverify) {
1089 		msgp = gettext("Finished writing last dump volume\n");
1090 		msg(msgp);
1091 		Exit(X_VERIFY);
1092 	}
1093 	if (spcl.c_volume > 1)
1094 		(void) snprintf(msgbuf, sizeof (msgbuf),
1095 		    gettext("%lld blocks (%s) on %ld volumes"),
1096 		    ((uint64_t)spcl.c_tapea*(tp_bsize/DEV_BSIZE)),
1097 		    mb((u_offset_t)(unsigned)(spcl.c_tapea)),
1098 		    spcl.c_volume);
1099 	else
1100 		(void) snprintf(msgbuf, sizeof (msgbuf),
1101 		    gettext("%lld blocks (%s) on 1 volume"),
1102 		    ((uint64_t)spcl.c_tapea*(tp_bsize/DEV_BSIZE)),
1103 		    mb((u_offset_t)(unsigned)(spcl.c_tapea)));
1104 	if (timeclock((time_t)0) != (time_t)0) {
1105 		(void) snprintf(kbsbuf, sizeof (kbsbuf),
1106 		    gettext(" at %ld KB/sec"),
1107 		    (long)(((float)spcl.c_tapea / (float)timeclock((time_t)0))
1108 			* 1000.0));
1109 		(void) strcat(msgbuf, kbsbuf);
1110 	}
1111 	(void) strcat(msgbuf, "\n");
1112 	msg(msgbuf);
1113 	(void) timeclock((time_t)-1);
1114 
1115 	if (archive)
1116 		msg(gettext("Archiving dump to `%s'\n"), archivefile);
1117 	if (active && !verify) {
1118 		nextstate(DS_INIT);
1119 		activepass();
1120 		goto restart;
1121 	}
1122 	msgp = gettext("DUMP IS DONE\n");
1123 	msg(msgp);
1124 	broadcast(msgp);
1125 	if (! doingactive)
1126 		putitime();
1127 	Exit(X_FINOK);
1128 
1129 	/*NOTREACHED*/
1130 	return (0);
1131 }
1132 
1133 void
1134 sigAbort(int sig)
1135 {
1136 	char	*sigtype;
1137 
1138 	switch (sig) {
1139 	case SIGHUP:
1140 		sigtype = "SIGHUP";
1141 		break;
1142 	case SIGTRAP:
1143 		sigtype = "SIGTRAP";
1144 		break;
1145 	case SIGFPE:
1146 		sigtype = "SIGFPE";
1147 		break;
1148 	case SIGBUS:
1149 		msg(gettext("%s  ABORTING!\n"), "SIGBUS()");
1150 		(void) signal(SIGUSR2, SIG_DFL);
1151 		abort();
1152 		/*NOTREACHED*/
1153 	case SIGSEGV:
1154 		msg(gettext("%s  ABORTING!\n"), "SIGSEGV()");
1155 		(void) signal(SIGUSR2, SIG_DFL);
1156 		abort();
1157 		/*NOTREACHED*/
1158 	case SIGALRM:
1159 		sigtype = "SIGALRM";
1160 		break;
1161 	case SIGTERM:
1162 		sigtype = "SIGTERM";
1163 		break;
1164 	case SIGPIPE:
1165 		msg(gettext("Broken pipe\n"));
1166 		dumpabort();
1167 		/*NOTREACHED*/
1168 	default:
1169 		sigtype = "SIGNAL";
1170 		break;
1171 	}
1172 	msg(gettext("%s()  try rewriting\n"), sigtype);
1173 	if (pipeout) {
1174 		msg(gettext("Unknown signal, Cannot recover\n"));
1175 		dumpabort();
1176 		/*NOTREACHED*/
1177 	}
1178 	msg(gettext("Rewriting attempted as response to unknown signal.\n"));
1179 	(void) fflush(stderr);
1180 	(void) fflush(stdout);
1181 	close_rewind();
1182 	Exit(X_REWRITE);
1183 }
1184 
1185 /* Note that returned value is malloc'd if != cp && != NULL */
1186 char *
1187 rawname(char *cp)
1188 {
1189 	struct stat64 st;
1190 	char *dp;
1191 	extern char *getfullrawname();
1192 
1193 	if (stat64(cp, &st) < 0 || (st.st_mode & S_IFMT) != S_IFBLK)
1194 		return (cp);
1195 
1196 	dp = getfullrawname(cp);
1197 	if (dp == 0)
1198 		return (0);
1199 	if (*dp == '\0') {
1200 		free(dp);
1201 		return (0);
1202 	}
1203 
1204 	if (stat64(dp, &st) < 0 || (st.st_mode & S_IFMT) != S_IFCHR) {
1205 		free(dp);
1206 		return (cp);
1207 	}
1208 
1209 	return (dp);
1210 }
1211 
1212 static char *
1213 mb(u_offset_t blks)
1214 {
1215 	static char buf[16];
1216 
1217 	if (blks < 1024)
1218 		(void) snprintf(buf, sizeof (buf), "%lldKB", blks);
1219 	else
1220 		(void) snprintf(buf, sizeof (buf), "%.2fMB",
1221 		    ((double)(blks*tp_bsize)) / (double)(1024*1024));
1222 	return (buf);
1223 }
1224 
1225 #ifdef signal
/*
 * signal()-style wrapper around sigaction(2): installs act as the
 * handler for sig with an empty signal mask and SA_RESTART set.
 * Returns the previously installed handler, or (void (*)(int))-1 if
 * sigaction() fails.
 */
void (*nsignal(int sig, void (*act)(int)))(int)
{
	struct sigaction newact, oldact;

	(void) sigemptyset(&newact.sa_mask);
	newact.sa_flags = SA_RESTART;
	newact.sa_handler = act;

	return ((sigaction(sig, &newact, &oldact) < 0) ?
	    (void (*)(int))-1 : oldact.sa_handler);
}
1237 #endif
1238 
1239 static void
1240 nextstate(int state)
1241 {
1242 	/* LINTED assigned value never used - kept for documentary purposes */
1243 	dumpstate = state;
1244 	/* LINTED assigned value never used - kept for documentary purposes */
1245 	ino = 0;
1246 	/* LINTED assigned value never used - kept for documentary purposes */
1247 	pos = 0;
1248 	leftover = 0;
1249 }
1250 
1251 /*
1252  * timeclock() function, for keeping track of how much time we've spent
1253  * writing to the tape device.  it always returns the amount of time
1254  * already spent, in milliseconds.  if you pass it a positive, then that's
1255  * telling it that we're writing, so the time counts.  if you pass it a
1256  * zero, then that's telling it we're not writing; perhaps we're waiting
1257  * for user input.
1258  *
1259  * a state of -1 resets everything.
1260  */
1261 time32_t
1262 timeclock(time32_t state)
1263 {
1264 	static int *currentState = NULL;
1265 	static struct timeval *clockstart;
1266 	static time32_t *emilli;
1267 
1268 	struct timeval current[1];
1269 	int fd, saverr;
1270 
1271 #ifdef DEBUG
1272 	fprintf(stderr, "pid=%d timeclock ", getpid());
1273 	if (state == (time32_t)-1)
1274 		fprintf(stderr, "cleared\n");
1275 	else if (state > 0)
1276 		fprintf(stderr, "ticking\n");
1277 	else
1278 		fprintf(stderr, "paused\n");
1279 #endif /* DEBUG */
1280 
1281 	/* if we haven't setup the shared memory, init */
1282 	if (currentState == (int *)NULL) {
1283 		if ((fd = open("/dev/zero", O_RDWR)) < 0) {
1284 			saverr = errno;
1285 			msg(gettext("Cannot open `%s': %s\n"),
1286 				"/dev/zero", strerror(saverr));
1287 			dumpabort();
1288 			/*NOTREACHED*/
1289 		}
1290 		/*LINTED [mmap always returns an aligned value]*/
1291 		currentState = (int *)mmap((char *)0, getpagesize(),
1292 			PROT_READ|PROT_WRITE, MAP_SHARED, fd, (off_t)0);
1293 		if (currentState == (int *)-1) {
1294 			saverr = errno;
1295 			msg(gettext(
1296 				"Cannot memory map monitor variables: %s\n"),
1297 				strerror(saverr));
1298 			dumpabort();
1299 			/*NOTREACHED*/
1300 		}
1301 		(void) close(fd);
1302 
1303 		/* LINTED currentState is sufficiently aligned */
1304 		clockstart = (struct timeval *)(currentState + 1);
1305 		emilli = (time32_t *)(clockstart + 1);
1306 		/* Note everything is initialized to zero via /dev/zero */
1307 	}
1308 
1309 	if (state == (time32_t)-1) {
1310 		bzero(clockstart, sizeof (*clockstart));
1311 		*currentState = 0;
1312 		*emilli = (time32_t)0;
1313 		return (0);
1314 	}
1315 
1316 	(void) gettimeofday(current, NULL);
1317 
1318 	if (*currentState != 0) {
1319 		current->tv_usec += 1000000;
1320 		current->tv_sec--;
1321 
1322 		/* LINTED: result will fit in a time32_t */
1323 		*emilli += (current->tv_sec - clockstart->tv_sec) * 1000;
1324 		/* LINTED: result will fit in a time32_t */
1325 		*emilli += (current->tv_usec - clockstart->tv_usec) / 1000;
1326 	}
1327 
1328 	if (state != 0)
1329 		bcopy(current, clockstart, sizeof (current));
1330 
1331 	*currentState = state;
1332 
1333 	return (*emilli);
1334 }
1335 
/*
 * Compare the identity-related fields of two stat64 structures:
 * device, inode, mode, link count, owner, group, rdev, change and
 * modification times, block size and block count.  Fields not listed
 * (for instance the access time) are not examined.
 *
 * Returns 0 if all compared fields match, 1 otherwise.
 */
static int
statcmp(const struct stat64 *left, const struct stat64 *right)
{
	/* which object it is */
	if (left->st_dev != right->st_dev ||
	    left->st_ino != right->st_ino)
		return (1);

	/* type, permissions, links and ownership */
	if (left->st_mode != right->st_mode ||
	    left->st_nlink != right->st_nlink ||
	    left->st_uid != right->st_uid ||
	    left->st_gid != right->st_gid ||
	    left->st_rdev != right->st_rdev)
		return (1);

	/* change and modification timestamps */
	if (left->st_ctim.tv_sec != right->st_ctim.tv_sec ||
	    left->st_ctim.tv_nsec != right->st_ctim.tv_nsec ||
	    left->st_mtim.tv_sec != right->st_mtim.tv_sec ||
	    left->st_mtim.tv_nsec != right->st_mtim.tv_nsec)
		return (1);

	/* allocation */
	if (left->st_blksize != right->st_blksize ||
	    left->st_blocks != right->st_blocks)
		return (1);

	return (0);
}
1359 
1360 /*
1361  * Safely open a file or device.
1362  */
1363 static int
1364 safe_open_common(const char *filename, int mode, int perms, int device)
1365 {
1366 	int fd;
1367 	int working_mode;
1368 	int saverr;
1369 	char *errtext;
1370 	struct stat64 pre_stat, pre_lstat;
1371 	struct stat64 post_stat, post_lstat;
1372 
1373 	/*
1374 	 * Don't want to be spoofed into trashing something we
1375 	 * shouldn't, thus the following rigamarole.  If it doesn't
1376 	 * exist, we create it and proceed.  Otherwise, require that
1377 	 * what's there be a real file with no extraneous links and
1378 	 * owned by whoever ran us.
1379 	 *
1380 	 * The silliness with using both lstat() and fstat() is to avoid
1381 	 * race-condition games with someone replacing the file with a
1382 	 * symlink after we've opened it.  If there was an flstat(),
1383 	 * we wouldn't need the fstat().
1384 	 *
1385 	 * The initial open with the hard-coded flags is ok even if we
1386 	 * are intending to open only for reading.  If it succeeds,
1387 	 * then the file did not exist, and we'll synthesize an appropriate
1388 	 * complaint below.  Otherwise, it does exist, so we won't be
1389 	 * truncating it with the open.
1390 	 */
1391 	if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_EXCL|O_LARGEFILE,
1392 	    perms)) < 0) {
1393 		if (errno == EEXIST) {
1394 			if (lstat64(filename, &pre_lstat) < 0) {
1395 				return (-1);
1396 			}
1397 
1398 			if (stat64(filename, &pre_stat) < 0) {
1399 				return (-1);
1400 			}
1401 
1402 			working_mode = mode & (O_WRONLY|O_RDWR|O_RDONLY);
1403 			working_mode |= O_LARGEFILE;
1404 			if ((fd = open(filename, working_mode)) < 0) {
1405 				if (errno == ENOENT) {
1406 					errtext = gettext(
1407 "Unexpected condition detected: %s used to exist, but doesn't any longer\n");
1408 					msg(errtext, filename);
1409 					syslog(LOG_WARNING, errtext, filename);
1410 					errno = ENOENT;
1411 				}
1412 				return (-1);
1413 			}
1414 
1415 			if (lstat64(filename, &post_lstat) < 0) {
1416 				saverr = errno;
1417 				(void) close(fd);
1418 				errno = saverr;
1419 				return (-1);
1420 			}
1421 
1422 			if (fstat64(fd, &post_stat) < 0) {
1423 				saverr = errno;
1424 				(void) close(fd);
1425 				errno = saverr;
1426 				return (-1);
1427 			}
1428 
1429 			/*
1430 			 * Can't just use memcmp(3C), because the access
1431 			 * time is updated by open(2).
1432 			 */
1433 			if (statcmp(&pre_lstat, &post_lstat) != 0) {
1434 				errtext = gettext(
1435 	    "Unexpected change detected: %s's lstat(2) information changed\n");
1436 				msg(errtext, filename);
1437 				syslog(LOG_WARNING, errtext, filename);
1438 				errno = EPERM;
1439 				return (-1);
1440 			}
1441 
1442 			if (statcmp(&pre_stat, &post_stat) != 0) {
1443 				errtext = gettext(
1444 	    "Unexpected change detected: %s's stat(2) information changed\n"),
1445 				msg(errtext, filename);
1446 				syslog(LOG_WARNING, errtext, filename);
1447 				errno = EPERM;
1448 				return (-1);
1449 			}
1450 
1451 			/*
1452 			 * If inode, device, or type are wrong, bail out.
1453 			 * Note using post_stat instead of post_lstat for the
1454 			 * S_ISCHR() test.  This is to allow the /dev ->
1455 			 * /devices bit to work, as long as the final target
1456 			 * is a character device (i.e., raw disk or tape).
1457 			 */
1458 			if (device && !(S_ISCHR(post_stat.st_mode)) &&
1459 			    !(S_ISFIFO(post_stat.st_mode)) &&
1460 			    !(S_ISREG(post_lstat.st_mode))) {
1461 				errtext = gettext(
1462 	    "Unexpected condition detected: %s is not a supported device\n"),
1463 				msg(errtext, filename);
1464 				syslog(LOG_WARNING, errtext, filename);
1465 				(void) close(fd);
1466 				errno = EPERM;
1467 				return (-1);
1468 			} else if (!device &&
1469 			    (!S_ISREG(post_lstat.st_mode) ||
1470 			    (post_stat.st_ino != post_lstat.st_ino) ||
1471 			    (post_stat.st_dev != post_lstat.st_dev))) {
1472 				errtext = gettext(
1473 	    "Unexpected condition detected: %s is not a regular file\n"),
1474 				msg(errtext, filename);
1475 				syslog(LOG_WARNING, errtext, filename);
1476 				(void) close(fd);
1477 				errno = EPERM;
1478 				return (-1);
1479 			}
1480 
1481 			/*
1482 			 * Bad link count implies someone's linked our
1483 			 * target to something else, which we probably
1484 			 * shouldn't step on.
1485 			 */
1486 			if (post_lstat.st_nlink != 1) {
1487 				errtext = gettext(
1488 	    "Unexpected condition detected: %s must have exactly one link\n"),
1489 				msg(errtext, filename);
1490 				syslog(LOG_WARNING, errtext, filename);
1491 				(void) close(fd);
1492 				errno = EPERM;
1493 				return (-1);
1494 			}
1495 			/*
1496 			 * Root might make a file, but non-root might
1497 			 * need to open it.  If the permissions let us
1498 			 * get this far, then let it through.
1499 			 */
1500 			if (post_lstat.st_uid != getuid() &&
1501 			    post_lstat.st_uid != 0) {
1502 				errtext = gettext(
1503 "Unsupported condition detected: %s must be owned by uid %ld or 0\n"),
1504 				msg(errtext, filename, (long)getuid());
1505 				syslog(LOG_WARNING, errtext, filename,
1506 				    (long)getuid());
1507 				(void) close(fd);
1508 				errno = EPERM;
1509 				return (-1);
1510 			}
1511 			if (mode & O_TRUNC) {
1512 				if (ftruncate(fd, (off_t)0) < 0) {
1513 					msg("ftruncate(%s): %s\n",
1514 					    filename, strerror(errno));
1515 					(void) close(fd);
1516 					return (-1);
1517 				}
1518 			}
1519 		} else {
1520 			/*
1521 			 * Didn't exist, but couldn't open it.
1522 			 */
1523 			return (-1);
1524 		}
1525 	} else {
1526 		/*
1527 		 * If truncating open succeeded for a read-only open,
1528 		 * bail out, as we really shouldn't have succeeded.
1529 		 */
1530 		if (mode & O_RDONLY) {
1531 			/* Undo the O_CREAT */
1532 			(void) unlink(filename);
1533 			msg("open(%s): %s\n",
1534 			    filename, strerror(ENOENT));
1535 			(void) close(fd);
1536 			errno = ENOENT;
1537 			return (-1);
1538 		}
1539 	}
1540 
1541 	return (fd);
1542 }
1543 
/*
 * Safely open a file: safe_open_common() with the device flag clear.
 * Arguments and return value are passed straight through.
 */
int
safe_file_open(const char *filename, int mode, int perms)
{
	const int is_device = 0;

	return (safe_open_common(filename, mode, perms, is_device));
}
1552 
/*
 * Safely open a device: safe_open_common() with the device flag set.
 * Arguments and return value are passed straight through.
 */
int
safe_device_open(const char *filename, int mode, int perms)
{
	const int is_device = 1;

	return (safe_open_common(filename, mode, perms, is_device));
}
1561 
/*
 * STDIO version of safe_open
 *
 * filename - path to open
 * smode    - stdio mode string; only "r", "r+" and "w" are accepted
 * perms    - permission bits used if the file has to be created
 *
 * Returns a stream on success, or NULL on failure.  An unsupported
 * mode string is reported here; any other failure is left for the
 * caller to report.
 *
 * NOTE(review): "w" is mapped to O_WRONLY without O_TRUNC, so an
 * existing file is not truncated the way fopen(3C) would - confirm
 * this is what callers expect.
 */
FILE *
safe_fopen(const char *filename, const char *smode, int perms)
{
	FILE *fp;
	int fd;
	int bmode;
	int saverr;

	/*
	 * accepts only modes  "r", "r+", and "w"; anything else,
	 * including other strings that start with 'r', is rejected
	 * rather than opened with an undefined mode.
	 */
	if ((smode[0] == 'r') && (smode[1] == '\0')) {
		bmode = O_RDONLY;
	} else if ((smode[0] == 'r') && (smode[1] == '+') &&
	    (smode[2] == '\0')) {
		bmode = O_RDWR;
	} else if ((smode[0] == 'w') && (smode[1] == '\0')) {
		bmode = O_WRONLY;
	} else {
		msg(gettext("internal error: safe_fopen: invalid mode `%s'\n"),
		    smode);
		return (NULL);
	}

	fd = safe_file_open(filename, bmode, perms);

	/*
	 * caller is expected to report error.
	 */
	if (fd < 0)
		return ((FILE *)NULL);

	fp = fdopen(fd, smode);
	if (fp == NULL) {
		/* fdopen() does not close fd on failure; don't leak it */
		saverr = errno;
		(void) close(fd);
		errno = saverr;
	}

	return (fp);
}
1598 
1599 void
1600 child_chdir(void)
1601 {
1602 	char name[MAXPATHLEN];
1603 
1604 	if (debug_chdir != NULL) {
1605 		snprintf(name, sizeof (name), "%s/%ld",
1606 		    debug_chdir, (long)getpid());
1607 		if (mkdir(name, 0755) < 0)
1608 			msg("mkdir(%s): %s", name, strerror(errno));
1609 		if (chdir(name) < 0)
1610 			msg("chdir(%s): %s", name, strerror(errno));
1611 	}
1612 }
1613