xref: /titanic_51/usr/src/cmd/backup/dump/dumpmain.c (revision ebd1706e95186ddae1d4c0d63c47544cf33832ee)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved.  The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #pragma ident	"%Z%%M%	%I%	%E% SMI"
16 
17 #include "dump.h"
18 #include <rmt.h>
19 #include <sys/mtio.h>
20 #include <limits.h>
21 #include <priv_utils.h>
22 #include "roll_log.h"
23 
/*
 * Global dump state.  These are plain (non-static) definitions; main()
 * below sets several of them while processing the command-line key
 * letters and while sizing the dump.
 */
24 int	notify = 0;		/* notify operator flag */
25 int	blockswritten = 0;	/* number of blocks written on current tape */
26 uint_t	tapeno = 0;		/* current tape number */
27 daddr32_t filenum = 0;		/* current file number on tape */
28 int	density = 0;		/* density in bytes/0.1" */
29 int	tenthsperirg;		/* inter-record-gap in 0.1"'s */
30 uint_t	ntrec = 0;		/* # tape blocks in each tape record */
31 uint_t	saved_ntrec = 0;	/* saved value of ntrec */
32 uint_t	forceflag = 0;		/* forced to change tp_bsize */
33 int	cartridge = 0;		/* assume non-cartridge tape */
34 uint_t	tracks;			/* # tracks on a cartridge tape */
35 int	diskette = 0;		/* assume not dumping to a diskette */
36 int	printsize = 0;		/* just print estimated size and exit */
37 int	mapfd = -1;		/* if >= 0, file descriptor for mmap */
38 int32_t	tp_bsize = TP_BSIZE_MIN; /* tape block record size (frag size) */
39 #ifdef DEBUG
40 int	xflag;			/* debugging switch */
41 #endif
42 
/* Last path component of argv[0]; set at the top of main(). */
43 char	*myname;
44 
45 /*
46  * This should be struct fs, but there are trailing bits on disk
47  * that we also need to read in as part of it.  It's an array of
48  * longs instead of char to force proper alignment.
49  */
50 static long sblock_buf[SBSIZE/sizeof (long)];
51 
/*
 * Forward declarations for helpers defined later in this file.  main()
 * passes a block count to mb() and prints the result with %s, and calls
 * nextstate() with the DS_* state it is moving to.
 */
52 #ifdef __STDC__
53 static char *mb(u_offset_t);
54 static void nextstate(int);
55 #else
56 static char *mb();
57 static void nextstate();
58 #endif
59 
60 extern	jmp_buf checkpoint_buf;	/* context for return from checkpoint */
/*
 * Slack subtracted from INT_MAX when main() decides whether the estimated
 * block count is too large for the current tp_bsize.
 */
61 #define	FUDGE_FACTOR	0x2000000
62 
/*
 * main - entry point for the dump command.
 *
 * Parses the key-letter option string, resolves the file system to dump,
 * checks its super-block, runs the mapping passes to estimate the size of
 * the dump, and then writes the dump by stepping through the dumpstate
 * state machine.  Control leaves through Exit(), exit() or dumpabort();
 * the trailing return is compiled only for lint.
 *
 * argc, argv: the usual command-line arguments.  argv[1] holds all of the
 * option letters; each option that takes a value consumes the next
 * element of argv in the order the letters appear.
 */
63 main(argc, argv)
64 	int	argc;
65 	char	*argv[];
66 {
67 	char		*arg;
68 	int		bflag = 0, i, error = 0, saverr;
69 	double		fetapes = 0.0;
70 	struct	mnttab	*dt;
71 	char		msgbuf[3000], *msgp;
72 	char		kbsbuf[BUFSIZ];
73 	u_offset_t	esize_shift = 0;
74 	int32_t	new_mult = 0;
75 	time32_t	snapdate;
76 
77 	host = NULL;
78 
	/* myname is the last path component of argv[0]. */
79 	if (myname = strrchr(argv[0], '/'))
80 		myname++;
81 	else
82 		myname = argv[0];
83 
84 	if (strcmp("hsmdump", myname) == 0) {
85 		msg(gettext("hsmdump emulation is no longer supported.\n"));
86 		Exit(X_ABORT);
87 	}
88 
89 	tape = DEFTAPE;
90 	autoload_period = 12;
91 	autoload_tries = 12;	/* traditional default of ~2.5 minutes */
92 
93 	(void) setlocale(LC_ALL, "");
94 #if !defined(TEXT_DOMAIN)
95 #define	TEXT_DOMAIN "SYS_TEST"
96 #endif  /* TEXT_DOMAIN */
97 	(void) textdomain(TEXT_DOMAIN);
98 
99 	/*
100 	 * If someone strips the set-uid bit, dump will still work for local
101 	 * tapes.  Fail when we try to access a remote tape.
102 	 */
103 	(void) __init_suid_priv(0, PRIV_NET_PRIVADDR, (char *)NULL);
104 
105 	if (sysinfo(SI_HOSTNAME, spcl.c_host, sizeof (spcl.c_host)) < 0) {
106 		saverr = errno;
107 		msg(gettext("Could not get host name: %s\n"),
108 		    strerror(saverr));
109 		bzero(spcl.c_host, sizeof (spcl.c_host));
110 	}
111 
112 	dumppid = getpid();
113 	tsize = 0;	/* no default size, detect EOT dynamically */
114 
115 	disk = NULL;
116 	dname = NULL;
117 	disk_dynamic = 0;
118 	increm = NINCREM;
119 	incno = '9';
120 	uflag = 0;
121 	arg = "u";
122 	tlabel = "none";
	/*
	 * All option letters live in argv[1]; a leading '-' is optional.
	 * With no arguments at all the option string defaults to "u".
	 */
123 	if (argc > 1) {
124 		argv++;
125 		argc--;
126 		arg = *argv;
127 		if (*arg == '-')
128 			arg++;
129 	}
130 	while (*arg)
131 	switch (*arg++) {		/* BE CAUTIOUS OF FALLTHROUGHS */
132 	case 'M':
133 		/*
134 		 * This undocumented option causes each process to
135 		 * mkdir debug_chdir/getpid(), and chdir to it.  This is
136 		 * to ease the collection of profiling information and
137 		 * core dumps.
138 		 */
139 		if (argc > 1) {
140 			argv++;
141 			argc--;
142 			debug_chdir = *argv;
143 			msg(gettext(
144 			    "Each process shall try to chdir to %s/<pid>\n"),
145 			    debug_chdir);
146 			child_chdir();
147 		} else {
148 			msg(gettext("Missing move-to-dir (M) name\n"));
149 			dumpabort();
150 			/*NOTREACHED*/
151 		}
152 		break;
153 
154 	case 'w':
155 		lastdump('w');		/* tell us only what has to be done */
156 		exit(0);
157 		break;
158 
159 	case 'W':			/* what to do */
160 		lastdump('W');		/* tell state of what has been done */
161 		exit(0);		/* do nothing else */
162 		break;
163 
164 	case 'T':
165 		if (argc > 1) {
166 			int count;
167 			int multiplier;
168 			char units;
169 
170 			argv++;
171 			argc--;
172 			count = atoi(*argv);
173 			if (count < 1) {
174 				msg(gettext(
175 				    "Unreasonable autoload timeout period\n"));
176 				dumpabort();
177 				/*NOTREACHED*/
178 			}
			/*
			 * The last character of the argument selects the
			 * units.  A trailing digit means no suffix was
			 * given, which is treated the same as 'm'.
			 */
179 			units = *(*argv + strlen(*argv) - 1);
180 			switch (units) {
181 			case 's':
182 				multiplier = 1;
183 				break;
184 			case 'h':
185 				multiplier = 3600;
186 				break;
187 			case '0': case '1': case '2': case '3': case '4':
188 			case '5': case '6': case '7': case '8': case '9':
189 			case 'm':
190 				multiplier = 60;
191 				break;
192 			default:
193 				msg(gettext(
194 				    "Unknown timeout units indicator `%c'\n"),
195 				    units);
196 				dumpabort();
197 				/*NOTREACHED*/
198 			}
			/* Turn the timeout into a retry count. */
199 			autoload_tries = 1 +
200 			    ((count * multiplier) / autoload_period);
201 		} else {
202 			msg(gettext("Missing autoload timeout period\n"));
203 			dumpabort();
204 			/*NOTREACHED*/
205 		}
206 		break;
207 
208 	case 'f':			/* output file */
209 		if (argc > 1) {
210 			argv++;
211 			argc--;
212 			tape = *argv;
213 			if (*tape == '\0') {
214 				msg(gettext("Bad output device name\n"));
215 				dumpabort();
216 				/*NOTREACHED*/
217 			}
218 		} else {
219 			msg(gettext("Missing output device name\n"));
220 			dumpabort();
221 			/*NOTREACHED*/
222 		}
223 		if (strcmp(tape, "-") == 0 && verify) {
224 			msg(gettext(
225 			"Cannot verify when dumping to standard out.\n"));
226 			dumpabort();
227 			/*NOTREACHED*/
228 		}
229 		break;
230 
231 	case 'd':			/* density, in bits per inch */
232 		if (argc > 1) {
233 			argv++;
234 			argc--;
235 			density = atoi(*argv) / 10;
236 			if (density <= 0) {
237 				msg(gettext(
238 				    "Density must be a positive integer\n"));
239 				dumpabort();
240 				/*NOTREACHED*/
241 			}
242 		} else {
243 			msg(gettext("Missing density\n"));
244 			dumpabort();
245 			/*NOTREACHED*/
246 		}
247 		break;
248 
249 	case 's':			/* tape size, feet */
250 		if (argc > 1) {
251 			argv++;
252 			argc--;
253 			tsize = atol(*argv);
254 			if ((*argv[0] == '-') || (tsize == 0)) {
255 				msg(gettext(
256 			    "Tape size must be a positive integer\n"));
257 				dumpabort();
258 				/*NOTREACHED*/
259 			}
260 		} else {
261 			msg(gettext("Missing tape size\n"));
262 			dumpabort();
263 			/*NOTREACHED*/
264 		}
265 		break;
266 
267 	case 't':			/* tracks */
268 		if (argc > 1) {
269 			argv++;
270 			argc--;
271 			tracks = atoi(*argv);
272 		} else {
273 			msg(gettext("Missing track count\n"));
274 			dumpabort();
275 			/*NOTREACHED*/
276 		}
277 		break;
278 
279 	case 'b':			/* blocks per tape write */
280 		if (argc > 1) {
281 			argv++;
282 			argc--;
283 			bflag++;
284 			/*
285 			 * We save the ntrec in case we need to change
286 			 * tp_bsize later, we will have to recalculate
287 			 * it.
288 			 */
289 			saved_ntrec = ntrec = atoi(*argv);
290 			if (ntrec == 0 || (ntrec&1) || ntrec > (MAXNTREC*2)) {
291 				msg(gettext(
292 		    "Block size must be a positive, even integer <= %d\n"),
293 				    MAXNTREC*2);
294 				dumpabort();
295 				/*NOTREACHED*/
296 			}
			/*
			 * The argument is in DEV_BSIZE units; ntrec is
			 * kept in tp_bsize units.
			 */
297 			ntrec /= (tp_bsize/DEV_BSIZE);
298 		} else {
299 			msg(gettext("Missing blocking factor\n"));
300 			dumpabort();
301 			/*NOTREACHED*/
302 		}
303 		break;
304 
305 	case 'c':			/* Tape is cart. not 9-track */
306 	case 'C':			/* 'C' to be consistent with 'D' */
307 		cartridge++;
308 		break;
309 
310 	case '0':			/* dump level */
311 	case '1':
312 	case '2':
313 	case '3':
314 	case '4':
315 	case '5':
316 	case '6':
317 	case '7':
318 	case '8':
319 	case '9':
320 		incno = arg[-1];
321 		break;
322 
323 	case 'u':			/* update /etc/dumpdates */
324 		uflag++;
325 		break;
326 
327 	case 'n':			/* notify operators */
328 		notify++;
329 		break;
330 
331 	case 'a':			/* create archive file */
332 		archive = 1;
333 		if (argc > 1) {
334 			argv++;
335 			argc--;
336 			if (**argv == '\0') {
337 				msg(gettext("Bad archive file name\n"));
338 				dumpabort();
339 				/*NOTREACHED*/
340 			}
341 			archivefile = strdup(*argv);
342 			if (archivefile == NULL) {
343 				saverr = errno;
344 				msg(gettext("Cannot allocate memory: %s\n"),
345 				    strerror(saverr));
346 				dumpabort();
347 				/*NOTREACHED*/
348 			}
349 		} else {
350 			msg(gettext("Missing archive file name\n"));
351 			dumpabort();
352 			/*NOTREACHED*/
353 		}
354 		break;
355 
356 	case 'v':
357 		verify++;
358 		doingverify++;
359 		if (strcmp(tape, "-") == 0) {
360 			msg(gettext(
361 			"Cannot verify when dumping to standard out.\n"));
362 			dumpabort();
363 			/*NOTREACHED*/
364 		}
365 		break;
366 
367 	case 'D':
368 		diskette++;
369 		break;
370 
371 	case 'N':
372 		if (argc > 1) {
373 			argv++;
374 			argc--;
375 			if (**argv == '\0') {
376 				msg(gettext("Missing name for dumpdates "
377 				    "entry.\n"));
378 				dumpabort();
379 				/*NOTREACHED*/
380 			}
381 			dname = *argv;
382 			if (strlen(dname) > MAXNAMLEN + 2) {
383 				msg(gettext("Dumpdates entry name too "
384 				    "long.\n"));
385 				dumpabort();
386 				/*NOTREACHED*/
387 			}
388 			for (i = 0; i < strlen(dname); i++) {
389 				if (isspace(*(dname+i))) {
390 					msg(gettext("Dumpdates entry name may "
391 					    "not contain white space.\n"));
392 					dumpabort();
393 					/*NOTREACHED*/
394 				}
395 			}
396 		} else {
397 			msg(gettext("Missing name for dumpdates entry.\n"));
398 			dumpabort();
399 			/*NOTREACHED*/
400 		}
401 		break;
402 	case 'L':
403 		if (argc > 1) {
404 			argv++;
405 			argc--;
406 			if (**argv == '\0') {
407 				msg(gettext("Missing tape label name\n"));
408 				dumpabort();
409 				/*NOTREACHED*/
410 			}
411 			tlabel = *argv;
			/* Over-long labels are cut short in place in argv. */
412 			if (strlen(tlabel) > (sizeof (spcl.c_label) - 1)) {
413 				tlabel[sizeof (spcl.c_label) - 1] = '\0';
414 				msg(gettext(
415 		    "Truncating label to maximum supported length: `%s'\n"),
416 				    tlabel);
417 			}
418 		} else {
419 			msg(gettext("Missing tape label name\n"));
420 			dumpabort();
421 			/*NOTREACHED*/
422 		}
423 		break;
424 
425 	case 'l':
426 		autoload++;
427 		break;
428 
429 	case 'o':
430 		offline++;
431 		break;
432 
433 	case 'S':
434 		printsize++;
435 		break;
436 
437 #ifdef DEBUG
438 	case 'z':
439 		xflag++;
440 		break;
441 #endif
442 
443 	default:
444 		msg(gettext("Bad option `%c'\n"), arg[-1]);
445 		dumpabort();
446 		/*NOTREACHED*/
447 	}
	/* The next unconsumed argument, if any, names what to dump. */
448 	if (argc > 1) {
449 		argv++;
450 		argc--;
451 		if (**argv == '\0') {
452 			msg(gettext("Bad disk name\n"));
453 			dumpabort();
454 			/*NOTREACHED*/
455 		}
456 		disk = *argv;
457 		disk_dynamic = 0;
458 	}
459 	if (disk == NULL) {
460 		(void) fprintf(stderr, gettext(
461 	"Usage: %s [0123456789fustdWwnNDCcbavloS [argument]] filesystem\n"),
462 		    myname);
463 		Exit(X_ABORT);
464 	}
465 	if (!filenum)
466 		filenum = 1;
467 
	/* Install the handler, but leave SIGINT ignored if it already was. */
468 	if (signal(SIGINT, interrupt) == SIG_IGN)
469 		(void) signal(SIGINT, SIG_IGN);
470 
	/* An output name of "-" means write the dump to standard output. */
471 	if (strcmp(tape, "-") == 0) {
472 		pipeout++;
473 		tape = gettext("standard output");
474 		dumpdev = sdumpdev = strdup(tape);
475 		if (dumpdev == NULL) {
476 			saverr = errno;
477 			msg(gettext("Cannot allocate memory: %s\n"),
478 			    strerror(saverr));
479 			dumpabort();
480 			/*NOTREACHED*/
481 		}
482 		/*CONSTANTCONDITION*/
483 		assert(sizeof (spcl.c_label) > 5);
484 		(void) strcpy(spcl.c_label, "none");
485 	} else if (*tape == '+') {
486 		nextdevice();
487 		(void) strcpy(spcl.c_label, tlabel);
488 	} else {
489 		/* if not already set, set diskette to default */
490 		if (diskette && strcmp(tape, DEFTAPE) == 0)
491 			tape = DISKETTE;
492 		nextdevice();
493 		(void) strcpy(spcl.c_label, tlabel);
494 	}
	/* Report every conflicting option before aborting. */
495 	if (cartridge && diskette) {
496 		error = 1;
497 		msg(gettext("Cannot select both cartridge and diskette\n"));
498 	}
499 	if (density && diskette) {
500 		error = 1;
501 		msg(gettext("Cannot select density of diskette\n"));
502 	}
503 	if (tracks && diskette) {
504 		error = 1;
505 		msg(gettext("Cannot select number of tracks of diskette\n"));
506 	}
507 	if (error) {
508 		dumpabort();
509 		/*NOTREACHED*/
510 	}
511 
512 	/*
513 	 * Determine how to default tape size and density
514 	 *
515 	 *		density				tape size
516 	 * 9-track	1600 bpi (160 bytes/.1")	2300 ft.
517 	 * 9-track	6250 bpi (625 bytes/.1")	2300 ft.
518 	 *
519 	 * Most Sun-2's came with 4 track (20MB) cartridge tape drives,
520 	 * while most other machines (Sun-3's and non-Sun's) come with
521 	 * 9 track (45MB) cartridge tape drives.  Some Sun-2's came with
522 	 * 9 track drives, but there is no way for the software to detect
523 	 * which drive type is installed.  Sigh...  We make the gross
524 	 * assumption that #ifdef mc68010 will test for a Sun-2.
525 	 *
526 	 * cartridge	8000 bpi (100 bytes/.1")	425 * tracks ft.
527 	 */
528 	if (density == 0)
529 		density = cartridge ? 100 : 625;
530 	if (tracks == 0)
531 		tracks = 9;
532 	if (!bflag) {
533 		if (cartridge)
534 			ntrec = CARTRIDGETREC;
535 		else if (diskette)
536 			ntrec = NTREC;
537 		else if (density >= 625)
538 			ntrec = HIGHDENSITYTREC;
539 		else
540 			ntrec = NTREC;
541 		/*
542 		 * save ntrec in case we have to change tp_bsize later.
543 		 */
544 		saved_ntrec = (ntrec * (tp_bsize/DEV_BSIZE));
545 	}
	/*
	 * For tapes the 's' value was given in feet; scale it to tenths of
	 * an inch (12 * 10 per foot), and by the track count for cartridges.
	 */
546 	if (!diskette) {
547 		tsize *= 12L*10L;
548 		if (cartridge)
549 			tsize *= tracks;
550 	}
551 	rmtinit(msg, Exit);
552 	if (host) {
		/* cp skips any "user@" prefix; it is only used in the message. */
553 		char	*cp = strchr(host, '@');
554 		if (cp == (char *)0)
555 			cp = host;
556 		else
557 			cp++;
558 
559 		if (rmthost(host, ntrec) == 0) {
560 			msg(gettext("Cannot connect to tape host `%s'\n"), cp);
561 			dumpabort();
562 			/*NOTREACHED*/
563 		}
564 	}
	/*
	 * Route these signals to sigAbort, except that any signal which was
	 * being ignored on entry is put back to ignored.
	 */
565 	if (signal(SIGHUP, sigAbort) == SIG_IGN)
566 		(void) signal(SIGHUP, SIG_IGN);
567 	if (signal(SIGTRAP, sigAbort) == SIG_IGN)
568 		(void) signal(SIGTRAP, SIG_IGN);
569 	if (signal(SIGFPE, sigAbort) == SIG_IGN)
570 		(void) signal(SIGFPE, SIG_IGN);
571 	if (signal(SIGBUS, sigAbort) == SIG_IGN)
572 		(void) signal(SIGBUS, SIG_IGN);
573 	if (signal(SIGSEGV, sigAbort) == SIG_IGN)
574 		(void) signal(SIGSEGV, SIG_IGN);
575 	if (signal(SIGTERM, sigAbort) == SIG_IGN)
576 		(void) signal(SIGTERM, SIG_IGN);
577 	if (signal(SIGUSR1, sigAbort) == SIG_IGN)
578 		(void) signal(SIGUSR1, SIG_IGN);
579 	if (signal(SIGPIPE, sigAbort) == SIG_IGN)
580 		(void) signal(SIGPIPE, SIG_IGN);
581 
582 	mnttabread();		/* /etc/fstab, /etc/mtab snarfed */
583 
584 	/*
585 	 *	disk can be either the full special file name,
586 	 *	the suffix of the special file name,
587 	 *	the special name missing the leading '/',
588 	 *	the file system name with or without the leading '/'.
589 	 *	NB:  we attempt to avoid dumping the block device
590 	 *	(using rawname) because specfs and the vm system
591 	 *	are not necessarily in sync.
592 	 */
593 
594 	/*
595 	 * Attempt to roll the log before doing the dump.  There's nothing
596 	 * the user can do if we are unable to roll the log, so we'll silently
597 	 * ignore failures.
598 	 */
599 	if ((rl_roll_log(disk) != RL_SUCCESS) && (disk[0] != '/')) {
600 		/* Try it again with leading '/'. */
601 		char	*slashed;
602 
603 		slashed = (char *)malloc(strlen(disk) + 2);
604 		if (slashed != (char *)NULL) {
605 			(void) sprintf(slashed, "%c%s", '/', disk);
606 			(void) rl_roll_log(slashed);
607 			free(slashed);
608 		}
609 	}
	/*
	 * disk_dynamic records whether disk points at storage that must be
	 * freed before disk is replaced.
	 */
610 	dt = mnttabsearch(disk, 0);
611 	if (dt != 0) {
612 		filesystem = dt->mnt_mountp;
613 		if (disk_dynamic) {
614 			/* LINTED: disk is not NULL */
615 			free(disk);
616 		}
617 		disk = rawname(dt->mnt_special);
618 		disk_dynamic = (disk != dt->mnt_special);
619 
620 		(void) strncpy(spcl.c_dev, dt->mnt_special,
621 		    sizeof (spcl.c_dev));
622 		spcl.c_dev[sizeof (spcl.c_dev) - 1] = '\0';
623 		(void) strncpy(spcl.c_filesys, dt->mnt_mountp,
624 		    sizeof (spcl.c_filesys));
625 		spcl.c_filesys[sizeof (spcl.c_filesys) - 1] = '\0';
626 	} else {
627 		(void) strncpy(spcl.c_dev, disk, sizeof (spcl.c_dev));
628 		spcl.c_dev[sizeof (spcl.c_dev) - 1] = '\0';
629 #ifdef PARTIAL
630 		/* check for partial filesystem dump */
631 		partial_check();
632 		dt = mnttabsearch(disk, 1);
633 		if (dt != 0) {
634 			filesystem = dt->mnt_mountp;
635 			if (disk_dynamic)
636 				free(disk);
637 			disk = rawname(dt->mnt_special);
638 			disk_dynamic = (disk != dt->mnt_special);
639 
640 			(void) strncpy(spcl.c_filesys,
641 			    "a partial file system", sizeof (spcl.c_filesys));
642 			spcl.c_filesys[sizeof (spcl.c_filesys) - 1] = '\0';
643 		}
644 		else
645 #endif /* PARTIAL */
646 		{
647 			char *old_disk = disk;
648 
649 			(void) strncpy(spcl.c_filesys,
650 			    "an unlisted file system",
651 			    sizeof (spcl.c_filesys));
652 			spcl.c_filesys[sizeof (spcl.c_filesys) - 1] = '\0';
653 
654 			disk = rawname(old_disk);
655 			if (disk != old_disk) {
656 				if (disk_dynamic)
657 					free(old_disk);
658 				disk_dynamic = 1;
659 			}
660 			/*
661 			 * If disk == old_disk, then disk_dynamic's state
662 			 * does not change.
663 			 */
664 		}
665 	}
666 
667 	fi = open64(disk, O_RDONLY);
668 
669 	if (fi < 0) {
670 		saverr = errno;
671 		msg(gettext("Cannot open dump device `%s': %s\n"),
672 			disk, strerror(saverr));
673 		Exit(X_ABORT);
674 	}
675 
	/*
	 * NOTE(review): incno is assigned single characters above, so it
	 * looks like a lone char rather than a string; the %1d width keeps
	 * the conversion to one character.  Confirm against its declaration.
	 */
676 	if (sscanf(&incno, "%1d", &spcl.c_level) != 1) {
677 		msg(gettext("Bad dump level `%c' specified\n"), incno);
678 		dumpabort();
679 		/*NOTREACHED*/
680 	}
681 	getitime();		/* /etc/dumpdates snarfed */
682 
683 	sblock = (struct fs *)&sblock_buf;
684 	sync();
685 
686 	bread((diskaddr_t)SBLOCK, (uchar_t *)sblock, (long)SBSIZE);
687 	if ((sblock->fs_magic != FS_MAGIC) &&
688 	    (sblock->fs_magic != MTB_UFS_MAGIC)) {
689 		msg(gettext(
690 	    "Warning - super-block on device `%s' is corrupt - run fsck\n"),
691 		    disk);
692 		dumpabort();
693 		/*NOTREACHED*/
694 	}
695 
696 	if (sblock->fs_magic == FS_MAGIC &&
697 	    (sblock->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
698 	    sblock->fs_version != UFS_VERSION_MIN)) {
699 		msg(gettext("Unrecognized UFS version: %d\n"),
700 		    sblock->fs_version);
701 		dumpabort();
702 		/*NOTREACHED*/
703 	}
704 
705 	if (sblock->fs_magic == MTB_UFS_MAGIC &&
706 	    (sblock->fs_version < MTB_UFS_VERSION_MIN ||
707 	    sblock->fs_version > MTB_UFS_VERSION_1)) {
708 		msg(gettext("Unrecognized UFS version: %d\n"),
709 		    sblock->fs_version);
710 		dumpabort();
711 		/*NOTREACHED*/
712 	}
713 
714 	/*
715 	 * Try to set up for using mmap(2).  It only works on the block
716 	 * device, but if we can use it, things go somewhat faster.  If
717 	 * we can't open it, we'll silently fall back to the old method
718 	 * (read/memcpy). We also only try this if it's been cleanly
719 	 * unmounted. Dumping a live filesystem this way runs into
720 	 * buffer consistency problems. Of course, we don't support
721 	 * running dump on a mounted filesystem, but some people do it
722 	 * anyway.
723 	 */
724 	if (sblock->fs_clean == FSCLEAN) {
725 		char *block = unrawname(disk);
726 
727 		if (block != NULL) {
728 			mapfd = open(block, O_RDONLY, 0);
729 			free(block);
730 		}
731 	}
732 
	/*
	 * The dump starts over from here: the DS_INIT case of the state
	 * loop below and the active-files pass at the end of main both
	 * jump back to this label.  The super-block is re-read and
	 * re-validated each time.
	 */
733 restart:
734 	bread((diskaddr_t)SBLOCK, (uchar_t *)sblock, (long)SBSIZE);
735 	if ((sblock->fs_magic != FS_MAGIC) &&
736 	    (sblock->fs_magic != MTB_UFS_MAGIC)) {	/* paranoia */
737 		msg(gettext("bad super-block magic number, run fsck\n"));
738 		dumpabort();
739 		/*NOTREACHED*/
740 	}
741 
742 	if (sblock->fs_magic == FS_MAGIC &&
743 	    (sblock->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
744 	    sblock->fs_version != UFS_VERSION_MIN)) {
745 		msg(gettext("Unrecognized UFS version: %d\n"),
746 		    sblock->fs_version);
747 		dumpabort();
748 		/*NOTREACHED*/
749 	}
750 
751 	if (sblock->fs_magic == MTB_UFS_MAGIC &&
752 	    (sblock->fs_version < MTB_UFS_VERSION_MIN ||
753 	    sblock->fs_version > MTB_UFS_VERSION_1)) {
754 		msg(gettext("Unrecognized UFS version: %d\n"),
755 		    sblock->fs_version);
756 		dumpabort();
757 		/*NOTREACHED*/
758 	}
759 
760 	if (!doingactive)
761 		allocino();
762 
763 	/* XXX should sanity-check the super block before trusting/using it */
764 
765 	/* LINTED XXX time truncated - tolerate until tape format changes */
766 	spcl.c_date = (time32_t)time((time_t *)NULL);
767 	bcopy(&(spcl.c_shadow), c_shadow_save, sizeof (c_shadow_save));
768 
	/* A non-zero result from is_fssnap_dump() replaces the dump date. */
769 	snapdate = is_fssnap_dump(disk);
770 	if (snapdate)
771 		spcl.c_date = snapdate;
772 
773 	if (!printsize) {
774 		msg(gettext("Date of this level %c dump: %s\n"),
775 		    incno, prdate(spcl.c_date));
776 		msg(gettext("Date of last level %c dump: %s\n"),
777 			(uchar_t)lastincno, prdate(spcl.c_ddate));
778 		msg(gettext("Dumping %s "), disk);
779 		if (filesystem != 0)
780 			msgtail("(%.*s:%s) ",
781 			    /* LINTED unsigned -> signed cast ok */
782 			    (int)sizeof (spcl.c_host), spcl.c_host, filesystem);
783 		msgtail(gettext("to %s.\n"), sdumpdev);
784 	}
785 
786 	esize = f_esize = o_esize = 0;
	/*
	 * msiz is the size in bytes of each inode bitmap: one bit per inode
	 * in the file system, rounded up to a multiple of TP_BSIZE_MAX.
	 */
787 	msiz = roundup(d_howmany(sblock->fs_ipg * sblock->fs_ncg, NBBY),
788 		TP_BSIZE_MAX);
789 	if (!doingactive) {
790 		clrmap = (uchar_t *)xcalloc(msiz, sizeof (*clrmap));
791 		filmap = (uchar_t *)xcalloc(msiz, sizeof (*filmap));
792 		dirmap = (uchar_t *)xcalloc(msiz, sizeof (*dirmap));
793 		nodmap = (uchar_t *)xcalloc(msiz, sizeof (*nodmap));
794 		shamap = (uchar_t *)xcalloc(msiz, sizeof (*shamap));
795 		activemap = (uchar_t *)xcalloc(msiz, sizeof (*activemap));
796 	} else {
797 		if (clrmap == NULL || filmap == NULL || dirmap == NULL ||
798 		    nodmap == NULL || shamap == NULL || activemap == NULL) {
799 			msg(gettext(
800 	    "Internal error: NULL map pointer while re-dumping active files"));
801 			dumpabort();
802 			/*NOTREACHED*/
803 		}
804 		bzero(clrmap, msiz);
805 		bzero(filmap, msiz);
806 		bzero(dirmap, msiz);
807 		bzero(nodmap, msiz);
808 		bzero(shamap, msiz);
809 		/* retain active map */
810 	}
811 
812 	dumpstate = DS_INIT;
813 	dumptoarchive = 1;
814 
815 	/*
816 	 * Read cylinder group inode-used bitmaps to avoid reading clear inodes.
817 	 */
818 	{
819 		uchar_t *clrp = clrmap;
820 		struct cg *cgp =
821 		    (struct cg *)xcalloc((uint_t)sblock->fs_cgsize, 1);
822 
823 		for (i = 0; i < sblock->fs_ncg; i++) {
824 			bread(fsbtodb(sblock, cgtod(sblock, i)),
825 			    (uchar_t *)cgp, sblock->fs_cgsize);
826 			bcopy(cg_inosused(cgp), clrp,
827 			    (int)sblock->fs_ipg / NBBY);
828 			clrp += sblock->fs_ipg / NBBY;
829 		}
830 		free((char *)cgp);
831 		/* XXX right-shift clrmap one bit.  why? */
		/*
		 * The loop walks the copied bytes from last to first; each
		 * byte is shifted right by one and takes the low bit of the
		 * byte after it as its new top bit.
		 */
832 		for (i = 0; clrp > clrmap; i <<= NBBY) {
833 			i |= *--clrp & ((1<<NBBY) - 1);
834 			*clrp = i >> 1;
835 		}
836 	}
837 
838 	if (!printsize) {
839 		msgp = gettext("Mapping (Pass I) [regular files]\n");
840 		msg(msgp);
841 	}
842 
843 	ino = 0;
844 #ifdef PARTIAL
845 	if (partial_mark(argc, argv)) {
846 #endif /* PARTIAL */
847 		if (!doingactive)
848 			pass(mark, clrmap);	/* mark updates 'x'_esize */
849 		else
850 			pass(active_mark, clrmap);	/* updates 'x'_esize */
851 #ifdef PARTIAL
852 	}
853 #endif /* PARTIAL */
	/* Repeat the directory pass until a pass adds nothing new. */
854 	do {
855 		if (!printsize) {
856 			msgp = gettext("Mapping (Pass II) [directories]\n");
857 			msg(msgp);
858 		}
859 		nadded = 0;
860 		ino = 0;
861 		pass(add, dirmap);
862 	} while (nadded);
863 
864 	ino = 0; /* adjust estimated size for shadow inodes */
865 	pass(markshad, nodmap);
866 	ino = 0;
867 	pass(estshad, shamap);
868 	freeshad();
869 
870 	bmapest(clrmap);
871 	bmapest(nodmap);
872 	esize = o_esize + f_esize;
	/* fetapes stays 0.0 unless a media size was given (tsize != 0). */
873 	if (diskette) {
874 		/* estimate number of floppies */
875 		if (tsize != 0)
876 			fetapes = (double)(esize + ntrec) / (double)tsize;
877 	} else if (cartridge) {
878 		/*
879 		 * Estimate number of tapes, assuming streaming stops at
880 		 * the end of each block written, and not in mid-block.
881 		 * Assume no erroneous blocks; this can be compensated for
882 		 * with an artificially low tape size.
883 		 */
884 		tenthsperirg = 16;	/* actually 15.48, says Archive */
885 		if (tsize != 0)
886 			fetapes = ((double)esize /* blocks */
887 			    * (tp_bsize		/* bytes/block */
888 			    * (1.0/density))	/* 0.1" / byte */
889 			    +
890 			    (double)esize	/* blocks */
891 			    * (1.0/ntrec)	/* streaming-stops per block */
892 			    * tenthsperirg)	/* 0.1" / streaming-stop */
893 			    * (1.0 / tsize);	/* tape / 0.1" */
894 	} else {
895 		/* Estimate number of tapes, for old fashioned 9-track tape */
896 #ifdef sun
897 		/* sun has long irg's */
898 		tenthsperirg = (density == 625) ? 6 : 12;
899 #else
900 		tenthsperirg = (density == 625) ? 5 : 8;
901 #endif
902 		if (tsize != 0)
903 			fetapes = ((double)esize /* blocks */
904 			    * (tp_bsize		/* bytes / block */
905 			    * (1.0/density))	/* 0.1" / byte */
906 			    +
907 			    (double)esize	/* blocks */
908 			    * (1.0/ntrec)	/* IRG's / block */
909 			    * tenthsperirg)	/* 0.1" / IRG */
910 			    * (1.0 / tsize);	/* tape / 0.1" */
911 	}
912 
913 	etapes = fetapes;	/* truncating assignment */
914 	etapes++;
915 	/* count the nodemap on each additional tape */
916 	for (i = 1; i < etapes; i++)
917 		bmapest(nodmap);
918 	/*
919 	 * If the above bmapest is called, it changes o_esize and f_esize.
920 	 * So we will recalculate esize here anyway to make sure.
921 	 * Also, add tape headers and trailer records.
922 	 */
923 	esize = o_esize + f_esize + etapes + ntrec;
924 
925 	/*
926 	 * If the estimated number of tp_bsize tape blocks is greater than
927 	 * INT_MAX we have to adjust tp_bsize and ntrec to handle
928 	 * the larger dump.  esize is an estimate, so we 'fudge'
929 	 * INT_MAX a little.  If tp_bsize is adjusted, it will be adjusted
930 	 * to the size needed for this dump (2048, 4096, 8192, ...)
931 	 */
932 	if (esize > (INT_MAX - FUDGE_FACTOR)) { /* esize is too big */
933 		forceflag++;
934 		esize_shift =
935 		    ((esize + (INT_MAX - FUDGE_FACTOR) - 1)/
936 		    ((u_offset_t)(INT_MAX - FUDGE_FACTOR))) - 1;
937 		if ((esize_shift > ESIZE_SHIFT_MAX) || (ntrec == 0)) {
938 			msgp = gettext(
939 	"Block factor %d ('b' flag) is too small for this size dump.");
940 			msg(msgp, saved_ntrec);
941 			dumpabort();
942 			/*NOTREACHED*/
943 		}
944 		/*
945 		 * recalculate esize from:
946 		 * o_esize - header tape records
947 		 * (f_esize + (num_mult -1)) >> esize_shift - new non-header
948 		 *	tape records for files/maps
949 		 * etapes - TS_TAPE records
950 		 * ntrec - TS_END records
951 		 *
952 		 * ntrec is adjusted so a tape record is still 'b' flag
953 		 * number of DEV_BSIZE (512) in size
954 		 */
955 		new_mult = (tp_bsize << esize_shift)/tp_bsize;
956 		tp_bsize = (tp_bsize << esize_shift);
957 		esize = o_esize + ((f_esize +
958 		    (new_mult - 1)) >> esize_shift) + etapes + ntrec;
959 		ntrec = (saved_ntrec/(tp_bsize/DEV_BSIZE));
960 	}
961 	if (forceflag != 0) {
962 		msgp = gettext(
963 		    "Forcing larger tape block size (%d).\n");
964 		msg(msgp, tp_bsize);
965 	}
966 	alloctape();			/* allocate tape buffers */
967 
968 	assert((tp_bsize / DEV_BSIZE != 0) && (tp_bsize % DEV_BSIZE == 0));
969 	/*
970 	 * If all we wanted was the size estimate,
971 	 * just print it out and exit.
972 	 */
973 	if (printsize) {
974 		(void) printf("%llu\n", esize * tp_bsize);
975 		Exit(0);
976 	}
977 
978 	if (tsize != 0) {
979 		if (diskette)
980 			msgp = gettext(
981 			    "Estimated %lld blocks (%s) on %3.2f diskettes.\n");
982 		else
983 			msgp = gettext(
984 			    "Estimated %lld blocks (%s) on %3.2f tapes.\n");
985 
986 		msg(msgp,
987 		    (esize*(tp_bsize/DEV_BSIZE)), mb(esize), fetapes);
988 	} else {
989 		msgp = gettext("Estimated %lld blocks (%s).\n");
990 		msg(msgp, (esize*(tp_bsize/DEV_BSIZE)), mb(esize));
991 	}
992 
993 	dumpstate = DS_CLRI;
994 
995 	otape(1);			/* bitmap is the first to tape write */
996 	*telapsed = 0;
997 	(void) time(tstart_writing);
998 
999 	/* filmap indicates all non-directory inodes */
1000 	{
1001 		uchar_t *np, *fp, *dp;
1002 		np = nodmap;
1003 		dp = dirmap;
1004 		fp = filmap;
1005 		for (i = 0; i < msiz; i++)
1006 			*fp++ = *np++ ^ *dp++;
1007 	}
1008 
	/*
	 * Each case below falls through to the next, so entering the
	 * switch at any state carries on with the rest of the dump from
	 * that point.
	 */
1009 	while (dumpstate != DS_DONE) {
1010 		/*
1011 		 * When we receive EOT notification from
1012 		 * the writer, the signal handler calls
1013 		 * rollforward and then jumps here.
1014 		 */
1015 		(void) setjmp(checkpoint_buf);
1016 		switch (dumpstate) {
1017 		case DS_INIT:
1018 			/*
1019 			 * We get here if a tape error occurred
1020 			 * after releasing the name lock but before
1021 			 * the volume containing the last of the
1022 			 * dir info was completed.  We have to start
1023 			 * all over in this case.
1024 			 */
1025 			{
1026 				char *rmsg = gettext(
1027 		"Warning - output error occurred after releasing name lock\n\
1028 \tThe dump will restart\n");
1029 				msg(rmsg);
1030 				goto restart;
1031 			}
1032 			/* NOTREACHED */
1033 		case DS_START:
1034 		case DS_CLRI:
1035 			ino = UFSROOTINO;
1036 			dumptoarchive = 1;
1037 			bitmap(clrmap, TS_CLRI);
1038 			nextstate(DS_BITS);
1039 			/* FALLTHROUGH */
1040 		case DS_BITS:
1041 			ino = UFSROOTINO;
1042 			dumptoarchive = 1;
1043 			if (BIT(UFSROOTINO, nodmap))	/* empty dump check */
1044 				bitmap(nodmap, TS_BITS);
1045 			nextstate(DS_DIRS);
1046 			if (!doingverify) {
1047 				msgp = gettext(
1048 					"Dumping (Pass III) [directories]\n");
1049 				msg(msgp);
1050 			}
1051 			/* FALLTHROUGH */
1052 		case DS_DIRS:
1053 			dumptoarchive = 1;
1054 			pass(dirdump, dirmap);
1055 			nextstate(DS_FILES);
1056 			if (!doingverify) {
1057 				msgp = gettext(
1058 					"Dumping (Pass IV) [regular files]\n");
1059 				msg(msgp);
1060 			}
1061 			/* FALLTHROUGH */
1062 		case DS_FILES:
1063 			dumptoarchive = 0;
1064 
1065 			pass(lf_dump, filmap);
1066 
1067 			flushcmds();
1068 			dumpstate = DS_END;	/* don't reset ino */
1069 			/* FALLTHROUGH */
1070 		case DS_END:
1071 			dumptoarchive = 1;
1072 			spcl.c_type = TS_END;
			/* Write ntrec TS_END records to close the dump. */
1073 			for (i = 0; i < ntrec; i++) {
1074 				spclrec();
1075 			}
1076 			flusht();
1077 			break;
1078 		case DS_DONE:
1079 			break;
1080 		default:
1081 			msg(gettext("Internal state error\n"));
1082 			dumpabort();
1083 			/*NOTREACHED*/
1084 		}
1085 	}
1086 
1087 	if ((! doingactive) && (! active))
1088 		trewind();
1089 	if (verify && !doingverify) {
1090 		msgp = gettext("Finished writing last dump volume\n");
1091 		msg(msgp);
1092 		Exit(X_VERIFY);
1093 	}
	/* Build the one-line summary in msgbuf, then print it via msg(). */
1094 	if (spcl.c_volume > 1)
1095 		(void) snprintf(msgbuf, sizeof (msgbuf),
1096 		    gettext("%lld blocks (%s) on %ld volumes"),
1097 		    ((uint64_t)spcl.c_tapea*(tp_bsize/DEV_BSIZE)),
1098 		    mb((u_offset_t)(unsigned)(spcl.c_tapea)),
1099 		    spcl.c_volume);
1100 	else
1101 		(void) snprintf(msgbuf, sizeof (msgbuf),
1102 		    gettext("%lld blocks (%s) on 1 volume"),
1103 		    ((uint64_t)spcl.c_tapea*(tp_bsize/DEV_BSIZE)),
1104 		    mb((u_offset_t)(unsigned)(spcl.c_tapea)));
	/* The rate is appended only when timeclock() reports non-zero. */
1105 	if (timeclock((time_t)0) != (time_t)0) {
1106 		(void) snprintf(kbsbuf, sizeof (kbsbuf),
1107 		    gettext(" at %ld KB/sec"),
1108 		    (long)(((float)spcl.c_tapea / (float)timeclock((time_t)0))
1109 			* 1000.0));
1110 		(void) strcat(msgbuf, kbsbuf);
1111 	}
1112 	(void) strcat(msgbuf, "\n");
1113 	msg(msgbuf);
1114 	(void) timeclock((time_t)-1);
1115 
1116 	if (archive)
1117 		msg(gettext("Archiving dump to `%s'\n"), archivefile);
	/*
	 * NOTE(review): `active' presumably counts files that changed while
	 * they were being dumped; when it is set (and we are not verifying)
	 * the dump is restarted for another pass -- confirm against
	 * activepass().
	 */
1118 	if (active && !verify) {
1119 		nextstate(DS_INIT);
1120 		activepass();
1121 		goto restart;
1122 	}
1123 	msgp = gettext("DUMP IS DONE\n");
1124 	msg(msgp);
1125 	broadcast(msgp);
1126 	if (! doingactive)
1127 		putitime();
1128 	Exit(X_FINOK);
1129 #ifdef lint
1130 	return (0);
1131 #endif
1132 }
1133 
1134 void
1135 sigAbort(sig)
1136 	int	sig;
1137 {
1138 	char	*sigtype;
1139 
1140 	switch (sig) {
1141 	case SIGHUP:
1142 		sigtype = "SIGHUP";
1143 		break;
1144 	case SIGTRAP:
1145 		sigtype = "SIGTRAP";
1146 		break;
1147 	case SIGFPE:
1148 		sigtype = "SIGFPE";
1149 		break;
1150 	case SIGBUS:
1151 		msg(gettext("%s  ABORTING!\n"), "SIGBUS()");
1152 		(void) signal(SIGUSR2, SIG_DFL);
1153 		abort();
1154 		/*NOTREACHED*/
1155 	case SIGSEGV:
1156 		msg(gettext("%s  ABORTING!\n"), "SIGSEGV()");
1157 		(void) signal(SIGUSR2, SIG_DFL);
1158 		abort();
1159 		/*NOTREACHED*/
1160 	case SIGALRM:
1161 		sigtype = "SIGALRM";
1162 		break;
1163 	case SIGTERM:
1164 		sigtype = "SIGTERM";
1165 		break;
1166 	case SIGPIPE:
1167 		msg(gettext("Broken pipe\n"));
1168 		dumpabort();
1169 		/*NOTREACHED*/
1170 	default:
1171 		sigtype = "SIGNAL";
1172 		break;
1173 	}
1174 	msg(gettext("%s()  try rewriting\n"), sigtype);
1175 	if (pipeout) {
1176 		msg(gettext("Unknown signal, Cannot recover\n"));
1177 		dumpabort();
1178 		/*NOTREACHED*/
1179 	}
1180 	msg(gettext("Rewriting attempted as response to unknown signal.\n"));
1181 	(void) fflush(stderr);
1182 	(void) fflush(stdout);
1183 	close_rewind();
1184 	Exit(X_REWRITE);
1185 }
1186 
1187 /* Note that returned value is malloc'd if != cp && != NULL */
1188 char *
1189 rawname(cp)
1190 	char *cp;
1191 {
1192 	struct stat64 st;
1193 	char *dp;
1194 	extern char *getfullrawname();
1195 
1196 	if (stat64(cp, &st) < 0 || (st.st_mode & S_IFMT) != S_IFBLK)
1197 		return (cp);
1198 
1199 	dp = getfullrawname(cp);
1200 	if (dp == 0)
1201 		return (0);
1202 	if (*dp == '\0') {
1203 		free(dp);
1204 		return (0);
1205 	}
1206 
1207 	if (stat64(dp, &st) < 0 || (st.st_mode & S_IFMT) != S_IFCHR) {
1208 		free(dp);
1209 		return (cp);
1210 	}
1211 
1212 	return (dp);
1213 }
1214 
1215 static char *
1216 mb(blks)
1217 	u_offset_t blks;
1218 {
1219 	static char buf[16];
1220 
1221 	if (blks < 1024)
1222 		(void) snprintf(buf, sizeof (buf), "%lldKB", blks);
1223 	else
1224 		(void) snprintf(buf, sizeof (buf), "%.2fMB",
1225 		    ((double)(blks*tp_bsize)) / (double)(1024*1024));
1226 	return (buf);
1227 }
1228 
1229 #ifdef signal
/*
 * signal()-shaped wrapper around sigaction(): installs `act' for `sig'
 * with an empty signal mask and SA_RESTART set.
 *
 * Returns the previously installed handler, or SIG_ERR if sigaction()
 * fails.
 */
void (*nsignal(sig, act))(int)
	int	sig;
	void	(*act)(int);
{
	struct sigaction newact, oldact;

	(void) sigemptyset(&newact.sa_mask);
	newact.sa_flags = SA_RESTART;
	newact.sa_handler = act;

	if (sigaction(sig, &newact, &oldact) < 0)
		return (SIG_ERR);

	return (oldact.sa_handler);
}
1243 #endif
1244 
1245 static void
1246 nextstate(state)
1247 	int	state;
1248 {
1249 	/* LINTED assigned value never used - kept for documentary purposes */
1250 	dumpstate = state;
1251 	/* LINTED assigned value never used - kept for documentary purposes */
1252 	ino = 0;
1253 	/* LINTED assigned value never used - kept for documentary purposes */
1254 	pos = 0;
1255 	leftover = 0;
1256 }
1257 
1258 /*
1259  * timeclock() function, for keeping track of how much time we've spent
1260  * writing to the tape device.  it always returns the amount of time
1261  * already spent, in milliseconds.  if you pass it a positive, then that's
1262  * telling it that we're writing, so the time counts.  if you pass it a
1263  * zero, then that's telling it we're not writing; perhaps we're waiting
1264  * for user input.
1265  *
1266  * a state of -1 resets everything.
1267  */
1268 time32_t
1269 timeclock(state)
1270 	time32_t state;
1271 {
1272 	static int *currentState = NULL;
1273 	static struct timeval *clockstart;
1274 	static time32_t *emilli;
1275 
1276 	struct timeval current[1];
1277 	int fd, saverr;
1278 
1279 #ifdef DEBUG
1280 	fprintf(stderr, "pid=%d timeclock ", getpid());
1281 	if (state == (time32_t)-1)
1282 		fprintf(stderr, "cleared\n");
1283 	else if (state > 0)
1284 		fprintf(stderr, "ticking\n");
1285 	else
1286 		fprintf(stderr, "paused\n");
1287 #endif /* DEBUG */
1288 
1289 	/* if we haven't setup the shared memory, init */
1290 	if (currentState == (int *)NULL) {
1291 		if ((fd = open("/dev/zero", O_RDWR)) < 0) {
1292 			saverr = errno;
1293 			msg(gettext("Cannot open `%s': %s\n"),
1294 				"/dev/zero", strerror(saverr));
1295 			dumpabort();
1296 			/*NOTREACHED*/
1297 		}
1298 		/*LINTED [mmap always returns an aligned value]*/
1299 		currentState = (int *)mmap((char *)0, getpagesize(),
1300 			PROT_READ|PROT_WRITE, MAP_SHARED, fd, (off_t)0);
1301 		if (currentState == (int *)-1) {
1302 			saverr = errno;
1303 			msg(gettext(
1304 				"Cannot memory map monitor variables: %s\n"),
1305 				strerror(saverr));
1306 			dumpabort();
1307 			/*NOTREACHED*/
1308 		}
1309 		(void) close(fd);
1310 
1311 		/* LINTED currentState is sufficiently aligned */
1312 		clockstart = (struct timeval *)(currentState + 1);
1313 		emilli = (time32_t *)(clockstart + 1);
1314 		/* Note everything is initialized to zero via /dev/zero */
1315 	}
1316 
1317 	if (state == (time32_t)-1) {
1318 		bzero(clockstart, sizeof (*clockstart));
1319 		*currentState = 0;
1320 		*emilli = (time32_t)0;
1321 		return (0);
1322 	}
1323 
1324 	(void) gettimeofday(current, NULL);
1325 
1326 	if (*currentState != 0) {
1327 		current->tv_usec += 1000000;
1328 		current->tv_sec--;
1329 
1330 		/* LINTED: result will fit in a time32_t */
1331 		*emilli += (current->tv_sec - clockstart->tv_sec) * 1000;
1332 		/* LINTED: result will fit in a time32_t */
1333 		*emilli += (current->tv_usec - clockstart->tv_usec) / 1000;
1334 	}
1335 
1336 	if (state != 0)
1337 		bcopy(current, clockstart, sizeof (current));
1338 
1339 	*currentState = state;
1340 
1341 	return (*emilli);
1342 }
1343 
/*
 * Compare the identity-related fields of two stat64 structures.
 *
 * Returns 0 when device, inode, mode, link count, owner, group, rdev,
 * ctime, mtime, block size and block count all match, and 1 otherwise.
 * The access time and the size are not compared.
 */
static int
statcmp(const struct stat64 *left, const struct stat64 *right)
{
	/* Which object is it? */
	if (left->st_dev != right->st_dev ||
	    left->st_ino != right->st_ino ||
	    left->st_rdev != right->st_rdev)
		return (1);

	/* Type, permissions, links and ownership. */
	if (left->st_mode != right->st_mode ||
	    left->st_nlink != right->st_nlink ||
	    left->st_uid != right->st_uid ||
	    left->st_gid != right->st_gid)
		return (1);

	/* Change and modification stamps. */
	if (left->st_ctim.tv_sec != right->st_ctim.tv_sec ||
	    left->st_ctim.tv_nsec != right->st_ctim.tv_nsec ||
	    left->st_mtim.tv_sec != right->st_mtim.tv_sec ||
	    left->st_mtim.tv_nsec != right->st_mtim.tv_nsec)
		return (1);

	/* Allocation. */
	if (left->st_blksize != right->st_blksize ||
	    left->st_blocks != right->st_blocks)
		return (1);

	return (0);
}
1367 
1368 /*
1369  * Safely open a file or device.
1370  */
1371 static int
1372 safe_open_common(const char *filename, int mode, int perms, int device)
1373 {
1374 	int fd;
1375 	int working_mode;
1376 	int saverr;
1377 	char *errtext;
1378 	struct stat64 pre_stat, pre_lstat;
1379 	struct stat64 post_stat, post_lstat;
1380 
1381 	/*
1382 	 * Don't want to be spoofed into trashing something we
1383 	 * shouldn't, thus the following rigamarole.  If it doesn't
1384 	 * exist, we create it and proceed.  Otherwise, require that
1385 	 * what's there be a real file with no extraneous links and
1386 	 * owned by whoever ran us.
1387 	 *
1388 	 * The silliness with using both lstat() and fstat() is to avoid
1389 	 * race-condition games with someone replacing the file with a
1390 	 * symlink after we've opened it.  If there was an flstat(),
1391 	 * we wouldn't need the fstat().
1392 	 *
1393 	 * The initial open with the hard-coded flags is ok even if we
1394 	 * are intending to open only for reading.  If it succeeds,
1395 	 * then the file did not exist, and we'll synthesize an appropriate
1396 	 * complaint below.  Otherwise, it does exist, so we won't be
1397 	 * truncating it with the open.
1398 	 */
1399 	if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_EXCL|O_LARGEFILE,
1400 	    perms)) < 0) {
1401 		if (errno == EEXIST) {
1402 			if (lstat64(filename, &pre_lstat) < 0) {
1403 				return (-1);
1404 			}
1405 
1406 			if (stat64(filename, &pre_stat) < 0) {
1407 				return (-1);
1408 			}
1409 
1410 			working_mode = mode & (O_WRONLY|O_RDWR|O_RDONLY);
1411 			working_mode |= O_LARGEFILE;
1412 			if ((fd = open(filename, working_mode)) < 0) {
1413 				if (errno == ENOENT) {
1414 					errtext = gettext(
1415 "Unexpected condition detected: %s used to exist, but doesn't any longer\n");
1416 					msg(errtext, filename);
1417 					syslog(LOG_WARNING, errtext, filename);
1418 					errno = ENOENT;
1419 				}
1420 				return (-1);
1421 			}
1422 
1423 			if (lstat64(filename, &post_lstat) < 0) {
1424 				saverr = errno;
1425 				(void) close(fd);
1426 				errno = saverr;
1427 				return (-1);
1428 			}
1429 
1430 			if (fstat64(fd, &post_stat) < 0) {
1431 				saverr = errno;
1432 				(void) close(fd);
1433 				errno = saverr;
1434 				return (-1);
1435 			}
1436 
1437 			/*
1438 			 * Can't just use memcmp(3C), because the access
1439 			 * time is updated by open(2).
1440 			 */
1441 			if (statcmp(&pre_lstat, &post_lstat) != 0) {
1442 				errtext = gettext(
1443 	    "Unexpected change detected: %s's lstat(2) information changed\n");
1444 				msg(errtext, filename);
1445 				syslog(LOG_WARNING, errtext, filename);
1446 				errno = EPERM;
1447 				return (-1);
1448 			}
1449 
1450 			if (statcmp(&pre_stat, &post_stat) != 0) {
1451 				errtext = gettext(
1452 	    "Unexpected change detected: %s's stat(2) information changed\n"),
1453 				msg(errtext, filename);
1454 				syslog(LOG_WARNING, errtext, filename);
1455 				errno = EPERM;
1456 				return (-1);
1457 			}
1458 
1459 			/*
1460 			 * If inode, device, or type are wrong, bail out.
1461 			 * Note using post_stat instead of post_lstat for the
1462 			 * S_ISCHR() test.  This is to allow the /dev ->
1463 			 * /devices bit to work, as long as the final target
1464 			 * is a character device (i.e., raw disk or tape).
1465 			 */
1466 			if (device && !(S_ISCHR(post_stat.st_mode)) &&
1467 			    !(S_ISFIFO(post_stat.st_mode)) &&
1468 			    !(S_ISREG(post_lstat.st_mode))) {
1469 				errtext = gettext(
1470 	    "Unexpected condition detected: %s is not a supported device\n"),
1471 				msg(errtext, filename);
1472 				syslog(LOG_WARNING, errtext, filename);
1473 				(void) close(fd);
1474 				errno = EPERM;
1475 				return (-1);
1476 			} else if (!device &&
1477 			    (!S_ISREG(post_lstat.st_mode) ||
1478 			    (post_stat.st_ino != post_lstat.st_ino) ||
1479 			    (post_stat.st_dev != post_lstat.st_dev))) {
1480 				errtext = gettext(
1481 	    "Unexpected condition detected: %s is not a regular file\n"),
1482 				msg(errtext, filename);
1483 				syslog(LOG_WARNING, errtext, filename);
1484 				(void) close(fd);
1485 				errno = EPERM;
1486 				return (-1);
1487 			}
1488 
1489 			/*
1490 			 * Bad link count implies someone's linked our
1491 			 * target to something else, which we probably
1492 			 * shouldn't step on.
1493 			 */
1494 			if (post_lstat.st_nlink != 1) {
1495 				errtext = gettext(
1496 	    "Unexpected condition detected: %s must have exactly one link\n"),
1497 				msg(errtext, filename);
1498 				syslog(LOG_WARNING, errtext, filename);
1499 				(void) close(fd);
1500 				errno = EPERM;
1501 				return (-1);
1502 			}
1503 			/*
1504 			 * Root might make a file, but non-root might
1505 			 * need to open it.  If the permissions let us
1506 			 * get this far, then let it through.
1507 			 */
1508 			if (post_lstat.st_uid != getuid() &&
1509 			    post_lstat.st_uid != 0) {
1510 				errtext = gettext(
1511 "Unsupported condition detected: %s must be owned by uid %ld or 0\n"),
1512 				msg(errtext, filename, (long)getuid());
1513 				syslog(LOG_WARNING, errtext, filename,
1514 				    (long)getuid());
1515 				(void) close(fd);
1516 				errno = EPERM;
1517 				return (-1);
1518 			}
1519 			if (mode & O_TRUNC) {
1520 				if (ftruncate(fd, (off_t)0) < 0) {
1521 					msg("ftruncate(%s): %s\n",
1522 					    filename, strerror(errno));
1523 					(void) close(fd);
1524 					return (-1);
1525 				}
1526 			}
1527 		} else {
1528 			/*
1529 			 * Didn't exist, but couldn't open it.
1530 			 */
1531 			return (-1);
1532 		}
1533 	} else {
1534 		/*
1535 		 * If truncating open succeeded for a read-only open,
1536 		 * bail out, as we really shouldn't have succeeded.
1537 		 */
1538 		if (mode & O_RDONLY) {
1539 			/* Undo the O_CREAT */
1540 			(void) unlink(filename);
1541 			msg("open(%s): %s\n",
1542 			    filename, strerror(ENOENT));
1543 			(void) close(fd);
1544 			errno = ENOENT;
1545 			return (-1);
1546 		}
1547 	}
1548 
1549 	return (fd);
1550 }
1551 
/*
 * Safely open a file: safe_open_common() with the device flag clear.
 * Returns whatever safe_open_common() returns.
 */
int
safe_file_open(const char *filename, int mode, int perms)
{
	const int is_device = 0;

	return (safe_open_common(filename, mode, perms, is_device));
}
1560 
/*
 * Safely open a device: safe_open_common() with the device flag set.
 * Returns whatever safe_open_common() returns.
 */
int
safe_device_open(const char *filename, int mode, int perms)
{
	const int is_device = 1;

	return (safe_open_common(filename, mode, perms, is_device));
}
1569 
/*
 * STDIO version of safe_open
 *
 * Accepts only the modes "r", "r+", and "w"; any other mode string is
 * reported as an internal error and NULL is returned.
 *
 * Returns a stream on success.  Returns NULL if the file could not be
 * opened safely or could not be turned into a stream; in those cases
 * the caller is expected to report the error, and no descriptor is
 * left open.
 */
FILE *
safe_fopen(const char *filename, const char *smode, int perms)
{
	FILE *fp;
	int fd;
	int bmode;
	int saverr;

	/*
	 * Compare whole strings so that something like "rb" is rejected
	 * rather than slipping through with bmode never assigned.
	 */
	if (strcmp(smode, "r") == 0) {
		bmode = O_RDONLY;
	} else if (strcmp(smode, "r+") == 0) {
		bmode = O_RDWR;
	} else if (strcmp(smode, "w") == 0) {
		bmode = O_WRONLY;
	} else {
		msg(gettext("internal error: safe_fopen: invalid mode `%s'\n"),
		    smode);
		return (NULL);
	}

	fd = safe_file_open(filename, bmode, perms);

	/*
	 * caller is expected to report error.
	 */
	if (fd < 0)
		return ((FILE *)NULL);

	fp = fdopen(fd, smode);
	if (fp == NULL) {
		/* Don't leak the descriptor; keep fdopen()'s errno. */
		saverr = errno;
		(void) close(fd);
		errno = saverr;
	}

	return (fp);
}
1606 
1607 void
1608 child_chdir(void)
1609 {
1610 	char name[MAXPATHLEN];
1611 
1612 	if (debug_chdir != NULL) {
1613 		snprintf(name, sizeof (name), "%s/%ld",
1614 		    debug_chdir, (long)getpid());
1615 		if (mkdir(name, 0755) < 0)
1616 			msg("mkdir(%s): %s", name, strerror(errno));
1617 		if (chdir(name) < 0)
1618 			msg("chdir(%s): %s", name, strerror(errno));
1619 	}
1620 }
1621