xref: /titanic_44/usr/src/cmd/backup/dump/dumptape.c (revision ea8dc4b6d2251b437950c0056bc626b311c73c27)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * Portions of this source code were derived from Berkeley 4.3 BSD
31  * under license from the Regents of the University of California.
32  */
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 #include "dump.h"
37 #include <rmt.h>
38 #include <setjmp.h>
39 #include <sys/fdio.h>
40 #include <sys/mkdev.h>
41 #include <assert.h>
42 #include <limits.h>
43 
44 #define	SLEEPMS		50
45 
46 static uint_t writesize;	/* size of malloc()ed buffer for tape */
47 static ino_t inos[TP_NINOS];	/* starting inodes on each tape */
48 
49 /*
50  * The req structure is used to pass commands from the parent
51  * process through the pipes to the slave processes.  It comes
52  * in two flavors, depending on which mode dump is operating under:
53  * an inode request (on-line mode) and a disk block request ("old" mode).
54  */
55 /*
56  * The inode request structure is used during on-line mode.
57  * The master passes inode numbers and starting offsets to
58  * the slaves.  The tape writer passes out the current inode,
59  * offset, and number of tape records written after completing a volume.
60  */
/*
 * On-line mode payload of a struct req.  Reached through the ir_*
 * accessor macros defined after struct req.
 */
struct ireq {
	ino_t	inumber;	/* inode number to open/dump */
	long	igen;		/* inode generation number */
	off_t	offset;		/* starting offset in inode */
	int	count;		/* count for 1st spclrec */
};
67 /*
68  * The block request structure is used in off-line mode to pass
69  * commands to dump disk blocks from the parent process through
70  * the pipes to the slave processes.
71  */
/*
 * Off-line mode payload of a struct req.  Reached through the br_*
 * accessor macros defined after struct req.
 *
 * spclrec is declared with a single element, but alloctape() sizes
 * every request as sizeof (struct req) + tp_bsize - sizeof (long),
 * and dospcl() copies tp_bsize bytes into it.
 */
struct breq {
	diskaddr_t dblk;		/* disk address to read */
	size_t	size;		/* number of bytes to read from disk */
	ulong_t	spclrec[1];	/* actually longer */
};
77 
/*
 * Instruction packet written by the master to a slave's pipe (see
 * toslave()).  toslave() clears aflag on every request and then sets
 * BUF_ARCHIVE in it when dumptoarchive is on.
 */
struct req {
	short	aflag;		/* write data to archive process as well */
	short	tflag;		/* begin new tape */
	union	reqdata {
		struct ireq ino;	/* used for on-line mode */
		struct breq blks;	/* used for off-line mode */
	} data;
};
86 
87 #define	ir_inumber	data.ino.inumber
88 #define	ir_igen		data.ino.igen
89 #define	ir_offset	data.ino.offset
90 #define	ir_count	data.ino.count
91 
92 #define	br_dblk		data.blks.dblk
93 #define	br_size		data.blks.size
94 #define	br_spcl		data.blks.spclrec
95 
96 static int reqsiz = 0;	/* alloctape will initialize */
97 
98 #define	SLAVES 3
/*
 * Per-slave bookkeeping kept by the master.  The same layout is used
 * for the checkpoint record (chkpt) that rollforward() reads from the
 * writer: there sl_inos, sl_offset, sl_count, sl_state and sl_tapea
 * are copied into ino, pos, leftover, dumpstate and blockswritten.
 */
struct slaves {
	int	sl_slavefd;	/* pipe from master to slave */
	pid_t	sl_slavepid;	/* slave pid; used by killall() */
	ino_t	sl_inos;	/* inos, if this record starts tape */
	int	sl_offset;	/* logical blocks written for object */
	int	sl_count;	/* logical blocks left in spclrec */
	int	sl_tapea;	/* header number, if starting tape */
	int	sl_firstrec;	/* number of first block on tape */
	int	sl_state;	/* dump output state */
	struct	req *sl_req;	/* instruction packet to slave */
};
110 static struct slaves slaves[SLAVES];	/* one per slave */
111 static struct slaves *slp;	/* pointer to current slave */
112 static struct slaves chkpt;	/* checkpointed data */
113 
/*
 * Descriptor for one tp_bsize-sized slot of the shared output buffer.
 * alloctape() points b_data at successive tp_bsize offsets of the
 * buffer area; b_flags holds a combination of the BUF_* values.
 */
struct bdesc {
	char	*b_data;	/* pointer to buffer data */
	int	b_flags;	/* flags (see below) */
};
118 
119 /*
120  * The following variables are in shared memory, and must be
121  * explicitly checkpointed and/or reset.
122  */
123 static caddr_t shared;		/* pointer to block of shared memory */
124 static struct bdesc *bufp;	/* buffer descriptors */
125 static struct bdesc **current;	/* output buffer to fill */
126 static int *tapea;		/* logical record count */
127 
128 #ifdef INSTRUMENT
129 static int	*readmissp;	/* number of times writer was idle */
130 static int	*idle;		/* number of times slaves were idle */
131 #endif	/* INSTRUMENT */
132 
133 /*
134  * Buffer flags
135  */
136 #define	BUF_EMPTY	0x0	/* nothing in buffer */
137 #define	BUF_FULL	0x1	/* data in buffer */
138 #define	BUF_SPCLREC	0x2	/* contains special record */
139 #define	BUF_ARCHIVE	0x4	/* dump to archive */
140 
141 static int recsout;		/* number of req's sent to slaves */
142 static int totalrecsout;	/* total number of req's sent to slaves */
143 static int rotor;		/* next slave to be instructed */
144 static pid_t master;		/* pid of master, for sending error signals */
145 static int writer = -1;		/* fd of tape writer */
146 static pid_t writepid;		/* pid of tape writer */
147 static int arch;		/* fd of output archiver */
148 static pid_t archivepid;	/* pid of output archiver */
149 static int archivefd;		/* fd of archive file (proper) */
150 static offset_t lf_archoffset;	/* checkpointed offset into archive file */
151 
152 int caught;			/* caught signal -- imported by mapfile() */
153 
154 #ifdef DEBUG
155 extern	int xflag;
156 #endif
157 
158 #ifdef __STDC__
159 static void cmdwrterr(void);
160 static void cmdrderr(void);
161 static void freetape(void);
162 static void bufclear(void);
163 static pid_t setuparchive(void);
164 static pid_t setupwriter(void);
165 static void nextslave(void);
166 static void tperror(int);
167 static void rollforward(int);
168 static void nap(int);
169 static void alrm(int);
170 static void just_rewind(void);
171 static void killall(void);
172 static void proceed(int);
173 static void die(int);
174 static void enslave(void);
175 static void wait_our_turn(void);
176 static void dumpoffline(int, pid_t, int);
177 static void onxfsz(int);
178 static void dowrite(int);
179 static void checkpoint(struct bdesc *, int);
180 static ssize_t atomic(int (*)(), int, char *, int);
181 #else
182 static void cmdwrterr();
183 static void cmdrderr();
184 static void freetape();
185 static void bufclear();
186 static pid_t setuparchive();
187 static pid_t setupwriter();
188 static void nextslave();
189 static void tperror();
190 static void rollforward();
191 static void nap();
192 static void alrm();
193 static void just_rewind();
194 static void killall();
195 static void proceed();
196 static void die();
197 static void enslave();
198 static void wait_our_turn();
199 static void dumpoffline();
200 static void onxfsz();
201 static void dowrite();
202 static void checkpoint();
203 static ssize_t atomic();
204 #endif
205 
206 static size_t tapesize;
207 
208 /*
209  * Allocate buffers and shared memory variables.  Tape buffers are
210  * allocated on page boundaries for tape write() efficiency.
211  */
212 void
213 #ifdef __STDC__
214 #else
215 #endif
216 alloctape(void)
217 {
218 	struct slaves *slavep;
219 	ulong_t pgoff = (unsigned)(getpagesize() - 1); /* 2**n - 1 */
220 	int	mapfd;
221 	char	*obuf;
222 	int	saverr;
223 	int	i, j;
224 
225 	writesize = ntrec * tp_bsize;
226 	if (!printsize)
227 		msg(gettext("Writing %d Kilobyte records\n"),
228 			writesize / TP_BSIZE_MIN);
229 
230 	/*
231 	 * set up shared memory seg for here and child
232 	 */
233 	mapfd = open("/dev/zero", O_RDWR);
234 	if (mapfd == -1) {
235 		saverr = errno;
236 		msg(gettext("Cannot open `%s': %s\n"),
237 			"/dev/zero", strerror(saverr));
238 		dumpabort();
239 		/*NOTREACHED*/
240 	}
241 	/*
242 	 * Allocate space such that buffers are page-aligned and
243 	 * pointers are aligned on 4-byte boundaries (for SPARC).
244 	 * This code assumes that (NBUF * writesize) is a multiple
245 	 * of the page size and that pages are aligned on 4-byte
246 	 * boundaries.  Space is allocated as follows:
247 	 *
248 	 *    (NBUF * writesize) for the actual buffers
249 	 *    (pagesize - 1) for padding so the buffers are page-aligned
250 	 *    (NBUF * ntrec * sizeof (struct bdesc)) for each buffer
251 	 *    (n * sizeof (int)) for [n] debugging variables/pointers
252 	 *    (n * sizeof (int)) for [n] miscellaneous variables/pointers
253 	 */
254 	tapesize =
255 	    (NBUF * writesize)				/* output buffers */
256 		/* LINTED: pgoff fits into a size_t */
257 	    + (size_t)pgoff				/* page alignment */
258 							/* buffer descriptors */
259 	    + (((size_t)sizeof (struct bdesc)) * NBUF * ntrec)
260 #ifdef INSTRUMENT
261 	    + (2 * (size_t)sizeof (int *))		/* instrumentation */
262 #endif
263 							/* shared variables */
264 	    + (size_t)sizeof (struct bdesc **)
265 	    + (size_t)sizeof (int *)
266 	    + (3 * (size_t)sizeof (time_t));
267 
268 	shared = mmap((char *)0, tapesize, PROT_READ|PROT_WRITE,
269 	    MAP_SHARED, mapfd, (off_t)0);
270 	if (shared == (caddr_t)-1) {
271 		saverr = errno;
272 		msg(gettext("Cannot memory map output buffers: %s\n"),
273 		    strerror(saverr));
274 		dumpabort();
275 		/*NOTREACHED*/
276 	}
277 	(void) close(mapfd);
278 
279 	/*
280 	 * Buffers and buffer headers
281 	 */
282 	obuf = (char *)(((ulong_t)shared + pgoff) & ~pgoff);
283 	/* LINTED obuf and writesize are aligned */
284 	bufp = (struct bdesc *)(obuf + NBUF*writesize);
285 	/*
286 	 * Shared memory variables
287 	 */
288 	current = (struct bdesc **)&bufp[NBUF*ntrec];
289 	tapea = (int *)(current + 1);
290 	/* LINTED pointer alignment ok */
291 	telapsed = (time_t *)(tapea + 1);
292 	tstart_writing = telapsed + 1;
293 	tschedule = tstart_writing + 1;
294 #ifdef INSTRUMENT
295 	/*
296 	 * Debugging and instrumentation variables
297 	 */
298 	readmissp = (int *)(tschedule + 1);
299 	idle = readmissp + 1;
300 #endif
301 	for (i = 0, j = 0; i < NBUF * ntrec; i++, j += tp_bsize) {
302 		bufp[i].b_data = &obuf[j];
303 	}
304 
305 	reqsiz = sizeof (struct req) + tp_bsize - sizeof (long);
306 	for (slavep = slaves; slavep < &slaves[SLAVES]; slavep++)
307 		slavep->sl_req = (struct req *)xmalloc(reqsiz);
308 
309 	chkpt.sl_offset = 0;		/* start at offset 0 */
310 	chkpt.sl_count = 0;
311 	chkpt.sl_inos = UFSROOTINO;	/* in root inode */
312 	chkpt.sl_firstrec = 1;
313 	chkpt.sl_tapea = 0;
314 }
315 
316 static void
317 #ifdef __STDC__
318 freetape(void)
319 #else
320 freetape()
321 #endif
322 {
323 	if (shared == NULL)
324 		return;
325 	(void) timeclock((time_t)0);
326 	(void) munmap(shared, tapesize);
327 	shared = NULL;
328 }
329 
330 /*
331  * Reset tape state variables -- called
332  * before a pass to dump active files.
333  */
334 void
335 #ifdef __STDC__
336 reset(void)
337 #else
338 reset()
339 #endif
340 {
341 	bufclear();
342 
343 #ifdef INSTRUMENT
344 	(*readmissp) = 0;
345 	(*idle) = 0;
346 #endif
347 
348 	spcl.c_flags = 0;
349 	spcl.c_volume = 0;
350 	tapeno = 0;
351 
352 	chkpt.sl_offset = 0;		/* start at offset 0 */
353 	chkpt.sl_count = 0;
354 	chkpt.sl_inos = UFSROOTINO;	/* in root inode */
355 	chkpt.sl_firstrec = 1;
356 	chkpt.sl_tapea = 0;
357 }
358 
359 static void
360 #ifdef __STDC__
361 bufclear(void)
362 #else
363 bufclear()
364 #endif
365 {
366 	struct bdesc *bp;
367 	int i;
368 
369 	for (i = 0, bp = bufp; i < NBUF * ntrec; i++, bp++)
370 		bp->b_flags = BUF_EMPTY;
371 	if ((caddr_t)current < shared ||
372 	    (caddr_t)current > (shared + tapesize)) {
373 		msg(gettext(
374 	    "bufclear: current pointer out of range of shared memory\n"));
375 		dumpabort();
376 		/*NOTREACHED*/
377 	}
378 	if ((*current != NULL) &&
379 	    (*current < &bufp[0] || *current > &bufp[NBUF*ntrec])) {
380 		/* ANSI string catenation, to shut cstyle up */
381 		msg(gettext("bufclear: current buffer pointer (0x%x) "
382 			"out of range of buffer\naddresses (0x%x - 0x%x)\n"),
383 		    *current, &bufp[0], &bufp[NBUF*ntrec]);
384 		dumpabort();
385 		/*NOTREACHED*/
386 	}
387 	*current = bufp;
388 }
389 
390 /*
391  * Start a process to collect information describing the dump.
392  * This data takes two forms:
393  *    the bitmap and directory information being written to
394  *	the front of the tape (the "archive" file)
395  *    information describing each directory and inode (to
396  *	be included in the database tmp file)
397  * Write the data to the files as it is received so huge file
398  * systems don't cause dump to consume large amounts of memory.
399  */
400 static pid_t
401 setuparchive(void)
402 {
403 	struct slaves *slavep;
404 	int cmd[2];
405 	pid_t pid;
406 	ssize_t size;
407 	char *data;
408 	char *errmsg;
409 	int flags, saverr;
410 	int punt = 0;
411 
412 	/*
413 	 * Both the archive and database tmp files are
414 	 * checkpointed by taking their current offsets
415 	 * (sizes) after completing each volume.  Restoring
416 	 * from a checkpoint involves truncating to the
417 	 * checkpointed size.
418 	 */
419 	if (archive && !doingactive) {
420 		/* It's allowed/expected to exist, so can't use O_EXCL */
421 		archivefd = safe_file_open(archivefile, O_WRONLY, 0600);
422 		if (archivefd < 0) {
423 			saverr = errno;
424 			msg(gettext("Cannot open archive file `%s': %s\n"),
425 			    archivefile, strerror(saverr));
426 			dumpabort();
427 			/*NOTREACHED*/
428 		}
429 
430 		if (lseek64(archivefd, lf_archoffset, 0) < 0) {
431 			saverr = errno;
432 			msg(gettext(
433 				    "Cannot position archive file `%s' : %s\n"),
434 			    archivefile, strerror(saverr));
435 			dumpabort();
436 			/*NOTREACHED*/
437 		}
438 		if (ftruncate64(archivefd, lf_archoffset) < 0) {
439 			saverr = errno;
440 			msg(gettext(
441 				    "Cannot truncate archive file `%s' : %s\n"),
442 			    archivefile, strerror(saverr));
443 			dumpabort();
444 			/*NOTREACHED*/
445 		}
446 	}
447 
448 	if (pipe(cmd) < 0) {
449 		saverr = errno;
450 		msg(gettext("%s: %s error: %s\n"),
451 		    "setuparchive", "pipe", strerror(saverr));
452 		return (0);
453 	}
454 	sighold(SIGINT);
455 	if ((pid = fork()) < 0) {
456 		saverr = errno;
457 		msg(gettext("%s: %s error: %s\n"),
458 		    "setuparchive", "fork", strerror(saverr));
459 		return (0);
460 	}
461 	if (pid > 0) {
462 		sigrelse(SIGINT);
463 		/* parent process */
464 		(void) close(cmd[0]);
465 		arch = cmd[1];
466 		return (pid);
467 	}
468 	/*
469 	 * child process
470 	 */
471 	(void) signal(SIGINT, SIG_IGN);		/* master handles this */
472 #ifdef TDEBUG
473 	(void) sleep(4);	/* allow time for parent's message to get out */
474 	/* XGETTEXT:  #ifdef TDEBUG only */
475 	msg(gettext("Archiver has pid = %ld\n"), (long)getpid());
476 #endif
477 	freeino();	/* release unneeded resources */
478 	freetape();
479 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
480 		if (slavep->sl_slavefd != -1) {
481 			(void) close(slavep->sl_slavefd);
482 			slavep->sl_slavefd = -1;
483 		}
484 	}
485 	(void) close(to);
486 	(void) close(fi);
487 	to = fi = -1;
488 	(void) close(cmd[1]);
489 	data = xmalloc(tp_bsize);
490 	for (;;) {
491 		size = atomic((int(*)())read, cmd[0], (char *)&flags,
492 		    sizeof (flags));
493 		if ((unsigned)size != sizeof (flags))
494 			break;
495 		size = atomic((int(*)())read, cmd[0], data, tp_bsize);
496 		if (size == tp_bsize) {
497 			if (archive && flags & BUF_ARCHIVE && !punt &&
498 			    (size = write(archivefd, data, tp_bsize))
499 			    != tp_bsize) {
500 				struct stat64 stats;
501 
502 				if (size != -1) {
503 					errmsg = strdup(gettext(
504 					    "Output truncated"));
505 					if (errmsg == NULL)
506 						errmsg = "";
507 				} else {
508 					errmsg = strerror(errno);
509 				}
510 
511 				if (fstat64(archivefd, &stats) < 0)
512 				    stats.st_size = -1;
513 
514 				/* cast to keep lint&printf happy */
515 				msg(gettext(
516 		    "Cannot write archive file `%s' at offset %lld: %s\n"),
517 				    archivefile, (longlong_t)stats.st_size,
518 				    errmsg);
519 				msg(gettext(
520 		    "Archive file will be deleted, dump will continue\n"));
521 				punt++;
522 				if ((size != -1) && (*errmsg != '\0')) {
523 					free(errmsg);
524 				}
525 			}
526 		} else {
527 			break;
528 		}
529 	}
530 	(void) close(cmd[0]);
531 	if (archive) {
532 		(void) close(archivefd);
533 		archivefd = -1;
534 	}
535 	if (punt) {
536 		(void) unlink(archivefile);
537 		Exit(X_ABORT);
538 	}
539 	Exit(X_FINOK);
540 	/* NOTREACHED */
541 	return (0);
542 }
543 
544 /*
545  * Start a process to read the output buffers and write the data
546  * to the output device.
547  */
548 static pid_t
549 setupwriter(void)
550 {
551 	struct slaves *slavep;
552 	int cmd[2];
553 	pid_t pid;
554 	int saverr;
555 
556 	caught = 0;
557 	if (pipe(cmd) < 0) {
558 		saverr = errno;
559 		msg(gettext("%s: %s error: %s\n"),
560 			"setupwriter", "pipe", strerror(saverr));
561 		return (0);
562 	}
563 	sighold(SIGINT);
564 	if ((pid = fork()) < 0) {
565 		saverr = errno;
566 		msg(gettext("%s: %s error: %s\n"),
567 			"setupwriter", "fork", strerror(saverr));
568 		return (0);
569 	}
570 	if (pid > 0) {
571 		/*
572 		 * Parent process
573 		 */
574 		sigrelse(SIGINT);
575 		(void) close(cmd[0]);
576 		writer = cmd[1];
577 		return (pid);
578 	}
579 	/*
580 	 * Child (writer) process
581 	 */
582 	(void) signal(SIGINT, SIG_IGN);		/* master handles this */
583 #ifdef TDEBUG
584 	(void) sleep(4);	/* allow time for parent's message to get out */
585 	/* XGETTEXT:  #ifdef TDEBUG only */
586 	msg(gettext("Writer has pid = %ld\n"), (long)getpid());
587 #endif
588 	child_chdir();
589 	freeino();	/* release unneeded resources */
590 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
591 		if (slavep->sl_slavefd != -1) {
592 			(void) close(slavep->sl_slavefd);
593 			slavep->sl_slavefd = -1;
594 		}
595 	}
596 	(void) close(fi);
597 	fi = -1;
598 	(void) close(cmd[1]);
599 	dowrite(cmd[0]);
600 	if (arch >= 0) {
601 		(void) close(arch);
602 		arch = -1;
603 	}
604 	(void) close(cmd[0]);
605 	Exit(X_FINOK);
606 	/* NOTREACHED */
607 	return (0);
608 }
609 
610 void
611 #ifdef __STDC__
612 spclrec(void)
613 #else
614 spclrec()
615 #endif
616 {
617 	int s, i;
618 	int32_t *ip;
619 	int flags = BUF_SPCLREC;
620 
621 	if ((BIT(ino, shamap)) && (spcl.c_type == TS_INODE)) {
622 		spcl.c_type = TS_ADDR;
623 		/* LINTED: result fits in a short */
624 		spcl.c_dinode.di_mode &= ~S_IFMT;
625 		/* LINTED: result fits in a short */
626 		spcl.c_dinode.di_mode |= IFSHAD;
627 	}
628 
629 	/*
630 	 * Only TS_INODEs should have short metadata, if this
631 	 * isn't such a spclrec, clear the metadata flag and
632 	 * the c_shadow contents.
633 	 */
634 	if (!(spcl.c_type == TS_INODE && (spcl.c_flags & DR_HASMETA))) {
635 		spcl.c_flags &= ~DR_HASMETA;
636 		bcopy(c_shadow_save, &(spcl.c_shadow),
637 		    sizeof (spcl.c_shadow));
638 	}
639 
640 	if (spcl.c_type == TS_END) {
641 		spcl.c_count = 1;
642 		spcl.c_flags |= DR_INODEINFO;
643 		bcopy((char *)inos, (char *)spcl.c_inos, sizeof (inos));
644 	} else if (spcl.c_type == TS_TAPE) {
645 		spcl.c_flags |= DR_NEWHEADER;
646 		if (doingactive)
647 			spcl.c_flags |= DR_REDUMP;
648 	} else if (spcl.c_type != TS_INODE)
649 		flags = BUF_SPCLREC;
650 	spcl.c_tapea = *tapea;
651 	/* LINTED for now, max inode # is 2**31 (ufs max size is 4TB) */
652 	spcl.c_inumber = (ino32_t)ino;
653 	spcl.c_magic = (tp_bsize == TP_BSIZE_MIN) ? NFS_MAGIC : MTB_MAGIC;
654 	spcl.c_checksum = 0;
655 	ip = (int32_t *)&spcl;
656 	s = CHECKSUM;
657 	assert((tp_bsize % sizeof (*ip)) == 0);
658 	i = tp_bsize / sizeof (*ip);
659 	assert((i%8) == 0);
660 	i /= 8;
661 	do {
662 		s -= *ip++; s -= *ip++; s -= *ip++; s -= *ip++;
663 		s -= *ip++; s -= *ip++; s -= *ip++; s -= *ip++;
664 	} while (--i > 0);
665 	spcl.c_checksum = s;
666 	taprec((uchar_t *)&spcl, flags, sizeof (spcl));
667 	if (spcl.c_type == TS_END)
668 		spcl.c_flags &= ~DR_INODEINFO;
669 	else if (spcl.c_type == TS_TAPE)
670 		spcl.c_flags &= ~(DR_NEWHEADER|DR_REDUMP|DR_TRUEINC);
671 }
672 
673 /*
674  * Fill appropriate buffer
675  */
676 void
677 taprec(uchar_t *dp, int flags, int size)
678 {
679 	if (size > tp_bsize) {
680 		msg(gettext(
681 		    "taprec: Unexpected buffer size, expected %d, got %d.\n"),
682 		    tp_bsize, size);
683 		dumpabort();
684 		/*NOTREACHED*/
685 	}
686 
687 	while ((*current)->b_flags & BUF_FULL)
688 		nap(10);
689 
690 	bcopy(dp, (*current)->b_data, (size_t)size);
691 	if (size < tp_bsize) {
692 		bzero((*current)->b_data + size, tp_bsize - size);
693 	}
694 
695 	if (dumptoarchive)
696 		flags |= BUF_ARCHIVE;
697 
698 	/* no locking as we assume only one reader and one writer active */
699 	(*current)->b_flags = (flags | BUF_FULL);
700 	if (++*current >= &bufp[NBUF*ntrec])
701 		(*current) = &bufp[0];
702 	(*tapea)++;
703 }
704 
705 void
706 dmpblk(daddr32_t blkno, size_t size, off_t offset)
707 {
708 	diskaddr_t dblkno;
709 
710 	assert((offset >> DEV_BSHIFT) <= INT32_MAX);
711 	dblkno = fsbtodb(sblock, blkno) + (offset >> DEV_BSHIFT);
712 	size = (size + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
713 	slp->sl_req->br_dblk = dblkno;
714 	slp->sl_req->br_size = size;
715 	if (dumptoarchive) {
716 		/* LINTED: result fits in a short */
717 		slp->sl_req->aflag |= BUF_ARCHIVE;
718 	}
719 	toslave((void(*)())0, ino);
720 }
721 
722 /*ARGSUSED*/
723 static void
724 tperror(int sig)
725 {
726 	char buf[3000];
727 
728 	if (pipeout) {
729 		msg(gettext("Write error on %s\n"), tape);
730 		msg(gettext("Cannot recover\n"));
731 		dumpabort();
732 		/* NOTREACHED */
733 	}
734 	if (!doingverify) {
735 		broadcast(gettext("WRITE ERROR!\n"));
736 		(void) snprintf(buf, sizeof (buf),
737 		    gettext("Do you want to restart?: (\"yes\" or \"no\") "));
738 		if (!query(buf)) {
739 			dumpabort();
740 			/*NOTREACHED*/
741 		}
742 		if (tapeout && (isrewind(to) || offline)) {
743 			/* ANSI string catenation, to shut cstyle up */
744 			msg(gettext("This tape will rewind.  After "
745 				    "it is rewound,\nreplace the faulty tape "
746 				    "with a new one;\nthis dump volume will "
747 				    "be rewritten.\n"));
748 		}
749 	} else {
750 		broadcast(gettext("TAPE VERIFICATION ERROR!\n"));
751 		(void) snprintf(buf, sizeof (buf), gettext(
752 		    "Do you want to rewrite?: (\"yes\" or \"no\") "));
753 		if (!query(buf)) {
754 			dumpabort();
755 			/*NOTREACHED*/
756 		}
757 		msg(gettext(
758 			"This tape will be rewritten and then verified\n"));
759 	}
760 	killall();
761 	trewind();
762 	Exit(X_REWRITE);
763 }
764 
765 /*
766  * Called by master from pass() to send a request to dump files/blocks
767  * to one of the slaves.  Slaves return whether the file was active
768  * when it was being dumped.  The tape writer process sends checkpoint
769  * info when it completes a volume.
770  */
771 void
772 toslave(void (*fn)(), ino_t inumber)
773 {
774 	int	wasactive;
775 
776 	if (recsout >= SLAVES) {
777 		if ((unsigned)atomic((int(*)())read, slp->sl_slavefd,
778 		    (char *)&wasactive, sizeof (wasactive)) !=
779 		    sizeof (wasactive)) {
780 			cmdrderr();
781 			dumpabort();
782 			/*NOTREACHED*/
783 		}
784 		if (wasactive) {
785 			active++;
786 			msg(gettext(
787 		"The file at inode `%lu' was active and will be recopied\n"),
788 				slp->sl_req->ir_inumber);
789 			/* LINTED: 32-bit to 8-bit assignment ok */
790 			BIS(slp->sl_req->ir_inumber, activemap);
791 		}
792 	}
793 	slp->sl_req->aflag = 0;
794 	if (dumptoarchive) {
795 		/* LINTED: result fits in a short */
796 		slp->sl_req->aflag |= BUF_ARCHIVE;
797 	}
798 	if (fn)
799 		(*fn)(inumber);
800 
801 	if (atomic((int(*)())write, slp->sl_slavefd, (char *)slp->sl_req,
802 	    reqsiz) != reqsiz) {
803 		cmdwrterr();
804 		dumpabort();
805 		/*NOTREACHED*/
806 	}
807 	++recsout;
808 	nextslave();
809 }
810 
811 void
812 dospcl(ino_t inumber)
813 {
814 	/* LINTED for now, max inode # is 2**31 (ufs max size is 1TB) */
815 	spcl.c_inumber = (ino32_t)inumber;
816 	slp->sl_req->br_dblk = 0;
817 	bcopy((char *)&spcl, (char *)slp->sl_req->br_spcl, tp_bsize);
818 }
819 
820 static void
821 #ifdef __STDC__
822 nextslave(void)
823 #else
824 nextslave()
825 #endif
826 {
827 	if (++rotor >= SLAVES) {
828 		rotor = 0;
829 	}
830 	slp = &slaves[rotor];
831 }
832 
833 void
834 #ifdef __STDC__
835 flushcmds(void)
836 #else
837 flushcmds()
838 #endif
839 {
840 	int i;
841 	int wasactive;
842 
843 	/*
844 	 * Retrieve all slave status
845 	 */
846 	if (recsout < SLAVES) {
847 		slp = slaves;
848 		rotor = 0;
849 	}
850 	for (i = 0; i < (recsout < SLAVES ? recsout : SLAVES); i++) {
851 		if ((unsigned)atomic((int(*)())read, slp->sl_slavefd,
852 		    (char *)&wasactive, sizeof (wasactive)) !=
853 		    sizeof (wasactive)) {
854 			cmdrderr();
855 			dumpabort();
856 			/*NOTREACHED*/
857 		}
858 		if (wasactive) {
859 			active++;
860 			msg(gettext(
861 			    "inode %d was active and will be recopied\n"),
862 				slp->sl_req->ir_inumber);
863 			/* LINTED: 32-bit to 8-bit assignment ok */
864 			BIS(slp->sl_req->ir_inumber, activemap);
865 		}
866 		nextslave();
867 	}
868 }
869 
870 void
871 #ifdef __STDC__
872 flusht(void)
873 #else
874 flusht()
875 #endif
876 {
877 	sigset_t block_set, oset;	/* hold SIGUSR1 and atomically sleep */
878 
879 	(void) sigemptyset(&block_set);
880 	(void) sigaddset(&block_set, SIGUSR1);
881 	(void) sigprocmask(SIG_BLOCK, &block_set, &oset);
882 	(void) kill(writepid, SIGUSR1);	/* tell writer to flush */
883 	(void) sigpause(SIGUSR1);	/* wait for SIGUSR1 from writer */
884 	/*NOTREACHED*/
885 }
886 
887 jmp_buf	checkpoint_buf;
888 
889 /*
890  * Roll forward to the next volume after receiving
891  * an EOT signal from writer.  Get checkpoint data
892  * from writer and return if done, otherwise fork
893  * a new process and jump back to main state loop
894  * to begin the next volume.  Installed as the master's
895  * signal handler for SIGUSR1.
896  */
897 /*ARGSUSED*/
898 static void
899 rollforward(int sig)
900 {
901 	int status;
902 	(void) sighold(SIGUSR1);
903 
904 	/*
905 	 * Writer sends us checkpoint information after
906 	 * each volume.  A returned state of DS_DONE with no
907 	 * unwritten (left-over) records differentiates a
908 	 * clean flush from one in which EOT was encountered.
909 	 */
910 	if ((unsigned)atomic((int(*)())read, writer, (char *)&chkpt,
911 	    sizeof (struct slaves)) != sizeof (struct slaves)) {
912 		cmdrderr();
913 		dumpabort();
914 		/*NOTREACHED*/
915 	}
916 	if (atomic((int(*)())read, writer, (char *)&spcl,
917 	    TP_BSIZE_MIN) != TP_BSIZE_MIN) {
918 		cmdrderr();
919 		dumpabort();
920 		/*NOTREACHED*/
921 	}
922 	ino = chkpt.sl_inos - 1;
923 	pos = chkpt.sl_offset;
924 	leftover = chkpt.sl_count;
925 	dumpstate = chkpt.sl_state;
926 	blockswritten = ++chkpt.sl_tapea;
927 
928 	if (dumpstate == DS_DONE) {
929 		if (archivepid) {
930 			/*
931 			 * If archiving (either archive or
932 			 * database), signal the archiver
933 			 * to finish up.  This must happen
934 			 * before the writer exits in order
935 			 * to avoid a race.
936 			 */
937 			(void) kill(archivepid, SIGUSR1);
938 		}
939 		(void) signal(SIGUSR1, SIG_IGN);
940 		(void) sigrelse(SIGUSR1);
941 		(void) kill(writepid, SIGUSR1);	/* tell writer to exit */
942 
943 		lf_archoffset = 0LL;
944 		longjmp(checkpoint_buf, 1);
945 		/*NOTREACHED*/
946 	}
947 
948 	if (leftover) {
949 		(void) memmove(spcl.c_addr,
950 		    &spcl.c_addr[spcl.c_count-leftover], leftover);
951 		bzero(&spcl.c_addr[leftover], TP_NINDIR-leftover);
952 	}
953 	if (writepid) {
954 		(void) kill(writepid, SIGUSR1);	/* tell writer to exit */
955 		(void) close(writer);
956 		writer = -1;
957 	}
958 	if (archivepid) {
959 		(void) waitpid(archivepid, &status, 0);	/* wait for archiver */
960 #ifdef TDEBUG
961 
962 		/* XGETTEXT:  #ifdef TDEBUG only */
963 		msg(gettext("Archiver %ld returns with status %d\n"),
964 		    (long)archivepid, status);
965 #endif
966 		archivepid = 0;
967 	}
968 	/*
969 	 * Checkpoint archive file
970 	 */
971 	if (!doingverify && archive) {
972 		lf_archoffset = lseek64(archivefd, (off64_t)0, 2);
973 		if (lf_archoffset < 0) {
974 			int saverr = errno;
975 			msg(gettext("Cannot position archive file `%s': %s\n"),
976 				archivefile, strerror(saverr));
977 			dumpabort();
978 			/*NOTREACHED*/
979 		}
980 		(void) close(archivefd);
981 		archivefd = -1;
982 	}
983 	resetino(ino);
984 
985 	if (dumpstate == DS_START) {
986 		msg(gettext(
987 			"Tape too short: changing volumes and restarting\n"));
988 		reset();
989 	}
990 
991 	if (!pipeout) {
992 		if (verify && !doingverify)
993 			trewind();
994 		else {
995 			close_rewind();
996 			changevol();
997 		}
998 	}
999 
1000 	(void) sigrelse(SIGUSR1);
1001 	otape(0);
1002 	longjmp(checkpoint_buf, 1);
1003 	/*NOTREACHED*/
1004 }
1005 
/*
 * Sleep for roughly `ms' milliseconds by calling select() with no
 * descriptors and a timeout.  The result of select() is ignored.
 */
static void
nap(int ms)
{
	struct timeval delay;

	delay.tv_sec = ms / 1000;		/* whole seconds */
	delay.tv_usec = (ms % 1000) * 1000;	/* remainder, in usec */
	(void) select(0, NULL, NULL, NULL, &delay);
}
1015 
1016 static jmp_buf alrm_buf;
1017 
/*
 * Signal handler that does not return to the interrupted code: it
 * jumps to whatever context was last saved in alrm_buf, where
 * setjmp() then returns 1.
 * NOTE(review): the setjmp(alrm_buf) site and the signal this is
 * installed for are not in this part of the file (presumably
 * SIGALRM, from the name) -- confirm.
 */
/*ARGSUSED*/
static void
alrm(int sig)
{
	longjmp(alrm_buf, 1);
	/*NOTREACHED*/
}
1025 
1026 void
1027 #ifdef __STDC__
1028 nextdevice(void)
1029 #else
1030 nextdevice()
1031 #endif
1032 {
1033 	char	*cp;
1034 
1035 	if (host != NULL)	/* we set the host only once in ufsdump */
1036 		return;
1037 
1038 	host = NULL;
1039 	if (strchr(tape, ':')) {
1040 		if (diskette) {
1041 			msg(gettext("Cannot do remote dump to diskette\n"));
1042 			Exit(X_ABORT);
1043 		}
1044 		host = tape;
1045 		tape = strchr(host, ':');
1046 		*tape++ = 0;
1047 		cp = strchr(host, '@');	/* user@host? */
1048 		if (cp != (char *)0)
1049 			cp++;
1050 		else
1051 			cp = host;
1052 	} else
1053 		cp = spcl.c_host;
1054 	/*
1055 	 * dumpdev is provided for use in prompts and is of
1056 	 * the form:
1057 	 *	hostname:device
1058 	 * sdumpdev is of the form:
1059 	 *	hostname:device
1060 	 * for remote devices, and simply:
1061 	 *	device
1062 	 * for local devices.
1063 	 */
1064 	if (dumpdev != (char *)NULL) {
1065 		/* LINTED: dumpdev is not NULL */
1066 		free(dumpdev);
1067 	}
1068 	/*LINTED [cast to smaller integer]*/
1069 	dumpdev = xmalloc((size_t)((sizeof (spcl.c_host) + strlen(tape) + 2)));
1070 	/* LINTED unsigned -> signed cast ok */
1071 	(void) sprintf(dumpdev, "%.*s:%s", (int)sizeof (spcl.c_host), cp, tape);
1072 	if (cp == spcl.c_host)
1073 		sdumpdev = strchr(dumpdev, ':') + 1;
1074 	else
1075 		sdumpdev = dumpdev;
1076 }
1077 
1078 /*
1079  * Gross hack due to misfeature of mt tape driver that causes
1080  * the device to rewind if we generate any signals.  Guess
1081  * whether tape is rewind device or not -- for local devices
1082  * we can just look at the minor number.  For rmt devices,
1083  * make an educated guess.
1084  */
1085 int
1086 isrewind(int f)
1087 {
1088 	struct stat64 sbuf;
1089 	char    *c;
1090 	int	unit;
1091 	int	rewind;
1092 
1093 	if (host) {
1094 		c = strrchr(tape, '/');
1095 		if (c == NULL)
1096 			c = tape;
1097 		else
1098 			c++;
1099 		/*
1100 		 * If the last component begins or ends with an 'n', it is
1101 		 * assumed to be a non-rewind device.
1102 		 */
1103 		if (c[0] == 'n' || c[strlen(c)-1] == 'n')
1104 			rewind = 0;
1105 		else if ((strstr(tape, "mt") || strstr(tape, "st")) &&
1106 		    sscanf(tape, "%*[a-zA-Z/]%d", &unit) == 1 &&
1107 		    (unit & MT_NOREWIND))
1108 			rewind = 0;
1109 		else
1110 			rewind = 1;
1111 	} else {
1112 		if (fstat64(f, &sbuf) < 0) {
1113 			msg(gettext(
1114 			    "Cannot obtain status of output device `%s'\n"),
1115 				tape);
1116 			dumpabort();
1117 			/*NOTREACHED*/
1118 		}
1119 		rewind = minor(sbuf.st_rdev) & MT_NOREWIND ? 0 : 1;
1120 	}
1121 	return (rewind);
1122 }
1123 
1124 static void
1125 #ifdef __STDC__
1126 just_rewind(void)
1127 #else
1128 just_rewind()
1129 #endif
1130 {
1131 	struct slaves *slavep;
1132 	char *rewinding = gettext("Tape rewinding\n");
1133 
1134 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
1135 		if (slavep->sl_slavepid > 0)	/* signal normal exit */
1136 			(void) kill(slavep->sl_slavepid, SIGTERM);
1137 		if (slavep->sl_slavefd >= 0) {
1138 			(void) close(slavep->sl_slavefd);
1139 			slavep->sl_slavefd = -1;
1140 		}
1141 	}
1142 
1143 	/* wait for any signals from slaves */
1144 	while (waitpid(0, (int *)0, 0) >= 0)
1145 		/*LINTED [empty body]*/
1146 		continue;
1147 
1148 	if (pipeout)
1149 		return;
1150 
1151 	if (doingverify) {
1152 		/*
1153 		 * Space to the end of the tape.
1154 		 * Backup first in case we already read the EOF.
1155 		 */
1156 		if (host) {
1157 			(void) rmtioctl(MTBSR, 1);
1158 			if (rmtioctl(MTEOM, 1) < 0)
1159 				(void) rmtioctl(MTFSF, 1);
1160 		} else {
1161 			static struct mtop bsr = { MTBSR, 1 };
1162 			static struct mtop eom = { MTEOM, 1 };
1163 			static struct mtop fsf = { MTFSF, 1 };
1164 
1165 			(void) ioctl(to, MTIOCTOP, &bsr);
1166 			if (ioctl(to, MTIOCTOP, &eom) < 0)
1167 				(void) ioctl(to, MTIOCTOP, &fsf);
1168 		}
1169 	}
1170 
1171 	/*
1172 	 * Guess whether the tape is rewinding so we can tell
1173 	 * the operator if it's going to take a long time.
1174 	 */
1175 	if (tapeout && isrewind(to)) {
1176 		/* tape is probably rewinding */
1177 		msg(rewinding);
1178 	}
1179 }
1180 
1181 void
1182 #ifdef __STDC__
1183 trewind(void)
1184 #else
1185 trewind()
1186 #endif
1187 {
1188 	(void) timeclock((time_t)0);
1189 	if (offline && (!verify || doingverify)) {
1190 		close_rewind();
1191 	} else {
1192 		just_rewind();
1193 		if (host)
1194 			rmtclose();
1195 		else {
1196 			(void) close(to);
1197 			to = -1;
1198 		}
1199 	}
1200 }
1201 
1202 void
1203 #ifdef __STDC__
1204 close_rewind(void)
1205 #else
1206 close_rewind()
1207 #endif
1208 {
1209 	char *rewinding = gettext("Tape rewinding\n");
1210 
1211 	(void) timeclock((time_t)0);
1212 	just_rewind();
1213 	/*
1214 	 * The check in just_rewind won't catch the case in
1215 	 * which the current volume is being taken off-line
1216 	 * and is not mounted on a no-rewind device (and is
1217 	 * not the last volume, which is not taken off-line).
1218 	 */
1219 	if (tapeout && !isrewind(to) && offline) {
1220 		/* tape is probably rewinding */
1221 		msg(rewinding);
1222 	}
1223 	if (host) {
1224 		if (offline || autoload)
1225 			(void) rmtioctl(MTOFFL, 0);
1226 		rmtclose();
1227 	} else {
1228 		if (offline || autoload) {
1229 			static struct mtop offl = { MTOFFL, 0 };
1230 
1231 			(void) ioctl(to, MTIOCTOP, &offl);
1232 			if (diskette)
1233 				(void) ioctl(to, FDEJECT, 0);
1234 		}
1235 		(void) close(to);
1236 		to = -1;
1237 	}
1238 }
1239 
1240 void
1241 #ifdef __STDC__
1242 changevol(void)
1243 #else
1244 changevol()
1245 #endif
1246 {
1247 	char buf1[3000], buf2[3000];
1248 	char volname[LBLSIZE+1];
1249 
1250 	/*CONSTANTCONDITION*/
1251 	assert(sizeof (spcl.c_label) < sizeof (volname));
1252 
1253 	filenum = 1;
1254 	nextdevice();
1255 	(void) strcpy(spcl.c_label, tlabel);
1256 	if (host) {
1257 		char	*rhost = host;
1258 		char	*cp = strchr(host, '@');
1259 		if (cp == (char *)0)
1260 			cp = host;
1261 		else
1262 			cp++;
1263 
1264 		if (rmthost(rhost, ntrec) == 0) {
1265 			msg(gettext("Cannot connect to tape host `%s'\n"), cp);
1266 			dumpabort();
1267 			/*NOTREACHED*/
1268 		}
1269 		if (rhost != host)
1270 			free(rhost);
1271 	}
1272 
1273 	/*
1274 	 * Make volume switching as automatic as possible
1275 	 * while avoiding overwriting volumes.  We will
1276 	 * switch automatically under the following condition:
1277 	 *    1) The user specified autoloading from the
1278 	 *	command line.
1279 	 * At one time, we (in the guise of hsmdump) had the
1280 	 * concept of a sequence of devices to rotate through,
1281 	 * but that's never been a ufsdump feature.
1282 	 */
1283 	if (autoload) {
1284 		int tries;
1285 
1286 		/*
1287 		 * Stop the clock for throughput calculations.
1288 		 */
1289 		if ((telapsed != NULL) && (tstart_writing != NULL)) {
1290 			*telapsed += time((time_t *)NULL) - *tstart_writing;
1291 		}
1292 
1293 		(void) snprintf(volname, sizeof (volname), "#%d", tapeno+1);
1294 		(void) snprintf(buf1, sizeof (buf1), gettext(
1295 		    "Mounting volume %s on %s\n"), volname, dumpdev);
1296 		msg(buf1);
1297 		broadcast(buf1);
1298 
1299 		/*
1300 		 * Wait for the tape to autoload.  Note that the delay
1301 		 * period doesn't take into account however long it takes
1302 		 * for the open to fail (measured at 21 seconds for an
1303 		 * Exabyte 8200 under 2.7 on an Ultra 2).
1304 		 */
1305 		for (tries = 0; tries < autoload_tries; tries++) {
1306 			if (host) {
1307 				if (rmtopen(tape, O_RDONLY) >= 0) {
1308 					rmtclose();
1309 					return;
1310 				}
1311 			} else {
1312 				int f, m;
1313 
1314 				m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
1315 				if ((f = doingverify ?
1316 				    safe_device_open(tape, O_RDONLY, 0600) :
1317 				    safe_device_open(tape, O_RDONLY|m, 0600))
1318 				    >= 0) {
1319 					(void) close(f);
1320 					return;
1321 				}
1322 			}
1323 			(void) sleep(autoload_period);
1324 		}
1325 		/*
1326 		 * Autoload timed out, ask the operator to do it.
1327 		 * Note that query() will update *telapsed, and we
1328 		 * shouldn't charge for the autoload time.  So, since
1329 		 * we updated *telapsed ourselves above, we just set
1330 		 * tstart_writing to the current time, and query()
1331 		 * will end up making a null-effect change.  This,
1332 		 * of course, assumes that our caller will be resetting
1333 		 * *tstart_writing.  This is currently the case.
1334 		 * If tstart_writing is NULL (should never happen),
1335 		 * we're ok, since time(2) will accept a NULL pointer.
1336 		 */
1337 		(void) time(tstart_writing);
1338 	}
1339 
1340 	if (strncmp(spcl.c_label, "none", 5)) {
1341 		(void) strncpy(volname, spcl.c_label, sizeof (spcl.c_label));
1342 		volname[sizeof (spcl.c_label)] = '\0';
1343 	} else
1344 		(void) snprintf(volname, sizeof (volname), "#%d", tapeno+1);
1345 
1346 	timeest(1, spcl.c_tapea);
1347 	(void) snprintf(buf1, sizeof (buf1), gettext(
1348 	    "Change Volumes: Mount volume `%s' on `%s'\n"), volname, dumpdev);
1349 	msg(buf1);
1350 	broadcast(gettext("CHANGE VOLUMES!\7\7\n"));
1351 	(void) snprintf(buf1, sizeof (buf1), gettext(
1352 	    "Is the new volume (%s) mounted on `%s' and ready to go?: %s"),
1353 	    volname, dumpdev, gettext("(\"yes\" or \"no\") "));
1354 	while (!query(buf1)) {
1355 		(void) snprintf(buf2, sizeof (buf2), gettext(
1356 		    "Do you want to abort dump?: (\"yes\" or \"no\") "));
1357 		if (query(buf2)) {
1358 			dumpabort();
1359 			/*NOTREACHED*/
1360 		}
1361 	}
1362 }
1363 
1364 /*
1365  *	We implement taking and restoring checkpoints on the tape level.
1366  *	When each tape is opened, a new process is created by forking; this
1367  *	saves all of the necessary context in the parent.  The child
1368  *	continues the dump; the parent waits around, saving the context.
1369  *	If the child returns X_REWRITE, then it had problems writing that tape;
1370  *	this causes the parent to fork again, duplicating the context, and
1371  *	everything continues as if nothing had happened.
1372  */
1373 
1374 void
1375 otape(int top)
1376 {
1377 	static struct mtget mt;
1378 	char buf[3000];
1379 	pid_t parentpid;
1380 	pid_t childpid;
1381 	pid_t waitproc;
1382 	int status;
1383 	struct sigvec sv, osv;
1384 
1385 	sv.sv_flags = SA_RESTART;
1386 	(void) sigemptyset(&sv.sa_mask);
1387 	sv.sv_handler = SIG_IGN;
1388 	(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1389 
1390 	parentpid = getpid();
1391 
1392 	if (verify) {
1393 		if (doingverify)
1394 			doingverify = 0;
1395 		else
1396 			Exit(X_VERIFY);
1397 	}
1398 restore_check_point:
1399 
1400 	sv.sv_handler = interrupt;
1401 	(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1402 	(void) fflush(stderr);
1403 	/*
1404 	 *	All signals are inherited...
1405 	 */
1406 	sighold(SIGINT);
1407 	childpid = fork();
1408 	if (childpid < 0) {
1409 		msg(gettext(
1410 		    "Context-saving fork failed in parent %ld\n"),
1411 			(long)parentpid);
1412 		Exit(X_ABORT);
1413 	}
1414 	if (childpid != 0) {
1415 		/*
1416 		 *	PARENT:
1417 		 *	save the context by waiting
1418 		 *	until the child doing all of the work returns.
1419 		 *	let the child catch user interrupts
1420 		 */
1421 		sv.sv_handler = SIG_IGN;
1422 		(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1423 		sigrelse(SIGINT);
1424 #ifdef TDEBUG
1425 
1426 		/* XGETTEXT:  #ifdef TDEBUG only */
1427 		msg(gettext(
1428 		    "Volume: %d; parent process: %ld child process %ld\n"),
1429 			tapeno+1, (long)parentpid, (long)childpid);
1430 #endif /* TDEBUG */
1431 		for (;;) {
1432 			waitproc = waitpid(0, &status, 0);
1433 			if (waitproc == childpid)
1434 				break;
1435 			msg(gettext(
1436 	"Parent %ld waiting for child %ld had another child %ld return\n"),
1437 			    (long)parentpid, (long)childpid, (long)waitproc);
1438 		}
1439 		if (WIFSIGNALED(status)) {
1440 			msg(gettext("Process %ld killed by signal %d: %s\n"),
1441 			    (long)childpid, WTERMSIG(status),
1442 			    strsignal(WTERMSIG(status)));
1443 			status = X_ABORT;
1444 		} else
1445 			status = WEXITSTATUS(status);
1446 #ifdef TDEBUG
1447 		switch (status) {
1448 		case X_FINOK:
1449 			/* XGETTEXT:  #ifdef TDEBUG only */
1450 			msg(gettext(
1451 			    "Child %ld finishes X_FINOK\n"), (long)childpid);
1452 			break;
1453 		case X_ABORT:
1454 			/* XGETTEXT:  #ifdef TDEBUG only */
1455 			msg(gettext(
1456 			    "Child %ld finishes X_ABORT\n"), (long)childpid);
1457 			break;
1458 		case X_REWRITE:
1459 			/* XGETTEXT:  #ifdef TDEBUG only */
1460 			msg(gettext(
1461 			    "Child %ld finishes X_REWRITE\n"), (long)childpid);
1462 			break;
1463 		case X_RESTART:
1464 			/* XGETTEXT:  #ifdef TDEBUG only */
1465 			msg(gettext(
1466 			    "Child %ld finishes X_RESTART\n"), (long)childpid);
1467 			break;
1468 		case X_VERIFY:
1469 			/* XGETTEXT:  #ifdef TDEBUG only */
1470 			msg(gettext(
1471 			    "Child %ld finishes X_VERIFY\n"), (long)childpid);
1472 			break;
1473 		default:
1474 			/* XGETTEXT:  #ifdef TDEBUG only */
1475 			msg(gettext("Child %ld finishes unknown %d\n"),
1476 			    (long)childpid, status);
1477 			break;
1478 		}
1479 #endif /* TDEBUG */
1480 		switch (status) {
1481 		case X_FINOK:
1482 			/* wait for children */
1483 			while (waitpid(0, (int *)0, 0) >= 0)
1484 				/*LINTED [empty body]*/
1485 				continue;
1486 			Exit(X_FINOK);
1487 			/*NOTREACHED*/
1488 		case X_ABORT:
1489 			Exit(X_ABORT);
1490 			/*NOTREACHED*/
1491 		case X_VERIFY:
1492 			doingverify++;
1493 			goto restore_check_point;
1494 			/*NOTREACHED*/
1495 		case X_REWRITE:
1496 			doingverify = 0;
1497 			changevol();
1498 			goto restore_check_point;
1499 			/* NOTREACHED */
1500 		case X_RESTART:
1501 			doingverify = 0;
1502 			if (!top) {
1503 				Exit(X_RESTART);
1504 			}
1505 			if (!offline)
1506 				autoload = 0;
1507 			changevol();
1508 			sv.sv_handler = interrupt;
1509 			(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1510 			return;
1511 			/* NOTREACHED */
1512 		default:
1513 			msg(gettext("Bad return code from dump: %d\n"), status);
1514 			Exit(X_ABORT);
1515 			/*NOTREACHED*/
1516 		}
1517 		/*NOTREACHED*/
1518 	} else {	/* we are the child; just continue */
1519 		child_chdir();
1520 		sigrelse(SIGINT);
1521 #ifdef TDEBUG
1522 		(void) sleep(4); /* time for parent's message to get out */
1523 		/* XGETTEXT:  #ifdef TDEBUG only */
1524 		msg(gettext(
1525 		    "Child on Volume %d has parent %ld, my pid = %ld\n"),
1526 			tapeno+1, (long)parentpid, (long)getpid());
1527 #endif
1528 		(void) snprintf(buf, sizeof (buf), gettext(
1529 "Cannot open `%s'.  Do you want to retry the open?: (\"yes\" or \"no\") "),
1530 		    dumpdev);
1531 		if (doingverify) {
1532 			/* 1 for stdout */
1533 			while ((to = host ? rmtopen(tape, O_RDONLY) :
1534 			    pipeout ? 1 :
1535 			    safe_device_open(tape, O_RDONLY, 0600)) < 0) {
1536 				perror(tape);
1537 				if (autoload) {
1538 					if (!query_once(buf, 1)) {
1539 						dumpabort();
1540 						/*NOTREACHED*/
1541 					}
1542 				} else {
1543 					if (!query(buf)) {
1544 						dumpabort();
1545 						/*NOTREACHED*/
1546 					}
1547 				}
1548 			}
1549 
1550 			/*
1551 			 * If we're using the non-rewinding tape device,
1552 			 * the tape will be left positioned after the
1553 			 * EOF mark.  We need to back up to the beginning
1554 			 * of this tape file (cross two tape marks in the
1555 			 * reverse direction and one in the forward
1556 			 * direction) before the verify pass.
1557 			 */
1558 			if (host) {
1559 				if (rmtioctl(MTBSF, 2) >= 0)
1560 					(void) rmtioctl(MTFSF, 1);
1561 				else
1562 					(void) rmtioctl(MTNBSF, 1);
1563 			} else {
1564 				static struct mtop bsf = { MTBSF, 2 };
1565 				static struct mtop fsf = { MTFSF, 1 };
1566 				static struct mtop nbsf = { MTNBSF, 1 };
1567 
1568 				if (ioctl(to, MTIOCTOP, &bsf) >= 0)
1569 					(void) ioctl(to, MTIOCTOP, &fsf);
1570 				else
1571 					(void) ioctl(to, MTIOCTOP, &nbsf);
1572 			}
1573 		} else {
1574 			/*
1575 			 * XXX Add logic to test for "tape" being a
1576 			 * XXX device or a non-existent file.
1577 			 * Current behaviour is that it must exist,
1578 			 * and we over-write whatever's there.
1579 			 * This can be bad if tape == "/etc/passwd".
1580 			 */
1581 			if (!pipeout && doposition && (tapeno == 0)) {
1582 				positiontape(buf);
1583 				if (setjmp(alrm_buf)) {
1584 					/*
1585 					 * The tape is rewinding;
1586 					 * we're screwed.
1587 					 */
1588 				    msg(gettext(
1589 			    "Cannot position tape using rewind device!\n"));
1590 				    dumpabort();
1591 				    /*NOTREACHED*/
1592 				} else {
1593 					sv.sv_handler = alrm;
1594 					(void) sigvec(SIGALRM, &sv, &osv);
1595 					(void) alarm(15);
1596 				}
1597 				while ((to = host ? rmtopen(tape, O_WRONLY) :
1598 				    safe_device_open(tape, O_WRONLY, 0600)) < 0)
1599 					(void) sleep(10);
1600 				(void) alarm(0);
1601 				(void) sigvec(SIGALRM, &osv,
1602 				    (struct sigvec *)0);
1603 			} else {
1604 				int m;
1605 				m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
1606 				/*
1607 				 * Only verify the tape label if label
1608 				 * verification is on and we are at BOT
1609 				 */
1610 				if (pipeout)
1611 					to = 1;
1612 				else while ((to = host ?
1613 				    rmtopen(tape, O_WRONLY) :
1614 				    safe_device_open(tape, O_WRONLY|m, 0600))
1615 				    < 0)
1616 					if (!query_once(buf, 1)) {
1617 						dumpabort();
1618 						/*NOTREACHED*/
1619 					}
1620 			}
1621 		}
1622 		if (!pipeout) {
1623 			tapeout = host ? rmtstatus(&mt) >= 0 :
1624 			    ioctl(to, MTIOCGET, &mt) >= 0;	/* set state */
1625 			/*
1626 			 * Make sure the tape is positioned
1627 			 * where it is supposed to be
1628 			 */
1629 			if (tapeout && (tapeno > 0) &&
1630 			    (mt.mt_fileno != (filenum-1))) {
1631 				(void) snprintf(buf, sizeof (buf), gettext(
1632 				    "Warning - tape positioning error!\n\
1633 \t%s current file %ld, should be %ld\n"),
1634 				    tape, mt.mt_fileno+1, filenum);
1635 				msg(buf);
1636 				dumpailing();
1637 			}
1638 		}
1639 		tapeno++;		/* current tape sequence */
1640 		if (tapeno < TP_NINOS)
1641 			inos[tapeno] = chkpt.sl_inos;
1642 		spcl.c_firstrec = chkpt.sl_firstrec;
1643 		spcl.c_tapea = (*tapea) = chkpt.sl_tapea;
1644 		spcl.c_volume++;
1645 
1646 		enslave();	/* Share tape buffers with slaves */
1647 
1648 #ifdef DEBUG
1649 		if (xflag) {
1650 			/* XGETTEXT:  #ifdef DEBUG only */
1651 			msg(gettext("Checkpoint state:\n"));
1652 			msg("    blockswritten %u\n", blockswritten);
1653 			msg("    ino %u\n", ino);
1654 			msg("    pos %u\n", pos);
1655 			msg("    left %u\n", leftover);
1656 			msg("    tapea %u\n", (*tapea));
1657 			msg("    state %d\n", dumpstate);
1658 		}
1659 #endif
1660 		spcl.c_type = TS_TAPE;
1661 		spcl.c_tpbsize = tp_bsize;
1662 		if (leftover == 0) {
1663 			spcl.c_count = 0;
1664 			spclrec();
1665 			newtape = 0;
1666 		} else
1667 			newtape++;	/* new volume indication */
1668 		if (doingverify) {
1669 			msg(gettext("Starting verify pass\n"));
1670 		} else if (tapeno > 1) {
1671 			msg(gettext(
1672 			    "Volume %d begins with blocks from inode %lu\n"),
1673 				tapeno, chkpt.sl_inos);
1674 		}
1675 		(void) timeclock((time_t)1);
1676 		(void) time(tstart_writing);
1677 		timeest(0, spcl.c_tapea);
1678 	}
1679 }
1680 
1681 void
1682 #ifdef __STDC__
1683 dumpabort(void)
1684 #else
1685 dumpabort()
1686 #endif
1687 {
1688 
1689 	if (master && master != getpid())
1690 		/*
1691 		 * signal master to call dumpabort
1692 		 */
1693 		(void) kill(master, SIGTERM);
1694 	else {
1695 		killall();
1696 
1697 		if (archivefile)
1698 			(void) unlink(archivefile);
1699 		msg(gettext("The ENTIRE dump is aborted.\n"));
1700 	}
1701 	Exit(X_ABORT);
1702 }
1703 
/*
 * Announce that the dump is in trouble and ask the operator whether
 * to carry on.  Returns only if the answer is yes; otherwise the
 * dump is aborted.
 */
void
dumpailing(void)
{
	broadcast(gettext("DUMP IS AILING!\n"));

	if (query(gettext(
	    "Do you want to attempt to continue? (\"yes\" or \"no\") ")))
		return;

	dumpabort();
	/*NOTREACHED*/
}
1715 
/*
 * Terminate the calling process with the given exit status.
 * When built with TDEBUG the pid and status are logged first.
 * Does not return.
 */
void
Exit(int status)
{
	/*
	 * Clean up message system
	 */
#ifdef TDEBUG

	/* XGETTEXT:  #ifdef TDEBUG only */
	msg(gettext("pid = %ld exits with status %d\n"),
		(long)getpid(), status);
#endif /* TDEBUG */
	exit(status);
}
1730 
1731 static void
1732 #ifdef __STDC__
1733 killall(void)
1734 #else
1735 killall()
1736 #endif
1737 {
1738 	struct slaves *slavep;
1739 
1740 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++)
1741 		if (slavep->sl_slavepid > 0) {
1742 			(void) kill(slavep->sl_slavepid, SIGKILL);
1743 #ifdef TDEBUG
1744 
1745 			/* XGETTEXT:  #ifdef TDEBUG only */
1746 			msg(gettext("Slave child %ld killed\n"),
1747 				(long)slavep->sl_slavepid);
1748 #endif
1749 		}
1750 	if (writepid) {
1751 		(void) kill(writepid, SIGKILL);
1752 #ifdef TDEBUG
1753 
1754 		/* XGETTEXT:  #ifdef TDEBUG only */
1755 		msg(gettext("Writer child %ld killed\n"), (long)writepid);
1756 #endif
1757 	}
1758 	if (archivepid) {
1759 		(void) kill(archivepid, SIGKILL);
1760 #ifdef TDEBUG
1761 
1762 		/* XGETTEXT:  #ifdef TDEBUG only */
1763 		msg(gettext("Archiver child %ld killed\n"), (long)archivepid);
1764 #endif
1765 	}
1766 }
1767 
1768 /*ARGSUSED*/
1769 static void
1770 proceed(int sig)
1771 {
1772 	caught++;
1773 }
1774 
1775 /*ARGSUSED*/
1776 static void
1777 die(int sig)
1778 {
1779 	Exit(X_FINOK);
1780 }
1781 
1782 static void
1783 #ifdef __STDC__
1784 enslave(void)
1785 #else
1786 enslave()
1787 #endif
1788 {
1789 	int cmd[2];			/* file descriptors */
1790 	int i;
1791 	struct sigvec sv;
1792 	struct slaves *slavep;
1793 	int saverr;
1794 
1795 	sv.sv_flags = SA_RESTART;
1796 	(void) sigemptyset(&sv.sa_mask);
1797 	master = getpid();
1798 	/*
1799 	 * slave sends SIGTERM on dumpabort
1800 	 */
1801 	sv.sv_handler = (void(*)(int))dumpabort;
1802 	(void) sigvec(SIGTERM, &sv, (struct sigvec *)0);
1803 	sv.sv_handler = tperror;
1804 	(void) sigvec(SIGUSR2, &sv, (struct sigvec *)0);
1805 	sv.sv_handler = proceed;
1806 	(void) sigvec(SIGUSR1, &sv, (struct sigvec *)0);
1807 	totalrecsout += recsout;
1808 	caught = 0;
1809 	recsout = 0;
1810 	rotor = 0;
1811 	bufclear();
1812 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++)
1813 		slavep->sl_slavefd = -1;
1814 	archivefd = arch = writer = -1;
1815 	for (i = 0; i < SLAVES; i++) {
1816 		if (pipe(cmd) < 0) {
1817 			saverr = errno;
1818 			msg(gettext(
1819 			    "Cannot create pipe for slave process: %s\n"),
1820 			    strerror(saverr));
1821 			dumpabort();
1822 			/*NOTREACHED*/
1823 		}
1824 		sighold(SIGUSR2);
1825 		sighold(SIGINT);
1826 		sighold(SIGTERM);
1827 		if ((slaves[i].sl_slavepid = fork()) < 0) {
1828 			saverr = errno;
1829 			msg(gettext("Cannot create slave process: %s\n"),
1830 			    strerror(saverr));
1831 			dumpabort();
1832 			/*NOTREACHED*/
1833 		}
1834 		slaves[i].sl_slavefd = cmd[1];
1835 		if (slaves[i].sl_slavepid == 0) {   /* Slave starts up here */
1836 			pid_t next;		    /* pid of neighbor */
1837 
1838 			sv.sv_handler = SIG_DFL;
1839 			(void) sigvec(SIGUSR2, &sv, (struct sigvec *)0);
1840 			sv.sv_handler = SIG_IGN;	/* master handler INT */
1841 			(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1842 			sv.sv_handler = die;		/* normal slave exit */
1843 			(void) sigvec(SIGTERM, &sv, (struct sigvec *)0);
1844 
1845 			child_chdir();
1846 			sigrelse(SIGUSR2);
1847 			sigrelse(SIGINT);
1848 			sigrelse(SIGTERM);
1849 
1850 			freeino();	/* release unneeded resources */
1851 #ifdef TDEBUG
1852 		(void) sleep(4); /* time for parent's message to get out */
1853 		/* XGETTEXT:  #ifdef TDEBUG only */
1854 		msg(gettext("Neighbor has pid = %ld\n"), (long)getpid());
1855 #endif
1856 			/* Closes cmd[1] as a side-effect */
1857 			for (slavep = &slaves[0];
1858 			    slavep < &slaves[SLAVES];
1859 			    slavep++)
1860 				if (slavep->sl_slavefd >= 0) {
1861 					(void) close(slavep->sl_slavefd);
1862 					slavep->sl_slavefd = -1;
1863 				}
1864 			(void) close(to);
1865 			(void) close(fi);	    /* Need our own seek ptr */
1866 			to = -1;
1867 
1868 			fi = open(disk, O_RDONLY);
1869 
1870 			if (fi < 0) {
1871 				saverr = errno;
1872 				msg(gettext(
1873 				    "Cannot open dump device `%s': %s\n"),
1874 					disk, strerror(saverr));
1875 				dumpabort();
1876 				/*NOTREACHED*/
1877 			}
1878 
1879 			if ((unsigned)atomic((int(*)())read, cmd[0],
1880 			    (char *)&next, sizeof (next)) != sizeof (next)) {
1881 				cmdrderr();
1882 				dumpabort();
1883 				/*NOTREACHED*/
1884 			}
1885 			dumpoffline(cmd[0], next, i);
1886 			Exit(X_FINOK);
1887 		}
1888 		/* Parent continues here */
1889 		sigrelse(SIGUSR2);
1890 		sigrelse(SIGINT);
1891 		sigrelse(SIGTERM);
1892 		(void) close(cmd[0]);
1893 	}
1894 
1895 	if (archive) {
1896 		archivepid = setuparchive();
1897 		if (!archivepid) {
1898 			dumpabort();
1899 			/*NOTREACHED*/
1900 		}
1901 	}
1902 
1903 	writepid = setupwriter();
1904 	if (!writepid) {
1905 		dumpabort();
1906 		/*NOTREACHED*/
1907 	}
1908 
1909 	if (arch >= 0) {
1910 		(void) close(arch);		/* only writer has this open */
1911 		arch = -1;
1912 	}
1913 
1914 	/* Tell each slave who follows it */
1915 	for (i = 0; i < SLAVES; i++) {
1916 		if ((unsigned)atomic((int(*)())write, slaves[i].sl_slavefd,
1917 		    (char *)&(slaves[(i + 1) % SLAVES].sl_slavepid),
1918 		    sizeof (int)) != sizeof (int)) {
1919 			cmdwrterr();
1920 			dumpabort();
1921 			/*NOTREACHED*/
1922 		}
1923 	}
1924 	sv.sv_handler = rollforward;		/* rcvd from writer on EOT */
1925 	(void) sigvec(SIGUSR1, &sv, (struct sigvec *)0);
1926 	slp = slaves;
1927 	(void) kill(slp->sl_slavepid, SIGUSR1);
1928 	master = 0;
1929 }
1930 
1931 static void
1932 #ifdef __STDC__
1933 wait_our_turn(void)
1934 #else
1935 wait_our_turn()
1936 #endif
1937 {
1938 	(void) sighold(SIGUSR1);
1939 
1940 	if (!caught) {
1941 #ifdef INSTRUMENT
1942 		(*idle)++;
1943 #endif
1944 		(void) sigpause(SIGUSR1);
1945 	}
1946 	caught = 0;
1947 	(void) sigrelse(SIGUSR1);
1948 }
1949 
1950 static void
1951 dumpoffline(int cmd, pid_t next, int mynum)
1952 {
1953 	struct req *p = slaves[mynum].sl_req;
1954 	ulong_t i;
1955 	uchar_t *cp;
1956 	uchar_t *blkbuf;
1957 	int notactive = 0;
1958 
1959 	blkbuf = xmalloc(sblock->fs_bsize);
1960 
1961 	/*CONSTANTCONDITION*/
1962 	assert(sizeof (spcl) == TP_BSIZE_MIN);
1963 
1964 	while (atomic((int(*)())read, cmd, (char *)p, reqsiz) == reqsiz) {
1965 		if (p->br_dblk) {
1966 			bread(p->br_dblk, (uchar_t *)blkbuf, p->br_size);
1967 		} else {
1968 			bcopy((char *)p->br_spcl, (char *)&spcl,
1969 			    sizeof (spcl));
1970 			ino = spcl.c_inumber;
1971 		}
1972 		dumptoarchive = p->aflag & BUF_ARCHIVE;
1973 		wait_our_turn();
1974 		if (p->br_dblk) {
1975 			for (i = p->br_size, cp = blkbuf;
1976 			    i > 0;
1977 			    /* LINTED character pointers aren't signed */
1978 			    cp += i > tp_bsize ? tp_bsize : i,
1979 			    i -= i > tp_bsize ? tp_bsize : i) {
1980 				/* LINTED unsigned to signed conversion ok */
1981 				taprec(cp, 0, i > tp_bsize ? tp_bsize : (int)i);
1982 			}
1983 		} else
1984 			spclrec();
1985 		(void) kill(next, SIGUSR1);	/* Next slave's turn */
1986 		/*
1987 		 * Note that we lie about file activity since we don't
1988 		 * check for it.
1989 		 */
1990 		if ((unsigned)atomic((int(*)())write, cmd, (char *)&notactive,
1991 		    sizeof (notactive)) != sizeof (notactive)) {
1992 			cmdwrterr();
1993 			dumpabort();
1994 			/*NOTREACHED*/
1995 		}
1996 	}
1997 
1998 	free(blkbuf);
1999 }
2000 
2001 static int count;		/* tape blocks written since last spclrec */
2002 
2003 /*ARGSUSED*/
2004 static void
2005 onxfsz(int sig)
2006 {
2007 	msg(gettext("File size limit exceeded writing output volume %d\n"),
2008 	    tapeno);
2009 	(void) kill(master, SIGUSR2);
2010 	Exit(X_REWRITE);
2011 }
2012 
2013 static long	lastnonaddr;		/* last DS_{INODE,CLRI,BITS} written */
2014 static long	lastnonaddrm;		/* and the mode thereof */
2015 /*
2016  * dowrite -- the main body of the output writer process
2017  */
2018 static void
2019 dowrite(int cmd)
2020 {
2021 	struct bdesc *last =
2022 	    &bufp[(NBUF*ntrec)-1];		/* last buffer in pool */
2023 	struct bdesc *bp = bufp;		/* current buf in tape block */
2024 	struct bdesc *begin = bufp;		/* first buf of tape block */
2025 	struct bdesc *end = bufp + (ntrec-1);	/* last buf of tape block */
2026 	int siz;				/* bytes written (block) */
2027 	int trecs;				/* records written (block)  */
2028 	long asize = 0;				/* number of 0.1" units... */
2029 						/* ...written on current tape */
2030 	char *tp, *rbuf = NULL;
2031 	char *recmap = spcl.c_addr;		/* current tape record map */
2032 	char *endmp;				/* end of valid map data */
2033 	char *mp;				/* current map entry */
2034 	union u_spcl *sp;
2035 
2036 	(void) signal(SIGXFSZ, onxfsz);
2037 
2038 	bzero((char *)&spcl, sizeof (spcl));
2039 	count = 0;
2040 
2041 	if (doingverify) {
2042 		rbuf = (char *)malloc((uint_t)writesize);
2043 		if (rbuf == 0) {
2044 			/* Restart from checkpoint */
2045 			(void) kill(master, SIGUSR2);
2046 			Exit(X_REWRITE);
2047 		}
2048 	}
2049 
2050 	for (;;) {
2051 		/* START: wait until all buffers in tape block are full */
2052 		if ((bp->b_flags & BUF_FULL) == 0) {
2053 			if (caught) {		/* master signalled flush */
2054 				(void) sighold(SIGUSR1);
2055 				caught = 0;
2056 				/* signal ready */
2057 				(void) kill(master, SIGUSR1);
2058 				chkpt.sl_count = 0;	/* signal not at EOT */
2059 				checkpoint(bp-1, cmd);	/* send data */
2060 				(void) sigpause(SIGUSR1);
2061 				break;
2062 			}
2063 #ifdef INSTRUMENT
2064 			(*readmissp)++;
2065 #endif
2066 			nap(50);
2067 			continue;
2068 		}
2069 		if (bp < end) {
2070 			bp++;
2071 			continue;
2072 		}
2073 		/* END: wait until all buffers in tape block are full */
2074 
2075 		tp = begin->b_data;
2076 		(void) sighold(SIGUSR1);
2077 		if (host) {
2078 			if (!doingverify)
2079 				siz = rmtwrite(tp, writesize);
2080 			else if ((siz = rmtread(rbuf, writesize)) ==
2081 			    writesize && bcmp(rbuf, tp, writesize))
2082 				siz = -1;
2083 		} else {
2084 			if (!doingverify)
2085 				siz = write(to, tp, writesize);
2086 			else if ((siz = read(to, rbuf, writesize)) ==
2087 			    writesize && bcmp(rbuf, tp, writesize))
2088 				siz = -1;
2089 			if (siz < 0 && diskette && errno == ENOSPC)
2090 				siz = 0;	/* really EOF */
2091 		}
2092 		(void) sigrelse(SIGUSR1);
2093 		if (siz < 0 ||
2094 		    (pipeout && siz != writesize)) {
2095 			char buf[3000];
2096 
2097 			/*
2098 			 * Isn't i18n wonderful?
2099 			 */
2100 			if (doingverify) {
2101 				if (diskette)
2102 					(void) snprintf(buf, sizeof (buf),
2103 					    gettext(
2104 		    "Verification error %ld blocks into diskette %d\n"),
2105 					    asize * 2, tapeno);
2106 				else if (tapeout)
2107 					(void) snprintf(buf, sizeof (buf),
2108 					    gettext(
2109 		    "Verification error %ld feet into tape %d\n"),
2110 					    (cartridge ? asize/tracks :
2111 						asize)/120L,
2112 					    tapeno);
2113 				else
2114 					(void) snprintf(buf, sizeof (buf),
2115 					    gettext(
2116 		    "Verification error %ld blocks into volume %d\n"),
2117 					    asize * 2, tapeno);
2118 
2119 			} else {
2120 				if (diskette)
2121 					(void) snprintf(buf, sizeof (buf),
2122 					    gettext(
2123 			"Write error %ld blocks into diskette %d\n"),
2124 					    asize * 2, tapeno);
2125 				else if (tapeout)
2126 					(void) snprintf(buf, sizeof (buf),
2127 					    gettext(
2128 			"Write error %ld feet into tape %d\n"),
2129 					    (cartridge ? asize/tracks :
2130 						asize)/120L, tapeno);
2131 				else
2132 					(void) snprintf(buf, sizeof (buf),
2133 					    gettext(
2134 			"Write error %ld blocks into volume %d\n"),
2135 					    asize * 2, tapeno);
2136 			}
2137 
2138 			msg(buf);
2139 			/* Restart from checkpoint */
2140 #ifdef TDEBUG
2141 
2142 			/* XGETTEXT:  #ifdef TDEBUG only */
2143 			msg(gettext("sending SIGUSR2 to pid %ld\n"), master);
2144 #endif
2145 			(void) kill(master, SIGUSR2);
2146 			Exit(X_REWRITE);
2147 		}
2148 		trecs = siz / tp_bsize;
2149 		if (diskette)
2150 			asize += trecs;	/* asize == blocks written */
2151 		else
2152 			asize += (siz/density + tenthsperirg);
2153 		if (trecs)
2154 			chkpt.sl_firstrec++;
2155 		for (bp = begin; bp < begin + trecs; bp++) {
2156 			if ((arch >= 0) && (bp->b_flags & BUF_ARCHIVE)) {
2157 				if ((unsigned)atomic((int(*)())write, arch,
2158 				    (char *)&bp->b_flags, sizeof (bp->b_flags))
2159 				    != sizeof (bp->b_flags)) {
2160 					cmdwrterr();
2161 					dumpabort();
2162 					/*NOTREACHED*/
2163 				}
2164 				if (atomic((int(*)())write, arch, bp->b_data,
2165 				    tp_bsize) != tp_bsize) {
2166 					cmdwrterr();
2167 					dumpabort();
2168 					/*NOTREACHED*/
2169 				}
2170 			}
2171 			if (bp->b_flags & BUF_SPCLREC) {
2172 				/*LINTED [bp->b_data is aligned]*/
2173 				sp = (union u_spcl *)bp->b_data;
2174 				if (sp->s_spcl.c_type != TS_ADDR) {
2175 					lastnonaddr = sp->s_spcl.c_type;
2176 					lastnonaddrm =
2177 						sp->s_spcl.c_dinode.di_mode;
2178 					if (sp->s_spcl.c_type != TS_TAPE)
2179 						chkpt.sl_offset = 0;
2180 				}
2181 				chkpt.sl_count = sp->s_spcl.c_count;
2182 				bcopy((char *)sp,
2183 					(char *)&spcl, sizeof (spcl));
2184 				mp = recmap;
2185 				endmp = &recmap[spcl.c_count];
2186 				count = 0;
2187 			} else {
2188 				chkpt.sl_offset++;
2189 				chkpt.sl_count--;
2190 				count++;
2191 				mp++;
2192 			}
2193 			/*
2194 			 * Adjust for contiguous hole
2195 			 */
2196 			for (; mp < endmp; mp++) {
2197 				if (*mp)
2198 					break;
2199 				chkpt.sl_offset++;
2200 				chkpt.sl_count--;
2201 			}
2202 		}
2203 		/*
2204 		 * Check for end of tape
2205 		 */
2206 		if (trecs < ntrec ||
2207 		    (!pipeout && tsize > 0 && asize > tsize)) {
2208 			if (tapeout)
2209 				msg(gettext("End-of-tape detected\n"));
2210 			else
2211 				msg(gettext("End-of-file detected\n"));
2212 			(void) sighold(SIGUSR1);
2213 			caught = 0;
2214 			(void) kill(master, SIGUSR1);	/* signal EOT */
2215 			checkpoint(--bp, cmd);	/* send checkpoint data */
2216 			(void) sigpause(SIGUSR1);
2217 			break;
2218 		}
2219 		for (bp = begin; bp <= end; bp++)
2220 			bp->b_flags = BUF_EMPTY;
2221 		if (end + ntrec > last) {
2222 			bp = begin = bufp;
2223 			timeest(0, spcl.c_tapea);
2224 		} else
2225 			bp = begin = end+1;
2226 		end = begin + (ntrec-1);
2227 	}
2228 
2229 	if (rbuf != NULL)
2230 		free(rbuf);
2231 }
2232 
2233 /*
2234  * Send checkpoint info back to master.  This information
2235  * consists of the current inode number, number of logical
2236  * blocks written for that inode (or bitmap), the last logical
2237  * block number written, the number of logical blocks written
2238  * to this volume, the current dump state, and the current
2239  * special record map.
2240  */
2241 static void
2242 checkpoint(struct bdesc *bp, int cmd)
2243 {
2244 	int	state, type;
2245 	ino_t	ino;
2246 
2247 	if (++bp >= &bufp[NBUF*ntrec])
2248 		bp = bufp;
2249 
2250 	/*
2251 	 * If we are dumping files and the record following
2252 	 * the last written to tape is a special record, use
2253 	 * it to get an accurate indication of current state.
2254 	 */
2255 	if ((bp->b_flags & BUF_SPCLREC) && (bp->b_flags & BUF_FULL) &&
2256 	    lastnonaddr == TS_INODE) {
2257 		/*LINTED [bp->b_data is aligned]*/
2258 		union u_spcl *nextspcl = (union u_spcl *)bp->b_data;
2259 
2260 		if (nextspcl->s_spcl.c_type == TS_INODE) {
2261 			chkpt.sl_offset = 0;
2262 			chkpt.sl_count = 0;
2263 		} else if (nextspcl->s_spcl.c_type == TS_END) {
2264 			chkpt.sl_offset = 0;
2265 			chkpt.sl_count = 1;	/* EOT indicator */
2266 		}
2267 		ino = nextspcl->s_spcl.c_inumber;
2268 		type = nextspcl->s_spcl.c_type;
2269 	} else {
2270 		/*
2271 		 * If not, use what we have.
2272 		 */
2273 		ino = spcl.c_inumber;
2274 		type = spcl.c_type;
2275 	}
2276 
2277 	switch (type) {		/* set output state */
2278 	case TS_ADDR:
2279 		switch (lastnonaddr) {
2280 		case TS_INODE:
2281 		case TS_TAPE:
2282 			if ((lastnonaddrm & IFMT) == IFDIR ||
2283 			    (lastnonaddrm & IFMT) == IFATTRDIR)
2284 				state = DS_DIRS;
2285 			else
2286 				state = DS_FILES;
2287 			break;
2288 		case TS_CLRI:
2289 			state = DS_CLRI;
2290 			break;
2291 		case TS_BITS:
2292 			state = DS_BITS;
2293 			break;
2294 		}
2295 		break;
2296 	case TS_INODE:
2297 		if ((spcl.c_dinode.di_mode & IFMT) == IFDIR ||
2298 		    (spcl.c_dinode.di_mode & IFMT) == IFATTRDIR)
2299 			state = DS_DIRS;
2300 		else
2301 			state = DS_FILES;
2302 		break;
2303 	case 0:			/* EOT on 1st record */
2304 	case TS_TAPE:
2305 		state = DS_START;
2306 		ino = UFSROOTINO;
2307 		break;
2308 	case TS_CLRI:
2309 		state = DS_CLRI;
2310 		break;
2311 	case TS_BITS:
2312 		state = DS_BITS;
2313 		break;
2314 	case TS_END:
2315 		if (spcl.c_type == TS_END)
2316 			state = DS_DONE;
2317 		else
2318 			state = DS_END;
2319 		break;
2320 	}
2321 
2322 	/*
2323 	 * Checkpoint info to be processed by rollforward():
2324 	 *	The inode with which the next volume should begin
2325 	 *	The last inode number on this volume
2326 	 *	The last logical block number on this volume
2327 	 *	The current output state
2328 	 *	The offset within the current inode (already in sl_offset)
2329 	 *	The number of records left from last spclrec (in sl_count)
2330 	 *	The physical block the next vol begins with (in sl_firstrec)
2331 	 */
2332 	chkpt.sl_inos = ino;
2333 	chkpt.sl_tapea = spcl.c_tapea + count;
2334 	chkpt.sl_state = state;
2335 
2336 	if ((unsigned)atomic((int(*)())write, cmd, (char *)&chkpt,
2337 	    sizeof (chkpt)) != sizeof (chkpt)) {
2338 		cmdwrterr();
2339 		dumpabort();
2340 		/*NOTREACHED*/
2341 	}
2342 	if ((unsigned)atomic((int(*)())write, cmd, (char *)&spcl,
2343 	    sizeof (spcl)) != sizeof (spcl)) {
2344 		cmdwrterr();
2345 		dumpabort();
2346 		/*NOTREACHED*/
2347 	}
2348 #ifdef DEBUG
2349 	if (xflag) {
2350 		/* XGETTEXT:  #ifdef DEBUG only */
2351 		msg(gettext("sent chkpt to master:\n"));
2352 		msg("    ino %u\n", chkpt.sl_inos);
2353 		msg("    1strec %u\n", chkpt.sl_firstrec);
2354 		msg("    lastrec %u\n", chkpt.sl_tapea);
2355 		msg("    written %u\n", chkpt.sl_offset);
2356 		msg("    left %u\n", chkpt.sl_count);
2357 		msg("    state %d\n", chkpt.sl_state);
2358 	}
2359 #endif
2360 }
2361 
/*
 * Transfer exactly `count' bytes between `fd' and `buf' using `func'
 * (a read- or write-style routine), issuing as many calls as needed.
 * A single call may move less than requested, e.g. a read from a pipe
 * or a write interrupted by a signal, so keep going until the request
 * is satisfied.  Each call is limited to at most 4096 bytes, and a
 * call that fails with EINTR is simply retried.
 *
 * Returns `count' when everything was transferred.  Otherwise returns
 * the result of the call that stopped the loop: 0 for end-of-file (or
 * when `count' is negative and nothing is attempted), or a negative
 * value for an error, with errno left as set by `func'.
 */
static ssize_t
atomic(int (*func)(), int fd, char *buf, int count)
{
	ssize_t remaining = count;
	ssize_t rv = 0;
	ssize_t chunk;

	/* start clean so a zero return is not blamed on a stale errno */
	errno = 0;
	for (;;) {
		if (remaining <= 0)
			break;
		chunk = (remaining < 4096) ? remaining : 4096;
		rv = (*func)(fd, buf, chunk);
		if (rv > 0) {
			/* made progress; advance past what was moved */
			buf += rv;
			remaining -= rv;
			continue;
		}
		/* EOF, or an error other than an interrupted call: stop */
		if (rv == 0 || errno != EINTR)
			break;
	}
	/* full transfer yields count; anything else yields the last result */
	return ((remaining == 0) ? count : rv);
}
2386 
2387 void
2388 #ifdef __STDC__
2389 positiontape(char *msgbuf)
2390 #else
2391 positiontape(char *msgbuf)
2392 #endif
2393 {
2394 	/* Static as never change, no need to waste stack space */
2395 	static struct mtget mt;
2396 	static struct mtop rew = { MTREW, 1 };
2397 	static struct mtop fsf = { MTFSF, 1 };
2398 	char *info = strdup(gettext("Positioning `%s' to file %ld\n"));
2399 	char *fail = strdup(gettext("Cannot position tape to file %d\n"));
2400 	int m;
2401 
2402 	/* gettext()'s return value is volatile, hence the strdup()s */
2403 
2404 	m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
2405 
2406 	/*
2407 	 * To avoid writing tape marks at inappropriate places, we open the
2408 	 * device read-only, position it, close it, and reopen it for writing.
2409 	 */
2410 	while ((to = host ? rmtopen(tape, O_RDONLY) :
2411 	    safe_device_open(tape, O_RDONLY|m, 0600)) < 0) {
2412 		if (autoload) {
2413 			if (!query_once(msgbuf, 1)) {
2414 				dumpabort();
2415 				/*NOTREACHED*/
2416 			}
2417 		} else {
2418 			if (!query(msgbuf)) {
2419 				dumpabort();
2420 				/*NOTREACHED*/
2421 			}
2422 		}
2423 	}
2424 
2425 	if (host) {
2426 		if (rmtstatus(&mt) >= 0 &&
2427 		    rmtioctl(MTREW, 1) >= 0 &&
2428 		    filenum > 1) {
2429 			msg(info, dumpdev, filenum);
2430 			if (rmtioctl(MTFSF, filenum-1) < 0) {
2431 				msg(fail, filenum);
2432 				dumpabort();
2433 				/*NOTREACHED*/
2434 			}
2435 		}
2436 		rmtclose();
2437 	} else {
2438 		if (ioctl(to, MTIOCGET, &mt) >= 0 &&
2439 		    ioctl(to, MTIOCTOP, &rew) >= 0 &&
2440 		    filenum > 1) {
2441 			msg(info, dumpdev, filenum);
2442 			fsf.mt_count = filenum - 1;
2443 			if (ioctl(to, MTIOCTOP, &fsf) < 0) {
2444 				msg(fail, filenum);
2445 				dumpabort();
2446 				/*NOTREACHED*/
2447 			}
2448 		}
2449 		(void) close(to);
2450 		to = -1;
2451 	}
2452 
2453 	free(info);
2454 	free(fail);
2455 }
2456 
/*
 * Report a failed write on the command pipe, including the text for
 * the errno value in effect on entry.
 */
static void
#ifdef __STDC__
cmdwrterr(void)
#else
cmdwrterr()
#endif
{
	/* capture errno first so the calls below cannot disturb it */
	int err = errno;
	char *fmt = gettext("Error writing command pipe: %s\n");

	msg(fmt, strerror(err));
}
2467 
/*
 * Report a failed read on the command pipe, including the text for
 * the errno value in effect on entry.
 */
static void
#ifdef __STDC__
cmdrderr(void)
#else
cmdrderr()
#endif
{
	/* capture errno first so the calls below cannot disturb it */
	int err = errno;
	char *fmt = gettext("Error reading command pipe: %s\n");

	msg(fmt, strerror(err));
}
2478