xref: /illumos-gate/usr/src/cmd/backup/dump/dumptape.c (revision 9d6ca3965c3358c32eb68544fe91ff8ad9c3fcde)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * Portions of this source code were derived from Berkeley 4.3 BSD
31  * under license from the Regents of the University of California.
32  */
33 
34 #include "dump.h"
35 #include <rmt.h>
36 #include <setjmp.h>
37 #include <sys/fdio.h>
38 #include <sys/mkdev.h>
39 #include <assert.h>
40 #include <limits.h>
41 
/*
 * NOTE(review): SLEEPMS is not referenced in the part of the file
 * reviewed here; presumably a polling interval in milliseconds -- confirm.
 */
#define	SLEEPMS		50

int newtape;			/* NOTE(review): not used in the code seen here */
static uint_t writesize;	/* bytes per tape write: ntrec * tp_bsize */
static ino_t inos[TP_NINOS];	/* starting inodes on each tape */
47 
48 /*
49  * The req structure is used to pass commands from the parent
50  * process through the pipes to the slave processes.  It comes
51  * in two flavors, depending on which mode dump is operating under:
52  * an inode request (on-line mode) and a disk block request ("old" mode).
53  */
/*
 * The inode request structure is used during on-line mode.
 * The master passes inode numbers and starting offsets to
 * the slaves.  The tape writer passes out the current inode,
 * offset, and number of tape records written after completing a volume.
 *
 * Fields are normally reached through the ir_* accessor macros
 * defined after struct req.
 */
struct ireq {
	ino_t	inumber;	/* inode number to open/dump */
	long	igen;		/* inode generation number */
	off_t	offset;		/* starting offset in inode */
	int	count;		/* count for 1st spclrec */
};
/*
 * The block request structure is used in off-line mode to pass
 * commands to dump disk blocks from the parent process through
 * the pipes to the slave processes.
 *
 * spclrec is declared with one element but is really a trailing
 * variable-length area: alloctape() allocates each request as
 * sizeof (struct req) + tp_bsize - sizeof (long), and dospcl() copies
 * a whole tp_bsize special record into it.  Do not change the
 * declared length without revisiting that size computation.
 */
struct breq {
	diskaddr_t dblk;		/* disk address to read */
	size_t	size;		/* number of bytes to read from disk */
	ulong_t	spclrec[1];	/* actually longer */
};
76 
/*
 * Request packet written down a slave's pipe by toslave().  The union
 * carries either an inode request or a disk block request; the packet
 * is always transferred as reqsiz bytes.
 */
struct req {
	short	aflag;		/* write data to archive process as well */
	short	tflag;		/* begin new tape */
	union	reqdata {
		struct ireq ino;	/* used for on-line mode */
		struct breq blks;	/* used for off-line mode */
	} data;
};
85 
/* Shorthand for the on-line (inode) flavor of a struct req */
#define	ir_inumber	data.ino.inumber
#define	ir_igen		data.ino.igen
#define	ir_offset	data.ino.offset
#define	ir_count	data.ino.count

/* Shorthand for the off-line (disk block) flavor of a struct req */
#define	br_dblk		data.blks.dblk
#define	br_size		data.blks.size
#define	br_spcl		data.blks.spclrec

/* size in bytes of one request packet, including the trailing record */
static int reqsiz = 0;	/* alloctape will initialize */
96 
/* Number of slave slots; toslave() hands requests to them round-robin */
#define	SLAVES 3
/*
 * Per-slave bookkeeping.  The same structure doubles as the checkpoint
 * record (chkpt) that rollforward() reads back from the tape writer.
 */
struct slaves {
	int	sl_slavefd;	/* pipe from master to slave */
	pid_t	sl_slavepid;	/* slave pid; used by killall() */
	ino_t	sl_inos;	/* inos, if this record starts tape */
	int	sl_offset;	/* logical blocks written for object */
	int	sl_count;	/* logical blocks left in spclrec */
	int	sl_tapea;	/* header number, if starting tape */
	int	sl_firstrec;	/* number of first block on tape */
	int	sl_state;	/* dump output state */
	struct	req *sl_req;	/* instruction packet to slave */
};
static struct slaves slaves[SLAVES];	/* one per slave */
static struct slaves *slp;	/* pointer to current slave */
static struct slaves chkpt;	/* checkpointed data */
112 
/*
 * Descriptor for one tp_bsize slice of the shared output buffers.
 * alloctape() creates NBUF * ntrec of these; b_flags holds a
 * combination of the BUF_* values.
 */
struct bdesc {
	char	*b_data;	/* pointer to buffer data */
	int	b_flags;	/* flags (see below) */
};
117 
/*
 * The following variables are in shared memory, and must be
 * explicitly checkpointed and/or reset.
 *
 * All of them are carved out of the single mapping created by
 * alloctape(); the pointers themselves live in ordinary process memory.
 */
static caddr_t shared;		/* pointer to block of shared memory */
static struct bdesc *bufp;	/* buffer descriptors */
static struct bdesc **current;	/* output buffer to fill */
static int *tapea;		/* logical record count */

#ifdef INSTRUMENT
static int	*readmissp;	/* number of times writer was idle */
static int	*idle;		/* number of times slaves were idle */
#endif	/* INSTRUMENT */
131 
/*
 * Buffer flags
 *
 * Stored in bdesc.b_flags.  BUF_ARCHIVE is also placed in the aflag
 * field of a request packet by toslave() and dmpblk().
 */
#define	BUF_EMPTY	0x0	/* nothing in buffer */
#define	BUF_FULL	0x1	/* data in buffer */
#define	BUF_SPCLREC	0x2	/* contains special record */
#define	BUF_ARCHIVE	0x4	/* dump to archive */
139 
/*
 * Master-side bookkeeping for the slave, writer and archiver processes.
 */
static int recsout;		/* number of req's sent to slaves */
static int totalrecsout;	/* total number of req's sent to slaves */
static int rotor;		/* next slave to be instructed */
static pid_t master;		/* pid of master, for sending error signals */
static int writer = -1;		/* fd of tape writer */
static pid_t writepid;		/* pid of tape writer */
static int arch;		/* fd of output archiver */
static pid_t archivepid;	/* pid of output archiver */
static int archivefd;		/* fd of archive file (proper) */
static offset_t lf_archoffset;	/* checkpointed offset into archive file */

int caught;			/* caught signal -- imported by mapfile() */

#ifdef DEBUG
extern	int xflag;
#endif
156 
/* Forward declarations for the file-local helpers */
static void cmdwrterr(void);
static void cmdrderr(void);
static void freetape(void);
static void bufclear(void);
static pid_t setuparchive(void);
static pid_t setupwriter(void);
static void nextslave(void);
static void tperror(int);
static void rollforward(int);
static void nap(int);
static void alrm(int);
static void just_rewind(void);
static void killall(void);
static void proceed(int);
static void die(int);
static void enslave(void);
static void wait_our_turn(void);
static void dumpoffline(int, pid_t, int);
static void onxfsz(int);
static void dowrite(int);
static void checkpoint(struct bdesc *, int);
static ssize_t atomic(int (*)(), int, char *, int);

/* length in bytes of the mapping at `shared'; set by alloctape() */
static size_t tapesize;
181 
/*
 * Allocate buffers and shared memory variables.  Tape buffers are
 * allocated on page boundaries for tape write() efficiency.
 *
 * A single MAP_SHARED mapping of /dev/zero holds, in order: the
 * page-aligned output buffers, the buffer descriptors, the `current'
 * pointer, the record counter and three time_t values (plus two
 * counters under INSTRUMENT).  One request packet per slave is
 * allocated with xmalloc(), and the checkpoint record is initialized
 * to "start of dump".  Any failure to open or map aborts the dump.
 */
void
alloctape(void)
{
	struct slaves *slavep;
	ulong_t pgoff = (unsigned)(getpagesize() - 1); /* 2**n - 1 */
	int	mapfd;
	char	*obuf;
	int	saverr;
	int	i, j;

	writesize = ntrec * tp_bsize;
	if (!printsize)
		msg(gettext("Writing %d Kilobyte records\n"),
		    writesize / TP_BSIZE_MIN);

	/*
	 * set up shared memory seg for here and child
	 */
	mapfd = open("/dev/zero", O_RDWR);
	if (mapfd == -1) {
		saverr = errno;
		msg(gettext("Cannot open `%s': %s\n"),
		    "/dev/zero", strerror(saverr));
		dumpabort();
		/*NOTREACHED*/
	}
	/*
	 * Allocate space such that buffers are page-aligned and
	 * pointers are aligned on 4-byte boundaries (for SPARC).
	 * This code assumes that (NBUF * writesize) is a multiple
	 * of the page size and that pages are aligned on 4-byte
	 * boundaries.  Space is allocated as follows:
	 *
	 *    (NBUF * writesize) for the actual buffers
	 *    (pagesize - 1) for padding so the buffers are page-aligned
	 *    (NBUF * ntrec * sizeof (struct bdesc)) for each buffer
	 *    (n * sizeof (int)) for [n] debugging variables/pointers
	 *    (n * sizeof (int)) for [n] miscellaneous variables/pointers
	 */
	tapesize =
	    (NBUF * writesize)				/* output buffers */
		/* LINTED: pgoff fits into a size_t */
	    + (size_t)pgoff				/* page alignment */
							/* buffer descriptors */
	    + (((size_t)sizeof (struct bdesc)) * NBUF * ntrec)
#ifdef INSTRUMENT
	    + (2 * (size_t)sizeof (int *))		/* instrumentation */
#endif
							/* shared variables */
	    + (size_t)sizeof (struct bdesc **)
	    + (size_t)sizeof (int *)
	    + (3 * (size_t)sizeof (time_t));

	shared = mmap((char *)0, tapesize, PROT_READ|PROT_WRITE,
	    MAP_SHARED, mapfd, (off_t)0);
	if (shared == (caddr_t)-1) {
		saverr = errno;
		msg(gettext("Cannot memory map output buffers: %s\n"),
		    strerror(saverr));
		dumpabort();
		/*NOTREACHED*/
	}
	/* the mapping stays valid after the descriptor is closed */
	(void) close(mapfd);

	/*
	 * Buffers and buffer headers
	 */
	/* round the mapping address up to the next page boundary */
	obuf = (char *)(((ulong_t)shared + pgoff) & ~pgoff);
	/* LINTED obuf and writesize are aligned */
	bufp = (struct bdesc *)(obuf + NBUF*writesize);
	/*
	 * Shared memory variables
	 */
	current = (struct bdesc **)&bufp[NBUF*ntrec];
	tapea = (int *)(current + 1);
	/*
	 * NOTE(review): tapea + 1 is only int-aligned; this relies on
	 * time_t needing no stricter alignment than int -- confirm for
	 * any 64-bit build.
	 */
	/* LINTED pointer alignment ok */
	telapsed = (time_t *)(tapea + 1);
	tstart_writing = telapsed + 1;
	tschedule = tstart_writing + 1;
#ifdef INSTRUMENT
	/*
	 * Debugging and instrumentation variables
	 */
	readmissp = (int *)(tschedule + 1);
	idle = readmissp + 1;
#endif
	/* give each descriptor its own tp_bsize slice of the buffers */
	for (i = 0, j = 0; i < NBUF * ntrec; i++, j += tp_bsize) {
		bufp[i].b_data = &obuf[j];
	}

	/*
	 * A request carries one tp_bsize record in place of the
	 * one-element spclrec[] array declared in struct breq.
	 */
	reqsiz = sizeof (struct req) + tp_bsize - sizeof (long);
	for (slavep = slaves; slavep < &slaves[SLAVES]; slavep++)
		slavep->sl_req = (struct req *)xmalloc(reqsiz);

	chkpt.sl_offset = 0;		/* start at offset 0 */
	chkpt.sl_count = 0;
	chkpt.sl_inos = UFSROOTINO;	/* in root inode */
	chkpt.sl_firstrec = 1;
	chkpt.sl_tapea = 0;
}
286 
287 static void
288 freetape(void)
289 {
290 	if (shared == NULL)
291 		return;
292 	(void) timeclock((time_t)0);
293 	(void) munmap(shared, tapesize);
294 	shared = NULL;
295 }
296 
297 /*
298  * Reset tape state variables -- called
299  * before a pass to dump active files.
300  */
301 void
302 reset(void)
303 {
304 	bufclear();
305 
306 #ifdef INSTRUMENT
307 	(*readmissp) = 0;
308 	(*idle) = 0;
309 #endif
310 
311 	spcl.c_flags = 0;
312 	spcl.c_volume = 0;
313 	tapeno = 0;
314 
315 	chkpt.sl_offset = 0;		/* start at offset 0 */
316 	chkpt.sl_count = 0;
317 	chkpt.sl_inos = UFSROOTINO;	/* in root inode */
318 	chkpt.sl_firstrec = 1;
319 	chkpt.sl_tapea = 0;
320 }
321 
322 static void
323 bufclear(void)
324 {
325 	struct bdesc *bp;
326 	int i;
327 
328 	for (i = 0, bp = bufp; i < NBUF * ntrec; i++, bp++)
329 		bp->b_flags = BUF_EMPTY;
330 	if ((caddr_t)current < shared ||
331 	    (caddr_t)current > (shared + tapesize)) {
332 		msg(gettext(
333 	    "bufclear: current pointer out of range of shared memory\n"));
334 		dumpabort();
335 		/*NOTREACHED*/
336 	}
337 	if ((*current != NULL) &&
338 	    (*current < &bufp[0] || *current > &bufp[NBUF*ntrec])) {
339 		/* ANSI string catenation, to shut cstyle up */
340 		msg(gettext("bufclear: current buffer pointer (0x%x) "
341 		    "out of range of buffer\naddresses (0x%x - 0x%x)\n"),
342 		    *current, &bufp[0], &bufp[NBUF*ntrec]);
343 		dumpabort();
344 		/*NOTREACHED*/
345 	}
346 	*current = bufp;
347 }
348 
349 /*
350  * Start a process to collect information describing the dump.
351  * This data takes two forms:
352  *    the bitmap and directory information being written to
353  *	the front of the tape (the "archive" file)
354  *    information describing each directory and inode (to
355  *	be included in the database tmp file)
356  * Write the data to the files as it is received so huge file
357  * systems don't cause dump to consume large amounts of memory.
358  */
359 static pid_t
360 setuparchive(void)
361 {
362 	struct slaves *slavep;
363 	int cmd[2];
364 	pid_t pid;
365 	ssize_t size;
366 	char *data;
367 	char *errmsg;
368 	int flags, saverr;
369 	int punt = 0;
370 
371 	/*
372 	 * Both the archive and database tmp files are
373 	 * checkpointed by taking their current offsets
374 	 * (sizes) after completing each volume.  Restoring
375 	 * from a checkpoint involves truncating to the
376 	 * checkpointed size.
377 	 */
378 	if (archive && !doingactive) {
379 		/* It's allowed/expected to exist, so can't use O_EXCL */
380 		archivefd = safe_file_open(archivefile, O_WRONLY, 0600);
381 		if (archivefd < 0) {
382 			saverr = errno;
383 			msg(gettext("Cannot open archive file `%s': %s\n"),
384 			    archivefile, strerror(saverr));
385 			dumpabort();
386 			/*NOTREACHED*/
387 		}
388 
389 		archive_opened = 1;
390 
391 		if (lseek64(archivefd, lf_archoffset, 0) < 0) {
392 			saverr = errno;
393 			msg(gettext(
394 			    "Cannot position archive file `%s' : %s\n"),
395 			    archivefile, strerror(saverr));
396 			dumpabort();
397 			/*NOTREACHED*/
398 		}
399 		if (ftruncate64(archivefd, lf_archoffset) < 0) {
400 			saverr = errno;
401 			msg(gettext(
402 			    "Cannot truncate archive file `%s' : %s\n"),
403 			    archivefile, strerror(saverr));
404 			dumpabort();
405 			/*NOTREACHED*/
406 		}
407 	}
408 
409 	if (pipe(cmd) < 0) {
410 		saverr = errno;
411 		msg(gettext("%s: %s error: %s\n"),
412 		    "setuparchive", "pipe", strerror(saverr));
413 		return (0);
414 	}
415 	sighold(SIGINT);
416 	if ((pid = fork()) < 0) {
417 		saverr = errno;
418 		msg(gettext("%s: %s error: %s\n"),
419 		    "setuparchive", "fork", strerror(saverr));
420 		return (0);
421 	}
422 	if (pid > 0) {
423 		sigrelse(SIGINT);
424 		/* parent process */
425 		(void) close(cmd[0]);
426 		arch = cmd[1];
427 		return (pid);
428 	}
429 	/*
430 	 * child process
431 	 */
432 	(void) signal(SIGINT, SIG_IGN);		/* master handles this */
433 #ifdef TDEBUG
434 	(void) sleep(4);	/* allow time for parent's message to get out */
435 	/* XGETTEXT:  #ifdef TDEBUG only */
436 	msg(gettext("Archiver has pid = %ld\n"), (long)getpid());
437 #endif
438 	freeino();	/* release unneeded resources */
439 	freetape();
440 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
441 		if (slavep->sl_slavefd != -1) {
442 			(void) close(slavep->sl_slavefd);
443 			slavep->sl_slavefd = -1;
444 		}
445 	}
446 	(void) close(to);
447 	(void) close(fi);
448 	to = fi = -1;
449 	(void) close(cmd[1]);
450 	data = xmalloc(tp_bsize);
451 	for (;;) {
452 		size = atomic((int(*)())read, cmd[0], (char *)&flags,
453 		    sizeof (flags));
454 		if ((unsigned)size != sizeof (flags))
455 			break;
456 		size = atomic((int(*)())read, cmd[0], data, tp_bsize);
457 		if (size == tp_bsize) {
458 			if (archive && flags & BUF_ARCHIVE && !punt &&
459 			    (size = write(archivefd, data, tp_bsize))
460 			    != tp_bsize) {
461 				struct stat64 stats;
462 
463 				if (size != -1) {
464 					errmsg = strdup(gettext(
465 					    "Output truncated"));
466 					if (errmsg == NULL)
467 						errmsg = "";
468 				} else {
469 					errmsg = strerror(errno);
470 				}
471 
472 				if (fstat64(archivefd, &stats) < 0)
473 					stats.st_size = -1;
474 
475 				/* cast to keep lint&printf happy */
476 				msg(gettext(
477 		    "Cannot write archive file `%s' at offset %lld: %s\n"),
478 				    archivefile, (longlong_t)stats.st_size,
479 				    errmsg);
480 				msg(gettext(
481 		    "Archive file will be deleted, dump will continue\n"));
482 				punt++;
483 				if ((size != -1) && (*errmsg != '\0')) {
484 					free(errmsg);
485 				}
486 			}
487 		} else {
488 			break;
489 		}
490 	}
491 	(void) close(cmd[0]);
492 	if (archive) {
493 		(void) close(archivefd);
494 		archivefd = -1;
495 	}
496 	if (punt) {
497 		(void) unlink(archivefile);
498 		Exit(X_ABORT);
499 	}
500 	Exit(X_FINOK);
501 	/* NOTREACHED */
502 	return (0);
503 }
504 
505 /*
506  * Start a process to read the output buffers and write the data
507  * to the output device.
508  */
509 static pid_t
510 setupwriter(void)
511 {
512 	struct slaves *slavep;
513 	int cmd[2];
514 	pid_t pid;
515 	int saverr;
516 
517 	caught = 0;
518 	if (pipe(cmd) < 0) {
519 		saverr = errno;
520 		msg(gettext("%s: %s error: %s\n"),
521 		    "setupwriter", "pipe", strerror(saverr));
522 		return (0);
523 	}
524 	sighold(SIGINT);
525 	if ((pid = fork()) < 0) {
526 		saverr = errno;
527 		msg(gettext("%s: %s error: %s\n"),
528 		    "setupwriter", "fork", strerror(saverr));
529 		return (0);
530 	}
531 	if (pid > 0) {
532 		/*
533 		 * Parent process
534 		 */
535 		sigrelse(SIGINT);
536 		(void) close(cmd[0]);
537 		writer = cmd[1];
538 		return (pid);
539 	}
540 	/*
541 	 * Child (writer) process
542 	 */
543 	(void) signal(SIGINT, SIG_IGN);		/* master handles this */
544 #ifdef TDEBUG
545 	(void) sleep(4);	/* allow time for parent's message to get out */
546 	/* XGETTEXT:  #ifdef TDEBUG only */
547 	msg(gettext("Writer has pid = %ld\n"), (long)getpid());
548 #endif
549 	child_chdir();
550 	freeino();	/* release unneeded resources */
551 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
552 		if (slavep->sl_slavefd != -1) {
553 			(void) close(slavep->sl_slavefd);
554 			slavep->sl_slavefd = -1;
555 		}
556 	}
557 	(void) close(fi);
558 	fi = -1;
559 	(void) close(cmd[1]);
560 	dowrite(cmd[0]);
561 	if (arch >= 0) {
562 		(void) close(arch);
563 		arch = -1;
564 	}
565 	(void) close(cmd[0]);
566 	Exit(X_FINOK);
567 	/* NOTREACHED */
568 	return (0);
569 }
570 
/*
 * Finish the global special record `spcl' and queue it for output:
 * adjust type/flags for shadow inodes, TS_END and TS_TAPE records,
 * stamp it with the current record count, inode number and magic,
 * compute its checksum, and hand it to taprec().  Flags that apply
 * only to the record just written are cleared again afterwards.
 */
void
spclrec(void)
{
	int s, i;
	int32_t *ip;
	int flags = BUF_SPCLREC;

	/* an inode marked in shamap is written as TS_ADDR with mode IFSHAD */
	if ((BIT(ino, shamap)) && (spcl.c_type == TS_INODE)) {
		spcl.c_type = TS_ADDR;
		/* LINTED: result fits in a short */
		spcl.c_dinode.di_mode &= ~S_IFMT;
		/* LINTED: result fits in a short */
		spcl.c_dinode.di_mode |= IFSHAD;
	}

	/*
	 * Only TS_INODEs should have short metadata, if this
	 * isn't such a spclrec, clear the metadata flag and
	 * the c_shadow contents.
	 */
	if (!(spcl.c_type == TS_INODE && (spcl.c_flags & DR_HASMETA))) {
		spcl.c_flags &= ~DR_HASMETA;
		bcopy(c_shadow_save, &(spcl.c_shadow),
		    sizeof (spcl.c_shadow));
	}

	if (spcl.c_type == TS_END) {
		spcl.c_count = 1;
		spcl.c_flags |= DR_INODEINFO;
		bcopy((char *)inos, (char *)spcl.c_inos, sizeof (inos));
	} else if (spcl.c_type == TS_TAPE) {
		spcl.c_flags |= DR_NEWHEADER;
		if (doingactive)
			spcl.c_flags |= DR_REDUMP;
	} else if (spcl.c_type != TS_INODE)
		/* no effect: flags already holds BUF_SPCLREC */
		flags = BUF_SPCLREC;
	spcl.c_tapea = *tapea;
	/* LINTED for now, max inode # is 2**31 (ufs max size is 4TB) */
	spcl.c_inumber = (ino32_t)ino;
	spcl.c_magic = (tp_bsize == TP_BSIZE_MIN) ? NFS_MAGIC : MTB_MAGIC;
	/*
	 * The checksum field is zeroed, then set so that subtracting
	 * every 32-bit word of the record from CHECKSUM yields it.
	 * The loop is unrolled eight words at a time.
	 */
	spcl.c_checksum = 0;
	ip = (int32_t *)&spcl;
	s = CHECKSUM;
	assert((tp_bsize % sizeof (*ip)) == 0);
	i = tp_bsize / sizeof (*ip);
	assert((i%8) == 0);
	i /= 8;
	do {
		s -= *ip++; s -= *ip++; s -= *ip++; s -= *ip++;
		s -= *ip++; s -= *ip++; s -= *ip++; s -= *ip++;
	} while (--i > 0);
	spcl.c_checksum = s;
	taprec((uchar_t *)&spcl, flags, sizeof (spcl));
	/* these flags describe only the record just queued */
	if (spcl.c_type == TS_END)
		spcl.c_flags &= ~DR_INODEINFO;
	else if (spcl.c_type == TS_TAPE)
		spcl.c_flags &= ~(DR_NEWHEADER|DR_REDUMP|DR_TRUEINC);
}
629 
630 /*
631  * Fill appropriate buffer
632  */
633 void
634 taprec(uchar_t *dp, int flags, int size)
635 {
636 	if (size > tp_bsize) {
637 		msg(gettext(
638 		    "taprec: Unexpected buffer size, expected %d, got %d.\n"),
639 		    tp_bsize, size);
640 		dumpabort();
641 		/*NOTREACHED*/
642 	}
643 
644 	while ((*current)->b_flags & BUF_FULL)
645 		nap(10);
646 
647 	bcopy(dp, (*current)->b_data, (size_t)size);
648 	if (size < tp_bsize) {
649 		bzero((*current)->b_data + size, tp_bsize - size);
650 	}
651 
652 	if (dumptoarchive)
653 		flags |= BUF_ARCHIVE;
654 
655 	/* no locking as we assume only one reader and one writer active */
656 	(*current)->b_flags = (flags | BUF_FULL);
657 	if (++*current >= &bufp[NBUF*ntrec])
658 		(*current) = &bufp[0];
659 	(*tapea)++;
660 }
661 
662 void
663 dmpblk(daddr32_t blkno, size_t size, off_t offset)
664 {
665 	diskaddr_t dblkno;
666 
667 	assert((offset >> DEV_BSHIFT) <= INT32_MAX);
668 	dblkno = fsbtodb(sblock, blkno) + (offset >> DEV_BSHIFT);
669 	size = (size + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
670 	slp->sl_req->br_dblk = dblkno;
671 	slp->sl_req->br_size = size;
672 	if (dumptoarchive) {
673 		/* LINTED: result fits in a short */
674 		slp->sl_req->aflag |= BUF_ARCHIVE;
675 	}
676 	toslave((void(*)())0, ino);
677 }
678 
679 /*ARGSUSED*/
680 static void
681 tperror(int sig)
682 {
683 	char buf[3000];
684 
685 	if (pipeout) {
686 		msg(gettext("Write error on %s\n"), tape);
687 		msg(gettext("Cannot recover\n"));
688 		dumpabort();
689 		/* NOTREACHED */
690 	}
691 	if (!doingverify) {
692 		broadcast(gettext("WRITE ERROR!\n"));
693 		(void) snprintf(buf, sizeof (buf),
694 		    gettext("Do you want to restart?: (\"yes\" or \"no\") "));
695 		if (!query(buf)) {
696 			dumpabort();
697 			/*NOTREACHED*/
698 		}
699 		if (tapeout && (isrewind(to) || offline)) {
700 			/* ANSI string catenation, to shut cstyle up */
701 			msg(gettext("This tape will rewind.  After "
702 			    "it is rewound,\nreplace the faulty tape "
703 			    "with a new one;\nthis dump volume will "
704 			    "be rewritten.\n"));
705 		}
706 	} else {
707 		broadcast(gettext("TAPE VERIFICATION ERROR!\n"));
708 		(void) snprintf(buf, sizeof (buf), gettext(
709 		    "Do you want to rewrite?: (\"yes\" or \"no\") "));
710 		if (!query(buf)) {
711 			dumpabort();
712 			/*NOTREACHED*/
713 		}
714 		msg(gettext(
715 		    "This tape will be rewritten and then verified\n"));
716 	}
717 	killall();
718 	trewind();
719 	Exit(X_REWRITE);
720 }
721 
722 /*
723  * Called by master from pass() to send a request to dump files/blocks
724  * to one of the slaves.  Slaves return whether the file was active
725  * when it was being dumped.  The tape writer process sends checkpoint
726  * info when it completes a volume.
727  */
728 void
729 toslave(void (*fn)(), ino_t inumber)
730 {
731 	int	wasactive;
732 
733 	if (recsout >= SLAVES) {
734 		if ((unsigned)atomic((int(*)())read, slp->sl_slavefd,
735 		    (char *)&wasactive, sizeof (wasactive)) !=
736 		    sizeof (wasactive)) {
737 			cmdrderr();
738 			dumpabort();
739 			/*NOTREACHED*/
740 		}
741 		if (wasactive) {
742 			active++;
743 			msg(gettext(
744 			    "The file at inode `%lu' was active and will "
745 			    "be recopied\n"),
746 			    slp->sl_req->ir_inumber);
747 			/* LINTED: 32-bit to 8-bit assignment ok */
748 			BIS(slp->sl_req->ir_inumber, activemap);
749 		}
750 	}
751 	slp->sl_req->aflag = 0;
752 	if (dumptoarchive) {
753 		/* LINTED: result fits in a short */
754 		slp->sl_req->aflag |= BUF_ARCHIVE;
755 	}
756 	if (fn)
757 		(*fn)(inumber);
758 
759 	if (atomic((int(*)())write, slp->sl_slavefd, (char *)slp->sl_req,
760 	    reqsiz) != reqsiz) {
761 		cmdwrterr();
762 		dumpabort();
763 		/*NOTREACHED*/
764 	}
765 	++recsout;
766 	nextslave();
767 }
768 
769 void
770 dospcl(ino_t inumber)
771 {
772 	/* LINTED for now, max inode # is 2**31 (ufs max size is 1TB) */
773 	spcl.c_inumber = (ino32_t)inumber;
774 	slp->sl_req->br_dblk = 0;
775 	bcopy((char *)&spcl, (char *)slp->sl_req->br_spcl, tp_bsize);
776 }
777 
778 static void
779 nextslave(void)
780 {
781 	if (++rotor >= SLAVES) {
782 		rotor = 0;
783 	}
784 	slp = &slaves[rotor];
785 }
786 
787 void
788 flushcmds(void)
789 {
790 	int i;
791 	int wasactive;
792 
793 	/*
794 	 * Retrieve all slave status
795 	 */
796 	if (recsout < SLAVES) {
797 		slp = slaves;
798 		rotor = 0;
799 	}
800 	for (i = 0; i < (recsout < SLAVES ? recsout : SLAVES); i++) {
801 		if ((unsigned)atomic((int(*)())read, slp->sl_slavefd,
802 		    (char *)&wasactive, sizeof (wasactive)) !=
803 		    sizeof (wasactive)) {
804 			cmdrderr();
805 			dumpabort();
806 			/*NOTREACHED*/
807 		}
808 		if (wasactive) {
809 			active++;
810 			msg(gettext(
811 			    "inode %d was active and will be recopied\n"),
812 			    slp->sl_req->ir_inumber);
813 			/* LINTED: 32-bit to 8-bit assignment ok */
814 			BIS(slp->sl_req->ir_inumber, activemap);
815 		}
816 		nextslave();
817 	}
818 }
819 
820 void
821 flusht(void)
822 {
823 	sigset_t block_set, oset;	/* hold SIGUSR1 and atomically sleep */
824 
825 	(void) sigemptyset(&block_set);
826 	(void) sigaddset(&block_set, SIGUSR1);
827 	(void) sigprocmask(SIG_BLOCK, &block_set, &oset);
828 	(void) kill(writepid, SIGUSR1);	/* tell writer to flush */
829 	(void) sigpause(SIGUSR1);	/* wait for SIGUSR1 from writer */
830 	/*NOTREACHED*/
831 }
832 
/* longjmp target used by rollforward() to re-enter the main state loop */
jmp_buf	checkpoint_buf;

/*
 * Roll forward to the next volume after receiving
 * an EOT signal from writer.  Get checkpoint data
 * from writer and return if done, otherwise fork
 * a new process and jump back to main state loop
 * to begin the next volume.  Installed as the master's
 * signal handler for SIGUSR1.
 *
 * Control always leaves through longjmp(checkpoint_buf, 1) (or
 * dumpabort() on a pipe read error); the handler never returns
 * normally.  The signal number argument is not used.
 */
/*ARGSUSED*/
static void
rollforward(int sig)
{
	int status;
	(void) sighold(SIGUSR1);

	/*
	 * Writer sends us checkpoint information after
	 * each volume.  A returned state of DS_DONE with no
	 * unwritten (left-over) records differentiates a
	 * clean flush from one in which EOT was encountered.
	 */
	if ((unsigned)atomic((int(*)())read, writer, (char *)&chkpt,
	    sizeof (struct slaves)) != sizeof (struct slaves)) {
		cmdrderr();
		dumpabort();
		/*NOTREACHED*/
	}
	/* the checkpoint is followed by TP_BSIZE_MIN bytes of spcl */
	if (atomic((int(*)())read, writer, (char *)&spcl,
	    TP_BSIZE_MIN) != TP_BSIZE_MIN) {
		cmdrderr();
		dumpabort();
		/*NOTREACHED*/
	}
	/* restore the master's position from the checkpoint */
	ino = chkpt.sl_inos - 1;
	pos = chkpt.sl_offset;
	leftover = chkpt.sl_count;
	dumpstate = chkpt.sl_state;
	blockswritten = ++chkpt.sl_tapea;

	if (dumpstate == DS_DONE) {
		if (archivepid) {
			/*
			 * If archiving (either archive or
			 * database), signal the archiver
			 * to finish up.  This must happen
			 * before the writer exits in order
			 * to avoid a race.
			 */
			(void) kill(archivepid, SIGUSR1);
		}
		(void) signal(SIGUSR1, SIG_IGN);
		(void) sigrelse(SIGUSR1);
		(void) kill(writepid, SIGUSR1);	/* tell writer to exit */

		lf_archoffset = 0LL;
		longjmp(checkpoint_buf, 1);
		/*NOTREACHED*/
	}

	if (leftover) {
		/*
		 * Slide the last `leftover' entries of c_addr to the
		 * front and zero the remainder of the table.
		 */
		(void) memmove(spcl.c_addr,
		    &spcl.c_addr[spcl.c_count-leftover], leftover);
		bzero(&spcl.c_addr[leftover], TP_NINDIR-leftover);
	}
	if (writepid) {
		(void) kill(writepid, SIGUSR1);	/* tell writer to exit */
		(void) close(writer);
		writer = -1;
	}
	if (archivepid) {
		(void) waitpid(archivepid, &status, 0);	/* wait for archiver */
#ifdef TDEBUG

		/* XGETTEXT:  #ifdef TDEBUG only */
		msg(gettext("Archiver %ld returns with status %d\n"),
		    (long)archivepid, status);
#endif
		archivepid = 0;
	}
	/*
	 * Checkpoint archive file
	 */
	if (!doingverify && archive) {
		/* whence 2: seek to end of file to learn its size */
		lf_archoffset = lseek64(archivefd, (off64_t)0, 2);
		if (lf_archoffset < 0) {
			int saverr = errno;
			msg(gettext("Cannot position archive file `%s': %s\n"),
			    archivefile, strerror(saverr));
			dumpabort();
			/*NOTREACHED*/
		}
		(void) close(archivefd);
		archivefd = -1;
	}
	resetino(ino);

	if (dumpstate == DS_START) {
		msg(gettext(
		    "Tape too short: changing volumes and restarting\n"));
		reset();
	}

	if (!pipeout) {
		/* verifying: just rewind; otherwise go on to the next volume */
		if (verify && !doingverify)
			trewind();
		else {
			close_rewind();
			changevol();
		}
	}

	(void) sigrelse(SIGUSR1);
	otape(0);
	longjmp(checkpoint_buf, 1);
	/*NOTREACHED*/
}
951 
/*
 * Sleep for roughly `ms' milliseconds by calling select() with no
 * descriptors and a timeout.  The result of select() is ignored.
 */
static void
nap(int ms)
{
	struct timeval timeout;

	/* split into whole seconds and the remaining microseconds */
	timeout.tv_sec = ms / 1000;
	timeout.tv_usec = (ms % 1000) * 1000;
	(void) select(0, (fd_set *)0, (fd_set *)0, (fd_set *)0, &timeout);
}
961 
/* longjmp target for alrm(); the matching setjmp() is not in the code seen here */
static jmp_buf alrm_buf;

/*
 * Signal handler that abandons whatever was interrupted by jumping
 * back to alrm_buf with a value of 1.  The signal number is not used.
 * NOTE(review): presumably installed for SIGALRM -- confirm at the
 * point of installation.
 */
/*ARGSUSED*/
static void
alrm(int sig)
{
	longjmp(alrm_buf, 1);
	/*NOTREACHED*/
}
971 
972 void
973 nextdevice(void)
974 {
975 	char	*cp;
976 
977 	if (host != NULL)	/* we set the host only once in ufsdump */
978 		return;
979 
980 	host = NULL;
981 	if (strchr(tape, ':')) {
982 		if (diskette) {
983 			msg(gettext("Cannot do remote dump to diskette\n"));
984 			Exit(X_ABORT);
985 		}
986 		host = tape;
987 		tape = strchr(host, ':');
988 		*tape++ = 0;
989 		cp = strchr(host, '@');	/* user@host? */
990 		if (cp != (char *)0)
991 			cp++;
992 		else
993 			cp = host;
994 	} else
995 		cp = spcl.c_host;
996 	/*
997 	 * dumpdev is provided for use in prompts and is of
998 	 * the form:
999 	 *	hostname:device
1000 	 * sdumpdev is of the form:
1001 	 *	hostname:device
1002 	 * for remote devices, and simply:
1003 	 *	device
1004 	 * for local devices.
1005 	 */
1006 	if (dumpdev != (char *)NULL) {
1007 		/* LINTED: dumpdev is not NULL */
1008 		free(dumpdev);
1009 	}
1010 	/*LINTED [cast to smaller integer]*/
1011 	dumpdev = xmalloc((size_t)((sizeof (spcl.c_host) + strlen(tape) + 2)));
1012 	/* LINTED unsigned -> signed cast ok */
1013 	(void) sprintf(dumpdev, "%.*s:%s", (int)sizeof (spcl.c_host), cp, tape);
1014 	if (cp == spcl.c_host)
1015 		sdumpdev = strchr(dumpdev, ':') + 1;
1016 	else
1017 		sdumpdev = dumpdev;
1018 }
1019 
1020 /*
1021  * Gross hack due to misfeature of mt tape driver that causes
1022  * the device to rewind if we generate any signals.  Guess
1023  * whether tape is rewind device or not -- for local devices
1024  * we can just look at the minor number.  For rmt devices,
1025  * make an educated guess.
1026  */
1027 int
1028 isrewind(int f)
1029 {
1030 	struct stat64 sbuf;
1031 	char    *c;
1032 	int	unit;
1033 	int	rewind;
1034 
1035 	if (host) {
1036 		c = strrchr(tape, '/');
1037 		if (c == NULL)
1038 			c = tape;
1039 		else
1040 			c++;
1041 		/*
1042 		 * If the last component begins or ends with an 'n', it is
1043 		 * assumed to be a non-rewind device.
1044 		 */
1045 		if (c[0] == 'n' || c[strlen(c)-1] == 'n')
1046 			rewind = 0;
1047 		else if ((strstr(tape, "mt") || strstr(tape, "st")) &&
1048 		    sscanf(tape, "%*[a-zA-Z/]%d", &unit) == 1 &&
1049 		    (unit & MT_NOREWIND))
1050 			rewind = 0;
1051 		else
1052 			rewind = 1;
1053 	} else {
1054 		if (fstat64(f, &sbuf) < 0) {
1055 			msg(gettext(
1056 			    "Cannot obtain status of output device `%s'\n"),
1057 			    tape);
1058 			dumpabort();
1059 			/*NOTREACHED*/
1060 		}
1061 		rewind = minor(sbuf.st_rdev) & MT_NOREWIND ? 0 : 1;
1062 	}
1063 	return (rewind);
1064 }
1065 
1066 static void
1067 just_rewind(void)
1068 {
1069 	struct slaves *slavep;
1070 	char *rewinding = gettext("Tape rewinding\n");
1071 
1072 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
1073 		if (slavep->sl_slavepid > 0)	/* signal normal exit */
1074 			(void) kill(slavep->sl_slavepid, SIGTERM);
1075 		if (slavep->sl_slavefd >= 0) {
1076 			(void) close(slavep->sl_slavefd);
1077 			slavep->sl_slavefd = -1;
1078 		}
1079 	}
1080 
1081 	/* wait for any signals from slaves */
1082 	while (waitpid(0, (int *)0, 0) >= 0)
1083 		/*LINTED [empty body]*/
1084 		continue;
1085 
1086 	if (pipeout)
1087 		return;
1088 
1089 	if (doingverify) {
1090 		/*
1091 		 * Space to the end of the tape.
1092 		 * Backup first in case we already read the EOF.
1093 		 */
1094 		if (host) {
1095 			(void) rmtioctl(MTBSR, 1);
1096 			if (rmtioctl(MTEOM, 1) < 0)
1097 				(void) rmtioctl(MTFSF, 1);
1098 		} else {
1099 			static struct mtop bsr = { MTBSR, 1 };
1100 			static struct mtop eom = { MTEOM, 1 };
1101 			static struct mtop fsf = { MTFSF, 1 };
1102 
1103 			(void) ioctl(to, MTIOCTOP, &bsr);
1104 			if (ioctl(to, MTIOCTOP, &eom) < 0)
1105 				(void) ioctl(to, MTIOCTOP, &fsf);
1106 		}
1107 	}
1108 
1109 	/*
1110 	 * Guess whether the tape is rewinding so we can tell
1111 	 * the operator if it's going to take a long time.
1112 	 */
1113 	if (tapeout && isrewind(to)) {
1114 		/* tape is probably rewinding */
1115 		msg(rewinding);
1116 	}
1117 }
1118 
1119 void
1120 trewind(void)
1121 {
1122 	(void) timeclock((time_t)0);
1123 	if (offline && (!verify || doingverify)) {
1124 		close_rewind();
1125 	} else {
1126 		just_rewind();
1127 		if (host)
1128 			rmtclose();
1129 		else {
1130 			(void) close(to);
1131 			to = -1;
1132 		}
1133 	}
1134 }
1135 
1136 void
1137 close_rewind(void)
1138 {
1139 	char *rewinding = gettext("Tape rewinding\n");
1140 
1141 	(void) timeclock((time_t)0);
1142 	just_rewind();
1143 	/*
1144 	 * The check in just_rewind won't catch the case in
1145 	 * which the current volume is being taken off-line
1146 	 * and is not mounted on a no-rewind device (and is
1147 	 * not the last volume, which is not taken off-line).
1148 	 */
1149 	if (tapeout && !isrewind(to) && offline) {
1150 		/* tape is probably rewinding */
1151 		msg(rewinding);
1152 	}
1153 	if (host) {
1154 		if (offline || autoload)
1155 			(void) rmtioctl(MTOFFL, 0);
1156 		rmtclose();
1157 	} else {
1158 		if (offline || autoload) {
1159 			static struct mtop offl = { MTOFFL, 0 };
1160 
1161 			(void) ioctl(to, MTIOCTOP, &offl);
1162 			if (diskette)
1163 				(void) ioctl(to, FDEJECT, 0);
1164 		}
1165 		(void) close(to);
1166 		to = -1;
1167 	}
1168 }
1169 
1170 void
1171 changevol(void)
1172 {
1173 	char buf1[3000], buf2[3000];
1174 	char volname[LBLSIZE+1];
1175 
1176 	/*CONSTANTCONDITION*/
1177 	assert(sizeof (spcl.c_label) < sizeof (volname));
1178 
1179 	filenum = 1;
1180 	nextdevice();
1181 	(void) strcpy(spcl.c_label, tlabel);
1182 	if (host) {
1183 		char	*rhost = host;
1184 		char	*cp = strchr(host, '@');
1185 		if (cp == (char *)0)
1186 			cp = host;
1187 		else
1188 			cp++;
1189 
1190 		if (rmthost(rhost, ntrec) == 0) {
1191 			msg(gettext("Cannot connect to tape host `%s'\n"), cp);
1192 			dumpabort();
1193 			/*NOTREACHED*/
1194 		}
1195 		if (rhost != host)
1196 			free(rhost);
1197 	}
1198 
1199 	/*
1200 	 * Make volume switching as automatic as possible
1201 	 * while avoiding overwriting volumes.  We will
1202 	 * switch automatically under the following condition:
1203 	 *    1) The user specified autoloading from the
1204 	 *	command line.
1205 	 * At one time, we (in the guise of hsmdump) had the
1206 	 * concept of a sequence of devices to rotate through,
1207 	 * but that's never been a ufsdump feature.
1208 	 */
1209 	if (autoload) {
1210 		int tries;
1211 
1212 		/*
1213 		 * Stop the clock for throughput calculations.
1214 		 */
1215 		if ((telapsed != NULL) && (tstart_writing != NULL)) {
1216 			*telapsed += time((time_t *)NULL) - *tstart_writing;
1217 		}
1218 
1219 		(void) snprintf(volname, sizeof (volname), "#%d", tapeno+1);
1220 		(void) snprintf(buf1, sizeof (buf1), gettext(
1221 		    "Mounting volume %s on %s\n"), volname, dumpdev);
1222 		msg(buf1);
1223 		broadcast(buf1);
1224 
1225 		/*
1226 		 * Wait for the tape to autoload.  Note that the delay
1227 		 * period doesn't take into account however long it takes
1228 		 * for the open to fail (measured at 21 seconds for an
1229 		 * Exabyte 8200 under 2.7 on an Ultra 2).
1230 		 */
1231 		for (tries = 0; tries < autoload_tries; tries++) {
1232 			if (host) {
1233 				if (rmtopen(tape, O_RDONLY) >= 0) {
1234 					rmtclose();
1235 					return;
1236 				}
1237 			} else {
1238 				int f, m;
1239 
1240 				m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
1241 				if ((f = doingverify ?
1242 				    safe_device_open(tape, O_RDONLY, 0600) :
1243 				    safe_device_open(tape, O_RDONLY|m, 0600))
1244 				    >= 0) {
1245 					(void) close(f);
1246 					return;
1247 				}
1248 			}
1249 			(void) sleep(autoload_period);
1250 		}
1251 		/*
1252 		 * Autoload timed out, ask the operator to do it.
1253 		 * Note that query() will update *telapsed, and we
1254 		 * shouldn't charge for the autoload time.  So, since
1255 		 * we updated *telapsed ourselves above, we just set
1256 		 * tstart_writing to the current time, and query()
1257 		 * will end up making a null-effect change.  This,
1258 		 * of course, assumes that our caller will be resetting
1259 		 * *tstart_writing.  This is currently the case.
1260 		 * If tstart_writing is NULL (should never happen),
1261 		 * we're ok, since time(2) will accept a NULL pointer.
1262 		 */
1263 		(void) time(tstart_writing);
1264 	}
1265 
1266 	if (strncmp(spcl.c_label, "none", 5)) {
1267 		(void) strncpy(volname, spcl.c_label, sizeof (spcl.c_label));
1268 		volname[sizeof (spcl.c_label)] = '\0';
1269 	} else
1270 		(void) snprintf(volname, sizeof (volname), "#%d", tapeno+1);
1271 
1272 	timeest(1, spcl.c_tapea);
1273 	(void) snprintf(buf1, sizeof (buf1), gettext(
1274 	    "Change Volumes: Mount volume `%s' on `%s'\n"), volname, dumpdev);
1275 	msg(buf1);
1276 	broadcast(gettext("CHANGE VOLUMES!\7\7\n"));
1277 	(void) snprintf(buf1, sizeof (buf1), gettext(
1278 	    "Is the new volume (%s) mounted on `%s' and ready to go?: %s"),
1279 	    volname, dumpdev, gettext("(\"yes\" or \"no\") "));
1280 	while (!query(buf1)) {
1281 		(void) snprintf(buf2, sizeof (buf2), gettext(
1282 		    "Do you want to abort dump?: (\"yes\" or \"no\") "));
1283 		if (query(buf2)) {
1284 			dumpabort();
1285 			/*NOTREACHED*/
1286 		}
1287 	}
1288 }
1289 
1290 /*
1291  *	We implement taking and restoring checkpoints on the tape level.
1292  *	When each tape is opened, a new process is created by forking; this
1293  *	saves all of the necessary context in the parent.  The child
1294  *	continues the dump; the parent waits around, saving the context.
1295  *	If the child returns X_REWRITE, then it had problems writing that tape;
1296  *	this causes the parent to fork again, duplicating the context, and
1297  *	everything continues as if nothing had happened.
1298  */
1299 
1300 void
1301 otape(int top)
1302 {
1303 	static struct mtget mt;
1304 	char buf[3000];
1305 	pid_t parentpid;
1306 	pid_t childpid;
1307 	pid_t waitproc;
1308 	int status;
1309 	struct sigvec sv, osv;
1310 
1311 	sv.sv_flags = SA_RESTART;
1312 	(void) sigemptyset(&sv.sa_mask);
1313 	sv.sv_handler = SIG_IGN;
1314 	(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1315 
1316 	parentpid = getpid();
1317 
1318 	if (verify) {
1319 		if (doingverify)
1320 			doingverify = 0;
1321 		else
1322 			Exit(X_VERIFY);
1323 	}
1324 restore_check_point:
1325 
1326 	sv.sv_handler = interrupt;
1327 	(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1328 	(void) fflush(stderr);
1329 	/*
1330 	 *	All signals are inherited...
1331 	 */
1332 	sighold(SIGINT);
1333 	childpid = fork();
1334 	if (childpid < 0) {
1335 		msg(gettext(
1336 		    "Context-saving fork failed in parent %ld\n"),
1337 		    (long)parentpid);
1338 		Exit(X_ABORT);
1339 	}
1340 	if (childpid != 0) {
1341 		/*
1342 		 *	PARENT:
1343 		 *	save the context by waiting
1344 		 *	until the child doing all of the work returns.
1345 		 *	let the child catch user interrupts
1346 		 */
1347 		sv.sv_handler = SIG_IGN;
1348 		(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1349 		sigrelse(SIGINT);
1350 #ifdef TDEBUG
1351 
1352 		/* XGETTEXT:  #ifdef TDEBUG only */
1353 		msg(gettext(
1354 		    "Volume: %d; parent process: %ld child process %ld\n"),
1355 		    tapeno+1, (long)parentpid, (long)childpid);
1356 #endif /* TDEBUG */
1357 		for (;;) {
1358 			waitproc = waitpid(0, &status, 0);
1359 			if (waitproc == childpid)
1360 				break;
1361 			msg(gettext(
1362 	"Parent %ld waiting for child %ld had another child %ld return\n"),
1363 			    (long)parentpid, (long)childpid, (long)waitproc);
1364 		}
1365 		if (WIFSIGNALED(status)) {
1366 			msg(gettext("Process %ld killed by signal %d: %s\n"),
1367 			    (long)childpid, WTERMSIG(status),
1368 			    strsignal(WTERMSIG(status)));
1369 			status = X_ABORT;
1370 		} else
1371 			status = WEXITSTATUS(status);
1372 #ifdef TDEBUG
1373 		switch (status) {
1374 		case X_FINOK:
1375 			/* XGETTEXT:  #ifdef TDEBUG only */
1376 			msg(gettext(
1377 			    "Child %ld finishes X_FINOK\n"), (long)childpid);
1378 			break;
1379 		case X_ABORT:
1380 			/* XGETTEXT:  #ifdef TDEBUG only */
1381 			msg(gettext(
1382 			    "Child %ld finishes X_ABORT\n"), (long)childpid);
1383 			break;
1384 		case X_REWRITE:
1385 			/* XGETTEXT:  #ifdef TDEBUG only */
1386 			msg(gettext(
1387 			    "Child %ld finishes X_REWRITE\n"), (long)childpid);
1388 			break;
1389 		case X_RESTART:
1390 			/* XGETTEXT:  #ifdef TDEBUG only */
1391 			msg(gettext(
1392 			    "Child %ld finishes X_RESTART\n"), (long)childpid);
1393 			break;
1394 		case X_VERIFY:
1395 			/* XGETTEXT:  #ifdef TDEBUG only */
1396 			msg(gettext(
1397 			    "Child %ld finishes X_VERIFY\n"), (long)childpid);
1398 			break;
1399 		default:
1400 			/* XGETTEXT:  #ifdef TDEBUG only */
1401 			msg(gettext("Child %ld finishes unknown %d\n"),
1402 			    (long)childpid, status);
1403 			break;
1404 		}
1405 #endif /* TDEBUG */
1406 		switch (status) {
1407 		case X_FINOK:
1408 			/* wait for children */
1409 			while (waitpid(0, (int *)0, 0) >= 0)
1410 				/*LINTED [empty body]*/
1411 				continue;
1412 			Exit(X_FINOK);
1413 			/*NOTREACHED*/
1414 		case X_ABORT:
1415 			Exit(X_ABORT);
1416 			/*NOTREACHED*/
1417 		case X_VERIFY:
1418 			doingverify++;
1419 			goto restore_check_point;
1420 			/*NOTREACHED*/
1421 		case X_REWRITE:
1422 			doingverify = 0;
1423 			changevol();
1424 			goto restore_check_point;
1425 			/* NOTREACHED */
1426 		case X_RESTART:
1427 			doingverify = 0;
1428 			if (!top) {
1429 				Exit(X_RESTART);
1430 			}
1431 			if (!offline)
1432 				autoload = 0;
1433 			changevol();
1434 			sv.sv_handler = interrupt;
1435 			(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1436 			return;
1437 			/* NOTREACHED */
1438 		default:
1439 			msg(gettext("Bad return code from dump: %d\n"), status);
1440 			Exit(X_ABORT);
1441 			/*NOTREACHED*/
1442 		}
1443 		/*NOTREACHED*/
1444 	} else {	/* we are the child; just continue */
1445 		child_chdir();
1446 		sigrelse(SIGINT);
1447 #ifdef TDEBUG
1448 		(void) sleep(4); /* time for parent's message to get out */
1449 		/* XGETTEXT:  #ifdef TDEBUG only */
1450 		msg(gettext(
1451 		    "Child on Volume %d has parent %ld, my pid = %ld\n"),
1452 		    tapeno+1, (long)parentpid, (long)getpid());
1453 #endif
1454 		(void) snprintf(buf, sizeof (buf), gettext(
1455 "Cannot open `%s'.  Do you want to retry the open?: (\"yes\" or \"no\") "),
1456 		    dumpdev);
1457 		if (doingverify) {
1458 			/* 1 for stdout */
1459 			while ((to = host ? rmtopen(tape, O_RDONLY) :
1460 			    pipeout ? 1 :
1461 			    safe_device_open(tape, O_RDONLY, 0600)) < 0) {
1462 				perror(tape);
1463 				if (autoload) {
1464 					if (!query_once(buf, 1)) {
1465 						dumpabort();
1466 						/*NOTREACHED*/
1467 					}
1468 				} else {
1469 					if (!query(buf)) {
1470 						dumpabort();
1471 						/*NOTREACHED*/
1472 					}
1473 				}
1474 			}
1475 
1476 			/*
1477 			 * If we're using the non-rewinding tape device,
1478 			 * the tape will be left positioned after the
1479 			 * EOF mark.  We need to back up to the beginning
1480 			 * of this tape file (cross two tape marks in the
1481 			 * reverse direction and one in the forward
1482 			 * direction) before the verify pass.
1483 			 */
1484 			if (host) {
1485 				if (rmtioctl(MTBSF, 2) >= 0)
1486 					(void) rmtioctl(MTFSF, 1);
1487 				else
1488 					(void) rmtioctl(MTNBSF, 1);
1489 			} else {
1490 				static struct mtop bsf = { MTBSF, 2 };
1491 				static struct mtop fsf = { MTFSF, 1 };
1492 				static struct mtop nbsf = { MTNBSF, 1 };
1493 
1494 				if (ioctl(to, MTIOCTOP, &bsf) >= 0)
1495 					(void) ioctl(to, MTIOCTOP, &fsf);
1496 				else
1497 					(void) ioctl(to, MTIOCTOP, &nbsf);
1498 			}
1499 		} else {
1500 			/*
1501 			 * XXX Add logic to test for "tape" being a
1502 			 * XXX device or a non-existent file.
1503 			 * Current behaviour is that it must exist,
1504 			 * and we over-write whatever's there.
1505 			 * This can be bad if tape == "/etc/passwd".
1506 			 */
1507 			if (!pipeout && doposition && (tapeno == 0)) {
1508 				positiontape(buf);
1509 				if (setjmp(alrm_buf)) {
1510 					/*
1511 					 * The tape is rewinding;
1512 					 * we're screwed.
1513 					 */
1514 					msg(gettext(
1515 					    "Cannot position tape using "
1516 					    "rewind device!\n"));
1517 					dumpabort();
1518 					/*NOTREACHED*/
1519 				} else {
1520 					sv.sv_handler = alrm;
1521 					(void) sigvec(SIGALRM, &sv, &osv);
1522 					(void) alarm(15);
1523 				}
1524 				while ((to = host ? rmtopen(tape, O_WRONLY) :
1525 				    safe_device_open(tape, O_WRONLY, 0600)) < 0)
1526 					(void) sleep(10);
1527 				(void) alarm(0);
1528 				(void) sigvec(SIGALRM, &osv,
1529 				    (struct sigvec *)0);
1530 			} else {
1531 				int m;
1532 				m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
1533 				/*
1534 				 * Only verify the tape label if label
1535 				 * verification is on and we are at BOT
1536 				 */
1537 				if (pipeout)
1538 					to = 1;
1539 				else while ((to = host ?
1540 				    rmtopen(tape, O_WRONLY) :
1541 				    safe_device_open(tape, O_WRONLY|m, 0600))
1542 				    < 0)
1543 					if (!query_once(buf, 1)) {
1544 						dumpabort();
1545 						/*NOTREACHED*/
1546 					}
1547 			}
1548 		}
1549 		if (!pipeout) {
1550 			tapeout = host ? rmtstatus(&mt) >= 0 :
1551 			    ioctl(to, MTIOCGET, &mt) >= 0;	/* set state */
1552 			/*
1553 			 * Make sure the tape is positioned
1554 			 * where it is supposed to be
1555 			 */
1556 			if (tapeout && (tapeno > 0) &&
1557 			    (mt.mt_fileno != (filenum-1))) {
1558 				(void) snprintf(buf, sizeof (buf), gettext(
1559 				    "Warning - tape positioning error!\n\
1560 \t%s current file %ld, should be %ld\n"),
1561 				    tape, mt.mt_fileno+1, filenum);
1562 				msg(buf);
1563 				dumpailing();
1564 			}
1565 		}
1566 		tapeno++;		/* current tape sequence */
1567 		if (tapeno < TP_NINOS)
1568 			inos[tapeno] = chkpt.sl_inos;
1569 		spcl.c_firstrec = chkpt.sl_firstrec;
1570 		spcl.c_tapea = (*tapea) = chkpt.sl_tapea;
1571 		spcl.c_volume++;
1572 
1573 		enslave();	/* Share tape buffers with slaves */
1574 
1575 #ifdef DEBUG
1576 		if (xflag) {
1577 			/* XGETTEXT:  #ifdef DEBUG only */
1578 			msg(gettext("Checkpoint state:\n"));
1579 			msg("    blockswritten %u\n", blockswritten);
1580 			msg("    ino %u\n", ino);
1581 			msg("    pos %u\n", pos);
1582 			msg("    left %u\n", leftover);
1583 			msg("    tapea %u\n", (*tapea));
1584 			msg("    state %d\n", dumpstate);
1585 		}
1586 #endif
1587 		spcl.c_type = TS_TAPE;
1588 		spcl.c_tpbsize = tp_bsize;
1589 		if (leftover == 0) {
1590 			spcl.c_count = 0;
1591 			spclrec();
1592 			newtape = 0;
1593 		} else
1594 			newtape++;	/* new volume indication */
1595 		if (doingverify) {
1596 			msg(gettext("Starting verify pass\n"));
1597 		} else if (tapeno > 1) {
1598 			msg(gettext(
1599 			    "Volume %d begins with blocks from inode %lu\n"),
1600 			    tapeno, chkpt.sl_inos);
1601 		}
1602 		(void) timeclock((time_t)1);
1603 		(void) time(tstart_writing);
1604 		timeest(0, spcl.c_tapea);
1605 	}
1606 }
1607 
1608 void
1609 dumpabort(void)
1610 {
1611 
1612 	if (master && master != getpid())
1613 		/*
1614 		 * signal master to call dumpabort
1615 		 */
1616 		(void) kill(master, SIGTERM);
1617 	else {
1618 		killall();
1619 
1620 		if (archivefile && archive_opened)
1621 			(void) unlink(archivefile);
1622 		msg(gettext("The ENTIRE dump is aborted.\n"));
1623 	}
1624 	Exit(X_ABORT);
1625 }
1626 
/*
 * dumpailing -- report trouble and let the operator decide.
 *
 * Broadcasts a warning, then asks whether to continue.  Returns if the
 * answer is yes; otherwise the dump is aborted.
 */
void
dumpailing(void)
{
	broadcast(gettext("DUMP IS AILING!\n"));

	if (query(gettext(
	    "Do you want to attempt to continue? (\"yes\" or \"no\") ")))
		return;

	dumpabort();
	/*NOTREACHED*/
}
1638 
/*
 * Exit -- terminate the calling process with the given exit status.
 *
 * All exits in this file go through here so that, when built with
 * TDEBUG, the pid and status of every exiting process are logged.
 * Does not return.
 */
void
Exit(int status)
{
	/*
	 * Clean up message system
	 */
#ifdef TDEBUG

	/* XGETTEXT:  #ifdef TDEBUG only */
	msg(gettext("pid = %ld exits with status %d\n"),
	    (long)getpid(), status);
#endif /* TDEBUG */
	exit(status);
}
1653 
1654 static void
1655 killall(void)
1656 {
1657 	struct slaves *slavep;
1658 
1659 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++)
1660 		if (slavep->sl_slavepid > 0) {
1661 			(void) kill(slavep->sl_slavepid, SIGKILL);
1662 #ifdef TDEBUG
1663 
1664 			/* XGETTEXT:  #ifdef TDEBUG only */
1665 			msg(gettext("Slave child %ld killed\n"),
1666 			    (long)slavep->sl_slavepid);
1667 #endif
1668 		}
1669 	if (writepid) {
1670 		(void) kill(writepid, SIGKILL);
1671 #ifdef TDEBUG
1672 
1673 		/* XGETTEXT:  #ifdef TDEBUG only */
1674 		msg(gettext("Writer child %ld killed\n"), (long)writepid);
1675 #endif
1676 	}
1677 	if (archivepid) {
1678 		(void) kill(archivepid, SIGKILL);
1679 #ifdef TDEBUG
1680 
1681 		/* XGETTEXT:  #ifdef TDEBUG only */
1682 		msg(gettext("Archiver child %ld killed\n"), (long)archivepid);
1683 #endif
1684 	}
1685 }
1686 
1687 /*ARGSUSED*/
1688 static void
1689 proceed(int sig)
1690 {
1691 	caught++;
1692 }
1693 
1694 /*ARGSUSED*/
1695 static void
1696 die(int sig)
1697 {
1698 	Exit(X_FINOK);
1699 }
1700 
1701 static void
1702 enslave(void)
1703 {
1704 	int cmd[2];			/* file descriptors */
1705 	int i;
1706 	struct sigvec sv;
1707 	struct slaves *slavep;
1708 	int saverr;
1709 
1710 	sv.sv_flags = SA_RESTART;
1711 	(void) sigemptyset(&sv.sa_mask);
1712 	master = getpid();
1713 	/*
1714 	 * slave sends SIGTERM on dumpabort
1715 	 */
1716 	sv.sv_handler = (void(*)(int))dumpabort;
1717 	(void) sigvec(SIGTERM, &sv, (struct sigvec *)0);
1718 	sv.sv_handler = tperror;
1719 	(void) sigvec(SIGUSR2, &sv, (struct sigvec *)0);
1720 	sv.sv_handler = proceed;
1721 	(void) sigvec(SIGUSR1, &sv, (struct sigvec *)0);
1722 	totalrecsout += recsout;
1723 	caught = 0;
1724 	recsout = 0;
1725 	rotor = 0;
1726 	bufclear();
1727 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++)
1728 		slavep->sl_slavefd = -1;
1729 	archivefd = arch = writer = -1;
1730 	for (i = 0; i < SLAVES; i++) {
1731 		if (pipe(cmd) < 0) {
1732 			saverr = errno;
1733 			msg(gettext(
1734 			    "Cannot create pipe for slave process: %s\n"),
1735 			    strerror(saverr));
1736 			dumpabort();
1737 			/*NOTREACHED*/
1738 		}
1739 		sighold(SIGUSR2);
1740 		sighold(SIGINT);
1741 		sighold(SIGTERM);
1742 		if ((slaves[i].sl_slavepid = fork()) < 0) {
1743 			saverr = errno;
1744 			msg(gettext("Cannot create slave process: %s\n"),
1745 			    strerror(saverr));
1746 			dumpabort();
1747 			/*NOTREACHED*/
1748 		}
1749 		slaves[i].sl_slavefd = cmd[1];
1750 		if (slaves[i].sl_slavepid == 0) {   /* Slave starts up here */
1751 			pid_t next;		    /* pid of neighbor */
1752 
1753 			sv.sv_handler = SIG_DFL;
1754 			(void) sigvec(SIGUSR2, &sv, (struct sigvec *)0);
1755 			sv.sv_handler = SIG_IGN;	/* master handler INT */
1756 			(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1757 			sv.sv_handler = die;		/* normal slave exit */
1758 			(void) sigvec(SIGTERM, &sv, (struct sigvec *)0);
1759 
1760 			child_chdir();
1761 			sigrelse(SIGUSR2);
1762 			sigrelse(SIGINT);
1763 			sigrelse(SIGTERM);
1764 
1765 			freeino();	/* release unneeded resources */
1766 #ifdef TDEBUG
1767 		(void) sleep(4); /* time for parent's message to get out */
1768 		/* XGETTEXT:  #ifdef TDEBUG only */
1769 		msg(gettext("Neighbor has pid = %ld\n"), (long)getpid());
1770 #endif
1771 			/* Closes cmd[1] as a side-effect */
1772 			for (slavep = &slaves[0];
1773 			    slavep < &slaves[SLAVES];
1774 			    slavep++)
1775 				if (slavep->sl_slavefd >= 0) {
1776 					(void) close(slavep->sl_slavefd);
1777 					slavep->sl_slavefd = -1;
1778 				}
1779 			(void) close(to);
1780 			(void) close(fi);	    /* Need our own seek ptr */
1781 			to = -1;
1782 
1783 			fi = open(disk, O_RDONLY);
1784 
1785 			if (fi < 0) {
1786 				saverr = errno;
1787 				msg(gettext(
1788 				    "Cannot open dump device `%s': %s\n"),
1789 				    disk, strerror(saverr));
1790 				dumpabort();
1791 				/*NOTREACHED*/
1792 			}
1793 
1794 			if ((unsigned)atomic((int(*)())read, cmd[0],
1795 			    (char *)&next, sizeof (next)) != sizeof (next)) {
1796 				cmdrderr();
1797 				dumpabort();
1798 				/*NOTREACHED*/
1799 			}
1800 			dumpoffline(cmd[0], next, i);
1801 			Exit(X_FINOK);
1802 		}
1803 		/* Parent continues here */
1804 		sigrelse(SIGUSR2);
1805 		sigrelse(SIGINT);
1806 		sigrelse(SIGTERM);
1807 		(void) close(cmd[0]);
1808 	}
1809 
1810 	if (archive) {
1811 		archivepid = setuparchive();
1812 		if (!archivepid) {
1813 			dumpabort();
1814 			/*NOTREACHED*/
1815 		}
1816 	}
1817 
1818 	writepid = setupwriter();
1819 	if (!writepid) {
1820 		dumpabort();
1821 		/*NOTREACHED*/
1822 	}
1823 
1824 	if (arch >= 0) {
1825 		(void) close(arch);		/* only writer has this open */
1826 		arch = -1;
1827 	}
1828 
1829 	/* Tell each slave who follows it */
1830 	for (i = 0; i < SLAVES; i++) {
1831 		if ((unsigned)atomic((int(*)())write, slaves[i].sl_slavefd,
1832 		    (char *)&(slaves[(i + 1) % SLAVES].sl_slavepid),
1833 		    sizeof (int)) != sizeof (int)) {
1834 			cmdwrterr();
1835 			dumpabort();
1836 			/*NOTREACHED*/
1837 		}
1838 	}
1839 	sv.sv_handler = rollforward;		/* rcvd from writer on EOT */
1840 	(void) sigvec(SIGUSR1, &sv, (struct sigvec *)0);
1841 	slp = slaves;
1842 	(void) kill(slp->sl_slavepid, SIGUSR1);
1843 	master = 0;
1844 }
1845 
1846 static void
1847 wait_our_turn(void)
1848 {
1849 	(void) sighold(SIGUSR1);
1850 
1851 	if (!caught) {
1852 #ifdef INSTRUMENT
1853 		(*idle)++;
1854 #endif
1855 		(void) sigpause(SIGUSR1);
1856 	}
1857 	caught = 0;
1858 	(void) sigrelse(SIGUSR1);
1859 }
1860 
1861 static void
1862 dumpoffline(int cmd, pid_t next, int mynum)
1863 {
1864 	struct req *p = slaves[mynum].sl_req;
1865 	ulong_t i;
1866 	uchar_t *cp;
1867 	uchar_t *blkbuf;
1868 	int notactive = 0;
1869 
1870 	blkbuf = xmalloc(sblock->fs_bsize);
1871 
1872 	/*CONSTANTCONDITION*/
1873 	assert(sizeof (spcl) == TP_BSIZE_MIN);
1874 
1875 	while (atomic((int(*)())read, cmd, (char *)p, reqsiz) == reqsiz) {
1876 		if (p->br_dblk) {
1877 			bread(p->br_dblk, (uchar_t *)blkbuf, p->br_size);
1878 		} else {
1879 			bcopy((char *)p->br_spcl, (char *)&spcl,
1880 			    sizeof (spcl));
1881 			ino = spcl.c_inumber;
1882 		}
1883 		dumptoarchive = p->aflag & BUF_ARCHIVE;
1884 		wait_our_turn();
1885 		if (p->br_dblk) {
1886 			for (i = p->br_size, cp = blkbuf;
1887 			    i > 0;
1888 			    /* LINTED character pointers aren't signed */
1889 			    cp += i > tp_bsize ? tp_bsize : i,
1890 			    i -= i > tp_bsize ? tp_bsize : i) {
1891 				/* LINTED unsigned to signed conversion ok */
1892 				taprec(cp, 0, i > tp_bsize ? tp_bsize : (int)i);
1893 			}
1894 		} else
1895 			spclrec();
1896 		(void) kill(next, SIGUSR1);	/* Next slave's turn */
1897 		/*
1898 		 * Note that we lie about file activity since we don't
1899 		 * check for it.
1900 		 */
1901 		if ((unsigned)atomic((int(*)())write, cmd, (char *)&notactive,
1902 		    sizeof (notactive)) != sizeof (notactive)) {
1903 			cmdwrterr();
1904 			dumpabort();
1905 			/*NOTREACHED*/
1906 		}
1907 	}
1908 
1909 	free(blkbuf);
1910 }
1911 
/*
 * Tape blocks written since the last special record: dowrite() zeroes
 * it for each special record it processes and increments it for every
 * other record.
 */
static int count;		/* tape blocks written since last spclrec */
1913 
1914 /*ARGSUSED*/
1915 static void
1916 onxfsz(int sig)
1917 {
1918 	msg(gettext("File size limit exceeded writing output volume %d\n"),
1919 	    tapeno);
1920 	(void) kill(master, SIGUSR2);
1921 	Exit(X_REWRITE);
1922 }
1923 
/*
 * Record type (c_type) of the last special record written that was
 * not a TS_ADDR record, and the inode mode (c_dinode.di_mode) taken
 * from that same record.  Both are set by dowrite().
 */
static long	lastnonaddr;		/* last non-TS_ADDR c_type written */
static long	lastnonaddrm;		/* and the mode thereof */
1926 /*
1927  * dowrite -- the main body of the output writer process
1928  */
1929 static void
1930 dowrite(int cmd)
1931 {
1932 	struct bdesc *last =
1933 	    &bufp[(NBUF*ntrec)-1];		/* last buffer in pool */
1934 	struct bdesc *bp = bufp;		/* current buf in tape block */
1935 	struct bdesc *begin = bufp;		/* first buf of tape block */
1936 	struct bdesc *end = bufp + (ntrec-1);	/* last buf of tape block */
1937 	int siz;				/* bytes written (block) */
1938 	int trecs;				/* records written (block)  */
1939 	long asize = 0;				/* number of 0.1" units... */
1940 						/* ...written on current tape */
1941 	char *tp, *rbuf = NULL;
1942 	char *recmap = spcl.c_addr;		/* current tape record map */
1943 	char *endmp;				/* end of valid map data */
1944 	char *mp;				/* current map entry */
1945 	union u_spcl *sp;
1946 
1947 	(void) signal(SIGXFSZ, onxfsz);
1948 
1949 	bzero((char *)&spcl, sizeof (spcl));
1950 	count = 0;
1951 
1952 	if (doingverify) {
1953 		rbuf = (char *)malloc((uint_t)writesize);
1954 		if (rbuf == 0) {
1955 			/* Restart from checkpoint */
1956 			(void) kill(master, SIGUSR2);
1957 			Exit(X_REWRITE);
1958 		}
1959 	}
1960 
1961 	for (;;) {
1962 		/* START: wait until all buffers in tape block are full */
1963 		if ((bp->b_flags & BUF_FULL) == 0) {
1964 			if (caught) {		/* master signalled flush */
1965 				(void) sighold(SIGUSR1);
1966 				caught = 0;
1967 				/* signal ready */
1968 				(void) kill(master, SIGUSR1);
1969 				chkpt.sl_count = 0;	/* signal not at EOT */
1970 				checkpoint(bp-1, cmd);	/* send data */
1971 				(void) sigpause(SIGUSR1);
1972 				break;
1973 			}
1974 #ifdef INSTRUMENT
1975 			(*readmissp)++;
1976 #endif
1977 			nap(50);
1978 			continue;
1979 		}
1980 		if (bp < end) {
1981 			bp++;
1982 			continue;
1983 		}
1984 		/* END: wait until all buffers in tape block are full */
1985 
1986 		tp = begin->b_data;
1987 		(void) sighold(SIGUSR1);
1988 		if (host) {
1989 			if (!doingverify)
1990 				siz = rmtwrite(tp, writesize);
1991 			else if ((siz = rmtread(rbuf, writesize)) ==
1992 			    writesize && bcmp(rbuf, tp, writesize))
1993 				siz = -1;
1994 		} else {
1995 			if (!doingverify)
1996 				siz = write(to, tp, writesize);
1997 			else if ((siz = read(to, rbuf, writesize)) ==
1998 			    writesize && bcmp(rbuf, tp, writesize))
1999 				siz = -1;
2000 			if (siz < 0 && diskette && errno == ENOSPC)
2001 				siz = 0;	/* really EOF */
2002 		}
2003 		(void) sigrelse(SIGUSR1);
2004 		if (siz < 0 ||
2005 		    (pipeout && siz != writesize)) {
2006 			char buf[3000];
2007 
2008 			/*
2009 			 * Isn't i18n wonderful?
2010 			 */
2011 			if (doingverify) {
2012 				if (diskette)
2013 					(void) snprintf(buf, sizeof (buf),
2014 					    gettext(
2015 		    "Verification error %ld blocks into diskette %d\n"),
2016 					    asize * 2, tapeno);
2017 				else if (tapeout)
2018 					(void) snprintf(buf, sizeof (buf),
2019 					    gettext(
2020 		    "Verification error %ld feet into tape %d\n"),
2021 					    (cartridge ? asize/tracks :
2022 					    asize)/120L,
2023 					    tapeno);
2024 				else
2025 					(void) snprintf(buf, sizeof (buf),
2026 					    gettext(
2027 		    "Verification error %ld blocks into volume %d\n"),
2028 					    asize * 2, tapeno);
2029 
2030 			} else {
2031 				if (diskette)
2032 					(void) snprintf(buf, sizeof (buf),
2033 					    gettext(
2034 			"Write error %ld blocks into diskette %d\n"),
2035 					    asize * 2, tapeno);
2036 				else if (tapeout)
2037 					(void) snprintf(buf, sizeof (buf),
2038 					    gettext(
2039 			"Write error %ld feet into tape %d\n"),
2040 					    (cartridge ? asize/tracks :
2041 					    asize)/120L, tapeno);
2042 				else
2043 					(void) snprintf(buf, sizeof (buf),
2044 					    gettext(
2045 			"Write error %ld blocks into volume %d\n"),
2046 					    asize * 2, tapeno);
2047 			}
2048 
2049 			msg(buf);
2050 			/* Restart from checkpoint */
2051 #ifdef TDEBUG
2052 
2053 			/* XGETTEXT:  #ifdef TDEBUG only */
2054 			msg(gettext("sending SIGUSR2 to pid %ld\n"), master);
2055 #endif
2056 			(void) kill(master, SIGUSR2);
2057 			Exit(X_REWRITE);
2058 		}
2059 		trecs = siz / tp_bsize;
2060 		if (diskette)
2061 			asize += trecs;	/* asize == blocks written */
2062 		else
2063 			asize += (siz/density + tenthsperirg);
2064 		if (trecs)
2065 			chkpt.sl_firstrec++;
2066 		for (bp = begin; bp < begin + trecs; bp++) {
2067 			if ((arch >= 0) && (bp->b_flags & BUF_ARCHIVE)) {
2068 				if ((unsigned)atomic((int(*)())write, arch,
2069 				    (char *)&bp->b_flags, sizeof (bp->b_flags))
2070 				    != sizeof (bp->b_flags)) {
2071 					cmdwrterr();
2072 					dumpabort();
2073 					/*NOTREACHED*/
2074 				}
2075 				if (atomic((int(*)())write, arch, bp->b_data,
2076 				    tp_bsize) != tp_bsize) {
2077 					cmdwrterr();
2078 					dumpabort();
2079 					/*NOTREACHED*/
2080 				}
2081 			}
2082 			if (bp->b_flags & BUF_SPCLREC) {
2083 				/*LINTED [bp->b_data is aligned]*/
2084 				sp = (union u_spcl *)bp->b_data;
2085 				if (sp->s_spcl.c_type != TS_ADDR) {
2086 					lastnonaddr = sp->s_spcl.c_type;
2087 					lastnonaddrm =
2088 					    sp->s_spcl.c_dinode.di_mode;
2089 					if (sp->s_spcl.c_type != TS_TAPE)
2090 						chkpt.sl_offset = 0;
2091 				}
2092 				chkpt.sl_count = sp->s_spcl.c_count;
2093 				bcopy((char *)sp, (char *)&spcl, sizeof (spcl));
2094 				mp = recmap;
2095 				endmp = &recmap[spcl.c_count];
2096 				count = 0;
2097 			} else {
2098 				chkpt.sl_offset++;
2099 				chkpt.sl_count--;
2100 				count++;
2101 				mp++;
2102 			}
2103 			/*
2104 			 * Adjust for contiguous hole
2105 			 */
2106 			for (; mp < endmp; mp++) {
2107 				if (*mp)
2108 					break;
2109 				chkpt.sl_offset++;
2110 				chkpt.sl_count--;
2111 			}
2112 		}
2113 		/*
2114 		 * Check for end of tape
2115 		 */
2116 		if (trecs < ntrec ||
2117 		    (!pipeout && tsize > 0 && asize > tsize)) {
2118 			if (tapeout)
2119 				msg(gettext("End-of-tape detected\n"));
2120 			else
2121 				msg(gettext("End-of-file detected\n"));
2122 			(void) sighold(SIGUSR1);
2123 			caught = 0;
2124 			(void) kill(master, SIGUSR1);	/* signal EOT */
2125 			checkpoint(--bp, cmd);	/* send checkpoint data */
2126 			(void) sigpause(SIGUSR1);
2127 			break;
2128 		}
2129 		for (bp = begin; bp <= end; bp++)
2130 			bp->b_flags = BUF_EMPTY;
2131 		if (end + ntrec > last) {
2132 			bp = begin = bufp;
2133 			timeest(0, spcl.c_tapea);
2134 		} else
2135 			bp = begin = end+1;
2136 		end = begin + (ntrec-1);
2137 	}
2138 
2139 	if (rbuf != NULL)
2140 		free(rbuf);
2141 }
2142 
2143 /*
2144  * Send checkpoint info back to master.  This information
2145  * consists of the current inode number, number of logical
2146  * blocks written for that inode (or bitmap), the last logical
2147  * block number written, the number of logical blocks written
2148  * to this volume, the current dump state, and the current
2149  * special record map.
2150  */
2151 static void
2152 checkpoint(struct bdesc *bp, int cmd)
2153 {
2154 	int	state, type;
2155 	ino_t	ino;
2156 
2157 	if (++bp >= &bufp[NBUF*ntrec])
2158 		bp = bufp;
2159 
2160 	/*
2161 	 * If we are dumping files and the record following
2162 	 * the last written to tape is a special record, use
2163 	 * it to get an accurate indication of current state.
2164 	 */
2165 	if ((bp->b_flags & BUF_SPCLREC) && (bp->b_flags & BUF_FULL) &&
2166 	    lastnonaddr == TS_INODE) {
2167 		/*LINTED [bp->b_data is aligned]*/
2168 		union u_spcl *nextspcl = (union u_spcl *)bp->b_data;
2169 
2170 		if (nextspcl->s_spcl.c_type == TS_INODE) {
2171 			chkpt.sl_offset = 0;
2172 			chkpt.sl_count = 0;
2173 		} else if (nextspcl->s_spcl.c_type == TS_END) {
2174 			chkpt.sl_offset = 0;
2175 			chkpt.sl_count = 1;	/* EOT indicator */
2176 		}
2177 		ino = nextspcl->s_spcl.c_inumber;
2178 		type = nextspcl->s_spcl.c_type;
2179 	} else {
2180 		/*
2181 		 * If not, use what we have.
2182 		 */
2183 		ino = spcl.c_inumber;
2184 		type = spcl.c_type;
2185 	}
2186 
2187 	switch (type) {		/* set output state */
2188 	case TS_ADDR:
2189 		switch (lastnonaddr) {
2190 		case TS_INODE:
2191 		case TS_TAPE:
2192 			if ((lastnonaddrm & IFMT) == IFDIR ||
2193 			    (lastnonaddrm & IFMT) == IFATTRDIR)
2194 				state = DS_DIRS;
2195 			else
2196 				state = DS_FILES;
2197 			break;
2198 		case TS_CLRI:
2199 			state = DS_CLRI;
2200 			break;
2201 		case TS_BITS:
2202 			state = DS_BITS;
2203 			break;
2204 		}
2205 		break;
2206 	case TS_INODE:
2207 		if ((spcl.c_dinode.di_mode & IFMT) == IFDIR ||
2208 		    (spcl.c_dinode.di_mode & IFMT) == IFATTRDIR)
2209 			state = DS_DIRS;
2210 		else
2211 			state = DS_FILES;
2212 		break;
2213 	case 0:			/* EOT on 1st record */
2214 	case TS_TAPE:
2215 		state = DS_START;
2216 		ino = UFSROOTINO;
2217 		break;
2218 	case TS_CLRI:
2219 		state = DS_CLRI;
2220 		break;
2221 	case TS_BITS:
2222 		state = DS_BITS;
2223 		break;
2224 	case TS_END:
2225 		if (spcl.c_type == TS_END)
2226 			state = DS_DONE;
2227 		else
2228 			state = DS_END;
2229 		break;
2230 	}
2231 
2232 	/*
2233 	 * Checkpoint info to be processed by rollforward():
2234 	 *	The inode with which the next volume should begin
2235 	 *	The last inode number on this volume
2236 	 *	The last logical block number on this volume
2237 	 *	The current output state
2238 	 *	The offset within the current inode (already in sl_offset)
2239 	 *	The number of records left from last spclrec (in sl_count)
2240 	 *	The physical block the next vol begins with (in sl_firstrec)
2241 	 */
2242 	chkpt.sl_inos = ino;
2243 	chkpt.sl_tapea = spcl.c_tapea + count;
2244 	chkpt.sl_state = state;
2245 
2246 	if ((unsigned)atomic((int(*)())write, cmd, (char *)&chkpt,
2247 	    sizeof (chkpt)) != sizeof (chkpt)) {
2248 		cmdwrterr();
2249 		dumpabort();
2250 		/*NOTREACHED*/
2251 	}
2252 	if ((unsigned)atomic((int(*)())write, cmd, (char *)&spcl,
2253 	    sizeof (spcl)) != sizeof (spcl)) {
2254 		cmdwrterr();
2255 		dumpabort();
2256 		/*NOTREACHED*/
2257 	}
2258 #ifdef DEBUG
2259 	if (xflag) {
2260 		/* XGETTEXT:  #ifdef DEBUG only */
2261 		msg(gettext("sent chkpt to master:\n"));
2262 		msg("    ino %u\n", chkpt.sl_inos);
2263 		msg("    1strec %u\n", chkpt.sl_firstrec);
2264 		msg("    lastrec %u\n", chkpt.sl_tapea);
2265 		msg("    written %u\n", chkpt.sl_offset);
2266 		msg("    left %u\n", chkpt.sl_count);
2267 		msg("    state %d\n", chkpt.sl_state);
2268 	}
2269 #endif
2270 }
2271 
/*
 * Transfer exactly "count" bytes between fd and buf using func
 * (called as func(fd, buf, nbytes)), in pieces of at most 4096
 * bytes.  A pipe read may return less than was asked for, and a
 * write may be cut short by a signal, so keep calling func until
 * everything has been moved.  Calls that fail with EINTR are
 * retried.
 *
 * Returns count when the whole request was satisfied.  Otherwise
 * returns the result of the last call to func: zero or negative.
 */
static ssize_t
atomic(int (*func)(), int fd, char *buf, int count)
{
	ssize_t	remaining = count;
	ssize_t	moved = 0;
	ssize_t	chunk;

	/*
	 * Start from a clean errno so that a stale EINTR cannot be
	 * mistaken for the outcome of the first call to func.
	 */
	errno = 0;

	while (remaining > 0) {
		chunk = (remaining < 4096) ? remaining : 4096;
		moved = (*func)(fd, buf, chunk);

		if (moved > 0) {
			/* progress: advance past what was transferred */
			remaining -= moved;
			buf += moved;
			continue;
		}
		/* give up on end-of-file or on any error but EINTR */
		if (moved == 0 || errno != EINTR)
			break;
	}

	/* full count on success, otherwise the failing call's result */
	return ((remaining == 0) ? count : moved);
}
2296 
2297 void
2298 positiontape(char *msgbuf)
2299 {
2300 	/* Static as never change, no need to waste stack space */
2301 	static struct mtget mt;
2302 	static struct mtop rew = { MTREW, 1 };
2303 	static struct mtop fsf = { MTFSF, 1 };
2304 	char *info = strdup(gettext("Positioning `%s' to file %ld\n"));
2305 	char *fail = strdup(gettext("Cannot position tape to file %d\n"));
2306 	int m;
2307 
2308 	/* gettext()'s return value is volatile, hence the strdup()s */
2309 
2310 	m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
2311 
2312 	/*
2313 	 * To avoid writing tape marks at inappropriate places, we open the
2314 	 * device read-only, position it, close it, and reopen it for writing.
2315 	 */
2316 	while ((to = host ? rmtopen(tape, O_RDONLY) :
2317 	    safe_device_open(tape, O_RDONLY|m, 0600)) < 0) {
2318 		if (autoload) {
2319 			if (!query_once(msgbuf, 1)) {
2320 				dumpabort();
2321 				/*NOTREACHED*/
2322 			}
2323 		} else {
2324 			if (!query(msgbuf)) {
2325 				dumpabort();
2326 				/*NOTREACHED*/
2327 			}
2328 		}
2329 	}
2330 
2331 	if (host) {
2332 		if (rmtstatus(&mt) >= 0 &&
2333 		    rmtioctl(MTREW, 1) >= 0 &&
2334 		    filenum > 1) {
2335 			msg(info, dumpdev, filenum);
2336 			if (rmtioctl(MTFSF, filenum-1) < 0) {
2337 				msg(fail, filenum);
2338 				dumpabort();
2339 				/*NOTREACHED*/
2340 			}
2341 		}
2342 		rmtclose();
2343 	} else {
2344 		if (ioctl(to, MTIOCGET, &mt) >= 0 &&
2345 		    ioctl(to, MTIOCTOP, &rew) >= 0 &&
2346 		    filenum > 1) {
2347 			msg(info, dumpdev, filenum);
2348 			fsf.mt_count = filenum - 1;
2349 			if (ioctl(to, MTIOCTOP, &fsf) < 0) {
2350 				msg(fail, filenum);
2351 				dumpabort();
2352 				/*NOTREACHED*/
2353 			}
2354 		}
2355 		(void) close(to);
2356 		to = -1;
2357 	}
2358 
2359 	free(info);
2360 	free(fail);
2361 }
2362 
/*
 * Report a failed write on the command pipe, describing the errno
 * value that was in effect when this function was entered.
 */
static void
cmdwrterr(void)
{
	/* Capture errno first; the calls below are free to change it. */
	const int	err = errno;
	char		*fmt = gettext("Error writing command pipe: %s\n");

	msg(fmt, strerror(err));
}
2369 
/*
 * Report a failed read on the command pipe, describing the errno
 * value that was in effect when this function was entered.
 */
static void
cmdrderr(void)
{
	/* Capture errno first; the calls below are free to change it. */
	const int	err = errno;
	char		*fmt = gettext("Error reading command pipe: %s\n");

	msg(fmt, strerror(err));
}
2376