xref: /titanic_51/usr/src/cmd/backup/dump/dumptape.c (revision 89518a1cfe5021ecf5ad8d04c40f53cf947e95d9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved	*/
29 
30 /*
31  * Portions of this source code were derived from Berkeley 4.3 BSD
32  * under license from the Regents of the University of California.
33  */
34 
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 
37 #include "dump.h"
38 #include <rmt.h>
39 #include <setjmp.h>
40 #include <sys/fdio.h>
41 #include <sys/mkdev.h>
42 #include <assert.h>
43 #include <limits.h>
44 
/*
 * NOTE(review): presumably a sleep/poll interval in milliseconds; it is
 * not referenced in this part of the file -- confirm against its users.
 */
#define	SLEEPMS		50

/*
 * writesize is set by alloctape() to ntrec * tp_bsize, i.e. the number
 * of bytes in one tape record.  inos[] is copied into spcl.c_inos by
 * spclrec() when the TS_END record is written.
 */
static uint_t writesize;	/* size of malloc()ed buffer for tape */
static ino_t inos[TP_NINOS];	/* starting inodes on each tape */
49 
50 /*
51  * The req structure is used to pass commands from the parent
52  * process through the pipes to the slave processes.  It comes
53  * in two flavors, depending on which mode dump is operating under:
54  * an inode request (on-line mode) and a disk block request ("old" mode).
55  */
/*
 * The inode request structure is used during on-line mode.
 * The master passes inode numbers and starting offsets to
 * the slaves.  The tape writer passes out the current inode,
 * offset, and number of tape records written after completing a volume.
 *
 * This is one arm of the union in struct req; its fields are reached
 * through the ir_* accessor macros defined after struct req.
 */
struct ireq {
	ino_t	inumber;	/* inode number to open/dump */
	long	igen;		/* inode generation number */
	off_t	offset;		/* starting offset in inode */
	int	count;		/* count for 1st spclrec */
};
/*
 * The block request structure is used in off-line mode to pass
 * commands to dump disk blocks from the parent process through
 * the pipes to the slave processes.
 *
 * spclrec[] is declared with a single element but is really a
 * variable-length tail: alloctape() sizes each request as
 * sizeof (struct req) + tp_bsize - sizeof (long), and dospcl() copies
 * tp_bsize bytes of the special record into it.  Do not change the
 * declared length without revisiting that arithmetic.
 */
struct breq {
	diskaddr_t dblk;		/* disk address to read */
	size_t	size;		/* number of bytes to read from disk */
	ulong_t	spclrec[1];	/* actually longer */
};
78 
/*
 * Request packet written by the master to a slave's pipe (see toslave()).
 * Exactly one arm of the union is meaningful, depending on the mode.
 */
struct req {
	short	aflag;		/* write data to archive process as well */
	short	tflag;		/* begin new tape */
	union	reqdata {
		struct ireq ino;	/* used for on-line mode */
		struct breq blks;	/* used for off-line mode */
	} data;
};

/* Shorthand for the on-line (inode request) arm of a struct req. */
#define	ir_inumber	data.ino.inumber
#define	ir_igen		data.ino.igen
#define	ir_offset	data.ino.offset
#define	ir_count	data.ino.count

/* Shorthand for the off-line (block request) arm of a struct req. */
#define	br_dblk		data.blks.dblk
#define	br_size		data.blks.size
#define	br_spcl		data.blks.spclrec
96 
/*
 * Size in bytes of one request packet, including the variable-length
 * special-record tail; computed in alloctape() and used as the transfer
 * length in toslave().
 */
static int reqsiz = 0;	/* alloctape will initialize */

/*
 * Per-slave bookkeeping.  The same structure doubles as the checkpoint
 * record: rollforward() reads sizeof (struct slaves) bytes from the
 * writer's pipe straight into chkpt, so the layout must match on both
 * ends of that pipe.
 */
#define	SLAVES 3
struct slaves {
	int	sl_slavefd;	/* pipe from master to slave */
	pid_t	sl_slavepid;	/* slave pid; used by killall() */
	ino_t	sl_inos;	/* inos, if this record starts tape */
	int	sl_offset;	/* logical blocks written for object */
	int	sl_count;	/* logical blocks left in spclrec */
	int	sl_tapea;	/* header number, if starting tape */
	int	sl_firstrec;	/* number of first block on tape */
	int	sl_state;	/* dump output state */
	struct	req *sl_req;	/* instruction packet to slave */
};
static struct slaves slaves[SLAVES];	/* one per slave */
static struct slaves *slp;	/* pointer to current slave */
static struct slaves chkpt;	/* checkpointed data */
114 
/*
 * Descriptor for one tp_bsize-sized slot of the output buffer area.
 * alloctape() points b_data at the slot; taprec() fills the slot and
 * sets BUF_FULL in b_flags.
 */
struct bdesc {
	char	*b_data;	/* pointer to buffer data */
	int	b_flags;	/* flags (see below) */
};
119 
/*
 * The following variables are in shared memory, and must be
 * explicitly checkpointed and/or reset.
 *
 * More precisely, these are ordinary per-process pointers whose targets
 * live inside the region mapped by alloctape(); the layout of that
 * region is established there.
 */
static caddr_t shared;		/* pointer to block of shared memory */
static struct bdesc *bufp;	/* buffer descriptors */
static struct bdesc **current;	/* output buffer to fill */
static int *tapea;		/* logical record count */

#ifdef INSTRUMENT
/* Instrumentation counters, also placed in the shared region. */
static int	*readmissp;	/* number of times writer was idle */
static int	*idle;		/* number of times slaves were idle */
#endif	/* INSTRUMENT */
133 
/*
 * Buffer flags
 *
 * Bit values stored in bdesc.b_flags.  taprec() always ORs in BUF_FULL
 * and adds BUF_ARCHIVE when dumptoarchive is set; the archiver child in
 * setuparchive() tests BUF_ARCHIVE on each record it receives.
 */
#define	BUF_EMPTY	0x0	/* nothing in buffer */
#define	BUF_FULL	0x1	/* data in buffer */
#define	BUF_SPCLREC	0x2	/* contains special record */
#define	BUF_ARCHIVE	0x4	/* dump to archive */
141 
/*
 * Master-side bookkeeping for the slave, writer and archiver processes.
 * recsout and rotor drive the round-robin in toslave()/nextslave();
 * writer and arch are the write ends of the command pipes created in
 * setupwriter() and setuparchive().
 */
static int recsout;		/* number of req's sent to slaves */
static int totalrecsout;	/* total number of req's sent to slaves */
static int rotor;		/* next slave to be instructed */
static pid_t master;		/* pid of master, for sending error signals */
static int writer = -1;		/* fd of tape writer */
static pid_t writepid;		/* pid of tape writer */
static int arch;		/* fd of output archiver */
static pid_t archivepid;	/* pid of output archiver */
static int archivefd;		/* fd of archive file (proper) */
static offset_t lf_archoffset;	/* checkpointed offset into archive file */

/* Cleared by setupwriter() before the writer process is forked. */
int caught;			/* caught signal -- imported by mapfile() */

#ifdef DEBUG
extern	int xflag;
#endif
158 
/*
 * Forward declarations for the file-local functions.  Full prototypes
 * are given to ANSI compilers; the #else arm keeps the old-style
 * declarations for pre-ANSI compilers.  The two lists must be kept in
 * step when a function is added or removed.
 */
#ifdef __STDC__
static void cmdwrterr(void);
static void cmdrderr(void);
static void freetape(void);
static void bufclear(void);
static pid_t setuparchive(void);
static pid_t setupwriter(void);
static void nextslave(void);
static void tperror(int);
static void rollforward(int);
static void nap(int);
static void alrm(int);
static void just_rewind(void);
static void killall(void);
static void proceed(int);
static void die(int);
static void enslave(void);
static void wait_our_turn(void);
static void dumpoffline(int, pid_t, int);
static void onxfsz(int);
static void dowrite(int);
static void checkpoint(struct bdesc *, int);
static ssize_t atomic(int (*)(), int, char *, int);
#else
static void cmdwrterr();
static void cmdrderr();
static void freetape();
static void bufclear();
static pid_t setuparchive();
static pid_t setupwriter();
static void nextslave();
static void tperror();
static void rollforward();
static void nap();
static void alrm();
static void just_rewind();
static void killall();
static void proceed();
static void die();
static void enslave();
static void wait_our_turn();
static void dumpoffline();
static void onxfsz();
static void dowrite();
static void checkpoint();
static ssize_t atomic();
#endif

/* Length of the mapping created by alloctape(); freetape() unmaps it. */
static size_t tapesize;
208 
209 /*
210  * Allocate buffers and shared memory variables.  Tape buffers are
211  * allocated on page boundaries for tape write() efficiency.
212  */
213 void
214 #ifdef __STDC__
215 #else
216 #endif
217 alloctape(void)
218 {
219 	struct slaves *slavep;
220 	ulong_t pgoff = (unsigned)(getpagesize() - 1); /* 2**n - 1 */
221 	int	mapfd;
222 	char	*obuf;
223 	int	saverr;
224 	int	i, j;
225 
226 	writesize = ntrec * tp_bsize;
227 	if (!printsize)
228 		msg(gettext("Writing %d Kilobyte records\n"),
229 			writesize / TP_BSIZE_MIN);
230 
231 	/*
232 	 * set up shared memory seg for here and child
233 	 */
234 	mapfd = open("/dev/zero", O_RDWR);
235 	if (mapfd == -1) {
236 		saverr = errno;
237 		msg(gettext("Cannot open `%s': %s\n"),
238 			"/dev/zero", strerror(saverr));
239 		dumpabort();
240 		/*NOTREACHED*/
241 	}
242 	/*
243 	 * Allocate space such that buffers are page-aligned and
244 	 * pointers are aligned on 4-byte boundaries (for SPARC).
245 	 * This code assumes that (NBUF * writesize) is a multiple
246 	 * of the page size and that pages are aligned on 4-byte
247 	 * boundaries.  Space is allocated as follows:
248 	 *
249 	 *    (NBUF * writesize) for the actual buffers
250 	 *    (pagesize - 1) for padding so the buffers are page-aligned
251 	 *    (NBUF * ntrec * sizeof (struct bdesc)) for each buffer
252 	 *    (n * sizeof (int)) for [n] debugging variables/pointers
253 	 *    (n * sizeof (int)) for [n] miscellaneous variables/pointers
254 	 */
255 	tapesize =
256 	    (NBUF * writesize)				/* output buffers */
257 		/* LINTED: pgoff fits into a size_t */
258 	    + (size_t)pgoff				/* page alignment */
259 							/* buffer descriptors */
260 	    + (((size_t)sizeof (struct bdesc)) * NBUF * ntrec)
261 #ifdef INSTRUMENT
262 	    + (2 * (size_t)sizeof (int *))		/* instrumentation */
263 #endif
264 							/* shared variables */
265 	    + (size_t)sizeof (struct bdesc **)
266 	    + (size_t)sizeof (int *)
267 	    + (3 * (size_t)sizeof (time_t));
268 
269 	shared = mmap((char *)0, tapesize, PROT_READ|PROT_WRITE,
270 	    MAP_SHARED, mapfd, (off_t)0);
271 	if (shared == (caddr_t)-1) {
272 		saverr = errno;
273 		msg(gettext("Cannot memory map output buffers: %s\n"),
274 		    strerror(saverr));
275 		dumpabort();
276 		/*NOTREACHED*/
277 	}
278 	(void) close(mapfd);
279 
280 	/*
281 	 * Buffers and buffer headers
282 	 */
283 	obuf = (char *)(((ulong_t)shared + pgoff) & ~pgoff);
284 	/* LINTED obuf and writesize are aligned */
285 	bufp = (struct bdesc *)(obuf + NBUF*writesize);
286 	/*
287 	 * Shared memory variables
288 	 */
289 	current = (struct bdesc **)&bufp[NBUF*ntrec];
290 	tapea = (int *)(current + 1);
291 	/* LINTED pointer alignment ok */
292 	telapsed = (time_t *)(tapea + 1);
293 	tstart_writing = telapsed + 1;
294 	tschedule = tstart_writing + 1;
295 #ifdef INSTRUMENT
296 	/*
297 	 * Debugging and instrumentation variables
298 	 */
299 	readmissp = (int *)(tschedule + 1);
300 	idle = readmissp + 1;
301 #endif
302 	for (i = 0, j = 0; i < NBUF * ntrec; i++, j += tp_bsize) {
303 		bufp[i].b_data = &obuf[j];
304 	}
305 
306 	reqsiz = sizeof (struct req) + tp_bsize - sizeof (long);
307 	for (slavep = slaves; slavep < &slaves[SLAVES]; slavep++)
308 		slavep->sl_req = (struct req *)xmalloc(reqsiz);
309 
310 	chkpt.sl_offset = 0;		/* start at offset 0 */
311 	chkpt.sl_count = 0;
312 	chkpt.sl_inos = UFSROOTINO;	/* in root inode */
313 	chkpt.sl_firstrec = 1;
314 	chkpt.sl_tapea = 0;
315 }
316 
317 static void
318 #ifdef __STDC__
319 freetape(void)
320 #else
321 freetape()
322 #endif
323 {
324 	if (shared == NULL)
325 		return;
326 	(void) timeclock((time_t)0);
327 	(void) munmap(shared, tapesize);
328 	shared = NULL;
329 }
330 
331 /*
332  * Reset tape state variables -- called
333  * before a pass to dump active files.
334  */
335 void
336 #ifdef __STDC__
337 reset(void)
338 #else
339 reset()
340 #endif
341 {
342 	bufclear();
343 
344 #ifdef INSTRUMENT
345 	(*readmissp) = 0;
346 	(*idle) = 0;
347 #endif
348 
349 	spcl.c_flags = 0;
350 	spcl.c_volume = 0;
351 	tapeno = 0;
352 
353 	chkpt.sl_offset = 0;		/* start at offset 0 */
354 	chkpt.sl_count = 0;
355 	chkpt.sl_inos = UFSROOTINO;	/* in root inode */
356 	chkpt.sl_firstrec = 1;
357 	chkpt.sl_tapea = 0;
358 }
359 
360 static void
361 #ifdef __STDC__
362 bufclear(void)
363 #else
364 bufclear()
365 #endif
366 {
367 	struct bdesc *bp;
368 	int i;
369 
370 	for (i = 0, bp = bufp; i < NBUF * ntrec; i++, bp++)
371 		bp->b_flags = BUF_EMPTY;
372 	if ((caddr_t)current < shared ||
373 	    (caddr_t)current > (shared + tapesize)) {
374 		msg(gettext(
375 	    "bufclear: current pointer out of range of shared memory\n"));
376 		dumpabort();
377 		/*NOTREACHED*/
378 	}
379 	if ((*current != NULL) &&
380 	    (*current < &bufp[0] || *current > &bufp[NBUF*ntrec])) {
381 		/* ANSI string catenation, to shut cstyle up */
382 		msg(gettext("bufclear: current buffer pointer (0x%x) "
383 			"out of range of buffer\naddresses (0x%x - 0x%x)\n"),
384 		    *current, &bufp[0], &bufp[NBUF*ntrec]);
385 		dumpabort();
386 		/*NOTREACHED*/
387 	}
388 	*current = bufp;
389 }
390 
391 /*
392  * Start a process to collect information describing the dump.
393  * This data takes two forms:
394  *    the bitmap and directory information being written to
395  *	the front of the tape (the "archive" file)
396  *    information describing each directory and inode (to
397  *	be included in the database tmp file)
398  * Write the data to the files as it is received so huge file
399  * systems don't cause dump to consume large amounts of memory.
400  */
401 static pid_t
402 #ifdef __STDC__
403 setuparchive(void)
404 #else
405 setuparchive()
406 #endif
407 {
408 	struct slaves *slavep;
409 	int cmd[2];
410 	pid_t pid;
411 	ssize_t size;
412 	char *data;
413 	char *errmsg;
414 	int flags, saverr;
415 	int punt = 0;
416 
417 	/*
418 	 * Both the archive and database tmp files are
419 	 * checkpointed by taking their current offsets
420 	 * (sizes) after completing each volume.  Restoring
421 	 * from a checkpoint involves truncating to the
422 	 * checkpointed size.
423 	 */
424 	if (archive && !doingactive) {
425 		/* It's allowed/expected to exist, so can't use O_EXCL */
426 		archivefd = safe_file_open(archivefile, O_WRONLY, 0600);
427 		if (archivefd < 0) {
428 			saverr = errno;
429 			msg(gettext("Cannot open archive file `%s': %s\n"),
430 			    archivefile, strerror(saverr));
431 			dumpabort();
432 			/*NOTREACHED*/
433 		}
434 
435 		if (lseek64(archivefd, lf_archoffset, 0) < 0) {
436 			saverr = errno;
437 			msg(gettext(
438 				    "Cannot position archive file `%s' : %s\n"),
439 			    archivefile, strerror(saverr));
440 			dumpabort();
441 			/*NOTREACHED*/
442 		}
443 		if (ftruncate64(archivefd, lf_archoffset) < 0) {
444 			saverr = errno;
445 			msg(gettext(
446 				    "Cannot truncate archive file `%s' : %s\n"),
447 			    archivefile, strerror(saverr));
448 			dumpabort();
449 			/*NOTREACHED*/
450 		}
451 	}
452 
453 	if (pipe(cmd) < 0) {
454 		saverr = errno;
455 		msg(gettext("%s: %s error: %s\n"),
456 		    "setuparchive", "pipe", strerror(saverr));
457 		return (0);
458 	}
459 	sighold(SIGINT);
460 	if ((pid = fork()) < 0) {
461 		saverr = errno;
462 		msg(gettext("%s: %s error: %s\n"),
463 		    "setuparchive", "fork", strerror(saverr));
464 		return (0);
465 	}
466 	if (pid > 0) {
467 		sigrelse(SIGINT);
468 		/* parent process */
469 		(void) close(cmd[0]);
470 		arch = cmd[1];
471 		return (pid);
472 	}
473 	/*
474 	 * child process
475 	 */
476 	(void) signal(SIGINT, SIG_IGN);		/* master handles this */
477 #ifdef TDEBUG
478 	(void) sleep(4);	/* allow time for parent's message to get out */
479 	/* XGETTEXT:  #ifdef TDEBUG only */
480 	msg(gettext("Archiver has pid = %ld\n"), (long)getpid());
481 #endif
482 	freeino();	/* release unneeded resources */
483 	freetape();
484 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
485 		if (slavep->sl_slavefd != -1) {
486 			(void) close(slavep->sl_slavefd);
487 			slavep->sl_slavefd = -1;
488 		}
489 	}
490 	(void) close(to);
491 	(void) close(fi);
492 	to = fi = -1;
493 	(void) close(cmd[1]);
494 	data = xmalloc(tp_bsize);
495 	for (;;) {
496 		size = atomic((int(*)())read, cmd[0], (char *)&flags,
497 		    sizeof (flags));
498 		if ((unsigned)size != sizeof (flags))
499 			break;
500 		size = atomic((int(*)())read, cmd[0], data, tp_bsize);
501 		if (size == tp_bsize) {
502 			if (archive && flags & BUF_ARCHIVE && !punt &&
503 			    (size = write(archivefd, data, tp_bsize))
504 			    != tp_bsize) {
505 				struct stat64 stats;
506 
507 				if (size != -1) {
508 					errmsg = strdup(gettext(
509 					    "Output truncated"));
510 					if (errmsg == NULL)
511 						errmsg = "";
512 				} else {
513 					errmsg = strerror(errno);
514 				}
515 
516 				if (fstat64(archivefd, &stats) < 0)
517 				    stats.st_size = -1;
518 
519 				/* cast to keep lint&printf happy */
520 				msg(gettext(
521 		    "Cannot write archive file `%s' at offset %lld: %s\n"),
522 				    archivefile, (longlong_t)stats.st_size,
523 				    errmsg);
524 				msg(gettext(
525 		    "Archive file will be deleted, dump will continue\n"));
526 				punt++;
527 				if ((size != -1) && (*errmsg != '\0')) {
528 					free(errmsg);
529 				}
530 			}
531 		} else {
532 			break;
533 		}
534 	}
535 	(void) close(cmd[0]);
536 	if (archive) {
537 		(void) close(archivefd);
538 		archivefd = -1;
539 	}
540 	if (punt) {
541 		(void) unlink(archivefile);
542 		Exit(X_ABORT);
543 	}
544 	Exit(X_FINOK);
545 	/* NOTREACHED */
546 }
547 
548 /*
549  * Start a process to read the output buffers and write the data
550  * to the output device.
551  */
552 static pid_t
553 #ifdef __STDC__
554 setupwriter(void)
555 #else
556 setupwriter()
557 #endif
558 {
559 	struct slaves *slavep;
560 	int cmd[2];
561 	pid_t pid;
562 	int saverr;
563 
564 	caught = 0;
565 	if (pipe(cmd) < 0) {
566 		saverr = errno;
567 		msg(gettext("%s: %s error: %s\n"),
568 			"setupwriter", "pipe", strerror(saverr));
569 		return (0);
570 	}
571 	sighold(SIGINT);
572 	if ((pid = fork()) < 0) {
573 		saverr = errno;
574 		msg(gettext("%s: %s error: %s\n"),
575 			"setupwriter", "fork", strerror(saverr));
576 		return (0);
577 	}
578 	if (pid > 0) {
579 		/*
580 		 * Parent process
581 		 */
582 		sigrelse(SIGINT);
583 		(void) close(cmd[0]);
584 		writer = cmd[1];
585 		return (pid);
586 	}
587 	/*
588 	 * Child (writer) process
589 	 */
590 	(void) signal(SIGINT, SIG_IGN);		/* master handles this */
591 #ifdef TDEBUG
592 	(void) sleep(4);	/* allow time for parent's message to get out */
593 	/* XGETTEXT:  #ifdef TDEBUG only */
594 	msg(gettext("Writer has pid = %ld\n"), (long)getpid());
595 #endif
596 	child_chdir();
597 	freeino();	/* release unneeded resources */
598 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
599 		if (slavep->sl_slavefd != -1) {
600 			(void) close(slavep->sl_slavefd);
601 			slavep->sl_slavefd = -1;
602 		}
603 	}
604 	(void) close(fi);
605 	fi = -1;
606 	(void) close(cmd[1]);
607 	dowrite(cmd[0]);
608 	if (arch >= 0) {
609 		(void) close(arch);
610 		arch = -1;
611 	}
612 	(void) close(cmd[0]);
613 	Exit(X_FINOK);
614 	/* NOTREACHED */
615 }
616 
617 void
618 #ifdef __STDC__
619 spclrec(void)
620 #else
621 spclrec()
622 #endif
623 {
624 	int s, i;
625 	int32_t *ip;
626 	int flags = BUF_SPCLREC;
627 
628 	if ((BIT(ino, shamap)) && (spcl.c_type == TS_INODE)) {
629 		spcl.c_type = TS_ADDR;
630 		/* LINTED: result fits in a short */
631 		spcl.c_dinode.di_mode &= ~S_IFMT;
632 		/* LINTED: result fits in a short */
633 		spcl.c_dinode.di_mode |= IFSHAD;
634 	}
635 
636 	/*
637 	 * Only TS_INODEs should have short metadata, if this
638 	 * isn't such a spclrec, clear the metadata flag and
639 	 * the c_shadow contents.
640 	 */
641 	if (!(spcl.c_type == TS_INODE && (spcl.c_flags & DR_HASMETA))) {
642 		spcl.c_flags &= ~DR_HASMETA;
643 		bcopy(c_shadow_save, &(spcl.c_shadow),
644 		    sizeof (spcl.c_shadow));
645 	}
646 
647 	if (spcl.c_type == TS_END) {
648 		spcl.c_count = 1;
649 		spcl.c_flags |= DR_INODEINFO;
650 		bcopy((char *)inos, (char *)spcl.c_inos, sizeof (inos));
651 	} else if (spcl.c_type == TS_TAPE) {
652 		spcl.c_flags |= DR_NEWHEADER;
653 		if (doingactive)
654 			spcl.c_flags |= DR_REDUMP;
655 	} else if (spcl.c_type != TS_INODE)
656 		flags = BUF_SPCLREC;
657 	spcl.c_tapea = *tapea;
658 	/* LINTED for now, max inode # is 2**31 (ufs max size is 4TB) */
659 	spcl.c_inumber = (ino32_t)ino;
660 	spcl.c_magic = (tp_bsize == TP_BSIZE_MIN) ? NFS_MAGIC : MTB_MAGIC;
661 	spcl.c_checksum = 0;
662 	ip = (int32_t *)&spcl;
663 	s = CHECKSUM;
664 	assert((tp_bsize % sizeof (*ip)) == 0);
665 	i = tp_bsize / sizeof (*ip);
666 	assert((i%8) == 0);
667 	i /= 8;
668 	do {
669 		s -= *ip++; s -= *ip++; s -= *ip++; s -= *ip++;
670 		s -= *ip++; s -= *ip++; s -= *ip++; s -= *ip++;
671 	} while (--i > 0);
672 	spcl.c_checksum = s;
673 	taprec((uchar_t *)&spcl, flags, sizeof (spcl));
674 	if (spcl.c_type == TS_END)
675 		spcl.c_flags &= ~DR_INODEINFO;
676 	else if (spcl.c_type == TS_TAPE)
677 		spcl.c_flags &= ~(DR_NEWHEADER|DR_REDUMP|DR_TRUEINC);
678 }
679 
680 /*
681  * Fill appropriate buffer
682  */
683 void
684 taprec(dp, flags, size)
685 	uchar_t *dp;
686 	int flags;
687 	int size;
688 {
689 	if (size > tp_bsize) {
690 		msg(gettext(
691 		    "taprec: Unexpected buffer size, expected %d, got %d.\n"),
692 		    tp_bsize, size);
693 		dumpabort();
694 		/*NOTREACHED*/
695 	}
696 
697 	while ((*current)->b_flags & BUF_FULL)
698 		nap(10);
699 
700 	bcopy(dp, (*current)->b_data, (size_t)size);
701 	if (size < tp_bsize) {
702 		bzero((*current)->b_data + size, tp_bsize - size);
703 	}
704 
705 	if (dumptoarchive)
706 		flags |= BUF_ARCHIVE;
707 
708 	/* no locking as we assume only one reader and one writer active */
709 	(*current)->b_flags = (flags | BUF_FULL);
710 	if (++*current >= &bufp[NBUF*ntrec])
711 		(*current) = &bufp[0];
712 	(*tapea)++;
713 }
714 
715 void
716 dmpblk(blkno, size, offset)
717 	daddr32_t blkno;
718 	size_t size;
719 	off_t offset;
720 {
721 	diskaddr_t dblkno;
722 
723 	assert((offset >> DEV_BSHIFT) <= INT32_MAX);
724 	dblkno = fsbtodb(sblock, blkno) + (offset >> DEV_BSHIFT);
725 	size = (size + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
726 	slp->sl_req->br_dblk = dblkno;
727 	slp->sl_req->br_size = size;
728 	if (dumptoarchive) {
729 		/* LINTED: result fits in a short */
730 		slp->sl_req->aflag |= BUF_ARCHIVE;
731 	}
732 	toslave((void(*)())0, ino);
733 }
734 
735 /*ARGSUSED*/
736 static void
737 tperror(sig)
738 	int	sig;
739 {
740 	char buf[3000];
741 
742 	if (pipeout) {
743 		msg(gettext("Write error on %s\n"), tape);
744 		msg(gettext("Cannot recover\n"));
745 		dumpabort();
746 		/* NOTREACHED */
747 	}
748 	if (!doingverify) {
749 		broadcast(gettext("WRITE ERROR!\n"));
750 		(void) snprintf(buf, sizeof (buf),
751 		    gettext("Do you want to restart?: (\"yes\" or \"no\") "));
752 		if (!query(buf)) {
753 			dumpabort();
754 			/*NOTREACHED*/
755 		}
756 		if (tapeout && (isrewind(to) || offline)) {
757 			/* ANSI string catenation, to shut cstyle up */
758 			msg(gettext("This tape will rewind.  After "
759 				    "it is rewound,\nreplace the faulty tape "
760 				    "with a new one;\nthis dump volume will "
761 				    "be rewritten.\n"));
762 		}
763 	} else {
764 		broadcast(gettext("TAPE VERIFICATION ERROR!\n"));
765 		(void) snprintf(buf, sizeof (buf), gettext(
766 		    "Do you want to rewrite?: (\"yes\" or \"no\") "));
767 		if (!query(buf)) {
768 			dumpabort();
769 			/*NOTREACHED*/
770 		}
771 		msg(gettext(
772 			"This tape will be rewritten and then verified\n"));
773 	}
774 	killall();
775 	trewind();
776 	Exit(X_REWRITE);
777 }
778 
779 /*
780  * Called by master from pass() to send a request to dump files/blocks
781  * to one of the slaves.  Slaves return whether the file was active
782  * when it was being dumped.  The tape writer process sends checkpoint
783  * info when it completes a volume.
784  */
785 void
786 toslave(fn, inumber)
787 	void	(*fn)();
788 	ino_t	inumber;
789 {
790 	int	wasactive;
791 
792 	if (recsout >= SLAVES) {
793 		if ((unsigned)atomic((int(*)())read, slp->sl_slavefd,
794 		    (char *)&wasactive, sizeof (wasactive)) !=
795 		    sizeof (wasactive)) {
796 			cmdrderr();
797 			dumpabort();
798 			/*NOTREACHED*/
799 		}
800 		if (wasactive) {
801 			active++;
802 			msg(gettext(
803 		"The file at inode `%lu' was active and will be recopied\n"),
804 				slp->sl_req->ir_inumber);
805 			/* LINTED: 32-bit to 8-bit assignment ok */
806 			BIS(slp->sl_req->ir_inumber, activemap);
807 		}
808 	}
809 	slp->sl_req->aflag = 0;
810 	if (dumptoarchive) {
811 		/* LINTED: result fits in a short */
812 		slp->sl_req->aflag |= BUF_ARCHIVE;
813 	}
814 	if (fn)
815 		(*fn)(inumber);
816 
817 	if (atomic((int(*)())write, slp->sl_slavefd, (char *)slp->sl_req,
818 	    reqsiz) != reqsiz) {
819 		cmdwrterr();
820 		dumpabort();
821 		/*NOTREACHED*/
822 	}
823 	++recsout;
824 	nextslave();
825 }
826 
827 void
828 dospcl(inumber)
829 	ino_t	inumber;
830 {
831 	/* LINTED for now, max inode # is 2**31 (ufs max size is 1TB) */
832 	spcl.c_inumber = (ino32_t)inumber;
833 	slp->sl_req->br_dblk = 0;
834 	bcopy((char *)&spcl, (char *)slp->sl_req->br_spcl, tp_bsize);
835 }
836 
837 static void
838 #ifdef __STDC__
839 nextslave(void)
840 #else
841 nextslave()
842 #endif
843 {
844 	if (++rotor >= SLAVES) {
845 		rotor = 0;
846 	}
847 	slp = &slaves[rotor];
848 }
849 
850 void
851 #ifdef __STDC__
852 flushcmds(void)
853 #else
854 flushcmds()
855 #endif
856 {
857 	int i;
858 	int wasactive;
859 
860 	/*
861 	 * Retrieve all slave status
862 	 */
863 	if (recsout < SLAVES) {
864 		slp = slaves;
865 		rotor = 0;
866 	}
867 	for (i = 0; i < (recsout < SLAVES ? recsout : SLAVES); i++) {
868 		if ((unsigned)atomic((int(*)())read, slp->sl_slavefd,
869 		    (char *)&wasactive, sizeof (wasactive)) !=
870 		    sizeof (wasactive)) {
871 			cmdrderr();
872 			dumpabort();
873 			/*NOTREACHED*/
874 		}
875 		if (wasactive) {
876 			active++;
877 			msg(gettext(
878 			    "inode %d was active and will be recopied\n"),
879 				slp->sl_req->ir_inumber);
880 			/* LINTED: 32-bit to 8-bit assignment ok */
881 			BIS(slp->sl_req->ir_inumber, activemap);
882 		}
883 		nextslave();
884 	}
885 }
886 
887 void
888 #ifdef __STDC__
889 flusht(void)
890 #else
891 flusht()
892 #endif
893 {
894 	sigset_t block_set, oset;	/* hold SIGUSR1 and atomically sleep */
895 
896 	(void) sigemptyset(&block_set);
897 	(void) sigaddset(&block_set, SIGUSR1);
898 	(void) sigprocmask(SIG_BLOCK, &block_set, &oset);
899 	(void) kill(writepid, SIGUSR1);	/* tell writer to flush */
900 	(void) sigpause(SIGUSR1);	/* wait for SIGUSR1 from writer */
901 	/*NOTREACHED*/
902 }
903 
/* Jump target for rollforward(); both of its exits longjmp() here. */
jmp_buf	checkpoint_buf;

/*
 * Roll forward to the next volume after receiving
 * an EOT signal from writer.  Get checkpoint data
 * from writer and return if done, otherwise fork
 * a new process and jump back to main state loop
 * to begin the next volume.  Installed as the master's
 * signal handler for SIGUSR1.
 *
 * This function never returns normally: it either aborts the dump or
 * leaves through longjmp(checkpoint_buf, 1).
 */
/*ARGSUSED*/
static void
rollforward(sig)
	int	sig;
{
	int status;
	/* keep further SIGUSR1s out while the checkpoint is being read */
	(void) sighold(SIGUSR1);

	/*
	 * Writer sends us checkpoint information after
	 * each volume.  A returned state of DS_DONE with no
	 * unwritten (left-over) records differentiates a
	 * clean flush from one in which EOT was encountered.
	 */
	if ((unsigned)atomic((int(*)())read, writer, (char *)&chkpt,
	    sizeof (struct slaves)) != sizeof (struct slaves)) {
		cmdrderr();
		dumpabort();
		/*NOTREACHED*/
	}
	/* the checkpoint is followed by TP_BSIZE_MIN bytes of spcl */
	if (atomic((int(*)())read, writer, (char *)&spcl,
	    TP_BSIZE_MIN) != TP_BSIZE_MIN) {
		cmdrderr();
		dumpabort();
		/*NOTREACHED*/
	}
	/* restore the master's dump position from the checkpoint */
	ino = chkpt.sl_inos - 1;
	pos = chkpt.sl_offset;
	leftover = chkpt.sl_count;
	dumpstate = chkpt.sl_state;
	blockswritten = ++chkpt.sl_tapea;

	if (dumpstate == DS_DONE) {
		if (archivepid) {
			/*
			 * If archiving (either archive or
			 * database), signal the archiver
			 * to finish up.  This must happen
			 * before the writer exits in order
			 * to avoid a race.
			 */
			(void) kill(archivepid, SIGUSR1);
		}
		/* no more checkpoints are expected; ignore SIGUSR1 now */
		(void) signal(SIGUSR1, SIG_IGN);
		(void) sigrelse(SIGUSR1);
		(void) kill(writepid, SIGUSR1);	/* tell writer to exit */

		lf_archoffset = 0LL;
		longjmp(checkpoint_buf, 1);
		/*NOTREACHED*/
	}

	/*
	 * Move the last "leftover" entries of c_addr to the front
	 * and zero the remainder of the array.
	 */
	if (leftover) {
		(void) memmove(spcl.c_addr,
		    &spcl.c_addr[spcl.c_count-leftover], leftover);
		bzero(&spcl.c_addr[leftover], TP_NINDIR-leftover);
	}
	if (writepid) {
		(void) kill(writepid, SIGUSR1);	/* tell writer to exit */
		(void) close(writer);
		writer = -1;
	}
	if (archivepid) {
		(void) waitpid(archivepid, &status, 0);	/* wait for archiver */
#ifdef TDEBUG

		/* XGETTEXT:  #ifdef TDEBUG only */
		msg(gettext("Archiver %ld returns with status %d\n"),
		    (long)archivepid, status);
#endif
		archivepid = 0;
	}
	/*
	 * Checkpoint archive file
	 */
	if (!doingverify && archive) {
		/* whence 2: offset from end of file, i.e. the file's size */
		lf_archoffset = lseek64(archivefd, (off64_t)0, 2);
		if (lf_archoffset < 0) {
			int saverr = errno;
			msg(gettext("Cannot position archive file `%s': %s\n"),
				archivefile, strerror(saverr));
			dumpabort();
			/*NOTREACHED*/
		}
		(void) close(archivefd);
		archivefd = -1;
	}
	resetino(ino);

	/* the writer never got past DS_START: start over on a new volume */
	if (dumpstate == DS_START) {
		msg(gettext(
			"Tape too short: changing volumes and restarting\n"));
		reset();
	}

	if (!pipeout) {
		/* a volume still to be verified is rewound, not changed */
		if (verify && !doingverify)
			trewind();
		else {
			close_rewind();
			changevol();
		}
	}

	(void) sigrelse(SIGUSR1);
	otape(0);
	longjmp(checkpoint_buf, 1);
	/*NOTREACHED*/
}
1023 
/*
 * Sleep for roughly ms milliseconds, using select() with no file
 * descriptors purely as a sub-second timer.
 */
static void
nap(ms)
	int ms;
{
	struct timeval delay;

	/* split into whole seconds plus the remainder in microseconds */
	delay.tv_sec = ms / 1000;
	delay.tv_usec = (ms % 1000) * 1000;
	(void) select(0, (fd_set *)0, (fd_set *)0, (fd_set *)0, &delay);
}
1034 
/* Jump target used by alrm(); must be armed with setjmp() beforehand. */
static jmp_buf alrm_buf;

/*
 * Signal handler that abandons whatever was interrupted by jumping
 * back to the point saved in alrm_buf.  The signal number is unused.
 * NOTE(review): presumably installed for SIGALRM around an operation
 * that needs a timeout -- the installing code is not in this part of
 * the file.
 */
/*ARGSUSED*/
static void
alrm(sig)
	int	sig;
{
	longjmp(alrm_buf, 1);
	/*NOTREACHED*/
}
1045 
1046 void
1047 #ifdef __STDC__
1048 nextdevice(void)
1049 #else
1050 nextdevice()
1051 #endif
1052 {
1053 	char	*cp;
1054 
1055 	if (host != NULL)	/* we set the host only once in ufsdump */
1056 		return;
1057 
1058 	host = NULL;
1059 	if (strchr(tape, ':')) {
1060 		if (diskette) {
1061 			msg(gettext("Cannot do remote dump to diskette\n"));
1062 			Exit(X_ABORT);
1063 		}
1064 		host = tape;
1065 		tape = strchr(host, ':');
1066 		*tape++ = 0;
1067 		cp = strchr(host, '@');	/* user@host? */
1068 		if (cp != (char *)0)
1069 			cp++;
1070 		else
1071 			cp = host;
1072 	} else
1073 		cp = spcl.c_host;
1074 	/*
1075 	 * dumpdev is provided for use in prompts and is of
1076 	 * the form:
1077 	 *	hostname:device
1078 	 * sdumpdev is of the form:
1079 	 *	hostname:device
1080 	 * for remote devices, and simply:
1081 	 *	device
1082 	 * for local devices.
1083 	 */
1084 	if (dumpdev != (char *)NULL) {
1085 		/* LINTED: dumpdev is not NULL */
1086 		free(dumpdev);
1087 	}
1088 	/*LINTED [cast to smaller integer]*/
1089 	dumpdev = xmalloc((size_t)((sizeof (spcl.c_host) + strlen(tape) + 2)));
1090 	/* LINTED unsigned -> signed cast ok */
1091 	(void) sprintf(dumpdev, "%.*s:%s", (int)sizeof (spcl.c_host), cp, tape);
1092 	if (cp == spcl.c_host)
1093 		sdumpdev = strchr(dumpdev, ':') + 1;
1094 	else
1095 		sdumpdev = dumpdev;
1096 }
1097 
1098 /*
1099  * Gross hack due to misfeature of mt tape driver that causes
1100  * the device to rewind if we generate any signals.  Guess
1101  * whether tape is rewind device or not -- for local devices
1102  * we can just look at the minor number.  For rmt devices,
1103  * make an educated guess.
1104  */
1105 int
1106 isrewind(f)
1107 	int	f;	/* fd, if local device */
1108 {
1109 	struct stat64 sbuf;
1110 	char    *c;
1111 	int	unit;
1112 	int	rewind;
1113 
1114 	if (host) {
1115 		c = strrchr(tape, '/');
1116 		if (c == NULL)
1117 			c = tape;
1118 		else
1119 			c++;
1120 		/*
1121 		 * If the last component begins or ends with an 'n', it is
1122 		 * assumed to be a non-rewind device.
1123 		 */
1124 		if (c[0] == 'n' || c[strlen(c)-1] == 'n')
1125 			rewind = 0;
1126 		else if ((strstr(tape, "mt") || strstr(tape, "st")) &&
1127 		    sscanf(tape, "%*[a-zA-Z/]%d", &unit) == 1 &&
1128 		    (unit & MT_NOREWIND))
1129 			rewind = 0;
1130 		else
1131 			rewind = 1;
1132 	} else {
1133 		if (fstat64(f, &sbuf) < 0) {
1134 			msg(gettext(
1135 			    "Cannot obtain status of output device `%s'\n"),
1136 				tape);
1137 			dumpabort();
1138 			/*NOTREACHED*/
1139 		}
1140 		rewind = minor(sbuf.st_rdev) & MT_NOREWIND ? 0 : 1;
1141 	}
1142 	return (rewind);
1143 }
1144 
1145 static void
1146 #ifdef __STDC__
1147 just_rewind(void)
1148 #else
1149 just_rewind()
1150 #endif
1151 {
1152 	struct slaves *slavep;
1153 	char *rewinding = gettext("Tape rewinding\n");
1154 
1155 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++) {
1156 		if (slavep->sl_slavepid > 0)	/* signal normal exit */
1157 			(void) kill(slavep->sl_slavepid, SIGTERM);
1158 		if (slavep->sl_slavefd >= 0) {
1159 			(void) close(slavep->sl_slavefd);
1160 			slavep->sl_slavefd = -1;
1161 		}
1162 	}
1163 
1164 	/* wait for any signals from slaves */
1165 	while (waitpid(0, (int *)0, 0) >= 0)
1166 		/*LINTED [empty body]*/
1167 		continue;
1168 
1169 	if (pipeout)
1170 		return;
1171 
1172 	if (doingverify) {
1173 		/*
1174 		 * Space to the end of the tape.
1175 		 * Backup first in case we already read the EOF.
1176 		 */
1177 		if (host) {
1178 			(void) rmtioctl(MTBSR, 1);
1179 			if (rmtioctl(MTEOM, 1) < 0)
1180 				(void) rmtioctl(MTFSF, 1);
1181 		} else {
1182 			static struct mtop bsr = { MTBSR, 1 };
1183 			static struct mtop eom = { MTEOM, 1 };
1184 			static struct mtop fsf = { MTFSF, 1 };
1185 
1186 			(void) ioctl(to, MTIOCTOP, &bsr);
1187 			if (ioctl(to, MTIOCTOP, &eom) < 0)
1188 				(void) ioctl(to, MTIOCTOP, &fsf);
1189 		}
1190 	}
1191 
1192 	/*
1193 	 * Guess whether the tape is rewinding so we can tell
1194 	 * the operator if it's going to take a long time.
1195 	 */
1196 	if (tapeout && isrewind(to)) {
1197 		/* tape is probably rewinding */
1198 		msg(rewinding);
1199 	}
1200 }
1201 
1202 void
1203 #ifdef __STDC__
1204 trewind(void)
1205 #else
1206 trewind()
1207 #endif
1208 {
1209 	(void) timeclock((time_t)0);
1210 	if (offline && (!verify || doingverify)) {
1211 		close_rewind();
1212 	} else {
1213 		just_rewind();
1214 		if (host)
1215 			rmtclose();
1216 		else {
1217 			(void) close(to);
1218 			to = -1;
1219 		}
1220 	}
1221 }
1222 
1223 void
1224 #ifdef __STDC__
1225 close_rewind(void)
1226 #else
1227 close_rewind()
1228 #endif
1229 {
1230 	char *rewinding = gettext("Tape rewinding\n");
1231 
1232 	(void) timeclock((time_t)0);
1233 	just_rewind();
1234 	/*
1235 	 * The check in just_rewind won't catch the case in
1236 	 * which the current volume is being taken off-line
1237 	 * and is not mounted on a no-rewind device (and is
1238 	 * not the last volume, which is not taken off-line).
1239 	 */
1240 	if (tapeout && !isrewind(to) && offline) {
1241 		/* tape is probably rewinding */
1242 		msg(rewinding);
1243 	}
1244 	if (host) {
1245 		if (offline || autoload)
1246 			(void) rmtioctl(MTOFFL, 0);
1247 		rmtclose();
1248 	} else {
1249 		if (offline || autoload) {
1250 			static struct mtop offl = { MTOFFL, 0 };
1251 
1252 			(void) ioctl(to, MTIOCTOP, &offl);
1253 			if (diskette)
1254 				(void) ioctl(to, FDEJECT, 0);
1255 		}
1256 		(void) close(to);
1257 		to = -1;
1258 	}
1259 }
1260 
1261 void
1262 #ifdef __STDC__
1263 changevol(void)
1264 #else
1265 changevol()
1266 #endif
1267 {
1268 	char buf1[3000], buf2[3000];
1269 	char volname[LBLSIZE+1];
1270 
1271 	/*CONSTANTCONDITION*/
1272 	assert(sizeof (spcl.c_label) < sizeof (volname));
1273 
1274 	filenum = 1;
1275 	nextdevice();
1276 	(void) strcpy(spcl.c_label, tlabel);
1277 	if (host) {
1278 		char	*rhost = host;
1279 		char	*cp = strchr(host, '@');
1280 		if (cp == (char *)0)
1281 			cp = host;
1282 		else
1283 			cp++;
1284 
1285 		if (rmthost(rhost, ntrec) == 0) {
1286 			msg(gettext("Cannot connect to tape host `%s'\n"), cp);
1287 			dumpabort();
1288 			/*NOTREACHED*/
1289 		}
1290 		if (rhost != host)
1291 			free(rhost);
1292 	}
1293 
1294 	/*
1295 	 * Make volume switching as automatic as possible
1296 	 * while avoiding overwriting volumes.  We will
1297 	 * switch automatically under the following condition:
1298 	 *    1) The user specified autoloading from the
1299 	 *	command line.
1300 	 * At one time, we (in the guise of hsmdump) had the
1301 	 * concept of a sequence of devices to rotate through,
1302 	 * but that's never been a ufsdump feature.
1303 	 */
1304 	if (autoload) {
1305 		int tries;
1306 
1307 		/*
1308 		 * Stop the clock for throughput calculations.
1309 		 */
1310 		if ((telapsed != NULL) && (tstart_writing != NULL)) {
1311 			*telapsed += time((time_t *)NULL) - *tstart_writing;
1312 		}
1313 
1314 		(void) snprintf(volname, sizeof (volname), "#%d", tapeno+1);
1315 		(void) snprintf(buf1, sizeof (buf1), gettext(
1316 		    "Mounting volume %s on %s\n"), volname, dumpdev);
1317 		msg(buf1);
1318 		broadcast(buf1);
1319 
1320 		/*
1321 		 * Wait for the tape to autoload.  Note that the delay
1322 		 * period doesn't take into account however long it takes
1323 		 * for the open to fail (measured at 21 seconds for an
1324 		 * Exabyte 8200 under 2.7 on an Ultra 2).
1325 		 */
1326 		for (tries = 0; tries < autoload_tries; tries++) {
1327 			if (host) {
1328 				if (rmtopen(tape, O_RDONLY) >= 0) {
1329 					rmtclose();
1330 					return;
1331 				}
1332 			} else {
1333 				int f, m;
1334 
1335 				m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
1336 				if ((f = doingverify ?
1337 				    safe_device_open(tape, O_RDONLY, 0600) :
1338 				    safe_device_open(tape, O_RDONLY|m, 0600))
1339 				    >= 0) {
1340 					(void) close(f);
1341 					return;
1342 				}
1343 			}
1344 			(void) sleep(autoload_period);
1345 		}
1346 		/*
1347 		 * Autoload timed out, ask the operator to do it.
1348 		 * Note that query() will update *telapsed, and we
1349 		 * shouldn't charge for the autoload time.  So, since
1350 		 * we updated *telapsed ourselves above, we just set
1351 		 * tstart_writing to the current time, and query()
1352 		 * will end up making a null-effect change.  This,
1353 		 * of course, assumes that our caller will be resetting
1354 		 * *tstart_writing.  This is currently the case.
1355 		 * If tstart_writing is NULL (should never happen),
1356 		 * we're ok, since time(2) will accept a NULL pointer.
1357 		 */
1358 		(void) time(tstart_writing);
1359 	}
1360 
1361 	if (strncmp(spcl.c_label, "none", 5)) {
1362 		(void) strncpy(volname, spcl.c_label, sizeof (spcl.c_label));
1363 		volname[sizeof (spcl.c_label)] = '\0';
1364 	} else
1365 		(void) snprintf(volname, sizeof (volname), "#%d", tapeno+1);
1366 
1367 	timeest(1, spcl.c_tapea);
1368 	(void) snprintf(buf1, sizeof (buf1), gettext(
1369 	    "Change Volumes: Mount volume `%s' on `%s'\n"), volname, dumpdev);
1370 	msg(buf1);
1371 	broadcast(gettext("CHANGE VOLUMES!\7\7\n"));
1372 	(void) snprintf(buf1, sizeof (buf1), gettext(
1373 	    "Is the new volume (%s) mounted on `%s' and ready to go?: %s"),
1374 	    volname, dumpdev, gettext("(\"yes\" or \"no\") "));
1375 	while (!query(buf1)) {
1376 		(void) snprintf(buf2, sizeof (buf2), gettext(
1377 		    "Do you want to abort dump?: (\"yes\" or \"no\") "));
1378 		if (query(buf2)) {
1379 			dumpabort();
1380 			/*NOTREACHED*/
1381 		}
1382 	}
1383 }
1384 
1385 /*
1386  *	We implement taking and restoring checkpoints on the tape level.
1387  *	When each tape is opened, a new process is created by forking; this
1388  *	saves all of the necessary context in the parent.  The child
1389  *	continues the dump; the parent waits around, saving the context.
1390  *	If the child returns X_REWRITE, then it had problems writing that tape;
1391  *	this causes the parent to fork again, duplicating the context, and
1392  *	everything continues as if nothing had happened.
1393  */
1394 
1395 void
1396 otape(top)
1397 	int top;
1398 {
1399 	static struct mtget mt;
1400 	char buf[3000];
1401 	pid_t parentpid;
1402 	pid_t childpid;
1403 	pid_t waitproc;
1404 	int status;
1405 	struct sigvec sv, osv;
1406 
1407 	sv.sv_flags = SA_RESTART;
1408 	(void) sigemptyset(&sv.sa_mask);
1409 	sv.sv_handler = SIG_IGN;
1410 	(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1411 
1412 	parentpid = getpid();
1413 
1414 	if (verify) {
1415 		if (doingverify)
1416 			doingverify = 0;
1417 		else
1418 			Exit(X_VERIFY);
1419 	}
1420 restore_check_point:
1421 
1422 	sv.sv_handler = interrupt;
1423 	(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1424 	(void) fflush(stderr);
1425 	/*
1426 	 *	All signals are inherited...
1427 	 */
1428 	sighold(SIGINT);
1429 	childpid = fork();
1430 	if (childpid < 0) {
1431 		msg(gettext(
1432 		    "Context-saving fork failed in parent %ld\n"),
1433 			(long)parentpid);
1434 		Exit(X_ABORT);
1435 	}
1436 	if (childpid != 0) {
1437 		/*
1438 		 *	PARENT:
1439 		 *	save the context by waiting
1440 		 *	until the child doing all of the work returns.
1441 		 *	let the child catch user interrupts
1442 		 */
1443 		sv.sv_handler = SIG_IGN;
1444 		(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1445 		sigrelse(SIGINT);
1446 #ifdef TDEBUG
1447 
1448 		/* XGETTEXT:  #ifdef TDEBUG only */
1449 		msg(gettext(
1450 		    "Volume: %d; parent process: %ld child process %ld\n"),
1451 			tapeno+1, (long)parentpid, (long)childpid);
1452 #endif /* TDEBUG */
1453 		for (;;) {
1454 			waitproc = waitpid(0, &status, 0);
1455 			if (waitproc == childpid)
1456 				break;
1457 			msg(gettext(
1458 	"Parent %ld waiting for child %ld had another child %ld return\n"),
1459 			    (long)parentpid, (long)childpid, (long)waitproc);
1460 		}
1461 		if (WIFSIGNALED(status)) {
1462 			msg(gettext("Process %ld killed by signal %d: %s\n"),
1463 			    (long)childpid, WTERMSIG(status),
1464 			    strsignal(WTERMSIG(status)));
1465 			status = X_ABORT;
1466 		} else
1467 			status = WEXITSTATUS(status);
1468 #ifdef TDEBUG
1469 		switch (status) {
1470 		case X_FINOK:
1471 			/* XGETTEXT:  #ifdef TDEBUG only */
1472 			msg(gettext(
1473 			    "Child %ld finishes X_FINOK\n"), (long)childpid);
1474 			break;
1475 		case X_ABORT:
1476 			/* XGETTEXT:  #ifdef TDEBUG only */
1477 			msg(gettext(
1478 			    "Child %ld finishes X_ABORT\n"), (long)childpid);
1479 			break;
1480 		case X_REWRITE:
1481 			/* XGETTEXT:  #ifdef TDEBUG only */
1482 			msg(gettext(
1483 			    "Child %ld finishes X_REWRITE\n"), (long)childpid);
1484 			break;
1485 		case X_RESTART:
1486 			/* XGETTEXT:  #ifdef TDEBUG only */
1487 			msg(gettext(
1488 			    "Child %ld finishes X_RESTART\n"), (long)childpid);
1489 			break;
1490 		case X_VERIFY:
1491 			/* XGETTEXT:  #ifdef TDEBUG only */
1492 			msg(gettext(
1493 			    "Child %ld finishes X_VERIFY\n"), (long)childpid);
1494 			break;
1495 		default:
1496 			/* XGETTEXT:  #ifdef TDEBUG only */
1497 			msg(gettext("Child %ld finishes unknown %d\n"),
1498 			    (long)childpid, status);
1499 			break;
1500 		}
1501 #endif /* TDEBUG */
1502 		switch (status) {
1503 		case X_FINOK:
1504 			/* wait for children */
1505 			while (waitpid(0, (int *)0, 0) >= 0)
1506 				/*LINTED [empty body]*/
1507 				continue;
1508 			Exit(X_FINOK);
1509 			/*NOTREACHED*/
1510 		case X_ABORT:
1511 			Exit(X_ABORT);
1512 			/*NOTREACHED*/
1513 		case X_VERIFY:
1514 			doingverify++;
1515 			goto restore_check_point;
1516 			/*NOTREACHED*/
1517 		case X_REWRITE:
1518 			doingverify = 0;
1519 			changevol();
1520 			goto restore_check_point;
1521 			/* NOTREACHED */
1522 		case X_RESTART:
1523 			doingverify = 0;
1524 			if (!top) {
1525 				Exit(X_RESTART);
1526 			}
1527 			if (!offline)
1528 				autoload = 0;
1529 			changevol();
1530 			sv.sv_handler = interrupt;
1531 			(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1532 			return;
1533 			/* NOTREACHED */
1534 		default:
1535 			msg(gettext("Bad return code from dump: %d\n"), status);
1536 			Exit(X_ABORT);
1537 			/*NOTREACHED*/
1538 		}
1539 		/*NOTREACHED*/
1540 	} else {	/* we are the child; just continue */
1541 		child_chdir();
1542 		sigrelse(SIGINT);
1543 #ifdef TDEBUG
1544 		(void) sleep(4); /* time for parent's message to get out */
1545 		/* XGETTEXT:  #ifdef TDEBUG only */
1546 		msg(gettext(
1547 		    "Child on Volume %d has parent %ld, my pid = %ld\n"),
1548 			tapeno+1, (long)parentpid, (long)getpid());
1549 #endif
1550 		(void) snprintf(buf, sizeof (buf), gettext(
1551 "Cannot open `%s'.  Do you want to retry the open?: (\"yes\" or \"no\") "),
1552 		    dumpdev);
1553 		if (doingverify) {
1554 			/* 1 for stdout */
1555 			while ((to = host ? rmtopen(tape, O_RDONLY) :
1556 			    pipeout ? 1 :
1557 			    safe_device_open(tape, O_RDONLY, 0600)) < 0) {
1558 				perror(tape);
1559 				if (autoload) {
1560 					if (!query_once(buf, 1)) {
1561 						dumpabort();
1562 						/*NOTREACHED*/
1563 					}
1564 				} else {
1565 					if (!query(buf)) {
1566 						dumpabort();
1567 						/*NOTREACHED*/
1568 					}
1569 				}
1570 			}
1571 
1572 			/*
1573 			 * If we're using the non-rewinding tape device,
1574 			 * the tape will be left positioned after the
1575 			 * EOF mark.  We need to back up to the beginning
1576 			 * of this tape file (cross two tape marks in the
1577 			 * reverse direction and one in the forward
1578 			 * direction) before the verify pass.
1579 			 */
1580 			if (host) {
1581 				if (rmtioctl(MTBSF, 2) >= 0)
1582 					(void) rmtioctl(MTFSF, 1);
1583 				else
1584 					(void) rmtioctl(MTNBSF, 1);
1585 			} else {
1586 				static struct mtop bsf = { MTBSF, 2 };
1587 				static struct mtop fsf = { MTFSF, 1 };
1588 				static struct mtop nbsf = { MTNBSF, 1 };
1589 
1590 				if (ioctl(to, MTIOCTOP, &bsf) >= 0)
1591 					(void) ioctl(to, MTIOCTOP, &fsf);
1592 				else
1593 					(void) ioctl(to, MTIOCTOP, &nbsf);
1594 			}
1595 		} else {
1596 			/*
1597 			 * XXX Add logic to test for "tape" being a
1598 			 * XXX device or a non-existent file.
1599 			 * Current behaviour is that it must exist,
1600 			 * and we over-write whatever's there.
1601 			 * This can be bad if tape == "/etc/passwd".
1602 			 */
1603 			if (!pipeout && doposition && (tapeno == 0)) {
1604 				positiontape(buf);
1605 				if (setjmp(alrm_buf)) {
1606 					/*
1607 					 * The tape is rewinding;
1608 					 * we're screwed.
1609 					 */
1610 				    msg(gettext(
1611 			    "Cannot position tape using rewind device!\n"));
1612 				    dumpabort();
1613 				    /*NOTREACHED*/
1614 				} else {
1615 					sv.sv_handler = alrm;
1616 					(void) sigvec(SIGALRM, &sv, &osv);
1617 					(void) alarm(15);
1618 				}
1619 				while ((to = host ? rmtopen(tape, O_WRONLY) :
1620 				    safe_device_open(tape, O_WRONLY, 0600)) < 0)
1621 					(void) sleep(10);
1622 				(void) alarm(0);
1623 				(void) sigvec(SIGALRM, &osv,
1624 				    (struct sigvec *)0);
1625 			} else {
1626 				int m;
1627 				m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
1628 				/*
1629 				 * Only verify the tape label if label
1630 				 * verification is on and we are at BOT
1631 				 */
1632 				if (pipeout)
1633 					to = 1;
1634 				else while ((to = host ?
1635 				    rmtopen(tape, O_WRONLY) :
1636 				    safe_device_open(tape, O_WRONLY|m, 0600))
1637 				    < 0)
1638 					if (!query_once(buf, 1)) {
1639 						dumpabort();
1640 						/*NOTREACHED*/
1641 					}
1642 			}
1643 		}
1644 		if (!pipeout) {
1645 			tapeout = host ? rmtstatus(&mt) >= 0 :
1646 			    ioctl(to, MTIOCGET, &mt) >= 0;	/* set state */
1647 			/*
1648 			 * Make sure the tape is positioned
1649 			 * where it is supposed to be
1650 			 */
1651 			if (tapeout && (tapeno > 0) &&
1652 			    (mt.mt_fileno != (filenum-1))) {
1653 				(void) snprintf(buf, sizeof (buf), gettext(
1654 				    "Warning - tape positioning error!\n\
1655 \t%s current file %ld, should be %ld\n"),
1656 				    tape, mt.mt_fileno+1, filenum);
1657 				msg(buf);
1658 				dumpailing();
1659 			}
1660 		}
1661 		tapeno++;		/* current tape sequence */
1662 		if (tapeno < TP_NINOS)
1663 			inos[tapeno] = chkpt.sl_inos;
1664 		spcl.c_firstrec = chkpt.sl_firstrec;
1665 		spcl.c_tapea = (*tapea) = chkpt.sl_tapea;
1666 		spcl.c_volume++;
1667 
1668 		enslave();	/* Share tape buffers with slaves */
1669 
1670 #ifdef DEBUG
1671 		if (xflag) {
1672 			/* XGETTEXT:  #ifdef DEBUG only */
1673 			msg(gettext("Checkpoint state:\n"));
1674 			msg("    blockswritten %u\n", blockswritten);
1675 			msg("    ino %u\n", ino);
1676 			msg("    pos %u\n", pos);
1677 			msg("    left %u\n", leftover);
1678 			msg("    tapea %u\n", (*tapea));
1679 			msg("    state %d\n", dumpstate);
1680 		}
1681 #endif
1682 		spcl.c_type = TS_TAPE;
1683 		spcl.c_tpbsize = tp_bsize;
1684 		if (leftover == 0) {
1685 			spcl.c_count = 0;
1686 			spclrec();
1687 			newtape = 0;
1688 		} else
1689 			newtape++;	/* new volume indication */
1690 		if (doingverify) {
1691 			msg(gettext("Starting verify pass\n"));
1692 		} else if (tapeno > 1) {
1693 			msg(gettext(
1694 			    "Volume %d begins with blocks from inode %lu\n"),
1695 				tapeno, chkpt.sl_inos);
1696 		}
1697 		(void) timeclock((time_t)1);
1698 		(void) time(tstart_writing);
1699 		timeest(0, spcl.c_tapea);
1700 	}
1701 }
1702 
1703 void
1704 #ifdef __STDC__
1705 dumpabort(void)
1706 #else
1707 dumpabort()
1708 #endif
1709 {
1710 
1711 	if (master && master != getpid())
1712 		/*
1713 		 * signal master to call dumpabort
1714 		 */
1715 		(void) kill(master, SIGTERM);
1716 	else {
1717 		killall();
1718 
1719 		if (archivefile)
1720 			(void) unlink(archivefile);
1721 		msg(gettext("The ENTIRE dump is aborted.\n"));
1722 	}
1723 	Exit(X_ABORT);
1724 }
1725 
/*
 * Announce that the dump is in trouble and ask the operator whether
 * to carry on.  Returns only if the answer is yes; otherwise the dump
 * is aborted.
 */
void
dumpailing(void)
{
	broadcast(gettext("DUMP IS AILING!\n"));

	if (query(gettext(
	    "Do you want to attempt to continue? (\"yes\" or \"no\") ")))
		return;

	dumpabort();
	/*NOTREACHED*/
}
1737 
/*
 * Terminate the calling process with the given exit status.  When
 * built with TDEBUG, the pid and status are logged first.
 */
void
Exit(status)
	int	status;		/* exit status handed to exit() */
{
	/*
	 * Clean up message system
	 */
#ifdef TDEBUG

	/* XGETTEXT:  #ifdef TDEBUG only */
	msg(gettext("pid = %ld exits with status %d\n"),
		(long)getpid(), status);
#endif /* TDEBUG */
	exit(status);
}
1752 
1753 static void
1754 #ifdef __STDC__
1755 killall(void)
1756 #else
1757 killall()
1758 #endif
1759 {
1760 	struct slaves *slavep;
1761 
1762 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++)
1763 		if (slavep->sl_slavepid > 0) {
1764 			(void) kill(slavep->sl_slavepid, SIGKILL);
1765 #ifdef TDEBUG
1766 
1767 			/* XGETTEXT:  #ifdef TDEBUG only */
1768 			msg(gettext("Slave child %ld killed\n"),
1769 				(long)slavep->sl_slavepid);
1770 #endif
1771 		}
1772 	if (writepid) {
1773 		(void) kill(writepid, SIGKILL);
1774 #ifdef TDEBUG
1775 
1776 		/* XGETTEXT:  #ifdef TDEBUG only */
1777 		msg(gettext("Writer child %ld killed\n"), (long)writepid);
1778 #endif
1779 	}
1780 	if (archivepid) {
1781 		(void) kill(archivepid, SIGKILL);
1782 #ifdef TDEBUG
1783 
1784 		/* XGETTEXT:  #ifdef TDEBUG only */
1785 		msg(gettext("Archiver child %ld killed\n"), (long)archivepid);
1786 #endif
1787 	}
1788 }
1789 
1790 /*ARGSUSED*/
1791 static void
1792 proceed(sig)
1793 	int	sig;
1794 {
1795 	caught++;
1796 }
1797 
1798 /*ARGSUSED*/
1799 static void
1800 die(sig)
1801 	int	sig;
1802 {
1803 	Exit(X_FINOK);
1804 }
1805 
1806 static void
1807 #ifdef __STDC__
1808 enslave(void)
1809 #else
1810 enslave()
1811 #endif
1812 {
1813 	int cmd[2];			/* file descriptors */
1814 	int i;
1815 	struct sigvec sv;
1816 	struct slaves *slavep;
1817 	int saverr;
1818 
1819 	sv.sv_flags = SA_RESTART;
1820 	(void) sigemptyset(&sv.sa_mask);
1821 	master = getpid();
1822 	/*
1823 	 * slave sends SIGTERM on dumpabort
1824 	 */
1825 	sv.sv_handler = (void(*)(int))dumpabort;
1826 	(void) sigvec(SIGTERM, &sv, (struct sigvec *)0);
1827 	sv.sv_handler = tperror;
1828 	(void) sigvec(SIGUSR2, &sv, (struct sigvec *)0);
1829 	sv.sv_handler = proceed;
1830 	(void) sigvec(SIGUSR1, &sv, (struct sigvec *)0);
1831 	totalrecsout += recsout;
1832 	caught = 0;
1833 	recsout = 0;
1834 	rotor = 0;
1835 	bufclear();
1836 	for (slavep = &slaves[0]; slavep < &slaves[SLAVES]; slavep++)
1837 		slavep->sl_slavefd = -1;
1838 	archivefd = arch = writer = -1;
1839 	for (i = 0; i < SLAVES; i++) {
1840 		if (pipe(cmd) < 0) {
1841 			saverr = errno;
1842 			msg(gettext(
1843 			    "Cannot create pipe for slave process: %s\n"),
1844 			    strerror(saverr));
1845 			dumpabort();
1846 			/*NOTREACHED*/
1847 		}
1848 		sighold(SIGUSR2);
1849 		sighold(SIGINT);
1850 		sighold(SIGTERM);
1851 		if ((slaves[i].sl_slavepid = fork()) < 0) {
1852 			saverr = errno;
1853 			msg(gettext("Cannot create slave process: %s\n"),
1854 			    strerror(saverr));
1855 			dumpabort();
1856 			/*NOTREACHED*/
1857 		}
1858 		slaves[i].sl_slavefd = cmd[1];
1859 		if (slaves[i].sl_slavepid == 0) {   /* Slave starts up here */
1860 			pid_t next;		    /* pid of neighbor */
1861 
1862 			sv.sv_handler = SIG_DFL;
1863 			(void) sigvec(SIGUSR2, &sv, (struct sigvec *)0);
1864 			sv.sv_handler = SIG_IGN;	/* master handler INT */
1865 			(void) sigvec(SIGINT, &sv, (struct sigvec *)0);
1866 			sv.sv_handler = die;		/* normal slave exit */
1867 			(void) sigvec(SIGTERM, &sv, (struct sigvec *)0);
1868 
1869 			child_chdir();
1870 			sigrelse(SIGUSR2);
1871 			sigrelse(SIGINT);
1872 			sigrelse(SIGTERM);
1873 
1874 			freeino();	/* release unneeded resources */
1875 #ifdef TDEBUG
1876 		(void) sleep(4); /* time for parent's message to get out */
1877 		/* XGETTEXT:  #ifdef TDEBUG only */
1878 		msg(gettext("Neighbor has pid = %ld\n"), (long)getpid());
1879 #endif
1880 			/* Closes cmd[1] as a side-effect */
1881 			for (slavep = &slaves[0];
1882 			    slavep < &slaves[SLAVES];
1883 			    slavep++)
1884 				if (slavep->sl_slavefd >= 0) {
1885 					(void) close(slavep->sl_slavefd);
1886 					slavep->sl_slavefd = -1;
1887 				}
1888 			(void) close(to);
1889 			(void) close(fi);	    /* Need our own seek ptr */
1890 			to = -1;
1891 
1892 			fi = open(disk, O_RDONLY);
1893 
1894 			if (fi < 0) {
1895 				saverr = errno;
1896 				msg(gettext(
1897 				    "Cannot open dump device `%s': %s\n"),
1898 					disk, strerror(saverr));
1899 				dumpabort();
1900 				/*NOTREACHED*/
1901 			}
1902 
1903 			if ((unsigned)atomic((int(*)())read, cmd[0],
1904 			    (char *)&next, sizeof (next)) != sizeof (next)) {
1905 				cmdrderr();
1906 				dumpabort();
1907 				/*NOTREACHED*/
1908 			}
1909 			dumpoffline(cmd[0], next, i);
1910 			Exit(X_FINOK);
1911 		}
1912 		/* Parent continues here */
1913 		sigrelse(SIGUSR2);
1914 		sigrelse(SIGINT);
1915 		sigrelse(SIGTERM);
1916 		(void) close(cmd[0]);
1917 	}
1918 
1919 	if (archive) {
1920 		archivepid = setuparchive();
1921 		if (!archivepid) {
1922 			dumpabort();
1923 			/*NOTREACHED*/
1924 		}
1925 	}
1926 
1927 	writepid = setupwriter();
1928 	if (!writepid) {
1929 		dumpabort();
1930 		/*NOTREACHED*/
1931 	}
1932 
1933 	if (arch >= 0) {
1934 		(void) close(arch);		/* only writer has this open */
1935 		arch = -1;
1936 	}
1937 
1938 	/* Tell each slave who follows it */
1939 	for (i = 0; i < SLAVES; i++) {
1940 		if ((unsigned)atomic((int(*)())write, slaves[i].sl_slavefd,
1941 		    (char *)&(slaves[(i + 1) % SLAVES].sl_slavepid),
1942 		    sizeof (int)) != sizeof (int)) {
1943 			cmdwrterr();
1944 			dumpabort();
1945 			/*NOTREACHED*/
1946 		}
1947 	}
1948 	sv.sv_handler = rollforward;		/* rcvd from writer on EOT */
1949 	(void) sigvec(SIGUSR1, &sv, (struct sigvec *)0);
1950 	slp = slaves;
1951 	(void) kill(slp->sl_slavepid, SIGUSR1);
1952 	master = 0;
1953 }
1954 
1955 static void
1956 #ifdef __STDC__
1957 wait_our_turn(void)
1958 #else
1959 wait_our_turn()
1960 #endif
1961 {
1962 	(void) sighold(SIGUSR1);
1963 
1964 	if (!caught) {
1965 #ifdef INSTRUMENT
1966 		(*idle)++;
1967 #endif
1968 		(void) sigpause(SIGUSR1);
1969 	}
1970 	caught = 0;
1971 	(void) sigrelse(SIGUSR1);
1972 }
1973 
1974 static void
1975 dumpoffline(cmd, next, mynum)
1976 	int cmd;
1977 	pid_t next;
1978 	int mynum;
1979 {
1980 	struct req *p = slaves[mynum].sl_req;
1981 	ulong_t i;
1982 	uchar_t *cp;
1983 	uchar_t *blkbuf;
1984 	int notactive = 0;
1985 
1986 	blkbuf = xmalloc(sblock->fs_bsize);
1987 
1988 	/*CONSTANTCONDITION*/
1989 	assert(sizeof (spcl) == TP_BSIZE_MIN);
1990 
1991 	while (atomic((int(*)())read, cmd, (char *)p, reqsiz) == reqsiz) {
1992 		if (p->br_dblk) {
1993 			bread(p->br_dblk, (uchar_t *)blkbuf, p->br_size);
1994 		} else {
1995 			bcopy((char *)p->br_spcl, (char *)&spcl,
1996 			    sizeof (spcl));
1997 			ino = spcl.c_inumber;
1998 		}
1999 		dumptoarchive = p->aflag & BUF_ARCHIVE;
2000 		wait_our_turn();
2001 		if (p->br_dblk) {
2002 			for (i = p->br_size, cp = blkbuf;
2003 			    i > 0;
2004 			    /* LINTED character pointers aren't signed */
2005 			    cp += i > tp_bsize ? tp_bsize : i,
2006 			    i -= i > tp_bsize ? tp_bsize : i) {
2007 				/* LINTED unsigned to signed conversion ok */
2008 				taprec(cp, 0, i > tp_bsize ? tp_bsize : (int)i);
2009 			}
2010 		} else
2011 			spclrec();
2012 		(void) kill(next, SIGUSR1);	/* Next slave's turn */
2013 		/*
2014 		 * Note that we lie about file activity since we don't
2015 		 * check for it.
2016 		 */
2017 		if ((unsigned)atomic((int(*)())write, cmd, (char *)&notactive,
2018 		    sizeof (notactive)) != sizeof (notactive)) {
2019 			cmdwrterr();
2020 			dumpabort();
2021 			/*NOTREACHED*/
2022 		}
2023 	}
2024 
2025 	free(blkbuf);
2026 }
2027 
2028 static int count;		/* tape blocks written since last spclrec */
2029 
2030 /*ARGSUSED*/
2031 static void
2032 onxfsz(sig)
2033 	int	sig;
2034 {
2035 	msg(gettext("File size limit exceeded writing output volume %d\n"),
2036 	    tapeno);
2037 	(void) kill(master, SIGUSR2);
2038 	Exit(X_REWRITE);
2039 }
2040 
2041 static long	lastnonaddr;		/* last DS_{INODE,CLRI,BITS} written */
2042 static long	lastnonaddrm;		/* and the mode thereof */
2043 /*
2044  * dowrite -- the main body of the output writer process
2045  */
2046 static void
2047 dowrite(cmd)
2048 	int	cmd;
2049 {
2050 	struct bdesc *last =
2051 	    &bufp[(NBUF*ntrec)-1];		/* last buffer in pool */
2052 	struct bdesc *bp = bufp;		/* current buf in tape block */
2053 	struct bdesc *begin = bufp;		/* first buf of tape block */
2054 	struct bdesc *end = bufp + (ntrec-1);	/* last buf of tape block */
2055 	int siz;				/* bytes written (block) */
2056 	int trecs;				/* records written (block)  */
2057 	long asize = 0;				/* number of 0.1" units... */
2058 						/* ...written on current tape */
2059 	char *tp, *rbuf = NULL;
2060 	char *recmap = spcl.c_addr;		/* current tape record map */
2061 	char *endmp;				/* end of valid map data */
2062 	char *mp;				/* current map entry */
2063 	union u_spcl *sp;
2064 
2065 	(void) signal(SIGXFSZ, onxfsz);
2066 
2067 	bzero((char *)&spcl, sizeof (spcl));
2068 	count = 0;
2069 
2070 	if (doingverify) {
2071 		rbuf = (char *)malloc((uint_t)writesize);
2072 		if (rbuf == 0) {
2073 			/* Restart from checkpoint */
2074 			(void) kill(master, SIGUSR2);
2075 			Exit(X_REWRITE);
2076 		}
2077 	}
2078 
2079 	for (;;) {
2080 		/* START: wait until all buffers in tape block are full */
2081 		if ((bp->b_flags & BUF_FULL) == 0) {
2082 			if (caught) {		/* master signalled flush */
2083 				(void) sighold(SIGUSR1);
2084 				caught = 0;
2085 				/* signal ready */
2086 				(void) kill(master, SIGUSR1);
2087 				chkpt.sl_count = 0;	/* signal not at EOT */
2088 				checkpoint(bp-1, cmd);	/* send data */
2089 				(void) sigpause(SIGUSR1);
2090 				break;
2091 			}
2092 #ifdef INSTRUMENT
2093 			(*readmissp)++;
2094 #endif
2095 			nap(50);
2096 			continue;
2097 		}
2098 		if (bp < end) {
2099 			bp++;
2100 			continue;
2101 		}
2102 		/* END: wait until all buffers in tape block are full */
2103 
2104 		tp = begin->b_data;
2105 		(void) sighold(SIGUSR1);
2106 		if (host) {
2107 			if (!doingverify)
2108 				siz = rmtwrite(tp, writesize);
2109 			else if ((siz = rmtread(rbuf, writesize)) ==
2110 			    writesize && bcmp(rbuf, tp, writesize))
2111 				siz = -1;
2112 		} else {
2113 			if (!doingverify)
2114 				siz = write(to, tp, writesize);
2115 			else if ((siz = read(to, rbuf, writesize)) ==
2116 			    writesize && bcmp(rbuf, tp, writesize))
2117 				siz = -1;
2118 			if (siz < 0 && diskette && errno == ENOSPC)
2119 				siz = 0;	/* really EOF */
2120 		}
2121 		(void) sigrelse(SIGUSR1);
2122 		if (siz < 0 ||
2123 		    (pipeout && siz != writesize)) {
2124 			char buf[3000];
2125 
2126 			/*
2127 			 * Isn't i18n wonderful?
2128 			 */
2129 			if (doingverify) {
2130 				if (diskette)
2131 					(void) snprintf(buf, sizeof (buf),
2132 					    gettext(
2133 		    "Verification error %ld blocks into diskette %d\n"),
2134 					    asize * 2, tapeno);
2135 				else if (tapeout)
2136 					(void) snprintf(buf, sizeof (buf),
2137 					    gettext(
2138 		    "Verification error %ld feet into tape %d\n"),
2139 					    (cartridge ? asize/tracks :
2140 						asize)/120L,
2141 					    tapeno);
2142 				else
2143 					(void) snprintf(buf, sizeof (buf),
2144 					    gettext(
2145 		    "Verification error %ld blocks into volume %d\n"),
2146 					    asize * 2, tapeno);
2147 
2148 			} else {
2149 				if (diskette)
2150 					(void) snprintf(buf, sizeof (buf),
2151 					    gettext(
2152 			"Write error %ld blocks into diskette %d\n"),
2153 					    asize * 2, tapeno);
2154 				else if (tapeout)
2155 					(void) snprintf(buf, sizeof (buf),
2156 					    gettext(
2157 			"Write error %ld feet into tape %d\n"),
2158 					    (cartridge ? asize/tracks :
2159 						asize)/120L, tapeno);
2160 				else
2161 					(void) snprintf(buf, sizeof (buf),
2162 					    gettext(
2163 			"Write error %ld blocks into volume %d\n"),
2164 					    asize * 2, tapeno);
2165 			}
2166 
2167 			msg(buf);
2168 			/* Restart from checkpoint */
2169 #ifdef TDEBUG
2170 
2171 			/* XGETTEXT:  #ifdef TDEBUG only */
2172 			msg(gettext("sending SIGUSR2 to pid %ld\n"), master);
2173 #endif
2174 			(void) kill(master, SIGUSR2);
2175 			Exit(X_REWRITE);
2176 		}
2177 		trecs = siz / tp_bsize;
2178 		if (diskette)
2179 			asize += trecs;	/* asize == blocks written */
2180 		else
2181 			asize += (siz/density + tenthsperirg);
2182 		if (trecs)
2183 			chkpt.sl_firstrec++;
2184 		for (bp = begin; bp < begin + trecs; bp++) {
2185 			if ((arch >= 0) && (bp->b_flags & BUF_ARCHIVE)) {
2186 				if ((unsigned)atomic((int(*)())write, arch,
2187 				    (char *)&bp->b_flags, sizeof (bp->b_flags))
2188 				    != sizeof (bp->b_flags)) {
2189 					cmdwrterr();
2190 					dumpabort();
2191 					/*NOTREACHED*/
2192 				}
2193 				if (atomic((int(*)())write, arch, bp->b_data,
2194 				    tp_bsize) != tp_bsize) {
2195 					cmdwrterr();
2196 					dumpabort();
2197 					/*NOTREACHED*/
2198 				}
2199 			}
2200 			if (bp->b_flags & BUF_SPCLREC) {
2201 				/*LINTED [bp->b_data is aligned]*/
2202 				sp = (union u_spcl *)bp->b_data;
2203 				if (sp->s_spcl.c_type != TS_ADDR) {
2204 					lastnonaddr = sp->s_spcl.c_type;
2205 					lastnonaddrm =
2206 						sp->s_spcl.c_dinode.di_mode;
2207 					if (sp->s_spcl.c_type != TS_TAPE)
2208 						chkpt.sl_offset = 0;
2209 				}
2210 				chkpt.sl_count = sp->s_spcl.c_count;
2211 				bcopy((char *)sp,
2212 					(char *)&spcl, sizeof (spcl));
2213 				mp = recmap;
2214 				endmp = &recmap[spcl.c_count];
2215 				count = 0;
2216 			} else {
2217 				chkpt.sl_offset++;
2218 				chkpt.sl_count--;
2219 				count++;
2220 				mp++;
2221 			}
2222 			/*
2223 			 * Adjust for contiguous hole
2224 			 */
2225 			for (; mp < endmp; mp++) {
2226 				if (*mp)
2227 					break;
2228 				chkpt.sl_offset++;
2229 				chkpt.sl_count--;
2230 			}
2231 		}
2232 		/*
2233 		 * Check for end of tape
2234 		 */
2235 		if (trecs < ntrec ||
2236 		    (!pipeout && tsize > 0 && asize > tsize)) {
2237 			if (tapeout)
2238 				msg(gettext("End-of-tape detected\n"));
2239 			else
2240 				msg(gettext("End-of-file detected\n"));
2241 			(void) sighold(SIGUSR1);
2242 			caught = 0;
2243 			(void) kill(master, SIGUSR1);	/* signal EOT */
2244 			checkpoint(--bp, cmd);	/* send checkpoint data */
2245 			(void) sigpause(SIGUSR1);
2246 			break;
2247 		}
2248 		for (bp = begin; bp <= end; bp++)
2249 			bp->b_flags = BUF_EMPTY;
2250 		if (end + ntrec > last) {
2251 			bp = begin = bufp;
2252 			timeest(0, spcl.c_tapea);
2253 		} else
2254 			bp = begin = end+1;
2255 		end = begin + (ntrec-1);
2256 	}
2257 
2258 	if (rbuf != NULL)
2259 		free(rbuf);
2260 }
2261 
2262 /*
2263  * Send checkpoint info back to master.  This information
2264  * consists of the current inode number, number of logical
2265  * blocks written for that inode (or bitmap), the last logical
2266  * block number written, the number of logical blocks written
2267  * to this volume, the current dump state, and the current
2268  * special record map.
2269  */
2270 static void
2271 checkpoint(bp, cmd)
2272 	struct bdesc *bp;
2273 	int	cmd;
2274 {
2275 	int	state, type;
2276 	ino_t	ino;
2277 
2278 	if (++bp >= &bufp[NBUF*ntrec])
2279 		bp = bufp;
2280 
2281 	/*
2282 	 * If we are dumping files and the record following
2283 	 * the last written to tape is a special record, use
2284 	 * it to get an accurate indication of current state.
2285 	 */
2286 	if ((bp->b_flags & BUF_SPCLREC) && (bp->b_flags & BUF_FULL) &&
2287 	    lastnonaddr == TS_INODE) {
2288 		/*LINTED [bp->b_data is aligned]*/
2289 		union u_spcl *nextspcl = (union u_spcl *)bp->b_data;
2290 
2291 		if (nextspcl->s_spcl.c_type == TS_INODE) {
2292 			chkpt.sl_offset = 0;
2293 			chkpt.sl_count = 0;
2294 		} else if (nextspcl->s_spcl.c_type == TS_END) {
2295 			chkpt.sl_offset = 0;
2296 			chkpt.sl_count = 1;	/* EOT indicator */
2297 		}
2298 		ino = nextspcl->s_spcl.c_inumber;
2299 		type = nextspcl->s_spcl.c_type;
2300 	} else {
2301 		/*
2302 		 * If not, use what we have.
2303 		 */
2304 		ino = spcl.c_inumber;
2305 		type = spcl.c_type;
2306 	}
2307 
2308 	switch (type) {		/* set output state */
2309 	case TS_ADDR:
2310 		switch (lastnonaddr) {
2311 		case TS_INODE:
2312 		case TS_TAPE:
2313 			if ((lastnonaddrm & IFMT) == IFDIR ||
2314 			    (lastnonaddrm & IFMT) == IFATTRDIR)
2315 				state = DS_DIRS;
2316 			else
2317 				state = DS_FILES;
2318 			break;
2319 		case TS_CLRI:
2320 			state = DS_CLRI;
2321 			break;
2322 		case TS_BITS:
2323 			state = DS_BITS;
2324 			break;
2325 		}
2326 		break;
2327 	case TS_INODE:
2328 		if ((spcl.c_dinode.di_mode & IFMT) == IFDIR ||
2329 		    (spcl.c_dinode.di_mode & IFMT) == IFATTRDIR)
2330 			state = DS_DIRS;
2331 		else
2332 			state = DS_FILES;
2333 		break;
2334 	case 0:			/* EOT on 1st record */
2335 	case TS_TAPE:
2336 		state = DS_START;
2337 		ino = UFSROOTINO;
2338 		break;
2339 	case TS_CLRI:
2340 		state = DS_CLRI;
2341 		break;
2342 	case TS_BITS:
2343 		state = DS_BITS;
2344 		break;
2345 	case TS_END:
2346 		if (spcl.c_type == TS_END)
2347 			state = DS_DONE;
2348 		else
2349 			state = DS_END;
2350 		break;
2351 	}
2352 
2353 	/*
2354 	 * Checkpoint info to be processed by rollforward():
2355 	 *	The inode with which the next volume should begin
2356 	 *	The last inode number on this volume
2357 	 *	The last logical block number on this volume
2358 	 *	The current output state
2359 	 *	The offset within the current inode (already in sl_offset)
2360 	 *	The number of records left from last spclrec (in sl_count)
2361 	 *	The physical block the next vol begins with (in sl_firstrec)
2362 	 */
2363 	chkpt.sl_inos = ino;
2364 	chkpt.sl_tapea = spcl.c_tapea + count;
2365 	chkpt.sl_state = state;
2366 
2367 	if ((unsigned)atomic((int(*)())write, cmd, (char *)&chkpt,
2368 	    sizeof (chkpt)) != sizeof (chkpt)) {
2369 		cmdwrterr();
2370 		dumpabort();
2371 		/*NOTREACHED*/
2372 	}
2373 	if ((unsigned)atomic((int(*)())write, cmd, (char *)&spcl,
2374 	    sizeof (spcl)) != sizeof (spcl)) {
2375 		cmdwrterr();
2376 		dumpabort();
2377 		/*NOTREACHED*/
2378 	}
2379 #ifdef DEBUG
2380 	if (xflag) {
2381 		/* XGETTEXT:  #ifdef DEBUG only */
2382 		msg(gettext("sent chkpt to master:\n"));
2383 		msg("    ino %u\n", chkpt.sl_inos);
2384 		msg("    1strec %u\n", chkpt.sl_firstrec);
2385 		msg("    lastrec %u\n", chkpt.sl_tapea);
2386 		msg("    written %u\n", chkpt.sl_offset);
2387 		msg("    left %u\n", chkpt.sl_count);
2388 		msg("    state %d\n", chkpt.sl_state);
2389 	}
2390 #endif
2391 }
2392 
/*
 * Since a read from a pipe may not return all we asked for,
 * or a write may not write all we ask if we get a signal,
 * loop until the count is satisfied (or error).
 *
 * func is called as (*func)(fd, buf, nbytes) with at most 4096 bytes
 * requested per call; callers in this file pass read or write cast to
 * int (*)().
 *
 * Returns count when every byte was transferred.  Otherwise returns
 * the result of the last call to func: 0 if it transferred nothing
 * (e.g. end-of-file), or a negative value on error, in which case
 * errno is whatever func left behind.  When the failure was a zero
 * return, errno is 0 unless func itself set it.
 */
static ssize_t
atomic(func, fd, buf, count)
	int (*func)(), fd, count;
	char *buf;
{
	ssize_t got = 0, need = count;

	/* don't inherit random value if immediately get zero back from func */
	errno = 0;
	while (need > 0) {
		got = (*func)(fd, buf, MIN(need, 4096));
		if (got < 0 && errno == EINTR) {
			/*
			 * Interrupted before anything moved: try again.
			 * Clear errno first so that a later zero return
			 * is not reported by the caller as EINTR.
			 */
			errno = 0;
			continue;
		}
		if (got <= 0)
			break;
		buf += got;
		need -= got;
	}
	/* if we got what was asked for, return count, else failure (got) */
	return ((need != 0) ? got : count);
}
2419 
2420 void
2421 #ifdef __STDC__
2422 positiontape(char *msgbuf)
2423 #else
2424 positiontape(msgbuf)
2425 	char *msgbuf;
2426 #endif
2427 {
2428 	/* Static as never change, no need to waste stack space */
2429 	static struct mtget mt;
2430 	static struct mtop rew = { MTREW, 1 };
2431 	static struct mtop fsf = { MTFSF, 1 };
2432 	char *info = strdup(gettext("Positioning `%s' to file %ld\n"));
2433 	char *fail = strdup(gettext("Cannot position tape to file %d\n"));
2434 	int m;
2435 
2436 	/* gettext()'s return value is volatile, hence the strdup()s */
2437 
2438 	m = (access(tape, F_OK) == 0) ? 0 : O_CREAT;
2439 
2440 	/*
2441 	 * To avoid writing tape marks at inappropriate places, we open the
2442 	 * device read-only, position it, close it, and reopen it for writing.
2443 	 */
2444 	while ((to = host ? rmtopen(tape, O_RDONLY) :
2445 	    safe_device_open(tape, O_RDONLY|m, 0600)) < 0) {
2446 		if (autoload) {
2447 			if (!query_once(msgbuf, 1)) {
2448 				dumpabort();
2449 				/*NOTREACHED*/
2450 			}
2451 		} else {
2452 			if (!query(msgbuf)) {
2453 				dumpabort();
2454 				/*NOTREACHED*/
2455 			}
2456 		}
2457 	}
2458 
2459 	if (host) {
2460 		if (rmtstatus(&mt) >= 0 &&
2461 		    rmtioctl(MTREW, 1) >= 0 &&
2462 		    filenum > 1) {
2463 			msg(info, dumpdev, filenum);
2464 			if (rmtioctl(MTFSF, filenum-1) < 0) {
2465 				msg(fail, filenum);
2466 				dumpabort();
2467 				/*NOTREACHED*/
2468 			}
2469 		}
2470 		rmtclose();
2471 	} else {
2472 		if (ioctl(to, MTIOCGET, &mt) >= 0 &&
2473 		    ioctl(to, MTIOCTOP, &rew) >= 0 &&
2474 		    filenum > 1) {
2475 			msg(info, dumpdev, filenum);
2476 			fsf.mt_count = filenum - 1;
2477 			if (ioctl(to, MTIOCTOP, &fsf) < 0) {
2478 				msg(fail, filenum);
2479 				dumpabort();
2480 				/*NOTREACHED*/
2481 			}
2482 		}
2483 		(void) close(to);
2484 		to = -1;
2485 	}
2486 
2487 	free(info);
2488 	free(fail);
2489 }
2490 
/*
 * Log a failed write on the command pipe, including the text for
 * the errno value current on entry.  This only reports the error;
 * it does not abort the dump.
 */
static void
#ifdef __STDC__
cmdwrterr(void)
#else
cmdwrterr()
#endif
{
	int pipe_errno;

	/* capture errno before any further call has a chance to change it */
	pipe_errno = errno;
	msg(gettext("Error writing command pipe: %s\n"),
	    strerror(pipe_errno));
}
2501 
/*
 * Log a failed read on the command pipe, including the text for
 * the errno value current on entry.  This only reports the error;
 * it does not abort the dump.
 */
static void
#ifdef __STDC__
cmdrderr(void)
#else
cmdrderr()
#endif
{
	int pipe_errno;

	/* capture errno before any further call has a chance to change it */
	pipe_errno = errno;
	msg(gettext("Error reading command pipe: %s\n"),
	    strerror(pipe_errno));
}
2512