xref: /freebsd/contrib/file/src/compress.c (revision a2dfb7224ec9933ee804cae54d51848dce938b6b)
1b6cee71dSXin LI /*
2b6cee71dSXin LI  * Copyright (c) Ian F. Darwin 1986-1995.
3b6cee71dSXin LI  * Software written by Ian F. Darwin and others;
4b6cee71dSXin LI  * maintained 1995-present by Christos Zoulas and others.
5b6cee71dSXin LI  *
6b6cee71dSXin LI  * Redistribution and use in source and binary forms, with or without
7b6cee71dSXin LI  * modification, are permitted provided that the following conditions
8b6cee71dSXin LI  * are met:
9b6cee71dSXin LI  * 1. Redistributions of source code must retain the above copyright
10b6cee71dSXin LI  *    notice immediately at the beginning of the file, without modification,
11b6cee71dSXin LI  *    this list of conditions, and the following disclaimer.
12b6cee71dSXin LI  * 2. Redistributions in binary form must reproduce the above copyright
13b6cee71dSXin LI  *    notice, this list of conditions and the following disclaimer in the
14b6cee71dSXin LI  *    documentation and/or other materials provided with the distribution.
15b6cee71dSXin LI  *
16b6cee71dSXin LI  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17b6cee71dSXin LI  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18b6cee71dSXin LI  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19b6cee71dSXin LI  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20b6cee71dSXin LI  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21b6cee71dSXin LI  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22b6cee71dSXin LI  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23b6cee71dSXin LI  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24b6cee71dSXin LI  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25b6cee71dSXin LI  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26b6cee71dSXin LI  * SUCH DAMAGE.
27b6cee71dSXin LI  */
28b6cee71dSXin LI /*
29b6cee71dSXin LI  * compress routines:
30b6cee71dSXin LI  *	zmagic() - returns 0 if not recognized, uncompresses and prints
31b6cee71dSXin LI  *		   information if recognized
32b6cee71dSXin LI  *	uncompress(method, old, n, newch) - uncompress old into new,
33b6cee71dSXin LI  *					    using method, return sizeof new
34b6cee71dSXin LI  */
35b6cee71dSXin LI #include "file.h"
36b6cee71dSXin LI 
37b6cee71dSXin LI #ifndef lint
38*a2dfb722SXin LI FILE_RCSID("@(#)$File: compress.c,v 1.136 2022/09/13 16:08:34 christos Exp $")
39b6cee71dSXin LI #endif
40b6cee71dSXin LI 
41b6cee71dSXin LI #include "magic.h"
42b6cee71dSXin LI #include <stdlib.h>
43b6cee71dSXin LI #ifdef HAVE_UNISTD_H
44b6cee71dSXin LI #include <unistd.h>
45b6cee71dSXin LI #endif
46a4d6d3b8SXin LI #ifdef HAVE_SPAWN_H
47a4d6d3b8SXin LI #include <spawn.h>
48a4d6d3b8SXin LI #endif
49b6cee71dSXin LI #include <string.h>
50b6cee71dSXin LI #include <errno.h>
513e41d09dSXin LI #include <ctype.h>
523e41d09dSXin LI #include <stdarg.h>
534460e5b0SXin LI #include <signal.h>
545f0216bdSXin LI #ifndef HAVE_SIG_T
555f0216bdSXin LI typedef void (*sig_t)(int);
565f0216bdSXin LI #endif /* HAVE_SIG_T */
57a4d6d3b8SXin LI #ifdef HAVE_SYS_IOCTL_H
58b6cee71dSXin LI #include <sys/ioctl.h>
59b6cee71dSXin LI #endif
60b6cee71dSXin LI #ifdef HAVE_SYS_WAIT_H
61b6cee71dSXin LI #include <sys/wait.h>
62b6cee71dSXin LI #endif
63b6cee71dSXin LI #if defined(HAVE_SYS_TIME_H)
64b6cee71dSXin LI #include <sys/time.h>
65b6cee71dSXin LI #endif
6648c779cdSXin LI 
6740427ccaSGordon Tetlow #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
68b6cee71dSXin LI #define BUILTIN_DECOMPRESS
69b6cee71dSXin LI #include <zlib.h>
703e41d09dSXin LI #endif
7148c779cdSXin LI 
722726a701SXin LI #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
7348c779cdSXin LI #define BUILTIN_BZLIB
7448c779cdSXin LI #include <bzlib.h>
7548c779cdSXin LI #endif
7648c779cdSXin LI 
7743a5ec4eSXin LI #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
78d38c30c0SXin LI #define BUILTIN_XZLIB
79d38c30c0SXin LI #include <lzma.h>
80d38c30c0SXin LI #endif
81d38c30c0SXin LI 
#ifndef DEBUG
/* Non-debug builds: trace calls expand to nothing. */
#define DPRINTF(...)
#else
/* Descriptor used for trace output; opened lazily by DPRINTF. */
int tty = -1;
/*
 * Debug trace: printf-style output to /dev/tty.  The terminal is opened
 * on first use; if it cannot be opened the process aborts.
 */
#define DPRINTF(...)	do { \
	if (tty == -1 && (tty = open("/dev/tty", O_RDWR)) == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#endif
94b6cee71dSXin LI 
953e41d09dSXin LI #ifdef ZLIBSUPPORT
963e41d09dSXin LI /*
973e41d09dSXin LI  * The following python code is not really used because ZLIBSUPPORT is only
983e41d09dSXin LI  * defined if we have a built-in zlib, and the built-in zlib handles that.
9940427ccaSGordon Tetlow  * That is not true for android where we have zlib.h and not -lz.
1003e41d09dSXin LI  */
/* Python one-liner that inflates a zlib stream read from stdin to stdout. */
static const char zlibcode[] =
    "import sys, zlib; "
    "sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* argv that feeds zlibcode to an external interpreter. */
static const char *zlib_args[] = {
	"python", "-c", zlibcode, NULL
};
1053e41d09dSXin LI 
/*
 * Decide whether buf starts with a plausible zlib stream header.
 *
 * buf must hold at least two bytes (the compr[] entry for this matcher
 * has maglen -2, and file_zmagic skips shorter inputs).
 *
 * Returns 1 when the low nibble of the first byte is 8, its top bit is
 * clear, and the two header bytes read as a big-endian 16-bit number
 * (buf[0] * 256 + buf[1]) are a multiple of 31; otherwise returns 0.
 *
 * The value is assembled arithmetically, so it does not depend on the
 * host byte order.  The previous host-endianness switch swapped the two
 * bytes on big-endian machines and therefore checked the wrong number.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int header;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	header = buf[0] * 256U + buf[1];
	return header % 31 == 0;
}
1223e41d09dSXin LI #endif
1233e41d09dSXin LI 
/*
 * Heuristic matcher for a raw LZMA header.
 *
 * Accepts when byte 0 is 0x5d, bytes 1 and 2 are zero, and byte 12 is
 * either 0x00 or 0xff.  buf must hold at least 13 bytes (the compr[]
 * entry for this matcher has maglen -13).
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	return buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0 &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
133d38c30c0SXin LI 
/* Command-line flags shared by several helper argv tables below. */
#define gzip_flags "-cd"
#define lrzip_flags "-do"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argv vectors for the external decompressors; the
 * compr[] table refers to these by name.
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", lrzip_flags, NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
162b6cee71dSXin LI 
16348c779cdSXin LI #define	do_zlib		NULL
16448c779cdSXin LI #define	do_bzlib	NULL
16548c779cdSXin LI 
1663e41d09dSXin LI private const struct {
1672726a701SXin LI 	union {
1682726a701SXin LI 		const char *magic;
1692726a701SXin LI 		int (*func)(const unsigned char *);
1702726a701SXin LI 	} u;
171d38c30c0SXin LI 	int maglen;
1723e41d09dSXin LI 	const char **argv;
17348c779cdSXin LI 	void *unused;
1743e41d09dSXin LI } compr[] = {
175d38c30c0SXin LI #define METH_FROZEN	2
176d38c30c0SXin LI #define METH_BZIP	7
177d38c30c0SXin LI #define METH_XZ		9
178d38c30c0SXin LI #define METH_LZMA	13
179d38c30c0SXin LI #define METH_ZLIB	14
1802726a701SXin LI     { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
1813e41d09dSXin LI     /* Uncompress can get stuck; so use gzip first if we have it
1823e41d09dSXin LI      * Idea from Damien Clark, thanks! */
1832726a701SXin LI     { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
1842726a701SXin LI     { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
1852726a701SXin LI     { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
1862726a701SXin LI     { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
1873e41d09dSXin LI     /* the standard pack utilities do not accept standard input */
1882726a701SXin LI     { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
1892726a701SXin LI     { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
1903e41d09dSXin LI     /* ...only first file examined */
1912726a701SXin LI     { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
1922726a701SXin LI     { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
1932726a701SXin LI     { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
1942726a701SXin LI     { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
1952726a701SXin LI     { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
1962726a701SXin LI     { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
1972726a701SXin LI     { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
1983e41d09dSXin LI #ifdef ZLIBSUPPORT
1992726a701SXin LI     { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
2003e41d09dSXin LI #endif
2013e41d09dSXin LI };
2023e41d09dSXin LI 
2033e41d09dSXin LI #define OKDATA 	0
2043e41d09dSXin LI #define NODATA	1
2053e41d09dSXin LI #define ERRDATA	2
206b6cee71dSXin LI 
207b6cee71dSXin LI private ssize_t swrite(int, const void *, size_t);
208b6cee71dSXin LI #if HAVE_FORK
20948c779cdSXin LI private size_t ncompr = __arraycount(compr);
2103e41d09dSXin LI private int uncompressbuf(int, size_t, size_t, const unsigned char *,
2113e41d09dSXin LI     unsigned char **, size_t *);
212b6cee71dSXin LI #ifdef BUILTIN_DECOMPRESS
2133e41d09dSXin LI private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
2143e41d09dSXin LI     size_t *, int);
2153e41d09dSXin LI private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
2163e41d09dSXin LI     size_t *);
217b6cee71dSXin LI #endif
21848c779cdSXin LI #ifdef BUILTIN_BZLIB
21948c779cdSXin LI private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
220d38c30c0SXin LI     size_t *);
221d38c30c0SXin LI #endif
222d38c30c0SXin LI #ifdef BUILTIN_XZLIB
223d38c30c0SXin LI private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
224d38c30c0SXin LI     size_t *);
22548c779cdSXin LI #endif
22648c779cdSXin LI 
2273e41d09dSXin LI static int makeerror(unsigned char **, size_t *, const char *, ...)
2283e41d09dSXin LI     __attribute__((__format__(__printf__, 3, 4)));
2293e41d09dSXin LI private const char *methodname(size_t);
230b6cee71dSXin LI 
2312dc4dbb9SEitan Adler private int
2322dc4dbb9SEitan Adler format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
2332dc4dbb9SEitan Adler {
2342dc4dbb9SEitan Adler 	unsigned char *p;
2352dc4dbb9SEitan Adler 	int mime = ms->flags & MAGIC_MIME;
2362dc4dbb9SEitan Adler 
2372dc4dbb9SEitan Adler 	if (!mime)
2382dc4dbb9SEitan Adler 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
2392dc4dbb9SEitan Adler 
2402dc4dbb9SEitan Adler 	for (p = buf; *p; p++)
2412dc4dbb9SEitan Adler 		if (!isalnum(*p))
2422dc4dbb9SEitan Adler 			*p = '-';
2432dc4dbb9SEitan Adler 
2442dc4dbb9SEitan Adler 	return file_printf(ms, "application/x-decompression-error-%s-%s",
2452dc4dbb9SEitan Adler 	    methodname(i), buf);
2462dc4dbb9SEitan Adler }
2472dc4dbb9SEitan Adler 
248b6cee71dSXin LI protected int
24958a0f0d0SEitan Adler file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
250b6cee71dSXin LI {
251b6cee71dSXin LI 	unsigned char *newbuf = NULL;
252b6cee71dSXin LI 	size_t i, nsz;
2533e41d09dSXin LI 	char *rbuf;
2543e41d09dSXin LI 	file_pushbuf_t *pb;
25520f8619dSXin LI 	int urv, prv, rv = 0;
256b6cee71dSXin LI 	int mime = ms->flags & MAGIC_MIME;
25758a0f0d0SEitan Adler 	int fd = b->fd;
25848c779cdSXin LI 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
25958a0f0d0SEitan Adler 	size_t nbytes = b->flen;
26048c779cdSXin LI 	int sa_saved = 0;
26148c779cdSXin LI 	struct sigaction sig_act;
262b6cee71dSXin LI 
263b6cee71dSXin LI 	if ((ms->flags & MAGIC_COMPRESS) == 0)
264b6cee71dSXin LI 		return 0;
265b6cee71dSXin LI 
266b6cee71dSXin LI 	for (i = 0; i < ncompr; i++) {
2673e41d09dSXin LI 		int zm;
268d38c30c0SXin LI 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
269b6cee71dSXin LI 			continue;
270d38c30c0SXin LI 		if (compr[i].maglen < 0) {
2712726a701SXin LI 			zm = (*compr[i].u.func)(buf);
272d38c30c0SXin LI 		} else {
2732726a701SXin LI 			zm = memcmp(buf, compr[i].u.magic,
274d38c30c0SXin LI 			    CAST(size_t, compr[i].maglen)) == 0;
275d38c30c0SXin LI 		}
276b6cee71dSXin LI 
2773e41d09dSXin LI 		if (!zm)
2783e41d09dSXin LI 			continue;
27948c779cdSXin LI 
28048c779cdSXin LI 		/* Prevent SIGPIPE death if child dies unexpectedly */
28148c779cdSXin LI 		if (!sa_saved) {
28248c779cdSXin LI 			//We can use sig_act for both new and old, but
28348c779cdSXin LI 			struct sigaction new_act;
28448c779cdSXin LI 			memset(&new_act, 0, sizeof(new_act));
28548c779cdSXin LI 			new_act.sa_handler = SIG_IGN;
28648c779cdSXin LI 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
28748c779cdSXin LI 		}
28848c779cdSXin LI 
2893e41d09dSXin LI 		nsz = nbytes;
29020f8619dSXin LI 		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
29148c779cdSXin LI 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
29248c779cdSXin LI 		    (char *)newbuf, nsz);
29320f8619dSXin LI 		switch (urv) {
2943e41d09dSXin LI 		case OKDATA:
2953e41d09dSXin LI 		case ERRDATA:
2963e41d09dSXin LI 			ms->flags &= ~MAGIC_COMPRESS;
29720f8619dSXin LI 			if (urv == ERRDATA)
2982dc4dbb9SEitan Adler 				prv = format_decompression_error(ms, i, newbuf);
2993e41d09dSXin LI 			else
30048c779cdSXin LI 				prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
30120f8619dSXin LI 			if (prv == -1)
3023e41d09dSXin LI 				goto error;
30320f8619dSXin LI 			rv = 1;
3043e41d09dSXin LI 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
3053e41d09dSXin LI 				goto out;
3063e41d09dSXin LI 			if (mime != MAGIC_MIME && mime != 0)
3073e41d09dSXin LI 				goto out;
3083e41d09dSXin LI 			if ((file_printf(ms,
3093e41d09dSXin LI 			    mime ? " compressed-encoding=" : " (")) == -1)
3103e41d09dSXin LI 				goto error;
3113e41d09dSXin LI 			if ((pb = file_push_buffer(ms)) == NULL)
312b6cee71dSXin LI 				goto error;
31320f8619dSXin LI 			/*
31420f8619dSXin LI 			 * XXX: If file_buffer fails here, we overwrite
31520f8619dSXin LI 			 * the compressed text. FIXME.
31620f8619dSXin LI 			 */
31748c779cdSXin LI 			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
31848c779cdSXin LI 				if (file_pop_buffer(ms, pb) != NULL)
31948c779cdSXin LI 					abort();
320b6cee71dSXin LI 				goto error;
32148c779cdSXin LI 			}
3223e41d09dSXin LI 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
3233e41d09dSXin LI 				if (file_printf(ms, "%s", rbuf) == -1) {
3243e41d09dSXin LI 					free(rbuf);
325b6cee71dSXin LI 					goto error;
326b6cee71dSXin LI 				}
3273e41d09dSXin LI 				free(rbuf);
3283e41d09dSXin LI 			}
3293e41d09dSXin LI 			if (!mime && file_printf(ms, ")") == -1)
3303e41d09dSXin LI 				goto error;
33120f8619dSXin LI 			/*FALLTHROUGH*/
3323e41d09dSXin LI 		case NODATA:
33320f8619dSXin LI 			break;
3343e41d09dSXin LI 		default:
3353e41d09dSXin LI 			abort();
33620f8619dSXin LI 			/*NOTREACHED*/
33720f8619dSXin LI 		error:
33820f8619dSXin LI 			rv = -1;
33920f8619dSXin LI 			break;
3403e41d09dSXin LI 		}
3413e41d09dSXin LI 	}
3423e41d09dSXin LI out:
34320f8619dSXin LI 	DPRINTF("rv = %d\n", rv);
34420f8619dSXin LI 
34548c779cdSXin LI 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
34648c779cdSXin LI 		(void)sigaction(SIGPIPE, &sig_act, NULL);
34748c779cdSXin LI 
348b6cee71dSXin LI 	free(newbuf);
349b6cee71dSXin LI 	ms->flags |= MAGIC_COMPRESS;
3503e41d09dSXin LI 	DPRINTF("Zmagic returns %d\n", rv);
351b6cee71dSXin LI 	return rv;
352b6cee71dSXin LI }
353b6cee71dSXin LI #endif
354b6cee71dSXin LI /*
355b6cee71dSXin LI  * `safe' write for sockets and pipes.
356b6cee71dSXin LI  */
357b6cee71dSXin LI private ssize_t
358b6cee71dSXin LI swrite(int fd, const void *buf, size_t n)
359b6cee71dSXin LI {
360b6cee71dSXin LI 	ssize_t rv;
361b6cee71dSXin LI 	size_t rn = n;
362b6cee71dSXin LI 
363b6cee71dSXin LI 	do
364b6cee71dSXin LI 		switch (rv = write(fd, buf, n)) {
365b6cee71dSXin LI 		case -1:
366b6cee71dSXin LI 			if (errno == EINTR)
367b6cee71dSXin LI 				continue;
368b6cee71dSXin LI 			return -1;
369b6cee71dSXin LI 		default:
370b6cee71dSXin LI 			n -= rv;
371b6cee71dSXin LI 			buf = CAST(const char *, buf) + rv;
372b6cee71dSXin LI 			break;
373b6cee71dSXin LI 		}
374b6cee71dSXin LI 	while (n > 0);
375b6cee71dSXin LI 	return rn;
376b6cee71dSXin LI }
377b6cee71dSXin LI 
378b6cee71dSXin LI 
379b6cee71dSXin LI /*
380b6cee71dSXin LI  * `safe' read for sockets and pipes.
381b6cee71dSXin LI  */
382b6cee71dSXin LI protected ssize_t
383b6cee71dSXin LI sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
384b6cee71dSXin LI {
385b6cee71dSXin LI 	ssize_t rv;
386b6cee71dSXin LI #ifdef FIONREAD
387b6cee71dSXin LI 	int t = 0;
388b6cee71dSXin LI #endif
389b6cee71dSXin LI 	size_t rn = n;
390b6cee71dSXin LI 
391b6cee71dSXin LI 	if (fd == STDIN_FILENO)
392b6cee71dSXin LI 		goto nocheck;
393b6cee71dSXin LI 
394b6cee71dSXin LI #ifdef FIONREAD
395b6cee71dSXin LI 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
396b6cee71dSXin LI #ifdef FD_ZERO
397b6cee71dSXin LI 		ssize_t cnt;
398b6cee71dSXin LI 		for (cnt = 0;; cnt++) {
399b6cee71dSXin LI 			fd_set check;
400b6cee71dSXin LI 			struct timeval tout = {0, 100 * 1000};
401b6cee71dSXin LI 			int selrv;
402b6cee71dSXin LI 
403b6cee71dSXin LI 			FD_ZERO(&check);
404b6cee71dSXin LI 			FD_SET(fd, &check);
405b6cee71dSXin LI 
406b6cee71dSXin LI 			/*
407b6cee71dSXin LI 			 * Avoid soft deadlock: do not read if there
408b6cee71dSXin LI 			 * is nothing to read from sockets and pipes.
409b6cee71dSXin LI 			 */
410b6cee71dSXin LI 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
411b6cee71dSXin LI 			if (selrv == -1) {
412b6cee71dSXin LI 				if (errno == EINTR || errno == EAGAIN)
413b6cee71dSXin LI 					continue;
414b6cee71dSXin LI 			} else if (selrv == 0 && cnt >= 5) {
415b6cee71dSXin LI 				return 0;
416b6cee71dSXin LI 			} else
417b6cee71dSXin LI 				break;
418b6cee71dSXin LI 		}
419b6cee71dSXin LI #endif
420b6cee71dSXin LI 		(void)ioctl(fd, FIONREAD, &t);
421b6cee71dSXin LI 	}
422b6cee71dSXin LI 
42348c779cdSXin LI 	if (t > 0 && CAST(size_t, t) < n) {
424b6cee71dSXin LI 		n = t;
425b6cee71dSXin LI 		rn = n;
426b6cee71dSXin LI 	}
427b6cee71dSXin LI #endif
428b6cee71dSXin LI 
429b6cee71dSXin LI nocheck:
430b6cee71dSXin LI 	do
431b6cee71dSXin LI 		switch ((rv = read(fd, buf, n))) {
432b6cee71dSXin LI 		case -1:
433b6cee71dSXin LI 			if (errno == EINTR)
434b6cee71dSXin LI 				continue;
435b6cee71dSXin LI 			return -1;
436b6cee71dSXin LI 		case 0:
437b6cee71dSXin LI 			return rn - n;
438b6cee71dSXin LI 		default:
439b6cee71dSXin LI 			n -= rv;
440a5d223e6SXin LI 			buf = CAST(char *, CCAST(void *, buf)) + rv;
441b6cee71dSXin LI 			break;
442b6cee71dSXin LI 		}
443b6cee71dSXin LI 	while (n > 0);
444b6cee71dSXin LI 	return rn;
445b6cee71dSXin LI }
446b6cee71dSXin LI 
447b6cee71dSXin LI protected int
448b6cee71dSXin LI file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
449b6cee71dSXin LI     size_t nbytes)
450b6cee71dSXin LI {
451b6cee71dSXin LI 	char buf[4096];
452b6cee71dSXin LI 	ssize_t r;
453b6cee71dSXin LI 	int tfd;
454b6cee71dSXin LI 
455a4d6d3b8SXin LI #ifdef WIN32
456a4d6d3b8SXin LI 	const char *t;
457a4d6d3b8SXin LI 	buf[0] = '\0';
458a4d6d3b8SXin LI 	if ((t = getenv("TEMP")) != NULL)
459a4d6d3b8SXin LI 		(void)strlcpy(buf, t, sizeof(buf));
460a4d6d3b8SXin LI 	else if ((t = getenv("TMP")) != NULL)
461a4d6d3b8SXin LI 		(void)strlcpy(buf, t, sizeof(buf));
462a4d6d3b8SXin LI 	else if ((t = getenv("TMPDIR")) != NULL)
463a4d6d3b8SXin LI 		(void)strlcpy(buf, t, sizeof(buf));
464a4d6d3b8SXin LI 	if (buf[0] != '\0')
465a4d6d3b8SXin LI 		(void)strlcat(buf, "/", sizeof(buf));
466a4d6d3b8SXin LI 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
467a4d6d3b8SXin LI #else
468a4d6d3b8SXin LI 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
469a4d6d3b8SXin LI #endif
470b6cee71dSXin LI #ifndef HAVE_MKSTEMP
471b6cee71dSXin LI 	{
472b6cee71dSXin LI 		char *ptr = mktemp(buf);
473b6cee71dSXin LI 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
474b6cee71dSXin LI 		r = errno;
475b6cee71dSXin LI 		(void)unlink(ptr);
476b6cee71dSXin LI 		errno = r;
477b6cee71dSXin LI 	}
478b6cee71dSXin LI #else
479b6cee71dSXin LI 	{
480b6cee71dSXin LI 		int te;
48148c779cdSXin LI 		mode_t ou = umask(0);
482b6cee71dSXin LI 		tfd = mkstemp(buf);
48348c779cdSXin LI 		(void)umask(ou);
484b6cee71dSXin LI 		te = errno;
485b6cee71dSXin LI 		(void)unlink(buf);
486b6cee71dSXin LI 		errno = te;
487b6cee71dSXin LI 	}
488b6cee71dSXin LI #endif
489b6cee71dSXin LI 	if (tfd == -1) {
490b6cee71dSXin LI 		file_error(ms, errno,
491b6cee71dSXin LI 		    "cannot create temporary file for pipe copy");
492b6cee71dSXin LI 		return -1;
493b6cee71dSXin LI 	}
494b6cee71dSXin LI 
49548c779cdSXin LI 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
496b6cee71dSXin LI 		r = 1;
497b6cee71dSXin LI 	else {
498b6cee71dSXin LI 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
49948c779cdSXin LI 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
500b6cee71dSXin LI 				break;
501b6cee71dSXin LI 	}
502b6cee71dSXin LI 
503b6cee71dSXin LI 	switch (r) {
504b6cee71dSXin LI 	case -1:
505b6cee71dSXin LI 		file_error(ms, errno, "error copying from pipe to temp file");
506b6cee71dSXin LI 		return -1;
507b6cee71dSXin LI 	case 0:
508b6cee71dSXin LI 		break;
509b6cee71dSXin LI 	default:
510b6cee71dSXin LI 		file_error(ms, errno, "error while writing to temp file");
511b6cee71dSXin LI 		return -1;
512b6cee71dSXin LI 	}
513b6cee71dSXin LI 
514b6cee71dSXin LI 	/*
515b6cee71dSXin LI 	 * We duplicate the file descriptor, because fclose on a
516b6cee71dSXin LI 	 * tmpfile will delete the file, but any open descriptors
517b6cee71dSXin LI 	 * can still access the phantom inode.
518b6cee71dSXin LI 	 */
519b6cee71dSXin LI 	if ((fd = dup2(tfd, fd)) == -1) {
520b6cee71dSXin LI 		file_error(ms, errno, "could not dup descriptor for temp file");
521b6cee71dSXin LI 		return -1;
522b6cee71dSXin LI 	}
523b6cee71dSXin LI 	(void)close(tfd);
52448c779cdSXin LI 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
525b6cee71dSXin LI 		file_badseek(ms);
526b6cee71dSXin LI 		return -1;
527b6cee71dSXin LI 	}
528b6cee71dSXin LI 	return fd;
529b6cee71dSXin LI }
530b6cee71dSXin LI #if HAVE_FORK
531b6cee71dSXin LI #ifdef BUILTIN_DECOMPRESS
532b6cee71dSXin LI 
533b6cee71dSXin LI #define FHCRC		(1 << 1)
534b6cee71dSXin LI #define FEXTRA		(1 << 2)
535b6cee71dSXin LI #define FNAME		(1 << 3)
536b6cee71dSXin LI #define FCOMMENT	(1 << 4)
537b6cee71dSXin LI 
5383e41d09dSXin LI 
5393e41d09dSXin LI private int
5403e41d09dSXin LI uncompressgzipped(const unsigned char *old, unsigned char **newch,
5413e41d09dSXin LI     size_t bytes_max, size_t *n)
542b6cee71dSXin LI {
543b6cee71dSXin LI 	unsigned char flg = old[3];
544b6cee71dSXin LI 	size_t data_start = 10;
545b6cee71dSXin LI 
546b6cee71dSXin LI 	if (flg & FEXTRA) {
5473e41d09dSXin LI 		if (data_start + 1 >= *n)
5483e41d09dSXin LI 			goto err;
549b6cee71dSXin LI 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
550b6cee71dSXin LI 	}
551b6cee71dSXin LI 	if (flg & FNAME) {
5523e41d09dSXin LI 		while(data_start < *n && old[data_start])
553b6cee71dSXin LI 			data_start++;
554b6cee71dSXin LI 		data_start++;
555b6cee71dSXin LI 	}
556b6cee71dSXin LI 	if (flg & FCOMMENT) {
5573e41d09dSXin LI 		while(data_start < *n && old[data_start])
558b6cee71dSXin LI 			data_start++;
559b6cee71dSXin LI 		data_start++;
560b6cee71dSXin LI 	}
561b6cee71dSXin LI 	if (flg & FHCRC)
562b6cee71dSXin LI 		data_start += 2;
563b6cee71dSXin LI 
5643e41d09dSXin LI 	if (data_start >= *n)
5653e41d09dSXin LI 		goto err;
5663e41d09dSXin LI 
5673e41d09dSXin LI 	*n -= data_start;
5683e41d09dSXin LI 	old += data_start;
5693e41d09dSXin LI 	return uncompresszlib(old, newch, bytes_max, n, 0);
5703e41d09dSXin LI err:
5713e41d09dSXin LI 	return makeerror(newch, n, "File too short");
572b6cee71dSXin LI }
573b6cee71dSXin LI 
5743e41d09dSXin LI private int
5753e41d09dSXin LI uncompresszlib(const unsigned char *old, unsigned char **newch,
5763e41d09dSXin LI     size_t bytes_max, size_t *n, int zlib)
5773e41d09dSXin LI {
5783e41d09dSXin LI 	int rc;
5793e41d09dSXin LI 	z_stream z;
5803e41d09dSXin LI 
5813e41d09dSXin LI 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
5823e41d09dSXin LI 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
5833e41d09dSXin LI 
5843e41d09dSXin LI 	z.next_in = CCAST(Bytef *, old);
5853e41d09dSXin LI 	z.avail_in = CAST(uint32_t, *n);
586b6cee71dSXin LI 	z.next_out = *newch;
58740427ccaSGordon Tetlow 	z.avail_out = CAST(unsigned int, bytes_max);
588b6cee71dSXin LI 	z.zalloc = Z_NULL;
589b6cee71dSXin LI 	z.zfree = Z_NULL;
590b6cee71dSXin LI 	z.opaque = Z_NULL;
591b6cee71dSXin LI 
592b6cee71dSXin LI 	/* LINTED bug in header macro */
5933e41d09dSXin LI 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
5943e41d09dSXin LI 	if (rc != Z_OK)
5953e41d09dSXin LI 		goto err;
596b6cee71dSXin LI 
597b6cee71dSXin LI 	rc = inflate(&z, Z_SYNC_FLUSH);
5983e41d09dSXin LI 	if (rc != Z_OK && rc != Z_STREAM_END)
5993e41d09dSXin LI 		goto err;
600b6cee71dSXin LI 
60148c779cdSXin LI 	*n = CAST(size_t, z.total_out);
6023e41d09dSXin LI 	rc = inflateEnd(&z);
6033e41d09dSXin LI 	if (rc != Z_OK)
6043e41d09dSXin LI 		goto err;
605b6cee71dSXin LI 
606b6cee71dSXin LI 	/* let's keep the nul-terminate tradition */
6073e41d09dSXin LI 	(*newch)[*n] = '\0';
608b6cee71dSXin LI 
6093e41d09dSXin LI 	return OKDATA;
6103e41d09dSXin LI err:
61148c779cdSXin LI 	strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
61248c779cdSXin LI 	*n = strlen(RCAST(char *, *newch));
6133e41d09dSXin LI 	return ERRDATA;
614b6cee71dSXin LI }
615b6cee71dSXin LI #endif
616b6cee71dSXin LI 
617d38c30c0SXin LI #ifdef BUILTIN_BZLIB
618d38c30c0SXin LI private int
619d38c30c0SXin LI uncompressbzlib(const unsigned char *old, unsigned char **newch,
620d38c30c0SXin LI     size_t bytes_max, size_t *n)
621d38c30c0SXin LI {
622d38c30c0SXin LI 	int rc;
623d38c30c0SXin LI 	bz_stream bz;
624d38c30c0SXin LI 
625d38c30c0SXin LI 	memset(&bz, 0, sizeof(bz));
626d38c30c0SXin LI 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
627d38c30c0SXin LI 	if (rc != BZ_OK)
628d38c30c0SXin LI 		goto err;
629d38c30c0SXin LI 
630d38c30c0SXin LI 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
631d38c30c0SXin LI 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
632d38c30c0SXin LI 
633d38c30c0SXin LI 	bz.next_in = CCAST(char *, RCAST(const char *, old));
634d38c30c0SXin LI 	bz.avail_in = CAST(uint32_t, *n);
635d38c30c0SXin LI 	bz.next_out = RCAST(char *, *newch);
636d38c30c0SXin LI 	bz.avail_out = CAST(unsigned int, bytes_max);
637d38c30c0SXin LI 
638d38c30c0SXin LI 	rc = BZ2_bzDecompress(&bz);
639d38c30c0SXin LI 	if (rc != BZ_OK && rc != BZ_STREAM_END)
640d38c30c0SXin LI 		goto err;
641d38c30c0SXin LI 
642d38c30c0SXin LI 	/* Assume byte_max is within 32bit */
643d38c30c0SXin LI 	/* assert(bz.total_out_hi32 == 0); */
644d38c30c0SXin LI 	*n = CAST(size_t, bz.total_out_lo32);
645d38c30c0SXin LI 	rc = BZ2_bzDecompressEnd(&bz);
646d38c30c0SXin LI 	if (rc != BZ_OK)
647d38c30c0SXin LI 		goto err;
648d38c30c0SXin LI 
649d38c30c0SXin LI 	/* let's keep the nul-terminate tradition */
650d38c30c0SXin LI 	(*newch)[*n] = '\0';
651d38c30c0SXin LI 
652d38c30c0SXin LI 	return OKDATA;
653d38c30c0SXin LI err:
654d38c30c0SXin LI 	snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
655d38c30c0SXin LI 	*n = strlen(RCAST(char *, *newch));
656d38c30c0SXin LI 	return ERRDATA;
657d38c30c0SXin LI }
658d38c30c0SXin LI #endif
659d38c30c0SXin LI 
660d38c30c0SXin LI #ifdef BUILTIN_XZLIB
661d38c30c0SXin LI private int
662d38c30c0SXin LI uncompressxzlib(const unsigned char *old, unsigned char **newch,
663d38c30c0SXin LI     size_t bytes_max, size_t *n)
664d38c30c0SXin LI {
665d38c30c0SXin LI 	int rc;
666d38c30c0SXin LI 	lzma_stream xz;
667d38c30c0SXin LI 
668d38c30c0SXin LI 	memset(&xz, 0, sizeof(xz));
669d38c30c0SXin LI 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
670d38c30c0SXin LI 	if (rc != LZMA_OK)
671d38c30c0SXin LI 		goto err;
672d38c30c0SXin LI 
673d38c30c0SXin LI 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
674d38c30c0SXin LI 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
675d38c30c0SXin LI 
676d38c30c0SXin LI 	xz.next_in = CCAST(const uint8_t *, old);
677d38c30c0SXin LI 	xz.avail_in = CAST(uint32_t, *n);
678d38c30c0SXin LI 	xz.next_out = RCAST(uint8_t *, *newch);
679d38c30c0SXin LI 	xz.avail_out = CAST(unsigned int, bytes_max);
680d38c30c0SXin LI 
681d38c30c0SXin LI 	rc = lzma_code(&xz, LZMA_RUN);
682d38c30c0SXin LI 	if (rc != LZMA_OK && rc != LZMA_STREAM_END)
683d38c30c0SXin LI 		goto err;
684d38c30c0SXin LI 
685d38c30c0SXin LI 	*n = CAST(size_t, xz.total_out);
686d38c30c0SXin LI 
687d38c30c0SXin LI 	lzma_end(&xz);
688d38c30c0SXin LI 
689d38c30c0SXin LI 	/* let's keep the nul-terminate tradition */
690d38c30c0SXin LI 	(*newch)[*n] = '\0';
691d38c30c0SXin LI 
692d38c30c0SXin LI 	return OKDATA;
693d38c30c0SXin LI err:
694d38c30c0SXin LI 	snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
695d38c30c0SXin LI 	*n = strlen(RCAST(char *, *newch));
696d38c30c0SXin LI 	return ERRDATA;
697d38c30c0SXin LI }
698d38c30c0SXin LI #endif
699d38c30c0SXin LI 
700d38c30c0SXin LI 
7013e41d09dSXin LI static int
7023e41d09dSXin LI makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
703b6cee71dSXin LI {
7043e41d09dSXin LI 	char *msg;
7053e41d09dSXin LI 	va_list ap;
7063e41d09dSXin LI 	int rv;
707b6cee71dSXin LI 
7083e41d09dSXin LI 	va_start(ap, fmt);
7093e41d09dSXin LI 	rv = vasprintf(&msg, fmt, ap);
7103e41d09dSXin LI 	va_end(ap);
7113e41d09dSXin LI 	if (rv < 0) {
7123e41d09dSXin LI 		*buf = NULL;
7133e41d09dSXin LI 		*len = 0;
714b6cee71dSXin LI 		return NODATA;
715b6cee71dSXin LI 	}
71648c779cdSXin LI 	*buf = RCAST(unsigned char *, msg);
7173e41d09dSXin LI 	*len = strlen(msg);
7183e41d09dSXin LI 	return ERRDATA;
719b6cee71dSXin LI }
720b6cee71dSXin LI 
/*
 * Close descriptor slot i of the array fd and mark it as closed (-1).
 * A slot that already holds -1 is left alone.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
729b6cee71dSXin LI 
/*
 * Close both ends of a descriptor pair: each slot that is not already
 * -1 is closed and then set to -1.
 */
static void
closep(int *fd)
{
	int *slot;

	for (slot = fd; slot != fd + 2; slot++) {
		if (*slot == -1)
			continue;
		(void) close(*slot);
		*slot = -1;
	}
}
7373e41d09dSXin LI 
/*
 * Arrange for descriptor fd to become descriptor i, then drop fd.
 *
 * With HAVE_POSIX_SPAWNP the dup2 and close are only recorded in the
 * posix_spawn file-actions object that v points to.  Otherwise they are
 * carried out immediately in the calling process, which exits with
 * status 1 when dup2() fails; v is not used in that configuration.
 *
 * Nothing happens when fd already equals i.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd != i) {
#ifdef HAVE_POSIX_SPAWNP
		posix_spawn_file_actions_t *actions =
		    RCAST(posix_spawn_file_actions_t *, v);

		posix_spawn_file_actions_adddup2(actions, fd, i);
		posix_spawn_file_actions_addclose(actions, fd);
#else
		(void)v;
		if (dup2(fd, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", fd, i,
			    strerror(errno));
			exit(1);
		}
		close(fd);
#endif
	}
}
755a4d6d3b8SXin LI 
/*
 * Get rid of descriptor fd on behalf of the decompressor child.
 *
 * With HAVE_POSIX_SPAWNP the close is queued on the
 * posix_spawn_file_actions_t that v points at; otherwise v is ignored
 * and fd is closed right away in the calling process.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
766a4d6d3b8SXin LI 
767a4d6d3b8SXin LI static void
768a4d6d3b8SXin LI handledesc(void *v, int fd, int fdp[3][2])
769a4d6d3b8SXin LI {
770a4d6d3b8SXin LI 	if (fd != -1) {
771a4d6d3b8SXin LI 		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
772a4d6d3b8SXin LI 		movedesc(v, STDIN_FILENO, fd);
773a4d6d3b8SXin LI 	} else {
774a4d6d3b8SXin LI 		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
775a4d6d3b8SXin LI 		if (fdp[STDIN_FILENO][1] > 2)
776a4d6d3b8SXin LI 		    closedesc(v, fdp[STDIN_FILENO][1]);
777a4d6d3b8SXin LI 	}
778a4d6d3b8SXin LI 
779a4d6d3b8SXin LI 	file_clear_closexec(STDIN_FILENO);
780a4d6d3b8SXin LI 
781a4d6d3b8SXin LI ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
782a4d6d3b8SXin LI 	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
783a4d6d3b8SXin LI 	if (fdp[STDOUT_FILENO][0] > 2)
784a4d6d3b8SXin LI 		closedesc(v, fdp[STDOUT_FILENO][0]);
785a4d6d3b8SXin LI 
786a4d6d3b8SXin LI 	file_clear_closexec(STDOUT_FILENO);
787a4d6d3b8SXin LI 
788a4d6d3b8SXin LI 	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
789a4d6d3b8SXin LI 	if (fdp[STDERR_FILENO][0] > 2)
790a4d6d3b8SXin LI 		closedesc(v, fdp[STDERR_FILENO][0]);
791a4d6d3b8SXin LI 
792a4d6d3b8SXin LI 	file_clear_closexec(STDERR_FILENO);
7933e41d09dSXin LI }
794b6cee71dSXin LI 
79548c779cdSXin LI static pid_t
79648c779cdSXin LI writechild(int fd, const void *old, size_t n)
7973e41d09dSXin LI {
79848c779cdSXin LI 	pid_t pid;
7993e41d09dSXin LI 
800b6cee71dSXin LI 	/*
801b6cee71dSXin LI 	 * fork again, to avoid blocking because both
802b6cee71dSXin LI 	 * pipes filled
803b6cee71dSXin LI 	 */
80448c779cdSXin LI 	pid = fork();
80548c779cdSXin LI 	if (pid == -1) {
80648c779cdSXin LI 		DPRINTF("Fork failed (%s)\n", strerror(errno));
80748c779cdSXin LI 		exit(1);
80848c779cdSXin LI 	}
80948c779cdSXin LI 	if (pid == 0) {
81048c779cdSXin LI 		/* child */
81148c779cdSXin LI 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
8123e41d09dSXin LI 			DPRINTF("Write failed (%s)\n", strerror(errno));
813b6cee71dSXin LI 			exit(1);
814b6cee71dSXin LI 		}
815b6cee71dSXin LI 		exit(0);
816c2931133SXin LI 	}
81748c779cdSXin LI 	/* parent */
81848c779cdSXin LI 	return pid;
819b6cee71dSXin LI }
820b6cee71dSXin LI 
8213e41d09dSXin LI static ssize_t
8223e41d09dSXin LI filter_error(unsigned char *ubuf, ssize_t n)
8233e41d09dSXin LI {
8243e41d09dSXin LI 	char *p;
8253e41d09dSXin LI 	char *buf;
826c2931133SXin LI 
8273e41d09dSXin LI 	ubuf[n] = '\0';
82848c779cdSXin LI 	buf = RCAST(char *, ubuf);
82948c779cdSXin LI 	while (isspace(CAST(unsigned char, *buf)))
8303e41d09dSXin LI 		buf++;
8313e41d09dSXin LI 	DPRINTF("Filter error[[[%s]]]\n", buf);
83248c779cdSXin LI 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
8333e41d09dSXin LI 		*p = '\0';
83448c779cdSXin LI 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
8353e41d09dSXin LI 		*p = '\0';
83648c779cdSXin LI 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
8373e41d09dSXin LI 		++p;
83848c779cdSXin LI 		while (isspace(CAST(unsigned char, *p)))
8393e41d09dSXin LI 			p++;
8403e41d09dSXin LI 		n = strlen(p);
84140427ccaSGordon Tetlow 		memmove(ubuf, p, CAST(size_t, n + 1));
8423e41d09dSXin LI 	}
8433e41d09dSXin LI 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
8443e41d09dSXin LI 	if (islower(*ubuf))
8453e41d09dSXin LI 		*ubuf = toupper(*ubuf);
846b6cee71dSXin LI 	return n;
847b6cee71dSXin LI }
8483e41d09dSXin LI 
8493e41d09dSXin LI private const char *
8503e41d09dSXin LI methodname(size_t method)
8513e41d09dSXin LI {
852d38c30c0SXin LI 	switch (method) {
8533e41d09dSXin LI #ifdef BUILTIN_DECOMPRESS
854d38c30c0SXin LI 	case METH_FROZEN:
855d38c30c0SXin LI 	case METH_ZLIB:
8563e41d09dSXin LI 		return "zlib";
8573e41d09dSXin LI #endif
858d38c30c0SXin LI #ifdef BUILTIN_BZLIB
859d38c30c0SXin LI 	case METH_BZIP:
860d38c30c0SXin LI 		return "bzlib";
861d38c30c0SXin LI #endif
862d38c30c0SXin LI #ifdef BUILTIN_XZLIB
863d38c30c0SXin LI 	case METH_XZ:
864d38c30c0SXin LI 	case METH_LZMA:
865d38c30c0SXin LI 		return "xzlib";
866d38c30c0SXin LI #endif
867d38c30c0SXin LI 	default:
8683e41d09dSXin LI 		return compr[method].argv[0];
8693e41d09dSXin LI 	}
870d38c30c0SXin LI }
8713e41d09dSXin LI 
8723e41d09dSXin LI private int
8733e41d09dSXin LI uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
8743e41d09dSXin LI     unsigned char **newch, size_t* n)
8753e41d09dSXin LI {
8763e41d09dSXin LI 	int fdp[3][2];
87748c779cdSXin LI 	int status, rv, w;
87848c779cdSXin LI 	pid_t pid;
87948c779cdSXin LI 	pid_t writepid = -1;
8803e41d09dSXin LI 	size_t i;
8813e41d09dSXin LI 	ssize_t r;
882a4d6d3b8SXin LI 	char *const *args;
883a4d6d3b8SXin LI #ifdef HAVE_POSIX_SPAWNP
884a4d6d3b8SXin LI 	posix_spawn_file_actions_t fa;
885a4d6d3b8SXin LI #endif
8863e41d09dSXin LI 
887d38c30c0SXin LI 	switch (method) {
8883e41d09dSXin LI #ifdef BUILTIN_DECOMPRESS
889d38c30c0SXin LI 	case METH_FROZEN:
8903e41d09dSXin LI 		return uncompressgzipped(old, newch, bytes_max, n);
891d38c30c0SXin LI 	case METH_ZLIB:
8923e41d09dSXin LI 		return uncompresszlib(old, newch, bytes_max, n, 1);
8933e41d09dSXin LI #endif
894d38c30c0SXin LI #ifdef BUILTIN_BZLIB
895d38c30c0SXin LI 	case METH_BZIP:
896d38c30c0SXin LI 		return uncompressbzlib(old, newch, bytes_max, n);
897d38c30c0SXin LI #endif
898d38c30c0SXin LI #ifdef BUILTIN_XZLIB
899d38c30c0SXin LI 	case METH_XZ:
900d38c30c0SXin LI 	case METH_LZMA:
901d38c30c0SXin LI 		return uncompressxzlib(old, newch, bytes_max, n);
902d38c30c0SXin LI #endif
903d38c30c0SXin LI 	default:
904d38c30c0SXin LI 		break;
905d38c30c0SXin LI 	}
906d38c30c0SXin LI 
9073e41d09dSXin LI 	(void)fflush(stdout);
9083e41d09dSXin LI 	(void)fflush(stderr);
9093e41d09dSXin LI 
9103e41d09dSXin LI 	for (i = 0; i < __arraycount(fdp); i++)
9113e41d09dSXin LI 		fdp[i][0] = fdp[i][1] = -1;
9123e41d09dSXin LI 
91343a5ec4eSXin LI 	/*
91443a5ec4eSXin LI 	 * There are multithreaded users who run magic_file()
91543a5ec4eSXin LI 	 * from dozens of threads. If two parallel magic_file() calls
91643a5ec4eSXin LI 	 * analyze two large compressed files, both will spawn
91743a5ec4eSXin LI 	 * an uncompressing child here, which writes out uncompressed data.
91843a5ec4eSXin LI 	 * We read some portion, then close the pipe, then waitpid() the child.
91943a5ec4eSXin LI 	 * If uncompressed data is larger, child shound get EPIPE and exit.
92043a5ec4eSXin LI 	 * However, with *parallel* calls OTHER child may unintentionally
92143a5ec4eSXin LI 	 * inherit pipe fds, thus keeping pipe open and making writes in
92243a5ec4eSXin LI 	 * our child block instead of failing with EPIPE!
92343a5ec4eSXin LI 	 * (For the bug to occur, two threads must mutually inherit their pipes,
92443a5ec4eSXin LI 	 * and both must have large outputs. Thus it happens not that often).
92543a5ec4eSXin LI 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
92643a5ec4eSXin LI 	 */
92743a5ec4eSXin LI 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
92843a5ec4eSXin LI 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
92943a5ec4eSXin LI 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
9303e41d09dSXin LI 		closep(fdp[STDIN_FILENO]);
9313e41d09dSXin LI 		closep(fdp[STDOUT_FILENO]);
9323e41d09dSXin LI 		return makeerror(newch, n, "Cannot create pipe, %s",
9333e41d09dSXin LI 		    strerror(errno));
9343e41d09dSXin LI 	}
9353e41d09dSXin LI 
936a4d6d3b8SXin LI 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
937a4d6d3b8SXin LI #ifdef HAVE_POSIX_SPAWNP
938a4d6d3b8SXin LI 	posix_spawn_file_actions_init(&fa);
939a4d6d3b8SXin LI 
940a4d6d3b8SXin LI 	handledesc(&fa, fd, fdp);
941a4d6d3b8SXin LI 
942a4d6d3b8SXin LI 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
943a4d6d3b8SXin LI 	    args, NULL);
944a4d6d3b8SXin LI 
945a4d6d3b8SXin LI 	posix_spawn_file_actions_destroy(&fa);
946a4d6d3b8SXin LI 
947a4d6d3b8SXin LI 	if (status == -1) {
948a4d6d3b8SXin LI 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
949a4d6d3b8SXin LI 		    compr[method].argv[0], strerror(errno));
950a4d6d3b8SXin LI 	}
951a4d6d3b8SXin LI #else
95248c779cdSXin LI 	/* For processes with large mapped virtual sizes, vfork
95348c779cdSXin LI 	 * may be _much_ faster (10-100 times) than fork.
95448c779cdSXin LI 	 */
95548c779cdSXin LI 	pid = vfork();
95648c779cdSXin LI 	if (pid == -1) {
95748c779cdSXin LI 		return makeerror(newch, n, "Cannot vfork, %s",
95848c779cdSXin LI 		    strerror(errno));
95948c779cdSXin LI 	}
96048c779cdSXin LI 	if (pid == 0) {
96148c779cdSXin LI 		/* child */
96248c779cdSXin LI 		/* Note: we are after vfork, do not modify memory
96348c779cdSXin LI 		 * in a way which confuses parent. In particular,
96448c779cdSXin LI 		 * do not modify fdp[i][j].
96548c779cdSXin LI 		 */
966a4d6d3b8SXin LI 		handledesc(NULL, fd, fdp);
96743a5ec4eSXin LI 
968a4d6d3b8SXin LI 		(void)execvp(compr[method].argv[0], args);
9693e41d09dSXin LI 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
9703e41d09dSXin LI 		    compr[method].argv[0], strerror(errno));
97148c779cdSXin LI 		_exit(1); /* _exit(), not exit(), because of vfork */
97248c779cdSXin LI 	}
973a4d6d3b8SXin LI #endif
97448c779cdSXin LI 	/* parent */
97548c779cdSXin LI 	/* Close write sides of child stdout/err pipes */
9763e41d09dSXin LI 	for (i = 1; i < __arraycount(fdp); i++)
9773e41d09dSXin LI 		closefd(fdp[i], 1);
97848c779cdSXin LI 	/* Write the buffer data to child stdin, if we don't have fd */
97948c779cdSXin LI 	if (fd == -1) {
98048c779cdSXin LI 		closefd(fdp[STDIN_FILENO], 0);
98148c779cdSXin LI 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
98248c779cdSXin LI 		closefd(fdp[STDIN_FILENO], 1);
98348c779cdSXin LI 	}
9843e41d09dSXin LI 
9853e41d09dSXin LI 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
9863e41d09dSXin LI 	if (*newch == NULL) {
9873e41d09dSXin LI 		rv = makeerror(newch, n, "No buffer, %s",
9883e41d09dSXin LI 		    strerror(errno));
9893e41d09dSXin LI 		goto err;
9903e41d09dSXin LI 	}
9913e41d09dSXin LI 	rv = OKDATA;
992*a2dfb722SXin LI 	errno = 0;
99348c779cdSXin LI 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
994*a2dfb722SXin LI 	if (r == 0 && errno == 0)
995*a2dfb722SXin LI 		goto ok;
99648c779cdSXin LI 	if (r <= 0) {
9973e41d09dSXin LI 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
9983e41d09dSXin LI 		    r != -1 ? strerror(errno) : "no data");
9993e41d09dSXin LI 
10003e41d09dSXin LI 		rv = ERRDATA;
10013e41d09dSXin LI 		if (r == 0 &&
10023e41d09dSXin LI 		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
10033e41d09dSXin LI 		{
10043e41d09dSXin LI 			r = filter_error(*newch, r);
100548c779cdSXin LI 			goto ok;
10063e41d09dSXin LI 		}
10073e41d09dSXin LI 		free(*newch);
10083e41d09dSXin LI 		if  (r == 0)
10093e41d09dSXin LI 			rv = makeerror(newch, n, "Read failed, %s",
10103e41d09dSXin LI 			    strerror(errno));
10113e41d09dSXin LI 		else
10123e41d09dSXin LI 			rv = makeerror(newch, n, "No data");
10133e41d09dSXin LI 		goto err;
10143e41d09dSXin LI 	}
101548c779cdSXin LI ok:
10163e41d09dSXin LI 	*n = r;
10173e41d09dSXin LI 	/* NUL terminate, as every buffer is handled here. */
10183e41d09dSXin LI 	(*newch)[*n] = '\0';
10193e41d09dSXin LI err:
10203e41d09dSXin LI 	closefd(fdp[STDIN_FILENO], 1);
10213e41d09dSXin LI 	closefd(fdp[STDOUT_FILENO], 0);
10223e41d09dSXin LI 	closefd(fdp[STDERR_FILENO], 0);
102348c779cdSXin LI 
102448c779cdSXin LI 	w = waitpid(pid, &status, 0);
102548c779cdSXin LI wait_err:
102648c779cdSXin LI 	if (w == -1) {
10273e41d09dSXin LI 		free(*newch);
10283e41d09dSXin LI 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
10293e41d09dSXin LI 		DPRINTF("Child wait return %#x\n", status);
10303e41d09dSXin LI 	} else if (!WIFEXITED(status)) {
103140427ccaSGordon Tetlow 		DPRINTF("Child not exited (%#x)\n", status);
10323e41d09dSXin LI 	} else if (WEXITSTATUS(status) != 0) {
103340427ccaSGordon Tetlow 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
10343e41d09dSXin LI 	}
103548c779cdSXin LI 	if (writepid > 0) {
103648c779cdSXin LI 		/* _After_ we know decompressor has exited, our input writer
103748c779cdSXin LI 		 * definitely will exit now (at worst, writing fails in it,
103848c779cdSXin LI 		 * since output fd is closed now on the reading size).
103948c779cdSXin LI 		 */
104048c779cdSXin LI 		w = waitpid(writepid, &status, 0);
104148c779cdSXin LI 		writepid = -1;
104248c779cdSXin LI 		goto wait_err;
104348c779cdSXin LI 	}
10443e41d09dSXin LI 
104548c779cdSXin LI 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
104648c779cdSXin LI 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
10473e41d09dSXin LI 
10483e41d09dSXin LI 	return rv;
1049b6cee71dSXin LI }
1050b6cee71dSXin LI #endif
1051