xref: /freebsd/contrib/file/src/compress.c (revision 43a5ec4eb41567cc92586503212743d89686d78f)
1b6cee71dSXin LI /*
2b6cee71dSXin LI  * Copyright (c) Ian F. Darwin 1986-1995.
3b6cee71dSXin LI  * Software written by Ian F. Darwin and others;
4b6cee71dSXin LI  * maintained 1995-present by Christos Zoulas and others.
5b6cee71dSXin LI  *
6b6cee71dSXin LI  * Redistribution and use in source and binary forms, with or without
7b6cee71dSXin LI  * modification, are permitted provided that the following conditions
8b6cee71dSXin LI  * are met:
9b6cee71dSXin LI  * 1. Redistributions of source code must retain the above copyright
10b6cee71dSXin LI  *    notice immediately at the beginning of the file, without modification,
11b6cee71dSXin LI  *    this list of conditions, and the following disclaimer.
12b6cee71dSXin LI  * 2. Redistributions in binary form must reproduce the above copyright
13b6cee71dSXin LI  *    notice, this list of conditions and the following disclaimer in the
14b6cee71dSXin LI  *    documentation and/or other materials provided with the distribution.
15b6cee71dSXin LI  *
16b6cee71dSXin LI  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17b6cee71dSXin LI  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18b6cee71dSXin LI  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19b6cee71dSXin LI  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20b6cee71dSXin LI  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21b6cee71dSXin LI  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22b6cee71dSXin LI  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23b6cee71dSXin LI  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24b6cee71dSXin LI  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25b6cee71dSXin LI  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26b6cee71dSXin LI  * SUCH DAMAGE.
27b6cee71dSXin LI  */
28b6cee71dSXin LI /*
29b6cee71dSXin LI  * compress routines:
30b6cee71dSXin LI  *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
31b6cee71dSXin LI  *		   information if recognized
32b6cee71dSXin LI  *	uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress old
33b6cee71dSXin LI  *		   into newch using method; returns OKDATA, NODATA or ERRDATA
34b6cee71dSXin LI  */
35b6cee71dSXin LI #include "file.h"
36b6cee71dSXin LI 
37b6cee71dSXin LI #ifndef lint
38*43a5ec4eSXin LI FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
39b6cee71dSXin LI #endif
40b6cee71dSXin LI 
41b6cee71dSXin LI #include "magic.h"
42b6cee71dSXin LI #include <stdlib.h>
43b6cee71dSXin LI #ifdef HAVE_UNISTD_H
44b6cee71dSXin LI #include <unistd.h>
45b6cee71dSXin LI #endif
46b6cee71dSXin LI #include <string.h>
47b6cee71dSXin LI #include <errno.h>
483e41d09dSXin LI #include <ctype.h>
493e41d09dSXin LI #include <stdarg.h>
504460e5b0SXin LI #include <signal.h>
515f0216bdSXin LI #ifndef HAVE_SIG_T
525f0216bdSXin LI typedef void (*sig_t)(int);
535f0216bdSXin LI #endif /* HAVE_SIG_T */
542726a701SXin LI #if !defined(__MINGW32__) && !defined(WIN32) && !defined(__MINGW64__)
55b6cee71dSXin LI #include <sys/ioctl.h>
56b6cee71dSXin LI #endif
57b6cee71dSXin LI #ifdef HAVE_SYS_WAIT_H
58b6cee71dSXin LI #include <sys/wait.h>
59b6cee71dSXin LI #endif
60b6cee71dSXin LI #if defined(HAVE_SYS_TIME_H)
61b6cee71dSXin LI #include <sys/time.h>
62b6cee71dSXin LI #endif
6348c779cdSXin LI 
6440427ccaSGordon Tetlow #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
65b6cee71dSXin LI #define BUILTIN_DECOMPRESS
66b6cee71dSXin LI #include <zlib.h>
673e41d09dSXin LI #endif
6848c779cdSXin LI 
692726a701SXin LI #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
7048c779cdSXin LI #define BUILTIN_BZLIB
7148c779cdSXin LI #include <bzlib.h>
7248c779cdSXin LI #endif
7348c779cdSXin LI 
74*43a5ec4eSXin LI #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
75d38c30c0SXin LI #define BUILTIN_XZLIB
76d38c30c0SXin LI #include <lzma.h>
77d38c30c0SXin LI #endif
78d38c30c0SXin LI 
/*
 * Debug tracing.  With DEBUG defined, DPRINTF() lazily opens /dev/tty on
 * first use (aborting the process if that fails) and writes the formatted
 * message there with dprintf().  Without DEBUG it expands to nothing, so
 * its arguments are not evaluated.
 */
793e41d09dSXin LI #ifdef DEBUG
803e41d09dSXin LI int tty = -1;
813e41d09dSXin LI #define DPRINTF(...)	do { \
823e41d09dSXin LI 	if (tty == -1) \
833e41d09dSXin LI 		tty = open("/dev/tty", O_RDWR); \
843e41d09dSXin LI 	if (tty == -1) \
853e41d09dSXin LI 		abort(); \
863e41d09dSXin LI 	dprintf(tty, __VA_ARGS__); \
873e41d09dSXin LI } while (/*CONSTCOND*/0)
883e41d09dSXin LI #else
893e41d09dSXin LI #define DPRINTF(...)
90b6cee71dSXin LI #endif
91b6cee71dSXin LI 
923e41d09dSXin LI #ifdef ZLIBSUPPORT
933e41d09dSXin LI /*
943e41d09dSXin LI  * The following python code is not really used because ZLIBSUPPORT is only
953e41d09dSXin LI  * defined if we have a built-in zlib, and the built-in zlib handles that.
9640427ccaSGordon Tetlow  * That is not true for android where we have zlib.h and not -lz.
973e41d09dSXin LI  */
/*
 * Python one-liner that reads stdin, zlib-decompresses it and writes the
 * result to stdout.
 * NOTE(review): sys.stdin.read() looks like it assumes a byte-oriented
 * stdin (Python 2 semantics) -- confirm before relying on it with Python 3.
 */
983e41d09dSXin LI static const char zlibcode[] =
993e41d09dSXin LI     "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
1003e41d09dSXin LI 
/* Argument vector for the "zlib" row of compr[]: python -c <zlibcode>. */
1013e41d09dSXin LI static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
1023e41d09dSXin LI 
/*
 * Return 1 if buf starts with a plausible zlib (RFC 1950) stream header,
 * 0 otherwise.  The caller must supply at least 2 readable bytes.
 *
 * Checks performed:
 *  - compression method (low nibble of the first byte, CMF) is 8 (deflate)
 *  - the top bit of CMF is clear
 *  - CMF * 256 + FLG is a multiple of 31 (the header check value)
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The check value is defined on the byte stream, first byte most
	 * significant.  Shifting the individual bytes already gives the
	 * same result on every host, so no byte-order test is needed (the
	 * previous one swapped the bytes on big-endian machines and made
	 * valid headers fail).
	 */
	check = (buf[0] << 8) | buf[1];
	if (check % 31)
		return 0;
	return 1;
}
1193e41d09dSXin LI #endif
1203e41d09dSXin LI 
/*
 * Heuristic test for a raw LZMA header.  The caller must supply at least
 * 13 readable bytes.  Returns 1 when byte 0 is 0x5d, bytes 1 and 2 are
 * zero and byte 12 is either 0x00 or 0xff; returns 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	return buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0 &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
130d38c30c0SXin LI 
/*
 * Command lines (program name followed by flags, NULL-terminated) for the
 * external decompressors referenced from the compr[] table below.
 * lzip shares gzip's flags.
 */
1313e41d09dSXin LI #define gzip_flags "-cd"
1323e41d09dSXin LI #define lrzip_flags "-do"
1333e41d09dSXin LI #define lzip_flags gzip_flags
1343e41d09dSXin LI 
1353e41d09dSXin LI static const char *gzip_args[] = {
1363e41d09dSXin LI 	"gzip", gzip_flags, NULL
1373e41d09dSXin LI };
1383e41d09dSXin LI static const char *uncompress_args[] = {
1393e41d09dSXin LI 	"uncompress", "-c", NULL
1403e41d09dSXin LI };
1413e41d09dSXin LI static const char *bzip2_args[] = {
1423e41d09dSXin LI 	"bzip2", "-cd", NULL
1433e41d09dSXin LI };
1443e41d09dSXin LI static const char *lzip_args[] = {
1453e41d09dSXin LI 	"lzip", lzip_flags, NULL
1463e41d09dSXin LI };
1473e41d09dSXin LI static const char *xz_args[] = {
1483e41d09dSXin LI 	"xz", "-cd", NULL
1493e41d09dSXin LI };
1503e41d09dSXin LI static const char *lrzip_args[] = {
1513e41d09dSXin LI 	"lrzip", lrzip_flags, NULL
1523e41d09dSXin LI };
1533e41d09dSXin LI static const char *lz4_args[] = {
1543e41d09dSXin LI 	"lz4", "-cd", NULL
155b6cee71dSXin LI };
156a5d223e6SXin LI static const char *zstd_args[] = {
157a5d223e6SXin LI 	"zstd", "-cd", NULL
158a5d223e6SXin LI };
159b6cee71dSXin LI 
/*
 * Placeholders stored in the last ("unused") member of some compr[] rows;
 * both expand to NULL.
 */
16048c779cdSXin LI #define	do_zlib		NULL
16148c779cdSXin LI #define	do_bzlib	NULL
16248c779cdSXin LI 
/*
 * Table of recognised compression formats, scanned in order by
 * file_zmagic().
 *  - u.magic / u.func: literal magic bytes, or a predicate called on the
 *    start of the buffer
 *  - maglen: when positive, the number of magic bytes compared with
 *    memcmp(); when negative, u.func is used and -maglen is the minimum
 *    number of bytes the buffer must hold
 *  - argv: command line of the matching external decompressor
 *  - unused: always NULL here (see do_zlib/do_bzlib)
 * The METH_* constants are row indices into this table; each row's
 * trailing comment gives its index.
 * NOTE(review): METH_FROZEN is 2, which is the "gzipped" row, while the
 * row commented "frozen" is index 3 -- the name looks historical; confirm
 * before relying on it.
 */
1633e41d09dSXin LI private const struct {
1642726a701SXin LI 	union {
1652726a701SXin LI 		const char *magic;
1662726a701SXin LI 		int (*func)(const unsigned char *);
1672726a701SXin LI 	} u;
168d38c30c0SXin LI 	int maglen;
1693e41d09dSXin LI 	const char **argv;
17048c779cdSXin LI 	void *unused;
1713e41d09dSXin LI } compr[] = {
172d38c30c0SXin LI #define METH_FROZEN	2
173d38c30c0SXin LI #define METH_BZIP	7
174d38c30c0SXin LI #define METH_XZ		9
175d38c30c0SXin LI #define METH_LZMA	13
176d38c30c0SXin LI #define METH_ZLIB	14
1772726a701SXin LI     { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
1783e41d09dSXin LI     /* Uncompress can get stuck; so use gzip first if we have it
1793e41d09dSXin LI      * Idea from Damien Clark, thanks! */
1802726a701SXin LI     { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
1812726a701SXin LI     { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
1822726a701SXin LI     { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
1832726a701SXin LI     { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
1843e41d09dSXin LI     /* the standard pack utilities do not accept standard input */
1852726a701SXin LI     { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
1862726a701SXin LI     { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
1873e41d09dSXin LI     /* ...only first file examined */
1882726a701SXin LI     { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
1892726a701SXin LI     { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
1902726a701SXin LI     { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
1912726a701SXin LI     { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
1922726a701SXin LI     { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
1932726a701SXin LI     { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
1942726a701SXin LI     { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
1953e41d09dSXin LI #ifdef ZLIBSUPPORT
1962726a701SXin LI     { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
1973e41d09dSXin LI #endif
1983e41d09dSXin LI };
1993e41d09dSXin LI 
/*
 * Status codes returned by uncompressbuf() and the uncompress*() helpers
 * and switched on in file_zmagic(): OKDATA means the output buffer holds
 * uncompressed data, ERRDATA means it holds an error message, and NODATA
 * means nothing was produced.
 */
2003e41d09dSXin LI #define OKDATA 	0
2013e41d09dSXin LI #define NODATA	1
2023e41d09dSXin LI #define ERRDATA	2
203b6cee71dSXin LI 
/* Forward declarations for helpers defined later in this file. */
204b6cee71dSXin LI private ssize_t swrite(int, const void *, size_t);
205b6cee71dSXin LI #if HAVE_FORK
/* Number of rows in compr[]. */
20648c779cdSXin LI private size_t ncompr = __arraycount(compr);
2073e41d09dSXin LI private int uncompressbuf(int, size_t, size_t, const unsigned char *,
2083e41d09dSXin LI     unsigned char **, size_t *);
209b6cee71dSXin LI #ifdef BUILTIN_DECOMPRESS
2103e41d09dSXin LI private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
2113e41d09dSXin LI     size_t *, int);
2123e41d09dSXin LI private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
2133e41d09dSXin LI     size_t *);
214b6cee71dSXin LI #endif
21548c779cdSXin LI #ifdef BUILTIN_BZLIB
21648c779cdSXin LI private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
217d38c30c0SXin LI     size_t *);
218d38c30c0SXin LI #endif
219d38c30c0SXin LI #ifdef BUILTIN_XZLIB
220d38c30c0SXin LI private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
221d38c30c0SXin LI     size_t *);
22248c779cdSXin LI #endif
22348c779cdSXin LI 
/* printf-style error formatter; argument 3 is the format string. */
2243e41d09dSXin LI static int makeerror(unsigned char **, size_t *, const char *, ...)
2253e41d09dSXin LI     __attribute__((__format__(__printf__, 3, 4)));
2263e41d09dSXin LI private const char *methodname(size_t);
227b6cee71dSXin LI 
2282dc4dbb9SEitan Adler private int
2292dc4dbb9SEitan Adler format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
2302dc4dbb9SEitan Adler {
2312dc4dbb9SEitan Adler 	unsigned char *p;
2322dc4dbb9SEitan Adler 	int mime = ms->flags & MAGIC_MIME;
2332dc4dbb9SEitan Adler 
2342dc4dbb9SEitan Adler 	if (!mime)
2352dc4dbb9SEitan Adler 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
2362dc4dbb9SEitan Adler 
2372dc4dbb9SEitan Adler 	for (p = buf; *p; p++)
2382dc4dbb9SEitan Adler 		if (!isalnum(*p))
2392dc4dbb9SEitan Adler 			*p = '-';
2402dc4dbb9SEitan Adler 
2412dc4dbb9SEitan Adler 	return file_printf(ms, "application/x-decompression-error-%s-%s",
2422dc4dbb9SEitan Adler 	    methodname(i), buf);
2432dc4dbb9SEitan Adler }
2442dc4dbb9SEitan Adler 
245b6cee71dSXin LI protected int
24658a0f0d0SEitan Adler file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
247b6cee71dSXin LI {
248b6cee71dSXin LI 	unsigned char *newbuf = NULL;
249b6cee71dSXin LI 	size_t i, nsz;
2503e41d09dSXin LI 	char *rbuf;
2513e41d09dSXin LI 	file_pushbuf_t *pb;
25220f8619dSXin LI 	int urv, prv, rv = 0;
253b6cee71dSXin LI 	int mime = ms->flags & MAGIC_MIME;
25458a0f0d0SEitan Adler 	int fd = b->fd;
25548c779cdSXin LI 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
25658a0f0d0SEitan Adler 	size_t nbytes = b->flen;
25748c779cdSXin LI 	int sa_saved = 0;
25848c779cdSXin LI 	struct sigaction sig_act;
259b6cee71dSXin LI 
260b6cee71dSXin LI 	if ((ms->flags & MAGIC_COMPRESS) == 0)
261b6cee71dSXin LI 		return 0;
262b6cee71dSXin LI 
263b6cee71dSXin LI 	for (i = 0; i < ncompr; i++) {
2643e41d09dSXin LI 		int zm;
265d38c30c0SXin LI 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
266b6cee71dSXin LI 			continue;
267d38c30c0SXin LI 		if (compr[i].maglen < 0) {
2682726a701SXin LI 			zm = (*compr[i].u.func)(buf);
269d38c30c0SXin LI 		} else {
2702726a701SXin LI 			zm = memcmp(buf, compr[i].u.magic,
271d38c30c0SXin LI 			    CAST(size_t, compr[i].maglen)) == 0;
272d38c30c0SXin LI 		}
273b6cee71dSXin LI 
2743e41d09dSXin LI 		if (!zm)
2753e41d09dSXin LI 			continue;
27648c779cdSXin LI 
27748c779cdSXin LI 		/* Prevent SIGPIPE death if child dies unexpectedly */
27848c779cdSXin LI 		if (!sa_saved) {
27948c779cdSXin LI 			//We can use sig_act for both new and old, but
28048c779cdSXin LI 			struct sigaction new_act;
28148c779cdSXin LI 			memset(&new_act, 0, sizeof(new_act));
28248c779cdSXin LI 			new_act.sa_handler = SIG_IGN;
28348c779cdSXin LI 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
28448c779cdSXin LI 		}
28548c779cdSXin LI 
2863e41d09dSXin LI 		nsz = nbytes;
28720f8619dSXin LI 		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
28848c779cdSXin LI 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
28948c779cdSXin LI 		    (char *)newbuf, nsz);
29020f8619dSXin LI 		switch (urv) {
2913e41d09dSXin LI 		case OKDATA:
2923e41d09dSXin LI 		case ERRDATA:
2933e41d09dSXin LI 			ms->flags &= ~MAGIC_COMPRESS;
29420f8619dSXin LI 			if (urv == ERRDATA)
2952dc4dbb9SEitan Adler 				prv = format_decompression_error(ms, i, newbuf);
2963e41d09dSXin LI 			else
29748c779cdSXin LI 				prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
29820f8619dSXin LI 			if (prv == -1)
2993e41d09dSXin LI 				goto error;
30020f8619dSXin LI 			rv = 1;
3013e41d09dSXin LI 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
3023e41d09dSXin LI 				goto out;
3033e41d09dSXin LI 			if (mime != MAGIC_MIME && mime != 0)
3043e41d09dSXin LI 				goto out;
3053e41d09dSXin LI 			if ((file_printf(ms,
3063e41d09dSXin LI 			    mime ? " compressed-encoding=" : " (")) == -1)
3073e41d09dSXin LI 				goto error;
3083e41d09dSXin LI 			if ((pb = file_push_buffer(ms)) == NULL)
309b6cee71dSXin LI 				goto error;
31020f8619dSXin LI 			/*
31120f8619dSXin LI 			 * XXX: If file_buffer fails here, we overwrite
31220f8619dSXin LI 			 * the compressed text. FIXME.
31320f8619dSXin LI 			 */
31448c779cdSXin LI 			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
31548c779cdSXin LI 				if (file_pop_buffer(ms, pb) != NULL)
31648c779cdSXin LI 					abort();
317b6cee71dSXin LI 				goto error;
31848c779cdSXin LI 			}
3193e41d09dSXin LI 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
3203e41d09dSXin LI 				if (file_printf(ms, "%s", rbuf) == -1) {
3213e41d09dSXin LI 					free(rbuf);
322b6cee71dSXin LI 					goto error;
323b6cee71dSXin LI 				}
3243e41d09dSXin LI 				free(rbuf);
3253e41d09dSXin LI 			}
3263e41d09dSXin LI 			if (!mime && file_printf(ms, ")") == -1)
3273e41d09dSXin LI 				goto error;
32820f8619dSXin LI 			/*FALLTHROUGH*/
3293e41d09dSXin LI 		case NODATA:
33020f8619dSXin LI 			break;
3313e41d09dSXin LI 		default:
3323e41d09dSXin LI 			abort();
33320f8619dSXin LI 			/*NOTREACHED*/
33420f8619dSXin LI 		error:
33520f8619dSXin LI 			rv = -1;
33620f8619dSXin LI 			break;
3373e41d09dSXin LI 		}
3383e41d09dSXin LI 	}
3393e41d09dSXin LI out:
34020f8619dSXin LI 	DPRINTF("rv = %d\n", rv);
34120f8619dSXin LI 
34248c779cdSXin LI 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
34348c779cdSXin LI 		(void)sigaction(SIGPIPE, &sig_act, NULL);
34448c779cdSXin LI 
345b6cee71dSXin LI 	free(newbuf);
346b6cee71dSXin LI 	ms->flags |= MAGIC_COMPRESS;
3473e41d09dSXin LI 	DPRINTF("Zmagic returns %d\n", rv);
348b6cee71dSXin LI 	return rv;
349b6cee71dSXin LI }
350b6cee71dSXin LI #endif
351b6cee71dSXin LI /*
352b6cee71dSXin LI  * `safe' write for sockets and pipes.
353b6cee71dSXin LI  */
354b6cee71dSXin LI private ssize_t
355b6cee71dSXin LI swrite(int fd, const void *buf, size_t n)
356b6cee71dSXin LI {
357b6cee71dSXin LI 	ssize_t rv;
358b6cee71dSXin LI 	size_t rn = n;
359b6cee71dSXin LI 
360b6cee71dSXin LI 	do
361b6cee71dSXin LI 		switch (rv = write(fd, buf, n)) {
362b6cee71dSXin LI 		case -1:
363b6cee71dSXin LI 			if (errno == EINTR)
364b6cee71dSXin LI 				continue;
365b6cee71dSXin LI 			return -1;
366b6cee71dSXin LI 		default:
367b6cee71dSXin LI 			n -= rv;
368b6cee71dSXin LI 			buf = CAST(const char *, buf) + rv;
369b6cee71dSXin LI 			break;
370b6cee71dSXin LI 		}
371b6cee71dSXin LI 	while (n > 0);
372b6cee71dSXin LI 	return rn;
373b6cee71dSXin LI }
374b6cee71dSXin LI 
375b6cee71dSXin LI 
376b6cee71dSXin LI /*
377b6cee71dSXin LI  * `safe' read for sockets and pipes.
378b6cee71dSXin LI  */
379b6cee71dSXin LI protected ssize_t
380b6cee71dSXin LI sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
381b6cee71dSXin LI {
382b6cee71dSXin LI 	ssize_t rv;
383b6cee71dSXin LI #ifdef FIONREAD
384b6cee71dSXin LI 	int t = 0;
385b6cee71dSXin LI #endif
386b6cee71dSXin LI 	size_t rn = n;
387b6cee71dSXin LI 
388b6cee71dSXin LI 	if (fd == STDIN_FILENO)
389b6cee71dSXin LI 		goto nocheck;
390b6cee71dSXin LI 
391b6cee71dSXin LI #ifdef FIONREAD
392b6cee71dSXin LI 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
393b6cee71dSXin LI #ifdef FD_ZERO
394b6cee71dSXin LI 		ssize_t cnt;
395b6cee71dSXin LI 		for (cnt = 0;; cnt++) {
396b6cee71dSXin LI 			fd_set check;
397b6cee71dSXin LI 			struct timeval tout = {0, 100 * 1000};
398b6cee71dSXin LI 			int selrv;
399b6cee71dSXin LI 
400b6cee71dSXin LI 			FD_ZERO(&check);
401b6cee71dSXin LI 			FD_SET(fd, &check);
402b6cee71dSXin LI 
403b6cee71dSXin LI 			/*
404b6cee71dSXin LI 			 * Avoid soft deadlock: do not read if there
405b6cee71dSXin LI 			 * is nothing to read from sockets and pipes.
406b6cee71dSXin LI 			 */
407b6cee71dSXin LI 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
408b6cee71dSXin LI 			if (selrv == -1) {
409b6cee71dSXin LI 				if (errno == EINTR || errno == EAGAIN)
410b6cee71dSXin LI 					continue;
411b6cee71dSXin LI 			} else if (selrv == 0 && cnt >= 5) {
412b6cee71dSXin LI 				return 0;
413b6cee71dSXin LI 			} else
414b6cee71dSXin LI 				break;
415b6cee71dSXin LI 		}
416b6cee71dSXin LI #endif
417b6cee71dSXin LI 		(void)ioctl(fd, FIONREAD, &t);
418b6cee71dSXin LI 	}
419b6cee71dSXin LI 
42048c779cdSXin LI 	if (t > 0 && CAST(size_t, t) < n) {
421b6cee71dSXin LI 		n = t;
422b6cee71dSXin LI 		rn = n;
423b6cee71dSXin LI 	}
424b6cee71dSXin LI #endif
425b6cee71dSXin LI 
426b6cee71dSXin LI nocheck:
427b6cee71dSXin LI 	do
428b6cee71dSXin LI 		switch ((rv = read(fd, buf, n))) {
429b6cee71dSXin LI 		case -1:
430b6cee71dSXin LI 			if (errno == EINTR)
431b6cee71dSXin LI 				continue;
432b6cee71dSXin LI 			return -1;
433b6cee71dSXin LI 		case 0:
434b6cee71dSXin LI 			return rn - n;
435b6cee71dSXin LI 		default:
436b6cee71dSXin LI 			n -= rv;
437a5d223e6SXin LI 			buf = CAST(char *, CCAST(void *, buf)) + rv;
438b6cee71dSXin LI 			break;
439b6cee71dSXin LI 		}
440b6cee71dSXin LI 	while (n > 0);
441b6cee71dSXin LI 	return rn;
442b6cee71dSXin LI }
443b6cee71dSXin LI 
444b6cee71dSXin LI protected int
445b6cee71dSXin LI file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
446b6cee71dSXin LI     size_t nbytes)
447b6cee71dSXin LI {
448b6cee71dSXin LI 	char buf[4096];
449b6cee71dSXin LI 	ssize_t r;
450b6cee71dSXin LI 	int tfd;
451b6cee71dSXin LI 
452b6cee71dSXin LI 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
453b6cee71dSXin LI #ifndef HAVE_MKSTEMP
454b6cee71dSXin LI 	{
455b6cee71dSXin LI 		char *ptr = mktemp(buf);
456b6cee71dSXin LI 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
457b6cee71dSXin LI 		r = errno;
458b6cee71dSXin LI 		(void)unlink(ptr);
459b6cee71dSXin LI 		errno = r;
460b6cee71dSXin LI 	}
461b6cee71dSXin LI #else
462b6cee71dSXin LI 	{
463b6cee71dSXin LI 		int te;
46448c779cdSXin LI 		mode_t ou = umask(0);
465b6cee71dSXin LI 		tfd = mkstemp(buf);
46648c779cdSXin LI 		(void)umask(ou);
467b6cee71dSXin LI 		te = errno;
468b6cee71dSXin LI 		(void)unlink(buf);
469b6cee71dSXin LI 		errno = te;
470b6cee71dSXin LI 	}
471b6cee71dSXin LI #endif
472b6cee71dSXin LI 	if (tfd == -1) {
473b6cee71dSXin LI 		file_error(ms, errno,
474b6cee71dSXin LI 		    "cannot create temporary file for pipe copy");
475b6cee71dSXin LI 		return -1;
476b6cee71dSXin LI 	}
477b6cee71dSXin LI 
47848c779cdSXin LI 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
479b6cee71dSXin LI 		r = 1;
480b6cee71dSXin LI 	else {
481b6cee71dSXin LI 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
48248c779cdSXin LI 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
483b6cee71dSXin LI 				break;
484b6cee71dSXin LI 	}
485b6cee71dSXin LI 
486b6cee71dSXin LI 	switch (r) {
487b6cee71dSXin LI 	case -1:
488b6cee71dSXin LI 		file_error(ms, errno, "error copying from pipe to temp file");
489b6cee71dSXin LI 		return -1;
490b6cee71dSXin LI 	case 0:
491b6cee71dSXin LI 		break;
492b6cee71dSXin LI 	default:
493b6cee71dSXin LI 		file_error(ms, errno, "error while writing to temp file");
494b6cee71dSXin LI 		return -1;
495b6cee71dSXin LI 	}
496b6cee71dSXin LI 
497b6cee71dSXin LI 	/*
498b6cee71dSXin LI 	 * We duplicate the file descriptor, because fclose on a
499b6cee71dSXin LI 	 * tmpfile will delete the file, but any open descriptors
500b6cee71dSXin LI 	 * can still access the phantom inode.
501b6cee71dSXin LI 	 */
502b6cee71dSXin LI 	if ((fd = dup2(tfd, fd)) == -1) {
503b6cee71dSXin LI 		file_error(ms, errno, "could not dup descriptor for temp file");
504b6cee71dSXin LI 		return -1;
505b6cee71dSXin LI 	}
506b6cee71dSXin LI 	(void)close(tfd);
50748c779cdSXin LI 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
508b6cee71dSXin LI 		file_badseek(ms);
509b6cee71dSXin LI 		return -1;
510b6cee71dSXin LI 	}
511b6cee71dSXin LI 	return fd;
512b6cee71dSXin LI }
513b6cee71dSXin LI #if HAVE_FORK
514b6cee71dSXin LI #ifdef BUILTIN_DECOMPRESS
515b6cee71dSXin LI 
516b6cee71dSXin LI #define FHCRC		(1 << 1)
517b6cee71dSXin LI #define FEXTRA		(1 << 2)
518b6cee71dSXin LI #define FNAME		(1 << 3)
519b6cee71dSXin LI #define FCOMMENT	(1 << 4)
520b6cee71dSXin LI 
5213e41d09dSXin LI 
5223e41d09dSXin LI private int
5233e41d09dSXin LI uncompressgzipped(const unsigned char *old, unsigned char **newch,
5243e41d09dSXin LI     size_t bytes_max, size_t *n)
525b6cee71dSXin LI {
526b6cee71dSXin LI 	unsigned char flg = old[3];
527b6cee71dSXin LI 	size_t data_start = 10;
528b6cee71dSXin LI 
529b6cee71dSXin LI 	if (flg & FEXTRA) {
5303e41d09dSXin LI 		if (data_start + 1 >= *n)
5313e41d09dSXin LI 			goto err;
532b6cee71dSXin LI 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
533b6cee71dSXin LI 	}
534b6cee71dSXin LI 	if (flg & FNAME) {
5353e41d09dSXin LI 		while(data_start < *n && old[data_start])
536b6cee71dSXin LI 			data_start++;
537b6cee71dSXin LI 		data_start++;
538b6cee71dSXin LI 	}
539b6cee71dSXin LI 	if (flg & FCOMMENT) {
5403e41d09dSXin LI 		while(data_start < *n && old[data_start])
541b6cee71dSXin LI 			data_start++;
542b6cee71dSXin LI 		data_start++;
543b6cee71dSXin LI 	}
544b6cee71dSXin LI 	if (flg & FHCRC)
545b6cee71dSXin LI 		data_start += 2;
546b6cee71dSXin LI 
5473e41d09dSXin LI 	if (data_start >= *n)
5483e41d09dSXin LI 		goto err;
5493e41d09dSXin LI 
5503e41d09dSXin LI 	*n -= data_start;
5513e41d09dSXin LI 	old += data_start;
5523e41d09dSXin LI 	return uncompresszlib(old, newch, bytes_max, n, 0);
5533e41d09dSXin LI err:
5543e41d09dSXin LI 	return makeerror(newch, n, "File too short");
555b6cee71dSXin LI }
556b6cee71dSXin LI 
5573e41d09dSXin LI private int
5583e41d09dSXin LI uncompresszlib(const unsigned char *old, unsigned char **newch,
5593e41d09dSXin LI     size_t bytes_max, size_t *n, int zlib)
5603e41d09dSXin LI {
5613e41d09dSXin LI 	int rc;
5623e41d09dSXin LI 	z_stream z;
5633e41d09dSXin LI 
5643e41d09dSXin LI 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
5653e41d09dSXin LI 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
5663e41d09dSXin LI 
5673e41d09dSXin LI 	z.next_in = CCAST(Bytef *, old);
5683e41d09dSXin LI 	z.avail_in = CAST(uint32_t, *n);
569b6cee71dSXin LI 	z.next_out = *newch;
57040427ccaSGordon Tetlow 	z.avail_out = CAST(unsigned int, bytes_max);
571b6cee71dSXin LI 	z.zalloc = Z_NULL;
572b6cee71dSXin LI 	z.zfree = Z_NULL;
573b6cee71dSXin LI 	z.opaque = Z_NULL;
574b6cee71dSXin LI 
575b6cee71dSXin LI 	/* LINTED bug in header macro */
5763e41d09dSXin LI 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
5773e41d09dSXin LI 	if (rc != Z_OK)
5783e41d09dSXin LI 		goto err;
579b6cee71dSXin LI 
580b6cee71dSXin LI 	rc = inflate(&z, Z_SYNC_FLUSH);
5813e41d09dSXin LI 	if (rc != Z_OK && rc != Z_STREAM_END)
5823e41d09dSXin LI 		goto err;
583b6cee71dSXin LI 
58448c779cdSXin LI 	*n = CAST(size_t, z.total_out);
5853e41d09dSXin LI 	rc = inflateEnd(&z);
5863e41d09dSXin LI 	if (rc != Z_OK)
5873e41d09dSXin LI 		goto err;
588b6cee71dSXin LI 
589b6cee71dSXin LI 	/* let's keep the nul-terminate tradition */
5903e41d09dSXin LI 	(*newch)[*n] = '\0';
591b6cee71dSXin LI 
5923e41d09dSXin LI 	return OKDATA;
5933e41d09dSXin LI err:
59448c779cdSXin LI 	strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
59548c779cdSXin LI 	*n = strlen(RCAST(char *, *newch));
5963e41d09dSXin LI 	return ERRDATA;
597b6cee71dSXin LI }
598b6cee71dSXin LI #endif
599b6cee71dSXin LI 
600d38c30c0SXin LI #ifdef BUILTIN_BZLIB
601d38c30c0SXin LI private int
602d38c30c0SXin LI uncompressbzlib(const unsigned char *old, unsigned char **newch,
603d38c30c0SXin LI     size_t bytes_max, size_t *n)
604d38c30c0SXin LI {
605d38c30c0SXin LI 	int rc;
606d38c30c0SXin LI 	bz_stream bz;
607d38c30c0SXin LI 
608d38c30c0SXin LI 	memset(&bz, 0, sizeof(bz));
609d38c30c0SXin LI 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
610d38c30c0SXin LI 	if (rc != BZ_OK)
611d38c30c0SXin LI 		goto err;
612d38c30c0SXin LI 
613d38c30c0SXin LI 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
614d38c30c0SXin LI 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
615d38c30c0SXin LI 
616d38c30c0SXin LI 	bz.next_in = CCAST(char *, RCAST(const char *, old));
617d38c30c0SXin LI 	bz.avail_in = CAST(uint32_t, *n);
618d38c30c0SXin LI 	bz.next_out = RCAST(char *, *newch);
619d38c30c0SXin LI 	bz.avail_out = CAST(unsigned int, bytes_max);
620d38c30c0SXin LI 
621d38c30c0SXin LI 	rc = BZ2_bzDecompress(&bz);
622d38c30c0SXin LI 	if (rc != BZ_OK && rc != BZ_STREAM_END)
623d38c30c0SXin LI 		goto err;
624d38c30c0SXin LI 
625d38c30c0SXin LI 	/* Assume byte_max is within 32bit */
626d38c30c0SXin LI 	/* assert(bz.total_out_hi32 == 0); */
627d38c30c0SXin LI 	*n = CAST(size_t, bz.total_out_lo32);
628d38c30c0SXin LI 	rc = BZ2_bzDecompressEnd(&bz);
629d38c30c0SXin LI 	if (rc != BZ_OK)
630d38c30c0SXin LI 		goto err;
631d38c30c0SXin LI 
632d38c30c0SXin LI 	/* let's keep the nul-terminate tradition */
633d38c30c0SXin LI 	(*newch)[*n] = '\0';
634d38c30c0SXin LI 
635d38c30c0SXin LI 	return OKDATA;
636d38c30c0SXin LI err:
637d38c30c0SXin LI 	snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
638d38c30c0SXin LI 	*n = strlen(RCAST(char *, *newch));
639d38c30c0SXin LI 	return ERRDATA;
640d38c30c0SXin LI }
641d38c30c0SXin LI #endif
642d38c30c0SXin LI 
643d38c30c0SXin LI #ifdef BUILTIN_XZLIB
644d38c30c0SXin LI private int
645d38c30c0SXin LI uncompressxzlib(const unsigned char *old, unsigned char **newch,
646d38c30c0SXin LI     size_t bytes_max, size_t *n)
647d38c30c0SXin LI {
648d38c30c0SXin LI 	int rc;
649d38c30c0SXin LI 	lzma_stream xz;
650d38c30c0SXin LI 
651d38c30c0SXin LI 	memset(&xz, 0, sizeof(xz));
652d38c30c0SXin LI 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
653d38c30c0SXin LI 	if (rc != LZMA_OK)
654d38c30c0SXin LI 		goto err;
655d38c30c0SXin LI 
656d38c30c0SXin LI 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
657d38c30c0SXin LI 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
658d38c30c0SXin LI 
659d38c30c0SXin LI 	xz.next_in = CCAST(const uint8_t *, old);
660d38c30c0SXin LI 	xz.avail_in = CAST(uint32_t, *n);
661d38c30c0SXin LI 	xz.next_out = RCAST(uint8_t *, *newch);
662d38c30c0SXin LI 	xz.avail_out = CAST(unsigned int, bytes_max);
663d38c30c0SXin LI 
664d38c30c0SXin LI 	rc = lzma_code(&xz, LZMA_RUN);
665d38c30c0SXin LI 	if (rc != LZMA_OK && rc != LZMA_STREAM_END)
666d38c30c0SXin LI 		goto err;
667d38c30c0SXin LI 
668d38c30c0SXin LI 	*n = CAST(size_t, xz.total_out);
669d38c30c0SXin LI 
670d38c30c0SXin LI 	lzma_end(&xz);
671d38c30c0SXin LI 
672d38c30c0SXin LI 	/* let's keep the nul-terminate tradition */
673d38c30c0SXin LI 	(*newch)[*n] = '\0';
674d38c30c0SXin LI 
675d38c30c0SXin LI 	return OKDATA;
676d38c30c0SXin LI err:
677d38c30c0SXin LI 	snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
678d38c30c0SXin LI 	*n = strlen(RCAST(char *, *newch));
679d38c30c0SXin LI 	return ERRDATA;
680d38c30c0SXin LI }
681d38c30c0SXin LI #endif
682d38c30c0SXin LI 
683d38c30c0SXin LI 
6843e41d09dSXin LI static int
6853e41d09dSXin LI makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
686b6cee71dSXin LI {
6873e41d09dSXin LI 	char *msg;
6883e41d09dSXin LI 	va_list ap;
6893e41d09dSXin LI 	int rv;
690b6cee71dSXin LI 
6913e41d09dSXin LI 	va_start(ap, fmt);
6923e41d09dSXin LI 	rv = vasprintf(&msg, fmt, ap);
6933e41d09dSXin LI 	va_end(ap);
6943e41d09dSXin LI 	if (rv < 0) {
6953e41d09dSXin LI 		*buf = NULL;
6963e41d09dSXin LI 		*len = 0;
697b6cee71dSXin LI 		return NODATA;
698b6cee71dSXin LI 	}
69948c779cdSXin LI 	*buf = RCAST(unsigned char *, msg);
7003e41d09dSXin LI 	*len = strlen(msg);
7013e41d09dSXin LI 	return ERRDATA;
702b6cee71dSXin LI }
703b6cee71dSXin LI 
/*
 * Close descriptor fd[i] if it is open and mark the slot as closed (-1).
 * A slot that already holds -1 is left alone.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
712b6cee71dSXin LI 
/*
 * Close both ends of the pipe pair fd[0]/fd[1], skipping ends that are
 * already marked closed.
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
7203e41d09dSXin LI 
/*
 * Make descriptor number i refer to the same file as fd.
 *
 * Returns 0 when fd already is i (nothing to do) and 1 after a successful
 * dup2().  If dup2() fails the process exits with status 1.
 */
static int
copydesc(int i, int fd)
{
	if (fd != i) {
		if (dup2(fd, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", fd, i,
			    strerror(errno));
			exit(1);
		}
		return 1;
	}
	return 0; /* "no dup was necessary" */
}
732b6cee71dSXin LI 
73348c779cdSXin LI static pid_t
73448c779cdSXin LI writechild(int fd, const void *old, size_t n)
7353e41d09dSXin LI {
73648c779cdSXin LI 	pid_t pid;
7373e41d09dSXin LI 
738b6cee71dSXin LI 	/*
739b6cee71dSXin LI 	 * fork again, to avoid blocking because both
740b6cee71dSXin LI 	 * pipes filled
741b6cee71dSXin LI 	 */
74248c779cdSXin LI 	pid = fork();
74348c779cdSXin LI 	if (pid == -1) {
74448c779cdSXin LI 		DPRINTF("Fork failed (%s)\n", strerror(errno));
74548c779cdSXin LI 		exit(1);
74648c779cdSXin LI 	}
74748c779cdSXin LI 	if (pid == 0) {
74848c779cdSXin LI 		/* child */
74948c779cdSXin LI 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
7503e41d09dSXin LI 			DPRINTF("Write failed (%s)\n", strerror(errno));
751b6cee71dSXin LI 			exit(1);
752b6cee71dSXin LI 		}
753b6cee71dSXin LI 		exit(0);
754c2931133SXin LI 	}
75548c779cdSXin LI 	/* parent */
75648c779cdSXin LI 	return pid;
757b6cee71dSXin LI }
758b6cee71dSXin LI 
7593e41d09dSXin LI static ssize_t
7603e41d09dSXin LI filter_error(unsigned char *ubuf, ssize_t n)
7613e41d09dSXin LI {
7623e41d09dSXin LI 	char *p;
7633e41d09dSXin LI 	char *buf;
764c2931133SXin LI 
7653e41d09dSXin LI 	ubuf[n] = '\0';
76648c779cdSXin LI 	buf = RCAST(char *, ubuf);
76748c779cdSXin LI 	while (isspace(CAST(unsigned char, *buf)))
7683e41d09dSXin LI 		buf++;
7693e41d09dSXin LI 	DPRINTF("Filter error[[[%s]]]\n", buf);
77048c779cdSXin LI 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
7713e41d09dSXin LI 		*p = '\0';
77248c779cdSXin LI 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
7733e41d09dSXin LI 		*p = '\0';
77448c779cdSXin LI 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
7753e41d09dSXin LI 		++p;
77648c779cdSXin LI 		while (isspace(CAST(unsigned char, *p)))
7773e41d09dSXin LI 			p++;
7783e41d09dSXin LI 		n = strlen(p);
77940427ccaSGordon Tetlow 		memmove(ubuf, p, CAST(size_t, n + 1));
7803e41d09dSXin LI 	}
7813e41d09dSXin LI 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
7823e41d09dSXin LI 	if (islower(*ubuf))
7833e41d09dSXin LI 		*ubuf = toupper(*ubuf);
784b6cee71dSXin LI 	return n;
785b6cee71dSXin LI }
7863e41d09dSXin LI 
7873e41d09dSXin LI private const char *
7883e41d09dSXin LI methodname(size_t method)
7893e41d09dSXin LI {
790d38c30c0SXin LI 	switch (method) {
7913e41d09dSXin LI #ifdef BUILTIN_DECOMPRESS
792d38c30c0SXin LI 	case METH_FROZEN:
793d38c30c0SXin LI 	case METH_ZLIB:
7943e41d09dSXin LI 		return "zlib";
7953e41d09dSXin LI #endif
796d38c30c0SXin LI #ifdef BUILTIN_BZLIB
797d38c30c0SXin LI 	case METH_BZIP:
798d38c30c0SXin LI 		return "bzlib";
799d38c30c0SXin LI #endif
800d38c30c0SXin LI #ifdef BUILTIN_XZLIB
801d38c30c0SXin LI 	case METH_XZ:
802d38c30c0SXin LI 	case METH_LZMA:
803d38c30c0SXin LI 		return "xzlib";
804d38c30c0SXin LI #endif
805d38c30c0SXin LI 	default:
8063e41d09dSXin LI 		return compr[method].argv[0];
8073e41d09dSXin LI 	}
808d38c30c0SXin LI }
8093e41d09dSXin LI 
8103e41d09dSXin LI private int
8113e41d09dSXin LI uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
8123e41d09dSXin LI     unsigned char **newch, size_t* n)
8133e41d09dSXin LI {
8143e41d09dSXin LI 	int fdp[3][2];
81548c779cdSXin LI 	int status, rv, w;
81648c779cdSXin LI 	pid_t pid;
81748c779cdSXin LI 	pid_t writepid = -1;
8183e41d09dSXin LI 	size_t i;
8193e41d09dSXin LI 	ssize_t r;
8203e41d09dSXin LI 
821d38c30c0SXin LI 	switch (method) {
8223e41d09dSXin LI #ifdef BUILTIN_DECOMPRESS
823d38c30c0SXin LI 	case METH_FROZEN:
8243e41d09dSXin LI 		return uncompressgzipped(old, newch, bytes_max, n);
825d38c30c0SXin LI 	case METH_ZLIB:
8263e41d09dSXin LI 		return uncompresszlib(old, newch, bytes_max, n, 1);
8273e41d09dSXin LI #endif
828d38c30c0SXin LI #ifdef BUILTIN_BZLIB
829d38c30c0SXin LI 	case METH_BZIP:
830d38c30c0SXin LI 		return uncompressbzlib(old, newch, bytes_max, n);
831d38c30c0SXin LI #endif
832d38c30c0SXin LI #ifdef BUILTIN_XZLIB
833d38c30c0SXin LI 	case METH_XZ:
834d38c30c0SXin LI 	case METH_LZMA:
835d38c30c0SXin LI 		return uncompressxzlib(old, newch, bytes_max, n);
836d38c30c0SXin LI #endif
837d38c30c0SXin LI 	default:
838d38c30c0SXin LI 		break;
839d38c30c0SXin LI 	}
840d38c30c0SXin LI 
8413e41d09dSXin LI 	(void)fflush(stdout);
8423e41d09dSXin LI 	(void)fflush(stderr);
8433e41d09dSXin LI 
8443e41d09dSXin LI 	for (i = 0; i < __arraycount(fdp); i++)
8453e41d09dSXin LI 		fdp[i][0] = fdp[i][1] = -1;
8463e41d09dSXin LI 
847*43a5ec4eSXin LI 	/*
848*43a5ec4eSXin LI 	 * There are multithreaded users who run magic_file()
849*43a5ec4eSXin LI 	 * from dozens of threads. If two parallel magic_file() calls
850*43a5ec4eSXin LI 	 * analyze two large compressed files, both will spawn
851*43a5ec4eSXin LI 	 * an uncompressing child here, which writes out uncompressed data.
852*43a5ec4eSXin LI 	 * We read some portion, then close the pipe, then waitpid() the child.
853*43a5ec4eSXin LI 	 * If uncompressed data is larger, child shound get EPIPE and exit.
854*43a5ec4eSXin LI 	 * However, with *parallel* calls OTHER child may unintentionally
855*43a5ec4eSXin LI 	 * inherit pipe fds, thus keeping pipe open and making writes in
856*43a5ec4eSXin LI 	 * our child block instead of failing with EPIPE!
857*43a5ec4eSXin LI 	 * (For the bug to occur, two threads must mutually inherit their pipes,
858*43a5ec4eSXin LI 	 * and both must have large outputs. Thus it happens not that often).
859*43a5ec4eSXin LI 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
860*43a5ec4eSXin LI 	 */
861*43a5ec4eSXin LI 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
862*43a5ec4eSXin LI 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
863*43a5ec4eSXin LI 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
8643e41d09dSXin LI 		closep(fdp[STDIN_FILENO]);
8653e41d09dSXin LI 		closep(fdp[STDOUT_FILENO]);
8663e41d09dSXin LI 		return makeerror(newch, n, "Cannot create pipe, %s",
8673e41d09dSXin LI 		    strerror(errno));
8683e41d09dSXin LI 	}
8693e41d09dSXin LI 
87048c779cdSXin LI 	/* For processes with large mapped virtual sizes, vfork
87148c779cdSXin LI 	 * may be _much_ faster (10-100 times) than fork.
87248c779cdSXin LI 	 */
87348c779cdSXin LI 	pid = vfork();
87448c779cdSXin LI 	if (pid == -1) {
87548c779cdSXin LI 		return makeerror(newch, n, "Cannot vfork, %s",
87648c779cdSXin LI 		    strerror(errno));
87748c779cdSXin LI 	}
87848c779cdSXin LI 	if (pid == 0) {
87948c779cdSXin LI 		/* child */
88048c779cdSXin LI 		/* Note: we are after vfork, do not modify memory
88148c779cdSXin LI 		 * in a way which confuses parent. In particular,
88248c779cdSXin LI 		 * do not modify fdp[i][j].
88348c779cdSXin LI 		 */
88448c779cdSXin LI 		if (fd != -1) {
88548c779cdSXin LI 			(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
88648c779cdSXin LI 			if (copydesc(STDIN_FILENO, fd))
88748c779cdSXin LI 				(void) close(fd);
88848c779cdSXin LI 		} else {
88948c779cdSXin LI 			if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
89048c779cdSXin LI 				(void) close(fdp[STDIN_FILENO][0]);
89148c779cdSXin LI 			if (fdp[STDIN_FILENO][1] > 2)
89248c779cdSXin LI 				(void) close(fdp[STDIN_FILENO][1]);
89348c779cdSXin LI 		}
894*43a5ec4eSXin LI 		file_clear_closexec(STDIN_FILENO);
895*43a5ec4eSXin LI 
89648c779cdSXin LI ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
89748c779cdSXin LI 		if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
89848c779cdSXin LI 			(void) close(fdp[STDOUT_FILENO][1]);
89948c779cdSXin LI 		if (fdp[STDOUT_FILENO][0] > 2)
90048c779cdSXin LI 			(void) close(fdp[STDOUT_FILENO][0]);
901*43a5ec4eSXin LI 		file_clear_closexec(STDOUT_FILENO);
90248c779cdSXin LI 
90348c779cdSXin LI 		if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
90448c779cdSXin LI 			(void) close(fdp[STDERR_FILENO][1]);
90548c779cdSXin LI 		if (fdp[STDERR_FILENO][0] > 2)
90648c779cdSXin LI 			(void) close(fdp[STDERR_FILENO][0]);
907*43a5ec4eSXin LI 		file_clear_closexec(STDERR_FILENO);
9083e41d09dSXin LI 
9093e41d09dSXin LI 		(void)execvp(compr[method].argv[0],
91048c779cdSXin LI 		    RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
9113e41d09dSXin LI 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
9123e41d09dSXin LI 		    compr[method].argv[0], strerror(errno));
91348c779cdSXin LI 		_exit(1); /* _exit(), not exit(), because of vfork */
91448c779cdSXin LI 	}
91548c779cdSXin LI 	/* parent */
91648c779cdSXin LI 	/* Close write sides of child stdout/err pipes */
9173e41d09dSXin LI 	for (i = 1; i < __arraycount(fdp); i++)
9183e41d09dSXin LI 		closefd(fdp[i], 1);
91948c779cdSXin LI 	/* Write the buffer data to child stdin, if we don't have fd */
92048c779cdSXin LI 	if (fd == -1) {
92148c779cdSXin LI 		closefd(fdp[STDIN_FILENO], 0);
92248c779cdSXin LI 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
92348c779cdSXin LI 		closefd(fdp[STDIN_FILENO], 1);
92448c779cdSXin LI 	}
9253e41d09dSXin LI 
9263e41d09dSXin LI 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
9273e41d09dSXin LI 	if (*newch == NULL) {
9283e41d09dSXin LI 		rv = makeerror(newch, n, "No buffer, %s",
9293e41d09dSXin LI 		    strerror(errno));
9303e41d09dSXin LI 		goto err;
9313e41d09dSXin LI 	}
9323e41d09dSXin LI 	rv = OKDATA;
93348c779cdSXin LI 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
93448c779cdSXin LI 	if (r <= 0) {
9353e41d09dSXin LI 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
9363e41d09dSXin LI 		    r != -1 ? strerror(errno) : "no data");
9373e41d09dSXin LI 
9383e41d09dSXin LI 		rv = ERRDATA;
9393e41d09dSXin LI 		if (r == 0 &&
9403e41d09dSXin LI 		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
9413e41d09dSXin LI 		{
9423e41d09dSXin LI 			r = filter_error(*newch, r);
94348c779cdSXin LI 			goto ok;
9443e41d09dSXin LI 		}
9453e41d09dSXin LI 		free(*newch);
9463e41d09dSXin LI 		if  (r == 0)
9473e41d09dSXin LI 			rv = makeerror(newch, n, "Read failed, %s",
9483e41d09dSXin LI 			    strerror(errno));
9493e41d09dSXin LI 		else
9503e41d09dSXin LI 			rv = makeerror(newch, n, "No data");
9513e41d09dSXin LI 		goto err;
9523e41d09dSXin LI 	}
95348c779cdSXin LI ok:
9543e41d09dSXin LI 	*n = r;
9553e41d09dSXin LI 	/* NUL terminate, as every buffer is handled here. */
9563e41d09dSXin LI 	(*newch)[*n] = '\0';
9573e41d09dSXin LI err:
9583e41d09dSXin LI 	closefd(fdp[STDIN_FILENO], 1);
9593e41d09dSXin LI 	closefd(fdp[STDOUT_FILENO], 0);
9603e41d09dSXin LI 	closefd(fdp[STDERR_FILENO], 0);
96148c779cdSXin LI 
96248c779cdSXin LI 	w = waitpid(pid, &status, 0);
96348c779cdSXin LI wait_err:
96448c779cdSXin LI 	if (w == -1) {
9653e41d09dSXin LI 		free(*newch);
9663e41d09dSXin LI 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
9673e41d09dSXin LI 		DPRINTF("Child wait return %#x\n", status);
9683e41d09dSXin LI 	} else if (!WIFEXITED(status)) {
96940427ccaSGordon Tetlow 		DPRINTF("Child not exited (%#x)\n", status);
9703e41d09dSXin LI 	} else if (WEXITSTATUS(status) != 0) {
97140427ccaSGordon Tetlow 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
9723e41d09dSXin LI 	}
97348c779cdSXin LI 	if (writepid > 0) {
97448c779cdSXin LI 		/* _After_ we know decompressor has exited, our input writer
97548c779cdSXin LI 		 * definitely will exit now (at worst, writing fails in it,
97648c779cdSXin LI 		 * since output fd is closed now on the reading size).
97748c779cdSXin LI 		 */
97848c779cdSXin LI 		w = waitpid(writepid, &status, 0);
97948c779cdSXin LI 		writepid = -1;
98048c779cdSXin LI 		goto wait_err;
98148c779cdSXin LI 	}
9823e41d09dSXin LI 
98348c779cdSXin LI 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
98448c779cdSXin LI 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
9853e41d09dSXin LI 
9863e41d09dSXin LI 	return rv;
987b6cee71dSXin LI }
988b6cee71dSXin LI #endif
989