xref: /freebsd/contrib/file/src/compress.c (revision 2dc4dbb9673c9a3309c2dad59cb588c6f04beaea)
1b6cee71dSXin LI /*
2b6cee71dSXin LI  * Copyright (c) Ian F. Darwin 1986-1995.
3b6cee71dSXin LI  * Software written by Ian F. Darwin and others;
4b6cee71dSXin LI  * maintained 1995-present by Christos Zoulas and others.
5b6cee71dSXin LI  *
6b6cee71dSXin LI  * Redistribution and use in source and binary forms, with or without
7b6cee71dSXin LI  * modification, are permitted provided that the following conditions
8b6cee71dSXin LI  * are met:
9b6cee71dSXin LI  * 1. Redistributions of source code must retain the above copyright
10b6cee71dSXin LI  *    notice immediately at the beginning of the file, without modification,
11b6cee71dSXin LI  *    this list of conditions, and the following disclaimer.
12b6cee71dSXin LI  * 2. Redistributions in binary form must reproduce the above copyright
13b6cee71dSXin LI  *    notice, this list of conditions and the following disclaimer in the
14b6cee71dSXin LI  *    documentation and/or other materials provided with the distribution.
15b6cee71dSXin LI  *
16b6cee71dSXin LI  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17b6cee71dSXin LI  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18b6cee71dSXin LI  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19b6cee71dSXin LI  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20b6cee71dSXin LI  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21b6cee71dSXin LI  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22b6cee71dSXin LI  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23b6cee71dSXin LI  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24b6cee71dSXin LI  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25b6cee71dSXin LI  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26b6cee71dSXin LI  * SUCH DAMAGE.
27b6cee71dSXin LI  */
28b6cee71dSXin LI /*
29b6cee71dSXin LI  * compress routines:
30b6cee71dSXin LI  *	zmagic() - returns 0 if not recognized, uncompresses and prints
31b6cee71dSXin LI  *		   information if recognized
32b6cee71dSXin LI  *	uncompress(method, old, n, newch) - uncompress old into new,
33b6cee71dSXin LI  *					    using method, return sizeof new
34b6cee71dSXin LI  */
35b6cee71dSXin LI #include "file.h"
36b6cee71dSXin LI 
37b6cee71dSXin LI #ifndef lint
38*2dc4dbb9SEitan Adler FILE_RCSID("@(#)$File: compress.c,v 1.107 2018/04/28 18:48:22 christos Exp $")
39b6cee71dSXin LI #endif
40b6cee71dSXin LI 
41b6cee71dSXin LI #include "magic.h"
42b6cee71dSXin LI #include <stdlib.h>
43b6cee71dSXin LI #ifdef HAVE_UNISTD_H
44b6cee71dSXin LI #include <unistd.h>
45b6cee71dSXin LI #endif
46b6cee71dSXin LI #include <string.h>
47b6cee71dSXin LI #include <errno.h>
483e41d09dSXin LI #include <ctype.h>
493e41d09dSXin LI #include <stdarg.h>
505f0216bdSXin LI #ifdef HAVE_SIGNAL_H
514460e5b0SXin LI #include <signal.h>
525f0216bdSXin LI # ifndef HAVE_SIG_T
535f0216bdSXin LI typedef void (*sig_t)(int);
545f0216bdSXin LI # endif /* HAVE_SIG_T */
555f0216bdSXin LI #endif
56c2931133SXin LI #if !defined(__MINGW32__) && !defined(WIN32)
57b6cee71dSXin LI #include <sys/ioctl.h>
58b6cee71dSXin LI #endif
59b6cee71dSXin LI #ifdef HAVE_SYS_WAIT_H
60b6cee71dSXin LI #include <sys/wait.h>
61b6cee71dSXin LI #endif
62b6cee71dSXin LI #if defined(HAVE_SYS_TIME_H)
63b6cee71dSXin LI #include <sys/time.h>
64b6cee71dSXin LI #endif
6540427ccaSGordon Tetlow #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
66b6cee71dSXin LI #define BUILTIN_DECOMPRESS
67b6cee71dSXin LI #include <zlib.h>
683e41d09dSXin LI #endif
/*
 * Debug tracing.  When DEBUG is defined, DPRINTF() formats its arguments
 * printf-style onto /dev/tty, opening the terminal on first use and
 * aborting if it cannot be opened.  Without DEBUG the macro expands to
 * nothing, so its arguments are never evaluated.
 */
#ifndef DEBUG
#define DPRINTF(...)
#else
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1 && (tty = open("/dev/tty", O_RDWR)) == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#endif
81b6cee71dSXin LI 
823e41d09dSXin LI #ifdef ZLIBSUPPORT
833e41d09dSXin LI /*
843e41d09dSXin LI  * The following python code is not really used because ZLIBSUPPORT is only
853e41d09dSXin LI  * defined if we have a built-in zlib, and the built-in zlib handles that.
8640427ccaSGordon Tetlow  * That is not true for android where we have zlib.h and not -lz.
873e41d09dSXin LI  */
/* Python one-liner that inflates a zlib stream read from stdin to stdout. */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* NULL-terminated argv that runs the one-liner through "python -c". */
static const char *zlib_args[] = {
	"python", "-c", zlibcode, NULL
};
923e41d09dSXin LI 
/*
 * Report whether buf begins with a plausible zlib (RFC 1950) header.
 *
 * buf must point at at least two readable bytes: CMF followed by FLG.
 * Returns 1 when the compression method is 8 (deflate), the top bit of
 * the window-size field is clear, and CMF * 256 + FLG is a multiple of
 * 31; returns 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The check value is defined on the byte stream itself (CMF is the
	 * high byte), so it must not depend on the byte order of the host.
	 */
	check = ((unsigned int)buf[0] << 8) | buf[1];
	return check % 31 == 0;
}
1093e41d09dSXin LI #endif
1103e41d09dSXin LI 
/* Option strings shared by the decompressor command lines below. */
#define gzip_flags "-cd"
#define lrzip_flags "-do"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors, one per external decompressor;
 * element 0 is the program name handed to execvp().
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", lrzip_flags, NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
139b6cee71dSXin LI 
1403e41d09dSXin LI private const struct {
1413e41d09dSXin LI 	const void *magic;
1423e41d09dSXin LI 	size_t maglen;
1433e41d09dSXin LI 	const char **argv;
1443e41d09dSXin LI } compr[] = {
1453e41d09dSXin LI 	{ "\037\235",	2, gzip_args },		/* compressed */
1463e41d09dSXin LI 	/* Uncompress can get stuck; so use gzip first if we have it
1473e41d09dSXin LI 	 * Idea from Damien Clark, thanks! */
1483e41d09dSXin LI 	{ "\037\235",	2, uncompress_args },	/* compressed */
1493e41d09dSXin LI 	{ "\037\213",	2, gzip_args },		/* gzipped */
1503e41d09dSXin LI 	{ "\037\236",	2, gzip_args },		/* frozen */
1513e41d09dSXin LI 	{ "\037\240",	2, gzip_args },		/* SCO LZH */
1523e41d09dSXin LI 	/* the standard pack utilities do not accept standard input */
1533e41d09dSXin LI 	{ "\037\036",	2, gzip_args },		/* packed */
1543e41d09dSXin LI 	{ "PK\3\4",	4, gzip_args },		/* pkzipped, */
1553e41d09dSXin LI 	/* ...only first file examined */
1563e41d09dSXin LI 	{ "BZh",	3, bzip2_args },	/* bzip2-ed */
1573e41d09dSXin LI 	{ "LZIP",	4, lzip_args },		/* lzip-ed */
1583e41d09dSXin LI  	{ "\3757zXZ\0",	6, xz_args },		/* XZ Utils */
1593e41d09dSXin LI  	{ "LRZI",	4, lrzip_args },	/* LRZIP */
1603e41d09dSXin LI  	{ "\004\"M\030",4, lz4_args },		/* LZ4 */
161a5d223e6SXin LI  	{ "\x28\xB5\x2F\xFD", 4, zstd_args },	/* zstd */
1623e41d09dSXin LI #ifdef ZLIBSUPPORT
163a5d223e6SXin LI 	{ RCAST(const void *, zlibcmp),	0, zlib_args },		/* zlib */
1643e41d09dSXin LI #endif
1653e41d09dSXin LI };
1663e41d09dSXin LI 
1673e41d09dSXin LI #define OKDATA 	0
1683e41d09dSXin LI #define NODATA	1
1693e41d09dSXin LI #define ERRDATA	2
170b6cee71dSXin LI 
171b6cee71dSXin LI private ssize_t swrite(int, const void *, size_t);
172b6cee71dSXin LI #if HAVE_FORK
173b6cee71dSXin LI private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
1743e41d09dSXin LI private int uncompressbuf(int, size_t, size_t, const unsigned char *,
1753e41d09dSXin LI     unsigned char **, size_t *);
176b6cee71dSXin LI #ifdef BUILTIN_DECOMPRESS
1773e41d09dSXin LI private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
1783e41d09dSXin LI     size_t *, int);
1793e41d09dSXin LI private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
1803e41d09dSXin LI     size_t *);
181b6cee71dSXin LI #endif
1823e41d09dSXin LI static int makeerror(unsigned char **, size_t *, const char *, ...)
1833e41d09dSXin LI     __attribute__((__format__(__printf__, 3, 4)));
1843e41d09dSXin LI private const char *methodname(size_t);
185b6cee71dSXin LI 
186*2dc4dbb9SEitan Adler private int
187*2dc4dbb9SEitan Adler format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
188*2dc4dbb9SEitan Adler {
189*2dc4dbb9SEitan Adler 	unsigned char *p;
190*2dc4dbb9SEitan Adler 	int mime = ms->flags & MAGIC_MIME;
191*2dc4dbb9SEitan Adler 
192*2dc4dbb9SEitan Adler 	if (!mime)
193*2dc4dbb9SEitan Adler 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
194*2dc4dbb9SEitan Adler 
195*2dc4dbb9SEitan Adler 	for (p = buf; *p; p++)
196*2dc4dbb9SEitan Adler 		if (!isalnum(*p))
197*2dc4dbb9SEitan Adler 			*p = '-';
198*2dc4dbb9SEitan Adler 
199*2dc4dbb9SEitan Adler 	return file_printf(ms, "application/x-decompression-error-%s-%s",
200*2dc4dbb9SEitan Adler 	    methodname(i), buf);
201*2dc4dbb9SEitan Adler }
202*2dc4dbb9SEitan Adler 
203b6cee71dSXin LI protected int
20458a0f0d0SEitan Adler file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
205b6cee71dSXin LI {
206b6cee71dSXin LI 	unsigned char *newbuf = NULL;
207b6cee71dSXin LI 	size_t i, nsz;
2083e41d09dSXin LI 	char *rbuf;
2093e41d09dSXin LI 	file_pushbuf_t *pb;
21020f8619dSXin LI 	int urv, prv, rv = 0;
211b6cee71dSXin LI 	int mime = ms->flags & MAGIC_MIME;
21258a0f0d0SEitan Adler 	int fd = b->fd;
21358a0f0d0SEitan Adler 	const unsigned char *buf = b->fbuf;
21458a0f0d0SEitan Adler 	size_t nbytes = b->flen;
2155f0216bdSXin LI #ifdef HAVE_SIGNAL_H
2164460e5b0SXin LI 	sig_t osigpipe;
2175f0216bdSXin LI #endif
218b6cee71dSXin LI 
219b6cee71dSXin LI 	if ((ms->flags & MAGIC_COMPRESS) == 0)
220b6cee71dSXin LI 		return 0;
221b6cee71dSXin LI 
2225f0216bdSXin LI #ifdef HAVE_SIGNAL_H
2234460e5b0SXin LI 	osigpipe = signal(SIGPIPE, SIG_IGN);
2245f0216bdSXin LI #endif
225b6cee71dSXin LI 	for (i = 0; i < ncompr; i++) {
2263e41d09dSXin LI 		int zm;
227b6cee71dSXin LI 		if (nbytes < compr[i].maglen)
228b6cee71dSXin LI 			continue;
2293e41d09dSXin LI #ifdef ZLIBSUPPORT
2303e41d09dSXin LI 		if (compr[i].maglen == 0)
231a5d223e6SXin LI 			zm = (RCAST(int (*)(const unsigned char *),
2323e41d09dSXin LI 			    CCAST(void *, compr[i].magic)))(buf);
2333e41d09dSXin LI 		else
2343e41d09dSXin LI #endif
2353e41d09dSXin LI 			zm = memcmp(buf, compr[i].magic, compr[i].maglen) == 0;
236b6cee71dSXin LI 
2373e41d09dSXin LI 		if (!zm)
2383e41d09dSXin LI 			continue;
2393e41d09dSXin LI 		nsz = nbytes;
24020f8619dSXin LI 		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
24120f8619dSXin LI 		DPRINTF("uncompressbuf = %d, %s, %zu\n", urv, (char *)newbuf,
2423e41d09dSXin LI 		    nsz);
24320f8619dSXin LI 		switch (urv) {
2443e41d09dSXin LI 		case OKDATA:
2453e41d09dSXin LI 		case ERRDATA:
2463e41d09dSXin LI 			ms->flags &= ~MAGIC_COMPRESS;
24720f8619dSXin LI 			if (urv == ERRDATA)
248*2dc4dbb9SEitan Adler 				prv = format_decompression_error(ms, i, newbuf);
2493e41d09dSXin LI 			else
25020f8619dSXin LI 				prv = file_buffer(ms, -1, name, newbuf, nsz);
25120f8619dSXin LI 			if (prv == -1)
2523e41d09dSXin LI 				goto error;
25320f8619dSXin LI 			rv = 1;
2543e41d09dSXin LI 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
2553e41d09dSXin LI 				goto out;
2563e41d09dSXin LI 			if (mime != MAGIC_MIME && mime != 0)
2573e41d09dSXin LI 				goto out;
2583e41d09dSXin LI 			if ((file_printf(ms,
2593e41d09dSXin LI 			    mime ? " compressed-encoding=" : " (")) == -1)
2603e41d09dSXin LI 				goto error;
2613e41d09dSXin LI 			if ((pb = file_push_buffer(ms)) == NULL)
262b6cee71dSXin LI 				goto error;
26320f8619dSXin LI 			/*
26420f8619dSXin LI 			 * XXX: If file_buffer fails here, we overwrite
26520f8619dSXin LI 			 * the compressed text. FIXME.
26620f8619dSXin LI 			 */
267b6cee71dSXin LI 			if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
268b6cee71dSXin LI 				goto error;
2693e41d09dSXin LI 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
2703e41d09dSXin LI 				if (file_printf(ms, "%s", rbuf) == -1) {
2713e41d09dSXin LI 					free(rbuf);
272b6cee71dSXin LI 					goto error;
273b6cee71dSXin LI 				}
2743e41d09dSXin LI 				free(rbuf);
2753e41d09dSXin LI 			}
2763e41d09dSXin LI 			if (!mime && file_printf(ms, ")") == -1)
2773e41d09dSXin LI 				goto error;
27820f8619dSXin LI 			/*FALLTHROUGH*/
2793e41d09dSXin LI 		case NODATA:
28020f8619dSXin LI 			break;
2813e41d09dSXin LI 		default:
2823e41d09dSXin LI 			abort();
28320f8619dSXin LI 			/*NOTREACHED*/
28420f8619dSXin LI 		error:
28520f8619dSXin LI 			rv = -1;
28620f8619dSXin LI 			break;
2873e41d09dSXin LI 		}
2883e41d09dSXin LI 	}
2893e41d09dSXin LI out:
29020f8619dSXin LI 	DPRINTF("rv = %d\n", rv);
29120f8619dSXin LI 
2925f0216bdSXin LI #ifdef HAVE_SIGNAL_H
2934460e5b0SXin LI 	(void)signal(SIGPIPE, osigpipe);
2945f0216bdSXin LI #endif
295b6cee71dSXin LI 	free(newbuf);
296b6cee71dSXin LI 	ms->flags |= MAGIC_COMPRESS;
2973e41d09dSXin LI 	DPRINTF("Zmagic returns %d\n", rv);
298b6cee71dSXin LI 	return rv;
299b6cee71dSXin LI }
300b6cee71dSXin LI #endif
/*
 * `safe' write for sockets and pipes: keep calling write(2) until all
 * `n' bytes of `buf' have been written, restarting after EINTR.
 * Returns `n' on success and -1 on any other write error.
 */
private ssize_t
swrite(int fd, const void *buf, size_t n)
{
	const char *cursor = buf;
	size_t left = n;

	do {
		ssize_t wrote = write(fd, cursor, left);

		if (wrote == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		left -= wrote;
		cursor += wrote;
	} while (left > 0);
	return n;
}
324b6cee71dSXin LI 
325b6cee71dSXin LI 
/*
 * `safe' read for sockets and pipes.
 *
 * Reads up to `n' bytes from `fd' into `buf', restarting after EINTR and
 * continuing until the request is satisfied or end of file is reached.
 * When `canbepipe' is non-zero, FIONREAD is available and `fd' is not
 * standard input, the descriptor is first polled with select() so the
 * read is not attempted when nothing is pending, and the request is
 * shrunk to the byte count FIONREAD reports.
 *
 * Returns the number of bytes read (0 if nothing became available), or
 * -1 on a read or select error.
 */
protected ssize_t
sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
{
	ssize_t rv;
#ifdef FIONREAD
	int t = 0;
#endif
	size_t rn = n;

	if (fd == STDIN_FILENO)
		goto nocheck;

#ifdef FIONREAD
	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
#ifdef FD_ZERO
		ssize_t cnt;
		for (cnt = 0;; cnt++) {
			fd_set check;
			struct timeval tout = {0, 100 * 1000};
			int selrv;

			FD_ZERO(&check);
			FD_SET(fd, &check);

			/*
			 * Avoid soft deadlock: do not read if there
			 * is nothing to read from sockets and pipes.
			 */
			selrv = select(fd + 1, &check, NULL, NULL, &tout);
			if (selrv == -1) {
				if (errno == EINTR || errno == EAGAIN)
					continue;
				/*
				 * Any other failure (e.g. EBADF) will not
				 * go away by retrying; report it instead
				 * of spinning here forever.
				 */
				return -1;
			} else if (selrv == 0 && cnt >= 5) {
				return 0;
			} else
				break;
		}
#endif
		(void)ioctl(fd, FIONREAD, &t);
	}

	/* Never ask for more than the kernel says is pending. */
	if (t > 0 && (size_t)t < n) {
		n = t;
		rn = n;
	}
#endif

nocheck:
	do
		switch ((rv = read(fd, buf, n))) {
		case -1:
			if (errno == EINTR)
				continue;
			return -1;
		case 0:
			/* end of file: report what was gathered so far */
			return rn - n;
		default:
			n -= rv;
			buf = (char *)buf + rv;
			break;
		}
	while (n > 0);
	return rn;
}
393b6cee71dSXin LI 
394b6cee71dSXin LI protected int
395b6cee71dSXin LI file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
396b6cee71dSXin LI     size_t nbytes)
397b6cee71dSXin LI {
398b6cee71dSXin LI 	char buf[4096];
399b6cee71dSXin LI 	ssize_t r;
400b6cee71dSXin LI 	int tfd;
401b6cee71dSXin LI 
402b6cee71dSXin LI 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
403b6cee71dSXin LI #ifndef HAVE_MKSTEMP
404b6cee71dSXin LI 	{
405b6cee71dSXin LI 		char *ptr = mktemp(buf);
406b6cee71dSXin LI 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
407b6cee71dSXin LI 		r = errno;
408b6cee71dSXin LI 		(void)unlink(ptr);
409b6cee71dSXin LI 		errno = r;
410b6cee71dSXin LI 	}
411b6cee71dSXin LI #else
412b6cee71dSXin LI 	{
413b6cee71dSXin LI 		int te;
414b6cee71dSXin LI 		tfd = mkstemp(buf);
415b6cee71dSXin LI 		te = errno;
416b6cee71dSXin LI 		(void)unlink(buf);
417b6cee71dSXin LI 		errno = te;
418b6cee71dSXin LI 	}
419b6cee71dSXin LI #endif
420b6cee71dSXin LI 	if (tfd == -1) {
421b6cee71dSXin LI 		file_error(ms, errno,
422b6cee71dSXin LI 		    "cannot create temporary file for pipe copy");
423b6cee71dSXin LI 		return -1;
424b6cee71dSXin LI 	}
425b6cee71dSXin LI 
426b6cee71dSXin LI 	if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
427b6cee71dSXin LI 		r = 1;
428b6cee71dSXin LI 	else {
429b6cee71dSXin LI 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
430b6cee71dSXin LI 			if (swrite(tfd, buf, (size_t)r) != r)
431b6cee71dSXin LI 				break;
432b6cee71dSXin LI 	}
433b6cee71dSXin LI 
434b6cee71dSXin LI 	switch (r) {
435b6cee71dSXin LI 	case -1:
436b6cee71dSXin LI 		file_error(ms, errno, "error copying from pipe to temp file");
437b6cee71dSXin LI 		return -1;
438b6cee71dSXin LI 	case 0:
439b6cee71dSXin LI 		break;
440b6cee71dSXin LI 	default:
441b6cee71dSXin LI 		file_error(ms, errno, "error while writing to temp file");
442b6cee71dSXin LI 		return -1;
443b6cee71dSXin LI 	}
444b6cee71dSXin LI 
445b6cee71dSXin LI 	/*
446b6cee71dSXin LI 	 * We duplicate the file descriptor, because fclose on a
447b6cee71dSXin LI 	 * tmpfile will delete the file, but any open descriptors
448b6cee71dSXin LI 	 * can still access the phantom inode.
449b6cee71dSXin LI 	 */
450b6cee71dSXin LI 	if ((fd = dup2(tfd, fd)) == -1) {
451b6cee71dSXin LI 		file_error(ms, errno, "could not dup descriptor for temp file");
452b6cee71dSXin LI 		return -1;
453b6cee71dSXin LI 	}
454b6cee71dSXin LI 	(void)close(tfd);
455b6cee71dSXin LI 	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
456b6cee71dSXin LI 		file_badseek(ms);
457b6cee71dSXin LI 		return -1;
458b6cee71dSXin LI 	}
459b6cee71dSXin LI 	return fd;
460b6cee71dSXin LI }
461b6cee71dSXin LI #if HAVE_FORK
462b6cee71dSXin LI #ifdef BUILTIN_DECOMPRESS
463b6cee71dSXin LI 
464b6cee71dSXin LI #define FHCRC		(1 << 1)
465b6cee71dSXin LI #define FEXTRA		(1 << 2)
466b6cee71dSXin LI #define FNAME		(1 << 3)
467b6cee71dSXin LI #define FCOMMENT	(1 << 4)
468b6cee71dSXin LI 
4693e41d09dSXin LI 
4703e41d09dSXin LI private int
4713e41d09dSXin LI uncompressgzipped(const unsigned char *old, unsigned char **newch,
4723e41d09dSXin LI     size_t bytes_max, size_t *n)
473b6cee71dSXin LI {
474b6cee71dSXin LI 	unsigned char flg = old[3];
475b6cee71dSXin LI 	size_t data_start = 10;
476b6cee71dSXin LI 
477b6cee71dSXin LI 	if (flg & FEXTRA) {
4783e41d09dSXin LI 		if (data_start + 1 >= *n)
4793e41d09dSXin LI 			goto err;
480b6cee71dSXin LI 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
481b6cee71dSXin LI 	}
482b6cee71dSXin LI 	if (flg & FNAME) {
4833e41d09dSXin LI 		while(data_start < *n && old[data_start])
484b6cee71dSXin LI 			data_start++;
485b6cee71dSXin LI 		data_start++;
486b6cee71dSXin LI 	}
487b6cee71dSXin LI 	if (flg & FCOMMENT) {
4883e41d09dSXin LI 		while(data_start < *n && old[data_start])
489b6cee71dSXin LI 			data_start++;
490b6cee71dSXin LI 		data_start++;
491b6cee71dSXin LI 	}
492b6cee71dSXin LI 	if (flg & FHCRC)
493b6cee71dSXin LI 		data_start += 2;
494b6cee71dSXin LI 
4953e41d09dSXin LI 	if (data_start >= *n)
4963e41d09dSXin LI 		goto err;
4973e41d09dSXin LI 
4983e41d09dSXin LI 	*n -= data_start;
4993e41d09dSXin LI 	old += data_start;
5003e41d09dSXin LI 	return uncompresszlib(old, newch, bytes_max, n, 0);
5013e41d09dSXin LI err:
5023e41d09dSXin LI 	return makeerror(newch, n, "File too short");
503b6cee71dSXin LI }
504b6cee71dSXin LI 
5053e41d09dSXin LI private int
5063e41d09dSXin LI uncompresszlib(const unsigned char *old, unsigned char **newch,
5073e41d09dSXin LI     size_t bytes_max, size_t *n, int zlib)
5083e41d09dSXin LI {
5093e41d09dSXin LI 	int rc;
5103e41d09dSXin LI 	z_stream z;
5113e41d09dSXin LI 
5123e41d09dSXin LI 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
5133e41d09dSXin LI 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
5143e41d09dSXin LI 
5153e41d09dSXin LI 	z.next_in = CCAST(Bytef *, old);
5163e41d09dSXin LI 	z.avail_in = CAST(uint32_t, *n);
517b6cee71dSXin LI 	z.next_out = *newch;
51840427ccaSGordon Tetlow 	z.avail_out = CAST(unsigned int, bytes_max);
519b6cee71dSXin LI 	z.zalloc = Z_NULL;
520b6cee71dSXin LI 	z.zfree = Z_NULL;
521b6cee71dSXin LI 	z.opaque = Z_NULL;
522b6cee71dSXin LI 
523b6cee71dSXin LI 	/* LINTED bug in header macro */
5243e41d09dSXin LI 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
5253e41d09dSXin LI 	if (rc != Z_OK)
5263e41d09dSXin LI 		goto err;
527b6cee71dSXin LI 
528b6cee71dSXin LI 	rc = inflate(&z, Z_SYNC_FLUSH);
5293e41d09dSXin LI 	if (rc != Z_OK && rc != Z_STREAM_END)
5303e41d09dSXin LI 		goto err;
531b6cee71dSXin LI 
5323e41d09dSXin LI 	*n = (size_t)z.total_out;
5333e41d09dSXin LI 	rc = inflateEnd(&z);
5343e41d09dSXin LI 	if (rc != Z_OK)
5353e41d09dSXin LI 		goto err;
536b6cee71dSXin LI 
537b6cee71dSXin LI 	/* let's keep the nul-terminate tradition */
5383e41d09dSXin LI 	(*newch)[*n] = '\0';
539b6cee71dSXin LI 
5403e41d09dSXin LI 	return OKDATA;
5413e41d09dSXin LI err:
542a5d223e6SXin LI 	strlcpy((char *)*newch, z.msg ? z.msg : zError(rc), bytes_max);
5433e41d09dSXin LI 	*n = strlen((char *)*newch);
5443e41d09dSXin LI 	return ERRDATA;
545b6cee71dSXin LI }
546b6cee71dSXin LI #endif
547b6cee71dSXin LI 
5483e41d09dSXin LI static int
5493e41d09dSXin LI makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
550b6cee71dSXin LI {
5513e41d09dSXin LI 	char *msg;
5523e41d09dSXin LI 	va_list ap;
5533e41d09dSXin LI 	int rv;
554b6cee71dSXin LI 
5553e41d09dSXin LI 	va_start(ap, fmt);
5563e41d09dSXin LI 	rv = vasprintf(&msg, fmt, ap);
5573e41d09dSXin LI 	va_end(ap);
5583e41d09dSXin LI 	if (rv < 0) {
5593e41d09dSXin LI 		*buf = NULL;
5603e41d09dSXin LI 		*len = 0;
561b6cee71dSXin LI 		return NODATA;
562b6cee71dSXin LI 	}
5633e41d09dSXin LI 	*buf = (unsigned char *)msg;
5643e41d09dSXin LI 	*len = strlen(msg);
5653e41d09dSXin LI 	return ERRDATA;
566b6cee71dSXin LI }
567b6cee71dSXin LI 
/*
 * Close descriptor slot `i' of the array `fd' if it is open, and mark the
 * slot as closed (-1) so that a later call is a no-op.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
576b6cee71dSXin LI 
/* Close both ends of the pipe pair `fd' (slots 0 and 1). */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
5843e41d09dSXin LI 
/*
 * Install one end of the pipe pair `fd' as descriptor `i': the read end
 * (slot 0) when `i' is standard input, the write end (slot 1) otherwise.
 * Nothing is done if that end already is descriptor `i'.  After a
 * successful dup2() both original pipe ends are closed; if dup2() fails
 * the process exits with status 1.
 */
static void
copydesc(int i, int *fd)
{
	int src;

	if (i == STDIN_FILENO)
		src = fd[0];
	else
		src = fd[1];

	if (src != i) {
		if (dup2(src, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", src, i,
			    strerror(errno));
			exit(1);
		}
		closep(fd);
	}
}
597b6cee71dSXin LI 
/*
 * Feed `n' bytes at `old' to the decompressor's standard input.
 *
 * The data is written by a separately forked process, which is then
 * waited for.  On return both ends of the stdin pipe are closed in the
 * calling process.  Failures to fork or wait are only traced: the write
 * end is closed regardless, so the decompressor sees end of file on its
 * input, and the calling process is never terminated from here.
 */
static void
writechild(int fdp[3][2], const void *old, size_t n)
{
	int status = 0;
	pid_t pid;

	closefd(fdp[STDIN_FILENO], 0);
	/*
	 * fork again, to avoid blocking because both
	 * pipes filled
	 */
	switch ((pid = fork())) {
	case 0: /* child */
		closefd(fdp[STDOUT_FILENO], 0);
		if (swrite(fdp[STDIN_FILENO][1], old, n) != (ssize_t)n) {
			DPRINTF("Write failed (%s)\n", strerror(errno));
			/* _exit: do not run the parent's atexit handlers */
			_exit(1);
		}
		_exit(0);
		/*NOTREACHED*/

	case -1:
		DPRINTF("Fork failed (%s)\n", strerror(errno));
		break;

	default:  /* parent */
		/*
		 * Wait for the writer specifically, so that the
		 * decompressor's exit status is left for our caller.
		 */
		while (waitpid(pid, &status, 0) == -1) {
			if (errno == EINTR)
				continue;
			DPRINTF("Wait failed (%s)\n", strerror(errno));
			break;
		}
		DPRINTF("Grandchild wait return %#x\n", status);
	}
	closefd(fdp[STDIN_FILENO], 1);
}
632b6cee71dSXin LI 
/*
 * Reduce a decompressor's stderr output to a short message, in place.
 *
 * `ubuf' holds `n' bytes of output and must have room for one more byte.
 * Leading whitespace is skipped, the text is cut at the first newline and
 * at the first ';', and if a ':' remains only what follows the last one
 * (minus leading whitespace) is kept.  The result is moved to the start
 * of `ubuf', its first letter is upper-cased, and its length is returned.
 */
static ssize_t
filter_error(unsigned char *ubuf, ssize_t n)
{
	char *p;
	char *buf;

	ubuf[n] = '\0';
	buf = (char *)ubuf;
	while (isspace((unsigned char)*buf))
		buf++;
	DPRINTF("Filter error[[[%s]]]\n", buf);
	if ((p = strchr(buf, '\n')) != NULL)
		*p = '\0';
	if ((p = strchr(buf, ';')) != NULL)
		*p = '\0';
	if ((p = strrchr(buf, ':')) != NULL) {
		/* drop the "program: file:" style prefix */
		++p;
		while (isspace((unsigned char)*p))
			p++;
		buf = p;
	}
	/*
	 * Always shift the kept text to the front and report its real
	 * length, including when no ':' was present.
	 */
	n = (ssize_t)strlen(buf);
	memmove(ubuf, buf, (size_t)n + 1);
	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
	if (islower(*ubuf))
		*ubuf = toupper(*ubuf);
	return n;
}
6603e41d09dSXin LI 
6613e41d09dSXin LI private const char *
6623e41d09dSXin LI methodname(size_t method)
6633e41d09dSXin LI {
6643e41d09dSXin LI #ifdef BUILTIN_DECOMPRESS
6653e41d09dSXin LI         /* FIXME: This doesn't cope with bzip2 */
6663e41d09dSXin LI 	if (method == 2 || compr[method].maglen == 0)
6673e41d09dSXin LI 	    return "zlib";
6683e41d09dSXin LI #endif
6693e41d09dSXin LI 	return compr[method].argv[0];
6703e41d09dSXin LI }
6713e41d09dSXin LI 
6723e41d09dSXin LI private int
6733e41d09dSXin LI uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
6743e41d09dSXin LI     unsigned char **newch, size_t* n)
6753e41d09dSXin LI {
6763e41d09dSXin LI 	int fdp[3][2];
6773e41d09dSXin LI 	int status, rv;
6783e41d09dSXin LI 	size_t i;
6793e41d09dSXin LI 	ssize_t r;
6803e41d09dSXin LI 
6813e41d09dSXin LI #ifdef BUILTIN_DECOMPRESS
6823e41d09dSXin LI         /* FIXME: This doesn't cope with bzip2 */
6833e41d09dSXin LI 	if (method == 2)
6843e41d09dSXin LI 		return uncompressgzipped(old, newch, bytes_max, n);
6853e41d09dSXin LI 	if (compr[method].maglen == 0)
6863e41d09dSXin LI 		return uncompresszlib(old, newch, bytes_max, n, 1);
6873e41d09dSXin LI #endif
6883e41d09dSXin LI 	(void)fflush(stdout);
6893e41d09dSXin LI 	(void)fflush(stderr);
6903e41d09dSXin LI 
6913e41d09dSXin LI 	for (i = 0; i < __arraycount(fdp); i++)
6923e41d09dSXin LI 		fdp[i][0] = fdp[i][1] = -1;
6933e41d09dSXin LI 
6943e41d09dSXin LI 	if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
6953e41d09dSXin LI 	    pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
6963e41d09dSXin LI 		closep(fdp[STDIN_FILENO]);
6973e41d09dSXin LI 		closep(fdp[STDOUT_FILENO]);
6983e41d09dSXin LI 		return makeerror(newch, n, "Cannot create pipe, %s",
6993e41d09dSXin LI 		    strerror(errno));
7003e41d09dSXin LI 	}
7013e41d09dSXin LI 	switch (fork()) {
7023e41d09dSXin LI 	case 0:	/* child */
7033e41d09dSXin LI 		if (fd != -1) {
7043e41d09dSXin LI 			fdp[STDIN_FILENO][0] = fd;
7053e41d09dSXin LI 			(void) lseek(fd, (off_t)0, SEEK_SET);
7063e41d09dSXin LI 		}
7073e41d09dSXin LI 
7083e41d09dSXin LI 		for (i = 0; i < __arraycount(fdp); i++)
70940427ccaSGordon Tetlow 			copydesc(CAST(int, i), fdp[i]);
7103e41d09dSXin LI 
7113e41d09dSXin LI 		(void)execvp(compr[method].argv[0],
7123e41d09dSXin LI 		    (char *const *)(intptr_t)compr[method].argv);
7133e41d09dSXin LI 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
7143e41d09dSXin LI 		    compr[method].argv[0], strerror(errno));
7153e41d09dSXin LI 		exit(1);
7163e41d09dSXin LI 		/*NOTREACHED*/
7173e41d09dSXin LI 	case -1:
7183e41d09dSXin LI 		return makeerror(newch, n, "Cannot fork, %s",
7193e41d09dSXin LI 		    strerror(errno));
7203e41d09dSXin LI 
7213e41d09dSXin LI 	default: /* parent */
7223e41d09dSXin LI 		for (i = 1; i < __arraycount(fdp); i++)
7233e41d09dSXin LI 			closefd(fdp[i], 1);
7243e41d09dSXin LI 
7253e41d09dSXin LI 		/* Write the buffer data to the child, if we don't have fd */
7263e41d09dSXin LI 		if (fd == -1)
7273e41d09dSXin LI 			writechild(fdp, old, *n);
7283e41d09dSXin LI 
7293e41d09dSXin LI 		*newch = CAST(unsigned char *, malloc(bytes_max + 1));
7303e41d09dSXin LI 		if (*newch == NULL) {
7313e41d09dSXin LI 			rv = makeerror(newch, n, "No buffer, %s",
7323e41d09dSXin LI 			    strerror(errno));
7333e41d09dSXin LI 			goto err;
7343e41d09dSXin LI 		}
7353e41d09dSXin LI 		rv = OKDATA;
7363e41d09dSXin LI 		if ((r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0)) > 0)
7373e41d09dSXin LI 			break;
7383e41d09dSXin LI 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
7393e41d09dSXin LI 		    r != -1 ? strerror(errno) : "no data");
7403e41d09dSXin LI 
7413e41d09dSXin LI 		rv = ERRDATA;
7423e41d09dSXin LI 		if (r == 0 &&
7433e41d09dSXin LI 		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
7443e41d09dSXin LI 		{
7453e41d09dSXin LI 			r = filter_error(*newch, r);
7463e41d09dSXin LI 			break;
7473e41d09dSXin LI 		}
7483e41d09dSXin LI 		free(*newch);
7493e41d09dSXin LI 		if  (r == 0)
7503e41d09dSXin LI 			rv = makeerror(newch, n, "Read failed, %s",
7513e41d09dSXin LI 			    strerror(errno));
7523e41d09dSXin LI 		else
7533e41d09dSXin LI 			rv = makeerror(newch, n, "No data");
7543e41d09dSXin LI 		goto err;
7553e41d09dSXin LI 	}
7563e41d09dSXin LI 
7573e41d09dSXin LI 	*n = r;
7583e41d09dSXin LI 	/* NUL terminate, as every buffer is handled here. */
7593e41d09dSXin LI 	(*newch)[*n] = '\0';
7603e41d09dSXin LI err:
7613e41d09dSXin LI 	closefd(fdp[STDIN_FILENO], 1);
7623e41d09dSXin LI 	closefd(fdp[STDOUT_FILENO], 0);
7633e41d09dSXin LI 	closefd(fdp[STDERR_FILENO], 0);
7643e41d09dSXin LI 	if (wait(&status) == -1) {
7653e41d09dSXin LI 		free(*newch);
7663e41d09dSXin LI 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
7673e41d09dSXin LI 		DPRINTF("Child wait return %#x\n", status);
7683e41d09dSXin LI 	} else if (!WIFEXITED(status)) {
76940427ccaSGordon Tetlow 		DPRINTF("Child not exited (%#x)\n", status);
7703e41d09dSXin LI 	} else if (WEXITSTATUS(status) != 0) {
77140427ccaSGordon Tetlow 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
7723e41d09dSXin LI 	}
7733e41d09dSXin LI 
7743e41d09dSXin LI 	closefd(fdp[STDIN_FILENO], 0);
7753e41d09dSXin LI 	DPRINTF("Returning %p n=%zu rv=%d\n", *newch, *n, rv);
7763e41d09dSXin LI 
7773e41d09dSXin LI 	return rv;
778b6cee71dSXin LI }
779b6cee71dSXin LI #endif
780