xref: /freebsd/contrib/file/src/compress.c (revision d4eeb02986980bf33dd56c41ceb9fc5f180c0d47)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized
 *	uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress old
 *		   into newch using method; returns OKDATA, NODATA or
 *		   ERRDATA and stores the size of the new buffer in *n
 */
35 #include "file.h"
36 
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.135 2022/04/11 18:14:41 christos Exp $")
39 #endif
40 
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <string.h>
50 #include <errno.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <signal.h>
54 #ifndef HAVE_SIG_T
55 typedef void (*sig_t)(int);
56 #endif /* HAVE_SIG_T */
57 #ifdef HAVE_SYS_IOCTL_H
58 #include <sys/ioctl.h>
59 #endif
60 #ifdef HAVE_SYS_WAIT_H
61 #include <sys/wait.h>
62 #endif
63 #if defined(HAVE_SYS_TIME_H)
64 #include <sys/time.h>
65 #endif
66 
67 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
68 #define BUILTIN_DECOMPRESS
69 #include <zlib.h>
70 #endif
71 
72 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
73 #define BUILTIN_BZLIB
74 #include <bzlib.h>
75 #endif
76 
77 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
78 #define BUILTIN_XZLIB
79 #include <lzma.h>
80 #endif
81 
#ifdef DEBUG
/*
 * Debug tracing. Messages are written to /dev/tty, which is opened
 * lazily by the first trace; the process aborts if it cannot be opened.
 */
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1 && (tty = open("/dev/tty", O_RDWR)) == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
/* Without DEBUG every trace statement expands to nothing. */
#define DPRINTF(...)
#endif
94 
95 #ifdef ZLIBSUPPORT
/*
 * External fallback for raw zlib streams: a python one-liner that inflates
 * stdin to stdout. It is not really used, because ZLIBSUPPORT is only
 * defined if we have a built-in zlib and the built-in zlib handles that.
 * That is not true for android, where we have zlib.h and not -lz.
 *
 * The script goes through the binary `buffer' objects of stdin/stdout when
 * they exist (Python 3) and through the plain stream objects otherwise
 * (Python 2), so the compressed bytes are never handled as text.
 */
static const char zlibcode[] =
    "import sys, zlib; "
    "i = getattr(sys.stdin, 'buffer', sys.stdin); "
    "o = getattr(sys.stdout, 'buffer', sys.stdout); "
    "o.write(zlib.decompress(i.read()))";

/* Argument vector handed to the spawn/exec call for the zlib table entry. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
105 
/*
 * Check whether buf starts with a plausible zlib (RFC 1950) header.
 *
 * buf must hold at least two bytes: CMF followed by FLG. The header is
 * accepted when the low nibble of CMF is 8, bit 7 of CMF is clear, and
 * the 16-bit value CMF * 256 + FLG is a multiple of 31.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int header;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The check value is defined on the bytes in stream order (CMF is
	 * the most significant byte). It is computed arithmetically, so the
	 * result must not depend on the byte order of the host.
	 */
	header = buf[0];
	header = (header << 8) | buf[1];
	if (header % 31)
		return 0;
	return 1;
}
122 #endif
123 
/*
 * Matcher used by the lzma entry of the compr[] table.
 *
 * buf must hold at least 13 bytes. The buffer matches when its first three
 * bytes are 0x5d 0x00 0x00 and the byte at offset 12 is either 0x00 or
 * 0xff. Bytes 3..11 are not examined.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	if (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0)
		return buf[12] == 0 || buf[12] == 0xff;
	return 0;
}
133 
/*
 * Command lines of the external decompressors. Each vector is
 * NULL-terminated, starts with the program name and is referenced from
 * the compr[] table below.
 */
#define gzip_flags "-cd"
#define lrzip_flags "-do"
#define lzip_flags gzip_flags

static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", lrzip_flags, NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
162 
163 #define	do_zlib		NULL
164 #define	do_bzlib	NULL
165 
166 private const struct {
167 	union {
168 		const char *magic;
169 		int (*func)(const unsigned char *);
170 	} u;
171 	int maglen;
172 	const char **argv;
173 	void *unused;
174 } compr[] = {
175 #define METH_FROZEN	2
176 #define METH_BZIP	7
177 #define METH_XZ		9
178 #define METH_LZMA	13
179 #define METH_ZLIB	14
180     { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
181     /* Uncompress can get stuck; so use gzip first if we have it
182      * Idea from Damien Clark, thanks! */
183     { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
184     { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
185     { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
186     { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
187     /* the standard pack utilities do not accept standard input */
188     { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
189     { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
190     /* ...only first file examined */
191     { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
192     { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
193     { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
194     { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
195     { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
196     { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
197     { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
198 #ifdef ZLIBSUPPORT
199     { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
200 #endif
201 };
202 
203 #define OKDATA 	0
204 #define NODATA	1
205 #define ERRDATA	2
206 
207 private ssize_t swrite(int, const void *, size_t);
208 #if HAVE_FORK
209 private size_t ncompr = __arraycount(compr);
210 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
211     unsigned char **, size_t *);
212 #ifdef BUILTIN_DECOMPRESS
213 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
214     size_t *, int);
215 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
216     size_t *);
217 #endif
218 #ifdef BUILTIN_BZLIB
219 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
220     size_t *);
221 #endif
222 #ifdef BUILTIN_XZLIB
223 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
224     size_t *);
225 #endif
226 
227 static int makeerror(unsigned char **, size_t *, const char *, ...)
228     __attribute__((__format__(__printf__, 3, 4)));
229 private const char *methodname(size_t);
230 
231 private int
232 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
233 {
234 	unsigned char *p;
235 	int mime = ms->flags & MAGIC_MIME;
236 
237 	if (!mime)
238 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
239 
240 	for (p = buf; *p; p++)
241 		if (!isalnum(*p))
242 			*p = '-';
243 
244 	return file_printf(ms, "application/x-decompression-error-%s-%s",
245 	    methodname(i), buf);
246 }
247 
248 protected int
249 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
250 {
251 	unsigned char *newbuf = NULL;
252 	size_t i, nsz;
253 	char *rbuf;
254 	file_pushbuf_t *pb;
255 	int urv, prv, rv = 0;
256 	int mime = ms->flags & MAGIC_MIME;
257 	int fd = b->fd;
258 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
259 	size_t nbytes = b->flen;
260 	int sa_saved = 0;
261 	struct sigaction sig_act;
262 
263 	if ((ms->flags & MAGIC_COMPRESS) == 0)
264 		return 0;
265 
266 	for (i = 0; i < ncompr; i++) {
267 		int zm;
268 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
269 			continue;
270 		if (compr[i].maglen < 0) {
271 			zm = (*compr[i].u.func)(buf);
272 		} else {
273 			zm = memcmp(buf, compr[i].u.magic,
274 			    CAST(size_t, compr[i].maglen)) == 0;
275 		}
276 
277 		if (!zm)
278 			continue;
279 
280 		/* Prevent SIGPIPE death if child dies unexpectedly */
281 		if (!sa_saved) {
282 			//We can use sig_act for both new and old, but
283 			struct sigaction new_act;
284 			memset(&new_act, 0, sizeof(new_act));
285 			new_act.sa_handler = SIG_IGN;
286 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
287 		}
288 
289 		nsz = nbytes;
290 		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
291 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
292 		    (char *)newbuf, nsz);
293 		switch (urv) {
294 		case OKDATA:
295 		case ERRDATA:
296 			ms->flags &= ~MAGIC_COMPRESS;
297 			if (urv == ERRDATA)
298 				prv = format_decompression_error(ms, i, newbuf);
299 			else
300 				prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
301 			if (prv == -1)
302 				goto error;
303 			rv = 1;
304 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
305 				goto out;
306 			if (mime != MAGIC_MIME && mime != 0)
307 				goto out;
308 			if ((file_printf(ms,
309 			    mime ? " compressed-encoding=" : " (")) == -1)
310 				goto error;
311 			if ((pb = file_push_buffer(ms)) == NULL)
312 				goto error;
313 			/*
314 			 * XXX: If file_buffer fails here, we overwrite
315 			 * the compressed text. FIXME.
316 			 */
317 			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
318 				if (file_pop_buffer(ms, pb) != NULL)
319 					abort();
320 				goto error;
321 			}
322 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
323 				if (file_printf(ms, "%s", rbuf) == -1) {
324 					free(rbuf);
325 					goto error;
326 				}
327 				free(rbuf);
328 			}
329 			if (!mime && file_printf(ms, ")") == -1)
330 				goto error;
331 			/*FALLTHROUGH*/
332 		case NODATA:
333 			break;
334 		default:
335 			abort();
336 			/*NOTREACHED*/
337 		error:
338 			rv = -1;
339 			break;
340 		}
341 	}
342 out:
343 	DPRINTF("rv = %d\n", rv);
344 
345 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
346 		(void)sigaction(SIGPIPE, &sig_act, NULL);
347 
348 	free(newbuf);
349 	ms->flags |= MAGIC_COMPRESS;
350 	DPRINTF("Zmagic returns %d\n", rv);
351 	return rv;
352 }
353 #endif
354 /*
355  * `safe' write for sockets and pipes.
356  */
357 private ssize_t
358 swrite(int fd, const void *buf, size_t n)
359 {
360 	ssize_t rv;
361 	size_t rn = n;
362 
363 	do
364 		switch (rv = write(fd, buf, n)) {
365 		case -1:
366 			if (errno == EINTR)
367 				continue;
368 			return -1;
369 		default:
370 			n -= rv;
371 			buf = CAST(const char *, buf) + rv;
372 			break;
373 		}
374 	while (n > 0);
375 	return rn;
376 }
377 
378 
379 /*
380  * `safe' read for sockets and pipes.
381  */
382 protected ssize_t
383 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
384 {
385 	ssize_t rv;
386 #ifdef FIONREAD
387 	int t = 0;
388 #endif
389 	size_t rn = n;
390 
391 	if (fd == STDIN_FILENO)
392 		goto nocheck;
393 
394 #ifdef FIONREAD
395 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
396 #ifdef FD_ZERO
397 		ssize_t cnt;
398 		for (cnt = 0;; cnt++) {
399 			fd_set check;
400 			struct timeval tout = {0, 100 * 1000};
401 			int selrv;
402 
403 			FD_ZERO(&check);
404 			FD_SET(fd, &check);
405 
406 			/*
407 			 * Avoid soft deadlock: do not read if there
408 			 * is nothing to read from sockets and pipes.
409 			 */
410 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
411 			if (selrv == -1) {
412 				if (errno == EINTR || errno == EAGAIN)
413 					continue;
414 			} else if (selrv == 0 && cnt >= 5) {
415 				return 0;
416 			} else
417 				break;
418 		}
419 #endif
420 		(void)ioctl(fd, FIONREAD, &t);
421 	}
422 
423 	if (t > 0 && CAST(size_t, t) < n) {
424 		n = t;
425 		rn = n;
426 	}
427 #endif
428 
429 nocheck:
430 	do
431 		switch ((rv = read(fd, buf, n))) {
432 		case -1:
433 			if (errno == EINTR)
434 				continue;
435 			return -1;
436 		case 0:
437 			return rn - n;
438 		default:
439 			n -= rv;
440 			buf = CAST(char *, CCAST(void *, buf)) + rv;
441 			break;
442 		}
443 	while (n > 0);
444 	return rn;
445 }
446 
447 protected int
448 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
449     size_t nbytes)
450 {
451 	char buf[4096];
452 	ssize_t r;
453 	int tfd;
454 
455 #ifdef WIN32
456 	const char *t;
457 	buf[0] = '\0';
458 	if ((t = getenv("TEMP")) != NULL)
459 		(void)strlcpy(buf, t, sizeof(buf));
460 	else if ((t = getenv("TMP")) != NULL)
461 		(void)strlcpy(buf, t, sizeof(buf));
462 	else if ((t = getenv("TMPDIR")) != NULL)
463 		(void)strlcpy(buf, t, sizeof(buf));
464 	if (buf[0] != '\0')
465 		(void)strlcat(buf, "/", sizeof(buf));
466 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
467 #else
468 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
469 #endif
470 #ifndef HAVE_MKSTEMP
471 	{
472 		char *ptr = mktemp(buf);
473 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
474 		r = errno;
475 		(void)unlink(ptr);
476 		errno = r;
477 	}
478 #else
479 	{
480 		int te;
481 		mode_t ou = umask(0);
482 		tfd = mkstemp(buf);
483 		(void)umask(ou);
484 		te = errno;
485 		(void)unlink(buf);
486 		errno = te;
487 	}
488 #endif
489 	if (tfd == -1) {
490 		file_error(ms, errno,
491 		    "cannot create temporary file for pipe copy");
492 		return -1;
493 	}
494 
495 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
496 		r = 1;
497 	else {
498 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
499 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
500 				break;
501 	}
502 
503 	switch (r) {
504 	case -1:
505 		file_error(ms, errno, "error copying from pipe to temp file");
506 		return -1;
507 	case 0:
508 		break;
509 	default:
510 		file_error(ms, errno, "error while writing to temp file");
511 		return -1;
512 	}
513 
514 	/*
515 	 * We duplicate the file descriptor, because fclose on a
516 	 * tmpfile will delete the file, but any open descriptors
517 	 * can still access the phantom inode.
518 	 */
519 	if ((fd = dup2(tfd, fd)) == -1) {
520 		file_error(ms, errno, "could not dup descriptor for temp file");
521 		return -1;
522 	}
523 	(void)close(tfd);
524 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
525 		file_badseek(ms);
526 		return -1;
527 	}
528 	return fd;
529 }
530 #if HAVE_FORK
531 #ifdef BUILTIN_DECOMPRESS
532 
/*
 * Bits of the flag byte (offset 3) of a gzip header that announce
 * optional fields between the fixed header and the compressed data;
 * uncompressgzipped() skips the fields whose bit is set.
 */
#define FHCRC		0x02	/* two extra bytes */
#define FEXTRA		0x04	/* field with a 16-bit length prefix */
#define FNAME		0x08	/* NUL-terminated string */
#define FCOMMENT	0x10	/* NUL-terminated string */
537 
538 
539 private int
540 uncompressgzipped(const unsigned char *old, unsigned char **newch,
541     size_t bytes_max, size_t *n)
542 {
543 	unsigned char flg = old[3];
544 	size_t data_start = 10;
545 
546 	if (flg & FEXTRA) {
547 		if (data_start + 1 >= *n)
548 			goto err;
549 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
550 	}
551 	if (flg & FNAME) {
552 		while(data_start < *n && old[data_start])
553 			data_start++;
554 		data_start++;
555 	}
556 	if (flg & FCOMMENT) {
557 		while(data_start < *n && old[data_start])
558 			data_start++;
559 		data_start++;
560 	}
561 	if (flg & FHCRC)
562 		data_start += 2;
563 
564 	if (data_start >= *n)
565 		goto err;
566 
567 	*n -= data_start;
568 	old += data_start;
569 	return uncompresszlib(old, newch, bytes_max, n, 0);
570 err:
571 	return makeerror(newch, n, "File too short");
572 }
573 
574 private int
575 uncompresszlib(const unsigned char *old, unsigned char **newch,
576     size_t bytes_max, size_t *n, int zlib)
577 {
578 	int rc;
579 	z_stream z;
580 
581 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
582 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
583 
584 	z.next_in = CCAST(Bytef *, old);
585 	z.avail_in = CAST(uint32_t, *n);
586 	z.next_out = *newch;
587 	z.avail_out = CAST(unsigned int, bytes_max);
588 	z.zalloc = Z_NULL;
589 	z.zfree = Z_NULL;
590 	z.opaque = Z_NULL;
591 
592 	/* LINTED bug in header macro */
593 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
594 	if (rc != Z_OK)
595 		goto err;
596 
597 	rc = inflate(&z, Z_SYNC_FLUSH);
598 	if (rc != Z_OK && rc != Z_STREAM_END)
599 		goto err;
600 
601 	*n = CAST(size_t, z.total_out);
602 	rc = inflateEnd(&z);
603 	if (rc != Z_OK)
604 		goto err;
605 
606 	/* let's keep the nul-terminate tradition */
607 	(*newch)[*n] = '\0';
608 
609 	return OKDATA;
610 err:
611 	strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
612 	*n = strlen(RCAST(char *, *newch));
613 	return ERRDATA;
614 }
615 #endif
616 
617 #ifdef BUILTIN_BZLIB
618 private int
619 uncompressbzlib(const unsigned char *old, unsigned char **newch,
620     size_t bytes_max, size_t *n)
621 {
622 	int rc;
623 	bz_stream bz;
624 
625 	memset(&bz, 0, sizeof(bz));
626 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
627 	if (rc != BZ_OK)
628 		goto err;
629 
630 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
631 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
632 
633 	bz.next_in = CCAST(char *, RCAST(const char *, old));
634 	bz.avail_in = CAST(uint32_t, *n);
635 	bz.next_out = RCAST(char *, *newch);
636 	bz.avail_out = CAST(unsigned int, bytes_max);
637 
638 	rc = BZ2_bzDecompress(&bz);
639 	if (rc != BZ_OK && rc != BZ_STREAM_END)
640 		goto err;
641 
642 	/* Assume byte_max is within 32bit */
643 	/* assert(bz.total_out_hi32 == 0); */
644 	*n = CAST(size_t, bz.total_out_lo32);
645 	rc = BZ2_bzDecompressEnd(&bz);
646 	if (rc != BZ_OK)
647 		goto err;
648 
649 	/* let's keep the nul-terminate tradition */
650 	(*newch)[*n] = '\0';
651 
652 	return OKDATA;
653 err:
654 	snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
655 	*n = strlen(RCAST(char *, *newch));
656 	return ERRDATA;
657 }
658 #endif
659 
660 #ifdef BUILTIN_XZLIB
661 private int
662 uncompressxzlib(const unsigned char *old, unsigned char **newch,
663     size_t bytes_max, size_t *n)
664 {
665 	int rc;
666 	lzma_stream xz;
667 
668 	memset(&xz, 0, sizeof(xz));
669 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
670 	if (rc != LZMA_OK)
671 		goto err;
672 
673 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
674 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
675 
676 	xz.next_in = CCAST(const uint8_t *, old);
677 	xz.avail_in = CAST(uint32_t, *n);
678 	xz.next_out = RCAST(uint8_t *, *newch);
679 	xz.avail_out = CAST(unsigned int, bytes_max);
680 
681 	rc = lzma_code(&xz, LZMA_RUN);
682 	if (rc != LZMA_OK && rc != LZMA_STREAM_END)
683 		goto err;
684 
685 	*n = CAST(size_t, xz.total_out);
686 
687 	lzma_end(&xz);
688 
689 	/* let's keep the nul-terminate tradition */
690 	(*newch)[*n] = '\0';
691 
692 	return OKDATA;
693 err:
694 	snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
695 	*n = strlen(RCAST(char *, *newch));
696 	return ERRDATA;
697 }
698 #endif
699 
700 
701 static int
702 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
703 {
704 	char *msg;
705 	va_list ap;
706 	int rv;
707 
708 	va_start(ap, fmt);
709 	rv = vasprintf(&msg, fmt, ap);
710 	va_end(ap);
711 	if (rv < 0) {
712 		*buf = NULL;
713 		*len = 0;
714 		return NODATA;
715 	}
716 	*buf = RCAST(unsigned char *, msg);
717 	*len = strlen(msg);
718 	return ERRDATA;
719 }
720 
/*
 * Close descriptor fd[i] unless it is already marked closed (-1), and
 * mark it closed afterwards.
 */
static void
closefd(int *fd, size_t i)
{
	int *slot = &fd[i];

	if (*slot != -1) {
		(void) close(*slot);
		*slot = -1;
	}
}
729 
/* Close both ends of the pipe fd[0..1]; ends already closed are skipped. */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
737 
/*
 * Arrange for descriptor fd to become descriptor i in the decompressor
 * process, and for fd itself to be closed there. Nothing is done when
 * fd already is i.
 *
 * With posix_spawnp() v points to the file actions object and the
 * operations are only recorded in it. Otherwise v is unused and the
 * operations are carried out immediately; that variant runs in the
 * vfork()ed child.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/*
		 * _exit(), not exit(): we are in the child of a vfork(),
		 * so the parent's atexit handlers and stdio buffers must
		 * be left alone.
		 */
		_exit(1);
	}
	close(v ? fd : fd);	/* the conditional only serves to use v */
#endif
}
755 
/*
 * Arrange for descriptor fd to be closed in the decompressor process:
 * recorded in the file actions object v with posix_spawnp(), carried out
 * immediately (v is unused) otherwise.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
766 
767 static void
768 handledesc(void *v, int fd, int fdp[3][2])
769 {
770 	if (fd != -1) {
771 		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
772 		movedesc(v, STDIN_FILENO, fd);
773 	} else {
774 		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
775 		if (fdp[STDIN_FILENO][1] > 2)
776 		    closedesc(v, fdp[STDIN_FILENO][1]);
777 	}
778 
779 	file_clear_closexec(STDIN_FILENO);
780 
781 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
782 	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
783 	if (fdp[STDOUT_FILENO][0] > 2)
784 		closedesc(v, fdp[STDOUT_FILENO][0]);
785 
786 	file_clear_closexec(STDOUT_FILENO);
787 
788 	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
789 	if (fdp[STDERR_FILENO][0] > 2)
790 		closedesc(v, fdp[STDERR_FILENO][0]);
791 
792 	file_clear_closexec(STDERR_FILENO);
793 }
794 
795 static pid_t
796 writechild(int fd, const void *old, size_t n)
797 {
798 	pid_t pid;
799 
800 	/*
801 	 * fork again, to avoid blocking because both
802 	 * pipes filled
803 	 */
804 	pid = fork();
805 	if (pid == -1) {
806 		DPRINTF("Fork failed (%s)\n", strerror(errno));
807 		exit(1);
808 	}
809 	if (pid == 0) {
810 		/* child */
811 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
812 			DPRINTF("Write failed (%s)\n", strerror(errno));
813 			exit(1);
814 		}
815 		exit(0);
816 	}
817 	/* parent */
818 	return pid;
819 }
820 
821 static ssize_t
822 filter_error(unsigned char *ubuf, ssize_t n)
823 {
824 	char *p;
825 	char *buf;
826 
827 	ubuf[n] = '\0';
828 	buf = RCAST(char *, ubuf);
829 	while (isspace(CAST(unsigned char, *buf)))
830 		buf++;
831 	DPRINTF("Filter error[[[%s]]]\n", buf);
832 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
833 		*p = '\0';
834 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
835 		*p = '\0';
836 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
837 		++p;
838 		while (isspace(CAST(unsigned char, *p)))
839 			p++;
840 		n = strlen(p);
841 		memmove(ubuf, p, CAST(size_t, n + 1));
842 	}
843 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
844 	if (islower(*ubuf))
845 		*ubuf = toupper(*ubuf);
846 	return n;
847 }
848 
849 private const char *
850 methodname(size_t method)
851 {
852 	switch (method) {
853 #ifdef BUILTIN_DECOMPRESS
854 	case METH_FROZEN:
855 	case METH_ZLIB:
856 		return "zlib";
857 #endif
858 #ifdef BUILTIN_BZLIB
859 	case METH_BZIP:
860 		return "bzlib";
861 #endif
862 #ifdef BUILTIN_XZLIB
863 	case METH_XZ:
864 	case METH_LZMA:
865 		return "xzlib";
866 #endif
867 	default:
868 		return compr[method].argv[0];
869 	}
870 }
871 
872 private int
873 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
874     unsigned char **newch, size_t* n)
875 {
876 	int fdp[3][2];
877 	int status, rv, w;
878 	pid_t pid;
879 	pid_t writepid = -1;
880 	size_t i;
881 	ssize_t r;
882 	char *const *args;
883 #ifdef HAVE_POSIX_SPAWNP
884 	posix_spawn_file_actions_t fa;
885 #endif
886 
887 	switch (method) {
888 #ifdef BUILTIN_DECOMPRESS
889 	case METH_FROZEN:
890 		return uncompressgzipped(old, newch, bytes_max, n);
891 	case METH_ZLIB:
892 		return uncompresszlib(old, newch, bytes_max, n, 1);
893 #endif
894 #ifdef BUILTIN_BZLIB
895 	case METH_BZIP:
896 		return uncompressbzlib(old, newch, bytes_max, n);
897 #endif
898 #ifdef BUILTIN_XZLIB
899 	case METH_XZ:
900 	case METH_LZMA:
901 		return uncompressxzlib(old, newch, bytes_max, n);
902 #endif
903 	default:
904 		break;
905 	}
906 
907 	(void)fflush(stdout);
908 	(void)fflush(stderr);
909 
910 	for (i = 0; i < __arraycount(fdp); i++)
911 		fdp[i][0] = fdp[i][1] = -1;
912 
913 	/*
914 	 * There are multithreaded users who run magic_file()
915 	 * from dozens of threads. If two parallel magic_file() calls
916 	 * analyze two large compressed files, both will spawn
917 	 * an uncompressing child here, which writes out uncompressed data.
918 	 * We read some portion, then close the pipe, then waitpid() the child.
919 	 * If uncompressed data is larger, child shound get EPIPE and exit.
920 	 * However, with *parallel* calls OTHER child may unintentionally
921 	 * inherit pipe fds, thus keeping pipe open and making writes in
922 	 * our child block instead of failing with EPIPE!
923 	 * (For the bug to occur, two threads must mutually inherit their pipes,
924 	 * and both must have large outputs. Thus it happens not that often).
925 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
926 	 */
927 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
928 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
929 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
930 		closep(fdp[STDIN_FILENO]);
931 		closep(fdp[STDOUT_FILENO]);
932 		return makeerror(newch, n, "Cannot create pipe, %s",
933 		    strerror(errno));
934 	}
935 
936 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
937 #ifdef HAVE_POSIX_SPAWNP
938 	posix_spawn_file_actions_init(&fa);
939 
940 	handledesc(&fa, fd, fdp);
941 
942 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
943 	    args, NULL);
944 
945 	posix_spawn_file_actions_destroy(&fa);
946 
947 	if (status == -1) {
948 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
949 		    compr[method].argv[0], strerror(errno));
950 	}
951 #else
952 	/* For processes with large mapped virtual sizes, vfork
953 	 * may be _much_ faster (10-100 times) than fork.
954 	 */
955 	pid = vfork();
956 	if (pid == -1) {
957 		return makeerror(newch, n, "Cannot vfork, %s",
958 		    strerror(errno));
959 	}
960 	if (pid == 0) {
961 		/* child */
962 		/* Note: we are after vfork, do not modify memory
963 		 * in a way which confuses parent. In particular,
964 		 * do not modify fdp[i][j].
965 		 */
966 		handledesc(NULL, fd, fdp);
967 
968 		(void)execvp(compr[method].argv[0], args);
969 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
970 		    compr[method].argv[0], strerror(errno));
971 		_exit(1); /* _exit(), not exit(), because of vfork */
972 	}
973 #endif
974 	/* parent */
975 	/* Close write sides of child stdout/err pipes */
976 	for (i = 1; i < __arraycount(fdp); i++)
977 		closefd(fdp[i], 1);
978 	/* Write the buffer data to child stdin, if we don't have fd */
979 	if (fd == -1) {
980 		closefd(fdp[STDIN_FILENO], 0);
981 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
982 		closefd(fdp[STDIN_FILENO], 1);
983 	}
984 
985 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
986 	if (*newch == NULL) {
987 		rv = makeerror(newch, n, "No buffer, %s",
988 		    strerror(errno));
989 		goto err;
990 	}
991 	rv = OKDATA;
992 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
993 	if (r <= 0) {
994 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
995 		    r != -1 ? strerror(errno) : "no data");
996 
997 		rv = ERRDATA;
998 		if (r == 0 &&
999 		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
1000 		{
1001 			r = filter_error(*newch, r);
1002 			goto ok;
1003 		}
1004 		free(*newch);
1005 		if  (r == 0)
1006 			rv = makeerror(newch, n, "Read failed, %s",
1007 			    strerror(errno));
1008 		else
1009 			rv = makeerror(newch, n, "No data");
1010 		goto err;
1011 	}
1012 ok:
1013 	*n = r;
1014 	/* NUL terminate, as every buffer is handled here. */
1015 	(*newch)[*n] = '\0';
1016 err:
1017 	closefd(fdp[STDIN_FILENO], 1);
1018 	closefd(fdp[STDOUT_FILENO], 0);
1019 	closefd(fdp[STDERR_FILENO], 0);
1020 
1021 	w = waitpid(pid, &status, 0);
1022 wait_err:
1023 	if (w == -1) {
1024 		free(*newch);
1025 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1026 		DPRINTF("Child wait return %#x\n", status);
1027 	} else if (!WIFEXITED(status)) {
1028 		DPRINTF("Child not exited (%#x)\n", status);
1029 	} else if (WEXITSTATUS(status) != 0) {
1030 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1031 	}
1032 	if (writepid > 0) {
1033 		/* _After_ we know decompressor has exited, our input writer
1034 		 * definitely will exit now (at worst, writing fails in it,
1035 		 * since output fd is closed now on the reading size).
1036 		 */
1037 		w = waitpid(writepid, &status, 0);
1038 		writepid = -1;
1039 		goto wait_err;
1040 	}
1041 
1042 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1043 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1044 
1045 	return rv;
1046 }
1047 #endif
1048