xref: /freebsd/contrib/file/src/compress.c (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized
 *	uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
 *		   uncompress old into *newch using method; the size of
 *		   the result is stored in *n
 */
35 #include "file.h"
36 
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.158 2024/11/10 16:52:27 christos Exp $")
39 #endif
40 
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <stdio.h>
50 #include <string.h>
51 #include <errno.h>
52 #include <ctype.h>
53 #include <stdarg.h>
54 #include <signal.h>
55 #ifndef HAVE_SIG_T
56 typedef void (*sig_t)(int);
57 #endif /* HAVE_SIG_T */
58 #ifdef HAVE_SYS_IOCTL_H
59 #include <sys/ioctl.h>
60 #endif
61 #ifdef HAVE_SYS_WAIT_H
62 #include <sys/wait.h>
63 #endif
64 #if defined(HAVE_SYS_TIME_H)
65 #include <sys/time.h>
66 #endif
67 
68 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
69 #define BUILTIN_DECOMPRESS
70 #include <zlib.h>
71 #endif
72 
73 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
74 #define BUILTIN_BZLIB
75 #include <bzlib.h>
76 #endif
77 
78 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
79 #define BUILTIN_XZLIB
80 #include <lzma.h>
81 #endif
82 
83 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
84 #define BUILTIN_ZSTDLIB
85 #include <zstd.h>
86 #include <zstd_errors.h>
87 #endif
88 
89 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
90 #define BUILTIN_LZLIB
91 #include <lzlib.h>
92 #endif
93 
94 #ifdef notyet
95 #if defined(HAVE_LRZIP_H) && defined(LRZIPLIBSUPPORT)
96 #define BUILTIN_LRZIP
97 #include <Lrzip.h>
98 #endif
99 #endif
100 
#ifdef DEBUG
/* Descriptor used for debug output; -1 until DPRINTF opens /dev/tty. */
int tty = -1;
/*
 * Debug trace: opens /dev/tty on first use (aborting if that fails) and
 * formats the arguments onto it with dprintf().
 */
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
/* Without DEBUG every trace call expands to nothing. */
#define DPRINTF(...)
#endif
113 
114 #ifdef ZLIBSUPPORT
115 /*
116  * The following python code is not really used because ZLIBSUPPORT is only
117  * defined if we have a built-in zlib, and the built-in zlib handles that.
118  * That is not true for android where we have zlib.h and not -lz.
119  */
/* Script text handed to the interpreter named in zlib_args via "-c". */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* NULL-terminated argv used by the zlib row of compr[]. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
124 
/*
 * Check whether buf starts with a plausible zlib stream header.
 *
 * buf must provide at least two readable bytes.  The header is accepted when
 * the low nibble of the first byte is 8, its top bit is clear, and the
 * 16-bit value (buf[0] * 256 + buf[1]) is a multiple of 31.
 *
 * Returns 1 when the header matches, 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The check value is defined on the bytes in stream order, with the
	 * first byte most significant.  It is assembled arithmetically, so
	 * the result must not vary with the byte order of the host.
	 */
	check = (buf[0] << 8) | buf[1];
	return check % 31 == 0;
}
141 #endif
142 
/*
 * Matcher used by the lzma row of compr[].
 *
 * Inspects buf[0..2] and buf[12], so buf must hold at least 13 bytes.
 * Returns 1 when byte 0 is 0x5d, bytes 1 and 2 are zero and byte 12 is
 * either 0x00 or 0xff; returns 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	if (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0)
		return buf[12] == 0 || buf[12] == 0xff;
	return 0;
}
152 
/* Option string passed to gzip; lzip reuses the same string. */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompression programs.
 * They are referenced from the argv member of compr[]; element 0 is the
 * program name handed to execvp()/posix_spawnp() by uncompressbuf().
 */
static const char *gzip_args[] = {
	"gzip", gzip_flags, NULL
};
static const char *uncompress_args[] = {
	"uncompress", "-c", NULL
};
static const char *bzip2_args[] = {
	"bzip2", "-cd", NULL
};
static const char *lzip_args[] = {
	"lzip", lzip_flags, NULL
};
static const char *xz_args[] = {
	"xz", "-cd", NULL
};
static const char *lrzip_args[] = {
	"lrzip", "-qdf", "-", NULL
};
static const char *lz4_args[] = {
	"lz4", "-cd", NULL
};
static const char *zstd_args[] = {
	"zstd", "-cd", NULL
};
180 
/* Placeholders stored in the otherwise unused last member of compr[]. */
#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognized compression formats, searched in order by
 * file_zmagic().
 *
 *	u.magic	- byte string compared with memcmp() when maglen > 0
 *	u.func	- matcher called on the buffer when maglen < 0
 *	maglen	- number of magic bytes; negative selects u.func, and the
 *		  absolute value is the minimum buffer length required
 *	argv	- external command used when no built-in decompressor exists
 *	unused	- not read by the code in this file
 *
 * The METH_* macros are indices into this table, so they must be kept in
 * step with the row order.  Note that METH_FROZEN is 2, which is the
 * "gzipped" row (the row commented "frozen" is index 3); methodname() and
 * getdecompressor() treat METH_FROZEN as the gzip/zlib method.
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_LRZIP	10
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
223 
/*
 * Result codes shared by the decompression helpers and file_zmagic():
 *	OKDATA	- the output buffer holds decompressed data
 *	NODATA	- nothing usable was produced (makeerror() also returns this
 *		  when it cannot format its message)
 *	ERRDATA	- the output buffer holds an error message built by makeerror()
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2
227 
228 file_private ssize_t swrite(int, const void *, size_t);
229 #if HAVE_FORK
230 file_private size_t ncompr = __arraycount(compr);
231 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
232     unsigned char **, size_t *);
233 #ifdef BUILTIN_DECOMPRESS
234 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
235     size_t *, int);
236 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
237     size_t *, int);
238 #endif
239 #ifdef BUILTIN_BZLIB
240 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
241     size_t *, int);
242 #endif
243 #ifdef BUILTIN_XZLIB
244 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
245     size_t *, int);
246 #endif
247 #ifdef BUILTIN_ZSTDLIB
248 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
249     size_t *, int);
250 #endif
251 #ifdef BUILTIN_LZLIB
252 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
253     size_t *, int);
254 #endif
255 #ifdef BUILTIN_LRZIP
256 file_private int uncompresslrzip(const unsigned char *, unsigned char **, size_t,
257     size_t *, int);
258 #endif
259 
260 
261 static int makeerror(unsigned char **, size_t *, const char *, ...)
262     __attribute__((__format__(__printf__, 3, 4)));
263 file_private const char *methodname(size_t);
264 
265 file_private int
266 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
267 {
268 	unsigned char *p;
269 	int mime = ms->flags & MAGIC_MIME;
270 
271 	if (!mime)
272 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
273 
274 	for (p = buf; *p; p++)
275 		if (!isalnum(*p))
276 			*p = '-';
277 
278 	return file_printf(ms, "application/x-decompression-error-%s-%s",
279 	    methodname(i), buf);
280 }
281 
/*
 * Recognize compressed data in b, decompress it and identify the result.
 *
 *	ms	- magic set; nothing is done unless MAGIC_COMPRESS is set
 *	b	- buffer supplying the data (fbuf/flen) and descriptor (fd)
 *	name	- passed through to file_buffer() for the decompressed data
 *
 * For each matching row of compr[] the data is run through uncompressbuf()
 * and the outcome is printed: the description of the decompressed data (or a
 * formatted error), followed, unless MAGIC_COMPRESS_TRANSP is set or only
 * some of the MIME bits are set, by the description of the compressed
 * container itself.
 *
 * Returns 0 when MAGIC_COMPRESS is off or nothing matched, 1 when output was
 * produced, -1 on error.
 *
 * NOTE(review): neither the success path nor the error label leaves the
 * for loop (their break only exits the switch), so later rows of compr[]
 * are still tried and may overwrite rv; confirm this is intended.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* Not enough data to test this row's magic. */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		/* A negative maglen selects the matcher function. */
		if (compr[i].maglen < 0) {
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/*
			 * new_act carries the SIG_IGN disposition; sig_act
			 * receives the previous one so it can be restored
			 * at out:.
			 */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		/* Drop the buffer left over from a previous matching row. */
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			/*
			 * Clear MAGIC_COMPRESS while the nested file_buffer()
			 * calls run; it is set again at out:.
			 */
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			/* Only some of the MIME bits set: no suffix. */
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			/*
			 * Describe the compressed container into a pushed
			 * buffer, then append that text to the output.
			 */
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			break;
		default:
			abort();
			/*NOTREACHED*/
		error:
			/* Reached only through goto from the cases above. */
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	/* Restore the SIGPIPE disposition unless it was already ignored. */
	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
391 #endif
392 /*
393  * `safe' write for sockets and pipes.
394  */
395 file_private ssize_t
396 swrite(int fd, const void *buf, size_t n)
397 {
398 	ssize_t rv;
399 	size_t rn = n;
400 
401 	do
402 		switch (rv = write(fd, buf, n)) {
403 		case -1:
404 			if (errno == EINTR)
405 				continue;
406 			return -1;
407 		default:
408 			n -= rv;
409 			buf = CAST(const char *, buf) + rv;
410 			break;
411 		}
412 	while (n > 0);
413 	return rn;
414 }
415 
416 
/*
 * `safe' read for sockets and pipes.
 *
 *	fd		- descriptor to read from
 *	buf		- destination, with room for n bytes
 *	n		- number of bytes wanted
 *	canbepipe	- when non-zero (and FIONREAD is available) the
 *			  descriptor is first probed for pending data
 *
 * Returns the number of bytes read (short only when read() reports end of
 * file), 0 when the readiness probe gives up, or -1 on a read error other
 * than EINTR.
 */
file_protected ssize_t
sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
{
	ssize_t rv;
#if defined(FIONREAD) && !defined(__MINGW32__)
	int t = 0;
#endif
	size_t rn = n;

	/* Standard input is never probed. */
	if (fd == STDIN_FILENO)
		goto nocheck;

#if defined(FIONREAD) && !defined(__MINGW32__)
	/* Probe only when the byte count is unknown or reported as zero. */
	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
#ifdef FD_ZERO
		ssize_t cnt;
		for (cnt = 0;; cnt++) {
			fd_set check;
			struct timeval tout = {0, 100 * 1000};
			int selrv;

			FD_ZERO(&check);
			FD_SET(fd, &check);

			/*
			 * Avoid soft deadlock: do not read if there
			 * is nothing to read from sockets and pipes.
			 */
			selrv = select(fd + 1, &check, NULL, NULL, &tout);
			/*
			 * NOTE(review): any select() error loops again, and
			 * a timeout with cnt < 5 takes the final else and
			 * leaves the loop; confirm both are intended.
			 */
			if (selrv == -1) {
				if (errno == EINTR || errno == EAGAIN)
					continue;
			} else if (selrv == 0 && cnt >= 5) {
				return 0;
			} else
				break;
		}
#endif
		(void)ioctl(fd, FIONREAD, &t);
	}

	/* Limit the request to what is reported as available. */
	if (t > 0 && CAST(size_t, t) < n) {
		n = t;
		rn = n;
	}
#endif

nocheck:
	/* Read until n bytes have arrived, end of file, or a hard error. */
	do
		switch ((rv = read(fd, buf, n))) {
		case -1:
			if (errno == EINTR)
				continue;
			return -1;
		case 0:
			/* End of file: report what was read so far. */
			return rn - n;
		default:
			n -= rv;
			buf = CAST(char *, CCAST(void *, buf)) + rv;
			break;
		}
	while (n > 0);
	return rn;
}
484 
485 file_protected int
486 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
487     size_t nbytes)
488 {
489 	char buf[4096];
490 	ssize_t r;
491 	int tfd;
492 
493 #ifdef WIN32
494 	const char *t;
495 	buf[0] = '\0';
496 	if ((t = getenv("TEMP")) != NULL)
497 		(void)strlcpy(buf, t, sizeof(buf));
498 	else if ((t = getenv("TMP")) != NULL)
499 		(void)strlcpy(buf, t, sizeof(buf));
500 	else if ((t = getenv("TMPDIR")) != NULL)
501 		(void)strlcpy(buf, t, sizeof(buf));
502 	if (buf[0] != '\0')
503 		(void)strlcat(buf, "/", sizeof(buf));
504 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
505 #else
506 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
507 #endif
508 #ifndef HAVE_MKSTEMP
509 	{
510 		char *ptr = mktemp(buf);
511 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
512 		r = errno;
513 		(void)unlink(ptr);
514 		errno = r;
515 	}
516 #else
517 	{
518 		int te;
519 		mode_t ou = umask(0);
520 		tfd = mkstemp(buf);
521 		(void)umask(ou);
522 		te = errno;
523 		(void)unlink(buf);
524 		errno = te;
525 	}
526 #endif
527 	if (tfd == -1) {
528 		file_error(ms, errno,
529 		    "cannot create temporary file for pipe copy");
530 		return -1;
531 	}
532 
533 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
534 		r = 1;
535 	else {
536 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
537 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
538 				break;
539 	}
540 
541 	switch (r) {
542 	case -1:
543 		file_error(ms, errno, "error copying from pipe to temp file");
544 		return -1;
545 	case 0:
546 		break;
547 	default:
548 		file_error(ms, errno, "error while writing to temp file");
549 		return -1;
550 	}
551 
552 	/*
553 	 * We duplicate the file descriptor, because fclose on a
554 	 * tmpfile will delete the file, but any open descriptors
555 	 * can still access the phantom inode.
556 	 */
557 	if ((fd = dup2(tfd, fd)) == -1) {
558 		file_error(ms, errno, "could not dup descriptor for temp file");
559 		return -1;
560 	}
561 	(void)close(tfd);
562 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
563 		file_badseek(ms);
564 		return -1;
565 	}
566 	return fd;
567 }
568 #if HAVE_FORK
569 #ifdef BUILTIN_DECOMPRESS
570 
/*
 * Bits of the gzip header flag byte (offset 3) that uncompressgzipped()
 * tests to find where the compressed data starts.
 */
#define FHCRC		(1 << 1)	/* 2 extra header bytes are skipped */
#define FEXTRA		(1 << 2)	/* length-prefixed extra field */
#define FNAME		(1 << 3)	/* NUL-terminated field */
#define FCOMMENT	(1 << 4)	/* NUL-terminated field */
575 
576 
577 file_private int
578 uncompressgzipped(const unsigned char *old, unsigned char **newch,
579     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
580 {
581 	unsigned char flg;
582 	size_t data_start = 10;
583 
584 	if (*n < 4) {
585 		goto err;
586 	}
587 
588 	flg = old[3];
589 
590 	if (flg & FEXTRA) {
591 		if (data_start + 1 >= *n)
592 			goto err;
593 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
594 	}
595 	if (flg & FNAME) {
596 		while(data_start < *n && old[data_start])
597 			data_start++;
598 		data_start++;
599 	}
600 	if (flg & FCOMMENT) {
601 		while(data_start < *n && old[data_start])
602 			data_start++;
603 		data_start++;
604 	}
605 	if (flg & FHCRC)
606 		data_start += 2;
607 
608 	if (data_start >= *n)
609 		goto err;
610 
611 	*n -= data_start;
612 	old += data_start;
613 	return uncompresszlib(old, newch, bytes_max, n, 0);
614 err:
615 	return makeerror(newch, n, "File too short");
616 }
617 
618 file_private int
619 uncompresszlib(const unsigned char *old, unsigned char **newch,
620     size_t bytes_max, size_t *n, int zlib)
621 {
622 	int rc;
623 	z_stream z;
624 
625 	DPRINTF("builtin zlib decompression\n");
626 	z.next_in = CCAST(Bytef *, old);
627 	z.avail_in = CAST(uint32_t, *n);
628 	z.next_out = *newch;
629 	z.avail_out = CAST(unsigned int, bytes_max);
630 	z.zalloc = Z_NULL;
631 	z.zfree = Z_NULL;
632 	z.opaque = Z_NULL;
633 
634 	/* LINTED bug in header macro */
635 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
636 	if (rc != Z_OK)
637 		goto err;
638 
639 	rc = inflate(&z, Z_SYNC_FLUSH);
640 	if (rc != Z_OK && rc != Z_STREAM_END) {
641 		inflateEnd(&z);
642 		goto err;
643 	}
644 
645 	*n = CAST(size_t, z.total_out);
646 	rc = inflateEnd(&z);
647 	if (rc != Z_OK)
648 		goto err;
649 
650 	/* let's keep the nul-terminate tradition */
651 	(*newch)[*n] = '\0';
652 
653 	return OKDATA;
654 err:
655 	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
656 }
657 #endif
658 
659 #ifdef BUILTIN_BZLIB
660 file_private int
661 uncompressbzlib(const unsigned char *old, unsigned char **newch,
662     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
663 {
664 	int rc;
665 	bz_stream bz;
666 
667 	DPRINTF("builtin bzlib decompression\n");
668 	memset(&bz, 0, sizeof(bz));
669 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
670 	if (rc != BZ_OK)
671 		goto err;
672 
673 	bz.next_in = CCAST(char *, RCAST(const char *, old));
674 	bz.avail_in = CAST(uint32_t, *n);
675 	bz.next_out = RCAST(char *, *newch);
676 	bz.avail_out = CAST(unsigned int, bytes_max);
677 
678 	rc = BZ2_bzDecompress(&bz);
679 	if (rc != BZ_OK && rc != BZ_STREAM_END) {
680 		BZ2_bzDecompressEnd(&bz);
681 		goto err;
682 	}
683 
684 	/* Assume byte_max is within 32bit */
685 	/* assert(bz.total_out_hi32 == 0); */
686 	*n = CAST(size_t, bz.total_out_lo32);
687 	rc = BZ2_bzDecompressEnd(&bz);
688 	if (rc != BZ_OK)
689 		goto err;
690 
691 	/* let's keep the nul-terminate tradition */
692 	(*newch)[*n] = '\0';
693 
694 	return OKDATA;
695 err:
696 	return makeerror(newch, n, "bunzip error %d", rc);
697 }
698 #endif
699 
700 #ifdef BUILTIN_XZLIB
701 file_private int
702 uncompressxzlib(const unsigned char *old, unsigned char **newch,
703     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
704 {
705 	int rc;
706 	lzma_stream xz;
707 
708 	DPRINTF("builtin xzlib decompression\n");
709 	memset(&xz, 0, sizeof(xz));
710 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
711 	if (rc != LZMA_OK)
712 		goto err;
713 
714 	xz.next_in = CCAST(const uint8_t *, old);
715 	xz.avail_in = CAST(uint32_t, *n);
716 	xz.next_out = RCAST(uint8_t *, *newch);
717 	xz.avail_out = CAST(unsigned int, bytes_max);
718 
719 	rc = lzma_code(&xz, LZMA_RUN);
720 	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
721 		lzma_end(&xz);
722 		goto err;
723 	}
724 
725 	*n = CAST(size_t, xz.total_out);
726 
727 	lzma_end(&xz);
728 
729 	/* let's keep the nul-terminate tradition */
730 	(*newch)[*n] = '\0';
731 
732 	return OKDATA;
733 err:
734 	return makeerror(newch, n, "unxz error %d", rc);
735 }
736 #endif
737 
738 #ifdef BUILTIN_ZSTDLIB
739 file_private int
740 uncompresszstd(const unsigned char *old, unsigned char **newch,
741     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
742 {
743 	size_t rc;
744 	ZSTD_DStream *zstd;
745 	ZSTD_inBuffer in;
746 	ZSTD_outBuffer out;
747 
748 	DPRINTF("builtin zstd decompression\n");
749 	if ((zstd = ZSTD_createDStream()) == NULL) {
750 		return makeerror(newch, n, "No ZSTD decompression stream, %s",
751 		    strerror(errno));
752 	}
753 
754 	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
755 	if (ZSTD_isError(rc))
756 		goto err;
757 
758 	in.src = CCAST(const void *, old);
759 	in.size = *n;
760 	in.pos = 0;
761 	out.dst = RCAST(void *, *newch);
762 	out.size = bytes_max;
763 	out.pos = 0;
764 
765 	rc = ZSTD_decompressStream(zstd, &out, &in);
766 	if (ZSTD_isError(rc))
767 		goto err;
768 
769 	*n = out.pos;
770 
771 	ZSTD_freeDStream(zstd);
772 
773 	/* let's keep the nul-terminate tradition */
774 	(*newch)[*n] = '\0';
775 
776 	return OKDATA;
777 err:
778 	ZSTD_freeDStream(zstd);
779 	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
780 }
781 #endif
782 
783 #ifdef BUILTIN_LZLIB
/*
 * Decompress lzip data with the built-in lzlib.
 *
 *	old	  - compressed input, *n bytes long
 *	newch	  - *newch is the output buffer; a NUL is stored after the
 *		    last byte produced
 *	bytes_max - maximum number of bytes to produce
 *	n	  - in: input length; out: number of bytes produced
 *	extra	  - unused
 *
 * Returns OKDATA on success, otherwise the result of makeerror() carrying
 * the lzlib error text.
 */
file_private int
uncompresslzlib(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	enum LZ_Errno err;
	size_t old_remaining = *n;
	size_t new_remaining = bytes_max;
	size_t total_read = 0;
	unsigned char *bufp;
	struct LZ_Decoder *dec;

	bufp = *newch;

	DPRINTF("builtin lzlib decompression\n");
	dec = LZ_decompress_open();
	if (!dec) {
		return makeerror(newch, n, "unable to allocate LZ_Decoder");
	}
	if (LZ_decompress_errno(dec) != LZ_ok)
		goto err;

	/* Alternate between feeding input and draining output. */
	for (;;) {
		// LZ_decompress_read() stops at member boundaries, so we may
		// have more than one successful read after writing all data
		// we have.
		if (old_remaining > 0) {
			int wr = LZ_decompress_write(dec, old, old_remaining);
			if (wr < 0)
				goto err;
			old_remaining -= wr;
			old += wr;
		}

		int rd = LZ_decompress_read(dec, bufp, new_remaining);
		if (rd > 0) {
			new_remaining -= rd;
			bufp += rd;
			total_read += rd;
		}

		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
			goto err;
		/* Output limit reached. */
		if (new_remaining == 0)
			break;
		/* All input consumed and nothing more came out. */
		if (old_remaining == 0 && rd == 0)
			break;
	}

	LZ_decompress_close(dec);
	*n = total_read;

	/* let's keep the nul-terminate tradition */
	*bufp = '\0';

	return OKDATA;
err:
	/* Fetch the error code before the decoder is destroyed. */
	err = LZ_decompress_errno(dec);
	LZ_decompress_close(dec);
	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
}
844 #endif
845 
846 #ifdef BUILTIN_LRZIP
847 file_private int
848 uncompresslrzip(const unsigned char *old, unsigned char **newch,
849     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
850 {
851 	Lrzip *lr;
852 	FILE *in, *out;
853 	int res = OKDATA;
854 
855 	DPRINTF("builtin rlzip decompression\n");
856 	lr = lrzip_new(LRZIP_MODE_DECOMPRESS);
857 	if (lr == NULL) {
858 		res = makeerror(newch, n, "unable to create an lrzip decoder");
859 		goto out0;
860 	}
861 	lrzip_config_env(lr);
862 	in = fmemopen(RCAST(void *, old), bytes_max, "r");
863 	if (in == NULL) {
864 		res = makeerror(newch, n, "unable to construct input file");
865 		goto out1;
866 	}
867 	if (!lrzip_file_add(lr, in)) {
868 		res = makeerror(newch, n, "unable to add input file");
869 		goto out2;
870 	}
871 	*newch = calloc(*n = 2 * bytes_max, 1);
872 	if (*newch == NULL) {
873 		res = makeerror(newch, n, "unable to allocate output buffer");
874 		goto out2;
875 	}
876 	out = fmemopen(*newch, *n, "w");
877 	if (out == NULL) {
878 		free(*newch);
879 		res = makeerror(newch, n, "unable to allocate output file");
880 		goto out2;
881 	}
882 	lrzip_outfile_set(lr, out);
883 	if (lrzip_run(lr)) {
884 		free(*newch);
885 		res = makeerror(newch, n, "unable to decompress file");
886 		goto out3;
887 	}
888 	*n = (size_t)ftell(out);
889 out3:
890 	fclose(out);
891 out2:
892 	fclose(in);
893 out1:
894 	lrzip_free(lr);
895 out0:
896 	return res;
897 }
898 #endif
899 
900 static int
901 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
902 {
903 	char *msg;
904 	va_list ap;
905 	int rv;
906 
907 	DPRINTF("Makeerror %s\n", fmt);
908 	free(*buf);
909 	va_start(ap, fmt);
910 	rv = vasprintf(&msg, fmt, ap);
911 	va_end(ap);
912 	if (rv < 0) {
913 		DPRINTF("Makeerror failed");
914 		*buf = NULL;
915 		*len = 0;
916 		return NODATA;
917 	}
918 	*buf = RCAST(unsigned char *, msg);
919 	*len = strlen(msg);
920 	return ERRDATA;
921 }
922 
/*
 * Close descriptor fd[i] unless it is already marked closed (-1), then mark
 * the slot as closed.
 */
static void
closefd(int *fd, size_t i)
{
	int *slot = &fd[i];

	if (*slot != -1) {
		(void) close(*slot);
		*slot = -1;
	}
}
931 
/* Close both ends of the pipe pair fd[0]/fd[1] through closefd(). */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
939 
/*
 * Arrange for descriptor fd to become descriptor i, then close fd.
 * Nothing happens when the two are already the same.
 *
 * With HAVE_POSIX_SPAWNP, v is a posix_spawn_file_actions_t to which the
 * dup2 and close actions are appended.  Otherwise the calls are made
 * directly, v is not used, and the process exits if dup2() fails.
 */
static void
movedesc(void *v, int i, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *actions;
#endif

	if (i == fd)
		return; /* "no dup was necessary" */

#ifdef HAVE_POSIX_SPAWNP
	actions = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(actions, fd, i);
	posix_spawn_file_actions_addclose(actions, fd);
#else
	(void)v;
	if (dup2(fd, i) != -1) {
		close(fd);
		return;
	}
	DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
	exit(EXIT_FAILURE);
#endif
}
957 
/*
 * Close descriptor fd: appended as a close action to the spawn file actions
 * in v when HAVE_POSIX_SPAWNP is defined, otherwise performed directly
 * (v is then not used).
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
968 
/*
 * Set up the standard descriptors for the decompression child.
 *
 *	v   - passed through to movedesc()/closedesc() (spawn file actions,
 *	      or NULL when the calls are made directly)
 *	fd  - input file, or -1 when input arrives through the stdin pipe
 *	fdp - pipe pairs indexed by STDIN_FILENO/STDOUT_FILENO/STDERR_FILENO;
 *	      element [0] is used as the read end and [1] as the write end
 */
static void
handledesc(void *v, int fd, int fdp[3][2])
{
	if (fd != -1) {
		/* Feed the file itself, rewound to its start. */
		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
		movedesc(v, STDIN_FILENO, fd);
	} else {
		/* Feed from the pipe; the child does not need its write end. */
		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
		if (fdp[STDIN_FILENO][1] > 2)
		    closedesc(v, fdp[STDIN_FILENO][1]);
	}

	file_clear_closexec(STDIN_FILENO);

///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
	/* stdout: write end becomes fd 1, read end is closed. */
	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
	if (fdp[STDOUT_FILENO][0] > 2)
		closedesc(v, fdp[STDOUT_FILENO][0]);

	file_clear_closexec(STDOUT_FILENO);

	/* stderr: same treatment as stdout. */
	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
	if (fdp[STDERR_FILENO][0] > 2)
		closedesc(v, fdp[STDERR_FILENO][0]);

	file_clear_closexec(STDERR_FILENO);
}
996 
997 static pid_t
998 writechild(int fd, const void *old, size_t n)
999 {
1000 	pid_t pid;
1001 
1002 	/*
1003 	 * fork again, to avoid blocking because both
1004 	 * pipes filled
1005 	 */
1006 	pid = fork();
1007 	if (pid == -1) {
1008 		DPRINTF("Fork failed (%s)\n", strerror(errno));
1009 		return -1;
1010 	}
1011 	if (pid == 0) {
1012 		/* child */
1013 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
1014 			DPRINTF("Write failed (%s)\n", strerror(errno));
1015 			exit(EXIT_FAILURE);
1016 		}
1017 		exit(EXIT_SUCCESS);
1018 	}
1019 	/* parent */
1020 	return pid;
1021 }
1022 
1023 static ssize_t
1024 filter_error(unsigned char *ubuf, ssize_t n)
1025 {
1026 	char *p;
1027 	char *buf;
1028 
1029 	ubuf[n] = '\0';
1030 	buf = RCAST(char *, ubuf);
1031 	while (isspace(CAST(unsigned char, *buf)))
1032 		buf++;
1033 	DPRINTF("Filter error[[[%s]]]\n", buf);
1034 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
1035 		*p = '\0';
1036 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
1037 		*p = '\0';
1038 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
1039 		++p;
1040 		while (isspace(CAST(unsigned char, *p)))
1041 			p++;
1042 		n = strlen(p);
1043 		memmove(ubuf, p, CAST(size_t, n + 1));
1044 	}
1045 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
1046 	if (islower(*ubuf))
1047 		*ubuf = toupper(*ubuf);
1048 	return n;
1049 }
1050 
/*
 * Return a printable name for compression method `method' (an index into
 * compr[]).  Methods served by a built-in library report the library name;
 * every other method reports the external program name, compr[].argv[0].
 * Note that METH_FROZEN is the index of the gzip row of compr[].
 */
file_private const char *
methodname(size_t method)
{
	switch (method) {
#ifdef BUILTIN_DECOMPRESS
	case METH_FROZEN:
	case METH_ZLIB:
		return "zlib";
#endif
#ifdef BUILTIN_BZLIB
	case METH_BZIP:
		return "bzlib";
#endif
#ifdef BUILTIN_XZLIB
	case METH_XZ:
	case METH_LZMA:
		return "xzlib";
#endif
#ifdef BUILTIN_ZSTDLIB
	case METH_ZSTD:
		return "zstd";
#endif
#ifdef BUILTIN_LZLIB
	case METH_LZIP:
		return "lzlib";
#endif
#ifdef BUILTIN_LRZIP
	case METH_LRZIP:
		return "lrzip";
#endif
	default:
		/* No built-in support compiled in for this method. */
		return compr[method].argv[0];
	}
}
1085 
/*
 * Return the built-in decompression function for `method' (an index into
 * compr[]), or NULL when none is compiled in and an external program has to
 * be used.  The returned function takes (old, newch, bytes_max, n, extra),
 * like the uncompress*() helpers in this file.
 * Note that METH_FROZEN is the index of the gzip row of compr[].
 */
file_private int (*
getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
    size_t *, int)
{
	switch (method) {
#ifdef BUILTIN_DECOMPRESS
	case METH_FROZEN:
		return uncompressgzipped;
	case METH_ZLIB:
		return uncompresszlib;
#endif
#ifdef BUILTIN_BZLIB
	case METH_BZIP:
		return uncompressbzlib;
#endif
#ifdef BUILTIN_XZLIB
	case METH_XZ:
	case METH_LZMA:
		return uncompressxzlib;
#endif
#ifdef BUILTIN_ZSTDLIB
	case METH_ZSTD:
		return uncompresszstd;
#endif
#ifdef BUILTIN_LZLIB
	case METH_LZIP:
		return uncompresslzlib;
#endif
#ifdef BUILTIN_LRZIP
	case METH_LRZIP:
		return uncompresslrzip;
#endif
	default:
		return NULL;
	}
}
1122 
1123 file_private int
1124 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1125     const unsigned char *old, unsigned char **newch, size_t* n)
1126 {
1127 	int fdp[3][2];
1128 	int status, rv, w;
1129 	pid_t pid;
1130 	pid_t writepid = -1;
1131 	size_t i;
1132 	ssize_t r, re;
1133 	char *const *args;
1134 #ifdef HAVE_POSIX_SPAWNP
1135 	posix_spawn_file_actions_t fa;
1136 #endif
1137 	int (*decompress)(const unsigned char *, unsigned char **,
1138 	    size_t, size_t *, int) = getdecompressor(method);
1139 
1140 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
1141 	if (*newch == NULL)
1142 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
1143 
1144 	if (decompress) {
1145 		if (nofork) {
1146 			return makeerror(newch, n,
1147 			    "Fork is required to uncompress, but disabled");
1148 		}
1149 		return (*decompress)(old, newch, bytes_max, n, 1);
1150 	}
1151 
1152 	(void)fflush(stdout);
1153 	(void)fflush(stderr);
1154 
1155 	for (i = 0; i < __arraycount(fdp); i++)
1156 		fdp[i][0] = fdp[i][1] = -1;
1157 
1158 	/*
1159 	 * There are multithreaded users who run magic_file()
1160 	 * from dozens of threads. If two parallel magic_file() calls
1161 	 * analyze two large compressed files, both will spawn
1162 	 * an uncompressing child here, which writes out uncompressed data.
1163 	 * We read some portion, then close the pipe, then waitpid() the child.
1164 	 * If uncompressed data is larger, child should get EPIPE and exit.
1165 	 * However, with *parallel* calls OTHER child may unintentionally
1166 	 * inherit pipe fds, thus keeping pipe open and making writes in
1167 	 * our child block instead of failing with EPIPE!
1168 	 * (For the bug to occur, two threads must mutually inherit their pipes,
1169 	 * and both must have large outputs. Thus it happens not that often).
1170 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
1171 	 */
1172 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1173 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1174 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1175 		closep(fdp[STDIN_FILENO]);
1176 		closep(fdp[STDOUT_FILENO]);
1177 		return makeerror(newch, n, "Cannot create pipe, %s",
1178 		    strerror(errno));
1179 	}
1180 
1181 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1182 #ifdef HAVE_POSIX_SPAWNP
1183 	posix_spawn_file_actions_init(&fa);
1184 
1185 	handledesc(&fa, fd, fdp);
1186 
1187 	DPRINTF("Executing %s\n", compr[method].argv[0]);
1188 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1189 	    args, NULL);
1190 
1191 	posix_spawn_file_actions_destroy(&fa);
1192 
1193 	if (status == -1) {
1194 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1195 		    compr[method].argv[0], strerror(errno));
1196 	}
1197 #else
1198 	/* For processes with large mapped virtual sizes, vfork
1199 	 * may be _much_ faster (10-100 times) than fork.
1200 	 */
1201 	pid = vfork();
1202 	if (pid == -1) {
1203 		return makeerror(newch, n, "Cannot vfork, %s",
1204 		    strerror(errno));
1205 	}
1206 	if (pid == 0) {
1207 		/* child */
1208 		/* Note: we are after vfork, do not modify memory
1209 		 * in a way which confuses parent. In particular,
1210 		 * do not modify fdp[i][j].
1211 		 */
1212 		handledesc(NULL, fd, fdp);
1213 		DPRINTF("Executing %s\n", compr[method].argv[0]);
1214 
1215 		(void)execvp(compr[method].argv[0], args);
1216 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1217 		    compr[method].argv[0], strerror(errno));
1218 		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1219 	}
1220 #endif
1221 	/* parent */
1222 	/* Close write sides of child stdout/err pipes */
1223 	for (i = 1; i < __arraycount(fdp); i++)
1224 		closefd(fdp[i], 1);
1225 	/* Write the buffer data to child stdin, if we don't have fd */
1226 	if (fd == -1) {
1227 		closefd(fdp[STDIN_FILENO], 0);
1228 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1229 		if (writepid == (pid_t)-1) {
1230 			rv = makeerror(newch, n, "Write to child failed, %s",
1231 			    strerror(errno));
1232 			DPRINTF("Write to child failed\n");
1233 			goto err;
1234 		}
1235 		closefd(fdp[STDIN_FILENO], 1);
1236 	}
1237 
1238 	rv = OKDATA;
1239 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1240 	DPRINTF("read got %zd\n", r);
1241 	if (r < 0) {
1242 		rv = ERRDATA;
1243 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1244 		        strerror(errno));
1245 		goto err;
1246 	}
1247 	if (CAST(size_t, r) == bytes_max) {
1248 		/*
1249 		 * close fd so that the child exits with sigpipe and ignore
1250 		 * errors, otherwise we risk the child blocking and never
1251 		 * exiting.
1252 		 */
1253 		DPRINTF("Closing stdout for bytes_max\n");
1254 		closefd(fdp[STDOUT_FILENO], 0);
1255 		goto ok;
1256 	}
1257 	if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1258 		DPRINTF("Got stuff from stderr %s\n", *newch);
1259 		rv = ERRDATA;
1260 		r = filter_error(*newch, r);
1261 		goto ok;
1262 	}
1263 	if  (re == 0)
1264 		goto ok;
1265 	rv = makeerror(newch, n, "Read stderr failed, %s",
1266 	    strerror(errno));
1267 	goto err;
1268 ok:
1269 	*n = r;
1270 	/* NUL terminate, as every buffer is handled here. */
1271 	(*newch)[*n] = '\0';
1272 err:
1273 	closefd(fdp[STDIN_FILENO], 1);
1274 	closefd(fdp[STDOUT_FILENO], 0);
1275 	closefd(fdp[STDERR_FILENO], 0);
1276 
1277 	w = waitpid(pid, &status, 0);
1278 wait_err:
1279 	if (w == -1) {
1280 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1281 		DPRINTF("Child wait return %#x\n", status);
1282 	} else if (!WIFEXITED(status)) {
1283 		DPRINTF("Child not exited (%#x)\n", status);
1284 	} else if (WEXITSTATUS(status) != 0) {
1285 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1286 	}
1287 	if (writepid > 0) {
1288 		/* _After_ we know decompressor has exited, our input writer
1289 		 * definitely will exit now (at worst, writing fails in it,
1290 		 * since output fd is closed now on the reading size).
1291 		 */
1292 		w = waitpid(writepid, &status, 0);
1293 		writepid = -1;
1294 		goto wait_err;
1295 	}
1296 
1297 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1298 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1299 
1300 	return rv;
1301 }
1302 #endif
1303