xref: /freebsd/contrib/file/src/compress.c (revision aa1a8ff2d6dbc51ef058f46f3db5a8bb77967145)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * compress routines:
30  *	zmagic() - returns 0 if not recognized, uncompresses and prints
31  *		   information if recognized
32  *	uncompress(method, old, n, newch) - uncompress old into new,
33  *					    using method, return sizeof new
34  */
35 #include "file.h"
36 
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
39 #endif
40 
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <string.h>
50 #include <errno.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <signal.h>
54 #ifndef HAVE_SIG_T
55 typedef void (*sig_t)(int);
56 #endif /* HAVE_SIG_T */
57 #ifdef HAVE_SYS_IOCTL_H
58 #include <sys/ioctl.h>
59 #endif
60 #ifdef HAVE_SYS_WAIT_H
61 #include <sys/wait.h>
62 #endif
63 #if defined(HAVE_SYS_TIME_H)
64 #include <sys/time.h>
65 #endif
66 
67 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
68 #define BUILTIN_DECOMPRESS
69 #include <zlib.h>
70 #endif
71 
72 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
73 #define BUILTIN_BZLIB
74 #include <bzlib.h>
75 #endif
76 
77 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
78 #define BUILTIN_XZLIB
79 #include <lzma.h>
80 #endif
81 
82 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
83 #define BUILTIN_ZSTDLIB
84 #include <zstd.h>
85 #include <zstd_errors.h>
86 #endif
87 
88 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
89 #define BUILTIN_LZLIB
90 #include <lzlib.h>
91 #endif
92 
#ifdef DEBUG
/*
 * Debug tracing.  With DEBUG defined, DPRINTF() formats its arguments with
 * dprintf() onto /dev/tty, which is opened lazily on first use and cached
 * in the global `tty'; the process aborts if the terminal cannot be opened.
 * Without DEBUG, DPRINTF() expands to nothing, so its arguments are never
 * evaluated.
 */
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
#define DPRINTF(...)
#endif
105 
106 #ifdef ZLIBSUPPORT
/*
 * The following python code is not really used because ZLIBSUPPORT is only
 * defined if we have a built-in zlib, and the built-in zlib handles that.
 * That is not true for android where we have zlib.h and not -lz.
 */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* External command line used for the zlib entry of the compr[] table. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };

/*
 * Return 1 if the two bytes at buf look like a zlib (RFC 1950) stream
 * header, 0 otherwise.  The caller must supply at least two bytes.
 *
 * The header is accepted when the low nibble of the first byte (CM) is 8,
 * the top bit of the first byte is clear, and the 16-bit value formed with
 * the first byte as the most significant one is a multiple of 31.
 *
 * The 16-bit value is built arithmetically, so the result is the same on
 * every host; no endianness probe is needed (an earlier version swapped the
 * bytes on big-endian hosts and rejected valid headers there).
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/* CMF * 256 + FLG, as specified for the FCHECK bits */
	check = (buf[0] << 8) | buf[1];
	if (check % 31)
		return 0;
	return 1;
}
133 #endif
134 
/*
 * Heuristic match for a headerless-magic LZMA stream.  Returns 1 when
 * byte 0 is 0x5d, bytes 1 and 2 are zero and byte 12 is either 0x00 or
 * 0xff; returns 0 otherwise.  The caller must supply at least 13 bytes
 * (byte 12 is only examined once the first three bytes have matched).
 */
static int
lzmacmp(const unsigned char *buf)
{
	return (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0) &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
144 
/* Option string shared by the gzip and lzip command lines. */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompression
 * programs referenced from the compr[] table.  Element 0 is the program
 * name that is looked up and executed.
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", "-qdf", "-", NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
172 
/* Placeholders stored in the `unused' member of the table below. */
#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognized compression formats.
 *
 * maglen > 0:	u.magic holds `maglen' signature bytes that are compared
 *		against the start of the input with memcmp().
 * maglen < 0:	u.func is a predicate called on the input; -maglen is the
 *		minimum number of input bytes required before it is called.
 * argv:	NULL-terminated command line of the external decompressor.
 * unused:	not read by any code in this file.
 *
 * The METH_* constants are indices into this table and are used by
 * methodname() and getdecompressor(); the order of the entries must
 * therefore not change.
 * NOTE(review): despite its name, METH_FROZEN (2) selects the "gzipped"
 * entry; the "frozen" entry is index 3.
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
214 
/*
 * Result codes shared by uncompressbuf(), the built-in decompressors and
 * makeerror():
 *   OKDATA  - the output buffer holds decompressed data.
 *   NODATA  - no output buffer is available (makeerror() returns this when
 *             it cannot format its message and sets the buffer to NULL).
 *   ERRDATA - the output buffer holds an error message instead of data.
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2
218 
219 file_private ssize_t swrite(int, const void *, size_t);
220 #if HAVE_FORK
221 file_private size_t ncompr = __arraycount(compr);
222 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
223     unsigned char **, size_t *);
224 #ifdef BUILTIN_DECOMPRESS
225 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
226     size_t *, int);
227 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
228     size_t *, int);
229 #endif
230 #ifdef BUILTIN_BZLIB
231 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
232     size_t *, int);
233 #endif
234 #ifdef BUILTIN_XZLIB
235 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
236     size_t *, int);
237 #endif
238 #ifdef BUILTIN_ZSTDLIB
239 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
240     size_t *, int);
241 #endif
242 #ifdef BUILTIN_LZLIB
243 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
244     size_t *, int);
245 #endif
246 
247 static int makeerror(unsigned char **, size_t *, const char *, ...)
248     __attribute__((__format__(__printf__, 3, 4)));
249 file_private const char *methodname(size_t);
250 
251 file_private int
252 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
253 {
254 	unsigned char *p;
255 	int mime = ms->flags & MAGIC_MIME;
256 
257 	if (!mime)
258 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
259 
260 	for (p = buf; *p; p++)
261 		if (!isalnum(*p))
262 			*p = '-';
263 
264 	return file_printf(ms, "application/x-decompression-error-%s-%s",
265 	    methodname(i), buf);
266 }
267 
/*
 * Try to recognize the buffer `b' as compressed data and, if it is,
 * describe the decompressed contents.
 *
 * Returns 0 immediately when MAGIC_COMPRESS is not set.  Otherwise every
 * compr[] entry whose signature matches the start of the buffer is tried
 * through uncompressbuf().  The return value is 1 once something has been
 * printed for a match, -1 if printing or buffer handling failed, and 0 if
 * nothing matched or produced data.
 *
 * While a match is being described, MAGIC_COMPRESS is cleared in
 * ms->flags; it is set again unconditionally before returning.  SIGPIPE
 * is ignored for the duration of the decompression attempts and the
 * previous disposition is restored on the way out.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* not enough input for this entry's signature/predicate */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		if (compr[i].maglen < 0) {
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/*
			 * A separate new_act is used so that sig_act keeps
			 * the previous disposition for the restore below.
			 */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		/* drop the result of a previous iteration, if any */
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			/* transparent mode: report only the inner contents */
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			/* only some of the MAGIC_MIME bits requested */
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			/*
			 * Describe the compressed container itself into a
			 * pushed buffer, then append that text to the output.
			 */
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			break;
		default:
			abort();
			/*NOTREACHED*/
		/*
		 * NOTE(review): this label lives inside the switch, so the
		 * break below only leaves the switch; the for loop goes on
		 * with the next table entry and a later match can replace
		 * rv.
		 */
		error:
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	/* restore the caller's SIGPIPE disposition unless it already ignored it */
	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
377 #endif
378 /*
379  * `safe' write for sockets and pipes.
380  */
381 file_private ssize_t
382 swrite(int fd, const void *buf, size_t n)
383 {
384 	ssize_t rv;
385 	size_t rn = n;
386 
387 	do
388 		switch (rv = write(fd, buf, n)) {
389 		case -1:
390 			if (errno == EINTR)
391 				continue;
392 			return -1;
393 		default:
394 			n -= rv;
395 			buf = CAST(const char *, buf) + rv;
396 			break;
397 		}
398 	while (n > 0);
399 	return rn;
400 }
401 
402 
403 /*
404  * `safe' read for sockets and pipes.
405  */
406 file_protected ssize_t
407 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
408 {
409 	ssize_t rv;
410 #if defined(FIONREAD) && !defined(__MINGW32__)
411 	int t = 0;
412 #endif
413 	size_t rn = n;
414 
415 	if (fd == STDIN_FILENO)
416 		goto nocheck;
417 
418 #if defined(FIONREAD) && !defined(__MINGW32__)
419 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
420 #ifdef FD_ZERO
421 		ssize_t cnt;
422 		for (cnt = 0;; cnt++) {
423 			fd_set check;
424 			struct timeval tout = {0, 100 * 1000};
425 			int selrv;
426 
427 			FD_ZERO(&check);
428 			FD_SET(fd, &check);
429 
430 			/*
431 			 * Avoid soft deadlock: do not read if there
432 			 * is nothing to read from sockets and pipes.
433 			 */
434 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
435 			if (selrv == -1) {
436 				if (errno == EINTR || errno == EAGAIN)
437 					continue;
438 			} else if (selrv == 0 && cnt >= 5) {
439 				return 0;
440 			} else
441 				break;
442 		}
443 #endif
444 		(void)ioctl(fd, FIONREAD, &t);
445 	}
446 
447 	if (t > 0 && CAST(size_t, t) < n) {
448 		n = t;
449 		rn = n;
450 	}
451 #endif
452 
453 nocheck:
454 	do
455 		switch ((rv = read(fd, buf, n))) {
456 		case -1:
457 			if (errno == EINTR)
458 				continue;
459 			return -1;
460 		case 0:
461 			return rn - n;
462 		default:
463 			n -= rv;
464 			buf = CAST(char *, CCAST(void *, buf)) + rv;
465 			break;
466 		}
467 	while (n > 0);
468 	return rn;
469 }
470 
471 file_protected int
472 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
473     size_t nbytes)
474 {
475 	char buf[4096];
476 	ssize_t r;
477 	int tfd;
478 
479 #ifdef WIN32
480 	const char *t;
481 	buf[0] = '\0';
482 	if ((t = getenv("TEMP")) != NULL)
483 		(void)strlcpy(buf, t, sizeof(buf));
484 	else if ((t = getenv("TMP")) != NULL)
485 		(void)strlcpy(buf, t, sizeof(buf));
486 	else if ((t = getenv("TMPDIR")) != NULL)
487 		(void)strlcpy(buf, t, sizeof(buf));
488 	if (buf[0] != '\0')
489 		(void)strlcat(buf, "/", sizeof(buf));
490 	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
491 #else
492 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
493 #endif
494 #ifndef HAVE_MKSTEMP
495 	{
496 		char *ptr = mktemp(buf);
497 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
498 		r = errno;
499 		(void)unlink(ptr);
500 		errno = r;
501 	}
502 #else
503 	{
504 		int te;
505 		mode_t ou = umask(0);
506 		tfd = mkstemp(buf);
507 		(void)umask(ou);
508 		te = errno;
509 		(void)unlink(buf);
510 		errno = te;
511 	}
512 #endif
513 	if (tfd == -1) {
514 		file_error(ms, errno,
515 		    "cannot create temporary file for pipe copy");
516 		return -1;
517 	}
518 
519 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
520 		r = 1;
521 	else {
522 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
523 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
524 				break;
525 	}
526 
527 	switch (r) {
528 	case -1:
529 		file_error(ms, errno, "error copying from pipe to temp file");
530 		return -1;
531 	case 0:
532 		break;
533 	default:
534 		file_error(ms, errno, "error while writing to temp file");
535 		return -1;
536 	}
537 
538 	/*
539 	 * We duplicate the file descriptor, because fclose on a
540 	 * tmpfile will delete the file, but any open descriptors
541 	 * can still access the phantom inode.
542 	 */
543 	if ((fd = dup2(tfd, fd)) == -1) {
544 		file_error(ms, errno, "could not dup descriptor for temp file");
545 		return -1;
546 	}
547 	(void)close(tfd);
548 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
549 		file_badseek(ms);
550 		return -1;
551 	}
552 	return fd;
553 }
554 #if HAVE_FORK
555 #ifdef BUILTIN_DECOMPRESS
556 
557 #define FHCRC		(1 << 1)
558 #define FEXTRA		(1 << 2)
559 #define FNAME		(1 << 3)
560 #define FCOMMENT	(1 << 4)
561 
562 
563 file_private int
564 uncompressgzipped(const unsigned char *old, unsigned char **newch,
565     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
566 {
567 	unsigned char flg;
568 	size_t data_start = 10;
569 
570 	if (*n < 4) {
571 		goto err;
572 	}
573 
574 	flg = old[3];
575 
576 	if (flg & FEXTRA) {
577 		if (data_start + 1 >= *n)
578 			goto err;
579 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
580 	}
581 	if (flg & FNAME) {
582 		while(data_start < *n && old[data_start])
583 			data_start++;
584 		data_start++;
585 	}
586 	if (flg & FCOMMENT) {
587 		while(data_start < *n && old[data_start])
588 			data_start++;
589 		data_start++;
590 	}
591 	if (flg & FHCRC)
592 		data_start += 2;
593 
594 	if (data_start >= *n)
595 		goto err;
596 
597 	*n -= data_start;
598 	old += data_start;
599 	return uncompresszlib(old, newch, bytes_max, n, 0);
600 err:
601 	return makeerror(newch, n, "File too short");
602 }
603 
604 file_private int
605 uncompresszlib(const unsigned char *old, unsigned char **newch,
606     size_t bytes_max, size_t *n, int zlib)
607 {
608 	int rc;
609 	z_stream z;
610 
611 	DPRINTF("builtin zlib decompression\n");
612 	z.next_in = CCAST(Bytef *, old);
613 	z.avail_in = CAST(uint32_t, *n);
614 	z.next_out = *newch;
615 	z.avail_out = CAST(unsigned int, bytes_max);
616 	z.zalloc = Z_NULL;
617 	z.zfree = Z_NULL;
618 	z.opaque = Z_NULL;
619 
620 	/* LINTED bug in header macro */
621 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
622 	if (rc != Z_OK)
623 		goto err;
624 
625 	rc = inflate(&z, Z_SYNC_FLUSH);
626 	if (rc != Z_OK && rc != Z_STREAM_END) {
627 		inflateEnd(&z);
628 		goto err;
629 	}
630 
631 	*n = CAST(size_t, z.total_out);
632 	rc = inflateEnd(&z);
633 	if (rc != Z_OK)
634 		goto err;
635 
636 	/* let's keep the nul-terminate tradition */
637 	(*newch)[*n] = '\0';
638 
639 	return OKDATA;
640 err:
641 	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
642 }
643 #endif
644 
645 #ifdef BUILTIN_BZLIB
646 file_private int
647 uncompressbzlib(const unsigned char *old, unsigned char **newch,
648     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
649 {
650 	int rc;
651 	bz_stream bz;
652 
653 	DPRINTF("builtin bzlib decompression\n");
654 	memset(&bz, 0, sizeof(bz));
655 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
656 	if (rc != BZ_OK)
657 		goto err;
658 
659 	bz.next_in = CCAST(char *, RCAST(const char *, old));
660 	bz.avail_in = CAST(uint32_t, *n);
661 	bz.next_out = RCAST(char *, *newch);
662 	bz.avail_out = CAST(unsigned int, bytes_max);
663 
664 	rc = BZ2_bzDecompress(&bz);
665 	if (rc != BZ_OK && rc != BZ_STREAM_END) {
666 		BZ2_bzDecompressEnd(&bz);
667 		goto err;
668 	}
669 
670 	/* Assume byte_max is within 32bit */
671 	/* assert(bz.total_out_hi32 == 0); */
672 	*n = CAST(size_t, bz.total_out_lo32);
673 	rc = BZ2_bzDecompressEnd(&bz);
674 	if (rc != BZ_OK)
675 		goto err;
676 
677 	/* let's keep the nul-terminate tradition */
678 	(*newch)[*n] = '\0';
679 
680 	return OKDATA;
681 err:
682 	return makeerror(newch, n, "bunzip error %d", rc);
683 }
684 #endif
685 
686 #ifdef BUILTIN_XZLIB
687 file_private int
688 uncompressxzlib(const unsigned char *old, unsigned char **newch,
689     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
690 {
691 	int rc;
692 	lzma_stream xz;
693 
694 	DPRINTF("builtin xzlib decompression\n");
695 	memset(&xz, 0, sizeof(xz));
696 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
697 	if (rc != LZMA_OK)
698 		goto err;
699 
700 	xz.next_in = CCAST(const uint8_t *, old);
701 	xz.avail_in = CAST(uint32_t, *n);
702 	xz.next_out = RCAST(uint8_t *, *newch);
703 	xz.avail_out = CAST(unsigned int, bytes_max);
704 
705 	rc = lzma_code(&xz, LZMA_RUN);
706 	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
707 		lzma_end(&xz);
708 		goto err;
709 	}
710 
711 	*n = CAST(size_t, xz.total_out);
712 
713 	lzma_end(&xz);
714 
715 	/* let's keep the nul-terminate tradition */
716 	(*newch)[*n] = '\0';
717 
718 	return OKDATA;
719 err:
720 	return makeerror(newch, n, "unxz error %d", rc);
721 }
722 #endif
723 
724 #ifdef BUILTIN_ZSTDLIB
725 file_private int
726 uncompresszstd(const unsigned char *old, unsigned char **newch,
727     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
728 {
729 	size_t rc;
730 	ZSTD_DStream *zstd;
731 	ZSTD_inBuffer in;
732 	ZSTD_outBuffer out;
733 
734 	DPRINTF("builtin zstd decompression\n");
735 	if ((zstd = ZSTD_createDStream()) == NULL) {
736 		return makeerror(newch, n, "No ZSTD decompression stream, %s",
737 		    strerror(errno));
738 	}
739 
740 	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
741 	if (ZSTD_isError(rc))
742 		goto err;
743 
744 	in.src = CCAST(const void *, old);
745 	in.size = *n;
746 	in.pos = 0;
747 	out.dst = RCAST(void *, *newch);
748 	out.size = bytes_max;
749 	out.pos = 0;
750 
751 	rc = ZSTD_decompressStream(zstd, &out, &in);
752 	if (ZSTD_isError(rc))
753 		goto err;
754 
755 	*n = out.pos;
756 
757 	ZSTD_freeDStream(zstd);
758 
759 	/* let's keep the nul-terminate tradition */
760 	(*newch)[*n] = '\0';
761 
762 	return OKDATA;
763 err:
764 	ZSTD_freeDStream(zstd);
765 	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
766 }
767 #endif
768 
769 #ifdef BUILTIN_LZLIB
770 file_private int
771 uncompresslzlib(const unsigned char *old, unsigned char **newch,
772     size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
773 {
774 	enum LZ_Errno err;
775 	size_t old_remaining = *n;
776 	size_t new_remaining = bytes_max;
777 	size_t total_read = 0;
778 	unsigned char *bufp;
779 	struct LZ_Decoder *dec;
780 
781 	bufp = *newch;
782 
783 	DPRINTF("builtin lzlib decompression\n");
784 	dec = LZ_decompress_open();
785 	if (!dec) {
786 		return makeerror(newch, n, "unable to allocate LZ_Decoder");
787 	}
788 	if (LZ_decompress_errno(dec) != LZ_ok)
789 		goto err;
790 
791 	for (;;) {
792 		// LZ_decompress_read() stops at member boundaries, so we may
793 		// have more than one successful read after writing all data
794 		// we have.
795 		if (old_remaining > 0) {
796 			int wr = LZ_decompress_write(dec, old, old_remaining);
797 			if (wr < 0)
798 				goto err;
799 			old_remaining -= wr;
800 			old += wr;
801 		}
802 
803 		int rd = LZ_decompress_read(dec, bufp, new_remaining);
804 		if (rd > 0) {
805 			new_remaining -= rd;
806 			bufp += rd;
807 			total_read += rd;
808 		}
809 
810 		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
811 			goto err;
812 		if (new_remaining == 0)
813 			break;
814 		if (old_remaining == 0 && rd == 0)
815 			break;
816 	}
817 
818 	LZ_decompress_close(dec);
819 	*n = total_read;
820 
821 	/* let's keep the nul-terminate tradition */
822 	*bufp = '\0';
823 
824 	return OKDATA;
825 err:
826 	err = LZ_decompress_errno(dec);
827 	LZ_decompress_close(dec);
828 	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
829 }
830 #endif
831 
832 
833 static int
834 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
835 {
836 	char *msg;
837 	va_list ap;
838 	int rv;
839 
840 	DPRINTF("Makeerror %s\n", fmt);
841 	free(*buf);
842 	va_start(ap, fmt);
843 	rv = vasprintf(&msg, fmt, ap);
844 	va_end(ap);
845 	if (rv < 0) {
846 		DPRINTF("Makeerror failed");
847 		*buf = NULL;
848 		*len = 0;
849 		return NODATA;
850 	}
851 	*buf = RCAST(unsigned char *, msg);
852 	*len = strlen(msg);
853 	return ERRDATA;
854 }
855 
/*
 * Close descriptor fd[i] unless the slot is already marked unused (-1),
 * then mark the slot unused so that a second call is a no-op.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
864 
/* Close both ends of the pipe pair fd[0]/fd[1] that are still open. */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
872 
/*
 * Arrange for descriptor `fd' to become descriptor `i' and for the
 * original `fd' to be closed.  Nothing happens if they are already equal.
 *
 * With posix_spawn support, `v' is the posix_spawn_file_actions_t to which
 * the dup2 and close actions are appended.  Otherwise the dup2()/close()
 * are performed immediately; `v' is not used for anything then.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/*
		 * This branch runs in the vfork()ed child, so leave with
		 * _exit(): exit() would run exit handlers and flush stdio
		 * state that belongs to the parent.
		 */
		_exit(EXIT_FAILURE);
	}
	close(v ? fd : fd);
#endif
}
890 
/*
 * Arrange for descriptor `fd' to be closed.  With posix_spawn support the
 * close is appended to the file actions object `v'; otherwise fd is closed
 * immediately and `v' is ignored.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
901 
902 static void
903 handledesc(void *v, int fd, int fdp[3][2])
904 {
905 	if (fd != -1) {
906 		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
907 		movedesc(v, STDIN_FILENO, fd);
908 	} else {
909 		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
910 		if (fdp[STDIN_FILENO][1] > 2)
911 		    closedesc(v, fdp[STDIN_FILENO][1]);
912 	}
913 
914 	file_clear_closexec(STDIN_FILENO);
915 
916 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
917 	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
918 	if (fdp[STDOUT_FILENO][0] > 2)
919 		closedesc(v, fdp[STDOUT_FILENO][0]);
920 
921 	file_clear_closexec(STDOUT_FILENO);
922 
923 	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
924 	if (fdp[STDERR_FILENO][0] > 2)
925 		closedesc(v, fdp[STDERR_FILENO][0]);
926 
927 	file_clear_closexec(STDERR_FILENO);
928 }
929 
930 static pid_t
931 writechild(int fd, const void *old, size_t n)
932 {
933 	pid_t pid;
934 
935 	/*
936 	 * fork again, to avoid blocking because both
937 	 * pipes filled
938 	 */
939 	pid = fork();
940 	if (pid == -1) {
941 		DPRINTF("Fork failed (%s)\n", strerror(errno));
942 		return -1;
943 	}
944 	if (pid == 0) {
945 		/* child */
946 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
947 			DPRINTF("Write failed (%s)\n", strerror(errno));
948 			exit(EXIT_FAILURE);
949 		}
950 		exit(EXIT_SUCCESS);
951 	}
952 	/* parent */
953 	return pid;
954 }
955 
956 static ssize_t
957 filter_error(unsigned char *ubuf, ssize_t n)
958 {
959 	char *p;
960 	char *buf;
961 
962 	ubuf[n] = '\0';
963 	buf = RCAST(char *, ubuf);
964 	while (isspace(CAST(unsigned char, *buf)))
965 		buf++;
966 	DPRINTF("Filter error[[[%s]]]\n", buf);
967 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
968 		*p = '\0';
969 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
970 		*p = '\0';
971 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
972 		++p;
973 		while (isspace(CAST(unsigned char, *p)))
974 			p++;
975 		n = strlen(p);
976 		memmove(ubuf, p, CAST(size_t, n + 1));
977 	}
978 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
979 	if (islower(*ubuf))
980 		*ubuf = toupper(*ubuf);
981 	return n;
982 }
983 
984 file_private const char *
985 methodname(size_t method)
986 {
987 	switch (method) {
988 #ifdef BUILTIN_DECOMPRESS
989 	case METH_FROZEN:
990 	case METH_ZLIB:
991 		return "zlib";
992 #endif
993 #ifdef BUILTIN_BZLIB
994 	case METH_BZIP:
995 		return "bzlib";
996 #endif
997 #ifdef BUILTIN_XZLIB
998 	case METH_XZ:
999 	case METH_LZMA:
1000 		return "xzlib";
1001 #endif
1002 #ifdef BUILTIN_ZSTDLIB
1003 	case METH_ZSTD:
1004 		return "zstd";
1005 #endif
1006 #ifdef BUILTIN_LZLIB
1007 	case METH_LZIP:
1008 		return "lzlib";
1009 #endif
1010 	default:
1011 		return compr[method].argv[0];
1012 	}
1013 }
1014 
1015 file_private int (*
1016 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1017     size_t *, int)
1018 {
1019 	switch (method) {
1020 #ifdef BUILTIN_DECOMPRESS
1021 	case METH_FROZEN:
1022 		return uncompressgzipped;
1023 	case METH_ZLIB:
1024 		return uncompresszlib;
1025 #endif
1026 #ifdef BUILTIN_BZLIB
1027 	case METH_BZIP:
1028 		return uncompressbzlib;
1029 #endif
1030 #ifdef BUILTIN_XZLIB
1031 	case METH_XZ:
1032 	case METH_LZMA:
1033 		return uncompressxzlib;
1034 #endif
1035 #ifdef BUILTIN_ZSTDLIB
1036 	case METH_ZSTD:
1037 		return uncompresszstd;
1038 #endif
1039 #ifdef BUILTIN_LZLIB
1040 	case METH_LZIP:
1041 		return uncompresslzlib;
1042 #endif
1043 	default:
1044 		return NULL;
1045 	}
1046 }
1047 
1048 file_private int
1049 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1050     const unsigned char *old, unsigned char **newch, size_t* n)
1051 {
1052 	int fdp[3][2];
1053 	int status, rv, w;
1054 	pid_t pid;
1055 	pid_t writepid = -1;
1056 	size_t i;
1057 	ssize_t r, re;
1058 	char *const *args;
1059 #ifdef HAVE_POSIX_SPAWNP
1060 	posix_spawn_file_actions_t fa;
1061 #endif
1062 	int (*decompress)(const unsigned char *, unsigned char **,
1063 	    size_t, size_t *, int) = getdecompressor(method);
1064 
1065 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
1066 	if (*newch == NULL)
1067 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
1068 
1069 	if (decompress) {
1070 		if (nofork) {
1071 			return makeerror(newch, n,
1072 			    "Fork is required to uncompress, but disabled");
1073 		}
1074 		return (*decompress)(old, newch, bytes_max, n, 1);
1075 	}
1076 
1077 	(void)fflush(stdout);
1078 	(void)fflush(stderr);
1079 
1080 	for (i = 0; i < __arraycount(fdp); i++)
1081 		fdp[i][0] = fdp[i][1] = -1;
1082 
1083 	/*
1084 	 * There are multithreaded users who run magic_file()
1085 	 * from dozens of threads. If two parallel magic_file() calls
1086 	 * analyze two large compressed files, both will spawn
1087 	 * an uncompressing child here, which writes out uncompressed data.
1088 	 * We read some portion, then close the pipe, then waitpid() the child.
1089 	 * If uncompressed data is larger, child should get EPIPE and exit.
1090 	 * However, with *parallel* calls OTHER child may unintentionally
1091 	 * inherit pipe fds, thus keeping pipe open and making writes in
1092 	 * our child block instead of failing with EPIPE!
1093 	 * (For the bug to occur, two threads must mutually inherit their pipes,
1094 	 * and both must have large outputs. Thus it happens not that often).
1095 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
1096 	 */
1097 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1098 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1099 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1100 		closep(fdp[STDIN_FILENO]);
1101 		closep(fdp[STDOUT_FILENO]);
1102 		return makeerror(newch, n, "Cannot create pipe, %s",
1103 		    strerror(errno));
1104 	}
1105 
1106 	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1107 #ifdef HAVE_POSIX_SPAWNP
1108 	posix_spawn_file_actions_init(&fa);
1109 
1110 	handledesc(&fa, fd, fdp);
1111 
1112 	DPRINTF("Executing %s\n", compr[method].argv[0]);
1113 	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1114 	    args, NULL);
1115 
1116 	posix_spawn_file_actions_destroy(&fa);
1117 
1118 	if (status == -1) {
1119 		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1120 		    compr[method].argv[0], strerror(errno));
1121 	}
1122 #else
1123 	/* For processes with large mapped virtual sizes, vfork
1124 	 * may be _much_ faster (10-100 times) than fork.
1125 	 */
1126 	pid = vfork();
1127 	if (pid == -1) {
1128 		return makeerror(newch, n, "Cannot vfork, %s",
1129 		    strerror(errno));
1130 	}
1131 	if (pid == 0) {
1132 		/* child */
1133 		/* Note: we are after vfork, do not modify memory
1134 		 * in a way which confuses parent. In particular,
1135 		 * do not modify fdp[i][j].
1136 		 */
1137 		handledesc(NULL, fd, fdp);
1138 		DPRINTF("Executing %s\n", compr[method].argv[0]);
1139 
1140 		(void)execvp(compr[method].argv[0], args);
1141 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1142 		    compr[method].argv[0], strerror(errno));
1143 		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1144 	}
1145 #endif
1146 	/* parent */
1147 	/* Close write sides of child stdout/err pipes */
1148 	for (i = 1; i < __arraycount(fdp); i++)
1149 		closefd(fdp[i], 1);
1150 	/* Write the buffer data to child stdin, if we don't have fd */
1151 	if (fd == -1) {
1152 		closefd(fdp[STDIN_FILENO], 0);
1153 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1154 		if (writepid == (pid_t)-1) {
1155 			rv = makeerror(newch, n, "Write to child failed, %s",
1156 			    strerror(errno));
1157 			DPRINTF("Write to child failed\n");
1158 			goto err;
1159 		}
1160 		closefd(fdp[STDIN_FILENO], 1);
1161 	}
1162 
1163 	rv = OKDATA;
1164 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1165 	DPRINTF("read got %zd\n", r);
1166 	if (r < 0) {
1167 		rv = ERRDATA;
1168 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1169 		        strerror(errno));
1170 		goto err;
1171 	}
1172 	if (CAST(size_t, r) == bytes_max) {
1173 		/*
1174 		 * close fd so that the child exits with sigpipe and ignore
1175 		 * errors, otherwise we risk the child blocking and never
1176 		 * exiting.
1177 		 */
1178 		DPRINTF("Closing stdout for bytes_max\n");
1179 		closefd(fdp[STDOUT_FILENO], 0);
1180 		goto ok;
1181 	}
1182 	if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1183 		DPRINTF("Got stuff from stderr %s\n", *newch);
1184 		rv = ERRDATA;
1185 		r = filter_error(*newch, r);
1186 		goto ok;
1187 	}
1188 	if  (re == 0)
1189 		goto ok;
1190 	rv = makeerror(newch, n, "Read stderr failed, %s",
1191 	    strerror(errno));
1192 	goto err;
1193 ok:
1194 	*n = r;
1195 	/* NUL terminate, as every buffer is handled here. */
1196 	(*newch)[*n] = '\0';
1197 err:
1198 	closefd(fdp[STDIN_FILENO], 1);
1199 	closefd(fdp[STDOUT_FILENO], 0);
1200 	closefd(fdp[STDERR_FILENO], 0);
1201 
1202 	w = waitpid(pid, &status, 0);
1203 wait_err:
1204 	if (w == -1) {
1205 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1206 		DPRINTF("Child wait return %#x\n", status);
1207 	} else if (!WIFEXITED(status)) {
1208 		DPRINTF("Child not exited (%#x)\n", status);
1209 	} else if (WEXITSTATUS(status) != 0) {
1210 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1211 	}
1212 	if (writepid > 0) {
1213 		/* _After_ we know decompressor has exited, our input writer
1214 		 * definitely will exit now (at worst, writing fails in it,
1215 		 * since output fd is closed now on the reading size).
1216 		 */
1217 		w = waitpid(writepid, &status, 0);
1218 		writepid = -1;
1219 		goto wait_err;
1220 	}
1221 
1222 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1223 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1224 
1225 	return rv;
1226 }
1227 #endif
1228