xref: /freebsd/contrib/file/src/compress.c (revision eea7c61590ae8968b3f1f609cf0bc8633222a94f)
1 /*
2  * Copyright (c) Ian F. Darwin 1986-1995.
3  * Software written by Ian F. Darwin and others;
4  * maintained 1995-present by Christos Zoulas and others.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice immediately at the beginning of the file, without modification,
11  *    this list of conditions, and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 /*
29  * compress routines:
30  *	zmagic() - returns 0 if not recognized, uncompresses and prints
31  *		   information if recognized
32  *	uncompress(method, old, n, newch) - uncompress old into new,
33  *					    using method, return sizeof new
34  */
35 #include "file.h"
36 
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $")
39 #endif
40 
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #include <string.h>
47 #include <errno.h>
48 #include <ctype.h>
49 #include <stdarg.h>
50 #include <signal.h>
51 #ifndef HAVE_SIG_T
52 typedef void (*sig_t)(int);
53 #endif /* HAVE_SIG_T */
54 #if !defined(__MINGW32__) && !defined(WIN32) && !defined(__MINGW64__)
55 #include <sys/ioctl.h>
56 #endif
57 #ifdef HAVE_SYS_WAIT_H
58 #include <sys/wait.h>
59 #endif
60 #if defined(HAVE_SYS_TIME_H)
61 #include <sys/time.h>
62 #endif
63 
64 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
65 #define BUILTIN_DECOMPRESS
66 #include <zlib.h>
67 #endif
68 
69 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
70 #define BUILTIN_BZLIB
71 #include <bzlib.h>
72 #endif
73 
74 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
75 #define BUILTIN_XZLIB
76 #include <lzma.h>
77 #endif
78 
79 #ifdef DEBUG
80 int tty = -1;
81 #define DPRINTF(...)	do { \
82 	if (tty == -1) \
83 		tty = open("/dev/tty", O_RDWR); \
84 	if (tty == -1) \
85 		abort(); \
86 	dprintf(tty, __VA_ARGS__); \
87 } while (/*CONSTCOND*/0)
88 #else
89 #define DPRINTF(...)
90 #endif
91 
92 #ifdef ZLIBSUPPORT
93 /*
94  * The following python code is not really used because ZLIBSUPPORT is only
95  * defined if we have a built-in zlib, and the built-in zlib handles that.
96  * That is not true for android where we have zlib.h and not -lz.
97  */
98 static const char zlibcode[] =
99     "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
100 
101 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
102 
/*
 * Magic test for the zlib entry of compr[]: decide whether buf starts with
 * a plausible zlib stream header.
 *
 * Only buf[0] and buf[1] are examined, so the caller must supply at least
 * two bytes.  Returns 1 when the header looks valid, 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	/* low nibble must be 8 and the top bit of the first byte clear */
	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/*
	 * The two header bytes, read as a big-endian 16-bit number, must be
	 * a multiple of 31.  This is plain arithmetic on byte values, so the
	 * byte order of the host must not influence which byte is shifted.
	 */
	check = ((unsigned int)buf[0] << 8) | buf[1];
	return check % 31 == 0;
}
119 #endif
120 
/*
 * Magic test for the lzma entry of compr[].
 *
 * Accepts a buffer whose first byte is 0x5d, whose next two bytes are zero
 * and whose byte at offset 12 is either 0x00 or 0xff.  Offset 12 is only
 * inspected once the first three bytes matched; the caller must supply at
 * least 13 bytes.  Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	return (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0) &&
	    (buf[12] == 0 || buf[12] == 0xff);
}
130 
/* Option strings shared between several of the command lines below. */
#define gzip_flags "-cd"
#define lrzip_flags "-do"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argv vectors for the external decompressors; element 0
 * is the program name handed to execvp().
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", lrzip_flags, NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
159 
160 #define	do_zlib		NULL
161 #define	do_bzlib	NULL
162 
163 private const struct {
164 	union {
165 		const char *magic;
166 		int (*func)(const unsigned char *);
167 	} u;
168 	int maglen;
169 	const char **argv;
170 	void *unused;
171 } compr[] = {
172 #define METH_FROZEN	2
173 #define METH_BZIP	7
174 #define METH_XZ		9
175 #define METH_LZMA	13
176 #define METH_ZLIB	14
177     { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
178     /* Uncompress can get stuck; so use gzip first if we have it
179      * Idea from Damien Clark, thanks! */
180     { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
181     { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
182     { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
183     { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
184     /* the standard pack utilities do not accept standard input */
185     { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
186     { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
187     /* ...only first file examined */
188     { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
189     { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
190     { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
191     { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
192     { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
193     { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
194     { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
195 #ifdef ZLIBSUPPORT
196     { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
197 #endif
198 };
199 
200 #define OKDATA 	0
201 #define NODATA	1
202 #define ERRDATA	2
203 
204 private ssize_t swrite(int, const void *, size_t);
205 #if HAVE_FORK
206 private size_t ncompr = __arraycount(compr);
207 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
208     unsigned char **, size_t *);
209 #ifdef BUILTIN_DECOMPRESS
210 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
211     size_t *, int);
212 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
213     size_t *);
214 #endif
215 #ifdef BUILTIN_BZLIB
216 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
217     size_t *);
218 #endif
219 #ifdef BUILTIN_XZLIB
220 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
221     size_t *);
222 #endif
223 
224 static int makeerror(unsigned char **, size_t *, const char *, ...)
225     __attribute__((__format__(__printf__, 3, 4)));
226 private const char *methodname(size_t);
227 
228 private int
229 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
230 {
231 	unsigned char *p;
232 	int mime = ms->flags & MAGIC_MIME;
233 
234 	if (!mime)
235 		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
236 
237 	for (p = buf; *p; p++)
238 		if (!isalnum(*p))
239 			*p = '-';
240 
241 	return file_printf(ms, "application/x-decompression-error-%s-%s",
242 	    methodname(i), buf);
243 }
244 
245 protected int
246 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
247 {
248 	unsigned char *newbuf = NULL;
249 	size_t i, nsz;
250 	char *rbuf;
251 	file_pushbuf_t *pb;
252 	int urv, prv, rv = 0;
253 	int mime = ms->flags & MAGIC_MIME;
254 	int fd = b->fd;
255 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
256 	size_t nbytes = b->flen;
257 	int sa_saved = 0;
258 	struct sigaction sig_act;
259 
260 	if ((ms->flags & MAGIC_COMPRESS) == 0)
261 		return 0;
262 
263 	for (i = 0; i < ncompr; i++) {
264 		int zm;
265 		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
266 			continue;
267 		if (compr[i].maglen < 0) {
268 			zm = (*compr[i].u.func)(buf);
269 		} else {
270 			zm = memcmp(buf, compr[i].u.magic,
271 			    CAST(size_t, compr[i].maglen)) == 0;
272 		}
273 
274 		if (!zm)
275 			continue;
276 
277 		/* Prevent SIGPIPE death if child dies unexpectedly */
278 		if (!sa_saved) {
279 			//We can use sig_act for both new and old, but
280 			struct sigaction new_act;
281 			memset(&new_act, 0, sizeof(new_act));
282 			new_act.sa_handler = SIG_IGN;
283 			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
284 		}
285 
286 		nsz = nbytes;
287 		urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
288 		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
289 		    (char *)newbuf, nsz);
290 		switch (urv) {
291 		case OKDATA:
292 		case ERRDATA:
293 			ms->flags &= ~MAGIC_COMPRESS;
294 			if (urv == ERRDATA)
295 				prv = format_decompression_error(ms, i, newbuf);
296 			else
297 				prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
298 			if (prv == -1)
299 				goto error;
300 			rv = 1;
301 			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
302 				goto out;
303 			if (mime != MAGIC_MIME && mime != 0)
304 				goto out;
305 			if ((file_printf(ms,
306 			    mime ? " compressed-encoding=" : " (")) == -1)
307 				goto error;
308 			if ((pb = file_push_buffer(ms)) == NULL)
309 				goto error;
310 			/*
311 			 * XXX: If file_buffer fails here, we overwrite
312 			 * the compressed text. FIXME.
313 			 */
314 			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
315 				if (file_pop_buffer(ms, pb) != NULL)
316 					abort();
317 				goto error;
318 			}
319 			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
320 				if (file_printf(ms, "%s", rbuf) == -1) {
321 					free(rbuf);
322 					goto error;
323 				}
324 				free(rbuf);
325 			}
326 			if (!mime && file_printf(ms, ")") == -1)
327 				goto error;
328 			/*FALLTHROUGH*/
329 		case NODATA:
330 			break;
331 		default:
332 			abort();
333 			/*NOTREACHED*/
334 		error:
335 			rv = -1;
336 			break;
337 		}
338 	}
339 out:
340 	DPRINTF("rv = %d\n", rv);
341 
342 	if (sa_saved && sig_act.sa_handler != SIG_IGN)
343 		(void)sigaction(SIGPIPE, &sig_act, NULL);
344 
345 	free(newbuf);
346 	ms->flags |= MAGIC_COMPRESS;
347 	DPRINTF("Zmagic returns %d\n", rv);
348 	return rv;
349 }
350 #endif
351 /*
352  * `safe' write for sockets and pipes.
353  */
354 private ssize_t
355 swrite(int fd, const void *buf, size_t n)
356 {
357 	ssize_t rv;
358 	size_t rn = n;
359 
360 	do
361 		switch (rv = write(fd, buf, n)) {
362 		case -1:
363 			if (errno == EINTR)
364 				continue;
365 			return -1;
366 		default:
367 			n -= rv;
368 			buf = CAST(const char *, buf) + rv;
369 			break;
370 		}
371 	while (n > 0);
372 	return rn;
373 }
374 
375 
376 /*
377  * `safe' read for sockets and pipes.
378  */
379 protected ssize_t
380 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
381 {
382 	ssize_t rv;
383 #ifdef FIONREAD
384 	int t = 0;
385 #endif
386 	size_t rn = n;
387 
388 	if (fd == STDIN_FILENO)
389 		goto nocheck;
390 
391 #ifdef FIONREAD
392 	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
393 #ifdef FD_ZERO
394 		ssize_t cnt;
395 		for (cnt = 0;; cnt++) {
396 			fd_set check;
397 			struct timeval tout = {0, 100 * 1000};
398 			int selrv;
399 
400 			FD_ZERO(&check);
401 			FD_SET(fd, &check);
402 
403 			/*
404 			 * Avoid soft deadlock: do not read if there
405 			 * is nothing to read from sockets and pipes.
406 			 */
407 			selrv = select(fd + 1, &check, NULL, NULL, &tout);
408 			if (selrv == -1) {
409 				if (errno == EINTR || errno == EAGAIN)
410 					continue;
411 			} else if (selrv == 0 && cnt >= 5) {
412 				return 0;
413 			} else
414 				break;
415 		}
416 #endif
417 		(void)ioctl(fd, FIONREAD, &t);
418 	}
419 
420 	if (t > 0 && CAST(size_t, t) < n) {
421 		n = t;
422 		rn = n;
423 	}
424 #endif
425 
426 nocheck:
427 	do
428 		switch ((rv = read(fd, buf, n))) {
429 		case -1:
430 			if (errno == EINTR)
431 				continue;
432 			return -1;
433 		case 0:
434 			return rn - n;
435 		default:
436 			n -= rv;
437 			buf = CAST(char *, CCAST(void *, buf)) + rv;
438 			break;
439 		}
440 	while (n > 0);
441 	return rn;
442 }
443 
444 protected int
445 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
446     size_t nbytes)
447 {
448 	char buf[4096];
449 	ssize_t r;
450 	int tfd;
451 
452 	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
453 #ifndef HAVE_MKSTEMP
454 	{
455 		char *ptr = mktemp(buf);
456 		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
457 		r = errno;
458 		(void)unlink(ptr);
459 		errno = r;
460 	}
461 #else
462 	{
463 		int te;
464 		mode_t ou = umask(0);
465 		tfd = mkstemp(buf);
466 		(void)umask(ou);
467 		te = errno;
468 		(void)unlink(buf);
469 		errno = te;
470 	}
471 #endif
472 	if (tfd == -1) {
473 		file_error(ms, errno,
474 		    "cannot create temporary file for pipe copy");
475 		return -1;
476 	}
477 
478 	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
479 		r = 1;
480 	else {
481 		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
482 			if (swrite(tfd, buf, CAST(size_t, r)) != r)
483 				break;
484 	}
485 
486 	switch (r) {
487 	case -1:
488 		file_error(ms, errno, "error copying from pipe to temp file");
489 		return -1;
490 	case 0:
491 		break;
492 	default:
493 		file_error(ms, errno, "error while writing to temp file");
494 		return -1;
495 	}
496 
497 	/*
498 	 * We duplicate the file descriptor, because fclose on a
499 	 * tmpfile will delete the file, but any open descriptors
500 	 * can still access the phantom inode.
501 	 */
502 	if ((fd = dup2(tfd, fd)) == -1) {
503 		file_error(ms, errno, "could not dup descriptor for temp file");
504 		return -1;
505 	}
506 	(void)close(tfd);
507 	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
508 		file_badseek(ms);
509 		return -1;
510 	}
511 	return fd;
512 }
513 #if HAVE_FORK
514 #ifdef BUILTIN_DECOMPRESS
515 
516 #define FHCRC		(1 << 1)
517 #define FEXTRA		(1 << 2)
518 #define FNAME		(1 << 3)
519 #define FCOMMENT	(1 << 4)
520 
521 
522 private int
523 uncompressgzipped(const unsigned char *old, unsigned char **newch,
524     size_t bytes_max, size_t *n)
525 {
526 	unsigned char flg = old[3];
527 	size_t data_start = 10;
528 
529 	if (flg & FEXTRA) {
530 		if (data_start + 1 >= *n)
531 			goto err;
532 		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
533 	}
534 	if (flg & FNAME) {
535 		while(data_start < *n && old[data_start])
536 			data_start++;
537 		data_start++;
538 	}
539 	if (flg & FCOMMENT) {
540 		while(data_start < *n && old[data_start])
541 			data_start++;
542 		data_start++;
543 	}
544 	if (flg & FHCRC)
545 		data_start += 2;
546 
547 	if (data_start >= *n)
548 		goto err;
549 
550 	*n -= data_start;
551 	old += data_start;
552 	return uncompresszlib(old, newch, bytes_max, n, 0);
553 err:
554 	return makeerror(newch, n, "File too short");
555 }
556 
557 private int
558 uncompresszlib(const unsigned char *old, unsigned char **newch,
559     size_t bytes_max, size_t *n, int zlib)
560 {
561 	int rc;
562 	z_stream z;
563 
564 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
565 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
566 
567 	z.next_in = CCAST(Bytef *, old);
568 	z.avail_in = CAST(uint32_t, *n);
569 	z.next_out = *newch;
570 	z.avail_out = CAST(unsigned int, bytes_max);
571 	z.zalloc = Z_NULL;
572 	z.zfree = Z_NULL;
573 	z.opaque = Z_NULL;
574 
575 	/* LINTED bug in header macro */
576 	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
577 	if (rc != Z_OK)
578 		goto err;
579 
580 	rc = inflate(&z, Z_SYNC_FLUSH);
581 	if (rc != Z_OK && rc != Z_STREAM_END)
582 		goto err;
583 
584 	*n = CAST(size_t, z.total_out);
585 	rc = inflateEnd(&z);
586 	if (rc != Z_OK)
587 		goto err;
588 
589 	/* let's keep the nul-terminate tradition */
590 	(*newch)[*n] = '\0';
591 
592 	return OKDATA;
593 err:
594 	strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
595 	*n = strlen(RCAST(char *, *newch));
596 	return ERRDATA;
597 }
598 #endif
599 
600 #ifdef BUILTIN_BZLIB
601 private int
602 uncompressbzlib(const unsigned char *old, unsigned char **newch,
603     size_t bytes_max, size_t *n)
604 {
605 	int rc;
606 	bz_stream bz;
607 
608 	memset(&bz, 0, sizeof(bz));
609 	rc = BZ2_bzDecompressInit(&bz, 0, 0);
610 	if (rc != BZ_OK)
611 		goto err;
612 
613 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
614 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
615 
616 	bz.next_in = CCAST(char *, RCAST(const char *, old));
617 	bz.avail_in = CAST(uint32_t, *n);
618 	bz.next_out = RCAST(char *, *newch);
619 	bz.avail_out = CAST(unsigned int, bytes_max);
620 
621 	rc = BZ2_bzDecompress(&bz);
622 	if (rc != BZ_OK && rc != BZ_STREAM_END)
623 		goto err;
624 
625 	/* Assume byte_max is within 32bit */
626 	/* assert(bz.total_out_hi32 == 0); */
627 	*n = CAST(size_t, bz.total_out_lo32);
628 	rc = BZ2_bzDecompressEnd(&bz);
629 	if (rc != BZ_OK)
630 		goto err;
631 
632 	/* let's keep the nul-terminate tradition */
633 	(*newch)[*n] = '\0';
634 
635 	return OKDATA;
636 err:
637 	snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
638 	*n = strlen(RCAST(char *, *newch));
639 	return ERRDATA;
640 }
641 #endif
642 
643 #ifdef BUILTIN_XZLIB
644 private int
645 uncompressxzlib(const unsigned char *old, unsigned char **newch,
646     size_t bytes_max, size_t *n)
647 {
648 	int rc;
649 	lzma_stream xz;
650 
651 	memset(&xz, 0, sizeof(xz));
652 	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
653 	if (rc != LZMA_OK)
654 		goto err;
655 
656 	if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
657 		return makeerror(newch, n, "No buffer, %s", strerror(errno));
658 
659 	xz.next_in = CCAST(const uint8_t *, old);
660 	xz.avail_in = CAST(uint32_t, *n);
661 	xz.next_out = RCAST(uint8_t *, *newch);
662 	xz.avail_out = CAST(unsigned int, bytes_max);
663 
664 	rc = lzma_code(&xz, LZMA_RUN);
665 	if (rc != LZMA_OK && rc != LZMA_STREAM_END)
666 		goto err;
667 
668 	*n = CAST(size_t, xz.total_out);
669 
670 	lzma_end(&xz);
671 
672 	/* let's keep the nul-terminate tradition */
673 	(*newch)[*n] = '\0';
674 
675 	return OKDATA;
676 err:
677 	snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
678 	*n = strlen(RCAST(char *, *newch));
679 	return ERRDATA;
680 }
681 #endif
682 
683 
684 static int
685 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
686 {
687 	char *msg;
688 	va_list ap;
689 	int rv;
690 
691 	va_start(ap, fmt);
692 	rv = vasprintf(&msg, fmt, ap);
693 	va_end(ap);
694 	if (rv < 0) {
695 		*buf = NULL;
696 		*len = 0;
697 		return NODATA;
698 	}
699 	*buf = RCAST(unsigned char *, msg);
700 	*len = strlen(msg);
701 	return ERRDATA;
702 }
703 
/*
 * Close descriptor slot i of the array fd and mark the slot as closed by
 * storing -1 in it.  A slot that already holds -1 is left alone.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
712 
/*
 * Close both ends of the pipe pair fd[0]/fd[1]; each slot is reset to -1
 * by closefd().
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
720 
/*
 * Make descriptor number i refer to the same file as fd.
 *
 * Returns 0 when fd already is i and nothing had to be done, 1 after a
 * successful dup2().  If dup2() fails the process exits with status 1.
 */
static int
copydesc(int i, int fd)
{
	if (fd != i) {
		if (dup2(fd, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
			exit(1);
		}
		return 1;
	}
	return 0; /* "no dup was necessary" */
}
732 
733 static pid_t
734 writechild(int fd, const void *old, size_t n)
735 {
736 	pid_t pid;
737 
738 	/*
739 	 * fork again, to avoid blocking because both
740 	 * pipes filled
741 	 */
742 	pid = fork();
743 	if (pid == -1) {
744 		DPRINTF("Fork failed (%s)\n", strerror(errno));
745 		exit(1);
746 	}
747 	if (pid == 0) {
748 		/* child */
749 		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
750 			DPRINTF("Write failed (%s)\n", strerror(errno));
751 			exit(1);
752 		}
753 		exit(0);
754 	}
755 	/* parent */
756 	return pid;
757 }
758 
759 static ssize_t
760 filter_error(unsigned char *ubuf, ssize_t n)
761 {
762 	char *p;
763 	char *buf;
764 
765 	ubuf[n] = '\0';
766 	buf = RCAST(char *, ubuf);
767 	while (isspace(CAST(unsigned char, *buf)))
768 		buf++;
769 	DPRINTF("Filter error[[[%s]]]\n", buf);
770 	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
771 		*p = '\0';
772 	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
773 		*p = '\0';
774 	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
775 		++p;
776 		while (isspace(CAST(unsigned char, *p)))
777 			p++;
778 		n = strlen(p);
779 		memmove(ubuf, p, CAST(size_t, n + 1));
780 	}
781 	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
782 	if (islower(*ubuf))
783 		*ubuf = toupper(*ubuf);
784 	return n;
785 }
786 
787 private const char *
788 methodname(size_t method)
789 {
790 	switch (method) {
791 #ifdef BUILTIN_DECOMPRESS
792 	case METH_FROZEN:
793 	case METH_ZLIB:
794 		return "zlib";
795 #endif
796 #ifdef BUILTIN_BZLIB
797 	case METH_BZIP:
798 		return "bzlib";
799 #endif
800 #ifdef BUILTIN_XZLIB
801 	case METH_XZ:
802 	case METH_LZMA:
803 		return "xzlib";
804 #endif
805 	default:
806 		return compr[method].argv[0];
807 	}
808 }
809 
810 private int
811 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
812     unsigned char **newch, size_t* n)
813 {
814 	int fdp[3][2];
815 	int status, rv, w;
816 	pid_t pid;
817 	pid_t writepid = -1;
818 	size_t i;
819 	ssize_t r;
820 
821 	switch (method) {
822 #ifdef BUILTIN_DECOMPRESS
823 	case METH_FROZEN:
824 		return uncompressgzipped(old, newch, bytes_max, n);
825 	case METH_ZLIB:
826 		return uncompresszlib(old, newch, bytes_max, n, 1);
827 #endif
828 #ifdef BUILTIN_BZLIB
829 	case METH_BZIP:
830 		return uncompressbzlib(old, newch, bytes_max, n);
831 #endif
832 #ifdef BUILTIN_XZLIB
833 	case METH_XZ:
834 	case METH_LZMA:
835 		return uncompressxzlib(old, newch, bytes_max, n);
836 #endif
837 	default:
838 		break;
839 	}
840 
841 	(void)fflush(stdout);
842 	(void)fflush(stderr);
843 
844 	for (i = 0; i < __arraycount(fdp); i++)
845 		fdp[i][0] = fdp[i][1] = -1;
846 
847 	/*
848 	 * There are multithreaded users who run magic_file()
849 	 * from dozens of threads. If two parallel magic_file() calls
850 	 * analyze two large compressed files, both will spawn
851 	 * an uncompressing child here, which writes out uncompressed data.
852 	 * We read some portion, then close the pipe, then waitpid() the child.
853 	 * If uncompressed data is larger, child shound get EPIPE and exit.
854 	 * However, with *parallel* calls OTHER child may unintentionally
855 	 * inherit pipe fds, thus keeping pipe open and making writes in
856 	 * our child block instead of failing with EPIPE!
857 	 * (For the bug to occur, two threads must mutually inherit their pipes,
858 	 * and both must have large outputs. Thus it happens not that often).
859 	 * To avoid this, be sure to create pipes with O_CLOEXEC.
860 	 */
861 	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
862 	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
863 	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
864 		closep(fdp[STDIN_FILENO]);
865 		closep(fdp[STDOUT_FILENO]);
866 		return makeerror(newch, n, "Cannot create pipe, %s",
867 		    strerror(errno));
868 	}
869 
870 	/* For processes with large mapped virtual sizes, vfork
871 	 * may be _much_ faster (10-100 times) than fork.
872 	 */
873 	pid = vfork();
874 	if (pid == -1) {
875 		return makeerror(newch, n, "Cannot vfork, %s",
876 		    strerror(errno));
877 	}
878 	if (pid == 0) {
879 		/* child */
880 		/* Note: we are after vfork, do not modify memory
881 		 * in a way which confuses parent. In particular,
882 		 * do not modify fdp[i][j].
883 		 */
884 		if (fd != -1) {
885 			(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
886 			if (copydesc(STDIN_FILENO, fd))
887 				(void) close(fd);
888 		} else {
889 			if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
890 				(void) close(fdp[STDIN_FILENO][0]);
891 			if (fdp[STDIN_FILENO][1] > 2)
892 				(void) close(fdp[STDIN_FILENO][1]);
893 		}
894 		file_clear_closexec(STDIN_FILENO);
895 
896 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
897 		if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
898 			(void) close(fdp[STDOUT_FILENO][1]);
899 		if (fdp[STDOUT_FILENO][0] > 2)
900 			(void) close(fdp[STDOUT_FILENO][0]);
901 		file_clear_closexec(STDOUT_FILENO);
902 
903 		if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
904 			(void) close(fdp[STDERR_FILENO][1]);
905 		if (fdp[STDERR_FILENO][0] > 2)
906 			(void) close(fdp[STDERR_FILENO][0]);
907 		file_clear_closexec(STDERR_FILENO);
908 
909 		(void)execvp(compr[method].argv[0],
910 		    RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
911 		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
912 		    compr[method].argv[0], strerror(errno));
913 		_exit(1); /* _exit(), not exit(), because of vfork */
914 	}
915 	/* parent */
916 	/* Close write sides of child stdout/err pipes */
917 	for (i = 1; i < __arraycount(fdp); i++)
918 		closefd(fdp[i], 1);
919 	/* Write the buffer data to child stdin, if we don't have fd */
920 	if (fd == -1) {
921 		closefd(fdp[STDIN_FILENO], 0);
922 		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
923 		closefd(fdp[STDIN_FILENO], 1);
924 	}
925 
926 	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
927 	if (*newch == NULL) {
928 		rv = makeerror(newch, n, "No buffer, %s",
929 		    strerror(errno));
930 		goto err;
931 	}
932 	rv = OKDATA;
933 	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
934 	if (r <= 0) {
935 		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
936 		    r != -1 ? strerror(errno) : "no data");
937 
938 		rv = ERRDATA;
939 		if (r == 0 &&
940 		    (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
941 		{
942 			r = filter_error(*newch, r);
943 			goto ok;
944 		}
945 		free(*newch);
946 		if  (r == 0)
947 			rv = makeerror(newch, n, "Read failed, %s",
948 			    strerror(errno));
949 		else
950 			rv = makeerror(newch, n, "No data");
951 		goto err;
952 	}
953 ok:
954 	*n = r;
955 	/* NUL terminate, as every buffer is handled here. */
956 	(*newch)[*n] = '\0';
957 err:
958 	closefd(fdp[STDIN_FILENO], 1);
959 	closefd(fdp[STDOUT_FILENO], 0);
960 	closefd(fdp[STDERR_FILENO], 0);
961 
962 	w = waitpid(pid, &status, 0);
963 wait_err:
964 	if (w == -1) {
965 		free(*newch);
966 		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
967 		DPRINTF("Child wait return %#x\n", status);
968 	} else if (!WIFEXITED(status)) {
969 		DPRINTF("Child not exited (%#x)\n", status);
970 	} else if (WEXITSTATUS(status) != 0) {
971 		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
972 	}
973 	if (writepid > 0) {
974 		/* _After_ we know decompressor has exited, our input writer
975 		 * definitely will exit now (at worst, writing fails in it,
976 		 * since output fd is closed now on the reading size).
977 		 */
978 		w = waitpid(writepid, &status, 0);
979 		writepid = -1;
980 		goto wait_err;
981 	}
982 
983 	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
984 	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
985 
986 	return rv;
987 }
988 #endif
989