1 /*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *			information if recognized
 *	uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
 *			uncompress old into *newch using method; the size of
 *			the new data is returned through *n
 */
35 #include "file.h"
36
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
39 #endif
40
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <string.h>
50 #include <errno.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <signal.h>
54 #ifndef HAVE_SIG_T
55 typedef void (*sig_t)(int);
56 #endif /* HAVE_SIG_T */
57 #ifdef HAVE_SYS_IOCTL_H
58 #include <sys/ioctl.h>
59 #endif
60 #ifdef HAVE_SYS_WAIT_H
61 #include <sys/wait.h>
62 #endif
63 #if defined(HAVE_SYS_TIME_H)
64 #include <sys/time.h>
65 #endif
66
67 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
68 #define BUILTIN_DECOMPRESS
69 #include <zlib.h>
70 #endif
71
72 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
73 #define BUILTIN_BZLIB
74 #include <bzlib.h>
75 #endif
76
77 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
78 #define BUILTIN_XZLIB
79 #include <lzma.h>
80 #endif
81
82 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
83 #define BUILTIN_ZSTDLIB
84 #include <zstd.h>
85 #include <zstd_errors.h>
86 #endif
87
88 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
89 #define BUILTIN_LZLIB
90 #include <lzlib.h>
91 #endif
92
#ifdef DEBUG
/*
 * Debug tracing.  DPRINTF() takes printf-style arguments and writes them
 * to /dev/tty, which is opened on first use and kept in `tty'.  If the
 * terminal cannot be opened the process aborts.
 */
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
/* Tracing is compiled out: DPRINTF() expands to nothing. */
#define DPRINTF(...)
#endif
105
106 #ifdef ZLIBSUPPORT
/*
 * The following python code is not really used because ZLIBSUPPORT is only
 * defined if we have a built-in zlib, and the built-in zlib handles that.
 * That is not true for android where we have zlib.h and not -lz.
 */
/* Script handed to "python -c": inflate stdin with zlib, write to stdout. */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* NULL-terminated argv for the external helper used by the zlib entry. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
116
/*
 * Decide whether buf starts with a plausible zlib stream header.
 *
 * buf must hold at least 2 readable bytes (CMF followed by FLG).
 * Returns 1 when the low nibble of CMF is 8, the top bit of CMF is clear
 * and the 16-bit value CMF * 256 + FLG is a multiple of 31; 0 otherwise.
 *
 * The check value is computed arithmetically, so it does not depend on
 * the byte order of the host; the bytes must never be swapped based on
 * host endianness.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/* The header is stored most significant byte first: CMF, then FLG. */
	check = buf[0] * 256U + buf[1];
	if (check % 31 != 0)
		return 0;
	return 1;
}
133 #endif
134
/*
 * Heuristic match for a raw LZMA header; buf must hold at least 13 bytes.
 *
 * Accepts the buffer when byte 0 is 0x5d, bytes 1 and 2 are zero and
 * byte 12 is either 0x00 or 0xff.  Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	const int prefix_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

	if (!prefix_ok)
		return 0;
	return buf[12] == 0 || buf[12] == 0xff;
}
144
/* lzip is invoked with the same "decompress to stdout" flags as gzip. */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompressors.
 * Element 0 is the program name.
 */
static const char *gzip_args[] = { "gzip", gzip_flags, NULL };
static const char *uncompress_args[] = { "uncompress", "-c", NULL };
static const char *bzip2_args[] = { "bzip2", "-cd", NULL };
static const char *lzip_args[] = { "lzip", lzip_flags, NULL };
static const char *xz_args[] = { "xz", "-cd", NULL };
static const char *lrzip_args[] = { "lrzip", "-qdf", "-", NULL };
static const char *lz4_args[] = { "lz4", "-cd", NULL };
static const char *zstd_args[] = { "zstd", "-cd", NULL };
172
/* Placeholders stored in the `unused' slot of the table below. */
#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognised compression formats.  The position of an entry is
 * its "method" number, so the order must not change without updating the
 * METH_* constants and the trailing index comments.
 *
 *	u.magic	 byte string compared against the start of the buffer when
 *		 maglen is positive
 *	u.func	 predicate called on the buffer when maglen is negative
 *	maglen	 number of bytes to compare; when negative, its absolute
 *		 value is the minimum buffer size needed to call u.func
 *	argv	 NULL-terminated command line of the external decompressor
 *	unused	 not referenced by the code in this file
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
/*
 * Indices of the entries that getdecompressor()/methodname() special-case.
 * NOTE(review): METH_FROZEN is 2, which is the entry annotated "gzipped";
 * the entry annotated "frozen" is 3.  The name looks historical.
 */
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
214
/*
 * Status codes returned by uncompressbuf() and the built-in decompressors:
 *	OKDATA	the output buffer holds NUL-terminated decompressed data
 *	NODATA	nothing usable was produced (makeerror() could not even
 *		allocate a message; the buffer pointer is NULL)
 *	ERRDATA	the output buffer holds an error message instead of data
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2

file_private ssize_t swrite(int, const void *, size_t);
#if HAVE_FORK
/* Number of entries in compr[]; bounds the scan in file_zmagic(). */
file_private size_t ncompr = __arraycount(compr);
file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
    unsigned char **, size_t *);
/*
 * The built-in decompressors all share one signature so that
 * getdecompressor() can return any of them:
 *	(input, &output, bytes_max, &length, flag)
 */
#ifdef BUILTIN_DECOMPRESS
file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_BZLIB
file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_XZLIB
file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_ZSTDLIB
file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_LZLIB
file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif

/* printf-like: argument 3 is the format, the variadic part starts at 4. */
static int makeerror(unsigned char **, size_t *, const char *, ...)
    __attribute__((__format__(__printf__, 3, 4)));
file_private const char *methodname(size_t);
250
251 file_private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)252 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
253 {
254 unsigned char *p;
255 int mime = ms->flags & MAGIC_MIME;
256
257 if (!mime)
258 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
259
260 for (p = buf; *p; p++)
261 if (!isalnum(*p))
262 *p = '-';
263
264 return file_printf(ms, "application/x-decompression-error-%s-%s",
265 methodname(i), buf);
266 }
267
/*
 * Try to look inside compressed data.
 *
 * Does nothing and returns 0 unless MAGIC_COMPRESS is set in ms->flags.
 * Otherwise every compr[] entry whose magic matches the start of the
 * buffer b is tried: the data is decompressed with uncompressbuf() and
 * the result (or the decompression error) is described through ms.
 *
 * Returns 0 if nothing was recognised, 1 if a description was produced,
 * and -1 if producing the output failed.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* |maglen| is the number of bytes the check needs to read */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		if (compr[i].maglen < 0) {
			/* negative length: recognised by a predicate */
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/*
			 * A separate new_act is used so that sig_act keeps
			 * the previous disposition for the restore below.
			 */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		/* drop the result of a previous iteration, if any */
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			/* do not recurse into nested compression */
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			/* stop unless no MIME bit or exactly MAGIC_MIME is set */
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			/*
			 * Describe the compressed container itself into a
			 * pushed buffer, then append that text.
			 */
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			break;
		default:
			abort();
			/*NOTREACHED*/
		/* reached only through "goto error" from the cases above */
		error:
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	/* put the caller's SIGPIPE disposition back if we replaced it */
	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	/* MAGIC_COMPRESS was necessarily set on entry; restore it */
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
377 #endif
378 /*
379 * `safe' write for sockets and pipes.
380 */
381 file_private ssize_t
swrite(int fd,const void * buf,size_t n)382 swrite(int fd, const void *buf, size_t n)
383 {
384 ssize_t rv;
385 size_t rn = n;
386
387 do
388 switch (rv = write(fd, buf, n)) {
389 case -1:
390 if (errno == EINTR)
391 continue;
392 return -1;
393 default:
394 n -= rv;
395 buf = CAST(const char *, buf) + rv;
396 break;
397 }
398 while (n > 0);
399 return rn;
400 }
401
402
/*
 * `safe' read for sockets and pipes.
 *
 * Reads from fd into buf until n bytes have been stored or read() reports
 * end of file, retrying calls interrupted by EINTR.  Returns the number
 * of bytes stored (less than requested on end of file), or -1 on a read
 * error.
 *
 * Where FIONREAD is available (and not on MinGW), and fd is not standard
 * input, the request is first limited to the number of bytes the
 * descriptor reports as pending.  If canbepipe is set and nothing is
 * pending, select() is used to wait for data first; that path can return
 * 0 without reading anything.
 */
file_protected ssize_t
sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
{
	ssize_t rv;
#if defined(FIONREAD) && !defined(__MINGW32__)
	int t = 0;
#endif
	size_t rn = n;

	/* standard input is always read directly, without the checks below */
	if (fd == STDIN_FILENO)
		goto nocheck;

#if defined(FIONREAD) && !defined(__MINGW32__)
	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
#ifdef FD_ZERO
		ssize_t cnt;
		for (cnt = 0;; cnt++) {
			fd_set check;
			/* 100ms per attempt */
			struct timeval tout = {0, 100 * 1000};
			int selrv;

			FD_ZERO(&check);
			FD_SET(fd, &check);

			/*
			 * Avoid soft deadlock: do not read if there
			 * is nothing to read from sockets and pipes.
			 */
			selrv = select(fd + 1, &check, NULL, NULL, &tout);
			if (selrv == -1) {
				if (errno == EINTR || errno == EAGAIN)
					continue;
				/*
				 * NOTE(review): any other select() error
				 * also falls through to the next iteration,
				 * so a persistent failure would spin here.
				 */
			} else if (selrv == 0 && cnt >= 5) {
				return 0;
			} else
				/*
				 * NOTE(review): also taken for a timeout
				 * while cnt < 5, so the first timeout ends
				 * the wait; confirm this is intended.
				 */
				break;
		}
#endif
		/* refresh the pending byte count after waiting */
		(void)ioctl(fd, FIONREAD, &t);
	}

	/* never ask for more than what is known to be pending */
	if (t > 0 && CAST(size_t, t) < n) {
		n = t;
		rn = n;
	}
#endif

nocheck:
	do
		switch ((rv = read(fd, buf, n))) {
		case -1:
			/* "continue" re-tests n > 0 and reads again */
			if (errno == EINTR)
				continue;
			return -1;
		case 0:
			/* end of file: report what was stored so far */
			return rn - n;
		default:
			n -= rv;
			buf = CAST(char *, CCAST(void *, buf)) + rv;
			break;
		}
	while (n > 0);
	return rn;
}
470
471 file_protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)472 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
473 size_t nbytes)
474 {
475 char buf[4096];
476 ssize_t r;
477 int tfd;
478
479 #ifdef WIN32
480 const char *t;
481 buf[0] = '\0';
482 if ((t = getenv("TEMP")) != NULL)
483 (void)strlcpy(buf, t, sizeof(buf));
484 else if ((t = getenv("TMP")) != NULL)
485 (void)strlcpy(buf, t, sizeof(buf));
486 else if ((t = getenv("TMPDIR")) != NULL)
487 (void)strlcpy(buf, t, sizeof(buf));
488 if (buf[0] != '\0')
489 (void)strlcat(buf, "/", sizeof(buf));
490 (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
491 #else
492 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
493 #endif
494 #ifndef HAVE_MKSTEMP
495 {
496 char *ptr = mktemp(buf);
497 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
498 r = errno;
499 (void)unlink(ptr);
500 errno = r;
501 }
502 #else
503 {
504 int te;
505 mode_t ou = umask(0);
506 tfd = mkstemp(buf);
507 (void)umask(ou);
508 te = errno;
509 (void)unlink(buf);
510 errno = te;
511 }
512 #endif
513 if (tfd == -1) {
514 file_error(ms, errno,
515 "cannot create temporary file for pipe copy");
516 return -1;
517 }
518
519 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
520 r = 1;
521 else {
522 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
523 if (swrite(tfd, buf, CAST(size_t, r)) != r)
524 break;
525 }
526
527 switch (r) {
528 case -1:
529 file_error(ms, errno, "error copying from pipe to temp file");
530 return -1;
531 case 0:
532 break;
533 default:
534 file_error(ms, errno, "error while writing to temp file");
535 return -1;
536 }
537
538 /*
539 * We duplicate the file descriptor, because fclose on a
540 * tmpfile will delete the file, but any open descriptors
541 * can still access the phantom inode.
542 */
543 if ((fd = dup2(tfd, fd)) == -1) {
544 file_error(ms, errno, "could not dup descriptor for temp file");
545 return -1;
546 }
547 (void)close(tfd);
548 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
549 file_badseek(ms);
550 return -1;
551 }
552 return fd;
553 }
554 #if HAVE_FORK
555 #ifdef BUILTIN_DECOMPRESS
556
557 #define FHCRC (1 << 1)
558 #define FEXTRA (1 << 2)
559 #define FNAME (1 << 3)
560 #define FCOMMENT (1 << 4)
561
562
563 file_private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)564 uncompressgzipped(const unsigned char *old, unsigned char **newch,
565 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
566 {
567 unsigned char flg;
568 size_t data_start = 10;
569
570 if (*n < 4) {
571 goto err;
572 }
573
574 flg = old[3];
575
576 if (flg & FEXTRA) {
577 if (data_start + 1 >= *n)
578 goto err;
579 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
580 }
581 if (flg & FNAME) {
582 while(data_start < *n && old[data_start])
583 data_start++;
584 data_start++;
585 }
586 if (flg & FCOMMENT) {
587 while(data_start < *n && old[data_start])
588 data_start++;
589 data_start++;
590 }
591 if (flg & FHCRC)
592 data_start += 2;
593
594 if (data_start >= *n)
595 goto err;
596
597 *n -= data_start;
598 old += data_start;
599 return uncompresszlib(old, newch, bytes_max, n, 0);
600 err:
601 return makeerror(newch, n, "File too short");
602 }
603
604 file_private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)605 uncompresszlib(const unsigned char *old, unsigned char **newch,
606 size_t bytes_max, size_t *n, int zlib)
607 {
608 int rc;
609 z_stream z;
610
611 DPRINTF("builtin zlib decompression\n");
612 z.next_in = CCAST(Bytef *, old);
613 z.avail_in = CAST(uint32_t, *n);
614 z.next_out = *newch;
615 z.avail_out = CAST(unsigned int, bytes_max);
616 z.zalloc = Z_NULL;
617 z.zfree = Z_NULL;
618 z.opaque = Z_NULL;
619
620 /* LINTED bug in header macro */
621 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
622 if (rc != Z_OK)
623 goto err;
624
625 rc = inflate(&z, Z_SYNC_FLUSH);
626 if (rc != Z_OK && rc != Z_STREAM_END) {
627 inflateEnd(&z);
628 goto err;
629 }
630
631 *n = CAST(size_t, z.total_out);
632 rc = inflateEnd(&z);
633 if (rc != Z_OK)
634 goto err;
635
636 /* let's keep the nul-terminate tradition */
637 (*newch)[*n] = '\0';
638
639 return OKDATA;
640 err:
641 return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
642 }
643 #endif
644
645 #ifdef BUILTIN_BZLIB
646 file_private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)647 uncompressbzlib(const unsigned char *old, unsigned char **newch,
648 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
649 {
650 int rc;
651 bz_stream bz;
652
653 DPRINTF("builtin bzlib decompression\n");
654 memset(&bz, 0, sizeof(bz));
655 rc = BZ2_bzDecompressInit(&bz, 0, 0);
656 if (rc != BZ_OK)
657 goto err;
658
659 bz.next_in = CCAST(char *, RCAST(const char *, old));
660 bz.avail_in = CAST(uint32_t, *n);
661 bz.next_out = RCAST(char *, *newch);
662 bz.avail_out = CAST(unsigned int, bytes_max);
663
664 rc = BZ2_bzDecompress(&bz);
665 if (rc != BZ_OK && rc != BZ_STREAM_END) {
666 BZ2_bzDecompressEnd(&bz);
667 goto err;
668 }
669
670 /* Assume byte_max is within 32bit */
671 /* assert(bz.total_out_hi32 == 0); */
672 *n = CAST(size_t, bz.total_out_lo32);
673 rc = BZ2_bzDecompressEnd(&bz);
674 if (rc != BZ_OK)
675 goto err;
676
677 /* let's keep the nul-terminate tradition */
678 (*newch)[*n] = '\0';
679
680 return OKDATA;
681 err:
682 return makeerror(newch, n, "bunzip error %d", rc);
683 }
684 #endif
685
686 #ifdef BUILTIN_XZLIB
687 file_private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)688 uncompressxzlib(const unsigned char *old, unsigned char **newch,
689 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
690 {
691 int rc;
692 lzma_stream xz;
693
694 DPRINTF("builtin xzlib decompression\n");
695 memset(&xz, 0, sizeof(xz));
696 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
697 if (rc != LZMA_OK)
698 goto err;
699
700 xz.next_in = CCAST(const uint8_t *, old);
701 xz.avail_in = CAST(uint32_t, *n);
702 xz.next_out = RCAST(uint8_t *, *newch);
703 xz.avail_out = CAST(unsigned int, bytes_max);
704
705 rc = lzma_code(&xz, LZMA_RUN);
706 if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
707 lzma_end(&xz);
708 goto err;
709 }
710
711 *n = CAST(size_t, xz.total_out);
712
713 lzma_end(&xz);
714
715 /* let's keep the nul-terminate tradition */
716 (*newch)[*n] = '\0';
717
718 return OKDATA;
719 err:
720 return makeerror(newch, n, "unxz error %d", rc);
721 }
722 #endif
723
724 #ifdef BUILTIN_ZSTDLIB
725 file_private int
uncompresszstd(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)726 uncompresszstd(const unsigned char *old, unsigned char **newch,
727 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
728 {
729 size_t rc;
730 ZSTD_DStream *zstd;
731 ZSTD_inBuffer in;
732 ZSTD_outBuffer out;
733
734 DPRINTF("builtin zstd decompression\n");
735 if ((zstd = ZSTD_createDStream()) == NULL) {
736 return makeerror(newch, n, "No ZSTD decompression stream, %s",
737 strerror(errno));
738 }
739
740 rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
741 if (ZSTD_isError(rc))
742 goto err;
743
744 in.src = CCAST(const void *, old);
745 in.size = *n;
746 in.pos = 0;
747 out.dst = RCAST(void *, *newch);
748 out.size = bytes_max;
749 out.pos = 0;
750
751 rc = ZSTD_decompressStream(zstd, &out, &in);
752 if (ZSTD_isError(rc))
753 goto err;
754
755 *n = out.pos;
756
757 ZSTD_freeDStream(zstd);
758
759 /* let's keep the nul-terminate tradition */
760 (*newch)[*n] = '\0';
761
762 return OKDATA;
763 err:
764 ZSTD_freeDStream(zstd);
765 return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
766 }
767 #endif
768
769 #ifdef BUILTIN_LZLIB
770 file_private int
uncompresslzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)771 uncompresslzlib(const unsigned char *old, unsigned char **newch,
772 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
773 {
774 enum LZ_Errno err;
775 size_t old_remaining = *n;
776 size_t new_remaining = bytes_max;
777 size_t total_read = 0;
778 unsigned char *bufp;
779 struct LZ_Decoder *dec;
780
781 bufp = *newch;
782
783 DPRINTF("builtin lzlib decompression\n");
784 dec = LZ_decompress_open();
785 if (!dec) {
786 return makeerror(newch, n, "unable to allocate LZ_Decoder");
787 }
788 if (LZ_decompress_errno(dec) != LZ_ok)
789 goto err;
790
791 for (;;) {
792 // LZ_decompress_read() stops at member boundaries, so we may
793 // have more than one successful read after writing all data
794 // we have.
795 if (old_remaining > 0) {
796 int wr = LZ_decompress_write(dec, old, old_remaining);
797 if (wr < 0)
798 goto err;
799 old_remaining -= wr;
800 old += wr;
801 }
802
803 int rd = LZ_decompress_read(dec, bufp, new_remaining);
804 if (rd > 0) {
805 new_remaining -= rd;
806 bufp += rd;
807 total_read += rd;
808 }
809
810 if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
811 goto err;
812 if (new_remaining == 0)
813 break;
814 if (old_remaining == 0 && rd == 0)
815 break;
816 }
817
818 LZ_decompress_close(dec);
819 *n = total_read;
820
821 /* let's keep the nul-terminate tradition */
822 *bufp = '\0';
823
824 return OKDATA;
825 err:
826 err = LZ_decompress_errno(dec);
827 LZ_decompress_close(dec);
828 return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
829 }
830 #endif
831
832
833 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)834 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
835 {
836 char *msg;
837 va_list ap;
838 int rv;
839
840 DPRINTF("Makeerror %s\n", fmt);
841 free(*buf);
842 va_start(ap, fmt);
843 rv = vasprintf(&msg, fmt, ap);
844 va_end(ap);
845 if (rv < 0) {
846 DPRINTF("Makeerror failed");
847 *buf = NULL;
848 *len = 0;
849 return NODATA;
850 }
851 *buf = RCAST(unsigned char *, msg);
852 *len = strlen(msg);
853 return ERRDATA;
854 }
855
/*
 * Close descriptor fd[i] unless it is already marked closed (-1), and
 * mark it closed afterwards so that a second call is a no-op.
 */
static void
closefd(int *fd, size_t i)
{
	int *slot = &fd[i];

	if (*slot != -1) {
		(void) close(*slot);
		*slot = -1;
	}
}
864
/* Close both ends of the pipe pair fd[0]/fd[1] that are still open. */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
872
/*
 * Arrange for descriptor fd to become descriptor i and for the original
 * fd to be closed.  Nothing is done when fd already equals i.
 *
 * With posix_spawnp support, v is the posix_spawn_file_actions_t that the
 * dup2/close actions are appended to; otherwise the dup2()/close() calls
 * are made immediately and a dup2() failure exits the process.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd != i) {
#ifdef HAVE_POSIX_SPAWNP
		posix_spawn_file_actions_t *actions =
		    RCAST(posix_spawn_file_actions_t *, v);

		posix_spawn_file_actions_adddup2(actions, fd, i);
		posix_spawn_file_actions_addclose(actions, fd);
#else
		/* v carries no information in this configuration */
		(void)v;
		if (dup2(fd, i) == -1) {
			DPRINTF("dup(%d, %d) failed (%s)\n", fd, i,
			    strerror(errno));
			exit(EXIT_FAILURE);
		}
		close(fd);
#endif
	}
	/* else: "no dup was necessary" */
}
890
/*
 * Arrange for descriptor fd to be closed: with posix_spawnp support a
 * close action is appended to the file actions object v, otherwise fd is
 * closed immediately.
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *actions =
	    RCAST(posix_spawn_file_actions_t *, v);

	posix_spawn_file_actions_addclose(actions, fd);
#else
	/* v carries no information in this configuration */
	(void)v;
	close(fd);
#endif
}
901
902 static void
handledesc(void * v,int fd,int fdp[3][2])903 handledesc(void *v, int fd, int fdp[3][2])
904 {
905 if (fd != -1) {
906 (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
907 movedesc(v, STDIN_FILENO, fd);
908 } else {
909 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
910 if (fdp[STDIN_FILENO][1] > 2)
911 closedesc(v, fdp[STDIN_FILENO][1]);
912 }
913
914 file_clear_closexec(STDIN_FILENO);
915
916 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
917 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
918 if (fdp[STDOUT_FILENO][0] > 2)
919 closedesc(v, fdp[STDOUT_FILENO][0]);
920
921 file_clear_closexec(STDOUT_FILENO);
922
923 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
924 if (fdp[STDERR_FILENO][0] > 2)
925 closedesc(v, fdp[STDERR_FILENO][0]);
926
927 file_clear_closexec(STDERR_FILENO);
928 }
929
930 static pid_t
writechild(int fd,const void * old,size_t n)931 writechild(int fd, const void *old, size_t n)
932 {
933 pid_t pid;
934
935 /*
936 * fork again, to avoid blocking because both
937 * pipes filled
938 */
939 pid = fork();
940 if (pid == -1) {
941 DPRINTF("Fork failed (%s)\n", strerror(errno));
942 return -1;
943 }
944 if (pid == 0) {
945 /* child */
946 if (swrite(fd, old, n) != CAST(ssize_t, n)) {
947 DPRINTF("Write failed (%s)\n", strerror(errno));
948 exit(EXIT_FAILURE);
949 }
950 exit(EXIT_SUCCESS);
951 }
952 /* parent */
953 return pid;
954 }
955
956 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)957 filter_error(unsigned char *ubuf, ssize_t n)
958 {
959 char *p;
960 char *buf;
961
962 ubuf[n] = '\0';
963 buf = RCAST(char *, ubuf);
964 while (isspace(CAST(unsigned char, *buf)))
965 buf++;
966 DPRINTF("Filter error[[[%s]]]\n", buf);
967 if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
968 *p = '\0';
969 if ((p = strchr(CAST(char *, buf), ';')) != NULL)
970 *p = '\0';
971 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
972 ++p;
973 while (isspace(CAST(unsigned char, *p)))
974 p++;
975 n = strlen(p);
976 memmove(ubuf, p, CAST(size_t, n + 1));
977 }
978 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
979 if (islower(*ubuf))
980 *ubuf = toupper(*ubuf);
981 return n;
982 }
983
984 file_private const char *
methodname(size_t method)985 methodname(size_t method)
986 {
987 switch (method) {
988 #ifdef BUILTIN_DECOMPRESS
989 case METH_FROZEN:
990 case METH_ZLIB:
991 return "zlib";
992 #endif
993 #ifdef BUILTIN_BZLIB
994 case METH_BZIP:
995 return "bzlib";
996 #endif
997 #ifdef BUILTIN_XZLIB
998 case METH_XZ:
999 case METH_LZMA:
1000 return "xzlib";
1001 #endif
1002 #ifdef BUILTIN_ZSTDLIB
1003 case METH_ZSTD:
1004 return "zstd";
1005 #endif
1006 #ifdef BUILTIN_LZLIB
1007 case METH_LZIP:
1008 return "lzlib";
1009 #endif
1010 default:
1011 return compr[method].argv[0];
1012 }
1013 }
1014
1015 file_private int (*
getdecompressor(size_t method)1016 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1017 size_t *, int)
1018 {
1019 switch (method) {
1020 #ifdef BUILTIN_DECOMPRESS
1021 case METH_FROZEN:
1022 return uncompressgzipped;
1023 case METH_ZLIB:
1024 return uncompresszlib;
1025 #endif
1026 #ifdef BUILTIN_BZLIB
1027 case METH_BZIP:
1028 return uncompressbzlib;
1029 #endif
1030 #ifdef BUILTIN_XZLIB
1031 case METH_XZ:
1032 case METH_LZMA:
1033 return uncompressxzlib;
1034 #endif
1035 #ifdef BUILTIN_ZSTDLIB
1036 case METH_ZSTD:
1037 return uncompresszstd;
1038 #endif
1039 #ifdef BUILTIN_LZLIB
1040 case METH_LZIP:
1041 return uncompresslzlib;
1042 #endif
1043 default:
1044 return NULL;
1045 }
1046 }
1047
1048 file_private int
uncompressbuf(int fd,size_t bytes_max,size_t method,int nofork,const unsigned char * old,unsigned char ** newch,size_t * n)1049 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1050 const unsigned char *old, unsigned char **newch, size_t* n)
1051 {
1052 int fdp[3][2];
1053 int status, rv, w;
1054 pid_t pid;
1055 pid_t writepid = -1;
1056 size_t i;
1057 ssize_t r, re;
1058 char *const *args;
1059 #ifdef HAVE_POSIX_SPAWNP
1060 posix_spawn_file_actions_t fa;
1061 #endif
1062 int (*decompress)(const unsigned char *, unsigned char **,
1063 size_t, size_t *, int) = getdecompressor(method);
1064
1065 *newch = CAST(unsigned char *, malloc(bytes_max + 1));
1066 if (*newch == NULL)
1067 return makeerror(newch, n, "No buffer, %s", strerror(errno));
1068
1069 if (decompress) {
1070 if (nofork) {
1071 return makeerror(newch, n,
1072 "Fork is required to uncompress, but disabled");
1073 }
1074 return (*decompress)(old, newch, bytes_max, n, 1);
1075 }
1076
1077 (void)fflush(stdout);
1078 (void)fflush(stderr);
1079
1080 for (i = 0; i < __arraycount(fdp); i++)
1081 fdp[i][0] = fdp[i][1] = -1;
1082
1083 /*
1084 * There are multithreaded users who run magic_file()
1085 * from dozens of threads. If two parallel magic_file() calls
1086 * analyze two large compressed files, both will spawn
1087 * an uncompressing child here, which writes out uncompressed data.
1088 * We read some portion, then close the pipe, then waitpid() the child.
1089 * If uncompressed data is larger, child should get EPIPE and exit.
1090 * However, with *parallel* calls OTHER child may unintentionally
1091 * inherit pipe fds, thus keeping pipe open and making writes in
1092 * our child block instead of failing with EPIPE!
1093 * (For the bug to occur, two threads must mutually inherit their pipes,
1094 * and both must have large outputs. Thus it happens not that often).
1095 * To avoid this, be sure to create pipes with O_CLOEXEC.
1096 */
1097 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1098 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1099 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1100 closep(fdp[STDIN_FILENO]);
1101 closep(fdp[STDOUT_FILENO]);
1102 return makeerror(newch, n, "Cannot create pipe, %s",
1103 strerror(errno));
1104 }
1105
1106 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1107 #ifdef HAVE_POSIX_SPAWNP
1108 posix_spawn_file_actions_init(&fa);
1109
1110 handledesc(&fa, fd, fdp);
1111
1112 DPRINTF("Executing %s\n", compr[method].argv[0]);
1113 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1114 args, NULL);
1115
1116 posix_spawn_file_actions_destroy(&fa);
1117
1118 if (status == -1) {
1119 return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1120 compr[method].argv[0], strerror(errno));
1121 }
1122 #else
1123 /* For processes with large mapped virtual sizes, vfork
1124 * may be _much_ faster (10-100 times) than fork.
1125 */
1126 pid = vfork();
1127 if (pid == -1) {
1128 return makeerror(newch, n, "Cannot vfork, %s",
1129 strerror(errno));
1130 }
1131 if (pid == 0) {
1132 /* child */
1133 /* Note: we are after vfork, do not modify memory
1134 * in a way which confuses parent. In particular,
1135 * do not modify fdp[i][j].
1136 */
1137 handledesc(NULL, fd, fdp);
1138 DPRINTF("Executing %s\n", compr[method].argv[0]);
1139
1140 (void)execvp(compr[method].argv[0], args);
1141 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1142 compr[method].argv[0], strerror(errno));
1143 _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1144 }
1145 #endif
1146 /* parent */
1147 /* Close write sides of child stdout/err pipes */
1148 for (i = 1; i < __arraycount(fdp); i++)
1149 closefd(fdp[i], 1);
1150 /* Write the buffer data to child stdin, if we don't have fd */
1151 if (fd == -1) {
1152 closefd(fdp[STDIN_FILENO], 0);
1153 writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1154 if (writepid == (pid_t)-1) {
1155 rv = makeerror(newch, n, "Write to child failed, %s",
1156 strerror(errno));
1157 DPRINTF("Write to child failed\n");
1158 goto err;
1159 }
1160 closefd(fdp[STDIN_FILENO], 1);
1161 }
1162
1163 rv = OKDATA;
1164 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1165 DPRINTF("read got %zd\n", r);
1166 if (r < 0) {
1167 rv = ERRDATA;
1168 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1169 strerror(errno));
1170 goto err;
1171 }
1172 if (CAST(size_t, r) == bytes_max) {
1173 /*
1174 * close fd so that the child exits with sigpipe and ignore
1175 * errors, otherwise we risk the child blocking and never
1176 * exiting.
1177 */
1178 DPRINTF("Closing stdout for bytes_max\n");
1179 closefd(fdp[STDOUT_FILENO], 0);
1180 goto ok;
1181 }
1182 if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1183 DPRINTF("Got stuff from stderr %s\n", *newch);
1184 rv = ERRDATA;
1185 r = filter_error(*newch, r);
1186 goto ok;
1187 }
1188 if (re == 0)
1189 goto ok;
1190 rv = makeerror(newch, n, "Read stderr failed, %s",
1191 strerror(errno));
1192 goto err;
1193 ok:
1194 *n = r;
1195 /* NUL terminate, as every buffer is handled here. */
1196 (*newch)[*n] = '\0';
1197 err:
1198 closefd(fdp[STDIN_FILENO], 1);
1199 closefd(fdp[STDOUT_FILENO], 0);
1200 closefd(fdp[STDERR_FILENO], 0);
1201
1202 w = waitpid(pid, &status, 0);
1203 wait_err:
1204 if (w == -1) {
1205 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1206 DPRINTF("Child wait return %#x\n", status);
1207 } else if (!WIFEXITED(status)) {
1208 DPRINTF("Child not exited (%#x)\n", status);
1209 } else if (WEXITSTATUS(status) != 0) {
1210 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1211 }
1212 if (writepid > 0) {
1213 /* _After_ we know decompressor has exited, our input writer
1214 * definitely will exit now (at worst, writing fails in it,
1215 * since output fd is closed now on the reading size).
1216 */
1217 w = waitpid(writepid, &status, 0);
1218 writepid = -1;
1219 goto wait_err;
1220 }
1221
1222 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1223 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1224
1225 return rv;
1226 }
1227 #endif
1228