1 /*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28 /*
29 * compress routines:
30 * zmagic() - returns 0 if not recognized, uncompresses and prints
31 * information if recognized
32 * uncompress(method, old, n, newch) - uncompress old into new,
33 * using method, return sizeof new
34 */
35 #include "file.h"
36
37 #ifndef lint
38 FILE_RCSID("@(#)$File: compress.c,v 1.158 2024/11/10 16:52:27 christos Exp $")
39 #endif
40
41 #include "magic.h"
42 #include <stdlib.h>
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SPAWN_H
47 #include <spawn.h>
48 #endif
49 #include <stdio.h>
50 #include <string.h>
51 #include <errno.h>
52 #include <ctype.h>
53 #include <stdarg.h>
54 #include <signal.h>
55 #ifndef HAVE_SIG_T
56 typedef void (*sig_t)(int);
57 #endif /* HAVE_SIG_T */
58 #ifdef HAVE_SYS_IOCTL_H
59 #include <sys/ioctl.h>
60 #endif
61 #ifdef HAVE_SYS_WAIT_H
62 #include <sys/wait.h>
63 #endif
64 #if defined(HAVE_SYS_TIME_H)
65 #include <sys/time.h>
66 #endif
67
68 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
69 #define BUILTIN_DECOMPRESS
70 #include <zlib.h>
71 #endif
72
73 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
74 #define BUILTIN_BZLIB
75 #include <bzlib.h>
76 #endif
77
78 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
79 #define BUILTIN_XZLIB
80 #include <lzma.h>
81 #endif
82
83 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
84 #define BUILTIN_ZSTDLIB
85 #include <zstd.h>
86 #include <zstd_errors.h>
87 #endif
88
89 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
90 #define BUILTIN_LZLIB
91 #include <lzlib.h>
92 #endif
93
94 #ifdef notyet
95 #if defined(HAVE_LRZIP_H) && defined(LRZIPLIBSUPPORT)
96 #define BUILTIN_LRZIP
97 #include <Lrzip.h>
98 #endif
99 #endif
100
#ifdef DEBUG
/*
 * Debug tracing.  DPRINTF() takes printf-style arguments and writes them
 * to /dev/tty; the descriptor is opened on first use and cached in `tty'.
 * If the terminal cannot be opened the process aborts.  Without DEBUG the
 * macro expands to nothing, so its arguments are not evaluated.
 */
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
#define DPRINTF(...)
#endif
113
114 #ifdef ZLIBSUPPORT
115 /*
116 * The following python code is not really used because ZLIBSUPPORT is only
117 * defined if we have a built-in zlib, and the built-in zlib handles that.
118 * That is not true for android where we have zlib.h and not -lz.
119 */
120 static const char zlibcode[] =
121 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
122
123 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
124
/*
 * Heuristic test for a zlib (RFC 1950) stream header.
 *
 * buf must hold at least two bytes.  Returns 1 when the low nibble of
 * the first byte is 8, the top bit of the first byte is clear, and the
 * 16-bit value buf[0] * 256 + buf[1] is a multiple of 31; returns 0
 * otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int x;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;
	/*
	 * The check value is defined on the bytes in stream order, first
	 * byte most significant.  Arithmetic works on values, not on the
	 * in-memory representation, so no host byte-order test is needed
	 * (the old test produced the swapped value on big-endian hosts).
	 */
	x = buf[0] * 256u + buf[1];
	if (x % 31)
		return 0;
	return 1;
}
141 #endif
142
/*
 * Heuristic match for a raw LZMA header.
 *
 * Returns 1 when byte 0 is 0x5d, bytes 1 and 2 are zero and byte 12 is
 * either 0x00 or 0xff; returns 0 otherwise.  Byte 12 is only inspected
 * once the first three bytes have matched.
 */
static int
lzmacmp(const unsigned char *buf)
{
	if (buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0)
		return buf[12] == 0 || buf[12] == 0xff;
	return 0;
}
152
/* Command-line flags passed to gzip; lzip is given the same ones. */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

/*
 * NULL-terminated argument vectors for the external decompression
 * programs.  Element 0 is the program name; the vectors are referenced
 * from the compr[] table.
 */
static const char *gzip_args[] = {
	"gzip", gzip_flags, NULL
};
static const char *uncompress_args[] = {
	"uncompress", "-c", NULL
};
static const char *bzip2_args[] = {
	"bzip2", "-cd", NULL
};
static const char *lzip_args[] = {
	"lzip", lzip_flags, NULL
};
static const char *xz_args[] = {
	"xz", "-cd", NULL
};
static const char *lrzip_args[] = {
	"lrzip", "-qdf", "-", NULL
};
static const char *lz4_args[] = {
	"lz4", "-cd", NULL
};
static const char *zstd_args[] = {
	"zstd", "-cd", NULL
};
180
/* Placeholders stored in the `unused' member of the table below. */
#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognized compression formats, tried in order.
 *
 *  u.magic  literal signature compared against the start of the data
 *           (used when maglen >= 0)
 *  u.func   predicate called on the data (used when maglen < 0)
 *  maglen   signature length; a negative value selects u.func and its
 *           absolute value is the minimum number of bytes required
 *  argv     argument vector of the external program for this format
 *  unused   not referenced by the code visible in this part of the file
 *
 * The METH_* constants are indices into this table (see the number in
 * the comment at the end of each entry).
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_LRZIP	10
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkzipped */
	/* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
223
/*
 * Result codes of the decompression helpers:
 *  OKDATA   the output buffer holds decompressed data
 *  NODATA   nothing usable was produced (makeerror() returns this when
 *           it cannot format a message)
 *  ERRDATA  the output buffer holds an error message instead of data
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2
227
228 file_private ssize_t swrite(int, const void *, size_t);
229 #if HAVE_FORK
230 file_private size_t ncompr = __arraycount(compr);
231 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
232 unsigned char **, size_t *);
233 #ifdef BUILTIN_DECOMPRESS
234 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
235 size_t *, int);
236 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
237 size_t *, int);
238 #endif
239 #ifdef BUILTIN_BZLIB
240 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
241 size_t *, int);
242 #endif
243 #ifdef BUILTIN_XZLIB
244 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
245 size_t *, int);
246 #endif
247 #ifdef BUILTIN_ZSTDLIB
248 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
249 size_t *, int);
250 #endif
251 #ifdef BUILTIN_LZLIB
252 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
253 size_t *, int);
254 #endif
255 #ifdef BUILTIN_LRZIP
256 file_private int uncompresslrzip(const unsigned char *, unsigned char **, size_t,
257 size_t *, int);
258 #endif
259
260
261 static int makeerror(unsigned char **, size_t *, const char *, ...)
262 __attribute__((__format__(__printf__, 3, 4)));
263 file_private const char *methodname(size_t);
264
265 file_private int
format_decompression_error(struct magic_set * ms,size_t i,unsigned char * buf)266 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
267 {
268 unsigned char *p;
269 int mime = ms->flags & MAGIC_MIME;
270
271 if (!mime)
272 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
273
274 for (p = buf; *p; p++)
275 if (!isalnum(*p))
276 *p = '-';
277
278 return file_printf(ms, "application/x-decompression-error-%s-%s",
279 methodname(i), buf);
280 }
281
/*
 * Look inside compressed data.
 *
 * When MAGIC_COMPRESS is set, the start of the buffer `b' is compared
 * against every entry of compr[].  For each entry that matches, the data
 * is decompressed with uncompressbuf() and the result (or the error
 * message) is described; unless MAGIC_COMPRESS_TRANSP is set, a
 * description of the compressed data itself is appended.
 *
 * Returns 0 if MAGIC_COMPRESS is not set or nothing was printed, 1 if a
 * description was printed, -1 if printing or buffer handling failed.
 * MAGIC_COMPRESS is cleared while the contents are examined and set
 * again before returning.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* Not enough data for this entry's signature or predicate. */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		/* A negative maglen selects the predicate function. */
		if (compr[i].maglen < 0) {
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/*
			 * Use a separate new_act so that the previous
			 * disposition is kept in sig_act and can be
			 * restored at `out'.
			 */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		/* Drop the buffer left over from a previous matching entry. */
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			/*
			 * Cleared while the contents are examined; set again
			 * at the end of the function.
			 */
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			/* Only some of the MIME flag bits are set: stop here. */
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			/* Append the description of the compressed data. */
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			break;
		default:
			abort();
			/*NOTREACHED*/
		error:
			/*
			 * Only reached through `goto error'.  The break
			 * leaves the switch, not the for loop, so later
			 * table entries are still tried.
			 */
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	/* Restore the previous SIGPIPE disposition if it was changed. */
	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
391 #endif
392 /*
393 * `safe' write for sockets and pipes.
394 */
395 file_private ssize_t
swrite(int fd,const void * buf,size_t n)396 swrite(int fd, const void *buf, size_t n)
397 {
398 ssize_t rv;
399 size_t rn = n;
400
401 do
402 switch (rv = write(fd, buf, n)) {
403 case -1:
404 if (errno == EINTR)
405 continue;
406 return -1;
407 default:
408 n -= rv;
409 buf = CAST(const char *, buf) + rv;
410 break;
411 }
412 while (n > 0);
413 return rn;
414 }
415
416
417 /*
418 * `safe' read for sockets and pipes.
419 */
420 file_protected ssize_t
sread(int fd,void * buf,size_t n,int canbepipe)421 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
422 {
423 ssize_t rv;
424 #if defined(FIONREAD) && !defined(__MINGW32__)
425 int t = 0;
426 #endif
427 size_t rn = n;
428
429 if (fd == STDIN_FILENO)
430 goto nocheck;
431
432 #if defined(FIONREAD) && !defined(__MINGW32__)
433 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
434 #ifdef FD_ZERO
435 ssize_t cnt;
436 for (cnt = 0;; cnt++) {
437 fd_set check;
438 struct timeval tout = {0, 100 * 1000};
439 int selrv;
440
441 FD_ZERO(&check);
442 FD_SET(fd, &check);
443
444 /*
445 * Avoid soft deadlock: do not read if there
446 * is nothing to read from sockets and pipes.
447 */
448 selrv = select(fd + 1, &check, NULL, NULL, &tout);
449 if (selrv == -1) {
450 if (errno == EINTR || errno == EAGAIN)
451 continue;
452 } else if (selrv == 0 && cnt >= 5) {
453 return 0;
454 } else
455 break;
456 }
457 #endif
458 (void)ioctl(fd, FIONREAD, &t);
459 }
460
461 if (t > 0 && CAST(size_t, t) < n) {
462 n = t;
463 rn = n;
464 }
465 #endif
466
467 nocheck:
468 do
469 switch ((rv = read(fd, buf, n))) {
470 case -1:
471 if (errno == EINTR)
472 continue;
473 return -1;
474 case 0:
475 return rn - n;
476 default:
477 n -= rv;
478 buf = CAST(char *, CCAST(void *, buf)) + rv;
479 break;
480 }
481 while (n > 0);
482 return rn;
483 }
484
485 file_protected int
file_pipe2file(struct magic_set * ms,int fd,const void * startbuf,size_t nbytes)486 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
487 size_t nbytes)
488 {
489 char buf[4096];
490 ssize_t r;
491 int tfd;
492
493 #ifdef WIN32
494 const char *t;
495 buf[0] = '\0';
496 if ((t = getenv("TEMP")) != NULL)
497 (void)strlcpy(buf, t, sizeof(buf));
498 else if ((t = getenv("TMP")) != NULL)
499 (void)strlcpy(buf, t, sizeof(buf));
500 else if ((t = getenv("TMPDIR")) != NULL)
501 (void)strlcpy(buf, t, sizeof(buf));
502 if (buf[0] != '\0')
503 (void)strlcat(buf, "/", sizeof(buf));
504 (void)strlcat(buf, "file.XXXXXX", sizeof(buf));
505 #else
506 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
507 #endif
508 #ifndef HAVE_MKSTEMP
509 {
510 char *ptr = mktemp(buf);
511 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
512 r = errno;
513 (void)unlink(ptr);
514 errno = r;
515 }
516 #else
517 {
518 int te;
519 mode_t ou = umask(0);
520 tfd = mkstemp(buf);
521 (void)umask(ou);
522 te = errno;
523 (void)unlink(buf);
524 errno = te;
525 }
526 #endif
527 if (tfd == -1) {
528 file_error(ms, errno,
529 "cannot create temporary file for pipe copy");
530 return -1;
531 }
532
533 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
534 r = 1;
535 else {
536 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
537 if (swrite(tfd, buf, CAST(size_t, r)) != r)
538 break;
539 }
540
541 switch (r) {
542 case -1:
543 file_error(ms, errno, "error copying from pipe to temp file");
544 return -1;
545 case 0:
546 break;
547 default:
548 file_error(ms, errno, "error while writing to temp file");
549 return -1;
550 }
551
552 /*
553 * We duplicate the file descriptor, because fclose on a
554 * tmpfile will delete the file, but any open descriptors
555 * can still access the phantom inode.
556 */
557 if ((fd = dup2(tfd, fd)) == -1) {
558 file_error(ms, errno, "could not dup descriptor for temp file");
559 return -1;
560 }
561 (void)close(tfd);
562 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
563 file_badseek(ms);
564 return -1;
565 }
566 return fd;
567 }
568 #if HAVE_FORK
569 #ifdef BUILTIN_DECOMPRESS
570
571 #define FHCRC (1 << 1)
572 #define FEXTRA (1 << 2)
573 #define FNAME (1 << 3)
574 #define FCOMMENT (1 << 4)
575
576
577 file_private int
uncompressgzipped(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)578 uncompressgzipped(const unsigned char *old, unsigned char **newch,
579 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
580 {
581 unsigned char flg;
582 size_t data_start = 10;
583
584 if (*n < 4) {
585 goto err;
586 }
587
588 flg = old[3];
589
590 if (flg & FEXTRA) {
591 if (data_start + 1 >= *n)
592 goto err;
593 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
594 }
595 if (flg & FNAME) {
596 while(data_start < *n && old[data_start])
597 data_start++;
598 data_start++;
599 }
600 if (flg & FCOMMENT) {
601 while(data_start < *n && old[data_start])
602 data_start++;
603 data_start++;
604 }
605 if (flg & FHCRC)
606 data_start += 2;
607
608 if (data_start >= *n)
609 goto err;
610
611 *n -= data_start;
612 old += data_start;
613 return uncompresszlib(old, newch, bytes_max, n, 0);
614 err:
615 return makeerror(newch, n, "File too short");
616 }
617
618 file_private int
uncompresszlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int zlib)619 uncompresszlib(const unsigned char *old, unsigned char **newch,
620 size_t bytes_max, size_t *n, int zlib)
621 {
622 int rc;
623 z_stream z;
624
625 DPRINTF("builtin zlib decompression\n");
626 z.next_in = CCAST(Bytef *, old);
627 z.avail_in = CAST(uint32_t, *n);
628 z.next_out = *newch;
629 z.avail_out = CAST(unsigned int, bytes_max);
630 z.zalloc = Z_NULL;
631 z.zfree = Z_NULL;
632 z.opaque = Z_NULL;
633
634 /* LINTED bug in header macro */
635 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
636 if (rc != Z_OK)
637 goto err;
638
639 rc = inflate(&z, Z_SYNC_FLUSH);
640 if (rc != Z_OK && rc != Z_STREAM_END) {
641 inflateEnd(&z);
642 goto err;
643 }
644
645 *n = CAST(size_t, z.total_out);
646 rc = inflateEnd(&z);
647 if (rc != Z_OK)
648 goto err;
649
650 /* let's keep the nul-terminate tradition */
651 (*newch)[*n] = '\0';
652
653 return OKDATA;
654 err:
655 return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
656 }
657 #endif
658
659 #ifdef BUILTIN_BZLIB
660 file_private int
uncompressbzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)661 uncompressbzlib(const unsigned char *old, unsigned char **newch,
662 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
663 {
664 int rc;
665 bz_stream bz;
666
667 DPRINTF("builtin bzlib decompression\n");
668 memset(&bz, 0, sizeof(bz));
669 rc = BZ2_bzDecompressInit(&bz, 0, 0);
670 if (rc != BZ_OK)
671 goto err;
672
673 bz.next_in = CCAST(char *, RCAST(const char *, old));
674 bz.avail_in = CAST(uint32_t, *n);
675 bz.next_out = RCAST(char *, *newch);
676 bz.avail_out = CAST(unsigned int, bytes_max);
677
678 rc = BZ2_bzDecompress(&bz);
679 if (rc != BZ_OK && rc != BZ_STREAM_END) {
680 BZ2_bzDecompressEnd(&bz);
681 goto err;
682 }
683
684 /* Assume byte_max is within 32bit */
685 /* assert(bz.total_out_hi32 == 0); */
686 *n = CAST(size_t, bz.total_out_lo32);
687 rc = BZ2_bzDecompressEnd(&bz);
688 if (rc != BZ_OK)
689 goto err;
690
691 /* let's keep the nul-terminate tradition */
692 (*newch)[*n] = '\0';
693
694 return OKDATA;
695 err:
696 return makeerror(newch, n, "bunzip error %d", rc);
697 }
698 #endif
699
700 #ifdef BUILTIN_XZLIB
701 file_private int
uncompressxzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)702 uncompressxzlib(const unsigned char *old, unsigned char **newch,
703 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
704 {
705 int rc;
706 lzma_stream xz;
707
708 DPRINTF("builtin xzlib decompression\n");
709 memset(&xz, 0, sizeof(xz));
710 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
711 if (rc != LZMA_OK)
712 goto err;
713
714 xz.next_in = CCAST(const uint8_t *, old);
715 xz.avail_in = CAST(uint32_t, *n);
716 xz.next_out = RCAST(uint8_t *, *newch);
717 xz.avail_out = CAST(unsigned int, bytes_max);
718
719 rc = lzma_code(&xz, LZMA_RUN);
720 if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
721 lzma_end(&xz);
722 goto err;
723 }
724
725 *n = CAST(size_t, xz.total_out);
726
727 lzma_end(&xz);
728
729 /* let's keep the nul-terminate tradition */
730 (*newch)[*n] = '\0';
731
732 return OKDATA;
733 err:
734 return makeerror(newch, n, "unxz error %d", rc);
735 }
736 #endif
737
738 #ifdef BUILTIN_ZSTDLIB
739 file_private int
uncompresszstd(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)740 uncompresszstd(const unsigned char *old, unsigned char **newch,
741 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
742 {
743 size_t rc;
744 ZSTD_DStream *zstd;
745 ZSTD_inBuffer in;
746 ZSTD_outBuffer out;
747
748 DPRINTF("builtin zstd decompression\n");
749 if ((zstd = ZSTD_createDStream()) == NULL) {
750 return makeerror(newch, n, "No ZSTD decompression stream, %s",
751 strerror(errno));
752 }
753
754 rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
755 if (ZSTD_isError(rc))
756 goto err;
757
758 in.src = CCAST(const void *, old);
759 in.size = *n;
760 in.pos = 0;
761 out.dst = RCAST(void *, *newch);
762 out.size = bytes_max;
763 out.pos = 0;
764
765 rc = ZSTD_decompressStream(zstd, &out, &in);
766 if (ZSTD_isError(rc))
767 goto err;
768
769 *n = out.pos;
770
771 ZSTD_freeDStream(zstd);
772
773 /* let's keep the nul-terminate tradition */
774 (*newch)[*n] = '\0';
775
776 return OKDATA;
777 err:
778 ZSTD_freeDStream(zstd);
779 return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
780 }
781 #endif
782
783 #ifdef BUILTIN_LZLIB
784 file_private int
uncompresslzlib(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)785 uncompresslzlib(const unsigned char *old, unsigned char **newch,
786 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
787 {
788 enum LZ_Errno err;
789 size_t old_remaining = *n;
790 size_t new_remaining = bytes_max;
791 size_t total_read = 0;
792 unsigned char *bufp;
793 struct LZ_Decoder *dec;
794
795 bufp = *newch;
796
797 DPRINTF("builtin lzlib decompression\n");
798 dec = LZ_decompress_open();
799 if (!dec) {
800 return makeerror(newch, n, "unable to allocate LZ_Decoder");
801 }
802 if (LZ_decompress_errno(dec) != LZ_ok)
803 goto err;
804
805 for (;;) {
806 // LZ_decompress_read() stops at member boundaries, so we may
807 // have more than one successful read after writing all data
808 // we have.
809 if (old_remaining > 0) {
810 int wr = LZ_decompress_write(dec, old, old_remaining);
811 if (wr < 0)
812 goto err;
813 old_remaining -= wr;
814 old += wr;
815 }
816
817 int rd = LZ_decompress_read(dec, bufp, new_remaining);
818 if (rd > 0) {
819 new_remaining -= rd;
820 bufp += rd;
821 total_read += rd;
822 }
823
824 if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
825 goto err;
826 if (new_remaining == 0)
827 break;
828 if (old_remaining == 0 && rd == 0)
829 break;
830 }
831
832 LZ_decompress_close(dec);
833 *n = total_read;
834
835 /* let's keep the nul-terminate tradition */
836 *bufp = '\0';
837
838 return OKDATA;
839 err:
840 err = LZ_decompress_errno(dec);
841 LZ_decompress_close(dec);
842 return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
843 }
844 #endif
845
846 #ifdef BUILTIN_LRZIP
847 file_private int
uncompresslrzip(const unsigned char * old,unsigned char ** newch,size_t bytes_max,size_t * n,int extra)848 uncompresslrzip(const unsigned char *old, unsigned char **newch,
849 size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
850 {
851 Lrzip *lr;
852 FILE *in, *out;
853 int res = OKDATA;
854
855 DPRINTF("builtin rlzip decompression\n");
856 lr = lrzip_new(LRZIP_MODE_DECOMPRESS);
857 if (lr == NULL) {
858 res = makeerror(newch, n, "unable to create an lrzip decoder");
859 goto out0;
860 }
861 lrzip_config_env(lr);
862 in = fmemopen(RCAST(void *, old), bytes_max, "r");
863 if (in == NULL) {
864 res = makeerror(newch, n, "unable to construct input file");
865 goto out1;
866 }
867 if (!lrzip_file_add(lr, in)) {
868 res = makeerror(newch, n, "unable to add input file");
869 goto out2;
870 }
871 *newch = calloc(*n = 2 * bytes_max, 1);
872 if (*newch == NULL) {
873 res = makeerror(newch, n, "unable to allocate output buffer");
874 goto out2;
875 }
876 out = fmemopen(*newch, *n, "w");
877 if (out == NULL) {
878 free(*newch);
879 res = makeerror(newch, n, "unable to allocate output file");
880 goto out2;
881 }
882 lrzip_outfile_set(lr, out);
883 if (lrzip_run(lr)) {
884 free(*newch);
885 res = makeerror(newch, n, "unable to decompress file");
886 goto out3;
887 }
888 *n = (size_t)ftell(out);
889 out3:
890 fclose(out);
891 out2:
892 fclose(in);
893 out1:
894 lrzip_free(lr);
895 out0:
896 return res;
897 }
898 #endif
899
900 static int
makeerror(unsigned char ** buf,size_t * len,const char * fmt,...)901 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
902 {
903 char *msg;
904 va_list ap;
905 int rv;
906
907 DPRINTF("Makeerror %s\n", fmt);
908 free(*buf);
909 va_start(ap, fmt);
910 rv = vasprintf(&msg, fmt, ap);
911 va_end(ap);
912 if (rv < 0) {
913 DPRINTF("Makeerror failed");
914 *buf = NULL;
915 *len = 0;
916 return NODATA;
917 }
918 *buf = RCAST(unsigned char *, msg);
919 *len = strlen(msg);
920 return ERRDATA;
921 }
922
/*
 * Close descriptor fd[i] unless it is already marked closed (-1), and
 * mark it closed afterwards.
 */
static void
closefd(int *fd, size_t i)
{
	int *slot = &fd[i];

	if (*slot != -1) {
		(void) close(*slot);
		*slot = -1;
	}
}
931
/*
 * Close both descriptors of a two-element descriptor pair, skipping
 * entries that are already -1.
 */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
939
/*
 * Arrange for descriptor fd to become descriptor i and for fd to be
 * closed.  Nothing is done when fd already is i.
 *
 * With posix_spawnp the operations are recorded in the file actions
 * object that v points to.  Otherwise they are performed immediately
 * with dup2()/close(); this variant is called from handledesc(), which
 * runs in the child created by vfork(), so a failure must leave through
 * _exit() rather than exit().
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	(void)v;
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		/* _exit(), not exit(), because of vfork */
		_exit(EXIT_FAILURE);
	}
	close(fd);
#endif
}
957
/*
 * Close descriptor fd: recorded in the file actions object v points to
 * when posix_spawnp is used, performed immediately otherwise (v is then
 * ignored).
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	close(fd);
#endif
}
968
969 static void
handledesc(void * v,int fd,int fdp[3][2])970 handledesc(void *v, int fd, int fdp[3][2])
971 {
972 if (fd != -1) {
973 (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
974 movedesc(v, STDIN_FILENO, fd);
975 } else {
976 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
977 if (fdp[STDIN_FILENO][1] > 2)
978 closedesc(v, fdp[STDIN_FILENO][1]);
979 }
980
981 file_clear_closexec(STDIN_FILENO);
982
983 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
984 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
985 if (fdp[STDOUT_FILENO][0] > 2)
986 closedesc(v, fdp[STDOUT_FILENO][0]);
987
988 file_clear_closexec(STDOUT_FILENO);
989
990 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
991 if (fdp[STDERR_FILENO][0] > 2)
992 closedesc(v, fdp[STDERR_FILENO][0]);
993
994 file_clear_closexec(STDERR_FILENO);
995 }
996
997 static pid_t
writechild(int fd,const void * old,size_t n)998 writechild(int fd, const void *old, size_t n)
999 {
1000 pid_t pid;
1001
1002 /*
1003 * fork again, to avoid blocking because both
1004 * pipes filled
1005 */
1006 pid = fork();
1007 if (pid == -1) {
1008 DPRINTF("Fork failed (%s)\n", strerror(errno));
1009 return -1;
1010 }
1011 if (pid == 0) {
1012 /* child */
1013 if (swrite(fd, old, n) != CAST(ssize_t, n)) {
1014 DPRINTF("Write failed (%s)\n", strerror(errno));
1015 exit(EXIT_FAILURE);
1016 }
1017 exit(EXIT_SUCCESS);
1018 }
1019 /* parent */
1020 return pid;
1021 }
1022
1023 static ssize_t
filter_error(unsigned char * ubuf,ssize_t n)1024 filter_error(unsigned char *ubuf, ssize_t n)
1025 {
1026 char *p;
1027 char *buf;
1028
1029 ubuf[n] = '\0';
1030 buf = RCAST(char *, ubuf);
1031 while (isspace(CAST(unsigned char, *buf)))
1032 buf++;
1033 DPRINTF("Filter error[[[%s]]]\n", buf);
1034 if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
1035 *p = '\0';
1036 if ((p = strchr(CAST(char *, buf), ';')) != NULL)
1037 *p = '\0';
1038 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
1039 ++p;
1040 while (isspace(CAST(unsigned char, *p)))
1041 p++;
1042 n = strlen(p);
1043 memmove(ubuf, p, CAST(size_t, n + 1));
1044 }
1045 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
1046 if (islower(*ubuf))
1047 *ubuf = toupper(*ubuf);
1048 return n;
1049 }
1050
1051 file_private const char *
methodname(size_t method)1052 methodname(size_t method)
1053 {
1054 switch (method) {
1055 #ifdef BUILTIN_DECOMPRESS
1056 case METH_FROZEN:
1057 case METH_ZLIB:
1058 return "zlib";
1059 #endif
1060 #ifdef BUILTIN_BZLIB
1061 case METH_BZIP:
1062 return "bzlib";
1063 #endif
1064 #ifdef BUILTIN_XZLIB
1065 case METH_XZ:
1066 case METH_LZMA:
1067 return "xzlib";
1068 #endif
1069 #ifdef BUILTIN_ZSTDLIB
1070 case METH_ZSTD:
1071 return "zstd";
1072 #endif
1073 #ifdef BUILTIN_LZLIB
1074 case METH_LZIP:
1075 return "lzlib";
1076 #endif
1077 #ifdef BUILTIN_LRZIP
1078 case METH_LRZIP:
1079 return "lrzip";
1080 #endif
1081 default:
1082 return compr[method].argv[0];
1083 }
1084 }
1085
1086 file_private int (*
getdecompressor(size_t method)1087 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1088 size_t *, int)
1089 {
1090 switch (method) {
1091 #ifdef BUILTIN_DECOMPRESS
1092 case METH_FROZEN:
1093 return uncompressgzipped;
1094 case METH_ZLIB:
1095 return uncompresszlib;
1096 #endif
1097 #ifdef BUILTIN_BZLIB
1098 case METH_BZIP:
1099 return uncompressbzlib;
1100 #endif
1101 #ifdef BUILTIN_XZLIB
1102 case METH_XZ:
1103 case METH_LZMA:
1104 return uncompressxzlib;
1105 #endif
1106 #ifdef BUILTIN_ZSTDLIB
1107 case METH_ZSTD:
1108 return uncompresszstd;
1109 #endif
1110 #ifdef BUILTIN_LZLIB
1111 case METH_LZIP:
1112 return uncompresslzlib;
1113 #endif
1114 #ifdef BUILTIN_LRZIP
1115 case METH_LRZIP:
1116 return uncompresslrzip;
1117 #endif
1118 default:
1119 return NULL;
1120 }
1121 }
1122
1123 file_private int
uncompressbuf(int fd,size_t bytes_max,size_t method,int nofork,const unsigned char * old,unsigned char ** newch,size_t * n)1124 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1125 const unsigned char *old, unsigned char **newch, size_t* n)
1126 {
1127 int fdp[3][2];
1128 int status, rv, w;
1129 pid_t pid;
1130 pid_t writepid = -1;
1131 size_t i;
1132 ssize_t r, re;
1133 char *const *args;
1134 #ifdef HAVE_POSIX_SPAWNP
1135 posix_spawn_file_actions_t fa;
1136 #endif
1137 int (*decompress)(const unsigned char *, unsigned char **,
1138 size_t, size_t *, int) = getdecompressor(method);
1139
1140 *newch = CAST(unsigned char *, malloc(bytes_max + 1));
1141 if (*newch == NULL)
1142 return makeerror(newch, n, "No buffer, %s", strerror(errno));
1143
1144 if (decompress) {
1145 if (nofork) {
1146 return makeerror(newch, n,
1147 "Fork is required to uncompress, but disabled");
1148 }
1149 return (*decompress)(old, newch, bytes_max, n, 1);
1150 }
1151
1152 (void)fflush(stdout);
1153 (void)fflush(stderr);
1154
1155 for (i = 0; i < __arraycount(fdp); i++)
1156 fdp[i][0] = fdp[i][1] = -1;
1157
1158 /*
1159 * There are multithreaded users who run magic_file()
1160 * from dozens of threads. If two parallel magic_file() calls
1161 * analyze two large compressed files, both will spawn
1162 * an uncompressing child here, which writes out uncompressed data.
1163 * We read some portion, then close the pipe, then waitpid() the child.
1164 * If uncompressed data is larger, child should get EPIPE and exit.
1165 * However, with *parallel* calls OTHER child may unintentionally
1166 * inherit pipe fds, thus keeping pipe open and making writes in
1167 * our child block instead of failing with EPIPE!
1168 * (For the bug to occur, two threads must mutually inherit their pipes,
1169 * and both must have large outputs. Thus it happens not that often).
1170 * To avoid this, be sure to create pipes with O_CLOEXEC.
1171 */
1172 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1173 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1174 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1175 closep(fdp[STDIN_FILENO]);
1176 closep(fdp[STDOUT_FILENO]);
1177 return makeerror(newch, n, "Cannot create pipe, %s",
1178 strerror(errno));
1179 }
1180
1181 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1182 #ifdef HAVE_POSIX_SPAWNP
1183 posix_spawn_file_actions_init(&fa);
1184
1185 handledesc(&fa, fd, fdp);
1186
1187 DPRINTF("Executing %s\n", compr[method].argv[0]);
1188 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1189 args, NULL);
1190
1191 posix_spawn_file_actions_destroy(&fa);
1192
1193 if (status == -1) {
1194 return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1195 compr[method].argv[0], strerror(errno));
1196 }
1197 #else
1198 /* For processes with large mapped virtual sizes, vfork
1199 * may be _much_ faster (10-100 times) than fork.
1200 */
1201 pid = vfork();
1202 if (pid == -1) {
1203 return makeerror(newch, n, "Cannot vfork, %s",
1204 strerror(errno));
1205 }
1206 if (pid == 0) {
1207 /* child */
1208 /* Note: we are after vfork, do not modify memory
1209 * in a way which confuses parent. In particular,
1210 * do not modify fdp[i][j].
1211 */
1212 handledesc(NULL, fd, fdp);
1213 DPRINTF("Executing %s\n", compr[method].argv[0]);
1214
1215 (void)execvp(compr[method].argv[0], args);
1216 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1217 compr[method].argv[0], strerror(errno));
1218 _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1219 }
1220 #endif
1221 /* parent */
1222 /* Close write sides of child stdout/err pipes */
1223 for (i = 1; i < __arraycount(fdp); i++)
1224 closefd(fdp[i], 1);
1225 /* Write the buffer data to child stdin, if we don't have fd */
1226 if (fd == -1) {
1227 closefd(fdp[STDIN_FILENO], 0);
1228 writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1229 if (writepid == (pid_t)-1) {
1230 rv = makeerror(newch, n, "Write to child failed, %s",
1231 strerror(errno));
1232 DPRINTF("Write to child failed\n");
1233 goto err;
1234 }
1235 closefd(fdp[STDIN_FILENO], 1);
1236 }
1237
1238 rv = OKDATA;
1239 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1240 DPRINTF("read got %zd\n", r);
1241 if (r < 0) {
1242 rv = ERRDATA;
1243 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1244 strerror(errno));
1245 goto err;
1246 }
1247 if (CAST(size_t, r) == bytes_max) {
1248 /*
1249 * close fd so that the child exits with sigpipe and ignore
1250 * errors, otherwise we risk the child blocking and never
1251 * exiting.
1252 */
1253 DPRINTF("Closing stdout for bytes_max\n");
1254 closefd(fdp[STDOUT_FILENO], 0);
1255 goto ok;
1256 }
1257 if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1258 DPRINTF("Got stuff from stderr %s\n", *newch);
1259 rv = ERRDATA;
1260 r = filter_error(*newch, r);
1261 goto ok;
1262 }
1263 if (re == 0)
1264 goto ok;
1265 rv = makeerror(newch, n, "Read stderr failed, %s",
1266 strerror(errno));
1267 goto err;
1268 ok:
1269 *n = r;
1270 /* NUL terminate, as every buffer is handled here. */
1271 (*newch)[*n] = '\0';
1272 err:
1273 closefd(fdp[STDIN_FILENO], 1);
1274 closefd(fdp[STDOUT_FILENO], 0);
1275 closefd(fdp[STDERR_FILENO], 0);
1276
1277 w = waitpid(pid, &status, 0);
1278 wait_err:
1279 if (w == -1) {
1280 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1281 DPRINTF("Child wait return %#x\n", status);
1282 } else if (!WIFEXITED(status)) {
1283 DPRINTF("Child not exited (%#x)\n", status);
1284 } else if (WEXITSTATUS(status) != 0) {
1285 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1286 }
1287 if (writepid > 0) {
1288 /* _After_ we know decompressor has exited, our input writer
1289 * definitely will exit now (at worst, writing fails in it,
1290 * since output fd is closed now on the reading size).
1291 */
1292 w = waitpid(writepid, &status, 0);
1293 writepid = -1;
1294 goto wait_err;
1295 }
1296
1297 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1298 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1299
1300 return rv;
1301 }
1302 #endif
1303