1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * compress routines: 30 * zmagic() - returns 0 if not recognized, uncompresses and prints 31 * information if recognized 32 * uncompress(method, old, n, newch) - uncompress old into new, 33 * using method, return sizeof new 34 */ 35 #include "file.h" 36 37 #ifndef lint 38 FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $") 39 #endif 40 41 #include "magic.h" 42 #include <stdlib.h> 43 #ifdef HAVE_UNISTD_H 44 #include <unistd.h> 45 #endif 46 #ifdef HAVE_SPAWN_H 47 #include <spawn.h> 48 #endif 49 #include <string.h> 50 #include <errno.h> 51 #include <ctype.h> 52 #include <stdarg.h> 53 #include <signal.h> 54 #ifndef HAVE_SIG_T 55 typedef void (*sig_t)(int); 56 #endif /* HAVE_SIG_T */ 57 #ifdef HAVE_SYS_IOCTL_H 58 #include <sys/ioctl.h> 59 #endif 60 #ifdef HAVE_SYS_WAIT_H 61 #include <sys/wait.h> 62 #endif 63 #if defined(HAVE_SYS_TIME_H) 64 #include <sys/time.h> 65 #endif 66 67 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT) 68 #define BUILTIN_DECOMPRESS 69 #include <zlib.h> 70 #endif 71 72 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT) 73 #define BUILTIN_BZLIB 74 #include <bzlib.h> 75 #endif 76 77 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT) 78 #define BUILTIN_XZLIB 79 #include <lzma.h> 80 #endif 81 82 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT) 83 #define BUILTIN_ZSTDLIB 84 #include <zstd.h> 85 #include <zstd_errors.h> 86 #endif 87 88 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT) 89 #define BUILTIN_LZLIB 90 #include <lzlib.h> 91 #endif 92 93 #ifdef DEBUG 94 int tty = -1; 95 #define DPRINTF(...) do { \ 96 if (tty == -1) \ 97 tty = open("/dev/tty", O_RDWR); \ 98 if (tty == -1) \ 99 abort(); \ 100 dprintf(tty, __VA_ARGS__); \ 101 } while (/*CONSTCOND*/0) 102 #else 103 #define DPRINTF(...) 104 #endif 105 106 #ifdef ZLIBSUPPORT 107 /* 108 * The following python code is not really used because ZLIBSUPPORT is only 109 * defined if we have a built-in zlib, and the built-in zlib handles that. 110 * That is not true for android where we have zlib.h and not -lz. 111 */ 112 static const char zlibcode[] = 113 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))"; 114 115 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL }; 116 117 static int 118 zlibcmp(const unsigned char *buf) 119 { 120 unsigned short x = 1; 121 unsigned char *s = CAST(unsigned char *, CAST(void *, &x)); 122 123 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0) 124 return 0; 125 if (s[0] != 1) /* endianness test */ 126 x = buf[0] | (buf[1] << 8); 127 else 128 x = buf[1] | (buf[0] << 8); 129 if (x % 31) 130 return 0; 131 return 1; 132 } 133 #endif 134 135 static int 136 lzmacmp(const unsigned char *buf) 137 { 138 if (buf[0] != 0x5d || buf[1] || buf[2]) 139 return 0; 140 if (buf[12] && buf[12] != 0xff) 141 return 0; 142 return 1; 143 } 144 145 #define gzip_flags "-cd" 146 #define lzip_flags gzip_flags 147 148 static const char *gzip_args[] = { 149 "gzip", gzip_flags, NULL 150 }; 151 static const char *uncompress_args[] = { 152 "uncompress", "-c", NULL 153 }; 154 static const char *bzip2_args[] = { 155 "bzip2", "-cd", NULL 156 }; 157 static const char *lzip_args[] = { 158 "lzip", lzip_flags, NULL 159 }; 160 static const char *xz_args[] = { 161 "xz", "-cd", NULL 162 }; 163 static const char *lrzip_args[] = { 164 "lrzip", "-qdf", "-", NULL 165 }; 166 static const char *lz4_args[] = { 167 "lz4", "-cd", NULL 168 }; 169 static const char *zstd_args[] = { 170 "zstd", "-cd", NULL 171 }; 172 173 #define do_zlib NULL 174 #define do_bzlib NULL 175 176 file_private const struct { 177 union { 178 const char *magic; 179 int (*func)(const unsigned char *); 180 } u; 181 int maglen; 182 const char **argv; 183 void *unused; 184 } compr[] = { 185 #define METH_FROZEN 2 186 #define METH_BZIP 7 187 #define METH_XZ 9 188 #define METH_LZIP 8 189 #define METH_ZSTD 12 190 #define METH_LZMA 13 191 #define METH_ZLIB 14 192 { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */ 193 /* Uncompress can get stuck; so use gzip first if we have it 194 * Idea from Damien Clark, thanks! */ 195 { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */ 196 { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */ 197 { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */ 198 { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */ 199 /* the standard pack utilities do not accept standard input */ 200 { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */ 201 { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */ 202 /* ...only first file examined */ 203 { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */ 204 { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */ 205 { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */ 206 { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */ 207 { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */ 208 { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */ 209 { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */ 210 #ifdef ZLIBSUPPORT 211 { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */ 212 #endif 213 }; 214 215 #define OKDATA 0 216 #define NODATA 1 217 #define ERRDATA 2 218 219 file_private ssize_t swrite(int, const void *, size_t); 220 #if HAVE_FORK 221 file_private size_t ncompr = __arraycount(compr); 222 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *, 223 unsigned char **, size_t *); 224 #ifdef BUILTIN_DECOMPRESS 225 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t, 226 size_t *, int); 227 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t, 228 size_t *, int); 229 #endif 230 #ifdef BUILTIN_BZLIB 231 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t, 232 size_t *, int); 233 #endif 234 #ifdef BUILTIN_XZLIB 235 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t, 236 size_t *, int); 237 #endif 238 #ifdef BUILTIN_ZSTDLIB 239 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t, 240 size_t *, int); 241 #endif 242 #ifdef BUILTIN_LZLIB 243 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t, 244 size_t *, int); 245 #endif 246 247 static int makeerror(unsigned char **, size_t *, const char *, ...) 248 __attribute__((__format__(__printf__, 3, 4))); 249 file_private const char *methodname(size_t); 250 251 file_private int 252 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf) 253 { 254 unsigned char *p; 255 int mime = ms->flags & MAGIC_MIME; 256 257 if (!mime) 258 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf); 259 260 for (p = buf; *p; p++) 261 if (!isalnum(*p)) 262 *p = '-'; 263 264 return file_printf(ms, "application/x-decompression-error-%s-%s", 265 methodname(i), buf); 266 } 267 268 file_protected int 269 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name) 270 { 271 unsigned char *newbuf = NULL; 272 size_t i, nsz; 273 char *rbuf; 274 file_pushbuf_t *pb; 275 int urv, prv, rv = 0; 276 int mime = ms->flags & MAGIC_MIME; 277 int fd = b->fd; 278 const unsigned char *buf = CAST(const unsigned char *, b->fbuf); 279 size_t nbytes = b->flen; 280 int sa_saved = 0; 281 struct sigaction sig_act; 282 283 if ((ms->flags & MAGIC_COMPRESS) == 0) 284 return 0; 285 286 for (i = 0; i < ncompr; i++) { 287 int zm; 288 if (nbytes < CAST(size_t, abs(compr[i].maglen))) 289 continue; 290 if (compr[i].maglen < 0) { 291 zm = (*compr[i].u.func)(buf); 292 } else { 293 zm = memcmp(buf, compr[i].u.magic, 294 CAST(size_t, compr[i].maglen)) == 0; 295 } 296 297 if (!zm) 298 continue; 299 300 /* Prevent SIGPIPE death if child dies unexpectedly */ 301 if (!sa_saved) { 302 //We can use sig_act for both new and old, but 303 struct sigaction new_act; 304 memset(&new_act, 0, sizeof(new_act)); 305 new_act.sa_handler = SIG_IGN; 306 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1; 307 } 308 309 nsz = nbytes; 310 free(newbuf); 311 urv = uncompressbuf(fd, ms->bytes_max, i, 312 (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz); 313 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv, 314 (char *)newbuf, nsz); 315 switch (urv) { 316 case OKDATA: 317 case ERRDATA: 318 ms->flags &= ~MAGIC_COMPRESS; 319 if (urv == ERRDATA) 320 prv = format_decompression_error(ms, i, newbuf); 321 else 322 prv = file_buffer(ms, -1, NULL, name, newbuf, 323 nsz); 324 if (prv == -1) 325 goto error; 326 rv = 1; 327 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0) 328 goto out; 329 if (mime != MAGIC_MIME && mime != 0) 330 goto out; 331 if ((file_printf(ms, 332 mime ? " compressed-encoding=" : " (")) == -1) 333 goto error; 334 if ((pb = file_push_buffer(ms)) == NULL) 335 goto error; 336 /* 337 * XXX: If file_buffer fails here, we overwrite 338 * the compressed text. FIXME. 339 */ 340 if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) 341 { 342 if (file_pop_buffer(ms, pb) != NULL) 343 abort(); 344 goto error; 345 } 346 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) { 347 if (file_printf(ms, "%s", rbuf) == -1) { 348 free(rbuf); 349 goto error; 350 } 351 free(rbuf); 352 } 353 if (!mime && file_printf(ms, ")") == -1) 354 goto error; 355 /*FALLTHROUGH*/ 356 case NODATA: 357 break; 358 default: 359 abort(); 360 /*NOTREACHED*/ 361 error: 362 rv = -1; 363 break; 364 } 365 } 366 out: 367 DPRINTF("rv = %d\n", rv); 368 369 if (sa_saved && sig_act.sa_handler != SIG_IGN) 370 (void)sigaction(SIGPIPE, &sig_act, NULL); 371 372 free(newbuf); 373 ms->flags |= MAGIC_COMPRESS; 374 DPRINTF("Zmagic returns %d\n", rv); 375 return rv; 376 } 377 #endif 378 /* 379 * `safe' write for sockets and pipes. 380 */ 381 file_private ssize_t 382 swrite(int fd, const void *buf, size_t n) 383 { 384 ssize_t rv; 385 size_t rn = n; 386 387 do 388 switch (rv = write(fd, buf, n)) { 389 case -1: 390 if (errno == EINTR) 391 continue; 392 return -1; 393 default: 394 n -= rv; 395 buf = CAST(const char *, buf) + rv; 396 break; 397 } 398 while (n > 0); 399 return rn; 400 } 401 402 403 /* 404 * `safe' read for sockets and pipes. 405 */ 406 file_protected ssize_t 407 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__))) 408 { 409 ssize_t rv; 410 #if defined(FIONREAD) && !defined(__MINGW32__) 411 int t = 0; 412 #endif 413 size_t rn = n; 414 415 if (fd == STDIN_FILENO) 416 goto nocheck; 417 418 #if defined(FIONREAD) && !defined(__MINGW32__) 419 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) { 420 #ifdef FD_ZERO 421 ssize_t cnt; 422 for (cnt = 0;; cnt++) { 423 fd_set check; 424 struct timeval tout = {0, 100 * 1000}; 425 int selrv; 426 427 FD_ZERO(&check); 428 FD_SET(fd, &check); 429 430 /* 431 * Avoid soft deadlock: do not read if there 432 * is nothing to read from sockets and pipes. 433 */ 434 selrv = select(fd + 1, &check, NULL, NULL, &tout); 435 if (selrv == -1) { 436 if (errno == EINTR || errno == EAGAIN) 437 continue; 438 } else if (selrv == 0 && cnt >= 5) { 439 return 0; 440 } else 441 break; 442 } 443 #endif 444 (void)ioctl(fd, FIONREAD, &t); 445 } 446 447 if (t > 0 && CAST(size_t, t) < n) { 448 n = t; 449 rn = n; 450 } 451 #endif 452 453 nocheck: 454 do 455 switch ((rv = read(fd, buf, n))) { 456 case -1: 457 if (errno == EINTR) 458 continue; 459 return -1; 460 case 0: 461 return rn - n; 462 default: 463 n -= rv; 464 buf = CAST(char *, CCAST(void *, buf)) + rv; 465 break; 466 } 467 while (n > 0); 468 return rn; 469 } 470 471 file_protected int 472 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, 473 size_t nbytes) 474 { 475 char buf[4096]; 476 ssize_t r; 477 int tfd; 478 479 #ifdef WIN32 480 const char *t; 481 buf[0] = '\0'; 482 if ((t = getenv("TEMP")) != NULL) 483 (void)strlcpy(buf, t, sizeof(buf)); 484 else if ((t = getenv("TMP")) != NULL) 485 (void)strlcpy(buf, t, sizeof(buf)); 486 else if ((t = getenv("TMPDIR")) != NULL) 487 (void)strlcpy(buf, t, sizeof(buf)); 488 if (buf[0] != '\0') 489 (void)strlcat(buf, "/", sizeof(buf)); 490 (void)strlcat(buf, "file.XXXXXX", sizeof(buf)); 491 #else 492 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf)); 493 #endif 494 #ifndef HAVE_MKSTEMP 495 { 496 char *ptr = mktemp(buf); 497 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600); 498 r = errno; 499 (void)unlink(ptr); 500 errno = r; 501 } 502 #else 503 { 504 int te; 505 mode_t ou = umask(0); 506 tfd = mkstemp(buf); 507 (void)umask(ou); 508 te = errno; 509 (void)unlink(buf); 510 errno = te; 511 } 512 #endif 513 if (tfd == -1) { 514 file_error(ms, errno, 515 "cannot create temporary file for pipe copy"); 516 return -1; 517 } 518 519 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes)) 520 r = 1; 521 else { 522 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0) 523 if (swrite(tfd, buf, CAST(size_t, r)) != r) 524 break; 525 } 526 527 switch (r) { 528 case -1: 529 file_error(ms, errno, "error copying from pipe to temp file"); 530 return -1; 531 case 0: 532 break; 533 default: 534 file_error(ms, errno, "error while writing to temp file"); 535 return -1; 536 } 537 538 /* 539 * We duplicate the file descriptor, because fclose on a 540 * tmpfile will delete the file, but any open descriptors 541 * can still access the phantom inode. 542 */ 543 if ((fd = dup2(tfd, fd)) == -1) { 544 file_error(ms, errno, "could not dup descriptor for temp file"); 545 return -1; 546 } 547 (void)close(tfd); 548 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) { 549 file_badseek(ms); 550 return -1; 551 } 552 return fd; 553 } 554 #if HAVE_FORK 555 #ifdef BUILTIN_DECOMPRESS 556 557 #define FHCRC (1 << 1) 558 #define FEXTRA (1 << 2) 559 #define FNAME (1 << 3) 560 #define FCOMMENT (1 << 4) 561 562 563 file_private int 564 uncompressgzipped(const unsigned char *old, unsigned char **newch, 565 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 566 { 567 unsigned char flg; 568 size_t data_start = 10; 569 570 if (*n < 4) { 571 goto err; 572 } 573 574 flg = old[3]; 575 576 if (flg & FEXTRA) { 577 if (data_start + 1 >= *n) 578 goto err; 579 data_start += 2 + old[data_start] + old[data_start + 1] * 256; 580 } 581 if (flg & FNAME) { 582 while(data_start < *n && old[data_start]) 583 data_start++; 584 data_start++; 585 } 586 if (flg & FCOMMENT) { 587 while(data_start < *n && old[data_start]) 588 data_start++; 589 data_start++; 590 } 591 if (flg & FHCRC) 592 data_start += 2; 593 594 if (data_start >= *n) 595 goto err; 596 597 *n -= data_start; 598 old += data_start; 599 return uncompresszlib(old, newch, bytes_max, n, 0); 600 err: 601 return makeerror(newch, n, "File too short"); 602 } 603 604 file_private int 605 uncompresszlib(const unsigned char *old, unsigned char **newch, 606 size_t bytes_max, size_t *n, int zlib) 607 { 608 int rc; 609 z_stream z; 610 611 DPRINTF("builtin zlib decompression\n"); 612 z.next_in = CCAST(Bytef *, old); 613 z.avail_in = CAST(uint32_t, *n); 614 z.next_out = *newch; 615 z.avail_out = CAST(unsigned int, bytes_max); 616 z.zalloc = Z_NULL; 617 z.zfree = Z_NULL; 618 z.opaque = Z_NULL; 619 620 /* LINTED bug in header macro */ 621 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15); 622 if (rc != Z_OK) 623 goto err; 624 625 rc = inflate(&z, Z_SYNC_FLUSH); 626 if (rc != Z_OK && rc != Z_STREAM_END) { 627 inflateEnd(&z); 628 goto err; 629 } 630 631 *n = CAST(size_t, z.total_out); 632 rc = inflateEnd(&z); 633 if (rc != Z_OK) 634 goto err; 635 636 /* let's keep the nul-terminate tradition */ 637 (*newch)[*n] = '\0'; 638 639 return OKDATA; 640 err: 641 return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc)); 642 } 643 #endif 644 645 #ifdef BUILTIN_BZLIB 646 file_private int 647 uncompressbzlib(const unsigned char *old, unsigned char **newch, 648 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 649 { 650 int rc; 651 bz_stream bz; 652 653 DPRINTF("builtin bzlib decompression\n"); 654 memset(&bz, 0, sizeof(bz)); 655 rc = BZ2_bzDecompressInit(&bz, 0, 0); 656 if (rc != BZ_OK) 657 goto err; 658 659 bz.next_in = CCAST(char *, RCAST(const char *, old)); 660 bz.avail_in = CAST(uint32_t, *n); 661 bz.next_out = RCAST(char *, *newch); 662 bz.avail_out = CAST(unsigned int, bytes_max); 663 664 rc = BZ2_bzDecompress(&bz); 665 if (rc != BZ_OK && rc != BZ_STREAM_END) { 666 BZ2_bzDecompressEnd(&bz); 667 goto err; 668 } 669 670 /* Assume byte_max is within 32bit */ 671 /* assert(bz.total_out_hi32 == 0); */ 672 *n = CAST(size_t, bz.total_out_lo32); 673 rc = BZ2_bzDecompressEnd(&bz); 674 if (rc != BZ_OK) 675 goto err; 676 677 /* let's keep the nul-terminate tradition */ 678 (*newch)[*n] = '\0'; 679 680 return OKDATA; 681 err: 682 return makeerror(newch, n, "bunzip error %d", rc); 683 } 684 #endif 685 686 #ifdef BUILTIN_XZLIB 687 file_private int 688 uncompressxzlib(const unsigned char *old, unsigned char **newch, 689 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 690 { 691 int rc; 692 lzma_stream xz; 693 694 DPRINTF("builtin xzlib decompression\n"); 695 memset(&xz, 0, sizeof(xz)); 696 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0); 697 if (rc != LZMA_OK) 698 goto err; 699 700 xz.next_in = CCAST(const uint8_t *, old); 701 xz.avail_in = CAST(uint32_t, *n); 702 xz.next_out = RCAST(uint8_t *, *newch); 703 xz.avail_out = CAST(unsigned int, bytes_max); 704 705 rc = lzma_code(&xz, LZMA_RUN); 706 if (rc != LZMA_OK && rc != LZMA_STREAM_END) { 707 lzma_end(&xz); 708 goto err; 709 } 710 711 *n = CAST(size_t, xz.total_out); 712 713 lzma_end(&xz); 714 715 /* let's keep the nul-terminate tradition */ 716 (*newch)[*n] = '\0'; 717 718 return OKDATA; 719 err: 720 return makeerror(newch, n, "unxz error %d", rc); 721 } 722 #endif 723 724 #ifdef BUILTIN_ZSTDLIB 725 file_private int 726 uncompresszstd(const unsigned char *old, unsigned char **newch, 727 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 728 { 729 size_t rc; 730 ZSTD_DStream *zstd; 731 ZSTD_inBuffer in; 732 ZSTD_outBuffer out; 733 734 DPRINTF("builtin zstd decompression\n"); 735 if ((zstd = ZSTD_createDStream()) == NULL) { 736 return makeerror(newch, n, "No ZSTD decompression stream, %s", 737 strerror(errno)); 738 } 739 740 rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only); 741 if (ZSTD_isError(rc)) 742 goto err; 743 744 in.src = CCAST(const void *, old); 745 in.size = *n; 746 in.pos = 0; 747 out.dst = RCAST(void *, *newch); 748 out.size = bytes_max; 749 out.pos = 0; 750 751 rc = ZSTD_decompressStream(zstd, &out, &in); 752 if (ZSTD_isError(rc)) 753 goto err; 754 755 *n = out.pos; 756 757 ZSTD_freeDStream(zstd); 758 759 /* let's keep the nul-terminate tradition */ 760 (*newch)[*n] = '\0'; 761 762 return OKDATA; 763 err: 764 ZSTD_freeDStream(zstd); 765 return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc)); 766 } 767 #endif 768 769 #ifdef BUILTIN_LZLIB 770 file_private int 771 uncompresslzlib(const unsigned char *old, unsigned char **newch, 772 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 773 { 774 enum LZ_Errno err; 775 size_t old_remaining = *n; 776 size_t new_remaining = bytes_max; 777 size_t total_read = 0; 778 unsigned char *bufp; 779 struct LZ_Decoder *dec; 780 781 bufp = *newch; 782 783 DPRINTF("builtin lzlib decompression\n"); 784 dec = LZ_decompress_open(); 785 if (!dec) { 786 return makeerror(newch, n, "unable to allocate LZ_Decoder"); 787 } 788 if (LZ_decompress_errno(dec) != LZ_ok) 789 goto err; 790 791 for (;;) { 792 // LZ_decompress_read() stops at member boundaries, so we may 793 // have more than one successful read after writing all data 794 // we have. 795 if (old_remaining > 0) { 796 int wr = LZ_decompress_write(dec, old, old_remaining); 797 if (wr < 0) 798 goto err; 799 old_remaining -= wr; 800 old += wr; 801 } 802 803 int rd = LZ_decompress_read(dec, bufp, new_remaining); 804 if (rd > 0) { 805 new_remaining -= rd; 806 bufp += rd; 807 total_read += rd; 808 } 809 810 if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok) 811 goto err; 812 if (new_remaining == 0) 813 break; 814 if (old_remaining == 0 && rd == 0) 815 break; 816 } 817 818 LZ_decompress_close(dec); 819 *n = total_read; 820 821 /* let's keep the nul-terminate tradition */ 822 *bufp = '\0'; 823 824 return OKDATA; 825 err: 826 err = LZ_decompress_errno(dec); 827 LZ_decompress_close(dec); 828 return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err)); 829 } 830 #endif 831 832 833 static int 834 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...) 835 { 836 char *msg; 837 va_list ap; 838 int rv; 839 840 DPRINTF("Makeerror %s\n", fmt); 841 free(*buf); 842 va_start(ap, fmt); 843 rv = vasprintf(&msg, fmt, ap); 844 va_end(ap); 845 if (rv < 0) { 846 DPRINTF("Makeerror failed"); 847 *buf = NULL; 848 *len = 0; 849 return NODATA; 850 } 851 *buf = RCAST(unsigned char *, msg); 852 *len = strlen(msg); 853 return ERRDATA; 854 } 855 856 static void 857 closefd(int *fd, size_t i) 858 { 859 if (fd[i] == -1) 860 return; 861 (void) close(fd[i]); 862 fd[i] = -1; 863 } 864 865 static void 866 closep(int *fd) 867 { 868 size_t i; 869 for (i = 0; i < 2; i++) 870 closefd(fd, i); 871 } 872 873 static void 874 movedesc(void *v, int i, int fd) 875 { 876 if (fd == i) 877 return; /* "no dup was necessary" */ 878 #ifdef HAVE_POSIX_SPAWNP 879 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 880 posix_spawn_file_actions_adddup2(fa, fd, i); 881 posix_spawn_file_actions_addclose(fa, fd); 882 #else 883 if (dup2(fd, i) == -1) { 884 DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno)); 885 exit(EXIT_FAILURE); 886 } 887 close(v ? fd : fd); 888 #endif 889 } 890 891 static void 892 closedesc(void *v, int fd) 893 { 894 #ifdef HAVE_POSIX_SPAWNP 895 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 896 posix_spawn_file_actions_addclose(fa, fd); 897 #else 898 close(v ? fd : fd); 899 #endif 900 } 901 902 static void 903 handledesc(void *v, int fd, int fdp[3][2]) 904 { 905 if (fd != -1) { 906 (void) lseek(fd, CAST(off_t, 0), SEEK_SET); 907 movedesc(v, STDIN_FILENO, fd); 908 } else { 909 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]); 910 if (fdp[STDIN_FILENO][1] > 2) 911 closedesc(v, fdp[STDIN_FILENO][1]); 912 } 913 914 file_clear_closexec(STDIN_FILENO); 915 916 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly 917 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]); 918 if (fdp[STDOUT_FILENO][0] > 2) 919 closedesc(v, fdp[STDOUT_FILENO][0]); 920 921 file_clear_closexec(STDOUT_FILENO); 922 923 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]); 924 if (fdp[STDERR_FILENO][0] > 2) 925 closedesc(v, fdp[STDERR_FILENO][0]); 926 927 file_clear_closexec(STDERR_FILENO); 928 } 929 930 static pid_t 931 writechild(int fd, const void *old, size_t n) 932 { 933 pid_t pid; 934 935 /* 936 * fork again, to avoid blocking because both 937 * pipes filled 938 */ 939 pid = fork(); 940 if (pid == -1) { 941 DPRINTF("Fork failed (%s)\n", strerror(errno)); 942 return -1; 943 } 944 if (pid == 0) { 945 /* child */ 946 if (swrite(fd, old, n) != CAST(ssize_t, n)) { 947 DPRINTF("Write failed (%s)\n", strerror(errno)); 948 exit(EXIT_FAILURE); 949 } 950 exit(EXIT_SUCCESS); 951 } 952 /* parent */ 953 return pid; 954 } 955 956 static ssize_t 957 filter_error(unsigned char *ubuf, ssize_t n) 958 { 959 char *p; 960 char *buf; 961 962 ubuf[n] = '\0'; 963 buf = RCAST(char *, ubuf); 964 while (isspace(CAST(unsigned char, *buf))) 965 buf++; 966 DPRINTF("Filter error[[[%s]]]\n", buf); 967 if ((p = strchr(CAST(char *, buf), '\n')) != NULL) 968 *p = '\0'; 969 if ((p = strchr(CAST(char *, buf), ';')) != NULL) 970 *p = '\0'; 971 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) { 972 ++p; 973 while (isspace(CAST(unsigned char, *p))) 974 p++; 975 n = strlen(p); 976 memmove(ubuf, p, CAST(size_t, n + 1)); 977 } 978 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf); 979 if (islower(*ubuf)) 980 *ubuf = toupper(*ubuf); 981 return n; 982 } 983 984 file_private const char * 985 methodname(size_t method) 986 { 987 switch (method) { 988 #ifdef BUILTIN_DECOMPRESS 989 case METH_FROZEN: 990 case METH_ZLIB: 991 return "zlib"; 992 #endif 993 #ifdef BUILTIN_BZLIB 994 case METH_BZIP: 995 return "bzlib"; 996 #endif 997 #ifdef BUILTIN_XZLIB 998 case METH_XZ: 999 case METH_LZMA: 1000 return "xzlib"; 1001 #endif 1002 #ifdef BUILTIN_ZSTDLIB 1003 case METH_ZSTD: 1004 return "zstd"; 1005 #endif 1006 #ifdef BUILTIN_LZLIB 1007 case METH_LZIP: 1008 return "lzlib"; 1009 #endif 1010 default: 1011 return compr[method].argv[0]; 1012 } 1013 } 1014 1015 file_private int (* 1016 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t, 1017 size_t *, int) 1018 { 1019 switch (method) { 1020 #ifdef BUILTIN_DECOMPRESS 1021 case METH_FROZEN: 1022 return uncompressgzipped; 1023 case METH_ZLIB: 1024 return uncompresszlib; 1025 #endif 1026 #ifdef BUILTIN_BZLIB 1027 case METH_BZIP: 1028 return uncompressbzlib; 1029 #endif 1030 #ifdef BUILTIN_XZLIB 1031 case METH_XZ: 1032 case METH_LZMA: 1033 return uncompressxzlib; 1034 #endif 1035 #ifdef BUILTIN_ZSTDLIB 1036 case METH_ZSTD: 1037 return uncompresszstd; 1038 #endif 1039 #ifdef BUILTIN_LZLIB 1040 case METH_LZIP: 1041 return uncompresslzlib; 1042 #endif 1043 default: 1044 return NULL; 1045 } 1046 } 1047 1048 file_private int 1049 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork, 1050 const unsigned char *old, unsigned char **newch, size_t* n) 1051 { 1052 int fdp[3][2]; 1053 int status, rv, w; 1054 pid_t pid; 1055 pid_t writepid = -1; 1056 size_t i; 1057 ssize_t r, re; 1058 char *const *args; 1059 #ifdef HAVE_POSIX_SPAWNP 1060 posix_spawn_file_actions_t fa; 1061 #endif 1062 int (*decompress)(const unsigned char *, unsigned char **, 1063 size_t, size_t *, int) = getdecompressor(method); 1064 1065 *newch = CAST(unsigned char *, malloc(bytes_max + 1)); 1066 if (*newch == NULL) 1067 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 1068 1069 if (decompress) { 1070 if (nofork) { 1071 return makeerror(newch, n, 1072 "Fork is required to uncompress, but disabled"); 1073 } 1074 return (*decompress)(old, newch, bytes_max, n, 1); 1075 } 1076 1077 (void)fflush(stdout); 1078 (void)fflush(stderr); 1079 1080 for (i = 0; i < __arraycount(fdp); i++) 1081 fdp[i][0] = fdp[i][1] = -1; 1082 1083 /* 1084 * There are multithreaded users who run magic_file() 1085 * from dozens of threads. If two parallel magic_file() calls 1086 * analyze two large compressed files, both will spawn 1087 * an uncompressing child here, which writes out uncompressed data. 1088 * We read some portion, then close the pipe, then waitpid() the child. 1089 * If uncompressed data is larger, child should get EPIPE and exit. 1090 * However, with *parallel* calls OTHER child may unintentionally 1091 * inherit pipe fds, thus keeping pipe open and making writes in 1092 * our child block instead of failing with EPIPE! 1093 * (For the bug to occur, two threads must mutually inherit their pipes, 1094 * and both must have large outputs. Thus it happens not that often). 1095 * To avoid this, be sure to create pipes with O_CLOEXEC. 1096 */ 1097 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) || 1098 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 || 1099 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) { 1100 closep(fdp[STDIN_FILENO]); 1101 closep(fdp[STDOUT_FILENO]); 1102 return makeerror(newch, n, "Cannot create pipe, %s", 1103 strerror(errno)); 1104 } 1105 1106 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv)); 1107 #ifdef HAVE_POSIX_SPAWNP 1108 posix_spawn_file_actions_init(&fa); 1109 1110 handledesc(&fa, fd, fdp); 1111 1112 DPRINTF("Executing %s\n", compr[method].argv[0]); 1113 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL, 1114 args, NULL); 1115 1116 posix_spawn_file_actions_destroy(&fa); 1117 1118 if (status == -1) { 1119 return makeerror(newch, n, "Cannot posix_spawn `%s', %s", 1120 compr[method].argv[0], strerror(errno)); 1121 } 1122 #else 1123 /* For processes with large mapped virtual sizes, vfork 1124 * may be _much_ faster (10-100 times) than fork. 1125 */ 1126 pid = vfork(); 1127 if (pid == -1) { 1128 return makeerror(newch, n, "Cannot vfork, %s", 1129 strerror(errno)); 1130 } 1131 if (pid == 0) { 1132 /* child */ 1133 /* Note: we are after vfork, do not modify memory 1134 * in a way which confuses parent. In particular, 1135 * do not modify fdp[i][j]. 1136 */ 1137 handledesc(NULL, fd, fdp); 1138 DPRINTF("Executing %s\n", compr[method].argv[0]); 1139 1140 (void)execvp(compr[method].argv[0], args); 1141 dprintf(STDERR_FILENO, "exec `%s' failed, %s", 1142 compr[method].argv[0], strerror(errno)); 1143 _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */ 1144 } 1145 #endif 1146 /* parent */ 1147 /* Close write sides of child stdout/err pipes */ 1148 for (i = 1; i < __arraycount(fdp); i++) 1149 closefd(fdp[i], 1); 1150 /* Write the buffer data to child stdin, if we don't have fd */ 1151 if (fd == -1) { 1152 closefd(fdp[STDIN_FILENO], 0); 1153 writepid = writechild(fdp[STDIN_FILENO][1], old, *n); 1154 if (writepid == (pid_t)-1) { 1155 rv = makeerror(newch, n, "Write to child failed, %s", 1156 strerror(errno)); 1157 DPRINTF("Write to child failed\n"); 1158 goto err; 1159 } 1160 closefd(fdp[STDIN_FILENO], 1); 1161 } 1162 1163 rv = OKDATA; 1164 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0); 1165 DPRINTF("read got %zd\n", r); 1166 if (r < 0) { 1167 rv = ERRDATA; 1168 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], 1169 strerror(errno)); 1170 goto err; 1171 } 1172 if (CAST(size_t, r) == bytes_max) { 1173 /* 1174 * close fd so that the child exits with sigpipe and ignore 1175 * errors, otherwise we risk the child blocking and never 1176 * exiting. 1177 */ 1178 DPRINTF("Closing stdout for bytes_max\n"); 1179 closefd(fdp[STDOUT_FILENO], 0); 1180 goto ok; 1181 } 1182 if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) { 1183 DPRINTF("Got stuff from stderr %s\n", *newch); 1184 rv = ERRDATA; 1185 r = filter_error(*newch, r); 1186 goto ok; 1187 } 1188 if (re == 0) 1189 goto ok; 1190 rv = makeerror(newch, n, "Read stderr failed, %s", 1191 strerror(errno)); 1192 goto err; 1193 ok: 1194 *n = r; 1195 /* NUL terminate, as every buffer is handled here. */ 1196 (*newch)[*n] = '\0'; 1197 err: 1198 closefd(fdp[STDIN_FILENO], 1); 1199 closefd(fdp[STDOUT_FILENO], 0); 1200 closefd(fdp[STDERR_FILENO], 0); 1201 1202 w = waitpid(pid, &status, 0); 1203 wait_err: 1204 if (w == -1) { 1205 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno)); 1206 DPRINTF("Child wait return %#x\n", status); 1207 } else if (!WIFEXITED(status)) { 1208 DPRINTF("Child not exited (%#x)\n", status); 1209 } else if (WEXITSTATUS(status) != 0) { 1210 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status)); 1211 } 1212 if (writepid > 0) { 1213 /* _After_ we know decompressor has exited, our input writer 1214 * definitely will exit now (at worst, writing fails in it, 1215 * since output fd is closed now on the reading size). 1216 */ 1217 w = waitpid(writepid, &status, 0); 1218 writepid = -1; 1219 goto wait_err; 1220 } 1221 1222 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here! 1223 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv); 1224 1225 return rv; 1226 } 1227 #endif 1228