1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * compress routines: 30 * zmagic() - returns 0 if not recognized, uncompresses and prints 31 * information if recognized 32 * uncompress(method, old, n, newch) - uncompress old into new, 33 * using method, return sizeof new 34 */ 35 #include "file.h" 36 37 #ifndef lint 38 FILE_RCSID("@(#)$File: compress.c,v 1.158 2024/11/10 16:52:27 christos Exp $") 39 #endif 40 41 #include "magic.h" 42 #include <stdlib.h> 43 #ifdef HAVE_UNISTD_H 44 #include <unistd.h> 45 #endif 46 #ifdef HAVE_SPAWN_H 47 #include <spawn.h> 48 #endif 49 #include <stdio.h> 50 #include <string.h> 51 #include <errno.h> 52 #include <ctype.h> 53 #include <stdarg.h> 54 #include <signal.h> 55 #ifndef HAVE_SIG_T 56 typedef void (*sig_t)(int); 57 #endif /* HAVE_SIG_T */ 58 #ifdef HAVE_SYS_IOCTL_H 59 #include <sys/ioctl.h> 60 #endif 61 #ifdef HAVE_SYS_WAIT_H 62 #include <sys/wait.h> 63 #endif 64 #if defined(HAVE_SYS_TIME_H) 65 #include <sys/time.h> 66 #endif 67 68 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT) 69 #define BUILTIN_DECOMPRESS 70 #include <zlib.h> 71 #endif 72 73 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT) 74 #define BUILTIN_BZLIB 75 #include <bzlib.h> 76 #endif 77 78 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT) 79 #define BUILTIN_XZLIB 80 #include <lzma.h> 81 #endif 82 83 #if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT) 84 #define BUILTIN_ZSTDLIB 85 #include <zstd.h> 86 #include <zstd_errors.h> 87 #endif 88 89 #if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT) 90 #define BUILTIN_LZLIB 91 #include <lzlib.h> 92 #endif 93 94 #ifdef notyet 95 #if defined(HAVE_LRZIP_H) && defined(LRZIPLIBSUPPORT) 96 #define BUILTIN_LRZIP 97 #include <Lrzip.h> 98 #endif 99 #endif 100 101 #ifdef DEBUG 102 int tty = -1; 103 #define DPRINTF(...) do { \ 104 if (tty == -1) \ 105 tty = open("/dev/tty", O_RDWR); \ 106 if (tty == -1) \ 107 abort(); \ 108 dprintf(tty, __VA_ARGS__); \ 109 } while (/*CONSTCOND*/0) 110 #else 111 #define DPRINTF(...) 112 #endif 113 114 #ifdef ZLIBSUPPORT 115 /* 116 * The following python code is not really used because ZLIBSUPPORT is only 117 * defined if we have a built-in zlib, and the built-in zlib handles that. 118 * That is not true for android where we have zlib.h and not -lz. 119 */ 120 static const char zlibcode[] = 121 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))"; 122 123 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL }; 124 125 static int 126 zlibcmp(const unsigned char *buf) 127 { 128 unsigned short x = 1; 129 unsigned char *s = CAST(unsigned char *, CAST(void *, &x)); 130 131 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0) 132 return 0; 133 if (s[0] != 1) /* endianness test */ 134 x = buf[0] | (buf[1] << 8); 135 else 136 x = buf[1] | (buf[0] << 8); 137 if (x % 31) 138 return 0; 139 return 1; 140 } 141 #endif 142 143 static int 144 lzmacmp(const unsigned char *buf) 145 { 146 if (buf[0] != 0x5d || buf[1] || buf[2]) 147 return 0; 148 if (buf[12] && buf[12] != 0xff) 149 return 0; 150 return 1; 151 } 152 153 #define gzip_flags "-cd" 154 #define lzip_flags gzip_flags 155 156 static const char *gzip_args[] = { 157 "gzip", gzip_flags, NULL 158 }; 159 static const char *uncompress_args[] = { 160 "uncompress", "-c", NULL 161 }; 162 static const char *bzip2_args[] = { 163 "bzip2", "-cd", NULL 164 }; 165 static const char *lzip_args[] = { 166 "lzip", lzip_flags, NULL 167 }; 168 static const char *xz_args[] = { 169 "xz", "-cd", NULL 170 }; 171 static const char *lrzip_args[] = { 172 "lrzip", "-qdf", "-", NULL 173 }; 174 static const char *lz4_args[] = { 175 "lz4", "-cd", NULL 176 }; 177 static const char *zstd_args[] = { 178 "zstd", "-cd", NULL 179 }; 180 181 #define do_zlib NULL 182 #define do_bzlib NULL 183 184 file_private const struct { 185 union { 186 const char *magic; 187 int (*func)(const unsigned char *); 188 } u; 189 int maglen; 190 const char **argv; 191 void *unused; 192 } compr[] = { 193 #define METH_FROZEN 2 194 #define METH_BZIP 7 195 #define METH_XZ 9 196 #define METH_LZIP 8 197 #define METH_LRZIP 10 198 #define METH_ZSTD 12 199 #define METH_LZMA 13 200 #define METH_ZLIB 14 201 { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */ 202 /* Uncompress can get stuck; so use gzip first if we have it 203 * Idea from Damien Clark, thanks! */ 204 { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */ 205 { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */ 206 { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */ 207 { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */ 208 /* the standard pack utilities do not accept standard input */ 209 { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */ 210 { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */ 211 /* ...only first file examined */ 212 { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */ 213 { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */ 214 { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */ 215 { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */ 216 { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */ 217 { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */ 218 { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */ 219 #ifdef ZLIBSUPPORT 220 { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */ 221 #endif 222 }; 223 224 #define OKDATA 0 225 #define NODATA 1 226 #define ERRDATA 2 227 228 file_private ssize_t swrite(int, const void *, size_t); 229 #if HAVE_FORK 230 file_private size_t ncompr = __arraycount(compr); 231 file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *, 232 unsigned char **, size_t *); 233 #ifdef BUILTIN_DECOMPRESS 234 file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t, 235 size_t *, int); 236 file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t, 237 size_t *, int); 238 #endif 239 #ifdef BUILTIN_BZLIB 240 file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t, 241 size_t *, int); 242 #endif 243 #ifdef BUILTIN_XZLIB 244 file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t, 245 size_t *, int); 246 #endif 247 #ifdef BUILTIN_ZSTDLIB 248 file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t, 249 size_t *, int); 250 #endif 251 #ifdef BUILTIN_LZLIB 252 file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t, 253 size_t *, int); 254 #endif 255 #ifdef BUILTIN_LRZIP 256 file_private int uncompresslrzip(const unsigned char *, unsigned char **, size_t, 257 size_t *, int); 258 #endif 259 260 261 static int makeerror(unsigned char **, size_t *, const char *, ...) 262 __attribute__((__format__(__printf__, 3, 4))); 263 file_private const char *methodname(size_t); 264 265 file_private int 266 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf) 267 { 268 unsigned char *p; 269 int mime = ms->flags & MAGIC_MIME; 270 271 if (!mime) 272 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf); 273 274 for (p = buf; *p; p++) 275 if (!isalnum(*p)) 276 *p = '-'; 277 278 return file_printf(ms, "application/x-decompression-error-%s-%s", 279 methodname(i), buf); 280 } 281 282 file_protected int 283 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name) 284 { 285 unsigned char *newbuf = NULL; 286 size_t i, nsz; 287 char *rbuf; 288 file_pushbuf_t *pb; 289 int urv, prv, rv = 0; 290 int mime = ms->flags & MAGIC_MIME; 291 int fd = b->fd; 292 const unsigned char *buf = CAST(const unsigned char *, b->fbuf); 293 size_t nbytes = b->flen; 294 int sa_saved = 0; 295 struct sigaction sig_act; 296 297 if ((ms->flags & MAGIC_COMPRESS) == 0) 298 return 0; 299 300 for (i = 0; i < ncompr; i++) { 301 int zm; 302 if (nbytes < CAST(size_t, abs(compr[i].maglen))) 303 continue; 304 if (compr[i].maglen < 0) { 305 zm = (*compr[i].u.func)(buf); 306 } else { 307 zm = memcmp(buf, compr[i].u.magic, 308 CAST(size_t, compr[i].maglen)) == 0; 309 } 310 311 if (!zm) 312 continue; 313 314 /* Prevent SIGPIPE death if child dies unexpectedly */ 315 if (!sa_saved) { 316 //We can use sig_act for both new and old, but 317 struct sigaction new_act; 318 memset(&new_act, 0, sizeof(new_act)); 319 new_act.sa_handler = SIG_IGN; 320 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1; 321 } 322 323 nsz = nbytes; 324 free(newbuf); 325 urv = uncompressbuf(fd, ms->bytes_max, i, 326 (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz); 327 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv, 328 (char *)newbuf, nsz); 329 switch (urv) { 330 case OKDATA: 331 case ERRDATA: 332 ms->flags &= ~MAGIC_COMPRESS; 333 if (urv == ERRDATA) 334 prv = format_decompression_error(ms, i, newbuf); 335 else 336 prv = file_buffer(ms, -1, NULL, name, newbuf, 337 nsz); 338 if (prv == -1) 339 goto error; 340 rv = 1; 341 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0) 342 goto out; 343 if (mime != MAGIC_MIME && mime != 0) 344 goto out; 345 if ((file_printf(ms, 346 mime ? " compressed-encoding=" : " (")) == -1) 347 goto error; 348 if ((pb = file_push_buffer(ms)) == NULL) 349 goto error; 350 /* 351 * XXX: If file_buffer fails here, we overwrite 352 * the compressed text. FIXME. 353 */ 354 if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) 355 { 356 if (file_pop_buffer(ms, pb) != NULL) 357 abort(); 358 goto error; 359 } 360 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) { 361 if (file_printf(ms, "%s", rbuf) == -1) { 362 free(rbuf); 363 goto error; 364 } 365 free(rbuf); 366 } 367 if (!mime && file_printf(ms, ")") == -1) 368 goto error; 369 /*FALLTHROUGH*/ 370 case NODATA: 371 break; 372 default: 373 abort(); 374 /*NOTREACHED*/ 375 error: 376 rv = -1; 377 break; 378 } 379 } 380 out: 381 DPRINTF("rv = %d\n", rv); 382 383 if (sa_saved && sig_act.sa_handler != SIG_IGN) 384 (void)sigaction(SIGPIPE, &sig_act, NULL); 385 386 free(newbuf); 387 ms->flags |= MAGIC_COMPRESS; 388 DPRINTF("Zmagic returns %d\n", rv); 389 return rv; 390 } 391 #endif 392 /* 393 * `safe' write for sockets and pipes. 394 */ 395 file_private ssize_t 396 swrite(int fd, const void *buf, size_t n) 397 { 398 ssize_t rv; 399 size_t rn = n; 400 401 do 402 switch (rv = write(fd, buf, n)) { 403 case -1: 404 if (errno == EINTR) 405 continue; 406 return -1; 407 default: 408 n -= rv; 409 buf = CAST(const char *, buf) + rv; 410 break; 411 } 412 while (n > 0); 413 return rn; 414 } 415 416 417 /* 418 * `safe' read for sockets and pipes. 419 */ 420 file_protected ssize_t 421 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__))) 422 { 423 ssize_t rv; 424 #if defined(FIONREAD) && !defined(__MINGW32__) 425 int t = 0; 426 #endif 427 size_t rn = n; 428 429 if (fd == STDIN_FILENO) 430 goto nocheck; 431 432 #if defined(FIONREAD) && !defined(__MINGW32__) 433 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) { 434 #ifdef FD_ZERO 435 ssize_t cnt; 436 for (cnt = 0;; cnt++) { 437 fd_set check; 438 struct timeval tout = {0, 100 * 1000}; 439 int selrv; 440 441 FD_ZERO(&check); 442 FD_SET(fd, &check); 443 444 /* 445 * Avoid soft deadlock: do not read if there 446 * is nothing to read from sockets and pipes. 447 */ 448 selrv = select(fd + 1, &check, NULL, NULL, &tout); 449 if (selrv == -1) { 450 if (errno == EINTR || errno == EAGAIN) 451 continue; 452 } else if (selrv == 0 && cnt >= 5) { 453 return 0; 454 } else 455 break; 456 } 457 #endif 458 (void)ioctl(fd, FIONREAD, &t); 459 } 460 461 if (t > 0 && CAST(size_t, t) < n) { 462 n = t; 463 rn = n; 464 } 465 #endif 466 467 nocheck: 468 do 469 switch ((rv = read(fd, buf, n))) { 470 case -1: 471 if (errno == EINTR) 472 continue; 473 return -1; 474 case 0: 475 return rn - n; 476 default: 477 n -= rv; 478 buf = CAST(char *, CCAST(void *, buf)) + rv; 479 break; 480 } 481 while (n > 0); 482 return rn; 483 } 484 485 file_protected int 486 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, 487 size_t nbytes) 488 { 489 char buf[4096]; 490 ssize_t r; 491 int tfd; 492 493 #ifdef WIN32 494 const char *t; 495 buf[0] = '\0'; 496 if ((t = getenv("TEMP")) != NULL) 497 (void)strlcpy(buf, t, sizeof(buf)); 498 else if ((t = getenv("TMP")) != NULL) 499 (void)strlcpy(buf, t, sizeof(buf)); 500 else if ((t = getenv("TMPDIR")) != NULL) 501 (void)strlcpy(buf, t, sizeof(buf)); 502 if (buf[0] != '\0') 503 (void)strlcat(buf, "/", sizeof(buf)); 504 (void)strlcat(buf, "file.XXXXXX", sizeof(buf)); 505 #else 506 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf)); 507 #endif 508 #ifndef HAVE_MKSTEMP 509 { 510 char *ptr = mktemp(buf); 511 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600); 512 r = errno; 513 (void)unlink(ptr); 514 errno = r; 515 } 516 #else 517 { 518 int te; 519 mode_t ou = umask(0); 520 tfd = mkstemp(buf); 521 (void)umask(ou); 522 te = errno; 523 (void)unlink(buf); 524 errno = te; 525 } 526 #endif 527 if (tfd == -1) { 528 file_error(ms, errno, 529 "cannot create temporary file for pipe copy"); 530 return -1; 531 } 532 533 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes)) 534 r = 1; 535 else { 536 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0) 537 if (swrite(tfd, buf, CAST(size_t, r)) != r) 538 break; 539 } 540 541 switch (r) { 542 case -1: 543 file_error(ms, errno, "error copying from pipe to temp file"); 544 return -1; 545 case 0: 546 break; 547 default: 548 file_error(ms, errno, "error while writing to temp file"); 549 return -1; 550 } 551 552 /* 553 * We duplicate the file descriptor, because fclose on a 554 * tmpfile will delete the file, but any open descriptors 555 * can still access the phantom inode. 556 */ 557 if ((fd = dup2(tfd, fd)) == -1) { 558 file_error(ms, errno, "could not dup descriptor for temp file"); 559 return -1; 560 } 561 (void)close(tfd); 562 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) { 563 file_badseek(ms); 564 return -1; 565 } 566 return fd; 567 } 568 #if HAVE_FORK 569 #ifdef BUILTIN_DECOMPRESS 570 571 #define FHCRC (1 << 1) 572 #define FEXTRA (1 << 2) 573 #define FNAME (1 << 3) 574 #define FCOMMENT (1 << 4) 575 576 577 file_private int 578 uncompressgzipped(const unsigned char *old, unsigned char **newch, 579 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 580 { 581 unsigned char flg; 582 size_t data_start = 10; 583 584 if (*n < 4) { 585 goto err; 586 } 587 588 flg = old[3]; 589 590 if (flg & FEXTRA) { 591 if (data_start + 1 >= *n) 592 goto err; 593 data_start += 2 + old[data_start] + old[data_start + 1] * 256; 594 } 595 if (flg & FNAME) { 596 while(data_start < *n && old[data_start]) 597 data_start++; 598 data_start++; 599 } 600 if (flg & FCOMMENT) { 601 while(data_start < *n && old[data_start]) 602 data_start++; 603 data_start++; 604 } 605 if (flg & FHCRC) 606 data_start += 2; 607 608 if (data_start >= *n) 609 goto err; 610 611 *n -= data_start; 612 old += data_start; 613 return uncompresszlib(old, newch, bytes_max, n, 0); 614 err: 615 return makeerror(newch, n, "File too short"); 616 } 617 618 file_private int 619 uncompresszlib(const unsigned char *old, unsigned char **newch, 620 size_t bytes_max, size_t *n, int zlib) 621 { 622 int rc; 623 z_stream z; 624 625 DPRINTF("builtin zlib decompression\n"); 626 z.next_in = CCAST(Bytef *, old); 627 z.avail_in = CAST(uint32_t, *n); 628 z.next_out = *newch; 629 z.avail_out = CAST(unsigned int, bytes_max); 630 z.zalloc = Z_NULL; 631 z.zfree = Z_NULL; 632 z.opaque = Z_NULL; 633 634 /* LINTED bug in header macro */ 635 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15); 636 if (rc != Z_OK) 637 goto err; 638 639 rc = inflate(&z, Z_SYNC_FLUSH); 640 if (rc != Z_OK && rc != Z_STREAM_END) { 641 inflateEnd(&z); 642 goto err; 643 } 644 645 *n = CAST(size_t, z.total_out); 646 rc = inflateEnd(&z); 647 if (rc != Z_OK) 648 goto err; 649 650 /* let's keep the nul-terminate tradition */ 651 (*newch)[*n] = '\0'; 652 653 return OKDATA; 654 err: 655 return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc)); 656 } 657 #endif 658 659 #ifdef BUILTIN_BZLIB 660 file_private int 661 uncompressbzlib(const unsigned char *old, unsigned char **newch, 662 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 663 { 664 int rc; 665 bz_stream bz; 666 667 DPRINTF("builtin bzlib decompression\n"); 668 memset(&bz, 0, sizeof(bz)); 669 rc = BZ2_bzDecompressInit(&bz, 0, 0); 670 if (rc != BZ_OK) 671 goto err; 672 673 bz.next_in = CCAST(char *, RCAST(const char *, old)); 674 bz.avail_in = CAST(uint32_t, *n); 675 bz.next_out = RCAST(char *, *newch); 676 bz.avail_out = CAST(unsigned int, bytes_max); 677 678 rc = BZ2_bzDecompress(&bz); 679 if (rc != BZ_OK && rc != BZ_STREAM_END) { 680 BZ2_bzDecompressEnd(&bz); 681 goto err; 682 } 683 684 /* Assume byte_max is within 32bit */ 685 /* assert(bz.total_out_hi32 == 0); */ 686 *n = CAST(size_t, bz.total_out_lo32); 687 rc = BZ2_bzDecompressEnd(&bz); 688 if (rc != BZ_OK) 689 goto err; 690 691 /* let's keep the nul-terminate tradition */ 692 (*newch)[*n] = '\0'; 693 694 return OKDATA; 695 err: 696 return makeerror(newch, n, "bunzip error %d", rc); 697 } 698 #endif 699 700 #ifdef BUILTIN_XZLIB 701 file_private int 702 uncompressxzlib(const unsigned char *old, unsigned char **newch, 703 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 704 { 705 int rc; 706 lzma_stream xz; 707 708 DPRINTF("builtin xzlib decompression\n"); 709 memset(&xz, 0, sizeof(xz)); 710 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0); 711 if (rc != LZMA_OK) 712 goto err; 713 714 xz.next_in = CCAST(const uint8_t *, old); 715 xz.avail_in = CAST(uint32_t, *n); 716 xz.next_out = RCAST(uint8_t *, *newch); 717 xz.avail_out = CAST(unsigned int, bytes_max); 718 719 rc = lzma_code(&xz, LZMA_RUN); 720 if (rc != LZMA_OK && rc != LZMA_STREAM_END) { 721 lzma_end(&xz); 722 goto err; 723 } 724 725 *n = CAST(size_t, xz.total_out); 726 727 lzma_end(&xz); 728 729 /* let's keep the nul-terminate tradition */ 730 (*newch)[*n] = '\0'; 731 732 return OKDATA; 733 err: 734 return makeerror(newch, n, "unxz error %d", rc); 735 } 736 #endif 737 738 #ifdef BUILTIN_ZSTDLIB 739 file_private int 740 uncompresszstd(const unsigned char *old, unsigned char **newch, 741 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 742 { 743 size_t rc; 744 ZSTD_DStream *zstd; 745 ZSTD_inBuffer in; 746 ZSTD_outBuffer out; 747 748 DPRINTF("builtin zstd decompression\n"); 749 if ((zstd = ZSTD_createDStream()) == NULL) { 750 return makeerror(newch, n, "No ZSTD decompression stream, %s", 751 strerror(errno)); 752 } 753 754 rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only); 755 if (ZSTD_isError(rc)) 756 goto err; 757 758 in.src = CCAST(const void *, old); 759 in.size = *n; 760 in.pos = 0; 761 out.dst = RCAST(void *, *newch); 762 out.size = bytes_max; 763 out.pos = 0; 764 765 rc = ZSTD_decompressStream(zstd, &out, &in); 766 if (ZSTD_isError(rc)) 767 goto err; 768 769 *n = out.pos; 770 771 ZSTD_freeDStream(zstd); 772 773 /* let's keep the nul-terminate tradition */ 774 (*newch)[*n] = '\0'; 775 776 return OKDATA; 777 err: 778 ZSTD_freeDStream(zstd); 779 return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc)); 780 } 781 #endif 782 783 #ifdef BUILTIN_LZLIB 784 file_private int 785 uncompresslzlib(const unsigned char *old, unsigned char **newch, 786 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 787 { 788 enum LZ_Errno err; 789 size_t old_remaining = *n; 790 size_t new_remaining = bytes_max; 791 size_t total_read = 0; 792 unsigned char *bufp; 793 struct LZ_Decoder *dec; 794 795 bufp = *newch; 796 797 DPRINTF("builtin lzlib decompression\n"); 798 dec = LZ_decompress_open(); 799 if (!dec) { 800 return makeerror(newch, n, "unable to allocate LZ_Decoder"); 801 } 802 if (LZ_decompress_errno(dec) != LZ_ok) 803 goto err; 804 805 for (;;) { 806 // LZ_decompress_read() stops at member boundaries, so we may 807 // have more than one successful read after writing all data 808 // we have. 809 if (old_remaining > 0) { 810 int wr = LZ_decompress_write(dec, old, old_remaining); 811 if (wr < 0) 812 goto err; 813 old_remaining -= wr; 814 old += wr; 815 } 816 817 int rd = LZ_decompress_read(dec, bufp, new_remaining); 818 if (rd > 0) { 819 new_remaining -= rd; 820 bufp += rd; 821 total_read += rd; 822 } 823 824 if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok) 825 goto err; 826 if (new_remaining == 0) 827 break; 828 if (old_remaining == 0 && rd == 0) 829 break; 830 } 831 832 LZ_decompress_close(dec); 833 *n = total_read; 834 835 /* let's keep the nul-terminate tradition */ 836 *bufp = '\0'; 837 838 return OKDATA; 839 err: 840 err = LZ_decompress_errno(dec); 841 LZ_decompress_close(dec); 842 return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err)); 843 } 844 #endif 845 846 #ifdef BUILTIN_LRZIP 847 file_private int 848 uncompresslrzip(const unsigned char *old, unsigned char **newch, 849 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 850 { 851 Lrzip *lr; 852 FILE *in, *out; 853 int res = OKDATA; 854 855 DPRINTF("builtin rlzip decompression\n"); 856 lr = lrzip_new(LRZIP_MODE_DECOMPRESS); 857 if (lr == NULL) { 858 res = makeerror(newch, n, "unable to create an lrzip decoder"); 859 goto out0; 860 } 861 lrzip_config_env(lr); 862 in = fmemopen(RCAST(void *, old), bytes_max, "r"); 863 if (in == NULL) { 864 res = makeerror(newch, n, "unable to construct input file"); 865 goto out1; 866 } 867 if (!lrzip_file_add(lr, in)) { 868 res = makeerror(newch, n, "unable to add input file"); 869 goto out2; 870 } 871 *newch = calloc(*n = 2 * bytes_max, 1); 872 if (*newch == NULL) { 873 res = makeerror(newch, n, "unable to allocate output buffer"); 874 goto out2; 875 } 876 out = fmemopen(*newch, *n, "w"); 877 if (out == NULL) { 878 free(*newch); 879 res = makeerror(newch, n, "unable to allocate output file"); 880 goto out2; 881 } 882 lrzip_outfile_set(lr, out); 883 if (lrzip_run(lr)) { 884 free(*newch); 885 res = makeerror(newch, n, "unable to decompress file"); 886 goto out3; 887 } 888 *n = (size_t)ftell(out); 889 out3: 890 fclose(out); 891 out2: 892 fclose(in); 893 out1: 894 lrzip_free(lr); 895 out0: 896 return res; 897 } 898 #endif 899 900 static int 901 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...) 902 { 903 char *msg; 904 va_list ap; 905 int rv; 906 907 DPRINTF("Makeerror %s\n", fmt); 908 free(*buf); 909 va_start(ap, fmt); 910 rv = vasprintf(&msg, fmt, ap); 911 va_end(ap); 912 if (rv < 0) { 913 DPRINTF("Makeerror failed"); 914 *buf = NULL; 915 *len = 0; 916 return NODATA; 917 } 918 *buf = RCAST(unsigned char *, msg); 919 *len = strlen(msg); 920 return ERRDATA; 921 } 922 923 static void 924 closefd(int *fd, size_t i) 925 { 926 if (fd[i] == -1) 927 return; 928 (void) close(fd[i]); 929 fd[i] = -1; 930 } 931 932 static void 933 closep(int *fd) 934 { 935 size_t i; 936 for (i = 0; i < 2; i++) 937 closefd(fd, i); 938 } 939 940 static void 941 movedesc(void *v, int i, int fd) 942 { 943 if (fd == i) 944 return; /* "no dup was necessary" */ 945 #ifdef HAVE_POSIX_SPAWNP 946 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 947 posix_spawn_file_actions_adddup2(fa, fd, i); 948 posix_spawn_file_actions_addclose(fa, fd); 949 #else 950 if (dup2(fd, i) == -1) { 951 DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno)); 952 exit(EXIT_FAILURE); 953 } 954 close(v ? fd : fd); 955 #endif 956 } 957 958 static void 959 closedesc(void *v, int fd) 960 { 961 #ifdef HAVE_POSIX_SPAWNP 962 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 963 posix_spawn_file_actions_addclose(fa, fd); 964 #else 965 close(v ? fd : fd); 966 #endif 967 } 968 969 static void 970 handledesc(void *v, int fd, int fdp[3][2]) 971 { 972 if (fd != -1) { 973 (void) lseek(fd, CAST(off_t, 0), SEEK_SET); 974 movedesc(v, STDIN_FILENO, fd); 975 } else { 976 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]); 977 if (fdp[STDIN_FILENO][1] > 2) 978 closedesc(v, fdp[STDIN_FILENO][1]); 979 } 980 981 file_clear_closexec(STDIN_FILENO); 982 983 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly 984 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]); 985 if (fdp[STDOUT_FILENO][0] > 2) 986 closedesc(v, fdp[STDOUT_FILENO][0]); 987 988 file_clear_closexec(STDOUT_FILENO); 989 990 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]); 991 if (fdp[STDERR_FILENO][0] > 2) 992 closedesc(v, fdp[STDERR_FILENO][0]); 993 994 file_clear_closexec(STDERR_FILENO); 995 } 996 997 static pid_t 998 writechild(int fd, const void *old, size_t n) 999 { 1000 pid_t pid; 1001 1002 /* 1003 * fork again, to avoid blocking because both 1004 * pipes filled 1005 */ 1006 pid = fork(); 1007 if (pid == -1) { 1008 DPRINTF("Fork failed (%s)\n", strerror(errno)); 1009 return -1; 1010 } 1011 if (pid == 0) { 1012 /* child */ 1013 if (swrite(fd, old, n) != CAST(ssize_t, n)) { 1014 DPRINTF("Write failed (%s)\n", strerror(errno)); 1015 exit(EXIT_FAILURE); 1016 } 1017 exit(EXIT_SUCCESS); 1018 } 1019 /* parent */ 1020 return pid; 1021 } 1022 1023 static ssize_t 1024 filter_error(unsigned char *ubuf, ssize_t n) 1025 { 1026 char *p; 1027 char *buf; 1028 1029 ubuf[n] = '\0'; 1030 buf = RCAST(char *, ubuf); 1031 while (isspace(CAST(unsigned char, *buf))) 1032 buf++; 1033 DPRINTF("Filter error[[[%s]]]\n", buf); 1034 if ((p = strchr(CAST(char *, buf), '\n')) != NULL) 1035 *p = '\0'; 1036 if ((p = strchr(CAST(char *, buf), ';')) != NULL) 1037 *p = '\0'; 1038 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) { 1039 ++p; 1040 while (isspace(CAST(unsigned char, *p))) 1041 p++; 1042 n = strlen(p); 1043 memmove(ubuf, p, CAST(size_t, n + 1)); 1044 } 1045 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf); 1046 if (islower(*ubuf)) 1047 *ubuf = toupper(*ubuf); 1048 return n; 1049 } 1050 1051 file_private const char * 1052 methodname(size_t method) 1053 { 1054 switch (method) { 1055 #ifdef BUILTIN_DECOMPRESS 1056 case METH_FROZEN: 1057 case METH_ZLIB: 1058 return "zlib"; 1059 #endif 1060 #ifdef BUILTIN_BZLIB 1061 case METH_BZIP: 1062 return "bzlib"; 1063 #endif 1064 #ifdef BUILTIN_XZLIB 1065 case METH_XZ: 1066 case METH_LZMA: 1067 return "xzlib"; 1068 #endif 1069 #ifdef BUILTIN_ZSTDLIB 1070 case METH_ZSTD: 1071 return "zstd"; 1072 #endif 1073 #ifdef BUILTIN_LZLIB 1074 case METH_LZIP: 1075 return "lzlib"; 1076 #endif 1077 #ifdef BUILTIN_LRZIP 1078 case METH_LRZIP: 1079 return "lrzip"; 1080 #endif 1081 default: 1082 return compr[method].argv[0]; 1083 } 1084 } 1085 1086 file_private int (* 1087 getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t, 1088 size_t *, int) 1089 { 1090 switch (method) { 1091 #ifdef BUILTIN_DECOMPRESS 1092 case METH_FROZEN: 1093 return uncompressgzipped; 1094 case METH_ZLIB: 1095 return uncompresszlib; 1096 #endif 1097 #ifdef BUILTIN_BZLIB 1098 case METH_BZIP: 1099 return uncompressbzlib; 1100 #endif 1101 #ifdef BUILTIN_XZLIB 1102 case METH_XZ: 1103 case METH_LZMA: 1104 return uncompressxzlib; 1105 #endif 1106 #ifdef BUILTIN_ZSTDLIB 1107 case METH_ZSTD: 1108 return uncompresszstd; 1109 #endif 1110 #ifdef BUILTIN_LZLIB 1111 case METH_LZIP: 1112 return uncompresslzlib; 1113 #endif 1114 #ifdef BUILTIN_LRZIP 1115 case METH_LRZIP: 1116 return uncompresslrzip; 1117 #endif 1118 default: 1119 return NULL; 1120 } 1121 } 1122 1123 file_private int 1124 uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork, 1125 const unsigned char *old, unsigned char **newch, size_t* n) 1126 { 1127 int fdp[3][2]; 1128 int status, rv, w; 1129 pid_t pid; 1130 pid_t writepid = -1; 1131 size_t i; 1132 ssize_t r, re; 1133 char *const *args; 1134 #ifdef HAVE_POSIX_SPAWNP 1135 posix_spawn_file_actions_t fa; 1136 #endif 1137 int (*decompress)(const unsigned char *, unsigned char **, 1138 size_t, size_t *, int) = getdecompressor(method); 1139 1140 *newch = CAST(unsigned char *, malloc(bytes_max + 1)); 1141 if (*newch == NULL) 1142 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 1143 1144 if (decompress) { 1145 if (nofork) { 1146 return makeerror(newch, n, 1147 "Fork is required to uncompress, but disabled"); 1148 } 1149 return (*decompress)(old, newch, bytes_max, n, 1); 1150 } 1151 1152 (void)fflush(stdout); 1153 (void)fflush(stderr); 1154 1155 for (i = 0; i < __arraycount(fdp); i++) 1156 fdp[i][0] = fdp[i][1] = -1; 1157 1158 /* 1159 * There are multithreaded users who run magic_file() 1160 * from dozens of threads. If two parallel magic_file() calls 1161 * analyze two large compressed files, both will spawn 1162 * an uncompressing child here, which writes out uncompressed data. 1163 * We read some portion, then close the pipe, then waitpid() the child. 1164 * If uncompressed data is larger, child should get EPIPE and exit. 1165 * However, with *parallel* calls OTHER child may unintentionally 1166 * inherit pipe fds, thus keeping pipe open and making writes in 1167 * our child block instead of failing with EPIPE! 1168 * (For the bug to occur, two threads must mutually inherit their pipes, 1169 * and both must have large outputs. Thus it happens not that often). 1170 * To avoid this, be sure to create pipes with O_CLOEXEC. 1171 */ 1172 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) || 1173 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 || 1174 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) { 1175 closep(fdp[STDIN_FILENO]); 1176 closep(fdp[STDOUT_FILENO]); 1177 return makeerror(newch, n, "Cannot create pipe, %s", 1178 strerror(errno)); 1179 } 1180 1181 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv)); 1182 #ifdef HAVE_POSIX_SPAWNP 1183 posix_spawn_file_actions_init(&fa); 1184 1185 handledesc(&fa, fd, fdp); 1186 1187 DPRINTF("Executing %s\n", compr[method].argv[0]); 1188 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL, 1189 args, NULL); 1190 1191 posix_spawn_file_actions_destroy(&fa); 1192 1193 if (status == -1) { 1194 return makeerror(newch, n, "Cannot posix_spawn `%s', %s", 1195 compr[method].argv[0], strerror(errno)); 1196 } 1197 #else 1198 /* For processes with large mapped virtual sizes, vfork 1199 * may be _much_ faster (10-100 times) than fork. 1200 */ 1201 pid = vfork(); 1202 if (pid == -1) { 1203 return makeerror(newch, n, "Cannot vfork, %s", 1204 strerror(errno)); 1205 } 1206 if (pid == 0) { 1207 /* child */ 1208 /* Note: we are after vfork, do not modify memory 1209 * in a way which confuses parent. In particular, 1210 * do not modify fdp[i][j]. 1211 */ 1212 handledesc(NULL, fd, fdp); 1213 DPRINTF("Executing %s\n", compr[method].argv[0]); 1214 1215 (void)execvp(compr[method].argv[0], args); 1216 dprintf(STDERR_FILENO, "exec `%s' failed, %s", 1217 compr[method].argv[0], strerror(errno)); 1218 _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */ 1219 } 1220 #endif 1221 /* parent */ 1222 /* Close write sides of child stdout/err pipes */ 1223 for (i = 1; i < __arraycount(fdp); i++) 1224 closefd(fdp[i], 1); 1225 /* Write the buffer data to child stdin, if we don't have fd */ 1226 if (fd == -1) { 1227 closefd(fdp[STDIN_FILENO], 0); 1228 writepid = writechild(fdp[STDIN_FILENO][1], old, *n); 1229 if (writepid == (pid_t)-1) { 1230 rv = makeerror(newch, n, "Write to child failed, %s", 1231 strerror(errno)); 1232 DPRINTF("Write to child failed\n"); 1233 goto err; 1234 } 1235 closefd(fdp[STDIN_FILENO], 1); 1236 } 1237 1238 rv = OKDATA; 1239 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0); 1240 DPRINTF("read got %zd\n", r); 1241 if (r < 0) { 1242 rv = ERRDATA; 1243 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], 1244 strerror(errno)); 1245 goto err; 1246 } 1247 if (CAST(size_t, r) == bytes_max) { 1248 /* 1249 * close fd so that the child exits with sigpipe and ignore 1250 * errors, otherwise we risk the child blocking and never 1251 * exiting. 1252 */ 1253 DPRINTF("Closing stdout for bytes_max\n"); 1254 closefd(fdp[STDOUT_FILENO], 0); 1255 goto ok; 1256 } 1257 if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) { 1258 DPRINTF("Got stuff from stderr %s\n", *newch); 1259 rv = ERRDATA; 1260 r = filter_error(*newch, r); 1261 goto ok; 1262 } 1263 if (re == 0) 1264 goto ok; 1265 rv = makeerror(newch, n, "Read stderr failed, %s", 1266 strerror(errno)); 1267 goto err; 1268 ok: 1269 *n = r; 1270 /* NUL terminate, as every buffer is handled here. */ 1271 (*newch)[*n] = '\0'; 1272 err: 1273 closefd(fdp[STDIN_FILENO], 1); 1274 closefd(fdp[STDOUT_FILENO], 0); 1275 closefd(fdp[STDERR_FILENO], 0); 1276 1277 w = waitpid(pid, &status, 0); 1278 wait_err: 1279 if (w == -1) { 1280 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno)); 1281 DPRINTF("Child wait return %#x\n", status); 1282 } else if (!WIFEXITED(status)) { 1283 DPRINTF("Child not exited (%#x)\n", status); 1284 } else if (WEXITSTATUS(status) != 0) { 1285 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status)); 1286 } 1287 if (writepid > 0) { 1288 /* _After_ we know decompressor has exited, our input writer 1289 * definitely will exit now (at worst, writing fails in it, 1290 * since output fd is closed now on the reading size). 1291 */ 1292 w = waitpid(writepid, &status, 0); 1293 writepid = -1; 1294 goto wait_err; 1295 } 1296 1297 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here! 1298 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv); 1299 1300 return rv; 1301 } 1302 #endif 1303