1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * compress routines: 30 * zmagic() - returns 0 if not recognized, uncompresses and prints 31 * information if recognized 32 * uncompress(method, old, n, newch) - uncompress old into new, 33 * using method, return sizeof new 34 */ 35 #include "file.h" 36 37 #ifndef lint 38 FILE_RCSID("@(#)$File: compress.c,v 1.136 2022/09/13 16:08:34 christos Exp $") 39 #endif 40 41 #include "magic.h" 42 #include <stdlib.h> 43 #ifdef HAVE_UNISTD_H 44 #include <unistd.h> 45 #endif 46 #ifdef HAVE_SPAWN_H 47 #include <spawn.h> 48 #endif 49 #include <string.h> 50 #include <errno.h> 51 #include <ctype.h> 52 #include <stdarg.h> 53 #include <signal.h> 54 #ifndef HAVE_SIG_T 55 typedef void (*sig_t)(int); 56 #endif /* HAVE_SIG_T */ 57 #ifdef HAVE_SYS_IOCTL_H 58 #include <sys/ioctl.h> 59 #endif 60 #ifdef HAVE_SYS_WAIT_H 61 #include <sys/wait.h> 62 #endif 63 #if defined(HAVE_SYS_TIME_H) 64 #include <sys/time.h> 65 #endif 66 67 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT) 68 #define BUILTIN_DECOMPRESS 69 #include <zlib.h> 70 #endif 71 72 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT) 73 #define BUILTIN_BZLIB 74 #include <bzlib.h> 75 #endif 76 77 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT) 78 #define BUILTIN_XZLIB 79 #include <lzma.h> 80 #endif 81 82 #ifdef DEBUG 83 int tty = -1; 84 #define DPRINTF(...) do { \ 85 if (tty == -1) \ 86 tty = open("/dev/tty", O_RDWR); \ 87 if (tty == -1) \ 88 abort(); \ 89 dprintf(tty, __VA_ARGS__); \ 90 } while (/*CONSTCOND*/0) 91 #else 92 #define DPRINTF(...) 93 #endif 94 95 #ifdef ZLIBSUPPORT 96 /* 97 * The following python code is not really used because ZLIBSUPPORT is only 98 * defined if we have a built-in zlib, and the built-in zlib handles that. 99 * That is not true for android where we have zlib.h and not -lz. 100 */ 101 static const char zlibcode[] = 102 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))"; 103 104 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL }; 105 106 static int 107 zlibcmp(const unsigned char *buf) 108 { 109 unsigned short x = 1; 110 unsigned char *s = CAST(unsigned char *, CAST(void *, &x)); 111 112 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0) 113 return 0; 114 if (s[0] != 1) /* endianness test */ 115 x = buf[0] | (buf[1] << 8); 116 else 117 x = buf[1] | (buf[0] << 8); 118 if (x % 31) 119 return 0; 120 return 1; 121 } 122 #endif 123 124 static int 125 lzmacmp(const unsigned char *buf) 126 { 127 if (buf[0] != 0x5d || buf[1] || buf[2]) 128 return 0; 129 if (buf[12] && buf[12] != 0xff) 130 return 0; 131 return 1; 132 } 133 134 #define gzip_flags "-cd" 135 #define lrzip_flags "-do" 136 #define lzip_flags gzip_flags 137 138 static const char *gzip_args[] = { 139 "gzip", gzip_flags, NULL 140 }; 141 static const char *uncompress_args[] = { 142 "uncompress", "-c", NULL 143 }; 144 static const char *bzip2_args[] = { 145 "bzip2", "-cd", NULL 146 }; 147 static const char *lzip_args[] = { 148 "lzip", lzip_flags, NULL 149 }; 150 static const char *xz_args[] = { 151 "xz", "-cd", NULL 152 }; 153 static const char *lrzip_args[] = { 154 "lrzip", lrzip_flags, NULL 155 }; 156 static const char *lz4_args[] = { 157 "lz4", "-cd", NULL 158 }; 159 static const char *zstd_args[] = { 160 "zstd", "-cd", NULL 161 }; 162 163 #define do_zlib NULL 164 #define do_bzlib NULL 165 166 private const struct { 167 union { 168 const char *magic; 169 int (*func)(const unsigned char *); 170 } u; 171 int maglen; 172 const char **argv; 173 void *unused; 174 } compr[] = { 175 #define METH_FROZEN 2 176 #define METH_BZIP 7 177 #define METH_XZ 9 178 #define METH_LZMA 13 179 #define METH_ZLIB 14 180 { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */ 181 /* Uncompress can get stuck; so use gzip first if we have it 182 * Idea from Damien Clark, thanks! */ 183 { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */ 184 { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */ 185 { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */ 186 { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */ 187 /* the standard pack utilities do not accept standard input */ 188 { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */ 189 { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */ 190 /* ...only first file examined */ 191 { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */ 192 { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */ 193 { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */ 194 { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */ 195 { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */ 196 { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */ 197 { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */ 198 #ifdef ZLIBSUPPORT 199 { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */ 200 #endif 201 }; 202 203 #define OKDATA 0 204 #define NODATA 1 205 #define ERRDATA 2 206 207 private ssize_t swrite(int, const void *, size_t); 208 #if HAVE_FORK 209 private size_t ncompr = __arraycount(compr); 210 private int uncompressbuf(int, size_t, size_t, const unsigned char *, 211 unsigned char **, size_t *); 212 #ifdef BUILTIN_DECOMPRESS 213 private int uncompresszlib(const unsigned char *, unsigned char **, size_t, 214 size_t *, int); 215 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t, 216 size_t *); 217 #endif 218 #ifdef BUILTIN_BZLIB 219 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t, 220 size_t *); 221 #endif 222 #ifdef BUILTIN_XZLIB 223 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t, 224 size_t *); 225 #endif 226 227 static int makeerror(unsigned char **, size_t *, const char *, ...) 228 __attribute__((__format__(__printf__, 3, 4))); 229 private const char *methodname(size_t); 230 231 private int 232 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf) 233 { 234 unsigned char *p; 235 int mime = ms->flags & MAGIC_MIME; 236 237 if (!mime) 238 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf); 239 240 for (p = buf; *p; p++) 241 if (!isalnum(*p)) 242 *p = '-'; 243 244 return file_printf(ms, "application/x-decompression-error-%s-%s", 245 methodname(i), buf); 246 } 247 248 protected int 249 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name) 250 { 251 unsigned char *newbuf = NULL; 252 size_t i, nsz; 253 char *rbuf; 254 file_pushbuf_t *pb; 255 int urv, prv, rv = 0; 256 int mime = ms->flags & MAGIC_MIME; 257 int fd = b->fd; 258 const unsigned char *buf = CAST(const unsigned char *, b->fbuf); 259 size_t nbytes = b->flen; 260 int sa_saved = 0; 261 struct sigaction sig_act; 262 263 if ((ms->flags & MAGIC_COMPRESS) == 0) 264 return 0; 265 266 for (i = 0; i < ncompr; i++) { 267 int zm; 268 if (nbytes < CAST(size_t, abs(compr[i].maglen))) 269 continue; 270 if (compr[i].maglen < 0) { 271 zm = (*compr[i].u.func)(buf); 272 } else { 273 zm = memcmp(buf, compr[i].u.magic, 274 CAST(size_t, compr[i].maglen)) == 0; 275 } 276 277 if (!zm) 278 continue; 279 280 /* Prevent SIGPIPE death if child dies unexpectedly */ 281 if (!sa_saved) { 282 //We can use sig_act for both new and old, but 283 struct sigaction new_act; 284 memset(&new_act, 0, sizeof(new_act)); 285 new_act.sa_handler = SIG_IGN; 286 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1; 287 } 288 289 nsz = nbytes; 290 urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz); 291 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv, 292 (char *)newbuf, nsz); 293 switch (urv) { 294 case OKDATA: 295 case ERRDATA: 296 ms->flags &= ~MAGIC_COMPRESS; 297 if (urv == ERRDATA) 298 prv = format_decompression_error(ms, i, newbuf); 299 else 300 prv = file_buffer(ms, -1, NULL, name, newbuf, nsz); 301 if (prv == -1) 302 goto error; 303 rv = 1; 304 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0) 305 goto out; 306 if (mime != MAGIC_MIME && mime != 0) 307 goto out; 308 if ((file_printf(ms, 309 mime ? " compressed-encoding=" : " (")) == -1) 310 goto error; 311 if ((pb = file_push_buffer(ms)) == NULL) 312 goto error; 313 /* 314 * XXX: If file_buffer fails here, we overwrite 315 * the compressed text. FIXME. 316 */ 317 if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) { 318 if (file_pop_buffer(ms, pb) != NULL) 319 abort(); 320 goto error; 321 } 322 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) { 323 if (file_printf(ms, "%s", rbuf) == -1) { 324 free(rbuf); 325 goto error; 326 } 327 free(rbuf); 328 } 329 if (!mime && file_printf(ms, ")") == -1) 330 goto error; 331 /*FALLTHROUGH*/ 332 case NODATA: 333 break; 334 default: 335 abort(); 336 /*NOTREACHED*/ 337 error: 338 rv = -1; 339 break; 340 } 341 } 342 out: 343 DPRINTF("rv = %d\n", rv); 344 345 if (sa_saved && sig_act.sa_handler != SIG_IGN) 346 (void)sigaction(SIGPIPE, &sig_act, NULL); 347 348 free(newbuf); 349 ms->flags |= MAGIC_COMPRESS; 350 DPRINTF("Zmagic returns %d\n", rv); 351 return rv; 352 } 353 #endif 354 /* 355 * `safe' write for sockets and pipes. 356 */ 357 private ssize_t 358 swrite(int fd, const void *buf, size_t n) 359 { 360 ssize_t rv; 361 size_t rn = n; 362 363 do 364 switch (rv = write(fd, buf, n)) { 365 case -1: 366 if (errno == EINTR) 367 continue; 368 return -1; 369 default: 370 n -= rv; 371 buf = CAST(const char *, buf) + rv; 372 break; 373 } 374 while (n > 0); 375 return rn; 376 } 377 378 379 /* 380 * `safe' read for sockets and pipes. 381 */ 382 protected ssize_t 383 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__))) 384 { 385 ssize_t rv; 386 #ifdef FIONREAD 387 int t = 0; 388 #endif 389 size_t rn = n; 390 391 if (fd == STDIN_FILENO) 392 goto nocheck; 393 394 #ifdef FIONREAD 395 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) { 396 #ifdef FD_ZERO 397 ssize_t cnt; 398 for (cnt = 0;; cnt++) { 399 fd_set check; 400 struct timeval tout = {0, 100 * 1000}; 401 int selrv; 402 403 FD_ZERO(&check); 404 FD_SET(fd, &check); 405 406 /* 407 * Avoid soft deadlock: do not read if there 408 * is nothing to read from sockets and pipes. 409 */ 410 selrv = select(fd + 1, &check, NULL, NULL, &tout); 411 if (selrv == -1) { 412 if (errno == EINTR || errno == EAGAIN) 413 continue; 414 } else if (selrv == 0 && cnt >= 5) { 415 return 0; 416 } else 417 break; 418 } 419 #endif 420 (void)ioctl(fd, FIONREAD, &t); 421 } 422 423 if (t > 0 && CAST(size_t, t) < n) { 424 n = t; 425 rn = n; 426 } 427 #endif 428 429 nocheck: 430 do 431 switch ((rv = read(fd, buf, n))) { 432 case -1: 433 if (errno == EINTR) 434 continue; 435 return -1; 436 case 0: 437 return rn - n; 438 default: 439 n -= rv; 440 buf = CAST(char *, CCAST(void *, buf)) + rv; 441 break; 442 } 443 while (n > 0); 444 return rn; 445 } 446 447 protected int 448 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, 449 size_t nbytes) 450 { 451 char buf[4096]; 452 ssize_t r; 453 int tfd; 454 455 #ifdef WIN32 456 const char *t; 457 buf[0] = '\0'; 458 if ((t = getenv("TEMP")) != NULL) 459 (void)strlcpy(buf, t, sizeof(buf)); 460 else if ((t = getenv("TMP")) != NULL) 461 (void)strlcpy(buf, t, sizeof(buf)); 462 else if ((t = getenv("TMPDIR")) != NULL) 463 (void)strlcpy(buf, t, sizeof(buf)); 464 if (buf[0] != '\0') 465 (void)strlcat(buf, "/", sizeof(buf)); 466 (void)strlcat(buf, "file.XXXXXX", sizeof(buf)); 467 #else 468 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf)); 469 #endif 470 #ifndef HAVE_MKSTEMP 471 { 472 char *ptr = mktemp(buf); 473 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600); 474 r = errno; 475 (void)unlink(ptr); 476 errno = r; 477 } 478 #else 479 { 480 int te; 481 mode_t ou = umask(0); 482 tfd = mkstemp(buf); 483 (void)umask(ou); 484 te = errno; 485 (void)unlink(buf); 486 errno = te; 487 } 488 #endif 489 if (tfd == -1) { 490 file_error(ms, errno, 491 "cannot create temporary file for pipe copy"); 492 return -1; 493 } 494 495 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes)) 496 r = 1; 497 else { 498 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0) 499 if (swrite(tfd, buf, CAST(size_t, r)) != r) 500 break; 501 } 502 503 switch (r) { 504 case -1: 505 file_error(ms, errno, "error copying from pipe to temp file"); 506 return -1; 507 case 0: 508 break; 509 default: 510 file_error(ms, errno, "error while writing to temp file"); 511 return -1; 512 } 513 514 /* 515 * We duplicate the file descriptor, because fclose on a 516 * tmpfile will delete the file, but any open descriptors 517 * can still access the phantom inode. 518 */ 519 if ((fd = dup2(tfd, fd)) == -1) { 520 file_error(ms, errno, "could not dup descriptor for temp file"); 521 return -1; 522 } 523 (void)close(tfd); 524 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) { 525 file_badseek(ms); 526 return -1; 527 } 528 return fd; 529 } 530 #if HAVE_FORK 531 #ifdef BUILTIN_DECOMPRESS 532 533 #define FHCRC (1 << 1) 534 #define FEXTRA (1 << 2) 535 #define FNAME (1 << 3) 536 #define FCOMMENT (1 << 4) 537 538 539 private int 540 uncompressgzipped(const unsigned char *old, unsigned char **newch, 541 size_t bytes_max, size_t *n) 542 { 543 unsigned char flg = old[3]; 544 size_t data_start = 10; 545 546 if (flg & FEXTRA) { 547 if (data_start + 1 >= *n) 548 goto err; 549 data_start += 2 + old[data_start] + old[data_start + 1] * 256; 550 } 551 if (flg & FNAME) { 552 while(data_start < *n && old[data_start]) 553 data_start++; 554 data_start++; 555 } 556 if (flg & FCOMMENT) { 557 while(data_start < *n && old[data_start]) 558 data_start++; 559 data_start++; 560 } 561 if (flg & FHCRC) 562 data_start += 2; 563 564 if (data_start >= *n) 565 goto err; 566 567 *n -= data_start; 568 old += data_start; 569 return uncompresszlib(old, newch, bytes_max, n, 0); 570 err: 571 return makeerror(newch, n, "File too short"); 572 } 573 574 private int 575 uncompresszlib(const unsigned char *old, unsigned char **newch, 576 size_t bytes_max, size_t *n, int zlib) 577 { 578 int rc; 579 z_stream z; 580 581 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) 582 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 583 584 z.next_in = CCAST(Bytef *, old); 585 z.avail_in = CAST(uint32_t, *n); 586 z.next_out = *newch; 587 z.avail_out = CAST(unsigned int, bytes_max); 588 z.zalloc = Z_NULL; 589 z.zfree = Z_NULL; 590 z.opaque = Z_NULL; 591 592 /* LINTED bug in header macro */ 593 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15); 594 if (rc != Z_OK) 595 goto err; 596 597 rc = inflate(&z, Z_SYNC_FLUSH); 598 if (rc != Z_OK && rc != Z_STREAM_END) 599 goto err; 600 601 *n = CAST(size_t, z.total_out); 602 rc = inflateEnd(&z); 603 if (rc != Z_OK) 604 goto err; 605 606 /* let's keep the nul-terminate tradition */ 607 (*newch)[*n] = '\0'; 608 609 return OKDATA; 610 err: 611 strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max); 612 *n = strlen(RCAST(char *, *newch)); 613 return ERRDATA; 614 } 615 #endif 616 617 #ifdef BUILTIN_BZLIB 618 private int 619 uncompressbzlib(const unsigned char *old, unsigned char **newch, 620 size_t bytes_max, size_t *n) 621 { 622 int rc; 623 bz_stream bz; 624 625 memset(&bz, 0, sizeof(bz)); 626 rc = BZ2_bzDecompressInit(&bz, 0, 0); 627 if (rc != BZ_OK) 628 goto err; 629 630 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) 631 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 632 633 bz.next_in = CCAST(char *, RCAST(const char *, old)); 634 bz.avail_in = CAST(uint32_t, *n); 635 bz.next_out = RCAST(char *, *newch); 636 bz.avail_out = CAST(unsigned int, bytes_max); 637 638 rc = BZ2_bzDecompress(&bz); 639 if (rc != BZ_OK && rc != BZ_STREAM_END) 640 goto err; 641 642 /* Assume byte_max is within 32bit */ 643 /* assert(bz.total_out_hi32 == 0); */ 644 *n = CAST(size_t, bz.total_out_lo32); 645 rc = BZ2_bzDecompressEnd(&bz); 646 if (rc != BZ_OK) 647 goto err; 648 649 /* let's keep the nul-terminate tradition */ 650 (*newch)[*n] = '\0'; 651 652 return OKDATA; 653 err: 654 snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc); 655 *n = strlen(RCAST(char *, *newch)); 656 return ERRDATA; 657 } 658 #endif 659 660 #ifdef BUILTIN_XZLIB 661 private int 662 uncompressxzlib(const unsigned char *old, unsigned char **newch, 663 size_t bytes_max, size_t *n) 664 { 665 int rc; 666 lzma_stream xz; 667 668 memset(&xz, 0, sizeof(xz)); 669 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0); 670 if (rc != LZMA_OK) 671 goto err; 672 673 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) 674 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 675 676 xz.next_in = CCAST(const uint8_t *, old); 677 xz.avail_in = CAST(uint32_t, *n); 678 xz.next_out = RCAST(uint8_t *, *newch); 679 xz.avail_out = CAST(unsigned int, bytes_max); 680 681 rc = lzma_code(&xz, LZMA_RUN); 682 if (rc != LZMA_OK && rc != LZMA_STREAM_END) 683 goto err; 684 685 *n = CAST(size_t, xz.total_out); 686 687 lzma_end(&xz); 688 689 /* let's keep the nul-terminate tradition */ 690 (*newch)[*n] = '\0'; 691 692 return OKDATA; 693 err: 694 snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc); 695 *n = strlen(RCAST(char *, *newch)); 696 return ERRDATA; 697 } 698 #endif 699 700 701 static int 702 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...) 703 { 704 char *msg; 705 va_list ap; 706 int rv; 707 708 va_start(ap, fmt); 709 rv = vasprintf(&msg, fmt, ap); 710 va_end(ap); 711 if (rv < 0) { 712 *buf = NULL; 713 *len = 0; 714 return NODATA; 715 } 716 *buf = RCAST(unsigned char *, msg); 717 *len = strlen(msg); 718 return ERRDATA; 719 } 720 721 static void 722 closefd(int *fd, size_t i) 723 { 724 if (fd[i] == -1) 725 return; 726 (void) close(fd[i]); 727 fd[i] = -1; 728 } 729 730 static void 731 closep(int *fd) 732 { 733 size_t i; 734 for (i = 0; i < 2; i++) 735 closefd(fd, i); 736 } 737 738 static void 739 movedesc(void *v, int i, int fd) 740 { 741 if (fd == i) 742 return; /* "no dup was necessary" */ 743 #ifdef HAVE_POSIX_SPAWNP 744 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 745 posix_spawn_file_actions_adddup2(fa, fd, i); 746 posix_spawn_file_actions_addclose(fa, fd); 747 #else 748 if (dup2(fd, i) == -1) { 749 DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno)); 750 exit(1); 751 } 752 close(v ? fd : fd); 753 #endif 754 } 755 756 static void 757 closedesc(void *v, int fd) 758 { 759 #ifdef HAVE_POSIX_SPAWNP 760 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 761 posix_spawn_file_actions_addclose(fa, fd); 762 #else 763 close(v ? fd : fd); 764 #endif 765 } 766 767 static void 768 handledesc(void *v, int fd, int fdp[3][2]) 769 { 770 if (fd != -1) { 771 (void) lseek(fd, CAST(off_t, 0), SEEK_SET); 772 movedesc(v, STDIN_FILENO, fd); 773 } else { 774 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]); 775 if (fdp[STDIN_FILENO][1] > 2) 776 closedesc(v, fdp[STDIN_FILENO][1]); 777 } 778 779 file_clear_closexec(STDIN_FILENO); 780 781 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly 782 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]); 783 if (fdp[STDOUT_FILENO][0] > 2) 784 closedesc(v, fdp[STDOUT_FILENO][0]); 785 786 file_clear_closexec(STDOUT_FILENO); 787 788 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]); 789 if (fdp[STDERR_FILENO][0] > 2) 790 closedesc(v, fdp[STDERR_FILENO][0]); 791 792 file_clear_closexec(STDERR_FILENO); 793 } 794 795 static pid_t 796 writechild(int fd, const void *old, size_t n) 797 { 798 pid_t pid; 799 800 /* 801 * fork again, to avoid blocking because both 802 * pipes filled 803 */ 804 pid = fork(); 805 if (pid == -1) { 806 DPRINTF("Fork failed (%s)\n", strerror(errno)); 807 exit(1); 808 } 809 if (pid == 0) { 810 /* child */ 811 if (swrite(fd, old, n) != CAST(ssize_t, n)) { 812 DPRINTF("Write failed (%s)\n", strerror(errno)); 813 exit(1); 814 } 815 exit(0); 816 } 817 /* parent */ 818 return pid; 819 } 820 821 static ssize_t 822 filter_error(unsigned char *ubuf, ssize_t n) 823 { 824 char *p; 825 char *buf; 826 827 ubuf[n] = '\0'; 828 buf = RCAST(char *, ubuf); 829 while (isspace(CAST(unsigned char, *buf))) 830 buf++; 831 DPRINTF("Filter error[[[%s]]]\n", buf); 832 if ((p = strchr(CAST(char *, buf), '\n')) != NULL) 833 *p = '\0'; 834 if ((p = strchr(CAST(char *, buf), ';')) != NULL) 835 *p = '\0'; 836 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) { 837 ++p; 838 while (isspace(CAST(unsigned char, *p))) 839 p++; 840 n = strlen(p); 841 memmove(ubuf, p, CAST(size_t, n + 1)); 842 } 843 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf); 844 if (islower(*ubuf)) 845 *ubuf = toupper(*ubuf); 846 return n; 847 } 848 849 private const char * 850 methodname(size_t method) 851 { 852 switch (method) { 853 #ifdef BUILTIN_DECOMPRESS 854 case METH_FROZEN: 855 case METH_ZLIB: 856 return "zlib"; 857 #endif 858 #ifdef BUILTIN_BZLIB 859 case METH_BZIP: 860 return "bzlib"; 861 #endif 862 #ifdef BUILTIN_XZLIB 863 case METH_XZ: 864 case METH_LZMA: 865 return "xzlib"; 866 #endif 867 default: 868 return compr[method].argv[0]; 869 } 870 } 871 872 private int 873 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old, 874 unsigned char **newch, size_t* n) 875 { 876 int fdp[3][2]; 877 int status, rv, w; 878 pid_t pid; 879 pid_t writepid = -1; 880 size_t i; 881 ssize_t r; 882 char *const *args; 883 #ifdef HAVE_POSIX_SPAWNP 884 posix_spawn_file_actions_t fa; 885 #endif 886 887 switch (method) { 888 #ifdef BUILTIN_DECOMPRESS 889 case METH_FROZEN: 890 return uncompressgzipped(old, newch, bytes_max, n); 891 case METH_ZLIB: 892 return uncompresszlib(old, newch, bytes_max, n, 1); 893 #endif 894 #ifdef BUILTIN_BZLIB 895 case METH_BZIP: 896 return uncompressbzlib(old, newch, bytes_max, n); 897 #endif 898 #ifdef BUILTIN_XZLIB 899 case METH_XZ: 900 case METH_LZMA: 901 return uncompressxzlib(old, newch, bytes_max, n); 902 #endif 903 default: 904 break; 905 } 906 907 (void)fflush(stdout); 908 (void)fflush(stderr); 909 910 for (i = 0; i < __arraycount(fdp); i++) 911 fdp[i][0] = fdp[i][1] = -1; 912 913 /* 914 * There are multithreaded users who run magic_file() 915 * from dozens of threads. If two parallel magic_file() calls 916 * analyze two large compressed files, both will spawn 917 * an uncompressing child here, which writes out uncompressed data. 918 * We read some portion, then close the pipe, then waitpid() the child. 919 * If uncompressed data is larger, child shound get EPIPE and exit. 920 * However, with *parallel* calls OTHER child may unintentionally 921 * inherit pipe fds, thus keeping pipe open and making writes in 922 * our child block instead of failing with EPIPE! 923 * (For the bug to occur, two threads must mutually inherit their pipes, 924 * and both must have large outputs. Thus it happens not that often). 925 * To avoid this, be sure to create pipes with O_CLOEXEC. 926 */ 927 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) || 928 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 || 929 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) { 930 closep(fdp[STDIN_FILENO]); 931 closep(fdp[STDOUT_FILENO]); 932 return makeerror(newch, n, "Cannot create pipe, %s", 933 strerror(errno)); 934 } 935 936 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv)); 937 #ifdef HAVE_POSIX_SPAWNP 938 posix_spawn_file_actions_init(&fa); 939 940 handledesc(&fa, fd, fdp); 941 942 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL, 943 args, NULL); 944 945 posix_spawn_file_actions_destroy(&fa); 946 947 if (status == -1) { 948 return makeerror(newch, n, "Cannot posix_spawn `%s', %s", 949 compr[method].argv[0], strerror(errno)); 950 } 951 #else 952 /* For processes with large mapped virtual sizes, vfork 953 * may be _much_ faster (10-100 times) than fork. 954 */ 955 pid = vfork(); 956 if (pid == -1) { 957 return makeerror(newch, n, "Cannot vfork, %s", 958 strerror(errno)); 959 } 960 if (pid == 0) { 961 /* child */ 962 /* Note: we are after vfork, do not modify memory 963 * in a way which confuses parent. In particular, 964 * do not modify fdp[i][j]. 965 */ 966 handledesc(NULL, fd, fdp); 967 968 (void)execvp(compr[method].argv[0], args); 969 dprintf(STDERR_FILENO, "exec `%s' failed, %s", 970 compr[method].argv[0], strerror(errno)); 971 _exit(1); /* _exit(), not exit(), because of vfork */ 972 } 973 #endif 974 /* parent */ 975 /* Close write sides of child stdout/err pipes */ 976 for (i = 1; i < __arraycount(fdp); i++) 977 closefd(fdp[i], 1); 978 /* Write the buffer data to child stdin, if we don't have fd */ 979 if (fd == -1) { 980 closefd(fdp[STDIN_FILENO], 0); 981 writepid = writechild(fdp[STDIN_FILENO][1], old, *n); 982 closefd(fdp[STDIN_FILENO], 1); 983 } 984 985 *newch = CAST(unsigned char *, malloc(bytes_max + 1)); 986 if (*newch == NULL) { 987 rv = makeerror(newch, n, "No buffer, %s", 988 strerror(errno)); 989 goto err; 990 } 991 rv = OKDATA; 992 errno = 0; 993 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0); 994 if (r == 0 && errno == 0) 995 goto ok; 996 if (r <= 0) { 997 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], 998 r != -1 ? strerror(errno) : "no data"); 999 1000 rv = ERRDATA; 1001 if (r == 0 && 1002 (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) 1003 { 1004 r = filter_error(*newch, r); 1005 goto ok; 1006 } 1007 free(*newch); 1008 if (r == 0) 1009 rv = makeerror(newch, n, "Read failed, %s", 1010 strerror(errno)); 1011 else 1012 rv = makeerror(newch, n, "No data"); 1013 goto err; 1014 } 1015 ok: 1016 *n = r; 1017 /* NUL terminate, as every buffer is handled here. */ 1018 (*newch)[*n] = '\0'; 1019 err: 1020 closefd(fdp[STDIN_FILENO], 1); 1021 closefd(fdp[STDOUT_FILENO], 0); 1022 closefd(fdp[STDERR_FILENO], 0); 1023 1024 w = waitpid(pid, &status, 0); 1025 wait_err: 1026 if (w == -1) { 1027 free(*newch); 1028 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno)); 1029 DPRINTF("Child wait return %#x\n", status); 1030 } else if (!WIFEXITED(status)) { 1031 DPRINTF("Child not exited (%#x)\n", status); 1032 } else if (WEXITSTATUS(status) != 0) { 1033 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status)); 1034 } 1035 if (writepid > 0) { 1036 /* _After_ we know decompressor has exited, our input writer 1037 * definitely will exit now (at worst, writing fails in it, 1038 * since output fd is closed now on the reading size). 1039 */ 1040 w = waitpid(writepid, &status, 0); 1041 writepid = -1; 1042 goto wait_err; 1043 } 1044 1045 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here! 1046 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv); 1047 1048 return rv; 1049 } 1050 #endif 1051