1 /* 2 * Copyright (c) Ian F. Darwin 1986-1995. 3 * Software written by Ian F. Darwin and others; 4 * maintained 1995-present by Christos Zoulas and others. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice immediately at the beginning of the file, without modification, 11 * this list of conditions, and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 /* 29 * compress routines: 30 * zmagic() - returns 0 if not recognized, uncompresses and prints 31 * information if recognized 32 * uncompress(method, old, n, newch) - uncompress old into new, 33 * using method, return sizeof new 34 */ 35 #include "file.h" 36 37 #ifndef lint 38 FILE_RCSID("@(#)$File: compress.c,v 1.129 2020/12/08 21:26:00 christos Exp $") 39 #endif 40 41 #include "magic.h" 42 #include <stdlib.h> 43 #ifdef HAVE_UNISTD_H 44 #include <unistd.h> 45 #endif 46 #include <string.h> 47 #include <errno.h> 48 #include <ctype.h> 49 #include <stdarg.h> 50 #include <signal.h> 51 #ifndef HAVE_SIG_T 52 typedef void (*sig_t)(int); 53 #endif /* HAVE_SIG_T */ 54 #if !defined(__MINGW32__) && !defined(WIN32) && !defined(__MINGW64__) 55 #include <sys/ioctl.h> 56 #endif 57 #ifdef HAVE_SYS_WAIT_H 58 #include <sys/wait.h> 59 #endif 60 #if defined(HAVE_SYS_TIME_H) 61 #include <sys/time.h> 62 #endif 63 64 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT) 65 #define BUILTIN_DECOMPRESS 66 #include <zlib.h> 67 #endif 68 69 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT) 70 #define BUILTIN_BZLIB 71 #include <bzlib.h> 72 #endif 73 74 #if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT) 75 #define BUILTIN_XZLIB 76 #include <lzma.h> 77 #endif 78 79 #ifdef DEBUG 80 int tty = -1; 81 #define DPRINTF(...) do { \ 82 if (tty == -1) \ 83 tty = open("/dev/tty", O_RDWR); \ 84 if (tty == -1) \ 85 abort(); \ 86 dprintf(tty, __VA_ARGS__); \ 87 } while (/*CONSTCOND*/0) 88 #else 89 #define DPRINTF(...) 90 #endif 91 92 #ifdef ZLIBSUPPORT 93 /* 94 * The following python code is not really used because ZLIBSUPPORT is only 95 * defined if we have a built-in zlib, and the built-in zlib handles that. 96 * That is not true for android where we have zlib.h and not -lz. 97 */ 98 static const char zlibcode[] = 99 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))"; 100 101 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL }; 102 103 static int 104 zlibcmp(const unsigned char *buf) 105 { 106 unsigned short x = 1; 107 unsigned char *s = CAST(unsigned char *, CAST(void *, &x)); 108 109 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0) 110 return 0; 111 if (s[0] != 1) /* endianness test */ 112 x = buf[0] | (buf[1] << 8); 113 else 114 x = buf[1] | (buf[0] << 8); 115 if (x % 31) 116 return 0; 117 return 1; 118 } 119 #endif 120 121 static int 122 lzmacmp(const unsigned char *buf) 123 { 124 if (buf[0] != 0x5d || buf[1] || buf[2]) 125 return 0; 126 if (buf[12] && buf[12] != 0xff) 127 return 0; 128 return 1; 129 } 130 131 #define gzip_flags "-cd" 132 #define lrzip_flags "-do" 133 #define lzip_flags gzip_flags 134 135 static const char *gzip_args[] = { 136 "gzip", gzip_flags, NULL 137 }; 138 static const char *uncompress_args[] = { 139 "uncompress", "-c", NULL 140 }; 141 static const char *bzip2_args[] = { 142 "bzip2", "-cd", NULL 143 }; 144 static const char *lzip_args[] = { 145 "lzip", lzip_flags, NULL 146 }; 147 static const char *xz_args[] = { 148 "xz", "-cd", NULL 149 }; 150 static const char *lrzip_args[] = { 151 "lrzip", lrzip_flags, NULL 152 }; 153 static const char *lz4_args[] = { 154 "lz4", "-cd", NULL 155 }; 156 static const char *zstd_args[] = { 157 "zstd", "-cd", NULL 158 }; 159 160 #define do_zlib NULL 161 #define do_bzlib NULL 162 163 private const struct { 164 union { 165 const char *magic; 166 int (*func)(const unsigned char *); 167 } u; 168 int maglen; 169 const char **argv; 170 void *unused; 171 } compr[] = { 172 #define METH_FROZEN 2 173 #define METH_BZIP 7 174 #define METH_XZ 9 175 #define METH_LZMA 13 176 #define METH_ZLIB 14 177 { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */ 178 /* Uncompress can get stuck; so use gzip first if we have it 179 * Idea from Damien Clark, thanks! */ 180 { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */ 181 { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */ 182 { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */ 183 { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */ 184 /* the standard pack utilities do not accept standard input */ 185 { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */ 186 { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */ 187 /* ...only first file examined */ 188 { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */ 189 { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */ 190 { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */ 191 { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */ 192 { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */ 193 { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */ 194 { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */ 195 #ifdef ZLIBSUPPORT 196 { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */ 197 #endif 198 }; 199 200 #define OKDATA 0 201 #define NODATA 1 202 #define ERRDATA 2 203 204 private ssize_t swrite(int, const void *, size_t); 205 #if HAVE_FORK 206 private size_t ncompr = __arraycount(compr); 207 private int uncompressbuf(int, size_t, size_t, const unsigned char *, 208 unsigned char **, size_t *); 209 #ifdef BUILTIN_DECOMPRESS 210 private int uncompresszlib(const unsigned char *, unsigned char **, size_t, 211 size_t *, int); 212 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t, 213 size_t *); 214 #endif 215 #ifdef BUILTIN_BZLIB 216 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t, 217 size_t *); 218 #endif 219 #ifdef BUILTIN_XZLIB 220 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t, 221 size_t *); 222 #endif 223 224 static int makeerror(unsigned char **, size_t *, const char *, ...) 225 __attribute__((__format__(__printf__, 3, 4))); 226 private const char *methodname(size_t); 227 228 private int 229 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf) 230 { 231 unsigned char *p; 232 int mime = ms->flags & MAGIC_MIME; 233 234 if (!mime) 235 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf); 236 237 for (p = buf; *p; p++) 238 if (!isalnum(*p)) 239 *p = '-'; 240 241 return file_printf(ms, "application/x-decompression-error-%s-%s", 242 methodname(i), buf); 243 } 244 245 protected int 246 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name) 247 { 248 unsigned char *newbuf = NULL; 249 size_t i, nsz; 250 char *rbuf; 251 file_pushbuf_t *pb; 252 int urv, prv, rv = 0; 253 int mime = ms->flags & MAGIC_MIME; 254 int fd = b->fd; 255 const unsigned char *buf = CAST(const unsigned char *, b->fbuf); 256 size_t nbytes = b->flen; 257 int sa_saved = 0; 258 struct sigaction sig_act; 259 260 if ((ms->flags & MAGIC_COMPRESS) == 0) 261 return 0; 262 263 for (i = 0; i < ncompr; i++) { 264 int zm; 265 if (nbytes < CAST(size_t, abs(compr[i].maglen))) 266 continue; 267 if (compr[i].maglen < 0) { 268 zm = (*compr[i].u.func)(buf); 269 } else { 270 zm = memcmp(buf, compr[i].u.magic, 271 CAST(size_t, compr[i].maglen)) == 0; 272 } 273 274 if (!zm) 275 continue; 276 277 /* Prevent SIGPIPE death if child dies unexpectedly */ 278 if (!sa_saved) { 279 //We can use sig_act for both new and old, but 280 struct sigaction new_act; 281 memset(&new_act, 0, sizeof(new_act)); 282 new_act.sa_handler = SIG_IGN; 283 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1; 284 } 285 286 nsz = nbytes; 287 urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz); 288 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv, 289 (char *)newbuf, nsz); 290 switch (urv) { 291 case OKDATA: 292 case ERRDATA: 293 ms->flags &= ~MAGIC_COMPRESS; 294 if (urv == ERRDATA) 295 prv = format_decompression_error(ms, i, newbuf); 296 else 297 prv = file_buffer(ms, -1, NULL, name, newbuf, nsz); 298 if (prv == -1) 299 goto error; 300 rv = 1; 301 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0) 302 goto out; 303 if (mime != MAGIC_MIME && mime != 0) 304 goto out; 305 if ((file_printf(ms, 306 mime ? " compressed-encoding=" : " (")) == -1) 307 goto error; 308 if ((pb = file_push_buffer(ms)) == NULL) 309 goto error; 310 /* 311 * XXX: If file_buffer fails here, we overwrite 312 * the compressed text. FIXME. 313 */ 314 if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) { 315 if (file_pop_buffer(ms, pb) != NULL) 316 abort(); 317 goto error; 318 } 319 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) { 320 if (file_printf(ms, "%s", rbuf) == -1) { 321 free(rbuf); 322 goto error; 323 } 324 free(rbuf); 325 } 326 if (!mime && file_printf(ms, ")") == -1) 327 goto error; 328 /*FALLTHROUGH*/ 329 case NODATA: 330 break; 331 default: 332 abort(); 333 /*NOTREACHED*/ 334 error: 335 rv = -1; 336 break; 337 } 338 } 339 out: 340 DPRINTF("rv = %d\n", rv); 341 342 if (sa_saved && sig_act.sa_handler != SIG_IGN) 343 (void)sigaction(SIGPIPE, &sig_act, NULL); 344 345 free(newbuf); 346 ms->flags |= MAGIC_COMPRESS; 347 DPRINTF("Zmagic returns %d\n", rv); 348 return rv; 349 } 350 #endif 351 /* 352 * `safe' write for sockets and pipes. 353 */ 354 private ssize_t 355 swrite(int fd, const void *buf, size_t n) 356 { 357 ssize_t rv; 358 size_t rn = n; 359 360 do 361 switch (rv = write(fd, buf, n)) { 362 case -1: 363 if (errno == EINTR) 364 continue; 365 return -1; 366 default: 367 n -= rv; 368 buf = CAST(const char *, buf) + rv; 369 break; 370 } 371 while (n > 0); 372 return rn; 373 } 374 375 376 /* 377 * `safe' read for sockets and pipes. 378 */ 379 protected ssize_t 380 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__))) 381 { 382 ssize_t rv; 383 #ifdef FIONREAD 384 int t = 0; 385 #endif 386 size_t rn = n; 387 388 if (fd == STDIN_FILENO) 389 goto nocheck; 390 391 #ifdef FIONREAD 392 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) { 393 #ifdef FD_ZERO 394 ssize_t cnt; 395 for (cnt = 0;; cnt++) { 396 fd_set check; 397 struct timeval tout = {0, 100 * 1000}; 398 int selrv; 399 400 FD_ZERO(&check); 401 FD_SET(fd, &check); 402 403 /* 404 * Avoid soft deadlock: do not read if there 405 * is nothing to read from sockets and pipes. 406 */ 407 selrv = select(fd + 1, &check, NULL, NULL, &tout); 408 if (selrv == -1) { 409 if (errno == EINTR || errno == EAGAIN) 410 continue; 411 } else if (selrv == 0 && cnt >= 5) { 412 return 0; 413 } else 414 break; 415 } 416 #endif 417 (void)ioctl(fd, FIONREAD, &t); 418 } 419 420 if (t > 0 && CAST(size_t, t) < n) { 421 n = t; 422 rn = n; 423 } 424 #endif 425 426 nocheck: 427 do 428 switch ((rv = read(fd, buf, n))) { 429 case -1: 430 if (errno == EINTR) 431 continue; 432 return -1; 433 case 0: 434 return rn - n; 435 default: 436 n -= rv; 437 buf = CAST(char *, CCAST(void *, buf)) + rv; 438 break; 439 } 440 while (n > 0); 441 return rn; 442 } 443 444 protected int 445 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, 446 size_t nbytes) 447 { 448 char buf[4096]; 449 ssize_t r; 450 int tfd; 451 452 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf); 453 #ifndef HAVE_MKSTEMP 454 { 455 char *ptr = mktemp(buf); 456 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600); 457 r = errno; 458 (void)unlink(ptr); 459 errno = r; 460 } 461 #else 462 { 463 int te; 464 mode_t ou = umask(0); 465 tfd = mkstemp(buf); 466 (void)umask(ou); 467 te = errno; 468 (void)unlink(buf); 469 errno = te; 470 } 471 #endif 472 if (tfd == -1) { 473 file_error(ms, errno, 474 "cannot create temporary file for pipe copy"); 475 return -1; 476 } 477 478 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes)) 479 r = 1; 480 else { 481 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0) 482 if (swrite(tfd, buf, CAST(size_t, r)) != r) 483 break; 484 } 485 486 switch (r) { 487 case -1: 488 file_error(ms, errno, "error copying from pipe to temp file"); 489 return -1; 490 case 0: 491 break; 492 default: 493 file_error(ms, errno, "error while writing to temp file"); 494 return -1; 495 } 496 497 /* 498 * We duplicate the file descriptor, because fclose on a 499 * tmpfile will delete the file, but any open descriptors 500 * can still access the phantom inode. 501 */ 502 if ((fd = dup2(tfd, fd)) == -1) { 503 file_error(ms, errno, "could not dup descriptor for temp file"); 504 return -1; 505 } 506 (void)close(tfd); 507 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) { 508 file_badseek(ms); 509 return -1; 510 } 511 return fd; 512 } 513 #if HAVE_FORK 514 #ifdef BUILTIN_DECOMPRESS 515 516 #define FHCRC (1 << 1) 517 #define FEXTRA (1 << 2) 518 #define FNAME (1 << 3) 519 #define FCOMMENT (1 << 4) 520 521 522 private int 523 uncompressgzipped(const unsigned char *old, unsigned char **newch, 524 size_t bytes_max, size_t *n) 525 { 526 unsigned char flg = old[3]; 527 size_t data_start = 10; 528 529 if (flg & FEXTRA) { 530 if (data_start + 1 >= *n) 531 goto err; 532 data_start += 2 + old[data_start] + old[data_start + 1] * 256; 533 } 534 if (flg & FNAME) { 535 while(data_start < *n && old[data_start]) 536 data_start++; 537 data_start++; 538 } 539 if (flg & FCOMMENT) { 540 while(data_start < *n && old[data_start]) 541 data_start++; 542 data_start++; 543 } 544 if (flg & FHCRC) 545 data_start += 2; 546 547 if (data_start >= *n) 548 goto err; 549 550 *n -= data_start; 551 old += data_start; 552 return uncompresszlib(old, newch, bytes_max, n, 0); 553 err: 554 return makeerror(newch, n, "File too short"); 555 } 556 557 private int 558 uncompresszlib(const unsigned char *old, unsigned char **newch, 559 size_t bytes_max, size_t *n, int zlib) 560 { 561 int rc; 562 z_stream z; 563 564 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) 565 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 566 567 z.next_in = CCAST(Bytef *, old); 568 z.avail_in = CAST(uint32_t, *n); 569 z.next_out = *newch; 570 z.avail_out = CAST(unsigned int, bytes_max); 571 z.zalloc = Z_NULL; 572 z.zfree = Z_NULL; 573 z.opaque = Z_NULL; 574 575 /* LINTED bug in header macro */ 576 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15); 577 if (rc != Z_OK) 578 goto err; 579 580 rc = inflate(&z, Z_SYNC_FLUSH); 581 if (rc != Z_OK && rc != Z_STREAM_END) 582 goto err; 583 584 *n = CAST(size_t, z.total_out); 585 rc = inflateEnd(&z); 586 if (rc != Z_OK) 587 goto err; 588 589 /* let's keep the nul-terminate tradition */ 590 (*newch)[*n] = '\0'; 591 592 return OKDATA; 593 err: 594 strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max); 595 *n = strlen(RCAST(char *, *newch)); 596 return ERRDATA; 597 } 598 #endif 599 600 #ifdef BUILTIN_BZLIB 601 private int 602 uncompressbzlib(const unsigned char *old, unsigned char **newch, 603 size_t bytes_max, size_t *n) 604 { 605 int rc; 606 bz_stream bz; 607 608 memset(&bz, 0, sizeof(bz)); 609 rc = BZ2_bzDecompressInit(&bz, 0, 0); 610 if (rc != BZ_OK) 611 goto err; 612 613 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) 614 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 615 616 bz.next_in = CCAST(char *, RCAST(const char *, old)); 617 bz.avail_in = CAST(uint32_t, *n); 618 bz.next_out = RCAST(char *, *newch); 619 bz.avail_out = CAST(unsigned int, bytes_max); 620 621 rc = BZ2_bzDecompress(&bz); 622 if (rc != BZ_OK && rc != BZ_STREAM_END) 623 goto err; 624 625 /* Assume byte_max is within 32bit */ 626 /* assert(bz.total_out_hi32 == 0); */ 627 *n = CAST(size_t, bz.total_out_lo32); 628 rc = BZ2_bzDecompressEnd(&bz); 629 if (rc != BZ_OK) 630 goto err; 631 632 /* let's keep the nul-terminate tradition */ 633 (*newch)[*n] = '\0'; 634 635 return OKDATA; 636 err: 637 snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc); 638 *n = strlen(RCAST(char *, *newch)); 639 return ERRDATA; 640 } 641 #endif 642 643 #ifdef BUILTIN_XZLIB 644 private int 645 uncompressxzlib(const unsigned char *old, unsigned char **newch, 646 size_t bytes_max, size_t *n) 647 { 648 int rc; 649 lzma_stream xz; 650 651 memset(&xz, 0, sizeof(xz)); 652 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0); 653 if (rc != LZMA_OK) 654 goto err; 655 656 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL) 657 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 658 659 xz.next_in = CCAST(const uint8_t *, old); 660 xz.avail_in = CAST(uint32_t, *n); 661 xz.next_out = RCAST(uint8_t *, *newch); 662 xz.avail_out = CAST(unsigned int, bytes_max); 663 664 rc = lzma_code(&xz, LZMA_RUN); 665 if (rc != LZMA_OK && rc != LZMA_STREAM_END) 666 goto err; 667 668 *n = CAST(size_t, xz.total_out); 669 670 lzma_end(&xz); 671 672 /* let's keep the nul-terminate tradition */ 673 (*newch)[*n] = '\0'; 674 675 return OKDATA; 676 err: 677 snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc); 678 *n = strlen(RCAST(char *, *newch)); 679 return ERRDATA; 680 } 681 #endif 682 683 684 static int 685 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...) 686 { 687 char *msg; 688 va_list ap; 689 int rv; 690 691 va_start(ap, fmt); 692 rv = vasprintf(&msg, fmt, ap); 693 va_end(ap); 694 if (rv < 0) { 695 *buf = NULL; 696 *len = 0; 697 return NODATA; 698 } 699 *buf = RCAST(unsigned char *, msg); 700 *len = strlen(msg); 701 return ERRDATA; 702 } 703 704 static void 705 closefd(int *fd, size_t i) 706 { 707 if (fd[i] == -1) 708 return; 709 (void) close(fd[i]); 710 fd[i] = -1; 711 } 712 713 static void 714 closep(int *fd) 715 { 716 size_t i; 717 for (i = 0; i < 2; i++) 718 closefd(fd, i); 719 } 720 721 static int 722 copydesc(int i, int fd) 723 { 724 if (fd == i) 725 return 0; /* "no dup was necessary" */ 726 if (dup2(fd, i) == -1) { 727 DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno)); 728 exit(1); 729 } 730 return 1; 731 } 732 733 static pid_t 734 writechild(int fd, const void *old, size_t n) 735 { 736 pid_t pid; 737 738 /* 739 * fork again, to avoid blocking because both 740 * pipes filled 741 */ 742 pid = fork(); 743 if (pid == -1) { 744 DPRINTF("Fork failed (%s)\n", strerror(errno)); 745 exit(1); 746 } 747 if (pid == 0) { 748 /* child */ 749 if (swrite(fd, old, n) != CAST(ssize_t, n)) { 750 DPRINTF("Write failed (%s)\n", strerror(errno)); 751 exit(1); 752 } 753 exit(0); 754 } 755 /* parent */ 756 return pid; 757 } 758 759 static ssize_t 760 filter_error(unsigned char *ubuf, ssize_t n) 761 { 762 char *p; 763 char *buf; 764 765 ubuf[n] = '\0'; 766 buf = RCAST(char *, ubuf); 767 while (isspace(CAST(unsigned char, *buf))) 768 buf++; 769 DPRINTF("Filter error[[[%s]]]\n", buf); 770 if ((p = strchr(CAST(char *, buf), '\n')) != NULL) 771 *p = '\0'; 772 if ((p = strchr(CAST(char *, buf), ';')) != NULL) 773 *p = '\0'; 774 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) { 775 ++p; 776 while (isspace(CAST(unsigned char, *p))) 777 p++; 778 n = strlen(p); 779 memmove(ubuf, p, CAST(size_t, n + 1)); 780 } 781 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf); 782 if (islower(*ubuf)) 783 *ubuf = toupper(*ubuf); 784 return n; 785 } 786 787 private const char * 788 methodname(size_t method) 789 { 790 switch (method) { 791 #ifdef BUILTIN_DECOMPRESS 792 case METH_FROZEN: 793 case METH_ZLIB: 794 return "zlib"; 795 #endif 796 #ifdef BUILTIN_BZLIB 797 case METH_BZIP: 798 return "bzlib"; 799 #endif 800 #ifdef BUILTIN_XZLIB 801 case METH_XZ: 802 case METH_LZMA: 803 return "xzlib"; 804 #endif 805 default: 806 return compr[method].argv[0]; 807 } 808 } 809 810 private int 811 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old, 812 unsigned char **newch, size_t* n) 813 { 814 int fdp[3][2]; 815 int status, rv, w; 816 pid_t pid; 817 pid_t writepid = -1; 818 size_t i; 819 ssize_t r; 820 821 switch (method) { 822 #ifdef BUILTIN_DECOMPRESS 823 case METH_FROZEN: 824 return uncompressgzipped(old, newch, bytes_max, n); 825 case METH_ZLIB: 826 return uncompresszlib(old, newch, bytes_max, n, 1); 827 #endif 828 #ifdef BUILTIN_BZLIB 829 case METH_BZIP: 830 return uncompressbzlib(old, newch, bytes_max, n); 831 #endif 832 #ifdef BUILTIN_XZLIB 833 case METH_XZ: 834 case METH_LZMA: 835 return uncompressxzlib(old, newch, bytes_max, n); 836 #endif 837 default: 838 break; 839 } 840 841 (void)fflush(stdout); 842 (void)fflush(stderr); 843 844 for (i = 0; i < __arraycount(fdp); i++) 845 fdp[i][0] = fdp[i][1] = -1; 846 847 /* 848 * There are multithreaded users who run magic_file() 849 * from dozens of threads. If two parallel magic_file() calls 850 * analyze two large compressed files, both will spawn 851 * an uncompressing child here, which writes out uncompressed data. 852 * We read some portion, then close the pipe, then waitpid() the child. 853 * If uncompressed data is larger, child shound get EPIPE and exit. 854 * However, with *parallel* calls OTHER child may unintentionally 855 * inherit pipe fds, thus keeping pipe open and making writes in 856 * our child block instead of failing with EPIPE! 857 * (For the bug to occur, two threads must mutually inherit their pipes, 858 * and both must have large outputs. Thus it happens not that often). 859 * To avoid this, be sure to create pipes with O_CLOEXEC. 860 */ 861 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) || 862 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 || 863 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) { 864 closep(fdp[STDIN_FILENO]); 865 closep(fdp[STDOUT_FILENO]); 866 return makeerror(newch, n, "Cannot create pipe, %s", 867 strerror(errno)); 868 } 869 870 /* For processes with large mapped virtual sizes, vfork 871 * may be _much_ faster (10-100 times) than fork. 872 */ 873 pid = vfork(); 874 if (pid == -1) { 875 return makeerror(newch, n, "Cannot vfork, %s", 876 strerror(errno)); 877 } 878 if (pid == 0) { 879 /* child */ 880 /* Note: we are after vfork, do not modify memory 881 * in a way which confuses parent. In particular, 882 * do not modify fdp[i][j]. 883 */ 884 if (fd != -1) { 885 (void) lseek(fd, CAST(off_t, 0), SEEK_SET); 886 if (copydesc(STDIN_FILENO, fd)) 887 (void) close(fd); 888 } else { 889 if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0])) 890 (void) close(fdp[STDIN_FILENO][0]); 891 if (fdp[STDIN_FILENO][1] > 2) 892 (void) close(fdp[STDIN_FILENO][1]); 893 } 894 file_clear_closexec(STDIN_FILENO); 895 896 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly 897 if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1])) 898 (void) close(fdp[STDOUT_FILENO][1]); 899 if (fdp[STDOUT_FILENO][0] > 2) 900 (void) close(fdp[STDOUT_FILENO][0]); 901 file_clear_closexec(STDOUT_FILENO); 902 903 if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1])) 904 (void) close(fdp[STDERR_FILENO][1]); 905 if (fdp[STDERR_FILENO][0] > 2) 906 (void) close(fdp[STDERR_FILENO][0]); 907 file_clear_closexec(STDERR_FILENO); 908 909 (void)execvp(compr[method].argv[0], 910 RCAST(char *const *, RCAST(intptr_t, compr[method].argv))); 911 dprintf(STDERR_FILENO, "exec `%s' failed, %s", 912 compr[method].argv[0], strerror(errno)); 913 _exit(1); /* _exit(), not exit(), because of vfork */ 914 } 915 /* parent */ 916 /* Close write sides of child stdout/err pipes */ 917 for (i = 1; i < __arraycount(fdp); i++) 918 closefd(fdp[i], 1); 919 /* Write the buffer data to child stdin, if we don't have fd */ 920 if (fd == -1) { 921 closefd(fdp[STDIN_FILENO], 0); 922 writepid = writechild(fdp[STDIN_FILENO][1], old, *n); 923 closefd(fdp[STDIN_FILENO], 1); 924 } 925 926 *newch = CAST(unsigned char *, malloc(bytes_max + 1)); 927 if (*newch == NULL) { 928 rv = makeerror(newch, n, "No buffer, %s", 929 strerror(errno)); 930 goto err; 931 } 932 rv = OKDATA; 933 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0); 934 if (r <= 0) { 935 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], 936 r != -1 ? strerror(errno) : "no data"); 937 938 rv = ERRDATA; 939 if (r == 0 && 940 (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) 941 { 942 r = filter_error(*newch, r); 943 goto ok; 944 } 945 free(*newch); 946 if (r == 0) 947 rv = makeerror(newch, n, "Read failed, %s", 948 strerror(errno)); 949 else 950 rv = makeerror(newch, n, "No data"); 951 goto err; 952 } 953 ok: 954 *n = r; 955 /* NUL terminate, as every buffer is handled here. */ 956 (*newch)[*n] = '\0'; 957 err: 958 closefd(fdp[STDIN_FILENO], 1); 959 closefd(fdp[STDOUT_FILENO], 0); 960 closefd(fdp[STDERR_FILENO], 0); 961 962 w = waitpid(pid, &status, 0); 963 wait_err: 964 if (w == -1) { 965 free(*newch); 966 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno)); 967 DPRINTF("Child wait return %#x\n", status); 968 } else if (!WIFEXITED(status)) { 969 DPRINTF("Child not exited (%#x)\n", status); 970 } else if (WEXITSTATUS(status) != 0) { 971 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status)); 972 } 973 if (writepid > 0) { 974 /* _After_ we know decompressor has exited, our input writer 975 * definitely will exit now (at worst, writing fails in it, 976 * since output fd is closed now on the reading size). 977 */ 978 w = waitpid(writepid, &status, 0); 979 writepid = -1; 980 goto wait_err; 981 } 982 983 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here! 984 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv); 985 986 return rv; 987 } 988 #endif 989