181ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 281ad8388SMartin Matuska // 381ad8388SMartin Matuska /// \file xzdec.c 481ad8388SMartin Matuska /// \brief Simple single-threaded tool to uncompress .xz or .lzma files 581ad8388SMartin Matuska // 681ad8388SMartin Matuska // Author: Lasse Collin 781ad8388SMartin Matuska // 881ad8388SMartin Matuska // This file has been put into the public domain. 981ad8388SMartin Matuska // You can do whatever you want with this file. 1081ad8388SMartin Matuska // 1181ad8388SMartin Matuska /////////////////////////////////////////////////////////////////////////////// 1281ad8388SMartin Matuska 1381ad8388SMartin Matuska #include "sysdefs.h" 1481ad8388SMartin Matuska #include "lzma.h" 1581ad8388SMartin Matuska 1681ad8388SMartin Matuska #include <stdarg.h> 1781ad8388SMartin Matuska #include <errno.h> 1881ad8388SMartin Matuska #include <stdio.h> 1981ad8388SMartin Matuska #include <unistd.h> 2081ad8388SMartin Matuska 2181ad8388SMartin Matuska #include "getopt.h" 2281ad8388SMartin Matuska #include "tuklib_progname.h" 2381ad8388SMartin Matuska #include "tuklib_exit.h" 2481ad8388SMartin Matuska 2581ad8388SMartin Matuska #ifdef TUKLIB_DOSLIKE 2681ad8388SMartin Matuska # include <fcntl.h> 2781ad8388SMartin Matuska # include <io.h> 2881ad8388SMartin Matuska #endif 2981ad8388SMartin Matuska 3081ad8388SMartin Matuska 3181ad8388SMartin Matuska #ifdef LZMADEC 3281ad8388SMartin Matuska # define TOOL_FORMAT "lzma" 3381ad8388SMartin Matuska #else 3481ad8388SMartin Matuska # define TOOL_FORMAT "xz" 3581ad8388SMartin Matuska #endif 3681ad8388SMartin Matuska 3781ad8388SMartin Matuska 3881ad8388SMartin Matuska /// Error messages are suppressed if this is zero, which is the case when 3981ad8388SMartin Matuska /// --quiet has been given at least twice. 4081ad8388SMartin Matuska static unsigned int display_errors = 2; 4181ad8388SMartin Matuska 4281ad8388SMartin Matuska 43e24134bcSMartin Matuska static void lzma_attribute((__format__(__printf__, 1, 2))) 4481ad8388SMartin Matuska my_errorf(const char *fmt, ...) 4581ad8388SMartin Matuska { 4681ad8388SMartin Matuska va_list ap; 4781ad8388SMartin Matuska va_start(ap, fmt); 4881ad8388SMartin Matuska 4981ad8388SMartin Matuska if (display_errors) { 5081ad8388SMartin Matuska fprintf(stderr, "%s: ", progname); 5181ad8388SMartin Matuska vfprintf(stderr, fmt, ap); 5281ad8388SMartin Matuska fprintf(stderr, "\n"); 5381ad8388SMartin Matuska } 5481ad8388SMartin Matuska 5581ad8388SMartin Matuska va_end(ap); 5681ad8388SMartin Matuska return; 5781ad8388SMartin Matuska } 5881ad8388SMartin Matuska 5981ad8388SMartin Matuska 60e24134bcSMartin Matuska static void lzma_attribute((__noreturn__)) 6181ad8388SMartin Matuska help(void) 6281ad8388SMartin Matuska { 6381ad8388SMartin Matuska printf( 6481ad8388SMartin Matuska "Usage: %s [OPTION]... [FILE]...\n" 65*42b10a37SXin LI "Decompress files in the ." TOOL_FORMAT " format to standard output.\n" 6681ad8388SMartin Matuska "\n" 67*42b10a37SXin LI " -d, --decompress (ignored, only decompression is supported)\n" 68*42b10a37SXin LI " -k, --keep (ignored, files are never deleted)\n" 69*42b10a37SXin LI " -c, --stdout (ignored, output is always written to standard output)\n" 7081ad8388SMartin Matuska " -q, --quiet specify *twice* to suppress errors\n" 71*42b10a37SXin LI " -Q, --no-warn (ignored, the exit status 2 is never used)\n" 7281ad8388SMartin Matuska " -h, --help display this help and exit\n" 7381ad8388SMartin Matuska " -V, --version display the version number and exit\n" 7481ad8388SMartin Matuska "\n" 7581ad8388SMartin Matuska "With no FILE, or when FILE is -, read standard input.\n" 7681ad8388SMartin Matuska "\n" 7781ad8388SMartin Matuska "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n" 78e0f0e66dSMartin Matuska PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname); 79e0f0e66dSMartin Matuska 8081ad8388SMartin Matuska tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 8181ad8388SMartin Matuska } 8281ad8388SMartin Matuska 8381ad8388SMartin Matuska 84e24134bcSMartin Matuska static void lzma_attribute((__noreturn__)) 8581ad8388SMartin Matuska version(void) 8681ad8388SMartin Matuska { 8781ad8388SMartin Matuska printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n" 8881ad8388SMartin Matuska "liblzma %s\n", lzma_version_string()); 8981ad8388SMartin Matuska 9081ad8388SMartin Matuska tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 9181ad8388SMartin Matuska } 9281ad8388SMartin Matuska 9381ad8388SMartin Matuska 9481ad8388SMartin Matuska /// Parses command line options. 9581ad8388SMartin Matuska static void 9681ad8388SMartin Matuska parse_options(int argc, char **argv) 9781ad8388SMartin Matuska { 9881ad8388SMartin Matuska static const char short_opts[] = "cdkM:hqQV"; 9981ad8388SMartin Matuska static const struct option long_opts[] = { 10081ad8388SMartin Matuska { "stdout", no_argument, NULL, 'c' }, 10181ad8388SMartin Matuska { "to-stdout", no_argument, NULL, 'c' }, 10281ad8388SMartin Matuska { "decompress", no_argument, NULL, 'd' }, 10381ad8388SMartin Matuska { "uncompress", no_argument, NULL, 'd' }, 10481ad8388SMartin Matuska { "keep", no_argument, NULL, 'k' }, 10581ad8388SMartin Matuska { "quiet", no_argument, NULL, 'q' }, 10681ad8388SMartin Matuska { "no-warn", no_argument, NULL, 'Q' }, 10781ad8388SMartin Matuska { "help", no_argument, NULL, 'h' }, 10881ad8388SMartin Matuska { "version", no_argument, NULL, 'V' }, 10981ad8388SMartin Matuska { NULL, 0, NULL, 0 } 11081ad8388SMartin Matuska }; 11181ad8388SMartin Matuska 11281ad8388SMartin Matuska int c; 11381ad8388SMartin Matuska 11481ad8388SMartin Matuska while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 11581ad8388SMartin Matuska != -1) { 11681ad8388SMartin Matuska switch (c) { 11781ad8388SMartin Matuska case 'c': 11881ad8388SMartin Matuska case 'd': 11981ad8388SMartin Matuska case 'k': 12081ad8388SMartin Matuska case 'Q': 12181ad8388SMartin Matuska break; 12281ad8388SMartin Matuska 12381ad8388SMartin Matuska case 'q': 12481ad8388SMartin Matuska if (display_errors > 0) 12581ad8388SMartin Matuska --display_errors; 12681ad8388SMartin Matuska 12781ad8388SMartin Matuska break; 12881ad8388SMartin Matuska 12981ad8388SMartin Matuska case 'h': 13081ad8388SMartin Matuska help(); 13181ad8388SMartin Matuska 13281ad8388SMartin Matuska case 'V': 13381ad8388SMartin Matuska version(); 13481ad8388SMartin Matuska 13581ad8388SMartin Matuska default: 13681ad8388SMartin Matuska exit(EXIT_FAILURE); 13781ad8388SMartin Matuska } 13881ad8388SMartin Matuska } 13981ad8388SMartin Matuska 14081ad8388SMartin Matuska return; 14181ad8388SMartin Matuska } 14281ad8388SMartin Matuska 14381ad8388SMartin Matuska 14481ad8388SMartin Matuska static void 14581ad8388SMartin Matuska uncompress(lzma_stream *strm, FILE *file, const char *filename) 14681ad8388SMartin Matuska { 14781ad8388SMartin Matuska lzma_ret ret; 14881ad8388SMartin Matuska 14981ad8388SMartin Matuska // Initialize the decoder 15081ad8388SMartin Matuska #ifdef LZMADEC 151e0f0e66dSMartin Matuska ret = lzma_alone_decoder(strm, UINT64_MAX); 15281ad8388SMartin Matuska #else 153e0f0e66dSMartin Matuska ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED); 15481ad8388SMartin Matuska #endif 15581ad8388SMartin Matuska 15681ad8388SMartin Matuska // The only reasonable error here is LZMA_MEM_ERROR. 15781ad8388SMartin Matuska if (ret != LZMA_OK) { 15881ad8388SMartin Matuska my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM) 15981ad8388SMartin Matuska : "Internal error (bug)"); 16081ad8388SMartin Matuska exit(EXIT_FAILURE); 16181ad8388SMartin Matuska } 16281ad8388SMartin Matuska 16381ad8388SMartin Matuska // Input and output buffers 16481ad8388SMartin Matuska uint8_t in_buf[BUFSIZ]; 16581ad8388SMartin Matuska uint8_t out_buf[BUFSIZ]; 16681ad8388SMartin Matuska 16781ad8388SMartin Matuska strm->avail_in = 0; 16881ad8388SMartin Matuska strm->next_out = out_buf; 16981ad8388SMartin Matuska strm->avail_out = BUFSIZ; 17081ad8388SMartin Matuska 17181ad8388SMartin Matuska lzma_action action = LZMA_RUN; 17281ad8388SMartin Matuska 17381ad8388SMartin Matuska while (true) { 17481ad8388SMartin Matuska if (strm->avail_in == 0) { 17581ad8388SMartin Matuska strm->next_in = in_buf; 17681ad8388SMartin Matuska strm->avail_in = fread(in_buf, 1, BUFSIZ, file); 17781ad8388SMartin Matuska 17881ad8388SMartin Matuska if (ferror(file)) { 17981ad8388SMartin Matuska // POSIX says that fread() sets errno if 18081ad8388SMartin Matuska // an error occurred. ferror() doesn't 18181ad8388SMartin Matuska // touch errno. 18281ad8388SMartin Matuska my_errorf("%s: Error reading input file: %s", 18381ad8388SMartin Matuska filename, strerror(errno)); 18481ad8388SMartin Matuska exit(EXIT_FAILURE); 18581ad8388SMartin Matuska } 18681ad8388SMartin Matuska 18781ad8388SMartin Matuska #ifndef LZMADEC 18881ad8388SMartin Matuska // When using LZMA_CONCATENATED, we need to tell 18981ad8388SMartin Matuska // liblzma when it has got all the input. 19081ad8388SMartin Matuska if (feof(file)) 19181ad8388SMartin Matuska action = LZMA_FINISH; 19281ad8388SMartin Matuska #endif 19381ad8388SMartin Matuska } 19481ad8388SMartin Matuska 19581ad8388SMartin Matuska ret = lzma_code(strm, action); 19681ad8388SMartin Matuska 19781ad8388SMartin Matuska // Write and check write error before checking decoder error. 19881ad8388SMartin Matuska // This way as much data as possible gets written to output 19981ad8388SMartin Matuska // even if decoder detected an error. 20081ad8388SMartin Matuska if (strm->avail_out == 0 || ret != LZMA_OK) { 20181ad8388SMartin Matuska const size_t write_size = BUFSIZ - strm->avail_out; 20281ad8388SMartin Matuska 20381ad8388SMartin Matuska if (fwrite(out_buf, 1, write_size, stdout) 20481ad8388SMartin Matuska != write_size) { 20581ad8388SMartin Matuska // Wouldn't be a surprise if writing to stderr 20681ad8388SMartin Matuska // would fail too but at least try to show an 20781ad8388SMartin Matuska // error message. 20881ad8388SMartin Matuska my_errorf("Cannot write to standard output: " 20981ad8388SMartin Matuska "%s", strerror(errno)); 21081ad8388SMartin Matuska exit(EXIT_FAILURE); 21181ad8388SMartin Matuska } 21281ad8388SMartin Matuska 21381ad8388SMartin Matuska strm->next_out = out_buf; 21481ad8388SMartin Matuska strm->avail_out = BUFSIZ; 21581ad8388SMartin Matuska } 21681ad8388SMartin Matuska 21781ad8388SMartin Matuska if (ret != LZMA_OK) { 21881ad8388SMartin Matuska if (ret == LZMA_STREAM_END) { 21981ad8388SMartin Matuska #ifdef LZMADEC 22081ad8388SMartin Matuska // Check that there's no trailing garbage. 22181ad8388SMartin Matuska if (strm->avail_in != 0 22281ad8388SMartin Matuska || fread(in_buf, 1, 1, file) 22381ad8388SMartin Matuska != 0 22481ad8388SMartin Matuska || !feof(file)) 22581ad8388SMartin Matuska ret = LZMA_DATA_ERROR; 22681ad8388SMartin Matuska else 22781ad8388SMartin Matuska return; 22881ad8388SMartin Matuska #else 22981ad8388SMartin Matuska // lzma_stream_decoder() already guarantees 23081ad8388SMartin Matuska // that there's no trailing garbage. 23181ad8388SMartin Matuska assert(strm->avail_in == 0); 23281ad8388SMartin Matuska assert(action == LZMA_FINISH); 23381ad8388SMartin Matuska assert(feof(file)); 23481ad8388SMartin Matuska return; 23581ad8388SMartin Matuska #endif 23681ad8388SMartin Matuska } 23781ad8388SMartin Matuska 23881ad8388SMartin Matuska const char *msg; 23981ad8388SMartin Matuska switch (ret) { 24081ad8388SMartin Matuska case LZMA_MEM_ERROR: 24181ad8388SMartin Matuska msg = strerror(ENOMEM); 24281ad8388SMartin Matuska break; 24381ad8388SMartin Matuska 24481ad8388SMartin Matuska case LZMA_FORMAT_ERROR: 24581ad8388SMartin Matuska msg = "File format not recognized"; 24681ad8388SMartin Matuska break; 24781ad8388SMartin Matuska 24881ad8388SMartin Matuska case LZMA_OPTIONS_ERROR: 24981ad8388SMartin Matuska // FIXME: Better message? 25081ad8388SMartin Matuska msg = "Unsupported compression options"; 25181ad8388SMartin Matuska break; 25281ad8388SMartin Matuska 25381ad8388SMartin Matuska case LZMA_DATA_ERROR: 25481ad8388SMartin Matuska msg = "File is corrupt"; 25581ad8388SMartin Matuska break; 25681ad8388SMartin Matuska 25781ad8388SMartin Matuska case LZMA_BUF_ERROR: 25881ad8388SMartin Matuska msg = "Unexpected end of input"; 25981ad8388SMartin Matuska break; 26081ad8388SMartin Matuska 26181ad8388SMartin Matuska default: 26281ad8388SMartin Matuska msg = "Internal error (bug)"; 26381ad8388SMartin Matuska break; 26481ad8388SMartin Matuska } 26581ad8388SMartin Matuska 26681ad8388SMartin Matuska my_errorf("%s: %s", filename, msg); 26781ad8388SMartin Matuska exit(EXIT_FAILURE); 26881ad8388SMartin Matuska } 26981ad8388SMartin Matuska } 27081ad8388SMartin Matuska } 27181ad8388SMartin Matuska 27281ad8388SMartin Matuska 27381ad8388SMartin Matuska int 27481ad8388SMartin Matuska main(int argc, char **argv) 27581ad8388SMartin Matuska { 27681ad8388SMartin Matuska // Initialize progname which we will be used in error messages. 27781ad8388SMartin Matuska tuklib_progname_init(argv); 27881ad8388SMartin Matuska 27981ad8388SMartin Matuska // Parse the command line options. 28081ad8388SMartin Matuska parse_options(argc, argv); 28181ad8388SMartin Matuska 28281ad8388SMartin Matuska // The same lzma_stream is used for all files that we decode. This way 28381ad8388SMartin Matuska // we don't need to reallocate memory for every file if they use same 28481ad8388SMartin Matuska // compression settings. 28581ad8388SMartin Matuska lzma_stream strm = LZMA_STREAM_INIT; 28681ad8388SMartin Matuska 28781ad8388SMartin Matuska // Some systems require setting stdin and stdout to binary mode. 28881ad8388SMartin Matuska #ifdef TUKLIB_DOSLIKE 28981ad8388SMartin Matuska setmode(fileno(stdin), O_BINARY); 29081ad8388SMartin Matuska setmode(fileno(stdout), O_BINARY); 29181ad8388SMartin Matuska #endif 29281ad8388SMartin Matuska 29381ad8388SMartin Matuska if (optind == argc) { 29481ad8388SMartin Matuska // No filenames given, decode from stdin. 29581ad8388SMartin Matuska uncompress(&strm, stdin, "(stdin)"); 29681ad8388SMartin Matuska } else { 29781ad8388SMartin Matuska // Loop through the filenames given on the command line. 29881ad8388SMartin Matuska do { 29981ad8388SMartin Matuska // "-" indicates stdin. 30081ad8388SMartin Matuska if (strcmp(argv[optind], "-") == 0) { 30181ad8388SMartin Matuska uncompress(&strm, stdin, "(stdin)"); 30281ad8388SMartin Matuska } else { 30381ad8388SMartin Matuska FILE *file = fopen(argv[optind], "rb"); 30481ad8388SMartin Matuska if (file == NULL) { 30581ad8388SMartin Matuska my_errorf("%s: %s", argv[optind], 30681ad8388SMartin Matuska strerror(errno)); 30781ad8388SMartin Matuska exit(EXIT_FAILURE); 30881ad8388SMartin Matuska } 30981ad8388SMartin Matuska 31081ad8388SMartin Matuska uncompress(&strm, file, argv[optind]); 31181ad8388SMartin Matuska fclose(file); 31281ad8388SMartin Matuska } 31381ad8388SMartin Matuska } while (++optind < argc); 31481ad8388SMartin Matuska } 31581ad8388SMartin Matuska 31681ad8388SMartin Matuska #ifndef NDEBUG 31781ad8388SMartin Matuska // Free the memory only when debugging. Freeing wastes some time, 31881ad8388SMartin Matuska // but allows detecting possible memory leaks with Valgrind. 31981ad8388SMartin Matuska lzma_end(&strm); 32081ad8388SMartin Matuska #endif 32181ad8388SMartin Matuska 32281ad8388SMartin Matuska tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 32381ad8388SMartin Matuska } 324