xref: /freebsd/contrib/xz/src/xzdec/xzdec.c (revision 42b10a37c6580e4fa7afe04e16a7a6e82188215f)
181ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
281ad8388SMartin Matuska //
381ad8388SMartin Matuska /// \file       xzdec.c
481ad8388SMartin Matuska /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
581ad8388SMartin Matuska //
681ad8388SMartin Matuska //  Author:     Lasse Collin
781ad8388SMartin Matuska //
881ad8388SMartin Matuska //  This file has been put into the public domain.
981ad8388SMartin Matuska //  You can do whatever you want with this file.
1081ad8388SMartin Matuska //
1181ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
1281ad8388SMartin Matuska 
1381ad8388SMartin Matuska #include "sysdefs.h"
1481ad8388SMartin Matuska #include "lzma.h"
1581ad8388SMartin Matuska 
1681ad8388SMartin Matuska #include <stdarg.h>
1781ad8388SMartin Matuska #include <errno.h>
1881ad8388SMartin Matuska #include <stdio.h>
1981ad8388SMartin Matuska #include <unistd.h>
2081ad8388SMartin Matuska 
2181ad8388SMartin Matuska #include "getopt.h"
2281ad8388SMartin Matuska #include "tuklib_progname.h"
2381ad8388SMartin Matuska #include "tuklib_exit.h"
2481ad8388SMartin Matuska 
2581ad8388SMartin Matuska #ifdef TUKLIB_DOSLIKE
2681ad8388SMartin Matuska #	include <fcntl.h>
2781ad8388SMartin Matuska #	include <io.h>
2881ad8388SMartin Matuska #endif
2981ad8388SMartin Matuska 
3081ad8388SMartin Matuska 
// Name of the file format this build decodes. It is pasted into the
// usage text (".xz"/".lzma") and the version line ("xzdec"/"lzmadec").
#ifndef LZMADEC
#	define TOOL_FORMAT "xz"
#else
#	define TOOL_FORMAT "lzma"
#endif
3681ad8388SMartin Matuska 
3781ad8388SMartin Matuska 
/// Verbosity counter for error messages. my_errorf() prints only while
/// this is non-zero. It starts at 2 and every --quiet (-q) on the command
/// line lowers it by one (never below zero), so messages are silenced
/// once --quiet has been given at least twice.
static unsigned int display_errors = 2;
4181ad8388SMartin Matuska 
4281ad8388SMartin Matuska 
43e24134bcSMartin Matuska static void lzma_attribute((__format__(__printf__, 1, 2)))
4481ad8388SMartin Matuska my_errorf(const char *fmt, ...)
4581ad8388SMartin Matuska {
4681ad8388SMartin Matuska 	va_list ap;
4781ad8388SMartin Matuska 	va_start(ap, fmt);
4881ad8388SMartin Matuska 
4981ad8388SMartin Matuska 	if (display_errors) {
5081ad8388SMartin Matuska 		fprintf(stderr, "%s: ", progname);
5181ad8388SMartin Matuska 		vfprintf(stderr, fmt, ap);
5281ad8388SMartin Matuska 		fprintf(stderr, "\n");
5381ad8388SMartin Matuska 	}
5481ad8388SMartin Matuska 
5581ad8388SMartin Matuska 	va_end(ap);
5681ad8388SMartin Matuska 	return;
5781ad8388SMartin Matuska }
5881ad8388SMartin Matuska 
5981ad8388SMartin Matuska 
60e24134bcSMartin Matuska static void lzma_attribute((__noreturn__))
6181ad8388SMartin Matuska help(void)
6281ad8388SMartin Matuska {
6381ad8388SMartin Matuska 	printf(
6481ad8388SMartin Matuska "Usage: %s [OPTION]... [FILE]...\n"
65*42b10a37SXin LI "Decompress files in the ." TOOL_FORMAT " format to standard output.\n"
6681ad8388SMartin Matuska "\n"
67*42b10a37SXin LI "  -d, --decompress   (ignored, only decompression is supported)\n"
68*42b10a37SXin LI "  -k, --keep         (ignored, files are never deleted)\n"
69*42b10a37SXin LI "  -c, --stdout       (ignored, output is always written to standard output)\n"
7081ad8388SMartin Matuska "  -q, --quiet        specify *twice* to suppress errors\n"
71*42b10a37SXin LI "  -Q, --no-warn      (ignored, the exit status 2 is never used)\n"
7281ad8388SMartin Matuska "  -h, --help         display this help and exit\n"
7381ad8388SMartin Matuska "  -V, --version      display the version number and exit\n"
7481ad8388SMartin Matuska "\n"
7581ad8388SMartin Matuska "With no FILE, or when FILE is -, read standard input.\n"
7681ad8388SMartin Matuska "\n"
7781ad8388SMartin Matuska "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
78e0f0e66dSMartin Matuska PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);
79e0f0e66dSMartin Matuska 
8081ad8388SMartin Matuska 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
8181ad8388SMartin Matuska }
8281ad8388SMartin Matuska 
8381ad8388SMartin Matuska 
84e24134bcSMartin Matuska static void lzma_attribute((__noreturn__))
8581ad8388SMartin Matuska version(void)
8681ad8388SMartin Matuska {
8781ad8388SMartin Matuska 	printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
8881ad8388SMartin Matuska 			"liblzma %s\n", lzma_version_string());
8981ad8388SMartin Matuska 
9081ad8388SMartin Matuska 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
9181ad8388SMartin Matuska }
9281ad8388SMartin Matuska 
9381ad8388SMartin Matuska 
9481ad8388SMartin Matuska /// Parses command line options.
9581ad8388SMartin Matuska static void
9681ad8388SMartin Matuska parse_options(int argc, char **argv)
9781ad8388SMartin Matuska {
9881ad8388SMartin Matuska 	static const char short_opts[] = "cdkM:hqQV";
9981ad8388SMartin Matuska 	static const struct option long_opts[] = {
10081ad8388SMartin Matuska 		{ "stdout",       no_argument,         NULL, 'c' },
10181ad8388SMartin Matuska 		{ "to-stdout",    no_argument,         NULL, 'c' },
10281ad8388SMartin Matuska 		{ "decompress",   no_argument,         NULL, 'd' },
10381ad8388SMartin Matuska 		{ "uncompress",   no_argument,         NULL, 'd' },
10481ad8388SMartin Matuska 		{ "keep",         no_argument,         NULL, 'k' },
10581ad8388SMartin Matuska 		{ "quiet",        no_argument,         NULL, 'q' },
10681ad8388SMartin Matuska 		{ "no-warn",      no_argument,         NULL, 'Q' },
10781ad8388SMartin Matuska 		{ "help",         no_argument,         NULL, 'h' },
10881ad8388SMartin Matuska 		{ "version",      no_argument,         NULL, 'V' },
10981ad8388SMartin Matuska 		{ NULL,           0,                   NULL, 0   }
11081ad8388SMartin Matuska 	};
11181ad8388SMartin Matuska 
11281ad8388SMartin Matuska 	int c;
11381ad8388SMartin Matuska 
11481ad8388SMartin Matuska 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
11581ad8388SMartin Matuska 			!= -1) {
11681ad8388SMartin Matuska 		switch (c) {
11781ad8388SMartin Matuska 		case 'c':
11881ad8388SMartin Matuska 		case 'd':
11981ad8388SMartin Matuska 		case 'k':
12081ad8388SMartin Matuska 		case 'Q':
12181ad8388SMartin Matuska 			break;
12281ad8388SMartin Matuska 
12381ad8388SMartin Matuska 		case 'q':
12481ad8388SMartin Matuska 			if (display_errors > 0)
12581ad8388SMartin Matuska 				--display_errors;
12681ad8388SMartin Matuska 
12781ad8388SMartin Matuska 			break;
12881ad8388SMartin Matuska 
12981ad8388SMartin Matuska 		case 'h':
13081ad8388SMartin Matuska 			help();
13181ad8388SMartin Matuska 
13281ad8388SMartin Matuska 		case 'V':
13381ad8388SMartin Matuska 			version();
13481ad8388SMartin Matuska 
13581ad8388SMartin Matuska 		default:
13681ad8388SMartin Matuska 			exit(EXIT_FAILURE);
13781ad8388SMartin Matuska 		}
13881ad8388SMartin Matuska 	}
13981ad8388SMartin Matuska 
14081ad8388SMartin Matuska 	return;
14181ad8388SMartin Matuska }
14281ad8388SMartin Matuska 
14381ad8388SMartin Matuska 
14481ad8388SMartin Matuska static void
14581ad8388SMartin Matuska uncompress(lzma_stream *strm, FILE *file, const char *filename)
14681ad8388SMartin Matuska {
14781ad8388SMartin Matuska 	lzma_ret ret;
14881ad8388SMartin Matuska 
14981ad8388SMartin Matuska 	// Initialize the decoder
15081ad8388SMartin Matuska #ifdef LZMADEC
151e0f0e66dSMartin Matuska 	ret = lzma_alone_decoder(strm, UINT64_MAX);
15281ad8388SMartin Matuska #else
153e0f0e66dSMartin Matuska 	ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
15481ad8388SMartin Matuska #endif
15581ad8388SMartin Matuska 
15681ad8388SMartin Matuska 	// The only reasonable error here is LZMA_MEM_ERROR.
15781ad8388SMartin Matuska 	if (ret != LZMA_OK) {
15881ad8388SMartin Matuska 		my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
15981ad8388SMartin Matuska 				: "Internal error (bug)");
16081ad8388SMartin Matuska 		exit(EXIT_FAILURE);
16181ad8388SMartin Matuska 	}
16281ad8388SMartin Matuska 
16381ad8388SMartin Matuska 	// Input and output buffers
16481ad8388SMartin Matuska 	uint8_t in_buf[BUFSIZ];
16581ad8388SMartin Matuska 	uint8_t out_buf[BUFSIZ];
16681ad8388SMartin Matuska 
16781ad8388SMartin Matuska 	strm->avail_in = 0;
16881ad8388SMartin Matuska 	strm->next_out = out_buf;
16981ad8388SMartin Matuska 	strm->avail_out = BUFSIZ;
17081ad8388SMartin Matuska 
17181ad8388SMartin Matuska 	lzma_action action = LZMA_RUN;
17281ad8388SMartin Matuska 
17381ad8388SMartin Matuska 	while (true) {
17481ad8388SMartin Matuska 		if (strm->avail_in == 0) {
17581ad8388SMartin Matuska 			strm->next_in = in_buf;
17681ad8388SMartin Matuska 			strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
17781ad8388SMartin Matuska 
17881ad8388SMartin Matuska 			if (ferror(file)) {
17981ad8388SMartin Matuska 				// POSIX says that fread() sets errno if
18081ad8388SMartin Matuska 				// an error occurred. ferror() doesn't
18181ad8388SMartin Matuska 				// touch errno.
18281ad8388SMartin Matuska 				my_errorf("%s: Error reading input file: %s",
18381ad8388SMartin Matuska 						filename, strerror(errno));
18481ad8388SMartin Matuska 				exit(EXIT_FAILURE);
18581ad8388SMartin Matuska 			}
18681ad8388SMartin Matuska 
18781ad8388SMartin Matuska #ifndef LZMADEC
18881ad8388SMartin Matuska 			// When using LZMA_CONCATENATED, we need to tell
18981ad8388SMartin Matuska 			// liblzma when it has got all the input.
19081ad8388SMartin Matuska 			if (feof(file))
19181ad8388SMartin Matuska 				action = LZMA_FINISH;
19281ad8388SMartin Matuska #endif
19381ad8388SMartin Matuska 		}
19481ad8388SMartin Matuska 
19581ad8388SMartin Matuska 		ret = lzma_code(strm, action);
19681ad8388SMartin Matuska 
19781ad8388SMartin Matuska 		// Write and check write error before checking decoder error.
19881ad8388SMartin Matuska 		// This way as much data as possible gets written to output
19981ad8388SMartin Matuska 		// even if decoder detected an error.
20081ad8388SMartin Matuska 		if (strm->avail_out == 0 || ret != LZMA_OK) {
20181ad8388SMartin Matuska 			const size_t write_size = BUFSIZ - strm->avail_out;
20281ad8388SMartin Matuska 
20381ad8388SMartin Matuska 			if (fwrite(out_buf, 1, write_size, stdout)
20481ad8388SMartin Matuska 					!= write_size) {
20581ad8388SMartin Matuska 				// Wouldn't be a surprise if writing to stderr
20681ad8388SMartin Matuska 				// would fail too but at least try to show an
20781ad8388SMartin Matuska 				// error message.
20881ad8388SMartin Matuska 				my_errorf("Cannot write to standard output: "
20981ad8388SMartin Matuska 						"%s", strerror(errno));
21081ad8388SMartin Matuska 				exit(EXIT_FAILURE);
21181ad8388SMartin Matuska 			}
21281ad8388SMartin Matuska 
21381ad8388SMartin Matuska 			strm->next_out = out_buf;
21481ad8388SMartin Matuska 			strm->avail_out = BUFSIZ;
21581ad8388SMartin Matuska 		}
21681ad8388SMartin Matuska 
21781ad8388SMartin Matuska 		if (ret != LZMA_OK) {
21881ad8388SMartin Matuska 			if (ret == LZMA_STREAM_END) {
21981ad8388SMartin Matuska #ifdef LZMADEC
22081ad8388SMartin Matuska 				// Check that there's no trailing garbage.
22181ad8388SMartin Matuska 				if (strm->avail_in != 0
22281ad8388SMartin Matuska 						|| fread(in_buf, 1, 1, file)
22381ad8388SMartin Matuska 							!= 0
22481ad8388SMartin Matuska 						|| !feof(file))
22581ad8388SMartin Matuska 					ret = LZMA_DATA_ERROR;
22681ad8388SMartin Matuska 				else
22781ad8388SMartin Matuska 					return;
22881ad8388SMartin Matuska #else
22981ad8388SMartin Matuska 				// lzma_stream_decoder() already guarantees
23081ad8388SMartin Matuska 				// that there's no trailing garbage.
23181ad8388SMartin Matuska 				assert(strm->avail_in == 0);
23281ad8388SMartin Matuska 				assert(action == LZMA_FINISH);
23381ad8388SMartin Matuska 				assert(feof(file));
23481ad8388SMartin Matuska 				return;
23581ad8388SMartin Matuska #endif
23681ad8388SMartin Matuska 			}
23781ad8388SMartin Matuska 
23881ad8388SMartin Matuska 			const char *msg;
23981ad8388SMartin Matuska 			switch (ret) {
24081ad8388SMartin Matuska 			case LZMA_MEM_ERROR:
24181ad8388SMartin Matuska 				msg = strerror(ENOMEM);
24281ad8388SMartin Matuska 				break;
24381ad8388SMartin Matuska 
24481ad8388SMartin Matuska 			case LZMA_FORMAT_ERROR:
24581ad8388SMartin Matuska 				msg = "File format not recognized";
24681ad8388SMartin Matuska 				break;
24781ad8388SMartin Matuska 
24881ad8388SMartin Matuska 			case LZMA_OPTIONS_ERROR:
24981ad8388SMartin Matuska 				// FIXME: Better message?
25081ad8388SMartin Matuska 				msg = "Unsupported compression options";
25181ad8388SMartin Matuska 				break;
25281ad8388SMartin Matuska 
25381ad8388SMartin Matuska 			case LZMA_DATA_ERROR:
25481ad8388SMartin Matuska 				msg = "File is corrupt";
25581ad8388SMartin Matuska 				break;
25681ad8388SMartin Matuska 
25781ad8388SMartin Matuska 			case LZMA_BUF_ERROR:
25881ad8388SMartin Matuska 				msg = "Unexpected end of input";
25981ad8388SMartin Matuska 				break;
26081ad8388SMartin Matuska 
26181ad8388SMartin Matuska 			default:
26281ad8388SMartin Matuska 				msg = "Internal error (bug)";
26381ad8388SMartin Matuska 				break;
26481ad8388SMartin Matuska 			}
26581ad8388SMartin Matuska 
26681ad8388SMartin Matuska 			my_errorf("%s: %s", filename, msg);
26781ad8388SMartin Matuska 			exit(EXIT_FAILURE);
26881ad8388SMartin Matuska 		}
26981ad8388SMartin Matuska 	}
27081ad8388SMartin Matuska }
27181ad8388SMartin Matuska 
27281ad8388SMartin Matuska 
27381ad8388SMartin Matuska int
27481ad8388SMartin Matuska main(int argc, char **argv)
27581ad8388SMartin Matuska {
27681ad8388SMartin Matuska 	// Initialize progname which we will be used in error messages.
27781ad8388SMartin Matuska 	tuklib_progname_init(argv);
27881ad8388SMartin Matuska 
27981ad8388SMartin Matuska 	// Parse the command line options.
28081ad8388SMartin Matuska 	parse_options(argc, argv);
28181ad8388SMartin Matuska 
28281ad8388SMartin Matuska 	// The same lzma_stream is used for all files that we decode. This way
28381ad8388SMartin Matuska 	// we don't need to reallocate memory for every file if they use same
28481ad8388SMartin Matuska 	// compression settings.
28581ad8388SMartin Matuska 	lzma_stream strm = LZMA_STREAM_INIT;
28681ad8388SMartin Matuska 
28781ad8388SMartin Matuska 	// Some systems require setting stdin and stdout to binary mode.
28881ad8388SMartin Matuska #ifdef TUKLIB_DOSLIKE
28981ad8388SMartin Matuska 	setmode(fileno(stdin), O_BINARY);
29081ad8388SMartin Matuska 	setmode(fileno(stdout), O_BINARY);
29181ad8388SMartin Matuska #endif
29281ad8388SMartin Matuska 
29381ad8388SMartin Matuska 	if (optind == argc) {
29481ad8388SMartin Matuska 		// No filenames given, decode from stdin.
29581ad8388SMartin Matuska 		uncompress(&strm, stdin, "(stdin)");
29681ad8388SMartin Matuska 	} else {
29781ad8388SMartin Matuska 		// Loop through the filenames given on the command line.
29881ad8388SMartin Matuska 		do {
29981ad8388SMartin Matuska 			// "-" indicates stdin.
30081ad8388SMartin Matuska 			if (strcmp(argv[optind], "-") == 0) {
30181ad8388SMartin Matuska 				uncompress(&strm, stdin, "(stdin)");
30281ad8388SMartin Matuska 			} else {
30381ad8388SMartin Matuska 				FILE *file = fopen(argv[optind], "rb");
30481ad8388SMartin Matuska 				if (file == NULL) {
30581ad8388SMartin Matuska 					my_errorf("%s: %s", argv[optind],
30681ad8388SMartin Matuska 							strerror(errno));
30781ad8388SMartin Matuska 					exit(EXIT_FAILURE);
30881ad8388SMartin Matuska 				}
30981ad8388SMartin Matuska 
31081ad8388SMartin Matuska 				uncompress(&strm, file, argv[optind]);
31181ad8388SMartin Matuska 				fclose(file);
31281ad8388SMartin Matuska 			}
31381ad8388SMartin Matuska 		} while (++optind < argc);
31481ad8388SMartin Matuska 	}
31581ad8388SMartin Matuska 
31681ad8388SMartin Matuska #ifndef NDEBUG
31781ad8388SMartin Matuska 	// Free the memory only when debugging. Freeing wastes some time,
31881ad8388SMartin Matuska 	// but allows detecting possible memory leaks with Valgrind.
31981ad8388SMartin Matuska 	lzma_end(&strm);
32081ad8388SMartin Matuska #endif
32181ad8388SMartin Matuska 
32281ad8388SMartin Matuska 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
32381ad8388SMartin Matuska }
324