xref: /freebsd/contrib/xz/src/xzdec/xzdec.c (revision 9f44a47fd07924afc035991af15d84e6585dea4f)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "sysdefs.h"
14 #include "lzma.h"
15 
16 #include <stdarg.h>
17 #include <errno.h>
18 #include <stdio.h>
19 #include <unistd.h>
20 
21 #include "getopt.h"
22 #include "tuklib_progname.h"
23 #include "tuklib_exit.h"
24 
25 #ifdef TUKLIB_DOSLIKE
26 #	include <fcntl.h>
27 #	include <io.h>
28 #endif
29 
30 
// Name of the container format handled by this build. It is pasted into
// the usage text and into the first line printed by --version.
#if defined(LZMADEC)
#	define TOOL_FORMAT "lzma"
#else
#	define TOOL_FORMAT "xz"
#endif


/// Verbosity counter for diagnostics. It starts at 2 and every --quiet
/// lowers it by one (never below zero). my_errorf() stays silent once the
/// counter is zero, that is, after --quiet has been given at least twice.
static int display_errors = 2;
41 
42 
43 static void lzma_attribute((__format__(__printf__, 1, 2)))
44 my_errorf(const char *fmt, ...)
45 {
46 	va_list ap;
47 	va_start(ap, fmt);
48 
49 	if (display_errors) {
50 		fprintf(stderr, "%s: ", progname);
51 		vfprintf(stderr, fmt, ap);
52 		fprintf(stderr, "\n");
53 	}
54 
55 	va_end(ap);
56 	return;
57 }
58 
59 
60 static void lzma_attribute((__noreturn__))
61 help(void)
62 {
63 	printf(
64 "Usage: %s [OPTION]... [FILE]...\n"
65 "Decompress files in the ." TOOL_FORMAT " format to standard output.\n"
66 "\n"
67 "  -d, --decompress   (ignored, only decompression is supported)\n"
68 "  -k, --keep         (ignored, files are never deleted)\n"
69 "  -c, --stdout       (ignored, output is always written to standard output)\n"
70 "  -q, --quiet        specify *twice* to suppress errors\n"
71 "  -Q, --no-warn      (ignored, the exit status 2 is never used)\n"
72 "  -h, --help         display this help and exit\n"
73 "  -V, --version      display the version number and exit\n"
74 "\n"
75 "With no FILE, or when FILE is -, read standard input.\n"
76 "\n"
77 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
78 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);
79 
80 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
81 }
82 
83 
84 static void lzma_attribute((__noreturn__))
85 version(void)
86 {
87 	printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
88 			"liblzma %s\n", lzma_version_string());
89 
90 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
91 }
92 
93 
94 /// Parses command line options.
95 static void
96 parse_options(int argc, char **argv)
97 {
98 	static const char short_opts[] = "cdkM:hqQV";
99 	static const struct option long_opts[] = {
100 		{ "stdout",       no_argument,         NULL, 'c' },
101 		{ "to-stdout",    no_argument,         NULL, 'c' },
102 		{ "decompress",   no_argument,         NULL, 'd' },
103 		{ "uncompress",   no_argument,         NULL, 'd' },
104 		{ "keep",         no_argument,         NULL, 'k' },
105 		{ "quiet",        no_argument,         NULL, 'q' },
106 		{ "no-warn",      no_argument,         NULL, 'Q' },
107 		{ "help",         no_argument,         NULL, 'h' },
108 		{ "version",      no_argument,         NULL, 'V' },
109 		{ NULL,           0,                   NULL, 0   }
110 	};
111 
112 	int c;
113 
114 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
115 			!= -1) {
116 		switch (c) {
117 		case 'c':
118 		case 'd':
119 		case 'k':
120 		case 'Q':
121 			break;
122 
123 		case 'q':
124 			if (display_errors > 0)
125 				--display_errors;
126 
127 			break;
128 
129 		case 'h':
130 			help();
131 
132 		case 'V':
133 			version();
134 
135 		default:
136 			exit(EXIT_FAILURE);
137 		}
138 	}
139 
140 	return;
141 }
142 
143 
144 static void
145 uncompress(lzma_stream *strm, FILE *file, const char *filename)
146 {
147 	lzma_ret ret;
148 
149 	// Initialize the decoder
150 #ifdef LZMADEC
151 	ret = lzma_alone_decoder(strm, UINT64_MAX);
152 #else
153 	ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
154 #endif
155 
156 	// The only reasonable error here is LZMA_MEM_ERROR.
157 	if (ret != LZMA_OK) {
158 		my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
159 				: "Internal error (bug)");
160 		exit(EXIT_FAILURE);
161 	}
162 
163 	// Input and output buffers
164 	uint8_t in_buf[BUFSIZ];
165 	uint8_t out_buf[BUFSIZ];
166 
167 	strm->avail_in = 0;
168 	strm->next_out = out_buf;
169 	strm->avail_out = BUFSIZ;
170 
171 	lzma_action action = LZMA_RUN;
172 
173 	while (true) {
174 		if (strm->avail_in == 0) {
175 			strm->next_in = in_buf;
176 			strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
177 
178 			if (ferror(file)) {
179 				// POSIX says that fread() sets errno if
180 				// an error occurred. ferror() doesn't
181 				// touch errno.
182 				my_errorf("%s: Error reading input file: %s",
183 						filename, strerror(errno));
184 				exit(EXIT_FAILURE);
185 			}
186 
187 #ifndef LZMADEC
188 			// When using LZMA_CONCATENATED, we need to tell
189 			// liblzma when it has got all the input.
190 			if (feof(file))
191 				action = LZMA_FINISH;
192 #endif
193 		}
194 
195 		ret = lzma_code(strm, action);
196 
197 		// Write and check write error before checking decoder error.
198 		// This way as much data as possible gets written to output
199 		// even if decoder detected an error.
200 		if (strm->avail_out == 0 || ret != LZMA_OK) {
201 			const size_t write_size = BUFSIZ - strm->avail_out;
202 
203 			if (fwrite(out_buf, 1, write_size, stdout)
204 					!= write_size) {
205 				// Wouldn't be a surprise if writing to stderr
206 				// would fail too but at least try to show an
207 				// error message.
208 				my_errorf("Cannot write to standard output: "
209 						"%s", strerror(errno));
210 				exit(EXIT_FAILURE);
211 			}
212 
213 			strm->next_out = out_buf;
214 			strm->avail_out = BUFSIZ;
215 		}
216 
217 		if (ret != LZMA_OK) {
218 			if (ret == LZMA_STREAM_END) {
219 #ifdef LZMADEC
220 				// Check that there's no trailing garbage.
221 				if (strm->avail_in != 0
222 						|| fread(in_buf, 1, 1, file)
223 							!= 0
224 						|| !feof(file))
225 					ret = LZMA_DATA_ERROR;
226 				else
227 					return;
228 #else
229 				// lzma_stream_decoder() already guarantees
230 				// that there's no trailing garbage.
231 				assert(strm->avail_in == 0);
232 				assert(action == LZMA_FINISH);
233 				assert(feof(file));
234 				return;
235 #endif
236 			}
237 
238 			const char *msg;
239 			switch (ret) {
240 			case LZMA_MEM_ERROR:
241 				msg = strerror(ENOMEM);
242 				break;
243 
244 			case LZMA_FORMAT_ERROR:
245 				msg = "File format not recognized";
246 				break;
247 
248 			case LZMA_OPTIONS_ERROR:
249 				// FIXME: Better message?
250 				msg = "Unsupported compression options";
251 				break;
252 
253 			case LZMA_DATA_ERROR:
254 				msg = "File is corrupt";
255 				break;
256 
257 			case LZMA_BUF_ERROR:
258 				msg = "Unexpected end of input";
259 				break;
260 
261 			default:
262 				msg = "Internal error (bug)";
263 				break;
264 			}
265 
266 			my_errorf("%s: %s", filename, msg);
267 			exit(EXIT_FAILURE);
268 		}
269 	}
270 }
271 
272 
273 int
274 main(int argc, char **argv)
275 {
276 	// Initialize progname which we will be used in error messages.
277 	tuklib_progname_init(argv);
278 
279 	// Parse the command line options.
280 	parse_options(argc, argv);
281 
282 	// The same lzma_stream is used for all files that we decode. This way
283 	// we don't need to reallocate memory for every file if they use same
284 	// compression settings.
285 	lzma_stream strm = LZMA_STREAM_INIT;
286 
287 	// Some systems require setting stdin and stdout to binary mode.
288 #ifdef TUKLIB_DOSLIKE
289 	setmode(fileno(stdin), O_BINARY);
290 	setmode(fileno(stdout), O_BINARY);
291 #endif
292 
293 	if (optind == argc) {
294 		// No filenames given, decode from stdin.
295 		uncompress(&strm, stdin, "(stdin)");
296 	} else {
297 		// Loop through the filenames given on the command line.
298 		do {
299 			// "-" indicates stdin.
300 			if (strcmp(argv[optind], "-") == 0) {
301 				uncompress(&strm, stdin, "(stdin)");
302 			} else {
303 				FILE *file = fopen(argv[optind], "rb");
304 				if (file == NULL) {
305 					my_errorf("%s: %s", argv[optind],
306 							strerror(errno));
307 					exit(EXIT_FAILURE);
308 				}
309 
310 				uncompress(&strm, file, argv[optind]);
311 				fclose(file);
312 			}
313 		} while (++optind < argc);
314 	}
315 
316 #ifndef NDEBUG
317 	// Free the memory only when debugging. Freeing wastes some time,
318 	// but allows detecting possible memory leaks with Valgrind.
319 	lzma_end(&strm);
320 #endif
321 
322 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
323 }
324