xref: /freebsd/contrib/xz/src/xzdec/xzdec.c (revision 7aa383846770374466b1dcb2cefd71bde9acf463)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       xzdec.c
4 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "sysdefs.h"
14 #include "lzma.h"
15 
16 #include <stdarg.h>
17 #include <errno.h>
18 #include <stdio.h>
19 #include <unistd.h>
20 
21 #include "getopt.h"
22 #include "tuklib_progname.h"
23 #include "tuklib_exit.h"
24 
25 #ifdef TUKLIB_DOSLIKE
26 #	include <fcntl.h>
27 #	include <io.h>
28 #endif
29 
30 
31 #ifdef LZMADEC
32 #	define TOOL_FORMAT "lzma"
33 #else
34 #	define TOOL_FORMAT "xz"
35 #endif
36 
37 
38 /// Maximum number of bytes of memory the decoder is allowed to use
39 static uint64_t memlimit;
40 
41 /// Total amount of physical RAM
42 static uint64_t total_ram;
43 
44 /// Error messages are suppressed if this is zero, which is the case when
45 /// --quiet has been given at least twice.
46 static unsigned int display_errors = 2;
47 
48 
49 static void lzma_attribute((format(printf, 1, 2)))
50 my_errorf(const char *fmt, ...)
51 {
52 	va_list ap;
53 	va_start(ap, fmt);
54 
55 	if (display_errors) {
56 		fprintf(stderr, "%s: ", progname);
57 		vfprintf(stderr, fmt, ap);
58 		fprintf(stderr, "\n");
59 	}
60 
61 	va_end(ap);
62 	return;
63 }
64 
65 
66 static void lzma_attribute((noreturn))
67 help(void)
68 {
69 	// Round up to the next MiB and do it correctly also with UINT64_MAX.
70 	const uint64_t mem_mib = (memlimit >> 20)
71 			+ ((memlimit & ((UINT32_C(1) << 20) - 1)) != 0);
72 
73 	printf(
74 "Usage: %s [OPTION]... [FILE]...\n"
75 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
76 "\n"
77 "  -c, --stdout       (ignored)\n"
78 "  -d, --decompress   (ignored)\n"
79 "  -k, --keep         (ignored)\n"
80 "  -M, --memory=NUM   use NUM bytes of memory at maximum (0 means default)\n"
81 "  -q, --quiet        specify *twice* to suppress errors\n"
82 "  -Q, --no-warn      (ignored)\n"
83 "  -h, --help         display this help and exit\n"
84 "  -V, --version      display the version number and exit\n"
85 "\n"
86 "With no FILE, or when FILE is -, read standard input.\n"
87 "\n"
88 "On this system and configuration, this program will use a maximum of roughly\n"
89 "%" PRIu64 " MiB RAM.\n"
90 "\n"
91 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
92 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname, mem_mib);
93 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
94 }
95 
96 
97 static void lzma_attribute((noreturn))
98 version(void)
99 {
100 	printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
101 			"liblzma %s\n", lzma_version_string());
102 
103 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
104 }
105 
106 
107 /// Find out the amount of physical memory (RAM) in the system, and set
108 /// the memory usage limit to the given percentage of RAM.
109 static void
110 memlimit_set_percentage(uint32_t percentage)
111 {
112 	memlimit = percentage * total_ram / 100;
113 	return;
114 }
115 
116 
117 /// Set the memory usage limit to give number of bytes. Zero is a special
118 /// value to indicate the default limit.
119 static void
120 memlimit_set(uint64_t new_memlimit)
121 {
122 	if (new_memlimit != 0) {
123 		memlimit = new_memlimit;
124 	} else {
125 		memlimit = 40 * total_ram / 100;
126 		if (memlimit < UINT64_C(80) * 1024 * 1024) {
127 			memlimit = 80 * total_ram / 100;
128 			if (memlimit > UINT64_C(80) * 1024 * 1024)
129 				memlimit = UINT64_C(80) * 1024 * 1024;
130 		}
131 	}
132 
133 	return;
134 }
135 
136 
137 /// Get the total amount of physical RAM and set the memory usage limit
138 /// to the default value.
139 static void
140 memlimit_init(void)
141 {
142 	// If we cannot determine the amount of RAM, use the assumption
143 	// defined by the configure script.
144 	total_ram = lzma_physmem();
145 	if (total_ram == 0)
146 		total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024;
147 
148 	memlimit_set(0);
149 	return;
150 }
151 
152 
/// \brief      Convert a string to uint64_t
///
/// This is rudely copied from src/xz/util.c and modified a little. :-(
///
/// Accepted input is either the literal string "max" or a non-negative
/// decimal integer, optionally followed by a multiplier suffix:
/// k/K (2^10), m/M (2^20) or g/G (2^30), each of which may be followed by
/// "i", "iB" or "B" (e.g. Ki, KiB, KB). Values that do not fit in uint64_t
/// saturate to UINT64_MAX; the rest of the string is still validated in
/// that case. On invalid input an error message is printed with
/// my_errorf() and the program exits with EXIT_FAILURE.
///
/// \param      value   NUL-terminated string to parse.
/// \param      max     Return value when the string "max" was specified.
///
/// \return     The parsed value, saturated to UINT64_MAX on overflow.
///
static uint64_t
str_to_uint64(const char *value, uint64_t max)
{
	// Accept special value "max".
	if (strcmp(value, "max") == 0)
		return max;

	if (*value < '0' || *value > '9') {
		my_errorf("%s: Value is not a non-negative decimal integer",
				value);
		exit(EXIT_FAILURE);
	}

	uint64_t result = 0;
	bool overflow = false;

	do {
		const uint64_t digit = (uint64_t)(*value - '0');

		// result * 10 + digit fits in uint64_t exactly when
		// result <= (UINT64_MAX - digit) / 10. Once an overflow has
		// been seen, keep consuming digits without accumulating so
		// that whatever follows the number is still validated below
		// instead of being silently accepted.
		if (!overflow && result <= (UINT64_MAX - digit) / 10)
			result = result * 10 + digit;
		else
			overflow = true;

		++value;
	} while (*value >= '0' && *value <= '9');

	// Saturate instead of failing when the number was too big.
	if (overflow)
		result = UINT64_MAX;

	if (*value != '\0') {
		// Look for suffix.
		uint64_t multiplier = 0;
		if (*value == 'k' || *value == 'K')
			multiplier = UINT64_C(1) << 10;
		else if (*value == 'm' || *value == 'M')
			multiplier = UINT64_C(1) << 20;
		else if (*value == 'g' || *value == 'G')
			multiplier = UINT64_C(1) << 30;

		++value;

		// Allow also e.g. Ki, KiB, and KB.
		if (*value != '\0' && strcmp(value, "i") != 0
				&& strcmp(value, "iB") != 0
				&& strcmp(value, "B") != 0)
			multiplier = 0;

		if (multiplier == 0) {
			// value was advanced past the first suffix character;
			// step back so the message shows the whole suffix.
			my_errorf("%s: Invalid suffix", value - 1);
			exit(EXIT_FAILURE);
		}

		// Don't overflow here either.
		if (result > UINT64_MAX / multiplier)
			result = UINT64_MAX;
		else
			result *= multiplier;
	}

	return result;
}
216 
217 
218 /// Parses command line options.
219 static void
220 parse_options(int argc, char **argv)
221 {
222 	static const char short_opts[] = "cdkM:hqQV";
223 	static const struct option long_opts[] = {
224 		{ "stdout",       no_argument,         NULL, 'c' },
225 		{ "to-stdout",    no_argument,         NULL, 'c' },
226 		{ "decompress",   no_argument,         NULL, 'd' },
227 		{ "uncompress",   no_argument,         NULL, 'd' },
228 		{ "keep",         no_argument,         NULL, 'k' },
229 		{ "memory",       required_argument,   NULL, 'M' },
230 		{ "quiet",        no_argument,         NULL, 'q' },
231 		{ "no-warn",      no_argument,         NULL, 'Q' },
232 		{ "help",         no_argument,         NULL, 'h' },
233 		{ "version",      no_argument,         NULL, 'V' },
234 		{ NULL,           0,                   NULL, 0   }
235 	};
236 
237 	int c;
238 
239 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
240 			!= -1) {
241 		switch (c) {
242 		case 'c':
243 		case 'd':
244 		case 'k':
245 		case 'Q':
246 			break;
247 
248 		case 'M': {
249 			// Support specifying the limit as a percentage of
250 			// installed physical RAM.
251 			const size_t len = strlen(optarg);
252 			if (len > 0 && optarg[len - 1] == '%') {
253 				// Memory limit is a percentage of total
254 				// installed RAM.
255 				optarg[len - 1] = '\0';
256 				const uint64_t percentage
257 						= str_to_uint64(optarg, 100);
258 				if (percentage < 1 || percentage > 100) {
259 					my_errorf("Percentage must be in "
260 							"the range [1, 100]");
261 					exit(EXIT_FAILURE);
262 				}
263 
264 				memlimit_set_percentage(percentage);
265 			} else {
266 				memlimit_set(str_to_uint64(
267 						optarg, UINT64_MAX));
268 			}
269 
270 			break;
271 		}
272 
273 		case 'q':
274 			if (display_errors > 0)
275 				--display_errors;
276 
277 			break;
278 
279 		case 'h':
280 			help();
281 
282 		case 'V':
283 			version();
284 
285 		default:
286 			exit(EXIT_FAILURE);
287 		}
288 	}
289 
290 	return;
291 }
292 
293 
294 static void
295 uncompress(lzma_stream *strm, FILE *file, const char *filename)
296 {
297 	lzma_ret ret;
298 
299 	// Initialize the decoder
300 #ifdef LZMADEC
301 	ret = lzma_alone_decoder(strm, memlimit);
302 #else
303 	ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
304 #endif
305 
306 	// The only reasonable error here is LZMA_MEM_ERROR.
307 	// FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
308 	if (ret != LZMA_OK) {
309 		my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
310 				: "Internal error (bug)");
311 		exit(EXIT_FAILURE);
312 	}
313 
314 	// Input and output buffers
315 	uint8_t in_buf[BUFSIZ];
316 	uint8_t out_buf[BUFSIZ];
317 
318 	strm->avail_in = 0;
319 	strm->next_out = out_buf;
320 	strm->avail_out = BUFSIZ;
321 
322 	lzma_action action = LZMA_RUN;
323 
324 	while (true) {
325 		if (strm->avail_in == 0) {
326 			strm->next_in = in_buf;
327 			strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
328 
329 			if (ferror(file)) {
330 				// POSIX says that fread() sets errno if
331 				// an error occurred. ferror() doesn't
332 				// touch errno.
333 				my_errorf("%s: Error reading input file: %s",
334 						filename, strerror(errno));
335 				exit(EXIT_FAILURE);
336 			}
337 
338 #ifndef LZMADEC
339 			// When using LZMA_CONCATENATED, we need to tell
340 			// liblzma when it has got all the input.
341 			if (feof(file))
342 				action = LZMA_FINISH;
343 #endif
344 		}
345 
346 		ret = lzma_code(strm, action);
347 
348 		// Write and check write error before checking decoder error.
349 		// This way as much data as possible gets written to output
350 		// even if decoder detected an error.
351 		if (strm->avail_out == 0 || ret != LZMA_OK) {
352 			const size_t write_size = BUFSIZ - strm->avail_out;
353 
354 			if (fwrite(out_buf, 1, write_size, stdout)
355 					!= write_size) {
356 				// Wouldn't be a surprise if writing to stderr
357 				// would fail too but at least try to show an
358 				// error message.
359 				my_errorf("Cannot write to standard output: "
360 						"%s", strerror(errno));
361 				exit(EXIT_FAILURE);
362 			}
363 
364 			strm->next_out = out_buf;
365 			strm->avail_out = BUFSIZ;
366 		}
367 
368 		if (ret != LZMA_OK) {
369 			if (ret == LZMA_STREAM_END) {
370 #ifdef LZMADEC
371 				// Check that there's no trailing garbage.
372 				if (strm->avail_in != 0
373 						|| fread(in_buf, 1, 1, file)
374 							!= 0
375 						|| !feof(file))
376 					ret = LZMA_DATA_ERROR;
377 				else
378 					return;
379 #else
380 				// lzma_stream_decoder() already guarantees
381 				// that there's no trailing garbage.
382 				assert(strm->avail_in == 0);
383 				assert(action == LZMA_FINISH);
384 				assert(feof(file));
385 				return;
386 #endif
387 			}
388 
389 			const char *msg;
390 			switch (ret) {
391 			case LZMA_MEM_ERROR:
392 				msg = strerror(ENOMEM);
393 				break;
394 
395 			case LZMA_MEMLIMIT_ERROR:
396 				msg = "Memory usage limit reached";
397 				break;
398 
399 			case LZMA_FORMAT_ERROR:
400 				msg = "File format not recognized";
401 				break;
402 
403 			case LZMA_OPTIONS_ERROR:
404 				// FIXME: Better message?
405 				msg = "Unsupported compression options";
406 				break;
407 
408 			case LZMA_DATA_ERROR:
409 				msg = "File is corrupt";
410 				break;
411 
412 			case LZMA_BUF_ERROR:
413 				msg = "Unexpected end of input";
414 				break;
415 
416 			default:
417 				msg = "Internal error (bug)";
418 				break;
419 			}
420 
421 			my_errorf("%s: %s", filename, msg);
422 			exit(EXIT_FAILURE);
423 		}
424 	}
425 }
426 
427 
428 int
429 main(int argc, char **argv)
430 {
431 	// Initialize progname which we will be used in error messages.
432 	tuklib_progname_init(argv);
433 
434 	// Set the default memory usage limit. This is needed before parsing
435 	// the command line arguments.
436 	memlimit_init();
437 
438 	// Parse the command line options.
439 	parse_options(argc, argv);
440 
441 	// The same lzma_stream is used for all files that we decode. This way
442 	// we don't need to reallocate memory for every file if they use same
443 	// compression settings.
444 	lzma_stream strm = LZMA_STREAM_INIT;
445 
446 	// Some systems require setting stdin and stdout to binary mode.
447 #ifdef TUKLIB_DOSLIKE
448 	setmode(fileno(stdin), O_BINARY);
449 	setmode(fileno(stdout), O_BINARY);
450 #endif
451 
452 	if (optind == argc) {
453 		// No filenames given, decode from stdin.
454 		uncompress(&strm, stdin, "(stdin)");
455 	} else {
456 		// Loop through the filenames given on the command line.
457 		do {
458 			// "-" indicates stdin.
459 			if (strcmp(argv[optind], "-") == 0) {
460 				uncompress(&strm, stdin, "(stdin)");
461 			} else {
462 				FILE *file = fopen(argv[optind], "rb");
463 				if (file == NULL) {
464 					my_errorf("%s: %s", argv[optind],
465 							strerror(errno));
466 					exit(EXIT_FAILURE);
467 				}
468 
469 				uncompress(&strm, file, argv[optind]);
470 				fclose(file);
471 			}
472 		} while (++optind < argc);
473 	}
474 
475 #ifndef NDEBUG
476 	// Free the memory only when debugging. Freeing wastes some time,
477 	// but allows detecting possible memory leaks with Valgrind.
478 	lzma_end(&strm);
479 #endif
480 
481 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
482 }
483