xref: /freebsd/contrib/xz/src/xzdec/xzdec.c (revision 128836d304d93f2d00eb14069c27089ab46c38d4)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       xzdec.c
6 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "sysdefs.h"
13 #include "lzma.h"
14 
15 #include <stdarg.h>
16 #include <errno.h>
17 #include <locale.h>
18 #include <stdio.h>
19 
20 #ifndef _MSC_VER
21 #	include <unistd.h>
22 #endif
23 
24 #ifdef HAVE_CAP_RIGHTS_LIMIT
25 #	include <sys/capsicum.h>
26 #endif
27 
28 #ifdef HAVE_LINUX_LANDLOCK
29 #	include "my_landlock.h"
30 #endif
31 
32 #if defined(HAVE_CAP_RIGHTS_LIMIT) || defined(HAVE_PLEDGE) \
33 		|| defined(HAVE_LINUX_LANDLOCK)
34 #	define ENABLE_SANDBOX 1
35 #endif
36 
37 #include "getopt.h"
38 #include "tuklib_progname.h"
39 #include "tuklib_mbstr_nonprint.h"
40 #include "tuklib_exit.h"
41 
42 #ifdef TUKLIB_DOSLIKE
43 #	include <fcntl.h>
44 #	include <io.h>
45 #	ifdef _MSC_VER
46 #		define fileno _fileno
47 #		define setmode _setmode
48 #	endif
49 #endif
50 
51 
52 #ifdef LZMADEC
53 #	define TOOL_FORMAT "lzma"
54 #else
55 #	define TOOL_FORMAT "xz"
56 #endif
57 
58 
/// Error messages are suppressed if this is zero, which is the case when
/// --quiet has been given at least twice.
///
/// The value starts at two and each -q/--quiet decrements it (never below
/// zero). my_errorf() prints only while this is non-zero, and the value is
/// also passed to tuklib_exit() when the program terminates.
static int display_errors = 2;
62 
63 
64 lzma_attribute((__format__(__printf__, 1, 2)))
65 static void
my_errorf(const char * fmt,...)66 my_errorf(const char *fmt, ...)
67 {
68 	va_list ap;
69 	va_start(ap, fmt);
70 
71 	if (display_errors) {
72 		fprintf(stderr, "%s: ", progname);
73 		vfprintf(stderr, fmt, ap);
74 		fprintf(stderr, "\n");
75 	}
76 
77 	va_end(ap);
78 	return;
79 }
80 
81 
/// \brief      Prints the usage text to stdout and terminates the program
///
/// The text names the tool by progname and the file format by TOOL_FORMAT.
/// This function does not return: it ends with tuklib_exit(), which is
/// given EXIT_SUCCESS, EXIT_FAILURE and the current display_errors value.
tuklib_attr_noreturn
static void
help(void)
{
	// The whole usage text is a single format string; the only conversion
	// in it is the %s on the first line, which receives progname.
	printf(
"Usage: %s [OPTION]... [FILE]...\n"
"Decompress files in the ." TOOL_FORMAT " format to standard output.\n"
"\n"
"  -d, --decompress   (ignored, only decompression is supported)\n"
"  -k, --keep         (ignored, files are never deleted)\n"
"  -c, --stdout       (ignored, output is always written to standard output)\n"
"  -q, --quiet        specify *twice* to suppress errors\n"
"  -Q, --no-warn      (ignored, the exit status 2 is never used)\n"
"  -h, --help         display this help and exit\n"
"  -V, --version      display the version number and exit\n"
"\n"
"With no FILE, or when FILE is -, read standard input.\n"
"\n"
"Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);

	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
}
105 
106 
107 tuklib_attr_noreturn
108 static void
version(void)109 version(void)
110 {
111 	printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
112 			"liblzma %s\n", lzma_version_string());
113 
114 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
115 }
116 
117 
118 /// Parses command line options.
119 static void
parse_options(int argc,char ** argv)120 parse_options(int argc, char **argv)
121 {
122 	static const char short_opts[] = "cdkhqQV";
123 	static const struct option long_opts[] = {
124 		{ "stdout",       no_argument,         NULL, 'c' },
125 		{ "to-stdout",    no_argument,         NULL, 'c' },
126 		{ "decompress",   no_argument,         NULL, 'd' },
127 		{ "uncompress",   no_argument,         NULL, 'd' },
128 		{ "keep",         no_argument,         NULL, 'k' },
129 		{ "quiet",        no_argument,         NULL, 'q' },
130 		{ "no-warn",      no_argument,         NULL, 'Q' },
131 		{ "help",         no_argument,         NULL, 'h' },
132 		{ "version",      no_argument,         NULL, 'V' },
133 		{ NULL,           0,                   NULL, 0   }
134 	};
135 
136 	int c;
137 
138 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
139 			!= -1) {
140 		switch (c) {
141 		case 'c':
142 		case 'd':
143 		case 'k':
144 		case 'Q':
145 			break;
146 
147 		case 'q':
148 			if (display_errors > 0)
149 				--display_errors;
150 
151 			break;
152 
153 		case 'h':
154 			help();
155 
156 		case 'V':
157 			version();
158 
159 		default:
160 			exit(EXIT_FAILURE);
161 		}
162 	}
163 
164 	return;
165 }
166 
167 
168 static void
uncompress(lzma_stream * strm,FILE * file,const char * filename)169 uncompress(lzma_stream *strm, FILE *file, const char *filename)
170 {
171 	lzma_ret ret;
172 
173 	// Initialize the decoder
174 #ifdef LZMADEC
175 	ret = lzma_alone_decoder(strm, UINT64_MAX);
176 #else
177 	ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
178 #endif
179 
180 	// The only reasonable error here is LZMA_MEM_ERROR.
181 	if (ret != LZMA_OK) {
182 		my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
183 				: "Internal error (bug)");
184 		exit(EXIT_FAILURE);
185 	}
186 
187 	// Input and output buffers
188 	uint8_t in_buf[BUFSIZ];
189 	uint8_t out_buf[BUFSIZ];
190 
191 	strm->avail_in = 0;
192 	strm->next_out = out_buf;
193 	strm->avail_out = BUFSIZ;
194 
195 	lzma_action action = LZMA_RUN;
196 
197 	while (true) {
198 		if (strm->avail_in == 0) {
199 			strm->next_in = in_buf;
200 			strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
201 
202 			if (ferror(file)) {
203 				// POSIX says that fread() sets errno if
204 				// an error occurred. ferror() doesn't
205 				// touch errno.
206 				my_errorf("%s: Error reading input file: %s",
207 					tuklib_mask_nonprint(filename),
208 					strerror(errno));
209 				exit(EXIT_FAILURE);
210 			}
211 
212 #ifndef LZMADEC
213 			// When using LZMA_CONCATENATED, we need to tell
214 			// liblzma when it has got all the input.
215 			if (feof(file))
216 				action = LZMA_FINISH;
217 #endif
218 		}
219 
220 		ret = lzma_code(strm, action);
221 
222 		// Write and check write error before checking decoder error.
223 		// This way as much data as possible gets written to output
224 		// even if decoder detected an error.
225 		if (strm->avail_out == 0 || ret != LZMA_OK) {
226 			const size_t write_size = BUFSIZ - strm->avail_out;
227 
228 			if (fwrite(out_buf, 1, write_size, stdout)
229 					!= write_size) {
230 				// Wouldn't be a surprise if writing to stderr
231 				// would fail too but at least try to show an
232 				// error message.
233 #if defined(_WIN32) && !defined(__CYGWIN__)
234 				// On native Windows, broken pipe is reported
235 				// as EINVAL. Don't show an error message
236 				// in this case.
237 				if (errno != EINVAL)
238 #endif
239 				{
240 					my_errorf("Cannot write to "
241 						"standard output: "
242 						"%s", strerror(errno));
243 				}
244 				exit(EXIT_FAILURE);
245 			}
246 
247 			strm->next_out = out_buf;
248 			strm->avail_out = BUFSIZ;
249 		}
250 
251 		if (ret != LZMA_OK) {
252 			if (ret == LZMA_STREAM_END) {
253 #ifdef LZMADEC
254 				// Check that there's no trailing garbage.
255 				if (strm->avail_in != 0
256 						|| fread(in_buf, 1, 1, file)
257 							!= 0
258 						|| !feof(file))
259 					ret = LZMA_DATA_ERROR;
260 				else
261 					return;
262 #else
263 				// lzma_stream_decoder() already guarantees
264 				// that there's no trailing garbage.
265 				assert(strm->avail_in == 0);
266 				assert(action == LZMA_FINISH);
267 				assert(feof(file));
268 				return;
269 #endif
270 			}
271 
272 			const char *msg;
273 			switch (ret) {
274 			case LZMA_MEM_ERROR:
275 				msg = strerror(ENOMEM);
276 				break;
277 
278 			case LZMA_FORMAT_ERROR:
279 				msg = "File format not recognized";
280 				break;
281 
282 			case LZMA_OPTIONS_ERROR:
283 				// FIXME: Better message?
284 				msg = "Unsupported compression options";
285 				break;
286 
287 			case LZMA_DATA_ERROR:
288 				msg = "File is corrupt";
289 				break;
290 
291 			case LZMA_BUF_ERROR:
292 				msg = "Unexpected end of input";
293 				break;
294 
295 			default:
296 				msg = "Internal error (bug)";
297 				break;
298 			}
299 
300 			my_errorf("%s: %s", tuklib_mask_nonprint(filename),
301 					msg);
302 			exit(EXIT_FAILURE);
303 		}
304 	}
305 }
306 
307 
#ifdef ENABLE_SANDBOX
/// \brief      Enables the strict sandbox for the rest of the process
///
/// Exactly one sandboxing method is compiled in:
///
///   - Capsicum: enter capability mode, limit src_fd to reading, strip
///     all rights from stdin when it is not the input, and limit stdout
///     and stderr to writing.
///
///   - pledge(2): restrict the process to "stdio".
///
///   - Landlock: create a ruleset from the attributes filled in by
///     my_landlock_ruleset_attr_forbid_all() and apply it to the process.
///     The ruleset is applied only when that helper returns a positive
///     value (presumably meaning Landlock is usable; confirm in
///     my_landlock.h).
///
/// On failure an error message is printed and the process exits with
/// EXIT_FAILURE, except that Capsicum failing with ENOSYS is ignored.
///
/// \param      src_fd  File descriptor of the input that is about to be
///                     decompressed. Only the Capsicum method uses it.
static void
sandbox_enter(int src_fd)
{
#if defined(HAVE_CAP_RIGHTS_LIMIT)
	// Capsicum needs FreeBSD 10.2 or later.
	cap_rights_t rights;

	if (cap_enter())
		goto error;

	if (cap_rights_limit(src_fd, cap_rights_init(&rights, CAP_READ)))
		goto error;

	// If not reading from stdin, remove all capabilities from it.
	//
	// cap_rights_init() without any listed rights produces an empty
	// set. cap_rights_clear() without any listed rights removes
	// nothing, which would leave the CAP_READ set above in place and
	// keep stdin readable.
	if (src_fd != STDIN_FILENO && cap_rights_limit(
			STDIN_FILENO, cap_rights_init(&rights)))
		goto error;

	if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights,
			CAP_WRITE)))
		goto error;

	if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights,
			CAP_WRITE)))
		goto error;

#elif defined(HAVE_PLEDGE)
	// pledge() was introduced in OpenBSD 5.9.
	if (pledge("stdio", ""))
		goto error;

	(void)src_fd;

#elif defined(HAVE_LINUX_LANDLOCK)
	struct landlock_ruleset_attr attr;
	if (my_landlock_ruleset_attr_forbid_all(&attr) > 0) {
		const int ruleset_fd = my_landlock_create_ruleset(
				&attr, sizeof(attr), 0);
		if (ruleset_fd < 0)
			goto error;

		// All files we need should have already been opened. Thus,
		// we don't need to add any rules using landlock_add_rule(2)
		// before activating the sandbox.
		if (my_landlock_restrict_self(ruleset_fd, 0) != 0)
			goto error;

		// The ruleset descriptor is no longer needed once it has
		// been applied to the process.
		(void)close(ruleset_fd);
	}

	(void)src_fd;

#else
#	error ENABLE_SANDBOX is defined but no sandboxing method was found.
#endif

	return;

error:
#ifdef HAVE_CAP_RIGHTS_LIMIT
	// If a kernel is configured without capability mode support or
	// used in an emulator that does not implement the capability
	// system calls, then the Capsicum system calls will fail and set
	// errno to ENOSYS. In that case xzdec will silently run without
	// the sandbox.
	if (errno == ENOSYS)
		return;
#endif

	my_errorf("Failed to enable the sandbox");
	exit(EXIT_FAILURE);
}
#endif
382 
383 
384 int
main(int argc,char ** argv)385 main(int argc, char **argv)
386 {
387 	// Initialize progname which will be used in error messages.
388 	tuklib_progname_init(argv);
389 
390 #ifdef HAVE_PLEDGE
391 	// OpenBSD's pledge(2) sandbox.
392 	// Initially enable the sandbox slightly more relaxed so that
393 	// the process can still open files. This allows the sandbox to
394 	// be enabled when parsing command line arguments and decompressing
395 	// all files (the more strict sandbox only restricts the last file
396 	// that is decompressed).
397 	if (pledge("stdio rpath", "")) {
398 		my_errorf("Failed to enable the sandbox");
399 		exit(EXIT_FAILURE);
400 	}
401 #endif
402 
403 #ifdef HAVE_LINUX_LANDLOCK
404 	// Prevent the process from gaining new privileges. This must be done
405 	// before landlock_restrict_self(2) but since we will never need new
406 	// privileges, this call can be done here already.
407 	//
408 	// This is supported since Linux 3.5. Ignore the return value to
409 	// keep compatibility with old kernels. landlock_restrict_self(2)
410 	// will fail if the no_new_privs attribute isn't set, thus if prctl()
411 	// fails here the error will still be detected when it matters.
412 	(void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
413 #endif
414 
415 	// We need to set the locale even though we don't have any
416 	// translated messages:
417 	//
418 	//   - tuklib_mask_nonprint() has locale-specific behavior (LC_CTYPE).
419 	//
420 	//   - This is needed on Windows to make non-ASCII filenames display
421 	//     properly when the active code page has been set to UTF-8
422 	//     in the application manifest.
423 	setlocale(LC_ALL, "");
424 
425 	// Parse the command line options.
426 	parse_options(argc, argv);
427 
428 	// The same lzma_stream is used for all files that we decode. This way
429 	// we don't need to reallocate memory for every file if they use same
430 	// compression settings.
431 	lzma_stream strm = LZMA_STREAM_INIT;
432 
433 	// Some systems require setting stdin and stdout to binary mode.
434 #ifdef TUKLIB_DOSLIKE
435 	setmode(fileno(stdin), O_BINARY);
436 	setmode(fileno(stdout), O_BINARY);
437 #endif
438 
439 	if (optind == argc) {
440 		// No filenames given, decode from stdin.
441 #ifdef ENABLE_SANDBOX
442 		sandbox_enter(STDIN_FILENO);
443 #endif
444 		uncompress(&strm, stdin, "(stdin)");
445 	} else {
446 		// Loop through the filenames given on the command line.
447 		do {
448 			FILE *src_file;
449 			const char *src_name;
450 
451 			// "-" indicates stdin.
452 			if (strcmp(argv[optind], "-") == 0) {
453 				src_file = stdin;
454 				src_name = "(stdin)";
455 			} else {
456 				src_name = argv[optind];
457 				src_file = fopen(src_name, "rb");
458 				if (src_file == NULL) {
459 					my_errorf("%s: %s",
460 						tuklib_mask_nonprint(
461 							src_name),
462 						strerror(errno));
463 					exit(EXIT_FAILURE);
464 				}
465 			}
466 #ifdef ENABLE_SANDBOX
467 			// Enable the strict sandbox for the last file.
468 			// Then the process can no longer open additional
469 			// files. The typical xzdec use case is to decompress
470 			// a single file so this way the strictest sandboxing
471 			// is used in most cases.
472 			if (optind == argc - 1)
473 				sandbox_enter(fileno(src_file));
474 #endif
475 			uncompress(&strm, src_file, src_name);
476 
477 			if (src_file != stdin)
478 				(void)fclose(src_file);
479 		} while (++optind < argc);
480 	}
481 
482 #ifndef NDEBUG
483 	// Free the memory only when debugging. Freeing wastes some time,
484 	// but allows detecting possible memory leaks with Valgrind.
485 	lzma_end(&strm);
486 #endif
487 
488 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
489 }
490