xref: /freebsd/contrib/xz/src/xzdec/xzdec.c (revision 9cbf1de7e34a6fced041388fad5d9180cb7705fe)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       xzdec.c
6 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "sysdefs.h"
13 #include "lzma.h"
14 
15 #include <stdarg.h>
16 #include <errno.h>
17 #include <stdio.h>
18 
19 #ifndef _MSC_VER
20 #	include <unistd.h>
21 #endif
22 
23 #ifdef HAVE_CAP_RIGHTS_LIMIT
24 #	include <sys/capsicum.h>
25 #endif
26 
27 #ifdef HAVE_LINUX_LANDLOCK
28 #	include <linux/landlock.h>
29 #	include <sys/prctl.h>
30 #	include <sys/syscall.h>
31 #	ifdef LANDLOCK_ACCESS_NET_BIND_TCP
32 #		define LANDLOCK_ABI_MAX 4
33 #	else
34 #		define LANDLOCK_ABI_MAX 3
35 #	endif
36 #endif
37 
38 #if defined(HAVE_CAP_RIGHTS_LIMIT) || defined(HAVE_PLEDGE) \
39 		|| defined(HAVE_LINUX_LANDLOCK)
40 #	define ENABLE_SANDBOX 1
41 #endif
42 
43 #include "getopt.h"
44 #include "tuklib_progname.h"
45 #include "tuklib_exit.h"
46 
47 #ifdef TUKLIB_DOSLIKE
48 #	include <fcntl.h>
49 #	include <io.h>
50 #	ifdef _MSC_VER
51 #		define fileno _fileno
52 #		define setmode _setmode
53 #	endif
54 #endif
55 
56 
57 #ifdef LZMADEC
58 #	define TOOL_FORMAT "lzma"
59 #else
60 #	define TOOL_FORMAT "xz"
61 #endif
62 
63 
/// Verbosity level for error messages printed by my_errorf().
///
/// Starts at 2 and is decremented once for every --quiet (-q) option,
/// never going below zero. Error messages are suppressed if this is zero,
/// which is the case when --quiet has been given at least twice.
/// The value is also passed as the last argument to tuklib_exit().
static int display_errors = 2;
67 
68 
69 lzma_attribute((__format__(__printf__, 1, 2)))
70 static void
71 my_errorf(const char *fmt, ...)
72 {
73 	va_list ap;
74 	va_start(ap, fmt);
75 
76 	if (display_errors) {
77 		fprintf(stderr, "%s: ", progname);
78 		vfprintf(stderr, fmt, ap);
79 		fprintf(stderr, "\n");
80 	}
81 
82 	va_end(ap);
83 	return;
84 }
85 
86 
87 tuklib_attr_noreturn
88 static void
89 help(void)
90 {
91 	printf(
92 "Usage: %s [OPTION]... [FILE]...\n"
93 "Decompress files in the ." TOOL_FORMAT " format to standard output.\n"
94 "\n"
95 "  -d, --decompress   (ignored, only decompression is supported)\n"
96 "  -k, --keep         (ignored, files are never deleted)\n"
97 "  -c, --stdout       (ignored, output is always written to standard output)\n"
98 "  -q, --quiet        specify *twice* to suppress errors\n"
99 "  -Q, --no-warn      (ignored, the exit status 2 is never used)\n"
100 "  -h, --help         display this help and exit\n"
101 "  -V, --version      display the version number and exit\n"
102 "\n"
103 "With no FILE, or when FILE is -, read standard input.\n"
104 "\n"
105 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
106 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);
107 
108 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
109 }
110 
111 
112 tuklib_attr_noreturn
113 static void
114 version(void)
115 {
116 	printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
117 			"liblzma %s\n", lzma_version_string());
118 
119 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
120 }
121 
122 
123 /// Parses command line options.
124 static void
125 parse_options(int argc, char **argv)
126 {
127 	static const char short_opts[] = "cdkM:hqQV";
128 	static const struct option long_opts[] = {
129 		{ "stdout",       no_argument,         NULL, 'c' },
130 		{ "to-stdout",    no_argument,         NULL, 'c' },
131 		{ "decompress",   no_argument,         NULL, 'd' },
132 		{ "uncompress",   no_argument,         NULL, 'd' },
133 		{ "keep",         no_argument,         NULL, 'k' },
134 		{ "quiet",        no_argument,         NULL, 'q' },
135 		{ "no-warn",      no_argument,         NULL, 'Q' },
136 		{ "help",         no_argument,         NULL, 'h' },
137 		{ "version",      no_argument,         NULL, 'V' },
138 		{ NULL,           0,                   NULL, 0   }
139 	};
140 
141 	int c;
142 
143 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
144 			!= -1) {
145 		switch (c) {
146 		case 'c':
147 		case 'd':
148 		case 'k':
149 		case 'Q':
150 			break;
151 
152 		case 'q':
153 			if (display_errors > 0)
154 				--display_errors;
155 
156 			break;
157 
158 		case 'h':
159 			help();
160 
161 		case 'V':
162 			version();
163 
164 		default:
165 			exit(EXIT_FAILURE);
166 		}
167 	}
168 
169 	return;
170 }
171 
172 
173 static void
174 uncompress(lzma_stream *strm, FILE *file, const char *filename)
175 {
176 	lzma_ret ret;
177 
178 	// Initialize the decoder
179 #ifdef LZMADEC
180 	ret = lzma_alone_decoder(strm, UINT64_MAX);
181 #else
182 	ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
183 #endif
184 
185 	// The only reasonable error here is LZMA_MEM_ERROR.
186 	if (ret != LZMA_OK) {
187 		my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
188 				: "Internal error (bug)");
189 		exit(EXIT_FAILURE);
190 	}
191 
192 	// Input and output buffers
193 	uint8_t in_buf[BUFSIZ];
194 	uint8_t out_buf[BUFSIZ];
195 
196 	strm->avail_in = 0;
197 	strm->next_out = out_buf;
198 	strm->avail_out = BUFSIZ;
199 
200 	lzma_action action = LZMA_RUN;
201 
202 	while (true) {
203 		if (strm->avail_in == 0) {
204 			strm->next_in = in_buf;
205 			strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
206 
207 			if (ferror(file)) {
208 				// POSIX says that fread() sets errno if
209 				// an error occurred. ferror() doesn't
210 				// touch errno.
211 				my_errorf("%s: Error reading input file: %s",
212 						filename, strerror(errno));
213 				exit(EXIT_FAILURE);
214 			}
215 
216 #ifndef LZMADEC
217 			// When using LZMA_CONCATENATED, we need to tell
218 			// liblzma when it has got all the input.
219 			if (feof(file))
220 				action = LZMA_FINISH;
221 #endif
222 		}
223 
224 		ret = lzma_code(strm, action);
225 
226 		// Write and check write error before checking decoder error.
227 		// This way as much data as possible gets written to output
228 		// even if decoder detected an error.
229 		if (strm->avail_out == 0 || ret != LZMA_OK) {
230 			const size_t write_size = BUFSIZ - strm->avail_out;
231 
232 			if (fwrite(out_buf, 1, write_size, stdout)
233 					!= write_size) {
234 				// Wouldn't be a surprise if writing to stderr
235 				// would fail too but at least try to show an
236 				// error message.
237 				my_errorf("Cannot write to standard output: "
238 						"%s", strerror(errno));
239 				exit(EXIT_FAILURE);
240 			}
241 
242 			strm->next_out = out_buf;
243 			strm->avail_out = BUFSIZ;
244 		}
245 
246 		if (ret != LZMA_OK) {
247 			if (ret == LZMA_STREAM_END) {
248 #ifdef LZMADEC
249 				// Check that there's no trailing garbage.
250 				if (strm->avail_in != 0
251 						|| fread(in_buf, 1, 1, file)
252 							!= 0
253 						|| !feof(file))
254 					ret = LZMA_DATA_ERROR;
255 				else
256 					return;
257 #else
258 				// lzma_stream_decoder() already guarantees
259 				// that there's no trailing garbage.
260 				assert(strm->avail_in == 0);
261 				assert(action == LZMA_FINISH);
262 				assert(feof(file));
263 				return;
264 #endif
265 			}
266 
267 			const char *msg;
268 			switch (ret) {
269 			case LZMA_MEM_ERROR:
270 				msg = strerror(ENOMEM);
271 				break;
272 
273 			case LZMA_FORMAT_ERROR:
274 				msg = "File format not recognized";
275 				break;
276 
277 			case LZMA_OPTIONS_ERROR:
278 				// FIXME: Better message?
279 				msg = "Unsupported compression options";
280 				break;
281 
282 			case LZMA_DATA_ERROR:
283 				msg = "File is corrupt";
284 				break;
285 
286 			case LZMA_BUF_ERROR:
287 				msg = "Unexpected end of input";
288 				break;
289 
290 			default:
291 				msg = "Internal error (bug)";
292 				break;
293 			}
294 
295 			my_errorf("%s: %s", filename, msg);
296 			exit(EXIT_FAILURE);
297 		}
298 	}
299 }
300 
301 
#ifdef ENABLE_SANDBOX
/// Enables the strict sandbox using whichever method was selected at
/// build time (Capsicum, pledge, or Landlock).
///
/// On failure an error message is printed and the process exits with
/// EXIT_FAILURE. The exception is Capsicum failing with ENOSYS, in which
/// case the function returns silently without a sandbox.
///
/// \param      src_fd  File descriptor of the input file. Only the
///                     Capsicum code uses it; the other methods ignore it.
static void
sandbox_enter(int src_fd)
{
#if defined(HAVE_CAP_RIGHTS_LIMIT)
	// Capsicum needs FreeBSD 10.2 or later.
	cap_rights_t caps;

	if (cap_enter() != 0)
		goto error;

	// The input file only needs to be readable.
	if (cap_rights_limit(src_fd, cap_rights_init(&caps, CAP_READ)) != 0)
		goto error;

	// If not reading from stdin, remove all capabilities from it.
	if (src_fd != STDIN_FILENO) {
		if (cap_rights_limit(STDIN_FILENO,
				cap_rights_clear(&caps)) != 0)
			goto error;
	}

	// stdout and then stderr only need to be writable.
	static const int write_only_fds[] = { STDOUT_FILENO, STDERR_FILENO };

	for (size_t i = 0; i < sizeof(write_only_fds)
			/ sizeof(write_only_fds[0]); ++i) {
		if (cap_rights_limit(write_only_fds[i],
				cap_rights_init(&caps, CAP_WRITE)) != 0)
			goto error;
	}

#elif defined(HAVE_PLEDGE)
	(void)src_fd;

	// pledge() was introduced in OpenBSD 5.9.
	if (pledge("stdio", "") != 0)
		goto error;

#elif defined(HAVE_LINUX_LANDLOCK)
	(void)src_fd;

	// Query the Landlock ABI version.
	int abi = syscall(SYS_landlock_create_ruleset,
			(void *)NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);

	// A non-positive result means that no usable ABI version was
	// reported. In that case no restrictions are applied.
	if (abi <= 0)
		return;

	// Don't go beyond the ABI version this code was written for.
	abi = my_min(abi, LANDLOCK_ABI_MAX);

	// The filesystem access mask has 12 + min(3, abi) low bits set.
	const int fs_abi = my_min(3, abi);

	const struct landlock_ruleset_attr attr = {
		.handled_access_fs = (1ULL << (12 + fs_abi)) - 1,
#	if LANDLOCK_ABI_MAX >= 4
		.handled_access_net = abi >= 4
			? (LANDLOCK_ACCESS_NET_BIND_TCP
				| LANDLOCK_ACCESS_NET_CONNECT_TCP)
			: 0,
#	endif
	};

	const int ruleset_fd = syscall(SYS_landlock_create_ruleset,
			&attr, sizeof(attr), 0U);
	if (ruleset_fd < 0)
		goto error;

	// All files we need should have already been opened. Thus,
	// we don't need to add any rules using landlock_add_rule(2)
	// before activating the sandbox.
	if (syscall(SYS_landlock_restrict_self, ruleset_fd, 0U) != 0)
		goto error;

#else
#	error ENABLE_SANDBOX is defined but no sandboxing method was found.
#endif

	return;

error:
#ifdef HAVE_CAP_RIGHTS_LIMIT
	// If a kernel is configured without capability mode support or
	// used in an emulator that does not implement the capability
	// system calls, then the Capsicum system calls will fail and set
	// errno to ENOSYS. In that case xzdec will silently run without
	// the sandbox.
	if (errno == ENOSYS)
		return;
#endif

	my_errorf("Failed to enable the sandbox");
	exit(EXIT_FAILURE);
}
#endif
389 
390 
391 int
392 main(int argc, char **argv)
393 {
394 #ifdef HAVE_PLEDGE
395 	// OpenBSD's pledge(2) sandbox.
396 	// Initially enable the sandbox slightly more relaxed so that
397 	// the process can still open files. This allows the sandbox to
398 	// be enabled when parsing command line arguments and decompressing
399 	// all files (the more strict sandbox only restricts the last file
400 	// that is decompressed).
401 	if (pledge("stdio rpath", "")) {
402 		my_errorf("Failed to enable the sandbox");
403 		exit(EXIT_FAILURE);
404 	}
405 #endif
406 
407 #ifdef HAVE_LINUX_LANDLOCK
408 	// Prevent the process from gaining new privileges. This must be done
409 	// before landlock_restrict_self(2) but since we will never need new
410 	// privileges, this call can be done here already.
411 	//
412 	// This is supported since Linux 3.5. Ignore the return value to
413 	// keep compatibility with old kernels. landlock_restrict_self(2)
414 	// will fail if the no_new_privs attribute isn't set, thus if prctl()
415 	// fails here the error will still be detected when it matters.
416 	(void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
417 #endif
418 
419 	// Initialize progname which we will be used in error messages.
420 	tuklib_progname_init(argv);
421 
422 	// Parse the command line options.
423 	parse_options(argc, argv);
424 
425 	// The same lzma_stream is used for all files that we decode. This way
426 	// we don't need to reallocate memory for every file if they use same
427 	// compression settings.
428 	lzma_stream strm = LZMA_STREAM_INIT;
429 
430 	// Some systems require setting stdin and stdout to binary mode.
431 #ifdef TUKLIB_DOSLIKE
432 	setmode(fileno(stdin), O_BINARY);
433 	setmode(fileno(stdout), O_BINARY);
434 #endif
435 
436 	if (optind == argc) {
437 		// No filenames given, decode from stdin.
438 #ifdef ENABLE_SANDBOX
439 		sandbox_enter(STDIN_FILENO);
440 #endif
441 		uncompress(&strm, stdin, "(stdin)");
442 	} else {
443 		// Loop through the filenames given on the command line.
444 		do {
445 			FILE *src_file;
446 			const char *src_name;
447 
448 			// "-" indicates stdin.
449 			if (strcmp(argv[optind], "-") == 0) {
450 				src_file = stdin;
451 				src_name = "(stdin)";
452 			} else {
453 				src_name = argv[optind];
454 				src_file = fopen(src_name, "rb");
455 				if (src_file == NULL) {
456 					my_errorf("%s: %s", src_name,
457 							strerror(errno));
458 					exit(EXIT_FAILURE);
459 				}
460 			}
461 #ifdef ENABLE_SANDBOX
462 			// Enable the strict sandbox for the last file.
463 			// Then the process can no longer open additional
464 			// files. The typical xzdec use case is to decompress
465 			// a single file so this way the strictest sandboxing
466 			// is used in most cases.
467 			if (optind == argc - 1)
468 				sandbox_enter(fileno(src_file));
469 #endif
470 			uncompress(&strm, src_file, src_name);
471 
472 			if (src_file != stdin)
473 				(void)fclose(src_file);
474 		} while (++optind < argc);
475 	}
476 
477 #ifndef NDEBUG
478 	// Free the memory only when debugging. Freeing wastes some time,
479 	// but allows detecting possible memory leaks with Valgrind.
480 	lzma_end(&strm);
481 #endif
482 
483 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
484 }
485