xref: /freebsd/contrib/xz/src/xzdec/xzdec.c (revision 8a802df1de2d77fd0a62996bd785ca3f1326887f)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       xzdec.c
6 /// \brief      Simple single-threaded tool to uncompress .xz or .lzma files
7 //
8 //  Author:     Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11 
12 #include "sysdefs.h"
13 #include "lzma.h"
14 
15 #include <stdarg.h>
16 #include <errno.h>
17 #include <stdio.h>
18 
19 #ifndef _MSC_VER
20 #	include <unistd.h>
21 #endif
22 
23 #ifdef HAVE_CAP_RIGHTS_LIMIT
24 #	include <sys/capsicum.h>
25 #endif
26 
27 #ifdef HAVE_LINUX_LANDLOCK_H
28 #	include <linux/landlock.h>
29 #	include <sys/prctl.h>
30 #	include <sys/syscall.h>
31 #endif
32 
33 #if defined(HAVE_CAP_RIGHTS_LIMIT) || defined(HAVE_PLEDGE) \
34 		|| defined(HAVE_LINUX_LANDLOCK_H)
35 #	define ENABLE_SANDBOX 1
36 #endif
37 
38 #include "getopt.h"
39 #include "tuklib_progname.h"
40 #include "tuklib_exit.h"
41 
42 #ifdef TUKLIB_DOSLIKE
43 #	include <fcntl.h>
44 #	include <io.h>
45 #	ifdef _MSC_VER
46 #		define fileno _fileno
47 #		define setmode _setmode
48 #	endif
49 #endif
50 
51 
52 #ifdef LZMADEC
53 #	define TOOL_FORMAT "lzma"
54 #else
55 #	define TOOL_FORMAT "xz"
56 #endif
57 
58 
/// Verbosity of error reporting. Starts at 2; each --quiet (-q) on the
/// command line lowers it by one (never below zero). Once it reaches
/// zero, my_errorf() prints nothing.
static int display_errors = 2;
62 
63 
64 lzma_attribute((__format__(__printf__, 1, 2)))
65 static void
66 my_errorf(const char *fmt, ...)
67 {
68 	va_list ap;
69 	va_start(ap, fmt);
70 
71 	if (display_errors) {
72 		fprintf(stderr, "%s: ", progname);
73 		vfprintf(stderr, fmt, ap);
74 		fprintf(stderr, "\n");
75 	}
76 
77 	va_end(ap);
78 	return;
79 }
80 
81 
82 tuklib_attr_noreturn
83 static void
84 help(void)
85 {
86 	printf(
87 "Usage: %s [OPTION]... [FILE]...\n"
88 "Decompress files in the ." TOOL_FORMAT " format to standard output.\n"
89 "\n"
90 "  -d, --decompress   (ignored, only decompression is supported)\n"
91 "  -k, --keep         (ignored, files are never deleted)\n"
92 "  -c, --stdout       (ignored, output is always written to standard output)\n"
93 "  -q, --quiet        specify *twice* to suppress errors\n"
94 "  -Q, --no-warn      (ignored, the exit status 2 is never used)\n"
95 "  -h, --help         display this help and exit\n"
96 "  -V, --version      display the version number and exit\n"
97 "\n"
98 "With no FILE, or when FILE is -, read standard input.\n"
99 "\n"
100 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
101 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);
102 
103 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
104 }
105 
106 
107 tuklib_attr_noreturn
108 static void
109 version(void)
110 {
111 	printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
112 			"liblzma %s\n", lzma_version_string());
113 
114 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
115 }
116 
117 
118 /// Parses command line options.
119 static void
120 parse_options(int argc, char **argv)
121 {
122 	static const char short_opts[] = "cdkM:hqQV";
123 	static const struct option long_opts[] = {
124 		{ "stdout",       no_argument,         NULL, 'c' },
125 		{ "to-stdout",    no_argument,         NULL, 'c' },
126 		{ "decompress",   no_argument,         NULL, 'd' },
127 		{ "uncompress",   no_argument,         NULL, 'd' },
128 		{ "keep",         no_argument,         NULL, 'k' },
129 		{ "quiet",        no_argument,         NULL, 'q' },
130 		{ "no-warn",      no_argument,         NULL, 'Q' },
131 		{ "help",         no_argument,         NULL, 'h' },
132 		{ "version",      no_argument,         NULL, 'V' },
133 		{ NULL,           0,                   NULL, 0   }
134 	};
135 
136 	int c;
137 
138 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
139 			!= -1) {
140 		switch (c) {
141 		case 'c':
142 		case 'd':
143 		case 'k':
144 		case 'Q':
145 			break;
146 
147 		case 'q':
148 			if (display_errors > 0)
149 				--display_errors;
150 
151 			break;
152 
153 		case 'h':
154 			help();
155 
156 		case 'V':
157 			version();
158 
159 		default:
160 			exit(EXIT_FAILURE);
161 		}
162 	}
163 
164 	return;
165 }
166 
167 
168 static void
169 uncompress(lzma_stream *strm, FILE *file, const char *filename)
170 {
171 	lzma_ret ret;
172 
173 	// Initialize the decoder
174 #ifdef LZMADEC
175 	ret = lzma_alone_decoder(strm, UINT64_MAX);
176 #else
177 	ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
178 #endif
179 
180 	// The only reasonable error here is LZMA_MEM_ERROR.
181 	if (ret != LZMA_OK) {
182 		my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
183 				: "Internal error (bug)");
184 		exit(EXIT_FAILURE);
185 	}
186 
187 	// Input and output buffers
188 	uint8_t in_buf[BUFSIZ];
189 	uint8_t out_buf[BUFSIZ];
190 
191 	strm->avail_in = 0;
192 	strm->next_out = out_buf;
193 	strm->avail_out = BUFSIZ;
194 
195 	lzma_action action = LZMA_RUN;
196 
197 	while (true) {
198 		if (strm->avail_in == 0) {
199 			strm->next_in = in_buf;
200 			strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
201 
202 			if (ferror(file)) {
203 				// POSIX says that fread() sets errno if
204 				// an error occurred. ferror() doesn't
205 				// touch errno.
206 				my_errorf("%s: Error reading input file: %s",
207 						filename, strerror(errno));
208 				exit(EXIT_FAILURE);
209 			}
210 
211 #ifndef LZMADEC
212 			// When using LZMA_CONCATENATED, we need to tell
213 			// liblzma when it has got all the input.
214 			if (feof(file))
215 				action = LZMA_FINISH;
216 #endif
217 		}
218 
219 		ret = lzma_code(strm, action);
220 
221 		// Write and check write error before checking decoder error.
222 		// This way as much data as possible gets written to output
223 		// even if decoder detected an error.
224 		if (strm->avail_out == 0 || ret != LZMA_OK) {
225 			const size_t write_size = BUFSIZ - strm->avail_out;
226 
227 			if (fwrite(out_buf, 1, write_size, stdout)
228 					!= write_size) {
229 				// Wouldn't be a surprise if writing to stderr
230 				// would fail too but at least try to show an
231 				// error message.
232 				my_errorf("Cannot write to standard output: "
233 						"%s", strerror(errno));
234 				exit(EXIT_FAILURE);
235 			}
236 
237 			strm->next_out = out_buf;
238 			strm->avail_out = BUFSIZ;
239 		}
240 
241 		if (ret != LZMA_OK) {
242 			if (ret == LZMA_STREAM_END) {
243 #ifdef LZMADEC
244 				// Check that there's no trailing garbage.
245 				if (strm->avail_in != 0
246 						|| fread(in_buf, 1, 1, file)
247 							!= 0
248 						|| !feof(file))
249 					ret = LZMA_DATA_ERROR;
250 				else
251 					return;
252 #else
253 				// lzma_stream_decoder() already guarantees
254 				// that there's no trailing garbage.
255 				assert(strm->avail_in == 0);
256 				assert(action == LZMA_FINISH);
257 				assert(feof(file));
258 				return;
259 #endif
260 			}
261 
262 			const char *msg;
263 			switch (ret) {
264 			case LZMA_MEM_ERROR:
265 				msg = strerror(ENOMEM);
266 				break;
267 
268 			case LZMA_FORMAT_ERROR:
269 				msg = "File format not recognized";
270 				break;
271 
272 			case LZMA_OPTIONS_ERROR:
273 				// FIXME: Better message?
274 				msg = "Unsupported compression options";
275 				break;
276 
277 			case LZMA_DATA_ERROR:
278 				msg = "File is corrupt";
279 				break;
280 
281 			case LZMA_BUF_ERROR:
282 				msg = "Unexpected end of input";
283 				break;
284 
285 			default:
286 				msg = "Internal error (bug)";
287 				break;
288 			}
289 
290 			my_errorf("%s: %s", filename, msg);
291 			exit(EXIT_FAILURE);
292 		}
293 	}
294 }
295 
296 
#ifdef ENABLE_SANDBOX
/// Enables the strict sandbox using whichever mechanism was detected at
/// build time (Capsicum, pledge(2), or Landlock).
///
/// All files that are still needed must already be open when this is
/// called. On failure an error is printed and the process exits with
/// EXIT_FAILURE, except for the Capsicum ENOSYS case described at the
/// error label.
///
/// \param      src_fd  File descriptor of the input file. Only the
///                     Capsicum variant uses it.
static void
sandbox_enter(int src_fd)
{
#if defined(HAVE_CAP_RIGHTS_LIMIT)
	// Capsicum needs FreeBSD 10.2 or later.
	cap_rights_t rights;

	if (cap_enter())
		goto error;

	// The input only needs to be readable.
	if (cap_rights_limit(src_fd, cap_rights_init(&rights, CAP_READ)))
		goto error;

	// If not reading from stdin, remove all capabilities from it.
	if (src_fd != STDIN_FILENO && cap_rights_limit(
			STDIN_FILENO, cap_rights_clear(&rights)))
		goto error;

	// stdout and stderr only need to be writable.
	if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, CAP_WRITE)))
		goto error;

	if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights, CAP_WRITE)))
		goto error;

#elif defined(HAVE_PLEDGE)
	// pledge() was introduced in OpenBSD 5.9.
	if (pledge("stdio", ""))
		goto error;

	(void)src_fd;
#elif defined(HAVE_LINUX_LANDLOCK_H)
	// Query the Landlock ABI version. A non-positive result is treated
	// as "Landlock not available" and the sandbox is skipped.
	int landlock_abi = syscall(SYS_landlock_create_ruleset,
			(void *)NULL, 0, LANDLOCK_CREATE_RULESET_VERSION);

	if (landlock_abi > 0) {
		// We support ABI versions 1-3.
		if (landlock_abi > 3)
			landlock_abi = 3;

		// Handle the 12 + ABI lowest access-right bits, that is,
		// one more bit for each supported ABI version.
		const struct landlock_ruleset_attr attr = {
			.handled_access_fs = (1ULL << (12 + landlock_abi)) - 1
		};

		const int ruleset_fd = syscall(SYS_landlock_create_ruleset,
				&attr, sizeof(attr), 0U);
		if (ruleset_fd < 0)
			goto error;

		// All files we need should have already been opened. Thus,
		// we don't need to add any rules using landlock_add_rule(2)
		// before activating the sandbox.
		if (syscall(SYS_landlock_restrict_self, ruleset_fd, 0U) != 0)
			goto error;

		// The ruleset has been applied to the process, so the
		// descriptor is no longer needed. Close it instead of
		// leaking it for the rest of the process lifetime.
		(void)close(ruleset_fd);
	}

	(void)src_fd;
#else
#	error ENABLE_SANDBOX is defined but no sandboxing method was found.
#endif

	return;

error:
#ifdef HAVE_CAP_RIGHTS_LIMIT
	// If a kernel is configured without capability mode support or
	// used in an emulator that does not implement the capability
	// system calls, then the Capsicum system calls will fail and set
	// errno to ENOSYS. In that case xzdec will silently run without
	// the sandbox.
	if (errno == ENOSYS)
		return;
#endif
	my_errorf("Failed to enable the sandbox");
	exit(EXIT_FAILURE);
}
#endif
374 
375 
376 int
377 main(int argc, char **argv)
378 {
379 #ifdef HAVE_PLEDGE
380 	// OpenBSD's pledge(2) sandbox.
381 	// Initially enable the sandbox slightly more relaxed so that
382 	// the process can still open files. This allows the sandbox to
383 	// be enabled when parsing command line arguments and decompressing
384 	// all files (the more strict sandbox only restricts the last file
385 	// that is decompressed).
386 	if (pledge("stdio rpath", "")) {
387 		my_errorf("Failed to enable the sandbox");
388 		exit(EXIT_FAILURE);
389 	}
390 #endif
391 
392 #ifdef HAVE_LINUX_LANDLOCK_H
393 	// Prevent the process from gaining new privileges. The return
394 	// is ignored to keep compatibility with old kernels.
395 	(void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
396 #endif
397 
398 	// Initialize progname which we will be used in error messages.
399 	tuklib_progname_init(argv);
400 
401 	// Parse the command line options.
402 	parse_options(argc, argv);
403 
404 	// The same lzma_stream is used for all files that we decode. This way
405 	// we don't need to reallocate memory for every file if they use same
406 	// compression settings.
407 	lzma_stream strm = LZMA_STREAM_INIT;
408 
409 	// Some systems require setting stdin and stdout to binary mode.
410 #ifdef TUKLIB_DOSLIKE
411 	setmode(fileno(stdin), O_BINARY);
412 	setmode(fileno(stdout), O_BINARY);
413 #endif
414 
415 	if (optind == argc) {
416 		// No filenames given, decode from stdin.
417 #ifdef ENABLE_SANDBOX
418 		sandbox_enter(STDIN_FILENO);
419 #endif
420 		uncompress(&strm, stdin, "(stdin)");
421 	} else {
422 		// Loop through the filenames given on the command line.
423 		do {
424 			FILE *src_file;
425 			const char *src_name;
426 
427 			// "-" indicates stdin.
428 			if (strcmp(argv[optind], "-") == 0) {
429 				src_file = stdin;
430 				src_name = "(stdin)";
431 			} else {
432 				src_name = argv[optind];
433 				src_file = fopen(src_name, "rb");
434 				if (src_file == NULL) {
435 					my_errorf("%s: %s", src_name,
436 							strerror(errno));
437 					exit(EXIT_FAILURE);
438 				}
439 			}
440 #ifdef ENABLE_SANDBOX
441 			// Enable the sandbox for the last file. When the
442 			// strict sandbox is enabled the process can no
443 			// longer open additional files. It is likely that
444 			// the most common way to use xzdec is to
445 			// decompress a single file, so this fully protects
446 			// most use cases.
447 			if (optind == argc - 1)
448 				sandbox_enter(fileno(src_file));
449 #endif
450 			uncompress(&strm, src_file, src_name);
451 
452 			if (src_file != stdin)
453 				fclose(src_file);
454 		} while (++optind < argc);
455 	}
456 
457 #ifndef NDEBUG
458 	// Free the memory only when debugging. Freeing wastes some time,
459 	// but allows detecting possible memory leaks with Valgrind.
460 	lzma_end(&strm);
461 #endif
462 
463 	tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
464 }
465