1 // SPDX-License-Identifier: 0BSD
2
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file xzdec.c
6 /// \brief Simple single-threaded tool to uncompress .xz or .lzma files
7 //
8 // Author: Lasse Collin
9 //
10 ///////////////////////////////////////////////////////////////////////////////
11
12 #include "sysdefs.h"
13 #include "lzma.h"
14
15 #include <stdarg.h>
16 #include <errno.h>
17 #include <locale.h>
18 #include <stdio.h>
19
20 #ifndef _MSC_VER
21 # include <unistd.h>
22 #endif
23
24 #ifdef HAVE_CAP_RIGHTS_LIMIT
25 # include <sys/capsicum.h>
26 #endif
27
28 #ifdef HAVE_LINUX_LANDLOCK
29 # include "my_landlock.h"
30 #endif
31
32 #if defined(HAVE_CAP_RIGHTS_LIMIT) || defined(HAVE_PLEDGE) \
33 || defined(HAVE_LINUX_LANDLOCK)
34 # define ENABLE_SANDBOX 1
35 #endif
36
37 #include "getopt.h"
38 #include "tuklib_progname.h"
39 #include "tuklib_mbstr_nonprint.h"
40 #include "tuklib_exit.h"
41
42 #ifdef TUKLIB_DOSLIKE
43 # include <fcntl.h>
44 # include <io.h>
45 # ifdef _MSC_VER
46 # define fileno _fileno
47 # define setmode _setmode
48 # endif
49 #endif
50
51
// Name of the file format handled by this build. It is pasted into the
// help text and into the program name shown by --version.
#ifdef LZMADEC
#	define TOOL_FORMAT "lzma"
#else
#	define TOOL_FORMAT "xz"
#endif
57
58
/// Error messages are suppressed if this is zero, which is the case when
/// --quiet has been given at least twice. The value starts at two and
/// parse_options() decrements it once per --quiet, never going below zero.
static int display_errors = 2;
62
63
64 lzma_attribute((__format__(__printf__, 1, 2)))
65 static void
my_errorf(const char * fmt,...)66 my_errorf(const char *fmt, ...)
67 {
68 va_list ap;
69 va_start(ap, fmt);
70
71 if (display_errors) {
72 fprintf(stderr, "%s: ", progname);
73 vfprintf(stderr, fmt, ap);
74 fprintf(stderr, "\n");
75 }
76
77 va_end(ap);
78 return;
79 }
80
81
82 tuklib_attr_noreturn
83 static void
help(void)84 help(void)
85 {
86 printf(
87 "Usage: %s [OPTION]... [FILE]...\n"
88 "Decompress files in the ." TOOL_FORMAT " format to standard output.\n"
89 "\n"
90 " -d, --decompress (ignored, only decompression is supported)\n"
91 " -k, --keep (ignored, files are never deleted)\n"
92 " -c, --stdout (ignored, output is always written to standard output)\n"
93 " -q, --quiet specify *twice* to suppress errors\n"
94 " -Q, --no-warn (ignored, the exit status 2 is never used)\n"
95 " -h, --help display this help and exit\n"
96 " -V, --version display the version number and exit\n"
97 "\n"
98 "With no FILE, or when FILE is -, read standard input.\n"
99 "\n"
100 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
101 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname);
102
103 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
104 }
105
106
107 tuklib_attr_noreturn
108 static void
version(void)109 version(void)
110 {
111 printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
112 "liblzma %s\n", lzma_version_string());
113
114 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
115 }
116
117
118 /// Parses command line options.
119 static void
parse_options(int argc,char ** argv)120 parse_options(int argc, char **argv)
121 {
122 static const char short_opts[] = "cdkhqQV";
123 static const struct option long_opts[] = {
124 { "stdout", no_argument, NULL, 'c' },
125 { "to-stdout", no_argument, NULL, 'c' },
126 { "decompress", no_argument, NULL, 'd' },
127 { "uncompress", no_argument, NULL, 'd' },
128 { "keep", no_argument, NULL, 'k' },
129 { "quiet", no_argument, NULL, 'q' },
130 { "no-warn", no_argument, NULL, 'Q' },
131 { "help", no_argument, NULL, 'h' },
132 { "version", no_argument, NULL, 'V' },
133 { NULL, 0, NULL, 0 }
134 };
135
136 int c;
137
138 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
139 != -1) {
140 switch (c) {
141 case 'c':
142 case 'd':
143 case 'k':
144 case 'Q':
145 break;
146
147 case 'q':
148 if (display_errors > 0)
149 --display_errors;
150
151 break;
152
153 case 'h':
154 help();
155
156 case 'V':
157 version();
158
159 default:
160 exit(EXIT_FAILURE);
161 }
162 }
163
164 return;
165 }
166
167
168 static void
uncompress(lzma_stream * strm,FILE * file,const char * filename)169 uncompress(lzma_stream *strm, FILE *file, const char *filename)
170 {
171 lzma_ret ret;
172
173 // Initialize the decoder
174 #ifdef LZMADEC
175 ret = lzma_alone_decoder(strm, UINT64_MAX);
176 #else
177 ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED);
178 #endif
179
180 // The only reasonable error here is LZMA_MEM_ERROR.
181 if (ret != LZMA_OK) {
182 my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
183 : "Internal error (bug)");
184 exit(EXIT_FAILURE);
185 }
186
187 // Input and output buffers
188 uint8_t in_buf[BUFSIZ];
189 uint8_t out_buf[BUFSIZ];
190
191 strm->avail_in = 0;
192 strm->next_out = out_buf;
193 strm->avail_out = BUFSIZ;
194
195 lzma_action action = LZMA_RUN;
196
197 while (true) {
198 if (strm->avail_in == 0) {
199 strm->next_in = in_buf;
200 strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
201
202 if (ferror(file)) {
203 // POSIX says that fread() sets errno if
204 // an error occurred. ferror() doesn't
205 // touch errno.
206 my_errorf("%s: Error reading input file: %s",
207 tuklib_mask_nonprint(filename),
208 strerror(errno));
209 exit(EXIT_FAILURE);
210 }
211
212 #ifndef LZMADEC
213 // When using LZMA_CONCATENATED, we need to tell
214 // liblzma when it has got all the input.
215 if (feof(file))
216 action = LZMA_FINISH;
217 #endif
218 }
219
220 ret = lzma_code(strm, action);
221
222 // Write and check write error before checking decoder error.
223 // This way as much data as possible gets written to output
224 // even if decoder detected an error.
225 if (strm->avail_out == 0 || ret != LZMA_OK) {
226 const size_t write_size = BUFSIZ - strm->avail_out;
227
228 if (fwrite(out_buf, 1, write_size, stdout)
229 != write_size) {
230 // Wouldn't be a surprise if writing to stderr
231 // would fail too but at least try to show an
232 // error message.
233 #if defined(_WIN32) && !defined(__CYGWIN__)
234 // On native Windows, broken pipe is reported
235 // as EINVAL. Don't show an error message
236 // in this case.
237 if (errno != EINVAL)
238 #endif
239 {
240 my_errorf("Cannot write to "
241 "standard output: "
242 "%s", strerror(errno));
243 }
244 exit(EXIT_FAILURE);
245 }
246
247 strm->next_out = out_buf;
248 strm->avail_out = BUFSIZ;
249 }
250
251 if (ret != LZMA_OK) {
252 if (ret == LZMA_STREAM_END) {
253 #ifdef LZMADEC
254 // Check that there's no trailing garbage.
255 if (strm->avail_in != 0
256 || fread(in_buf, 1, 1, file)
257 != 0
258 || !feof(file))
259 ret = LZMA_DATA_ERROR;
260 else
261 return;
262 #else
263 // lzma_stream_decoder() already guarantees
264 // that there's no trailing garbage.
265 assert(strm->avail_in == 0);
266 assert(action == LZMA_FINISH);
267 assert(feof(file));
268 return;
269 #endif
270 }
271
272 const char *msg;
273 switch (ret) {
274 case LZMA_MEM_ERROR:
275 msg = strerror(ENOMEM);
276 break;
277
278 case LZMA_FORMAT_ERROR:
279 msg = "File format not recognized";
280 break;
281
282 case LZMA_OPTIONS_ERROR:
283 // FIXME: Better message?
284 msg = "Unsupported compression options";
285 break;
286
287 case LZMA_DATA_ERROR:
288 msg = "File is corrupt";
289 break;
290
291 case LZMA_BUF_ERROR:
292 msg = "Unexpected end of input";
293 break;
294
295 default:
296 msg = "Internal error (bug)";
297 break;
298 }
299
300 my_errorf("%s: %s", tuklib_mask_nonprint(filename),
301 msg);
302 exit(EXIT_FAILURE);
303 }
304 }
305 }
306
307
#ifdef ENABLE_SANDBOX
/// \brief      Enable the strict sandbox
///
/// Exactly one method is compiled in: Capsicum, pledge(2), or Landlock.
/// If enabling the sandbox fails, an error message is printed and the
/// process exits with EXIT_FAILURE. The exception is Capsicum failing
/// with ENOSYS, in which case the function returns without a sandbox.
///
/// \param      src_fd  File descriptor of the input file. Only the
///                     Capsicum code uses this.
static void
sandbox_enter(int src_fd)
{
#if defined(HAVE_CAP_RIGHTS_LIMIT)
	// Capsicum needs FreeBSD 10.2 or later.
	cap_rights_t caps;

	if (cap_enter() != 0)
		goto error;

	// The input file only needs to be readable.
	if (cap_rights_limit(src_fd, cap_rights_init(&caps, CAP_READ)) != 0)
		goto error;

	// If not reading from stdin, remove all capabilities from it.
	if (src_fd != STDIN_FILENO) {
		if (cap_rights_limit(STDIN_FILENO,
				cap_rights_clear(&caps)) != 0)
			goto error;
	}

	// stdout and stderr, in this order, are limited to writing.
	static const int write_only_fds[] = {
		STDOUT_FILENO,
		STDERR_FILENO,
	};

	for (size_t i = 0; i < sizeof(write_only_fds)
			/ sizeof(write_only_fds[0]); ++i) {
		if (cap_rights_limit(write_only_fds[i],
				cap_rights_init(&caps, CAP_WRITE)) != 0)
			goto error;
	}

#elif defined(HAVE_PLEDGE)
	// pledge() was introduced in OpenBSD 5.9.
	if (pledge("stdio", "") != 0)
		goto error;

	(void)src_fd;

#elif defined(HAVE_LINUX_LANDLOCK)
	struct landlock_ruleset_attr attr;

	// The ruleset is created and applied only when the helper returns
	// a positive value; otherwise no sandbox is enabled here.
	if (my_landlock_ruleset_attr_forbid_all(&attr) > 0) {
		const int ruleset_fd = my_landlock_create_ruleset(
				&attr, sizeof(attr), 0);
		if (ruleset_fd < 0)
			goto error;

		// All files we need should have already been opened. Thus,
		// we don't need to add any rules using landlock_add_rule(2)
		// before activating the sandbox.
		if (my_landlock_restrict_self(ruleset_fd, 0) != 0)
			goto error;

		(void)close(ruleset_fd);
	}

	(void)src_fd;

#else
#	error ENABLE_SANDBOX is defined but no sandboxing method was found.
#endif

	return;

error:
#ifdef HAVE_CAP_RIGHTS_LIMIT
	// If a kernel is configured without capability mode support or
	// used in an emulator that does not implement the capability
	// system calls, then the Capsicum system calls will fail and set
	// errno to ENOSYS. In that case xzdec will silently run without
	// the sandbox.
	if (errno == ENOSYS)
		return;
#endif

	my_errorf("Failed to enable the sandbox");
	exit(EXIT_FAILURE);
}
#endif
382
383
384 int
main(int argc,char ** argv)385 main(int argc, char **argv)
386 {
387 // Initialize progname which will be used in error messages.
388 tuklib_progname_init(argv);
389
390 #ifdef HAVE_PLEDGE
391 // OpenBSD's pledge(2) sandbox.
392 // Initially enable the sandbox slightly more relaxed so that
393 // the process can still open files. This allows the sandbox to
394 // be enabled when parsing command line arguments and decompressing
395 // all files (the more strict sandbox only restricts the last file
396 // that is decompressed).
397 if (pledge("stdio rpath", "")) {
398 my_errorf("Failed to enable the sandbox");
399 exit(EXIT_FAILURE);
400 }
401 #endif
402
403 #ifdef HAVE_LINUX_LANDLOCK
404 // Prevent the process from gaining new privileges. This must be done
405 // before landlock_restrict_self(2) but since we will never need new
406 // privileges, this call can be done here already.
407 //
408 // This is supported since Linux 3.5. Ignore the return value to
409 // keep compatibility with old kernels. landlock_restrict_self(2)
410 // will fail if the no_new_privs attribute isn't set, thus if prctl()
411 // fails here the error will still be detected when it matters.
412 (void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
413 #endif
414
415 // We need to set the locale even though we don't have any
416 // translated messages:
417 //
418 // - tuklib_mask_nonprint() has locale-specific behavior (LC_CTYPE).
419 //
420 // - This is needed on Windows to make non-ASCII filenames display
421 // properly when the active code page has been set to UTF-8
422 // in the application manifest.
423 setlocale(LC_ALL, "");
424
425 // Parse the command line options.
426 parse_options(argc, argv);
427
428 // The same lzma_stream is used for all files that we decode. This way
429 // we don't need to reallocate memory for every file if they use same
430 // compression settings.
431 lzma_stream strm = LZMA_STREAM_INIT;
432
433 // Some systems require setting stdin and stdout to binary mode.
434 #ifdef TUKLIB_DOSLIKE
435 setmode(fileno(stdin), O_BINARY);
436 setmode(fileno(stdout), O_BINARY);
437 #endif
438
439 if (optind == argc) {
440 // No filenames given, decode from stdin.
441 #ifdef ENABLE_SANDBOX
442 sandbox_enter(STDIN_FILENO);
443 #endif
444 uncompress(&strm, stdin, "(stdin)");
445 } else {
446 // Loop through the filenames given on the command line.
447 do {
448 FILE *src_file;
449 const char *src_name;
450
451 // "-" indicates stdin.
452 if (strcmp(argv[optind], "-") == 0) {
453 src_file = stdin;
454 src_name = "(stdin)";
455 } else {
456 src_name = argv[optind];
457 src_file = fopen(src_name, "rb");
458 if (src_file == NULL) {
459 my_errorf("%s: %s",
460 tuklib_mask_nonprint(
461 src_name),
462 strerror(errno));
463 exit(EXIT_FAILURE);
464 }
465 }
466 #ifdef ENABLE_SANDBOX
467 // Enable the strict sandbox for the last file.
468 // Then the process can no longer open additional
469 // files. The typical xzdec use case is to decompress
470 // a single file so this way the strictest sandboxing
471 // is used in most cases.
472 if (optind == argc - 1)
473 sandbox_enter(fileno(src_file));
474 #endif
475 uncompress(&strm, src_file, src_name);
476
477 if (src_file != stdin)
478 (void)fclose(src_file);
479 } while (++optind < argc);
480 }
481
482 #ifndef NDEBUG
483 // Free the memory only when debugging. Freeing wastes some time,
484 // but allows detecting possible memory leaks with Valgrind.
485 lzma_end(&strm);
486 #endif
487
488 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
489 }
490