1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file xzdec.c 6 /// \brief Simple single-threaded tool to uncompress .xz or .lzma files 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "sysdefs.h" 13 #include "lzma.h" 14 15 #include <stdarg.h> 16 #include <errno.h> 17 #include <locale.h> 18 #include <stdio.h> 19 20 #ifndef _MSC_VER 21 # include <unistd.h> 22 #endif 23 24 #ifdef HAVE_CAP_RIGHTS_LIMIT 25 # include <sys/capsicum.h> 26 #endif 27 28 #ifdef HAVE_LINUX_LANDLOCK 29 # include "my_landlock.h" 30 #endif 31 32 #if defined(HAVE_CAP_RIGHTS_LIMIT) || defined(HAVE_PLEDGE) \ 33 || defined(HAVE_LINUX_LANDLOCK) 34 # define ENABLE_SANDBOX 1 35 #endif 36 37 #include "getopt.h" 38 #include "tuklib_progname.h" 39 #include "tuklib_mbstr_nonprint.h" 40 #include "tuklib_exit.h" 41 42 #ifdef TUKLIB_DOSLIKE 43 # include <fcntl.h> 44 # include <io.h> 45 # ifdef _MSC_VER 46 # define fileno _fileno 47 # define setmode _setmode 48 # endif 49 #endif 50 51 52 #ifdef LZMADEC 53 # define TOOL_FORMAT "lzma" 54 #else 55 # define TOOL_FORMAT "xz" 56 #endif 57 58 59 /// Error messages are suppressed if this is zero, which is the case when 60 /// --quiet has been given at least twice. 61 static int display_errors = 2; 62 63 64 lzma_attribute((__format__(__printf__, 1, 2))) 65 static void 66 my_errorf(const char *fmt, ...) 67 { 68 va_list ap; 69 va_start(ap, fmt); 70 71 if (display_errors) { 72 fprintf(stderr, "%s: ", progname); 73 vfprintf(stderr, fmt, ap); 74 fprintf(stderr, "\n"); 75 } 76 77 va_end(ap); 78 return; 79 } 80 81 82 tuklib_attr_noreturn 83 static void 84 help(void) 85 { 86 printf( 87 "Usage: %s [OPTION]... [FILE]...\n" 88 "Decompress files in the ." TOOL_FORMAT " format to standard output.\n" 89 "\n" 90 " -d, --decompress (ignored, only decompression is supported)\n" 91 " -k, --keep (ignored, files are never deleted)\n" 92 " -c, --stdout (ignored, output is always written to standard output)\n" 93 " -q, --quiet specify *twice* to suppress errors\n" 94 " -Q, --no-warn (ignored, the exit status 2 is never used)\n" 95 " -h, --help display this help and exit\n" 96 " -V, --version display the version number and exit\n" 97 "\n" 98 "With no FILE, or when FILE is -, read standard input.\n" 99 "\n" 100 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n" 101 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname); 102 103 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 104 } 105 106 107 tuklib_attr_noreturn 108 static void 109 version(void) 110 { 111 printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n" 112 "liblzma %s\n", lzma_version_string()); 113 114 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 115 } 116 117 118 /// Parses command line options. 119 static void 120 parse_options(int argc, char **argv) 121 { 122 static const char short_opts[] = "cdkhqQV"; 123 static const struct option long_opts[] = { 124 { "stdout", no_argument, NULL, 'c' }, 125 { "to-stdout", no_argument, NULL, 'c' }, 126 { "decompress", no_argument, NULL, 'd' }, 127 { "uncompress", no_argument, NULL, 'd' }, 128 { "keep", no_argument, NULL, 'k' }, 129 { "quiet", no_argument, NULL, 'q' }, 130 { "no-warn", no_argument, NULL, 'Q' }, 131 { "help", no_argument, NULL, 'h' }, 132 { "version", no_argument, NULL, 'V' }, 133 { NULL, 0, NULL, 0 } 134 }; 135 136 int c; 137 138 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 139 != -1) { 140 switch (c) { 141 case 'c': 142 case 'd': 143 case 'k': 144 case 'Q': 145 break; 146 147 case 'q': 148 if (display_errors > 0) 149 --display_errors; 150 151 break; 152 153 case 'h': 154 help(); 155 156 case 'V': 157 version(); 158 159 default: 160 exit(EXIT_FAILURE); 161 } 162 } 163 164 return; 165 } 166 167 168 static void 169 uncompress(lzma_stream *strm, FILE *file, const char *filename) 170 { 171 lzma_ret ret; 172 173 // Initialize the decoder 174 #ifdef LZMADEC 175 ret = lzma_alone_decoder(strm, UINT64_MAX); 176 #else 177 ret = lzma_stream_decoder(strm, UINT64_MAX, LZMA_CONCATENATED); 178 #endif 179 180 // The only reasonable error here is LZMA_MEM_ERROR. 181 if (ret != LZMA_OK) { 182 my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM) 183 : "Internal error (bug)"); 184 exit(EXIT_FAILURE); 185 } 186 187 // Input and output buffers 188 uint8_t in_buf[BUFSIZ]; 189 uint8_t out_buf[BUFSIZ]; 190 191 strm->avail_in = 0; 192 strm->next_out = out_buf; 193 strm->avail_out = BUFSIZ; 194 195 lzma_action action = LZMA_RUN; 196 197 while (true) { 198 if (strm->avail_in == 0) { 199 strm->next_in = in_buf; 200 strm->avail_in = fread(in_buf, 1, BUFSIZ, file); 201 202 if (ferror(file)) { 203 // POSIX says that fread() sets errno if 204 // an error occurred. ferror() doesn't 205 // touch errno. 206 my_errorf("%s: Error reading input file: %s", 207 tuklib_mask_nonprint(filename), 208 strerror(errno)); 209 exit(EXIT_FAILURE); 210 } 211 212 #ifndef LZMADEC 213 // When using LZMA_CONCATENATED, we need to tell 214 // liblzma when it has got all the input. 215 if (feof(file)) 216 action = LZMA_FINISH; 217 #endif 218 } 219 220 ret = lzma_code(strm, action); 221 222 // Write and check write error before checking decoder error. 223 // This way as much data as possible gets written to output 224 // even if decoder detected an error. 225 if (strm->avail_out == 0 || ret != LZMA_OK) { 226 const size_t write_size = BUFSIZ - strm->avail_out; 227 228 if (fwrite(out_buf, 1, write_size, stdout) 229 != write_size) { 230 // Wouldn't be a surprise if writing to stderr 231 // would fail too but at least try to show an 232 // error message. 233 #if defined(_WIN32) && !defined(__CYGWIN__) 234 // On native Windows, broken pipe is reported 235 // as EINVAL. Don't show an error message 236 // in this case. 237 if (errno != EINVAL) 238 #endif 239 { 240 my_errorf("Cannot write to " 241 "standard output: " 242 "%s", strerror(errno)); 243 } 244 exit(EXIT_FAILURE); 245 } 246 247 strm->next_out = out_buf; 248 strm->avail_out = BUFSIZ; 249 } 250 251 if (ret != LZMA_OK) { 252 if (ret == LZMA_STREAM_END) { 253 #ifdef LZMADEC 254 // Check that there's no trailing garbage. 255 if (strm->avail_in != 0 256 || fread(in_buf, 1, 1, file) 257 != 0 258 || !feof(file)) 259 ret = LZMA_DATA_ERROR; 260 else 261 return; 262 #else 263 // lzma_stream_decoder() already guarantees 264 // that there's no trailing garbage. 265 assert(strm->avail_in == 0); 266 assert(action == LZMA_FINISH); 267 assert(feof(file)); 268 return; 269 #endif 270 } 271 272 const char *msg; 273 switch (ret) { 274 case LZMA_MEM_ERROR: 275 msg = strerror(ENOMEM); 276 break; 277 278 case LZMA_FORMAT_ERROR: 279 msg = "File format not recognized"; 280 break; 281 282 case LZMA_OPTIONS_ERROR: 283 // FIXME: Better message? 284 msg = "Unsupported compression options"; 285 break; 286 287 case LZMA_DATA_ERROR: 288 msg = "File is corrupt"; 289 break; 290 291 case LZMA_BUF_ERROR: 292 msg = "Unexpected end of input"; 293 break; 294 295 default: 296 msg = "Internal error (bug)"; 297 break; 298 } 299 300 my_errorf("%s: %s", tuklib_mask_nonprint(filename), 301 msg); 302 exit(EXIT_FAILURE); 303 } 304 } 305 } 306 307 308 #ifdef ENABLE_SANDBOX 309 static void 310 sandbox_enter(int src_fd) 311 { 312 #if defined(HAVE_CAP_RIGHTS_LIMIT) 313 // Capsicum needs FreeBSD 10.2 or later. 314 cap_rights_t rights; 315 316 if (cap_enter()) 317 goto error; 318 319 if (cap_rights_limit(src_fd, cap_rights_init(&rights, CAP_READ))) 320 goto error; 321 322 // If not reading from stdin, remove all capabilities from it. 323 if (src_fd != STDIN_FILENO && cap_rights_limit( 324 STDIN_FILENO, cap_rights_clear(&rights))) 325 goto error; 326 327 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, 328 CAP_WRITE))) 329 goto error; 330 331 if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights, 332 CAP_WRITE))) 333 goto error; 334 335 #elif defined(HAVE_PLEDGE) 336 // pledge() was introduced in OpenBSD 5.9. 337 if (pledge("stdio", "")) 338 goto error; 339 340 (void)src_fd; 341 342 #elif defined(HAVE_LINUX_LANDLOCK) 343 struct landlock_ruleset_attr attr; 344 if (my_landlock_ruleset_attr_forbid_all(&attr) > 0) { 345 const int ruleset_fd = my_landlock_create_ruleset( 346 &attr, sizeof(attr), 0); 347 if (ruleset_fd < 0) 348 goto error; 349 350 // All files we need should have already been opened. Thus, 351 // we don't need to add any rules using landlock_add_rule(2) 352 // before activating the sandbox. 353 if (my_landlock_restrict_self(ruleset_fd, 0) != 0) 354 goto error; 355 356 (void)close(ruleset_fd); 357 } 358 359 (void)src_fd; 360 361 #else 362 # error ENABLE_SANDBOX is defined but no sandboxing method was found. 363 #endif 364 365 return; 366 367 error: 368 #ifdef HAVE_CAP_RIGHTS_LIMIT 369 // If a kernel is configured without capability mode support or 370 // used in an emulator that does not implement the capability 371 // system calls, then the Capsicum system calls will fail and set 372 // errno to ENOSYS. In that case xzdec will silently run without 373 // the sandbox. 374 if (errno == ENOSYS) 375 return; 376 #endif 377 378 my_errorf("Failed to enable the sandbox"); 379 exit(EXIT_FAILURE); 380 } 381 #endif 382 383 384 int 385 main(int argc, char **argv) 386 { 387 // Initialize progname which will be used in error messages. 388 tuklib_progname_init(argv); 389 390 #ifdef HAVE_PLEDGE 391 // OpenBSD's pledge(2) sandbox. 392 // Initially enable the sandbox slightly more relaxed so that 393 // the process can still open files. This allows the sandbox to 394 // be enabled when parsing command line arguments and decompressing 395 // all files (the more strict sandbox only restricts the last file 396 // that is decompressed). 397 if (pledge("stdio rpath", "")) { 398 my_errorf("Failed to enable the sandbox"); 399 exit(EXIT_FAILURE); 400 } 401 #endif 402 403 #ifdef HAVE_LINUX_LANDLOCK 404 // Prevent the process from gaining new privileges. This must be done 405 // before landlock_restrict_self(2) but since we will never need new 406 // privileges, this call can be done here already. 407 // 408 // This is supported since Linux 3.5. Ignore the return value to 409 // keep compatibility with old kernels. landlock_restrict_self(2) 410 // will fail if the no_new_privs attribute isn't set, thus if prctl() 411 // fails here the error will still be detected when it matters. 412 (void)prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); 413 #endif 414 415 // We need to set the locale even though we don't have any 416 // translated messages: 417 // 418 // - tuklib_mask_nonprint() has locale-specific behavior (LC_CTYPE). 419 // 420 // - This is needed on Windows to make non-ASCII filenames display 421 // properly when the active code page has been set to UTF-8 422 // in the application manifest. 423 setlocale(LC_ALL, ""); 424 425 // Parse the command line options. 426 parse_options(argc, argv); 427 428 // The same lzma_stream is used for all files that we decode. This way 429 // we don't need to reallocate memory for every file if they use same 430 // compression settings. 431 lzma_stream strm = LZMA_STREAM_INIT; 432 433 // Some systems require setting stdin and stdout to binary mode. 434 #ifdef TUKLIB_DOSLIKE 435 setmode(fileno(stdin), O_BINARY); 436 setmode(fileno(stdout), O_BINARY); 437 #endif 438 439 if (optind == argc) { 440 // No filenames given, decode from stdin. 441 #ifdef ENABLE_SANDBOX 442 sandbox_enter(STDIN_FILENO); 443 #endif 444 uncompress(&strm, stdin, "(stdin)"); 445 } else { 446 // Loop through the filenames given on the command line. 447 do { 448 FILE *src_file; 449 const char *src_name; 450 451 // "-" indicates stdin. 452 if (strcmp(argv[optind], "-") == 0) { 453 src_file = stdin; 454 src_name = "(stdin)"; 455 } else { 456 src_name = argv[optind]; 457 src_file = fopen(src_name, "rb"); 458 if (src_file == NULL) { 459 my_errorf("%s: %s", 460 tuklib_mask_nonprint( 461 src_name), 462 strerror(errno)); 463 exit(EXIT_FAILURE); 464 } 465 } 466 #ifdef ENABLE_SANDBOX 467 // Enable the strict sandbox for the last file. 468 // Then the process can no longer open additional 469 // files. The typical xzdec use case is to decompress 470 // a single file so this way the strictest sandboxing 471 // is used in most cases. 472 if (optind == argc - 1) 473 sandbox_enter(fileno(src_file)); 474 #endif 475 uncompress(&strm, src_file, src_name); 476 477 if (src_file != stdin) 478 (void)fclose(src_file); 479 } while (++optind < argc); 480 } 481 482 #ifndef NDEBUG 483 // Free the memory only when debugging. Freeing wastes some time, 484 // but allows detecting possible memory leaks with Valgrind. 485 lzma_end(&strm); 486 #endif 487 488 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors); 489 } 490